#! /usr/bin/python3
# remove virtual packages from disjunctions with real packages
# the idea to clean up a repository like this came from Helmut Grohne
#
# applying this solution to the problem might create uninstallable packages due
# to conflicts:
#
# Package: foo
# Depends: A | B, C
#
# Package: bar
# Provides: B
#
# Package: A
#
# Package: C
# Depends: D
#
# Package: D
# Conflicts: A
#
# In the above case, B would be removed from the disjunction of package "foo",
# leaving only A. But through D, C conflicts with A, making "foo"
# uninstallable.

import sys
sys.path.append('/usr/share/botch')
from util import get_fh_out, read_tag_file
import re


# One alternative of a dependency field: package name, optional ":arch"
# qualifier and optional parenthesised version constraint. Whitespace before
# the opening parenthesis is optional ("foo(>= 1)" is a versioned dependency).
_PKGREL_RE = re.compile(
    r"\s*([a-zA-Z0-9][a-zA-Z0-9+.-]*)(?::[a-zA-Z0-9][a-zA-Z0-9-]*)?"
    r"(?:\s*\(\s*(<<|<=|=|>=|>>|<|>).*?\))?")


def extract_name_ver(pkgrel):
    """Split one dependency alternative into its name and relation operator.

    pkgrel is the raw text of a single alternative, e.g. " foo:any (>= 1.0)".
    The text is never modified so that the field can be reassembled later.

    Returns a tuple (pkgrel, name, relop) where relop is the version
    relation operator ("<<", "<=", "=", ">=", ">>", "<" or ">") or None if
    the alternative carries no version constraint. If pkgrel does not start
    with a valid package name (for example an empty alternative), name and
    relop are both None instead of raising.
    """
    m = _PKGREL_RE.match(pkgrel)
    if m is None:
        # unparsable alternative: report "no name" so that it can never be
        # mistaken for a real or virtual package
        return (pkgrel, None, None)
    return (pkgrel, m.group(1), m.group(2))


def cleandisj(disj, real_pkgs, virtual_pkgs):
    """Reduce a disjunction of binary dependencies to a single alternative.

    disj is the list of raw alternatives of one dependency clause. The first
    alternative that is versioned or names a package in real_pkgs wins. If
    there is none, the first alternative naming a package in virtual_pkgs is
    used. If that fails as well, the disjunction is returned unchanged.
    """
    # a dependency without alternatives is left untouched
    if len(disj) == 1:
        return disj
    parsed = [extract_name_ver(alternative) for alternative in disj]
    # versioned dependencies must be provided by real packages, so they are
    # treated like dependencies on real packages
    for text, name, relop in parsed:
        if relop or name in real_pkgs:
            return [text]
    # nothing real remains: fall back to the first virtual package that is
    # provided by something
    for text, name, relop in parsed:
        if relop or name in virtual_pkgs:
            return [text]
    # nothing satisfies this disjunction, keep the original
    return disj


def cleansrcdisj(disj, real_pkgs, virtual_pkgs):
    """Reduce a disjunction of build dependencies to its first alternative.

    This mirrors what sbuild does in setup_apt_archive in
    lib/Sbuild/ResolverBase.pm: only the first alternative is kept. Later
    alternatives survive only if the first one is versioned and they are
    versioned dependencies on the same package name, as in
    "foo (rel x) | foo (rel y)".

    real_pkgs and virtual_pkgs are not consulted; they are accepted so that
    this function has the same signature as cleandisj.
    """
    # a dependency without alternatives is left untouched
    if len(disj) == 1:
        return disj
    parsed = [extract_name_ver(alternative) for alternative in disj]
    first_text, first_name, first_relop = parsed[0]
    kept = [first_text]
    if first_relop:
        # keep further version constraints on the very same package
        kept.extend(text for text, name, relop in parsed[1:]
                    if relop and name == first_name)
    return kept


def modify_field(field, real_pkgs, virtual_pkgs, cleanfunc):
    """Apply cleanfunc to every disjunction of a dependency field.

    field is the raw field value: comma separated clauses, each a list of
    alternatives separated by "|". cleanfunc is called as
    cleanfunc(alternatives, real_pkgs, virtual_pkgs) for every clause and
    must return the list of alternatives to keep. The result is the
    reassembled field value.
    """
    # plain string splitting instead of regexes keeps the whitespace of every
    # alternative intact, so the field can be put back together as it was
    clauses = []
    for clause in field.split(','):
        alternatives = clause.split('|')
        kept = cleanfunc(alternatives, real_pkgs, virtual_pkgs)
        clauses.append("|".join(kept))
    return ",".join(clauses)


def remove_virtual_disjunctions(
        inPackages, inSources, outPackages, outSources, remove_nonvirtual,
        verbose=False):
    """Rewrite dependency disjunctions and write the resulting paragraphs.

    inPackages and inSources are collections of paragraphs that support
    pkg[key], pkg.get(key), item assignment and pkg.dump(fh). inPackages is
    iterated several times, so it must not be a one-shot iterator.
    outPackages and outSources are context managers yielding a binary file
    handle; every paragraph is written followed by an empty line.

    Depends and Pre-Depends of binary packages are cleaned with cleandisj,
    or with cleansrcdisj if remove_nonvirtual is true. Build-Depends,
    Build-Depends-Indep and Build-Depends-Arch of source packages are always
    cleaned with cleansrcdisj. The paragraphs are modified in place.

    verbose is accepted but currently unused. Always returns True.
    """
    real_pkgs = set()
    virtual_pkgs = set()
    # collect the names of all real and all virtual (provided) packages
    for pkg in inPackages:
        real_pkgs.add(pkg['Package'])
        provides = pkg.get('Provides')
        if not provides:
            continue
        for provided in provides.split(','):
            # a Provides entry may carry a version ("foo (= 1.0)"); only the
            # package name is compared against dependency names
            name = provided.partition('(')[0].strip()
            if name:
                virtual_pkgs.add(name)
    # modify binary dependencies
    # if --remove-nonvirtual was given, use source package behaviour for
    # binary packages as well
    binary_cleanfunc = cleansrcdisj if remove_nonvirtual else cleandisj
    for pkg in inPackages:
        for field in ['Depends', 'Pre-Depends']:
            if pkg.get(field):
                pkg[field] = modify_field(
                    pkg[field], real_pkgs, virtual_pkgs, binary_cleanfunc)
    # modify build dependencies
    for pkg in inSources:
        for field in ['Build-Depends', 'Build-Depends-Indep',
                      'Build-Depends-Arch']:
            if pkg.get(field):
                pkg[field] = modify_field(
                    pkg[field], real_pkgs, virtual_pkgs, cleansrcdisj)
    with outPackages as outfile:
        for pkg in inPackages:
            pkg.dump(outfile)
            outfile.write(b"\n")
    with outSources as outfile:
        for pkg in inSources:
            pkg.dump(outfile)
            outfile.write(b"\n")
    return True

# command line entry point: parse the arguments, rewrite the dependency
# fields and report success through the exit status
if __name__ == '__main__':
    import argparse
    parser = argparse.ArgumentParser(
        description=("remove virtual packages from disjunctions with real "
                     "packages"))
    # the positional arguments are converted by the botch util helpers
    # read_tag_file (inputs) and get_fh_out (outputs)
    parser.add_argument(
        'inPackages', type=read_tag_file, help='input Packages files')
    parser.add_argument(
        'inSources', type=read_tag_file, help='input Sources files')
    parser.add_argument(
        'outPackages', type=get_fh_out, help='output Packages file')
    parser.add_argument(
        'outSources', type=get_fh_out, help='output Sources file')
    parser.add_argument('--remove-nonvirtual', action='store_true',
                        help="also remove all but the first alternative from "
                             "binary packages. This applies the same "
                             "algorithm to binary packages as is applied to "
                             "source packages")
    parser.add_argument('--verbose', action='store_true', help='be verbose')
    args = parser.parse_args()
    ret = remove_virtual_disjunctions(args.inPackages, args.inSources,
                                      args.outPackages, args.outSources,
                                      args.remove_nonvirtual, args.verbose)
    # exit status 0 on success, 1 otherwise. sys.exit is used because the
    # exit() builtin is injected by the site module for interactive use and
    # is not guaranteed to exist (e.g. under "python3 -S")
    sys.exit(0 if ret else 1)
