#! /usr/bin/env python3
"""
Check for obsolete binary packages
@contact: Debian FTP Master <ftpmaster@debian.org>
@copyright: 2000-2006 James Troup <james@nocrew.org>
@copyright: 2009 Torsten Werner <twerner@debian.org>
@license: GNU General Public License version 2 or later
"""
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
################################################################################
# ``If you're claiming that's a "problem" that needs to be "fixed",
# you might as well write some letters to God about how unfair entropy
# is while you're at it.'' -- 20020802143104.GA5628@azure.humbug.org.au
## TODO: fix NBS looping for version, implement Dubious NBS, fix up output of
## duplicate source package stuff, improve experimental ?, add overrides,
## avoid ANAIS for duplicated packages
################################################################################
import functools
import os
import re
import sys
from collections import defaultdict
import apt_pkg
from daklib import utils
from daklib.config import Config
from daklib.cruft import (
newer_version,
query_without_source,
queryNBS,
queryNBS_metadata,
report_multiple_source,
)
from daklib.dbconn import DBConn, get_suite, get_suite_architectures
from daklib.regexes import re_extract_src_version
################################################################################
# Module-level state shared between the report helpers below.
# Packages that add_nbs() already found to be absent from the suite, so the
# database is not queried for them again (the values are unused placeholders).
no_longer_in_suite = {} # Really should be static to add_nbs, but I'm lazy
# source package name -> raw "Binary" field of its Sources stanza (filled by main()).
source_binaries = {}
# source package name -> "Version" field of its Sources stanza (filled by main()).
source_versions = {}
################################################################################
def usage(exit_code=0):
    """Print the command-line help for ``dak cruft-report`` and exit.

    The help lists every option that main() registers, including
    ``-c/--commands-only``.

    exit_code -- status handed to sys.exit(); defaults to 0.

    Always raises SystemExit.
    """
    print(
        """Usage: dak cruft-report
Check for obsolete or duplicated packages.

  -h, --help                show this help and exit.
  -c, --commands-only       only print the suggested commands.
  -m, --mode=MODE           choose the MODE to run in (full, daily, bdo).
  -s, --suite=SUITE         check suite SUITE.
  -R, --rdep-check          check reverse dependencies
  -w, --wanna-build-dump    where to find the copies of https://buildd.debian.org/stats/*.txt"""
    )
    sys.exit(exit_code)
################################################################################
def print_info(s=""):
    """Print the informational line *s* unless Commands-Only is set.

    The flag is read from the "Cruft-Report::Options" configuration subtree
    on every call; when it is truthy nothing is printed.
    """
    options = Config().subtree("Cruft-Report::Options")
    if not options["Commands-Only"]:
        print(s)
def print_cmd(s, indent=4):
    """Print the suggested command *s*.

    In the human-readable display (Commands-Only unset) the command is
    prefixed with *indent* spaces; in Commands-Only mode it is printed as-is.
    """
    commands_only = Config().subtree("Cruft-Report::Options")["Commands-Only"]
    line = s if commands_only else " " * indent + s
    print(line)
################################################################################
def add_nbs(nbs_d, source, version, package, suite_id, session):
    """Record *package* under nbs_d[source][version] if it is still in the suite.

    Someone may already have removed the package, so the suite is queried
    first. Packages found to be gone are remembered in the module-level
    ``no_longer_in_suite`` dict so the query runs at most once per package.
    """
    if package in no_longer_in_suite:
        return

    still_present = session.execute(
        "SELECT b.id FROM binaries b, bin_associations ba\n"
        "WHERE ba.bin = b.id AND ba.suite = :suite_id\n"
        "AND b.package = :package LIMIT 1",
        {"suite_id": suite_id, "package": package},
    ).fetchall()
    if not still_present:
        no_longer_in_suite[package] = ""
        return

    nbs_d[source][version].add(package)
################################################################################
# Check for packages built on architectures they shouldn't be.
def do_anais(architecture, binaries_list, source, session):
    """Report binaries built on architectures their source does not list.

    architecture  -- the source's Architecture field (space separated);
                     "any" and "all" are never checked.
    binaries_list -- binary package names built by *source*.
    source        -- source package name, used only in the report text.
    session       -- database session used to look up the binaries.

    Returns the report text, or "" when nothing was found. The suite is
    taken from the module-level ``suite_id`` that main() sets.
    """
    if architecture in ("any", "all"):
        return ""

    by_version = functools.cmp_to_key(apt_pkg.version_compare)
    allowed = set(architecture.split())
    fragments = []

    for binary in binaries_list:
        rows = session.execute(
            "SELECT a.arch_string, b.version\n"
            "FROM binaries b, bin_associations ba, architecture a\n"
            "WHERE ba.suite = :suiteid AND ba.bin = b.id\n"
            "AND b.architecture = a.id AND b.package = :package",
            {"suiteid": suite_id, "package": binary},
        ).fetchall()

        # Newest version found on an architecture the source does list.
        valid_versions = sorted(
            (version for arch, version in rows if arch in allowed),
            key=by_version,
        )
        latest_version = valid_versions[-1] if valid_versions else None

        # Group the 'invalid' architectures by the version found there.
        invalid = defaultdict(list)
        for arch, version in rows:
            if arch not in allowed:
                invalid[version].append(arch)
        if not invalid:
            continue

        fragments.append(
            "\n (*) %s_%s [%s]: %s\n" % (binary, latest_version, source, architecture)
        )
        for version in sorted(invalid, key=by_version):
            fragments.append(
                " o %s: %s\n" % (version, ", ".join(sorted(invalid[version])))
            )

    return "".join(fragments)
################################################################################
# Check for out-of-date binaries on architectures that do not want to build that
# package any more, and have them listed as Not-For-Us
def do_nfu(nfu_packages):
    """Print the "Obsolete by Not-For-Us" section.

    nfu_packages maps an architecture to a list of
    (package, binary version, source version) tuples. Nothing is printed when
    there are no entries at all. The suite name used in the suggested
    commands comes from the module-level ``suite`` that main() sets.
    """
    report_lines = []
    removals = {}
    for architecture, entries in nfu_packages.items():
        names = removals[architecture] = []
        for package, bver, sver in entries:
            report_lines.append(
                " * [%s] does not want %s (binary %s, source %s)\n"
                % (architecture, package, bver, sver)
            )
            names.append(package)

    output = "".join(report_lines)
    if not output:
        return

    print_info("Obsolete by Not-For-Us")
    print_info("----------------------")
    print_info()
    print_info(output)
    print_info("Suggested commands:")
    for architecture, names in removals.items():
        if not names:
            continue
        command = 'dak rm -o -m "[auto-cruft] NFU" -s %s -a %s -b %s' % (
            suite.suite_name,
            architecture,
            " ".join(names),
        )
        print_cmd(command, indent=1)
    print_info()
def parse_nfu(architecture):
    """Return the set of package names marked Not-For-Us for *architecture*.

    Reads "<Wanna-Build-Dump>/<architecture>-all.txt". A missing file only
    produces a warning and an empty set.
    """
    dump_dir = Config()["Cruft-Report::Options::Wanna-Build-Dump"]
    filename = "%s/%s-all.txt" % (dump_dir, architecture)
    not_for_us = set()

    # Not all architectures may have a wanna-build dump, so missing files are
    # reported and otherwise ignored.
    if not os.path.exists(filename):
        utils.warn("No wanna-build dump file for architecture %s" % architecture)
        return not_for_us

    # Example line:
    # utils/hpodder_1.1.5.0: Not-For-Us [optional:out-of-date]
    entry_re = re.compile(r"^\w+/([^_]+)_.*: Not-For-Us")
    with open(filename) as dump:
        for line in dump:
            # Lines that start with a space are skipped.
            if line.startswith(" "):
                continue
            hit = entry_re.match(line)
            if hit:
                not_for_us.add(hit.group(1))
    return not_for_us
################################################################################
def do_newer_version(lowersuite_name, highersuite_name, code, session):
    """Print the sources that newer_version() returns for the two suites.

    The section is titled "Newer version in <lowersuite_name>" and ends with
    one ``dak rm`` command that removes all listed sources from
    *highersuite_name*; *code* (e.g. "NVIU") goes into the removal message.
    Nothing is printed when newer_version() returns no rows.
    """
    candidates = newer_version(lowersuite_name, highersuite_name, session)
    if len(candidates) == 0:
        return

    title = "Newer version in %s" % lowersuite_name
    print_info(title)
    print_info("-" * len(title))
    print_info()

    nv_to_remove = []
    for source, higher_version, lower_version in candidates:
        print_info(" o %s (%s, %s)" % (source, higher_version, lower_version))
        nv_to_remove.append(source)

    print_info()
    print_info("Suggested command:")
    command = 'dak rm -m "[auto-cruft] %s" -s %s %s' % (
        code,
        highersuite_name,
        " ".join(nv_to_remove),
    )
    print_cmd(command, indent=1)
    print_info()
################################################################################
def reportWithoutSource(suite_name, suite_id, session, rdeps=False):
    """Report the binaries that query_without_source() returns for the suite.

    Prints one entry with a suggested ``dak rm`` command per package. With
    *rdeps* set, utils.check_reverse_depends() is also run for each package.
    """
    rows = query_without_source(suite_id, session)
    if rows.rowcount > 0:
        title = "packages without source in suite %s" % suite_name
        print_info("%s\n%s\n" % (title, "-" * len(title)))

    message = '"[auto-cruft] no longer built from source"'
    for package, version in rows:
        print_info(
            "* package %s in version %s is no longer built from source"
            % (package, version)
        )
        print_info(" - suggested command:")
        print_cmd(
            "dak rm -m %s -s %s -a all -p -R -b %s" % (message, suite_name, package)
        )
        if not rdeps:
            print_info()
            continue
        has_problems = utils.check_reverse_depends(
            [package], suite_name, [], session, True
        )
        if has_problems:
            print_info()
        else:
            print_info(" - No dependency problem found\n")
def queryNewerAll(suite_name, session):
    """Find arch-specific binaries superseded by a newer arch:all binary.

    Returns the result of session.execute(); each row holds
    (package, old version, comma-joined old architectures, new version).

    NOTE(review): the query compares architecture ids against the literal 2,
    presumably the id of the "all" architecture -- confirm against the schema.
    """
    sql = (
        "\n"
        "select bab1.package, bab1.version as oldver,\n"
        "array_to_string(array_agg(a.arch_string), ',') as oldarch,\n"
        "bab2.version as newver\n"
        "from bin_associations_binaries bab1\n"
        "join bin_associations_binaries bab2\n"
        "on bab1.package = bab2.package and bab1.version < bab2.version and\n"
        "bab1.suite = bab2.suite and bab1.architecture > 2 and\n"
        "bab2.architecture = 2\n"
        "join architecture a on bab1.architecture = a.id\n"
        "join suite s on bab1.suite = s.id\n"
        "where s.suite_name = :suite_name\n"
        "group by bab1.package, oldver, bab1.suite, newver"
    )
    params = {"suite_name": suite_name}
    return session.execute(sql, params)
def reportNewerAll(suite_name, session):
    """Report arch-specific binaries made obsolete by a newer arch:all one.

    Prints one entry per row returned by queryNewerAll(), each with a
    suggested ``dak rm`` command limited to the outdated architectures.
    """
    rows = queryNewerAll(suite_name, session)
    if rows.rowcount > 0:
        title = "obsolete arch any packages in suite %s" % suite_name
        underline = "-" * len(title)
        print_info("%s\n%s\n" % (title, underline))

    message = '"[auto-cruft] obsolete arch any package"'
    for package, oldver, oldarch, newver in rows:
        description = (
            "* package %s is arch any in version %s but arch all in version %s"
            % (package, oldver, newver)
        )
        print_info(description)
        print_info(" - suggested command:")
        command = "dak rm -o -m %s -s %s -a %s -p -b %s\n" % (
            message,
            suite_name,
            oldarch,
            package,
        )
        print_cmd(command)
def reportNBS(suite_name, suite_id, rdeps=False):
    """Report the NBS packages that queryNBS() returns for the suite.

    Each row holds (binary package list, architecture list, source,
    version). One entry with a suggested ``dak rm`` command is printed per
    row. With *rdeps* set, utils.check_reverse_depends() is also run for
    the row's packages and architectures.

    The function opens its own database session and always closes it, even
    when a query or a reverse-dependency check raises.
    """
    session = DBConn().session()
    try:
        nbsRows = queryNBS(suite_id, session)
        title = "NBS packages in suite %s" % suite_name
        if nbsRows.rowcount > 0:
            print_info("%s\n%s\n" % (title, "-" * len(title)))
        for row in nbsRows:
            (pkg_list, arch_list, source, version) = row
            pkg_string = " ".join(pkg_list)
            arch_string = ",".join(arch_list)
            print_info(
                "* source package %s version %s no longer builds" % (source, version)
            )
            print_info(" binary package(s): %s" % pkg_string)
            print_info(" on %s" % arch_string)
            print_info(" - suggested command:")
            message = '"[auto-cruft] NBS (no longer built by %s)"' % source
            print_cmd(
                "dak rm -o -m %s -s %s -a %s -p -R -b %s"
                % (message, suite_name, arch_string, pkg_string)
            )
            if rdeps:
                if utils.check_reverse_depends(
                    pkg_list, suite_name, arch_list, session, True
                ):
                    print_info()
                else:
                    print_info(" - No dependency problem found\n")
            else:
                print_info()
    finally:
        # Release the private session on every exit path.
        session.close()
def reportAllNBS(suite_name, suite_id, session, rdeps=False):
    """Run the three NBS-style reports for one suite, in order.

    Calls reportWithoutSource(), reportNewerAll() and reportNBS(); the last
    one is not given *session*.
    """
    reportWithoutSource(suite_name, suite_id, session, rdeps=rdeps)
    reportNewerAll(suite_name, session)
    reportNBS(suite_name, suite_id, rdeps=rdeps)
################################################################################
def do_dubious_nbs(dubious_nbs):
    """Print the "Dubious NBS" section.

    dubious_nbs maps source -> version -> collection of binary package names.
    Sources are listed alphabetically and versions in Debian version order.
    Each source's version and Binary field come from the module-level
    ``source_versions`` and ``source_binaries``.
    """
    print_info("Dubious NBS")
    print_info("-----------")
    print_info()

    by_version = functools.cmp_to_key(apt_pkg.version_compare)
    for source in sorted(dubious_nbs):
        known_version = source_versions.get(source, "??")
        known_binaries = source_binaries.get(source, "(source does not exist)")
        print_info(" * %s_%s builds: %s" % (source, known_version, known_binaries))
        print_info(" won't admit to building:")
        per_version = dubious_nbs[source]
        for version in sorted(per_version, key=by_version):
            names = ", ".join(sorted(per_version[version]))
            print_info(" o %s: %s" % (version, names))
        print_info()
################################################################################
def obsolete_source(suite_name, session):
    """Return obsolete source packages of *suite_name*.

    A source is returned when it has no binaries in the same suite and was
    installed more than one day ago. Rows are ordered by install_date, which
    should help to spot source-only (or binary throw-away) uploads.

    The subquery 'source_suite_unique' keeps only sources that appear once in
    the suite; the rationale is that neither cruft-report nor rm can handle
    duplicates (yet).
    """
    sql = (
        "\n"
        "WITH source_suite_unique AS\n"
        "(SELECT source, suite\n"
        "FROM source_suite GROUP BY source, suite HAVING count(*) = 1)\n"
        "SELECT ss.src, ss.source, ss.version,\n"
        "to_char(ss.install_date, 'YYYY-MM-DD') AS install_date\n"
        "FROM source_suite ss\n"
        "JOIN source_suite_unique ssu\n"
        "ON ss.source = ssu.source AND ss.suite = ssu.suite\n"
        "JOIN suite s ON s.id = ss.suite\n"
        "LEFT JOIN bin_associations_binaries bab\n"
        "ON ss.src = bab.source AND ss.suite = bab.suite\n"
        "WHERE s.suite_name = :suite_name AND bab.id IS NULL\n"
        "AND now() - ss.install_date > '1 day'::interval\n"
        "ORDER BY install_date"
    )
    return session.execute(sql, {"suite_name": suite_name})
def source_bin(source, session):
    """Return the binary package names built by *source*.

    The query is not limited to a suite; rows are grouped and ordered by
    package name.
    """
    sql = (
        "\n"
        "SELECT b.package\n"
        "FROM binaries b\n"
        "JOIN src_associations_src sas ON b.source = sas.src\n"
        "WHERE sas.source = :source\n"
        "GROUP BY b.package\n"
        "ORDER BY b.package"
    )
    return session.execute(sql, {"source": source})
def newest_source_bab(suite_name, package, session):
    """Return the sources that build *package* in *suite_name*.

    Each row holds (source, highest version of that source); rows are grouped
    and sorted by source and package name.
    """
    sql = (
        "\n"
        "SELECT sas.source, MAX(sas.version) AS srcver\n"
        "FROM src_associations_src sas\n"
        "JOIN bin_associations_binaries bab ON sas.src = bab.source\n"
        "JOIN suite s on s.id = bab.suite\n"
        "WHERE s.suite_name = :suite_name AND bab.package = :package\n"
        "GROUP BY sas.source, bab.package\n"
        "ORDER BY sas.source, bab.package"
    )
    params = {"suite_name": suite_name, "package": package}
    return session.execute(sql, params)
def report_obsolete_source(suite_name, session):
    """Print the "Obsolete source packages" section for *suite_name*.

    For every row from obsolete_source() the binaries it once built are
    listed together with the source that currently builds them in the suite,
    followed by a suggested ``dak rm`` command. Nothing is printed when there
    are no rows.
    """
    rows = obsolete_source(suite_name, session)
    if rows.rowcount == 0:
        return

    underline_tail = "-" * len(suite_name)
    print_info(
        """Obsolete source packages in suite %s
----------------------------------%s\n"""
        % (suite_name, underline_tail)
    )

    rm_opts = '-S -p -m "[auto-cruft] obsolete source package"'
    for _src, old_source, version, install_date in rows.fetchall():
        print_info(
            " * obsolete source %s version %s installed at %s"
            % (old_source, version, install_date)
        )
        for (package,) in source_bin(old_source, session):
            print_info(" - has built binary %s" % package)
            for new_source, srcver in newest_source_bab(suite_name, package, session):
                print_info(
                    " currently built by source %s version %s" % (new_source, srcver)
                )
        print_info(" - suggested command:")
        print_cmd("dak rm -s %s %s %s\n" % (suite_name, rm_opts, old_source))
def get_suite_binaries(suite, session):
    """Return a dict keyed by every binary package name in *suite*.

    The values are empty strings; the dict is used as a membership table.
    *suite* must provide ``suite_name`` and ``suite_id``.
    """
    print_info("Getting a list of binary packages in %s..." % suite.suite_name)
    rows = session.execute(
        "SELECT distinct b.package\n"
        "FROM binaries b, bin_associations ba\n"
        "WHERE ba.suite = :suiteid AND ba.bin = b.id",
        {"suiteid": suite.suite_id},
    ).fetchall()
    return {row[0]: "" for row in rows}
################################################################################
def report_outdated_nonfree(suite, session, rdeps=False):
    """Report outdated non-free binaries in *suite*.

    suite   -- suite name (a string; it is bound to the query and printed).
    session -- database session used for the query and the rdep check.
    rdeps   -- when true, utils.check_reverse_depends() is also run for each
               source's binaries.

    Binaries of architecture "all" are ignored. For every remaining source
    one ``dak rm`` command is suggested; its architectures and binaries are
    listed in sorted order so the output is the same on every run.
    """
    query = """WITH outdated_sources AS (
SELECT s.source, s.version, s.id
FROM source s
JOIN src_associations sa ON sa.source = s.id
WHERE sa.suite IN (
SELECT id
FROM suite
WHERE suite_name = :suite )
AND sa.created < (now() - interval :delay)
EXCEPT SELECT s.source, max(s.version) AS version, max(s.id)
FROM source s
JOIN src_associations sa ON sa.source = s.id
WHERE sa.suite IN (
SELECT id
FROM suite
WHERE suite_name = :suite )
AND sa.created < (now() - interval :delay)
GROUP BY s.source ),
binaries AS (
SELECT b.package, s.source, (
SELECT a.arch_string
FROM architecture a
WHERE a.id = b.architecture ) AS arch
FROM binaries b
JOIN outdated_sources s ON s.id = b.source
JOIN bin_associations ba ON ba.bin = b.id
JOIN override o ON o.package = b.package AND o.suite = ba.suite
WHERE ba.suite IN (
SELECT id
FROM suite
WHERE suite_name = :suite )
AND o.component IN (
SELECT id
FROM component
WHERE name = 'non-free' ) )
SELECT DISTINCT package, source, arch
FROM binaries
ORDER BY source, package, arch"""
    res = session.execute(query, {"suite": suite, "delay": "'15 days'"})

    # source -> binary -> set of architectures (arch "all" is skipped)
    packages = {}
    for binary, source, arch in res:
        if arch == "all":
            continue
        packages.setdefault(source, {}).setdefault(binary, set()).add(arch)

    if not packages:
        return

    title = "Outdated non-free binaries in suite %s" % suite
    message = '"[auto-cruft] outdated non-free binaries"'
    print_info("%s\n%s\n" % (title, "-" * len(title)))
    for source in sorted(packages):
        binaries = sorted(packages[source])
        archs = set()
        for binary in binaries:
            archs |= packages[source][binary]

        print_info("* package %s has outdated non-free binaries" % source)
        print_info(" - suggested command:")
        # Sets have no stable iteration order, so sort before joining.
        print_cmd(
            "dak rm -o -m %s -s %s -a %s -p -R -b %s"
            % (message, suite, ",".join(sorted(archs)), " ".join(binaries))
        )
        if rdeps:
            if utils.check_reverse_depends(binaries, suite, archs, session, True):
                print_info()
            else:
                print_info(" - No dependency problem found\n")
        else:
            print_info()
################################################################################
def main():
    """Entry point of ``dak cruft-report``.

    Parses the command line, selects the checks for the requested mode
    ("daily", "full" or "bdo"), then scans the suite's Sources and Packages
    indices and prints the selected reports.

    Side effects: sets the module-level ``suite`` and ``suite_id`` (read by
    do_nfu() and do_anais()), rebinds ``source_versions`` and fills
    ``source_binaries``. Exits through usage() for --help or an unknown mode.
    """
    global suite, suite_id, source_binaries, source_versions

    cnf = Config()

    Arguments = [
        ("h", "help", "Cruft-Report::Options::Help"),
        ("m", "mode", "Cruft-Report::Options::Mode", "HasArg"),
        ("R", "rdep-check", "Cruft-Report::Options::Rdep-Check"),
        ("s", "suite", "Cruft-Report::Options::Suite", "HasArg"),
        ("w", "wanna-build-dump", "Cruft-Report::Options::Wanna-Build-Dump", "HasArg"),
        ("c", "commands-only", "Cruft-Report::Options::Commands-Only"),
    ]

    # Seed defaults for options that are absent from the configuration.
    for i in ["help", "Rdep-Check"]:
        key = "Cruft-Report::Options::%s" % i
        if key not in cnf:
            cnf[key] = ""
    if "Cruft-Report::Options::Commands-Only" not in cnf:
        cnf["Cruft-Report::Options::Commands-Only"] = ""
    cnf["Cruft-Report::Options::Suite"] = cnf.get("Dinstall::DefaultSuite", "unstable")
    if "Cruft-Report::Options::Mode" not in cnf:
        cnf["Cruft-Report::Options::Mode"] = "daily"
    if "Cruft-Report::Options::Wanna-Build-Dump" not in cnf:
        cnf["Cruft-Report::Options::Wanna-Build-Dump"] = (
            "/srv/ftp-master.debian.org/scripts/nfu"
        )

    apt_pkg.parse_commandline(cnf.Cnf, Arguments, sys.argv)

    Options = cnf.subtree("Cruft-Report::Options")
    if Options["Help"]:
        usage()

    rdeps = bool(Options["Rdep-Check"])

    # Set up checks based on mode
    mode = Options["Mode"]
    daily_checks = [
        "nbs",
        "nviu",
        "nvit",
        "obsolete source",
        "outdated non-free",
        "nfu",
        "nbs metadata",
    ]
    if mode == "daily":
        checks = daily_checks
    elif mode == "full":
        checks = daily_checks + ["dubious nbs", "bnb", "bms", "anais"]
    elif mode == "bdo":
        checks = ["nbs", "obsolete source"]
    else:
        utils.warn(
            "%s is not a recognised mode - only 'full', 'daily' or 'bdo' are understood."
            % (mode)
        )
        usage(1)

    session = DBConn().session()

    bin_pkgs = {}  # binary package name -> source listing it in Sources
    bins_in_suite = {}
    nbs = defaultdict(lambda: defaultdict(set))  # source -> package -> versions
    source_versions = {}
    anais_output = ""
    nfu_packages = defaultdict(list)

    suite = get_suite(Options["Suite"].lower(), session)
    if not suite:
        utils.fubar("Cannot find suite %s" % Options["Suite"].lower())
    suite_id = suite.suite_id
    suite_name = suite.suite_name.lower()

    if "obsolete source" in checks:
        report_obsolete_source(suite_name, session)

    if "nbs" in checks:
        reportAllNBS(suite_name, suite_id, session, rdeps)

    if "nbs metadata" in checks:
        # NOTE(review): reportNBSMetadata is neither defined nor imported in
        # the visible part of this file (only queryNBS_metadata is imported).
        # Confirm it exists; otherwise this raises NameError in the "daily"
        # and "full" modes.
        reportNBSMetadata(suite_name, suite_id, session, rdeps)

    if "outdated non-free" in checks:
        report_outdated_nonfree(suite_name, session, rdeps)

    bin_not_built = defaultdict(set)

    if "bnb" in checks:
        bins_in_suite = get_suite_binaries(suite, session)

    # Checks based on the Sources files
    components = [c.component_name for c in suite.components]
    for component in components:
        filename = "%s/dists/%s/%s/source/Sources" % (
            suite.archive.path,
            suite_name,
            component,
        )
        filename = utils.find_possibly_compressed_file(filename)
        with apt_pkg.TagFile(filename) as Sources:
            while Sources.step():
                source = Sources.section.find("Package")
                source_version = Sources.section.find("Version")
                architecture = Sources.section.find("Architecture")
                binaries = Sources.section.find("Binary")
                binaries_list = [i.strip() for i in binaries.split(",")]

                if "bnb" in checks:
                    # Check for binaries not built on any architecture.
                    for binary in binaries_list:
                        if binary not in bins_in_suite:
                            bin_not_built[source].add(binary)

                if "anais" in checks:
                    anais_output += do_anais(
                        architecture, binaries_list, source, session
                    )

                # build indices for checking "no source" later
                for binary in binaries_list:
                    bin_pkgs[binary] = source
                source_binaries[source] = binaries
                source_versions[source] = source_version

    # Checks based on the Packages files
    check_components = components[:]
    if suite_name != "experimental":
        check_components.append("main/debian-installer")

    # The architecture list does not depend on the component: query it once.
    architectures = [
        a.arch_string
        for a in get_suite_architectures(
            suite_name, skipsrc=True, skipall=True, session=session
        )
    ]
    check_nfu = "nfu" in checks
    # architecture -> parse_nfu() result, so each wanna-build dump is read
    # (and a missing one warned about) only once instead of once per component.
    nfu_by_arch = {}

    for component in check_components:
        for architecture in architectures:
            if component == "main/debian-installer" and re.match(
                "kfreebsd", architecture
            ):
                continue

            if check_nfu:
                if architecture not in nfu_by_arch:
                    nfu_by_arch[architecture] = parse_nfu(architecture)
                nfu_entries = nfu_by_arch[architecture]

            filename = "%s/dists/%s/%s/binary-%s/Packages" % (
                suite.archive.path,
                suite_name,
                component,
                architecture,
            )
            filename = utils.find_possibly_compressed_file(filename)
            with apt_pkg.TagFile(filename) as Packages:
                while Packages.step():
                    package = Packages.section.find("Package")
                    source = Packages.section.find("Source", "")
                    version = Packages.section.find("Version")
                    if source == "":
                        source = package
                    # A parenthesised Source field is split by
                    # re_extract_src_version into name and version.
                    if source.find("(") != -1:
                        m = re_extract_src_version.match(source)
                        source = m.group(1)
                        version = m.group(2)
                    if package not in bin_pkgs:
                        nbs[source][package].add(version)
                    elif check_nfu:
                        # NOTE(review): source_versions[source] raises
                        # KeyError when the Packages stanza names a source
                        # that was not seen in Sources -- confirm whether
                        # that can occur.
                        if (
                            package in nfu_entries
                            and version != source_versions[source]
                        ):  # only suggest to remove out-of-date packages
                            nfu_packages[architecture].append(
                                (package, version, source_versions[source])
                            )

    # Distinguish dubious (version numbers match) and 'real' NBS (they don't)
    dubious_nbs = defaultdict(lambda: defaultdict(set))
    version_sort_key = functools.cmp_to_key(apt_pkg.version_compare)
    for source in nbs:
        for package in nbs[source]:
            latest_version = max(nbs[source][package], key=version_sort_key)
            source_version = source_versions.get(source, "0")
            if apt_pkg.version_compare(latest_version, source_version) == 0:
                add_nbs(dubious_nbs, source, latest_version, package, suite_id, session)

    if "nviu" in checks:
        do_newer_version("unstable", "experimental", "NVIU", session)

    if "nvit" in checks:
        do_newer_version("testing", "testing-proposed-updates", "NVIT", session)

    ###

    if mode == "full":
        print_info("=" * 75)
        print_info()

    if "nfu" in checks:
        do_nfu(nfu_packages)

    if "bnb" in checks:
        print_info("Unbuilt binary packages")
        print_info("-----------------------")
        print_info()
        for source in sorted(bin_not_built):
            binaries = sorted(bin_not_built[source])
            print_info(" o %s: %s" % (source, ", ".join(binaries)))
        print_info()

    if "bms" in checks:
        report_multiple_source(suite)

    if "anais" in checks:
        print_info("Architecture Not Allowed In Source")
        print_info("----------------------------------")
        print_info(anais_output)
        print_info()

    if "dubious nbs" in checks:
        do_dubious_nbs(dubious_nbs)
################################################################################
# Run the report only when this file is executed as a script.
if __name__ == "__main__":
    main()