Coverage for dak/auto_decruft.py: 37%
212 statements
« prev ^ index » next coverage.py v7.6.0, created at 2026-01-04 16:18 +0000
« prev ^ index » next coverage.py v7.6.0, created at 2026-01-04 16:18 +0000
1#! /usr/bin/env python3
3"""
4Check for obsolete binary packages
6@contact: Debian FTP Master <ftpmaster@debian.org>
7@copyright: 2000-2006 James Troup <james@nocrew.org>
8@copyright: 2009 Torsten Werner <twerner@debian.org>
9@copyright: 2015 Niels Thykier <niels@thykier.net>
10@license: GNU General Public License version 2 or later
11"""
13# This program is free software; you can redistribute it and/or modify
14# it under the terms of the GNU General Public License as published by
15# the Free Software Foundation; either version 2 of the License, or
16# (at your option) any later version.
18# This program is distributed in the hope that it will be useful,
19# but WITHOUT ANY WARRANTY; without even the implied warranty of
20# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21# GNU General Public License for more details.
23# You should have received a copy of the GNU General Public License
24# along with this program; if not, write to the Free Software
25# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
27################################################################################
29# | priviledged positions? What privilege? The honour of working harder
30# | than most people for absolutely no recognition?
31#
32# Manoj Srivastava <srivasta@debian.org> in <87lln8aqfm.fsf@glaurung.internal.golden-gryphon.com>
34################################################################################
36import sys
37from collections import defaultdict
38from collections.abc import Hashable, Iterable
39from itertools import chain, product
40from typing import TYPE_CHECKING, NoReturn, TypedDict
42import apt_pkg
43import sqlalchemy.sql as sql
44from sqlalchemy.engine import Result
46from daklib import utils
47from daklib.config import Config
48from daklib.cruft import newer_version, query_without_source, queryNBS
49from daklib.dbconn import DBConn, get_architecture, get_suite, get_suite_architectures
50from daklib.rm import ReverseDependencyChecker, remove
52if TYPE_CHECKING:
53 from sqlalchemy.orm import Session
# Parsed "Auto-Decruft::Options" configuration subtree; assigned by main().
Options: apt_pkg.Configuration
57################################################################################
def usage(exit_code: int = 0) -> NoReturn:
    """Print the command-line help and terminate the process

    :param exit_code: The exit status handed to ``sys.exit``
    """
    # Every option accepted by main() is listed, including -d/--debug.
    print(
        """Usage: dak auto-decruft
Automatic removal of common kinds of cruft

  -h, --help                show this help and exit.
  -n, --dry-run             don't do anything, just show what would have been done
  -d, --debug               print extra information about the decisions taken
  -s, --suite=SUITE         check suite SUITE.
  --if-newer-version-in OS  remove all packages in SUITE with a lower version than
                            in OS (e.g. -s experimental --if-newer-version-in
                            unstable)
  --if-newer-version-in-rm-msg RMMSG
                            use RMMSG in the removal message (e.g. "NVIU")
  --decruft-equal-versions  use with --if-newer-version-in to also decruft versions
                            that are identical in both suites.
  """
    )
    sys.exit(exit_code)
80################################################################################
class Group(TypedDict):
    """A set of binary packages that is considered for removal as one unit"""

    # Label identifying the group, e.g. "sourceless:<package>" or "NBS:<source>"
    name: str
    # Names of the binary packages belonging to the group
    packages: tuple[str, ...]
    # Names of the architectures the packages are removed from
    architectures: list[str]
    # Database ids of the architectures; used to select the binaries to remove
    architecture_ids: tuple[int, ...]
    # Removal message recorded when the group is removed
    message: str
    # Maps each package name to the architecture names it is removed from
    removal_request: dict[str, list[str]]
def compute_sourceless_groups(suite_id: int, session: "Session") -> Iterable[Group]:
    """Yield one removal group per binary that has no source

    Each group holds a single package and targets architecture "all" only.

    :param suite_id: The id of the suite to inspect
    :param session: The database session in use
    :return: Removal groups named "sourceless:<package>"
    """
    sourceless = query_without_source(suite_id, session)
    all_arch = get_architecture("all", session=session)
    assert all_arch is not None
    reason = "[auto-cruft] no longer built from source, no reverse dependencies"
    # These two containers are shared by every group yielded below
    arch_names = ["all"]
    arch_ids = (all_arch.arch_id,)
    for record in sourceless:
        binary = record[0]
        yield Group(
            name="sourceless:%s" % binary,
            packages=(binary,),
            architectures=arch_names,
            architecture_ids=arch_ids,
            message=reason,
            removal_request={binary: arch_names},
        )
def compute_nbs_groups(
    suite_id: int, suite_name: str, session: "Session"
) -> Iterable[Group]:
    """Yield one removal group per row reported by queryNBS

    :param suite_id: The id of the suite denoted by suite_name
    :param suite_name: The name of the suite to remove from
    :param session: The database session in use
    :return: Removal groups named "NBS:<source>"
    """
    nbs_rows = queryNBS(suite_id, session)
    # Architecture name -> database id, limited to the suite's architectures
    arch_id_by_name = {
        arch.arch_string: arch.arch_id for arch in get_suite_architectures(suite_name)
    }

    for binaries, arch_names, source, _ in nbs_rows:
        reason = (
            "[auto-cruft] NBS (no longer built by %s, no reverse dependencies)" % source
        )
        # Every package of the row is removed from the same architectures
        request = {binary: arch_names for binary in binaries}
        yield Group(
            name="NBS:%s" % source,
            packages=tuple(sorted(binaries)),
            architectures=sorted(arch_names),
            architecture_ids=tuple(arch_id_by_name[name] for name in arch_names),
            message=reason,
            removal_request=request,
        )
def remove_groups(
    groups: Iterable[Group], suite_id: int, suite_name: str, session: "Session"
) -> None:
    """Remove the binaries of each given group from the suite

    For every group the matching (package, version, architecture, id) rows
    are looked up and handed to ``remove`` as a partial removal.

    :param groups: The removal groups to process
    :param suite_id: The id of the suite denoted by suite_name
    :param suite_name: The name of the suite to remove from
    :param session: The database session in use
    """
    # The statement is the same for every group; only the bound values differ
    lookup = sql.text(
        """
            SELECT b.package, b.version, a.arch_string, b.id
            FROM binaries b
                JOIN bin_associations ba ON b.id = ba.bin
                JOIN architecture a ON b.architecture = a.id
                JOIN suite su ON ba.suite = su.id
            WHERE a.id IN :architecture_ids AND b.package IN :packages AND su.id = :suite_id
            """
    )
    for group in groups:
        matches: Result[tuple[str, str, str, int]] = session.execute(
            lookup,
            {
                "architecture_ids": group["architecture_ids"],
                "packages": group["packages"],
                "suite_id": suite_id,
            },
        )
        remove(
            session,
            group["message"],
            [suite_name],
            list(matches),
            partial=True,
            whoami="DAK's auto-decrufter",
        )
184def dedup[T: Hashable](*args: Iterable[T]) -> Iterable[T]:
185 seen = set()
186 for iterable in args:
187 for value in iterable:
188 if value not in seen:
189 seen.add(value)
190 yield value
def merge_group(groupA: Group, groupB: Group) -> Group:
    """Combine two removal groups into a new one

    Packages, architectures, architecture ids and the per-package removal
    requests are the de-duplicated union of both groups.  The name and the
    message are taken from groupA only.

    :param groupA: A removal group
    :param groupB: Another removal group
    :return: A merged group
    """
    requests_a = groupA["removal_request"]
    requests_b = groupB["removal_request"]
    # A package missing from one side contributes no architectures from it
    merged_requests = {
        pkg: sorted(dedup(requests_a.get(pkg, []), requests_b.get(pkg, [])))
        for pkg in dedup(requests_a, requests_b)
    }
    return Group(
        name=groupA["name"],
        packages=tuple(sorted(dedup(groupA["packages"], groupB["packages"]))),
        architectures=sorted(dedup(groupA["architectures"], groupB["architectures"])),
        # The ids keep first-seen order rather than being sorted
        architecture_ids=tuple(
            dedup(groupA["architecture_ids"], groupB["architecture_ids"])
        ),
        message=groupA["message"],
        removal_request=merged_requests,
    )
def auto_decruft_suite(
    suite_name: str, suite_id: int, session: "Session", dryrun: bool, debug: bool
) -> None:
    """Find cruft in a suite and remove the part that breaks nothing

    Candidate groups come from compute_sourceless_groups and
    compute_nbs_groups.  Groups whose removal is reported as breaking
    something are dropped and the check is repeated on the remaining groups
    until no breakage is reported or no group is left.

    :param suite_name: The name of the suite to remove from
    :param suite_id: The id of the suite denoted by suite_name
    :param session: The database session in use
    :param dryrun: If True, just print the actions rather than actually doing them
    :param debug: If True, print some extra information
    """
    suite_archs = [a.arch_string for a in get_suite_architectures(suite_name)]
    # (package, architecture) -> names of the groups that would remove it
    groups_by_pkg_arch = defaultdict(set)
    ordered_names = []
    groups = {}
    candidates = chain(
        compute_sourceless_groups(suite_id, session),
        compute_nbs_groups(suite_id, suite_name, session),
    )
    for candidate in candidates:
        name = candidate["name"]
        archs = candidate["architectures"]
        # If we remove an arch:all package, then the breakage can occur on any
        # of the architectures.
        if "all" in archs:
            archs = suite_archs
        for pkg_arch in product(candidate["packages"], archs):
            groups_by_pkg_arch[pkg_arch].add(name)
        if name in groups:
            # This case usually happens when versions differ between architectures...
            if debug:
                print("N: Merging group %s" % (name))
            groups[name] = merge_group(groups[name], candidate)
        else:
            groups[name] = candidate
            ordered_names.append(name)

    if not groups:
        if debug:
            print("N: Found no candidates")
        return

    full_removal_request: list[tuple[str, list[str]]] = [
        item for name in ordered_names for item in groups[name]["removal_request"].items()
    ]

    if debug:
        print("N: Considering to remove the following packages:")
        for name in sorted(groups):
            info = groups[name]
            print(
                "N: * %s: %s [%s]"
                % (name, ", ".join(info["packages"]), " ".join(info["architectures"]))
            )
        print("N: Compiling ReverseDependencyChecker (RDC) - please hold ...")
    rdc = ReverseDependencyChecker(session, suite_name)
    if debug:
        print("N: Computing initial breakage...")

    breakage = rdc.check_reverse_depends(full_removal_request)
    while breakage:
        # Worst offenders first: ordered by the number of things they break
        by_breakers = sorted(
            [(len(broken), breaker, broken) for breaker, broken in breakage.items()],
            reverse=True,
        )
        if debug:
            print(
                "N: - Removal would break %s (package, architecture)-pairs"
                % (len(breakage))
            )
            print("N: - full breakage:")
            for _, breaker, broken in by_breakers:
                bname = "%s/%s" % breaker
                broken_str = ", ".join("%s/%s" % b for b in sorted(broken))
                print("N: * %s => %s" % (bname, broken_str))

        averted_breakage: set[tuple[str, str]] = set()

        for _, package_arch, would_break in by_breakers:
            if would_break <= averted_breakage:
                # We already avoided this break
                continue
            culprits = groups_by_pkg_arch[package_arch]

            if not culprits:
                utils.fubar("Cannot figure what group provided %s" % str(package_arch))

            # Only output it, if it truly a new group being discarded
            # - a group can reach this part multiple times, if it breaks things on
            #   more than one architecture.  This being rather common in fact.
            if debug and any(name for name in culprits if name in groups):
                avoided = sorted(would_break - averted_breakage)
                print(
                    "N: - skipping removal of %s (breakage: %s)"
                    % (", ".join(sorted(culprits)), str(avoided))
                )

            averted_breakage |= would_break
            for name in culprits:
                groups.pop(name, None)

        if not groups:
            if debug:
                print("N: Nothing left to remove")
            return

        if debug:
            print(
                "N: Now considering to remove: %s"
                % str(", ".join(sorted(groups.keys())))
            )

        # Rebuild the removal request with the remaining groups and off
        # we go to (not) break the world once more time
        full_removal_request = [
            item
            for info in groups.values()
            for item in info["removal_request"].items()
        ]
        breakage = rdc.check_reverse_depends(full_removal_request)

    if debug:
        print("N: Removal looks good")

    if not dryrun:
        remove_groups(groups.values(), suite_id, suite_name, session)
        return

    print("Would remove the equivalent of:")
    for name in ordered_names:
        survivor = groups.get(name)
        if survivor is None:
            # The group was discarded because it would have broken something
            continue
        # Embed the -R just in case someone wants to run it manually later
        print(
            ' dak rm -m "{message}" -s {suite} -a {architectures} -p -R -b {packages}'.format(
                message=survivor["message"],
                suite=suite_name,
                architectures=",".join(survivor["architectures"]),
                packages=" ".join(survivor["packages"]),
            )
        )

    print()
    print(
        "Note: The removals may be interdependent. A non-breaking result may require the execution of all"
    )
    print("of the removals")
def sources2removals(
    source_list: Iterable[str], suite_id: int, session: "Session"
) -> list[tuple[str, str, str, int]]:
    """Compute removals items given a list of names of source packages

    The result holds the matching source rows (with the literal 'source' in
    the architecture position) followed by the rows of the binaries built
    from those sources, both restricted to the given suite.

    :param source_list: Names of source packages; may be any iterable
    :param suite_id: The id of the suite from which these sources should be removed
    :param session: The database session in use
    :return: A list of items to be removed to remove all sources and their
             binaries from the given suite; empty if source_list is empty
    """
    sources = tuple(source_list)
    if not sources:
        # Nothing to look up; an empty "IN" list would also not be valid SQL
        return []

    params = {"suite_id": suite_id, "sources": sources}
    source_rows = session.execute(
        sql.text(
            """
                    SELECT s.source, s.version, 'source', s.id
                    FROM source s
                         JOIN src_associations sa ON sa.source = s.id
                    WHERE sa.suite = :suite_id AND s.source IN :sources"""
        ),
        params,
    )
    binary_rows = session.execute(
        sql.text(
            """
                    SELECT b.package, b.version, a.arch_string, b.id
                    FROM binaries b
                         JOIN bin_associations ba ON b.id = ba.bin
                         JOIN architecture a ON b.architecture = a.id
                         JOIN source s ON b.source = s.id
                    WHERE ba.suite = :suite_id AND s.source IN :sources"""
        ),
        params,
    )
    # Sources first, then binaries, in the order the database returned them
    return [*source_rows, *binary_rows]
def decruft_newer_version_in(
    othersuite: str,
    suite_name: str,
    suite_id: int,
    rm_msg: str,
    session: "Session",
    dryrun: bool,
    decruft_equal_versions: bool,
) -> None:
    """Remove the sources that newer_version reports for the two suites

    The first column of every row returned by ``newer_version`` is used as a
    source package name.  Those sources and their binaries are removed from
    the suite, or the equivalent ``dak rm`` command is printed on a dry run.
    Nothing happens when no row is returned.

    :param othersuite: The name of the suite to compare with (e.g. "unstable" for "NVIU")
    :param suite_name: The name of the suite from which to do removals (e.g. "experimental" for "NVIU")
    :param suite_id: The id of the suite from which these sources should be removed
    :param rm_msg: The removal message (or tag, e.g. "NVIU")
    :param session: The database session in use
    :param dryrun: If True, just print the actions rather than actually doing them
    :param decruft_equal_versions: Passed to newer_version as include_equal
    """
    candidates = newer_version(
        othersuite, suite_name, session, include_equal=decruft_equal_versions
    )
    superseded = [row[0] for row in candidates]
    if not superseded:
        return

    message = "[auto-cruft] %s" % rm_msg
    if dryrun:
        print(
            ' dak rm -m "%s" -s %s %s'
            % (message, suite_name, " ".join(superseded))
        )
        return

    removals = sources2removals(superseded, suite_id, session)
    remove(session, message, [suite_name], removals, whoami="DAK's auto-decrufter")
461################################################################################
def main() -> None:
    """Command-line entry point of ``dak auto-decruft``

    Parses the command line into the module-level ``Options``, resolves the
    suite(s), runs the auto-decrufter on the suite and, when
    --if-newer-version-in is given, also removes the packages superseded in
    the other suite.  The session is committed unless --dry-run is given.

    Both suites are resolved before any removal work starts, so an unknown
    suite name is reported through ``utils.fubar`` up front.
    """
    global Options
    cnf = Config()

    Arguments = [
        ("h", "help", "Auto-Decruft::Options::Help"),
        ("n", "dry-run", "Auto-Decruft::Options::Dry-Run"),
        ("d", "debug", "Auto-Decruft::Options::Debug"),
        ("s", "suite", "Auto-Decruft::Options::Suite", "HasArg"),
        # The "\0" seems to be the only way to disable short options.
        ("\0", "if-newer-version-in", "Auto-Decruft::Options::OtherSuite", "HasArg"),
        (
            "\0",
            "if-newer-version-in-rm-msg",
            "Auto-Decruft::Options::OtherSuiteRMMsg",
            "HasArg",
        ),
        (
            "\0",
            "decruft-equal-versions",
            "Auto-Decruft::Options::OtherSuiteDecruftEqual",
        ),
    ]
    # Make sure every option key exists so the lookups below cannot fail
    for i in [
        "help",
        "Dry-Run",
        "Debug",
        "OtherSuite",
        "OtherSuiteRMMsg",
        "OtherSuiteDecruftEqual",
    ]:
        key = "Auto-Decruft::Options::%s" % i
        if key not in cnf:
            cnf[key] = ""

    cnf["Auto-Decruft::Options::Suite"] = cnf.get("Dinstall::DefaultSuite", "unstable")

    apt_pkg.parse_commandline(cnf.Cnf, Arguments, sys.argv)  # type: ignore[attr-defined]

    Options = cnf.subtree("Auto-Decruft::Options")
    if Options["Help"]:
        usage()

    dryrun = bool(Options["Dry-Run"])
    debug = bool(Options["Debug"])
    decruft_equal_versions = bool(Options["OtherSuiteDecruftEqual"])

    if Options["OtherSuite"] and not Options["OtherSuiteRMMsg"]:
        utils.fubar("--if-newer-version-in requires --if-newer-version-in-rm-msg")

    session = DBConn().session()

    suite = get_suite(Options["Suite"].lower(), session)
    if not suite:
        utils.fubar("Cannot find suite %s" % Options["Suite"].lower())

    # Resolve the comparison suite before doing any work: a mistyped name is
    # reported the same way as for the main suite instead of tripping an
    # assert (which is stripped under -O) after the first stage already ran.
    osuite_name = None
    if Options["OtherSuite"]:
        osuite = get_suite(Options["OtherSuite"].lower(), session)
        if not osuite:
            utils.fubar("Cannot find suite %s" % Options["OtherSuite"].lower())
        osuite_name = osuite.suite_name

    suite_id = suite.suite_id
    suite_name = suite.suite_name.lower()

    auto_decruft_suite(suite_name, suite_id, session, dryrun, debug)

    if osuite_name is not None:
        decruft_newer_version_in(
            osuite_name,
            suite_name,
            suite_id,
            Options["OtherSuiteRMMsg"],
            session,
            dryrun,
            decruft_equal_versions,
        )

    if not dryrun:
        session.commit()
549################################################################################
# Run the decrufter only when executed as a script, not when imported.
if __name__ == "__main__":
    main()