Coverage for dak/auto_decruft.py: 37%

212 statements  

« prev     ^ index     » next       coverage.py v7.6.0, created at 2026-01-04 16:18 +0000

1#! /usr/bin/env python3 

2 

3""" 

4Check for obsolete binary packages 

5 

6@contact: Debian FTP Master <ftpmaster@debian.org> 

7@copyright: 2000-2006 James Troup <james@nocrew.org> 

8@copyright: 2009 Torsten Werner <twerner@debian.org> 

9@copyright: 2015 Niels Thykier <niels@thykier.net> 

10@license: GNU General Public License version 2 or later 

11""" 

12 

13# This program is free software; you can redistribute it and/or modify 

14# it under the terms of the GNU General Public License as published by 

15# the Free Software Foundation; either version 2 of the License, or 

16# (at your option) any later version. 

17 

18# This program is distributed in the hope that it will be useful, 

19# but WITHOUT ANY WARRANTY; without even the implied warranty of 

20# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

21# GNU General Public License for more details. 

22 

23# You should have received a copy of the GNU General Public License 

24# along with this program; if not, write to the Free Software 

25# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 

26 

27################################################################################ 

28 

29# | priviledged positions? What privilege? The honour of working harder 

30# | than most people for absolutely no recognition? 

31# 

32# Manoj Srivastava <srivasta@debian.org> in <87lln8aqfm.fsf@glaurung.internal.golden-gryphon.com> 

33 

34################################################################################ 

35 

36import sys 

37from collections import defaultdict 

38from collections.abc import Hashable, Iterable 

39from itertools import chain, product 

40from typing import TYPE_CHECKING, NoReturn, TypedDict 

41 

42import apt_pkg 

43import sqlalchemy.sql as sql 

44from sqlalchemy.engine import Result 

45 

46from daklib import utils 

47from daklib.config import Config 

48from daklib.cruft import newer_version, query_without_source, queryNBS 

49from daklib.dbconn import DBConn, get_architecture, get_suite, get_suite_architectures 

50from daklib.rm import ReverseDependencyChecker, remove 

51 

52if TYPE_CHECKING: 

53 from sqlalchemy.orm import Session 

54 

# Command-line options of this tool; main() assigns the
# "Auto-Decruft::Options" configuration subtree to it.
Options: apt_pkg.Configuration

56 

57################################################################################ 

58 

59 

def usage(exit_code=0) -> NoReturn:
    """Print the command-line help of ``dak auto-decruft`` and exit

    :param exit_code: The exit status passed on to sys.exit()
    """
    # Every option registered in main() is listed here, including -d/--debug.
    print(
        """Usage: dak auto-decruft
Automatic removal of common kinds of cruft

  -h, --help                show this help and exit.
  -n, --dry-run             don't do anything, just show what would have been done
  -d, --debug               print extra information about what is being considered
  -s, --suite=SUITE         check suite SUITE.
  --if-newer-version-in OS  remove all packages in SUITE with a lower version than
                            in OS (e.g. -s experimental --if-newer-version-in
                            unstable)
  --if-newer-version-in-rm-msg RMMSG
                            use RMMSG in the removal message (e.g. "NVIU")
  --decruft-equal-versions  use with --if-newer-version-in to also decruft versions
                            that are identical in both suites.
  """
    )
    sys.exit(exit_code)

78 

79 

80################################################################################ 

81 

82 

class Group(TypedDict):
    """A set of binary packages that is considered for removal as one unit"""

    # Key of the group; built as "sourceless:<package>" or "NBS:<source>"
    name: str
    # Names of the binary packages belonging to the group
    packages: tuple[str, ...]
    # Names of the architectures the packages are removed from
    architectures: list[str]
    # The arch_id values used to select the binaries in the database
    architecture_ids: tuple[int, ...]
    # The removal message handed to remove() for this group
    message: str
    # Maps a package name to the architectures it should be removed from;
    # its items are what the reverse dependency check is run on
    removal_request: dict[str, list[str]]

90 

91 

def compute_sourceless_groups(suite_id: int, session: "Session") -> Iterable[Group]:
    """Yield a removal group for every binary without a source

    Each row returned by query_without_source() becomes a group of its own
    that covers exactly one package on the "all" architecture.

    :param suite_id: The id of the suite to look for sourceless binaries in
    :param session: The database session in use
    :return: The removal groups, produced lazily
    """
    candidates = query_without_source(suite_id, session)
    reason = "[auto-cruft] no longer built from source, no reverse dependencies"
    arch_all = get_architecture("all", session=session)
    assert arch_all is not None
    # These two objects are shared by all groups produced below
    only_all_ids = (arch_all.arch_id,)
    only_all = ["all"]
    for record in candidates:
        binary = record[0]
        sourceless_group: Group = {
            "name": "sourceless:%s" % binary,
            "packages": (binary,),
            "architectures": only_all,
            "architecture_ids": only_all_ids,
            "message": reason,
            "removal_request": {binary: only_all},
        }
        yield sourceless_group

117 

118 

def compute_nbs_groups(
    suite_id: int, suite_name: str, session: "Session"
) -> Iterable[Group]:
    """Yield a removal group for every set of binaries no longer built

    Each row returned by queryNBS() becomes one group named after the
    source listed in that row.

    :param suite_id: The id of the suite denoted by suite_name
    :param suite_name: The name of the suite to remove from
    :param session: The database session in use
    :return: The removal groups, produced lazily
    """
    candidates = queryNBS(suite_id, session)
    arch_id_by_name = {
        arch.arch_string: arch.arch_id for arch in get_suite_architectures(suite_name)
    }

    # The fourth column of a row is not needed here
    for binaries, arch_names, source, _ in candidates:
        reason = (
            "[auto-cruft] NBS (no longer built by %s, no reverse dependencies)" % source
        )
        # Every package of the row is requested on the same architecture list
        per_package = {binary: arch_names for binary in binaries}
        nbs_group: Group = {
            "name": "NBS:%s" % source,
            "packages": tuple(sorted(binaries)),
            "architectures": sorted(arch_names),
            "architecture_ids": tuple(arch_id_by_name[name] for name in arch_names),
            "message": reason,
            "removal_request": per_package,
        }
        yield nbs_group

148 

149 

def remove_groups(
    groups: Iterable[Group], suite_id: int, suite_name: str, session: "Session"
) -> None:
    """Remove the binaries of each given group from the suite

    For every group the matching (package, version, architecture, id) rows
    are looked up in the suite and handed to remove() as a partial removal,
    together with the message of the group.

    :param groups: The removal groups to process
    :param suite_id: The id of the suite denoted by suite_name
    :param suite_name: The name of the suite to remove from
    :param session: The database session in use
    """
    lookup_binaries = """
            SELECT b.package, b.version, a.arch_string, b.id
            FROM binaries b
                JOIN bin_associations ba ON b.id = ba.bin
                JOIN architecture a ON b.architecture = a.id
                JOIN suite su ON ba.suite = su.id
            WHERE a.id IN :architecture_ids AND b.package IN :packages AND su.id = :suite_id
            """
    for group in groups:
        bind_values = {
            "architecture_ids": group["architecture_ids"],
            "packages": group["packages"],
            "suite_id": suite_id,
        }
        matches: Result[tuple[str, str, str, int]] = session.execute(
            sql.text(lookup_binaries), bind_values
        )
        remove(
            session,
            group["message"],
            [suite_name],
            list(matches),
            partial=True,
            whoami="DAK's auto-decrufter",
        )

182 

183 

184def dedup[T: Hashable](*args: Iterable[T]) -> Iterable[T]: 

185 seen = set() 

186 for iterable in args: 

187 for value in iterable: 

188 if value not in seen: 

189 seen.add(value) 

190 yield value 

191 

192 

def merge_group(groupA: Group, groupB: Group) -> Group:
    """Combine two removal groups into a new one

    The name and the message of the result are those of groupA.  Packages,
    architectures, architecture ids and the per-package removal requests
    are the union of what both groups contain.

    :param groupA: A removal group
    :param groupB: Another removal group
    :return: A merged group
    """
    requests_a = groupA["removal_request"]
    requests_b = groupB["removal_request"]
    # Packages of groupA come first, followed by those only groupB requests
    merged_requests = {
        pkg: sorted(dedup(requests_a.get(pkg, []), requests_b.get(pkg, [])))
        for pkg in dedup(requests_a, requests_b)
    }
    merged: Group = {
        "name": groupA["name"],
        "packages": tuple(sorted(dedup(groupA["packages"], groupB["packages"]))),
        "architectures": sorted(
            dedup(groupA["architectures"], groupB["architectures"])
        ),
        # The ids keep their first-seen order; they are not sorted
        "architecture_ids": tuple(
            dedup(groupA["architecture_ids"], groupB["architecture_ids"])
        ),
        "message": groupA["message"],
        "removal_request": merged_requests,
    }
    return merged

223 

224 

def auto_decruft_suite(
    suite_name: str, suite_id: int, session: "Session", dryrun: bool, debug: bool
) -> None:
    """Run the auto-decrufter on a given suite

    The sourceless and NBS removal groups are collected first.  Groups whose
    removal would break reverse dependencies are then dropped, and the check
    is repeated with the remaining groups until it reports no breakage or no
    group is left.  What survives is removed (or printed in a dry run).

    :param suite_name: The name of the suite to remove from
    :param suite_id: The id of the suite denoted by suite_name
    :param session: The database session in use
    :param dryrun: If True, just print the actions rather than actually doing them
    :param debug: If True, print some extra information
    """
    suite_archs = [a.arch_string for a in get_suite_architectures(suite_name)]
    # (package, architecture) -> names of the groups that want it removed
    pkg_arch2groups = defaultdict(set)
    group_order = []
    groups = {}
    candidates = chain(
        compute_sourceless_groups(suite_id, session),
        compute_nbs_groups(suite_id, suite_name, session),
    )
    for candidate in candidates:
        name = candidate["name"]
        affected_archs = candidate["architectures"]
        # If we remove an arch:all package, then the breakage can occur on any
        # of the architectures.
        if "all" in affected_archs:
            affected_archs = suite_archs
        for pkg_arch in product(candidate["packages"], affected_archs):
            pkg_arch2groups[pkg_arch].add(name)
        if name in groups:
            # This case usually happens when versions differ between architectures...
            if debug:
                print("N: Merging group %s" % (name))
            groups[name] = merge_group(groups[name], candidate)
        else:
            groups[name] = candidate
            group_order.append(name)

    full_removal_request: list[tuple[str, list[str]]] = [
        item
        for name in group_order
        for item in groups[name]["removal_request"].items()
    ]

    if not groups:
        if debug:
            print("N: Found no candidates")
        return

    if debug:
        print("N: Considering to remove the following packages:")
        for name in sorted(groups):
            info = groups[name]
            print(
                "N: * %s: %s [%s]"
                % (name, ", ".join(info["packages"]), " ".join(info["architectures"]))
            )
        print("N: Compiling ReverseDependencyChecker (RDC) - please hold ...")
    rdc = ReverseDependencyChecker(session, suite_name)
    if debug:
        print("N: Computing initial breakage...")

    breakage = rdc.check_reverse_depends(full_removal_request)
    while breakage:
        # Largest breakage first
        by_breakers = sorted(
            ((len(breakage[key]), key, breakage[key]) for key in breakage),
            reverse=True,
        )
        if debug:
            print(
                "N: - Removal would break %s (package, architecture)-pairs"
                % (len(breakage))
            )
            print("N: - full breakage:")
            for _, breaker, broken in by_breakers:
                breaker_str = "%s/%s" % breaker
                broken_str = ", ".join("%s/%s" % b for b in sorted(broken))
                print("N: * %s => %s" % (breaker_str, broken_str))

        averted_breakage: set[tuple[str, str]] = set()

        for _, package_arch, broken in by_breakers:
            if broken <= averted_breakage:
                # We already avoided this break
                continue
            guilty_groups = pkg_arch2groups[package_arch]

            if not guilty_groups:
                utils.fubar("Cannot figure what group provided %s" % str(package_arch))

            # Only output it, if it truly a new group being discarded
            # - a group can reach this part multiple times, if it breaks things on
            #   more than one architecture.  This being rather common in fact.
            if debug and any(name for name in guilty_groups if name in groups):
                avoided = sorted(broken - averted_breakage)
                print(
                    "N: - skipping removal of %s (breakage: %s)"
                    % (", ".join(sorted(guilty_groups)), str(avoided))
                )

            averted_breakage |= broken
            for name in guilty_groups:
                groups.pop(name, None)

        if not groups:
            if debug:
                print("N: Nothing left to remove")
            return

        if debug:
            print(
                "N: Now considering to remove: %s"
                % str(", ".join(sorted(groups.keys())))
            )

        # Rebuild the removal request with the remaining groups and off
        # we go to (not) break the world once more time
        full_removal_request = [
            item
            for info in groups.values()
            for item in info["removal_request"].items()
        ]
        breakage = rdc.check_reverse_depends(full_removal_request)

    if debug:
        print("N: Removal looks good")

    if not dryrun:
        remove_groups(groups.values(), suite_id, suite_name, session)
        return

    print("Would remove the equivalent of:")
    for name in group_order:
        if name not in groups:
            # Dropped above because its removal would have broken something
            continue
        info = groups[name]

        # Embed the -R just in case someone wants to run it manually later
        print(
            ' dak rm -m "{message}" -s {suite} -a {architectures} -p -R -b {packages}'.format(
                message=info["message"],
                suite=suite_name,
                architectures=",".join(info["architectures"]),
                packages=" ".join(info["packages"]),
            )
        )

    print()
    print(
        "Note: The removals may be interdependent. A non-breaking result may require the execution of all"
    )
    print("of the removals")

383 

384 

def sources2removals(
    source_list: Iterable[str], suite_id: int, session: "Session"
) -> list[tuple[str, str, str, int]]:
    """Compute removals items given a list of names of source packages

    The result holds one (name, version, architecture, id) row for every
    matching source in the suite (with the architecture given as "source"),
    followed by one row for every binary in the suite built from such a
    source.

    :param source_list: The names of the source packages; may be empty
    :param suite_id: The id of the suite from which these sources should be removed
    :param session: The database session in use
    :return: A list of items to be removed to remove all sources and their
             binaries from the given suite; empty if no source was given
    """
    sources = tuple(source_list)
    if not sources:
        # An empty "IN ()" list is not valid SQL, and without any name there
        # is nothing that could match, so skip the queries altogether.
        return []

    params = {"suite_id": suite_id, "sources": sources}
    removals = list(
        session.execute(
            sql.text(
                """
                SELECT s.source, s.version, 'source', s.id
                FROM source s
                    JOIN src_associations sa ON sa.source = s.id
                WHERE sa.suite = :suite_id AND s.source IN :sources"""
            ),
            params,
        )
    )
    # The binaries are queried only after the source rows have been read
    removals.extend(
        session.execute(
            sql.text(
                """
                SELECT b.package, b.version, a.arch_string, b.id
                FROM binaries b
                    JOIN bin_associations ba ON b.id = ba.bin
                    JOIN architecture a ON b.architecture = a.id
                    JOIN source s ON b.source = s.id
                WHERE ba.suite = :suite_id AND s.source IN :sources"""
            ),
            params,
        )
    )
    return removals

420 

421 

def decruft_newer_version_in(
    othersuite: str,
    suite_name: str,
    suite_id: int,
    rm_msg: str,
    session: "Session",
    dryrun: bool,
    decruft_equal_versions: bool,
) -> None:
    """Remove the sources reported by newer_version() from a suite

    The first column of every row returned by newer_version() is taken as
    the name of a source package.  If there are any, they are removed from
    the suite together with their binaries (see sources2removals()), or the
    equivalent "dak rm" command is printed in a dry run.

    :param othersuite: The name of the suite to compare with (e.g. "unstable" for "NVIU")
    :param suite_name: The name of the suite from which to do removals (e.g. "experimental" for "NVIU")
    :param suite_id: The id of the suite from which these sources should be removed
    :param rm_msg: The removal message (or tag, e.g. "NVIU")
    :param session: The database session in use
    :param dryrun: If True, just print the actions rather than actually doing them
    :param decruft_equal_versions: If True, use >= instead of > for finding decruftable packages.
    """
    outdated = newer_version(
        othersuite, suite_name, session, include_equal=decruft_equal_versions
    )
    nvi_list = [entry[0] for entry in outdated]
    if not nvi_list:
        return

    message = "[auto-cruft] %s" % rm_msg
    if dryrun:
        print(
            ' dak rm -m "%s" -s %s %s'
            % (message, suite_name, " ".join(nvi_list))
        )
        return

    removals = sources2removals(nvi_list, suite_id, session)
    remove(session, message, [suite_name], removals, whoami="DAK's auto-decrufter")

459 

460 

461################################################################################ 

462 

463 

def main() -> None:
    """Entry point of ``dak auto-decruft``

    Parses the command line into the module-level Options, resolves the
    suite(s) involved, runs the auto-decrufter on the selected suite and,
    if --if-newer-version-in was given, the newer-version decruft as well.
    The session is committed at the end unless this is a dry run.
    """
    global Options
    cnf = Config()

    Arguments = [
        ("h", "help", "Auto-Decruft::Options::Help"),
        ("n", "dry-run", "Auto-Decruft::Options::Dry-Run"),
        ("d", "debug", "Auto-Decruft::Options::Debug"),
        ("s", "suite", "Auto-Decruft::Options::Suite", "HasArg"),
        # The "\0" seems to be the only way to disable short options.
        ("\0", "if-newer-version-in", "Auto-Decruft::Options::OtherSuite", "HasArg"),
        (
            "\0",
            "if-newer-version-in-rm-msg",
            "Auto-Decruft::Options::OtherSuiteRMMsg",
            "HasArg",
        ),
        (
            "\0",
            "decruft-equal-versions",
            "Auto-Decruft::Options::OtherSuiteDecruftEqual",
        ),
    ]
    # Give every option that is not configured an empty (false) value, so
    # that it can be looked up below
    for i in [
        "help",
        "Dry-Run",
        "Debug",
        "OtherSuite",
        "OtherSuiteRMMsg",
        "OtherSuiteDecruftEqual",
    ]:
        key = "Auto-Decruft::Options::%s" % i
        if key not in cnf:
            cnf[key] = ""

    cnf["Auto-Decruft::Options::Suite"] = cnf.get("Dinstall::DefaultSuite", "unstable")

    apt_pkg.parse_commandline(cnf.Cnf, Arguments, sys.argv)  # type: ignore[attr-defined]

    Options = cnf.subtree("Auto-Decruft::Options")
    if Options["Help"]:
        usage()

    dryrun = bool(Options["Dry-Run"])
    debug = bool(Options["Debug"])
    decruft_equal_versions = bool(Options["OtherSuiteDecruftEqual"])

    if Options["OtherSuite"] and not Options["OtherSuiteRMMsg"]:
        utils.fubar("--if-newer-version-in requires --if-newer-version-in-rm-msg")

    session = DBConn().session()

    suite = get_suite(Options["Suite"].lower(), session)
    if not suite:
        utils.fubar("Cannot find suite %s" % Options["Suite"].lower())

    suite_id = suite.suite_id
    suite_name = suite.suite_name.lower()

    # Resolve the suite to compare with before anything is removed, and
    # report an unknown name the same way as for the suite above instead of
    # relying on an assert (which is skipped when running with -O).
    osuite_name = None
    if Options["OtherSuite"]:
        osuite = get_suite(Options["OtherSuite"].lower(), session)
        if not osuite:
            utils.fubar("Cannot find suite %s" % Options["OtherSuite"].lower())
        osuite_name = osuite.suite_name

    auto_decruft_suite(suite_name, suite_id, session, dryrun, debug)

    if osuite_name is not None:
        decruft_newer_version_in(
            osuite_name,
            suite_name,
            suite_id,
            Options["OtherSuiteRMMsg"],
            session,
            dryrun,
            decruft_equal_versions,
        )

    if not dryrun:
        session.commit()

547 

548 

549################################################################################ 

550 

551 

# Run the tool only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()