Coverage for dak/clean_suites.py: 72%

212 statements  

« prev     ^ index     » next       coverage.py v7.6.0, created at 2026-01-04 16:18 +0000

1#! /usr/bin/env python3 

2 

3"""Cleans up unassociated binary and source packages 

4 

5@contact: Debian FTPMaster <ftpmaster@debian.org> 

6@copyright: 2000, 2001, 2002, 2003, 2006 James Troup <james@nocrew.org> 

7@copyright: 2009 Mark Hymers <mhy@debian.org> 

8@copyright: 2010 Joerg Jaspert <joerg@debian.org> 

9@license: GNU General Public License version 2 or later 

10""" 

11 

12# This program is free software; you can redistribute it and/or modify 

13# it under the terms of the GNU General Public License as published by 

14# the Free Software Foundation; either version 2 of the License, or 

15# (at your option) any later version. 

16 

17# This program is distributed in the hope that it will be useful, 

18# but WITHOUT ANY WARRANTY; without even the implied warranty of 

19# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

20# GNU General Public License for more details. 

21 

22# You should have received a copy of the GNU General Public License 

23# along with this program; if not, write to the Free Software 

24# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 

25 

26################################################################################ 

27 

28# 07:05|<elmo> well.. *shrug*.. no, probably not.. but to fix it, 

29# | we're going to have to implement reference counting 

30# | through dependencies.. do we really want to go down 

31# | that road? 

32# 

33# 07:05|<Culus> elmo: Augh! <brain jumps out of skull> 

34 

35################################################################################ 

36 

37import errno 

38import os 

39import stat 

40import sys 

41from datetime import datetime 

42 

43import apt_pkg 

44import sqlalchemy.sql as sql 

45 

46from daklib import daklog, utils 

47from daklib.config import Config 

48from daklib.dbconn import Archive, ArchiveFile, DBConn 

49 

50################################################################################ 

51 

# Parsed "Clean-Suites::Options" configuration subtree; assigned in main().
Options: apt_pkg.Configuration
# Logger shared by all clean-up steps; assigned in main().
Logger: daklog.Logger

54 

55################################################################################ 

56 

57 

def usage(exit_code=0):
    """Print the command-line help of ``dak clean-suites`` and exit.

    The help text lists every option that main() registers with the
    command-line parser, including ``-a/--archive``.

    @param exit_code: exit status passed to sys.exit()
    """
    print(
        """Usage: dak clean-suites [OPTIONS]
Clean old packages from suites.

  -n, --no-action            don't do anything
  -h, --help                 show this help and exit
  -a, --archive              only remove pool files of the given
                             (comma-separated) archive(s)
  -m, --maximum              maximum number of files to remove"""
    )
    sys.exit(exit_code)

68 

69 

70################################################################################ 

71 

72 

def check_binaries(now_date, session):
    """Refresh ``last_used`` on the archive files of binary packages.

    A single UPDATE handles both directions:

      - files whose binary is no longer associated with any suite of the
        same archive get ``last_used`` set to ``now_date``;
      - files that were marked earlier but are associated again get
        ``last_used`` reset to NULL.

    Every changed row is logged.

    @param now_date: timestamp bound to ``:last_used``
    @param session: database session the statement is executed on
    """
    Logger.log(["Checking for orphaned binary packages..."])

    statement = sql.text(
        """
    WITH usage AS (
      SELECT
        af.archive_id AS archive_id,
        af.file_id AS file_id,
        af.component_id AS component_id,
        BOOL_OR(EXISTS (SELECT 1 FROM bin_associations ba
                        JOIN suite s ON ba.suite = s.id
                        WHERE ba.bin = b.id
                          AND s.archive_id = af.archive_id))
          AS in_use
      FROM files_archive_map af
      JOIN binaries b ON af.file_id = b.file
      GROUP BY af.archive_id, af.file_id, af.component_id
    )

    UPDATE files_archive_map af
       SET last_used = CASE WHEN usage.in_use THEN NULL ELSE :last_used END
      FROM usage, files f, archive
     WHERE af.archive_id = usage.archive_id AND af.file_id = usage.file_id AND af.component_id = usage.component_id
       AND ((af.last_used IS NULL AND NOT usage.in_use) OR (af.last_used IS NOT NULL AND usage.in_use))
       AND af.file_id = f.id
       AND af.archive_id = archive.id
    RETURNING archive.name, f.filename, af.last_used IS NULL"""
    )

    changed = session.execute(statement, {"last_used": now_date})
    # Third column is true when last_used ended up NULL, i.e. the mark
    # was removed rather than set.
    for archive_name, filename, was_unset in changed:
        op = "unset lastused" if was_unset else "set lastused"
        Logger.log([op, archive_name, filename])

112 

113 

114######################################## 

115 

116 

def check_sources(now_date, session):
    """Refresh ``last_used`` on the archive files of source packages.

    A source file counts as in use within an archive when its source is

      - associated with a suite of that archive, or
      - referenced by a binary whose file in that archive is unmarked or
        was marked after the archive's delete date, or
      - listed in extra_src_references for a binary that is associated
        with a suite of that archive.

    Unused files get ``last_used`` set to ``now_date``; files that were
    marked but are in use again get it reset to NULL. Every changed row
    is logged.

    @param now_date: timestamp bound to ``:last_used``
    @param session: database session the statement is executed on; the
                    query reads the archive_delete_date table
    """
    Logger.log(["Checking for orphaned source packages..."])

    # TODO: the UPDATE part is the same as in check_binaries. Merge?

    statement = sql.text(
        """
    WITH usage AS (
      SELECT
        af.archive_id AS archive_id,
        af.file_id AS file_id,
        af.component_id AS component_id,
        BOOL_OR(EXISTS (SELECT 1 FROM src_associations sa
                         JOIN suite s ON sa.suite = s.id
                         WHERE sa.source = df.source
                           AND s.archive_id = af.archive_id)
          OR EXISTS (SELECT 1 FROM files_archive_map af_bin
                              JOIN binaries b ON af_bin.file_id = b.file
                             WHERE b.source = df.source
                               AND af_bin.archive_id = af.archive_id
                               AND (af_bin.last_used IS NULL OR af_bin.last_used > ad.delete_date))
          OR EXISTS (SELECT 1 FROM extra_src_references esr
                         JOIN bin_associations ba ON esr.bin_id = ba.bin
                         JOIN binaries b ON ba.bin = b.id
                         JOIN suite s ON ba.suite = s.id
                         WHERE esr.src_id = df.source
                           AND s.archive_id = af.archive_id))
          AS in_use
      FROM files_archive_map af
      JOIN dsc_files df ON af.file_id = df.file
      JOIN archive_delete_date ad ON af.archive_id = ad.archive_id
      GROUP BY af.archive_id, af.file_id, af.component_id
    )

    UPDATE files_archive_map af
       SET last_used = CASE WHEN usage.in_use THEN NULL ELSE :last_used END
      FROM usage, files f, archive
     WHERE af.archive_id = usage.archive_id AND af.file_id = usage.file_id AND af.component_id = usage.component_id
       AND ((af.last_used IS NULL AND NOT usage.in_use) OR (af.last_used IS NOT NULL AND usage.in_use))
       AND af.file_id = f.id
       AND af.archive_id = archive.id

    RETURNING archive.name, f.filename, af.last_used IS NULL
    """
    )

    changed = session.execute(statement, {"last_used": now_date})
    # Third column is true when last_used ended up NULL, i.e. the mark
    # was removed rather than set.
    for archive_name, filename, was_unset in changed:
        op = "unset lastused" if was_unset else "set lastused"
        Logger.log([op, archive_name, filename])

173 

174 

175######################################## 

176 

177 

def check_files(now_date, session):
    """Mark archive files that belong to neither a binary nor a source.

    FIXME: this is evil; nothing should ever be in this state, and if
    something is, it's a bug. It does happen sometimes, though, so a loud
    warning is printed and the file is marked for deletion. This probably
    masks a bug somewhere else but is better than collecting cruft forever.

    The transaction is committed unless the No-Action option is set.

    @param now_date: timestamp stored as ``last_used`` on orphaned files
    @param session: database session the statement is executed on
    """
    Logger.log(["Checking for unused files..."])

    statement = sql.text(
        """
    UPDATE files_archive_map af
       SET last_used = :last_used
      FROM files f, archive
     WHERE af.file_id = f.id
       AND af.archive_id = archive.id
       AND NOT EXISTS (SELECT 1 FROM binaries b WHERE b.file = af.file_id)
       AND NOT EXISTS (SELECT 1 FROM dsc_files df WHERE df.file = af.file_id)
       AND af.last_used IS NULL
    RETURNING archive.name, f.filename"""
    )
    orphans = session.execute(statement, {"last_used": now_date})

    for row in orphans:
        utils.warn("orphaned file: {0}".format(row))
        Logger.log(["set lastused", row[0], row[1], "ORPHANED FILE"])

    if Options["No-Action"]:
        return
    session.commit()

209 

210 

def clean_binaries(now_date, session):
    """Delete rows from ``binaries`` whose file is expired in every archive.

    A binary is deleted when no files_archive_map row for its file is
    either unmarked or marked after the archive's delete date. This runs
    before check_sources() so that the binaries removed here can have
    their source removed as well (if possible).

    XXX: why doesn't this remove the files here as well? Keeping it
    separate does not seem to buy anything.

    @param now_date: unused here; kept for a uniform step signature
    @param session: database session the statement is executed on; no
                    commit happens in this function
    """
    Logger.log(["Deleting from binaries table... "])

    statement = sql.text(
        """
    DELETE FROM binaries b
      USING files f
      WHERE f.id = b.file
        AND NOT EXISTS (SELECT 1 FROM files_archive_map af
                                 JOIN archive_delete_date ad ON af.archive_id = ad.archive_id
                                WHERE af.file_id = b.file
                                  AND (af.last_used IS NULL OR af.last_used > ad.delete_date))
      RETURNING f.filename
    """
    )
    for deleted in session.execute(statement):
        Logger.log(["delete binary", deleted[0]])

235 

236 

237######################################## 

238 

239 

def clean(now_date, archives, max_delete, session):
    """Delete expired sources and remove expired files from the pool.

    Steps, in order:

      1. delete ``source`` rows (and their ``dsc_files`` rows) whose .dsc
         is expired in every archive, and mark the files that thereby
         become unreferenced for immediate removal;
      2. for every files_archive_map entry whose ``last_used`` is not
         later than its archive's delete date, unlink the file or move it
         to the morgue, then delete the entry;
      3. delete ``files`` rows no longer referenced by any archive.

    With the No-Action option set nothing is committed and the filesystem
    is left untouched.

    @param now_date: object with ``year``, ``month`` and ``day``
                     attributes; used to build the morgue sub-directory
    @param archives: iterable of objects with an ``archive_id`` attribute
                     restricting step 2, or None for all archives
    @param max_delete: maximum number of entries handled in step 2, or
                       None for no limit
    @param session: database session used for all statements
    """
    cnf = Config()

    count = 0
    size = 0

    Logger.log(["Cleaning out packages..."])

    # NOTE(review): the fallback joins the literal string "Dir::Pool", not
    # the configured value of Dir::Pool -- confirm which was intended.
    morguedir = cnf.get("Dir::Morgue", os.path.join("Dir::Pool", "morgue"))
    morguesubdir = cnf.get("Clean-Suites::MorgueSubDir", "pool")

    # Build directory as morguedir/morguesubdir/year/month/day
    dest = os.path.join(
        morguedir,
        morguesubdir,
        str(now_date.year),
        "%.2d" % now_date.month,
        "%.2d" % now_date.day,
    )

    if not Options["No-Action"]:
        # exist_ok avoids the check-then-create race of a separate
        # os.path.exists() test.
        os.makedirs(dest, exist_ok=True)

    # Delete from source
    Logger.log(["Deleting from source table..."])
    q = session.execute(
        sql.text(
            """
      WITH
      deleted_sources AS (
        DELETE FROM source
         USING files f
         WHERE source.file = f.id
           AND NOT EXISTS (SELECT 1 FROM files_archive_map af
                                    JOIN archive_delete_date ad ON af.archive_id = ad.archive_id
                                   WHERE af.file_id = source.file
                                     AND (af.last_used IS NULL OR af.last_used > ad.delete_date))
        RETURNING source.id AS id, f.filename AS filename
      ),
      deleted_dsc_files AS (
        DELETE FROM dsc_files df WHERE df.source IN (SELECT id FROM deleted_sources)
        RETURNING df.file AS file_id
      ),
      now_unused_source_files AS (
        UPDATE files_archive_map af
           SET last_used = '1977-03-13 13:37:42' -- Kill it now. We waited long enough before removing the .dsc.
         WHERE af.file_id IN (SELECT file_id FROM deleted_dsc_files)
           AND NOT EXISTS (SELECT 1 FROM dsc_files df WHERE df.file = af.file_id)
      )
      SELECT filename FROM deleted_sources"""
        )
    )
    for s in q:
        Logger.log(["delete source", s[0]])

    if not Options["No-Action"]:
        session.commit()

    # Delete files from the pool
    old_files = (
        session.query(ArchiveFile)
        .filter(
            sql.text(
                "files_archive_map.last_used <= (SELECT delete_date FROM archive_delete_date ad WHERE ad.archive_id = files_archive_map.archive_id)"
            )
        )
        .join(Archive)
    )

    # All filter criteria must be in place before the LIMIT: SQLAlchemy
    # refuses filter() on a query that already has LIMIT applied, so the
    # archive restriction comes first.
    if archives is not None:
        archive_ids = [a.archive_id for a in archives]
        old_files = old_files.filter(ArchiveFile.archive_id.in_(archive_ids))

    if max_delete is not None:
        old_files = old_files.limit(max_delete)
        Logger.log(["Limiting removals to %d" % max_delete])

    for af in old_files:
        filename = af.path
        try:
            st = os.lstat(filename)
        except FileNotFoundError:
            Logger.log(["database referred to non-existing file", filename])
            session.delete(af)
            continue
        Logger.log(["delete archive file", filename])
        if stat.S_ISLNK(st.st_mode):
            count += 1
            Logger.log(["delete symlink", filename])
            if not Options["No-Action"]:
                os.unlink(filename)
                session.delete(af)
        elif stat.S_ISREG(st.st_mode):
            size += st.st_size
            count += 1

            dest_filename = dest + "/" + os.path.basename(filename)
            # If the destination file exists; try to find another filename to use
            if os.path.lexists(dest_filename):
                dest_filename = utils.find_next_free(dest_filename)

            if not Options["No-Action"]:
                if af.archive.use_morgue:
                    Logger.log(["move to morgue", filename, dest_filename])
                    utils.move(filename, dest_filename)
                else:
                    Logger.log(["removed file", filename])
                    os.unlink(filename)
                session.delete(af)

        else:
            utils.fubar("%s is neither symlink nor file?!" % (filename))

    if count > 0:
        Logger.log(["total", count, utils.size_type(size)])

    # Delete entries in files no longer referenced by any archive
    query = """
       DELETE FROM files f
        WHERE NOT EXISTS (SELECT 1 FROM files_archive_map af WHERE af.file_id = f.id)
    """
    session.execute(sql.text(query))

    if not Options["No-Action"]:
        session.commit()

364 

365 

366################################################################################ 

367 

368 

def clean_maintainers(now_date, session):
    """Delete maintainer rows that nothing references any more.

    A maintainer is unused when no binary, no source (as maintainer or
    changed-by) and no src_uploaders row points at it. Each candidate is
    logged; rows are deleted and the transaction committed only when the
    No-Action option is not set.

    @param now_date: unused here; kept for a uniform step signature
    @param session: database session used for all statements
    """
    Logger.log(["Cleaning out unused Maintainer entries..."])

    # TODO Replace this whole thing with one SQL statement
    candidates = session.execute(
        sql.text(
            """
SELECT m.id, m.name FROM maintainer m
  WHERE NOT EXISTS (SELECT 1 FROM binaries b WHERE b.maintainer = m.id)
    AND NOT EXISTS (SELECT 1 FROM source s WHERE s.maintainer = m.id OR s.changedby = m.id)
    AND NOT EXISTS (SELECT 1 FROM src_uploaders u WHERE u.maintainer = m.id)"""
        )
    ).fetchall()

    count = 0
    for row in candidates:
        Logger.log(["delete maintainer", row[1]])
        if not Options["No-Action"]:
            session.execute(
                sql.text("DELETE FROM maintainer WHERE id = :maint"),
                {"maint": row[0]},
            )
        count += 1

    if not Options["No-Action"]:
        session.commit()

    if count > 0:
        Logger.log(["total", count])

400 

401 

402################################################################################ 

403 

404 

def clean_fingerprints(now_date, session):
    """Delete fingerprint rows that nothing references any more.

    A fingerprint is unused when it has no keyring and no binary, source
    (as signer or authorizing fingerprint) or acl_per_source row points
    at it. Each candidate is logged; rows are deleted and the transaction
    committed only when the No-Action option is not set.

    @param now_date: unused here; kept for a uniform step signature
    @param session: database session used for all statements
    """
    Logger.log(["Cleaning out unused fingerprint entries..."])

    # TODO Replace this whole thing with one SQL statement
    candidates = session.execute(
        sql.text(
            """
SELECT f.id, f.fingerprint FROM fingerprint f
  WHERE f.keyring IS NULL
    AND NOT EXISTS (SELECT 1 FROM binaries b WHERE b.sig_fpr = f.id)
    AND NOT EXISTS (SELECT 1 FROM source s WHERE s.sig_fpr = f.id OR s.authorized_by_fingerprint_id = f.id)
    AND NOT EXISTS (SELECT 1 FROM acl_per_source aps WHERE aps.created_by_id = f.id)"""
        )
    ).fetchall()

    count = 0
    for row in candidates:
        Logger.log(["delete fingerprint", row[1]])
        if not Options["No-Action"]:
            session.execute(
                sql.text("DELETE FROM fingerprint WHERE id = :fpr"),
                {"fpr": row[0]},
            )
        count += 1

    if not Options["No-Action"]:
        session.commit()

    if count > 0:
        Logger.log(["total", count])

437 

438 

439################################################################################ 

440 

441 

def clean_byhash(now_date, session):
    """Remove by-hash files whose grace period has passed.

    Deletes ``hashfile`` rows whose ``unreferenced`` timestamp plus the
    archive's ``stayofexecution`` lies in the past. Unless the No-Action
    option is set, the matching files under ``<archive path>/dists/`` are
    unlinked and the transaction is committed; a file that is already
    gone is only logged.

    @param now_date: unused here; the query compares against
                     CURRENT_TIMESTAMP
    @param session: database session used for the statement
    """
    Logger.log(["Cleaning out unused by-hash files..."])

    result = session.execute(
        sql.text(
            """
        DELETE FROM hashfile h
        USING suite s, archive a
        WHERE s.id = h.suite_id
          AND a.id = s.archive_id
          AND h.unreferenced + a.stayofexecution < CURRENT_TIMESTAMP
        RETURNING a.path, s.suite_name, h.path"""
        )
    )
    count = result.rowcount

    if not Options["No-Action"]:
        for row in result:
            base, suite, path = row
            filename = os.path.join(
                base, "dists", suite, utils.suite_suffix(suite), path
            )
            try:
                os.unlink(filename)
            except FileNotFoundError:
                # Any other OSError propagates to the caller.
                Logger.log(["database referred to non-existing file", filename])
            else:
                Logger.log(["delete hashfile", suite, path])
        session.commit()

    if count > 0:
        Logger.log(["total", count])

474 

475 

476################################################################################ 

477 

478 

def clean_empty_directories(session):
    """
    Removes empty directories from pool directories.

    Every distinct ``archive.path`` is walked bottom-up; a directory with
    neither files nor sub-directories is removed with os.removedirs(),
    which also prunes parents that become empty. With the No-Action
    option set directories are only counted, not removed.

    @param session: database session used to look up the archive paths
    """

    Logger.log(["Cleaning out empty directories..."])

    rows = session.execute(
        sql.text("""SELECT DISTINCT(path) FROM archive""")
    ).fetchall()
    bases = [row[0] for row in rows]

    removed = 0
    for base in bases:
        for dirpath, dirnames, filenames in os.walk(base, topdown=False):
            if filenames or dirnames:
                continue
            to_remove = os.path.join(base, dirpath)
            if not Options["No-Action"]:
                Logger.log(["removing directory", to_remove])
                os.removedirs(to_remove)
            removed += 1

    if removed:
        Logger.log(["total removed directories", removed])

502 

503 

504################################################################################ 

505 

506 

def set_archive_delete_dates(now_date, session):
    """Create and fill the temporary ``archive_delete_date`` table.

    The table gets one row per archive holding ``now_date`` minus that
    archive's ``stayofexecution``. The queries of the later clean-up
    steps read it.

    @param now_date: reference timestamp bound to ``:now_date``
    @param session: database session the table is created in
    """
    create_table = sql.text(
        """
      CREATE TEMPORARY TABLE archive_delete_date (
        archive_id INT NOT NULL,
        delete_date TIMESTAMP NOT NULL
      )"""
    )
    fill_table = sql.text(
        """
      INSERT INTO archive_delete_date
        (archive_id, delete_date)
      SELECT
        archive.id, :now_date - archive.stayofexecution
      FROM archive"""
    )

    session.execute(create_table)
    session.execute(fill_table, {"now_date": now_date})

    session.flush()

531 

532 

533################################################################################ 

534 

535 

def main():
    """Entry point of ``dak clean-suites``.

    Parses the command line, sets up the module-level Options and Logger,
    then runs every clean-up step in a fixed order on one database
    session. The steps commit their own work; whatever is still pending
    at the end is rolled back.
    """
    global Options, Logger

    cnf = Config()

    # Give every option a default so later lookups cannot fail.
    for name in ["Help", "No-Action", "Maximum"]:
        key = "Clean-Suites::Options::%s" % name
        if key not in cnf:
            cnf[key] = ""

    arguments = [
        ("h", "help", "Clean-Suites::Options::Help"),
        ("a", "archive", "Clean-Suites::Options::Archive", "HasArg"),
        ("n", "no-action", "Clean-Suites::Options::No-Action"),
        ("m", "maximum", "Clean-Suites::Options::Maximum", "HasArg"),
    ]

    apt_pkg.parse_commandline(cnf.Cnf, arguments, sys.argv)  # type: ignore[attr-defined]
    Options = cnf.subtree("Clean-Suites::Options")

    maximum = cnf["Clean-Suites::Options::Maximum"]
    if maximum == "":
        max_delete = None
    else:
        try:
            # Only use Maximum if it's an integer
            max_delete = int(maximum)
            if max_delete < 1:
                utils.fubar("If given, Maximum must be at least 1")
        except ValueError:
            utils.fubar("If given, Maximum must be an integer")

    if Options["Help"]:
        usage()

    no_action = Options["No-Action"]
    program = "clean-suites (no action)" if no_action else "clean-suites"
    Logger = daklog.Logger(program, debug=no_action)

    session = DBConn().session()

    archives = None
    if "Archive" in Options:
        archive_names = Options["Archive"].split(",")
        archive_query = session.query(Archive).filter(
            Archive.archive_name.in_(archive_names)
        )
        archives = archive_query.all()
        if not archives:
            utils.fubar("Unknown archive.")

    now_date = datetime.now()

    set_archive_delete_dates(now_date, session)

    check_binaries(now_date, session)
    clean_binaries(now_date, session)
    check_sources(now_date, session)
    check_files(now_date, session)
    clean(now_date, archives, max_delete, session)
    clean_maintainers(now_date, session)
    clean_fingerprints(now_date, session)
    clean_byhash(now_date, session)
    clean_empty_directories(session)

    session.rollback()

    Logger.close()

603 

604 

605################################################################################ 

606 

607 

# Run the clean-up when this module is executed as a script.
if __name__ == "__main__":
    main()