Coverage for dak/clean_suites.py: 72%
212 statements
« prev ^ index » next coverage.py v7.6.0, created at 2026-01-04 16:18 +0000
« prev ^ index » next coverage.py v7.6.0, created at 2026-01-04 16:18 +0000
1#! /usr/bin/env python3
3"""Cleans up unassociated binary and source packages
5@contact: Debian FTPMaster <ftpmaster@debian.org>
6@copyright: 2000, 2001, 2002, 2003, 2006 James Troup <james@nocrew.org>
7@copyright: 2009 Mark Hymers <mhy@debian.org>
8@copyright: 2010 Joerg Jaspert <joerg@debian.org>
9@license: GNU General Public License version 2 or later
10"""
12# This program is free software; you can redistribute it and/or modify
13# it under the terms of the GNU General Public License as published by
14# the Free Software Foundation; either version 2 of the License, or
15# (at your option) any later version.
17# This program is distributed in the hope that it will be useful,
18# but WITHOUT ANY WARRANTY; without even the implied warranty of
19# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20# GNU General Public License for more details.
22# You should have received a copy of the GNU General Public License
23# along with this program; if not, write to the Free Software
24# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
26################################################################################
28# 07:05|<elmo> well.. *shrug*.. no, probably not.. but to fix it,
29# | we're going to have to implement reference counting
30# | through dependencies.. do we really want to go down
31# | that road?
32#
33# 07:05|<Culus> elmo: Augh! <brain jumps out of skull>
35################################################################################
37import errno
38import os
39import stat
40import sys
41from datetime import datetime
43import apt_pkg
44import sqlalchemy.sql as sql
46from daklib import daklog, utils
47from daklib.config import Config
48from daklib.dbconn import Archive, ArchiveFile, DBConn
50################################################################################
# Module-level state shared by every function in this file.  Both names are
# only declared here; main() binds them (via ``global``) before any of the
# check_*/clean_* functions run.

# Parsed "Clean-Suites::Options" configuration subtree (Help, No-Action,
# Maximum and, when given on the command line, Archive).
Options: apt_pkg.Configuration
# Logger instance created in main() and closed at the end of the run.
Logger: daklog.Logger
55################################################################################
def usage(exit_code=0):
    """Print the command-line help and terminate the process.

    The option list mirrors the arguments registered in main(), including
    -a/--archive, which restricts pool file removal to the named archives.

    @type exit_code: int
    @param exit_code: status handed to sys.exit() once the help is printed
    """
    print(
        """Usage: dak clean-suites [OPTIONS]
Clean old packages from suites.

  -a, --archive              only remove pool files from these archives (comma-separated)
  -n, --no-action            don't do anything
  -h, --help                 show this help and exit
  -m, --maximum              maximum number of files to remove"""
    )
    sys.exit(exit_code)
70################################################################################
def check_binaries(now_date, session):
    """Set or clear last_used on archive files that belong to binaries.

    A single UPDATE flips files_archive_map.last_used for every binary file
    whose usage state changed: files no longer in any suite of their archive
    get last_used = now_date, files that are referenced again get it reset
    to NULL.  Each changed row is logged.  Nothing is committed here.

    @param now_date: timestamp bound to the :last_used parameter
    @param session: database session the statement is executed on
    """
    Logger.log(["Checking for orphaned binary packages..."])

    # Get the list of binary packages not in a suite and mark them for
    # deletion.
    # Check for any binaries which are marked for eventual deletion
    # but are now used again.
    statement = sql.text(
        """
    WITH usage AS (
      SELECT
        af.archive_id AS archive_id,
        af.file_id AS file_id,
        af.component_id AS component_id,
        BOOL_OR(EXISTS (SELECT 1 FROM bin_associations ba
                        JOIN suite s ON ba.suite = s.id
                        WHERE ba.bin = b.id
                          AND s.archive_id = af.archive_id))
          AS in_use
      FROM files_archive_map af
      JOIN binaries b ON af.file_id = b.file
      GROUP BY af.archive_id, af.file_id, af.component_id
    )

    UPDATE files_archive_map af
      SET last_used = CASE WHEN usage.in_use THEN NULL ELSE :last_used END
      FROM usage, files f, archive
      WHERE af.archive_id = usage.archive_id AND af.file_id = usage.file_id AND af.component_id = usage.component_id
        AND ((af.last_used IS NULL AND NOT usage.in_use) OR (af.last_used IS NOT NULL AND usage.in_use))
        AND af.file_id = f.id
        AND af.archive_id = archive.id
      RETURNING archive.name, f.filename, af.last_used IS NULL"""
    )

    changed = session.execute(statement, {"last_used": now_date})
    # Third returned column is "af.last_used IS NULL"; a true value is
    # reported as the timestamp having been cleared.
    for archive_name, filename, is_unset in changed:
        action = "unset lastused" if is_unset else "set lastused"
        Logger.log([action, archive_name, filename])
114########################################
def check_sources(now_date, session):
    """Set or clear last_used on archive files that belong to sources.

    A source file counts as in use within an archive when its source is in a
    suite of that archive, when a binary built from it still has a live
    (NULL or not yet expired) file mapping in that archive, or when a binary
    in one of the archive's suites lists it in extra_src_references.  Rows
    whose state changed are updated and logged.  Nothing is committed here.

    The query reads the archive_delete_date table, so
    set_archive_delete_dates() has to have been run on this session first.

    @param now_date: timestamp bound to the :last_used parameter
    @param session: database session the statement is executed on
    """
    Logger.log(["Checking for orphaned source packages..."])

    # Get the list of source packages not in a suite and not used by
    # any binaries.

    # Check for any sources which are marked for deletion but which
    # are now used again.

    # TODO: the UPDATE part is the same as in check_binaries. Merge?

    statement = sql.text(
        """
    WITH usage AS (
      SELECT
        af.archive_id AS archive_id,
        af.file_id AS file_id,
        af.component_id AS component_id,
        BOOL_OR(EXISTS (SELECT 1 FROM src_associations sa
                         JOIN suite s ON sa.suite = s.id
                         WHERE sa.source = df.source
                           AND s.archive_id = af.archive_id)
          OR EXISTS (SELECT 1 FROM files_archive_map af_bin
                              JOIN binaries b ON af_bin.file_id = b.file
                             WHERE b.source = df.source
                               AND af_bin.archive_id = af.archive_id
                               AND (af_bin.last_used IS NULL OR af_bin.last_used > ad.delete_date))
          OR EXISTS (SELECT 1 FROM extra_src_references esr
                         JOIN bin_associations ba ON esr.bin_id = ba.bin
                         JOIN binaries b ON ba.bin = b.id
                         JOIN suite s ON ba.suite = s.id
                         WHERE esr.src_id = df.source
                           AND s.archive_id = af.archive_id))
          AS in_use
      FROM files_archive_map af
      JOIN dsc_files df ON af.file_id = df.file
      JOIN archive_delete_date ad ON af.archive_id = ad.archive_id
      GROUP BY af.archive_id, af.file_id, af.component_id
    )

    UPDATE files_archive_map af
      SET last_used = CASE WHEN usage.in_use THEN NULL ELSE :last_used END
      FROM usage, files f, archive
      WHERE af.archive_id = usage.archive_id AND af.file_id = usage.file_id AND af.component_id = usage.component_id
        AND ((af.last_used IS NULL AND NOT usage.in_use) OR (af.last_used IS NOT NULL AND usage.in_use))
        AND af.file_id = f.id
        AND af.archive_id = archive.id

      RETURNING archive.name, f.filename, af.last_used IS NULL
    """
    )

    changed = session.execute(statement, {"last_used": now_date})
    # Third returned column is "af.last_used IS NULL"; a true value is
    # reported as the timestamp having been cleared.
    for archive_name, filename, is_unset in changed:
        action = "unset lastused" if is_unset else "set lastused"
        Logger.log([action, archive_name, filename])
175########################################
def check_files(now_date, session):
    """Mark archive files that belong to neither a binary nor a source.

    Every files_archive_map row with last_used still NULL and no matching
    row in binaries or dsc_files gets last_used = now_date.  Each hit is
    reported through utils.warn() and the log.  The change is committed
    unless the No-Action option is set.

    @param now_date: timestamp bound to the :last_used parameter
    @param session: database session the statement is executed on
    """
    # FIXME: this is evil; nothing should ever be in this state.  if
    # they are, it's a bug.

    # However, we've discovered it happens sometimes so we print a huge warning
    # and then mark the file for deletion.  This probably masks a bug somewhere
    # else but is better than collecting cruft forever

    Logger.log(["Checking for unused files..."])
    statement = sql.text(
        """
    UPDATE files_archive_map af
       SET last_used = :last_used
      FROM files f, archive
     WHERE af.file_id = f.id
       AND af.archive_id = archive.id
       AND NOT EXISTS (SELECT 1 FROM binaries b WHERE b.file = af.file_id)
       AND NOT EXISTS (SELECT 1 FROM dsc_files df WHERE df.file = af.file_id)
       AND af.last_used IS NULL
    RETURNING archive.name, f.filename"""
    )
    orphans = session.execute(statement, {"last_used": now_date})

    for row in orphans:
        # The warning shows the whole row; the log gets archive and filename.
        utils.warn("orphaned file: {0}".format(row))
        Logger.log(["set lastused", row[0], row[1], "ORPHANED FILE"])

    if Options["No-Action"]:
        return
    session.commit()
def clean_binaries(now_date, session):
    """Delete rows from the binaries table whose files expired everywhere.

    A binary is removed when none of its files_archive_map rows is still
    live, i.e. every mapping has a last_used at or before the archive's
    delete_date.  Only the database rows are deleted and logged here; no
    files are touched and nothing is committed.

    Reads the archive_delete_date table, so set_archive_delete_dates() has
    to have been run on this session first.

    @param now_date: not used by this function
    @param session: database session the statement is executed on
    """
    # We do this here so that the binaries we remove will have their
    # source also removed (if possible).

    # XXX: why doesn't this remove the files here as well? I don't think it
    #      buys anything keeping this separate

    Logger.log(["Deleting from binaries table... "])
    statement = sql.text(
        """
      DELETE FROM binaries b
       USING files f
       WHERE f.id = b.file
         AND NOT EXISTS (SELECT 1 FROM files_archive_map af
                                  JOIN archive_delete_date ad ON af.archive_id = ad.archive_id
                                 WHERE af.file_id = b.file
                                   AND (af.last_used IS NULL OR af.last_used > ad.delete_date))
      RETURNING f.filename
    """
    )
    for (filename,) in session.execute(statement):
        Logger.log(["delete binary", filename])
237########################################
def clean(now_date, archives, max_delete, session):
    """Delete expired sources and pool files, then drop unreferenced file rows.

    Steps, in order:

      1. Delete source rows whose files expired in every archive, together
         with their dsc_files rows, and mark the files that are thereby no
         longer referenced by any dsc_files row as long expired.
      2. For every files_archive_map row whose last_used is at or before the
         archive's delete_date, remove the file from disk (or move it to the
         morgue when the archive has use_morgue set) and delete the row.
      3. Delete rows from files that no archive maps any more.

    With the No-Action option set nothing is removed from disk and nothing
    is committed; the planned actions are only logged.

    Reads the archive_delete_date table, so set_archive_delete_dates() has
    to have been run on this session first.

    @param now_date: datetime of this run; its year/month/day name the
                     morgue subdirectory
    @param archives: None to handle all archives, otherwise a collection of
                     objects with an archive_id attribute limiting step 2
    @param max_delete: None for no limit, otherwise the maximum number of
                       archive files handled in step 2
    @param session: database session to work on
    """
    cnf = Config()

    count = 0
    size = 0

    Logger.log(["Cleaning out packages..."])

    # Without Dir::Morgue fall back to a "morgue" directory below the
    # *configured* pool directory.  (Joining the literal string "Dir::Pool"
    # would produce a relative path named after the configuration key.)
    morguedir = cnf.get("Dir::Morgue", "")
    if not morguedir:
        morguedir = os.path.join(cnf.get("Dir::Pool", ""), "morgue")
    morguesubdir = cnf.get("Clean-Suites::MorgueSubDir", "pool")

    # Build directory as morguedir/morguesubdir/year/month/day
    dest = os.path.join(
        morguedir,
        morguesubdir,
        str(now_date.year),
        "%.2d" % now_date.month,
        "%.2d" % now_date.day,
    )

    if not Options["No-Action"] and not os.path.exists(dest):
        os.makedirs(dest)

    # Delete from source
    Logger.log(["Deleting from source table..."])
    q = session.execute(
        sql.text(
            """
      WITH
      deleted_sources AS (
        DELETE FROM source
         USING files f
         WHERE source.file = f.id
           AND NOT EXISTS (SELECT 1 FROM files_archive_map af
                                    JOIN archive_delete_date ad ON af.archive_id = ad.archive_id
                                   WHERE af.file_id = source.file
                                     AND (af.last_used IS NULL OR af.last_used > ad.delete_date))
        RETURNING source.id AS id, f.filename AS filename
      ),
      deleted_dsc_files AS (
        DELETE FROM dsc_files df WHERE df.source IN (SELECT id FROM deleted_sources)
        RETURNING df.file AS file_id
      ),
      now_unused_source_files AS (
        UPDATE files_archive_map af
           SET last_used = '1977-03-13 13:37:42' -- Kill it now. We waited long enough before removing the .dsc.
         WHERE af.file_id IN (SELECT file_id FROM deleted_dsc_files)
           AND NOT EXISTS (SELECT 1 FROM dsc_files df WHERE df.file = af.file_id)
      )
      SELECT filename FROM deleted_sources"""
        )
    )
    for s in q:
        Logger.log(["delete source", s[0]])

    if not Options["No-Action"]:
        session.commit()

    # Delete files from the pool
    old_files = (
        session.query(ArchiveFile)
        .filter(
            sql.text(
                "files_archive_map.last_used <= (SELECT delete_date FROM archive_delete_date ad WHERE ad.archive_id = files_archive_map.archive_id)"
            )
        )
        .join(Archive)
    )

    # The archive restriction has to be added before the limit: SQLAlchemy's
    # Query.filter() refuses to run on a query that already carries a LIMIT,
    # and the limit is meant to apply to the restricted set anyway.
    if archives is not None:
        archive_ids = [a.archive_id for a in archives]
        old_files = old_files.filter(ArchiveFile.archive_id.in_(archive_ids))

    if max_delete is not None:
        old_files = old_files.limit(max_delete)
        Logger.log(["Limiting removals to %d" % max_delete])

    for af in old_files:
        filename = af.path
        try:
            # lstat so that a symlink is seen as a symlink, not as its target
            st = os.lstat(filename)
        except FileNotFoundError:
            Logger.log(["database referred to non-existing file", filename])
            session.delete(af)
            continue
        Logger.log(["delete archive file", filename])
        if stat.S_ISLNK(st.st_mode):
            count += 1
            Logger.log(["delete symlink", filename])
            if not Options["No-Action"]:
                os.unlink(filename)
                session.delete(af)
        elif stat.S_ISREG(st.st_mode):
            size += st.st_size
            count += 1

            dest_filename = dest + "/" + os.path.basename(filename)
            # If the destination file exists; try to find another filename to use
            if os.path.lexists(dest_filename):
                dest_filename = utils.find_next_free(dest_filename)

            if not Options["No-Action"]:
                if af.archive.use_morgue:
                    Logger.log(["move to morgue", filename, dest_filename])
                    utils.move(filename, dest_filename)
                else:
                    Logger.log(["removed file", filename])
                    os.unlink(filename)
                session.delete(af)

        else:
            utils.fubar("%s is neither symlink nor file?!" % (filename))

    if count > 0:
        Logger.log(["total", count, utils.size_type(size)])

    # Delete entries in files no longer referenced by any archive
    query = """
       DELETE FROM files f
        WHERE NOT EXISTS (SELECT 1 FROM files_archive_map af WHERE af.file_id = f.id)
    """
    session.execute(sql.text(query))

    if not Options["No-Action"]:
        session.commit()
366################################################################################
def clean_maintainers(now_date, session):
    """Delete maintainer rows that nothing refers to any more.

    A maintainer is unused when no binary, no source (as maintainer or
    changedby) and no src_uploaders row references it.  Every candidate is
    logged; it is only deleted, and the session only committed, when the
    No-Action option is not set.

    @param now_date: not used by this function
    @param session: database session to work on
    """
    Logger.log(["Cleaning out unused Maintainer entries..."])

    # TODO Replace this whole thing with one SQL statement
    unused = session.execute(
        sql.text(
            """
SELECT m.id, m.name FROM maintainer m
  WHERE NOT EXISTS (SELECT 1 FROM binaries b WHERE b.maintainer = m.id)
    AND NOT EXISTS (SELECT 1 FROM source s WHERE s.maintainer = m.id OR s.changedby = m.id)
    AND NOT EXISTS (SELECT 1 FROM src_uploaders u WHERE u.maintainer = m.id)"""
        )
    )

    delete_statement = "DELETE FROM maintainer WHERE id = :maint"
    count = 0

    for maintainer_id, name in unused.fetchall():
        Logger.log(["delete maintainer", name])
        if not Options["No-Action"]:
            session.execute(sql.text(delete_statement), {"maint": maintainer_id})
        # Counted in no-action mode as well
        count += 1

    if not Options["No-Action"]:
        session.commit()

    if count > 0:
        Logger.log(["total", count])
402################################################################################
def clean_fingerprints(now_date, session):
    """Delete fingerprint rows that are in no keyring and referenced nowhere.

    A fingerprint is unused when its keyring is NULL and no binary, no
    source (as sig_fpr or authorized_by_fingerprint_id) and no
    acl_per_source row references it.  Every candidate is logged; it is only
    deleted, and the session only committed, when the No-Action option is
    not set.

    @param now_date: not used by this function
    @param session: database session to work on
    """
    Logger.log(["Cleaning out unused fingerprint entries..."])

    # TODO Replace this whole thing with one SQL statement
    unused = session.execute(
        sql.text(
            """
SELECT f.id, f.fingerprint FROM fingerprint f
  WHERE f.keyring IS NULL
    AND NOT EXISTS (SELECT 1 FROM binaries b WHERE b.sig_fpr = f.id)
    AND NOT EXISTS (SELECT 1 FROM source s WHERE s.sig_fpr = f.id OR s.authorized_by_fingerprint_id = f.id)
    AND NOT EXISTS (SELECT 1 FROM acl_per_source aps WHERE aps.created_by_id = f.id)"""
        )
    )

    delete_statement = "DELETE FROM fingerprint WHERE id = :fpr"
    count = 0

    for fingerprint_id, fingerprint in unused.fetchall():
        Logger.log(["delete fingerprint", fingerprint])
        if not Options["No-Action"]:
            session.execute(sql.text(delete_statement), {"fpr": fingerprint_id})
        # Counted in no-action mode as well
        count += 1

    if not Options["No-Action"]:
        session.commit()

    if count > 0:
        Logger.log(["total", count])
439################################################################################
def clean_byhash(now_date, session):
    """Delete by-hash files whose grace period is over.

    hashfile rows with "unreferenced + archive.stayofexecution" earlier than
    the database's CURRENT_TIMESTAMP are deleted.  Unless the No-Action
    option is set, the matching files below <archive path>/dists/<suite>/
    are unlinked and the session is committed.  A file that is already gone
    is logged, any other OSError is re-raised.

    @param now_date: not used by this function
    @param session: database session to work on
    """
    Logger.log(["Cleaning out unused by-hash files..."])

    deleted = session.execute(
        sql.text(
            """
        DELETE FROM hashfile h
        USING suite s, archive a
        WHERE s.id = h.suite_id
          AND a.id = s.archive_id
          AND h.unreferenced + a.stayofexecution < CURRENT_TIMESTAMP
        RETURNING a.path, s.suite_name, h.path"""
        )
    )
    count = deleted.rowcount

    if not Options["No-Action"]:
        for base, suite, path in deleted:
            filename = os.path.join(
                base, "dists", suite, utils.suite_suffix(suite), path
            )
            try:
                os.unlink(filename)
            except OSError as exc:
                if exc.errno == errno.ENOENT:
                    Logger.log(["database referred to non-existing file", filename])
                else:
                    raise
            else:
                Logger.log(["delete hashfile", suite, path])
        session.commit()

    if count > 0:
        Logger.log(["total", count])
476################################################################################
def clean_empty_directories(session):
    """
    Removes empty directories below every archive path.

    Each distinct archive.path is walked bottom-up.  A directory without
    files and subdirectories is counted and, unless the No-Action option is
    set, logged and removed.

    @param session: database session used to look up the archive paths
    """

    Logger.log(["Cleaning out empty directories..."])

    count = 0

    cursor = session.execute(sql.text("""SELECT DISTINCT(path) FROM archive"""))
    bases = [x[0] for x in cursor.fetchall()]

    for base in bases:
        for dirpath, dirnames, filenames in os.walk(base, topdown=False):
            if filenames or dirnames:
                continue
            # dirpath as yielded by os.walk() already starts with base;
            # joining base in front of it again would give a wrong path
            # whenever base is relative.
            if not Options["No-Action"]:
                Logger.log(["removing directory", dirpath])
                # os.removedirs() also prunes parents that become empty.
                os.removedirs(dirpath)
            count += 1

    if count:
        Logger.log(["total removed directories", count])
504################################################################################
def set_archive_delete_dates(now_date, session):
    """Create and fill the temporary archive_delete_date table.

    The table gets one row per archive holding
    "now_date - archive.stayofexecution" as delete_date.  The other
    functions in this file compare last_used values against it.

    @param now_date: reference timestamp bound to the :now_date parameter
    @param session: database session the table is created on
    """
    create_table = sql.text(
        """
        CREATE TEMPORARY TABLE archive_delete_date (
          archive_id INT NOT NULL,
          delete_date TIMESTAMP NOT NULL
        )"""
    )
    fill_table = sql.text(
        """
        INSERT INTO archive_delete_date
          (archive_id, delete_date)
        SELECT
          archive.id, :now_date - archive.stayofexecution
        FROM archive"""
    )

    session.execute(create_table)
    session.execute(fill_table, {"now_date": now_date})

    session.flush()
533################################################################################
def main():
    """Entry point: parse the options and run all cleanup steps in order.

    Binds the module-level Options and Logger, resolves the optional
    archive restriction and removal limit, then runs the check_* and
    clean_* functions on one database session.  The session is rolled back
    at the end; in no-action mode none of the steps commits, so this
    discards everything they did.
    """
    global Options, Logger

    cnf = Config()

    # Make sure the option keys read below exist.  "Archive" is left out on
    # purpose: its mere presence is what is tested further down.
    for name in ("Help", "No-Action", "Maximum"):
        option_key = "Clean-Suites::Options::%s" % name
        if option_key not in cnf:
            cnf[option_key] = ""

    arguments = [
        ("h", "help", "Clean-Suites::Options::Help"),
        ("a", "archive", "Clean-Suites::Options::Archive", "HasArg"),
        ("n", "no-action", "Clean-Suites::Options::No-Action"),
        ("m", "maximum", "Clean-Suites::Options::Maximum", "HasArg"),
    ]

    apt_pkg.parse_commandline(cnf.Cnf, arguments, sys.argv)  # type: ignore[attr-defined]
    Options = cnf.subtree("Clean-Suites::Options")

    if cnf["Clean-Suites::Options::Maximum"] == "":
        max_delete = None
    else:
        try:
            # Only use Maximum if it's an integer
            max_delete = int(cnf["Clean-Suites::Options::Maximum"])
            if max_delete < 1:
                utils.fubar("If given, Maximum must be at least 1")
        except ValueError:
            utils.fubar("If given, Maximum must be an integer")

    if Options["Help"]:
        usage()

    program = "clean-suites (no action)" if Options["No-Action"] else "clean-suites"
    Logger = daklog.Logger(program, debug=Options["No-Action"])

    session = DBConn().session()

    archives = None
    if "Archive" in Options:
        archive_names = Options["Archive"].split(",")
        archive_query = session.query(Archive).filter(
            Archive.archive_name.in_(archive_names)
        )
        archives = archive_query.all()
        if not archives:
            utils.fubar("Unknown archive.")

    now_date = datetime.now()

    set_archive_delete_dates(now_date, session)

    # Order matters: binaries are handled before sources, and the database
    # bookkeeping runs before anything is removed from the pool.
    for step in (check_binaries, clean_binaries, check_sources, check_files):
        step(now_date, session)
    clean(now_date, archives, max_delete, session)
    for step in (clean_maintainers, clean_fingerprints, clean_byhash):
        step(now_date, session)
    clean_empty_directories(session)

    session.rollback()

    Logger.close()
605################################################################################
# Run the cleanup only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()