1#! /usr/bin/env python3
3"""Cleans up unassociated binary and source packages
5@contact: Debian FTPMaster <ftpmaster@debian.org>
6@copyright: 2000, 2001, 2002, 2003, 2006 James Troup <james@nocrew.org>
7@copyright: 2009 Mark Hymers <mhy@debian.org>
8@copyright: 2010 Joerg Jaspert <joerg@debian.org>
9@license: GNU General Public License version 2 or later
10"""
12# This program is free software; you can redistribute it and/or modify
13# it under the terms of the GNU General Public License as published by
14# the Free Software Foundation; either version 2 of the License, or
15# (at your option) any later version.
17# This program is distributed in the hope that it will be useful,
18# but WITHOUT ANY WARRANTY; without even the implied warranty of
19# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20# GNU General Public License for more details.
22# You should have received a copy of the GNU General Public License
23# along with this program; if not, write to the Free Software
24# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
26################################################################################
28# 07:05|<elmo> well.. *shrug*.. no, probably not.. but to fix it,
29# | we're going to have to implement reference counting
30# | through dependencies.. do we really want to go down
31# | that road?
32#
33# 07:05|<Culus> elmo: Augh! <brain jumps out of skull>
35################################################################################
37import errno
38import os
39import stat
40import sys
41from datetime import datetime
43import apt_pkg
44import sqlalchemy.sql as sql
46from daklib import daklog, utils
47from daklib.config import Config
48from daklib.dbconn import Archive, ArchiveFile, DBConn
50################################################################################
# Parsed "Clean-Suites::Options" configuration subtree; assigned in main().
Options = None
# daklog.Logger shared by all cleanup steps; assigned in main().
Logger = None
55################################################################################
def usage(exit_code=0):
    """Print the command-line help text and terminate the process.

    @param exit_code: status handed to C{sys.exit} (defaults to 0).
    """
    print(
        """Usage: dak clean-suites [OPTIONS]
Clean old packages from suites.

  -a, --archive=NAME[,NAME]  only remove pool files of these archives
  -n, --no-action            don't do anything
  -h, --help                 show this help and exit
  -m, --maximum=N            maximum number of files to remove"""
    )
    sys.exit(exit_code)
70################################################################################
def check_binaries(now_date, session):
    """Mark orphaned binary package files and unmark ones that are used again.

    For every C{files_archive_map} row belonging to a binary package, the
    query works out whether that binary is still associated with a suite of
    the same archive.  Rows that just became unused get C{last_used} set to
    I{now_date}; rows that were marked earlier but are in use again get
    C{last_used} reset to NULL.  Each changed row is logged.

    This function does not commit.

    @param now_date: timestamp stored in C{last_used} for newly unused files.
    @param session: database session used to run the statement.
    """
    Logger.log(["Checking for orphaned binary packages..."])

    # Get the list of binary packages not in a suite and mark them for
    # deletion.
    # Check for any binaries which are marked for eventual deletion
    # but are now used again.

    query = """
       WITH usage AS (
         SELECT
           af.archive_id AS archive_id,
           af.file_id AS file_id,
           af.component_id AS component_id,
           BOOL_OR(EXISTS (SELECT 1 FROM bin_associations ba
                            JOIN suite s ON ba.suite = s.id
                           WHERE ba.bin = b.id
                             AND s.archive_id = af.archive_id))
             AS in_use
         FROM files_archive_map af
         JOIN binaries b ON af.file_id = b.file
         GROUP BY af.archive_id, af.file_id, af.component_id
       )

       UPDATE files_archive_map af
          SET last_used = CASE WHEN usage.in_use THEN NULL ELSE :last_used END
         FROM usage, files f, archive
        WHERE af.archive_id = usage.archive_id AND af.file_id = usage.file_id AND af.component_id = usage.component_id
          AND ((af.last_used IS NULL AND NOT usage.in_use) OR (af.last_used IS NOT NULL AND usage.in_use))
          AND af.file_id = f.id
          AND af.archive_id = archive.id
    RETURNING archive.name, f.filename, af.last_used IS NULL"""

    # Textual SQL has to be wrapped in text(); plain strings are no longer
    # accepted by newer SQLAlchemy releases.
    result = session.execute(sql.text(query), {"last_used": now_date})
    for row in result:
        # Third column is "last_used IS NULL" after the update: true means
        # the mark was cleared, false means it was just set.
        action = "unset lastused" if row[2] else "set lastused"
        Logger.log([action, row[0], row[1]])
114########################################
def check_sources(now_date, session):
    """Mark orphaned source package files and unmark ones that are used again.

    A source file (any file listed in C{dsc_files}) counts as in use within
    an archive when at least one of the following holds:
      - its source is associated with a suite of that archive;
      - a binary built from that source still has a file in the archive that
        is unmarked or marked more recently than the archive's delete date;
      - a binary in a suite of that archive references the source through
        C{extra_src_references}.

    Rows that just became unused get C{last_used} set to I{now_date}; rows
    that are in use again get it reset to NULL.  Each changed row is logged.
    The query joins C{archive_delete_date}, so that table must already exist
    in the session.  This function does not commit.

    @param now_date: timestamp stored in C{last_used} for newly unused files.
    @param session: database session used to run the statement.
    """
    Logger.log(["Checking for orphaned source packages..."])

    # Get the list of source packages not in a suite and not used by
    # any binaries.

    # Check for any sources which are marked for deletion but which
    # are now used again.

    # TODO: the UPDATE part is the same as in check_binaries. Merge?

    query = """
    WITH usage AS (
      SELECT
        af.archive_id AS archive_id,
        af.file_id AS file_id,
        af.component_id AS component_id,
        BOOL_OR(EXISTS (SELECT 1 FROM src_associations sa
                         JOIN suite s ON sa.suite = s.id
                        WHERE sa.source = df.source
                          AND s.archive_id = af.archive_id)
          OR EXISTS (SELECT 1 FROM files_archive_map af_bin
                              JOIN binaries b ON af_bin.file_id = b.file
                             WHERE b.source = df.source
                               AND af_bin.archive_id = af.archive_id
                               AND (af_bin.last_used IS NULL OR af_bin.last_used > ad.delete_date))
          OR EXISTS (SELECT 1 FROM extra_src_references esr
                         JOIN bin_associations ba ON esr.bin_id = ba.bin
                         JOIN binaries b ON ba.bin = b.id
                         JOIN suite s ON ba.suite = s.id
                        WHERE esr.src_id = df.source
                          AND s.archive_id = af.archive_id))
          AS in_use
      FROM files_archive_map af
      JOIN dsc_files df ON af.file_id = df.file
      JOIN archive_delete_date ad ON af.archive_id = ad.archive_id
      GROUP BY af.archive_id, af.file_id, af.component_id
    )

    UPDATE files_archive_map af
       SET last_used = CASE WHEN usage.in_use THEN NULL ELSE :last_used END
      FROM usage, files f, archive
     WHERE af.archive_id = usage.archive_id AND af.file_id = usage.file_id AND af.component_id = usage.component_id
       AND ((af.last_used IS NULL AND NOT usage.in_use) OR (af.last_used IS NOT NULL AND usage.in_use))
       AND af.file_id = f.id
       AND af.archive_id = archive.id

    RETURNING archive.name, f.filename, af.last_used IS NULL
    """

    # Textual SQL has to be wrapped in text(); plain strings are no longer
    # accepted by newer SQLAlchemy releases.
    result = session.execute(sql.text(query), {"last_used": now_date})
    for row in result:
        # Third column is "last_used IS NULL" after the update: true means
        # the mark was cleared, false means it was just set.
        action = "unset lastused" if row[2] else "set lastused"
        Logger.log([action, row[0], row[1]])
175########################################
def check_files(now_date, session):
    """Mark files that belong to neither a binary nor a source for deletion.

    Any C{files_archive_map} row that is still unmarked but whose file is
    referenced by neither C{binaries} nor C{dsc_files} gets C{last_used} set
    to I{now_date}.  Each such file triggers a warning and a log entry.  The
    session is committed unless the No-Action option is set.

    @param now_date: timestamp stored in C{last_used} for orphaned files.
    @param session: database session used to run the statement.
    """
    # FIXME: this is evil; nothing should ever be in this state.  if
    # they are, it's a bug.

    # However, we've discovered it happens sometimes so we print a huge warning
    # and then mark the file for deletion.  This probably masks a bug somewhere
    # else but is better than collecting cruft forever

    Logger.log(["Checking for unused files..."])
    query = """
    UPDATE files_archive_map af
       SET last_used = :last_used
      FROM files f, archive
     WHERE af.file_id = f.id
       AND af.archive_id = archive.id
       AND NOT EXISTS (SELECT 1 FROM binaries b WHERE b.file = af.file_id)
       AND NOT EXISTS (SELECT 1 FROM dsc_files df WHERE df.file = af.file_id)
       AND af.last_used IS NULL
    RETURNING archive.name, f.filename"""

    # Textual SQL has to be wrapped in text(); plain strings are no longer
    # accepted by newer SQLAlchemy releases.
    orphans = session.execute(sql.text(query), {"last_used": now_date})

    for row in orphans:
        utils.warn("orphaned file: {0}".format(row))
        Logger.log(["set lastused", row[0], row[1], "ORPHANED FILE"])

    if not Options["No-Action"]:
        session.commit()
def clean_binaries(now_date, session):
    """Delete C{binaries} rows whose file is no longer kept in any archive.

    A binary is deleted when no C{files_archive_map} row for its file is
    either unmarked or marked more recently than the archive's delete date
    (read from C{archive_delete_date}, which must already exist in the
    session).  Each deleted binary's filename is logged.  This function does
    not commit.

    @param now_date: unused here; kept for a signature parallel to the other
        cleanup steps.
    @param session: database session used to run the statement.
    """
    # We do this here so that the binaries we remove will have their
    # source also removed (if possible).

    # XXX: why doesn't this remove the files here as well? I don't think it
    #      buys anything keeping this separate

    Logger.log(["Deleting from binaries table... "])
    query = """
      DELETE FROM binaries b
        USING files f
        WHERE f.id = b.file
          AND NOT EXISTS (SELECT 1 FROM files_archive_map af
                                   JOIN archive_delete_date ad ON af.archive_id = ad.archive_id
                                  WHERE af.file_id = b.file
                                    AND (af.last_used IS NULL OR af.last_used > ad.delete_date))
      RETURNING f.filename
    """

    # Textual SQL has to be wrapped in text(); plain strings are no longer
    # accepted by newer SQLAlchemy releases.
    deleted = session.execute(sql.text(query))
    for row in deleted:
        Logger.log(["delete binary", row[0]])
233########################################
def clean(now_date, archives, max_delete, session):
    """Delete expired sources and remove expired files from the pool.

    Steps, in order:
      1. Delete C{source} rows whose file is no longer kept in any archive,
         delete their C{dsc_files} rows, and mark the files that became
         unreferenced for immediate removal.
      2. For every C{files_archive_map} row whose C{last_used} is not newer
         than its archive's delete date: unlink symlinks; for regular files
         either move them to the morgue (when the archive uses one) or
         unlink them; then delete the row.  Rows whose file is missing on
         disk are deleted and logged.
      3. Delete C{files} rows that no archive references any more.

    With the No-Action option set, no file is removed or moved, the morgue
    directory is not created, and nothing is committed.

    @param now_date: datetime used to build the morgue sub-directory
        (year/month/day).
    @param archives: C{None}, or a list of archive objects; step 2 is then
        restricted to their C{archive_id} values.
    @param max_delete: C{None}, or the maximum number of rows handled in
        step 2.
    @param session: database session.
    """
    cnf = Config()

    count = 0
    size = 0

    Logger.log(["Cleaning out packages..."])

    # Without Dir::Morgue, fall back to a "morgue" directory below the
    # configured pool directory (not below the literal text "Dir::Pool").
    morguedir = cnf.get(
        "Dir::Morgue", os.path.join(cnf.get("Dir::Pool", ""), "morgue")
    )
    morguesubdir = cnf.get("Clean-Suites::MorgueSubDir", "pool")

    # Build directory as morguedir/morguesubdir/year/month/day
    dest = os.path.join(
        morguedir,
        morguesubdir,
        str(now_date.year),
        "%.2d" % now_date.month,
        "%.2d" % now_date.day,
    )

    if not Options["No-Action"]:
        os.makedirs(dest, exist_ok=True)

    # Delete from source
    Logger.log(["Deleting from source table..."])
    source_query = """
      WITH
      deleted_sources AS (
        DELETE FROM source
         USING files f
         WHERE source.file = f.id
           AND NOT EXISTS (SELECT 1 FROM files_archive_map af
                                    JOIN archive_delete_date ad ON af.archive_id = ad.archive_id
                                   WHERE af.file_id = source.file
                                     AND (af.last_used IS NULL OR af.last_used > ad.delete_date))
        RETURNING source.id AS id, f.filename AS filename
      ),
      deleted_dsc_files AS (
        DELETE FROM dsc_files df WHERE df.source IN (SELECT id FROM deleted_sources)
        RETURNING df.file AS file_id
      ),
      now_unused_source_files AS (
        UPDATE files_archive_map af
           SET last_used = '1977-03-13 13:37:42' -- Kill it now. We waited long enough before removing the .dsc.
         WHERE af.file_id IN (SELECT file_id FROM deleted_dsc_files)
           AND NOT EXISTS (SELECT 1 FROM dsc_files df WHERE df.file = af.file_id)
      )
      SELECT filename FROM deleted_sources"""
    # Textual SQL has to be wrapped in text(); plain strings are no longer
    # accepted by newer SQLAlchemy releases.
    for row in session.execute(sql.text(source_query)):
        Logger.log(["delete source", row[0]])

    if not Options["No-Action"]:
        session.commit()

    # Delete files from the pool
    old_files = (
        session.query(ArchiveFile)
        .filter(
            sql.text(
                "files_archive_map.last_used <= (SELECT delete_date FROM archive_delete_date ad WHERE ad.archive_id = files_archive_map.archive_id)"
            )
        )
        .join(Archive)
    )

    # All filter criteria must be in place before LIMIT is applied: Query
    # refuses filter() on a query that already carries a LIMIT.
    if archives is not None:
        archive_ids = [a.archive_id for a in archives]
        old_files = old_files.filter(ArchiveFile.archive_id.in_(archive_ids))

    if max_delete is not None:
        old_files = old_files.limit(max_delete)
        Logger.log(["Limiting removals to %d" % max_delete])

    for af in old_files:
        filename = af.path
        try:
            st = os.lstat(filename)
        except FileNotFoundError:
            Logger.log(["database referred to non-existing file", filename])
            session.delete(af)
            continue

        Logger.log(["delete archive file", filename])
        if stat.S_ISLNK(st.st_mode):
            count += 1
            Logger.log(["delete symlink", filename])
            if not Options["No-Action"]:
                os.unlink(filename)
                session.delete(af)
        elif stat.S_ISREG(st.st_mode):
            size += st.st_size
            count += 1

            dest_filename = os.path.join(dest, os.path.basename(filename))
            # If the destination file exists; try to find another filename to use
            if os.path.lexists(dest_filename):
                dest_filename = utils.find_next_free(dest_filename)

            if not Options["No-Action"]:
                if af.archive.use_morgue:
                    Logger.log(["move to morgue", filename, dest_filename])
                    utils.move(filename, dest_filename)
                else:
                    Logger.log(["removed file", filename])
                    os.unlink(filename)
                session.delete(af)
        else:
            utils.fubar("%s is neither symlink nor file?!" % (filename))

    if count > 0:
        Logger.log(["total", count, utils.size_type(size)])

    # Delete entries in files no longer referenced by any archive
    files_query = """
       DELETE FROM files f
        WHERE NOT EXISTS (SELECT 1 FROM files_archive_map af WHERE af.file_id = f.id)
    """
    session.execute(sql.text(files_query))

    if not Options["No-Action"]:
        session.commit()
360################################################################################
def clean_maintainers(now_date, session):
    """Delete C{maintainer} rows that nothing references any more.

    A maintainer is unused when no binary, no source (as maintainer or
    changed-by) and no C{src_uploaders} row refers to it.  Every candidate is
    logged and counted; the row is deleted and the session committed only
    when the No-Action option is not set.

    @param now_date: unused here; kept for a signature parallel to the other
        cleanup steps.
    @param session: database session.
    """
    Logger.log(["Cleaning out unused Maintainer entries..."])

    # TODO Replace this whole thing with one SQL statement
    # Textual SQL has to be wrapped in text(); plain strings are no longer
    # accepted by newer SQLAlchemy releases.
    select_unused = sql.text(
        """
SELECT m.id, m.name FROM maintainer m
  WHERE NOT EXISTS (SELECT 1 FROM binaries b WHERE b.maintainer = m.id)
    AND NOT EXISTS (SELECT 1 FROM source s WHERE s.maintainer = m.id OR s.changedby = m.id)
    AND NOT EXISTS (SELECT 1 FROM src_uploaders u WHERE u.maintainer = m.id)"""
    )
    delete_one = sql.text("DELETE FROM maintainer WHERE id = :maint")

    count = 0

    # fetchall() materialises the result before further statements are run
    # on the same session inside the loop.
    for row in session.execute(select_unused).fetchall():
        maintainer_id = row[0]
        Logger.log(["delete maintainer", row[1]])
        if not Options["No-Action"]:
            session.execute(delete_one, {"maint": maintainer_id})
        count += 1

    if not Options["No-Action"]:
        session.commit()

    if count > 0:
        Logger.log(["total", count])
393################################################################################
def clean_fingerprints(now_date, session):
    """Delete C{fingerprint} rows that nothing references any more.

    A fingerprint is unused when it belongs to no keyring and no binary,
    source or C{acl_per_source} row refers to it.  Every candidate is logged
    and counted; the row is deleted and the session committed only when the
    No-Action option is not set.

    @param now_date: unused here; kept for a signature parallel to the other
        cleanup steps.
    @param session: database session.
    """
    Logger.log(["Cleaning out unused fingerprint entries..."])

    # TODO Replace this whole thing with one SQL statement
    # Textual SQL has to be wrapped in text(); plain strings are no longer
    # accepted by newer SQLAlchemy releases.
    select_unused = sql.text(
        """
SELECT f.id, f.fingerprint FROM fingerprint f
  WHERE f.keyring IS NULL
    AND NOT EXISTS (SELECT 1 FROM binaries b WHERE b.sig_fpr = f.id)
    AND NOT EXISTS (SELECT 1 FROM source s WHERE s.sig_fpr = f.id)
    AND NOT EXISTS (SELECT 1 FROM acl_per_source aps WHERE aps.created_by_id = f.id)"""
    )
    delete_one = sql.text("DELETE FROM fingerprint WHERE id = :fpr")

    count = 0

    # fetchall() materialises the result before further statements are run
    # on the same session inside the loop.
    for row in session.execute(select_unused).fetchall():
        fingerprint_id = row[0]
        Logger.log(["delete fingerprint", row[1]])
        if not Options["No-Action"]:
            session.execute(delete_one, {"fpr": fingerprint_id})
        count += 1

    if not Options["No-Action"]:
        session.commit()

    if count > 0:
        Logger.log(["total", count])
427################################################################################
def clean_byhash(now_date, session):
    """Remove by-hash files whose grace period has expired.

    Deletes every C{hashfile} row whose C{unreferenced} timestamp plus the
    archive's C{stayofexecution} lies in the past.  Unless the No-Action
    option is set, the matching file below
    C{<archive path>/dists/<suite>/<suite suffix>/} is unlinked (a file that
    is already missing is only logged) and the session is committed.

    @param now_date: unused here; the statement compares against the
        database's C{CURRENT_TIMESTAMP}.
    @param session: database session.
    """
    Logger.log(["Cleaning out unused by-hash files..."])

    query = """
        DELETE FROM hashfile h
        USING suite s, archive a
        WHERE s.id = h.suite_id
          AND a.id = s.archive_id
          AND h.unreferenced + a.stayofexecution < CURRENT_TIMESTAMP
        RETURNING a.path, s.suite_name, h.path"""
    # Textual SQL has to be wrapped in text(); plain strings are no longer
    # accepted by newer SQLAlchemy releases.
    q = session.execute(sql.text(query))
    count = q.rowcount

    if not Options["No-Action"]:
        for base, suite, path in q:
            suite_suffix = utils.suite_suffix(suite)
            filename = os.path.join(base, "dists", suite, suite_suffix, path)
            try:
                os.unlink(filename)
            except OSError as exc:
                # Only a missing file is tolerated; anything else propagates.
                if exc.errno != errno.ENOENT:
                    raise
                Logger.log(["database referred to non-existing file", filename])
            else:
                Logger.log(["delete hashfile", suite, path])
        session.commit()

    if count > 0:
        Logger.log(["total", count])
462################################################################################
def clean_empty_directories(session):
    """
    Removes empty directories from pool directories.

    Walks every distinct C{archive.path} bottom-up and removes each directory
    that contains neither files nor sub-directories.  Removal goes through
    C{os.removedirs}, which also prunes parent directories that become empty
    as a result.  With the No-Action option set nothing is removed, but empty
    directories are still counted.

    @param session: database session used to look up the archive paths.
    """

    Logger.log(["Cleaning out empty directories..."])

    count = 0

    # Textual SQL has to be wrapped in text(); plain strings are no longer
    # accepted by newer SQLAlchemy releases.
    cursor = session.execute(sql.text("""SELECT DISTINCT(path) FROM archive"""))
    bases = [row[0] for row in cursor.fetchall()]

    for base in bases:
        for dirpath, dirnames, filenames in os.walk(base, topdown=False):
            if filenames or dirnames:
                continue
            # os.walk already yields dirpath prefixed with base, so it is
            # used directly; joining it onto base again would duplicate the
            # prefix whenever base is a relative path.
            if not Options["No-Action"]:
                Logger.log(["removing directory", dirpath])
                os.removedirs(dirpath)
            count += 1

    if count:
        Logger.log(["total removed directories", count])
490################################################################################
def set_archive_delete_dates(now_date, session):
    """Create and fill the temporary C{archive_delete_date} table.

    The table gets one row per archive, holding the archive id and
    I{now_date} minus that archive's C{stayofexecution}.  The session is
    flushed afterwards.

    @param now_date: reference time the per-archive delete date is derived
        from.
    @param session: database session in which the temporary table is created.
    """
    # Textual SQL has to be wrapped in text(); plain strings are no longer
    # accepted by newer SQLAlchemy releases.
    create_table = sql.text(
        """
        CREATE TEMPORARY TABLE archive_delete_date (
          archive_id INT NOT NULL,
          delete_date TIMESTAMP NOT NULL
        )"""
    )
    fill_table = sql.text(
        """
        INSERT INTO archive_delete_date
          (archive_id, delete_date)
        SELECT
          archive.id, :now_date - archive.stayofexecution
        FROM archive"""
    )

    session.execute(create_table)
    session.execute(fill_table, {"now_date": now_date})

    session.flush()
515################################################################################
def main():
    """Entry point: parse the options and run all cleanup steps in order.

    Sets the module-level C{Options} and C{Logger}, builds the temporary
    delete-date table and then calls each check/clean step.  The session is
    rolled back at the end; steps that want to persist their work commit on
    their own.
    """
    global Options, Logger

    cnf = Config()

    # Make sure the flag-style options exist so they can be read
    # unconditionally further down.
    for name in ("Help", "No-Action", "Maximum"):
        option_key = "Clean-Suites::Options::%s" % name
        if option_key not in cnf:
            cnf[option_key] = ""

    Arguments = [
        ("h", "help", "Clean-Suites::Options::Help"),
        ("a", "archive", "Clean-Suites::Options::Archive", "HasArg"),
        ("n", "no-action", "Clean-Suites::Options::No-Action"),
        ("m", "maximum", "Clean-Suites::Options::Maximum", "HasArg"),
    ]

    apt_pkg.parse_commandline(cnf.Cnf, Arguments, sys.argv)
    Options = cnf.subtree("Clean-Suites::Options")

    max_delete = None
    maximum = cnf["Clean-Suites::Options::Maximum"]
    if maximum != "":
        try:
            # Only use Maximum if it's an integer
            max_delete = int(maximum)
        except ValueError:
            utils.fubar("If given, Maximum must be an integer")
        else:
            if max_delete < 1:
                utils.fubar("If given, Maximum must be at least 1")

    if Options["Help"]:
        usage()

    no_action = Options["No-Action"]
    program = "clean-suites (no action)" if no_action else "clean-suites"
    Logger = daklog.Logger(program, debug=no_action)

    session = DBConn().session()

    archives = None
    if "Archive" in Options:
        wanted = Options["Archive"].split(",")
        archive_query = session.query(Archive).filter(
            Archive.archive_name.in_(wanted)
        )
        archives = archive_query.all()
        if not archives:
            utils.fubar("Unknown archive.")

    now_date = datetime.now()

    set_archive_delete_dates(now_date, session)

    check_binaries(now_date, session)
    clean_binaries(now_date, session)
    check_sources(now_date, session)
    check_files(now_date, session)
    clean(now_date, archives, max_delete, session)
    clean_maintainers(now_date, session)
    clean_fingerprints(now_date, session)
    clean_byhash(now_date, session)
    clean_empty_directories(session)

    # Discard whatever the steps above left uncommitted.
    session.rollback()

    Logger.close()
587################################################################################
# Run the cleanup only when executed as a script, not when imported.
if __name__ == "__main__":
    main()