1#! /usr/bin/env python3
3""" Cleans up unassociated binary and source packages
5@contact: Debian FTPMaster <ftpmaster@debian.org>
6@copyright: 2000, 2001, 2002, 2003, 2006 James Troup <james@nocrew.org>
7@copyright: 2009 Mark Hymers <mhy@debian.org>
8@copyright: 2010 Joerg Jaspert <joerg@debian.org>
9@license: GNU General Public License version 2 or later
10"""
12# This program is free software; you can redistribute it and/or modify
13# it under the terms of the GNU General Public License as published by
14# the Free Software Foundation; either version 2 of the License, or
15# (at your option) any later version.
17# This program is distributed in the hope that it will be useful,
18# but WITHOUT ANY WARRANTY; without even the implied warranty of
19# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20# GNU General Public License for more details.
22# You should have received a copy of the GNU General Public License
23# along with this program; if not, write to the Free Software
24# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
26################################################################################
28# 07:05|<elmo> well.. *shrug*.. no, probably not.. but to fix it,
29# | we're going to have to implement reference counting
30# | through dependencies.. do we really want to go down
31# | that road?
32#
33# 07:05|<Culus> elmo: Augh! <brain jumps out of skull>
35################################################################################
37import errno
38import os
39import sqlalchemy.sql as sql
40import stat
41import sys
42import apt_pkg
43from datetime import datetime
45from daklib.config import Config
46from daklib.dbconn import *
47from daklib import utils
48from daklib import daklog
50################################################################################
52Options = None
53Logger = None
55################################################################################
58def usage(exit_code=0):
59 print("""Usage: dak clean-suites [OPTIONS]
60Clean old packages from suites.
62 -n, --no-action don't do anything
63 -h, --help show this help and exit
64 -m, --maximum maximum number of files to remove""")
65 sys.exit(exit_code)
67################################################################################
70def check_binaries(now_date, session):
71 Logger.log(["Checking for orphaned binary packages..."])
73 # Get the list of binary packages not in a suite and mark them for
74 # deletion.
75 # Check for any binaries which are marked for eventual deletion
76 # but are now used again.
78 query = """
79 WITH usage AS (
80 SELECT
81 af.archive_id AS archive_id,
82 af.file_id AS file_id,
83 af.component_id AS component_id,
84 BOOL_OR(EXISTS (SELECT 1 FROM bin_associations ba
85 JOIN suite s ON ba.suite = s.id
86 WHERE ba.bin = b.id
87 AND s.archive_id = af.archive_id))
88 AS in_use
89 FROM files_archive_map af
90 JOIN binaries b ON af.file_id = b.file
91 GROUP BY af.archive_id, af.file_id, af.component_id
92 )
94 UPDATE files_archive_map af
95 SET last_used = CASE WHEN usage.in_use THEN NULL ELSE :last_used END
96 FROM usage, files f, archive
97 WHERE af.archive_id = usage.archive_id AND af.file_id = usage.file_id AND af.component_id = usage.component_id
98 AND ((af.last_used IS NULL AND NOT usage.in_use) OR (af.last_used IS NOT NULL AND usage.in_use))
99 AND af.file_id = f.id
100 AND af.archive_id = archive.id
101 RETURNING archive.name, f.filename, af.last_used IS NULL"""
103 res = session.execute(query, {'last_used': now_date})
104 for i in res:
105 op = "set lastused"
106 if i[2]: 106 ↛ 107line 106 didn't jump to line 107, because the condition on line 106 was never true
107 op = "unset lastused"
108 Logger.log([op, i[0], i[1]])
110########################################
113def check_sources(now_date, session):
114 Logger.log(["Checking for orphaned source packages..."])
116 # Get the list of source packages not in a suite and not used by
117 # any binaries.
119 # Check for any sources which are marked for deletion but which
120 # are now used again.
122 # TODO: the UPDATE part is the same as in check_binaries. Merge?
124 query = """
125 WITH usage AS (
126 SELECT
127 af.archive_id AS archive_id,
128 af.file_id AS file_id,
129 af.component_id AS component_id,
130 BOOL_OR(EXISTS (SELECT 1 FROM src_associations sa
131 JOIN suite s ON sa.suite = s.id
132 WHERE sa.source = df.source
133 AND s.archive_id = af.archive_id)
134 OR EXISTS (SELECT 1 FROM files_archive_map af_bin
135 JOIN binaries b ON af_bin.file_id = b.file
136 WHERE b.source = df.source
137 AND af_bin.archive_id = af.archive_id
138 AND (af_bin.last_used IS NULL OR af_bin.last_used > ad.delete_date))
139 OR EXISTS (SELECT 1 FROM extra_src_references esr
140 JOIN bin_associations ba ON esr.bin_id = ba.bin
141 JOIN binaries b ON ba.bin = b.id
142 JOIN suite s ON ba.suite = s.id
143 WHERE esr.src_id = df.source
144 AND s.archive_id = af.archive_id))
145 AS in_use
146 FROM files_archive_map af
147 JOIN dsc_files df ON af.file_id = df.file
148 JOIN archive_delete_date ad ON af.archive_id = ad.archive_id
149 GROUP BY af.archive_id, af.file_id, af.component_id
150 )
152 UPDATE files_archive_map af
153 SET last_used = CASE WHEN usage.in_use THEN NULL ELSE :last_used END
154 FROM usage, files f, archive
155 WHERE af.archive_id = usage.archive_id AND af.file_id = usage.file_id AND af.component_id = usage.component_id
156 AND ((af.last_used IS NULL AND NOT usage.in_use) OR (af.last_used IS NOT NULL AND usage.in_use))
157 AND af.file_id = f.id
158 AND af.archive_id = archive.id
160 RETURNING archive.name, f.filename, af.last_used IS NULL
161 """
163 res = session.execute(query, {'last_used': now_date})
164 for i in res:
165 op = "set lastused"
166 if i[2]: 166 ↛ 167line 166 didn't jump to line 167, because the condition on line 166 was never true
167 op = "unset lastused"
168 Logger.log([op, i[0], i[1]])
170########################################
173def check_files(now_date, session):
174 # FIXME: this is evil; nothing should ever be in this state. if
175 # they are, it's a bug.
177 # However, we've discovered it happens sometimes so we print a huge warning
178 # and then mark the file for deletion. This probably masks a bug somwhere
179 # else but is better than collecting cruft forever
181 Logger.log(["Checking for unused files..."])
182 q = session.execute("""
183 UPDATE files_archive_map af
184 SET last_used = :last_used
185 FROM files f, archive
186 WHERE af.file_id = f.id
187 AND af.archive_id = archive.id
188 AND NOT EXISTS (SELECT 1 FROM binaries b WHERE b.file = af.file_id)
189 AND NOT EXISTS (SELECT 1 FROM dsc_files df WHERE df.file = af.file_id)
190 AND af.last_used IS NULL
191 RETURNING archive.name, f.filename""", {'last_used': now_date})
193 for x in q: 193 ↛ 194line 193 didn't jump to line 194, because the loop on line 193 never started
194 utils.warn("orphaned file: {0}".format(x))
195 Logger.log(["set lastused", x[0], x[1], "ORPHANED FILE"])
197 if not Options["No-Action"]: 197 ↛ exitline 197 didn't return from function 'check_files', because the condition on line 197 was never false
198 session.commit()
201def clean_binaries(now_date, session):
202 # We do this here so that the binaries we remove will have their
203 # source also removed (if possible).
205 # XXX: why doesn't this remove the files here as well? I don't think it
206 # buys anything keeping this separate
208 Logger.log(["Deleting from binaries table... "])
209 q = session.execute("""
210 DELETE FROM binaries b
211 USING files f
212 WHERE f.id = b.file
213 AND NOT EXISTS (SELECT 1 FROM files_archive_map af
214 JOIN archive_delete_date ad ON af.archive_id = ad.archive_id
215 WHERE af.file_id = b.file
216 AND (af.last_used IS NULL OR af.last_used > ad.delete_date))
217 RETURNING f.filename
218 """)
219 for b in q:
220 Logger.log(["delete binary", b[0]])
222########################################
225def clean(now_date, archives, max_delete, session):
226 cnf = Config()
228 count = 0
229 size = 0
231 Logger.log(["Cleaning out packages..."])
233 morguedir = cnf.get("Dir::Morgue", os.path.join("Dir::Pool", 'morgue'))
234 morguesubdir = cnf.get("Clean-Suites::MorgueSubDir", 'pool')
236 # Build directory as morguedir/morguesubdir/year/month/day
237 dest = os.path.join(morguedir,
238 morguesubdir,
239 str(now_date.year),
240 '%.2d' % now_date.month,
241 '%.2d' % now_date.day)
243 if not Options["No-Action"] and not os.path.exists(dest):
244 os.makedirs(dest)
246 # Delete from source
247 Logger.log(["Deleting from source table..."])
248 q = session.execute("""
249 WITH
250 deleted_sources AS (
251 DELETE FROM source
252 USING files f
253 WHERE source.file = f.id
254 AND NOT EXISTS (SELECT 1 FROM files_archive_map af
255 JOIN archive_delete_date ad ON af.archive_id = ad.archive_id
256 WHERE af.file_id = source.file
257 AND (af.last_used IS NULL OR af.last_used > ad.delete_date))
258 RETURNING source.id AS id, f.filename AS filename
259 ),
260 deleted_dsc_files AS (
261 DELETE FROM dsc_files df WHERE df.source IN (SELECT id FROM deleted_sources)
262 RETURNING df.file AS file_id
263 ),
264 now_unused_source_files AS (
265 UPDATE files_archive_map af
266 SET last_used = '1977-03-13 13:37:42' -- Kill it now. We waited long enough before removing the .dsc.
267 WHERE af.file_id IN (SELECT file_id FROM deleted_dsc_files)
268 AND NOT EXISTS (SELECT 1 FROM dsc_files df WHERE df.file = af.file_id)
269 )
270 SELECT filename FROM deleted_sources""")
271 for s in q: 271 ↛ 272line 271 didn't jump to line 272, because the loop on line 271 never started
272 Logger.log(["delete source", s[0]])
274 if not Options["No-Action"]: 274 ↛ 278line 274 didn't jump to line 278, because the condition on line 274 was never false
275 session.commit()
277 # Delete files from the pool
278 old_files = session.query(ArchiveFile).filter(sql.text('files_archive_map.last_used <= (SELECT delete_date FROM archive_delete_date ad WHERE ad.archive_id = files_archive_map.archive_id)')).join(Archive)
279 if max_delete is not None: 279 ↛ 280line 279 didn't jump to line 280, because the condition on line 279 was never true
280 old_files = old_files.limit(max_delete)
281 Logger.log(["Limiting removals to %d" % max_delete])
283 if archives is not None:
284 archive_ids = [a.archive_id for a in archives]
285 old_files = old_files.filter(ArchiveFile.archive_id.in_(archive_ids))
287 for af in old_files:
288 filename = af.path
289 try:
290 st = os.lstat(filename)
291 except FileNotFoundError:
292 Logger.log(["database referred to non-existing file", filename])
293 session.delete(af)
294 continue
295 Logger.log(["delete archive file", filename])
296 if stat.S_ISLNK(st.st_mode): 296 ↛ 297line 296 didn't jump to line 297, because the condition on line 296 was never true
297 count += 1
298 Logger.log(["delete symlink", filename])
299 if not Options["No-Action"]:
300 os.unlink(filename)
301 session.delete(af)
302 elif stat.S_ISREG(st.st_mode): 302 ↛ 321line 302 didn't jump to line 321, because the condition on line 302 was never false
303 size += st.st_size
304 count += 1
306 dest_filename = dest + '/' + os.path.basename(filename)
307 # If the destination file exists; try to find another filename to use
308 if os.path.lexists(dest_filename): 308 ↛ 309line 308 didn't jump to line 309, because the condition on line 308 was never true
309 dest_filename = utils.find_next_free(dest_filename)
311 if not Options["No-Action"]: 311 ↛ 287line 311 didn't jump to line 287, because the condition on line 311 was never false
312 if af.archive.use_morgue: 312 ↛ 313line 312 didn't jump to line 313, because the condition on line 312 was never true
313 Logger.log(["move to morgue", filename, dest_filename])
314 utils.move(filename, dest_filename)
315 else:
316 Logger.log(["removed file", filename])
317 os.unlink(filename)
318 session.delete(af)
320 else:
321 utils.fubar("%s is neither symlink nor file?!" % (filename))
323 if count > 0: 323 ↛ 327line 323 didn't jump to line 327, because the condition on line 323 was never false
324 Logger.log(["total", count, utils.size_type(size)])
326 # Delete entries in files no longer referenced by any archive
327 query = """
328 DELETE FROM files f
329 WHERE NOT EXISTS (SELECT 1 FROM files_archive_map af WHERE af.file_id = f.id)
330 """
331 session.execute(query)
333 if not Options["No-Action"]: 333 ↛ exitline 333 didn't return from function 'clean', because the condition on line 333 was never false
334 session.commit()
336################################################################################
339def clean_maintainers(now_date, session):
340 Logger.log(["Cleaning out unused Maintainer entries..."])
342 # TODO Replace this whole thing with one SQL statement
343 q = session.execute("""
344SELECT m.id, m.name FROM maintainer m
345 WHERE NOT EXISTS (SELECT 1 FROM binaries b WHERE b.maintainer = m.id)
346 AND NOT EXISTS (SELECT 1 FROM source s WHERE s.maintainer = m.id OR s.changedby = m.id)
347 AND NOT EXISTS (SELECT 1 FROM src_uploaders u WHERE u.maintainer = m.id)""")
349 count = 0
351 for i in q.fetchall(): 351 ↛ 352line 351 didn't jump to line 352, because the loop on line 351 never started
352 maintainer_id = i[0]
353 Logger.log(["delete maintainer", i[1]])
354 if not Options["No-Action"]:
355 session.execute("DELETE FROM maintainer WHERE id = :maint", {'maint': maintainer_id})
356 count += 1
358 if not Options["No-Action"]: 358 ↛ 361line 358 didn't jump to line 361, because the condition on line 358 was never false
359 session.commit()
361 if count > 0: 361 ↛ 362line 361 didn't jump to line 362, because the condition on line 361 was never true
362 Logger.log(["total", count])
364################################################################################
367def clean_fingerprints(now_date, session):
368 Logger.log(["Cleaning out unused fingerprint entries..."])
370 # TODO Replace this whole thing with one SQL statement
371 q = session.execute("""
372SELECT f.id, f.fingerprint FROM fingerprint f
373 WHERE f.keyring IS NULL
374 AND NOT EXISTS (SELECT 1 FROM binaries b WHERE b.sig_fpr = f.id)
375 AND NOT EXISTS (SELECT 1 FROM source s WHERE s.sig_fpr = f.id)
376 AND NOT EXISTS (SELECT 1 FROM acl_per_source aps WHERE aps.created_by_id = f.id)""")
378 count = 0
380 for i in q.fetchall(): 380 ↛ 381line 380 didn't jump to line 381, because the loop on line 380 never started
381 fingerprint_id = i[0]
382 Logger.log(["delete fingerprint", i[1]])
383 if not Options["No-Action"]:
384 session.execute("DELETE FROM fingerprint WHERE id = :fpr", {'fpr': fingerprint_id})
385 count += 1
387 if not Options["No-Action"]: 387 ↛ 390line 387 didn't jump to line 390, because the condition on line 387 was never false
388 session.commit()
390 if count > 0: 390 ↛ 391line 390 didn't jump to line 391, because the condition on line 390 was never true
391 Logger.log(["total", count])
393################################################################################
396def clean_byhash(now_date, session):
397 cnf = Config()
399 Logger.log(["Cleaning out unused by-hash files..."])
401 q = session.execute("""
402 DELETE FROM hashfile h
403 USING suite s, archive a
404 WHERE s.id = h.suite_id
405 AND a.id = s.archive_id
406 AND h.unreferenced + a.stayofexecution < CURRENT_TIMESTAMP
407 RETURNING a.path, s.suite_name, h.path""")
408 count = q.rowcount
410 if not Options["No-Action"]: 410 ↛ 424line 410 didn't jump to line 424, because the condition on line 410 was never false
411 for base, suite, path in q: 411 ↛ 412line 411 didn't jump to line 412, because the loop on line 411 never started
412 suite_suffix = utils.suite_suffix(suite)
413 filename = os.path.join(base, 'dists', suite, suite_suffix, path)
414 try:
415 os.unlink(filename)
416 except OSError as exc:
417 if exc.errno != errno.ENOENT:
418 raise
419 Logger.log(['database referred to non-existing file', filename])
420 else:
421 Logger.log(['delete hashfile', suite, path])
422 session.commit()
424 if count > 0: 424 ↛ 425line 424 didn't jump to line 425, because the condition on line 424 was never true
425 Logger.log(["total", count])
427################################################################################
430def clean_empty_directories(session):
431 """
432 Removes empty directories from pool directories.
433 """
435 Logger.log(["Cleaning out empty directories..."])
437 count = 0
439 cursor = session.execute(
440 """SELECT DISTINCT(path) FROM archive"""
441 )
442 bases = [x[0] for x in cursor.fetchall()]
444 for base in bases:
445 for dirpath, dirnames, filenames in os.walk(base, topdown=False):
446 if not filenames and not dirnames:
447 to_remove = os.path.join(base, dirpath)
448 if not Options["No-Action"]: 448 ↛ 451line 448 didn't jump to line 451, because the condition on line 448 was never false
449 Logger.log(["removing directory", to_remove])
450 os.removedirs(to_remove)
451 count += 1
453 if count: 453 ↛ exitline 453 didn't return from function 'clean_empty_directories', because the condition on line 453 was never false
454 Logger.log(["total removed directories", count])
456################################################################################
459def set_archive_delete_dates(now_date, session):
460 session.execute("""
461 CREATE TEMPORARY TABLE archive_delete_date (
462 archive_id INT NOT NULL,
463 delete_date TIMESTAMP NOT NULL
464 )""")
466 session.execute("""
467 INSERT INTO archive_delete_date
468 (archive_id, delete_date)
469 SELECT
470 archive.id, :now_date - archive.stayofexecution
471 FROM archive""", {'now_date': now_date})
473 session.flush()
475################################################################################
478def main():
479 global Options, Logger
481 cnf = Config()
483 for i in ["Help", "No-Action", "Maximum"]:
484 key = "Clean-Suites::Options::%s" % i
485 if key not in cnf: 485 ↛ 483line 485 didn't jump to line 483, because the condition on line 485 was never false
486 cnf[key] = ""
488 Arguments = [('h', "help", "Clean-Suites::Options::Help"),
489 ('a', 'archive', 'Clean-Suites::Options::Archive', 'HasArg'),
490 ('n', "no-action", "Clean-Suites::Options::No-Action"),
491 ('m', "maximum", "Clean-Suites::Options::Maximum", "HasArg")]
493 apt_pkg.parse_commandline(cnf.Cnf, Arguments, sys.argv)
494 Options = cnf.subtree("Clean-Suites::Options")
496 if cnf["Clean-Suites::Options::Maximum"] != "": 496 ↛ 497line 496 didn't jump to line 497, because the condition on line 496 was never true
497 try:
498 # Only use Maximum if it's an integer
499 max_delete = int(cnf["Clean-Suites::Options::Maximum"])
500 if max_delete < 1:
501 utils.fubar("If given, Maximum must be at least 1")
502 except ValueError as e:
503 utils.fubar("If given, Maximum must be an integer")
504 else:
505 max_delete = None
507 if Options["Help"]:
508 usage()
510 program = "clean-suites"
511 if Options['No-Action']: 511 ↛ 512line 511 didn't jump to line 512, because the condition on line 511 was never true
512 program = "clean-suites (no action)"
513 Logger = daklog.Logger(program, debug=Options["No-Action"])
515 session = DBConn().session()
517 archives = None
518 if 'Archive' in Options:
519 archive_names = Options['Archive'].split(',')
520 archives = session.query(Archive).filter(Archive.archive_name.in_(archive_names)).all()
521 if len(archives) == 0: 521 ↛ 522line 521 didn't jump to line 522, because the condition on line 521 was never true
522 utils.fubar('Unknown archive.')
524 now_date = datetime.now()
526 set_archive_delete_dates(now_date, session)
528 check_binaries(now_date, session)
529 clean_binaries(now_date, session)
530 check_sources(now_date, session)
531 check_files(now_date, session)
532 clean(now_date, archives, max_delete, session)
533 clean_maintainers(now_date, session)
534 clean_fingerprints(now_date, session)
535 clean_byhash(now_date, session)
536 clean_empty_directories(session)
538 session.rollback()
540 Logger.close()
542################################################################################
545if __name__ == '__main__':
546 main()