1
2
3 """ Cleans up unassociated binary and source packages
4
5 @contact: Debian FTPMaster <ftpmaster@debian.org>
6 @copyright: 2000, 2001, 2002, 2003, 2006 James Troup <james@nocrew.org>
7 @copyright: 2009 Mark Hymers <mhy@debian.org>
8 @copyright: 2010 Joerg Jaspert <joerg@debian.org>
9 @license: GNU General Public License version 2 or later
10 """
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37 import errno
38 import os
39 import sqlalchemy.sql as sql
40 import stat
41 import sys
42 import apt_pkg
43 from datetime import datetime
44
45 from daklib.config import Config
46 from daklib.dbconn import *
47 from daklib import utils
48 from daklib import daklog
49
50
51
52 Options = None
53 Logger = None
54
55
56
57
59 print("""Usage: dak clean-suites [OPTIONS]
60 Clean old packages from suites.
61
62 -n, --no-action don't do anything
63 -h, --help show this help and exit
64 -m, --maximum maximum number of files to remove""")
65 sys.exit(exit_code)
66
67
68
69
71 Logger.log(["Checking for orphaned binary packages..."])
72
73
74
75
76
77
78 query = """
79 WITH usage AS (
80 SELECT
81 af.archive_id AS archive_id,
82 af.file_id AS file_id,
83 af.component_id AS component_id,
84 BOOL_OR(EXISTS (SELECT 1 FROM bin_associations ba
85 JOIN suite s ON ba.suite = s.id
86 WHERE ba.bin = b.id
87 AND s.archive_id = af.archive_id))
88 AS in_use
89 FROM files_archive_map af
90 JOIN binaries b ON af.file_id = b.file
91 GROUP BY af.archive_id, af.file_id, af.component_id
92 )
93
94 UPDATE files_archive_map af
95 SET last_used = CASE WHEN usage.in_use THEN NULL ELSE :last_used END
96 FROM usage, files f, archive
97 WHERE af.archive_id = usage.archive_id AND af.file_id = usage.file_id AND af.component_id = usage.component_id
98 AND ((af.last_used IS NULL AND NOT usage.in_use) OR (af.last_used IS NOT NULL AND usage.in_use))
99 AND af.file_id = f.id
100 AND af.archive_id = archive.id
101 RETURNING archive.name, f.filename, af.last_used IS NULL"""
102
103 res = session.execute(query, {'last_used': now_date})
104 for i in res:
105 op = "set lastused"
106 if i[2]:
107 op = "unset lastused"
108 Logger.log([op, i[0], i[1]])
109
110
111
112
114 Logger.log(["Checking for orphaned source packages..."])
115
116
117
118
119
120
121
122
123
124 query = """
125 WITH usage AS (
126 SELECT
127 af.archive_id AS archive_id,
128 af.file_id AS file_id,
129 af.component_id AS component_id,
130 BOOL_OR(EXISTS (SELECT 1 FROM src_associations sa
131 JOIN suite s ON sa.suite = s.id
132 WHERE sa.source = df.source
133 AND s.archive_id = af.archive_id)
134 OR EXISTS (SELECT 1 FROM files_archive_map af_bin
135 JOIN binaries b ON af_bin.file_id = b.file
136 WHERE b.source = df.source
137 AND af_bin.archive_id = af.archive_id
138 AND (af_bin.last_used IS NULL OR af_bin.last_used > ad.delete_date))
139 OR EXISTS (SELECT 1 FROM extra_src_references esr
140 JOIN bin_associations ba ON esr.bin_id = ba.bin
141 JOIN binaries b ON ba.bin = b.id
142 JOIN suite s ON ba.suite = s.id
143 WHERE esr.src_id = df.source
144 AND s.archive_id = af.archive_id))
145 AS in_use
146 FROM files_archive_map af
147 JOIN dsc_files df ON af.file_id = df.file
148 JOIN archive_delete_date ad ON af.archive_id = ad.archive_id
149 GROUP BY af.archive_id, af.file_id, af.component_id
150 )
151
152 UPDATE files_archive_map af
153 SET last_used = CASE WHEN usage.in_use THEN NULL ELSE :last_used END
154 FROM usage, files f, archive
155 WHERE af.archive_id = usage.archive_id AND af.file_id = usage.file_id AND af.component_id = usage.component_id
156 AND ((af.last_used IS NULL AND NOT usage.in_use) OR (af.last_used IS NOT NULL AND usage.in_use))
157 AND af.file_id = f.id
158 AND af.archive_id = archive.id
159
160 RETURNING archive.name, f.filename, af.last_used IS NULL
161 """
162
163 res = session.execute(query, {'last_used': now_date})
164 for i in res:
165 op = "set lastused"
166 if i[2]:
167 op = "unset lastused"
168 Logger.log([op, i[0], i[1]])
169
170
171
172
174
175
176
177
178
179
180
181 Logger.log(["Checking for unused files..."])
182 q = session.execute("""
183 UPDATE files_archive_map af
184 SET last_used = :last_used
185 FROM files f, archive
186 WHERE af.file_id = f.id
187 AND af.archive_id = archive.id
188 AND NOT EXISTS (SELECT 1 FROM binaries b WHERE b.file = af.file_id)
189 AND NOT EXISTS (SELECT 1 FROM dsc_files df WHERE df.file = af.file_id)
190 AND af.last_used IS NULL
191 RETURNING archive.name, f.filename""", {'last_used': now_date})
192
193 for x in q:
194 utils.warn("orphaned file: {0}".format(x))
195 Logger.log(["set lastused", x[0], x[1], "ORPHANED FILE"])
196
197 if not Options["No-Action"]:
198 session.commit()
199
200
202
203
204
205
206
207
208 Logger.log(["Deleting from binaries table... "])
209 q = session.execute("""
210 DELETE FROM binaries b
211 USING files f
212 WHERE f.id = b.file
213 AND NOT EXISTS (SELECT 1 FROM files_archive_map af
214 JOIN archive_delete_date ad ON af.archive_id = ad.archive_id
215 WHERE af.file_id = b.file
216 AND (af.last_used IS NULL OR af.last_used > ad.delete_date))
217 RETURNING f.filename
218 """)
219 for b in q:
220 Logger.log(["delete binary", b[0]])
221
222
223
224
225 -def clean(now_date, archives, max_delete, session):
226 cnf = Config()
227
228 count = 0
229 size = 0
230
231 Logger.log(["Cleaning out packages..."])
232
233 morguedir = cnf.get("Dir::Morgue", os.path.join("Dir::Pool", 'morgue'))
234 morguesubdir = cnf.get("Clean-Suites::MorgueSubDir", 'pool')
235
236
237 dest = os.path.join(morguedir,
238 morguesubdir,
239 str(now_date.year),
240 '%.2d' % now_date.month,
241 '%.2d' % now_date.day)
242
243 if not Options["No-Action"] and not os.path.exists(dest):
244 os.makedirs(dest)
245
246
247 Logger.log(["Deleting from source table..."])
248 q = session.execute("""
249 WITH
250 deleted_sources AS (
251 DELETE FROM source
252 USING files f
253 WHERE source.file = f.id
254 AND NOT EXISTS (SELECT 1 FROM files_archive_map af
255 JOIN archive_delete_date ad ON af.archive_id = ad.archive_id
256 WHERE af.file_id = source.file
257 AND (af.last_used IS NULL OR af.last_used > ad.delete_date))
258 RETURNING source.id AS id, f.filename AS filename
259 ),
260 deleted_dsc_files AS (
261 DELETE FROM dsc_files df WHERE df.source IN (SELECT id FROM deleted_sources)
262 RETURNING df.file AS file_id
263 ),
264 now_unused_source_files AS (
265 UPDATE files_archive_map af
266 SET last_used = '1977-03-13 13:37:42' -- Kill it now. We waited long enough before removing the .dsc.
267 WHERE af.file_id IN (SELECT file_id FROM deleted_dsc_files)
268 AND NOT EXISTS (SELECT 1 FROM dsc_files df WHERE df.file = af.file_id)
269 )
270 SELECT filename FROM deleted_sources""")
271 for s in q:
272 Logger.log(["delete source", s[0]])
273
274 if not Options["No-Action"]:
275 session.commit()
276
277
278 old_files = session.query(ArchiveFile).filter(sql.text('files_archive_map.last_used <= (SELECT delete_date FROM archive_delete_date ad WHERE ad.archive_id = files_archive_map.archive_id)')).join(Archive)
279 if max_delete is not None:
280 old_files = old_files.limit(max_delete)
281 Logger.log(["Limiting removals to %d" % max_delete])
282
283 if archives is not None:
284 archive_ids = [a.archive_id for a in archives]
285 old_files = old_files.filter(ArchiveFile.archive_id.in_(archive_ids))
286
287 for af in old_files:
288 filename = af.path
289 try:
290 st = os.lstat(filename)
291 except FileNotFoundError:
292 Logger.log(["database referred to non-existing file", filename])
293 session.delete(af)
294 continue
295 Logger.log(["delete archive file", filename])
296 if stat.S_ISLNK(st.st_mode):
297 count += 1
298 Logger.log(["delete symlink", filename])
299 if not Options["No-Action"]:
300 os.unlink(filename)
301 session.delete(af)
302 elif stat.S_ISREG(st.st_mode):
303 size += st.st_size
304 count += 1
305
306 dest_filename = dest + '/' + os.path.basename(filename)
307
308 if os.path.lexists(dest_filename):
309 dest_filename = utils.find_next_free(dest_filename)
310
311 if not Options["No-Action"]:
312 if af.archive.use_morgue:
313 Logger.log(["move to morgue", filename, dest_filename])
314 utils.move(filename, dest_filename)
315 else:
316 Logger.log(["removed file", filename])
317 os.unlink(filename)
318 session.delete(af)
319
320 else:
321 utils.fubar("%s is neither symlink nor file?!" % (filename))
322
323 if count > 0:
324 Logger.log(["total", count, utils.size_type(size)])
325
326
327 query = """
328 DELETE FROM files f
329 WHERE NOT EXISTS (SELECT 1 FROM files_archive_map af WHERE af.file_id = f.id)
330 """
331 session.execute(query)
332
333 if not Options["No-Action"]:
334 session.commit()
335
336
337
338
339 -def clean_maintainers(now_date, session):
340 Logger.log(["Cleaning out unused Maintainer entries..."])
341
342
343 q = session.execute("""
344 SELECT m.id, m.name FROM maintainer m
345 WHERE NOT EXISTS (SELECT 1 FROM binaries b WHERE b.maintainer = m.id)
346 AND NOT EXISTS (SELECT 1 FROM source s WHERE s.maintainer = m.id OR s.changedby = m.id)
347 AND NOT EXISTS (SELECT 1 FROM src_uploaders u WHERE u.maintainer = m.id)""")
348
349 count = 0
350
351 for i in q.fetchall():
352 maintainer_id = i[0]
353 Logger.log(["delete maintainer", i[1]])
354 if not Options["No-Action"]:
355 session.execute("DELETE FROM maintainer WHERE id = :maint", {'maint': maintainer_id})
356 count += 1
357
358 if not Options["No-Action"]:
359 session.commit()
360
361 if count > 0:
362 Logger.log(["total", count])
363
364
365
366
368 Logger.log(["Cleaning out unused fingerprint entries..."])
369
370
371 q = session.execute("""
372 SELECT f.id, f.fingerprint FROM fingerprint f
373 WHERE f.keyring IS NULL
374 AND NOT EXISTS (SELECT 1 FROM binaries b WHERE b.sig_fpr = f.id)
375 AND NOT EXISTS (SELECT 1 FROM source s WHERE s.sig_fpr = f.id)
376 AND NOT EXISTS (SELECT 1 FROM acl_per_source aps WHERE aps.created_by_id = f.id)""")
377
378 count = 0
379
380 for i in q.fetchall():
381 fingerprint_id = i[0]
382 Logger.log(["delete fingerprint", i[1]])
383 if not Options["No-Action"]:
384 session.execute("DELETE FROM fingerprint WHERE id = :fpr", {'fpr': fingerprint_id})
385 count += 1
386
387 if not Options["No-Action"]:
388 session.commit()
389
390 if count > 0:
391 Logger.log(["total", count])
392
393
394
395
397 cnf = Config()
398
399 Logger.log(["Cleaning out unused by-hash files..."])
400
401 q = session.execute("""
402 DELETE FROM hashfile h
403 USING suite s, archive a
404 WHERE s.id = h.suite_id
405 AND a.id = s.archive_id
406 AND h.unreferenced + a.stayofexecution < CURRENT_TIMESTAMP
407 RETURNING a.path, s.suite_name, h.path""")
408 count = q.rowcount
409
410 if not Options["No-Action"]:
411 for base, suite, path in q:
412 suite_suffix = utils.suite_suffix(suite)
413 filename = os.path.join(base, 'dists', suite, suite_suffix, path)
414 try:
415 os.unlink(filename)
416 except OSError as exc:
417 if exc.errno != errno.ENOENT:
418 raise
419 Logger.log(['database referred to non-existing file', filename])
420 else:
421 Logger.log(['delete hashfile', suite, path])
422 session.commit()
423
424 if count > 0:
425 Logger.log(["total", count])
426
427
428
429
431 """
432 Removes empty directories from pool directories.
433 """
434
435 Logger.log(["Cleaning out empty directories..."])
436
437 count = 0
438
439 cursor = session.execute(
440 """SELECT DISTINCT(path) FROM archive"""
441 )
442 bases = [x[0] for x in cursor.fetchall()]
443
444 for base in bases:
445 for dirpath, dirnames, filenames in os.walk(base, topdown=False):
446 if not filenames and not dirnames:
447 to_remove = os.path.join(base, dirpath)
448 if not Options["No-Action"]:
449 Logger.log(["removing directory", to_remove])
450 os.removedirs(to_remove)
451 count += 1
452
453 if count:
454 Logger.log(["total removed directories", count])
455
456
457
458
460 session.execute("""
461 CREATE TEMPORARY TABLE archive_delete_date (
462 archive_id INT NOT NULL,
463 delete_date TIMESTAMP NOT NULL
464 )""")
465
466 session.execute("""
467 INSERT INTO archive_delete_date
468 (archive_id, delete_date)
469 SELECT
470 archive.id, :now_date - archive.stayofexecution
471 FROM archive""", {'now_date': now_date})
472
473 session.flush()
474
475
476
477
479 global Options, Logger
480
481 cnf = Config()
482
483 for i in ["Help", "No-Action", "Maximum"]:
484 key = "Clean-Suites::Options::%s" % i
485 if key not in cnf:
486 cnf[key] = ""
487
488 Arguments = [('h', "help", "Clean-Suites::Options::Help"),
489 ('a', 'archive', 'Clean-Suites::Options::Archive', 'HasArg'),
490 ('n', "no-action", "Clean-Suites::Options::No-Action"),
491 ('m', "maximum", "Clean-Suites::Options::Maximum", "HasArg")]
492
493 apt_pkg.parse_commandline(cnf.Cnf, Arguments, sys.argv)
494 Options = cnf.subtree("Clean-Suites::Options")
495
496 if cnf["Clean-Suites::Options::Maximum"] != "":
497 try:
498
499 max_delete = int(cnf["Clean-Suites::Options::Maximum"])
500 if max_delete < 1:
501 utils.fubar("If given, Maximum must be at least 1")
502 except ValueError as e:
503 utils.fubar("If given, Maximum must be an integer")
504 else:
505 max_delete = None
506
507 if Options["Help"]:
508 usage()
509
510 program = "clean-suites"
511 if Options['No-Action']:
512 program = "clean-suites (no action)"
513 Logger = daklog.Logger(program, debug=Options["No-Action"])
514
515 session = DBConn().session()
516
517 archives = None
518 if 'Archive' in Options:
519 archive_names = Options['Archive'].split(',')
520 archives = session.query(Archive).filter(Archive.archive_name.in_(archive_names)).all()
521 if len(archives) == 0:
522 utils.fubar('Unknown archive.')
523
524 now_date = datetime.now()
525
526 set_archive_delete_dates(now_date, session)
527
528 check_binaries(now_date, session)
529 clean_binaries(now_date, session)
530 check_sources(now_date, session)
531 check_files(now_date, session)
532 clean(now_date, archives, max_delete, session)
533 clean_maintainers(now_date, session)
534 clean_fingerprints(now_date, session)
535 clean_byhash(now_date, session)
536 clean_empty_directories(session)
537
538 session.rollback()
539
540 Logger.close()
541
542
543
544
545 if __name__ == '__main__':
546 main()
547