1
2
3 """Cleans up unassociated binary and source packages
4
5 @contact: Debian FTPMaster <ftpmaster@debian.org>
6 @copyright: 2000, 2001, 2002, 2003, 2006 James Troup <james@nocrew.org>
7 @copyright: 2009 Mark Hymers <mhy@debian.org>
8 @copyright: 2010 Joerg Jaspert <joerg@debian.org>
9 @license: GNU General Public License version 2 or later
10 """
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37 import errno
38 import os
39 import stat
40 import sys
41 from datetime import datetime
42
43 import apt_pkg
44 import sqlalchemy.sql as sql
45
46 from daklib import daklog, utils
47 from daklib.config import Config
48 from daklib.dbconn import Archive, ArchiveFile, DBConn
49
50
51
52 Options = None
53 Logger = None
54
55
56
57
59 print(
60 """Usage: dak clean-suites [OPTIONS]
61 Clean old packages from suites.
62
63 -n, --no-action don't do anything
64 -h, --help show this help and exit
65 -m, --maximum maximum number of files to remove"""
66 )
67 sys.exit(exit_code)
68
69
70
71
72
74 Logger.log(["Checking for orphaned binary packages..."])
75
76
77
78
79
80
81 query = """
82 WITH usage AS (
83 SELECT
84 af.archive_id AS archive_id,
85 af.file_id AS file_id,
86 af.component_id AS component_id,
87 BOOL_OR(EXISTS (SELECT 1 FROM bin_associations ba
88 JOIN suite s ON ba.suite = s.id
89 WHERE ba.bin = b.id
90 AND s.archive_id = af.archive_id))
91 AS in_use
92 FROM files_archive_map af
93 JOIN binaries b ON af.file_id = b.file
94 GROUP BY af.archive_id, af.file_id, af.component_id
95 )
96
97 UPDATE files_archive_map af
98 SET last_used = CASE WHEN usage.in_use THEN NULL ELSE :last_used END
99 FROM usage, files f, archive
100 WHERE af.archive_id = usage.archive_id AND af.file_id = usage.file_id AND af.component_id = usage.component_id
101 AND ((af.last_used IS NULL AND NOT usage.in_use) OR (af.last_used IS NOT NULL AND usage.in_use))
102 AND af.file_id = f.id
103 AND af.archive_id = archive.id
104 RETURNING archive.name, f.filename, af.last_used IS NULL"""
105
106 res = session.execute(query, {"last_used": now_date})
107 for i in res:
108 op = "set lastused"
109 if i[2]:
110 op = "unset lastused"
111 Logger.log([op, i[0], i[1]])
112
113
114
115
116
118 Logger.log(["Checking for orphaned source packages..."])
119
120
121
122
123
124
125
126
127
128 query = """
129 WITH usage AS (
130 SELECT
131 af.archive_id AS archive_id,
132 af.file_id AS file_id,
133 af.component_id AS component_id,
134 BOOL_OR(EXISTS (SELECT 1 FROM src_associations sa
135 JOIN suite s ON sa.suite = s.id
136 WHERE sa.source = df.source
137 AND s.archive_id = af.archive_id)
138 OR EXISTS (SELECT 1 FROM files_archive_map af_bin
139 JOIN binaries b ON af_bin.file_id = b.file
140 WHERE b.source = df.source
141 AND af_bin.archive_id = af.archive_id
142 AND (af_bin.last_used IS NULL OR af_bin.last_used > ad.delete_date))
143 OR EXISTS (SELECT 1 FROM extra_src_references esr
144 JOIN bin_associations ba ON esr.bin_id = ba.bin
145 JOIN binaries b ON ba.bin = b.id
146 JOIN suite s ON ba.suite = s.id
147 WHERE esr.src_id = df.source
148 AND s.archive_id = af.archive_id))
149 AS in_use
150 FROM files_archive_map af
151 JOIN dsc_files df ON af.file_id = df.file
152 JOIN archive_delete_date ad ON af.archive_id = ad.archive_id
153 GROUP BY af.archive_id, af.file_id, af.component_id
154 )
155
156 UPDATE files_archive_map af
157 SET last_used = CASE WHEN usage.in_use THEN NULL ELSE :last_used END
158 FROM usage, files f, archive
159 WHERE af.archive_id = usage.archive_id AND af.file_id = usage.file_id AND af.component_id = usage.component_id
160 AND ((af.last_used IS NULL AND NOT usage.in_use) OR (af.last_used IS NOT NULL AND usage.in_use))
161 AND af.file_id = f.id
162 AND af.archive_id = archive.id
163
164 RETURNING archive.name, f.filename, af.last_used IS NULL
165 """
166
167 res = session.execute(query, {"last_used": now_date})
168 for i in res:
169 op = "set lastused"
170 if i[2]:
171 op = "unset lastused"
172 Logger.log([op, i[0], i[1]])
173
174
175
176
177
179
180
181
182
183
184
185
186 Logger.log(["Checking for unused files..."])
187 q = session.execute(
188 """
189 UPDATE files_archive_map af
190 SET last_used = :last_used
191 FROM files f, archive
192 WHERE af.file_id = f.id
193 AND af.archive_id = archive.id
194 AND NOT EXISTS (SELECT 1 FROM binaries b WHERE b.file = af.file_id)
195 AND NOT EXISTS (SELECT 1 FROM dsc_files df WHERE df.file = af.file_id)
196 AND af.last_used IS NULL
197 RETURNING archive.name, f.filename""",
198 {"last_used": now_date},
199 )
200
201 for x in q:
202 utils.warn("orphaned file: {0}".format(x))
203 Logger.log(["set lastused", x[0], x[1], "ORPHANED FILE"])
204
205 if not Options["No-Action"]:
206 session.commit()
207
208
210
211
212
213
214
215
216 Logger.log(["Deleting from binaries table... "])
217 q = session.execute(
218 """
219 DELETE FROM binaries b
220 USING files f
221 WHERE f.id = b.file
222 AND NOT EXISTS (SELECT 1 FROM files_archive_map af
223 JOIN archive_delete_date ad ON af.archive_id = ad.archive_id
224 WHERE af.file_id = b.file
225 AND (af.last_used IS NULL OR af.last_used > ad.delete_date))
226 RETURNING f.filename
227 """
228 )
229 for b in q:
230 Logger.log(["delete binary", b[0]])
231
232
233
234
235
236 -def clean(now_date, archives, max_delete, session):
237 cnf = Config()
238
239 count = 0
240 size = 0
241
242 Logger.log(["Cleaning out packages..."])
243
244 morguedir = cnf.get("Dir::Morgue", os.path.join("Dir::Pool", "morgue"))
245 morguesubdir = cnf.get("Clean-Suites::MorgueSubDir", "pool")
246
247
248 dest = os.path.join(
249 morguedir,
250 morguesubdir,
251 str(now_date.year),
252 "%.2d" % now_date.month,
253 "%.2d" % now_date.day,
254 )
255
256 if not Options["No-Action"] and not os.path.exists(dest):
257 os.makedirs(dest)
258
259
260 Logger.log(["Deleting from source table..."])
261 q = session.execute(
262 """
263 WITH
264 deleted_sources AS (
265 DELETE FROM source
266 USING files f
267 WHERE source.file = f.id
268 AND NOT EXISTS (SELECT 1 FROM files_archive_map af
269 JOIN archive_delete_date ad ON af.archive_id = ad.archive_id
270 WHERE af.file_id = source.file
271 AND (af.last_used IS NULL OR af.last_used > ad.delete_date))
272 RETURNING source.id AS id, f.filename AS filename
273 ),
274 deleted_dsc_files AS (
275 DELETE FROM dsc_files df WHERE df.source IN (SELECT id FROM deleted_sources)
276 RETURNING df.file AS file_id
277 ),
278 now_unused_source_files AS (
279 UPDATE files_archive_map af
280 SET last_used = '1977-03-13 13:37:42' -- Kill it now. We waited long enough before removing the .dsc.
281 WHERE af.file_id IN (SELECT file_id FROM deleted_dsc_files)
282 AND NOT EXISTS (SELECT 1 FROM dsc_files df WHERE df.file = af.file_id)
283 )
284 SELECT filename FROM deleted_sources"""
285 )
286 for s in q:
287 Logger.log(["delete source", s[0]])
288
289 if not Options["No-Action"]:
290 session.commit()
291
292
293 old_files = (
294 session.query(ArchiveFile)
295 .filter(
296 sql.text(
297 "files_archive_map.last_used <= (SELECT delete_date FROM archive_delete_date ad WHERE ad.archive_id = files_archive_map.archive_id)"
298 )
299 )
300 .join(Archive)
301 )
302 if max_delete is not None:
303 old_files = old_files.limit(max_delete)
304 Logger.log(["Limiting removals to %d" % max_delete])
305
306 if archives is not None:
307 archive_ids = [a.archive_id for a in archives]
308 old_files = old_files.filter(ArchiveFile.archive_id.in_(archive_ids))
309
310 for af in old_files:
311 filename = af.path
312 try:
313 st = os.lstat(filename)
314 except FileNotFoundError:
315 Logger.log(["database referred to non-existing file", filename])
316 session.delete(af)
317 continue
318 Logger.log(["delete archive file", filename])
319 if stat.S_ISLNK(st.st_mode):
320 count += 1
321 Logger.log(["delete symlink", filename])
322 if not Options["No-Action"]:
323 os.unlink(filename)
324 session.delete(af)
325 elif stat.S_ISREG(st.st_mode):
326 size += st.st_size
327 count += 1
328
329 dest_filename = dest + "/" + os.path.basename(filename)
330
331 if os.path.lexists(dest_filename):
332 dest_filename = utils.find_next_free(dest_filename)
333
334 if not Options["No-Action"]:
335 if af.archive.use_morgue:
336 Logger.log(["move to morgue", filename, dest_filename])
337 utils.move(filename, dest_filename)
338 else:
339 Logger.log(["removed file", filename])
340 os.unlink(filename)
341 session.delete(af)
342
343 else:
344 utils.fubar("%s is neither symlink nor file?!" % (filename))
345
346 if count > 0:
347 Logger.log(["total", count, utils.size_type(size)])
348
349
350 query = """
351 DELETE FROM files f
352 WHERE NOT EXISTS (SELECT 1 FROM files_archive_map af WHERE af.file_id = f.id)
353 """
354 session.execute(query)
355
356 if not Options["No-Action"]:
357 session.commit()
358
359
360
361
362
363 -def clean_maintainers(now_date, session):
364 Logger.log(["Cleaning out unused Maintainer entries..."])
365
366
367 q = session.execute(
368 """
369 SELECT m.id, m.name FROM maintainer m
370 WHERE NOT EXISTS (SELECT 1 FROM binaries b WHERE b.maintainer = m.id)
371 AND NOT EXISTS (SELECT 1 FROM source s WHERE s.maintainer = m.id OR s.changedby = m.id)
372 AND NOT EXISTS (SELECT 1 FROM src_uploaders u WHERE u.maintainer = m.id)"""
373 )
374
375 count = 0
376
377 for i in q.fetchall():
378 maintainer_id = i[0]
379 Logger.log(["delete maintainer", i[1]])
380 if not Options["No-Action"]:
381 session.execute(
382 "DELETE FROM maintainer WHERE id = :maint", {"maint": maintainer_id}
383 )
384 count += 1
385
386 if not Options["No-Action"]:
387 session.commit()
388
389 if count > 0:
390 Logger.log(["total", count])
391
392
393
394
395
397 Logger.log(["Cleaning out unused fingerprint entries..."])
398
399
400 q = session.execute(
401 """
402 SELECT f.id, f.fingerprint FROM fingerprint f
403 WHERE f.keyring IS NULL
404 AND NOT EXISTS (SELECT 1 FROM binaries b WHERE b.sig_fpr = f.id)
405 AND NOT EXISTS (SELECT 1 FROM source s WHERE s.sig_fpr = f.id)
406 AND NOT EXISTS (SELECT 1 FROM acl_per_source aps WHERE aps.created_by_id = f.id)"""
407 )
408
409 count = 0
410
411 for i in q.fetchall():
412 fingerprint_id = i[0]
413 Logger.log(["delete fingerprint", i[1]])
414 if not Options["No-Action"]:
415 session.execute(
416 "DELETE FROM fingerprint WHERE id = :fpr", {"fpr": fingerprint_id}
417 )
418 count += 1
419
420 if not Options["No-Action"]:
421 session.commit()
422
423 if count > 0:
424 Logger.log(["total", count])
425
426
427
428
429
431 Logger.log(["Cleaning out unused by-hash files..."])
432
433 q = session.execute(
434 """
435 DELETE FROM hashfile h
436 USING suite s, archive a
437 WHERE s.id = h.suite_id
438 AND a.id = s.archive_id
439 AND h.unreferenced + a.stayofexecution < CURRENT_TIMESTAMP
440 RETURNING a.path, s.suite_name, h.path"""
441 )
442 count = q.rowcount
443
444 if not Options["No-Action"]:
445 for base, suite, path in q:
446 suite_suffix = utils.suite_suffix(suite)
447 filename = os.path.join(base, "dists", suite, suite_suffix, path)
448 try:
449 os.unlink(filename)
450 except OSError as exc:
451 if exc.errno != errno.ENOENT:
452 raise
453 Logger.log(["database referred to non-existing file", filename])
454 else:
455 Logger.log(["delete hashfile", suite, path])
456 session.commit()
457
458 if count > 0:
459 Logger.log(["total", count])
460
461
462
463
464
466 """
467 Removes empty directories from pool directories.
468 """
469
470 Logger.log(["Cleaning out empty directories..."])
471
472 count = 0
473
474 cursor = session.execute("""SELECT DISTINCT(path) FROM archive""")
475 bases = [x[0] for x in cursor.fetchall()]
476
477 for base in bases:
478 for dirpath, dirnames, filenames in os.walk(base, topdown=False):
479 if not filenames and not dirnames:
480 to_remove = os.path.join(base, dirpath)
481 if not Options["No-Action"]:
482 Logger.log(["removing directory", to_remove])
483 os.removedirs(to_remove)
484 count += 1
485
486 if count:
487 Logger.log(["total removed directories", count])
488
489
490
491
492
494 session.execute(
495 """
496 CREATE TEMPORARY TABLE archive_delete_date (
497 archive_id INT NOT NULL,
498 delete_date TIMESTAMP NOT NULL
499 )"""
500 )
501
502 session.execute(
503 """
504 INSERT INTO archive_delete_date
505 (archive_id, delete_date)
506 SELECT
507 archive.id, :now_date - archive.stayofexecution
508 FROM archive""",
509 {"now_date": now_date},
510 )
511
512 session.flush()
513
514
515
516
517
519 global Options, Logger
520
521 cnf = Config()
522
523 for i in ["Help", "No-Action", "Maximum"]:
524 key = "Clean-Suites::Options::%s" % i
525 if key not in cnf:
526 cnf[key] = ""
527
528 Arguments = [
529 ("h", "help", "Clean-Suites::Options::Help"),
530 ("a", "archive", "Clean-Suites::Options::Archive", "HasArg"),
531 ("n", "no-action", "Clean-Suites::Options::No-Action"),
532 ("m", "maximum", "Clean-Suites::Options::Maximum", "HasArg"),
533 ]
534
535 apt_pkg.parse_commandline(cnf.Cnf, Arguments, sys.argv)
536 Options = cnf.subtree("Clean-Suites::Options")
537
538 if cnf["Clean-Suites::Options::Maximum"] != "":
539 try:
540
541 max_delete = int(cnf["Clean-Suites::Options::Maximum"])
542 if max_delete < 1:
543 utils.fubar("If given, Maximum must be at least 1")
544 except ValueError:
545 utils.fubar("If given, Maximum must be an integer")
546 else:
547 max_delete = None
548
549 if Options["Help"]:
550 usage()
551
552 program = "clean-suites"
553 if Options["No-Action"]:
554 program = "clean-suites (no action)"
555 Logger = daklog.Logger(program, debug=Options["No-Action"])
556
557 session = DBConn().session()
558
559 archives = None
560 if "Archive" in Options:
561 archive_names = Options["Archive"].split(",")
562 archives = (
563 session.query(Archive).filter(Archive.archive_name.in_(archive_names)).all()
564 )
565 if len(archives) == 0:
566 utils.fubar("Unknown archive.")
567
568 now_date = datetime.now()
569
570 set_archive_delete_dates(now_date, session)
571
572 check_binaries(now_date, session)
573 clean_binaries(now_date, session)
574 check_sources(now_date, session)
575 check_files(now_date, session)
576 clean(now_date, archives, max_delete, session)
577 clean_maintainers(now_date, session)
578 clean_fingerprints(now_date, session)
579 clean_byhash(now_date, session)
580 clean_empty_directories(session)
581
582 session.rollback()
583
584 Logger.close()
585
586
587
588
589
590 if __name__ == "__main__":
591 main()
592