Package dak :: Module clean_suites
[hide private]
[frames] | no frames]

Source Code for Module dak.clean_suites

  1  #! /usr/bin/env python3 
  2   
  3  """Cleans up unassociated binary and source packages 
  4   
  5  @contact: Debian FTPMaster <ftpmaster@debian.org> 
  6  @copyright: 2000, 2001, 2002, 2003, 2006  James Troup <james@nocrew.org> 
  7  @copyright: 2009  Mark Hymers <mhy@debian.org> 
  8  @copyright: 2010  Joerg Jaspert <joerg@debian.org> 
  9  @license: GNU General Public License version 2 or later 
 10  """ 
 11   
 12  # This program is free software; you can redistribute it and/or modify 
 13  # it under the terms of the GNU General Public License as published by 
 14  # the Free Software Foundation; either version 2 of the License, or 
 15  # (at your option) any later version. 
 16   
 17  # This program is distributed in the hope that it will be useful, 
 18  # but WITHOUT ANY WARRANTY; without even the implied warranty of 
 19  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
 20  # GNU General Public License for more details. 
 21   
 22  # You should have received a copy of the GNU General Public License 
 23  # along with this program; if not, write to the Free Software 
 24  # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA 
 25   
 26  ################################################################################ 
 27   
 28  # 07:05|<elmo> well.. *shrug*.. no, probably not.. but to fix it, 
 29  #      |       we're going to have to implement reference counting 
 30  #      |       through dependencies.. do we really want to go down 
 31  #      |       that road? 
 32  # 
 33  # 07:05|<Culus> elmo: Augh! <brain jumps out of skull> 
 34   
 35  ################################################################################ 
 36   
 37  import errno 
 38  import os 
 39  import stat 
 40  import sys 
 41  from datetime import datetime 
 42   
 43  import apt_pkg 
 44  import sqlalchemy.sql as sql 
 45   
 46  from daklib import daklog, utils 
 47  from daklib.config import Config 
 48  from daklib.dbconn import Archive, ArchiveFile, DBConn 
 49   
 50  ################################################################################ 
 51   
 52  Options = None 
 53  Logger = None 
 54   
 55  ################################################################################ 
 56   
 57   
58 -def usage(exit_code=0):
59 print( 60 """Usage: dak clean-suites [OPTIONS] 61 Clean old packages from suites. 62 63 -n, --no-action don't do anything 64 -h, --help show this help and exit 65 -m, --maximum maximum number of files to remove""" 66 ) 67 sys.exit(exit_code)
68 69 70 ################################################################################ 71 72
73 -def check_binaries(now_date, session):
74 Logger.log(["Checking for orphaned binary packages..."]) 75 76 # Get the list of binary packages not in a suite and mark them for 77 # deletion. 78 # Check for any binaries which are marked for eventual deletion 79 # but are now used again. 80 81 query = """ 82 WITH usage AS ( 83 SELECT 84 af.archive_id AS archive_id, 85 af.file_id AS file_id, 86 af.component_id AS component_id, 87 BOOL_OR(EXISTS (SELECT 1 FROM bin_associations ba 88 JOIN suite s ON ba.suite = s.id 89 WHERE ba.bin = b.id 90 AND s.archive_id = af.archive_id)) 91 AS in_use 92 FROM files_archive_map af 93 JOIN binaries b ON af.file_id = b.file 94 GROUP BY af.archive_id, af.file_id, af.component_id 95 ) 96 97 UPDATE files_archive_map af 98 SET last_used = CASE WHEN usage.in_use THEN NULL ELSE :last_used END 99 FROM usage, files f, archive 100 WHERE af.archive_id = usage.archive_id AND af.file_id = usage.file_id AND af.component_id = usage.component_id 101 AND ((af.last_used IS NULL AND NOT usage.in_use) OR (af.last_used IS NOT NULL AND usage.in_use)) 102 AND af.file_id = f.id 103 AND af.archive_id = archive.id 104 RETURNING archive.name, f.filename, af.last_used IS NULL""" 105 106 res = session.execute(query, {"last_used": now_date}) 107 for i in res: 108 op = "set lastused" 109 if i[2]: 110 op = "unset lastused" 111 Logger.log([op, i[0], i[1]])
112 113 114 ######################################## 115 116
117 -def check_sources(now_date, session):
118 Logger.log(["Checking for orphaned source packages..."]) 119 120 # Get the list of source packages not in a suite and not used by 121 # any binaries. 122 123 # Check for any sources which are marked for deletion but which 124 # are now used again. 125 126 # TODO: the UPDATE part is the same as in check_binaries. Merge? 127 128 query = """ 129 WITH usage AS ( 130 SELECT 131 af.archive_id AS archive_id, 132 af.file_id AS file_id, 133 af.component_id AS component_id, 134 BOOL_OR(EXISTS (SELECT 1 FROM src_associations sa 135 JOIN suite s ON sa.suite = s.id 136 WHERE sa.source = df.source 137 AND s.archive_id = af.archive_id) 138 OR EXISTS (SELECT 1 FROM files_archive_map af_bin 139 JOIN binaries b ON af_bin.file_id = b.file 140 WHERE b.source = df.source 141 AND af_bin.archive_id = af.archive_id 142 AND (af_bin.last_used IS NULL OR af_bin.last_used > ad.delete_date)) 143 OR EXISTS (SELECT 1 FROM extra_src_references esr 144 JOIN bin_associations ba ON esr.bin_id = ba.bin 145 JOIN binaries b ON ba.bin = b.id 146 JOIN suite s ON ba.suite = s.id 147 WHERE esr.src_id = df.source 148 AND s.archive_id = af.archive_id)) 149 AS in_use 150 FROM files_archive_map af 151 JOIN dsc_files df ON af.file_id = df.file 152 JOIN archive_delete_date ad ON af.archive_id = ad.archive_id 153 GROUP BY af.archive_id, af.file_id, af.component_id 154 ) 155 156 UPDATE files_archive_map af 157 SET last_used = CASE WHEN usage.in_use THEN NULL ELSE :last_used END 158 FROM usage, files f, archive 159 WHERE af.archive_id = usage.archive_id AND af.file_id = usage.file_id AND af.component_id = usage.component_id 160 AND ((af.last_used IS NULL AND NOT usage.in_use) OR (af.last_used IS NOT NULL AND usage.in_use)) 161 AND af.file_id = f.id 162 AND af.archive_id = archive.id 163 164 RETURNING archive.name, f.filename, af.last_used IS NULL 165 """ 166 167 res = session.execute(query, {"last_used": now_date}) 168 for i in res: 169 op = "set lastused" 170 if i[2]: 171 op = "unset lastused" 172 Logger.log([op, i[0], i[1]])
173 174 175 ######################################## 176 177
178 -def check_files(now_date, session):
179 # FIXME: this is evil; nothing should ever be in this state. if 180 # they are, it's a bug. 181 182 # However, we've discovered it happens sometimes so we print a huge warning 183 # and then mark the file for deletion. This probably masks a bug somwhere 184 # else but is better than collecting cruft forever 185 186 Logger.log(["Checking for unused files..."]) 187 q = session.execute( 188 """ 189 UPDATE files_archive_map af 190 SET last_used = :last_used 191 FROM files f, archive 192 WHERE af.file_id = f.id 193 AND af.archive_id = archive.id 194 AND NOT EXISTS (SELECT 1 FROM binaries b WHERE b.file = af.file_id) 195 AND NOT EXISTS (SELECT 1 FROM dsc_files df WHERE df.file = af.file_id) 196 AND af.last_used IS NULL 197 RETURNING archive.name, f.filename""", 198 {"last_used": now_date}, 199 ) 200 201 for x in q: 202 utils.warn("orphaned file: {0}".format(x)) 203 Logger.log(["set lastused", x[0], x[1], "ORPHANED FILE"]) 204 205 if not Options["No-Action"]: 206 session.commit()
207 208
209 -def clean_binaries(now_date, session):
210 # We do this here so that the binaries we remove will have their 211 # source also removed (if possible). 212 213 # XXX: why doesn't this remove the files here as well? I don't think it 214 # buys anything keeping this separate 215 216 Logger.log(["Deleting from binaries table... "]) 217 q = session.execute( 218 """ 219 DELETE FROM binaries b 220 USING files f 221 WHERE f.id = b.file 222 AND NOT EXISTS (SELECT 1 FROM files_archive_map af 223 JOIN archive_delete_date ad ON af.archive_id = ad.archive_id 224 WHERE af.file_id = b.file 225 AND (af.last_used IS NULL OR af.last_used > ad.delete_date)) 226 RETURNING f.filename 227 """ 228 ) 229 for b in q: 230 Logger.log(["delete binary", b[0]])
231 232 233 ######################################## 234 235
236 -def clean(now_date, archives, max_delete, session):
237 cnf = Config() 238 239 count = 0 240 size = 0 241 242 Logger.log(["Cleaning out packages..."]) 243 244 morguedir = cnf.get("Dir::Morgue", os.path.join("Dir::Pool", "morgue")) 245 morguesubdir = cnf.get("Clean-Suites::MorgueSubDir", "pool") 246 247 # Build directory as morguedir/morguesubdir/year/month/day 248 dest = os.path.join( 249 morguedir, 250 morguesubdir, 251 str(now_date.year), 252 "%.2d" % now_date.month, 253 "%.2d" % now_date.day, 254 ) 255 256 if not Options["No-Action"] and not os.path.exists(dest): 257 os.makedirs(dest) 258 259 # Delete from source 260 Logger.log(["Deleting from source table..."]) 261 q = session.execute( 262 """ 263 WITH 264 deleted_sources AS ( 265 DELETE FROM source 266 USING files f 267 WHERE source.file = f.id 268 AND NOT EXISTS (SELECT 1 FROM files_archive_map af 269 JOIN archive_delete_date ad ON af.archive_id = ad.archive_id 270 WHERE af.file_id = source.file 271 AND (af.last_used IS NULL OR af.last_used > ad.delete_date)) 272 RETURNING source.id AS id, f.filename AS filename 273 ), 274 deleted_dsc_files AS ( 275 DELETE FROM dsc_files df WHERE df.source IN (SELECT id FROM deleted_sources) 276 RETURNING df.file AS file_id 277 ), 278 now_unused_source_files AS ( 279 UPDATE files_archive_map af 280 SET last_used = '1977-03-13 13:37:42' -- Kill it now. We waited long enough before removing the .dsc. 281 WHERE af.file_id IN (SELECT file_id FROM deleted_dsc_files) 282 AND NOT EXISTS (SELECT 1 FROM dsc_files df WHERE df.file = af.file_id) 283 ) 284 SELECT filename FROM deleted_sources""" 285 ) 286 for s in q: 287 Logger.log(["delete source", s[0]]) 288 289 if not Options["No-Action"]: 290 session.commit() 291 292 # Delete files from the pool 293 old_files = ( 294 session.query(ArchiveFile) 295 .filter( 296 sql.text( 297 "files_archive_map.last_used <= (SELECT delete_date FROM archive_delete_date ad WHERE ad.archive_id = files_archive_map.archive_id)" 298 ) 299 ) 300 .join(Archive) 301 ) 302 if max_delete is not None: 303 old_files = old_files.limit(max_delete) 304 Logger.log(["Limiting removals to %d" % max_delete]) 305 306 if archives is not None: 307 archive_ids = [a.archive_id for a in archives] 308 old_files = old_files.filter(ArchiveFile.archive_id.in_(archive_ids)) 309 310 for af in old_files: 311 filename = af.path 312 try: 313 st = os.lstat(filename) 314 except FileNotFoundError: 315 Logger.log(["database referred to non-existing file", filename]) 316 session.delete(af) 317 continue 318 Logger.log(["delete archive file", filename]) 319 if stat.S_ISLNK(st.st_mode): 320 count += 1 321 Logger.log(["delete symlink", filename]) 322 if not Options["No-Action"]: 323 os.unlink(filename) 324 session.delete(af) 325 elif stat.S_ISREG(st.st_mode): 326 size += st.st_size 327 count += 1 328 329 dest_filename = dest + "/" + os.path.basename(filename) 330 # If the destination file exists; try to find another filename to use 331 if os.path.lexists(dest_filename): 332 dest_filename = utils.find_next_free(dest_filename) 333 334 if not Options["No-Action"]: 335 if af.archive.use_morgue: 336 Logger.log(["move to morgue", filename, dest_filename]) 337 utils.move(filename, dest_filename) 338 else: 339 Logger.log(["removed file", filename]) 340 os.unlink(filename) 341 session.delete(af) 342 343 else: 344 utils.fubar("%s is neither symlink nor file?!" % (filename)) 345 346 if count > 0: 347 Logger.log(["total", count, utils.size_type(size)]) 348 349 # Delete entries in files no longer referenced by any archive 350 query = """ 351 DELETE FROM files f 352 WHERE NOT EXISTS (SELECT 1 FROM files_archive_map af WHERE af.file_id = f.id) 353 """ 354 session.execute(query) 355 356 if not Options["No-Action"]: 357 session.commit()
358 359 360 ################################################################################ 361 362
363 -def clean_maintainers(now_date, session):
364 Logger.log(["Cleaning out unused Maintainer entries..."]) 365 366 # TODO Replace this whole thing with one SQL statement 367 q = session.execute( 368 """ 369 SELECT m.id, m.name FROM maintainer m 370 WHERE NOT EXISTS (SELECT 1 FROM binaries b WHERE b.maintainer = m.id) 371 AND NOT EXISTS (SELECT 1 FROM source s WHERE s.maintainer = m.id OR s.changedby = m.id) 372 AND NOT EXISTS (SELECT 1 FROM src_uploaders u WHERE u.maintainer = m.id)""" 373 ) 374 375 count = 0 376 377 for i in q.fetchall(): 378 maintainer_id = i[0] 379 Logger.log(["delete maintainer", i[1]]) 380 if not Options["No-Action"]: 381 session.execute( 382 "DELETE FROM maintainer WHERE id = :maint", {"maint": maintainer_id} 383 ) 384 count += 1 385 386 if not Options["No-Action"]: 387 session.commit() 388 389 if count > 0: 390 Logger.log(["total", count])
391 392 393 ################################################################################ 394 395
396 -def clean_fingerprints(now_date, session):
397 Logger.log(["Cleaning out unused fingerprint entries..."]) 398 399 # TODO Replace this whole thing with one SQL statement 400 q = session.execute( 401 """ 402 SELECT f.id, f.fingerprint FROM fingerprint f 403 WHERE f.keyring IS NULL 404 AND NOT EXISTS (SELECT 1 FROM binaries b WHERE b.sig_fpr = f.id) 405 AND NOT EXISTS (SELECT 1 FROM source s WHERE s.sig_fpr = f.id) 406 AND NOT EXISTS (SELECT 1 FROM acl_per_source aps WHERE aps.created_by_id = f.id)""" 407 ) 408 409 count = 0 410 411 for i in q.fetchall(): 412 fingerprint_id = i[0] 413 Logger.log(["delete fingerprint", i[1]]) 414 if not Options["No-Action"]: 415 session.execute( 416 "DELETE FROM fingerprint WHERE id = :fpr", {"fpr": fingerprint_id} 417 ) 418 count += 1 419 420 if not Options["No-Action"]: 421 session.commit() 422 423 if count > 0: 424 Logger.log(["total", count])
425 426 427 ################################################################################ 428 429
430 -def clean_byhash(now_date, session):
431 Logger.log(["Cleaning out unused by-hash files..."]) 432 433 q = session.execute( 434 """ 435 DELETE FROM hashfile h 436 USING suite s, archive a 437 WHERE s.id = h.suite_id 438 AND a.id = s.archive_id 439 AND h.unreferenced + a.stayofexecution < CURRENT_TIMESTAMP 440 RETURNING a.path, s.suite_name, h.path""" 441 ) 442 count = q.rowcount 443 444 if not Options["No-Action"]: 445 for base, suite, path in q: 446 suite_suffix = utils.suite_suffix(suite) 447 filename = os.path.join(base, "dists", suite, suite_suffix, path) 448 try: 449 os.unlink(filename) 450 except OSError as exc: 451 if exc.errno != errno.ENOENT: 452 raise 453 Logger.log(["database referred to non-existing file", filename]) 454 else: 455 Logger.log(["delete hashfile", suite, path]) 456 session.commit() 457 458 if count > 0: 459 Logger.log(["total", count])
460 461 462 ################################################################################ 463 464
465 -def clean_empty_directories(session):
466 """ 467 Removes empty directories from pool directories. 468 """ 469 470 Logger.log(["Cleaning out empty directories..."]) 471 472 count = 0 473 474 cursor = session.execute("""SELECT DISTINCT(path) FROM archive""") 475 bases = [x[0] for x in cursor.fetchall()] 476 477 for base in bases: 478 for dirpath, dirnames, filenames in os.walk(base, topdown=False): 479 if not filenames and not dirnames: 480 to_remove = os.path.join(base, dirpath) 481 if not Options["No-Action"]: 482 Logger.log(["removing directory", to_remove]) 483 os.removedirs(to_remove) 484 count += 1 485 486 if count: 487 Logger.log(["total removed directories", count])
488 489 490 ################################################################################ 491 492
493 -def set_archive_delete_dates(now_date, session):
494 session.execute( 495 """ 496 CREATE TEMPORARY TABLE archive_delete_date ( 497 archive_id INT NOT NULL, 498 delete_date TIMESTAMP NOT NULL 499 )""" 500 ) 501 502 session.execute( 503 """ 504 INSERT INTO archive_delete_date 505 (archive_id, delete_date) 506 SELECT 507 archive.id, :now_date - archive.stayofexecution 508 FROM archive""", 509 {"now_date": now_date}, 510 ) 511 512 session.flush()
513 514 515 ################################################################################ 516 517
518 -def main():
519 global Options, Logger 520 521 cnf = Config() 522 523 for i in ["Help", "No-Action", "Maximum"]: 524 key = "Clean-Suites::Options::%s" % i 525 if key not in cnf: 526 cnf[key] = "" 527 528 Arguments = [ 529 ("h", "help", "Clean-Suites::Options::Help"), 530 ("a", "archive", "Clean-Suites::Options::Archive", "HasArg"), 531 ("n", "no-action", "Clean-Suites::Options::No-Action"), 532 ("m", "maximum", "Clean-Suites::Options::Maximum", "HasArg"), 533 ] 534 535 apt_pkg.parse_commandline(cnf.Cnf, Arguments, sys.argv) 536 Options = cnf.subtree("Clean-Suites::Options") 537 538 if cnf["Clean-Suites::Options::Maximum"] != "": 539 try: 540 # Only use Maximum if it's an integer 541 max_delete = int(cnf["Clean-Suites::Options::Maximum"]) 542 if max_delete < 1: 543 utils.fubar("If given, Maximum must be at least 1") 544 except ValueError: 545 utils.fubar("If given, Maximum must be an integer") 546 else: 547 max_delete = None 548 549 if Options["Help"]: 550 usage() 551 552 program = "clean-suites" 553 if Options["No-Action"]: 554 program = "clean-suites (no action)" 555 Logger = daklog.Logger(program, debug=Options["No-Action"]) 556 557 session = DBConn().session() 558 559 archives = None 560 if "Archive" in Options: 561 archive_names = Options["Archive"].split(",") 562 archives = ( 563 session.query(Archive).filter(Archive.archive_name.in_(archive_names)).all() 564 ) 565 if len(archives) == 0: 566 utils.fubar("Unknown archive.") 567 568 now_date = datetime.now() 569 570 set_archive_delete_dates(now_date, session) 571 572 check_binaries(now_date, session) 573 clean_binaries(now_date, session) 574 check_sources(now_date, session) 575 check_files(now_date, session) 576 clean(now_date, archives, max_delete, session) 577 clean_maintainers(now_date, session) 578 clean_fingerprints(now_date, session) 579 clean_byhash(now_date, session) 580 clean_empty_directories(session) 581 582 session.rollback() 583 584 Logger.close()
585 586 587 ################################################################################ 588 589 590 if __name__ == "__main__": 591 main() 592