Package dak :: Module clean_suites
[hide private]
[frames] | no frames]

Source Code for Module dak.clean_suites

  1  #! /usr/bin/env python3 
  2   
  3  """ Cleans up unassociated binary and source packages 
  4   
  5  @contact: Debian FTPMaster <ftpmaster@debian.org> 
  6  @copyright: 2000, 2001, 2002, 2003, 2006  James Troup <james@nocrew.org> 
  7  @copyright: 2009  Mark Hymers <mhy@debian.org> 
  8  @copyright: 2010  Joerg Jaspert <joerg@debian.org> 
  9  @license: GNU General Public License version 2 or later 
 10  """ 
 11   
 12  # This program is free software; you can redistribute it and/or modify 
 13  # it under the terms of the GNU General Public License as published by 
 14  # the Free Software Foundation; either version 2 of the License, or 
 15  # (at your option) any later version. 
 16   
 17  # This program is distributed in the hope that it will be useful, 
 18  # but WITHOUT ANY WARRANTY; without even the implied warranty of 
 19  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
 20  # GNU General Public License for more details. 
 21   
 22  # You should have received a copy of the GNU General Public License 
 23  # along with this program; if not, write to the Free Software 
 24  # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA 
 25   
 26  ################################################################################ 
 27   
 28  # 07:05|<elmo> well.. *shrug*.. no, probably not.. but to fix it, 
 29  #      |       we're going to have to implement reference counting 
 30  #      |       through dependencies.. do we really want to go down 
 31  #      |       that road? 
 32  # 
 33  # 07:05|<Culus> elmo: Augh! <brain jumps out of skull> 
 34   
 35  ################################################################################ 
 36   
 37  import errno 
 38  import os 
 39  import sqlalchemy.sql as sql 
 40  import stat 
 41  import sys 
 42  import apt_pkg 
 43  from datetime import datetime 
 44   
 45  from daklib.config import Config 
 46  from daklib.dbconn import * 
 47  from daklib import utils 
 48  from daklib import daklog 
 49   
 50  ################################################################################ 
 51   
 52  Options = None 
 53  Logger = None 
 54   
 55  ################################################################################ 
 56   
 57   
58 -def usage(exit_code=0):
59 print("""Usage: dak clean-suites [OPTIONS] 60 Clean old packages from suites. 61 62 -n, --no-action don't do anything 63 -h, --help show this help and exit 64 -m, --maximum maximum number of files to remove""") 65 sys.exit(exit_code)
66 67 ################################################################################ 68 69
70 -def check_binaries(now_date, session):
71 Logger.log(["Checking for orphaned binary packages..."]) 72 73 # Get the list of binary packages not in a suite and mark them for 74 # deletion. 75 # Check for any binaries which are marked for eventual deletion 76 # but are now used again. 77 78 query = """ 79 WITH usage AS ( 80 SELECT 81 af.archive_id AS archive_id, 82 af.file_id AS file_id, 83 af.component_id AS component_id, 84 BOOL_OR(EXISTS (SELECT 1 FROM bin_associations ba 85 JOIN suite s ON ba.suite = s.id 86 WHERE ba.bin = b.id 87 AND s.archive_id = af.archive_id)) 88 AS in_use 89 FROM files_archive_map af 90 JOIN binaries b ON af.file_id = b.file 91 GROUP BY af.archive_id, af.file_id, af.component_id 92 ) 93 94 UPDATE files_archive_map af 95 SET last_used = CASE WHEN usage.in_use THEN NULL ELSE :last_used END 96 FROM usage, files f, archive 97 WHERE af.archive_id = usage.archive_id AND af.file_id = usage.file_id AND af.component_id = usage.component_id 98 AND ((af.last_used IS NULL AND NOT usage.in_use) OR (af.last_used IS NOT NULL AND usage.in_use)) 99 AND af.file_id = f.id 100 AND af.archive_id = archive.id 101 RETURNING archive.name, f.filename, af.last_used IS NULL""" 102 103 res = session.execute(query, {'last_used': now_date}) 104 for i in res: 105 op = "set lastused" 106 if i[2]: 107 op = "unset lastused" 108 Logger.log([op, i[0], i[1]])
109 110 ######################################## 111 112
113 -def check_sources(now_date, session):
114 Logger.log(["Checking for orphaned source packages..."]) 115 116 # Get the list of source packages not in a suite and not used by 117 # any binaries. 118 119 # Check for any sources which are marked for deletion but which 120 # are now used again. 121 122 # TODO: the UPDATE part is the same as in check_binaries. Merge? 123 124 query = """ 125 WITH usage AS ( 126 SELECT 127 af.archive_id AS archive_id, 128 af.file_id AS file_id, 129 af.component_id AS component_id, 130 BOOL_OR(EXISTS (SELECT 1 FROM src_associations sa 131 JOIN suite s ON sa.suite = s.id 132 WHERE sa.source = df.source 133 AND s.archive_id = af.archive_id) 134 OR EXISTS (SELECT 1 FROM files_archive_map af_bin 135 JOIN binaries b ON af_bin.file_id = b.file 136 WHERE b.source = df.source 137 AND af_bin.archive_id = af.archive_id 138 AND (af_bin.last_used IS NULL OR af_bin.last_used > ad.delete_date)) 139 OR EXISTS (SELECT 1 FROM extra_src_references esr 140 JOIN bin_associations ba ON esr.bin_id = ba.bin 141 JOIN binaries b ON ba.bin = b.id 142 JOIN suite s ON ba.suite = s.id 143 WHERE esr.src_id = df.source 144 AND s.archive_id = af.archive_id)) 145 AS in_use 146 FROM files_archive_map af 147 JOIN dsc_files df ON af.file_id = df.file 148 JOIN archive_delete_date ad ON af.archive_id = ad.archive_id 149 GROUP BY af.archive_id, af.file_id, af.component_id 150 ) 151 152 UPDATE files_archive_map af 153 SET last_used = CASE WHEN usage.in_use THEN NULL ELSE :last_used END 154 FROM usage, files f, archive 155 WHERE af.archive_id = usage.archive_id AND af.file_id = usage.file_id AND af.component_id = usage.component_id 156 AND ((af.last_used IS NULL AND NOT usage.in_use) OR (af.last_used IS NOT NULL AND usage.in_use)) 157 AND af.file_id = f.id 158 AND af.archive_id = archive.id 159 160 RETURNING archive.name, f.filename, af.last_used IS NULL 161 """ 162 163 res = session.execute(query, {'last_used': now_date}) 164 for i in res: 165 op = "set lastused" 166 if i[2]: 167 op = "unset lastused" 168 Logger.log([op, i[0], i[1]])
169 170 ######################################## 171 172
173 -def check_files(now_date, session):
174 # FIXME: this is evil; nothing should ever be in this state. if 175 # they are, it's a bug. 176 177 # However, we've discovered it happens sometimes so we print a huge warning 178 # and then mark the file for deletion. This probably masks a bug somwhere 179 # else but is better than collecting cruft forever 180 181 Logger.log(["Checking for unused files..."]) 182 q = session.execute(""" 183 UPDATE files_archive_map af 184 SET last_used = :last_used 185 FROM files f, archive 186 WHERE af.file_id = f.id 187 AND af.archive_id = archive.id 188 AND NOT EXISTS (SELECT 1 FROM binaries b WHERE b.file = af.file_id) 189 AND NOT EXISTS (SELECT 1 FROM dsc_files df WHERE df.file = af.file_id) 190 AND af.last_used IS NULL 191 RETURNING archive.name, f.filename""", {'last_used': now_date}) 192 193 for x in q: 194 utils.warn("orphaned file: {0}".format(x)) 195 Logger.log(["set lastused", x[0], x[1], "ORPHANED FILE"]) 196 197 if not Options["No-Action"]: 198 session.commit()
199 200
201 -def clean_binaries(now_date, session):
202 # We do this here so that the binaries we remove will have their 203 # source also removed (if possible). 204 205 # XXX: why doesn't this remove the files here as well? I don't think it 206 # buys anything keeping this separate 207 208 Logger.log(["Deleting from binaries table... "]) 209 q = session.execute(""" 210 DELETE FROM binaries b 211 USING files f 212 WHERE f.id = b.file 213 AND NOT EXISTS (SELECT 1 FROM files_archive_map af 214 JOIN archive_delete_date ad ON af.archive_id = ad.archive_id 215 WHERE af.file_id = b.file 216 AND (af.last_used IS NULL OR af.last_used > ad.delete_date)) 217 RETURNING f.filename 218 """) 219 for b in q: 220 Logger.log(["delete binary", b[0]])
221 222 ######################################## 223 224
225 -def clean(now_date, archives, max_delete, session):
226 cnf = Config() 227 228 count = 0 229 size = 0 230 231 Logger.log(["Cleaning out packages..."]) 232 233 morguedir = cnf.get("Dir::Morgue", os.path.join("Dir::Pool", 'morgue')) 234 morguesubdir = cnf.get("Clean-Suites::MorgueSubDir", 'pool') 235 236 # Build directory as morguedir/morguesubdir/year/month/day 237 dest = os.path.join(morguedir, 238 morguesubdir, 239 str(now_date.year), 240 '%.2d' % now_date.month, 241 '%.2d' % now_date.day) 242 243 if not Options["No-Action"] and not os.path.exists(dest): 244 os.makedirs(dest) 245 246 # Delete from source 247 Logger.log(["Deleting from source table..."]) 248 q = session.execute(""" 249 WITH 250 deleted_sources AS ( 251 DELETE FROM source 252 USING files f 253 WHERE source.file = f.id 254 AND NOT EXISTS (SELECT 1 FROM files_archive_map af 255 JOIN archive_delete_date ad ON af.archive_id = ad.archive_id 256 WHERE af.file_id = source.file 257 AND (af.last_used IS NULL OR af.last_used > ad.delete_date)) 258 RETURNING source.id AS id, f.filename AS filename 259 ), 260 deleted_dsc_files AS ( 261 DELETE FROM dsc_files df WHERE df.source IN (SELECT id FROM deleted_sources) 262 RETURNING df.file AS file_id 263 ), 264 now_unused_source_files AS ( 265 UPDATE files_archive_map af 266 SET last_used = '1977-03-13 13:37:42' -- Kill it now. We waited long enough before removing the .dsc. 267 WHERE af.file_id IN (SELECT file_id FROM deleted_dsc_files) 268 AND NOT EXISTS (SELECT 1 FROM dsc_files df WHERE df.file = af.file_id) 269 ) 270 SELECT filename FROM deleted_sources""") 271 for s in q: 272 Logger.log(["delete source", s[0]]) 273 274 if not Options["No-Action"]: 275 session.commit() 276 277 # Delete files from the pool 278 old_files = session.query(ArchiveFile).filter(sql.text('files_archive_map.last_used <= (SELECT delete_date FROM archive_delete_date ad WHERE ad.archive_id = files_archive_map.archive_id)')).join(Archive) 279 if max_delete is not None: 280 old_files = old_files.limit(max_delete) 281 Logger.log(["Limiting removals to %d" % max_delete]) 282 283 if archives is not None: 284 archive_ids = [a.archive_id for a in archives] 285 old_files = old_files.filter(ArchiveFile.archive_id.in_(archive_ids)) 286 287 for af in old_files: 288 filename = af.path 289 try: 290 st = os.lstat(filename) 291 except FileNotFoundError: 292 Logger.log(["database referred to non-existing file", filename]) 293 session.delete(af) 294 continue 295 Logger.log(["delete archive file", filename]) 296 if stat.S_ISLNK(st.st_mode): 297 count += 1 298 Logger.log(["delete symlink", filename]) 299 if not Options["No-Action"]: 300 os.unlink(filename) 301 session.delete(af) 302 elif stat.S_ISREG(st.st_mode): 303 size += st.st_size 304 count += 1 305 306 dest_filename = dest + '/' + os.path.basename(filename) 307 # If the destination file exists; try to find another filename to use 308 if os.path.lexists(dest_filename): 309 dest_filename = utils.find_next_free(dest_filename) 310 311 if not Options["No-Action"]: 312 if af.archive.use_morgue: 313 Logger.log(["move to morgue", filename, dest_filename]) 314 utils.move(filename, dest_filename) 315 else: 316 Logger.log(["removed file", filename]) 317 os.unlink(filename) 318 session.delete(af) 319 320 else: 321 utils.fubar("%s is neither symlink nor file?!" % (filename)) 322 323 if count > 0: 324 Logger.log(["total", count, utils.size_type(size)]) 325 326 # Delete entries in files no longer referenced by any archive 327 query = """ 328 DELETE FROM files f 329 WHERE NOT EXISTS (SELECT 1 FROM files_archive_map af WHERE af.file_id = f.id) 330 """ 331 session.execute(query) 332 333 if not Options["No-Action"]: 334 session.commit()
335 336 ################################################################################ 337 338
339 -def clean_maintainers(now_date, session):
340 Logger.log(["Cleaning out unused Maintainer entries..."]) 341 342 # TODO Replace this whole thing with one SQL statement 343 q = session.execute(""" 344 SELECT m.id, m.name FROM maintainer m 345 WHERE NOT EXISTS (SELECT 1 FROM binaries b WHERE b.maintainer = m.id) 346 AND NOT EXISTS (SELECT 1 FROM source s WHERE s.maintainer = m.id OR s.changedby = m.id) 347 AND NOT EXISTS (SELECT 1 FROM src_uploaders u WHERE u.maintainer = m.id)""") 348 349 count = 0 350 351 for i in q.fetchall(): 352 maintainer_id = i[0] 353 Logger.log(["delete maintainer", i[1]]) 354 if not Options["No-Action"]: 355 session.execute("DELETE FROM maintainer WHERE id = :maint", {'maint': maintainer_id}) 356 count += 1 357 358 if not Options["No-Action"]: 359 session.commit() 360 361 if count > 0: 362 Logger.log(["total", count])
363 364 ################################################################################ 365 366
367 -def clean_fingerprints(now_date, session):
368 Logger.log(["Cleaning out unused fingerprint entries..."]) 369 370 # TODO Replace this whole thing with one SQL statement 371 q = session.execute(""" 372 SELECT f.id, f.fingerprint FROM fingerprint f 373 WHERE f.keyring IS NULL 374 AND NOT EXISTS (SELECT 1 FROM binaries b WHERE b.sig_fpr = f.id) 375 AND NOT EXISTS (SELECT 1 FROM source s WHERE s.sig_fpr = f.id) 376 AND NOT EXISTS (SELECT 1 FROM acl_per_source aps WHERE aps.created_by_id = f.id)""") 377 378 count = 0 379 380 for i in q.fetchall(): 381 fingerprint_id = i[0] 382 Logger.log(["delete fingerprint", i[1]]) 383 if not Options["No-Action"]: 384 session.execute("DELETE FROM fingerprint WHERE id = :fpr", {'fpr': fingerprint_id}) 385 count += 1 386 387 if not Options["No-Action"]: 388 session.commit() 389 390 if count > 0: 391 Logger.log(["total", count])
392 393 ################################################################################ 394 395
396 -def clean_byhash(now_date, session):
397 cnf = Config() 398 399 Logger.log(["Cleaning out unused by-hash files..."]) 400 401 q = session.execute(""" 402 DELETE FROM hashfile h 403 USING suite s, archive a 404 WHERE s.id = h.suite_id 405 AND a.id = s.archive_id 406 AND h.unreferenced + a.stayofexecution < CURRENT_TIMESTAMP 407 RETURNING a.path, s.suite_name, h.path""") 408 count = q.rowcount 409 410 if not Options["No-Action"]: 411 for base, suite, path in q: 412 suite_suffix = utils.suite_suffix(suite) 413 filename = os.path.join(base, 'dists', suite, suite_suffix, path) 414 try: 415 os.unlink(filename) 416 except OSError as exc: 417 if exc.errno != errno.ENOENT: 418 raise 419 Logger.log(['database referred to non-existing file', filename]) 420 else: 421 Logger.log(['delete hashfile', suite, path]) 422 session.commit() 423 424 if count > 0: 425 Logger.log(["total", count])
426 427 ################################################################################ 428 429
430 -def clean_empty_directories(session):
431 """ 432 Removes empty directories from pool directories. 433 """ 434 435 Logger.log(["Cleaning out empty directories..."]) 436 437 count = 0 438 439 cursor = session.execute( 440 """SELECT DISTINCT(path) FROM archive""" 441 ) 442 bases = [x[0] for x in cursor.fetchall()] 443 444 for base in bases: 445 for dirpath, dirnames, filenames in os.walk(base, topdown=False): 446 if not filenames and not dirnames: 447 to_remove = os.path.join(base, dirpath) 448 if not Options["No-Action"]: 449 Logger.log(["removing directory", to_remove]) 450 os.removedirs(to_remove) 451 count += 1 452 453 if count: 454 Logger.log(["total removed directories", count])
455 456 ################################################################################ 457 458
459 -def set_archive_delete_dates(now_date, session):
460 session.execute(""" 461 CREATE TEMPORARY TABLE archive_delete_date ( 462 archive_id INT NOT NULL, 463 delete_date TIMESTAMP NOT NULL 464 )""") 465 466 session.execute(""" 467 INSERT INTO archive_delete_date 468 (archive_id, delete_date) 469 SELECT 470 archive.id, :now_date - archive.stayofexecution 471 FROM archive""", {'now_date': now_date}) 472 473 session.flush()
474 475 ################################################################################ 476 477
478 -def main():
479 global Options, Logger 480 481 cnf = Config() 482 483 for i in ["Help", "No-Action", "Maximum"]: 484 key = "Clean-Suites::Options::%s" % i 485 if key not in cnf: 486 cnf[key] = "" 487 488 Arguments = [('h', "help", "Clean-Suites::Options::Help"), 489 ('a', 'archive', 'Clean-Suites::Options::Archive', 'HasArg'), 490 ('n', "no-action", "Clean-Suites::Options::No-Action"), 491 ('m', "maximum", "Clean-Suites::Options::Maximum", "HasArg")] 492 493 apt_pkg.parse_commandline(cnf.Cnf, Arguments, sys.argv) 494 Options = cnf.subtree("Clean-Suites::Options") 495 496 if cnf["Clean-Suites::Options::Maximum"] != "": 497 try: 498 # Only use Maximum if it's an integer 499 max_delete = int(cnf["Clean-Suites::Options::Maximum"]) 500 if max_delete < 1: 501 utils.fubar("If given, Maximum must be at least 1") 502 except ValueError as e: 503 utils.fubar("If given, Maximum must be an integer") 504 else: 505 max_delete = None 506 507 if Options["Help"]: 508 usage() 509 510 program = "clean-suites" 511 if Options['No-Action']: 512 program = "clean-suites (no action)" 513 Logger = daklog.Logger(program, debug=Options["No-Action"]) 514 515 session = DBConn().session() 516 517 archives = None 518 if 'Archive' in Options: 519 archive_names = Options['Archive'].split(',') 520 archives = session.query(Archive).filter(Archive.archive_name.in_(archive_names)).all() 521 if len(archives) == 0: 522 utils.fubar('Unknown archive.') 523 524 now_date = datetime.now() 525 526 set_archive_delete_dates(now_date, session) 527 528 check_binaries(now_date, session) 529 clean_binaries(now_date, session) 530 check_sources(now_date, session) 531 check_files(now_date, session) 532 clean(now_date, archives, max_delete, session) 533 clean_maintainers(now_date, session) 534 clean_fingerprints(now_date, session) 535 clean_byhash(now_date, session) 536 clean_empty_directories(session) 537 538 session.rollback() 539 540 Logger.close()
541 542 ################################################################################ 543 544 545 if __name__ == '__main__': 546 main() 547