Package dak :: Module check_archive
[hide private]
[frames] | no frames]

Source Code for Module dak.check_archive

  1  #! /usr/bin/env python3 
  2   
  3  """ Various different sanity checks 
  4   
  5  @contact: Debian FTP Master <ftpmaster@debian.org> 
  6  @copyright: (C) 2000, 2001, 2002, 2003, 2004, 2006  James Troup <james@nocrew.org> 
  7  @license: GNU General Public License version 2 or later 
  8  """ 
  9   
 10  # This program is free software; you can redistribute it and/or modify 
 11  # it under the terms of the GNU General Public License as published by 
 12  # the Free Software Foundation; either version 2 of the License, or 
 13  # (at your option) any later version. 
 14   
 15  # This program is distributed in the hope that it will be useful, 
 16  # but WITHOUT ANY WARRANTY; without even the implied warranty of 
 17  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
 18  # GNU General Public License for more details. 
 19   
 20  # You should have received a copy of the GNU General Public License 
 21  # along with this program; if not, write to the Free Software 
 22  # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA 
 23   
 24  ################################################################################ 
 25   
 26  #   And, lo, a great and menacing voice rose from the depths, and with 
 27  #   great wrath and vehemence it's voice boomed across the 
 28  #   land... ``hehehehehehe... that *tickles*'' 
 29  #                                                       -- aj on IRC 
 30   
 31  ################################################################################ 
 32   
 33  import errno 
 34  import os 
 35  import stat 
 36  import sys 
 37  import time 
 38  import apt_pkg 
 39  import apt_inst 
 40   
 41  from daklib.dbconn import * 
 42  from daklib import utils 
 43  from daklib.config import Config 
 44  from daklib.dak_exceptions import InvalidDscError 
 45   
 46  ################################################################################ 
 47   
 48  db_files = {}                  #: Cache of filenames as known by the database 
 49  waste = 0.0                    #: How many bytes are "wasted" by files not referenced in database 
 50  excluded = {}                  #: Files which are excluded from the files check (membership-tested by process_dir) 
 51  current_file = None            #: Path of the .deb currently being examined; set by check_timestamps, read by Ent 
 52  future_files = {}              #: Maps a .deb path to a member mtime that lies in the future; filled by Ent 
 53  current_time = time.time()     #: now(), captured once when the module is loaded 
 54   
 55  ################################################################################ 
 56   
 57   
def usage(exit_code=0):
    """
    Print the command line help for C{dak check-archive} and terminate.

    @type exit_code: int
    @param exit_code: status handed to C{sys.exit} once the help was printed
    """
    help_text = """Usage: dak check-archive MODE
Run various sanity checks of the archive and/or database.

  -h, --help                show this help and exit.

The following MODEs are available:

  checksums                 - validate the checksums stored in the database
  files                     - check files in the database against what's in the archive
  dsc-syntax                - validate the syntax of .dsc files in the archive
  missing-overrides         - check for missing overrides
  source-in-one-dir         - ensure the source for each package is in one directory
  timestamps                - check for future timestamps in .deb's
  files-in-dsc              - ensure each .dsc references appropriate Files
  validate-indices          - ensure files mentioned in Packages & Sources exist
  files-not-symlinks        - check files in the database aren't symlinks
  validate-builddeps        - validate build-dependencies of .dsc files in the archive
  add-missing-source-checksums - add missing checksums for source packages
"""
    print(help_text)
    sys.exit(exit_code)
79 80 ################################################################################ 81 82
def process_dir(unused, dirname, filenames):
    """
    Report every regular file of one directory that is neither known to the
    global C{db_files} cache nor listed in the global C{excluded} mapping.

    Each reported file is printed with its absolute path and its size is
    added to the global C{waste} counter.

    @param unused: ignored

    @type dirname: string
    @param dirname: the directory to look at

    @type filenames: iterable of strings
    @param filenames: names of the entries inside C{dirname} to examine
    """
    global waste

    # Directories whose path carries one of these markers are not examined.
    if '/disks-' in dirname or 'upgrade-' in dirname:
        return
    # hack; can't handle .changes files
    if 'proposed-updates' in dirname:
        return

    for entry in filenames:
        candidate = os.path.abspath(os.path.join(dirname, entry))
        # Only plain files count; symlinks are never reported.
        if not os.path.isfile(candidate) or os.path.islink(candidate):
            continue
        if candidate in db_files or candidate in excluded:
            continue
        waste += os.stat(candidate).st_size
        print("%s" % (candidate))
106 107 ################################################################################ 108 109
def check_files():
    """
    Compare the files recorded in the database with the archive pools.

    The findings are printed to stdout, one per line:

      - C{MISSING-ARCHIVE-FILE archive suite filename}: a binary or source
        file belongs to a suite but has no C{files_archive_map} entry for
        that suite's archive
      - C{MISSING-FILE archive filename path}: an C{ArchiveFile} whose path
        does not exist on disk
      - C{UNEXPECTED-FILE archive path}: a file found below an archive's
        C{pool/} directory that no C{ArchiveFile} accounts for
    """
    session = DBConn().session()

    missing_binaries_query = """
    SELECT archive.name, suite.suite_name, f.filename
      FROM binaries b
      JOIN bin_associations ba ON b.id = ba.bin
      JOIN suite ON ba.suite = suite.id
      JOIN archive ON suite.archive_id = archive.id
      JOIN files f ON b.file = f.id
     WHERE NOT EXISTS (SELECT 1 FROM files_archive_map af
                        WHERE af.archive_id = suite.archive_id
                          AND af.file_id = b.file)
     ORDER BY archive.name, suite.suite_name, f.filename
    """

    missing_sources_query = """
    SELECT archive.name, suite.suite_name, f.filename
      FROM source s
      JOIN src_associations sa ON s.id = sa.source
      JOIN suite ON sa.suite = suite.id
      JOIN archive ON suite.archive_id = archive.id
      JOIN dsc_files df ON s.id = df.source
      JOIN files f ON df.file = f.id
     WHERE NOT EXISTS (SELECT 1 FROM files_archive_map af
                        WHERE af.archive_id = suite.archive_id
                          AND af.file_id = df.file)
     ORDER BY archive.name, suite.suite_name, f.filename
    """

    for query in (missing_binaries_query, missing_sources_query):
        for row in session.execute(query):
            # str has no vformat() method; unpack the three selected columns
            # into format() instead.
            print("MISSING-ARCHIVE-FILE {0} {1} {2}".format(*row))

    archive_files = session.query(ArchiveFile) \
        .join(ArchiveFile.archive).join(ArchiveFile.file) \
        .order_by(Archive.archive_name, PoolFile.filename)

    # Every path the database expects; used below to spot stray pool files.
    expected_files = set()
    for af in archive_files:
        path = af.path
        expected_files.add(path)
        if not os.path.exists(path):
            print("MISSING-FILE {0} {1} {2}".format(af.archive.archive_name, af.file.filename, path))

    archives = session.query(Archive).order_by(Archive.archive_name)

    for a in archives:
        top = os.path.join(a.path, 'pool')
        for dirpath, dirnames, filenames in os.walk(top):
            for fn in filenames:
                path = os.path.join(dirpath, fn)
                if path in expected_files:
                    continue
                print("UNEXPECTED-FILE {0} {1}".format(a.archive_name, path))
170 171 ################################################################################ 172 173
def check_dscs():
    """
    Parse every .dsc file in the archive and check for its validity.

    Each problematic file is reported through C{utils.warn}; a summary
    warning with the number of problems follows if there were any.
    """
    invalid = 0
    sources = DBConn().session().query(DBSource).order_by(DBSource.source, DBSource.version)

    for src in sources:
        dsc_path = src.poolfile.fullpath
        problem = None
        try:
            utils.parse_changes(dsc_path, signing_rules=1, dsc_file=1)
        except InvalidDscError:
            problem = "syntax error in .dsc file %s" % dsc_path
        except UnicodeDecodeError:
            problem = "found invalid dsc file (%s), not properly utf-8 encoded" % dsc_path
        except OSError as e:
            # Only a missing file is a finding; other OS errors propagate.
            if e.errno != errno.ENOENT:
                raise
            problem = "missing dsc file (%s)" % dsc_path
        except Exception as e:
            problem = "miscellaneous error parsing dsc file (%s): %s" % (dsc_path, str(e))

        if problem is not None:
            utils.warn(problem)
            invalid += 1

    if invalid:
        utils.warn("Found %s invalid .dsc files." % (invalid))
203 204 ################################################################################ 205 206
def check_override():
    """
    Check for missing overrides in stable and unstable.

    For each of the two suites a small header is printed, followed by the
    binary package names and then the source package names that are
    associated with the suite but have no override entry in it.
    """
    session = DBConn().session()

    binaries_without_override = """
SELECT DISTINCT b.package FROM binaries b, bin_associations ba
 WHERE b.id = ba.bin AND ba.suite = :suiteid AND NOT EXISTS
       (SELECT 1 FROM override o WHERE o.suite = :suiteid AND o.package = b.package)"""

    sources_without_override = """
SELECT DISTINCT s.source FROM source s, src_associations sa
  WHERE s.id = sa.source AND sa.suite = :suiteid AND NOT EXISTS
       (SELECT 1 FROM override o WHERE o.suite = :suiteid and o.package = s.source)"""

    for suite_name in ["stable", "unstable"]:
        print(suite_name)
        print("-" * len(suite_name))
        print()
        params = {'suiteid': get_suite(suite_name).suite_id}

        for query in (binaries_without_override, sources_without_override):
            for row in session.execute(query, params).fetchall():
                print(row[0])
234 235 ################################################################################ 236 237
def check_source_in_one_dir():
    """
    Ensure that the source files for any given package is all in one
    directory so that 'apt-get source' works...

    A file living in a different directory than the first file of its
    source package is tolerated when that directory holds an entry named
    like the first file; otherwise the file is printed and the package is
    counted as broken.
    """
    # Not the most enterprising method, but hey...
    broken_count = 0

    session = DBConn().session()

    for src in session.query(DBSource).all():
        reference_dir = ""
        reference_file = ""
        scattered = False

        pool_files = session.query(PoolFile).join(Location).join(DSCFile) \
            .filter_by(source_id=src.source_id).all()
        for pf in pool_files:
            full_name = os.path.join(pf.location.path, pf.filename)
            directory = os.path.dirname(full_name)

            if reference_dir == "":
                # First file seen: it defines the expected directory.
                reference_dir, reference_file = directory, full_name
                continue
            if directory == reference_dir:
                continue

            symlink = directory + '/' + os.path.basename(reference_file)
            if os.path.exists(symlink):
                continue
            scattered = True
            print("WOAH, we got a live one here... %s [%s] {%s}" % (full_name, src.source_id, symlink))

        if scattered:
            broken_count += 1

    print("Found %d source packages where the source is not all in one directory." % (broken_count))
274 275 ################################################################################ 276 277
def check_checksums():
    """
    Validate the size and checksums of all files known to the database.

    Every C{PoolFile} is opened and its on-disk size, md5sum, sha1sum and
    sha256sum are compared with the values stored in the database. Files
    that cannot be opened and every mismatch are reported through
    C{utils.warn}.
    """
    print("Getting file information from database...")
    q = DBConn().session().query(PoolFile)

    # (attribute on PoolFile / label used in the warning, hash function)
    hashers = (
        ("md5sum", apt_pkg.md5sum),
        ("sha1sum", apt_pkg.sha1sum),
        ("sha256sum", apt_pkg.sha256sum),
    )

    print("Checking file checksums & sizes...")
    for f in q:
        filename = f.fullpath

        try:
            # Pool files are binary data; never decode them as text.
            fi = open(filename, 'rb')
        except OSError:
            utils.warn("can't open '%s'." % (filename))
            continue

        # The context manager closes the file even if a check below raises.
        with fi:
            size = os.fstat(fi.fileno()).st_size
            if size != f.filesize:
                utils.warn("**WARNING** size mismatch for '%s' ('%s' [current] vs. '%s' [db])." % (filename, size, f.filesize))

            for label, hasher in hashers:
                # Each hash function consumes the file, so rewind first.
                fi.seek(0)
                current = hasher(fi)
                recorded = getattr(f, label)
                if current != recorded:
                    utils.warn("**WARNING** %s mismatch for '%s' ('%s' [current] vs. '%s' [db])." % (label, filename, current, recorded))

    print("Done.")
315 316 ################################################################################ 317 # 318 319
def Ent(Kind, Name, Link, Mode, UID, GID, Size, MTime, Major, Minor):
    """
    Per-member callback handed to C{apt_inst.debExtract} by
    C{check_timestamps}.

    When the member's C{MTime} is later than the global C{current_time},
    the global C{current_file} is recorded in C{future_files} together with
    that time and the member's details are printed.
    """
    if not MTime > current_time:
        return

    future_files[current_file] = MTime
    details = (current_file, Kind, Name, Link, Mode, UID, GID, Size, MTime, Major, Minor)
    print("%s: %s '%s','%s',%u,%u,%u,%u,%u,%u,%u" % details)
326 327
def check_timestamps():
    """
    Check all files for timestamps in the future; common from hardware
    (e.g. alpha) which have far-future dates as their default dates.

    Every readable .deb known to the database has its control and data
    members walked with C{Ent} as callback, which records and prints
    members dated in the future. A summary of how many of the selected
    files could be checked is printed at the end.
    """
    global current_file

    # SQL LIKE uses '%' / '_' wildcards, not regular expressions: the former
    # pattern '.deb$' only matched that literal string, i.e. nothing.
    q = DBConn().session().query(PoolFile).filter(PoolFile.filename.like('%.deb'))

    db_files.clear()
    pool_files = q.all()
    count = 0

    for pf in pool_files:
        filename = os.path.abspath(os.path.join(pf.location.path, pf.filename))
        if not os.access(filename, os.R_OK):
            continue
        # .deb archives are binary; open them without text decoding.
        with open(filename, 'rb') as f:
            current_file = filename
            print("Processing %s." % (filename), file=sys.stderr)
            # NOTE(review): debExtract is a legacy python-apt entry point;
            # confirm it is available in the deployed python-apt.
            apt_inst.debExtract(f, Ent, "control.tar.gz")
            f.seek(0)
            apt_inst.debExtract(f, Ent, "data.tar.gz")
        count += 1

    # Report against the number of selected files; db_files was just cleared
    # and would always yield 0 here.
    print("Checked %d files (out of %d)." % (count, len(pool_files)))
353 354 ################################################################################ 355 356
def check_files_in_dsc():
    """
    Ensure each .dsc lists appropriate files in its Files field (according
    to the format announced in its Format field).

    Every reason returned by C{utils.check_dsc_files} is reported through
    C{utils.warn}; a summary warning follows if any .dsc had problems. A
    .dsc that cannot be parsed is reported through C{utils.fubar}.
    """
    count = 0

    print("Building list of database files...")
    # SQL LIKE uses '%' / '_' wildcards, not regular expressions: the former
    # pattern '.dsc$' only matched that literal string, i.e. nothing.
    q = DBConn().session().query(PoolFile).filter(PoolFile.filename.like('%.dsc'))
    # Fetch once; the list serves both the announcement and the loop.
    pool_files = q.all()

    if pool_files:
        print("Checking %d files..." % len(pool_files))
    else:
        print("No files to check.")

    for pf in pool_files:
        # Join the two components properly instead of concatenating them,
        # which breaks when the location path lacks a trailing slash.
        filename = os.path.abspath(os.path.join(pf.location.path, pf.filename))

        try:
            # NB: don't enforce .dsc syntax
            dsc = utils.parse_changes(filename, dsc_file=1)
        except Exception:
            # NOTE(review): presumably fubar() aborts the program - confirm.
            utils.fubar("error parsing .dsc file '%s'." % (filename))

        reasons = utils.check_dsc_files(filename, dsc)
        for r in reasons:
            utils.warn(r)

        if len(reasons) > 0:
            count += 1

    if count:
        utils.warn("Found %s invalid .dsc files." % (count))
390 391 392 ################################################################################ 393
def validate_sources(suite, component):
    """
    Ensure files mentioned in Sources exist

    @type suite: string
    @param suite: name of the suite whose Sources index is read

    @type component: string
    @param component: name of the component whose Sources index is read
    """
    # Use the Config() accessor like the rest of this module instead of a
    # bare global named Cnf, which this module never defines.
    cnf = Config()
    root = cnf["Dir::Root"]

    index = "%s/dists/%s/%s/source/Sources" % (root, suite, component)
    index = utils.find_possibly_compressed_file(index)
    print("Processing %s..." % (index))
    with apt_pkg.TagFile(index) as Sources:
        while Sources.step():
            source = Sources.section.find('Package')
            directory = Sources.section.find('Directory')
            files = Sources.section.find('Files')
            for entry in files.split('\n'):
                # A blank line cannot be unpacked into three fields.
                if not entry.strip():
                    continue
                (md5, size, name) = entry.split()
                filename = "%s/%s/%s" % (root, directory, name)
                if os.path.exists(filename):
                    continue
                if directory.find("potato") == -1:
                    print("W: %s missing." % (filename))
                    continue
                pool_location = utils.poolify(source)
                pool_filename = "%s/%s/%s" % (cnf["Dir::Pool"], pool_location, name)
                if not os.path.exists(pool_filename):
                    print("E: %s missing (%s)." % (filename, pool_filename))
                else:
                    # Only the symlink target is computed and reported here;
                    # this function itself makes no os.symlink call.
                    pool_filename = os.path.normpath(pool_filename)
                    filename = os.path.normpath(filename)
                    src = utils.clean_symlink(pool_filename, filename, root)
                    print("Symlinking: %s -> %s" % (filename, src))
423 424 ######################################## 425 426
def validate_packages(suite, component, architecture):
    """
    Ensure files mentioned in Packages exist

    @type suite: string
    @param suite: name of the suite whose Packages index is read

    @type component: string
    @param component: name of the component whose Packages index is read

    @type architecture: string
    @param architecture: architecture name used in the C{binary-*} directory
    """
    # Use the Config() accessor like the rest of this module instead of a
    # bare global named Cnf, which this module never defines.
    root = Config()["Dir::Root"]

    index = "%s/dists/%s/%s/binary-%s/Packages" \
        % (root, suite, component, architecture)
    index = utils.find_possibly_compressed_file(index)
    print("Processing %s..." % (index))
    with apt_pkg.TagFile(index) as Packages:
        while Packages.step():
            filename = "%s/%s" % (root, Packages.section.find('Filename'))
            if not os.path.exists(filename):
                print("W: %s missing." % (filename))
440 441 ######################################## 442 443
def check_indices_files_exist():
    """
    Ensure files mentioned in Packages & Sources exist

    Walks stable, testing and unstable: for every component and every
    architecture of the suite the Sources index (architecture "source") or
    the Packages index (any other architecture except "all") is validated.
    """
    for suite in ["stable", "testing", "unstable"]:
        for component in get_component_names():
            arch_names = [a.arch_string.lower() for a in get_suite_architectures(suite)]
            for arch in arch_names:
                if arch == "all":
                    continue
                if arch == "source":
                    validate_sources(suite, component)
                else:
                    validate_packages(suite, component, arch)
458 459 ################################################################################ 460 461 477 478 ################################################################################ 479 480
def chk_bd_process_dir(dirname, filenames):
    """
    Validate the build-dependency fields of every .dsc file in a directory.

    An C{E:} line is printed for each C{build-depends} or
    C{build-depends-indep} field that C{apt_pkg.parse_src_depends} rejects.

    @type dirname: string
    @param dirname: the directory the names in C{filenames} live in

    @type filenames: iterable of strings
    @param filenames: names of the directory entries; anything not ending
                      in C{.dsc} is ignored
    """
    for name in filenames:
        if not name.endswith(".dsc"):
            continue
        filename = os.path.abspath(os.path.join(dirname, name))
        dsc = utils.parse_changes(filename, dsc_file=1)
        for field_name in ["build-depends", "build-depends-indep"]:
            field = dsc.get(field_name)
            if not field:
                continue
            try:
                apt_pkg.parse_src_depends(field)
            except Exception:
                # Report any parse failure, but let KeyboardInterrupt and
                # SystemExit through (a bare except would swallow them).
                print("E: [%s] %s: %s" % (filename, field_name, field))
495 496 ################################################################################ 497 498
def check_build_depends():
    """
    Validate build-dependencies of .dsc files in the archive

    Every directory below the configured C{Dir::Root} is handed to
    C{chk_bd_process_dir}.
    """
    archive_root = Config()["Dir::Root"]
    for current_dir, _subdirs, entries in os.walk(archive_root):
        chk_bd_process_dir(current_dir, entries)
################################################################################


# Statement run by add_missing_source_checksums(). For every source that has
# no source_metadata row for :checksum_key yet, it inserts one whose value
# lists " <checksum> <size> <basename>" lines (ordered by basename) for the
# source's dsc_files other than the file referenced by source.file.
# :checksum_type selects which checksum column is used. This is a raw string
# so that the backslash escapes reach the database unchanged.
_add_missing_source_checksums_query = R"""
INSERT INTO source_metadata
  (src_id, key_id, value)
SELECT
  s.id,
  :checksum_key,
  E'\n' ||
    (SELECT STRING_AGG(' ' || tmp.checksum || ' ' || tmp.size || ' ' || tmp.basename, E'\n' ORDER BY tmp.basename)
     FROM
       (SELECT
            CASE :checksum_type
              WHEN 'Files' THEN f.md5sum
              WHEN 'Checksums-Sha1' THEN f.sha1sum
              WHEN 'Checksums-Sha256' THEN f.sha256sum
            END AS checksum,
            f.size,
            SUBSTRING(f.filename FROM E'/([^/]*)\\Z') AS basename
        FROM files f JOIN dsc_files ON f.id = dsc_files.file
        WHERE dsc_files.source = s.id AND f.id != s.file
       ) AS tmp
    )

  FROM
    source s
  WHERE NOT EXISTS (SELECT 1 FROM source_metadata md WHERE md.src_id=s.id AND md.key_id = :checksum_key);
"""


def add_missing_source_checksums():
    """
    Add missing source checksums to source_metadata

    Runs the module's insert statement once for each of the C{Files},
    C{Checksums-Sha1} and C{Checksums-Sha256} keys, prints how many rows
    were added per key (if any) and commits the session.
    """
    session = DBConn().session()
    for checksum in ['Files', 'Checksums-Sha1', 'Checksums-Sha256']:
        params = {
            'checksum_key': get_or_set_metadatakey(checksum, session).key_id,
            'checksum_type': checksum,
        }
        added = session.execute(_add_missing_source_checksums_query, params).rowcount
        if added > 0:
            print("Added {0} missing entries for {1}".format(added, checksum))
    session.commit()
546 547 ################################################################################ 548 549
def main():
    """
    Entry point of C{dak check-archive}: parse the command line and run
    the check selected by the single MODE argument.

    Help is shown on request; a missing, surplus or unknown MODE produces a
    warning followed by the help text with exit status 1.
    """
    cnf = Config()

    Arguments = [('h', "help", "Check-Archive::Options::Help")]
    for option in ["help"]:
        key = "Check-Archive::Options::%s" % option
        if key not in cnf:
            cnf[key] = ""

    args = apt_pkg.parse_commandline(cnf.Cnf, Arguments, sys.argv)

    Options = cnf.subtree("Check-Archive::Options")
    if Options["Help"]:
        usage()

    if len(args) != 1:
        if not args:
            utils.warn("dak check-archive requires at least one argument")
        else:
            utils.warn("dak check-archive accepts only one argument")
        usage(1)
    mode = args[0].lower()

    # Initialize DB
    DBConn()

    # MODE name -> function implementing the check
    handlers = {
        "checksums": check_checksums,
        "files": check_files,
        "dsc-syntax": check_dscs,
        "missing-overrides": check_override,
        "source-in-one-dir": check_source_in_one_dir,
        "timestamps": check_timestamps,
        "files-in-dsc": check_files_in_dsc,
        "validate-indices": check_indices_files_exist,
        "files-not-symlinks": check_files_not_symlinks,
        "validate-builddeps": check_build_depends,
        "add-missing-source-checksums": add_missing_source_checksums,
    }

    handler = handlers.get(mode)
    if handler is None:
        utils.warn("unknown mode '%s'" % (mode))
        usage(1)
    else:
        handler()
################################################################################


# Run the requested check when this module is executed as a script.
if __name__ == '__main__':
    main()