1#! /usr/bin/env python3
3""" Various different sanity checks
5@contact: Debian FTP Master <ftpmaster@debian.org>
6@copyright: (C) 2000, 2001, 2002, 2003, 2004, 2006 James Troup <james@nocrew.org>
7@license: GNU General Public License version 2 or later
8"""
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
24################################################################################
26# And, lo, a great and menacing voice rose from the depths, and with
27# great wrath and vehemence it's voice boomed across the
28# land... ``hehehehehehe... that *tickles*''
29# -- aj on IRC
31################################################################################
33import errno
34import os
35import stat
36import sys
37import time
38import apt_pkg
39import apt_inst
41from daklib.dbconn import *
42from daklib import utils
43from daklib.config import Config
44from daklib.dak_exceptions import InvalidDscError
46################################################################################
# Module-level state shared between the individual checks below.
db_files = {}  #: Cache of filenames as known by the database
waste = 0.0  #: How many bytes are "wasted" by files not referenced in database
excluded = {}  #: List of files which are excluded from files check
current_file = None  #: Path of the file being examined; set by check_timestamps(), read by Ent()
future_files = {}  #: Filled by Ent(): maps a file path to the future mtime found inside it
current_time = time.time()  #: now()
55################################################################################
def usage(exit_code=0):
    """
    Print the command-line help text and terminate the process.

    :param exit_code: status passed to :func:`sys.exit`
    """
    help_text = """Usage: dak check-archive MODE
Run various sanity checks of the archive and/or database.

 -h, --help show this help and exit.

The following MODEs are available:

 checksums - validate the checksums stored in the database
 files - check files in the database against what's in the archive
 dsc-syntax - validate the syntax of .dsc files in the archive
 missing-overrides - check for missing overrides
 source-in-one-dir - ensure the source for each package is in one directory
 timestamps - check for future timestamps in .deb's
 files-in-dsc - ensure each .dsc references appropriate Files
 validate-indices - ensure files mentioned in Packages & Sources exist
 files-not-symlinks - check files in the database aren't symlinks
 validate-builddeps - validate build-dependencies of .dsc files in the archive
 add-missing-source-checksums - add missing checksums for source packages
"""
    print(help_text)
    sys.exit(exit_code)
80################################################################################
def process_dir(unused, dirname: str, filenames: dict) -> None:
    """
    Report files of one directory that the database does not know about.

    Each entry of `filenames` that is a regular, non-symlink file and whose
    absolute path appears in neither the global :data:`db_files` nor the
    global :data:`excluded` dictionary is printed, and its size is added to
    the global :data:`waste` counter.

    :param unused: ignored
    :param dirname: the directory to look at
    :param filenames: names of the entries inside `dirname`
    """
    global waste

    # Directories matching these patterns are skipped entirely.
    if '/disks-' in dirname or 'upgrade-' in dirname:
        return
    # hack; can't handle .changes files
    if 'proposed-updates' in dirname:
        return

    for entry in filenames:
        candidate = os.path.abspath(os.path.join(dirname, entry))
        if not os.path.isfile(candidate) or os.path.islink(candidate):
            continue
        if candidate in db_files or candidate in excluded:
            continue
        waste += os.stat(candidate).st_size
        print("%s" % (candidate))
104################################################################################
def check_files():
    """
    Cross-check the files recorded in the database against the archive on disk.

    One line per problem is printed to stdout:

    - ``MISSING-ARCHIVE-FILE``: a binary or source file is associated with a
      suite but has no ``files_archive_map`` row for that suite's archive;
    - ``MISSING-FILE``: a file registered for an archive does not exist at
      its expected path;
    - ``UNEXPECTED-FILE``: a file found below an archive's ``pool/``
      directory whose path is not registered for any archive.
    """
    session = DBConn().session()

    missing_binaries_query = """
    SELECT archive.name, suite.suite_name, f.filename
      FROM binaries b
      JOIN bin_associations ba ON b.id = ba.bin
      JOIN suite ON ba.suite = suite.id
      JOIN archive ON suite.archive_id = archive.id
      JOIN files f ON b.file = f.id
     WHERE NOT EXISTS (SELECT 1 FROM files_archive_map af
                        WHERE af.archive_id = suite.archive_id
                          AND af.file_id = b.file)
     ORDER BY archive.name, suite.suite_name, f.filename
    """

    missing_sources_query = """
    SELECT archive.name, suite.suite_name, f.filename
      FROM source s
      JOIN src_associations sa ON s.id = sa.source
      JOIN suite ON sa.suite = suite.id
      JOIN archive ON suite.archive_id = archive.id
      JOIN dsc_files df ON s.id = df.source
      JOIN files f ON df.file = f.id
     WHERE NOT EXISTS (SELECT 1 FROM files_archive_map af
                        WHERE af.archive_id = suite.archive_id
                          AND af.file_id = df.file)
     ORDER BY archive.name, suite.suite_name, f.filename
    """

    for query in (missing_binaries_query, missing_sources_query):
        for row in session.execute(query):
            # Each row holds (archive, suite, filename).  str has no
            # vformat() method, so unpack the row into format() instead.
            print("MISSING-ARCHIVE-FILE {0} {1} {2}".format(*row))

    archive_files = session.query(ArchiveFile) \
        .join(ArchiveFile.archive).join(ArchiveFile.file) \
        .order_by(Archive.archive_name, PoolFile.filename)

    # Every path the database expects to find on disk, over all archives.
    expected_files = set()
    for af in archive_files:
        path = af.path
        expected_files.add(path)
        if not os.path.exists(path):
            print("MISSING-FILE {0} {1} {2}".format(af.archive.archive_name, af.file.filename, path))

    archives = session.query(Archive).order_by(Archive.archive_name)

    for a in archives:
        top = os.path.join(a.path, 'pool')
        for dirpath, dirnames, filenames in os.walk(top):
            for fn in filenames:
                path = os.path.join(dirpath, fn)
                if path in expected_files:
                    continue
                print("UNEXPECTED-FILE {0} {1}".format(a.archive_name, path))
168################################################################################
def check_dscs():
    """
    Parse every .dsc file in the archive and check its validity.

    A warning is issued for each file that cannot be parsed, followed by a
    final warning giving the number of such files.  An :class:`OSError`
    other than "file not found" is re-raised.
    """
    invalid = 0
    sources = DBConn().session().query(DBSource).order_by(DBSource.source, DBSource.version)

    for src in sources:
        dsc_path = src.poolfile.fullpath
        problem = None
        try:
            utils.parse_changes(dsc_path, signing_rules=1, dsc_file=True)
        except InvalidDscError:
            problem = "syntax error in .dsc file %s" % dsc_path
        except UnicodeDecodeError:
            problem = "found invalid dsc file (%s), not properly utf-8 encoded" % dsc_path
        except OSError as e:
            if e.errno != errno.ENOENT:
                raise
            problem = "missing dsc file (%s)" % dsc_path
        except Exception as e:
            problem = "miscellaneous error parsing dsc file (%s): %s" % (dsc_path, str(e))

        if problem is not None:
            utils.warn(problem)
            invalid += 1

    if invalid:
        utils.warn("Found %s invalid .dsc files." % (invalid))
201################################################################################
def check_override():
    """
    Check for missing overrides in stable and unstable.

    For each of the two suites a header is printed, followed by the name of
    every binary package and then every source package that is associated
    with the suite but has no row in the ``override`` table for it.
    """
    session = DBConn().session()

    binaries_without_override = """
SELECT DISTINCT b.package FROM binaries b, bin_associations ba
 WHERE b.id = ba.bin AND ba.suite = :suiteid AND NOT EXISTS
 (SELECT 1 FROM override o WHERE o.suite = :suiteid AND o.package = b.package)"""

    sources_without_override = """
SELECT DISTINCT s.source FROM source s, src_associations sa
 WHERE s.id = sa.source AND sa.suite = :suiteid AND NOT EXISTS
 (SELECT 1 FROM override o WHERE o.suite = :suiteid and o.package = s.source)"""

    for suite_name in ["stable", "unstable"]:
        print(suite_name)
        print("-" * len(suite_name))
        print()
        suite = get_suite(suite_name)

        # Binaries first, then sources, exactly one name per output line.
        for sql in (binaries_without_override, sources_without_override):
            result = session.execute(sql, {'suiteid': suite.suite_id})
            for record in result.fetchall():
                print(record[0])
232################################################################################
def check_source_in_one_dir():
    """
    Ensure that the source files for any given package are all in one
    directory so that 'apt-get source' works...

    A source package counts as broken when one of its files lives in a
    different directory than its first file and that directory does not
    contain an entry named like the first file.  Each offending file is
    printed, followed by the total number of broken source packages.
    """
    session = DBConn().session()

    # Not the most enterprising method, but hey...
    broken_count = 0

    for s in session.query(DBSource).all():
        first_path = ""
        first_filename = ""
        broken = False

        pool_files = session.query(PoolFile).join(Location).join(DSCFile).filter_by(source_id=s.source_id).all()
        for f in pool_files:
            filename = os.path.join(f.location.path, f.filename)
            path = os.path.dirname(filename)

            if first_path == "":
                # The first file seen defines the reference directory.
                first_path, first_filename = path, filename
                continue
            if first_path == path:
                continue

            symlink = path + '/' + os.path.basename(first_filename)
            if not os.path.exists(symlink):
                broken = True
                print("WOAH, we got a live one here... %s [%s] {%s}" % (filename, s.source_id, symlink))

        broken_count += int(broken)

    print("Found %d source packages where the source is not all in one directory." % (broken_count))
272################################################################################
def check_checksums():
    """
    Validate the size and checksums of every pool file against the database.

    For each :class:`PoolFile` the on-disk size, MD5, SHA-1 and SHA-256 are
    compared with the stored values and a warning is issued for every
    mismatch.  Files that cannot be opened are reported and skipped.
    """
    print("Getting file information from database...")
    q = DBConn().session().query(PoolFile)

    print("Checking file checksums & sizes...")
    for f in q:
        filename = f.fullpath

        try:
            # Binary mode: the content is hashed, never decoded as text.
            fi = open(filename, 'rb')
        except OSError:
            utils.warn("can't open '%s'." % (filename))
            continue

        # The context manager closes the file even if hashing raises.
        with fi:
            # Stat the open descriptor so size and hashes refer to the same file.
            size = os.fstat(fi.fileno())[stat.ST_SIZE]
            if size != f.filesize:
                utils.warn("**WARNING** size mismatch for '%s' ('%s' [current] vs. '%s' [db])." % (filename, size, f.filesize))

            md5sum = apt_pkg.md5sum(fi)
            if md5sum != f.md5sum:
                utils.warn("**WARNING** md5sum mismatch for '%s' ('%s' [current] vs. '%s' [db])." % (filename, md5sum, f.md5sum))

            # Rewind before each further pass over the file content.
            fi.seek(0)
            sha1sum = apt_pkg.sha1sum(fi)
            if sha1sum != f.sha1sum:
                utils.warn("**WARNING** sha1sum mismatch for '%s' ('%s' [current] vs. '%s' [db])." % (filename, sha1sum, f.sha1sum))

            fi.seek(0)
            sha256sum = apt_pkg.sha256sum(fi)
            if sha256sum != f.sha256sum:
                utils.warn("**WARNING** sha256sum mismatch for '%s' ('%s' [current] vs. '%s' [db])." % (filename, sha256sum, f.sha256sum))

    print("Done.")
313################################################################################
314#
def Ent(Kind, Name, Link, Mode, UID, GID, Size, MTime, Major, Minor):
    """
    Per-member callback passed to ``apt_inst.debExtract`` by
    :func:`check_timestamps`.

    When `MTime` is later than the module-level :data:`current_time`, the
    file named by :data:`current_file` is recorded in :data:`future_files`
    and the member's details are printed.  Other members are ignored.
    """
    if not MTime > current_time:
        return

    future_files[current_file] = MTime
    details = (current_file, Kind, Name, Link, Mode, UID, GID, Size, MTime, Major, Minor)
    print("%s: %s '%s','%s',%u,%u,%u,%u,%u,%u,%u" % details)
def check_timestamps():
    """
    Check all .deb files for timestamps in the future; common from hardware
    (e.g. alpha) which have far-future dates as their default dates.

    Every readable ``.deb`` known to the database has its control and data
    members passed to :func:`Ent`, which reports future timestamps.  A
    summary of how many files were checked is printed at the end.
    """
    global current_file

    # SQL LIKE uses '%' as its wildcard and has no '$' anchor; the pattern
    # '.deb$' could only match a file literally named ".deb$".
    q = DBConn().session().query(PoolFile).filter(PoolFile.filename.like('%.deb'))

    # Reset the shared filename cache, as the original implementation did.
    db_files.clear()
    pool_files = q.all()
    count = 0

    for pf in pool_files:
        filename = os.path.abspath(os.path.join(pf.location.path, pf.filename))
        if not os.access(filename, os.R_OK):
            continue
        # .deb archives are binary data, so do not open them in text mode.
        with open(filename, 'rb') as f:
            # Ent() reads this global to know which file a member belongs to.
            current_file = filename
            print("Processing %s." % (filename), file=sys.stderr)
            # NOTE(review): debExtract looks like the legacy python-apt
            # interface - confirm it exists in the deployed python-apt.
            apt_inst.debExtract(f, Ent, "control.tar.gz")
            f.seek(0)
            apt_inst.debExtract(f, Ent, "data.tar.gz")
        count += 1

    # Report against the number of candidate files, not the emptied cache.
    print("Checked %d files (out of %d)." % (count, len(pool_files)))
351################################################################################
def check_files_in_dsc():
    """
    Ensure each .dsc lists appropriate files in its Files field (according
    to the format announced in its Format field).

    Every problem returned by ``utils.check_dsc_files`` is emitted as a
    warning; a final warning gives the number of .dsc files with problems.
    """
    count = 0

    print("Building list of database files...")
    # SQL LIKE uses '%' as its wildcard; the former pattern '.dsc$' was
    # regex syntax and matched no real file.
    q = DBConn().session().query(PoolFile).filter(PoolFile.filename.like('%.dsc'))
    dsc_files = q.all()

    if dsc_files:
        print("Checking %d files..." % len(dsc_files))
    else:
        print("No files to check.")

    for pf in dsc_files:
        # Join directory and name as two components so a missing trailing
        # slash on the location path cannot glue them together.
        filename = os.path.abspath(os.path.join(pf.location.path, pf.filename))

        try:
            # NB: don't enforce .dsc syntax
            dsc = utils.parse_changes(filename, dsc_file=True)
        except Exception:
            # NOTE(review): utils.fubar presumably aborts the program - confirm.
            utils.fubar("error parsing .dsc file '%s'." % (filename))

        reasons = utils.check_dsc_files(filename, dsc)
        for r in reasons:
            utils.warn(r)

        if len(reasons) > 0:
            count += 1

    if count:
        utils.warn("Found %s invalid .dsc files." % (count))
389################################################################################
def validate_sources(suite, component):
    """
    Ensure files mentioned in Sources exist

    Reads ``dists/<suite>/<component>/source/Sources`` below ``Dir::Root``
    and checks every entry of each ``Files`` field.  A missing file is
    reported with ``W:`` unless its directory contains "potato"; in that
    case the pool is consulted and either ``E:`` is printed (the file is
    missing there too) or the symlink that would point at the pool copy is
    printed.  No symlink is created by this function.

    :param suite: name of the suite
    :param component: name of the component
    """
    # Config() is how the rest of this module reads settings; the name
    # `Cnf` that was used here is not defined in this file.
    cnf = Config()
    root = cnf["Dir::Root"]

    filename = "%s/dists/%s/%s/source/Sources" % (root, suite, component)
    filename = utils.find_possibly_compressed_file(filename)
    print("Processing %s..." % (filename))
    with apt_pkg.TagFile(filename) as Sources:
        while Sources.step():
            source = Sources.section.find('Package')
            directory = Sources.section.find('Directory')
            files = Sources.section.find('Files')
            for i in files.split('\n'):
                # A blank line cannot be unpacked into three fields.
                if not i.strip():
                    continue
                (md5, size, name) = i.split()
                filename = "%s/%s/%s" % (root, directory, name)
                if os.path.exists(filename):
                    continue
                if directory.find("potato") == -1:
                    print("W: %s missing." % (filename))
                    continue

                pool_location = utils.poolify(source)
                pool_filename = "%s/%s/%s" % (cnf["Dir::Pool"], pool_location, name)
                if not os.path.exists(pool_filename):
                    print("E: %s missing (%s)." % (filename, pool_filename))
                else:
                    # Only report the symlink that would be needed.
                    pool_filename = os.path.normpath(pool_filename)
                    filename = os.path.normpath(filename)
                    src = utils.clean_symlink(pool_filename, filename, root)
                    print("Symlinking: %s -> %s" % (filename, src))
421########################################
def validate_packages(suite, component, architecture):
    """
    Ensure files mentioned in Packages exist

    Reads ``dists/<suite>/<component>/binary-<architecture>/Packages`` below
    ``Dir::Root`` and prints a ``W:`` line for every ``Filename`` entry that
    does not exist on disk.

    :param suite: name of the suite
    :param component: name of the component
    :param architecture: architecture string used in the ``binary-*`` path
    """
    # Config() is how the rest of this module reads settings; the name
    # `Cnf` that was used here is not defined in this file.
    root = Config()["Dir::Root"]

    filename = "%s/dists/%s/%s/binary-%s/Packages" \
        % (root, suite, component, architecture)
    filename = utils.find_possibly_compressed_file(filename)
    print("Processing %s..." % (filename))
    with apt_pkg.TagFile(filename) as Packages:
        while Packages.step():
            filename = "%s/%s" % (root, Packages.section.find('Filename'))
            if not os.path.exists(filename):
                print("W: %s missing." % (filename))
438########################################
def check_indices_files_exist():
    """
    Ensure files mentioned in Packages & Sources exist

    Walks stable, testing and unstable; for every component and every
    architecture of the suite, the Sources index is validated for "source",
    "all" is skipped, and the Packages index is validated for anything else.
    """
    for suite in ["stable", "testing", "unstable"]:
        for component in get_component_names():
            arch_names = [a.arch_string.lower() for a in get_suite_architectures(suite)]
            for arch in arch_names:
                if arch == "all":
                    continue
                if arch == "source":
                    validate_sources(suite, component)
                else:
                    validate_packages(suite, component, arch)
456################################################################################
def check_files_not_symlinks():
    """
    Check files in the database aren't symlinks

    A warning is issued for every selected pool file that is not readable
    and for every one that is a symbolic link.
    """
    print("Building list of database files... ", end=' ')
    # SQL LIKE uses '%' as its wildcard; the former pattern '.dsc$' was
    # regex syntax and matched no real file, so nothing was ever checked.
    # NOTE(review): the docstring suggests all files were meant, not only
    # .dsc files - confirm whether the filter should be dropped entirely.
    q = DBConn().session().query(PoolFile).filter(PoolFile.filename.like('%.dsc'))

    for pf in q.all():
        filename = os.path.abspath(os.path.join(pf.location.path, pf.filename))
        if not os.access(filename, os.R_OK):
            utils.warn("%s: doesn't exist." % (filename))
        elif os.path.islink(filename):
            utils.warn("%s: is a symlink." % (filename))
475################################################################################
def chk_bd_process_dir(dirname, filenames):
    """
    Validate the build-dependency fields of every .dsc file in one directory.

    Entries not ending in ``.dsc`` are ignored.  For each remaining file the
    ``build-depends`` and ``build-depends-indep`` fields are parsed and an
    ``E:`` line is printed for every field that fails to parse.

    :param dirname: directory containing the files
    :param filenames: names of the entries inside `dirname`
    """
    for name in filenames:
        if not name.endswith(".dsc"):
            continue
        filename = os.path.abspath(dirname + '/' + name)
        dsc = utils.parse_changes(filename, dsc_file=True)
        for field_name in ["build-depends", "build-depends-indep"]:
            field = dsc.get(field_name)
            if not field:
                continue
            try:
                apt_pkg.parse_src_depends(field)
            except Exception:
                # Catch Exception rather than everything so Ctrl-C and
                # SystemExit still stop a long archive walk.
                print("E: [%s] %s: %s" % (filename, field_name, field))
493################################################################################
def check_build_depends():
    """ Validate build-dependencies of .dsc files in the archive """
    archive_root = Config()["Dir::Root"]
    # Hand every directory below the archive root to the per-directory check.
    for dirpath, _dirnames, filenames in os.walk(archive_root):
        chk_bd_process_dir(dirpath, filenames)
502################################################################################
# SQL run by add_missing_source_checksums() once per checksum field.
# For every source that has no source_metadata row under :checksum_key it
# inserts one whose value is a newline-led list of " <checksum> <size>
# <basename>" lines, ordered by basename.  :checksum_type selects which
# checksum column of `files` is used.  Files equal to source.file are left
# out (presumably that is the .dsc itself - confirm against the schema).
# The literal is a raw string so the backslashes reach PostgreSQL unchanged.
_add_missing_source_checksums_query = R"""
INSERT INTO source_metadata
  (src_id, key_id, value)
SELECT
  s.id,
  :checksum_key,
  E'\n' ||
    (SELECT STRING_AGG(' ' || tmp.checksum || ' ' || tmp.size || ' ' || tmp.basename, E'\n' ORDER BY tmp.basename)
     FROM
       (SELECT
            CASE :checksum_type
              WHEN 'Files' THEN f.md5sum
              WHEN 'Checksums-Sha1' THEN f.sha1sum
              WHEN 'Checksums-Sha256' THEN f.sha256sum
            END AS checksum,
            f.size,
            SUBSTRING(f.filename FROM E'/([^/]*)\\Z') AS basename
          FROM files f JOIN dsc_files ON f.id = dsc_files.file
          WHERE dsc_files.source = s.id AND f.id != s.file
       ) AS tmp
    )

  FROM
    source s
  WHERE NOT EXISTS (SELECT 1 FROM source_metadata md WHERE md.src_id=s.id AND md.key_id = :checksum_key);
"""
def add_missing_source_checksums():
    """
    Add missing source checksums to source_metadata

    Runs the insert query once for each of the ``Files``, ``Checksums-Sha1``
    and ``Checksums-Sha256`` fields, prints how many rows were added when
    there were any, and commits after each field.
    """
    session = DBConn().session()
    for field in ('Files', 'Checksums-Sha1', 'Checksums-Sha256'):
        params = {
            'checksum_key': get_or_set_metadatakey(field, session).key_id,
            'checksum_type': field,
        }
        inserted = session.execute(_add_missing_source_checksums_query, params).rowcount
        if inserted > 0:
            print("Added {0} missing entries for {1}".format(inserted, field))
        session.commit()
544################################################################################
def main():
    """
    Entry point of ``dak check-archive``.

    Parses the command line, prints the usage text for ``--help`` or for a
    wrong number of arguments, initializes the database connection and runs
    the check selected by the single MODE argument.
    """
    cnf = Config()

    Arguments = [('h', "help", "Check-Archive::Options::Help")]
    # Make sure every option key exists before the command line is parsed.
    for option in ["help"]:
        key = "Check-Archive::Options::%s" % option
        if key not in cnf:
            cnf[key] = ""

    args = apt_pkg.parse_commandline(cnf.Cnf, Arguments, sys.argv)

    Options = cnf.subtree("Check-Archive::Options")
    if Options["Help"]:
        usage()

    if not args:
        utils.warn("dak check-archive requires at least one argument")
        usage(1)
    elif len(args) > 1:
        utils.warn("dak check-archive accepts only one argument")
        usage(1)
    mode = args[0].lower()

    # Initialize DB
    DBConn()

    # MODE name -> function implementing that check.
    handlers = {
        "checksums": check_checksums,
        "files": check_files,
        "dsc-syntax": check_dscs,
        "missing-overrides": check_override,
        "source-in-one-dir": check_source_in_one_dir,
        "timestamps": check_timestamps,
        "files-in-dsc": check_files_in_dsc,
        "validate-indices": check_indices_files_exist,
        "files-not-symlinks": check_files_not_symlinks,
        "validate-builddeps": check_build_depends,
        "add-missing-source-checksums": add_missing_source_checksums,
    }

    handler = handlers.get(mode)
    if handler is None:
        utils.warn("unknown mode '%s'" % (mode))
        usage(1)
    else:
        handler()
601################################################################################
# Run the command-line entry point only when executed as a script.
if __name__ == '__main__':
    main()