1#! /usr/bin/env python3
3"""Various different sanity checks
5@contact: Debian FTP Master <ftpmaster@debian.org>
6@copyright: (C) 2000, 2001, 2002, 2003, 2004, 2006 James Troup <james@nocrew.org>
7@license: GNU General Public License version 2 or later
8"""
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
24################################################################################
26# And, lo, a great and menacing voice rose from the depths, and with
27# great wrath and vehemence it's voice boomed across the
28# land... ``hehehehehehe... that *tickles*''
29# -- aj on IRC
31################################################################################
33import errno
34import os
35import stat
36import sys
37import time
39import apt_inst
40import apt_pkg
42from daklib import utils
43from daklib.config import Config
44from daklib.dak_exceptions import InvalidDscError
45from daklib.dbconn import (
46 Archive,
47 ArchiveFile,
48 DBConn,
49 DBSource,
50 DSCFile,
51 PoolFile,
52 get_component_names,
53 get_or_set_metadatakey,
54 get_suite,
55 get_suite_architectures,
56)
58################################################################################
db_files = {}  #: Cache of filenames as known by the database
waste = 0.0  #: How many bytes are "wasted" by files not referenced in database
excluded = {}  #: Dictionary of files which are excluded from the files check
current_file = None  #: Path of the .deb currently processed by check_timestamps()
future_files = {}  #: Filled by Ent(): maps current_file to an mtime lying in the future
current_time = time.time()  #: now(), sampled once when the module is loaded
67################################################################################
def usage(exit_code=0):
    """
    Print the command line help of ``dak check-archive`` and terminate.

    :param exit_code: status handed to :func:`sys.exit`
    """
    help_text = """Usage: dak check-archive MODE
Run various sanity checks of the archive and/or database.
 -h, --help show this help and exit.
The following MODEs are available:
 checksums - validate the checksums stored in the database
 files - check files in the database against what's in the archive
 dsc-syntax - validate the syntax of .dsc files in the archive
 missing-overrides - check for missing overrides
 source-in-one-dir - ensure the source for each package is in one directory
 timestamps - check for future timestamps in .deb's
 files-in-dsc - ensure each .dsc references appropriate Files
 validate-indices - ensure files mentioned in Packages & Sources exist
 files-not-symlinks - check files in the database aren't symlinks
 validate-builddeps - validate build-dependencies of .dsc files in the archive
 add-missing-source-checksums - add missing checksums for source packages
"""
    print(help_text)
    sys.exit(exit_code)
95################################################################################
def process_dir(unused, dirname: str, filenames: dict) -> None:
    """
    Report files inside `dirname` which are unknown to the database.

    Each entry of `filenames` that is a regular file, is not a symlink and is
    listed neither in the global :data:`db_files` nor in the global
    :data:`excluded` gets its absolute path printed; its size is added to the
    global :data:`waste` counter.

    Directories whose path contains ``/disks-``, ``upgrade-`` or
    ``proposed-updates`` are skipped completely.

    :param unused: ignored
    :param dirname: the directory to look at
    :param filenames: names of the entries found in `dirname`
    """
    global waste

    # "proposed-updates" is a hack; can't handle .changes files
    skip_markers = ("/disks-", "upgrade-", "proposed-updates")
    if any(marker in dirname for marker in skip_markers):
        return

    for entry in filenames:
        full_path = os.path.abspath(os.path.join(dirname, entry))
        # Only plain files count, symlinks are never reported.
        if not os.path.isfile(full_path) or os.path.islink(full_path):
            continue
        if full_path in db_files or full_path in excluded:
            continue
        waste += os.stat(full_path)[stat.ST_SIZE]
        print("%s" % (full_path))
125################################################################################
def check_files():
    """
    Cross-check the files known to the database against the archives on disk.

    The following findings are printed to stdout:

    - ``MISSING-ARCHIVE-FILE <archive> <suite> <filename>``: a binary, or a
      file belonging to a source package, is associated with a suite but has
      no ``files_archive_map`` row for that suite's archive.
    - ``MISSING-FILE <archive> <filename> <path>``: an :class:`ArchiveFile`
      whose path does not exist on disk.
    - ``UNEXPECTED-FILE <archive> <path>``: a file below an archive's
      ``pool/`` directory that no :class:`ArchiveFile` accounts for.
    """
    session = DBConn().session()

    binaries_query = """
        SELECT archive.name, suite.suite_name, f.filename
          FROM binaries b
          JOIN bin_associations ba ON b.id = ba.bin
          JOIN suite ON ba.suite = suite.id
          JOIN archive ON suite.archive_id = archive.id
          JOIN files f ON b.file = f.id
         WHERE NOT EXISTS (SELECT 1 FROM files_archive_map af
                            WHERE af.archive_id = suite.archive_id
                              AND af.file_id = b.file)
         ORDER BY archive.name, suite.suite_name, f.filename
        """

    sources_query = """
        SELECT archive.name, suite.suite_name, f.filename
          FROM source s
          JOIN src_associations sa ON s.id = sa.source
          JOIN suite ON sa.suite = suite.id
          JOIN archive ON suite.archive_id = archive.id
          JOIN dsc_files df ON s.id = df.source
          JOIN files f ON df.file = f.id
         WHERE NOT EXISTS (SELECT 1 FROM files_archive_map af
                            WHERE af.archive_id = suite.archive_id
                              AND af.file_id = df.file)
         ORDER BY archive.name, suite.suite_name, f.filename
        """

    for query in (binaries_query, sources_query):
        for row in session.execute(query):
            # str has no vformat() method; unpack the three columns of the
            # row into format() instead.
            print("MISSING-ARCHIVE-FILE {0} {1} {2}".format(*row))

    archive_files = (
        session.query(ArchiveFile)
        .join(ArchiveFile.archive)
        .join(ArchiveFile.file)
        .order_by(Archive.archive_name, PoolFile.filename)
    )

    # Every path the database expects to find on disk.
    expected_files = set()
    for af in archive_files:
        path = af.path
        expected_files.add(path)
        if not os.path.exists(path):
            print(
                "MISSING-FILE {0} {1} {2}".format(
                    af.archive.archive_name, af.file.filename, path
                )
            )

    archives = session.query(Archive).order_by(Archive.archive_name)

    # Anything in a pool/ directory that the database does not expect.
    for a in archives:
        top = os.path.join(a.path, "pool")
        for dirpath, _dirnames, filenames in os.walk(top):
            for fn in filenames:
                path = os.path.join(dirpath, fn)
                if path not in expected_files:
                    print("UNEXPECTED-FILE {0} {1}".format(a.archive_name, path))
196################################################################################
def check_dscs():
    """
    Parse every .dsc file in the archive and check its validity.

    Each source known to the database is parsed with signing rules enforced.
    A warning is issued per broken or missing file, followed by a summary
    warning when at least one problem was found.  An :class:`OSError` other
    than "no such file" is re-raised.
    """
    invalid = 0
    sources = (
        DBConn().session().query(DBSource).order_by(DBSource.source, DBSource.version)
    )

    for src in sources:
        dsc_path = src.poolfile.fullpath
        problem = None
        try:
            utils.parse_changes(dsc_path, signing_rules=1, dsc_file=True)
        except InvalidDscError:
            problem = "syntax error in .dsc file %s" % dsc_path
        except UnicodeDecodeError:
            problem = (
                "found invalid dsc file (%s), not properly utf-8 encoded" % dsc_path
            )
        except OSError as err:
            # Only a missing file is an expected condition here.
            if err.errno != errno.ENOENT:
                raise
            problem = "missing dsc file (%s)" % dsc_path
        except Exception as err:
            problem = "miscellaneous error parsing dsc file (%s): %s" % (
                dsc_path,
                str(err),
            )

        if problem is not None:
            utils.warn(problem)
            invalid += 1

    if invalid:
        utils.warn("Found %s invalid .dsc files." % (invalid))
232################################################################################
def check_override():
    """
    List packages without an override entry in stable and unstable.

    For each of the two suites a small header is printed, followed by the
    names of the binary packages and then of the source packages which are
    associated with the suite but have no row in ``override`` for it.
    """
    session = DBConn().session()

    binaries_without_override = """
SELECT DISTINCT b.package FROM binaries b, bin_associations ba
 WHERE b.id = ba.bin AND ba.suite = :suiteid AND NOT EXISTS
 (SELECT 1 FROM override o WHERE o.suite = :suiteid AND o.package = b.package)"""

    sources_without_override = """
SELECT DISTINCT s.source FROM source s, src_associations sa
 WHERE s.id = sa.source AND sa.suite = :suiteid AND NOT EXISTS
 (SELECT 1 FROM override o WHERE o.suite = :suiteid and o.package = s.source)"""

    for suite_name in ("stable", "unstable"):
        underline = "-" * len(suite_name)
        print(suite_name)
        print(underline)
        print()
        suite = get_suite(suite_name)

        # Binaries first, sources second; both lists are printed one name per line.
        for statement in (binaries_without_override, sources_without_override):
            result = session.execute(statement, {"suiteid": suite.suite_id})
            for row in result.fetchall():
                print(row[0])
268################################################################################
def check_source_in_one_dir():
    """
    Ensure that all files of a source package live in a single directory so
    that 'apt-get source' works...

    The directory of the first file of each source serves as reference.  A
    file in another directory is tolerated when that directory holds an entry
    named like the first file; otherwise the source is reported and counted
    as broken.  A summary line with the number of broken sources is printed
    at the end.
    """
    cnf = Config()

    # Not the most enterprising method, but hey...
    broken_count = 0

    session = DBConn().session()

    for src in session.query(DBSource).all():
        reference_dir = ""
        reference_file = ""
        is_broken = False

        pool_files = (
            session.query(PoolFile).join(DSCFile).filter_by(source_id=src.source_id)
        )
        for pool_file in pool_files.all():
            full_name = os.path.join(cnf["Dir::Root"], pool_file.filename)
            directory = os.path.dirname(full_name)

            if reference_dir == "":
                # First file seen: it defines where everything should be.
                reference_dir, reference_file = directory, full_name
                continue
            if reference_dir == directory:
                continue

            symlink = directory + "/" + os.path.basename(reference_file)
            if os.path.exists(symlink):
                continue

            is_broken = True
            print(
                "WOAH, we got a live one here... %s [%s] {%s}"
                % (full_name, src.source_id, symlink)
            )

        if is_broken:
            broken_count += 1

    print(
        "Found %d source packages where the source is not all in one directory."
        % (broken_count)
    )
317################################################################################
def check_checksums():
    """
    Validate size and checksums of all files known to the database.

    For every :class:`PoolFile` the size on disk as well as the MD5, SHA-1
    and SHA-256 sums are compared with the values stored in the database; a
    warning is issued for each mismatch and for each file that cannot be
    opened.
    """
    print("Getting file information from database...")
    q = DBConn().session().query(PoolFile)

    # The label is used in the warning and is also the name of the PoolFile
    # attribute holding the expected value.
    hashers = (
        ("md5sum", apt_pkg.md5sum),
        ("sha1sum", apt_pkg.sha1sum),
        ("sha256sum", apt_pkg.sha256sum),
    )

    print("Checking file checksums & sizes...")
    for f in q:
        filename = f.fullpath

        try:
            # Binary mode: the content is only hashed, never decoded.
            fi = open(filename, "rb")
        except OSError:
            # Deliberately not a bare except, so that Ctrl-C still aborts.
            utils.warn("can't open '%s'." % (filename))
            continue

        # The context manager closes the file even if hashing raises.
        with fi:
            size = os.stat(filename)[stat.ST_SIZE]
            if size != f.filesize:
                utils.warn(
                    "**WARNING** size mismatch for '%s' ('%s' [current] vs. '%s' [db])."
                    % (filename, size, f.filesize)
                )

            for label, hasher in hashers:
                # Every hash has to start from the beginning of the file.
                fi.seek(0)
                current = hasher(fi)
                expected = getattr(f, label)
                if current != expected:
                    utils.warn(
                        "**WARNING** %s mismatch for '%s' ('%s' [current] vs. '%s' [db])."
                        % (label, filename, current, expected)
                    )

    print("Done.")
371################################################################################
372#
def Ent(Kind, Name, Link, Mode, UID, GID, Size, MTime, Major, Minor):
    """
    Callback handed to ``apt_inst.debExtract`` by :func:`check_timestamps`.

    When `MTime` lies after the global :data:`current_time`, it is recorded
    in the global :data:`future_files` under :data:`current_file` and the
    entry is printed.  Entries that are not in the future are ignored.

    The remaining parameters are only used for the printed report
    (presumably the attributes of one archive member - confirm against the
    python-apt documentation).
    """
    if not MTime > current_time:
        return

    future_files[current_file] = MTime
    details = (
        current_file,
        Kind,
        Name,
        Link,
        Mode,
        UID,
        GID,
        Size,
        MTime,
        Major,
        Minor,
    )
    print("%s: %s '%s','%s',%u,%u,%u,%u,%u,%u,%u" % details)
def check_timestamps():
    """
    Check all files for timestamps in the future; common from hardware
    (e.g. alpha) which have far-future dates as their default dates.

    Every readable ``.deb`` known to the database has its ``control.tar.gz``
    and ``data.tar.gz`` members walked with :func:`Ent` as callback, which
    reports entries whose mtime lies in the future.  The global
    :data:`current_file` is set to the file being processed so that the
    callback can name it.
    """
    global current_file

    # SQL LIKE uses '%' as wildcard; a regex-style ".deb$" would only match a
    # file literally named ".deb$".
    q = DBConn().session().query(PoolFile).filter(PoolFile.filename.like("%.deb"))

    db_files.clear()
    pool_files = q.all()
    count = 0

    # NOTE(review): PoolFile.location and apt_inst.debExtract are not visible
    # from this file - confirm both are still provided by daklib / python-apt.
    for pf in pool_files:
        filename = os.path.abspath(os.path.join(pf.location.path, pf.filename))
        if os.access(filename, os.R_OK):
            with open(filename, "rb") as f:
                current_file = filename
                print("Processing %s." % (filename), file=sys.stderr)
                apt_inst.debExtract(f, Ent, "control.tar.gz")
                f.seek(0)
                apt_inst.debExtract(f, Ent, "data.tar.gz")
                count += 1

    # Report against the number of files queried; db_files was just cleared
    # and would always yield 0 here.
    print("Checked %d files (out of %d)." % (count, len(pool_files)))
425################################################################################
def check_files_in_dsc():
    """
    Ensure each .dsc lists appropriate files in its Files field (according
    to the format announced in its Format field).

    Every problem reported by ``utils.check_dsc_files`` is passed on as a
    warning; a summary warning follows when at least one .dsc was affected.
    A .dsc that cannot be parsed is reported through ``utils.fubar``.
    """
    count = 0

    print("Building list of database files...")
    # SQL LIKE uses '%' as wildcard; a regex-style ".dsc$" would only match a
    # file literally named ".dsc$".
    q = DBConn().session().query(PoolFile).filter(PoolFile.filename.like("%.dsc"))

    # Ask the database only once for the number of rows.
    total = q.count()
    if total > 0:
        print("Checking %d files..." % total)
    else:
        print("No files to check.")

    cnf = Config()
    for pf in q.all():
        filename = os.path.abspath(os.path.join(cnf["Dir::Root"], pf.filename))

        try:
            # NB: don't enforce .dsc syntax
            dsc = utils.parse_changes(filename, dsc_file=True)
        except Exception:
            # Not a bare except, so Ctrl-C / SystemExit are not turned into a
            # bogus parse error.
            utils.fubar("error parsing .dsc file '%s'." % (filename))

        reasons = utils.check_dsc_files(filename, dsc)
        for r in reasons:
            utils.warn(r)

        if len(reasons) > 0:
            count += 1

    if count:
        utils.warn("Found %s invalid .dsc files." % (count))
464################################################################################
def validate_sources(suite, component):
    """
    Ensure files mentioned in Sources exist

    Walks the (possibly compressed) Sources index of `suite`/`component` and
    checks every file listed in the ``Files`` field of each stanza.  Missing
    files are reported; for directories containing ``potato`` the pool is
    consulted as a fallback and the symlink that would be needed is printed.

    :param suite: name of the suite whose index is read
    :param component: name of the component whose index is read
    """
    cnf = Config()
    root = cnf["Dir::Root"]
    index_path = utils.find_possibly_compressed_file(
        "%s/dists/%s/%s/source/Sources" % (root, suite, component)
    )
    print("Processing %s..." % (index_path))

    with apt_pkg.TagFile(index_path) as Sources:
        while Sources.step():
            section = Sources.section
            source = section.find("Package")
            directory = section.find("Directory")
            files = section.find("Files")

            for entry in files.split("\n"):
                # Each entry reads "<md5> <size> <name>"; only the name matters.
                _md5, _size, name = entry.split()
                filename = "%s/%s/%s" % (root, directory, name)
                if os.path.exists(filename):
                    continue

                if directory.find("potato") == -1:
                    print("W: %s missing." % (filename))
                    continue

                pool_location = utils.poolify(source)
                pool_filename = "%s/%s/%s" % (cnf["Dir::Pool"], pool_location, name)
                if not os.path.exists(pool_filename):
                    print("E: %s missing (%s)." % (filename, pool_filename))
                    continue

                # Report the symlink; utils.clean_symlink() presumably yields
                # the link target to use - confirm against daklib.utils.
                pool_filename = os.path.normpath(pool_filename)
                filename = os.path.normpath(filename)
                src = utils.clean_symlink(pool_filename, filename, root)
                print("Symlinking: %s -> %s" % (filename, src))
505########################################
def validate_packages(suite, component, architecture):
    """
    Ensure files mentioned in Packages exist

    Reads the (possibly compressed) Packages index of the given suite,
    component and architecture and prints a warning for every ``Filename``
    that does not exist below ``Dir::Root``.

    :param suite: name of the suite whose index is read
    :param component: name of the component whose index is read
    :param architecture: architecture of the ``binary-*`` directory
    """
    cnf = Config()
    index_template = "%s/dists/%s/%s/binary-%s/Packages"
    index_path = utils.find_possibly_compressed_file(
        index_template % (cnf["Dir::Root"], suite, component, architecture)
    )
    print("Processing %s..." % (index_path))

    with apt_pkg.TagFile(index_path) as Packages:
        while Packages.step():
            deb_path = "%s/%s" % (cnf["Dir::Root"], Packages.section.find("Filename"))
            if os.path.exists(deb_path):
                continue
            print("W: %s missing." % (deb_path))
528########################################
def check_indices_files_exist():
    """
    Ensure files mentioned in Packages & Sources exist

    For stable, testing and unstable every component is visited and, per
    architecture of the suite, either the Sources index (``source``) or the
    matching Packages index is validated.  The ``all`` architecture has no
    index of its own and is skipped.
    """
    for suite in ("stable", "testing", "unstable"):
        for component in get_component_names():
            architectures = get_suite_architectures(suite)
            arch_names = [a.arch_string.lower() for a in architectures]
            for arch in arch_names:
                if arch == "all":
                    continue
                if arch == "source":
                    validate_sources(suite, component)
                else:
                    validate_packages(suite, component, arch)
547################################################################################
def check_files_not_symlinks():
    """
    Check files in the database aren't symlinks

    Every ``.dsc`` known to the database is looked up on disk; a warning is
    issued when it is not readable or when it is a symbolic link.
    """
    print("Building list of database files... ", end=" ")
    # SQL LIKE uses '%' as wildcard; a regex-style ".dsc$" would only match a
    # file literally named ".dsc$".
    q = DBConn().session().query(PoolFile).filter(PoolFile.filename.like("%.dsc"))

    # NOTE(review): PoolFile.location is not visible from this file - confirm
    # it is still provided by daklib.dbconn.
    for pf in q.all():
        filename = os.path.abspath(os.path.join(pf.location.path, pf.filename))
        if not os.access(filename, os.R_OK):
            utils.warn("%s: doesn't exist." % (filename))
        elif os.path.islink(filename):
            utils.warn("%s: is a symlink." % (filename))
566################################################################################
def chk_bd_process_dir(dirname, filenames):
    """
    Validate the build-dependency fields of all .dsc files in one directory.

    Entries of `filenames` not ending in ``.dsc`` are ignored.  For the
    others the ``build-depends`` and ``build-depends-indep`` fields are fed
    to ``apt_pkg.parse_src_depends``; a field which fails to parse is
    reported on stdout.

    :param dirname: directory containing the files
    :param filenames: names of the entries inside `dirname`
    """
    for name in filenames:
        if not name.endswith(".dsc"):
            continue
        filename = os.path.abspath(dirname + "/" + name)
        dsc = utils.parse_changes(filename, dsc_file=True)
        for field_name in ("build-depends", "build-depends-indep"):
            field = dsc.get(field_name)
            if not field:
                continue
            try:
                apt_pkg.parse_src_depends(field)
            except Exception:
                # Not a bare except, so Ctrl-C / SystemExit are not reported
                # as a broken field.
                print("E: [%s] %s: %s" % (filename, field_name, field))
585################################################################################
def check_build_depends():
    """
    Validate build-dependencies of .dsc files in the archive

    Walks the whole tree below ``Dir::Root`` and lets
    :func:`chk_bd_process_dir` inspect each directory.
    """
    archive_root = Config()["Dir::Root"]
    for walk_entry in os.walk(archive_root):
        directory, _subdirs, names = walk_entry
        chk_bd_process_dir(directory, names)
595################################################################################
# SQL statement executed by add_missing_source_checksums().
#
# For every row of `source` which has no `source_metadata` entry for
# :checksum_key, one entry is inserted.  Its value starts with a newline and
# lists, ordered by basename, one " <checksum> <size> <basename>" line per
# file linked to the source through `dsc_files`, leaving out the source's own
# file (s.file).  :checksum_type ('Files', 'Checksums-Sha1' or
# 'Checksums-Sha256') selects which checksum column of `files` is used.
#
# This is a raw string so that the backslashes of E'\n' and '\\Z' reach the
# database unchanged.
_add_missing_source_checksums_query = R"""
INSERT INTO source_metadata
 (src_id, key_id, value)
SELECT
 s.id,
 :checksum_key,
 E'\n' ||
 (SELECT STRING_AGG(' ' || tmp.checksum || ' ' || tmp.size || ' ' || tmp.basename, E'\n' ORDER BY tmp.basename)
 FROM
 (SELECT
 CASE :checksum_type
 WHEN 'Files' THEN f.md5sum
 WHEN 'Checksums-Sha1' THEN f.sha1sum
 WHEN 'Checksums-Sha256' THEN f.sha256sum
 END AS checksum,
 f.size,
 SUBSTRING(f.filename FROM E'/([^/]*)\\Z') AS basename
 FROM files f JOIN dsc_files ON f.id = dsc_files.file
 WHERE dsc_files.source = s.id AND f.id != s.file
 ) AS tmp
 )
 FROM
 source s
 WHERE NOT EXISTS (SELECT 1 FROM source_metadata md WHERE md.src_id=s.id AND md.key_id = :checksum_key);
"""
def add_missing_source_checksums():
    """
    Add missing source checksums to source_metadata

    Runs the insert statement once for each of the ``Files``,
    ``Checksums-Sha1`` and ``Checksums-Sha256`` metadata keys, reports how
    many entries were added per key and commits at the end.
    """
    session = DBConn().session()
    checksum_fields = ("Files", "Checksums-Sha1", "Checksums-Sha256")

    for field in checksum_fields:
        key_id = get_or_set_metadatakey(field, session).key_id
        parameters = {"checksum_key": key_id, "checksum_type": field}
        result = session.execute(_add_missing_source_checksums_query, parameters)
        inserted = result.rowcount
        if inserted > 0:
            print("Added {0} missing entries for {1}".format(inserted, field))

    session.commit()
640################################################################################
def main():
    """
    Entry point of ``dak check-archive``.

    Parses the command line, requires exactly one MODE argument and runs the
    check belonging to it.  With ``--help``, with a wrong number of arguments
    or with an unknown mode the usage text is shown through :func:`usage`,
    which terminates the program.
    """
    cnf = Config()

    Arguments = [("h", "help", "Check-Archive::Options::Help")]
    # Make sure every option has a value, so the lookup below cannot fail.
    for i in ["help"]:
        key = "Check-Archive::Options::%s" % i
        if key not in cnf:
            cnf[key] = ""

    args = apt_pkg.parse_commandline(cnf.Cnf, Arguments, sys.argv)

    Options = cnf.subtree("Check-Archive::Options")
    if Options["Help"]:
        usage()

    if len(args) < 1:
        utils.warn("dak check-archive requires at least one argument")
        usage(1)
    elif len(args) > 1:
        utils.warn("dak check-archive accepts only one argument")
        usage(1)
    mode = args[0].lower()

    # Initialize DB
    DBConn()

    # MODE given on the command line -> function implementing the check.
    checks = {
        "checksums": check_checksums,
        "files": check_files,
        "dsc-syntax": check_dscs,
        "missing-overrides": check_override,
        "source-in-one-dir": check_source_in_one_dir,
        "timestamps": check_timestamps,
        "files-in-dsc": check_files_in_dsc,
        "validate-indices": check_indices_files_exist,
        "files-not-symlinks": check_files_not_symlinks,
        "validate-builddeps": check_build_depends,
        "add-missing-source-checksums": add_missing_source_checksums,
    }

    check = checks.get(mode)
    if check is None:
        utils.warn("unknown mode '%s'" % (mode))
        usage(1)
    else:
        check()
698################################################################################
# Run the command line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()