Coverage for dak/check_archive.py: 13%
270 statements
« prev ^ index » next coverage.py v7.6.0, created at 2026-01-04 16:18 +0000
« prev ^ index » next coverage.py v7.6.0, created at 2026-01-04 16:18 +0000
1#! /usr/bin/env python3
3"""Various different sanity checks
5@contact: Debian FTP Master <ftpmaster@debian.org>
6@copyright: (C) 2000, 2001, 2002, 2003, 2004, 2006 James Troup <james@nocrew.org>
7@license: GNU General Public License version 2 or later
8"""
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
24################################################################################
26# And, lo, a great and menacing voice rose from the depths, and with
27# great wrath and vehemence it's voice boomed across the
28# land... ``hehehehehehe... that *tickles*''
29# -- aj on IRC
31################################################################################
33import errno
34import os
35import stat
36import sys
37import time
38from collections.abc import Iterable
39from typing import NoReturn, cast
41import apt_pkg
42from sqlalchemy import sql
43from sqlalchemy.engine import CursorResult
45from daklib import utils
46from daklib.config import Config
47from daklib.dak_exceptions import InvalidDscError
48from daklib.dbconn import (
49 Archive,
50 ArchiveFile,
51 DBConn,
52 DBSource,
53 DSCFile,
54 PoolFile,
55 get_component_names,
56 get_or_set_metadatakey,
57 get_suite,
58 get_suite_architectures,
59)
61################################################################################
# Module-level state shared by the individual checks below.
db_files: dict = {}  #: Cache of filenames as known by the database
waste = 0.0  #: How many bytes are "wasted" by files not referenced in database
excluded: dict = {}  #: Files which are excluded from the files check
#: File currently being examined; Ent() asserts that it has been set.
current_file: str | None = None
#: Files seen by Ent() with an mtime later than current_time, mapped to that mtime.
future_files: dict[str, int] = {}
current_time = time.time()  #: now(), captured once when the module is loaded
70################################################################################
def usage(exit_code=0) -> NoReturn:
    """
    Print the help text for ``dak check-archive`` and terminate the process.

    :param exit_code: exit status passed on to :func:`sys.exit`
    """
    help_text = """Usage: dak check-archive MODE
Run various sanity checks of the archive and/or database.

 -h, --help show this help and exit.

The following MODEs are available:

 checksums - validate the checksums stored in the database
 files - check files in the database against what's in the archive
 dsc-syntax - validate the syntax of .dsc files in the archive
 missing-overrides - check for missing overrides
 source-in-one-dir - ensure the source for each package is in one directory
 timestamps - check for future timestamps in .deb's
 files-in-dsc - ensure each .dsc references appropriate Files
 validate-indices - ensure files mentioned in Packages & Sources exist
 files-not-symlinks - check files in the database aren't symlinks
 validate-builddeps - validate build-dependencies of .dsc files in the archive
 add-missing-source-checksums - add missing checksums for source packages
"""
    print(help_text)
    sys.exit(exit_code)
98################################################################################
def process_dir(dirname: str, filenames: dict) -> None:
    """
    Print every regular file in a directory that the database does not know.

    Each name in `filenames` is resolved below `dirname`. If the result is a
    regular file, is not a symlink, and appears in neither :data:`db_files`
    nor :data:`excluded`, its absolute path is printed and its size is added
    to the global :data:`waste` counter.

    :param dirname: the directory to look at
    :param filenames: names of the directory entries to examine
    """
    global waste

    # Directories whose path contains one of these markers are skipped
    # entirely; proposed-updates because .changes files can't be handled.
    skip_markers = ("/disks-", "upgrade-", "proposed-updates")
    if any(marker in dirname for marker in skip_markers):
        return

    for entry in filenames:
        candidate = os.path.abspath(os.path.join(dirname, entry))
        if not os.path.isfile(candidate) or os.path.islink(candidate):
            continue
        if candidate in db_files or candidate in excluded:
            continue
        waste += os.stat(candidate)[stat.ST_SIZE]
        print("%s" % (candidate))
128################################################################################
def check_files() -> None:
    """
    Compare the files recorded in the database with the archive on disk.

    Four kinds of problems are printed:

    - ``MISSING-ARCHIVE-FILE``: a binary or source file is associated with a
      suite but has no ``files_archive_map`` row for that suite's archive;
    - ``MISSING-FILE``: a file registered for an archive does not exist on
      disk;
    - ``UNEXPECTED-FILE``: a file below an archive's ``pool/`` directory is
      not registered in the database.
    """
    session = DBConn().session()

    binaries_without_archive = """
        SELECT archive.name, suite.suite_name, f.filename
          FROM binaries b
          JOIN bin_associations ba ON b.id = ba.bin
          JOIN suite ON ba.suite = suite.id
          JOIN archive ON suite.archive_id = archive.id
          JOIN files f ON b.file = f.id
         WHERE NOT EXISTS (SELECT 1 FROM files_archive_map af
                            WHERE af.archive_id = suite.archive_id
                              AND af.file_id = b.file)
         ORDER BY archive.name, suite.suite_name, f.filename
        """
    sources_without_archive = """
        SELECT archive.name, suite.suite_name, f.filename
          FROM source s
          JOIN src_associations sa ON s.id = sa.source
          JOIN suite ON sa.suite = suite.id
          JOIN archive ON suite.archive_id = archive.id
          JOIN dsc_files df ON s.id = df.source
          JOIN files f ON df.file = f.id
         WHERE NOT EXISTS (SELECT 1 FROM files_archive_map af
                            WHERE af.archive_id = suite.archive_id
                              AND af.file_id = df.file)
         ORDER BY archive.name, suite.suite_name, f.filename
        """
    # Binaries are reported first, then sources.
    for statement in (binaries_without_archive, sources_without_archive):
        for archive_name, suite_name, pool_name in session.execute(sql.text(statement)):
            print(f"MISSING-ARCHIVE-FILE {archive_name} {suite_name} {pool_name}")

    registered = (
        session.query(ArchiveFile)
        .join(ArchiveFile.archive)
        .join(ArchiveFile.file)
        .order_by(Archive.archive_name, PoolFile.filename)
    )

    # Remember every registered path so the pool walk below can spot strays.
    expected_files = set()
    for entry in registered:
        location = entry.path
        expected_files.add(location)
        if os.path.exists(location):
            continue
        print(
            f"MISSING-FILE {entry.archive.archive_name} {entry.file.filename} {location}"
        )

    for archive in session.query(Archive).order_by(Archive.archive_name):
        pool_root = os.path.join(archive.path, "pool")
        for dirpath, _dirnames, filenames in os.walk(pool_root):
            for basename in filenames:
                on_disk = os.path.join(dirpath, basename)
                if on_disk not in expected_files:
                    print(f"UNEXPECTED-FILE {archive.archive_name} {on_disk}")
199################################################################################
def check_dscs() -> None:
    """
    Parse every .dsc file known to the database and warn about invalid ones.

    A warning is issued for each .dsc that has a syntax error, is not valid
    UTF-8, is missing on disk, or fails to parse for any other reason; a
    final warning reports the total. An :class:`OSError` other than
    ``ENOENT`` is re-raised.
    """
    sources = (
        DBConn().session().query(DBSource).order_by(DBSource.source, DBSource.version)
    )

    invalid = 0
    for src in sources:
        dsc_path = src.poolfile.fullpath
        problem = None
        try:
            utils.parse_changes(dsc_path, signing_rules=1, dsc_file=True)
        except InvalidDscError:
            problem = f"syntax error in .dsc file {dsc_path}"
        except UnicodeDecodeError:
            problem = f"found invalid dsc file ({dsc_path}), not properly utf-8 encoded"
        except OSError as e:
            # Only a missing file is a finding; other OS errors propagate.
            if e.errno != errno.ENOENT:
                raise
            problem = f"missing dsc file ({dsc_path})"
        except Exception as e:
            problem = f"miscellaneous error parsing dsc file ({dsc_path}): {str(e)}"

        if problem is not None:
            utils.warn(problem)
            invalid += 1

    if invalid:
        utils.warn(f"Found {invalid} invalid .dsc files.")
235################################################################################
def check_override() -> None:
    """
    List packages in stable and unstable that have no override entry.

    For each suite an underlined heading is printed, followed by the names of
    binary packages and then of source packages that are associated with the
    suite but have no matching row in ``override``.
    """
    session = DBConn().session()

    binaries_without_override = sql.text(
        """
SELECT DISTINCT b.package FROM binaries b, bin_associations ba
 WHERE b.id = ba.bin AND ba.suite = :suiteid AND NOT EXISTS
       (SELECT 1 FROM override o WHERE o.suite = :suiteid AND o.package = b.package)"""
    )
    sources_without_override = sql.text(
        """
SELECT DISTINCT s.source FROM source s, src_associations sa
  WHERE s.id = sa.source AND sa.suite = :suiteid AND NOT EXISTS
       (SELECT 1 FROM override o WHERE o.suite = :suiteid and o.package = s.source)"""
    )

    for suite_name in ("stable", "unstable"):
        print(suite_name)
        print("-" * len(suite_name))
        print()
        suite = get_suite(suite_name)
        assert suite is not None
        params = {"suiteid": suite.suite_id}

        # Binaries are listed first, then sources.
        for statement in (binaries_without_override, sources_without_override):
            for row in session.execute(statement, params).fetchall():
                print(row[0])
276################################################################################
def check_source_in_one_dir() -> None:
    """
    Verify that all files of each source package live in a single directory,
    so that 'apt-get source' works.

    The directory of the first file of a source package is the reference.
    A file in a different directory is reported unless that directory
    contains an entry named like the first file. The number of affected
    source packages is printed at the end.
    """
    cnf = Config()
    session = DBConn().session()

    broken_count = 0
    for source in session.query(DBSource).all():
        first_path = ""
        first_filename = ""
        broken = False

        pool_files = (
            session.query(PoolFile).join(DSCFile).filter_by(source_id=source.source_id)
        )
        for pool_file in pool_files.all():
            filename = os.path.join(cnf["Dir::Root"], pool_file.filename)
            path = os.path.dirname(filename)

            if not first_path:
                # First file seen for this source: it defines the reference.
                first_path, first_filename = path, filename
                continue
            if first_path == path:
                continue

            symlink = f"{path}/{os.path.basename(first_filename)}"
            if os.path.exists(symlink):
                continue
            broken = True
            print(
                "WOAH, we got a live one here... %s [%s] {%s}"
                % (filename, source.source_id, symlink)
            )

        if broken:
            broken_count += 1

    print(
        "Found %d source packages where the source is not all in one directory."
        % (broken_count)
    )
325################################################################################
def check_checksums() -> None:
    """
    Validate size, MD5, SHA-1 and SHA-256 of every file known to the database.

    Each :class:`PoolFile` is opened and its on-disk size and digests are
    compared with the values stored in the database. Every mismatch is
    reported through :func:`utils.warn`. Files that cannot be opened are
    reported and skipped.
    """
    print("Getting file information from database...")
    q = DBConn().session().query(PoolFile)

    print("Checking file checksums & sizes...")
    for f in q:
        filename = f.fullpath

        try:
            # Binary mode: the content is only hashed, never decoded.
            fi = open(filename, "rb")
        except OSError:
            utils.warn("can't open '%s'." % (filename))
            continue

        # The context manager closes the file even if hashing raises.
        with fi:
            # Ask the open handle, so size and digests describe the same file.
            size = os.fstat(fi.fileno()).st_size
            if size != f.filesize:
                utils.warn(
                    "**WARNING** size mismatch for '%s' ('%s' [current] vs. '%s' [db])."
                    % (filename, size, f.filesize)
                )

            digests = (
                ("md5sum", apt_pkg.md5sum, f.md5sum),
                ("sha1sum", apt_pkg.sha1sum, f.sha1sum),  # type: ignore[attr-defined]
                ("sha256sum", apt_pkg.sha256sum, f.sha256sum),  # type: ignore[attr-defined]
            )
            for label, hasher, expected in digests:
                # Every digest is computed over the whole file.
                fi.seek(0)
                actual = hasher(fi)
                if actual != expected:
                    utils.warn(
                        "**WARNING** %s mismatch for '%s' ('%s' [current] vs. '%s' [db])."
                        % (label, filename, actual, expected)
                    )

    print("Done.")
379################################################################################
380#
def Ent(Kind, Name, Link, Mode, UID, GID, Size, MTime: int, Major, Minor) -> None:
    """
    Record and print an archive member whose mtime lies in the future.

    The parameters describe one archive member; the disabled code in
    check_timestamps() passes this function to ``apt_inst.debExtract``.
    When `MTime` is later than :data:`current_time`, the file named by
    :data:`current_file` is stored in :data:`future_files` and the member's
    details are printed.
    """
    assert current_file is not None

    if MTime <= current_time:
        return

    future_files[current_file] = MTime
    details = (current_file, Kind, Name, Link, Mode, UID, GID, Size, MTime, Major, Minor)
    print("%s: %s '%s','%s',%u,%u,%u,%u,%u,%u,%u" % details)
def check_timestamps() -> None:
    """
    Check all files for timestamps in the future; common from hardware
    (e.g. alpha) which have far-future dates as their default dates.

    This check is currently disabled: the function returns immediately and
    the former implementation is kept below as a comment.
    """
    return None

    # NOTE(review): before re-enabling, confirm that `PoolFile.location` still
    # exists and that `like(".deb$")` is intended -- SQL LIKE does not treat
    # `$` as an end-of-string anchor.
    #
    # global current_file
    #
    # q = DBConn().session().query(PoolFile).filter(PoolFile.filename.like(".deb$"))
    #
    # db_files.clear()
    # count = 0
    #
    # for pf in q.all():
    #     filename = os.path.abspath(os.path.join(pf.location.path, pf.filename))
    #     if os.access(filename, os.R_OK):
    #         with open(filename) as f:
    #             current_file = filename
    #             print("Processing %s." % (filename), file=sys.stderr)
    #             apt_inst.debExtract(f, Ent, "control.tar.gz")
    #             f.seek(0)
    #             apt_inst.debExtract(f, Ent, "data.tar.gz")
    #             count += 1
    #
    # print("Checked %d files (out of %d)." % (count, len(db_files)))
435################################################################################
def check_files_in_dsc() -> None:
    """
    Ensure each .dsc lists appropriate files in its Files field (according
    to the format announced in its Format field).

    This check is currently disabled: the function returns immediately and
    the former implementation is kept below as a comment.
    """
    return None

    # NOTE(review): before re-enabling, confirm that `like(".dsc$")` is
    # intended -- SQL LIKE does not treat `$` as an end-of-string anchor.
    #
    # count = 0
    #
    # print("Building list of database files...")
    # q = DBConn().session().query(PoolFile).filter(PoolFile.filename.like(".dsc$"))
    #
    # if q.count() > 0:
    #     print("Checking %d files..." % q.count())
    # else:
    #     print("No files to check.")
    #
    # cnf = Config()
    # for pf in q.all():
    #     filename = os.path.abspath(os.path.join(cnf["Dir::Root"], pf.filename))
    #
    #     try:
    #         # NB: don't enforce .dsc syntax
    #         dsc = utils.parse_changes(filename, dsc_file=True)
    #     except:
    #         utils.fubar("error parsing .dsc file '%s'." % (filename))
    #
    #     reasons = utils.check_dsc_files(filename, dsc)
    #     for r in reasons:
    #         utils.warn(r)
    #
    #     if len(reasons) > 0:
    #         count += 1
    #
    # if count:
    #     utils.warn("Found %s invalid .dsc files." % (count))
476################################################################################
def validate_sources(suite: str, component: str) -> None:
    """
    Ensure files mentioned in a Sources index exist.

    Every entry of each stanza's ``Files`` field is looked up below
    ``Dir::Root``. A missing file is reported with ``W:``. If the stanza's
    directory contains "potato", the pool is consulted instead: a file
    missing there as well is reported with ``E:``, otherwise the symlink
    that would connect the two locations is printed.

    :param suite: name of the suite whose index is read
    :param component: name of the component whose index is read
    """
    cnf = Config()
    root = cnf["Dir::Root"]
    index = utils.find_possibly_compressed_file(
        f"{root}/dists/{suite}/{component}/source/Sources"
    )
    print(f"Processing {index}...")
    with apt_pkg.TagFile(index) as Sources:
        while Sources.step():  # type: ignore[attr-defined]
            section: apt_pkg.TagSection = Sources.section  # type: ignore[attr-defined]
            source = section.find("Package")
            directory = section.find("Directory")
            files = section.find("Files")
            for entry in files.split("\n"):
                _md5, _size, name = entry.split()
                filename = f"{root}/{directory}/{name}"
                if os.path.exists(filename):
                    continue

                if "potato" not in directory:
                    print(f"W: {filename} missing.")
                    continue

                pool_filename = "%s/%s/%s" % (
                    cnf["Dir::Pool"],
                    utils.poolify(source),
                    name,
                )
                if not os.path.exists(pool_filename):
                    print(f"E: {filename} missing ({pool_filename}).")
                    continue

                # Only report the symlink; nothing is created on disk here.
                pool_filename = os.path.normpath(pool_filename)
                filename = os.path.normpath(filename)
                src = utils.clean_symlink(pool_filename, filename, cnf["Dir::Root"])
                print(f"Symlinking: {filename} -> {src}")
518########################################
def validate_packages(suite: str, component: str, architecture: str) -> None:
    """
    Ensure files mentioned in a Packages index exist.

    The ``Filename`` of every stanza is resolved below ``Dir::Root`` and a
    ``W:`` line is printed for each file that does not exist.

    :param suite: name of the suite whose index is read
    :param component: name of the component whose index is read
    :param architecture: architecture whose ``binary-*`` index is read
    """
    cnf = Config()
    root = cnf["Dir::Root"]
    index = utils.find_possibly_compressed_file(
        f"{root}/dists/{suite}/{component}/binary-{architecture}/Packages"
    )
    print(f"Processing {index}...")
    with apt_pkg.TagFile(index) as Packages:
        while Packages.step():  # type: ignore[attr-defined]
            section: apt_pkg.TagSection = Packages.section  # type: ignore[attr-defined]
            target = f"{root}/{section.find('Filename')}"
            if os.path.exists(target):
                continue
            print(f"W: {target} missing.")
542########################################
def check_indices_files_exist() -> None:
    """
    Ensure files mentioned in Packages & Sources exist.

    For every component of stable, testing and unstable, the Sources index
    is validated for the ``source`` architecture and the Packages index for
    every other architecture except ``all``.
    """
    for suite in ("stable", "testing", "unstable"):
        for component in get_component_names():
            arch_names = [a.arch_string.lower() for a in get_suite_architectures(suite)]
            for arch in arch_names:
                if arch == "all":
                    # No index is validated for "all".
                    continue
                if arch == "source":
                    validate_sources(suite, component)
                else:
                    validate_packages(suite, component, arch)
561################################################################################
def check_files_not_symlinks() -> None:
    """
    Check files in the database aren't symlinks.

    This check is currently disabled: the function returns immediately and
    the former implementation is kept below as a comment.
    """
    return None

    # NOTE(review): before re-enabling, confirm that `PoolFile.location` still
    # exists and that `like(".dsc$")` is intended -- SQL LIKE does not treat
    # `$` as an end-of-string anchor.
    #
    # print("Building list of database files... ", end=" ")
    # q = DBConn().session().query(PoolFile).filter(PoolFile.filename.like(".dsc$"))
    #
    # for pf in q.all():
    #     filename = os.path.abspath(os.path.join(pf.location.path, pf.filename))
    #     if os.access(filename, os.R_OK) == 0:
    #         utils.warn("%s: doesn't exist." % (filename))
    #     else:
    #         if os.path.islink(filename):
    #             utils.warn("%s: is a symlink." % (filename))
582################################################################################
def chk_bd_process_dir(dirname: str, filenames: Iterable[str]) -> None:
    """
    Validate the build-dependency fields of every .dsc file in a directory.

    Each name ending in ``.dsc`` is parsed, and its ``build-depends`` and
    ``build-depends-indep`` fields, when non-empty, are handed to
    :func:`apt_pkg.parse_src_depends`. A field that fails to parse is
    reported on stdout as an ``E:`` line.

    :param dirname: directory that contains the files
    :param filenames: names of the entries inside `dirname`
    """
    for name in filenames:
        if not name.endswith(".dsc"):
            continue
        filename = os.path.abspath(os.path.join(dirname, name))
        dsc = utils.parse_changes(filename, dsc_file=True)
        for field_name in ("build-depends", "build-depends-indep"):
            field = dsc.get(field_name)
            if not field:
                continue
            try:
                apt_pkg.parse_src_depends(field)
            except Exception:
                # Any parser failure is a finding, but KeyboardInterrupt and
                # SystemExit must still be able to stop the run.
                print("E: [%s] %s: %s" % (filename, field_name, field))
600################################################################################
def check_build_depends() -> None:
    """
    Validate build-dependencies of .dsc files in the archive.

    Walks the whole tree below ``Dir::Root`` and hands every directory to
    :func:`chk_bd_process_dir`.
    """
    archive_root = Config()["Dir::Root"]
    for dirpath, _dirnames, filenames in os.walk(archive_root):
        chk_bd_process_dir(dirpath, filenames)
610################################################################################
# Statement used by add_missing_source_checksums(). For every source that has
# no source_metadata row for :checksum_key, it inserts one whose value lists,
# one per line and ordered by basename, "checksum size basename" for the files
# of that source other than s.file. :checksum_type selects which checksum
# column of `files` is used. The string is raw so that the backslashes reach
# the database unchanged.
_add_missing_source_checksums_query = R"""
INSERT INTO source_metadata
 (src_id, key_id, value)
SELECT
 s.id,
 :checksum_key,
 E'\n' ||
 (SELECT STRING_AGG(' ' || tmp.checksum || ' ' || tmp.size || ' ' || tmp.basename, E'\n' ORDER BY tmp.basename)
 FROM
 (SELECT
 CASE :checksum_type
 WHEN 'Files' THEN f.md5sum
 WHEN 'Checksums-Sha1' THEN f.sha1sum
 WHEN 'Checksums-Sha256' THEN f.sha256sum
 END AS checksum,
 f.size,
 SUBSTRING(f.filename FROM E'/([^/]*)\\Z') AS basename
 FROM files f JOIN dsc_files ON f.id = dsc_files.file
 WHERE dsc_files.source = s.id AND f.id != s.file
 ) AS tmp
 )
 FROM
 source s
 WHERE NOT EXISTS (SELECT 1 FROM source_metadata md WHERE md.src_id=s.id AND md.key_id = :checksum_key);
"""
def add_missing_source_checksums() -> None:
    """
    Add missing source checksums to source_metadata.

    Runs the insert statement once for each of the ``Files``,
    ``Checksums-Sha1`` and ``Checksums-Sha256`` metadata keys, prints how many
    rows were added when that number is positive, and commits at the end.
    """
    session = DBConn().session()
    statement = sql.text(_add_missing_source_checksums_query)

    for checksum in ("Files", "Checksums-Sha1", "Checksums-Sha256"):
        params = {
            "checksum_key": get_or_set_metadatakey(checksum, session).key_id,
            "checksum_type": checksum,
        }
        result = cast(CursorResult, session.execute(statement, params))
        rows = result.rowcount
        if rows > 0:
            print(f"Added {rows} missing entries for {checksum}")

    session.commit()
658################################################################################
def main() -> None:
    """
    Entry point of ``dak check-archive``.

    Parses the command line, prints the help text when requested, insists on
    exactly one MODE argument, and runs the check that belongs to that mode.
    An unknown mode is reported and the help text is printed with exit
    status 1.
    """
    cnf = Config()

    arguments = [("h", "help", "Check-Archive::Options::Help")]
    # Make sure every option has a value before the command line is parsed.
    for option in ("help",):
        key = f"Check-Archive::Options::{option}"
        if key not in cnf:
            cnf[key] = ""

    args = apt_pkg.parse_commandline(cnf.Cnf, arguments, sys.argv)  # type: ignore[attr-defined]

    options = cnf.subtree("Check-Archive::Options")
    if options["Help"]:
        usage()

    if not args:
        utils.warn("dak check-archive requires at least one argument")
        usage(1)
    elif len(args) > 1:
        utils.warn("dak check-archive accepts only one argument")
        usage(1)
    mode = args[0].lower()

    # Initialize DB
    DBConn()

    handlers = {
        "checksums": check_checksums,
        "files": check_files,
        "dsc-syntax": check_dscs,
        "missing-overrides": check_override,
        "source-in-one-dir": check_source_in_one_dir,
        "timestamps": check_timestamps,
        "files-in-dsc": check_files_in_dsc,
        "validate-indices": check_indices_files_exist,
        "files-not-symlinks": check_files_not_symlinks,
        "validate-builddeps": check_build_depends,
        "add-missing-source-checksums": add_missing_source_checksums,
    }
    handler = handlers.get(mode)
    if handler is None:
        utils.warn("unknown mode '%s'" % (mode))
        usage(1)
    handler()
716################################################################################
# Run the command-line interface only when executed as a script.
if __name__ == "__main__":
    main()