Coverage for dak/check_archive.py: 13%

270 statements  

« prev     ^ index     » next       coverage.py v7.6.0, created at 2026-01-04 16:18 +0000

1#! /usr/bin/env python3 

2 

3"""Various different sanity checks 

4 

5@contact: Debian FTP Master <ftpmaster@debian.org> 

6@copyright: (C) 2000, 2001, 2002, 2003, 2004, 2006 James Troup <james@nocrew.org> 

7@license: GNU General Public License version 2 or later 

8""" 

9 

10# This program is free software; you can redistribute it and/or modify 

11# it under the terms of the GNU General Public License as published by 

12# the Free Software Foundation; either version 2 of the License, or 

13# (at your option) any later version. 

14 

15# This program is distributed in the hope that it will be useful, 

16# but WITHOUT ANY WARRANTY; without even the implied warranty of 

17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

18# GNU General Public License for more details. 

19 

20# You should have received a copy of the GNU General Public License 

21# along with this program; if not, write to the Free Software 

22# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 

23 

24################################################################################ 

25 

26# And, lo, a great and menacing voice rose from the depths, and with 

27# great wrath and vehemence it's voice boomed across the 

28# land... ``hehehehehehe... that *tickles*'' 

29# -- aj on IRC 

30 

31################################################################################ 

32 

33import errno 

34import os 

35import stat 

36import sys 

37import time 

38from collections.abc import Iterable 

39from typing import NoReturn, cast 

40 

41import apt_pkg 

42from sqlalchemy import sql 

43from sqlalchemy.engine import CursorResult 

44 

45from daklib import utils 

46from daklib.config import Config 

47from daklib.dak_exceptions import InvalidDscError 

48from daklib.dbconn import ( 

49 Archive, 

50 ArchiveFile, 

51 DBConn, 

52 DBSource, 

53 DSCFile, 

54 PoolFile, 

55 get_component_names, 

56 get_or_set_metadatakey, 

57 get_suite, 

58 get_suite_architectures, 

59) 

60 

61################################################################################ 

62 

63db_files: dict = {} #: Cache of filenames as known by the database 

64waste = 0.0 #: How many bytes are "wasted" by files not referenced in database 

65excluded: dict = {} #: List of files which are excluded from files check 

66current_file: str | None = None 

67future_files: dict[str, int] = {} 

68current_time = time.time() #: now() 

69 

70################################################################################ 

71 

72 

def usage(exit_code: int = 0) -> NoReturn:
    """
    Print the command line help to stdout and terminate the process.

    :param exit_code: status passed to :func:`sys.exit`
    """
    print(
        """Usage: dak check-archive MODE
Run various sanity checks of the archive and/or database.

  -h, --help                show this help and exit.

The following MODEs are available:

  checksums          - validate the checksums stored in the database
  files              - check files in the database against what's in the archive
  dsc-syntax         - validate the syntax of .dsc files in the archive
  missing-overrides  - check for missing overrides
  source-in-one-dir  - ensure the source for each package is in one directory
  timestamps         - check for future timestamps in .deb's
  files-in-dsc       - ensure each .dsc references appropriate Files
  validate-indices   - ensure files mentioned in Packages & Sources exist
  files-not-symlinks - check files in the database aren't symlinks
  validate-builddeps - validate build-dependencies of .dsc files in the archive
  add-missing-source-checksums - add missing checksums for source packages
"""
    )
    sys.exit(exit_code)

96 

97 

98################################################################################ 

99 

100 

def process_dir(dirname: str, filenames: dict) -> None:
    """
    Print every regular, non-symlink file below `dirname` that is unknown.

    A file counts as unknown when its absolute path is in neither the global
    :data:`db_files` nor the global :data:`excluded` dictionary.  The size of
    each such file is added to the global :data:`waste` counter.

    :param dirname: the directory to look at
    :param filenames: names of the entries inside `dirname` to examine
    """
    global waste

    # Directories that are skipped wholesale.  "proposed-updates" is a hack:
    # .changes files can't be handled here.
    skipped_markers = ("/disks-", "upgrade-", "proposed-updates")
    if any(marker in dirname for marker in skipped_markers):
        return

    for entry in filenames:
        candidate = os.path.abspath(os.path.join(dirname, entry))
        if not os.path.isfile(candidate) or os.path.islink(candidate):
            continue
        if candidate in db_files or candidate in excluded:
            continue
        waste += os.stat(candidate)[stat.ST_SIZE]
        print("%s" % (candidate))

126 

127 

128################################################################################ 

129 

130 

def check_files() -> None:
    """
    Cross-check the files recorded in the database against the archive pools.

    Prints one line per problem:

    - ``MISSING-ARCHIVE-FILE``: a binary or source file is associated with a
      suite but has no ``files_archive_map`` row for that suite's archive.
    - ``MISSING-FILE``: an :class:`ArchiveFile` whose path does not exist.
    - ``UNEXPECTED-FILE``: a file found below an archive's ``pool`` directory
      that no :class:`ArchiveFile` accounts for.
    """
    session = DBConn().session()

    missing_binaries = """
    SELECT archive.name, suite.suite_name, f.filename
      FROM binaries b
      JOIN bin_associations ba ON b.id = ba.bin
      JOIN suite ON ba.suite = suite.id
      JOIN archive ON suite.archive_id = archive.id
      JOIN files f ON b.file = f.id
     WHERE NOT EXISTS (SELECT 1 FROM files_archive_map af
                        WHERE af.archive_id = suite.archive_id
                          AND af.file_id = b.file)
     ORDER BY archive.name, suite.suite_name, f.filename
    """
    missing_sources = """
    SELECT archive.name, suite.suite_name, f.filename
      FROM source s
      JOIN src_associations sa ON s.id = sa.source
      JOIN suite ON sa.suite = suite.id
      JOIN archive ON suite.archive_id = archive.id
      JOIN dsc_files df ON s.id = df.source
      JOIN files f ON df.file = f.id
     WHERE NOT EXISTS (SELECT 1 FROM files_archive_map af
                        WHERE af.archive_id = suite.archive_id
                          AND af.file_id = df.file)
     ORDER BY archive.name, suite.suite_name, f.filename
    """
    # Binaries are reported first, then sources.
    for statement in (missing_binaries, missing_sources):
        for row in session.execute(sql.text(statement)):
            print("MISSING-ARCHIVE-FILE {0} {1} {2}".format(*row))

    archive_files = (
        session.query(ArchiveFile)
        .join(ArchiveFile.archive)
        .join(ArchiveFile.file)
        .order_by(Archive.archive_name, PoolFile.filename)
    )

    # Every path the database expects to find on disk.
    expected_files = set()
    for af in archive_files:
        path = af.path
        expected_files.add(path)
        if os.path.exists(path):
            continue
        print(
            "MISSING-FILE {0} {1} {2}".format(
                af.archive.archive_name, af.file.filename, path
            )
        )

    # Walk each archive's pool and report anything not in the expected set.
    for a in session.query(Archive).order_by(Archive.archive_name):
        pool_root = os.path.join(a.path, "pool")
        for dirpath, _dirnames, filenames in os.walk(pool_root):
            for fn in filenames:
                path = os.path.join(dirpath, fn)
                if path not in expected_files:
                    print("UNEXPECTED-FILE {0} {1}".format(a.archive_name, path))

197 

198 

199################################################################################ 

200 

201 

def check_dscs() -> None:
    """
    Parse every .dsc file in the archive and check for its validity.

    Each problem (syntax error, bad encoding, missing file, any other parse
    error) is reported with :func:`utils.warn`; a summary warning with the
    number of bad files follows if there was at least one.  An
    :class:`OSError` other than ``ENOENT`` is re-raised.
    """
    invalid = 0

    sources = (
        DBConn().session().query(DBSource).order_by(DBSource.source, DBSource.version)
    )
    for src in sources:
        f = src.poolfile.fullpath
        problem = None
        try:
            utils.parse_changes(f, signing_rules=1, dsc_file=True)
        except InvalidDscError:
            problem = "syntax error in .dsc file %s" % f
        except UnicodeDecodeError:
            problem = "found invalid dsc file (%s), not properly utf-8 encoded" % f
        except OSError as e:
            # Only a missing file is an expected condition here.
            if e.errno != errno.ENOENT:
                raise
            problem = "missing dsc file (%s)" % f
        except Exception as e:
            problem = "miscellaneous error parsing dsc file (%s): %s" % (f, str(e))

        if problem is not None:
            utils.warn(problem)
            invalid += 1

    if invalid:
        utils.warn("Found %s invalid .dsc files." % (invalid))

233 

234 

235################################################################################ 

236 

237 

def check_override() -> None:
    """
    Check for missing overrides in stable and unstable.

    For each suite a heading is printed, followed by every binary package
    name and then every source package name that is associated with the
    suite but has no row in ``override`` for it.
    """
    session = DBConn().session()

    binaries_without_override = """
SELECT DISTINCT b.package FROM binaries b, bin_associations ba
 WHERE b.id = ba.bin AND ba.suite = :suiteid AND NOT EXISTS
       (SELECT 1 FROM override o WHERE o.suite = :suiteid AND o.package = b.package)"""
    sources_without_override = """
SELECT DISTINCT s.source FROM source s, src_associations sa
  WHERE s.id = sa.source AND sa.suite = :suiteid AND NOT EXISTS
       (SELECT 1 FROM override o WHERE o.suite = :suiteid and o.package = s.source)"""

    for suite_name in ("stable", "unstable"):
        print(suite_name)
        print("-" * len(suite_name))
        print()
        suite = get_suite(suite_name)
        assert suite is not None

        # Binaries first, then sources, both bound to the same suite id.
        for statement in (binaries_without_override, sources_without_override):
            result = session.execute(
                sql.text(statement), {"suiteid": suite.suite_id}
            )
            for record in result.fetchall():
                print(record[0])

274 

275 

276################################################################################ 

277 

278 

def check_source_in_one_dir() -> None:
    """
    Ensure that the source files for any given package are all in one
    directory so that 'apt-get source' works...

    A source package counts as broken when one of its files lives in a
    different directory than the first file seen and that directory holds no
    entry with the first file's basename.  Each offending file is printed,
    followed by a final count of broken source packages.
    """
    cnf = Config()
    session = DBConn().session()

    # Not the most enterprising method, but hey...
    broken_count = 0

    for s in session.query(DBSource).all():
        # The empty string means "no file seen yet" for this source.
        reference_dir = ""
        reference_file = ""
        broken = False

        pool_files = (
            session.query(PoolFile).join(DSCFile).filter_by(source_id=s.source_id)
        )
        for f in pool_files.all():
            filename = os.path.join(cnf["Dir::Root"], f.filename)
            path = os.path.dirname(filename)

            if reference_dir == "":
                reference_dir = path
                reference_file = filename
                continue
            if reference_dir == path:
                continue

            symlink = path + "/" + os.path.basename(reference_file)
            if not os.path.exists(symlink):
                broken = True
                print(
                    "WOAH, we got a live one here... %s [%s] {%s}"
                    % (filename, s.source_id, symlink)
                )

        if broken:
            broken_count += 1

    print(
        "Found %d source packages where the source is not all in one directory."
        % (broken_count)
    )

323 

324 

325################################################################################ 

326 

327 

def check_checksums() -> None:
    """
    Validate the size and checksums of every pool file known to the database.

    Each file's on-disk size, md5sum, sha1sum and sha256sum are compared with
    the values stored on its :class:`PoolFile` row.  Files that cannot be
    opened and any mismatches are reported with :func:`utils.warn`; the scan
    then continues with the next file.
    """
    print("Getting file information from database...")
    q = DBConn().session().query(PoolFile)

    print("Checking file checksums & sizes...")
    for f in q:
        filename = f.fullpath

        # Only I/O failures are expected here; a bare ``except`` would also
        # swallow KeyboardInterrupt and SystemExit.
        try:
            fi = open(filename, "rb")
        except OSError:
            utils.warn("can't open '%s'." % (filename))
            continue

        # The context manager closes the file even if a hash call raises.
        with fi:
            # Stat the open descriptor so size and hashes describe the same file.
            size = os.fstat(fi.fileno()).st_size
            if size != f.filesize:
                utils.warn(
                    "**WARNING** size mismatch for '%s' ('%s' [current] vs. '%s' [db])."
                    % (filename, size, f.filesize)
                )

            md5sum = apt_pkg.md5sum(fi)
            if md5sum != f.md5sum:
                utils.warn(
                    "**WARNING** md5sum mismatch for '%s' ('%s' [current] vs. '%s' [db])."
                    % (filename, md5sum, f.md5sum)
                )

            # Rewind before each further hash; the previous one consumed the file.
            fi.seek(0)
            sha1sum = apt_pkg.sha1sum(fi)  # type: ignore[attr-defined]
            if sha1sum != f.sha1sum:
                utils.warn(
                    "**WARNING** sha1sum mismatch for '%s' ('%s' [current] vs. '%s' [db])."
                    % (filename, sha1sum, f.sha1sum)
                )

            fi.seek(0)
            sha256sum = apt_pkg.sha256sum(fi)  # type: ignore[attr-defined]
            if sha256sum != f.sha256sum:
                utils.warn(
                    "**WARNING** sha256sum mismatch for '%s' ('%s' [current] vs. '%s' [db])."
                    % (filename, sha256sum, f.sha256sum)
                )

    print("Done.")

377 

378 

379################################################################################ 

380# 

381 

382 

def Ent(Kind, Name, Link, Mode, UID, GID, Size, MTime: int, Major, Minor) -> None:
    """
    Record and print an entry whose modification time lies in the future.

    The commented-out body of :func:`check_timestamps` passes this function
    to ``apt_inst.debExtract`` as its callback.  The global
    :data:`current_file` must be set before it is called.

    :param MTime: modification time, compared against :data:`current_time`
    """
    assert current_file is not None

    if MTime <= current_time:
        return

    # Mutating the module-level dict needs no ``global`` declaration.
    future_files[current_file] = MTime
    details = (
        current_file,
        Kind,
        Name,
        Link,
        Mode,
        UID,
        GID,
        Size,
        MTime,
        Major,
        Minor,
    )
    print("%s: %s '%s','%s',%u,%u,%u,%u,%u,%u,%u" % details)

405 

406 

def check_timestamps() -> None:
    """
    Check all files for timestamps in the future; common from hardware
    (e.g. alpha) which have far-future dates as their default dates.

    This check is currently disabled: the function returns immediately and
    the former implementation is kept below as a comment.
    """
    # global current_file
    #
    # q = DBConn().session().query(PoolFile).filter(PoolFile.filename.like(".deb$"))
    #
    # db_files.clear()
    # count = 0
    #
    # for pf in q.all():
    #     filename = os.path.abspath(os.path.join(pf.location.path, pf.filename))
    #     if os.access(filename, os.R_OK):
    #         with open(filename) as f:
    #             current_file = filename
    #             print("Processing %s." % (filename), file=sys.stderr)
    #             apt_inst.debExtract(f, Ent, "control.tar.gz")
    #             f.seek(0)
    #             apt_inst.debExtract(f, Ent, "data.tar.gz")
    #             count += 1
    #
    # print("Checked %d files (out of %d)." % (count, len(db_files)))
    return None

433 

434 

435################################################################################ 

436 

437 

def check_files_in_dsc() -> None:
    """
    Ensure each .dsc lists appropriate files in its Files field (according
    to the format announced in its Format field).

    This check is currently disabled: the function returns immediately and
    the former implementation is kept below as a comment.
    """
    # count = 0
    #
    # print("Building list of database files...")
    # q = DBConn().session().query(PoolFile).filter(PoolFile.filename.like(".dsc$"))
    #
    # if q.count() > 0:
    #     print("Checking %d files..." % q.count())
    # else:
    #     print("No files to check.")
    #
    # cnf = Config()
    # for pf in q.all():
    #     filename = os.path.abspath(os.path.join(cnf["Dir::Root"], pf.filename))
    #
    #     try:
    #         # NB: don't enforce .dsc syntax
    #         dsc = utils.parse_changes(filename, dsc_file=True)
    #     except:
    #         utils.fubar("error parsing .dsc file '%s'." % (filename))
    #
    #     reasons = utils.check_dsc_files(filename, dsc)
    #     for r in reasons:
    #         utils.warn(r)
    #
    #     if len(reasons) > 0:
    #         count += 1
    #
    # if count:
    #     utils.warn("Found %s invalid .dsc files." % (count))
    return None

474 

475 

476################################################################################ 

477 

478 

def validate_sources(suite: str, component: str) -> None:
    """
    Ensure files mentioned in Sources exist

    Reads the (possibly compressed) ``Sources`` index of the given suite and
    component below ``Dir::Root`` and prints a line for every listed file
    that does not exist on disk.

    :param suite: suite name used in the ``dists/`` path
    :param component: component name used in the ``dists/`` path
    """
    cnf = Config()
    index = "%s/dists/%s/%s/source/Sources" % (cnf["Dir::Root"], suite, component)
    index = utils.find_possibly_compressed_file(index)
    print("Processing %s..." % (index))
    with apt_pkg.TagFile(index) as Sources:
        while Sources.step():  # type: ignore[attr-defined]
            section: apt_pkg.TagSection = Sources.section  # type: ignore[attr-defined]
            source = section.find("Package")
            directory = section.find("Directory")
            files = section.find("Files")
            for entry in files.split("\n"):
                # Each entry is "<md5> <size> <name>"; only the name is used.
                (_md5, _size, name) = entry.split()
                target = "%s/%s/%s" % (cnf["Dir::Root"], directory, name)
                if os.path.exists(target):
                    continue

                if directory.find("potato") == -1:
                    print("W: %s missing." % (target))
                    continue

                pool_location = utils.poolify(source)
                pool_filename = "%s/%s/%s" % (
                    cnf["Dir::Pool"],
                    pool_location,
                    name,
                )
                if not os.path.exists(pool_filename):
                    print("E: %s missing (%s)." % (target, pool_filename))
                    continue

                # Only the symlink that would be needed is reported;
                # nothing is created here.
                pool_filename = os.path.normpath(pool_filename)
                target = os.path.normpath(target)
                src = utils.clean_symlink(pool_filename, target, cnf["Dir::Root"])
                print("Symlinking: %s -> %s" % (target, src))

516 

517 

518######################################## 

519 

520 

def validate_packages(suite: str, component: str, architecture: str) -> None:
    """
    Ensure files mentioned in Packages exist

    Reads the (possibly compressed) ``Packages`` index for the given suite,
    component and architecture below ``Dir::Root`` and prints a warning for
    every ``Filename`` entry that does not exist on disk.

    :param suite: suite name used in the ``dists/`` path
    :param component: component name used in the ``dists/`` path
    :param architecture: architecture used in the ``binary-<arch>`` path
    """
    cnf = Config()
    index = "%s/dists/%s/%s/binary-%s/Packages" % (
        cnf["Dir::Root"],
        suite,
        component,
        architecture,
    )
    index = utils.find_possibly_compressed_file(index)
    print("Processing %s..." % (index))
    with apt_pkg.TagFile(index) as Packages:
        while Packages.step():  # type: ignore[attr-defined]
            section: apt_pkg.TagSection = Packages.section  # type: ignore[attr-defined]
            candidate = "%s/%s" % (cnf["Dir::Root"], section.find("Filename"))
            if os.path.exists(candidate):
                continue
            print("W: %s missing." % (candidate))

540 

541 

542######################################## 

543 

544 

def check_indices_files_exist() -> None:
    """
    Ensure files mentioned in Packages & Sources exist

    Runs :func:`validate_sources` or :func:`validate_packages` for every
    component and architecture of stable, testing and unstable.  The
    pseudo-architecture ``all`` is skipped.
    """
    for suite in ("stable", "testing", "unstable"):
        for component in get_component_names():
            arch_names = [
                entry.arch_string.lower() for entry in get_suite_architectures(suite)
            ]
            for arch in arch_names:
                if arch == "all":
                    continue
                if arch == "source":
                    validate_sources(suite, component)
                else:
                    validate_packages(suite, component, arch)

559 

560 

561################################################################################ 

562 

563 

def check_files_not_symlinks() -> None:
    """
    Check files in the database aren't symlinks

    This check is currently disabled: the function returns immediately and
    the former implementation is kept below as a comment.
    """
    # print("Building list of database files... ", end=" ")
    # q = DBConn().session().query(PoolFile).filter(PoolFile.filename.like(".dsc$"))
    #
    # for pf in q.all():
    #     filename = os.path.abspath(os.path.join(pf.location.path, pf.filename))
    #     if os.access(filename, os.R_OK) == 0:
    #         utils.warn("%s: doesn't exist." % (filename))
    #     else:
    #         if os.path.islink(filename):
    #             utils.warn("%s: is a symlink." % (filename))
    return None

580 

581 

582################################################################################ 

583 

584 

def chk_bd_process_dir(dirname: str, filenames: Iterable[str]) -> None:
    """
    Validate the build-dependency fields of every .dsc file in a directory.

    Entries not ending in ``.dsc`` are ignored.  For each .dsc file the
    ``build-depends`` and ``build-depends-indep`` fields are handed to
    :func:`apt_pkg.parse_src_depends`; a field that fails to parse is
    reported on stdout as an ``E:`` line and processing continues.

    :param dirname: directory containing the entries
    :param filenames: names of the entries inside `dirname`
    """
    for name in filenames:
        if not name.endswith(".dsc"):
            continue
        filename = os.path.abspath(os.path.join(dirname, name))
        dsc = utils.parse_changes(filename, dsc_file=True)
        for field_name in ("build-depends", "build-depends-indep"):
            field = dsc.get(field_name)
            if not field:
                continue
            try:
                apt_pkg.parse_src_depends(field)
            except Exception:
                # Stay best-effort for parse failures, but let
                # KeyboardInterrupt and SystemExit through.
                print("E: [%s] %s: %s" % (filename, field_name, field))

598 

599 

600################################################################################ 

601 

602 

def check_build_depends() -> None:
    """
    Validate build-dependencies of .dsc files in the archive

    Walks the whole tree below ``Dir::Root`` and hands each directory's
    file list to :func:`chk_bd_process_dir`.
    """
    root = Config()["Dir::Root"]
    for dirpath, _dirnames, filenames in os.walk(root):
        chk_bd_process_dir(dirpath, filenames)

608 

609 

610################################################################################ 

611 

612 

613_add_missing_source_checksums_query = R""" 

614INSERT INTO source_metadata 

615 (src_id, key_id, value) 

616SELECT 

617 s.id, 

618 :checksum_key, 

619 E'\n' || 

620 (SELECT STRING_AGG(' ' || tmp.checksum || ' ' || tmp.size || ' ' || tmp.basename, E'\n' ORDER BY tmp.basename) 

621 FROM 

622 (SELECT 

623 CASE :checksum_type 

624 WHEN 'Files' THEN f.md5sum 

625 WHEN 'Checksums-Sha1' THEN f.sha1sum 

626 WHEN 'Checksums-Sha256' THEN f.sha256sum 

627 END AS checksum, 

628 f.size, 

629 SUBSTRING(f.filename FROM E'/([^/]*)\\Z') AS basename 

630 FROM files f JOIN dsc_files ON f.id = dsc_files.file 

631 WHERE dsc_files.source = s.id AND f.id != s.file 

632 ) AS tmp 

633 ) 

634 

635 FROM 

636 source s 

637 WHERE NOT EXISTS (SELECT 1 FROM source_metadata md WHERE md.src_id=s.id AND md.key_id = :checksum_key); 

638""" 

639 

640 

def add_missing_source_checksums() -> None:
    """
    Add missing source checksums to source_metadata

    Runs the insert statement once for each of the ``Files``,
    ``Checksums-Sha1`` and ``Checksums-Sha256`` metadata keys, prints how
    many rows were added for a key when there were any, and commits once at
    the end.
    """
    session = DBConn().session()
    statement = _add_missing_source_checksums_query

    for checksum in ("Files", "Checksums-Sha1", "Checksums-Sha256"):
        checksum_key = get_or_set_metadatakey(checksum, session).key_id
        result = cast(
            CursorResult,
            session.execute(
                sql.text(statement),
                {"checksum_key": checksum_key, "checksum_type": checksum},
            ),
        )
        inserted = result.rowcount
        if inserted > 0:
            print("Added {0} missing entries for {1}".format(inserted, checksum))

    session.commit()

656 

657 

658################################################################################ 

659 

660 

def main() -> None:
    """
    Command line entry point: parse the arguments and run the requested MODE.

    Shows the help and exits when ``-h``/``--help`` is given, and exits with
    status 1 via :func:`usage` when the number of arguments is not exactly
    one or the mode is unknown.
    """
    cnf = Config()

    Arguments = [("h", "help", "Check-Archive::Options::Help")]
    # Make sure the option exists before the command line is parsed.
    help_key = "Check-Archive::Options::help"
    if help_key not in cnf:
        cnf[help_key] = ""

    args = apt_pkg.parse_commandline(cnf.Cnf, Arguments, sys.argv)  # type: ignore[attr-defined]

    Options = cnf.subtree("Check-Archive::Options")
    if Options["Help"]:
        usage()

    if not args:
        utils.warn("dak check-archive requires at least one argument")
        usage(1)
    if len(args) > 1:
        utils.warn("dak check-archive accepts only one argument")
        usage(1)
    mode = args[0].lower()

    # Initialize DB
    DBConn()

    handlers = {
        "checksums": check_checksums,
        "files": check_files,
        "dsc-syntax": check_dscs,
        "missing-overrides": check_override,
        "source-in-one-dir": check_source_in_one_dir,
        "timestamps": check_timestamps,
        "files-in-dsc": check_files_in_dsc,
        "validate-indices": check_indices_files_exist,
        "files-not-symlinks": check_files_not_symlinks,
        "validate-builddeps": check_build_depends,
        "add-missing-source-checksums": add_missing_source_checksums,
    }
    handler = handlers.get(mode)
    if handler is None:
        utils.warn("unknown mode '%s'" % (mode))
        usage(1)
    else:
        handler()

714 

715 

716################################################################################ 

717 

718 

# Run the checks only when executed as a script, not when imported.
if __name__ == "__main__":
    main()