Coverage for dak/check_archive.py: 14%

260 statements  

« prev     ^ index     » next       coverage.py v7.6.0, created at 2026-05-10 21:38 +0000

1#! /usr/bin/env python3 

2 

3"""Various different sanity checks 

4 

5@contact: Debian FTP Master <ftpmaster@debian.org> 

6@copyright: (C) 2000, 2001, 2002, 2003, 2004, 2006 James Troup <james@nocrew.org> 

7@license: GNU General Public License version 2 or later 

8""" 

9 

10# This program is free software; you can redistribute it and/or modify 

11# it under the terms of the GNU General Public License as published by 

12# the Free Software Foundation; either version 2 of the License, or 

13# (at your option) any later version. 

14 

15# This program is distributed in the hope that it will be useful, 

16# but WITHOUT ANY WARRANTY; without even the implied warranty of 

17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

18# GNU General Public License for more details. 

19 

20# You should have received a copy of the GNU General Public License 

21# along with this program; if not, write to the Free Software 

22# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 

23 

24################################################################################ 

25 

26# And, lo, a great and menacing voice rose from the depths, and with 

27# great wrath and vehemence it's voice boomed across the 

28# land... ``hehehehehehe... that *tickles*'' 

29# -- aj on IRC 

30 

31################################################################################ 

32 

33import errno 

34import os 

35import stat 

36import sys 

37import time 

38from collections.abc import Iterable 

39from typing import NoReturn, cast 

40 

41import apt_pkg 

42from sqlalchemy import sql 

43from sqlalchemy.engine import CursorResult 

44 

45from daklib import utils 

46from daklib.config import Config 

47from daklib.dak_exceptions import InvalidDscError 

48from daklib.dbconn import ( 

49 Archive, 

50 ArchiveFile, 

51 DBConn, 

52 DBSource, 

53 DSCFile, 

54 PoolFile, 

55 get_component_names, 

56 get_or_set_metadatakey, 

57 get_suite, 

58 get_suite_architectures, 

59) 

60 

61################################################################################ 

62 

#: Cache of filenames as known by the database.
db_files: dict = {}
#: How many bytes are "wasted" by files not referenced in the database.
waste = 0.0
#: Files which are excluded from the files check (stored as a dict).
excluded: dict = {}
#: Path of the file currently being inspected; Ent() reads it and asserts it is set.
current_file: str | None = None
#: Maps a file path to the future mtime reported for it; filled in by Ent().
future_files: dict[str, int] = {}
#: Timestamp captured once when the module is loaded; Ent() treats it as "now".
current_time = time.time()

69 

70################################################################################ 

71 

72 

def usage(exit_code=0) -> NoReturn:
    """Print the ``dak check-archive`` help text and terminate the process.

    :param exit_code: status passed to :func:`sys.exit` (0 by default)
    """
    help_text = """Usage: dak check-archive MODE
Run various sanity checks of the archive and/or database.

 -h, --help show this help and exit.

The following MODEs are available:

 checksums - validate the checksums stored in the database
 files - check files in the database against what's in the archive
 dsc-syntax - validate the syntax of .dsc files in the archive
 missing-overrides - check for missing overrides
 source-in-one-dir - ensure the source for each package is in one directory
 timestamps - check for future timestamps in .deb's
 files-in-dsc - ensure each .dsc references appropriate Files
 validate-indices - ensure files mentioned in Packages & Sources exist
 files-not-symlinks - check files in the database aren't symlinks
 validate-builddeps - validate build-dependencies of .dsc files in the archive
 add-missing-source-checksums - add missing checksums for source packages
"""
    print(help_text)
    sys.exit(exit_code)

96 

97 

98################################################################################ 

99 

100 

def check_files() -> None:
    """
    Compare the database's record of archive files with the filesystem.

    One line is printed to stdout per finding:

    - ``MISSING-ARCHIVE-FILE archive suite filename``: a binary or source file
      is associated with a suite but has no ``files_archive_map`` row for
      that suite's archive;
    - ``MISSING-FILE archive filename path``: an ``ArchiveFile`` whose path
      does not exist on disk;
    - ``UNEXPECTED-FILE archive path``: a file found below an archive's
      ``pool`` directory that is not the path of any ``ArchiveFile``.
    """
    session = DBConn().session()

    binaries_outside_archive = """
        SELECT archive.name, suite.suite_name, f.filename
          FROM binaries b
          JOIN bin_associations ba ON b.id = ba.bin
          JOIN suite ON ba.suite = suite.id
          JOIN archive ON suite.archive_id = archive.id
          JOIN files f ON b.file = f.id
         WHERE NOT EXISTS (SELECT 1 FROM files_archive_map af
                            WHERE af.archive_id = suite.archive_id
                              AND af.file_id = b.file)
         ORDER BY archive.name, suite.suite_name, f.filename
        """
    sources_outside_archive = """
        SELECT archive.name, suite.suite_name, f.filename
          FROM source s
          JOIN src_associations sa ON s.id = sa.source
          JOIN suite ON sa.suite = suite.id
          JOIN archive ON suite.archive_id = archive.id
          JOIN dsc_files df ON s.id = df.source
          JOIN files f ON df.file = f.id
         WHERE NOT EXISTS (SELECT 1 FROM files_archive_map af
                            WHERE af.archive_id = suite.archive_id
                              AND af.file_id = df.file)
         ORDER BY archive.name, suite.suite_name, f.filename
        """
    # Binaries first, then sources; both passes report in the same format.
    for statement in (binaries_outside_archive, sources_outside_archive):
        for archive_name, suite_name, filename in session.execute(sql.text(statement)):
            print(f"MISSING-ARCHIVE-FILE {archive_name} {suite_name} {filename}")

    registered = (
        session.query(ArchiveFile)
        .join(ArchiveFile.archive)
        .join(ArchiveFile.file)
        .order_by(Archive.archive_name, PoolFile.filename)
    )

    # Every path the database knows about, whether or not it exists on disk.
    known_paths = set()
    for entry in registered:
        location = entry.path
        known_paths.add(location)
        if os.path.exists(location):
            continue
        print(
            f"MISSING-FILE {entry.archive.archive_name} {entry.file.filename} {location}"
        )

    # Walk each archive's pool and flag anything the database did not list.
    for archive in session.query(Archive).order_by(Archive.archive_name):
        pool_root = os.path.join(archive.path, "pool")
        for directory, _subdirs, names in os.walk(pool_root):
            for candidate in (os.path.join(directory, n) for n in names):
                if candidate not in known_paths:
                    print(f"UNEXPECTED-FILE {archive.archive_name} {candidate}")

167 

168 

169################################################################################ 

170 

171 

def check_dscs() -> None:
    """
    Parse every .dsc file in the archive and check for its validity.

    Sources are visited ordered by name and version. Each file that fails to
    parse is reported through ``utils.warn`` and counted; a summary warning
    with the total follows if at least one was found. An ``OSError`` other
    than ``ENOENT`` is re-raised.
    """
    invalid = 0
    sources = (
        DBConn().session().query(DBSource).order_by(DBSource.source, DBSource.version)
    )

    for src in sources:
        dsc_path = src.poolfile.fullpath
        problem = None
        try:
            utils.parse_changes(dsc_path, signing_rules=1, dsc_file=True)
        except InvalidDscError:
            problem = "syntax error in .dsc file %s" % dsc_path
        except UnicodeDecodeError:
            problem = (
                "found invalid dsc file (%s), not properly utf-8 encoded" % dsc_path
            )
        except OSError as e:
            # Only a missing file is a finding; other I/O failures are fatal.
            if e.errno != errno.ENOENT:
                raise
            problem = "missing dsc file (%s)" % dsc_path
        except Exception as e:
            problem = "miscellaneous error parsing dsc file (%s): %s" % (
                dsc_path,
                str(e),
            )

        if problem is not None:
            utils.warn(problem)
            invalid += 1

    if invalid:
        utils.warn("Found %s invalid .dsc files." % (invalid))

203 

204 

205################################################################################ 

206 

207 

def check_override() -> None:
    """
    Check for missing overrides in stable and unstable.

    For each suite a heading is printed, followed by the name of every binary
    package and then every source package that is associated with the suite
    but has no row in ``override`` for it.
    """
    session = DBConn().session()

    binaries_without_override = """
SELECT DISTINCT b.package FROM binaries b, bin_associations ba
 WHERE b.id = ba.bin AND ba.suite = :suiteid AND NOT EXISTS
 (SELECT 1 FROM override o WHERE o.suite = :suiteid AND o.package = b.package)"""
    sources_without_override = """
SELECT DISTINCT s.source FROM source s, src_associations sa
 WHERE s.id = sa.source AND sa.suite = :suiteid AND NOT EXISTS
 (SELECT 1 FROM override o WHERE o.suite = :suiteid and o.package = s.source)"""

    for suite_name in ("stable", "unstable"):
        # Heading: suite name underlined with dashes, then a blank line.
        print(suite_name)
        print("-" * len(suite_name))
        print()

        suite = get_suite(suite_name)
        assert suite is not None

        for statement in (binaries_without_override, sources_without_override):
            result = session.execute(sql.text(statement), {"suiteid": suite.suite_id})
            for row in result.fetchall():
                print(row[0])

244 

245 

246################################################################################ 

247 

248 

def check_source_in_one_dir() -> None:
    """
    Ensure that the source files for any given package are all in one
    directory so that 'apt-get source' works...

    The directory of the first file of a source package is the reference.
    A file in a different directory is tolerated only if that directory
    contains an entry named like the first file; otherwise the package is
    reported and counted as broken. A total is printed at the end.
    """
    cnf = Config()
    session = DBConn().session()

    # Not the most enterprising method, but hey...
    broken_count = 0

    for src in session.query(DBSource).all():
        reference_dir = ""
        reference_file = ""
        scattered = False

        pool_files = (
            session.query(PoolFile).join(DSCFile).filter_by(source_id=src.source_id)
        )
        for pool_file in pool_files.all():
            full_name = os.path.join(cnf["Dir::Root"], pool_file.filename)
            directory = os.path.dirname(full_name)

            if reference_dir == "":
                # First file seen: it defines where the rest should live.
                reference_dir, reference_file = directory, full_name
                continue
            if reference_dir == directory:
                continue

            expected_link = directory + "/" + os.path.basename(reference_file)
            if os.path.exists(expected_link):
                continue

            scattered = True
            print(
                "WOAH, we got a live one here... %s [%s] {%s}"
                % (full_name, src.source_id, expected_link)
            )

        if scattered:
            broken_count += 1

    print(
        "Found %d source packages where the source is not all in one directory."
        % (broken_count)
    )

293 

294 

295################################################################################ 

296 

297 

def check_checksums() -> None:
    """
    Validate the size and the MD5, SHA-1 and SHA-256 sums of every pool file
    against the values stored in the database.

    A file that cannot be opened is reported through ``utils.warn`` and
    skipped. Every mismatch is reported through ``utils.warn`` as well;
    checking continues with the remaining digests and files.
    """
    print("Getting file information from database...")
    pool_files = DBConn().session().query(PoolFile)

    print("Checking file checksums & sizes...")
    for f in pool_files:
        filename = f.fullpath

        try:
            # Pool files are binary data, so open them as such.
            fi = open(filename, "rb")
        except Exception:
            # Best effort: any failure to open is a finding, not a crash.
            # Unlike a bare ``except:`` this lets Ctrl-C / SystemExit through.
            utils.warn("can't open '%s'." % (filename))
            continue

        # The context manager closes the handle even if a hash call raises.
        with fi:
            # Stat the descriptor we hold rather than looking the path up again.
            size = os.fstat(fi.fileno()).st_size
            if size != f.filesize:
                utils.warn(
                    "**WARNING** size mismatch for '%s' ('%s' [current] vs. '%s' [db])."
                    % (filename, size, f.filesize)
                )

            digests = (
                ("md5sum", apt_pkg.md5sum, f.md5sum),
                ("sha1sum", apt_pkg.sha1sum, f.sha1sum),  # type: ignore[attr-defined]
                ("sha256sum", apt_pkg.sha256sum, f.sha256sum),  # type: ignore[attr-defined]
            )
            for label, hasher, expected in digests:
                # Each digest must start from the beginning of the file.
                fi.seek(0)
                actual = hasher(fi)
                if actual != expected:
                    utils.warn(
                        "**WARNING** %s mismatch for '%s' ('%s' [current] vs. '%s' [db])."
                        % (label, filename, actual, expected)
                    )

    print("Done.")

347 

348 

349################################################################################ 

350# 

351 

352 

def Ent(Kind, Name, Link, Mode, UID, GID, Size, MTime: int, Major, Minor) -> None:
    """
    Record and print an archive member whose mtime lies in the future.

    Compares ``MTime`` with the module-level ``current_time``. When it is
    later, ``future_files[current_file]`` is set to ``MTime`` and one line
    describing the member is printed. Otherwise nothing happens.

    ``current_file`` must have been set by the caller beforehand.
    """
    assert current_file is not None

    if not MTime > current_time:
        return

    # The dict is mutated in place, so no ``global`` declaration is needed.
    future_files[current_file] = MTime
    member = (
        current_file,
        Kind,
        Name,
        Link,
        Mode,
        UID,
        GID,
        Size,
        MTime,
        Major,
        Minor,
    )
    print("%s: %s '%s','%s',%u,%u,%u,%u,%u,%u,%u" % member)

375 

376 

def check_timestamps() -> None:
    """
    Check all files for timestamps in the future; common from hardware
    (e.g. alpha) which have far-future dates as their default dates.

    This mode is currently disabled: the function returns at once without
    checking anything.
    """
    # Former implementation, kept for reference only.
    # NOTE(review): it uses apt_inst, which this module does not import, and
    # hands ".deb$" to like(), which looks like a regex anchor rather than a
    # LIKE pattern -- confirm both before re-enabling.
    #
    # global current_file
    #
    # q = DBConn().session().query(PoolFile).filter(PoolFile.filename.like(".deb$"))
    #
    # db_files.clear()
    # count = 0
    #
    # for pf in q.all():
    #     filename = os.path.abspath(os.path.join(pf.location.path, pf.filename))
    #     if os.access(filename, os.R_OK):
    #         with open(filename) as f:
    #             current_file = filename
    #             print("Processing %s." % (filename), file=sys.stderr)
    #             apt_inst.debExtract(f, Ent, "control.tar.gz")
    #             f.seek(0)
    #             apt_inst.debExtract(f, Ent, "data.tar.gz")
    #             count += 1
    #
    # print("Checked %d files (out of %d)." % (count, len(db_files)))
    return None

403 

404 

405################################################################################ 

406 

407 

def check_files_in_dsc() -> None:
    """
    Ensure each .dsc lists appropriate files in its Files field (according
    to the format announced in its Format field).

    This mode is currently disabled: the function returns at once without
    checking anything.
    """
    # Former implementation, kept for reference only.
    # NOTE(review): ".dsc$" passed to like() looks like a regex anchor rather
    # than a LIKE pattern -- confirm before re-enabling.
    #
    # count = 0
    #
    # print("Building list of database files...")
    # q = DBConn().session().query(PoolFile).filter(PoolFile.filename.like(".dsc$"))
    #
    # if q.count() > 0:
    #     print("Checking %d files..." % q.count())
    # else:
    #     print("No files to check.")
    #
    # cnf = Config()
    # for pf in q.all():
    #     filename = os.path.abspath(os.path.join(cnf["Dir::Root"], pf.filename))
    #
    #     try:
    #         # NB: don't enforce .dsc syntax
    #         dsc = utils.parse_changes(filename, dsc_file=True)
    #     except:
    #         utils.fubar("error parsing .dsc file '%s'." % (filename))
    #
    #     reasons = utils.check_dsc_files(filename, dsc)
    #     for r in reasons:
    #         utils.warn(r)
    #
    #     if len(reasons) > 0:
    #         count += 1
    #
    # if count:
    #     utils.warn("Found %s invalid .dsc files." % (count))
    return None

444 

445 

446################################################################################ 

447 

448 

def validate_sources(suite: str, component: str) -> None:
    """
    Ensure files mentioned in Sources exist.

    Reads the (possibly compressed) Sources index of ``suite``/``component``
    below ``Dir::Root`` and checks every entry of each stanza's ``Files``
    field. A missing file is reported with ``W:``. For directories containing
    "potato", the pool is consulted instead: ``E:`` if the pool copy is
    missing too, otherwise the symlink that would point at it is printed.
    """
    cnf = Config()
    index = "%s/dists/%s/%s/source/Sources" % (cnf["Dir::Root"], suite, component)
    index = utils.find_possibly_compressed_file(index)
    print("Processing %s..." % (index))

    with apt_pkg.TagFile(index) as Sources:
        while Sources.step():  # type: ignore[attr-defined]
            section: apt_pkg.TagSection = Sources.section  # type: ignore[attr-defined]
            source = section.find("Package")
            directory = section.find("Directory")
            files = section.find("Files")

            for entry in files.split("\n"):
                # Each entry is "<md5> <size> <name>"; only the name is used.
                (_md5, _size, name) = entry.split()
                target = "%s/%s/%s" % (cnf["Dir::Root"], directory, name)
                if os.path.exists(target):
                    continue

                if directory.find("potato") == -1:
                    print("W: %s missing." % (target))
                    continue

                pool_location = utils.poolify(source)
                pool_filename = "%s/%s/%s" % (
                    cnf["Dir::Pool"],
                    pool_location,
                    name,
                )
                if not os.path.exists(pool_filename):
                    print("E: %s missing (%s)." % (target, pool_filename))
                    continue

                # The link target is computed and printed; no symlink is
                # actually created here.
                pool_filename = os.path.normpath(pool_filename)
                target = os.path.normpath(target)
                src = utils.clean_symlink(pool_filename, target, cnf["Dir::Root"])
                print("Symlinking: %s -> %s" % (target, src))

486 

487 

488######################################## 

489 

490 

def validate_packages(suite: str, component: str, architecture: str) -> None:
    """
    Ensure files mentioned in Packages exist.

    Reads the (possibly compressed) Packages index of
    ``suite``/``component``/``binary-<architecture>`` below ``Dir::Root`` and
    prints a ``W:`` line for every stanza whose ``Filename`` is not on disk.
    """
    cnf = Config()
    index = utils.find_possibly_compressed_file(
        "%s/dists/%s/%s/binary-%s/Packages"
        % (cnf["Dir::Root"], suite, component, architecture)
    )
    print("Processing %s..." % (index))

    with apt_pkg.TagFile(index) as Packages:
        while Packages.step():  # type: ignore[attr-defined]
            section: apt_pkg.TagSection = Packages.section  # type: ignore[attr-defined]
            deb_path = "%s/%s" % (cnf["Dir::Root"], section.find("Filename"))
            if os.path.exists(deb_path):
                continue
            print("W: %s missing." % (deb_path))

510 

511 

512######################################## 

513 

514 

def check_indices_files_exist() -> None:
    """
    Ensure files mentioned in Packages & Sources exist.

    For each of stable, testing and unstable, and for every component,
    the Sources index is validated for the "source" architecture and the
    Packages index for every other architecture except "all".
    """
    # Neither lookup depends on the inner loop variables, so fetch the
    # component names once overall and the architectures once per suite
    # instead of re-querying them on every iteration.
    components = list(get_component_names())

    for suite in ["stable", "testing", "unstable"]:
        arches = [a.arch_string.lower() for a in get_suite_architectures(suite)]
        for component in components:
            for arch in arches:
                if arch == "source":
                    validate_sources(suite, component)
                elif arch == "all":
                    # No index is validated for the "all" pseudo-architecture.
                    continue
                else:
                    validate_packages(suite, component, arch)

529 

530 

531################################################################################ 

532 

533 

def check_files_not_symlinks() -> None:
    """
    Check files in the database aren't symlinks.

    This mode is currently disabled: the function returns at once without
    checking anything.
    """
    # Former implementation, kept for reference only.
    # NOTE(review): ".dsc$" passed to like() looks like a regex anchor rather
    # than a LIKE pattern -- confirm before re-enabling.
    #
    # print("Building list of database files... ", end=" ")
    # q = DBConn().session().query(PoolFile).filter(PoolFile.filename.like(".dsc$"))
    #
    # for pf in q.all():
    #     filename = os.path.abspath(os.path.join(pf.location.path, pf.filename))
    #     if os.access(filename, os.R_OK) == 0:
    #         utils.warn("%s: doesn't exist." % (filename))
    #     else:
    #         if os.path.islink(filename):
    #             utils.warn("%s: is a symlink." % (filename))
    return None

550 

551 

552################################################################################ 

553 

554 

def chk_bd_process_dir(dirname: str, filenames: Iterable[str]) -> None:
    """
    Validate the build-dependency fields of every .dsc file in a directory.

    :param dirname: directory the files live in
    :param filenames: names of the files inside ``dirname``; entries that do
        not end in ``.dsc`` are ignored

    Each .dsc is parsed with ``utils.parse_changes``; a non-empty
    ``build-depends`` or ``build-depends-indep`` field that
    ``apt_pkg.parse_src_depends`` rejects is reported with an ``E:`` line.
    Errors raised while parsing the .dsc itself are not caught here.
    """
    for name in filenames:
        if not name.endswith(".dsc"):
            continue
        filename = os.path.abspath(dirname + "/" + name)
        dsc = utils.parse_changes(filename, dsc_file=True)
        for field_name in ("build-depends", "build-depends-indep"):
            field = dsc.get(field_name)
            if not field:
                continue
            try:
                apt_pkg.parse_src_depends(field)
            except Exception:
                # Was a bare ``except:``, which also swallowed
                # KeyboardInterrupt and SystemExit during a long archive walk.
                print("E: [%s] %s: %s" % (filename, field_name, field))

568 

569 

570################################################################################ 

571 

572 

def check_build_depends() -> None:
    """Validate build-dependencies of .dsc files in the archive.

    Walks the whole tree below ``Dir::Root`` and hands the file names of each
    directory to :func:`chk_bd_process_dir`.
    """
    archive_root = Config()["Dir::Root"]
    for directory, _subdirs, names in os.walk(archive_root):
        chk_bd_process_dir(directory, names)

578 

579 

580################################################################################ 

581 

582 

# Inserts one source_metadata row per source package that lacks the given
# key. The value is a newline-led list of " <checksum> <size> <basename>"
# lines, ordered by basename, for every dsc_files entry of the source except
# the source's own file. :checksum_type selects which checksum column is used.
_add_missing_source_checksums_query = R"""
INSERT INTO source_metadata
  (src_id, key_id, value)
SELECT
  s.id,
  :checksum_key,
  E'\n' ||
    (SELECT STRING_AGG(' ' || tmp.checksum || ' ' || tmp.size || ' ' || tmp.basename, E'\n' ORDER BY tmp.basename)
     FROM
       (SELECT
            CASE :checksum_type
              WHEN 'Files' THEN f.md5sum
              WHEN 'Checksums-Sha1' THEN f.sha1sum
              WHEN 'Checksums-Sha256' THEN f.sha256sum
            END AS checksum,
            f.size,
            SUBSTRING(f.filename FROM E'/([^/]*)\\Z') AS basename
        FROM files f JOIN dsc_files ON f.id = dsc_files.file
        WHERE dsc_files.source = s.id AND f.id != s.file
       ) AS tmp
    )

  FROM
    source s
  WHERE NOT EXISTS (SELECT 1 FROM source_metadata md WHERE md.src_id=s.id AND md.key_id = :checksum_key);
"""


def add_missing_source_checksums() -> None:
    """Add missing source checksums to source_metadata.

    Runs the insert statement once for each of ``Files``, ``Checksums-Sha1``
    and ``Checksums-Sha256``, prints how many rows were added whenever that
    number is positive, and commits the session.
    """
    session = DBConn().session()
    for field in ("Files", "Checksums-Sha1", "Checksums-Sha256"):
        key_id = get_or_set_metadatakey(field, session).key_id
        result = session.execute(
            sql.text(_add_missing_source_checksums_query),
            {"checksum_key": key_id, "checksum_type": field},
        )
        inserted = cast(CursorResult, result).rowcount
        if inserted > 0:
            print("Added {0} missing entries for {1}".format(inserted, field))
    session.commit()

626 

627 

628################################################################################ 

629 

630 

def main() -> None:
    """
    Command-line entry point for ``dak check-archive``.

    Parses the command line, prints the help text when ``--help`` is given,
    insists on exactly one MODE argument (case-insensitive) and runs the
    check registered for it. An unknown mode, or a wrong number of
    arguments, produces a warning followed by the usage text with exit
    status 1.
    """
    cnf = Config()

    Arguments = [("h", "help", "Check-Archive::Options::Help")]
    # Make sure every option key exists so it can be looked up after parsing.
    for option in ["help"]:
        key = "Check-Archive::Options::%s" % option
        if key not in cnf:
            cnf[key] = ""

    args = apt_pkg.parse_commandline(cnf.Cnf, Arguments, sys.argv)  # type: ignore[attr-defined]

    Options = cnf.subtree("Check-Archive::Options")
    if Options["Help"]:
        usage()

    if len(args) < 1:
        utils.warn("dak check-archive requires at least one argument")
        usage(1)
    elif len(args) > 1:
        utils.warn("dak check-archive accepts only one argument")
        usage(1)
    mode = args[0].lower()

    # Initialize DB
    DBConn()

    handlers = {
        "checksums": check_checksums,
        "files": check_files,
        "dsc-syntax": check_dscs,
        "missing-overrides": check_override,
        "source-in-one-dir": check_source_in_one_dir,
        "timestamps": check_timestamps,
        "files-in-dsc": check_files_in_dsc,
        "validate-indices": check_indices_files_exist,
        "files-not-symlinks": check_files_not_symlinks,
        "validate-builddeps": check_build_depends,
        "add-missing-source-checksums": add_missing_source_checksums,
    }
    handler = handlers.get(mode)
    if handler is None:
        utils.warn("unknown mode '%s'" % (mode))
        usage(1)
    else:
        handler()

684 

685 

686################################################################################ 

687 

688 

689if __name__ == "__main__": 

690 main()