#! /usr/bin/env python3

"""Various different sanity checks

@contact: Debian FTP Master <ftpmaster@debian.org>
@copyright: (C) 2000, 2001, 2002, 2003, 2004, 2006 James Troup <james@nocrew.org>
@license: GNU General Public License version 2 or later
"""


# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA


################################################################################

# And, lo, a great and menacing voice rose from the depths, and with
# great wrath and vehemence it's voice boomed across the
# land... ``hehehehehehe... that *tickles*''
# -- aj on IRC

################################################################################


33import errno 

34import os 

35import stat 

36import sys 

37import time 

38 

39import apt_inst 

40import apt_pkg 

41 

42from daklib import utils 

43from daklib.config import Config 

44from daklib.dak_exceptions import InvalidDscError 

45from daklib.dbconn import ( 

46 Archive, 

47 ArchiveFile, 

48 DBConn, 

49 DBSource, 

50 DSCFile, 

51 PoolFile, 

52 get_component_names, 

53 get_or_set_metadatakey, 

54 get_suite, 

55 get_suite_architectures, 

56) 

57 

58################################################################################ 

59 

# Module-level state shared between the individual checks below.

db_files = {}  #: Cache of filenames as known by the database
waste = 0.0  #: How many bytes are "wasted" by files not referenced in database
#: Files which are excluded from files check (process_dir() only tests membership)
excluded = {}
#: Path of the .deb currently being extracted; set by check_timestamps(), read by Ent()
current_file = None
#: Maps a .deb path to a member mtime lying in the future; filled in by Ent()
future_files = {}
current_time = time.time()  #: now(), captured once when the module is loaded

66 

67################################################################################ 

68 

69 

def usage(exit_code=0):
    """
    Print the command-line help for ``dak check-archive`` and terminate.

    :param exit_code: status passed to :func:`sys.exit` once the help text
                      has been written to stdout
    """
    help_text = """Usage: dak check-archive MODE
Run various sanity checks of the archive and/or database.

 -h, --help show this help and exit.

The following MODEs are available:

 checksums - validate the checksums stored in the database
 files - check files in the database against what's in the archive
 dsc-syntax - validate the syntax of .dsc files in the archive
 missing-overrides - check for missing overrides
 source-in-one-dir - ensure the source for each package is in one directory
 timestamps - check for future timestamps in .deb's
 files-in-dsc - ensure each .dsc references appropriate Files
 validate-indices - ensure files mentioned in Packages & Sources exist
 files-not-symlinks - check files in the database aren't symlinks
 validate-builddeps - validate build-dependencies of .dsc files in the archive
 add-missing-source-checksums - add missing checksums for source packages
"""
    print(help_text)
    sys.exit(exit_code)

93 

94 

95################################################################################ 

96 

97 

def process_dir(unused, dirname: str, filenames: dict) -> None:
    """
    Print every file of a directory that is unknown to the database.

    Each regular, non-symlink file in `dirname` whose absolute path is found
    neither in the global :data:`db_files` nor in the global :data:`excluded`
    dictionary is printed, and its size is added to the global :data:`waste`
    counter.  Directories whose path contains ``/disks-``, ``upgrade-`` or
    ``proposed-updates`` are skipped entirely.

    :param unused: ignored
    :param dirname: the directory to look at
    :param filenames: names of the entries inside `dirname` to examine
    """
    global waste

    # proposed-updates is skipped as a hack; can't handle .changes files
    skip_markers = ("/disks-", "upgrade-", "proposed-updates")
    if any(marker in dirname for marker in skip_markers):
        return

    for entry in filenames:
        candidate = os.path.abspath(os.path.join(dirname, entry))
        if not os.path.isfile(candidate) or os.path.islink(candidate):
            continue
        if candidate in db_files or candidate in excluded:
            continue
        waste += os.stat(candidate)[stat.ST_SIZE]
        print("%s" % (candidate))

123 

124 

125################################################################################ 

126 

127 

def check_files():
    """
    Cross-check the files known to the database against the archive pools.

    Three kinds of problems are reported on stdout:

    - ``MISSING-ARCHIVE-FILE``: a binary or source file is associated with a
      suite but has no ``files_archive_map`` row for that suite's archive.
    - ``MISSING-FILE``: an :class:`ArchiveFile` whose path does not exist on
      disk.
    - ``UNEXPECTED-FILE``: a file below an archive's ``pool`` directory that
      no :class:`ArchiveFile` accounts for.
    """
    session = DBConn().session()

    # Files of binary packages that are in a suite but not mapped to its archive.
    binaries_query = """
        SELECT archive.name, suite.suite_name, f.filename
          FROM binaries b
          JOIN bin_associations ba ON b.id = ba.bin
          JOIN suite ON ba.suite = suite.id
          JOIN archive ON suite.archive_id = archive.id
          JOIN files f ON b.file = f.id
         WHERE NOT EXISTS (SELECT 1 FROM files_archive_map af
                            WHERE af.archive_id = suite.archive_id
                              AND af.file_id = b.file)
         ORDER BY archive.name, suite.suite_name, f.filename
        """

    # Same check for every file referenced by a source package.
    sources_query = """
        SELECT archive.name, suite.suite_name, f.filename
          FROM source s
          JOIN src_associations sa ON s.id = sa.source
          JOIN suite ON sa.suite = suite.id
          JOIN archive ON suite.archive_id = archive.id
          JOIN dsc_files df ON s.id = df.source
          JOIN files f ON df.file = f.id
         WHERE NOT EXISTS (SELECT 1 FROM files_archive_map af
                            WHERE af.archive_id = suite.archive_id
                              AND af.file_id = df.file)
         ORDER BY archive.name, suite.suite_name, f.filename
        """

    for query in (binaries_query, sources_query):
        for row in session.execute(query):
            # str objects have no vformat() method (that lives on
            # string.Formatter), so unpack the row into format() instead.
            print("MISSING-ARCHIVE-FILE {0} {1} {2}".format(*row))

    archive_files = (
        session.query(ArchiveFile)
        .join(ArchiveFile.archive)
        .join(ArchiveFile.file)
        .order_by(Archive.archive_name, PoolFile.filename)
    )

    # Remember every path the database expects so the pool walk below can
    # flag anything else.
    expected_files = set()
    for af in archive_files:
        path = af.path
        expected_files.add(path)
        if not os.path.exists(path):
            print(
                "MISSING-FILE {0} {1} {2}".format(
                    af.archive.archive_name, af.file.filename, path
                )
            )

    archives = session.query(Archive).order_by(Archive.archive_name)

    for a in archives:
        top = os.path.join(a.path, "pool")
        for dirpath, dirnames, filenames in os.walk(top):
            for fn in filenames:
                path = os.path.join(dirpath, fn)
                if path in expected_files:
                    continue
                print("UNEXPECTED-FILE {0} {1}".format(a.archive_name, path))

194 

195 

196################################################################################ 

197 

198 

def check_dscs():
    """
    Parse every .dsc file in the archive and check for its validity.

    Each source package in the database is parsed with signing rules
    enforced.  Syntax errors, invalid UTF-8, missing files and any other
    parse failure are reported through :func:`utils.warn` and counted; an
    :class:`OSError` other than ``ENOENT`` is re-raised.
    """
    invalid = 0
    sources = (
        DBConn().session().query(DBSource).order_by(DBSource.source, DBSource.version)
    )

    for src in sources:
        dsc_path = src.poolfile.fullpath
        problem = None
        try:
            utils.parse_changes(dsc_path, signing_rules=1, dsc_file=True)
        except InvalidDscError:
            problem = "syntax error in .dsc file %s" % dsc_path
        except UnicodeDecodeError:
            problem = (
                "found invalid dsc file (%s), not properly utf-8 encoded" % dsc_path
            )
        except OSError as e:
            # Only a missing file is an archive inconsistency; anything else
            # is unexpected and must not be hidden.
            if e.errno != errno.ENOENT:
                raise
            problem = "missing dsc file (%s)" % dsc_path
        except Exception as e:
            problem = "miscellaneous error parsing dsc file (%s): %s" % (
                dsc_path,
                str(e),
            )

        if problem is not None:
            utils.warn(problem)
            invalid += 1

    if invalid:
        utils.warn("Found %s invalid .dsc files." % (invalid))

230 

231 

232################################################################################ 

233 

234 

def check_override():
    """
    Check for missing overrides in stable and unstable.

    For each of the two suites a heading is printed, followed by the name of
    every binary package and then every source package that is associated
    with the suite but has no row in the ``override`` table for it.
    """
    binaries_without_override = """
SELECT DISTINCT b.package FROM binaries b, bin_associations ba
 WHERE b.id = ba.bin AND ba.suite = :suiteid AND NOT EXISTS
 (SELECT 1 FROM override o WHERE o.suite = :suiteid AND o.package = b.package)"""
    sources_without_override = """
SELECT DISTINCT s.source FROM source s, src_associations sa
 WHERE s.id = sa.source AND sa.suite = :suiteid AND NOT EXISTS
 (SELECT 1 FROM override o WHERE o.suite = :suiteid and o.package = s.source)"""

    session = DBConn().session()

    for suite_name in ("stable", "unstable"):
        # Underlined heading followed by an empty line.
        print(suite_name)
        print("-" * len(suite_name))
        print()

        suite = get_suite(suite_name)
        for statement in (binaries_without_override, sources_without_override):
            result = session.execute(statement, {"suiteid": suite.suite_id})
            for row in result.fetchall():
                print(row[0])

266 

267 

268################################################################################ 

269 

270 

def check_source_in_one_dir():
    """
    Ensure that the source files for any given package are all in one
    directory so that 'apt-get source' works...

    The directory of the first file of a source package is the reference.  A
    file in any other directory is tolerated only if that directory contains
    an entry named like the first file (e.g. a symlink); otherwise the
    package is reported and counted as broken.
    """
    cnf = Config()

    # Not the most enterprising method, but hey...
    broken_count = 0

    session = DBConn().session()

    for src in session.query(DBSource).all():
        reference_dir = ""
        reference_file = ""
        is_broken = False

        pool_files = (
            session.query(PoolFile).join(DSCFile).filter_by(source_id=src.source_id)
        )
        for pool_file in pool_files.all():
            filename = os.path.join(cnf["Dir::Root"], pool_file.filename)
            directory = os.path.dirname(filename)

            if reference_dir == "":
                reference_dir, reference_file = directory, filename
                continue
            if reference_dir == directory:
                continue

            symlink = directory + "/" + os.path.basename(reference_file)
            if os.path.exists(symlink):
                continue

            is_broken = True
            print(
                "WOAH, we got a live one here... %s [%s] {%s}"
                % (filename, src.source_id, symlink)
            )

        if is_broken:
            broken_count += 1

    print(
        "Found %d source packages where the source is not all in one directory."
        % (broken_count)
    )

315 

316 

317################################################################################ 

318 

319 

def check_checksums():
    """
    Validate the size and checksums of every file known to the database.

    For each :class:`PoolFile` the on-disk size and the MD5, SHA-1 and
    SHA-256 digests are compared with the values stored in the database;
    every mismatch is reported through :func:`utils.warn`.  Files that cannot
    be opened are reported and skipped.
    """
    print("Getting file information from database...")
    q = DBConn().session().query(PoolFile)

    print("Checking file checksums & sizes...")
    for f in q:
        filename = f.fullpath

        try:
            # Binary mode: the content is only hashed, never decoded as text.
            fi = open(filename, "rb")
        except OSError:
            utils.warn("can't open '%s'." % (filename))
            continue

        # The with-block guarantees the handle is closed even if one of the
        # checks below raises.
        with fi:
            size = os.stat(filename)[stat.ST_SIZE]
            if size != f.filesize:
                utils.warn(
                    "**WARNING** size mismatch for '%s' ('%s' [current] vs. '%s' [db])."
                    % (filename, size, f.filesize)
                )

            digests = (
                ("md5sum", apt_pkg.md5sum, f.md5sum),
                ("sha1sum", apt_pkg.sha1sum, f.sha1sum),
                ("sha256sum", apt_pkg.sha256sum, f.sha256sum),
            )
            for label, compute, expected in digests:
                # Each digest has to read the file from the beginning.
                fi.seek(0)
                actual = compute(fi)
                if actual != expected:
                    utils.warn(
                        "**WARNING** %s mismatch for '%s' ('%s' [current] vs. '%s' [db])."
                        % (label, filename, actual, expected)
                    )

    print("Done.")

369 

370 

371################################################################################ 

372# 

373 

374 

def Ent(Kind, Name, Link, Mode, UID, GID, Size, MTime, Major, Minor):
    """
    Extraction callback handed to ``apt_inst.debExtract`` by
    :func:`check_timestamps`.

    When `MTime` is later than the module-level :data:`current_time`, the
    archive currently being processed (:data:`current_file`) is recorded in
    :data:`future_files` together with that timestamp, and all arguments are
    printed.  Members with a timestamp that is not in the future are ignored.
    """
    global future_files

    if MTime > current_time:
        future_files[current_file] = MTime
        details = (
            current_file,
            Kind,
            Name,
            Link,
            Mode,
            UID,
            GID,
            Size,
            MTime,
            Major,
            Minor,
        )
        print("%s: %s '%s','%s',%u,%u,%u,%u,%u,%u,%u" % details)

396 

397 

def check_timestamps():
    """
    Check all files for timestamps in the future; common from hardware
    (e.g. alpha) which have far-future dates as their default dates.

    Every readable ``.deb`` known to the database has its ``control.tar.gz``
    and ``data.tar.gz`` members run through :func:`Ent`, which records and
    prints entries dated after :data:`current_time`.  A summary of how many
    of the queried files could be checked is printed at the end.
    """

    global current_file

    # SQL LIKE understands % and _ wildcards, not regular expressions: the
    # former pattern ".deb$" only matched a file literally named ".deb$".
    q = DBConn().session().query(PoolFile).filter(PoolFile.filename.like("%.deb"))

    db_files.clear()
    count = 0

    pool_files = q.all()
    for pf in pool_files:
        # NOTE(review): other checks in this file resolve paths through
        # PoolFile.fullpath; confirm that PoolFile.location still exists.
        filename = os.path.abspath(os.path.join(pf.location.path, pf.filename))
        if os.access(filename, os.R_OK):
            with open(filename) as f:
                # Ent() reads current_file to know which .deb a member belongs to.
                current_file = filename
                print("Processing %s." % (filename), file=sys.stderr)
                apt_inst.debExtract(f, Ent, "control.tar.gz")
                f.seek(0)
                apt_inst.debExtract(f, Ent, "data.tar.gz")
                count += 1

    # db_files was just cleared, so its length was always 0; report the
    # number of files the query returned instead.
    print("Checked %d files (out of %d)." % (count, len(pool_files)))

423 

424 

425################################################################################ 

426 

427 

def check_files_in_dsc():
    """
    Ensure each .dsc lists appropriate files in its Files field (according
    to the format announced in its Format field).

    Every ``.dsc`` known to the database is parsed without enforcing strict
    syntax and handed to :func:`utils.check_dsc_files`; each reason it
    returns is reported through :func:`utils.warn`, and the number of
    affected files is summarised at the end.
    """
    count = 0

    print("Building list of database files...")
    # SQL LIKE understands % and _ wildcards, not regular expressions: the
    # former pattern ".dsc$" only matched a file literally named ".dsc$".
    q = DBConn().session().query(PoolFile).filter(PoolFile.filename.like("%.dsc"))

    # Count once; every call to count() is a separate database round trip.
    total = q.count()
    if total > 0:
        print("Checking %d files..." % total)
    else:
        print("No files to check.")

    cnf = Config()
    for pf in q.all():
        filename = os.path.abspath(os.path.join(cnf["Dir::Root"], pf.filename))

        try:
            # NB: don't enforce .dsc syntax
            dsc = utils.parse_changes(filename, dsc_file=True)
        except Exception:
            # Not a bare except, so KeyboardInterrupt/SystemExit still propagate.
            # NOTE(review): presumably utils.fubar() aborts the program; if
            # it ever returns, `dsc` is unbound below - confirm.
            utils.fubar("error parsing .dsc file '%s'." % (filename))

        reasons = utils.check_dsc_files(filename, dsc)
        for r in reasons:
            utils.warn(r)

        if len(reasons) > 0:
            count += 1

    if count:
        utils.warn("Found %s invalid .dsc files." % (count))

462 

463 

464################################################################################ 

465 

466 

def validate_sources(suite, component):
    """
    Ensure files mentioned in Sources exist

    Reads the (possibly compressed) ``Sources`` index of `suite`/`component`
    below ``Dir::Root`` and checks every entry of each ``Files`` field on
    disk.  Missing files are reported with ``W:``.  For directories
    containing ``potato`` the pool is consulted as well: ``E:`` is printed if
    the file is missing there too, otherwise the symlink that would point to
    the pool copy is printed (no symlink is actually created here).

    :param suite: suite name used to build the index path
    :param component: component name used to build the index path
    """
    cnf = Config()
    index_path = "%s/dists/%s/%s/source/Sources" % (cnf["Dir::Root"], suite, component)
    index_path = utils.find_possibly_compressed_file(index_path)
    print("Processing %s..." % (index_path))

    with apt_pkg.TagFile(index_path) as Sources:
        while Sources.step():
            source = Sources.section.find("Package")
            directory = Sources.section.find("Directory")
            files = Sources.section.find("Files")

            for entry in files.split("\n"):
                # Each entry reads "<md5> <size> <name>"; only the name is used.
                (_md5, _size, name) = entry.split()
                filename = "%s/%s/%s" % (cnf["Dir::Root"], directory, name)
                if os.path.exists(filename):
                    continue

                if directory.find("potato") == -1:
                    print("W: %s missing." % (filename))
                    continue

                pool_location = utils.poolify(source)
                pool_filename = "%s/%s/%s" % (
                    cnf["Dir::Pool"],
                    pool_location,
                    name,
                )
                if not os.path.exists(pool_filename):
                    print("E: %s missing (%s)." % (filename, pool_filename))
                    continue

                # Work out the symlink target and announce it.
                pool_filename = os.path.normpath(pool_filename)
                filename = os.path.normpath(filename)
                src = utils.clean_symlink(pool_filename, filename, cnf["Dir::Root"])
                print("Symlinking: %s -> %s" % (filename, src))

503 

504 

505######################################## 

506 

507 

def validate_packages(suite, component, architecture):
    """
    Ensure files mentioned in Packages exist

    Reads the (possibly compressed) ``Packages`` index of
    `suite`/`component`/``binary-``\\ `architecture` below ``Dir::Root`` and
    prints a ``W:`` line for every ``Filename`` that does not exist on disk.

    :param suite: suite name used to build the index path
    :param component: component name used to build the index path
    :param architecture: architecture name used to build the index path
    """
    cnf = Config()
    index_path = "%s/dists/%s/%s/binary-%s/Packages" % (
        cnf["Dir::Root"],
        suite,
        component,
        architecture,
    )
    index_path = utils.find_possibly_compressed_file(index_path)
    print("Processing %s..." % (index_path))

    with apt_pkg.TagFile(index_path) as Packages:
        while Packages.step():
            package_path = "%s/%s" % (
                cnf["Dir::Root"],
                Packages.section.find("Filename"),
            )
            if os.path.exists(package_path):
                continue
            print("W: %s missing." % (package_path))

526 

527 

528######################################## 

529 

530 

def check_indices_files_exist():
    """
    Ensure files mentioned in Packages & Sources exist

    For stable, testing and unstable, every component is checked for each
    architecture of the suite: ``source`` goes through
    :func:`validate_sources`, ``all`` is skipped, and any other architecture
    goes through :func:`validate_packages`.
    """
    for suite in ("stable", "testing", "unstable"):
        for component in get_component_names():
            arch_names = [
                a.arch_string.lower() for a in get_suite_architectures(suite)
            ]
            for arch in arch_names:
                if arch == "all":
                    continue
                if arch == "source":
                    validate_sources(suite, component)
                else:
                    validate_packages(suite, component, arch)

545 

546 

547################################################################################ 

548 

549 

def check_files_not_symlinks():
    """
    Check files in the database aren't symlinks

    Only files whose name ends in ``.dsc`` are examined.  A file that is not
    readable is reported as non-existent, a readable one is reported if it is
    a symlink; both go through :func:`utils.warn`.
    """
    print("Building list of database files... ", end=" ")
    # SQL LIKE understands % and _ wildcards, not regular expressions: the
    # former pattern ".dsc$" only matched a file literally named ".dsc$".
    q = DBConn().session().query(PoolFile).filter(PoolFile.filename.like("%.dsc"))

    for pf in q.all():
        # NOTE(review): other checks in this file resolve paths through
        # PoolFile.fullpath; confirm that PoolFile.location still exists.
        filename = os.path.abspath(os.path.join(pf.location.path, pf.filename))
        if not os.access(filename, os.R_OK):
            utils.warn("%s: doesn't exist." % (filename))
        elif os.path.islink(filename):
            utils.warn("%s: is a symlink." % (filename))

564 

565 

566################################################################################ 

567 

568 

def chk_bd_process_dir(dirname, filenames):
    """
    Validate the build-dependency fields of every .dsc in one directory.

    Each name in `filenames` ending in ``.dsc`` is parsed, and its
    ``build-depends`` and ``build-depends-indep`` fields (when present and
    non-empty) are fed to ``apt_pkg.parse_src_depends``.  A field that fails
    to parse is reported on stdout with an ``E:`` prefix.

    :param dirname: directory containing the files
    :param filenames: names of the entries inside `dirname`
    """
    for name in filenames:
        if not name.endswith(".dsc"):
            continue
        filename = os.path.abspath(dirname + "/" + name)
        dsc = utils.parse_changes(filename, dsc_file=True)
        for field_name in ["build-depends", "build-depends-indep"]:
            field = dsc.get(field_name)
            if not field:
                continue
            try:
                apt_pkg.parse_src_depends(field)
            except Exception:
                # Not a bare except, so KeyboardInterrupt/SystemExit can still
                # stop a long-running archive walk.
                print("E: [%s] %s: %s" % (filename, field_name, field))

583 

584 

585################################################################################ 

586 

587 

def check_build_depends():
    """
    Validate build-dependencies of .dsc files in the archive

    Walks the whole tree below ``Dir::Root`` and passes every directory to
    :func:`chk_bd_process_dir`.
    """
    archive_root = Config()["Dir::Root"]
    for directory, _subdirs, entries in os.walk(archive_root):
        chk_bd_process_dir(directory, entries)

593 

594 

595################################################################################ 

596 

597 

# SQL used by add_missing_source_checksums().
#
# Inserts one source_metadata row (keyed by :checksum_key) for every source
# that has none yet.  The value is a newline followed by one
# " <checksum> <size> <basename>" line per file listed in dsc_files for that
# source, sorted by basename and skipping the file referenced by source.file.
# :checksum_type ('Files', 'Checksums-Sha1' or 'Checksums-Sha256') selects
# which checksum column is used.
#
# The string is raw so the backslashes reach PostgreSQL's E'...' literals
# untouched.
_add_missing_source_checksums_query = R"""
INSERT INTO source_metadata
  (src_id, key_id, value)
SELECT
  s.id,
  :checksum_key,
  E'\n' ||
    (SELECT STRING_AGG(' ' || tmp.checksum || ' ' || tmp.size || ' ' || tmp.basename, E'\n' ORDER BY tmp.basename)
     FROM
       (SELECT
            CASE :checksum_type
              WHEN 'Files' THEN f.md5sum
              WHEN 'Checksums-Sha1' THEN f.sha1sum
              WHEN 'Checksums-Sha256' THEN f.sha256sum
            END AS checksum,
            f.size,
            SUBSTRING(f.filename FROM E'/([^/]*)\\Z') AS basename
          FROM files f JOIN dsc_files ON f.id = dsc_files.file
          WHERE dsc_files.source = s.id AND f.id != s.file
       ) AS tmp
    )

  FROM
    source s
  WHERE NOT EXISTS (SELECT 1 FROM source_metadata md WHERE md.src_id=s.id AND md.key_id = :checksum_key);
"""

624 

625 

def add_missing_source_checksums():
    """
    Add missing source checksums to source_metadata

    Runs the insert statement once each for ``Files``, ``Checksums-Sha1`` and
    ``Checksums-Sha256``, prints how many entries were added whenever that
    number is positive, and commits the session at the end.
    """
    session = DBConn().session()

    for field in ("Files", "Checksums-Sha1", "Checksums-Sha256"):
        key_id = get_or_set_metadatakey(field, session).key_id
        params = {"checksum_key": key_id, "checksum_type": field}
        result = session.execute(_add_missing_source_checksums_query, params)
        added = result.rowcount
        if added > 0:
            print("Added {0} missing entries for {1}".format(added, field))

    session.commit()

638 

639 

640################################################################################ 

641 

642 

def main():
    """
    Entry point of ``dak check-archive``.

    Parses the command line, prints the help text when ``-h``/``--help`` is
    given, insists on exactly one MODE argument, and then runs the check
    belonging to that mode (compared case-insensitively).  An unknown mode
    produces a warning followed by the usage text with exit status 1.
    """
    cnf = Config()

    Arguments = [("h", "help", "Check-Archive::Options::Help")]
    # Make sure the option exists so the lookup below cannot fail.
    help_key = "Check-Archive::Options::help"
    if help_key not in cnf:
        cnf[help_key] = ""

    args = apt_pkg.parse_commandline(cnf.Cnf, Arguments, sys.argv)

    Options = cnf.subtree("Check-Archive::Options")
    if Options["Help"]:
        usage()

    if not args:
        utils.warn("dak check-archive requires at least one argument")
        usage(1)
    elif len(args) > 1:
        utils.warn("dak check-archive accepts only one argument")
        usage(1)
    mode = args[0].lower()

    # Initialize DB
    DBConn()

    checks = {
        "checksums": check_checksums,
        "files": check_files,
        "dsc-syntax": check_dscs,
        "missing-overrides": check_override,
        "source-in-one-dir": check_source_in_one_dir,
        "timestamps": check_timestamps,
        "files-in-dsc": check_files_in_dsc,
        "validate-indices": check_indices_files_exist,
        "files-not-symlinks": check_files_not_symlinks,
        "validate-builddeps": check_build_depends,
        "add-missing-source-checksums": add_missing_source_checksums,
    }

    selected = checks.get(mode)
    if selected is None:
        utils.warn("unknown mode '%s'" % (mode))
        usage(1)
    else:
        selected()

696 

697 

698################################################################################ 

699 

700 

# Run the checks only when executed as a script, not when imported.
if __name__ == "__main__":
    main()