1#! /usr/bin/env python3 

2 

3""" Various different sanity checks 

4 

5@contact: Debian FTP Master <ftpmaster@debian.org> 

6@copyright: (C) 2000, 2001, 2002, 2003, 2004, 2006 James Troup <james@nocrew.org> 

7@license: GNU General Public License version 2 or later 

8""" 

9 

10# This program is free software; you can redistribute it and/or modify 

11# it under the terms of the GNU General Public License as published by 

12# the Free Software Foundation; either version 2 of the License, or 

13# (at your option) any later version. 

14 

15# This program is distributed in the hope that it will be useful, 

16# but WITHOUT ANY WARRANTY; without even the implied warranty of 

17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

18# GNU General Public License for more details. 

19 

20# You should have received a copy of the GNU General Public License 

21# along with this program; if not, write to the Free Software 

22# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 

23 

24################################################################################ 

25 

26# And, lo, a great and menacing voice rose from the depths, and with 

27# great wrath and vehemence it's voice boomed across the 

28# land... ``hehehehehehe... that *tickles*'' 

29# -- aj on IRC 

30 

31################################################################################ 

32 

33import errno 

34import os 

35import stat 

36import sys 

37import time 

38import apt_pkg 

39import apt_inst 

40 

41from daklib.dbconn import * 

42from daklib import utils 

43from daklib.config import Config 

44from daklib.dak_exceptions import InvalidDscError 

45 

46################################################################################ 

47 

db_files = {}  #: Cache of filenames as known by the database
waste = 0.0  #: How many bytes are "wasted" by files not referenced in database
excluded = {}  #: List of files which are excluded from files check
current_file = None  #: Path of the .deb being processed by check_timestamps(); read by Ent()
future_files = {}  #: Maps a .deb path to a member mtime that Ent() found to be later than current_time
current_time = time.time()  #: now()

54 

55################################################################################ 

56 

57 

def usage(exit_code=0):
    """
    Print the command line help of ``dak check-archive`` and terminate.

    :param exit_code: status handed to :func:`sys.exit` once the help text
                      has been written to stdout
    """
    # NOTE(review): the spacing inside this text is reproduced as it appears
    # in the reviewed copy; column alignment may need restoring.
    help_text = """Usage: dak check-archive MODE
Run various sanity checks of the archive and/or database.

 -h, --help show this help and exit.

The following MODEs are available:

 checksums - validate the checksums stored in the database
 files - check files in the database against what's in the archive
 dsc-syntax - validate the syntax of .dsc files in the archive
 missing-overrides - check for missing overrides
 source-in-one-dir - ensure the source for each package is in one directory
 timestamps - check for future timestamps in .deb's
 files-in-dsc - ensure each .dsc references appropriate Files
 validate-indices - ensure files mentioned in Packages & Sources exist
 files-not-symlinks - check files in the database aren't symlinks
 validate-builddeps - validate build-dependencies of .dsc files in the archive
 add-missing-source-checksums - add missing checksums for source packages
"""
    print(help_text)
    sys.exit(exit_code)

79 

80################################################################################ 

81 

82 

def process_dir(unused, dirname: str, filenames: dict) -> None:
    """
    Print every regular file of one directory that is unknown to both the
    global :data:`db_files` cache and the global :data:`excluded` dictionary,
    adding its size to the global :data:`waste` counter.

    Directories whose path contains ``/disks-``, ``upgrade-`` or
    ``proposed-updates`` are skipped entirely.

    :param unused: ignored
    :param dirname: the directory to look at
    :param filenames: names of the entries found in `dirname`
    """
    global waste, db_files, excluded

    # 'proposed-updates' is a hack; can't handle .changes files
    skipped_markers = ('/disks-', 'upgrade-', 'proposed-updates')
    if any(marker in dirname for marker in skipped_markers):
        return

    for entry in filenames:
        candidate = os.path.abspath(os.path.join(dirname, entry))
        # Only real files count; symlinks are never reported.
        if not os.path.isfile(candidate) or os.path.islink(candidate):
            continue
        if candidate in db_files or candidate in excluded:
            continue
        waste += os.stat(candidate)[stat.ST_SIZE]
        print("%s" % (candidate))

103 

104################################################################################ 

105 

106 

def check_files():
    """
    Cross-check the files recorded in the database against the archive
    pool/ directories on disk.

    Findings are printed to stdout, one per line:

    - ``MISSING-ARCHIVE-FILE archive suite filename``: a binary or source
      file is associated with a suite but has no ``files_archive_map`` row
      for that suite's archive.
    - ``MISSING-FILE archive filename path``: the path of an
      :class:`ArchiveFile` does not exist on disk.
    - ``UNEXPECTED-FILE archive path``: a file below an archive's ``pool``
      directory is not the path of any :class:`ArchiveFile`.
    """
    session = DBConn().session()

    missing_binaries_query = """
        SELECT archive.name, suite.suite_name, f.filename
          FROM binaries b
          JOIN bin_associations ba ON b.id = ba.bin
          JOIN suite ON ba.suite = suite.id
          JOIN archive ON suite.archive_id = archive.id
          JOIN files f ON b.file = f.id
         WHERE NOT EXISTS (SELECT 1 FROM files_archive_map af
                            WHERE af.archive_id = suite.archive_id
                              AND af.file_id = b.file)
         ORDER BY archive.name, suite.suite_name, f.filename
        """
    missing_sources_query = """
        SELECT archive.name, suite.suite_name, f.filename
          FROM source s
          JOIN src_associations sa ON s.id = sa.source
          JOIN suite ON sa.suite = suite.id
          JOIN archive ON suite.archive_id = archive.id
          JOIN dsc_files df ON s.id = df.source
          JOIN files f ON df.file = f.id
         WHERE NOT EXISTS (SELECT 1 FROM files_archive_map af
                            WHERE af.archive_id = suite.archive_id
                              AND af.file_id = df.file)
         ORDER BY archive.name, suite.suite_name, f.filename
        """
    for query in (missing_binaries_query, missing_sources_query):
        for row in session.execute(query):
            # Each row is (archive name, suite name, filename).  str has no
            # vformat() method, so the row is unpacked into format() instead.
            print("MISSING-ARCHIVE-FILE {0} {1} {2}".format(*row))

    archive_files = session.query(ArchiveFile) \
        .join(ArchiveFile.archive).join(ArchiveFile.file) \
        .order_by(Archive.archive_name, PoolFile.filename)

    expected_files = set()
    for af in archive_files:
        path = af.path
        expected_files.add(path)
        if not os.path.exists(path):
            print("MISSING-FILE {0} {1} {2}".format(af.archive.archive_name, af.file.filename, path))

    archives = session.query(Archive).order_by(Archive.archive_name)

    for a in archives:
        top = os.path.join(a.path, 'pool')
        for dirpath, dirnames, filenames in os.walk(top):
            for fn in filenames:
                path = os.path.join(dirpath, fn)
                if path not in expected_files:
                    print("UNEXPECTED-FILE {0} {1}".format(a.archive_name, path))

167 

168################################################################################ 

169 

170 

def check_dscs():
    """
    Parse every .dsc file in the archive and check for its validity.

    Each problematic file is reported through :func:`utils.warn`; a summary
    warning with the number of invalid files follows when there was at least
    one.  An :class:`OSError` other than "file not found" is re-raised.
    """
    invalid = 0
    sources = DBConn().session().query(DBSource).order_by(DBSource.source, DBSource.version)

    for source in sources:
        dsc_path = source.poolfile.fullpath
        try:
            utils.parse_changes(dsc_path, signing_rules=1, dsc_file=True)
        except InvalidDscError:
            utils.warn("syntax error in .dsc file %s" % dsc_path)
        except UnicodeDecodeError:
            utils.warn("found invalid dsc file (%s), not properly utf-8 encoded" % dsc_path)
        except OSError as err:
            if err.errno != errno.ENOENT:
                raise
            utils.warn("missing dsc file (%s)" % dsc_path)
        except Exception as err:
            utils.warn("miscellaneous error parsing dsc file (%s): %s" % (dsc_path, str(err)))
        else:
            # Parsed cleanly, nothing to count.
            continue
        invalid += 1

    if invalid:
        utils.warn("Found %s invalid .dsc files." % (invalid))

200 

201################################################################################ 

202 

203 

def check_override():
    """
    Check for missing overrides in stable and unstable.

    For each of the two suites a heading is printed, followed by the binary
    package names and then the source package names that are associated with
    the suite but have no override entry for it.
    """
    missing_binary_overrides = """
SELECT DISTINCT b.package FROM binaries b, bin_associations ba
 WHERE b.id = ba.bin AND ba.suite = :suiteid AND NOT EXISTS
 (SELECT 1 FROM override o WHERE o.suite = :suiteid AND o.package = b.package)"""
    missing_source_overrides = """
SELECT DISTINCT s.source FROM source s, src_associations sa
 WHERE s.id = sa.source AND sa.suite = :suiteid AND NOT EXISTS
 (SELECT 1 FROM override o WHERE o.suite = :suiteid and o.package = s.source)"""

    session = DBConn().session()

    for suite_name in ("stable", "unstable"):
        print(suite_name)
        print("-" * len(suite_name))
        print()
        suite = get_suite(suite_name)

        # Binaries first, then sources.
        for statement in (missing_binary_overrides, missing_source_overrides):
            result = session.execute(statement, {'suiteid': suite.suite_id})
            for row in result.fetchall():
                print(row[0])

231 

232################################################################################ 

233 

234 

def check_source_in_one_dir():
    """
    Ensure that the source files for any given package are all in one
    directory so that 'apt-get source' works...

    A file living in a different directory than the first file of its source
    package is tolerated when that directory holds an entry named like the
    first file; otherwise the file is printed and the package counted as
    broken.  The number of broken packages is printed at the end.
    """
    session = DBConn().session()
    # Not the most enterprising method, but hey...
    broken_count = 0

    for source in session.query(DBSource).all():
        anchor_dir = ""
        anchor_file = ""
        scattered = False

        pool_files = session.query(PoolFile).join(Location).join(DSCFile).filter_by(source_id=source.source_id)
        for pool_file in pool_files.all():
            full_name = os.path.join(pool_file.location.path, pool_file.filename)
            directory = os.path.dirname(full_name)

            if anchor_dir == "":
                # First file seen: it defines the reference directory.
                anchor_dir = directory
                anchor_file = full_name
                continue
            if anchor_dir == directory:
                continue

            symlink = directory + '/' + os.path.basename(anchor_file)
            if os.path.exists(symlink):
                continue
            scattered = True
            print("WOAH, we got a live one here... %s [%s] {%s}" % (full_name, source.source_id, symlink))

        if scattered:
            broken_count += 1

    print("Found %d source packages where the source is not all in one directory." % (broken_count))

271 

272################################################################################ 

273 

274 

def check_checksums():
    """
    Validate all files: compare the on-disk size, MD5, SHA-1 and SHA-256 of
    every :class:`PoolFile` with the values stored in the database.

    Files that cannot be opened and every mismatch are reported through
    :func:`utils.warn`; processing then continues with the next file.
    """
    print("Getting file information from database...")
    q = DBConn().session().query(PoolFile)

    print("Checking file checksums & sizes...")
    for f in q:
        filename = f.fullpath

        try:
            # Binary mode: the content is only checksummed, never decoded.
            fi = open(filename, 'rb')
        except (OSError, ValueError):
            utils.warn("can't open '%s'." % (filename))
            continue

        # The context manager closes the file even if a digest call raises.
        with fi:
            # Measure the file that was actually opened.
            size = os.fstat(fi.fileno())[stat.ST_SIZE]
            if size != f.filesize:
                utils.warn("**WARNING** size mismatch for '%s' ('%s' [current] vs. '%s' [db])." % (filename, size, f.filesize))

            checks = (
                ("md5sum", apt_pkg.md5sum, f.md5sum),
                ("sha1sum", apt_pkg.sha1sum, f.sha1sum),
                ("sha256sum", apt_pkg.sha256sum, f.sha256sum),
            )
            for label, digest, expected in checks:
                # Every digest has to read the file from the beginning.
                fi.seek(0)
                actual = digest(fi)
                if actual != expected:
                    utils.warn("**WARNING** %s mismatch for '%s' ('%s' [current] vs. '%s' [db])." % (label, filename, actual, expected))

    print("Done.")

312 

313################################################################################ 

314# 

315 

316 

def Ent(Kind, Name, Link, Mode, UID, GID, Size, MTime, Major, Minor):
    """
    Extraction callback handed to ``apt_inst.debExtract`` by
    :func:`check_timestamps`.

    When `MTime` is later than the global :data:`current_time`, the member is
    printed and `MTime` is stored in the global :data:`future_files` under
    the key :data:`current_file`.  Other members are ignored.
    """
    global future_files

    if not MTime > current_time:
        return

    future_files[current_file] = MTime
    details = (current_file, Kind, Name, Link, Mode, UID, GID, Size, MTime, Major, Minor)
    print("%s: %s '%s','%s',%u,%u,%u,%u,%u,%u,%u" % details)

323 

324 

def check_timestamps():
    """
    Check all files for timestamps in the future; common from hardware
    (e.g. alpha) which have far-future dates as their default dates.

    Every readable ``.deb`` pool file is run through ``apt_inst.debExtract``
    with :func:`Ent` as callback, once for ``control.tar.gz`` and once for
    ``data.tar.gz``.  A summary of how many files were checked is printed at
    the end.
    """

    global current_file

    # SQL LIKE only knows the '%' and '_' wildcards; a regex-style '.deb$'
    # would match nothing but a file literally named '.deb$'.
    q = DBConn().session().query(PoolFile).filter(PoolFile.filename.like('%.deb'))
    pool_files = q.all()

    # Kept from the original: resets the module-level filename cache.
    db_files.clear()
    count = 0

    for pf in pool_files:
        filename = os.path.abspath(os.path.join(pf.location.path, pf.filename))
        if os.access(filename, os.R_OK):
            with open(filename) as f:
                # Ent() reads this global to know which .deb a member belongs to.
                current_file = filename
                print("Processing %s." % (filename), file=sys.stderr)
                apt_inst.debExtract(f, Ent, "control.tar.gz")
                f.seek(0)
                apt_inst.debExtract(f, Ent, "data.tar.gz")
            count += 1

    # Report against the number of candidate files; the cleared db_files
    # cache would always yield a total of 0 here.
    print("Checked %d files (out of %d)." % (count, len(pool_files)))

350 

351################################################################################ 

352 

353 

def check_files_in_dsc():
    """
    Ensure each .dsc lists appropriate files in its Files field (according
    to the format announced in its Format field).

    Every problem returned by :func:`utils.check_dsc_files` is reported
    through :func:`utils.warn`, followed by a summary warning with the number
    of affected .dsc files.  A .dsc that cannot be parsed is passed to
    :func:`utils.fubar`.
    """
    count = 0

    print("Building list of database files...")
    # SQL LIKE only knows the '%' and '_' wildcards; a regex-style '.dsc$'
    # would match nothing but a file literally named '.dsc$'.
    q = DBConn().session().query(PoolFile).filter(PoolFile.filename.like('%.dsc'))

    total = q.count()
    if total > 0:
        print("Checking %d files..." % total)
    else:
        print("No files to check.")

    for pf in q.all():
        # Join the two components; plain concatenation drops the separator
        # whenever the location path has no trailing slash.
        filename = os.path.abspath(os.path.join(pf.location.path, pf.filename))

        try:
            # NB: don't enforce .dsc syntax
            dsc = utils.parse_changes(filename, dsc_file=True)
        except Exception:
            # NOTE(review): relies on utils.fubar() not returning - confirm.
            utils.fubar("error parsing .dsc file '%s'." % (filename))

        reasons = utils.check_dsc_files(filename, dsc)
        for r in reasons:
            utils.warn(r)

        if len(reasons) > 0:
            count += 1

    if count:
        utils.warn("Found %s invalid .dsc files." % (count))

387 

388 

389################################################################################ 

390 

def validate_sources(suite, component):
    """
    Ensure files mentioned in Sources exist

    Reads ``dists/<suite>/<component>/source/Sources`` (possibly compressed)
    below ``Dir::Root`` and prints a ``W:``/``E:`` line for every file listed
    in a ``Files`` field that is missing on disk.

    :param suite: name of the suite whose Sources index is read
    :param component: name of the component whose Sources index is read
    """
    # The configuration is obtained the same way as everywhere else in this
    # file; no module-level ``Cnf`` is defined here.
    cnf = Config()
    root = cnf["Dir::Root"]

    filename = "%s/dists/%s/%s/source/Sources" % (root, suite, component)
    filename = utils.find_possibly_compressed_file(filename)
    print("Processing %s..." % (filename))
    with apt_pkg.TagFile(filename) as Sources:
        while Sources.step():
            source = Sources.section.find('Package')
            directory = Sources.section.find('Directory')
            files = Sources.section.find('Files') or ''
            for entry in files.split('\n'):
                # A blank line (or a missing Files field) carries no entry.
                if not entry.strip():
                    continue
                (md5, size, name) = entry.split()
                path = "%s/%s/%s" % (root, directory, name)
                if os.path.exists(path):
                    continue
                if directory.find("potato") == -1:
                    print("W: %s missing." % (path))
                    continue

                pool_location = utils.poolify(source)
                pool_filename = "%s/%s/%s" % (cnf["Dir::Pool"], pool_location, name)
                if not os.path.exists(pool_filename):
                    print("E: %s missing (%s)." % (path, pool_filename))
                else:
                    # Create symlink
                    # NOTE(review): no os.symlink() call is made in this
                    # function; only the link target is computed and printed.
                    pool_filename = os.path.normpath(pool_filename)
                    path = os.path.normpath(path)
                    src = utils.clean_symlink(pool_filename, path, root)
                    print("Symlinking: %s -> %s" % (path, src))

420 

421######################################## 

422 

423 

def validate_packages(suite, component, architecture):
    """
    Ensure files mentioned in Packages exist

    Reads ``dists/<suite>/<component>/binary-<architecture>/Packages``
    (possibly compressed) below ``Dir::Root`` and prints a ``W:`` line for
    every ``Filename`` entry that does not exist on disk.

    :param suite: name of the suite whose Packages index is read
    :param component: name of the component whose Packages index is read
    :param architecture: architecture whose Packages index is read
    """
    # The configuration is obtained the same way as everywhere else in this
    # file; no module-level ``Cnf`` is defined here.
    root = Config()["Dir::Root"]

    filename = "%s/dists/%s/%s/binary-%s/Packages" \
        % (root, suite, component, architecture)
    filename = utils.find_possibly_compressed_file(filename)
    print("Processing %s..." % (filename))
    with apt_pkg.TagFile(filename) as Packages:
        while Packages.step():
            path = "%s/%s" % (root, Packages.section.find('Filename'))
            if not os.path.exists(path):
                print("W: %s missing." % (path))

437 

438######################################## 

439 

440 

def check_indices_files_exist():
    """
    Ensure files mentioned in Packages & Sources exist

    Walks stable, testing and unstable: for every component and every
    architecture of the suite, the Sources index is validated for ``source``,
    ``all`` is skipped, and the Packages index is validated for any other
    architecture.
    """
    for suite in ("stable", "testing", "unstable"):
        for component in get_component_names():
            arch_names = [a.arch_string.lower() for a in get_suite_architectures(suite)]
            for arch in arch_names:
                if arch == "all":
                    continue
                if arch == "source":
                    validate_sources(suite, component)
                else:
                    validate_packages(suite, component, arch)

455 

456################################################################################ 

457 

458 

def check_files_not_symlinks():
    """
    Check files in the database aren't symlinks

    Every selected pool file that is unreadable or that is a symlink is
    reported through :func:`utils.warn`.
    """
    print("Building list of database files... ", end=' ')
    # SQL LIKE only knows the '%' and '_' wildcards; a regex-style '.dsc$'
    # would match nothing but a file literally named '.dsc$'.
    # NOTE(review): the filter limits the check to .dsc files although the
    # docstring speaks of files in general - confirm the intended scope.
    q = DBConn().session().query(PoolFile).filter(PoolFile.filename.like('%.dsc'))

    for pf in q.all():
        filename = os.path.abspath(os.path.join(pf.location.path, pf.filename))
        if not os.access(filename, os.R_OK):
            utils.warn("%s: doesn't exist." % (filename))
        elif os.path.islink(filename):
            utils.warn("%s: is a symlink." % (filename))

474 

475################################################################################ 

476 

477 

def chk_bd_process_dir(dirname, filenames):
    """
    Validate the build-dependency fields of every .dsc file in one directory.

    For each name ending in ``.dsc`` the file is parsed and its
    ``build-depends`` and ``build-depends-indep`` fields are handed to
    ``apt_pkg.parse_src_depends``; a field that fails to parse is printed as
    an ``E:`` line.

    :param dirname: directory containing the files
    :param filenames: names of the entries found in `dirname`
    """
    for name in filenames:
        if not name.endswith(".dsc"):
            continue
        filename = os.path.abspath(dirname + '/' + name)
        dsc = utils.parse_changes(filename, dsc_file=True)
        for field_name in ["build-depends", "build-depends-indep"]:
            field = dsc.get(field_name)
            if not field:
                continue
            try:
                apt_pkg.parse_src_depends(field)
            except Exception:
                # Report the unparsable field and carry on; unlike a bare
                # except this lets KeyboardInterrupt/SystemExit through.
                print("E: [%s] %s: %s" % (filename, field_name, field))

492 

493################################################################################ 

494 

495 

def check_build_depends():
    """ Validate build-dependencies of .dsc files in the archive """
    archive_root = Config()["Dir::Root"]
    # Hand every directory below the archive root to the per-directory check.
    for walked in os.walk(archive_root):
        chk_bd_process_dir(walked[0], walked[2])

501 

502################################################################################ 

503 

504 

# SQL used by add_missing_source_checksums().
#
# Inserts one source_metadata row for every source that has no row yet for
# the metadata key :checksum_key.  The value starts with a newline followed by
# one " <checksum> <size> <basename>" line per file referenced through
# dsc_files for that source (excluding the source's own file, s.file), ordered
# by basename.  :checksum_type ('Files', 'Checksums-Sha1' or
# 'Checksums-Sha256') selects which checksum column of files is used.
_add_missing_source_checksums_query = R"""
INSERT INTO source_metadata
  (src_id, key_id, value)
SELECT
  s.id,
  :checksum_key,
  E'\n' ||
  (SELECT STRING_AGG(' ' || tmp.checksum || ' ' || tmp.size || ' ' || tmp.basename, E'\n' ORDER BY tmp.basename)
   FROM
     (SELECT
          CASE :checksum_type
            WHEN 'Files' THEN f.md5sum
            WHEN 'Checksums-Sha1' THEN f.sha1sum
            WHEN 'Checksums-Sha256' THEN f.sha256sum
          END AS checksum,
          f.size,
          SUBSTRING(f.filename FROM E'/([^/]*)\\Z') AS basename
        FROM files f JOIN dsc_files ON f.id = dsc_files.file
       WHERE dsc_files.source = s.id AND f.id != s.file
     ) AS tmp
  )

  FROM
    source s
  WHERE NOT EXISTS (SELECT 1 FROM source_metadata md WHERE md.src_id=s.id AND md.key_id = :checksum_key);
"""

531 

532 

def add_missing_source_checksums():
    """
    Add missing source checksums to source_metadata

    Runs the insert query once for each of ``Files``, ``Checksums-Sha1`` and
    ``Checksums-Sha256``, prints how many entries were added whenever that
    number is positive, and commits once at the end.
    """
    session = DBConn().session()
    for checksum in ('Files', 'Checksums-Sha1', 'Checksums-Sha256'):
        parameters = {
            'checksum_key': get_or_set_metadatakey(checksum, session).key_id,
            'checksum_type': checksum,
        }
        added = session.execute(_add_missing_source_checksums_query, parameters).rowcount
        if added > 0:
            print("Added {0} missing entries for {1}".format(added, checksum))
    session.commit()

543 

544################################################################################ 

545 

546 

def main():
    """
    Entry point of ``dak check-archive``.

    Parses the command line, prints the help when requested, insists on
    exactly one MODE argument and runs the check that belongs to it.  An
    unknown mode produces a warning followed by the usage text with exit
    status 1.
    """
    cnf = Config()

    Arguments = [('h', "help", "Check-Archive::Options::Help")]
    # Make sure every option has a value before the command line is parsed.
    for option in ("help",):
        key = "Check-Archive::Options::%s" % option
        if key not in cnf:
            cnf[key] = ""

    args = apt_pkg.parse_commandline(cnf.Cnf, Arguments, sys.argv)

    Options = cnf.subtree("Check-Archive::Options")
    if Options["Help"]:
        usage()

    if len(args) < 1:
        utils.warn("dak check-archive requires at least one argument")
        usage(1)
    elif len(args) > 1:
        utils.warn("dak check-archive accepts only one argument")
        usage(1)
    mode = args[0].lower()

    # Initialize DB
    DBConn()

    handlers = {
        "checksums": check_checksums,
        "files": check_files,
        "dsc-syntax": check_dscs,
        "missing-overrides": check_override,
        "source-in-one-dir": check_source_in_one_dir,
        "timestamps": check_timestamps,
        "files-in-dsc": check_files_in_dsc,
        "validate-indices": check_indices_files_exist,
        "files-not-symlinks": check_files_not_symlinks,
        "validate-builddeps": check_build_depends,
        "add-missing-source-checksums": add_missing_source_checksums,
    }
    handler = handlers.get(mode)
    if handler is None:
        utils.warn("unknown mode '%s'" % (mode))
        usage(1)
    else:
        handler()

600 

601################################################################################ 

602 

603 

# Run the command line interface only when executed as a script, not on import.
if __name__ == '__main__':
    main()