1#! /usr/bin/env python3 

2 

3""" Cleans up unassociated binary and source packages 

4 

5@contact: Debian FTPMaster <ftpmaster@debian.org> 

6@copyright: 2000, 2001, 2002, 2003, 2006 James Troup <james@nocrew.org> 

7@copyright: 2009 Mark Hymers <mhy@debian.org> 

8@copyright: 2010 Joerg Jaspert <joerg@debian.org> 

9@license: GNU General Public License version 2 or later 

10""" 

11 

12# This program is free software; you can redistribute it and/or modify 

13# it under the terms of the GNU General Public License as published by 

14# the Free Software Foundation; either version 2 of the License, or 

15# (at your option) any later version. 

16 

17# This program is distributed in the hope that it will be useful, 

18# but WITHOUT ANY WARRANTY; without even the implied warranty of 

19# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

20# GNU General Public License for more details. 

21 

22# You should have received a copy of the GNU General Public License 

23# along with this program; if not, write to the Free Software 

24# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 

25 

26################################################################################ 

27 

28# 07:05|<elmo> well.. *shrug*.. no, probably not.. but to fix it, 

29# | we're going to have to implement reference counting 

30# | through dependencies.. do we really want to go down 

31# | that road? 

32# 

33# 07:05|<Culus> elmo: Augh! <brain jumps out of skull> 

34 

35################################################################################ 

36 

37import errno 

38import os 

39import sqlalchemy.sql as sql 

40import stat 

41import sys 

42import apt_pkg 

43from datetime import datetime 

44 

45from daklib.config import Config 

46from daklib.dbconn import * 

47from daklib import utils 

48from daklib import daklog 

49 

50################################################################################ 

51 

52Options = None 

53Logger = None 

54 

55################################################################################ 

56 

57 

58def usage(exit_code=0): 

59 print("""Usage: dak clean-suites [OPTIONS] 

60Clean old packages from suites. 

61 

62 -n, --no-action don't do anything 

63 -h, --help show this help and exit 

64 -m, --maximum maximum number of files to remove""") 

65 sys.exit(exit_code) 

66 

67################################################################################ 

68 

69 

70def check_binaries(now_date, session): 

71 Logger.log(["Checking for orphaned binary packages..."]) 

72 

73 # Get the list of binary packages not in a suite and mark them for 

74 # deletion. 

75 # Check for any binaries which are marked for eventual deletion 

76 # but are now used again. 

77 

78 query = """ 

79 WITH usage AS ( 

80 SELECT 

81 af.archive_id AS archive_id, 

82 af.file_id AS file_id, 

83 af.component_id AS component_id, 

84 BOOL_OR(EXISTS (SELECT 1 FROM bin_associations ba 

85 JOIN suite s ON ba.suite = s.id 

86 WHERE ba.bin = b.id 

87 AND s.archive_id = af.archive_id)) 

88 AS in_use 

89 FROM files_archive_map af 

90 JOIN binaries b ON af.file_id = b.file 

91 GROUP BY af.archive_id, af.file_id, af.component_id 

92 ) 

93 

94 UPDATE files_archive_map af 

95 SET last_used = CASE WHEN usage.in_use THEN NULL ELSE :last_used END 

96 FROM usage, files f, archive 

97 WHERE af.archive_id = usage.archive_id AND af.file_id = usage.file_id AND af.component_id = usage.component_id 

98 AND ((af.last_used IS NULL AND NOT usage.in_use) OR (af.last_used IS NOT NULL AND usage.in_use)) 

99 AND af.file_id = f.id 

100 AND af.archive_id = archive.id 

101 RETURNING archive.name, f.filename, af.last_used IS NULL""" 

102 

103 res = session.execute(query, {'last_used': now_date}) 

104 for i in res: 

105 op = "set lastused" 

106 if i[2]: 106 ↛ 107line 106 didn't jump to line 107, because the condition on line 106 was never true

107 op = "unset lastused" 

108 Logger.log([op, i[0], i[1]]) 

109 

110######################################## 

111 

112 

113def check_sources(now_date, session): 

114 Logger.log(["Checking for orphaned source packages..."]) 

115 

116 # Get the list of source packages not in a suite and not used by 

117 # any binaries. 

118 

119 # Check for any sources which are marked for deletion but which 

120 # are now used again. 

121 

122 # TODO: the UPDATE part is the same as in check_binaries. Merge? 

123 

124 query = """ 

125 WITH usage AS ( 

126 SELECT 

127 af.archive_id AS archive_id, 

128 af.file_id AS file_id, 

129 af.component_id AS component_id, 

130 BOOL_OR(EXISTS (SELECT 1 FROM src_associations sa 

131 JOIN suite s ON sa.suite = s.id 

132 WHERE sa.source = df.source 

133 AND s.archive_id = af.archive_id) 

134 OR EXISTS (SELECT 1 FROM files_archive_map af_bin 

135 JOIN binaries b ON af_bin.file_id = b.file 

136 WHERE b.source = df.source 

137 AND af_bin.archive_id = af.archive_id 

138 AND (af_bin.last_used IS NULL OR af_bin.last_used > ad.delete_date)) 

139 OR EXISTS (SELECT 1 FROM extra_src_references esr 

140 JOIN bin_associations ba ON esr.bin_id = ba.bin 

141 JOIN binaries b ON ba.bin = b.id 

142 JOIN suite s ON ba.suite = s.id 

143 WHERE esr.src_id = df.source 

144 AND s.archive_id = af.archive_id)) 

145 AS in_use 

146 FROM files_archive_map af 

147 JOIN dsc_files df ON af.file_id = df.file 

148 JOIN archive_delete_date ad ON af.archive_id = ad.archive_id 

149 GROUP BY af.archive_id, af.file_id, af.component_id 

150 ) 

151 

152 UPDATE files_archive_map af 

153 SET last_used = CASE WHEN usage.in_use THEN NULL ELSE :last_used END 

154 FROM usage, files f, archive 

155 WHERE af.archive_id = usage.archive_id AND af.file_id = usage.file_id AND af.component_id = usage.component_id 

156 AND ((af.last_used IS NULL AND NOT usage.in_use) OR (af.last_used IS NOT NULL AND usage.in_use)) 

157 AND af.file_id = f.id 

158 AND af.archive_id = archive.id 

159 

160 RETURNING archive.name, f.filename, af.last_used IS NULL 

161 """ 

162 

163 res = session.execute(query, {'last_used': now_date}) 

164 for i in res: 

165 op = "set lastused" 

166 if i[2]: 166 ↛ 167line 166 didn't jump to line 167, because the condition on line 166 was never true

167 op = "unset lastused" 

168 Logger.log([op, i[0], i[1]]) 

169 

170######################################## 

171 

172 

173def check_files(now_date, session): 

174 # FIXME: this is evil; nothing should ever be in this state. if 

175 # they are, it's a bug. 

176 

177 # However, we've discovered it happens sometimes so we print a huge warning 

178 # and then mark the file for deletion. This probably masks a bug somwhere 

179 # else but is better than collecting cruft forever 

180 

181 Logger.log(["Checking for unused files..."]) 

182 q = session.execute(""" 

183 UPDATE files_archive_map af 

184 SET last_used = :last_used 

185 FROM files f, archive 

186 WHERE af.file_id = f.id 

187 AND af.archive_id = archive.id 

188 AND NOT EXISTS (SELECT 1 FROM binaries b WHERE b.file = af.file_id) 

189 AND NOT EXISTS (SELECT 1 FROM dsc_files df WHERE df.file = af.file_id) 

190 AND af.last_used IS NULL 

191 RETURNING archive.name, f.filename""", {'last_used': now_date}) 

192 

193 for x in q: 193 ↛ 194line 193 didn't jump to line 194, because the loop on line 193 never started

194 utils.warn("orphaned file: {0}".format(x)) 

195 Logger.log(["set lastused", x[0], x[1], "ORPHANED FILE"]) 

196 

197 if not Options["No-Action"]: 197 ↛ exitline 197 didn't return from function 'check_files', because the condition on line 197 was never false

198 session.commit() 

199 

200 

201def clean_binaries(now_date, session): 

202 # We do this here so that the binaries we remove will have their 

203 # source also removed (if possible). 

204 

205 # XXX: why doesn't this remove the files here as well? I don't think it 

206 # buys anything keeping this separate 

207 

208 Logger.log(["Deleting from binaries table... "]) 

209 q = session.execute(""" 

210 DELETE FROM binaries b 

211 USING files f 

212 WHERE f.id = b.file 

213 AND NOT EXISTS (SELECT 1 FROM files_archive_map af 

214 JOIN archive_delete_date ad ON af.archive_id = ad.archive_id 

215 WHERE af.file_id = b.file 

216 AND (af.last_used IS NULL OR af.last_used > ad.delete_date)) 

217 RETURNING f.filename 

218 """) 

219 for b in q: 

220 Logger.log(["delete binary", b[0]]) 

221 

222######################################## 

223 

224 

225def clean(now_date, archives, max_delete, session): 

226 cnf = Config() 

227 

228 count = 0 

229 size = 0 

230 

231 Logger.log(["Cleaning out packages..."]) 

232 

233 morguedir = cnf.get("Dir::Morgue", os.path.join("Dir::Pool", 'morgue')) 

234 morguesubdir = cnf.get("Clean-Suites::MorgueSubDir", 'pool') 

235 

236 # Build directory as morguedir/morguesubdir/year/month/day 

237 dest = os.path.join(morguedir, 

238 morguesubdir, 

239 str(now_date.year), 

240 '%.2d' % now_date.month, 

241 '%.2d' % now_date.day) 

242 

243 if not Options["No-Action"] and not os.path.exists(dest): 

244 os.makedirs(dest) 

245 

246 # Delete from source 

247 Logger.log(["Deleting from source table..."]) 

248 q = session.execute(""" 

249 WITH 

250 deleted_sources AS ( 

251 DELETE FROM source 

252 USING files f 

253 WHERE source.file = f.id 

254 AND NOT EXISTS (SELECT 1 FROM files_archive_map af 

255 JOIN archive_delete_date ad ON af.archive_id = ad.archive_id 

256 WHERE af.file_id = source.file 

257 AND (af.last_used IS NULL OR af.last_used > ad.delete_date)) 

258 RETURNING source.id AS id, f.filename AS filename 

259 ), 

260 deleted_dsc_files AS ( 

261 DELETE FROM dsc_files df WHERE df.source IN (SELECT id FROM deleted_sources) 

262 RETURNING df.file AS file_id 

263 ), 

264 now_unused_source_files AS ( 

265 UPDATE files_archive_map af 

266 SET last_used = '1977-03-13 13:37:42' -- Kill it now. We waited long enough before removing the .dsc. 

267 WHERE af.file_id IN (SELECT file_id FROM deleted_dsc_files) 

268 AND NOT EXISTS (SELECT 1 FROM dsc_files df WHERE df.file = af.file_id) 

269 ) 

270 SELECT filename FROM deleted_sources""") 

271 for s in q: 271 ↛ 272line 271 didn't jump to line 272, because the loop on line 271 never started

272 Logger.log(["delete source", s[0]]) 

273 

274 if not Options["No-Action"]: 274 ↛ 278line 274 didn't jump to line 278, because the condition on line 274 was never false

275 session.commit() 

276 

277 # Delete files from the pool 

278 old_files = session.query(ArchiveFile).filter(sql.text('files_archive_map.last_used <= (SELECT delete_date FROM archive_delete_date ad WHERE ad.archive_id = files_archive_map.archive_id)')).join(Archive) 

279 if max_delete is not None: 279 ↛ 280line 279 didn't jump to line 280, because the condition on line 279 was never true

280 old_files = old_files.limit(max_delete) 

281 Logger.log(["Limiting removals to %d" % max_delete]) 

282 

283 if archives is not None: 

284 archive_ids = [a.archive_id for a in archives] 

285 old_files = old_files.filter(ArchiveFile.archive_id.in_(archive_ids)) 

286 

287 for af in old_files: 

288 filename = af.path 

289 try: 

290 st = os.lstat(filename) 

291 except FileNotFoundError: 

292 Logger.log(["database referred to non-existing file", filename]) 

293 session.delete(af) 

294 continue 

295 Logger.log(["delete archive file", filename]) 

296 if stat.S_ISLNK(st.st_mode): 296 ↛ 297line 296 didn't jump to line 297, because the condition on line 296 was never true

297 count += 1 

298 Logger.log(["delete symlink", filename]) 

299 if not Options["No-Action"]: 

300 os.unlink(filename) 

301 session.delete(af) 

302 elif stat.S_ISREG(st.st_mode): 302 ↛ 321line 302 didn't jump to line 321, because the condition on line 302 was never false

303 size += st.st_size 

304 count += 1 

305 

306 dest_filename = dest + '/' + os.path.basename(filename) 

307 # If the destination file exists; try to find another filename to use 

308 if os.path.lexists(dest_filename): 308 ↛ 309line 308 didn't jump to line 309, because the condition on line 308 was never true

309 dest_filename = utils.find_next_free(dest_filename) 

310 

311 if not Options["No-Action"]: 311 ↛ 287line 311 didn't jump to line 287, because the condition on line 311 was never false

312 if af.archive.use_morgue: 312 ↛ 313line 312 didn't jump to line 313, because the condition on line 312 was never true

313 Logger.log(["move to morgue", filename, dest_filename]) 

314 utils.move(filename, dest_filename) 

315 else: 

316 Logger.log(["removed file", filename]) 

317 os.unlink(filename) 

318 session.delete(af) 

319 

320 else: 

321 utils.fubar("%s is neither symlink nor file?!" % (filename)) 

322 

323 if count > 0: 323 ↛ 327line 323 didn't jump to line 327, because the condition on line 323 was never false

324 Logger.log(["total", count, utils.size_type(size)]) 

325 

326 # Delete entries in files no longer referenced by any archive 

327 query = """ 

328 DELETE FROM files f 

329 WHERE NOT EXISTS (SELECT 1 FROM files_archive_map af WHERE af.file_id = f.id) 

330 """ 

331 session.execute(query) 

332 

333 if not Options["No-Action"]: 333 ↛ exitline 333 didn't return from function 'clean', because the condition on line 333 was never false

334 session.commit() 

335 

336################################################################################ 

337 

338 

339def clean_maintainers(now_date, session): 

340 Logger.log(["Cleaning out unused Maintainer entries..."]) 

341 

342 # TODO Replace this whole thing with one SQL statement 

343 q = session.execute(""" 

344SELECT m.id, m.name FROM maintainer m 

345 WHERE NOT EXISTS (SELECT 1 FROM binaries b WHERE b.maintainer = m.id) 

346 AND NOT EXISTS (SELECT 1 FROM source s WHERE s.maintainer = m.id OR s.changedby = m.id) 

347 AND NOT EXISTS (SELECT 1 FROM src_uploaders u WHERE u.maintainer = m.id)""") 

348 

349 count = 0 

350 

351 for i in q.fetchall(): 351 ↛ 352line 351 didn't jump to line 352, because the loop on line 351 never started

352 maintainer_id = i[0] 

353 Logger.log(["delete maintainer", i[1]]) 

354 if not Options["No-Action"]: 

355 session.execute("DELETE FROM maintainer WHERE id = :maint", {'maint': maintainer_id}) 

356 count += 1 

357 

358 if not Options["No-Action"]: 358 ↛ 361line 358 didn't jump to line 361, because the condition on line 358 was never false

359 session.commit() 

360 

361 if count > 0: 361 ↛ 362line 361 didn't jump to line 362, because the condition on line 361 was never true

362 Logger.log(["total", count]) 

363 

364################################################################################ 

365 

366 

367def clean_fingerprints(now_date, session): 

368 Logger.log(["Cleaning out unused fingerprint entries..."]) 

369 

370 # TODO Replace this whole thing with one SQL statement 

371 q = session.execute(""" 

372SELECT f.id, f.fingerprint FROM fingerprint f 

373 WHERE f.keyring IS NULL 

374 AND NOT EXISTS (SELECT 1 FROM binaries b WHERE b.sig_fpr = f.id) 

375 AND NOT EXISTS (SELECT 1 FROM source s WHERE s.sig_fpr = f.id) 

376 AND NOT EXISTS (SELECT 1 FROM acl_per_source aps WHERE aps.created_by_id = f.id)""") 

377 

378 count = 0 

379 

380 for i in q.fetchall(): 380 ↛ 381line 380 didn't jump to line 381, because the loop on line 380 never started

381 fingerprint_id = i[0] 

382 Logger.log(["delete fingerprint", i[1]]) 

383 if not Options["No-Action"]: 

384 session.execute("DELETE FROM fingerprint WHERE id = :fpr", {'fpr': fingerprint_id}) 

385 count += 1 

386 

387 if not Options["No-Action"]: 387 ↛ 390line 387 didn't jump to line 390, because the condition on line 387 was never false

388 session.commit() 

389 

390 if count > 0: 390 ↛ 391line 390 didn't jump to line 391, because the condition on line 390 was never true

391 Logger.log(["total", count]) 

392 

393################################################################################ 

394 

395 

396def clean_byhash(now_date, session): 

397 cnf = Config() 

398 

399 Logger.log(["Cleaning out unused by-hash files..."]) 

400 

401 q = session.execute(""" 

402 DELETE FROM hashfile h 

403 USING suite s, archive a 

404 WHERE s.id = h.suite_id 

405 AND a.id = s.archive_id 

406 AND h.unreferenced + a.stayofexecution < CURRENT_TIMESTAMP 

407 RETURNING a.path, s.suite_name, h.path""") 

408 count = q.rowcount 

409 

410 if not Options["No-Action"]: 410 ↛ 424line 410 didn't jump to line 424, because the condition on line 410 was never false

411 for base, suite, path in q: 411 ↛ 412line 411 didn't jump to line 412, because the loop on line 411 never started

412 suite_suffix = utils.suite_suffix(suite) 

413 filename = os.path.join(base, 'dists', suite, suite_suffix, path) 

414 try: 

415 os.unlink(filename) 

416 except OSError as exc: 

417 if exc.errno != errno.ENOENT: 

418 raise 

419 Logger.log(['database referred to non-existing file', filename]) 

420 else: 

421 Logger.log(['delete hashfile', suite, path]) 

422 session.commit() 

423 

424 if count > 0: 424 ↛ 425line 424 didn't jump to line 425, because the condition on line 424 was never true

425 Logger.log(["total", count]) 

426 

427################################################################################ 

428 

429 

430def clean_empty_directories(session): 

431 """ 

432 Removes empty directories from pool directories. 

433 """ 

434 

435 Logger.log(["Cleaning out empty directories..."]) 

436 

437 count = 0 

438 

439 cursor = session.execute( 

440 """SELECT DISTINCT(path) FROM archive""" 

441 ) 

442 bases = [x[0] for x in cursor.fetchall()] 

443 

444 for base in bases: 

445 for dirpath, dirnames, filenames in os.walk(base, topdown=False): 

446 if not filenames and not dirnames: 

447 to_remove = os.path.join(base, dirpath) 

448 if not Options["No-Action"]: 448 ↛ 451line 448 didn't jump to line 451, because the condition on line 448 was never false

449 Logger.log(["removing directory", to_remove]) 

450 os.removedirs(to_remove) 

451 count += 1 

452 

453 if count: 453 ↛ exitline 453 didn't return from function 'clean_empty_directories', because the condition on line 453 was never false

454 Logger.log(["total removed directories", count]) 

455 

456################################################################################ 

457 

458 

459def set_archive_delete_dates(now_date, session): 

460 session.execute(""" 

461 CREATE TEMPORARY TABLE archive_delete_date ( 

462 archive_id INT NOT NULL, 

463 delete_date TIMESTAMP NOT NULL 

464 )""") 

465 

466 session.execute(""" 

467 INSERT INTO archive_delete_date 

468 (archive_id, delete_date) 

469 SELECT 

470 archive.id, :now_date - archive.stayofexecution 

471 FROM archive""", {'now_date': now_date}) 

472 

473 session.flush() 

474 

475################################################################################ 

476 

477 

478def main(): 

479 global Options, Logger 

480 

481 cnf = Config() 

482 

483 for i in ["Help", "No-Action", "Maximum"]: 

484 key = "Clean-Suites::Options::%s" % i 

485 if key not in cnf: 485 ↛ 483line 485 didn't jump to line 483, because the condition on line 485 was never false

486 cnf[key] = "" 

487 

488 Arguments = [('h', "help", "Clean-Suites::Options::Help"), 

489 ('a', 'archive', 'Clean-Suites::Options::Archive', 'HasArg'), 

490 ('n', "no-action", "Clean-Suites::Options::No-Action"), 

491 ('m', "maximum", "Clean-Suites::Options::Maximum", "HasArg")] 

492 

493 apt_pkg.parse_commandline(cnf.Cnf, Arguments, sys.argv) 

494 Options = cnf.subtree("Clean-Suites::Options") 

495 

496 if cnf["Clean-Suites::Options::Maximum"] != "": 496 ↛ 497line 496 didn't jump to line 497, because the condition on line 496 was never true

497 try: 

498 # Only use Maximum if it's an integer 

499 max_delete = int(cnf["Clean-Suites::Options::Maximum"]) 

500 if max_delete < 1: 

501 utils.fubar("If given, Maximum must be at least 1") 

502 except ValueError as e: 

503 utils.fubar("If given, Maximum must be an integer") 

504 else: 

505 max_delete = None 

506 

507 if Options["Help"]: 

508 usage() 

509 

510 program = "clean-suites" 

511 if Options['No-Action']: 511 ↛ 512line 511 didn't jump to line 512, because the condition on line 511 was never true

512 program = "clean-suites (no action)" 

513 Logger = daklog.Logger(program, debug=Options["No-Action"]) 

514 

515 session = DBConn().session() 

516 

517 archives = None 

518 if 'Archive' in Options: 

519 archive_names = Options['Archive'].split(',') 

520 archives = session.query(Archive).filter(Archive.archive_name.in_(archive_names)).all() 

521 if len(archives) == 0: 521 ↛ 522line 521 didn't jump to line 522, because the condition on line 521 was never true

522 utils.fubar('Unknown archive.') 

523 

524 now_date = datetime.now() 

525 

526 set_archive_delete_dates(now_date, session) 

527 

528 check_binaries(now_date, session) 

529 clean_binaries(now_date, session) 

530 check_sources(now_date, session) 

531 check_files(now_date, session) 

532 clean(now_date, archives, max_delete, session) 

533 clean_maintainers(now_date, session) 

534 clean_fingerprints(now_date, session) 

535 clean_byhash(now_date, session) 

536 clean_empty_directories(session) 

537 

538 session.rollback() 

539 

540 Logger.close() 

541 

542################################################################################ 

543 

544 

545if __name__ == '__main__': 

546 main()