Coverage for dak/generate_index_diffs.py: 83%

184 statements  

« prev     ^ index     » next       coverage.py v7.6.0, created at 2026-01-04 16:18 +0000

1#! /usr/bin/env python3 

2 

3"""generates partial package updates list""" 

4 

5########################################################### 

6 

7# idea and basic implementation by Anthony, some changes by Andreas 

8# parts are stolen from 'dak generate-releases' 

9# 

10# Copyright (C) 2004, 2005, 2006 Anthony Towns <aj@azure.humbug.org.au> 

11# Copyright (C) 2004, 2005 Andreas Barth <aba@not.so.argh.org> 

12 

13# This program is free software; you can redistribute it and/or modify 

14# it under the terms of the GNU General Public License as published by 

15# the Free Software Foundation; either version 2 of the License, or 

16# (at your option) any later version. 

17 

18# This program is distributed in the hope that it will be useful, 

19# but WITHOUT ANY WARRANTY; without even the implied warranty of 

20# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

21# GNU General Public License for more details. 

22 

23# You should have received a copy of the GNU General Public License 

24# along with this program; if not, write to the Free Software 

25# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 

26 

27 

28# < elmo> bah, don't bother me with annoying facts 

29# < elmo> I was on a roll 

30 

31 

32################################################################################ 

33 

34import asyncio 

35import errno 

36import os 

37import re 

38import sys 

39import time 

40import traceback 

41from typing import NoReturn 

42 

43import apt_pkg 

44 

45from daklib import pdiff, utils 

46from daklib.dbconn import ( 

47 Archive, 

48 Component, 

49 DBConn, 

50 Suite, 

51 get_suite, 

52 get_suite_architectures, 

53) 

54from daklib.pdiff import PDiffIndex 

55 

# Translation indices (Translation-<lang>.bz2 / .xz) are also eligible for pdiffs.
re_includeinpdiff = re.compile(r"(Translation-[a-zA-Z_]+\.(?:bz2|xz))")

################################################################################

# Populated by main(): the dak configuration and the parsed
# "Generate-Index-Diffs::Options" subtree.
Cnf: apt_pkg.Configuration
Options: apt_pkg.Configuration

62 

63################################################################################ 

64 

65 

def usage(exit_code=0) -> NoReturn:
    """Print the command-line help for generate-index-diffs and exit."""
    help_text = """Usage: dak generate-index-diffs [OPTIONS] [suites]
Write out ed-style diffs to Packages/Source lists

  -h, --help            show this help and exit
  -a <archive>          generate diffs for suites in <archive>
  -c                    give the canonical path of the file
  -p                    name for the patch (defaults to current time)
  -d                    name for the hardlink farm for status
  -m                    how many diffs to generate
  -n                    take no action
  -v                    be verbose and list each file as we work on it
 """
    print(help_text)
    sys.exit(exit_code)

82 

83 

def tryunlink(file: str) -> None:
    """Remove *file*, downgrading any OSError to a printed warning."""
    try:
        os.remove(file)
    except OSError:
        print("warning: removing of %s denied" % (file))

89 

90 

91def smartstat(file: str) -> tuple[str, os.stat_result] | tuple[None, None]: 

92 for ext in ["", ".gz", ".bz2", ".xz", ".zst"]: 

93 if os.path.isfile(file + ext): 

94 return (ext, os.stat(file + ext)) 

95 return (None, None) 

96 

97 

async def smartlink(f: str, t: str) -> None:
    """Materialise an uncompressed copy of index *f* at path *t*.

    An uncompressed *f* is hardlinked to *t*; otherwise the first existing
    compressed variant (.gz, .bz2, .xz, .zst — in that order) is decompressed
    into *t*.  Raises OSError (ENOENT) when no variant of *f* exists.
    """

    async def _decompress(cmd, src, dest):
        # Stream src through the external decompressor into dest.
        with open(src, "rb") as infd, open(dest, "wb") as outfd:
            await pdiff.asyncio_check_call(
                *cmd,
                stdin=infd,
                stdout=outfd,
            )

    if os.path.isfile(f):
        os.link(f, t)
        return

    decompressors = (
        (".gz", ["gzip", "-d"]),
        (".bz2", ["bzip2", "-d"]),
        (".xz", ["xz", "-d", "-T0"]),
        (".zst", ["zstd", "--decompress"]),
    )
    for suffix, cmd in decompressors:
        candidate = f + suffix
        if os.path.isfile(candidate):
            await _decompress(cmd, candidate, t)
            return

    print("missing: %s" % (f))
    raise OSError(errno.ENOENT, os.strerror(errno.ENOENT), f)

120 

121 

async def genchanges(
    Options: apt_pkg.Configuration,
    outdir: str,
    oldfile: str,
    origfile: str,
    maxdiffs=56,
    merged_pdiffs=False,
) -> None:
    """Generate a pdiff for one index file and refresh its pdiff Index.

    :param Options: parsed options; reads "NoAct", "PatchName" and
        (optionally) "CanonicalPath"
    :param outdir: directory holding the pdiffs and the Index file
        (typically ``<index>.diff``)
    :param oldfile: path (without compression extension) of the hardlink-farm
        copy of the index from the previous run
    :param origfile: path (without compression extension) of the current
        index in the archive
    :param maxdiffs: how many historic patches to keep (may arrive as a
        string from the config; converted with int() below)
    :param merged_pdiffs: whether the suite uses merged pdiffs
    """
    if "NoAct" in Options:
        print(
            "Not acting on: od: %s, oldf: %s, origf: %s, md: %s"
            % (outdir, oldfile, origfile, maxdiffs)
        )
        return

    patchname = Options["PatchName"]

    # origfile = /path/to/Packages
    # oldfile = ./Packages
    # newfile = ./Packages.tmp

    # (outdir, oldfile, origfile) = argv

    (oldext, oldstat) = smartstat(oldfile)
    (origext, origstat) = smartstat(origfile)
    if not origstat:
        print("%s: doesn't exist" % (origfile))
        return
    # orig file with the (new) compression extension in case it changed
    assert origext is not None
    old_full_path = oldfile + origext
    resolved_orig_path = os.path.realpath(origfile + origext)

    if not oldstat:
        print("%s: initial run" % origfile)
        # The target file might have been copying over the symlink as an accident
        # in a previous run.
        if os.path.islink(old_full_path):
            os.unlink(old_full_path)
        os.link(resolved_orig_path, old_full_path)
        return
    assert oldext is not None

    # Same st_ino/st_dev => the farm copy is already a hardlink to the
    # current index, i.e. nothing changed since the last run.
    if oldstat[1:3] == origstat[1:3]:
        return

    upd = PDiffIndex(outdir, int(maxdiffs), merged_pdiffs)

    if "CanonicalPath" in Options:
        upd.can_path = Options["CanonicalPath"]

    # generate_and_add_patch_file needs an uncompressed file
    # The `newfile` variable is our uncompressed copy of `origfile` thanks to
    # smartlink
    newfile = oldfile + ".new"
    if os.path.exists(newfile):
        os.unlink(newfile)

    await smartlink(origfile, newfile)

    try:
        await upd.generate_and_add_patch_file(oldfile, newfile, patchname)
    finally:
        # Always drop the temporary uncompressed copy, even on failure.
        os.unlink(newfile)

    upd.prune_patch_history()

    for obsolete_patch in upd.find_obsolete_patches():
        tryunlink(obsolete_patch)

    upd.update_index()

    if oldfile + oldext != old_full_path and os.path.islink(old_full_path):
        # The target file might have been copying over the symlink as an accident
        # in a previous run.
        os.unlink(old_full_path)

    # Replace the farm copy with a hardlink to the current index so the next
    # run can diff against it (and detect no-change via the inode check above).
    os.unlink(oldfile + oldext)
    os.link(resolved_orig_path, old_full_path)

201 

202 

def main() -> None:
    """Entry point for ``dak generate-index-diffs``.

    Collects every index file (Translation, Contents, Packages/Sources) for
    the selected suites, schedules one genchanges() coroutine per index, and
    finally runs them with a bounded level of parallelism.
    """
    global Cnf, Options

    os.umask(0o002)

    Cnf = utils.get_conf()
    Arguments = [
        ("h", "help", "Generate-Index-Diffs::Options::Help"),
        ("a", "archive", "Generate-Index-Diffs::Options::Archive", "hasArg"),
        ("c", None, "Generate-Index-Diffs::Options::CanonicalPath", "hasArg"),
        ("p", "patchname", "Generate-Index-Diffs::Options::PatchName", "hasArg"),
        ("d", "tmpdir", "Generate-Index-Diffs::Options::TempDir", "hasArg"),
        ("m", "maxdiffs", "Generate-Index-Diffs::Options::MaxDiffs", "hasArg"),
        ("n", "no-act", "Generate-Index-Diffs::Options::NoAct"),
        ("v", "verbose", "Generate-Index-Diffs::Options::Verbose"),
    ]
    suites = apt_pkg.parse_commandline(Cnf, Arguments, sys.argv)  # type: ignore[attr-defined]
    Options = Cnf.subtree("Generate-Index-Diffs::Options")  # type: ignore[attr-defined]
    if "Help" in Options:
        usage()

    # Per-index-type patch-history limits, each falling back to the default.
    maxdiffs = Options.get("MaxDiffs::Default", "56")
    maxpackages = Options.get("MaxDiffs::Packages", maxdiffs)
    maxcontents = Options.get("MaxDiffs::Contents", maxdiffs)
    maxsources = Options.get("MaxDiffs::Sources", maxdiffs)

    # can only be set via config at the moment
    max_parallel = int(Options.get("MaxParallel", "8"))

    if "PatchName" not in Options:
        format = "%Y-%m-%d-%H%M.%S"
        Options["PatchName"] = time.strftime(format)  # type: ignore[index]

    session = DBConn().session()
    pending_tasks = []

    # No suites on the command line: process every suite, optionally limited
    # to the archives given with -a.
    if not suites:
        query = session.query(Suite.suite_name)
        if Options.get("Archive"):
            archives = utils.split_args(Options["Archive"])
            query = query.join(Suite.archive).filter(Archive.archive_name.in_(archives))
        suites = [s.suite_name for s in query]

    for suitename in suites:
        print("Processing: " + suitename)

        suiteobj = get_suite(suitename.lower(), session=session)
        assert suiteobj is not None

        # Use the canonical version of the suite name
        suite = suiteobj.suite_name

        if suiteobj.untouchable:
            print("Skipping: " + suite + " (untouchable)")
            continue

        # Only include arch "all" when the suite ships separate arch-all
        # Packages/Contents files (per the separate_*_architecture_all flags).
        skip_all = True
        if (
            suiteobj.separate_contents_architecture_all
            or suiteobj.separate_packages_architecture_all
        ):
            skip_all = False

        architectures = get_suite_architectures(
            suite, skipall=skip_all, session=session
        )
        components = [c.component_name for c in session.query(Component.component_name)]

        suite_suffix = utils.suite_suffix(suitename)
        if components and suite_suffix:
            longsuite = suite + "/" + suite_suffix
        else:
            longsuite = suite

        merged_pdiffs = suiteobj.merged_pdiffs

        tree = os.path.join(suiteobj.archive.path, "dists", longsuite)

        # See if there are Translations which might need a new pdiff
        cwd = os.getcwd()
        for component in components:
            workpath = os.path.join(tree, component, "i18n")
            if os.path.isdir(workpath):
                os.chdir(workpath)
                for dirpath, dirnames, filenames in os.walk(
                    ".", followlinks=True, topdown=True
                ):
                    for entry in filenames:
                        if not re_includeinpdiff.match(entry):
                            continue
                        (fname, fext) = os.path.splitext(entry)
                        processfile = os.path.join(workpath, fname)
                        storename = "%s/%s_%s_%s" % (
                            Options["TempDir"],
                            suite,
                            component,
                            fname,
                        )
                        coroutine = genchanges(
                            Options,
                            processfile + ".diff",
                            storename,
                            processfile,
                            maxdiffs,
                            merged_pdiffs,
                        )
                        pending_tasks.append(coroutine)
        os.chdir(cwd)

        for archobj in architectures:
            architecture = archobj.arch_string

            if architecture == "source":
                longarch = architecture
                packages = "Sources"
                maxsuite = maxsources
            else:
                longarch = "binary-%s" % architecture
                packages = "Packages"
                maxsuite = maxpackages

            for component in components:
                # Process Contents
                file = "%s/%s/Contents-%s" % (tree, component, architecture)

                storename = "%s/%s_%s_contents_%s" % (
                    Options["TempDir"],
                    suite,
                    component,
                    architecture,
                )
                coroutine = genchanges(
                    Options, file + ".diff", storename, file, maxcontents, merged_pdiffs
                )
                pending_tasks.append(coroutine)

                # Process Packages/Sources for this component+architecture.
                file = "%s/%s/%s/%s" % (tree, component, longarch, packages)
                storename = "%s/%s_%s_%s" % (
                    Options["TempDir"],
                    suite,
                    component,
                    architecture,
                )
                coroutine = genchanges(
                    Options, file + ".diff", storename, file, maxsuite, merged_pdiffs
                )
                pending_tasks.append(coroutine)

    asyncio.run(process_pdiff_tasks(pending_tasks, max_parallel))

352 

353 

async def process_pdiff_tasks(pending_coroutines, limit) -> None:
    """Await all pdiff-generation coroutines, at most *limit* concurrently.

    Exceptions raised by individual tasks are printed with a traceback; if
    any task failed, the process exits with status 1 once all tasks have
    completed.

    :param pending_coroutines: coroutines to run (typically genchanges() calls)
    :param limit: maximum number of concurrently running tasks, or None for
        no bound
    """
    if not pending_coroutines:
        # asyncio.wait() raises ValueError on an empty set of awaitables, so
        # bail out early when there is nothing to do (e.g. every suite was
        # skipped as untouchable).
        print("No PDiff generation tasks to process")
        return

    if limit is not None:
        # If there is a limit, wrap the tasks with a semaphore to handle the limit
        semaphore = asyncio.Semaphore(limit)

        async def bounded_task(task):
            async with semaphore:
                return await task

        pending_coroutines = [bounded_task(task) for task in pending_coroutines]

    print(
        f"Processing {len(pending_coroutines)} PDiff generation tasks (parallel limit {limit})"
    )
    start = time.time()
    pending_tasks = [asyncio.create_task(coroutine) for coroutine in pending_coroutines]
    done, pending = await asyncio.wait(pending_tasks)
    duration = round(time.time() - start, 2)

    errors = False

    for task in done:
        try:
            task.result()
        except Exception:
            # Report every failure (not just the first) so one run surfaces
            # all broken indices.
            traceback.print_exc()
            errors = True

    if errors:
        print(f"Processing failed after {duration} seconds")
        sys.exit(1)

    print(f"Processing finished in {duration} seconds")

387 

388 

389################################################################################ 

390 

391 

# Script entry point when executed directly (normally invoked via the
# `dak generate-index-diffs` command wrapper).
if __name__ == "__main__":
    main()