#! /usr/bin/env python3

"""generates partial package updates list"""

###########################################################

# idea and basic implementation by Anthony, some changes by Andreas
# parts are stolen from 'dak generate-releases'
#
# Copyright (C) 2004, 2005, 2006 Anthony Towns <aj@azure.humbug.org.au>
# Copyright (C) 2004, 2005 Andreas Barth <aba@not.so.argh.org>

# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

################################################################################

# < elmo> bah, don't bother me with annoying facts
# < elmo> I was on a roll

################################################################################

import asyncio
import errno
import os
import re
import sys
import time
import traceback
from typing import NoReturn

import apt_pkg

from daklib import pdiff, utils
from daklib.dbconn import (
    Archive,
    Component,
    DBConn,
    Suite,
    get_suite,
    get_suite_architectures,
)
from daklib.pdiff import PDiffIndex
# Matches Translation-<lang> index files (only .bz2/.xz compressed variants)
# that should be considered for pdiff generation.
re_includeinpdiff = re.compile(r"(Translation-[a-zA-Z_]+\.(?:bz2|xz))")

################################################################################

# Populated in main(): the dak configuration and the parsed
# "Generate-Index-Diffs::Options" subtree.
Cnf: apt_pkg.Configuration
Options: apt_pkg.Configuration

################################################################################
def usage(exit_code=0) -> NoReturn:
    """Print the command-line help text, then terminate with *exit_code*."""
    help_text = """Usage: dak generate-index-diffs [OPTIONS] [suites]
Write out ed-style diffs to Packages/Source lists

  -h, --help            show this help and exit
  -a <archive>          generate diffs for suites in <archive>
  -c                    give the canonical path of the file
  -p                    name for the patch (defaults to current time)
  -d                    name for the hardlink farm for status
  -m                    how many diffs to generate
  -n                    take no action
  -v                    be verbose and list each file as we work on it
  """
    print(help_text)
    sys.exit(exit_code)
def tryunlink(file: str) -> None:
    """Delete *file* from disk, printing a warning instead of raising on failure."""
    try:
        # os.remove is an alias of os.unlink; failure here is non-fatal.
        os.remove(file)
    except OSError:
        # Deliberately best-effort: an obsolete pdiff we cannot remove
        # should not abort the whole run.
        print("warning: removing of %s denied" % (file))
91def smartstat(file: str) -> tuple[str, os.stat_result] | tuple[None, None]:
92 for ext in ["", ".gz", ".bz2", ".xz", ".zst"]:
93 if os.path.isfile(file + ext):
94 return (ext, os.stat(file + ext))
95 return (None, None)
async def smartlink(f: str, t: str) -> None:
    """Materialise an uncompressed copy of index *f* at path *t*.

    An uncompressed *f* is hardlinked to *t*; otherwise the first available
    compressed variant (.gz, .bz2, .xz, .zst — probed in that order) is
    decompressed into *t*.  Raises OSError (ENOENT) when nothing exists.
    """

    async def _decompress(cmd, inpath, outpath):
        # Stream the compressed file through the external decompressor.
        with open(inpath, "rb") as rfd, open(outpath, "wb") as wfd:
            await pdiff.asyncio_check_call(
                *cmd,
                stdin=rfd,
                stdout=wfd,
            )

    if os.path.isfile(f):
        os.link(f, t)
        return

    # Probe the compressed variants in preference order; stop at the first hit.
    for ext, cmd in (
        (".gz", ["gzip", "-d"]),
        (".bz2", ["bzip2", "-d"]),
        (".xz", ["xz", "-d", "-T0"]),
        (".zst", ["zstd", "--decompress"]),
    ):
        if os.path.isfile(f + ext):
            await _decompress(cmd, f + ext, t)
            return

    print("missing: %s" % (f))
    raise OSError(errno.ENOENT, os.strerror(errno.ENOENT), f)
async def genchanges(
    Options: apt_pkg.Configuration,
    outdir: str,
    oldfile: str,
    origfile: str,
    maxdiffs=56,
    merged_pdiffs=False,
) -> None:
    """Generate one new pdiff patch for an index file and refresh its Index.

    :param Options: parsed command-line options; honours NoAct, PatchName
        and CanonicalPath
    :param outdir: the ``<index>.diff`` directory holding patches and Index
    :param oldfile: path (without compression extension) of the previously
        published snapshot kept in the TempDir hardlink farm
    :param origfile: path (without compression extension) of the current
        index in the archive
    :param maxdiffs: maximum number of patches to keep in the history
        (may arrive as a string; converted with int() below)
    :param merged_pdiffs: whether the suite publishes merged pdiffs
    """
    if "NoAct" in Options:
        print(
            "Not acting on: od: %s, oldf: %s, origf: %s, md: %s"
            % (outdir, oldfile, origfile, maxdiffs)
        )
        return

    patchname = Options["PatchName"]

    # origfile = /path/to/Packages
    # oldfile = ./Packages
    # newfile = ./Packages.tmp

    # (outdir, oldfile, origfile) = argv

    # Locate the on-disk variants (plain or compressed) of both snapshots.
    (oldext, oldstat) = smartstat(oldfile)
    (origext, origstat) = smartstat(origfile)

    if not origstat:
        print("%s: doesn't exist" % (origfile))
        return
    # orig file with the (new) compression extension in case it changed
    assert origext is not None
    old_full_path = oldfile + origext
    resolved_orig_path = os.path.realpath(origfile + origext)

    if not oldstat:
        # No previous snapshot: just record the current index and stop —
        # there is nothing to diff against yet.
        print("%s: initial run" % origfile)
        # The target file might have been copying over the symlink as an accident
        # in a previous run.
        if os.path.islink(old_full_path):
            os.unlink(old_full_path)
        os.link(resolved_orig_path, old_full_path)
        return
    assert oldext is not None

    # Compare (st_ino, st_dev): if snapshot and current index are the same
    # inode, nothing changed and no patch is needed.
    if oldstat[1:3] == origstat[1:3]:
        return

    upd = PDiffIndex(outdir, int(maxdiffs), merged_pdiffs)

    if "CanonicalPath" in Options:
        upd.can_path = Options["CanonicalPath"]

    # generate_and_add_patch_file needs an uncompressed file
    # The `newfile` variable is our uncompressed copy of 'oldfile` thanks to
    # smartlink
    newfile = oldfile + ".new"
    if os.path.exists(newfile):
        os.unlink(newfile)

    await smartlink(origfile, newfile)

    try:
        await upd.generate_and_add_patch_file(oldfile, newfile, patchname)
    finally:
        # Always drop the temporary uncompressed copy, even on failure.
        os.unlink(newfile)

    upd.prune_patch_history()

    for obsolete_patch in upd.find_obsolete_patches():
        tryunlink(obsolete_patch)

    upd.update_index()

    # The compression extension may have changed between runs; when it did,
    # the stale old-extension snapshot is handled below.
    if oldfile + oldext != old_full_path and os.path.islink(old_full_path):
        # The target file might have been copying over the symlink as an accident
        # in a previous run.
        os.unlink(old_full_path)

    # Replace the farm snapshot with a hardlink to the current index so the
    # next run diffs against today's content.
    os.unlink(oldfile + oldext)
    os.link(resolved_orig_path, old_full_path)
def main() -> None:
    """Entry point: queue and run pdiff generation for the selected suites.

    Parses command-line options, determines the suites to process (all
    suites — optionally restricted to the archives given with -a — when
    none are listed on the command line), collects one genchanges()
    coroutine per index file (Translation, Contents, Packages/Sources) and
    runs them through process_pdiff_tasks().
    """
    global Cnf, Options

    os.umask(0o002)

    Cnf = utils.get_conf()
    Arguments = [
        ("h", "help", "Generate-Index-Diffs::Options::Help"),
        ("a", "archive", "Generate-Index-Diffs::Options::Archive", "hasArg"),
        ("c", None, "Generate-Index-Diffs::Options::CanonicalPath", "hasArg"),
        ("p", "patchname", "Generate-Index-Diffs::Options::PatchName", "hasArg"),
        ("d", "tmpdir", "Generate-Index-Diffs::Options::TempDir", "hasArg"),
        ("m", "maxdiffs", "Generate-Index-Diffs::Options::MaxDiffs", "hasArg"),
        ("n", "no-act", "Generate-Index-Diffs::Options::NoAct"),
        ("v", "verbose", "Generate-Index-Diffs::Options::Verbose"),
    ]
    suites = apt_pkg.parse_commandline(Cnf, Arguments, sys.argv)  # type: ignore[attr-defined]
    Options = Cnf.subtree("Generate-Index-Diffs::Options")  # type: ignore[attr-defined]
    if "Help" in Options:
        usage()

    # Per-index-type diff history limits, all falling back to the default.
    # NOTE: these stay strings here; genchanges() converts with int().
    maxdiffs = Options.get("MaxDiffs::Default", "56")
    maxpackages = Options.get("MaxDiffs::Packages", maxdiffs)
    maxcontents = Options.get("MaxDiffs::Contents", maxdiffs)
    maxsources = Options.get("MaxDiffs::Sources", maxdiffs)

    # can only be set via config at the moment
    max_parallel = int(Options.get("MaxParallel", "8"))

    if "PatchName" not in Options:
        # Default patch name: timestamp of this run.
        format = "%Y-%m-%d-%H%M.%S"
        Options["PatchName"] = time.strftime(format)  # type: ignore[index]

    session = DBConn().session()
    pending_tasks = []

    if not suites:
        # No suites on the command line: process every suite, optionally
        # limited to the archives named with -a.
        query = session.query(Suite.suite_name)
        if Options.get("Archive"):
            archives = utils.split_args(Options["Archive"])
            query = query.join(Suite.archive).filter(Archive.archive_name.in_(archives))
        suites = [s.suite_name for s in query]

    for suitename in suites:
        print("Processing: " + suitename)

        suiteobj = get_suite(suitename.lower(), session=session)
        assert suiteobj is not None

        # Use the canonical version of the suite name
        suite = suiteobj.suite_name

        if suiteobj.untouchable:
            print("Skipping: " + suite + " (untouchable)")
            continue

        # Only include arch:all if the suite does not publish it separately.
        skip_all = True
        if (
            suiteobj.separate_contents_architecture_all
            or suiteobj.separate_packages_architecture_all
        ):
            skip_all = False

        architectures = get_suite_architectures(
            suite, skipall=skip_all, session=session
        )
        components = [c.component_name for c in session.query(Component.component_name)]

        suite_suffix = utils.suite_suffix(suitename)
        if components and suite_suffix:
            longsuite = suite + "/" + suite_suffix
        else:
            longsuite = suite

        merged_pdiffs = suiteobj.merged_pdiffs

        tree = os.path.join(suiteobj.archive.path, "dists", longsuite)

        # See if there are Translations which might need a new pdiff
        cwd = os.getcwd()
        for component in components:
            workpath = os.path.join(tree, component, "i18n")
            if os.path.isdir(workpath):
                os.chdir(workpath)
                for dirpath, dirnames, filenames in os.walk(
                    ".", followlinks=True, topdown=True
                ):
                    for entry in filenames:
                        if not re_includeinpdiff.match(entry):
                            continue
                        (fname, fext) = os.path.splitext(entry)
                        processfile = os.path.join(workpath, fname)
                        # Hardlink-farm snapshot name for this translation.
                        storename = "%s/%s_%s_%s" % (
                            Options["TempDir"],
                            suite,
                            component,
                            fname,
                        )
                        coroutine = genchanges(
                            Options,
                            processfile + ".diff",
                            storename,
                            processfile,
                            maxdiffs,
                            merged_pdiffs,
                        )
                        pending_tasks.append(coroutine)
        os.chdir(cwd)

        for archobj in architectures:
            architecture = archobj.arch_string

            if architecture == "source":
                longarch = architecture
                packages = "Sources"
                maxsuite = maxsources
            else:
                longarch = "binary-%s" % architecture
                packages = "Packages"
                maxsuite = maxpackages

            for component in components:
                # Process Contents
                file = "%s/%s/Contents-%s" % (tree, component, architecture)

                storename = "%s/%s_%s_contents_%s" % (
                    Options["TempDir"],
                    suite,
                    component,
                    architecture,
                )
                coroutine = genchanges(
                    Options, file + ".diff", storename, file, maxcontents, merged_pdiffs
                )
                pending_tasks.append(coroutine)

                # Process the Packages/Sources index itself.
                file = "%s/%s/%s/%s" % (tree, component, longarch, packages)
                storename = "%s/%s_%s_%s" % (
                    Options["TempDir"],
                    suite,
                    component,
                    architecture,
                )
                coroutine = genchanges(
                    Options, file + ".diff", storename, file, maxsuite, merged_pdiffs
                )
                pending_tasks.append(coroutine)

    asyncio.run(process_pdiff_tasks(pending_tasks, max_parallel))
async def process_pdiff_tasks(pending_coroutines, limit) -> None:
    """Run the queued pdiff-generation coroutines, at most *limit* at a time.

    :param pending_coroutines: coroutines (genchanges calls) to execute
    :param limit: maximum number of coroutines running concurrently, or
        None for unbounded
    Prints a summary and exits the process with status 1 if any task raised.
    """
    if limit is not None:
        # If there is a limit, wrap the tasks with a semaphore to handle the limit
        semaphore = asyncio.Semaphore(limit)

        async def bounded_task(task):
            async with semaphore:
                return await task

        pending_coroutines = [bounded_task(task) for task in pending_coroutines]

    print(
        f"Processing {len(pending_coroutines)} PDiff generation tasks (parallel limit {limit})"
    )
    start = time.time()
    pending_tasks = [asyncio.create_task(coroutine) for coroutine in pending_coroutines]
    # asyncio.wait() raises ValueError on an empty set, so only wait when
    # there is actually work queued; an empty run is simply a no-op.
    done = set()
    if pending_tasks:
        done, _pending = await asyncio.wait(pending_tasks)
    duration = round(time.time() - start, 2)

    errors = False

    for task in done:
        try:
            task.result()
        except Exception:
            traceback.print_exc()
            errors = True

    if errors:
        print(f"Processing failed after {duration} seconds")
        sys.exit(1)

    print(f"Processing finished {duration} seconds")
389################################################################################
# Script entry point when invoked directly (normally run via `dak`).
if __name__ == "__main__":
    main()