#!/usr/bin/env python3

"""
Generate an HTML page and PNG charts with the age of packages in NEW.

To be run once a day.

The tool stores persistent status in the RRD database and the pickled queue
in order to speed up daily runs.
Directories and database files are created as needed.

If the RRD directory is deleted, the script rebuilds past years from the
historical logs, which can take a while; otherwise it only parses fresh data.
"""
# Copyright 2020-2024 Federico Ceratto <federico@debian.org>
# License: GPL-2+
# Tests are in tests/test_graph_new.py

import os.path
import pickle
import subprocess
import sys
from datetime import datetime, timedelta
from glob import glob
from os import makedirs
from textwrap import dedent
from typing import Iterator, Optional

import apt_pkg
import rrdtool  # debdeps: python3-rrdtool

from daklib import utils

SECONDS_IN_DAY = 86400
debug_mode = False


def debug(msg: str) -> None:
    """Verbose messages for debugging"""
    if debug_mode:
        print(msg)


def write_index(out_dir: str) -> None:
    """Generates index.html page in the output directory"""
    index_html = """
<!DOCTYPE html>
<html lang="en">
  <head>
    <meta charset="utf-8" />
    <link rel="shortcut icon" href="https://www.debian.org/favicon.ico" />
    <title>Debian NEW queue</title>
  </head>
  <body>
    <p>How much time a new package spends in the NEW queue.</p>
    <p>Divided into percentiles: 0 (aka age of the oldest package), 50 (aka median) and 99</p>

    <img src="new_queue_wait_time_all.png">
    <p></p>
    <img src="new_queue_wait_time_year.png">
    <p></p>
    <img src="new_queue_wait_time_month.png">
    <p></p>
    <p>Generated on {}</p>
  </body>
</html>
"""
    fname = os.path.join(out_dir, "index.html")
    with open(fname, "w") as f:
        f.write(index_html.format(datetime.utcnow()))


def init_rrd(rrdfn: str, t: datetime) -> None:
    """Initialize empty RRD"""
    print(f"RRD file {rrdfn} not found. Initializing it now.")
    rrdtool.create(
        rrdfn,
        "--start",
        t.strftime("%s"),
        "--step",
        str(SECONDS_IN_DAY),  # 1 update per day
        "DS:p0:GAUGE:172800:U:U",  # 0 percentile
        "DS:p50:GAUGE:172800:U:U",  # 50 percentile
        "DS:p99:GAUGE:172800:U:U",  # 99 percentile
        "RRA:AVERAGE:0.5:1:3650",
    )
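    # Each data source uses a 172800 s (2 day) heartbeat, so a single missed
    # daily update is tolerated before a value becomes UNKNOWN, and the one
    # RRA keeps 3650 daily averages, i.e. roughly ten years of history.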


def gen_stats(t: datetime, q: dict) -> tuple:
    """Extract percentiles of package ages"""
    s = sorted(q.values())  # entry times, oldest first
    p99 = t - s[int(len(s) * 0.99)]  # 99 percentile
    p50 = t - s[int(len(s) * 0.50)]  # 50 percentile: median age
    p0 = t - s[0]  # 0 percentile: age of the oldest package
    return (p0, p50, p99)
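
# Illustrative example: with three packages that entered NEW 90, 10 and 1 days
# before `t`, gen_stats() returns timedelta ages of roughly (90, 10, 1) days
# for the 0, 50 and 99 percentiles respectively.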


def plot_graph(
    out_dir: str, fname: str, rrdfn: str, start, end: datetime, title: str
) -> None:
    fname = os.path.join(out_dir, fname)
    print(f"Writing {fname}")
    args = (
        "--start",
        start.strftime("%s"),
        "--end",
        end.strftime("%s"),
        "--width",
        "900",
        "--height",
        "200",
        "--title",
        "Days spent in NEW over {}".format(title),
        "-v",
        "days",
        f"DEF:p0={rrdfn}:p0:AVERAGE",
        f"DEF:p50={rrdfn}:p50:AVERAGE",
        f"DEF:p99={rrdfn}:p99:AVERAGE",
        "LINE1:p0#FF8080:0 percentile",
        "LINE2:p50#8F8080:50 percentile",
        "LINE3:p99#8FFF80:99 percentile",
    )
    # print(" ".join(args))
    rrdtool.graph(fname, *args)
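
# Note: strftime("%s") (seconds since the epoch) is a glibc extension rather
# than a standard Python/C format code; it works on Debian hosts but is not
# portable and interprets naive datetimes in local time.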


def _handle_conf() -> tuple:
    """Load configuration parameters from Cnf"""
    Cnf = utils.get_conf()
    Arguments = [
        ("h", "help", "GraphNew::Options::Help"),
        ("r", "rrd", "GraphNew::Options::Rrd", "HasArg"),
        ("o", "outdir", "GraphNew::Options::Outdir", "HasArg"),
        ("b", "dak_base_dir", "Dir::Base", "HasArg"),
    ]
    apt_pkg.parse_commandline(Cnf, Arguments, sys.argv)

    try:
        Options = Cnf.subtree("GraphNew::Options")
    except KeyError:
        msg = dedent(
            """
            The GraphNew::Options subtree is missing from the config file
            Please use --rrd ... --outdir ... (see --help) or add:

            GraphNew
            {
              Options
              {
                Rrd "<RRD directory>";
                Outdir "<HTML output directory>";
              }
            }
            """
        )
        print(msg, file=sys.stderr)
        sys.exit(1)

    if "Help" in Options:
        usage()

    if "GraphNew::Options::Rrd" in Cnf:
        rrd_dir = Cnf["GraphNew::Options::Rrd"]
    elif "Dir::Rrd" in Cnf:
        rrd_dir = Cnf["Dir::Rrd"]
    else:
        print("No RRD directory configured.\n", file=sys.stderr)
        sys.exit(1)

    try:
        outdir = Cnf["GraphNew::Options::Outdir"]
    except Exception:
        print("No output directory configured\n", file=sys.stderr)
        sys.exit(1)

    log_dir = Cnf.get("Dir::Log")
    assert log_dir, "Dir::Log is missing from dak.conf or empty"
    return rrd_dir, outdir, log_dir
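
# Typical invocation (the paths are only illustrative; the real ones come from
# dak.conf or the command line):
#   dak graph-new --rrd /srv/dak/rrd --outdir /srv/dak/web/new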


def skip_file(fn: str, last_update: Optional[datetime]) -> bool:
    """Skip files that are already processed"""
    if last_update is None:
        return False
    fn = os.path.split(fn)[1]
    basename, ext = os.path.splitext(fn)
    if ext == "":
        return False  # always process last, uncompressed file
    filetime = datetime.strptime(basename, "%Y-%m")
    return filetime < last_update
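
# Illustrative behaviour, assuming last_update is 2024-06-02: "2024-01.xz" is
# skipped because its month predates the last RRD update, while the current
# uncompressed file (no extension) is always re-parsed.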


def extract_queue_events(
    log_dir: str, last_update: Optional[datetime]
) -> Iterator[tuple]:
    """Extracts NEW queue events from log files"""
    compressed_glob = os.path.join(log_dir, "20*xz")
    debug(f"Scanning for compressed logfiles using glob '{compressed_glob}'")
    compressed_fns = sorted(glob(compressed_glob))
    fns = compressed_fns[5:]  # 5 oldest logfiles have no events
    current_fn = os.path.join(log_dir, datetime.utcnow().strftime("%Y-%m"))
    debug(f"Also adding uncompressed log {current_fn}")
    fns.append(current_fn)
    print("%d files to process" % len(fns))
    events_per_month_cnt = None
    for fn in fns:
        if skip_file(fn, last_update):
            debug(f"Skipping {fn}")
            continue

        if events_per_month_cnt is not None:
            debug(f"Events in the month: {events_per_month_cnt}")

        events_per_month_cnt = 0
        debug(f"Processing {fn}")
        cmd = (
            """xzgrep -h -e ACCEPT-TO-NEW -e "|NEW ACCEPT|" -e "|REJECT|" """
            f"""-e "|NEW REJECT|" -e "|Policy Queue " {fn}"""
        )
        data = subprocess.check_output(cmd, shell=True).decode()
        for line in data.splitlines():
            line = line.rstrip()
            if line.startswith("#"):
                continue
            try:
                # <timestamp> <stage> <actor> <event> ...
                ts, _, _, event, remaining = line.split("|", 4)
            except ValueError:
                continue

            if event == "exception":
                # Seems to be a logged Python exception, ignore it
                continue
            elif event.startswith("Policy Queue "):
                _, pname = remaining.split("|", 1)
            else:
                pname = remaining

            assert "|" not in pname, repr(line)
            assert len(ts) == 14

            event_time = datetime.strptime(ts, "%Y%m%d%H%M%S")
            events_per_month_cnt += 1
            yield pname, event, event_time
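
# Each yielded tuple is (pname, event, event_time); a purely hypothetical
# example: ("hello_2.10-3_amd64.changes", "ACCEPT-TO-NEW",
# datetime(2024, 6, 1, 12, 0, 0)).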


def process_events(
    events, last_update: Optional[datetime], init_time: Optional[datetime], queue
) -> tuple:
    """Process logged events like ACCEPT-TO-NEW, ACCEPT, REJECT and
    update the RRD database accordingly"""
    glitch_cnt = 0
    processed_events_cnt = 0
    previous_day = None
    rrdtool_updates = []
    for pname, event, event_time in events:
        if last_update and last_update >= event_time:
            continue

        processed_events_cnt += 1
        if init_time is None:
            # Initialise RRD once
            i = event_time - timedelta(days=1)
            debug(f"Very first event at: {event_time}")
            init_time = i
            previous_day = event_time.date()

        pname = pname.rsplit(".", 1)[0]

        # Update queue dict
        exit_events = {
            "NEW ACCEPT",
            "NEW REJECT",
            "Policy Queue ACCEPT",
            "REJECT",
            "Policy Queue REJECT",
        }
        if event == "ACCEPT-TO-NEW":
            queue[pname] = event_time
        elif event in exit_events:
            # Approved or rejected, exits NEW
            if pname in queue:
                queue.pop(pname)
            else:
                glitch_cnt += 1
        elif event == "ACCEPT":
            pass

        if event_time.date() != previous_day:
            # On day change run statistics, update RRD and queue.pickle file
            stats = gen_stats(event_time, queue)
            ux = event_time.strftime("%s")
            v = f"{ux}:{stats[0].days}:{stats[1].days}:{stats[2].days}"
            rrdtool_updates.append(v)

            previous_day = event_time.date()

    debug(f"glitches count: {glitch_cnt}")
    debug(f"processed events count: {processed_events_cnt}")
    debug(f"queue len: {len(queue)}")
    return rrdtool_updates, init_time
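
# Each queued update is an rrdtool update string "<unix time>:<p0>:<p50>:<p99>"
# with the ages expressed in whole days, matching the DS order declared in
# init_rrd(), e.g. (illustrative) "1717243200:90:10:1".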


def generate_output(rrdfn: str, out_dir: str, init_time) -> None:
    """Generate png charts and index.html in the output directory"""
    end_time = rrdtool.lastupdate(rrdfn)["date"]
    ofn = "new_queue_wait_time_all.png"
    plot_graph(out_dir, ofn, rrdfn, init_time, end_time, "all")

    start_time = end_time - timedelta(days=365)
    ofn = "new_queue_wait_time_year.png"
    plot_graph(out_dir, ofn, rrdfn, start_time, end_time, "one year")

    start_time = end_time - timedelta(days=30)
    ofn = "new_queue_wait_time_month.png"
    plot_graph(out_dir, ofn, rrdfn, start_time, end_time, "one month")
    write_index(out_dir)


def main(verbose_debug=False) -> None:
    global debug_mode
    debug_mode = verbose_debug
    rrd_dir, out_dir, log_dir = _handle_conf()
    makedirs(rrd_dir, exist_ok=True)
    makedirs(out_dir, exist_ok=True)
    rrdfn = os.path.join(rrd_dir, "graph_new.rrd")
    queue_fn = os.path.join(rrd_dir, "graph_new.pickle")
    init_time: Optional[datetime] = None
    last_update: Optional[datetime] = None
    if os.path.isfile(rrdfn):
        init_time = datetime.fromtimestamp(rrdtool.first(rrdfn))
        last_update = rrdtool.lastupdate(rrdfn)["date"]
        with open(queue_fn, "rb") as f:
            queue = pickle.load(f)
    else:
        queue = {}

    events = tuple(extract_queue_events(log_dir, last_update))
    print(f"{len(events)} events to process")
    if events:
        debug(f"First event to process at: {events[0][2]}")

    rrdtool_updates, begin_time = process_events(events, last_update, init_time, queue)
    if rrdtool_updates:
        debug(f"First RRD update: {rrdtool_updates[0]}")

    with open(queue_fn, "wb") as f:
        pickle.dump(queue, f)

    if init_time is None:
        assert begin_time, "This happens only if no events are found at all"
        init_time = begin_time
        init_rrd(rrdfn, begin_time)

    for u in rrdtool_updates:
        rrdtool.update(rrdfn, u)

    generate_output(rrdfn, out_dir, init_time)


def usage(exit_code=0) -> None:
    msg = dedent(
        """\
        Usage: dak graph-new
        Graphs the age of packages in the NEW queue.

        -h, --help         show this help and exit.
        -r, --rrd=key      Directory where rrd files to be updated are stored
        -o, --outdir=key   Directory where the output is stored
        """
    )
    print(msg)
    sys.exit(exit_code)


# Uncomment this for manual/debugging runs
# if __name__ == "__main__":
#     main(verbose_debug=True)