1#!/usr/bin/env python3
3"""
4Generate an HTML page and PNG charts wih the age of packages in NEW
6To be run once a day.
8The tool stores persistent status in the RRD database and the pickeled queue
9in order to speed up daily runs.
10Directories and database files are created as needed.
12If the RRD directory is deleted the script rebuilds past years from the historical
13logs, and that can take a while, otherwise it just parses fresh data.
14"""
15# Copyright 2020-2024 Federico Ceratto <federico@debian.org>
16# License: GPL-2+
17# Tests are in tests/test_graph_new.py
19from datetime import datetime, timedelta
20from glob import glob
21from textwrap import dedent
22from typing import Optional, Iterator
23from os import makedirs
24import pickle
25import os.path
26import sys
27import subprocess
29import rrdtool # debdeps: python3-rrdtool
31import apt_pkg
32from daklib import utils
34SECONDS_IN_DAY = 86400
35debug_mode = False
38def debug(msg: str) -> None:
39 """Verbose messages for debugging"""
40 if debug_mode: 40 ↛ 41line 40 didn't jump to line 41, because the condition on line 40 was never true
41 print(msg)
44def write_index(out_dir: str) -> None:
45 """Generates index.html page in the output directory"""
46 index_html = """
47<!DOCTYPE html>
48<html lang="en">
49 <head>
50 <meta charset="utf-8" />
51 <link rel="shortcut icon" href="https://www.debian.org/favicon.ico" />
52 <title>Debian NEW queue</title>
53 </head>
54 <body>
55 <p>How much time a new package spends in the NEW queue.</p>
56 <p>Divided in percentiles: 0 (aka age of the oldest package), 50 (aka median) and 99</p>
58 <img src="new_queue_wait_time_all.png">
59 <p></p>
60 <img src="new_queue_wait_time_year.png">
61 <p></p>
62 <img src="new_queue_wait_time_month.png">
63 <p></p>
64 <p>Generated on {}</p>
65 </body>
66</html>
67"""
68 fname = os.path.join(out_dir, "index.html")
69 with open(fname, "w") as f:
70 f.write(index_html.format(datetime.utcnow()))
73def init_rrd(rrdfn: str, t: datetime) -> None:
74 """Initialize empty RRD"""
75 print(f"RRD file {rrdfn} not found. Initializing it now.")
76 rrdtool.create(
77 rrdfn,
78 "--start",
79 t.strftime("%s"),
80 "--step",
81 str(SECONDS_IN_DAY), # 1 update per day
82 "DS:p0:GAUGE:172800:U:U", # 0 percentile
83 "DS:p50:GAUGE:172800:U:U", # 50 percentile
84 "DS:p99:GAUGE:172800:U:U", # 99 percentile
85 "RRA:AVERAGE:0.5:1:3650",
86 )
89def gen_stats(t: datetime, q: dict) -> tuple:
90 """Extract percentiles of package ages"""
91 s = sorted(q.values())
92 p99 = t - s[int(len(s) * 0.50)]
93 p50 = t - s[int(len(s) * 0.05)]
94 p0 = t - s[int(len(s) * 0.02)]
95 return (p0, p50, p99)
98def plot_graph(
99 out_dir: str, fname: str, rrdfn: str, start, end: datetime, title: str
100) -> None:
101 fname = os.path.join(out_dir, fname)
102 print(f"Writing {fname}")
103 args = (
104 "--start",
105 start.strftime("%s"),
106 "--end",
107 end.strftime("%s"),
108 "--width",
109 "900",
110 "--height",
111 "200",
112 "--title",
113 "Days spent in NEW over {}".format(title),
114 "-v",
115 "days",
116 f"DEF:p0={rrdfn}:p0:AVERAGE",
117 f"DEF:p50={rrdfn}:p50:AVERAGE",
118 f"DEF:p99={rrdfn}:p99:AVERAGE",
119 "LINE1:p0#FF8080:0 percentile",
120 "LINE2:p50#8F8080:50 percentile",
121 "LINE3:p99#8FFF80:99 percentile",
122 )
123 # print(" ".join(args))
124 rrdtool.graph(fname, *args)
127def _handle_conf() -> tuple:
128 """Load configuration parameters from Cnf"""
129 Cnf = utils.get_conf()
130 Arguments = [
131 ("h", "help", "GraphNew::Options::Help"),
132 ("r", "rrd", "GraphNew::Options::Rrd", "HasArg"),
133 ("o", "outdir", "GraphNew::Options::Outdir", "HasArg"),
134 ("b", "dak_base_dir", "Dir::Base", "HasArg"),
135 ]
136 apt_pkg.parse_commandline(Cnf, Arguments, sys.argv)
138 try:
139 Options = Cnf.subtree("GraphNew::Options")
140 except KeyError:
141 msg = dedent(
142 """
143 The GraphNew::Options subtree is missing from the config file
144 Please use --rrd ... --outdir ... (see --help) or add:
146 GraphNew
147 {
148 Options
149 {
150 Rrd "<RRD directory>";
151 Outdir "<HTML output directory>";
152 }
153 }
154 """
155 )
156 print(msg, file=sys.stderr)
157 sys.exit(1)
159 if "Help" in Options:
160 usage()
162 if "GraphNew::Options::Rrd" in Cnf:
163 rrd_dir = Cnf["GraphNew::Options::Rrd"]
164 elif "Dir::Rrd" in Cnf:
165 rrd_dir = Cnf["Dir::Rrd"]
166 else:
167 print("No RRD directory configured.\n", file=sys.stderr)
168 sys.exit(1)
170 try:
171 outdir = Cnf["GraphNew::Options::Outdir"]
172 except Exception:
173 print("No output directory configured\n", file=sys.stderr)
174 sys.exit(1)
176 log_dir = Cnf.get("Dir::Log")
177 assert log_dir, "Dir::Log is missing from dak.conf or empty"
178 return rrd_dir, outdir, log_dir
181def skip_file(fn: str, last_update) -> bool:
182 """Skip files that are already processed"""
183 if last_update is None:
184 return False
185 fn = os.path.split(fn)[1]
186 basename, ext = os.path.splitext(fn)
187 if ext == "":
188 return False # always process last, uncompressed file
189 filetime = datetime.strptime(basename, "%Y-%m")
190 return filetime < last_update
193def extract_queue_events(
194 log_dir: str, last_update: Optional[datetime]
195) -> Iterator[tuple]:
196 """Extracts NEW queue events from log files"""
197 compressed_glob = os.path.join(log_dir, "20*xz")
198 debug(f"Scanning for compressed logfiles using glob '{compressed_glob}'")
199 compressed_fns = sorted(glob(compressed_glob))
200 fns = compressed_fns[5:] # 5 oldest logfiles have no events
201 current_fn = os.path.join(log_dir, datetime.utcnow().strftime("%Y-%m"))
202 debug(f"Also adding uncompressed log {current_fn}")
203 fns.append(current_fn)
204 print("%d files to process" % len(fns))
205 events_per_month_cnt = None
206 for fn in fns:
207 if skip_file(fn, last_update):
208 debug(f"Skipping {fn}")
209 continue
211 if events_per_month_cnt is not None:
212 debug(f"Events in the month: {events_per_month_cnt}")
214 events_per_month_cnt = 0
215 debug(f"Processing {fn}")
216 cmd = (
217 """xzgrep -h -e ACCEPT-TO-NEW -e "|NEW ACCEPT|" -e "|REJECT|" """
218 f"""-e "|NEW REJECT|" -e "|Policy Queue " {fn}"""
219 )
220 data = subprocess.check_output(cmd, shell=True).decode()
221 for line in data.splitlines():
222 line = line.rstrip()
223 if line.startswith("#"):
224 continue
225 try:
226 # <timestamp> <stage> <actor> <event> ...
227 ts, _, _, event, remaining = line.split("|", 4)
228 except ValueError:
229 continue
231 if event.startswith("Policy Queue "):
232 _, pname = remaining.split("|", 1)
233 else:
234 pname = remaining
236 assert "|" not in pname, repr(line)
237 assert len(ts) == 14
239 event_time = datetime.strptime(ts, "%Y%m%d%H%M%S")
240 events_per_month_cnt += 1
241 yield pname, event, event_time
244def process_events(
245 events, last_update: Optional[datetime], init_time: Optional[datetime], queue
246) -> tuple:
247 """Process logged events like ACCEPT-TO-NEW, ACCEPT, REJECT and
248 update the RRD database accordingly"""
249 glitch_cnt = 0
250 processed_events_cnt = 0
251 previous_day = None
252 rrdtool_updates = []
253 for pname, event, event_time in events:
254 if last_update and last_update >= event_time: 254 ↛ 255line 254 didn't jump to line 255, because the condition on line 254 was never true
255 continue
257 processed_events_cnt += 1
258 if init_time is None:
259 # Initialise RRD once
260 i = event_time - timedelta(days=1)
261 debug(f"Very first event at: {event_time}")
262 init_time = i
263 previous_day = event_time.date()
265 pname = pname.rsplit(".", 1)[0]
267 # Update queue dict
268 exit_events = set(
269 (
270 "NEW ACCEPT",
271 "NEW REJECT",
272 "Policy Queue ACCEPT",
273 "REJECT",
274 "Policy Queue REJECT",
275 )
276 )
277 if event == "ACCEPT-TO-NEW":
278 queue[pname] = event_time
279 elif event in exit_events: 279 ↛ 285line 279 didn't jump to line 285, because the condition on line 279 was never false
280 # Approved or rejected, exits NEW
281 if pname in queue:
282 queue.pop(pname)
283 else:
284 glitch_cnt += 1
285 elif event == "ACCEPT":
286 pass
288 if event_time.date() != previous_day:
289 # On day change run statistics, update RRD and queue.pickle file
290 stats = gen_stats(event_time, queue)
291 ux = event_time.strftime("%s")
292 v = f"{ux}:{stats[0].days}:{stats[1].days}:{stats[2].days}"
293 rrdtool_updates.append(v)
295 previous_day = event_time.date()
297 debug(f"glitches count: {glitch_cnt}")
298 debug(f"processed events count: {processed_events_cnt}")
299 debug(f"queue len: {len(queue)}")
300 return rrdtool_updates, init_time
303def generate_output(rrdfn: str, out_dir: str, init_time) -> None:
304 """Generate png charts and index.html in the output directory"""
305 end_time = rrdtool.lastupdate(rrdfn)["date"]
306 ofn = "new_queue_wait_time_all.png"
307 plot_graph(out_dir, ofn, rrdfn, init_time, end_time, "all")
309 start_time = end_time - timedelta(days=365)
310 ofn = "new_queue_wait_time_year.png"
311 plot_graph(out_dir, ofn, rrdfn, start_time, end_time, "one year")
313 start_time = end_time - timedelta(days=30)
314 ofn = "new_queue_wait_time_month.png"
315 plot_graph(out_dir, ofn, rrdfn, start_time, end_time, "one month")
316 write_index(out_dir)
319def main(verbose_debug=False) -> None:
320 global debug_mode
321 debug_mode = verbose_debug
322 rrd_dir, out_dir, log_dir = _handle_conf()
323 makedirs(rrd_dir, exist_ok=True)
324 makedirs(out_dir, exist_ok=True)
325 rrdfn = os.path.join(rrd_dir, "graph_new.rrd")
326 queue_fn = os.path.join(rrd_dir, "graph_new.pickle")
327 init_time: Optional[datetime] = None
328 last_update: Optional[datetime] = None
329 if os.path.isfile(rrdfn):
330 init_time = datetime.fromtimestamp(rrdtool.first(rrdfn))
331 last_update = rrdtool.lastupdate(rrdfn)["date"]
332 with open(queue_fn, "rb") as f:
333 queue = pickle.load(f)
334 else:
335 queue = {}
337 events = tuple(extract_queue_events(log_dir, last_update))
338 print(f"{len(events)} events to process")
339 if events:
340 debug(f"First event to process at: {events[0][2]}")
342 rrdtool_updates, begin_time = process_events(events, last_update, init_time, queue)
343 if rrdtool_updates:
344 debug(f"First RRD update: {rrdtool_updates[0]}")
346 with open(queue_fn, "wb") as f:
347 pickle.dump(queue, f)
349 if init_time is None:
350 assert begin_time, "This happens only if no events are found at all"
351 init_time = begin_time
352 init_rrd(rrdfn, begin_time)
354 for u in rrdtool_updates:
355 rrdtool.update(rrdfn, u)
357 generate_output(rrdfn, out_dir, init_time)
360def usage(exit_code=0) -> None:
361 msg = dedent(
362 """\
363 Usage: dak graph-new
364 Graphs the age of packages in the NEW queue.
366 -h, --help show this help and exit.
367 -r, --rrd=key Directory where rrd files to be updated are stored
368 -o, --outdir=key Directory where the output is stored
369 """
370 )
371 print(msg)
372 sys.exit(exit_code)
375# Uncomment this for manual/debugging runs
376# if __name__ == "__main__":
377# main(verbose_debug=True)