Coverage for dak/graph_new.py: 31%

190 statements  

coverage.py v6.5.0, created at 2025-08-26 22:11 +0000

#!/usr/bin/env python3

"""
Generate an HTML page and PNG charts with the age of packages in NEW

To be run once a day.

The tool stores persistent status in the RRD database and the pickled queue
in order to speed up daily runs.
Directories and database files are created as needed.

If the RRD directory is deleted the script rebuilds past years from the
historical logs, which can take a while; otherwise it only parses fresh data.
"""
# Copyright 2020-2024 Federico Ceratto <federico@debian.org>
# License: GPL-2+
# Tests are in tests/test_graph_new.py

import os.path
import pickle
import subprocess
import sys
from datetime import datetime, timedelta
from glob import glob
from os import makedirs
from textwrap import dedent
from typing import Iterator, Optional

import apt_pkg
import rrdtool  # debdeps: python3-rrdtool

from daklib import utils

SECONDS_IN_DAY = 86400
debug_mode = False


def debug(msg: str) -> None:
    """Verbose messages for debugging"""
    if debug_mode:
        print(msg)


def write_index(out_dir: str) -> None:
    """Generates index.html page in the output directory"""
    index_html = """
<!DOCTYPE html>
<html lang="en">
  <head>
    <meta charset="utf-8" />
    <link rel="shortcut icon" href="https://www.debian.org/favicon.ico" />
    <title>Debian NEW queue</title>
  </head>
  <body>
    <p>How much time a new package spends in the NEW queue.</p>
    <p>Divided in percentiles: 0 (aka age of the oldest package), 50 (aka median) and 99</p>

    <img src="new_queue_wait_time_all.png">
    <p></p>
    <img src="new_queue_wait_time_year.png">
    <p></p>
    <img src="new_queue_wait_time_month.png">
    <p></p>
    <p>Generated on {}</p>
  </body>
</html>
"""
    fname = os.path.join(out_dir, "index.html")
    with open(fname, "w") as f:
        f.write(index_html.format(datetime.utcnow()))


def init_rrd(rrdfn: str, t: datetime) -> None:
    """Initialize empty RRD"""
    print(f"RRD file {rrdfn} not found. Initializing it now.")
    rrdtool.create(
        rrdfn,
        "--start",
        t.strftime("%s"),
        "--step",
        str(SECONDS_IN_DAY),  # 1 update per day
        # Gauges with a 172800 s (2 days) heartbeat: updates may be up to two
        # days apart before a data point is stored as unknown.
        "DS:p0:GAUGE:172800:U:U",  # 0 percentile
        "DS:p50:GAUGE:172800:U:U",  # 50 percentile
        "DS:p99:GAUGE:172800:U:U",  # 99 percentile
        # A single AVERAGE archive keeping 3650 daily rows (roughly ten years)
        "RRA:AVERAGE:0.5:1:3650",
    )


def gen_stats(t: datetime, q: dict) -> tuple:
    """Extract percentiles of package ages"""
    # q maps package name -> time it entered NEW; sorting the values puts the
    # oldest entry first, so index 0 is the longest-waiting package.
    s = sorted(q.values())
    p0 = t - s[0]
    p50 = t - s[int(len(s) * 0.50)]
    p99 = t - s[int(len(s) * 0.99)]
    return (p0, p50, p99)

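# A minimal illustration of gen_stats (a sketch added here, not part of the
# original script): with a toy queue of two packages, the 0 percentile is
# simply the age of the oldest entry.
#
#     q = {"foo": datetime(2024, 1, 1), "bar": datetime(2024, 5, 1)}
#     p0, p50, p99 = gen_stats(datetime(2024, 6, 1), q)
#     p0.days   # 152 -> "foo" has been waiting about five months
#     p50.days  # 31  -> with only two entries the median falls on "bar"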

def plot_graph(
    out_dir: str, fname: str, rrdfn: str, start: datetime, end: datetime, title: str
) -> None:
    fname = os.path.join(out_dir, fname)
    print(f"Writing {fname}")
    args = (
        "--start",
        start.strftime("%s"),
        "--end",
        end.strftime("%s"),
        "--width",
        "900",
        "--height",
        "200",
        "--title",
        "Days spent in NEW over {}".format(title),
        "-v",
        "days",
        f"DEF:p0={rrdfn}:p0:AVERAGE",
        f"DEF:p50={rrdfn}:p50:AVERAGE",
        f"DEF:p99={rrdfn}:p99:AVERAGE",
        "LINE1:p0#FF8080:0 percentile",
        "LINE2:p50#8F8080:50 percentile",
        "LINE3:p99#8FFF80:99 percentile",
    )
    # print(" ".join(args))
    rrdtool.graph(fname, *args)


def _handle_conf() -> tuple:
    """Load configuration parameters from Cnf"""
    Cnf = utils.get_conf()
    Arguments = [
        ("h", "help", "GraphNew::Options::Help"),
        ("r", "rrd", "GraphNew::Options::Rrd", "HasArg"),
        ("o", "outdir", "GraphNew::Options::Outdir", "HasArg"),
        ("b", "dak_base_dir", "Dir::Base", "HasArg"),
    ]
    apt_pkg.parse_commandline(Cnf, Arguments, sys.argv)

    try:
        Options = Cnf.subtree("GraphNew::Options")
    except KeyError:
        msg = dedent(
            """
            The GraphNew::Options subtree is missing from the config file
            Please use --rrd ... --outdir ... (see --help) or add:

            GraphNew
            {
              Options
              {
                Rrd "<RRD directory>";
                Outdir "<HTML output directory>";
              }
            }
            """
        )
        print(msg, file=sys.stderr)
        sys.exit(1)

    if "Help" in Options:
        usage()

    if "GraphNew::Options::Rrd" in Cnf:
        rrd_dir = Cnf["GraphNew::Options::Rrd"]
    elif "Dir::Rrd" in Cnf:
        rrd_dir = Cnf["Dir::Rrd"]
    else:
        print("No RRD directory configured.\n", file=sys.stderr)
        sys.exit(1)

    try:
        outdir = Cnf["GraphNew::Options::Outdir"]
    except Exception:
        print("No output directory configured\n", file=sys.stderr)
        sys.exit(1)

    log_dir = Cnf.get("Dir::Log")
    assert log_dir, "Dir::Log is missing from dak.conf or empty"
    return rrd_dir, outdir, log_dir


def skip_file(fn: str, last_update: Optional[datetime]) -> bool:
    """Skip files that are already processed"""
    if last_update is None:
        return False
    fn = os.path.split(fn)[1]
    basename, ext = os.path.splitext(fn)
    if ext == "":
        return False  # always process the last, uncompressed file
    filetime = datetime.strptime(basename, "%Y-%m")
    return filetime < last_update

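# Sketch of skip_file behaviour (illustrative only; the paths are made up):
# an already-parsed monthly archive is skipped, while the current uncompressed
# log (no extension) is always re-read.
#
#     skip_file("/srv/log/2024-05.xz", datetime(2024, 6, 30))  # True: skip
#     skip_file("/srv/log/2024-08", datetime(2024, 6, 30))     # False: process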

def extract_queue_events(
    log_dir: str, last_update: Optional[datetime]
) -> Iterator[tuple]:
    """Extracts NEW queue events from log files"""
    compressed_glob = os.path.join(log_dir, "20*xz")
    debug(f"Scanning for compressed logfiles using glob '{compressed_glob}'")
    compressed_fns = sorted(glob(compressed_glob))
    fns = compressed_fns[5:]  # the 5 oldest logfiles have no events
    current_fn = os.path.join(log_dir, datetime.utcnow().strftime("%Y-%m"))
    debug(f"Also adding uncompressed log {current_fn}")
    fns.append(current_fn)
    print("%d files to process" % len(fns))
    events_per_month_cnt = None
    for fn in fns:
        if skip_file(fn, last_update):
            debug(f"Skipping {fn}")
            continue

        if events_per_month_cnt is not None:
            debug(f"Events in the month: {events_per_month_cnt}")

        events_per_month_cnt = 0
        debug(f"Processing {fn}")
        cmd = (
            """xzgrep -h -e ACCEPT-TO-NEW -e "|NEW ACCEPT|" -e "|REJECT|" """
            f"""-e "|NEW REJECT|" -e "|Policy Queue " {fn}"""
        )
        data = subprocess.check_output(cmd, shell=True).decode()
        for line in data.splitlines():
            line = line.rstrip()
            if line.startswith("#"):
                continue
            try:
                # <timestamp>|<stage>|<actor>|<event>|...
                ts, _, _, event, remaining = line.split("|", 4)
            except ValueError:
                continue

            if event == "exception":
                # Seems to have logged a Python exception, ignore
                continue
            elif event.startswith("Policy Queue "):
                _, pname = remaining.split("|", 1)
            else:
                pname = remaining

            assert "|" not in pname, repr(line)
            assert len(ts) == 14

            event_time = datetime.strptime(ts, "%Y%m%d%H%M%S")
            events_per_month_cnt += 1
            yield pname, event, event_time

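# extract_queue_events yields (package name, event, event time) tuples parsed
# from pipe-separated dak log lines: <YYYYmmddHHMMSS>|<stage>|<actor>|<event>|...
# A purely hypothetical line, invented here for illustration:
#
#     20240101120000|process-upload|dak|ACCEPT-TO-NEW|hello_1.0-1
#
# would yield ("hello_1.0-1", "ACCEPT-TO-NEW", datetime(2024, 1, 1, 12, 0)).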

def process_events(
    events, last_update: Optional[datetime], init_time: Optional[datetime], queue
) -> tuple:
    """Process logged events like ACCEPT-TO-NEW, ACCEPT, REJECT and
    update the RRD database accordingly"""
    glitch_cnt = 0
    processed_events_cnt = 0
    previous_day = None
    rrdtool_updates = []
    for pname, event, event_time in events:
        if last_update and last_update >= event_time:
            continue

        processed_events_cnt += 1
        if init_time is None:
            # Initialise RRD once
            i = event_time - timedelta(days=1)
            debug(f"Very first event at: {event_time}")
            init_time = i
            previous_day = event_time.date()

        pname = pname.rsplit(".", 1)[0]

        # Update queue dict
        exit_events = set(
            (
                "NEW ACCEPT",
                "NEW REJECT",
                "Policy Queue ACCEPT",
                "REJECT",
                "Policy Queue REJECT",
            )
        )
        if event == "ACCEPT-TO-NEW":
            queue[pname] = event_time
        elif event in exit_events:
            # Approved or rejected, exits NEW
            if pname in queue:
                queue.pop(pname)
            else:
                glitch_cnt += 1
        elif event == "ACCEPT":
            pass

        if event_time.date() != previous_day:
            # On day change run statistics, update RRD and queue.pickle file
            stats = gen_stats(event_time, queue)
            ux = event_time.strftime("%s")
            v = f"{ux}:{stats[0].days}:{stats[1].days}:{stats[2].days}"
            rrdtool_updates.append(v)

            previous_day = event_time.date()

    debug(f"glitches count: {glitch_cnt}")
    debug(f"processed events count: {processed_events_cnt}")
    debug(f"queue len: {len(queue)}")
    return rrdtool_updates, init_time

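# The strings collected in rrdtool_updates follow the plain rrdtool update
# format "<unix timestamp>:<p0 days>:<p50 days>:<p99 days>", one per day
# boundary seen in the logs.  A made-up example of what main() later passes
# to rrdtool.update():
#
#     rrdtool.update("graph_new.rrd", "1717200000:152:31:3")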

def generate_output(rrdfn: str, out_dir: str, init_time) -> None:
    """Generate png charts and index.html in the output directory"""
    end_time = rrdtool.lastupdate(rrdfn)["date"]
    ofn = "new_queue_wait_time_all.png"
    plot_graph(out_dir, ofn, rrdfn, init_time, end_time, "all")

    start_time = end_time - timedelta(days=365)
    ofn = "new_queue_wait_time_year.png"
    plot_graph(out_dir, ofn, rrdfn, start_time, end_time, "one year")

    start_time = end_time - timedelta(days=30)
    ofn = "new_queue_wait_time_month.png"
    plot_graph(out_dir, ofn, rrdfn, start_time, end_time, "one month")
    write_index(out_dir)


def main(verbose_debug=False) -> None:
    global debug_mode
    debug_mode = verbose_debug
    rrd_dir, out_dir, log_dir = _handle_conf()
    makedirs(rrd_dir, exist_ok=True)
    makedirs(out_dir, exist_ok=True)
    rrdfn = os.path.join(rrd_dir, "graph_new.rrd")
    queue_fn = os.path.join(rrd_dir, "graph_new.pickle")
    init_time: Optional[datetime] = None
    last_update: Optional[datetime] = None
    if os.path.isfile(rrdfn):
        init_time = datetime.fromtimestamp(rrdtool.first(rrdfn))
        last_update = rrdtool.lastupdate(rrdfn)["date"]
        with open(queue_fn, "rb") as f:
            queue = pickle.load(f)
    else:
        queue = {}

    events = tuple(extract_queue_events(log_dir, last_update))
    print(f"{len(events)} events to process")
    if events:
        debug(f"First event to process at: {events[0][2]}")

    rrdtool_updates, begin_time = process_events(events, last_update, init_time, queue)
    if rrdtool_updates:
        debug(f"First RRD update: {rrdtool_updates[0]}")

    with open(queue_fn, "wb") as f:
        pickle.dump(queue, f)

    if init_time is None:
        assert begin_time, "begin_time can be unset only when no events were found at all"
        init_time = begin_time
        init_rrd(rrdfn, begin_time)

    for u in rrdtool_updates:
        rrdtool.update(rrdfn, u)

    generate_output(rrdfn, out_dir, init_time)


def usage(exit_code=0) -> None:
    msg = dedent(
        """\
        Usage: dak graph-new
        Graphs the age of packages in the NEW queue.

          -h, --help        show this help and exit.
          -r, --rrd=key     Directory where rrd files to be updated are stored
          -o, --outdir=key  Directory where the output is stored
        """
    )
    print(msg)
    sys.exit(exit_code)


# Uncomment this for manual/debugging runs
# if __name__ == "__main__":
#     main(verbose_debug=True)