1
2
3 """Various statistical pr0nography fun and games"""
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34 import subprocess
35 import sys
36 import tempfile
37 from datetime import datetime
38 from email.utils import mktime_tz, parsedate_tz
39 from mailbox import mbox
40 from os import listdir
41 from os.path import isfile, join, splitext
42 from re import DOTALL, MULTILINE, findall
43 from sys import stderr
44
45 import apt_pkg
46 from yaml import safe_dump, safe_load
47
48 from daklib import utils
49 from daklib.dbconn import Architecture, DBConn, Suite, get_suite_architectures
50
51
52
# Parsed dak configuration; main() assigns it from utils.get_conf().
Cnf = None

# NEW-queue statistics: one entry per "YYYY-MM" month, plus the running
# "history" totals and the "timestamp" of the newest log entry processed.
stats = {}
# Lookup table consulted by parse_prod() to map a mail sender to a member;
# main() fills it from utils.get_users_from_ldap() in "new" mode.
users = {}
# NOTE(review): not referenced by any code visible in this part of the file.
buffer = 0

# Log month that parse_actions() compares against to decide whether the old
# (lisa/process-new) and/or the new ("NEW <ACTION>") line format is parsed.
FORMAT_SWITCH = "2009-08"
# Actors whose actions are never attributed to a member.
blacklisted = ("dak", "katie")

# Log line recording that an upload was moved into NEW.
NEW = r"^(\d{14})\|(?:jennifer|process-unchecked|.*?\|dak)\|(Moving to new|ACCEPT-TO-NEW)"
# New format: captures timestamp, acting member and the action name.
new_ACTIONS = r"^(\d{14})\|[^\|]*\|(\S+)\|NEW (\S+)[:\|]"
# Old format: everything logged between one "program start"/"program end" pair.
old_ACTIONS = r"(?:lisa|process-new)\|program start\|(.*?)\|(?:lisa|process-new)\|program end"
# Old format: a single accept/reject line inside such a batch.
old_ACTION = r"^(\d{14})\|(?:lisa|process-new)\|(Accepting changes|rejected)\|"
70
71
72
73
def usage(exit_code=0):
    """Print the ``dak stats`` help text and terminate the process.

    :param exit_code: status handed unchanged to :func:`sys.exit`.
    """
    help_text = """Usage: dak stats MODE
Print various stats.

-h, --help show this help and exit.

The following MODEs are available:

arch-space - displays space used by each architecture
pkg-nums - displays the number of packages by suite/architecture
daily-install - displays daily install stats suitable for graphing
new - stores stats about the NEW queue
"""
    print(help_text)
    sys.exit(exit_code)
90
91
92
93
94
def per_arch_space_use():
    """Print the summed file size per architecture, then one line for source.

    Both figures are read straight from the database: binaries are grouped
    by architecture (ordered by size), and the "Source" line sums every
    file whose name matches the source-file pattern in the second query.
    """
    row_template = "%-15.15s %s"
    binary_sql = """
SELECT a.arch_string as Architecture, sum(f.size) AS sum
FROM files f, binaries b, architecture a
WHERE a.id=b.architecture AND f.id=b.file
GROUP BY a.arch_string ORDER BY sum"""
    source_sql = "SELECT sum(size) FROM files WHERE filename ~ '.(diff.gz|tar.gz|dsc)$'"

    session = DBConn().session()

    binary_rows = session.execute(binary_sql).fetchall()
    for row in binary_rows:
        print(row_template % (row[0], row[1]))
    print()

    source_rows = session.execute(source_sql).fetchall()
    print(row_template % ("Source", source_rows[0][0]))
111
112
113
114
115
def daily_install_stats(logfile="2001-11"):
    """Print per-day install statistics gathered from a dak log file.

    Each log line is a ``|``-separated record: field 0 is a timestamp whose
    first eight characters are the day, field 1 the program and field 2 the
    action.  Only lines written by ``katie`` / ``process-accepted`` are
    considered; ``installing changes`` lines count packages and ``installed``
    lines contribute the size found in field 5.

    One line ``<day> <packages> <size in MiB>`` is printed per day, in
    ascending day order.

    :param logfile: path of the log file to read; defaults to the file name
        this function has always used.
    """
    wanted_programs = ("katie", "process-accepted")
    per_day = {}

    # The context manager guarantees the handle is closed; iterating the
    # file object avoids loading the whole log into memory.
    with open(logfile) as fh:
        for line in fh:
            fields = line.strip().split("|")
            if len(fields) < 3:
                # Blank or truncated line: there is no program/action to inspect.
                continue
            program, action = fields[1], fields[2]
            if program not in wanted_programs:
                continue
            if action not in ("installing changes", "installed"):
                continue
            day = per_day.setdefault(fields[0][:8], {"packages": 0, "size": 0.0})
            if action == "installing changes":
                day["packages"] += 1
            else:
                day["size"] += float(fields[5])

    for date in sorted(per_day):
        packages = per_day[date]["packages"]
        size = int(per_day[date]["size"] / 1024.0 / 1024.0)
        print("%s %s %s" % (date, packages, size))
142
143
144
145
146
152
153
def number_of_packages():
    """Print a table of package counts per architecture and suite.

    Rows are architectures (sorted by name), columns are suites.  A cell
    holds the number of binary associations for that pair; for the "source"
    architecture the source associations of the suite are added on top.
    Architectures that are not part of a suite are shown as "-".
    """
    session = DBConn().session()

    # Name <-> id lookup tables for suites and architectures.
    suites = {}
    suite_ids = {}
    for suite_row in session.query(Suite).all():
        suites[suite_row.suite_id] = suite_row.suite_name
        suite_ids[suite_row.suite_name] = suite_row.suite_id

    arches = {}
    arch_ids = {}
    for arch_row in session.query(Architecture).all():
        arches[arch_row.arch_id] = arch_row.arch_string
        arch_ids[arch_row.arch_string] = arch_row.arch_id

    # Start every (suite, architecture) cell at zero.
    counts = {sid: {aid: 0 for aid in arches} for sid in suites}

    # Binary packages, already grouped by the database.
    binary_rows = session.execute(
        """SELECT suite, architecture, COUNT(suite)
FROM bin_associations
LEFT JOIN binaries ON bin = binaries.id
GROUP BY suite, architecture"""
    ).fetchall()
    for row in binary_rows:
        counts[row[0]][row[1]] = row[2]

    # Source packages are accounted for under the "source" architecture.
    source_id = arch_ids["source"]
    source_rows = session.execute(
        "SELECT suite, COUNT(suite) FROM src_associations GROUP BY suite"
    ).fetchall()
    for sid, source_count in source_rows:
        counts[sid][source_id] += source_count

    # Work out which architectures belong to which suite and the column order.
    suite_list = list(suites.values())
    suite_id_list = []
    suite_arches = {}
    for suite_name in suite_list:
        sid = suite_ids[suite_name]
        suite_arches[sid] = {
            arch.arch_string for arch in get_suite_architectures(suite_name)
        }
        suite_id_list.append(sid)

    output_list = [output_format(name) for name in suite_list]
    longest_suite = max(len(label) for label in output_list)
    arch_list = sorted(arches.values())
    longest_arch = max(len(name) for name in arch_list)

    # Header line followed by a rule of the same width.
    header = (" " * longest_arch) + " |"
    header += "".join(label.center(longest_suite) + " |" for label in output_list)
    pieces = [header, "\n", len(header) * "-", "\n"]

    # One line per architecture.
    for arch_name in arch_list:
        aid = arch_ids[arch_name]
        pieces.append(arch_name.center(longest_arch) + " |")
        for sid in suite_id_list:
            cell = "%d" % counts[sid][aid] if arch_name in suite_arches[sid] else "-"
            pieces.append(cell.rjust(longest_suite) + " |")
        pieces.append("\n")

    print("".join(pieces))
223
224
225
226
227
def parse_new_uploads(data):
    """Count the uploads that entered NEW according to the log text *data*.

    Entries whose timestamp is not newer than ``stats["timestamp"]`` are
    ignored.  Every remaining entry increments the "NEW" counter of its
    month and of the overall history.

    :param data: decoded contents of one log file.
    :return: timestamp of the last entry counted, or ``stats["timestamp"]``
        if nothing was counted.
    """
    cutoff = stats["timestamp"]
    newest = cutoff
    for timestamp, _event in findall(NEW, data, MULTILINE):
        if timestamp <= cutoff:
            continue
        month = parse_timestamp(timestamp)
        if month not in stats:
            stats[month] = {
                "stats": {"NEW": 0, "ACCEPT": 0, "REJECT": 0, "PROD": 0},
                "members": {},
            }
        for scope in (month, "history"):
            stats[scope]["stats"]["NEW"] += 1
        newest = timestamp
    return newest
245
246
def _ensure_month_bucket(month):
    """Create the empty statistics entry for *month* if it does not exist yet."""
    if month not in stats:
        stats[month] = {
            "stats": {"NEW": 0, "ACCEPT": 0, "REJECT": 0, "PROD": 0},
            "members": {},
        }


def _count_member_action(month, member, action):
    """Add one *action* by *member* to the *month* bucket and to the history."""
    scopes = (stats[month], stats["history"])
    for scope in scopes:
        if member not in scope["members"]:
            scope["members"][member] = {"ACCEPT": 0, "REJECT": 0, "PROD": 0}
    for scope in scopes:
        scope["stats"][action] += 1
        scope["members"][member][action] += 1


def parse_actions(data, logdate):
    """Count the NEW-queue decisions found in the log text *data*.

    Logs up to and including FORMAT_SWITCH are scanned for the old
    lisa/process-new batches; logs from FORMAT_SWITCH onwards are scanned
    for "NEW <ACTION>" lines.  The switch month itself goes through both
    passes.  Entries not newer than ``stats["timestamp"]`` and actions by
    blacklisted actors are skipped.

    :param data: decoded contents of one log file.
    :param logdate: "YYYY-MM" month the log file belongs to.
    :return: timestamp of the last action counted, or ``stats["timestamp"]``
        if nothing was counted.
    """
    cutoff = stats["timestamp"]
    latest_timestamp = cutoff

    if logdate <= FORMAT_SWITCH:
        verdicts = {"Accepting changes": "ACCEPT", "rejected": "REJECT"}
        for batch in findall(old_ACTIONS, data, DOTALL):
            # The first whitespace-separated token of a batch names the actor.
            who = batch.split()[0]
            if who in blacklisted:
                continue
            for timestamp, logged in findall(old_ACTION, batch, MULTILINE):
                if timestamp <= cutoff:
                    continue
                month = parse_timestamp(timestamp)
                _ensure_month_bucket(month)
                _count_member_action(month, who, verdicts[logged])
                latest_timestamp = timestamp
        # PROD counts for this log month are taken from the mail archive.
        parse_prod(logdate)

    if logdate >= FORMAT_SWITCH:
        for timestamp, member, action in findall(new_ACTIONS, data, MULTILINE):
            if timestamp <= cutoff:
                continue
            month = parse_timestamp(timestamp)
            # The month entry is created before the blacklist check, so it
            # exists even when only blacklisted actors were active.
            _ensure_month_bucket(month)
            if member in blacklisted:
                continue
            _count_member_action(month, member, action)
            latest_timestamp = timestamp

    return latest_timestamp
318
319
def parse_prod(logdate):
    """Count "prod" mails sent by members during the month *logdate*.

    The month's mail archive (``mail/archive/mail-YYMM.xz`` below
    ``Dir::Base``) is decompressed into a temporary mbox.  Every message
    whose subject starts with "Comments regarding" and whose sender maps to
    a known member in ``users`` increments that member's "PROD" counter for
    the month of the mail's Date header and for the overall history.

    Nothing happens if the archive does not exist.  Messages without a
    usable From or Date header are skipped, because they can be neither
    attributed to a member nor assigned to a month.

    :param logdate: "YYYY-MM" month of the log being processed.
    """
    conf = utils.get_conf()
    # "2009-08" -> "0908": last two characters of each component.
    maildate = "".join(part[-2:] for part in logdate.split("-"))
    mailarchive = join(conf["Dir::Base"], "mail/archive", "mail-%s.xz" % maildate)
    if not isfile(mailarchive):
        return

    with tempfile.NamedTemporaryFile(dir=conf["Dir::TempPath"]) as tmpfile:
        with open(mailarchive, "rb") as fh:
            subprocess.check_call(["xzcat"], stdin=fh, stdout=tmpfile)

        for message in mbox(tmpfile.name):
            subject = message["subject"]
            if not subject or not subject.startswith("Comments regarding"):
                continue

            sender = message["From"]
            if not sender:
                # No From header: the mail cannot be attributed to anyone.
                continue
            try:
                # Drop the last token of the From header before the lookup.
                member = users[" ".join(sender.split()[:-1])]
            except KeyError:
                continue

            parsed_date = parsedate_tz(message["date"])
            if parsed_date is None:
                # Missing or unparsable Date header: no month to book it to.
                continue
            ts = mktime_tz(parsed_date)
            timestamp = datetime.fromtimestamp(ts).strftime("%Y%m%d%H%M%S")
            date = parse_timestamp(timestamp)

            if date not in stats:
                stats[date] = {
                    "stats": {"NEW": 0, "ACCEPT": 0, "REJECT": 0, "PROD": 0},
                    "members": {},
                }
            for scope in (stats[date], stats["history"]):
                if member not in scope["members"]:
                    scope["members"][member] = {"ACCEPT": 0, "REJECT": 0, "PROD": 0}
            for scope in (stats[date], stats["history"]):
                scope["stats"]["PROD"] += 1
                scope["members"][member]["PROD"] += 1
364
365
def parse_timestamp(timestamp):
    """Return the "YYYY-MM" month of a "YYYYMMDDHHMMSS" timestamp string.

    :param timestamp: string whose first four characters are the year and
        whose next two characters are the month.
    :return: the year and the zero-padded month joined by a dash.
    """
    year, month = int(timestamp[:4]), int(timestamp[4:6])
    return "{:d}-{:02d}".format(year, month)
370
371
def new_stats(logdir, yaml):
    """Update the NEW-queue statistics file from the logs in *logdir*.

    Previously stored statistics are loaded from *yaml* (a missing or
    unreadable file simply means starting from scratch).  Every regular
    file in *logdir* except "current" whose name, minus extension, is not
    older than the month of the stored timestamp is read -- through bzcat,
    xzcat or zstdcat when the suffix asks for it -- and fed to
    parse_new_uploads() and parse_actions().  A dot is written to stderr
    per processed file.  Finally the newest timestamp seen is stored and
    the statistics are written back to *yaml*.

    :param logdir: directory holding the log files.
    :param yaml: path of the YAML statistics file to read and rewrite.
    """
    global stats

    try:
        with open(yaml, "r") as fd:
            stats = safe_load(fd)
    except OSError:
        pass
    if not stats:
        stats = {
            "history": {
                "stats": {"NEW": 0, "ACCEPT": 0, "REJECT": 0, "PROD": 0},
                "members": {},
            },
            "timestamp": "19700101000000",
        }

    # File suffix -> external tool that decompresses the file from stdin.
    decompressors = ((".bz2", "bzcat"), (".xz", "xzcat"), (".zst", "zstdcat"))

    latest_timestamp = stats["timestamp"]
    for fn in sorted(listdir(logdir)):
        if fn == "current":
            continue
        log = splitext(fn)[0]
        if log < parse_timestamp(stats["timestamp"]):
            continue
        logfile = join(logdir, fn)
        if not isfile(logfile):
            continue

        with open(logfile, "rb") as fh:
            for suffix, tool in decompressors:
                if fn.endswith(suffix):
                    raw = subprocess.check_output([tool], stdin=fh)
                    break
            else:
                raw = fh.read()

        # Fall back to latin1 for logs that are not valid in the default encoding.
        try:
            data = raw.decode()
        except UnicodeDecodeError:
            data = raw.decode("latin1")

        latest_timestamp = max(latest_timestamp, parse_new_uploads(data))
        latest_timestamp = max(latest_timestamp, parse_actions(data, log))
        stderr.write(".")
        stderr.flush()

    stderr.write("\n")
    stderr.flush()

    stats["timestamp"] = latest_timestamp
    with open(yaml, "w") as fd:
        safe_dump(stats, fd)
428
429
430
431
432
def main():
    """Command-line entry point: parse the arguments and run the chosen MODE.

    Exactly one MODE argument is accepted, except for "new", which needs
    an output file as a further argument.  Any violation prints a warning
    and the usage text and exits with status 1.
    """
    global Cnf
    global users

    Cnf = utils.get_conf()
    # Make sure the option exists so that the lookup below cannot fail.
    if "Stats::Options::help" not in Cnf:
        Cnf["Stats::Options::help"] = ""

    Arguments = [("h", "help", "Stats::Options::Help")]
    args = apt_pkg.parse_commandline(Cnf, Arguments, sys.argv)

    Options = Cnf.subtree("Stats::Options")
    if Options["Help"]:
        usage()

    if not args:
        utils.warn("dak stats requires a MODE argument")
        usage(1)

    mode = args[0].lower()
    has_extra_args = len(args) > 1
    if has_extra_args and mode != "new":
        utils.warn("dak stats accepts only one MODE argument")
        usage(1)
    if not has_extra_args and mode == "new":
        utils.warn("new MODE requires an output file")
        usage(1)

    if mode == "new":
        users = utils.get_users_from_ldap()
        new_stats(Cnf["Dir::Log"], args[1])
    elif mode == "arch-space":
        per_arch_space_use()
    elif mode == "pkg-nums":
        number_of_packages()
    elif mode == "daily-install":
        daily_install_stats()
    else:
        utils.warn("unknown mode '%s'" % (mode))
        usage(1)
474
475
476
477
478
# Run the command-line entry point only when this file is executed directly.
if __name__ == "__main__":
    main()
481