1
2
3 """ Various statistical pr0nography fun and games """
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34 import subprocess
35 import sys
36 import tempfile
37 import apt_pkg
38
39 from datetime import datetime
40 from email.utils import mktime_tz, parsedate_tz
41 from mailbox import mbox
42 from os import listdir
43 from os.path import isfile, join, splitext
44 from re import findall, DOTALL, MULTILINE
45 from sys import stderr
46 from yaml import safe_load, safe_dump
47
48 from daklib import utils
49 from daklib.dbconn import DBConn, get_suite_architectures, Suite, Architecture
50
51
52
# Configuration object; assigned from utils.get_conf() when the command starts.
Cnf = None

# Accumulated NEW-queue statistics: loaded from and dumped to a YAML file,
# keyed by 'YYYY-MM' month plus the special keys 'history' and 'timestamp'.
stats = {}
# Replaced by the result of utils.get_users_from_ldap() in "new" mode; used to
# map a mail's sender to a member when counting PRODs.
users = {}
# NOTE(review): not referenced anywhere in the visible code -- confirm whether
# it is still needed.
buffer = 0
# Log name ('YYYY-MM') at which the log-line format changes: logs up to and
# including this month are scanned with the old_* patterns, logs from this
# month onwards with new_ACTIONS (the switch month itself is scanned with both).
FORMAT_SWITCH = '2009-08'
# Actions attributed to these names are not counted.
blacklisted = ('dak', 'katie')

# Upload entering NEW.  Groups: (14-digit timestamp, matched action text).
NEW = (r'^(\d{14})\|(?:jennifer|process-unchecked|.*?\|dak)'
       r'\|(Moving to new|ACCEPT-TO-NEW)')
# New-format action line.  Groups: (14-digit timestamp, member, action).
new_ACTIONS = r'^(\d{14})\|[^\|]*\|(\S+)\|NEW (\S+)[:\|]'
# Old-format session: everything between a "program start" and the following
# "program end" marker.  The first whitespace-separated token of the captured
# text is taken as the acting member.
old_ACTIONS = (r'(?:lisa|process-new)\|program start\|(.*?)\|'
               r'(?:lisa|process-new)\|program end')
# Old-format action line inside a session.  Groups: (14-digit timestamp,
# action text).
old_ACTION = r'^(\d{14})\|(?:lisa|process-new)\|(Accepting changes|rejected)\|'
67
68
69
70
def usage(exit_code=0):
    """Print the help text for ``dak stats`` and terminate the process.

    :param exit_code: status passed to ``sys.exit`` after the text has been
        printed.
    """
    help_text = (
        "Usage: dak stats MODE\n"
        "Print various stats.\n"
        "\n"
        "-h, --help show this help and exit.\n"
        "\n"
        "The following MODEs are available:\n"
        "\n"
        "arch-space - displays space used by each architecture\n"
        "pkg-nums - displays the number of packages by suite/architecture\n"
        "daily-install - displays daily install stats suitable for graphing\n"
        "new - stores stats about the NEW queue\n"
    )
    print(help_text)
    sys.exit(exit_code)
85
86
87
88
def per_arch_space_use():
    """Print the summed file size per architecture, followed by sources.

    One ``<architecture> <sum>`` line is printed for every row of the
    per-architecture query (ordered by the sum), then an empty line, then a
    ``Source`` line with the summed size of all files whose name matches the
    diff.gz / tar.gz / dsc pattern.
    """
    session = DBConn().session()

    arch_query = (
        "\n"
        "SELECT a.arch_string as Architecture, sum(f.size) AS sum\n"
        "FROM files f, binaries b, architecture a\n"
        "WHERE a.id=b.architecture AND f.id=b.file\n"
        "GROUP BY a.arch_string ORDER BY sum"
    )
    for row in session.execute(arch_query).fetchall():
        print("%-15.15s %s" % (row[0], row[1]))
    print()

    source_query = "SELECT sum(size) FROM files WHERE filename ~ '.(diff.gz|tar.gz|dsc)$'"
    source_rows = session.execute(source_query).fetchall()
    print("%-15.15s %s" % ("Source", source_rows[0][0]))
101
102
103
104
def daily_install_stats(logfile="2001-11"):
    """Print per-day install statistics gathered from a ``|``-separated log.

    Only lines whose second field is ``katie`` or ``process-accepted`` and
    whose third field is ``installing changes`` or ``installed`` are used.
    The day is the first eight characters of the first field.  Every
    ``installing changes`` line counts as one package; every ``installed``
    line adds its sixth field (parsed as a float) to the day's size.

    Output is one ``<day> <packages> <size>`` line per day in ascending day
    order, where size is the accumulated value divided by 1024 twice and
    truncated to an integer.

    :param logfile: path of the log file to read.  Defaults to ``"2001-11"``,
        the name that used to be hard-coded here.
    :raises OSError: if the log file cannot be opened.
    """
    daily = {}
    # The context manager guarantees the handle is closed; iterating the file
    # object streams it instead of loading every line up front.
    with open(logfile) as log:
        for line in log:
            fields = line.strip().split('|')
            if len(fields) < 3:
                # Blank or truncated line: there is no program/action to inspect.
                continue
            program, action = fields[1], fields[2]
            if program not in ("katie", "process-accepted"):
                continue
            if action not in ("installing changes", "installed"):
                continue
            day = daily.setdefault(fields[0][:8], {"packages": 0, "size": 0.0})
            if action == "installing changes":
                day["packages"] += 1
            else:
                day["size"] += float(fields[5])

    for date in sorted(daily):
        packages = daily[date]["packages"]
        size = int(daily[date]["size"] / 1024.0 / 1024.0)
        print("%s %s %s" % (date, packages, size))
131
132
133
134
140
141
def number_of_packages():
    """Print a table of package counts with one row per architecture and one
    column per suite.

    Binary counts come from ``bin_associations`` joined with ``binaries``;
    source counts from ``src_associations`` are added under the architecture
    named ``source``.  A cell shows ``-`` when the architecture is not among
    the suite's architectures as reported by ``get_suite_architectures``.
    Column headings are the suite names passed through ``output_format``.
    """
    session = DBConn().session()

    # Lookup tables in both directions for suites and architectures.
    suite_rows = session.query(Suite).all()
    suites = {row.suite_id: row.suite_name for row in suite_rows}
    suite_ids = {row.suite_name: row.suite_id for row in suite_rows}

    arch_rows = session.query(Architecture).all()
    arches = {row.arch_id: row.arch_string for row in arch_rows}
    arch_ids = {row.arch_string: row.arch_id for row in arch_rows}

    # counts[suite_id][arch_id] starts at zero for every combination.
    counts = {suite_id: dict.fromkeys(arches, 0) for suite_id in suites}

    binary_query = (
        "SELECT suite, architecture, COUNT(suite)\n"
        "FROM bin_associations\n"
        "LEFT JOIN binaries ON bin = binaries.id\n"
        "GROUP BY suite, architecture"
    )
    for row in session.execute(binary_query).fetchall():
        counts[row[0]][row[1]] = row[2]

    source_arch_id = arch_ids["source"]
    source_query = 'SELECT suite, COUNT(suite) FROM src_associations GROUP BY suite'
    for row in session.execute(source_query).fetchall():
        (suite_id, total) = row
        counts[suite_id][source_arch_id] += total

    # Which architectures belong to which suite, in column order.
    suite_names = list(suites.values())
    suite_id_list = []
    suite_arches = {}
    for name in suite_names:
        suite_id = suite_ids[name]
        suite_arches[suite_id] = {
            arch.arch_string: "" for arch in get_suite_architectures(name)
        }
        suite_id_list.append(suite_id)

    headings = [output_format(name) for name in suite_names]
    longest_suite = max(len(heading) for heading in headings)
    arch_list = sorted(arches.values())
    longest_arch = max(len(arch) for arch in arch_list)

    # Header row, then a dashed rule as wide as the header.
    header = (" " * longest_arch) + " |" + "".join(
        heading.center(longest_suite) + " |" for heading in headings
    )
    lines = [header, len(header) * "-"]

    for arch in arch_list:
        arch_id = arch_ids[arch]
        cells = []
        for suite_id in suite_id_list:
            if arch in suite_arches[suite_id]:
                cell = "%d" % counts[suite_id][arch_id]
            else:
                cell = "-"
            cells.append(cell.rjust(longest_suite) + " |")
        lines.append(arch.center(longest_arch) + " |" + "".join(cells))

    # Every line, including the last, is newline-terminated before printing.
    print("\n".join(lines) + "\n")
207
208
209
210
def parse_new_uploads(data):
    """Count uploads that entered NEW in one log's text.

    Every match of the ``NEW`` pattern whose timestamp is strictly later than
    ``stats['timestamp']`` increments the ``NEW`` counter of its month and of
    the ``history`` totals.  Timestamps are compared as strings.

    :param data: decoded contents of a log file.
    :return: timestamp of the last counted entry, or ``stats['timestamp']``
        when nothing was counted.
    """
    global stats
    cutoff = stats['timestamp']
    newest = cutoff
    for timestamp, _text in findall(NEW, data, MULTILINE):
        if timestamp <= cutoff:
            # Already accounted for in an earlier run.
            continue
        month = parse_timestamp(timestamp)
        period = stats.setdefault(
            month,
            {'stats': {'NEW': 0, 'ACCEPT': 0, 'REJECT': 0, 'PROD': 0},
             'members': {}})
        period['stats']['NEW'] += 1
        stats['history']['stats']['NEW'] += 1
        newest = timestamp
    return newest
226
227
def parse_actions(data, logdate):
    """Count ACCEPT/REJECT (and, in new-format logs, any ``NEW <action>``)
    entries per month and per member.

    Logs named up to and including ``FORMAT_SWITCH`` are scanned session by
    session with the old patterns, after which ``parse_prod(logdate)`` is
    called; logs named from ``FORMAT_SWITCH`` onwards are scanned with
    ``new_ACTIONS``.  Entries not later than ``stats['timestamp']`` and
    entries by blacklisted names are skipped.

    :param data: decoded contents of a log file.
    :param logdate: name of the log without extension (``YYYY-MM``).
    :return: timestamp of the last counted entry, or ``stats['timestamp']``
        when nothing was counted.
    """
    global stats
    cutoff = stats['timestamp']
    newest = cutoff

    if logdate <= FORMAT_SWITCH:
        for batch in findall(old_ACTIONS, data, DOTALL):
            who = batch.split()[0]
            if who in blacklisted:
                continue
            for timestamp, text in findall(old_ACTION, batch, MULTILINE):
                if timestamp <= cutoff:
                    continue
                # Normalise the old wording to the counter names.
                action = text
                if text.startswith('Accepting'):
                    action = 'ACCEPT'
                elif text.startswith('rejected'):
                    action = 'REJECT'
                month = parse_timestamp(timestamp)
                period = stats.setdefault(
                    month,
                    {'stats': {'NEW': 0, 'ACCEPT': 0, 'REJECT': 0, 'PROD': 0},
                     'members': {}})
                period['stats'][action] += 1
                stats['history']['stats'][action] += 1
                period['members'].setdefault(
                    who, {'ACCEPT': 0, 'REJECT': 0, 'PROD': 0})[action] += 1
                stats['history']['members'].setdefault(
                    who, {'ACCEPT': 0, 'REJECT': 0, 'PROD': 0})[action] += 1
                newest = timestamp
        parse_prod(logdate)

    if logdate >= FORMAT_SWITCH:
        for timestamp, member, action in findall(new_ACTIONS, data, MULTILINE):
            if timestamp <= cutoff:
                continue
            month = parse_timestamp(timestamp)
            # The month entry is created before the blacklist check, so a
            # month may exist even if all of its actions were blacklisted.
            period = stats.setdefault(
                month,
                {'stats': {'NEW': 0, 'ACCEPT': 0, 'REJECT': 0, 'PROD': 0},
                 'members': {}})
            if member in blacklisted:
                continue
            period_member = period['members'].setdefault(
                member, {'ACCEPT': 0, 'REJECT': 0, 'PROD': 0})
            history_member = stats['history']['members'].setdefault(
                member, {'ACCEPT': 0, 'REJECT': 0, 'PROD': 0})
            period['stats'][action] += 1
            period_member[action] += 1
            stats['history']['stats'][action] += 1
            history_member[action] += 1
            newest = timestamp

    return newest
289
290
def parse_prod(logdate):
    """Count PROD mails for one month from the compressed mail archive.

    The archive ``mail/archive/mail-<YYMM>.xz`` below ``Dir::Base`` is
    decompressed with ``xzcat`` into a temporary file under ``Dir::TempPath``
    and read as an mbox.  Each message whose subject starts with
    ``Comments regarding`` and whose sender is found in ``users`` increments
    the ``PROD`` counters of the message's month and of the ``history``
    totals, both overall and for that member.

    Messages are skipped (not counted) when the archive does not exist, the
    sender is unknown, or the ``From``/``Date`` header is missing or the date
    cannot be parsed.

    :param logdate: log name in ``YYYY-MM`` form; the last two characters of
        each ``-`` separated part form the archive suffix.
    :raises subprocess.CalledProcessError: if ``xzcat`` exits non-zero.
    """
    global stats
    global users
    cnf = utils.get_conf()
    maildate = ''.join([x[-2:] for x in logdate.split('-')])
    mailarchive = join(cnf['Dir::Base'], 'mail/archive',
                       'mail-%s.xz' % maildate)
    if not isfile(mailarchive):
        return
    with tempfile.NamedTemporaryFile(dir=cnf['Dir::TempPath']) as tmpfile:
        with open(mailarchive, 'rb') as fh:
            subprocess.check_call(['xzcat'], stdin=fh, stdout=tmpfile)
        box = mbox(tmpfile.name)
        try:
            for message in box:
                subject = message['subject']
                if not (subject and subject.startswith('Comments regarding')):
                    continue
                sender = message['From']
                if not sender:
                    # No From header: nobody to attribute the PROD to.
                    continue
                try:
                    # Lookup key is the From header minus its last
                    # whitespace-separated token (presumably the address).
                    member = users[' '.join(sender.split()[:-1])]
                except KeyError:
                    continue
                date_header = message['date']
                parsed = parsedate_tz(date_header) if date_header else None
                if parsed is None:
                    # parsedate_tz() yields None for unparseable dates, which
                    # mktime_tz() cannot handle.
                    continue
                ts = mktime_tz(parsed)
                timestamp = datetime.fromtimestamp(ts).strftime("%Y%m%d%H%M%S")
                date = parse_timestamp(timestamp)
                if date not in stats:
                    stats[date] = {'stats': {'NEW': 0, 'ACCEPT': 0,
                                     'REJECT': 0, 'PROD': 0}, 'members': {}}
                if member not in stats[date]['members']:
                    stats[date]['members'][member] = {'ACCEPT': 0, 'REJECT': 0,
                                                         'PROD': 0}
                if member not in stats['history']['members']:
                    stats['history']['members'][member] = {'ACCEPT': 0,
                                                           'REJECT': 0, 'PROD': 0}
                stats[date]['stats']['PROD'] += 1
                stats[date]['members'][member]['PROD'] += 1
                stats['history']['stats']['PROD'] += 1
                stats['history']['members'][member]['PROD'] += 1
        finally:
            # Release the mailbox's file handle before the temp file goes away.
            box.close()
325
326
def parse_timestamp(timestamp):
    """Turn a ``YYYYMMDD...`` timestamp string into its ``YYYY-MM`` month key.

    :param timestamp: string whose first four characters are the year and
        whose next two are the month.
    :return: ``'<year>-<month>'`` with the month zero-padded to two digits.
    :raises ValueError: if either slice is not an integer.
    """
    year, month = int(timestamp[:4]), int(timestamp[4:6])
    return '{:d}-{:02d}'.format(year, month)
331
332
def new_stats(logdir, yaml):
    """Update the NEW-queue statistics file from the logs in a directory.

    Existing statistics are loaded from ``yaml`` (a missing or unreadable
    file, or an empty document, starts from zeroed totals).  Every regular
    file in ``logdir`` except ``current`` whose name (minus extension) is not
    older than the month of the stored timestamp is read -- through
    ``bzcat``/``xzcat``/``zstdcat`` for ``.bz2``/``.xz``/``.zst`` files --
    decoded, and fed to ``parse_new_uploads`` and ``parse_actions``.  A dot is
    written to stderr per processed log.  Finally the newest timestamp seen
    is stored and the statistics are dumped back to ``yaml``.

    :param logdir: directory containing the log files.
    :param yaml: path of the YAML statistics file to read and rewrite.
    """
    global stats
    try:
        with open(yaml, 'r') as fd:
            stats = safe_load(fd)
    except OSError:
        # No previous statistics available; fall through to the defaults.
        pass
    if not stats:
        stats = {'history': {'stats': {'NEW': 0, 'ACCEPT': 0,
                 'REJECT': 0, 'PROD': 0}, 'members': {}},
                 'timestamp': '19700101000000'}

    # Suffix -> external tool that writes the decompressed log to stdout.
    decompressors = (('.bz2', 'bzcat'), ('.xz', 'xzcat'), ('.zst', 'zstdcat'))

    latest_timestamp = stats['timestamp']
    for fn in sorted(listdir(logdir)):
        if fn == 'current':
            continue
        log = splitext(fn)[0]
        if log < parse_timestamp(stats['timestamp']):
            # Whole month lies before the last processed entry.
            continue
        logfile = join(logdir, fn)
        if not isfile(logfile):
            continue

        for suffix, tool in decompressors:
            if fn.endswith(suffix):
                with open(logfile, 'rb') as fh:
                    raw = subprocess.check_output([tool], stdin=fh)
                break
        else:
            with open(logfile, 'rb') as fh:
                raw = fh.read()

        # Default codec first, latin1 as a fallback for undecodable bytes.
        try:
            data = raw.decode()
        except UnicodeDecodeError:
            data = raw.decode('latin1')

        latest_timestamp = max(latest_timestamp, parse_new_uploads(data))
        latest_timestamp = max(latest_timestamp, parse_actions(data, log))

        stderr.write('.')
        stderr.flush()

    stderr.write('\n')
    stderr.flush()
    stats['timestamp'] = latest_timestamp
    with open(yaml, 'w') as fd:
        safe_dump(stats, fd)
385
386
387
388
def main():
    """Entry point of ``dak stats``: parse the command line and run a MODE.

    Exactly one MODE argument is accepted, except for ``new`` which also
    needs an output file as second argument.  Invalid invocations print a
    warning followed by the usage text with exit status 1.
    """
    global Cnf
    global users

    Cnf = utils.get_conf()
    Arguments = [('h', "help", "Stats::Options::Help")]
    # Make sure every option key exists before the command line is parsed.
    for option in ["help"]:
        key = "Stats::Options::%s" % option
        if key not in Cnf:
            Cnf[key] = ""

    args = apt_pkg.parse_commandline(Cnf, Arguments, sys.argv)

    Options = Cnf.subtree("Stats::Options")
    if Options["Help"]:
        usage()

    if len(args) < 1:
        utils.warn("dak stats requires a MODE argument")
        usage(1)

    mode = args[0].lower()
    if len(args) > 1 and mode != "new":
        utils.warn("dak stats accepts only one MODE argument")
        usage(1)
    if len(args) == 1 and mode == "new":
        utils.warn("new MODE requires an output file")
        usage(1)

    # Modes that take no further arguments.
    simple_modes = {
        "arch-space": per_arch_space_use,
        "pkg-nums": number_of_packages,
        "daily-install": daily_install_stats,
    }
    if mode in simple_modes:
        simple_modes[mode]()
    elif mode == "new":
        users = utils.get_users_from_ldap()
        new_stats(Cnf["Dir::Log"], args[1])
    else:
        utils.warn("unknown mode '%s'" % (mode))
        usage(1)
430
431
432
433
# Run the command only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
436