1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
|
#!/usr/bin/python
"""Combines results from multiple days of a single metric.

Feed it the STATUS.txt files on stdin.  It then finds the corresponding
results.csv, and takes the top N items.

Example:

Date,       "google.com,", yahoo.com
2015-03-01, 0.0,           0.9
2015-03-02, 0.1,           0.8

Dygraphs can load this CSV file directly.

TODO: Use different dygraph API?

Also we need error bars.

  new Dygraph(document.getElementById("graphdiv2"),
              [
                [1,10,100],
                [2,20,80],
                [3,50,60],
                [4,70,80]
              ],
              {
                labels: [ "Date", "failure", "timeout", "google.com" ]
              });
"""
import collections
import csv
import json
import os
import sys
import util
def CombineDistResults(stdin, c_out, num_top):
    """Combine the top results of several days into one CSV time series.

    For every STATUS.txt path read from `stdin`, the date is taken from the
    name of the directory containing it.  If the status is OK, the first
    `num_top` data rows of the sibling results.csv are read.  The output has
    one row per date and one column per distinct string seen on any day.

    Args:
        stdin: Iterable of lines, each holding a path that looks like
            .../2015-03-01/STATUS.txt.  Blank lines are ignored.
        c_out: csv.writer-like object; rows are emitted with writerow().
        num_top: Maximum number of data rows to take from each results.csv.

    Output cells are 'lower;value;upper' triples, or empty when a string has
    no row for that date (including dates whose status was not OK).
    """
    dates = []
    var_cols = collections.defaultdict(dict)  # {name: {date: value}}
    seen_dates = set()

    for line in stdin:
        status_path = line.strip()
        if not status_path:
            continue  # blank line, e.g. a trailing newline in the input

        # Assume it looks like .../2015-03-01/STATUS.txt
        task_dir = os.path.dirname(status_path)
        date = os.path.basename(task_dir)

        # Get rid of duplicate dates.  These could be caused by retries.
        if date in seen_dates:
            continue
        seen_dates.add(date)

        with open(status_path) as f:
            fields = f.readline().split()
        # First word is OK, FAIL, TIMEOUT or SKIPPED.  An empty status file
        # is treated like any other non-OK status instead of crashing.
        status = fields[0] if fields else ''

        # The date gets a row even when there is nothing to plot for it.
        dates.append(date)
        if status != 'OK':
            continue  # won't have results.csv

        results_path = os.path.join(task_dir, 'results.csv')
        with open(results_path) as f:
            c = csv.reader(f)
            next(c, None)  # skip the header row, if there is one

            # Rows are sorted by decreasing "estimate", which is what we
            # want, so the first num_top rows are the top ones.
            for _unused in range(num_top):
                try:
                    row = next(c)
                except StopIteration:
                    # It's OK if it doesn't have enough
                    util.log('Stopping early. Fewer than %d results to render.', num_top)
                    break

                string, _, _, proportion, _, prop_low, prop_high = row

                # dygraphs takes error bars as semicolon-separated triples,
                # written here as lower;value;upper:
                #   lower;value;upper,lower;value;upper
                # http://dygraphs.com/data.html#csv
                # Arbitrarily use 4 digits after decimal point (for
                # dygraphs, not directly displayed)
                dygraph_triple = '%.4f;%.4f;%.4f' % (
                    float(prop_low), float(proportion), float(prop_high))

                var_cols[string][date] = dygraph_triple

    # Now write the combined CSV.
    cols = sorted(var_cols.keys())  # sort columns alphabetically
    c_out.writerow(['date'] + cols)

    dates.sort()
    for date in dates:
        row = [date]
        for col in cols:
            # None means there is no row for this string on this date; the
            # csv writer renders it as an empty cell.
            row.append(var_cols[col].get(date))
        c_out.writerow(row)
def CombineAssocResults(stdin, c_out, num_top):
    """Placeholder for combining association results.

    Only a single-column 'dummy' header row is written to `c_out`; `stdin`
    and `num_top` are accepted for symmetry with CombineDistResults but are
    not read.
    """
    c_out.writerow(('dummy',))
def main(argv):
    """Run the action named on the command line.

    Args:
        argv: Full argument vector: [program, action, num_top].  `action`
            is 'dist' or 'assoc'; `num_top` is the number of values to keep.

    Reads STATUS.txt paths from sys.stdin and writes CSV to sys.stdout.

    Raises:
        RuntimeError: If the action is missing or unknown, or if num_top is
            missing or not an integer.  These are all usage errors, so they
            share the exception type that the script entry point reports as
            a FATAL message rather than a traceback.
    """
    if len(argv) < 2:
        raise RuntimeError('Expected an action (dist or assoc)')
    action = argv[1]
    if action not in ('dist', 'assoc'):
        raise RuntimeError('Invalid action %r' % action)

    if len(argv) < 3:
        raise RuntimeError(
            'Expected the number of values to keep after action %r' % action)
    try:
        num_top = int(argv[2])  # number of values to keep
    except ValueError:
        raise RuntimeError('Invalid number of values to keep: %r' % argv[2])

    c_out = csv.writer(sys.stdout)
    if action == 'dist':
        CombineDistResults(sys.stdin, c_out, num_top)
    else:
        CombineAssocResults(sys.stdin, c_out, num_top)
if __name__ == '__main__':
    try:
        main(sys.argv)
    except RuntimeError as e:
        # RuntimeError is how main() reports failures; print a one-line
        # message on stderr instead of a traceback and exit non-zero.
        # sys.stderr.write is used because it is valid on Python 2 and 3.
        sys.stderr.write('FATAL: %s\n' % e)
        sys.exit(1)
|