#!/usr/bin/python
"""Combines results from multiple days of a single metric.

Feed it the STATUS.txt files on stdin.  It then finds the corresponding
results.csv, and takes the top N items.

Example:

Date,       "google.com,", yahoo.com
2015-03-01, 0.0,           0.9
2015-03-02, 0.1,           0.8

Dygraphs can load this CSV file directly.

TODO: Use different dygraph API?

Also we need error bars.

  new Dygraph(document.getElementById("graphdiv2"),
              [
                [1,10,100],
                [2,20,80],
                [3,50,60],
                [4,70,80]
              ],
              {
                labels: [ "Date", "failure", "timeout", "google.com" ]
              });
"""

import collections
import csv
import json
import os
import sys

import util


def CombineDistResults(stdin, c_out, num_top):
  """Merge the top rows of each day's results.csv into a single CSV.

  Args:
    stdin: iterable of lines, each one a path that is assumed to look like
      .../2015-03-01/STATUS.txt.  Blank lines are ignored.
    c_out: object with a writerow() method (e.g. a csv.writer).
    num_top: maximum number of data rows to take from each results.csv.

  Writes a header row ('date' followed by the column names in sorted order),
  then one row per distinct date in sorted order.  Each cell is a
  'low;value;high' string, or None when that column has no row for that date
  (csv.writer renders None as an empty field).
  """
  dates = []
  var_cols = collections.defaultdict(dict)  # {name: {date: value}}

  seen_dates = set()

  for line in stdin:
    status_path = line.strip()
    if not status_path:
      continue  # tolerate blank lines instead of trying to open('')

    # Assume it looks like .../2015-03-01/STATUS.txt
    task_dir = os.path.dirname(status_path)
    date = os.path.basename(task_dir)

    # Get rid of duplicate dates.  These could be caused by retries.
    if date in seen_dates:
      continue

    seen_dates.add(date)

    with open(status_path) as f:
      fields = f.readline().split()

    if fields:
      status = fields[0]  # OK, FAIL, TIMEOUT, SKIPPED
    else:
      # An empty status file used to raise IndexError; it cannot be 'OK'.
      util.log('Empty status file %s; treating it as not OK', status_path)
      status = ''

    # The date is recorded even for failed tasks, so it still gets a row
    # (with empty cells) in the output.
    dates.append(date)

    if status != 'OK':
      continue  # won't have results.csv

    results_path = os.path.join(task_dir, 'results.csv')
    with open(results_path) as f:
      c = csv.reader(f)
      # Skip the header row.  The default keeps a completely empty file from
      # leaking StopIteration; the loop below then logs and stops.
      next(c, None)

      # they are sorted by decreasing "estimate", which is what we want
      for _ in range(num_top):
        try:
          row = next(c)
        except StopIteration:
          # It's OK if it doesn't have enough
          util.log('Stopping early. Fewer than %d results to render.', num_top)
          break

        # Rows are expected to have exactly 7 columns; only 4 are used here.
        string, _, _, proportion, _, prop_low, prop_high = row

        # dygraphs custom bars use a format with semicolons:
        # low;value;high,low;value;high.

        # http://dygraphs.com/data.html#csv

        # Arbitrarily use 4 digits after decimal point (for dygraphs, not
        # directly displayed)
        dygraph_triple = '%.4f;%.4f;%.4f' % (
            float(prop_low), float(proportion), float(prop_high))

        var_cols[string][date] = dygraph_triple

  # Now write the combined CSV to c_out.
  cols = sorted(var_cols.keys())  # sort columns alphabetically
  c_out.writerow(['date'] + cols)

  dates.sort()

  for date in dates:
    row = [date]
    for col in cols:
      cell = var_cols[col].get(date)  # None means there is no row
      row.append(cell)
    c_out.writerow(row)

  #util.log("Number of dynamic cols: %d", len(var_cols))


def CombineAssocResults(stdin, c_out, num_top):
  """Placeholder: writes a one-column 'dummy' header row to c_out.

  stdin and num_top are accepted for signature parity with
  CombineDistResults but are not used.
  """
  header = ('dummy',)
  c_out.writerow(header)


def main(argv):
  """Dispatch on argv[1] ('dist' or 'assoc'); argv[2] is the top-N count.

  Reads STATUS.txt paths from sys.stdin and writes CSV to sys.stdout.

  Raises:
    RuntimeError: if the action or the count is missing, or the action is
      not recognized.
  """
  if len(argv) < 2:
    raise RuntimeError('Expected arguments: {dist,assoc} NUM_TOP')

  action = argv[1]

  if action == 'dist':
    combine = CombineDistResults
  elif action == 'assoc':
    combine = CombineAssocResults
  else:
    raise RuntimeError('Invalid action %r' % action)

  if len(argv) < 3:
    raise RuntimeError('Action %r requires NUM_TOP' % action)

  num_top = int(argv[2])  # number of values to keep
  c_out = csv.writer(sys.stdout)
  combine(sys.stdin, c_out, num_top)


if __name__ == '__main__':
  try:
    main(sys.argv)
  except RuntimeError as e:
    sys.stderr.write('FATAL: %s\n' % e)
    sys.exit(1)