#!/usr/bin/python
"""Combines results from multiple days of a single metric.

Feed it the STATUS.txt files on stdin.  It then finds the corresponding
results.csv, and takes the top N items.

Example:

Date,      "google.com,", yahoo.com
2015-03-01,          0.0,       0.9
2015-03-02,          0.1,       0.8

Dygraphs can load this CSV file directly.

Note: the 'dist' action does not write bare numbers as in the example above.
Each cell is a "low;value;high" triple (semicolon-separated), and a cell is
left empty when a column has no row for that date.

TODO: Use different dygraph API?

Also we need error bars.

  new Dygraph(document.getElementById("graphdiv2"),
              [
                [1,10,100],
                [2,20,80],
                [3,50,60],
                [4,70,80]
              ],
              {
                labels: [ "Date", "failure", "timeout", "google.com" ]
              });
"""

import collections
import csv
import json
import os
import sys

import util


def CombineDistResults(stdin, c_out, num_top):
    """Merge per-day results.csv files into one date-indexed CSV.

    Args:
      stdin: iterable of lines, each the path of a STATUS.txt file.  The path
        is assumed to look like .../2015-03-01/STATUS.txt, i.e. the name of
        the containing directory is used as the date.
      c_out: csv.writer-like object; only writerow() is called on it.
      num_top: maximum number of data rows to read from each results.csv.

    For every date whose STATUS.txt starts with 'OK', the first num_top data
    rows of the sibling results.csv are read.  The output has a header row
    ('date' followed by the alphabetically sorted strings seen on any day)
    and then one row per date, in sorted date order.  Dates whose status is
    not 'OK' still get a row, with every cell empty.
    """
    dates = []
    var_cols = collections.defaultdict(dict)  # {name: {date: value}}
    seen_dates = set()

    for line in stdin:
        status_path = line.strip()
        if not status_path:
            # A blank line (e.g. a trailing newline on stdin) names no file.
            continue

        # Assume it looks like .../2015-03-01/STATUS.txt
        task_dir = os.path.dirname(status_path)
        date = os.path.basename(task_dir)

        # Get rid of duplicate dates.  These could be caused by retries.
        # The first path seen for a date wins.
        if date in seen_dates:
            continue
        seen_dates.add(date)

        with open(status_path) as f:
            status = f.readline().split()[0]  # OK, FAIL, TIMEOUT, SKIPPED

        dates.append(date)

        if status != 'OK':
            continue  # won't have results.csv

        results_path = os.path.join(task_dir, 'results.csv')
        with open(results_path) as f:
            c = csv.reader(f)
            # next(c) rather than c.next(): works on Python 2.6+ and 3.
            next(c)  # skip the header row

            # Rows are sorted by decreasing "estimate", which is what we
            # want, so the first num_top rows are the top ones.
            for unused_i in range(num_top):
                try:
                    row = next(c)
                except StopIteration:
                    # It's OK if it doesn't have enough
                    util.log(
                        'Stopping early. Fewer than %d results to render.',
                        num_top)
                    break

                # Exactly 7 columns are expected; only 4 of them are used.
                string, _, _, proportion, _, prop_low, prop_high = row

                # dygraphs custom error bars use semicolons inside each CSV
                # cell, ordered low;value;high:
                #   low;value;high,low;value;high
                # http://dygraphs.com/data.html#csv

                # Arbitrarily use 4 digits after decimal point (for
                # dygraphs, not directly displayed)
                dygraph_triple = '%.4f;%.4f;%.4f' % (
                    float(prop_low), float(proportion), float(prop_high))

                var_cols[string][date] = dygraph_triple

    # Now write the combined CSV.
    cols = sorted(var_cols)  # sort columns alphabetically
    c_out.writerow(['date'] + cols)

    dates.sort()

    for date in dates:
        # .get() returns None when this column has no row for this date;
        # csv.writer renders None as an empty cell.
        cells = [var_cols[col].get(date) for col in cols]
        c_out.writerow([date] + cells)


def CombineAssocResults(stdin, c_out, num_top):
    """Placeholder for association results: writes only a dummy header row.

    stdin and num_top are accepted for symmetry with CombineDistResults but
    are not used yet.
    """
    header = ('dummy',)
    c_out.writerow(header)


def main(argv):
    """Dispatch on argv[1] ('dist' or 'assoc'); argv[2] is the top-N count.

    Raises:
      RuntimeError: if the action is missing or invalid, or if the count is
        missing or not an integer.  The __main__ guard reports these as
        'FATAL: ...' and exits with status 1.
    """
    if len(argv) < 2:
        raise RuntimeError('Action required (dist or assoc)')

    action = argv[1]

    if action == 'dist':
        combine = CombineDistResults
    elif action == 'assoc':
        combine = CombineAssocResults
    else:
        raise RuntimeError('Invalid action %r' % action)

    if len(argv) < 3:
        raise RuntimeError('Action %r requires the number of values to keep'
                           % action)
    try:
        num_top = int(argv[2])  # number of values to keep
    except ValueError:
        raise RuntimeError('Invalid number of values to keep: %r' % argv[2])

    c_out = csv.writer(sys.stdout)
    combine(sys.stdin, c_out, num_top)


if __name__ == '__main__':
    try:
        main(sys.argv)
    except RuntimeError as e:
        # 'as' form and an explicit write are valid on Python 2.6+ and 3.
        sys.stderr.write('FATAL: %s\n' % e)
        sys.exit(1)