#!/usr/bin/python
"""Given a regtest result tree, prints an HTML summary to a file.

See HTML skeleton in tests/regtest.html.
"""

import os
import re
import sys


# Footer row of the results table.  Filled in with the %-mapping keys 'name',
# 'mean_fpr', 'mean_fnr', 'mean_tv', 'mean_am' and 'mean_time'; every other
# column is left as an empty cell.
SUMMARY_ROW = """\
<tfoot style="font-weight: bold; text-align: right">
<tr>
  <td>
    %(name)s
  </td>

  <!-- input params -->
  <td></td>
  <td></td>
  <td></td>
  <td></td>

  <!-- RAPPOR params -->
  <td></td>
  <td></td>
  <td></td>
  <td></td>
  <td></td>
  <td></td>

  <!-- MAP params -->
  <td></td>
  <td></td>

  <!-- Result metrics -->
  <td></td>
  <td></td>
  <td>%(mean_fpr)s</td>
  <td>%(mean_fnr)s</td>
  <td>%(mean_tv)s</td>
  <td>%(mean_am)s</td>
  <td>%(mean_time)s</td>
</tr>
</tfoot>
"""

# Navigation and links to plot.
DETAILS = """\
<p style="text-align: right">
  <a href="#top">Up</a>
</p>

<a id="%(anchor)s"></a>

<p style="text-align: center">
  <img src="%(instance_dir)s/dist.png"/>
</p>

<p>
<a href="%(instance_dir)s">%(name)s files</a>
</p>
"""


def FormatFloat(x, percent):
    """Formats a floating-point number.

    Args:
      x: the number to format.
      percent: if true, x is multiplied by 100 and rendered with one decimal
        place and a trailing '%'; otherwise x is rendered with three decimal
        places.

    Returns:
      The formatted string.
    """
    if percent:
        return '{:.1f}%'.format(x * 100.0)
    return '{:.3f}'.format(x)


def FormatMeanWithSem(m_std_error, percent=False):
    """Formats an estimate with standard error.

    Args:
      m_std_error: None, or a pair (mean, standard error).  The standard
        error component may itself be None.
      percent: passed through to FormatFloat for both components.

    Returns:
      '' if m_std_error is None, the formatted mean alone if the standard
      error is None, and otherwise 'mean&plusmn;error' as an HTML fragment.
    """
    if m_std_error is None:
        return ''
    m, std_error = m_std_error
    if std_error is None:
        return FormatFloat(m, percent)
    # The plus-minus sign is written as an HTML entity rather than a literal
    # non-ASCII character: the result goes into an HTML report, and this
    # file declares no source encoding.
    return '{}&plusmn;{}'.format(FormatFloat(m, percent),
                                 FormatFloat(std_error, percent))


def Mean(l):
    """Computes the mean (average) for a list of numbers.

    Returns:
      The mean as a float, or None if the list is empty.
    """
    if not l:
        return None
    return float(sum(l)) / len(l)


def SampleVar(l):
    """Computes the sample variance for a list of numbers.

    Returns:
      The variance with the (n - 1) denominator, or None if the list has
      fewer than two elements.
    """
    if len(l) < 2:
        return None
    mean = Mean(l)
    return sum((x - mean) ** 2 for x in l) / (len(l) - 1)


def StandardErrorEstimate(l):
    """Returns the standard error estimate for a list of numbers.

    For a singleton the standard error is assumed to be 10% of its value.

    Returns:
      sqrt(sample variance / n) for two or more elements, a tenth of the
      value for a singleton, and None for an empty list.
    """
    if len(l) > 1:
        return (SampleVar(l) / len(l)) ** .5
    if l:
        return l[0] / 10.0
    return None


def MeanOfMeans(dict_of_lists):
    """Returns the average of averages with the standard error of the estimate.

    Args:
      dict_of_lists: dictionary whose values are lists of numbers.  Empty
        lists are ignored.

    Returns:
      A pair (mean of the per-list means, standard error of that mean), or
      None if every list is empty.
    """
    non_empty = [values for values in dict_of_lists.values() if values]
    if not non_empty:
        return None

    means = [Mean(values) for values in non_empty]
    # Variance of the estimate for each sublist.
    variances = [StandardErrorEstimate(values) ** 2 for values in non_empty]
    return (Mean(means),  # Mean over all sublists
            sum(variances) ** .5 / len(variances))  # Std. dev. of the mean


def ParseSpecFile(spec_filename):
    """Parses the spec (parameters) file.

    Only the first line of the file is read; it is split on whitespace.

    Returns:
      An integer and a string.  The integer is the number of bogus candidates
      and the string is parameters in the HTML format (one <td> cell for each
      column except the first).
    """
    with open(spec_filename) as s:
        spec_row = s.readline().split()

    # Second to last column is 'num_additional' -- the number of bogus
    # candidates added
    num_additional = int(spec_row[-2])

    spec_in_html = ' '.join('<td>%s</td>' % cell for cell in spec_row[1:])

    return num_additional, spec_in_html


def ExtractTime(log_filename):
    """Extracts the elapsed time information from the log file.

    Returns:
      Elapsed time (in seconds) or None in case of failure (the file does
      not exist or contains no matching line).
    """
    if not os.path.isfile(log_filename):
        return None

    with open(log_filename) as log:
        log_str = log.read()

    # Matching a line output by analyze.R.
    match = re.search(r'Inference took ([0-9.]+) seconds', log_str)
    if match:
        return float(match.group(1))
    return None


def ParseMetrics(metrics_file, log_file, num_additional):
    """Processes the metrics file.

    Args:
      metrics_file: name of the metrics file
      log_file: name of the log.txt file
      num_additional: A number of bogus candidates added to the candidate
        list.

    Returns a pair:
      - A dictionary of metrics (some can be []).  It is empty, apart from a
        possible 'time' entry, when the metrics file does not exist.
      - An HTML-formatted portion of the report row.
    """
    if not os.path.isfile(metrics_file):
        metrics_row_str = [''] * 6
        metrics_row_dict = {}
    else:
        with open(metrics_file) as m:
            m.readline()  # The first line is discarded; data is on line 2.
            metrics_row = m.readline().split(',')

        (num_actual, num_rappor, num_false_pos, num_false_neg,
         total_variation, allocated_mass) = metrics_row

        num_actual = int(num_actual)
        num_rappor = int(num_rappor)

        num_false_pos = int(num_false_pos)
        num_false_neg = int(num_false_neg)

        total_variation = float(total_variation)
        allocated_mass = float(allocated_mass)

        # e.g. if there are 20 additional candidates added, and 1 false
        # positive, the false positive rate is 5%.
        fp_rate = (float(num_false_pos) / num_additional
                   if num_additional else 0)
        # e.g. if there are 100 strings in the true input, and 80 strings
        # detected by RAPPOR, then we have 20 false negatives, and a false
        # negative rate of 20%.  Guarded like fp_rate so that a run with no
        # actual strings reports 0 instead of raising ZeroDivisionError.
        fn_rate = float(num_false_neg) / num_actual if num_actual else 0.0

        metrics_row_str = [
            str(num_actual),
            str(num_rappor),
            ('%.1f%% (%d)' % (fp_rate * 100, num_false_pos)
             if num_additional else ''),
            '%.1f%% (%d)' % (fn_rate * 100, num_false_neg),
            '%.3f' % total_variation,
            '%.3f' % allocated_mass,
        ]

        metrics_row_dict = {
            'tv': [total_variation],
            'fpr': [fp_rate] if num_additional else [],
            'fnr': [fn_rate],
            'am': [allocated_mass],
        }

    elapsed_time = ExtractTime(log_file)
    if elapsed_time is not None:
        metrics_row_str = metrics_row_str + ['%.2f' % elapsed_time]
        metrics_row_dict['time'] = [elapsed_time]

    # return metrics formatted as HTML table entries
    return (metrics_row_dict,
            ' '.join('<td>%s</td>' % cell for cell in metrics_row_str))


def FormatCell1(test_case, test_instance, metrics_file, log_file, plot_file,
                link_to_plots):
    """Outputs an HTML table entry for the first cell of the row.

    The row is filled if the metrics file exist.  The first cell contains a
    link that for short tables points to a plot file inline, for large tables
    to an external file.

    If the metrics file is missing, the link points to the log file (if one
    exists).  If the file the link would refer to does not exist, the cell
    holds the plain test case name.
    """
    relpath_report = '{}/{}_report'.format(test_case, test_instance)
    if os.path.isfile(metrics_file):
        external_file = plot_file
        if link_to_plots:
            link = '#{}_{}'.format(test_case, test_instance)  # anchor
        else:
            link = os.path.join(relpath_report, 'dist.png')
    else:  # no results likely due to an error, puts a link to the log file
        external_file = log_file
        link = os.path.join(relpath_report, 'log.txt')

    if os.path.isfile(external_file):
        return '<td><a href="{}">{}</a></td>'.format(link, test_case)
    return '<td>{}</td>'.format(test_case)  # no file to link to


def ComputeMeansWithSem(metrics_lists):
    """Computes (mean, SEM) per metric; SEM is the standard error of the mean.

    Args:
      metrics_lists: dictionary keyed by metric name; each value is a
        dictionary mapping a test case to a list of numbers.

    Returns:
      A dictionary keyed by metric name.  Each value is None (no data), or a
      pair (mean, SEM).  The SEM is None when no test case has more than one
      measurement, since it would then rest only on the 10%-of-value
      assumption made by StandardErrorEstimate.
    """
    means_with_sem = {}
    for key, per_case in metrics_lists.items():
        estimate = MeanOfMeans(per_case)
        # Inspect the lists (the dictionary values).  Iterating the
        # dictionary itself would yield test case names, and the check would
        # then compare name lengths instead of sample counts.
        if estimate and all(len(values) < 2 for values in per_case.values()):
            estimate = (estimate[0], None)
        means_with_sem[key] = estimate
    return means_with_sem


def FormatSummaryRow(metrics_lists):
    """Outputs an HTML-formatted summary row.

    Args:
      metrics_lists: dictionary of dictionaries of lists, as accepted by
        ComputeMeansWithSem.  It must have the keys 'fpr', 'fnr', 'tv', 'am'
        and 'time'.

    Returns:
      SUMMARY_ROW filled in with the formatted means.
    """
    means_with_sem = ComputeMeansWithSem(metrics_lists)

    summary = {
        'name': 'Means',
        'mean_fpr': FormatMeanWithSem(means_with_sem['fpr'], percent=True),
        'mean_fnr': FormatMeanWithSem(means_with_sem['fnr'], percent=True),
        'mean_tv': FormatMeanWithSem(means_with_sem['tv'], percent=True),
        'mean_am': FormatMeanWithSem(means_with_sem['am'], percent=True),
        'mean_time': FormatMeanWithSem(means_with_sem['time']),
    }
    return SUMMARY_ROW % summary


def FormatPlots(base_dir, test_instances):
    """Outputs HTML-formatted plots.

    Args:
      base_dir: root of the result tree.
      test_instances: list of strings, each made of three space-separated
        fields; the first two are the test case and the test instance.

    Returns:
      The concatenation of one DETAILS section per instance whose dist.png
      exists under base_dir.
    """
    sections = []
    for instance in test_instances:
        # A test instance is identified by the test name and the test run.
        test_case, test_instance, _ = instance.split(' ')
        instance_dir = test_case + '/' + test_instance + '_report'
        if os.path.isfile(os.path.join(base_dir, instance_dir, 'dist.png')):
            sections.append(DETAILS % {
                'anchor': test_case + '_' + test_instance,
                'name': '{} (instance {})'.format(test_case, test_instance),
                'instance_dir': instance_dir,
            })
    return ''.join(sections)


def main(argv):
    """Writes the HTML summary of a regtest result tree.

    Args:
      argv: argv[1] is the base directory of the result tree and argv[2] is
        the name of the output file.

    Raises:
      RuntimeError: if test-instances.txt or a test case's spec.txt is
        missing.
    """
    base_dir = argv[1]

    # This file has the test case names, in the order that they should be
    # displayed.
    instances_file = os.path.join(base_dir, 'test-instances.txt')
    if not os.path.isfile(instances_file):
        raise RuntimeError('{} is missing'.format(instances_file))

    with open(instances_file) as f:
        # Blank lines carry no instance and would break the three-way split.
        test_instances = [line.strip() for line in f if line.strip()]

    # Metrics are assembled into a dictionary of dictionaries.  The top-level
    # key is the metric name ('tv', 'fpr', etc.), the second level key is
    # the test case.  These keys reference a list of floats, which can be
    # empty.
    metrics = {
        'tv': {},  # total_variation for all test cases
        'fpr': {},  # dictionary of false positive rates
        'fnr': {},  # dictionary of false negative rates
        'am': {},  # dictionary of total allocated masses
        'time': {},  # dictionary of total elapsed time measurements
    }

    # If there are too many tests, the plots are not included in the results
    # file.  Instead, rows' names are links to the corresponding .png files.
    include_plots = len(test_instances) < 20

    instances_succeeded = 0
    instances_failed = 0
    instances_running = 0

    # The output is opened only after the inputs were validated, and inside
    # a 'with' block so that it is closed even if a spec file is missing.
    with open(argv[2], 'w') as output_file:
        for instance in test_instances:
            # A test instance is identified by the test name and the test
            # run.
            test_case, test_instance, _ = instance.split(' ')

            spec_file = os.path.join(base_dir, test_case, 'spec.txt')
            if not os.path.isfile(spec_file):
                raise RuntimeError('{} is missing'.format(spec_file))

            num_additional, spec_html = ParseSpecFile(spec_file)

            report_dir = os.path.join(base_dir, test_case,
                                      test_instance + '_report')

            metrics_file = os.path.join(report_dir, 'metrics.csv')
            log_file = os.path.join(report_dir, 'log.txt')
            plot_file = os.path.join(report_dir, 'dist.png')

            cell1_html = FormatCell1(test_case, test_instance, metrics_file,
                                     log_file, plot_file, include_plots)

            # ParseMetrics outputs an HTML table row and the metrics of this
            # instance.
            metrics_dict, metrics_html = ParseMetrics(metrics_file, log_file,
                                                      num_additional)

            # Update the metrics structure, creating per-test-case lists as
            # necessary.
            for m in metrics:
                if m in metrics_dict:
                    metrics[m].setdefault(test_case, []).extend(
                        metrics_dict[m])

            output_file.write('<tr>{}{}{}</tr>\n'.format(
                cell1_html, spec_html, metrics_html))

            # Update counters: metrics present means success; a log with a
            # timing line but no metrics means failure; a log without one
            # means the instance is still running.
            if 'tv' in metrics_dict:
                instances_succeeded += 1
            elif 'time' in metrics_dict:
                instances_failed += 1
            elif os.path.isfile(log_file):
                instances_running += 1

        output_file.write(FormatSummaryRow(metrics) + '\n')

        output_file.write('</tbody>\n')
        output_file.write('</table>\n')
        # vertical space
        output_file.write('<p style="padding-bottom: 3em"></p>\n')

        # Plot links.
        if include_plots:
            output_file.write(FormatPlots(base_dir, test_instances) + '\n')
        else:
            output_file.write('<p>Too many tests to include plots.  '
                              'Click links within rows for details.</p>\n')

    print('Instances'
          ' succeeded: {} failed: {} running: {} total: {}'.
          format(instances_succeeded, instances_failed, instances_running,
                 len(test_instances)))


if __name__ == '__main__':
    try:
        main(sys.argv)
    except RuntimeError as e:
        sys.stderr.write('FATAL: %s\n' % e)
        sys.exit(1)