1#!/usr/bin/python 2"""Given a regtest result tree, prints an HTML summary to a file. 3 4See HTML skeleton in tests/regtest.html. 5""" 6 7import os 8import re 9import sys 10 11 12SUMMARY_ROW = """\ 13<tfoot style="font-weight: bold; text-align: right"> 14<tr> 15 <td> 16 %(name)s 17 </td> 18 19 <!-- input params --> 20 <td></td> 21 <td></td> 22 <td></td> 23 <td></td> 24 25 <!-- RAPPOR params --> 26 <td></td> 27 <td></td> 28 <td></td> 29 <td></td> 30 <td></td> 31 <td></td> 32 33 <!-- MAP params --> 34 <td></td> 35 <td></td> 36 37 <!-- Result metrics --> 38 <td></td> 39 <td></td> 40 <td>%(mean_fpr)s</td> 41 <td>%(mean_fnr)s</td> 42 <td>%(mean_tv)s</td> 43 <td>%(mean_am)s</td> 44 <td>%(mean_time)s</td> 45</tr> 46</tfoot> 47""" 48 49# Navigation and links to plot. 50DETAILS = """\ 51<p style="text-align: right"> 52 <a href="#top">Up</a> 53</p> 54 55<a id="%(anchor)s"></a> 56 57<p style="text-align: center"> 58 <img src="%(instance_dir)s/dist.png"/> 59</p> 60 61<p> 62<a href="%(instance_dir)s">%(name)s files</a> 63</p> 64""" 65 66 67def FormatFloat(x, percent): 68 """Formats a floating-point number.""" 69 if percent: 70 return '{:.1f}%'.format(x * 100.0) 71 else: 72 return '{:.3f}'.format(x) 73 74 75def FormatMeanWithSem(m_std_error, percent=False): 76 """Formats an estimate with standard error.""" 77 if m_std_error is None: 78 return '' 79 m, std_error = m_std_error 80 if std_error is None: 81 return FormatFloat(m, percent) 82 else: 83 return '{}±{}'.format( 84 FormatFloat(m, percent), 85 FormatFloat(std_error, percent)) 86 87 88def Mean(l): 89 """Computes the mean (average) for a list of numbers.""" 90 if l: 91 return float(sum(l)) / len(l) 92 else: 93 return None 94 95 96def SampleVar(l): 97 """Computes the sample variance for a list of numbers.""" 98 if len(l) > 1: 99 mean = Mean(l) 100 var = sum([(x - mean) ** 2 for x in l]) / (len(l) - 1) 101 return var 102 else: 103 return None 104 105 106def StandardErrorEstimate(l): 107 """Returns the standard error estimate for a list of numbers. 108 109 For a singleton the standard error is assumed to be 10% of its value. 110 """ 111 if len(l) > 1: 112 return (SampleVar(l) / len(l)) ** .5 113 elif l: 114 return l[0] / 10.0 115 else: 116 return None 117 118 119def MeanOfMeans(dict_of_lists): 120 """Returns the average of averages with the standard error of the estimate. 121 """ 122 means = [Mean(dict_of_lists[key]) for key in dict_of_lists 123 if dict_of_lists[key]] 124 if means: 125 # Compute variances of the estimate for each sublist. 126 se = [StandardErrorEstimate(dict_of_lists[key]) ** 2 for key 127 in dict_of_lists if dict_of_lists[key]] 128 return (Mean(means), # Mean over all sublists 129 sum(se) ** .5 / len(se)) # Standard deviation of the mean 130 else: 131 return None 132 133 134def ParseSpecFile(spec_filename): 135 """Parses the spec (parameters) file. 136 137 Returns: 138 An integer and a string. The integer is the number of bogus candidates 139 and the string is parameters in the HTML format. 140 """ 141 with open(spec_filename) as s: 142 spec_row = s.readline().split() 143 144 # Second to last column is 'num_additional' -- the number of bogus 145 # candidates added 146 num_additional = int(spec_row[-2]) 147 148 spec_in_html = ' '.join('<td>%s</td>' % cell for cell in spec_row[1:]) 149 150 return num_additional, spec_in_html 151 152 153def ExtractTime(log_filename): 154 """Extracts the elapsed time information from the log file. 155 156 Returns: 157 Elapsed time (in seconds) or None in case of failure. 158 """ 159 if os.path.isfile(log_filename): 160 with open(log_filename) as log: 161 log_str = log.read() 162 # Matching a line output by analyze.R. 163 match = re.search(r'Inference took ([0-9.]+) seconds', log_str) 164 if match: 165 return float(match.group(1)) 166 return None 167 168 169def ParseMetrics(metrics_file, log_file, num_additional): 170 """Processes the metrics file. 171 172 Args: 173 metrics_file: name of the metrics file 174 log_file: name of the log.txt file 175 num_additional: A number of bogus candidates added to the candidate list. 176 177 Returns a pair: 178 - A dictionary of metrics (some can be []). 179 - An HTML-formatted portion of the report row. 180 """ 181 182 if not os.path.isfile(metrics_file): 183 metrics_row_str = ['', '', '', '', '', ''] 184 metrics_row_dict = {} 185 else: 186 with open(metrics_file) as m: 187 m.readline() 188 metrics_row = m.readline().split(',') 189 190 (num_actual, num_rappor, num_false_pos, num_false_neg, total_variation, 191 allocated_mass) = metrics_row 192 193 num_actual = int(num_actual) 194 num_rappor = int(num_rappor) 195 196 num_false_pos = int(num_false_pos) 197 num_false_neg = int(num_false_neg) 198 199 total_variation = float(total_variation) 200 allocated_mass = float(allocated_mass) 201 202 # e.g. if there are 20 additional candidates added, and 1 false positive, 203 # the false positive rate is 5%. 204 fp_rate = float(num_false_pos) / num_additional if num_additional else 0 205 # e.g. if there are 100 strings in the true input, and 80 strings 206 # detected by RAPPOR, then we have 20 false negatives, and a false 207 # negative rate of 20%. 208 fn_rate = float(num_false_neg) / num_actual 209 210 metrics_row_str = [ 211 str(num_actual), 212 str(num_rappor), 213 '%.1f%% (%d)' % (fp_rate * 100, num_false_pos) if num_additional 214 else '', 215 '%.1f%% (%d)' % (fn_rate * 100, num_false_neg), 216 '%.3f' % total_variation, 217 '%.3f' % allocated_mass, 218 ] 219 220 metrics_row_dict = { 221 'tv': [total_variation], 222 'fpr': [fp_rate] if num_additional else [], 223 'fnr': [fn_rate], 224 'am': [allocated_mass], 225 } 226 227 elapsed_time = ExtractTime(log_file) 228 if elapsed_time is not None: 229 metrics_row_str = metrics_row_str + ['%.2f' % elapsed_time] 230 metrics_row_dict['time'] = [elapsed_time] 231 232 # return metrics formatted as HTML table entries 233 return (metrics_row_dict, 234 ' '.join('<td>%s</td>' % cell for cell in metrics_row_str)) 235 236 237def FormatCell1(test_case, test_instance, metrics_file, log_file, plot_file, 238 link_to_plots): 239 """Outputs an HTML table entry for the first cell of the row. 240 241 The row is filled if the metrics file exist. The first cell contains a link 242 that for short tables points to a plot file inline, for large tables to an 243 external file. 244 245 If the metrics file is missing, the link points to the log file (if one 246 exists) 247 """ 248 relpath_report = '{}/{}_report'.format(test_case, test_instance) 249 if os.path.isfile(metrics_file): 250 external_file = plot_file 251 if link_to_plots: 252 link = '#{}_{}'.format(test_case, test_instance) # anchor 253 else: 254 link = os.path.join(relpath_report, 'dist.png') 255 else: # no results likely due to an error, puts a link to the log file 256 external_file = log_file 257 link = os.path.join(relpath_report, 'log.txt') 258 259 if os.path.isfile(external_file): 260 return '<td><a href="{}">{}</a></td>'.format(link, test_case) 261 else: # if no file to link to 262 return '<td>{}</td>'.format(test_case) 263 264 265def FormatSummaryRow(metrics_lists): 266 """Outputs an HTML-formatted summary row.""" 267 means_with_sem = {} # SEM - standard error of the mean 268 269 for key in metrics_lists: 270 means_with_sem[key] = MeanOfMeans(metrics_lists[key]) 271 # If none of the lists is longer than one element, drop the SEM component. 272 if means_with_sem[key] and max([len(l) for l in metrics_lists[key]]) < 2: 273 means_with_sem[key] = [means_with_sem[key][0], None] 274 275 summary = { 276 'name': 'Means', 277 'mean_fpr': FormatMeanWithSem(means_with_sem['fpr'], percent=True), 278 'mean_fnr': FormatMeanWithSem(means_with_sem['fnr'], percent=True), 279 'mean_tv': FormatMeanWithSem(means_with_sem['tv'], percent=True), 280 'mean_am': FormatMeanWithSem(means_with_sem['am'], percent=True), 281 'mean_time': FormatMeanWithSem(means_with_sem['time']), 282 } 283 return SUMMARY_ROW % summary 284 285 286def FormatPlots(base_dir, test_instances): 287 """Outputs HTML-formatted plots.""" 288 result = '' 289 for instance in test_instances: 290 # A test instance is identified by the test name and the test run. 291 test_case, test_instance, _ = instance.split(' ') 292 instance_dir = test_case + '/' + test_instance + '_report' 293 if os.path.isfile(os.path.join(base_dir, instance_dir, 'dist.png')): 294 result += DETAILS % {'anchor': test_case + '_' + test_instance, 295 'name': '{} (instance {})'.format(test_case, 296 test_instance), 297 'instance_dir': instance_dir} 298 return result 299 300 301def main(argv): 302 base_dir = argv[1] 303 output_file = open(argv[2], 'w') 304 305 # This file has the test case names, in the order that they should be 306 # displayed. 307 instances_file = os.path.join(base_dir, 'test-instances.txt') 308 if not os.path.isfile(instances_file): 309 raise RuntimeError('{} is missing'.format(instances_file)) 310 311 with open(instances_file) as f: 312 test_instances = [line.strip() for line in f] 313 314 # Metrics are assembled into a dictionary of dictionaries. The top-level 315 # key is the metric name ('tv', 'fpr', etc.), the second level key is 316 # the test case. These keys reference a list of floats, which can be empty. 317 metrics = { 318 'tv': {}, # total_variation for all test cases 319 'fpr': {}, # dictionary of false positive rates 320 'fnr': {}, # dictionary of false negative rates 321 'am': {}, # dictionary of total allocated masses 322 'time': {}, # dictionary of total elapsed time measurements 323 } 324 325 # If there are too many tests, the plots are not included in the results 326 # file. Instead, rows' names are links to the corresponding .png files. 327 include_plots = len(test_instances) < 20 328 329 instances_succeeded = 0 330 instances_failed = 0 331 instances_running = 0 332 333 for instance in test_instances: 334 # A test instance is idenfied by the test name and the test run. 335 test_case, test_instance, _ = instance.split(' ') 336 337 spec_file = os.path.join(base_dir, test_case, 'spec.txt') 338 if not os.path.isfile(spec_file): 339 raise RuntimeError('{} is missing'.format(spec_file)) 340 341 num_additional, spec_html = ParseSpecFile(spec_file) 342 metrics_html = '' # will be filled in later on, if metrics exist 343 344 report_dir = os.path.join(base_dir, test_case, test_instance + '_report') 345 346 metrics_file = os.path.join(report_dir, 'metrics.csv') 347 log_file = os.path.join(report_dir, 'log.txt') 348 plot_file = os.path.join(report_dir, 'dist.png') 349 350 cell1_html = FormatCell1(test_case, test_instance, metrics_file, log_file, 351 plot_file, include_plots) 352 353 # ParseMetrics outputs an HTML table row and also updates lists 354 metrics_dict, metrics_html = ParseMetrics(metrics_file, log_file, 355 num_additional) 356 357 # Update the metrics structure. Initialize dictionaries if necessary. 358 for m in metrics: 359 if m in metrics_dict: 360 if not test_case in metrics[m]: 361 metrics[m][test_case] = metrics_dict[m] 362 else: 363 metrics[m][test_case] += metrics_dict[m] 364 365 print >>output_file, '<tr>{}{}{}</tr>'.format(cell1_html, 366 spec_html, metrics_html) 367 368 # Update counters 369 if 'tv' in metrics_dict: 370 instances_succeeded += 1 371 else: 372 if 'time' in metrics_dict: 373 instances_failed += 1 374 else: 375 if os.path.isfile(log_file): 376 instances_running += 1 377 378 print >>output_file, FormatSummaryRow(metrics) 379 380 print >>output_file, '</tbody>' 381 print >>output_file, '</table>' 382 print >>output_file, '<p style="padding-bottom: 3em"></p>' # vertical space 383 384 # Plot links. 385 if include_plots: 386 print >>output_file, FormatPlots(base_dir, test_instances) 387 else: 388 print >>output_file, ('<p>Too many tests to include plots. ' 389 'Click links within rows for details.</p>') 390 391 print ('Instances' 392 ' succeeded: {} failed: {} running: {} total: {}'. 393 format(instances_succeeded, instances_failed, instances_running, 394 len(test_instances))) 395 396if __name__ == '__main__': 397 try: 398 main(sys.argv) 399 except RuntimeError, e: 400 print >>sys.stderr, 'FATAL: %s' % e 401 sys.exit(1) 402