xref: /aosp_15_r20/external/rappor/tests/make_summary.py (revision 2abb31345f6c95944768b5222a9a5ed3fc68cc00)
1*2abb3134SXin Li#!/usr/bin/python
2*2abb3134SXin Li"""Given a regtest result tree, prints an HTML summary to a file.
3*2abb3134SXin Li
4*2abb3134SXin LiSee HTML skeleton in tests/regtest.html.
5*2abb3134SXin Li"""
6*2abb3134SXin Li
7*2abb3134SXin Liimport os
8*2abb3134SXin Liimport re
9*2abb3134SXin Liimport sys
10*2abb3134SXin Li
11*2abb3134SXin Li
# HTML template for the table footer (<tfoot>) row that shows the mean of each
# result metric. FormatSummaryRow fills it in with %-formatting using the keys
# 'name', 'mean_fpr', 'mean_fnr', 'mean_tv', 'mean_am' and 'mean_time'; all
# other cells are left empty.
SUMMARY_ROW = """\
<tfoot style="font-weight: bold; text-align: right">
<tr>
  <td>
    %(name)s
  </td>

  <!-- input params -->
  <td></td>
  <td></td>
  <td></td>
  <td></td>

  <!-- RAPPOR params -->
  <td></td>
  <td></td>
  <td></td>
  <td></td>
  <td></td>
  <td></td>

  <!-- MAP params -->
  <td></td>
  <td></td>

  <!-- Result metrics -->
  <td></td>
  <td></td>
  <td>%(mean_fpr)s</td>
  <td>%(mean_fnr)s</td>
  <td>%(mean_tv)s</td>
  <td>%(mean_am)s</td>
  <td>%(mean_time)s</td>
</tr>
</tfoot>
"""
48*2abb3134SXin Li
# Navigation and links to plot.
#
# HTML template for the per-instance details section: an "Up" link, a named
# anchor, the inline dist.png image and a link to the instance's report
# directory. FormatPlots fills it in with %-formatting using the keys
# 'anchor', 'instance_dir' and 'name'.
DETAILS = """\
<p style="text-align: right">
  <a href="#top">Up</a>
</p>

<a id="%(anchor)s"></a>

<p style="text-align: center">
  <img src="%(instance_dir)s/dist.png"/>
</p>

<p>
<a href="%(instance_dir)s">%(name)s files</a>
</p>
"""
65*2abb3134SXin Li
66*2abb3134SXin Li
def FormatFloat(x, percent):
  """Renders a number as text.

  Args:
    x: the number to render.
    percent: if true, x is multiplied by 100 and shown with one decimal place
      followed by '%'; otherwise x is shown with three decimal places.

  Returns:
    The formatted string.
  """
  if not percent:
    return '{:.3f}'.format(x)
  return '{:.1f}%'.format(x * 100.0)
73*2abb3134SXin Li
74*2abb3134SXin Li
def FormatMeanWithSem(m_std_error, percent=False):
  """Renders a (mean, standard error) pair as HTML text.

  Args:
    m_std_error: None, or a two-element sequence (mean, standard error) where
      the standard error may itself be None.
    percent: passed through to FormatFloat for both numbers.

  Returns:
    '' if m_std_error is None; the formatted mean alone if the standard error
    is None; otherwise 'mean&plusmn;error'.
  """
  if m_std_error is None:
    return ''

  estimate, sem = m_std_error
  text = FormatFloat(estimate, percent)
  if sem is not None:
    text = '{}&plusmn;{}'.format(text, FormatFloat(sem, percent))
  return text
86*2abb3134SXin Li
87*2abb3134SXin Li
def Mean(l):
  """Returns the arithmetic mean of a list of numbers as a float.

  Returns None when the list is empty.
  """
  if not l:
    return None
  # float() keeps the division from truncating when all elements are ints.
  total = float(sum(l))
  return total / len(l)
94*2abb3134SXin Li
95*2abb3134SXin Li
def SampleVar(l):
  """Returns the sample variance of a list of numbers.

  The sum of squared deviations from the mean is divided by (n - 1). Returns
  None when the list has fewer than two elements.
  """
  count = len(l)
  if count <= 1:
    return None

  center = Mean(l)
  squared_deviations = [(x - center) ** 2 for x in l]
  return sum(squared_deviations) / (count - 1)
104*2abb3134SXin Li
105*2abb3134SXin Li
def StandardErrorEstimate(l):
  """Estimates the standard error of the mean for a list of numbers.

  With two or more elements this is sqrt(sample variance / n). A singleton has
  no sample variance, so its standard error is assumed to be 10% of its value.
  Returns None for an empty list.
  """
  count = len(l)
  if count > 1:
    return (SampleVar(l) / count) ** .5
  # At most one element left: apply the 10% rule, or give up if empty.
  return l[0] / 10.0 if l else None
117*2abb3134SXin Li
118*2abb3134SXin Li
def MeanOfMeans(dict_of_lists):
  """Averages the per-key means and estimates the error of that average.

  Args:
    dict_of_lists: a dictionary whose values are lists of numbers; empty lists
      are ignored.

  Returns:
    None if every list is empty. Otherwise a pair (mean, standard error): the
    mean of the non-empty lists' means, and the square root of the sum of the
    lists' squared standard error estimates, divided by the number of
    non-empty lists.
  """
  non_empty = [dict_of_lists[key] for key in dict_of_lists
               if dict_of_lists[key]]
  if not non_empty:
    return None

  sublist_means = [Mean(values) for values in non_empty]
  # Variance of the mean estimate for each sublist.
  squared_errors = [StandardErrorEstimate(values) ** 2
                    for values in non_empty]
  overall_mean = Mean(sublist_means)
  overall_error = sum(squared_errors) ** .5 / len(squared_errors)
  return (overall_mean, overall_error)
132*2abb3134SXin Li
133*2abb3134SXin Li
def ParseSpecFile(spec_filename):
  """Reads the first line of a spec (parameters) file.

  The line is split on whitespace. Only the first line of the file is used.

  Returns:
    A pair (num_additional, spec_in_html). num_additional is the
    second-to-last column converted to int -- the number of bogus candidates
    added. spec_in_html is every column except the first, each wrapped in
    <td>...</td> and joined with single spaces.
  """
  with open(spec_filename) as spec:
    fields = spec.readline().split()

  bogus_candidates = int(fields[-2])
  cells = ['<td>%s</td>' % field for field in fields[1:]]
  return bogus_candidates, ' '.join(cells)
151*2abb3134SXin Li
152*2abb3134SXin Li
def ExtractTime(log_filename):
  """Pulls the elapsed inference time out of a log file.

  Searches the whole file for the first 'Inference took <number> seconds'
  message (a line output by analyze.R).

  Returns:
    The elapsed time in seconds as a float, or None if the file does not exist
    or contains no such message.
  """
  if not os.path.isfile(log_filename):
    return None

  with open(log_filename) as log:
    contents = log.read()

  found = re.search(r'Inference took ([0-9.]+) seconds', contents)
  return float(found.group(1)) if found else None
167*2abb3134SXin Li
168*2abb3134SXin Li
def ParseMetrics(metrics_file, log_file, num_additional):
  """Processes the metrics file.

  Args:
    metrics_file: name of the metrics file
    log_file: name of the log.txt file
    num_additional: A number of bogus candidates added to the candidate list.

  Returns a pair:
    - A dictionary of metrics (some can be []). It is empty apart from an
      optional 'time' entry when the metrics file does not exist.
    - An HTML-formatted portion of the report row.

  Rates whose denominator is zero (num_additional for the false positive
  rate, num_actual for the false negative rate) are shown as an empty cell
  and contribute an empty list to the dictionary, rather than raising
  ZeroDivisionError.
  """

  if not os.path.isfile(metrics_file):
    metrics_row_str = ['', '', '', '', '', '']
    metrics_row_dict = {}
  else:
    with open(metrics_file) as m:
      m.readline()  # The first line is skipped (presumably a CSV header).
      metrics_row = m.readline().split(',')

    (num_actual, num_rappor, num_false_pos, num_false_neg, total_variation,
        allocated_mass) = metrics_row

    num_actual = int(num_actual)
    num_rappor = int(num_rappor)

    num_false_pos = int(num_false_pos)
    num_false_neg = int(num_false_neg)

    total_variation = float(total_variation)
    allocated_mass = float(allocated_mass)

    # e.g. if there are 20 additional candidates added, and 1 false positive,
    # the false positive rate is 5%.
    fp_rate = float(num_false_pos) / num_additional if num_additional else 0
    # e.g. if there are 100 strings in the true input, and 80 strings
    # detected by RAPPOR, then we have 20 false negatives, and a false
    # negative rate of 20%.
    # Guarded like fp_rate: with no actual strings the rate is undefined.
    fn_rate = float(num_false_neg) / num_actual if num_actual else 0

    metrics_row_str = [
        str(num_actual),
        str(num_rappor),
        '%.1f%% (%d)' % (fp_rate * 100, num_false_pos) if num_additional
        else '',
        '%.1f%% (%d)' % (fn_rate * 100, num_false_neg) if num_actual
        else '',
        '%.3f' % total_variation,
        '%.3f' % allocated_mass,
    ]

    metrics_row_dict = {
        'tv': [total_variation],
        'fpr': [fp_rate] if num_additional else [],
        'fnr': [fn_rate] if num_actual else [],
        'am': [allocated_mass],
    }

  # The elapsed time comes from the log, so it can be present even when the
  # metrics file is missing.
  elapsed_time = ExtractTime(log_file)
  if elapsed_time is not None:
    metrics_row_str = metrics_row_str + ['%.2f' % elapsed_time]
    metrics_row_dict['time'] = [elapsed_time]

  # return metrics formatted as HTML table entries
  return (metrics_row_dict,
          ' '.join('<td>%s</td>' % cell for cell in metrics_row_str))
235*2abb3134SXin Li
236*2abb3134SXin Li
def FormatCell1(test_case, test_instance, metrics_file, log_file, plot_file,
                link_to_plots):
  """Builds the HTML for the first cell of a report row.

  The cell always shows the test case name. Where the name links to depends
  on which files exist:
    - metrics file present, link_to_plots true: an in-page anchor
      '#<test_case>_<test_instance>';
    - metrics file present, link_to_plots false: the instance's dist.png;
    - metrics file missing (likely due to an error): the instance's log.txt.

  The link is only emitted if the file it stands for (plot_file in the first
  two cases, log_file in the last) exists; otherwise the name is plain text.
  """
  report_relpath = '{}/{}_report'.format(test_case, test_instance)

  if not os.path.isfile(metrics_file):
    # No results, so point at the log instead.
    target_file = log_file
    href = os.path.join(report_relpath, 'log.txt')
  elif link_to_plots:
    target_file = plot_file
    href = '#{}_{}'.format(test_case, test_instance)  # anchor
  else:
    target_file = plot_file
    href = os.path.join(report_relpath, 'dist.png')

  if not os.path.isfile(target_file):  # nothing to link to
    return '<td>{}</td>'.format(test_case)
  return '<td><a href="{}">{}</a></td>'.format(href, test_case)
263*2abb3134SXin Li
264*2abb3134SXin Li
def FormatSummaryRow(metrics_lists):
  """Outputs an HTML-formatted summary row.

  Args:
    metrics_lists: a dictionary keyed by metric name; it must contain 'fpr',
      'fnr', 'tv', 'am' and 'time'. Each value is itself a dictionary mapping
      a test case to a list of numbers.

  Returns:
    SUMMARY_ROW filled in with the mean of means of each metric. The standard
    error of the mean (SEM) is shown only if at least one test case has more
    than one measurement for that metric. All metrics except 'time' are
    formatted as percentages.
  """
  means_with_sem = {}  # SEM - standard error of the mean

  for key in metrics_lists:
    estimate = MeanOfMeans(metrics_lists[key])
    # If none of the lists is longer than one element, drop the SEM component:
    # for singletons it is only the assumed 10% placeholder. The lengths must
    # be taken from the dictionary's values (the lists); iterating over the
    # dictionary itself would measure the test case names instead.
    if estimate is not None and all(
        len(values) < 2 for values in metrics_lists[key].values()):
      estimate = [estimate[0], None]
    means_with_sem[key] = estimate

  summary = {
      'name': 'Means',
      'mean_fpr': FormatMeanWithSem(means_with_sem['fpr'], percent=True),
      'mean_fnr': FormatMeanWithSem(means_with_sem['fnr'], percent=True),
      'mean_tv': FormatMeanWithSem(means_with_sem['tv'], percent=True),
      'mean_am': FormatMeanWithSem(means_with_sem['am'], percent=True),
      'mean_time': FormatMeanWithSem(means_with_sem['time']),
  }
  return SUMMARY_ROW % summary
284*2abb3134SXin Li
285*2abb3134SXin Li
def FormatPlots(base_dir, test_instances):
  """Builds the HTML details sections that embed the plots.

  Args:
    base_dir: directory that the per-test-case directories live in.
    test_instances: strings of three space-separated fields; the first two
      are the test case name and the test instance (run).

  Returns:
    The concatenation of one DETAILS section per instance whose
    <base_dir>/<test_case>/<test_instance>_report/dist.png exists. Instances
    without a plot are skipped.
  """
  sections = []
  for instance in test_instances:
    # A test instance is identified by the test name and the test run.
    test_case, test_instance, _ = instance.split(' ')
    instance_dir = '{}/{}_report'.format(test_case, test_instance)
    plot_path = os.path.join(base_dir, instance_dir, 'dist.png')
    if not os.path.isfile(plot_path):
      continue
    sections.append(DETAILS % {
        'anchor': '{}_{}'.format(test_case, test_instance),
        'name': '{} (instance {})'.format(test_case, test_instance),
        'instance_dir': instance_dir,
    })
  return ''.join(sections)
299*2abb3134SXin Li
300*2abb3134SXin Li
def main(argv):
  """Writes the HTML table rows, summary row and plots for a result tree.

  Args:
    argv: command-line arguments. argv[1] is the base directory of the
      regtest result tree; argv[2] is the path of the HTML file to write.

  Raises:
    RuntimeError: if <base_dir>/test-instances.txt or the spec.txt of any
      test case is missing.

  The inputs are validated and read before the output file is opened, and the
  output file is closed on every exit path, including errors. A one-line
  tally of succeeded/failed/running instances is printed to stdout.
  """
  base_dir = argv[1]

  # This file has the test case names, in the order that they should be
  # displayed.
  instances_file = os.path.join(base_dir, 'test-instances.txt')
  if not os.path.isfile(instances_file):
    raise RuntimeError('{} is missing'.format(instances_file))

  with open(instances_file) as f:
    # Blank lines (e.g. a trailing one) carry no instance and would break the
    # three-way split below, so they are skipped.
    test_instances = [line.strip() for line in f if line.strip()]

  # Metrics are assembled into a dictionary of dictionaries. The top-level
  # key is the metric name ('tv', 'fpr', etc.), the second level key is
  # the test case. These keys reference a list of floats, which can be empty.
  metrics = {
      'tv': {},  # total_variation for all test cases
      'fpr': {},  # dictionary of false positive rates
      'fnr': {},  # dictionary of false negative rates
      'am': {},  # dictionary of total allocated masses
      'time': {},  # dictionary of total elapsed time measurements
  }

  # If there are too many tests, the plots are not included in the results
  # file. Instead, rows' names are links to the corresponding .png files.
  include_plots = len(test_instances) < 20

  instances_succeeded = 0
  instances_failed = 0
  instances_running = 0

  # write() with an explicit newline emits the same bytes as the print
  # statement did and behaves identically under Python 2 and Python 3.
  with open(argv[2], 'w') as output_file:
    for instance in test_instances:
      # A test instance is identified by the test name and the test run.
      test_case, test_instance, _ = instance.split(' ')

      spec_file = os.path.join(base_dir, test_case, 'spec.txt')
      if not os.path.isfile(spec_file):
        raise RuntimeError('{} is missing'.format(spec_file))

      num_additional, spec_html = ParseSpecFile(spec_file)

      report_dir = os.path.join(base_dir, test_case, test_instance + '_report')

      metrics_file = os.path.join(report_dir, 'metrics.csv')
      log_file = os.path.join(report_dir, 'log.txt')
      plot_file = os.path.join(report_dir, 'dist.png')

      cell1_html = FormatCell1(test_case, test_instance, metrics_file,
                               log_file, plot_file, include_plots)

      # ParseMetrics returns this instance's metrics and its HTML table cells.
      metrics_dict, metrics_html = ParseMetrics(metrics_file, log_file,
                                                num_additional)

      # Update the metrics structure. Initialize dictionaries if necessary.
      for m in metrics:
        if m in metrics_dict:
          if test_case not in metrics[m]:
            metrics[m][test_case] = metrics_dict[m]
          else:
            metrics[m][test_case] += metrics_dict[m]

      output_file.write('<tr>{}{}{}</tr>\n'.format(cell1_html, spec_html,
                                                   metrics_html))

      # Update counters: metrics present means success; a recorded time
      # without metrics means failure; a log file alone means still running.
      if 'tv' in metrics_dict:
        instances_succeeded += 1
      elif 'time' in metrics_dict:
        instances_failed += 1
      elif os.path.isfile(log_file):
        instances_running += 1

    output_file.write(FormatSummaryRow(metrics) + '\n')

    output_file.write('</tbody>\n')
    output_file.write('</table>\n')
    output_file.write('<p style="padding-bottom: 3em"></p>\n')  # vertical space

    # Plot links.
    if include_plots:
      output_file.write(FormatPlots(base_dir, test_instances) + '\n')
    else:
      output_file.write('<p>Too many tests to include plots. '
                        'Click links within rows for details.</p>\n')

  print('Instances'
        ' succeeded: {}  failed: {}  running: {}  total: {}'.
        format(instances_succeeded, instances_failed, instances_running,
               len(test_instances)))
395*2abb3134SXin Li
# Script entry point: a RuntimeError from main() (a missing input file) is
# reported on stderr and turned into exit status 1.
if __name__ == '__main__':
  try:
    main(sys.argv)
  except RuntimeError as e:
    # 'except ... as' is accepted by Python 2.6+ and Python 3, unlike the
    # comma form; write() likewise behaves the same on both.
    sys.stderr.write('FATAL: %s\n' % e)
    sys.exit(1)
402