Diffstat (limited to 'tests/make_summary.py')
-rwxr-xr-x | tests/make_summary.py | 401 |
1 file changed, 401 insertions, 0 deletions
diff --git a/tests/make_summary.py b/tests/make_summary.py
new file mode 100755
index 0000000..b55559c
--- /dev/null
+++ b/tests/make_summary.py
@@ -0,0 +1,401 @@
+#!/usr/bin/python
+"""Given a regtest result tree, prints an HTML summary to a file.
+
+See HTML skeleton in tests/regtest.html.
+"""
+
+import os
+import re
+import sys
+
+
+SUMMARY_ROW = """\
+<tfoot style="font-weight: bold; text-align: right">
+<tr>
+  <td>
+    %(name)s
+  </td>
+
+  <!-- input params -->
+  <td></td>
+  <td></td>
+  <td></td>
+  <td></td>
+
+  <!-- RAPPOR params -->
+  <td></td>
+  <td></td>
+  <td></td>
+  <td></td>
+  <td></td>
+  <td></td>
+
+  <!-- MAP params -->
+  <td></td>
+  <td></td>
+
+  <!-- Result metrics -->
+  <td></td>
+  <td></td>
+  <td>%(mean_fpr)s</td>
+  <td>%(mean_fnr)s</td>
+  <td>%(mean_tv)s</td>
+  <td>%(mean_am)s</td>
+  <td>%(mean_time)s</td>
+</tr>
+</tfoot>
+"""
+
+# Navigation and links to plot.
+DETAILS = """\
+<p style="text-align: right">
+  <a href="#top">Up</a>
+</p>
+
+<a id="%(anchor)s"></a>
+
+<p style="text-align: center">
+  <img src="%(instance_dir)s/dist.png"/>
+</p>
+
+<p>
+<a href="%(instance_dir)s">%(name)s files</a>
+</p>
+"""
+
+
+def FormatFloat(x, percent):
+  """Formats a floating-point number."""
+  if percent:
+    return '{:.1f}%'.format(x * 100.0)
+  else:
+    return '{:.3f}'.format(x)
+
+
+def FormatMeanWithSem(m_std_error, percent=False):
+  """Formats an estimate with standard error."""
+  if m_std_error is None:
+    return ''
+  m, std_error = m_std_error
+  if std_error is None:
+    return FormatFloat(m, percent)
+  else:
+    return '{}±{}'.format(
+        FormatFloat(m, percent),
+        FormatFloat(std_error, percent))
+
+
+def Mean(l):
+  """Computes the mean (average) for a list of numbers."""
+  if l:
+    return float(sum(l)) / len(l)
+  else:
+    return None
+
+
+def SampleVar(l):
+  """Computes the sample variance for a list of numbers."""
+  if len(l) > 1:
+    mean = Mean(l)
+    var = sum([(x - mean) ** 2 for x in l]) / (len(l) - 1)
+    return var
+  else:
+    return None
+
+
+def StandardErrorEstimate(l):
+  """Returns the standard error estimate for a list of numbers.
+
+  For a singleton the standard error is assumed to be 10% of its value.
+  """
+  if len(l) > 1:
+    return (SampleVar(l) / len(l)) ** .5
+  elif l:
+    return l[0] / 10.0
+  else:
+    return None
+
+
+def MeanOfMeans(dict_of_lists):
+  """Returns the average of averages with the standard error of the estimate.
+  """
+  means = [Mean(dict_of_lists[key]) for key in dict_of_lists
+           if dict_of_lists[key]]
+  if means:
+    # Compute variances of the estimate for each sublist.
+    se = [StandardErrorEstimate(dict_of_lists[key]) ** 2 for key
+          in dict_of_lists if dict_of_lists[key]]
+    return (Mean(means),  # Mean over all sublists
+            sum(se) ** .5 / len(se))  # Standard error of the mean
+  else:
+    return None
+
+
+def ParseSpecFile(spec_filename):
+  """Parses the spec (parameters) file.
+
+  Returns:
+    An integer and a string. The integer is the number of bogus candidates
+    and the string is the parameters formatted as HTML table cells.
+  """
+  with open(spec_filename) as s:
+    spec_row = s.readline().split()
+
+  # Second-to-last column is 'num_additional' -- the number of bogus
+  # candidates added.
+  num_additional = int(spec_row[-2])
+
+  spec_in_html = ' '.join('<td>%s</td>' % cell for cell in spec_row[1:])
+
+  return num_additional, spec_in_html
+
+
+def ExtractTime(log_filename):
+  """Extracts the elapsed time information from the log file.
+
+  Returns:
+    Elapsed time (in seconds) or None in case of failure.
+  """
+  if os.path.isfile(log_filename):
+    with open(log_filename) as log:
+      log_str = log.read()
+    # Matching a line output by analyze.R.
+    match = re.search(r'Inference took ([0-9.]+) seconds', log_str)
+    if match:
+      return float(match.group(1))
+  return None
+
+
+def ParseMetrics(metrics_file, log_file, num_additional):
+  """Processes the metrics file.
+
+  Args:
+    metrics_file: name of the metrics file
+    log_file: name of the log.txt file
+    num_additional: the number of bogus candidates added to the candidate list
+
+  Returns a pair:
+    - A dictionary of metrics (some can be []).
+    - An HTML-formatted portion of the report row.
+  """
+
+  if not os.path.isfile(metrics_file):
+    metrics_row_str = ['', '', '', '', '', '']
+    metrics_row_dict = {}
+  else:
+    with open(metrics_file) as m:
+      m.readline()
+      metrics_row = m.readline().split(',')
+
+    (num_actual, num_rappor, num_false_pos, num_false_neg, total_variation,
+     allocated_mass) = metrics_row
+
+    num_actual = int(num_actual)
+    num_rappor = int(num_rappor)
+
+    num_false_pos = int(num_false_pos)
+    num_false_neg = int(num_false_neg)
+
+    total_variation = float(total_variation)
+    allocated_mass = float(allocated_mass)
+
+    # e.g. if there are 20 additional candidates added, and 1 false positive,
+    # the false positive rate is 5%.
+    fp_rate = float(num_false_pos) / num_additional if num_additional else 0
+    # e.g. if there are 100 strings in the true input, and 80 strings
+    # detected by RAPPOR, then we have 20 false negatives, and a false
+    # negative rate of 20%.
+    fn_rate = float(num_false_neg) / num_actual
+
+    metrics_row_str = [
+        str(num_actual),
+        str(num_rappor),
+        '%.1f%% (%d)' % (fp_rate * 100, num_false_pos) if num_additional
+        else '',
+        '%.1f%% (%d)' % (fn_rate * 100, num_false_neg),
+        '%.3f' % total_variation,
+        '%.3f' % allocated_mass,
+    ]
+
+    metrics_row_dict = {
+        'tv': [total_variation],
+        'fpr': [fp_rate] if num_additional else [],
+        'fnr': [fn_rate],
+        'am': [allocated_mass],
+    }
+
+  elapsed_time = ExtractTime(log_file)
+  if elapsed_time is not None:
+    metrics_row_str = metrics_row_str + ['%.2f' % elapsed_time]
+    metrics_row_dict['time'] = [elapsed_time]
+
+  # return metrics formatted as HTML table entries
+  return (metrics_row_dict,
+          ' '.join('<td>%s</td>' % cell for cell in metrics_row_str))
+
+
+def FormatCell1(test_case, test_instance, metrics_file, log_file, plot_file,
+                link_to_plots):
+  """Outputs an HTML table entry for the first cell of the row.
+
+  The row is filled in if the metrics file exists. The first cell contains a
+  link that points to an inline plot for short tables, or to an external
+  file for large tables.
+
+  If the metrics file is missing, the link points to the log file (if one
+  exists).
+  """
+  relpath_report = '{}/{}_report'.format(test_case, test_instance)
+  if os.path.isfile(metrics_file):
+    external_file = plot_file
+    if link_to_plots:
+      link = '#{}_{}'.format(test_case, test_instance)  # anchor
+    else:
+      link = os.path.join(relpath_report, 'dist.png')
+  else:  # no results, likely due to an error; link to the log file instead
+    external_file = log_file
+    link = os.path.join(relpath_report, 'log.txt')
+
+  if os.path.isfile(external_file):
+    return '<td><a href="{}">{}</a></td>'.format(link, test_case)
+  else:  # if no file to link to
+    return '<td>{}</td>'.format(test_case)
+
+
+def FormatSummaryRow(metrics_lists):
+  """Outputs an HTML-formatted summary row."""
+  means_with_sem = {}  # SEM - standard error of the mean
+
+  for key in metrics_lists:
+    means_with_sem[key] = MeanOfMeans(metrics_lists[key])
+    # If none of the lists is longer than one element, drop the SEM component.
+    if means_with_sem[key] and max([len(l) for l in metrics_lists[key]]) < 2:
+      means_with_sem[key] = [means_with_sem[key][0], None]
+
+  summary = {
+      'name': 'Means',
+      'mean_fpr': FormatMeanWithSem(means_with_sem['fpr'], percent=True),
+      'mean_fnr': FormatMeanWithSem(means_with_sem['fnr'], percent=True),
+      'mean_tv': FormatMeanWithSem(means_with_sem['tv'], percent=True),
+      'mean_am': FormatMeanWithSem(means_with_sem['am'], percent=True),
+      'mean_time': FormatMeanWithSem(means_with_sem['time']),
+  }
+  return SUMMARY_ROW % summary
+
+
+def FormatPlots(base_dir, test_instances):
+  """Outputs HTML-formatted plots."""
+  result = ''
+  for instance in test_instances:
+    # A test instance is identified by the test name and the test run.
+    test_case, test_instance, _ = instance.split(' ')
+    instance_dir = test_case + '/' + test_instance + '_report'
+    if os.path.isfile(os.path.join(base_dir, instance_dir, 'dist.png')):
+      result += DETAILS % {'anchor': test_case + '_' + test_instance,
+                           'name': '{} (instance {})'.format(test_case,
+                                                             test_instance),
+                           'instance_dir': instance_dir}
+  return result
+
+
+def main(argv):
+  base_dir = argv[1]
+  output_file = open(argv[2], 'w')
+
+  # This file has the test case names, in the order that they should be
+  # displayed.
+  instances_file = os.path.join(base_dir, 'test-instances.txt')
+  if not os.path.isfile(instances_file):
+    raise RuntimeError('{} is missing'.format(instances_file))
+
+  with open(instances_file) as f:
+    test_instances = [line.strip() for line in f]
+
+  # Metrics are assembled into a dictionary of dictionaries. The top-level
+  # key is the metric name ('tv', 'fpr', etc.), the second level key is
+  # the test case. These keys reference a list of floats, which can be empty.
+  metrics = {
+      'tv': {},  # total_variation for all test cases
+      'fpr': {},  # dictionary of false positive rates
+      'fnr': {},  # dictionary of false negative rates
+      'am': {},  # dictionary of total allocated masses
+      'time': {},  # dictionary of total elapsed time measurements
+  }
+
+  # If there are too many tests, the plots are not included in the results
+  # file. Instead, rows' names are links to the corresponding .png files.
+  include_plots = len(test_instances) < 20
+
+  instances_succeeded = 0
+  instances_failed = 0
+  instances_running = 0
+
+  for instance in test_instances:
+    # A test instance is identified by the test name and the test run.
+    test_case, test_instance, _ = instance.split(' ')
+
+    spec_file = os.path.join(base_dir, test_case, 'spec.txt')
+    if not os.path.isfile(spec_file):
+      raise RuntimeError('{} is missing'.format(spec_file))
+
+    num_additional, spec_html = ParseSpecFile(spec_file)
+    metrics_html = ''  # will be filled in later on, if metrics exist
+
+    report_dir = os.path.join(base_dir, test_case, test_instance + '_report')
+
+    metrics_file = os.path.join(report_dir, 'metrics.csv')
+    log_file = os.path.join(report_dir, 'log.txt')
+    plot_file = os.path.join(report_dir, 'dist.png')
+
+    cell1_html = FormatCell1(test_case, test_instance, metrics_file, log_file,
+                             plot_file, include_plots)
+
+    # ParseMetrics outputs an HTML table row and also updates lists
+    metrics_dict, metrics_html = ParseMetrics(metrics_file, log_file,
+                                              num_additional)
+
+    # Update the metrics structure. Initialize dictionaries if necessary.
+    for m in metrics:
+      if m in metrics_dict:
+        if not test_case in metrics[m]:
+          metrics[m][test_case] = metrics_dict[m]
+        else:
+          metrics[m][test_case] += metrics_dict[m]
+
+    print >>output_file, '<tr>{}{}{}</tr>'.format(cell1_html,
+                                                  spec_html, metrics_html)
+
+    # Update counters
+    if 'tv' in metrics_dict:
+      instances_succeeded += 1
+    else:
+      if 'time' in metrics_dict:
+        instances_failed += 1
+      else:
+        if os.path.isfile(log_file):
+          instances_running += 1
+
+  print >>output_file, FormatSummaryRow(metrics)
+
+  print >>output_file, '</tbody>'
+  print >>output_file, '</table>'
+  print >>output_file, '<p style="padding-bottom: 3em"></p>'  # vertical space
+
+  # Plot links.
+  if include_plots:
+    print >>output_file, FormatPlots(base_dir, test_instances)
+  else:
+    print >>output_file, ('<p>Too many tests to include plots. '
+                          'Click links within rows for details.</p>')
+
+  print ('Instances'
+         ' succeeded: {} failed: {} running: {} total: {}'.
+         format(instances_succeeded, instances_failed, instances_running,
+                len(test_instances)))
+
+if __name__ == '__main__':
+  try:
+    main(sys.argv)
+  except RuntimeError, e:
+    print >>sys.stderr, 'FATAL: %s' % e
+    sys.exit(1)
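For reference, a minimal, hypothetical smoke test for the script above: it builds a one-instance result tree containing the files make_summary.py actually reads (test-instances.txt, spec.txt, metrics.csv, log.txt) and then runs the script over it. The 'demo-case' name, every parameter value, and the repository-root invocation path are illustrative assumptions, not part of the change above.

#!/usr/bin/python
"""Builds a throwaway regtest result tree and renders a summary from it."""

import os
import subprocess

base = '_regtest_demo'
report = os.path.join(base, 'demo-case', '1_report')
os.makedirs(report)

# One test instance per line: "<test case> <instance number> <unused field>".
with open(os.path.join(base, 'test-instances.txt'), 'w') as f:
  f.write('demo-case 1 0\n')

# Whitespace-separated parameters; ParseSpecFile() reads the second-to-last
# column as num_additional (the number of bogus candidates added).
with open(os.path.join(base, 'demo-case', 'spec.txt'), 'w') as f:
  f.write('demo-case 100 10 1 exp 16 2 8 0.5 0.75 0.5 1.0 10 0\n')

# ParseMetrics() skips the header line and reads six comma-separated values.
with open(os.path.join(report, 'metrics.csv'), 'w') as f:
  f.write('actual,rappor,fp,fn,tv,am\n')
  f.write('100,90,1,10,0.05,0.02\n')

# ExtractTime() looks for this exact phrase, as printed by analyze.R.
with open(os.path.join(report, 'log.txt'), 'w') as f:
  f.write('Inference took 1.23 seconds\n')

# Assumes the working directory is the repository root and that the script's
# shebang resolves to a Python 2 interpreter.
subprocess.check_call(['./tests/make_summary.py', base,
                       os.path.join(base, 'summary.html')])

The resulting summary.html holds only the table rows and summary row; per the script's docstring, the surrounding page comes from the skeleton in tests/regtest.html.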