#!/usr/bin/python
"""Given a regtest result tree, prints an HTML summary to a file.

See HTML skeleton in tests/regtest.html.
"""

import os
import re
import sys


# Summary row of averaged metrics, rendered by FormatSummaryRow below.
SUMMARY_ROW = """\
<tr>
  <td>%(name)s</td>
  <td>%(mean_fpr)s</td>
  <td>%(mean_fnr)s</td>
  <td>%(mean_tv)s</td>
  <td>%(mean_am)s</td>
  <td>%(mean_time)s</td>
</tr>
"""

# Navigation and links to plot.
DETAILS = """\
<p><a href="#top">Up</a></p>

<a id="%(anchor)s"></a>

<p><img src="%(instance_dir)s/dist.png"/></p>

<p><a href="%(instance_dir)s">%(name)s files</a></p>
"""


def FormatFloat(x, percent):
  """Formats a floating-point number."""
  if percent:
    return '{:.1f}%'.format(x * 100.0)
  else:
    return '{:.3f}'.format(x)


def FormatMeanWithSem(m_std_error, percent=False):
  """Formats an estimate with standard error."""
  if m_std_error is None:
    return ''
  m, std_error = m_std_error
  if std_error is None:
    return FormatFloat(m, percent)
  else:
    return '{}&plusmn;{}'.format(
        FormatFloat(m, percent),
        FormatFloat(std_error, percent))


def Mean(l):
  """Computes the mean (average) for a list of numbers."""
  if l:
    return float(sum(l)) / len(l)
  else:
    return None


def SampleVar(l):
  """Computes the sample variance for a list of numbers."""
  if len(l) > 1:
    mean = Mean(l)
    var = sum([(x - mean) ** 2 for x in l]) / (len(l) - 1)
    return var
  else:
    return None


def StandardErrorEstimate(l):
  """Returns the standard error estimate for a list of numbers.

  For a singleton the standard error is assumed to be 10% of its value.
  """
  if len(l) > 1:
    return (SampleVar(l) / len(l)) ** .5
  elif l:
    return l[0] / 10.0
  else:
    return None


def MeanOfMeans(dict_of_lists):
  """Returns the average of averages with the standard error of the estimate.
  """
  means = [Mean(dict_of_lists[key]) for key in dict_of_lists
           if dict_of_lists[key]]
  if means:
    # Compute variances of the estimate for each sublist.
    se = [StandardErrorEstimate(dict_of_lists[key]) ** 2
          for key in dict_of_lists if dict_of_lists[key]]
    return (Mean(means),  # Mean over all sublists
            sum(se) ** .5 / len(se))  # Standard deviation of the mean
  else:
    return None


def ParseSpecFile(spec_filename):
  """Parses the spec (parameters) file.

  Returns:
    An integer and a string. The integer is the number of bogus candidates
    and the string is the parameters formatted as HTML table cells.
  """
  with open(spec_filename) as s:
    spec_row = s.readline().split()

  # Second to last column is 'num_additional' -- the number of bogus
  # candidates added.
  num_additional = int(spec_row[-2])

  spec_in_html = ' '.join('<td>%s</td>' % cell for cell in spec_row[1:])

  return num_additional, spec_in_html


def ExtractTime(log_filename):
  """Extracts the elapsed time information from the log file.

  Returns:
    Elapsed time (in seconds) or None in case of failure.
  """
  if os.path.isfile(log_filename):
    with open(log_filename) as log:
      log_str = log.read()
    # Matching a line output by analyze.R.
    match = re.search(r'Inference took ([0-9.]+) seconds', log_str)
    if match:
      return float(match.group(1))
  return None


def ParseMetrics(metrics_file, log_file, num_additional):
  """Processes the metrics file.

  Args:
    metrics_file: name of the metrics file
    log_file: name of the log.txt file
    num_additional: number of bogus candidates added to the candidate list

  Returns a pair:
    - A dictionary of metrics (some can be []).
    - An HTML-formatted portion of the report row.
  """
  if not os.path.isfile(metrics_file):
    metrics_row_str = ['', '', '', '', '', '']
    metrics_row_dict = {}
  else:
    with open(metrics_file) as m:
      m.readline()  # skip the CSV header
      metrics_row = m.readline().split(',')

    (num_actual, num_rappor,
     num_false_pos, num_false_neg,
     total_variation, allocated_mass) = metrics_row

    num_actual = int(num_actual)
    num_rappor = int(num_rappor)
    num_false_pos = int(num_false_pos)
    num_false_neg = int(num_false_neg)
    total_variation = float(total_variation)
    allocated_mass = float(allocated_mass)

    # e.g. if there are 20 additional candidates added, and 1 false positive,
    # the false positive rate is 5%.
    fp_rate = float(num_false_pos) / num_additional if num_additional else 0
    # e.g. if there are 100 strings in the true input, and 80 strings
    # detected by RAPPOR, then we have 20 false negatives, and a false
    # negative rate of 20%.
    fn_rate = float(num_false_neg) / num_actual

    metrics_row_str = [
        str(num_actual),
        str(num_rappor),
        '%.1f%% (%d)' % (fp_rate * 100, num_false_pos) if num_additional
        else '',
        '%.1f%% (%d)' % (fn_rate * 100, num_false_neg),
        '%.3f' % total_variation,
        '%.3f' % allocated_mass,
    ]

    metrics_row_dict = {
        'tv': [total_variation],
        'fpr': [fp_rate] if num_additional else [],
        'fnr': [fn_rate],
        'am': [allocated_mass],
    }

  elapsed_time = ExtractTime(log_file)
  if elapsed_time is not None:
    metrics_row_str = metrics_row_str + ['%.2f' % elapsed_time]
    metrics_row_dict['time'] = [elapsed_time]

  # Return metrics formatted as HTML table entries.
  return (metrics_row_dict,
          ' '.join('<td>%s</td>' % cell for cell in metrics_row_str))
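

# A worked example of the flow above (all numbers are illustrative, and the
# header text is hypothetical -- only the column order matters here).  Given
# a metrics.csv containing:
#
#   num_actual,num_rappor,num_false_pos,num_false_neg,total_variation,allocated_mass
#   100,85,1,15,0.021,0.037
#
# and num_additional=20, ParseMetrics reports a 5.0% false positive rate
# (1/20) and a 15.0% false negative rate (15/100).  The singleton lists in
# metrics_row_dict are later accumulated per test case and averaged with
# MeanOfMeans(), e.g. MeanOfMeans({'a': [0.1, 0.2], 'b': [0.4]}) returns
# roughly (0.275, 0.032): the mean of the per-case means and its standard
# error.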


def FormatCell1(test_case, test_instance, metrics_file, log_file,
                plot_file, link_to_plots):
  """Outputs an HTML table entry for the first cell of the row.

  The row is filled if the metrics file exists.  The first cell contains a
  link: for short tables it points to an inline plot, for large tables to an
  external file.  If the metrics file is missing, the link points to the log
  file (if one exists).
  """
  relpath_report = '{}/{}_report'.format(test_case, test_instance)

  if os.path.isfile(metrics_file):
    external_file = plot_file
    if link_to_plots:
      link = '#{}_{}'.format(test_case, test_instance)  # anchor
    else:
      link = os.path.join(relpath_report, 'dist.png')
  else:
    # No results, likely due to an error; put a link to the log file.
    external_file = log_file
    link = os.path.join(relpath_report, 'log.txt')

  if os.path.isfile(external_file):
    return '<td><a href="{}">{}</a></td>'.format(link, test_case)
  else:  # no file to link to
    return '<td>{}</td>'.format(test_case)
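

# A sketch of the cells FormatCell1 may produce (names and paths are
# illustrative):
#
#   '<td><a href="#demo-case_1">demo-case</a></td>'                (inline plots)
#   '<td><a href="demo-case/1_report/dist.png">demo-case</a></td>'
#   '<td><a href="demo-case/1_report/log.txt">demo-case</a></td>'  (no metrics)
#   '<td>demo-case</td>'                                           (no file to link to)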


def FormatSummaryRow(metrics_lists):
  """Outputs an HTML-formatted summary row."""
  means_with_sem = {}  # SEM - standard error of the mean

  for key in metrics_lists:
    means_with_sem[key] = MeanOfMeans(metrics_lists[key])
    # If none of the lists is longer than one element, drop the SEM component.
    if (means_with_sem[key] and
        max([len(l) for l in metrics_lists[key].values()]) < 2):
      means_with_sem[key] = [means_with_sem[key][0], None]

  summary = {
      'name': 'Means',
      'mean_fpr': FormatMeanWithSem(means_with_sem['fpr'], percent=True),
      'mean_fnr': FormatMeanWithSem(means_with_sem['fnr'], percent=True),
      'mean_tv': FormatMeanWithSem(means_with_sem['tv'], percent=True),
      'mean_am': FormatMeanWithSem(means_with_sem['am'], percent=True),
      'mean_time': FormatMeanWithSem(means_with_sem['time']),
  }
  return SUMMARY_ROW % summary


def FormatPlots(base_dir, test_instances):
  """Outputs HTML-formatted plots."""
  result = ''
  for instance in test_instances:
    # A test instance is identified by the test name and the test run.
    test_case, test_instance, _ = instance.split(' ')
    instance_dir = test_case + '/' + test_instance + '_report'
    if os.path.isfile(os.path.join(base_dir, instance_dir, 'dist.png')):
      result += DETAILS % {'anchor': test_case + '_' + test_instance,
                           'name': '{} (instance {})'.format(test_case,
                                                             test_instance),
                           'instance_dir': instance_dir}
  return result
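

# FormatPlots fills the DETAILS template with a dict like the following
# (names are illustrative):
#
#   DETAILS % {'anchor': 'demo-case_1',
#              'name': 'demo-case (instance 1)',
#              'instance_dir': 'demo-case/1_report'}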


def main(argv):
  base_dir = argv[1]
  output_file = open(argv[2], 'w')

  # This file has the test case names, in the order that they should be
  # displayed.
  instances_file = os.path.join(base_dir, 'test-instances.txt')
  if not os.path.isfile(instances_file):
    raise RuntimeError('{} is missing'.format(instances_file))

  with open(instances_file) as f:
    test_instances = [line.strip() for line in f]

  # Metrics are assembled into a dictionary of dictionaries. The top-level
  # key is the metric name ('tv', 'fpr', etc.); the second-level key is
  # the test case. These keys reference a list of floats, which can be empty.
  metrics = {
      'tv': {},  # total_variation for all test cases
      'fpr': {},  # dictionary of false positive rates
      'fnr': {},  # dictionary of false negative rates
      'am': {},  # dictionary of total allocated masses
      'time': {},  # dictionary of total elapsed time measurements
  }

  # If there are too many tests, the plots are not included in the results
  # file. Instead, rows' names are links to the corresponding .png files.
  include_plots = len(test_instances) < 20

  instances_succeeded = 0
  instances_failed = 0
  instances_running = 0

  for instance in test_instances:
    # A test instance is identified by the test name and the test run.
    test_case, test_instance, _ = instance.split(' ')

    spec_file = os.path.join(base_dir, test_case, 'spec.txt')
    if not os.path.isfile(spec_file):
      raise RuntimeError('{} is missing'.format(spec_file))

    num_additional, spec_html = ParseSpecFile(spec_file)
    metrics_html = ''  # will be filled in later on, if metrics exist

    report_dir = os.path.join(base_dir, test_case, test_instance + '_report')
    metrics_file = os.path.join(report_dir, 'metrics.csv')
    log_file = os.path.join(report_dir, 'log.txt')
    plot_file = os.path.join(report_dir, 'dist.png')

    cell1_html = FormatCell1(test_case, test_instance, metrics_file,
                             log_file, plot_file, include_plots)

    # ParseMetrics outputs an HTML table row and also updates lists.
    metrics_dict, metrics_html = ParseMetrics(metrics_file, log_file,
                                              num_additional)

    # Update the metrics structure. Initialize dictionaries if necessary.
    for m in metrics:
      if m in metrics_dict:
        if test_case not in metrics[m]:
          metrics[m][test_case] = metrics_dict[m]
        else:
          metrics[m][test_case] += metrics_dict[m]

    print >>output_file, '<tr>{}{}{}</tr>'.format(cell1_html, spec_html,
                                                  metrics_html)

    # Update counters.
    if 'tv' in metrics_dict:
      instances_succeeded += 1
    else:
      if 'time' in metrics_dict:
        instances_failed += 1
      else:
        if os.path.isfile(log_file):
          instances_running += 1

  print >>output_file, FormatSummaryRow(metrics)

  print >>output_file, '</tbody>'
  print >>output_file, '</table>'
  print >>output_file, '<p style="padding-bottom: 3em"></p>'  # vertical space

  # Plot links.
  if include_plots:
    print >>output_file, FormatPlots(base_dir, test_instances)
  else:
    print >>output_file, ('<p>Too many tests to include plots. '
                          'Click links within rows for details.</p>')

  print ('Instances'
         ' succeeded: {} failed: {} running: {} total: {}'.format(
             instances_succeeded, instances_failed, instances_running,
             len(test_instances)))


if __name__ == '__main__':
  try:
    main(sys.argv)
  except RuntimeError, e:
    print >>sys.stderr, 'FATAL: %s' % e
    sys.exit(1)
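

# Typical invocation (names are hypothetical): argv[1] is the regtest result
# tree and argv[2] is the HTML fragment to write, e.g.
#
#   python make_summary.py _tmp/regtest _tmp/regtest/rows.html
#
# where _tmp/regtest/test-instances.txt lists one instance per line as
# '<test_case> <test_instance> <...>' (the third field is ignored here).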