#!/usr/bin/python
"""Given a regtest result tree, prints an HTML summary to a file.
See HTML skeleton in tests/regtest.html.
"""
import os
import re
import sys
SUMMARY_ROW = """\
<tfoot>
<tr>
  <td>
    %(name)s
  </td>

  <!-- empty cells under the parameter and count columns -->
  <td></td>
  <td></td>
  <td></td>
  <td></td>
  <td></td>
  <td></td>
  <td></td>
  <td></td>
  <td></td>
  <td></td>
  <td></td>
  <td></td>
  <td></td>
  <td></td>

  <!-- mean result metrics -->
  <td>%(mean_fpr)s</td>
  <td>%(mean_fnr)s</td>
  <td>%(mean_tv)s</td>
  <td>%(mean_am)s</td>
  <td>%(mean_time)s</td>
</tr>
</tfoot>
"""
# Navigation and links to plot. (The "#top" target is assumed to be an anchor
# defined in the tests/regtest.html skeleton.)
DETAILS = """\
<p><a href="#top">Up</a></p>
<a id="%(anchor)s"></a>
<p><img src="%(instance_dir)s/dist.png"/></p>
<p><a href="%(instance_dir)s">%(name)s files</a></p>
"""
def FormatFloat(x, percent):
"""Formats a floating-point number."""
if percent:
return '{:.1f}%'.format(x * 100.0)
else:
return '{:.3f}'.format(x)
def FormatMeanWithSem(m_std_error, percent=False):
"""Formats an estimate with standard error."""
if m_std_error is None:
return ''
m, std_error = m_std_error
if std_error is None:
return FormatFloat(m, percent)
else:
return '{}±{}'.format(
FormatFloat(m, percent),
FormatFloat(std_error, percent))
def Mean(l):
"""Computes the mean (average) for a list of numbers."""
if l:
return float(sum(l)) / len(l)
else:
return None
def SampleVar(l):
"""Computes the sample variance for a list of numbers."""
if len(l) > 1:
mean = Mean(l)
var = sum([(x - mean) ** 2 for x in l]) / (len(l) - 1)
return var
else:
return None
def StandardErrorEstimate(l):
"""Returns the standard error estimate for a list of numbers.
For a singleton the standard error is assumed to be 10% of its value.
"""
if len(l) > 1:
return (SampleVar(l) / len(l)) ** .5
elif l:
return l[0] / 10.0
else:
return None
def MeanOfMeans(dict_of_lists):
"""Returns the average of averages with the standard error of the estimate.
"""
means = [Mean(dict_of_lists[key]) for key in dict_of_lists
if dict_of_lists[key]]
if means:
    # Variance of the mean estimate for each sublist.
    se = [StandardErrorEstimate(dict_of_lists[key]) ** 2 for key
          in dict_of_lists if dict_of_lists[key]]
    return (Mean(means),              # mean over all sublists
            sum(se) ** .5 / len(se))  # standard error of the mean of means
else:
return None
def ParseSpecFile(spec_filename):
"""Parses the spec (parameters) file.
Returns:
An integer and a string. The integer is the number of bogus candidates
and the string is parameters in the HTML format.
"""
with open(spec_filename) as s:
spec_row = s.readline().split()
# Second to last column is 'num_additional' -- the number of bogus
# candidates added
num_additional = int(spec_row[-2])
  spec_in_html = ' '.join('<td>%s</td>' % cell for cell in spec_row[1:])
return num_additional, spec_in_html
def ExtractTime(log_filename):
"""Extracts the elapsed time information from the log file.
Returns:
Elapsed time (in seconds) or None in case of failure.
"""
if os.path.isfile(log_filename):
with open(log_filename) as log:
log_str = log.read()
# Matching a line output by analyze.R.
match = re.search(r'Inference took ([0-9.]+) seconds', log_str)
if match:
return float(match.group(1))
return None
def ParseMetrics(metrics_file, log_file, num_additional):
"""Processes the metrics file.
Args:
metrics_file: name of the metrics file
log_file: name of the log.txt file
    num_additional: The number of bogus candidates added to the candidate list.
Returns a pair:
- A dictionary of metrics (some can be []).
- An HTML-formatted portion of the report row.
"""
if not os.path.isfile(metrics_file):
metrics_row_str = ['', '', '', '', '', '']
metrics_row_dict = {}
else:
with open(metrics_file) as m:
      m.readline()  # skip the CSV header row
metrics_row = m.readline().split(',')
(num_actual, num_rappor, num_false_pos, num_false_neg, total_variation,
allocated_mass) = metrics_row
num_actual = int(num_actual)
num_rappor = int(num_rappor)
num_false_pos = int(num_false_pos)
num_false_neg = int(num_false_neg)
total_variation = float(total_variation)
allocated_mass = float(allocated_mass)
# e.g. if there are 20 additional candidates added, and 1 false positive,
# the false positive rate is 5%.
fp_rate = float(num_false_pos) / num_additional if num_additional else 0
# e.g. if there are 100 strings in the true input, and 80 strings
# detected by RAPPOR, then we have 20 false negatives, and a false
# negative rate of 20%.
fn_rate = float(num_false_neg) / num_actual
metrics_row_str = [
str(num_actual),
str(num_rappor),
'%.1f%% (%d)' % (fp_rate * 100, num_false_pos) if num_additional
else '',
'%.1f%% (%d)' % (fn_rate * 100, num_false_neg),
'%.3f' % total_variation,
'%.3f' % allocated_mass,
]
metrics_row_dict = {
'tv': [total_variation],
'fpr': [fp_rate] if num_additional else [],
'fnr': [fn_rate],
'am': [allocated_mass],
}
elapsed_time = ExtractTime(log_file)
if elapsed_time is not None:
metrics_row_str = metrics_row_str + ['%.2f' % elapsed_time]
metrics_row_dict['time'] = [elapsed_time]
# return metrics formatted as HTML table entries
  return (metrics_row_dict,
          ' '.join('<td>%s</td>' % cell for cell in metrics_row_str))
def FormatCell1(test_case, test_instance, metrics_file, log_file, plot_file,
link_to_plots):
"""Outputs an HTML table entry for the first cell of the row.
The row is filled if the metrics file exist. The first cell contains a link
that for short tables points to a plot file inline, for large tables to an
external file.
If the metrics file is missing, the link points to the log file (if one
exists)
"""
relpath_report = '{}/{}_report'.format(test_case, test_instance)
if os.path.isfile(metrics_file):
external_file = plot_file
if link_to_plots:
link = '#{}_{}'.format(test_case, test_instance) # anchor
else:
link = os.path.join(relpath_report, 'dist.png')
  else:  # no results, most likely due to an error; link to the log file instead
    external_file = log_file
    link = os.path.join(relpath_report, 'log.txt')

  if os.path.isfile(external_file):
    return '<td><a href="{}">{}</a></td>'.format(link, test_case)
  else:  # nothing to link to
    return '<td>{}</td>'.format(test_case)
def FormatSummaryRow(metrics_lists):
"""Outputs an HTML-formatted summary row."""
means_with_sem = {} # SEM - standard error of the mean
for key in metrics_lists:
means_with_sem[key] = MeanOfMeans(metrics_lists[key])
# If none of the lists is longer than one element, drop the SEM component.
if means_with_sem[key] and max([len(l) for l in metrics_lists[key]]) < 2:
means_with_sem[key] = [means_with_sem[key][0], None]
summary = {
'name': 'Means',
'mean_fpr': FormatMeanWithSem(means_with_sem['fpr'], percent=True),
'mean_fnr': FormatMeanWithSem(means_with_sem['fnr'], percent=True),
'mean_tv': FormatMeanWithSem(means_with_sem['tv'], percent=True),
'mean_am': FormatMeanWithSem(means_with_sem['am'], percent=True),
'mean_time': FormatMeanWithSem(means_with_sem['time']),
}
return SUMMARY_ROW % summary
def FormatPlots(base_dir, test_instances):
"""Outputs HTML-formatted plots."""
result = ''
for instance in test_instances:
# A test instance is identified by the test name and the test run.
test_case, test_instance, _ = instance.split(' ')
instance_dir = test_case + '/' + test_instance + '_report'
if os.path.isfile(os.path.join(base_dir, instance_dir, 'dist.png')):
result += DETAILS % {'anchor': test_case + '_' + test_instance,
'name': '{} (instance {})'.format(test_case,
test_instance),
'instance_dir': instance_dir}
return result
def main(argv):
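  # argv[1]: the regtest result directory; argv[2]: the output HTML file.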
base_dir = argv[1]
output_file = open(argv[2], 'w')
# This file has the test case names, in the order that they should be
# displayed.
instances_file = os.path.join(base_dir, 'test-instances.txt')
if not os.path.isfile(instances_file):
raise RuntimeError('{} is missing'.format(instances_file))
with open(instances_file) as f:
test_instances = [line.strip() for line in f]
# Metrics are assembled into a dictionary of dictionaries. The top-level
# key is the metric name ('tv', 'fpr', etc.), the second level key is
# the test case. These keys reference a list of floats, which can be empty.
metrics = {
'tv': {}, # total_variation for all test cases
'fpr': {}, # dictionary of false positive rates
'fnr': {}, # dictionary of false negative rates
'am': {}, # dictionary of total allocated masses
'time': {}, # dictionary of total elapsed time measurements
}
# If there are too many tests, the plots are not included in the results
# file. Instead, rows' names are links to the corresponding .png files.
include_plots = len(test_instances) < 20
instances_succeeded = 0
instances_failed = 0
instances_running = 0
for instance in test_instances:
    # A test instance is identified by the test name and the test run.
test_case, test_instance, _ = instance.split(' ')
spec_file = os.path.join(base_dir, test_case, 'spec.txt')
if not os.path.isfile(spec_file):
raise RuntimeError('{} is missing'.format(spec_file))
num_additional, spec_html = ParseSpecFile(spec_file)
metrics_html = '' # will be filled in later on, if metrics exist
report_dir = os.path.join(base_dir, test_case, test_instance + '_report')
metrics_file = os.path.join(report_dir, 'metrics.csv')
log_file = os.path.join(report_dir, 'log.txt')
plot_file = os.path.join(report_dir, 'dist.png')
cell1_html = FormatCell1(test_case, test_instance, metrics_file, log_file,
plot_file, include_plots)
# ParseMetrics outputs an HTML table row and also updates lists
metrics_dict, metrics_html = ParseMetrics(metrics_file, log_file,
num_additional)
# Update the metrics structure. Initialize dictionaries if necessary.
for m in metrics:
if m in metrics_dict:
if not test_case in metrics[m]:
metrics[m][test_case] = metrics_dict[m]
else:
metrics[m][test_case] += metrics_dict[m]
    print >>output_file, '<tr>{}{}{}</tr>'.format(
        cell1_html, spec_html, metrics_html)
    # Update counters.
    if 'tv' in metrics_dict:
      instances_succeeded += 1
    elif 'time' in metrics_dict:
      instances_failed += 1
    elif os.path.isfile(log_file):
      instances_running += 1
  print >>output_file, '</tbody>'
  print >>output_file, FormatSummaryRow(metrics)
  print >>output_file, '</table>'
  print >>output_file, '<p style="padding-bottom: 3em"></p>'  # vertical space
# Plot links.
if include_plots:
print >>output_file, FormatPlots(base_dir, test_instances)
else:
    print >>output_file, ('<p>Too many tests to include plots. '
                          'Click links within rows for details.</p>')
print ('Instances'
' succeeded: {} failed: {} running: {} total: {}'.
format(instances_succeeded, instances_failed, instances_running,
len(test_instances)))
if __name__ == '__main__':
try:
main(sys.argv)
  except RuntimeError as e:
print >>sys.stderr, 'FATAL: %s' % e
sys.exit(1)