#!/usr/bin/python
"""Combines results from multiple days of a single metric.

Feed it the STATUS.txt files on stdin.  It then finds the corresponding
results.csv, and takes the top N items.

Example:

Date, "google.com,", yahoo.com
2015-03-01, 0.0, 0.9
2015-03-02, 0.1, 0.8

Dygraphs can load this CSV file directly.

TODO: Use different dygraph API?  Also we need error bars.

new Dygraph(document.getElementById("graphdiv2"),
  [
    [1,10,100],
    [2,20,80],
    [3,50,60],
    [4,70,80]
  ],
  {
    labels: [ "Date", "failure", "timeout", "google.com" ]
  });
"""

import collections
import csv
import json
import os
import sys

import util


def CombineDistResults(stdin, c_out, num_top):
  dates = []
  var_cols = collections.defaultdict(dict)  # {name: {date: value}}

  seen_dates = set()

  for line in stdin:
    status_path = line.strip()

    # Assume it looks like .../2015-03-01/STATUS.txt
    task_dir = os.path.dirname(status_path)
    date = os.path.basename(task_dir)

    # Get rid of duplicate dates.  These could be caused by retries.
    if date in seen_dates:
      continue

    seen_dates.add(date)

    with open(status_path) as f:
      status = f.readline().split()[0]  # OK, FAIL, TIMEOUT, SKIPPED

    dates.append(date)

    if status != 'OK':
      continue  # won't have results.csv

    results_path = os.path.join(task_dir, 'results.csv')
    with open(results_path) as f:
      c = csv.reader(f)
      unused_header = c.next()  # header row

      # They are sorted by decreasing "estimate", which is what we want.
      for i in xrange(0, num_top):
        try:
          row = c.next()
        except StopIteration:
          # It's OK if it doesn't have enough rows.
          util.log('Stopping early.  Fewer than %d results to render.',
                   num_top)
          break

        string, _, _, proportion, _, prop_low, prop_high = row

        # dygraphs uses a semicolon format for error bars:
        # lower;value;upper,lower;value;upper.
        # http://dygraphs.com/data.html#csv

        # Arbitrarily use 4 digits after the decimal point (consumed by
        # dygraphs, not directly displayed).
        dygraph_triple = '%.4f;%.4f;%.4f' % (
            float(prop_low), float(proportion), float(prop_high))

        var_cols[string][date] = dygraph_triple

  # Now print CSV on stdout.

  cols = sorted(var_cols.keys())  # sort columns alphabetically
  c_out.writerow(['date'] + cols)

  dates.sort()

  for date in dates:
    row = [date]
    for col in cols:
      cell = var_cols[col].get(date)  # None means there is no row
      row.append(cell)
    c_out.writerow(row)

  #util.log("Number of dynamic cols: %d", len(var_cols))


def CombineAssocResults(stdin, c_out, num_top):
  header = ('dummy',)
  c_out.writerow(header)


def main(argv):
  action = argv[1]

  if action == 'dist':
    num_top = int(argv[2])  # number of values to keep
    c_out = csv.writer(sys.stdout)
    CombineDistResults(sys.stdin, c_out, num_top)

  elif action == 'assoc':
    num_top = int(argv[2])  # number of values to keep
    c_out = csv.writer(sys.stdout)
    CombineAssocResults(sys.stdin, c_out, num_top)

  else:
    raise RuntimeError('Invalid action %r' % action)


if __name__ == '__main__':
  try:
    main(sys.argv)
  except RuntimeError, e:
    print >>sys.stderr, 'FATAL: %s' % e
    sys.exit(1)
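

# Usage sketch (added for illustration; directory, metric, and script names
# below are hypothetical, not taken from the surrounding pipeline).  The
# script reads one STATUS.txt path per line on stdin; each path's parent
# directory is assumed to be named after the date and to contain a
# results.csv sorted by decreasing estimate:
#
#   $ ls _tmp/my-metric/2015-03-*/STATUS.txt \
#       | ./combine_results.py dist 5 > dist.csv
#
# Because each cell in the output is a "lower;value;upper" triple, dygraphs'
# customBars option should render the confidence intervals as error bars
# (see http://dygraphs.com/data.html#csv), e.g.:
#
#   new Dygraph(document.getElementById("graphdiv"), "dist.csv",
#               { customBars: true });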