#!/usr/bin/python
"""Reads a CSV file on stdin, and prints an HTML table on stdout.

The static HTML can then be made dynamic with JavaScript, e.g. jQuery
DataTable.

Use Cases:

  - overview.csv -- each row is a metric
    - links: to metric page

  - status.csv -- each row is a day
    - links: to log.txt, to results.html
"""

from __future__ import print_function

# NOTE: cgi.escape() only exists on Python 2 and on Python 3 before 3.8.
import cgi
import csv
import optparse
import sys

import util


def CreateOptionsParser():
  """Build the optparse parser for the command-line flags.

  Returns:
    An optparse.OptionParser.  --col-format, --def and --as-percent may each
    be repeated and accumulate into lists; --table is a boolean switch.
  """
  p = optparse.OptionParser()

  p.add_option(
      '--col-format', dest='col_formats', metavar="'COLNAME FMT'", type='str',
      default=[], action='append',
      help='Add HTML links to the named column, using the given Python '
           '.format() string')

  p.add_option(
      '--def', dest='defs', metavar="'NAME VALUE'", type='str',
      default=[], action='append',
      help='Define varaibles for use in format strings')

  p.add_option(
      '--as-percent', dest='percent_cols', metavar="COLNAME", type='str',
      default=[], action='append',
      help='Format this floating point column as a percentage string')

  # TODO: We could include this by default, and then change all the HTML to
  # have <div> placeholders instead of <table>.
  p.add_option(
      '--table', dest='table', default=False, action='store_true',
      help='Add <table></table> tags (useful for testing)')

  return p


def ParseSpec(arg_list):
  """Given an argument list, return a string -> string dictionary.

  Args:
    arg_list: Strings of the form 'NAME VALUE'.  Each is split on the first
      space only, so VALUE may itself contain spaces.

  Returns:
    A dict mapping each NAME to its VALUE.  A later entry with the same NAME
    replaces an earlier one.

  Raises:
    RuntimeError: If an entry contains no space.
  """
  d = {}
  for s in arg_list:
    try:
      name, value = s.split(' ', 1)
    except ValueError:
      raise RuntimeError('Invalid column format %r' % s)
    d[name] = value
  return d


def _FormatCell(cell, is_percent):
  """Decide how one raw CSV cell is displayed.

  Args:
    cell: The cell text exactly as read from the CSV.
    is_percent: True if the cell's column was named with --as-percent.

  Returns:
    A (display string, CSS class, is_number) tuple.  The CSS class is '' for
    cells that are not numeric.
  """
  try:
    as_float = float(cell)
  except ValueError:
    as_float = None

  try:
    as_int = int(cell)
  except ValueError:
    as_int = None

  cell_str = cell  # By default the cell is shown as is.
  css_class = ''
  is_number = False

  if is_percent and as_float is not None:
    # Percentages win over the integer formatting below, so that an integral
    # value such as '1' in a percent column is shown as '100.0%' like its
    # neighbours rather than as a bare '1'.
    cell_str = '{:.1f}%'.format(as_float * 100)
    css_class = 'num'
    is_number = True
  elif as_int is not None:
    cell_str = '{:,}'.format(as_int)
    css_class = 'num'
    is_number = True
  elif as_float is not None:
    # Arbitrarily use 3 digits of precision for display.
    cell_str = '{:.3f}'.format(as_float)
    css_class = 'num'
    is_number = True

  # Special CSS class for R NA values.
  if cell_str.strip() == 'NA':
    css_class = 'num na'  # num should right justify; na should make it red
    is_number = True

  return cell_str, css_class, is_number


def PrintRow(row, col_names, col_formats, defs, percent_cols):
  """Print a CSV row as HTML, using the given formatting.

  Args:
    row: List of cell strings for one data row.
    col_names: List of column names from the CSV header.
    col_formats: Dict of column name -> .format() template for that column.
      A template may refer to any column name and to any name in defs.
    defs: Dict of extra variable name -> value available to the templates.
    percent_cols: Names of the columns to show as percentages.

  Returns:
    An array of booleans, one per column, indicating whether each cell is a
    number.  Columns for which the row has no cell are reported as False.

  Raises:
    RuntimeError: If the row has more cells than the header has columns, or
      if a column's format string cannot be applied.
  """
  if len(row) > len(col_names):
    raise RuntimeError(
        'Row has %d cells, but the header has only %d columns: %r' %
        (len(row), len(col_names), row))

  is_number_flags = [False] * len(col_names)

  # Variables shared by every format string used on this row: the --def
  # values plus every cell of the row keyed by column name.  Built at most
  # once per row, and only if some cell actually has a format string.
  row_bindings = None

  for i, cell in enumerate(row):
    col_name = col_names[i]  # column that the cell is under
    cell_str, css_class, is_number = _FormatCell(
        cell, col_name in percent_cols)
    is_number_flags[i] = is_number

    if css_class:
      open_tag = ' <td class="{}">'.format(css_class)
    else:
      open_tag = ' <td>'

    fmt = col_formats.get(col_name)  # e.g. "../{date}.html"
    if fmt:
      if row_bindings is None:
        row_bindings = dict(defs)
        # Templates typically place values inside attributes such as
        # href="...", so double quotes are escaped as well.
        row_bindings.update(
            zip(col_names, [cgi.escape(c, quote=True) for c in row]))

      # The cell's own column is bound to its display form, not the raw text.
      bindings = dict(row_bindings)
      bindings[col_name] = cgi.escape(cell_str, quote=True)

      try:
        content = fmt.format(**bindings)
      except (KeyError, IndexError, ValueError) as e:
        raise RuntimeError(
            'Invalid format %r for column %r (%s: %s)' %
            (fmt, col_name, type(e).__name__, e))
    else:
      content = cgi.escape(cell_str)

    print('%s %s </td>' % (open_tag, content))

  return is_number_flags


def ReadCsv(f):
  """Read the CSV file, returning the column names and rows.

  Args:
    f: An iterable of CSV lines, e.g. an open file.

  Returns:
    A (col_names, rows) tuple.  The first row of the CSV is assumed to be a
    header; the rest are data.  Both are empty lists for empty input.
  """
  all_rows = list(csv.reader(f))
  if not all_rows:
    return [], []
  return all_rows[0], all_rows[1:]


def PrintColGroup(col_names, col_is_numeric):
  """Print HTML colgroup element, used for JavaScript sorting.

  Args:
    col_names: List of column names.
    col_is_numeric: List of booleans parallel to col_names.
  """
  print('<colgroup>')
  for i, col in enumerate(col_names):
    # The type attribute is used for sorting.
    if col_is_numeric[i]:
      sort_type = 'number'
    else:
      sort_type = 'case-insensitive'

    # NOTE: id is a comment only; not used.  It is still escaped so that a
    # column name containing a quote cannot break out of the attribute.
    print(' <col id="{}" type="{}" />'.format(
        cgi.escape(col, quote=True), sort_type))
  print('</colgroup>')


def main(argv):
  """Read a CSV from stdin and print it as an HTML table on stdout.

  Args:
    argv: The full command line, e.g. sys.argv.

  Raises:
    RuntimeError: On an invalid flag value or malformed input.
  """
  (opts, argv) = CreateOptionsParser().parse_args(argv)

  col_formats = ParseSpec(opts.col_formats)
  defs = ParseSpec(opts.defs)

  col_names, rows = ReadCsv(sys.stdin)

  for col in opts.percent_cols:
    if col not in col_names:
      raise RuntimeError('--as-percent %s is not a valid column' % col)

  # By default, we don't print the <table> bit -- that's up to the host page
  if opts.table:
    print('<table>')

  print('<thead>')
  for col in col_names:
    # change _ to space so long column names can wrap
    print(' <td>%s</td>' % cgi.escape(col.replace('_', ' ')))
  print('</thead>')

  # Assume all columns are numeric at first.  Look at each row for non-numeric
  # values.
  col_is_numeric = [True] * len(col_names)

  print('<tbody>')
  for row in rows:
    print(' <tr>')
    is_number_flags = PrintRow(row, col_names, col_formats, defs,
                               opts.percent_cols)

    # If one cell in a column is not a number, then the whole column isn't.
    for (i, is_number) in enumerate(is_number_flags):
      if not is_number:
        col_is_numeric[i] = False

    print(' </tr>')
  print('</tbody>')

  PrintColGroup(col_names, col_is_numeric)

  if opts.table:
    print('</table>')


if __name__ == '__main__':
  try:
    main(sys.argv)
  except RuntimeError as e:
    print('FATAL: %s' % e, file=sys.stderr)
    sys.exit(1)