1*2abb3134SXin Li#!/usr/bin/python 2*2abb3134SXin Li"""Reads a CSV file on stdin, and prints an an HTML table on stdout. 3*2abb3134SXin Li 4*2abb3134SXin LiThe static HTML can then be made made dynamic with JavaScript, e.g. jQuery 5*2abb3134SXin LiDataTable. 6*2abb3134SXin Li 7*2abb3134SXin LiUse Cases: 8*2abb3134SXin Li 9*2abb3134SXin Li - overview.csv -- each row is a metric 10*2abb3134SXin Li - links: to metric page 11*2abb3134SXin Li 12*2abb3134SXin Li - status.csv -- each row is a day 13*2abb3134SXin Li - links: to log.txt, to results.html 14*2abb3134SXin Li""" 15*2abb3134SXin Li 16*2abb3134SXin Liimport cgi 17*2abb3134SXin Liimport csv 18*2abb3134SXin Liimport optparse 19*2abb3134SXin Liimport sys 20*2abb3134SXin Li 21*2abb3134SXin Liimport util 22*2abb3134SXin Li 23*2abb3134SXin Li 24*2abb3134SXin Lidef CreateOptionsParser(): 25*2abb3134SXin Li p = optparse.OptionParser() 26*2abb3134SXin Li 27*2abb3134SXin Li # We are taking a path, and not using stdin, because we read it twice. 28*2abb3134SXin Li p.add_option( 29*2abb3134SXin Li '--col-format', dest='col_formats', metavar="'COLNAME FMT'", type='str', 30*2abb3134SXin Li default=[], action='append', 31*2abb3134SXin Li help='Add HTML links to the named column, using the given Python ' 32*2abb3134SXin Li '.format() string') 33*2abb3134SXin Li 34*2abb3134SXin Li p.add_option( 35*2abb3134SXin Li '--def', dest='defs', metavar="'NAME VALUE'", type='str', 36*2abb3134SXin Li default=[], action='append', 37*2abb3134SXin Li help='Define varaibles for use in format strings') 38*2abb3134SXin Li 39*2abb3134SXin Li p.add_option( 40*2abb3134SXin Li '--as-percent', dest='percent_cols', metavar="COLNAME", type='str', 41*2abb3134SXin Li default=[], action='append', 42*2abb3134SXin Li help='Format this floating point column as a percentage string') 43*2abb3134SXin Li 44*2abb3134SXin Li # TODO: We could include this by default, and then change all the HTML to 45*2abb3134SXin Li # have <div> placeholders instead of <table>. 46*2abb3134SXin Li p.add_option( 47*2abb3134SXin Li '--table', dest='table', default=False, action='store_true', 48*2abb3134SXin Li help='Add <table></table> tags (useful for testing)') 49*2abb3134SXin Li 50*2abb3134SXin Li return p 51*2abb3134SXin Li 52*2abb3134SXin Li 53*2abb3134SXin Lidef ParseSpec(arg_list): 54*2abb3134SXin Li """Given an argument list, return a string -> string dictionary.""" 55*2abb3134SXin Li # The format string is passed the cell value. Escaped as HTML? 56*2abb3134SXin Li d = {} 57*2abb3134SXin Li for s in arg_list: 58*2abb3134SXin Li try: 59*2abb3134SXin Li name, value = s.split(' ', 1) 60*2abb3134SXin Li except ValueError: 61*2abb3134SXin Li raise RuntimeError('Invalid column format %r' % s) 62*2abb3134SXin Li d[name] = value 63*2abb3134SXin Li return d 64*2abb3134SXin Li 65*2abb3134SXin Li 66*2abb3134SXin Lidef PrintRow(row, col_names, col_formats, defs, percent_cols): 67*2abb3134SXin Li """Print a CSV row as HTML, using the given formatting. 68*2abb3134SXin Li 69*2abb3134SXin Li Returns: 70*2abb3134SXin Li An array of booleans indicating whether each cell is a number. 71*2abb3134SXin Li """ 72*2abb3134SXin Li is_number_flags = [False] * len(col_names) 73*2abb3134SXin Li 74*2abb3134SXin Li for i, cell in enumerate(row): 75*2abb3134SXin Li # The cell as a string. By default we leave it as is; it may be mutated 76*2abb3134SXin Li # below. 77*2abb3134SXin Li cell_str = cell 78*2abb3134SXin Li css_class = '' # CSS class for the cell. 79*2abb3134SXin Li col_name = col_names[i] # column that the cell is under 80*2abb3134SXin Li 81*2abb3134SXin Li # Does the cell look like a float? 82*2abb3134SXin Li try: 83*2abb3134SXin Li cell_float = float(cell) 84*2abb3134SXin Li if col_name in percent_cols: # Floats can be formatted as percentages. 85*2abb3134SXin Li cell_str = '{:.1f}%'.format(cell_float * 100) 86*2abb3134SXin Li else: 87*2abb3134SXin Li # Arbitrarily use 3 digits of precision for display 88*2abb3134SXin Li cell_str = '{:.3f}'.format(cell_float) 89*2abb3134SXin Li css_class = 'num' 90*2abb3134SXin Li is_number_flags[i] = True 91*2abb3134SXin Li except ValueError: 92*2abb3134SXin Li pass 93*2abb3134SXin Li 94*2abb3134SXin Li # Does it look lik an int? 95*2abb3134SXin Li try: 96*2abb3134SXin Li cell_int = int(cell) 97*2abb3134SXin Li cell_str = '{:,}'.format(cell_int) 98*2abb3134SXin Li css_class = 'num' 99*2abb3134SXin Li is_number_flags[i] = True 100*2abb3134SXin Li except ValueError: 101*2abb3134SXin Li pass 102*2abb3134SXin Li 103*2abb3134SXin Li # Special CSS class for R NA values. 104*2abb3134SXin Li if cell_str.strip() == 'NA': 105*2abb3134SXin Li css_class = 'num na' # num should right justify; na should make it red 106*2abb3134SXin Li is_number_flags[i] = True 107*2abb3134SXin Li 108*2abb3134SXin Li if css_class: 109*2abb3134SXin Li print ' <td class="{}">'.format(css_class), 110*2abb3134SXin Li else: 111*2abb3134SXin Li print ' <td>', 112*2abb3134SXin Li 113*2abb3134SXin Li cell_safe = cgi.escape(cell_str) 114*2abb3134SXin Li 115*2abb3134SXin Li # If the cell has a format string, print it this way. 116*2abb3134SXin Li 117*2abb3134SXin Li fmt = col_formats.get(col_name) # e.g. "../{date}.html" 118*2abb3134SXin Li if fmt: 119*2abb3134SXin Li # Copy variable bindings 120*2abb3134SXin Li bindings = dict(defs) 121*2abb3134SXin Li 122*2abb3134SXin Li # Also let the format string use other column names. TODO: Is there a 123*2abb3134SXin Li # more efficient way? 124*2abb3134SXin Li bindings.update(zip(col_names, [cgi.escape(c) for c in row])) 125*2abb3134SXin Li 126*2abb3134SXin Li bindings[col_name] = cell_safe 127*2abb3134SXin Li 128*2abb3134SXin Li print fmt.format(**bindings), # no newline 129*2abb3134SXin Li else: 130*2abb3134SXin Li print cell_safe, # no newline 131*2abb3134SXin Li 132*2abb3134SXin Li print '</td>' 133*2abb3134SXin Li 134*2abb3134SXin Li return is_number_flags 135*2abb3134SXin Li 136*2abb3134SXin Li 137*2abb3134SXin Lidef ReadCsv(f): 138*2abb3134SXin Li """Read the CSV file, returning the column names and rows.""" 139*2abb3134SXin Li c = csv.reader(f) 140*2abb3134SXin Li 141*2abb3134SXin Li # The first row of the CSV is assumed to be a header. The rest are data. 142*2abb3134SXin Li col_names = [] 143*2abb3134SXin Li rows = [] 144*2abb3134SXin Li for i, row in enumerate(c): 145*2abb3134SXin Li if i == 0: 146*2abb3134SXin Li col_names = row 147*2abb3134SXin Li continue 148*2abb3134SXin Li rows.append(row) 149*2abb3134SXin Li return col_names, rows 150*2abb3134SXin Li 151*2abb3134SXin Li 152*2abb3134SXin Lidef PrintColGroup(col_names, col_is_numeric): 153*2abb3134SXin Li """Print HTML colgroup element, used for JavaScript sorting.""" 154*2abb3134SXin Li print '<colgroup>' 155*2abb3134SXin Li for i, col in enumerate(col_names): 156*2abb3134SXin Li # CSS class is used for sorting 157*2abb3134SXin Li if col_is_numeric[i]: 158*2abb3134SXin Li css_class = 'number' 159*2abb3134SXin Li else: 160*2abb3134SXin Li css_class = 'case-insensitive' 161*2abb3134SXin Li 162*2abb3134SXin Li # NOTE: id is a comment only; not used 163*2abb3134SXin Li print ' <col id="{}" type="{}" />'.format(col, css_class) 164*2abb3134SXin Li print '</colgroup>' 165*2abb3134SXin Li 166*2abb3134SXin Li 167*2abb3134SXin Lidef main(argv): 168*2abb3134SXin Li (opts, argv) = CreateOptionsParser().parse_args(argv) 169*2abb3134SXin Li 170*2abb3134SXin Li col_formats = ParseSpec(opts.col_formats) 171*2abb3134SXin Li defs = ParseSpec(opts.defs) 172*2abb3134SXin Li 173*2abb3134SXin Li col_names, rows = ReadCsv(sys.stdin) 174*2abb3134SXin Li 175*2abb3134SXin Li for col in opts.percent_cols: 176*2abb3134SXin Li if col not in col_names: 177*2abb3134SXin Li raise RuntimeError('--percent-col %s is not a valid column' % col) 178*2abb3134SXin Li 179*2abb3134SXin Li # By default, we don't print the <table> bit -- that's up to the host page 180*2abb3134SXin Li if opts.table: 181*2abb3134SXin Li print '<table>' 182*2abb3134SXin Li 183*2abb3134SXin Li print '<thead>' 184*2abb3134SXin Li for col in col_names: 185*2abb3134SXin Li # change _ to space so long column names can wrap 186*2abb3134SXin Li print ' <td>%s</td>' % cgi.escape(col.replace('_', ' ')) 187*2abb3134SXin Li print '</thead>' 188*2abb3134SXin Li 189*2abb3134SXin Li # Assume all columns are numeric at first. Look at each row for non-numeric 190*2abb3134SXin Li # values. 191*2abb3134SXin Li col_is_numeric = [True] * len(col_names) 192*2abb3134SXin Li 193*2abb3134SXin Li print '<tbody>' 194*2abb3134SXin Li for row in rows: 195*2abb3134SXin Li print ' <tr>' 196*2abb3134SXin Li is_number_flags = PrintRow(row, col_names, col_formats, defs, 197*2abb3134SXin Li opts.percent_cols) 198*2abb3134SXin Li 199*2abb3134SXin Li # If one cell in a column is not a number, then the whole cell isn't. 200*2abb3134SXin Li for (i, is_number) in enumerate(is_number_flags): 201*2abb3134SXin Li if not is_number: 202*2abb3134SXin Li col_is_numeric[i] = False 203*2abb3134SXin Li 204*2abb3134SXin Li print ' </tr>' 205*2abb3134SXin Li print '</tbody>' 206*2abb3134SXin Li 207*2abb3134SXin Li PrintColGroup(col_names, col_is_numeric) 208*2abb3134SXin Li 209*2abb3134SXin Li if opts.table: 210*2abb3134SXin Li print '</table>' 211*2abb3134SXin Li 212*2abb3134SXin Li 213*2abb3134SXin Liif __name__ == '__main__': 214*2abb3134SXin Li try: 215*2abb3134SXin Li main(sys.argv) 216*2abb3134SXin Li except RuntimeError, e: 217*2abb3134SXin Li print >>sys.stderr, 'FATAL: %s' % e 218*2abb3134SXin Li sys.exit(1) 219