#!/usr/bin/python """Reads a CSV file on stdin, and prints an an HTML table on stdout. The static HTML can then be made made dynamic with JavaScript, e.g. jQuery DataTable. Use Cases: - overview.csv -- each row is a metric - links: to metric page - status.csv -- each row is a day - links: to log.txt, to results.html """ import cgi import csv import optparse import sys import util def CreateOptionsParser(): p = optparse.OptionParser() # We are taking a path, and not using stdin, because we read it twice. p.add_option( '--col-format', dest='col_formats', metavar="'COLNAME FMT'", type='str', default=[], action='append', help='Add HTML links to the named column, using the given Python ' '.format() string') p.add_option( '--def', dest='defs', metavar="'NAME VALUE'", type='str', default=[], action='append', help='Define varaibles for use in format strings') p.add_option( '--as-percent', dest='percent_cols', metavar="COLNAME", type='str', default=[], action='append', help='Format this floating point column as a percentage string') # TODO: We could include this by default, and then change all the HTML to # have
placeholders instead of . p.add_option( '--table', dest='table', default=False, action='store_true', help='Add
tags (useful for testing)') return p def ParseSpec(arg_list): """Given an argument list, return a string -> string dictionary.""" # The format string is passed the cell value. Escaped as HTML? d = {} for s in arg_list: try: name, value = s.split(' ', 1) except ValueError: raise RuntimeError('Invalid column format %r' % s) d[name] = value return d def PrintRow(row, col_names, col_formats, defs, percent_cols): """Print a CSV row as HTML, using the given formatting. Returns: An array of booleans indicating whether each cell is a number. """ is_number_flags = [False] * len(col_names) for i, cell in enumerate(row): # The cell as a string. By default we leave it as is; it may be mutated # below. cell_str = cell css_class = '' # CSS class for the cell. col_name = col_names[i] # column that the cell is under # Does the cell look like a float? try: cell_float = float(cell) if col_name in percent_cols: # Floats can be formatted as percentages. cell_str = '{:.1f}%'.format(cell_float * 100) else: # Arbitrarily use 3 digits of precision for display cell_str = '{:.3f}'.format(cell_float) css_class = 'num' is_number_flags[i] = True except ValueError: pass # Does it look lik an int? try: cell_int = int(cell) cell_str = '{:,}'.format(cell_int) css_class = 'num' is_number_flags[i] = True except ValueError: pass # Special CSS class for R NA values. if cell_str.strip() == 'NA': css_class = 'num na' # num should right justify; na should make it red is_number_flags[i] = True if css_class: print ' '.format(css_class), else: print ' ', cell_safe = cgi.escape(cell_str) # If the cell has a format string, print it this way. fmt = col_formats.get(col_name) # e.g. "../{date}.html" if fmt: # Copy variable bindings bindings = dict(defs) # Also let the format string use other column names. TODO: Is there a # more efficient way? bindings.update(zip(col_names, [cgi.escape(c) for c in row])) bindings[col_name] = cell_safe print fmt.format(**bindings), # no newline else: print cell_safe, # no newline print '' return is_number_flags def ReadCsv(f): """Read the CSV file, returning the column names and rows.""" c = csv.reader(f) # The first row of the CSV is assumed to be a header. The rest are data. col_names = [] rows = [] for i, row in enumerate(c): if i == 0: col_names = row continue rows.append(row) return col_names, rows def PrintColGroup(col_names, col_is_numeric): """Print HTML colgroup element, used for JavaScript sorting.""" print '' for i, col in enumerate(col_names): # CSS class is used for sorting if col_is_numeric[i]: css_class = 'number' else: css_class = 'case-insensitive' # NOTE: id is a comment only; not used print ' '.format(col, css_class) print '' def main(argv): (opts, argv) = CreateOptionsParser().parse_args(argv) col_formats = ParseSpec(opts.col_formats) defs = ParseSpec(opts.defs) col_names, rows = ReadCsv(sys.stdin) for col in opts.percent_cols: if col not in col_names: raise RuntimeError('--percent-col %s is not a valid column' % col) # By default, we don't print the bit -- that's up to the host page if opts.table: print '
' print '' for col in col_names: # change _ to space so long column names can wrap print ' ' % cgi.escape(col.replace('_', ' ')) print '' # Assume all columns are numeric at first. Look at each row for non-numeric # values. col_is_numeric = [True] * len(col_names) print '' for row in rows: print ' ' is_number_flags = PrintRow(row, col_names, col_formats, defs, opts.percent_cols) # If one cell in a column is not a number, then the whole cell isn't. for (i, is_number) in enumerate(is_number_flags): if not is_number: col_is_numeric[i] = False print ' ' print '' PrintColGroup(col_names, col_is_numeric) if opts.table: print '
%s
' if __name__ == '__main__': try: main(sys.argv) except RuntimeError, e: print >>sys.stderr, 'FATAL: %s' % e sys.exit(1)