import logging
import os
import re
from collections import defaultdict

import click
import pandas as pd
from tabulate import tabulate


def gmean(s):
    """Geometric mean of a pandas Series."""
    return s.product() ** (1 / len(s))


def find_csv_files(path, perf_compare):
    """
    Recursively search a directory (and its subdirectories) for benchmark CSV
    files: either the training_<suite>.csv files produced by manual
    perf-compare runs, or the *_performance.csv files produced by the nightly
    job.
    """

    def is_csv(f):
        if perf_compare:
            regex = r"training_(torchbench|huggingface|timm_models)\.csv"
            return re.match(regex, f) is not None
        else:
            return f.endswith("_performance.csv")

    csv_files = []
    for root, _dirs, files in os.walk(path):
        for file in files:
            if is_csv(file):
                csv_files.append(os.path.join(root, file))
    return csv_files


@click.command()
@click.argument("directory", default="artifacts")
@click.option("--amp", is_flag=True)
@click.option("--float32", is_flag=True)
@click.option(
    "--perf-compare",
    is_flag=True,
    help="Set if the CSVs were generated by manually running the action "
    "rather than by downloading them from the nightly job",
)
def main(directory, amp, float32, perf_compare):
    """
    Given a directory containing multiple CSVs from --performance benchmark
    runs, aggregates them and generates summary statistics similar to the web
    UI at
    https://torchci-git-fork-huydhn-add-compilers-bench-74abf8-fbopensource.vercel.app/benchmark/compilers

    This is most useful if you've downloaded CSVs from CI and need to quickly
    look at aggregate stats. The CSVs are expected to follow exactly the
    naming convention used in CI.

    You may also be interested in
    https://docs.google.com/document/d/1DQQxIgmKa3eF0HByDTLlcJdvefC4GwtsklJUgLs09fQ/edit#
    which explains how to interpret the raw csv data.
    """
    # Report both dtypes by default; restrict if exactly one flag was passed.
    dtypes = ["amp", "float32"]
    if amp and not float32:
        dtypes = ["amp"]
    if float32 and not amp:
        dtypes = ["float32"]

    # Group CSVs by basename so shards of the same configuration are
    # concatenated together below.
    dfs = defaultdict(list)
    for f in find_csv_files(directory, perf_compare):
        try:
            dfs[os.path.basename(f)].append(pd.read_csv(f))
        except Exception:
            # Log which file was bad, then re-raise so the failure is visible.
            logging.warning("failed parsing %s", f)
            raise

    # dtype -> statistic -> benchmark -> compiler -> value
    results = defaultdict(  # dtype
        lambda: defaultdict(  # statistic
            lambda: defaultdict(dict)  # benchmark -> (compiler -> value)
        )
    )

    for k, v in sorted(dfs.items()):
        if perf_compare:
            # Perf-compare filenames only encode the suite; the rest of the
            # configuration is fixed by construction.
            regex = r"training_(torchbench|huggingface|timm_models)\.csv"
            m = re.match(regex, k)
            assert m is not None, k
            compiler = "inductor"
            benchmark = m.group(1)
            dtype = "float32"
            mode = "training"
            device = "cuda"
        else:
            # Nightly-job CSVs encode the full configuration in the filename.
            regex = (
                "(.+)_"
                "(torchbench|huggingface|timm_models)_"
                "(float32|amp)_"
                "(inference|training)_"
                "(cpu|cuda)_"
                r"performance\.csv"
            )
            m = re.match(regex, k)
            assert m is not None, k
            compiler = m.group(1)
            benchmark = m.group(2)
            dtype = m.group(3)
            mode = m.group(4)
            device = m.group(5)

        if dtype not in dtypes:
            continue

        df = pd.concat(v)
        # Drop failed runs (NaN or zero speedup) before aggregating.
        df = df.dropna().query("speedup != 0")

        statistics = {
            "speedup": gmean(df["speedup"]),
            "comptime": df["compilation_latency"].mean(),
            "memory": gmean(df["compression_ratio"]),
        }

        for statistic, value in statistics.items():
            results[f"{device} {dtype} {mode}"][statistic][benchmark][compiler] = value

    descriptions = {
        "speedup": "Geometric mean speedup",
        "comptime": "Mean compilation time",
        "memory": "Peak memory compression ratio",
    }

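    # Shape of the report printed below (a sketch; <x.xx> marks where real
    # rounded values appear, and the unlabeled first column holds compilers):
    #
    #   # cuda float32 training performance results
    #   ## Geometric mean speedup
    #             torchbench    huggingface    timm_models
    #   --------  ------------  -------------  -------------
    #   inductor  <x.xx>        <x.xx>         <x.xx>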
    for dtype_mode, r in results.items():
        print(f"# {dtype_mode} performance results")
        for statistic, data in r.items():
            print(f"## {descriptions[statistic]}")

            # Rows are compilers, columns are benchmark suites; the row set
            # is taken from the first benchmark, so every benchmark is
            # assumed to cover the same set of compilers.
            table = []
            for row_name in data[next(iter(data.keys()))]:
                row = [row_name]
                for col_name in data:
                    row.append(round(data[col_name][row_name], 2))
                table.append(row)

            headers = list(data.keys())
            print(tabulate(table, headers=headers))
            print()


if __name__ == "__main__":
    main()
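# Example invocations (a sketch; the script file name and directory paths
# are illustrative, substitute however this file is named in your checkout):
#
#   python summarize_perf.py artifacts --amp
#   python summarize_perf.py downloaded_csvs --perf-compare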