# benchmarks/dynamo/summarize_perf.py
import logging
import os
import re
from collections import defaultdict

import click
import pandas as pd
from tabulate import tabulate


def gmean(s):
    # Geometric mean of a pandas Series of positive values.
    return s.product() ** (1 / len(s))
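
# Quick sanity check for gmean (illustrative values):
#   gmean(pd.Series([1.0, 4.0])) == 2.0, since (1.0 * 4.0) ** 0.5 == 2.0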


def find_csv_files(path, perf_compare):
    """
    Recursively search a directory and its subdirectories for all CSV files
    whose names match the expected benchmark-output naming convention.
    """

    def is_csv(f):
        if perf_compare:
            regex = r"training_(torchbench|huggingface|timm_models)\.csv"
            return re.match(regex, f) is not None
        else:
            return f.endswith("_performance.csv")

    csv_files = []
    for root, dirs, files in os.walk(path):
        for file in files:
            if is_csv(file):
                csv_files.append(os.path.join(root, file))
    return csv_files
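
# e.g. with perf_compare=True this picks up "training_torchbench.csv"; without
# it, it picks up names like
# "inductor_huggingface_amp_training_cuda_performance.csv" (an illustrative
# name, parsed by the regex in main below).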


@click.command()
@click.argument("directory", default="artifacts")
@click.option("--amp", is_flag=True)
@click.option("--float32", is_flag=True)
@click.option(
    "--perf-compare",
    is_flag=True,
    help="Set if the CSVs were generated by running the action manually rather than taking them from the nightly job",
)
def main(directory, amp, float32, perf_compare):
    """
    Given a directory containing multiple CSVs from --performance benchmark
    runs, this script aggregates them and generates summary statistics similar
    to the web UI at
    https://torchci-git-fork-huydhn-add-compilers-bench-74abf8-fbopensource.vercel.app/benchmark/compilers

    This is most useful if you've downloaded CSVs from CI and need to quickly
    look at aggregate stats.  The CSVs are expected to follow exactly the same
    naming convention that CI uses.

    You may also be interested in
    https://docs.google.com/document/d/1DQQxIgmKa3eF0HByDTLlcJdvefC4GwtsklJUgLs09fQ/edit#
    which explains how to interpret the raw CSV data.
    """
    # Summarize both dtypes unless exactly one of --amp/--float32 is given.
    dtypes = ["amp", "float32"]
    if amp and not float32:
        dtypes = ["amp"]
    if float32 and not amp:
        dtypes = ["float32"]
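
    # Example invocation (illustrative): "python summarize_perf.py artifacts
    # --amp" summarizes only the amp runs found under ./artifacts.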

    # Group CSVs by basename so repeated shards of the same configuration are
    # concatenated into a single DataFrame.
    dfs = defaultdict(list)
    for f in find_csv_files(directory, perf_compare):
        try:
            dfs[os.path.basename(f)].append(pd.read_csv(f))
        except Exception:
            logging.warning("failed parsing %s", f)
            raise
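
    # e.g. "run0/inductor_torchbench_float32_training_cuda_performance.csv" and
    # "run1/inductor_torchbench_float32_training_cuda_performance.csv"
    # (illustrative paths) collapse to the same basename key and are
    # concatenated below.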

    # "{device} {dtype} {mode}" -> statistic -> benchmark -> compiler -> value
    results = defaultdict(  # device/dtype/mode bucket
        lambda: defaultdict(  # statistic
            lambda: defaultdict(dict)  # benchmark; leaf dict maps compiler -> value
        )
    )
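
    # e.g. results["cuda float32 training"]["speedup"]["torchbench"]["inductor"]
    # holds the geometric-mean speedup for that configuration (key shown is
    # illustrative).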

    for k, v in sorted(dfs.items()):
        if perf_compare:
            regex = r"training_(torchbench|huggingface|timm_models)\.csv"
            m = re.match(regex, k)
            assert m is not None, k
            # Manually triggered perf-compare runs are always inductor,
            # float32, training, cuda.
            compiler = "inductor"
            benchmark = m.group(1)
            dtype = "float32"
            mode = "training"
            device = "cuda"
        else:
            regex = (
                "(.+)_"
                "(torchbench|huggingface|timm_models)_"
                "(float32|amp)_"
                "(inference|training)_"
                "(cpu|cuda)_"
                r"performance\.csv"
            )
            m = re.match(regex, k)
            assert m is not None, k
            compiler = m.group(1)
            benchmark = m.group(2)
            dtype = m.group(3)
            mode = m.group(4)
            device = m.group(5)
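
        # e.g. k = "inductor_huggingface_amp_training_cuda_performance.csv"
        # (an illustrative name) parses to compiler="inductor",
        # benchmark="huggingface", dtype="amp", mode="training", device="cuda".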

        # Skip dtypes excluded by --amp/--float32 before aggregating.
        if dtype not in dtypes:
            continue

        df = pd.concat(v)
        # Drop missing rows and zero speedups (typically failed runs).
        df = df.dropna().query("speedup != 0")

        statistics = {
            "speedup": gmean(df["speedup"]),
            "comptime": df["compilation_latency"].mean(),
            "memory": gmean(df["compression_ratio"]),
        }

        for statistic, value in statistics.items():
            results[f"{device} {dtype} {mode}"][statistic][benchmark][compiler] = value

    descriptions = {
        "speedup": "Geometric mean speedup",
        "comptime": "Mean compilation time",
        "memory": "Peak memory compression ratio",
    }
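
    # One section is printed per "{device} {dtype} {mode}" bucket, with one
    # table per statistic, e.g. a "cuda amp training" section containing a
    # "Geometric mean speedup" table.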

    for dtype_mode, r in results.items():
        print(f"# {dtype_mode} performance results")
        for statistic, data in r.items():
            print(f"## {descriptions[statistic]}")

            # Build one row per compiler; columns are the benchmark suites.
            # Every benchmark is assumed to report the same compilers as the
            # first one.
            table = []
            for row_name in data[next(iter(data.keys()))]:
                row = [row_name]
                for col_name in data:
                    row.append(round(data[col_name][row_name], 2))
                table.append(row)

            # tabulate aligns a short header list with the last columns, so
            # the compiler-name column is left unlabeled.
            headers = list(data.keys())
            print(tabulate(table, headers=headers))
            print()


if __name__ == "__main__":
    main()
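
# Sample output shape (all values illustrative):
#
# # cuda float32 training performance results
# ## Geometric mean speedup
#             torchbench    huggingface    timm_models
# inductor          1.25           1.40           1.30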