# xref: /aosp_15_r20/external/pytorch/benchmarks/profiler_benchmark/profiler_bench.py (revision da0073e96a02ea20f0ac840b70461e3646d07c45)
1import argparse
2import sys
3import timeit
4
5import torch
6from torch.utils.benchmark import Timer
7
8
# Number of torch.jit._fork tasks spawned by parallel_workload.
PARALLEL_TASKS_NUM = 4
# Total torch.mm iterations per workload call; populated from --internal-iter
# in the __main__ block before the workloads run.
INTERNAL_ITER = None
11
12
def loop_workload(x):
    """Serially square-multiply ``x`` with itself INTERNAL_ITER times.

    Reads the module-level INTERNAL_ITER (set from --internal-iter in
    __main__) and returns the final product of the torch.mm chain.
    """
    result = x
    for _ in range(INTERNAL_ITER):
        result = torch.mm(result, result)
    return result
17
18
def parallel_workload(x):
    """Split INTERNAL_ITER torch.mm squarings across PARALLEL_TASKS_NUM
    concurrent tasks launched with torch.jit._fork.

    Each forked task runs INTERNAL_ITER // PARALLEL_TASKS_NUM iterations on
    its own chain starting from ``x``. The task results are intentionally
    discarded and the original ``x`` is returned unchanged — only the forked
    work itself matters to the profiler benchmark.

    NOTE(review): torch.jit._fork / _wait are private APIs and may change
    between PyTorch releases.
    """

    def parallel_task(x):
        # Floor division (instead of int(a / b)) stays exact for large
        # iteration counts and keeps the count an int.
        for _ in range(INTERNAL_ITER // PARALLEL_TASKS_NUM):
            x = torch.mm(x, x)
        return x

    futs = [torch.jit._fork(parallel_task, x) for _ in range(PARALLEL_TASKS_NUM)]
    # Block until every task finishes so the profiled work is fully captured.
    for fut in futs:
        torch.jit._wait(fut)
    return x
31
32
if __name__ == "__main__":
    # Disable graph-executor optimizations so the measured ops stay stable
    # across iterations instead of being rewritten by the JIT.
    torch._C._set_graph_executor_optimize(False)
    parser = argparse.ArgumentParser(description="Profiler benchmark")

    # Every option accepts both dashed and underscored spellings.
    parser.add_argument("--with-cuda", "--with_cuda", action="store_true")
    parser.add_argument("--with-stack", "--with_stack", action="store_true")
    parser.add_argument("--use-script", "--use_script", action="store_true")
    parser.add_argument("--use-kineto", "--use_kineto", action="store_true")
    parser.add_argument(
        "--profiling-tensor-size", "--profiling_tensor_size", default=1, type=int
    )
    # Was "--workload", "--workload" — the duplicate alias is dropped, and
    # `choices` makes argparse reject bad values up front (the old assert
    # below was stripped under `python -O`).
    parser.add_argument(
        "--workload", default="loop", type=str, choices=["loop", "parallel"]
    )
    parser.add_argument("--internal-iter", "--internal_iter", default=256, type=int)
    parser.add_argument(
        "--timer-min-run-time", "--timer_min_run_time", default=10, type=int
    )
    parser.add_argument("--cuda-only", "--cuda_only", action="store_true")

    args = parser.parse_args()

    if args.with_cuda and not torch.cuda.is_available():
        print("No CUDA available")
        sys.exit()

    print(
        f"Payload: {args.workload}, {args.internal_iter} iterations; timer min. runtime = {args.timer_min_run_time}\n"
    )
    INTERNAL_ITER = args.internal_iter

    # Run once without profiling (baseline) and once with it enabled.
    for profiling_enabled in [False, True]:
        print(
            "Profiling {}, tensor size {}x{}, use cuda: {}, use kineto: {}, with stacks: {}, use script: {}".format(
                "enabled" if profiling_enabled else "disabled",
                args.profiling_tensor_size,
                args.profiling_tensor_size,
                args.with_cuda,
                args.use_kineto,
                args.with_stack,
                args.use_script,
            )
        )

        input_x = torch.rand(args.profiling_tensor_size, args.profiling_tensor_size)

        if args.with_cuda:
            input_x = input_x.cuda()

        # Dispatch table replaces the old assert + if/else; argparse
        # `choices` guarantees the key exists.
        workload = {"loop": loop_workload, "parallel": parallel_workload}[args.workload]

        if args.use_script:
            # Trace so the profiler observes TorchScript execution.
            workload = torch.jit.trace(workload, (input_x,))

        if profiling_enabled:

            def payload():
                # NOTE(review): `use_cuda` is the legacy profiler flag; newer
                # PyTorch prefers `activities=`. Kept for compatibility with
                # the versions this benchmark targets — confirm before
                # migrating.
                with torch.autograd.profiler.profile(
                    use_cuda=args.with_cuda,
                    with_stack=args.with_stack,
                    use_kineto=args.use_kineto,
                    use_cpu=not args.cuda_only,
                ):
                    return workload(input_x)

        else:

            def payload():
                return workload(input_x)

        t = Timer(
            "payload()",
            globals={"payload": payload},
            timer=timeit.default_timer,
        ).blocked_autorange(min_run_time=args.timer_min_run_time)
        print(t)
115