# Owner(s): ["module: inductor"]

import subprocess
import sys

import torch
import torch._inductor.async_compile  # noqa: F401 required to warm up AsyncCompile pools
from torch._inductor.codecache import PyCodeCache
from torch._inductor.test_case import run_tests, TestCase
from torch.testing._internal.inductor_utils import GPU_TYPE, HAS_GPU


class TestTritonWrapper(TestCase):
    def get_compiled_module(self):
        # Inductor loads the generated wrapper code through PyCodeCache; find
        # the single cached module that exposes the benchmark entry point.
        compiled_module = None
        for v in PyCodeCache.cache.values():
            if hasattr(v, "benchmark_compiled_module"):
                self.assertTrue(
                    compiled_module is None, "Found multiple compiled modules"
                )
                compiled_module = v

        self.assertTrue(compiled_module is not None)
        return compiled_module

    def test_wrapper_using_gpu_seed(self):
        """
        Make sure the generated wrapper code can be run standalone in a
        subprocess, i.e. the subprocess.check_output call does not throw.
        """

        @torch.compile
        def f(x, y):
            # dropout will result in usage of the GPU seed in the generated code
            z = torch.nn.functional.dropout(x, 0.5)
            return z + y

        N = 10
        x = torch.rand(N).to(device=GPU_TYPE)
        y = torch.rand(N).to(device=GPU_TYPE)
        out = f(x, y)
        compiled_module = self.get_compiled_module()

        # Now run the compiled module in a subprocess and check its output.
        bench_out = subprocess.check_output(
            f"{sys.executable} {compiled_module.__file__}".split(),
            stderr=subprocess.STDOUT,
        ).decode()

        self.assertTrue(len(bench_out) > 0)


if __name__ == "__main__":
    if HAS_GPU:
        run_tests()