1# Owner(s): ["module: inductor"] 2 3import subprocess 4import sys 5 6import torch 7import torch._inductor.async_compile # noqa: F401 required to warm up AsyncCompile pools 8from torch._inductor.codecache import PyCodeCache 9from torch._inductor.test_case import run_tests, TestCase 10from torch.testing._internal.inductor_utils import GPU_TYPE, HAS_GPU 11 12 13class TestTritonWrapper(TestCase): 14 def get_compiled_module(self): 15 compiled_module = None 16 for v in PyCodeCache.cache.values(): 17 if hasattr(v, "benchmark_compiled_module"): 18 self.assertTrue( 19 compiled_module is None, "Found multiple compiled modules" 20 ) 21 compiled_module = v 22 23 self.assertTrue(compiled_module is not None) 24 return compiled_module 25 26 def test_wrapper_using_gpu_seed(self): 27 """ 28 Make sure the subprocess.check_output does not throw. 29 """ 30 31 @torch.compile 32 def f(x, y): 33 # dropout will result in usage of cuda_seed 34 z = torch.nn.functional.dropout(x, 0.5) 35 return z + y 36 37 N = 10 38 x = torch.rand(N).to(device=GPU_TYPE) 39 y = torch.rand(N).to(device=GPU_TYPE) 40 out = f(x, y) 41 compiled_module = self.get_compiled_module() 42 43 # now run the compiled module in subprocess and check its output 44 bench_out = subprocess.check_output( 45 f"{sys.executable} {compiled_module.__file__}".split(), 46 stderr=subprocess.STDOUT, 47 ).decode() 48 49 self.assertTrue(len(bench_out) > 0) 50 51 52if __name__ == "__main__": 53 if HAS_GPU: 54 run_tests() 55