# Owner(s): ["module: inductor"]
import unittest
from unittest.mock import patch

import torch._dynamo.config as dynamo_config
import torch._inductor.config as inductor_config
from torch._dynamo.test_minifier_common import MinifierTestBase
from torch._inductor import config
from torch.testing._internal.common_utils import IS_JETSON, IS_MACOS, TEST_WITH_ASAN
from torch.testing._internal.inductor_utils import GPU_TYPE
from torch.testing._internal.triton_utils import requires_gpu


class MinifierTests(MinifierTestBase):
    # Test that compile and accuracy errors after aot can be repro'd (both CPU and GPU)
    def _test_after_aot(self, device, expected_error):
        # NB: The program is intentionally quite simple, just enough to
        # trigger one minification step, no more (dedicated minifier tests
        # should exercise the minifier only)
        run_code = f"""\
@torch.compile()
def inner(x):
    x = torch.relu(x)
    x = torch.cos(x)
    return x

inner(torch.randn(20, 20).to("{device}"))
"""
        self._run_full_test(run_code, "aot", expected_error, isolate=False)

    @unittest.skipIf(IS_JETSON, "Fails on Jetson")
    @inductor_config.patch("cpp.inject_relu_bug_TESTING_ONLY", "compile_error")
    def test_after_aot_cpu_compile_error(self):
        self._test_after_aot("cpu", "CppCompileError")

    @unittest.skipIf(IS_JETSON, "Fails on Jetson")
    @inductor_config.patch("cpp.inject_relu_bug_TESTING_ONLY", "accuracy")
    def test_after_aot_cpu_accuracy_error(self):
        self._test_after_aot("cpu", "AccuracyError")

    @requires_gpu
    @inductor_config.patch("triton.inject_relu_bug_TESTING_ONLY", "compile_error")
    def test_after_aot_gpu_compile_error(self):
        self._test_after_aot(GPU_TYPE, "SyntaxError")

    @requires_gpu
    @inductor_config.patch("triton.inject_relu_bug_TESTING_ONLY", "accuracy")
    def test_after_aot_gpu_accuracy_error(self):
        self._test_after_aot(GPU_TYPE, "AccuracyError")

    @inductor_config.patch("cpp.inject_relu_bug_TESTING_ONLY", "accuracy")
    def test_constant_in_graph(self):
        run_code = """\
@torch.compile()
def inner(x):
    return torch.tensor(2) + torch.relu(x)

inner(torch.randn(2))
"""
        self._run_full_test(run_code, "aot", "AccuracyError", isolate=False)

    @requires_gpu
    @patch.object(config, "joint_graph_constant_folding", False)
    def test_rmse_improves_over_atol(self):
        # From https://twitter.com/itsclivetime/status/1651135821045719041?s=20
        run_code = """
@torch.compile()
def inner(x):
    return x - torch.tensor(655, dtype=torch.half, device='GPU_TYPE') * 100

inner(torch.tensor(655 * 100, dtype=torch.half, device='GPU_TYPE'))
""".replace(
            "GPU_TYPE", GPU_TYPE
        )

        # If we disable RMSE against fp64, this triggers an accuracy error,
        # as the increased precision from torch.compile changes the result
        # of 655 * 100 (65500 is not exactly representable in fp16, so eager
        # rounds it to 65504, while the compiled graph computes it at higher
        # precision)
        with dynamo_config.patch("same_two_models_use_fp64", False):
            self._run_full_test(
                run_code,
                "aot",
                "AccuracyError",
                isolate=False,
                # NB: need this to avoid refusing to minify when fp64 doesn't work
                # (which it doesn't, due to the config patch above)
                minifier_args=["--strict-accuracy"],
            )

        # But using fp64, we see that the increased 655 * 100 precision is the
        # intended semantics, and so we report no problem
        self._run_full_test(run_code, "aot", None, isolate=False)

    @inductor_config.patch("cpp.inject_relu_bug_TESTING_ONLY", "accuracy")
    @inductor_config.patch("cpp.inject_log1p_bug_TESTING_ONLY", "accuracy")
    def test_accuracy_vs_strict_accuracy(self):
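        # The minifier's default (FP) accuracy mode only counts floating-point
        # divergence as a failure, while --strict-accuracy also counts
        # non-float (e.g. boolean) divergence; the two modes therefore
        # localize the injected log1p/relu bugs differently below.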
        run_code = """
@torch.compile()
def inner(x):
    y = torch.log1p(x)
    b = y > 0
    # Need to ensure suffix removal hits a boolean output
    b = torch.logical_not(b)
    b = torch.logical_not(b)
    x = torch.relu(x)
    return torch.where(b, x, x)

inner(torch.randn(20))
"""

        # Strict accuracy gets hung up on the boolean mask difference, which
        # will localize the error to log1p, even though it doesn't actually
        # matter to the end result
        res = self._run_full_test(
            run_code,
            "aot",
            "AccuracyError",
            isolate=False,
            minifier_args=["--strict-accuracy"],
        )
        self.assertExpectedInline(
            res.repro_module(),
            """\
class Repro(torch.nn.Module):
    def __init__(self) -> None:
        super().__init__()

    def forward(self, arg0_1):
        log1p = torch.ops.aten.log1p.default(arg0_1); arg0_1 = None
        return (log1p,)""",
        )

        # FP accuracy will refuse to promote the logical_not on the outputs,
        # and so you'll get to the relu (unless the minifier somehow tries
        # removing the entire suffix except the log1p first!)
        res = self._run_full_test(run_code, "aot", "AccuracyError", isolate=False)
        self.assertExpectedInline(
            res.repro_module(),
            """\
class Repro(torch.nn.Module):
    def __init__(self) -> None:
        super().__init__()

    def forward(self, arg0_1):
        relu = torch.ops.aten.relu.default(arg0_1); arg0_1 = None
        return (relu,)""",
        )

    @inductor_config.patch("cpp.inject_relu_bug_TESTING_ONLY", "accuracy")
    def test_offload_to_disk(self):
        # Just a smoke test; this doesn't actually verify that memory
        # usage went down. The test case is carefully constructed to hit
        # delta debugging.
        run_code = """\
@torch.compile()
def inner(x):
    x = torch.sin(x)
    x = torch.sin(x)
    x = torch.cos(x)
    x = torch.relu(x)
    return x

inner(torch.randn(20, 20))
"""
        self._run_full_test(
            run_code,
            "aot",
            "AccuracyError",
            isolate=False,
            minifier_args=["--offload-to-disk"],
        )


if __name__ == "__main__":
    from torch._dynamo.test_case import run_tests

    # Skip CI tests on macOS since CPU inductor does not seem to work there due
    # to C++ compile errors; also skip on ASAN due to
    # https://github.com/pytorch/pytorch/issues/98262
    if not IS_MACOS and not TEST_WITH_ASAN:
        run_tests()