# Owner(s): ["module: inductor"]
import unittest
from unittest.mock import patch

import torch._dynamo.config as dynamo_config
import torch._inductor.config as inductor_config
from torch._dynamo.test_minifier_common import MinifierTestBase
from torch._inductor import config
from torch.testing._internal.common_utils import IS_JETSON, IS_MACOS, TEST_WITH_ASAN
from torch.testing._internal.inductor_utils import GPU_TYPE
from torch.testing._internal.triton_utils import requires_gpu


class MinifierTests(MinifierTestBase):
    # Test that compile and accuracy errors after aot can be repro'd (both CPU and GPU)
    def _test_after_aot(self, device, expected_error):
        # NB: The program is intentionally quite simple, just enough to
        # trigger one minification step, no more (dedicated minifier tests
        # should exercise minifier only)
        run_code = f"""\
@torch.compile()
def inner(x):
    x = torch.relu(x)
    x = torch.cos(x)
    return x

inner(torch.randn(20, 20).to("{device}"))
"""
        self._run_full_test(run_code, "aot", expected_error, isolate=False)

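    # The inject_*_bug_TESTING_ONLY knobs patched onto the tests below make
    # Inductor deliberately emit broken codegen (or wrong results) for relu,
    # giving the minifier a real failure to reduce.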
    @unittest.skipIf(IS_JETSON, "Fails on Jetson")
    @inductor_config.patch("cpp.inject_relu_bug_TESTING_ONLY", "compile_error")
    def test_after_aot_cpu_compile_error(self):
        self._test_after_aot("cpu", "CppCompileError")

    @unittest.skipIf(IS_JETSON, "Fails on Jetson")
    @inductor_config.patch("cpp.inject_relu_bug_TESTING_ONLY", "accuracy")
    def test_after_aot_cpu_accuracy_error(self):
        self._test_after_aot("cpu", "AccuracyError")

    @requires_gpu
    @inductor_config.patch("triton.inject_relu_bug_TESTING_ONLY", "compile_error")
    def test_after_aot_gpu_compile_error(self):
        self._test_after_aot(GPU_TYPE, "SyntaxError")

    @requires_gpu
    @inductor_config.patch("triton.inject_relu_bug_TESTING_ONLY", "accuracy")
    def test_after_aot_gpu_accuracy_error(self):
        self._test_after_aot(GPU_TYPE, "AccuracyError")

    @inductor_config.patch("cpp.inject_relu_bug_TESTING_ONLY", "accuracy")
    def test_constant_in_graph(self):
        run_code = """\
@torch.compile()
def inner(x):
    return torch.tensor(2) + torch.relu(x)

inner(torch.randn(2))
"""
        self._run_full_test(run_code, "aot", "AccuracyError", isolate=False)

    @requires_gpu
    @patch.object(config, "joint_graph_constant_folding", False)
    def test_rmse_improves_over_atol(self):
        # From https://twitter.com/itsclivetime/status/1651135821045719041?s=20
        run_code = """
@torch.compile()
def inner(x):
    return x - torch.tensor(655, dtype=torch.half, device='GPU_TYPE') * 100

inner(torch.tensor(655 * 100, dtype=torch.half, device='GPU_TYPE'))
""".replace(
            "GPU_TYPE", GPU_TYPE
        )

        # If we disable RMSE against fp64, this triggers accuracy error,
        # as the increased precision from torch.compile changes the result
        # of 655 * 100
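        # (Roughly: 655 and 100 are exact in fp16, but 655 * 100 = 65500 falls
        # between representable fp16 values (spacing is 32 at this magnitude)
        # and rounds up to 65504, the fp16 max.  Eager half precision therefore
        # sees 65504 - 65504 = 0, while keeping the product in higher precision,
        # as the compiled version effectively does, gives 65504 - 65500 = 4.)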
        with dynamo_config.patch("same_two_models_use_fp64", False):
            self._run_full_test(
                run_code,
                "aot",
                "AccuracyError",
                isolate=False,
                # NB: need this to avoid refusing to minify when fp64 doesn't work
                # (which it doesn't, due to the config patch above)
                minifier_args=["--strict-accuracy"],
            )

        # But using fp64, we see that the intended semantics is the increased
        # 655 * 100 precision, and so we report no problem
        self._run_full_test(run_code, "aot", None, isolate=False)

    @inductor_config.patch("cpp.inject_relu_bug_TESTING_ONLY", "accuracy")
    @inductor_config.patch("cpp.inject_log1p_bug_TESTING_ONLY", "accuracy")
    def test_accuracy_vs_strict_accuracy(self):
        run_code = """
@torch.compile()
def inner(x):
    y = torch.log1p(x)
    b = y > 0
    # Need to ensure suffix removal hits a boolean output
    b = torch.logical_not(b)
    b = torch.logical_not(b)
    x = torch.relu(x)
    return torch.where(b, x, x)

inner(torch.randn(20))
"""

        # Strict accuracy gets hung up on the boolean mask difference, which
        # will localize the error to log1p, even though it doesn't actually
        # matter to the end result
        res = self._run_full_test(
            run_code,
            "aot",
            "AccuracyError",
            isolate=False,
            minifier_args=["--strict-accuracy"],
        )
        self.assertExpectedInline(
            res.repro_module(),
            """\
class Repro(torch.nn.Module):
    def __init__(self) -> None:
        super().__init__()

    def forward(self, arg0_1):
        log1p = torch.ops.aten.log1p.default(arg0_1);  arg0_1 = None
        return (log1p,)""",
        )

        # FP accuracy will refuse to promote the logical_not on the outputs,
        # and so you'll get to the relu (unless the minifier somehow tries
        # removing entire suffix except the log1p first!)
        res = self._run_full_test(run_code, "aot", "AccuracyError", isolate=False)
        self.assertExpectedInline(
            res.repro_module(),
            """\
class Repro(torch.nn.Module):
    def __init__(self) -> None:
        super().__init__()

    def forward(self, arg0_1):
        relu = torch.ops.aten.relu.default(arg0_1);  arg0_1 = None
        return (relu,)""",
        )

    @inductor_config.patch("cpp.inject_relu_bug_TESTING_ONLY", "accuracy")
    def test_offload_to_disk(self):
        # Just a smoke test; this doesn't actually verify that memory
        # usage went down.  The test case is carefully constructed to hit
        # delta debugging.
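        # (Delta debugging removes chunks of graph nodes at progressively finer
        # granularity; the extra sin/cos ops ahead of the buggy relu give that
        # search more than one trivial step to take.)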
        run_code = """\
@torch.compile()
def inner(x):
    x = torch.sin(x)
    x = torch.sin(x)
    x = torch.cos(x)
    x = torch.relu(x)
    return x

inner(torch.randn(20, 20))
"""
        self._run_full_test(
            run_code,
            "aot",
            "AccuracyError",
            isolate=False,
            minifier_args=["--offload-to-disk"],
        )


if __name__ == "__main__":
    from torch._dynamo.test_case import run_tests

    # Skip CI tests on macOS since CPU inductor does not seem to work there due to C++ compile errors;
    # also skip on ASAN due to https://github.com/pytorch/pytorch/issues/98262
    if not IS_MACOS and not TEST_WITH_ASAN:
        run_tests()