Unskipped multiple inductor tests for ROCm (#143581)

All of them should be fine to run now after the Triton fix.

Pull Request resolved: https://github.com/pytorch/pytorch/pull/143581
Approved by: https://github.com/jeffdaily

Co-authored-by: Jeff Daily <jeff.daily@amd.com>
Author: iupaikov-amd
Date: 2025-01-16 20:46:06 +00:00
Committed by: PyTorch MergeBot
Parent: a9bfc5f70c
Commit: 577708e6de

7 changed files with 1 addition and 24 deletions
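
For readers unfamiliar with the decorator being removed below: `skipIfRocm` comes from `torch.testing._internal.common_utils` and skips a test whenever PyTorch is built for ROCm. A minimal, self-contained sketch of the before/after pattern follows; the test class and test bodies are hypothetical illustrations, not code from this PR.

```python
# Illustrative only: shows what deleting @skipIfRocm changes for a test.
# The class and tests below are hypothetical; this PR only removes the
# decorator from existing inductor tests.
import torch
from torch.testing._internal.common_utils import TestCase, run_tests, skipIfRocm


class ExampleTests(TestCase):
    @skipIfRocm  # before this PR: skipped on ROCm builds of PyTorch
    def test_compiled_add_before(self):
        fn = torch.compile(lambda x: x + 1)
        self.assertEqual(fn(torch.ones(4)), torch.full((4,), 2.0))

    # after this PR: no decorator, so the same test also runs on ROCm
    def test_compiled_add_after(self):
        fn = torch.compile(lambda x: x + 1)
        self.assertEqual(fn(torch.ones(4)), torch.full((4,), 2.0))


if __name__ == "__main__":
    run_tests()
```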

@@ -1354,7 +1354,6 @@ def forward(self, arg0_1, arg1_1, arg2_1, arg3_1, arg4_1):
self.run_test(bias_mod)
self.run_test_with_paged_attention(bias_mod)
-@skipIfRocm
@supported_platform
def test_fully_masked_out_rows_0_check_gqa(self):
# Ensure fully masked out rows won't cause NaNs.

@@ -712,7 +712,6 @@ class OptimizeForInferenceTemplate(TestCase):
self.assertEqual(eager, compiled)
self.assertTrue(weight_ref() is None)
-@skipIfRocm
def test_conv_with_as_strided(self):
class Model(nn.Module):
def __init__(self, groups):

@@ -305,7 +305,6 @@ class TestMaxAutotune(TestCase):
with config.patch({"max_autotune": True}):
torch.compile(mm, dynamic=dynamic)(a, b)
-@skipIfRocm
def test_precompilation_threads(self):
import threading
from typing import Any, Dict
@@ -481,7 +480,6 @@ class TestMaxAutotune(TestCase):
with config.patch({"max_autotune": True}):
torch.compile(addmm, dynamic=dynamic)(x, a, b)
-@skipIfRocm
def test_autotune_conv1x1(self):
# Assuming input has 3 channels and we want to produce 16 channels as output
conv1x1 = (
@@ -512,7 +510,6 @@ class TestMaxAutotune(TestCase):
FileCheck().check_not("extern_kernels.convolution").run(code[0])
self.assertEqual(conv1x1(input_tensor), out, atol=1e-2, rtol=0)
-@skipIfRocm
def test_filled_cache_precompile(self):
def fn(a, b, c):
a = (a @ b) @ c
@@ -531,7 +528,6 @@ class TestMaxAutotune(TestCase):
fn_c = torch.compile(mode="max-autotune-no-cudagraphs")(fn)
self.assertEqual(counters["inductor"]["select_algorithm_precompile"], 0)
-@skipIfRocm
@fresh_inductor_cache()
@config.patch(search_autotune_cache=True)
def test_search_autotune_cache(self):
@@ -547,7 +543,6 @@ class TestMaxAutotune(TestCase):
self.assertEqual(fn(*inputs), fn_c(*inputs), atol=1e-2, rtol=1e-2)
self.assertEqual(counters["inductor"]["select_algorithm_precompile"], 0)
-@skipIfRocm
@fresh_inductor_cache()
@config.patch(max_autotune=True, max_fusion_size=2)
def test_jit_fusion_matches_aot_fusion(self):
@@ -990,7 +985,6 @@ class TestMaxAutotuneRemoteCache(TestCase):
super().tearDown()
PatchCaches.tearDown()
-@skipIfRocm
@parametrize("dynamic", (False, True))
def test_max_autotune_remote_caching(self, dynamic: bool):
from unittest.mock import patch

@@ -3,12 +3,7 @@
import sys
import unittest
-from torch.testing._internal.common_utils import (
-    IS_CI,
-    IS_WINDOWS,
-    skipIfRocm,
-    skipIfXpu,
-)
+from torch.testing._internal.common_utils import IS_CI, IS_WINDOWS, skipIfXpu
from torch.testing._internal.inductor_utils import GPU_TYPE, HAS_GPU, requires_gpu
@@ -83,7 +78,6 @@ class TestMemoryPlanning(TestCase):
)
self.assertTrue(same(f(*args), result))
-@skipIfRocm(msg="test_aot_inductor doesn't work on ROCm")
@skipIfXpu(msg="aoti doesn't work on XPU")
def test_aoti(self):
try:

@@ -143,7 +143,6 @@ class TestPatternMatcher(TestCase):
ref[indices], test[indices]
) # also checks that dtype is correct
-@skipIfRocm
@skipIfXpu
@skipCUDAIf(not SM80OrLater, "need sm_80")
@inductor_config.patch(force_fuse_int_mm_with_mul=True)
@@ -237,7 +236,6 @@ class TestPatternMatcher(TestCase):
self.assertEqual(f(inp), f_replaced(inp))
self.assertEqual(count, 2)
-@skipIfRocm
@skipIfXpu
@skipCUDAIf(not SM80OrLater, "need sm_80")
@inductor_config.patch(force_fuse_int_mm_with_mul=True)

@@ -112,8 +112,6 @@ class TestSelectAlgorithm(TestCase):
)
self.assertEqual(counters["inductor"]["select_algorithm_autotune"], 1)
-# FIXME: Investigate why _int_mm_out_cuda is not compiled on ROCm
-@skipIfRocm
@patches
def test__int_mm(self):
@torch.compile
@@ -296,7 +294,6 @@ class TestSelectAlgorithm(TestCase):
)
self.assertEqual(counters["inductor"]["select_algorithm_autotune"], 1)
-@skipIfRocm
@patches
@torch._inductor.config.patch(conv_1x1_as_mm=False)
def test_convolution2(self):

@@ -550,7 +550,6 @@ def forward(self, x_1, output_1):
call_triton(output)
@requires_gpu
-@skipIfRocm
def test_triton_kernel_dependancies(self):
def call_triton(
x: torch.Tensor,
@@ -669,7 +668,6 @@ def forward(self, x_1, output_1):
@requires_gpu
@skipIfXpu
-@skipIfRocm
def test_triton_kernel_constants(self):
@triton.jit
def mulC_kernel(
@@ -754,7 +752,6 @@ def forward(self, x_1, output_1):
self.assertEqual(compiled_func(t1, t2, output2), torch_add)
@requires_gpu
-@skipIfRocm # https://github.com/pytorch/pytorch/actions/runs/10051552819/job/27782048305?pr=131431
@common_utils.parametrize("backend", ["eager", "aot_eager", "inductor"])
@patch.object(
torch._inductor.config, "unsafe_ignore_unsupported_triton_autotune_args", True
@@ -2434,7 +2431,6 @@ class MutationTests(torch._inductor.test_case.TestCase):
)
@requires_gpu
-@skipIfRocm
def test_triton_kernel_inference_mode(self):
def f(x, y, out):
n_elements = x.numel()