From 577708e6dea122115a081f3e220dd40b68be8954 Mon Sep 17 00:00:00 2001
From: iupaikov-amd
Date: Thu, 16 Jan 2025 20:46:06 +0000
Subject: [PATCH] Unskipped multiple inductor tests for ROCm (#143581)

All of them should be fine to run now after the triton fix.

Pull Request resolved: https://github.com/pytorch/pytorch/pull/143581
Approved by: https://github.com/jeffdaily

Co-authored-by: Jeff Daily
---
 test/inductor/test_flex_decoding.py     | 1 -
 test/inductor/test_inductor_freezing.py | 1 -
 test/inductor/test_max_autotune.py      | 6 ------
 test/inductor/test_memory_planning.py   | 8 +-------
 test/inductor/test_pattern_matcher.py   | 2 --
 test/inductor/test_select_algorithm.py  | 3 ---
 test/inductor/test_triton_kernels.py    | 4 ----
 7 files changed, 1 insertion(+), 24 deletions(-)

diff --git a/test/inductor/test_flex_decoding.py b/test/inductor/test_flex_decoding.py
index f633cf95b58..42a19d36d9a 100644
--- a/test/inductor/test_flex_decoding.py
+++ b/test/inductor/test_flex_decoding.py
@@ -1354,7 +1354,6 @@ def forward(self, arg0_1, arg1_1, arg2_1, arg3_1, arg4_1):
         self.run_test(bias_mod)
         self.run_test_with_paged_attention(bias_mod)
 
-    @skipIfRocm
     @supported_platform
     def test_fully_masked_out_rows_0_check_gqa(self):
         # Ensure fully masked out rows won't cause NaNs.
diff --git a/test/inductor/test_inductor_freezing.py b/test/inductor/test_inductor_freezing.py
index ba1da8e85d0..ecfe02e37ff 100644
--- a/test/inductor/test_inductor_freezing.py
+++ b/test/inductor/test_inductor_freezing.py
@@ -712,7 +712,6 @@ class OptimizeForInferenceTemplate(TestCase):
         self.assertEqual(eager, compiled)
         self.assertTrue(weight_ref() is None)
 
-    @skipIfRocm
     def test_conv_with_as_strided(self):
         class Model(nn.Module):
             def __init__(self, groups):
diff --git a/test/inductor/test_max_autotune.py b/test/inductor/test_max_autotune.py
index ac1d209b598..e4e5bba7057 100644
--- a/test/inductor/test_max_autotune.py
+++ b/test/inductor/test_max_autotune.py
@@ -305,7 +305,6 @@ class TestMaxAutotune(TestCase):
         with config.patch({"max_autotune": True}):
             torch.compile(mm, dynamic=dynamic)(a, b)
 
-    @skipIfRocm
     def test_precompilation_threads(self):
         import threading
         from typing import Any, Dict
@@ -481,7 +480,6 @@ class TestMaxAutotune(TestCase):
         with config.patch({"max_autotune": True}):
             torch.compile(addmm, dynamic=dynamic)(x, a, b)
 
-    @skipIfRocm
     def test_autotune_conv1x1(self):
         # Assuming input has 3 channels and we want to produce 16 channels as output
         conv1x1 = (
@@ -512,7 +510,6 @@ class TestMaxAutotune(TestCase):
             FileCheck().check_not("extern_kernels.convolution").run(code[0])
             self.assertEqual(conv1x1(input_tensor), out, atol=1e-2, rtol=0)
 
-    @skipIfRocm
     def test_filled_cache_precompile(self):
         def fn(a, b, c):
             a = (a @ b) @ c
@@ -531,7 +528,6 @@ class TestMaxAutotune(TestCase):
         fn_c = torch.compile(mode="max-autotune-no-cudagraphs")(fn)
         self.assertEqual(counters["inductor"]["select_algorithm_precompile"], 0)
 
-    @skipIfRocm
     @fresh_inductor_cache()
     @config.patch(search_autotune_cache=True)
     def test_search_autotune_cache(self):
@@ -547,7 +543,6 @@ class TestMaxAutotune(TestCase):
         self.assertEqual(fn(*inputs), fn_c(*inputs), atol=1e-2, rtol=1e-2)
         self.assertEqual(counters["inductor"]["select_algorithm_precompile"], 0)
 
-    @skipIfRocm
     @fresh_inductor_cache()
     @config.patch(max_autotune=True, max_fusion_size=2)
     def test_jit_fusion_matches_aot_fusion(self):
@@ -990,7 +985,6 @@ class TestMaxAutotuneRemoteCache(TestCase):
         super().tearDown()
         PatchCaches.tearDown()
 
-    @skipIfRocm
     @parametrize("dynamic", (False, True))
     def test_max_autotune_remote_caching(self, dynamic: bool):
         from unittest.mock import patch
diff --git a/test/inductor/test_memory_planning.py b/test/inductor/test_memory_planning.py
index 765a51d6b17..b95f396bd52 100644
--- a/test/inductor/test_memory_planning.py
+++ b/test/inductor/test_memory_planning.py
@@ -3,12 +3,7 @@
 import sys
 import unittest
 
-from torch.testing._internal.common_utils import (
-    IS_CI,
-    IS_WINDOWS,
-    skipIfRocm,
-    skipIfXpu,
-)
+from torch.testing._internal.common_utils import IS_CI, IS_WINDOWS, skipIfXpu
 from torch.testing._internal.inductor_utils import GPU_TYPE, HAS_GPU, requires_gpu
 
 
@@ -83,7 +78,6 @@ class TestMemoryPlanning(TestCase):
         )
         self.assertTrue(same(f(*args), result))
 
-    @skipIfRocm(msg="test_aot_inductor doesn't work on ROCm")
     @skipIfXpu(msg="aoti doesn't work on XPU")
     def test_aoti(self):
         try:
diff --git a/test/inductor/test_pattern_matcher.py b/test/inductor/test_pattern_matcher.py
index a3eccb175ad..06c0d2853f7 100644
--- a/test/inductor/test_pattern_matcher.py
+++ b/test/inductor/test_pattern_matcher.py
@@ -143,7 +143,6 @@ class TestPatternMatcher(TestCase):
                 ref[indices], test[indices]
             )  # also checks that dtype is correct
 
-    @skipIfRocm
     @skipIfXpu
     @skipCUDAIf(not SM80OrLater, "need sm_80")
     @inductor_config.patch(force_fuse_int_mm_with_mul=True)
@@ -237,7 +236,6 @@ class TestPatternMatcher(TestCase):
         self.assertEqual(f(inp), f_replaced(inp))
         self.assertEqual(count, 2)
 
-    @skipIfRocm
     @skipIfXpu
     @skipCUDAIf(not SM80OrLater, "need sm_80")
     @inductor_config.patch(force_fuse_int_mm_with_mul=True)
diff --git a/test/inductor/test_select_algorithm.py b/test/inductor/test_select_algorithm.py
index 0edf2f58778..5ceedad5506 100644
--- a/test/inductor/test_select_algorithm.py
+++ b/test/inductor/test_select_algorithm.py
@@ -112,8 +112,6 @@ class TestSelectAlgorithm(TestCase):
         )
         self.assertEqual(counters["inductor"]["select_algorithm_autotune"], 1)
 
-    # FIXME: Investigate why _int_mm_out_cuda is not compiled on ROCm
-    @skipIfRocm
     @patches
     def test__int_mm(self):
         @torch.compile
@@ -296,7 +294,6 @@ class TestSelectAlgorithm(TestCase):
         )
         self.assertEqual(counters["inductor"]["select_algorithm_autotune"], 1)
 
-    @skipIfRocm
     @patches
     @torch._inductor.config.patch(conv_1x1_as_mm=False)
     def test_convolution2(self):
diff --git a/test/inductor/test_triton_kernels.py b/test/inductor/test_triton_kernels.py
index 5b5b9421e59..25df9a86cbd 100644
--- a/test/inductor/test_triton_kernels.py
+++ b/test/inductor/test_triton_kernels.py
@@ -550,7 +550,6 @@ def forward(self, x_1, output_1):
         call_triton(output)
 
     @requires_gpu
-    @skipIfRocm
     def test_triton_kernel_dependancies(self):
         def call_triton(
             x: torch.Tensor,
@@ -669,7 +668,6 @@ def forward(self, x_1, output_1):
 
     @requires_gpu
     @skipIfXpu
-    @skipIfRocm
     def test_triton_kernel_constants(self):
         @triton.jit
         def mulC_kernel(
@@ -754,7 +752,6 @@ def forward(self, x_1, output_1):
         self.assertEqual(compiled_func(t1, t2, output2), torch_add)
 
     @requires_gpu
-    @skipIfRocm  # https://github.com/pytorch/pytorch/actions/runs/10051552819/job/27782048305?pr=131431
     @common_utils.parametrize("backend", ["eager", "aot_eager", "inductor"])
     @patch.object(
         torch._inductor.config, "unsafe_ignore_unsupported_triton_autotune_args", True
@@ -2434,7 +2431,6 @@ class MutationTests(torch._inductor.test_case.TestCase):
     )
 
     @requires_gpu
-    @skipIfRocm
     def test_triton_kernel_inference_mode(self):
         def f(x, y, out):
             n_elements = x.numel()