Unskipped multiple inductor tests for ROCm (#143581)

All of them should be fine to run now after the Triton fix.

Pull Request resolved: https://github.com/pytorch/pytorch/pull/143581
Approved by: https://github.com/jeffdaily

Co-authored-by: Jeff Daily <jeff.daily@amd.com>
Author: iupaikov-amd
Date: 2025-01-16 20:46:06 +00:00
Committed by: PyTorch MergeBot
Parent: a9bfc5f70c
Commit: 577708e6de

7 changed files with 1 addition and 24 deletions
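
For readers unfamiliar with the decorator being removed below: `skipIfRocm` comes from `torch.testing._internal.common_utils` and skips a test whenever PyTorch is built for ROCm. A minimal, self-contained sketch of the before/after pattern follows; the test class and test bodies are hypothetical illustrations, not code from this PR.

```python
# Illustrative only: shows what deleting @skipIfRocm changes for a test.
# The class and tests below are hypothetical; this PR only removes the
# decorator from existing inductor tests.
import torch
from torch.testing._internal.common_utils import TestCase, run_tests, skipIfRocm


class ExampleTests(TestCase):
    @skipIfRocm  # before this PR: skipped on ROCm builds of PyTorch
    def test_compiled_add_before(self):
        fn = torch.compile(lambda x: x + 1)
        self.assertEqual(fn(torch.ones(4)), torch.full((4,), 2.0))

    # after this PR: no decorator, so the same test also runs on ROCm
    def test_compiled_add_after(self):
        fn = torch.compile(lambda x: x + 1)
        self.assertEqual(fn(torch.ones(4)), torch.full((4,), 2.0))


if __name__ == "__main__":
    run_tests()
```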

@@ -1354,7 +1354,6 @@ def forward(self, arg0_1, arg1_1, arg2_1, arg3_1, arg4_1):
self.run_test(bias_mod)
self.run_test_with_paged_attention(bias_mod)
-@skipIfRocm
@supported_platform
def test_fully_masked_out_rows_0_check_gqa(self):
# Ensure fully masked out rows won't cause NaNs.

@@ -712,7 +712,6 @@ class OptimizeForInferenceTemplate(TestCase):
self.assertEqual(eager, compiled)
self.assertTrue(weight_ref() is None)
-@skipIfRocm
def test_conv_with_as_strided(self):
class Model(nn.Module):
def __init__(self, groups):

@@ -305,7 +305,6 @@ class TestMaxAutotune(TestCase):
with config.patch({"max_autotune": True}):
torch.compile(mm, dynamic=dynamic)(a, b)
-@skipIfRocm
def test_precompilation_threads(self):
import threading
from typing import Any, Dict
@@ -481,7 +480,6 @@ class TestMaxAutotune(TestCase):
with config.patch({"max_autotune": True}):
torch.compile(addmm, dynamic=dynamic)(x, a, b)
-@skipIfRocm
def test_autotune_conv1x1(self):
# Assuming input has 3 channels and we want to produce 16 channels as output
conv1x1 = (
@@ -512,7 +510,6 @@ class TestMaxAutotune(TestCase):
FileCheck().check_not("extern_kernels.convolution").run(code[0])
self.assertEqual(conv1x1(input_tensor), out, atol=1e-2, rtol=0)
-@skipIfRocm
def test_filled_cache_precompile(self):
def fn(a, b, c):
a = (a @ b) @ c
@@ -531,7 +528,6 @@ class TestMaxAutotune(TestCase):
fn_c = torch.compile(mode="max-autotune-no-cudagraphs")(fn)
self.assertEqual(counters["inductor"]["select_algorithm_precompile"], 0)
-@skipIfRocm
@fresh_inductor_cache()
@config.patch(search_autotune_cache=True)
def test_search_autotune_cache(self):
@@ -547,7 +543,6 @@ class TestMaxAutotune(TestCase):
self.assertEqual(fn(*inputs), fn_c(*inputs), atol=1e-2, rtol=1e-2)
self.assertEqual(counters["inductor"]["select_algorithm_precompile"], 0)
-@skipIfRocm
@fresh_inductor_cache()
@config.patch(max_autotune=True, max_fusion_size=2)
def test_jit_fusion_matches_aot_fusion(self):
@@ -990,7 +985,6 @@ class TestMaxAutotuneRemoteCache(TestCase):
super().tearDown()
PatchCaches.tearDown()
-@skipIfRocm
@parametrize("dynamic", (False, True))
def test_max_autotune_remote_caching(self, dynamic: bool):
from unittest.mock import patch

@@ -3,12 +3,7 @@
import sys
import unittest
-from torch.testing._internal.common_utils import (
-    IS_CI,
-    IS_WINDOWS,
-    skipIfRocm,
-    skipIfXpu,
-)
+from torch.testing._internal.common_utils import IS_CI, IS_WINDOWS, skipIfXpu
from torch.testing._internal.inductor_utils import GPU_TYPE, HAS_GPU, requires_gpu
@@ -83,7 +78,6 @@ class TestMemoryPlanning(TestCase):
)
self.assertTrue(same(f(*args), result))
-@skipIfRocm(msg="test_aot_inductor doesn't work on ROCm")
@skipIfXpu(msg="aoti doesn't work on XPU")
def test_aoti(self):
try:

@@ -143,7 +143,6 @@ class TestPatternMatcher(TestCase):
ref[indices], test[indices]
) # also checks that dtype is correct
-@skipIfRocm
@skipIfXpu
@skipCUDAIf(not SM80OrLater, "need sm_80")
@inductor_config.patch(force_fuse_int_mm_with_mul=True)
@@ -237,7 +236,6 @@ class TestPatternMatcher(TestCase):
self.assertEqual(f(inp), f_replaced(inp))
self.assertEqual(count, 2)
-@skipIfRocm
@skipIfXpu
@skipCUDAIf(not SM80OrLater, "need sm_80")
@inductor_config.patch(force_fuse_int_mm_with_mul=True)

@@ -112,8 +112,6 @@ class TestSelectAlgorithm(TestCase):
)
self.assertEqual(counters["inductor"]["select_algorithm_autotune"], 1)
-# FIXME: Investigate why _int_mm_out_cuda is not compiled on ROCm
-@skipIfRocm
@patches
def test__int_mm(self):
@torch.compile
@@ -296,7 +294,6 @@ class TestSelectAlgorithm(TestCase):
)
self.assertEqual(counters["inductor"]["select_algorithm_autotune"], 1)
-@skipIfRocm
@patches
@torch._inductor.config.patch(conv_1x1_as_mm=False)
def test_convolution2(self):

@@ -550,7 +550,6 @@ def forward(self, x_1, output_1):
call_triton(output)
@requires_gpu
-@skipIfRocm
def test_triton_kernel_dependancies(self):
def call_triton(
x: torch.Tensor,
@@ -669,7 +668,6 @@ def forward(self, x_1, output_1):
@requires_gpu
@skipIfXpu
-@skipIfRocm
def test_triton_kernel_constants(self):
@triton.jit
def mulC_kernel(
@@ -754,7 +752,6 @@ def forward(self, x_1, output_1):
self.assertEqual(compiled_func(t1, t2, output2), torch_add)
@requires_gpu
-@skipIfRocm # https://github.com/pytorch/pytorch/actions/runs/10051552819/job/27782048305?pr=131431
@common_utils.parametrize("backend", ["eager", "aot_eager", "inductor"])
@patch.object(
torch._inductor.config, "unsafe_ignore_unsupported_triton_autotune_args", True
@@ -2434,7 +2431,6 @@ class MutationTests(torch._inductor.test_case.TestCase):
)
@requires_gpu
-@skipIfRocm
def test_triton_kernel_inference_mode(self):
def f(x, y, out):
n_elements = x.numel()