Unskipped multiple inductor tests for ROCm (#143581)
All of them should be fine to run now after the triton fix.

Pull Request resolved: https://github.com/pytorch/pytorch/pull/143581
Approved by: https://github.com/jeffdaily
Co-authored-by: Jeff Daily <jeff.daily@amd.com>
parent a9bfc5f70c
commit 577708e6de

7 changed files with 1 addition and 24 deletions
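For context on what the hunks below delete: `skipIfRocm` marks a test as skipped on ROCm builds. Here is a minimal sketch of how such a decorator can work; this is a simplified, hypothetical stand-in, not the actual implementation in torch.testing._internal.common_utils. It supports both the bare `@skipIfRocm` and the `@skipIfRocm(msg=...)` forms that appear in the diff.

import functools
import unittest

import torch

# Assumption for this sketch: ROCm builds of PyTorch report a HIP
# version via torch.version.hip; CUDA/CPU builds report None.
TEST_WITH_ROCM = getattr(torch.version, "hip", None) is not None


def skipIfRocm(func=None, *, msg="test doesn't currently work on the ROCm stack"):
    """Skip the decorated test when running on a ROCm build (sketch only)."""
    def decorator(fn):
        @functools.wraps(fn)
        def wrapper(*args, **kwargs):
            if TEST_WITH_ROCM:
                raise unittest.SkipTest(msg)
            return fn(*args, **kwargs)
        return wrapper
    # Support both @skipIfRocm and @skipIfRocm(msg="...") usage.
    return decorator if func is None else decorator(func)

Unskipping a test is then just a matter of deleting the decorator line, which is all the hunks below do, plus dropping the now-unused import in one file.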
@@ -1354,7 +1354,6 @@ def forward(self, arg0_1, arg1_1, arg2_1, arg3_1, arg4_1):
         self.run_test(bias_mod)
         self.run_test_with_paged_attention(bias_mod)
 
-    @skipIfRocm
     @supported_platform
     def test_fully_masked_out_rows_0_check_gqa(self):
         # Ensure fully masked out rows won't cause NaNs.
@@ -712,7 +712,6 @@ class OptimizeForInferenceTemplate(TestCase):
         self.assertEqual(eager, compiled)
         self.assertTrue(weight_ref() is None)
 
-    @skipIfRocm
     def test_conv_with_as_strided(self):
         class Model(nn.Module):
             def __init__(self, groups):
@@ -305,7 +305,6 @@ class TestMaxAutotune(TestCase):
         with config.patch({"max_autotune": True}):
             torch.compile(mm, dynamic=dynamic)(a, b)
 
-    @skipIfRocm
     def test_precompilation_threads(self):
         import threading
         from typing import Any, Dict
@@ -481,7 +480,6 @@ class TestMaxAutotune(TestCase):
         with config.patch({"max_autotune": True}):
             torch.compile(addmm, dynamic=dynamic)(x, a, b)
 
-    @skipIfRocm
     def test_autotune_conv1x1(self):
         # Assuming input has 3 channels and we want to produce 16 channels as output
         conv1x1 = (
@@ -512,7 +510,6 @@ class TestMaxAutotune(TestCase):
         FileCheck().check_not("extern_kernels.convolution").run(code[0])
         self.assertEqual(conv1x1(input_tensor), out, atol=1e-2, rtol=0)
 
-    @skipIfRocm
     def test_filled_cache_precompile(self):
         def fn(a, b, c):
             a = (a @ b) @ c
@@ -531,7 +528,6 @@ class TestMaxAutotune(TestCase):
         fn_c = torch.compile(mode="max-autotune-no-cudagraphs")(fn)
         self.assertEqual(counters["inductor"]["select_algorithm_precompile"], 0)
 
-    @skipIfRocm
     @fresh_inductor_cache()
     @config.patch(search_autotune_cache=True)
     def test_search_autotune_cache(self):
@@ -547,7 +543,6 @@ class TestMaxAutotune(TestCase):
         self.assertEqual(fn(*inputs), fn_c(*inputs), atol=1e-2, rtol=1e-2)
         self.assertEqual(counters["inductor"]["select_algorithm_precompile"], 0)
 
-    @skipIfRocm
     @fresh_inductor_cache()
     @config.patch(max_autotune=True, max_fusion_size=2)
     def test_jit_fusion_matches_aot_fusion(self):
@@ -990,7 +985,6 @@ class TestMaxAutotuneRemoteCache(TestCase):
         super().tearDown()
         PatchCaches.tearDown()
 
-    @skipIfRocm
     @parametrize("dynamic", (False, True))
     def test_max_autotune_remote_caching(self, dynamic: bool):
         from unittest.mock import patch
@@ -3,12 +3,7 @@
 import sys
 import unittest
 
-from torch.testing._internal.common_utils import (
-    IS_CI,
-    IS_WINDOWS,
-    skipIfRocm,
-    skipIfXpu,
-)
+from torch.testing._internal.common_utils import IS_CI, IS_WINDOWS, skipIfXpu
 from torch.testing._internal.inductor_utils import GPU_TYPE, HAS_GPU, requires_gpu
 
 
@@ -83,7 +78,6 @@ class TestMemoryPlanning(TestCase):
         )
         self.assertTrue(same(f(*args), result))
 
-    @skipIfRocm(msg="test_aot_inductor doesn't work on ROCm")
     @skipIfXpu(msg="aoti doesn't work on XPU")
     def test_aoti(self):
         try:
@@ -143,7 +143,6 @@ class TestPatternMatcher(TestCase):
             ref[indices], test[indices]
         )  # also checks that dtype is correct
 
-    @skipIfRocm
     @skipIfXpu
     @skipCUDAIf(not SM80OrLater, "need sm_80")
     @inductor_config.patch(force_fuse_int_mm_with_mul=True)
@@ -237,7 +236,6 @@ class TestPatternMatcher(TestCase):
         self.assertEqual(f(inp), f_replaced(inp))
         self.assertEqual(count, 2)
 
-    @skipIfRocm
     @skipIfXpu
     @skipCUDAIf(not SM80OrLater, "need sm_80")
     @inductor_config.patch(force_fuse_int_mm_with_mul=True)
@@ -112,8 +112,6 @@ class TestSelectAlgorithm(TestCase):
         )
         self.assertEqual(counters["inductor"]["select_algorithm_autotune"], 1)
 
-    # FIXME: Investigate why _int_mm_out_cuda is not compiled on ROCm
-    @skipIfRocm
     @patches
     def test__int_mm(self):
         @torch.compile
@@ -296,7 +294,6 @@ class TestSelectAlgorithm(TestCase):
         )
         self.assertEqual(counters["inductor"]["select_algorithm_autotune"], 1)
 
-    @skipIfRocm
     @patches
     @torch._inductor.config.patch(conv_1x1_as_mm=False)
     def test_convolution2(self):
@@ -550,7 +550,6 @@ def forward(self, x_1, output_1):
         call_triton(output)
 
     @requires_gpu
-    @skipIfRocm
     def test_triton_kernel_dependancies(self):
         def call_triton(
             x: torch.Tensor,
@@ -669,7 +668,6 @@ def forward(self, x_1, output_1):
 
     @requires_gpu
     @skipIfXpu
-    @skipIfRocm
     def test_triton_kernel_constants(self):
         @triton.jit
         def mulC_kernel(
@@ -754,7 +752,6 @@ def forward(self, x_1, output_1):
         self.assertEqual(compiled_func(t1, t2, output2), torch_add)
 
     @requires_gpu
-    @skipIfRocm  # https://github.com/pytorch/pytorch/actions/runs/10051552819/job/27782048305?pr=131431
     @common_utils.parametrize("backend", ["eager", "aot_eager", "inductor"])
     @patch.object(
         torch._inductor.config, "unsafe_ignore_unsupported_triton_autotune_args", True
@@ -2434,7 +2431,6 @@ class MutationTests(torch._inductor.test_case.TestCase):
         )
 
     @requires_gpu
-    @skipIfRocm
     def test_triton_kernel_inference_mode(self):
         def f(x, y, out):
             n_elements = x.numel()