[tests] make more tests device-agnostic (#33580)

* enable

* fix

* add xpu skip

* add marker

* skip for xpu

* add more

* enable on accelerator

* add more cases

* add more tests

* add more
Fanli Lin 2024-09-20 17:16:43 +08:00 committed by GitHub
parent 31650a53a1
commit 8bd1f2f338
7 changed files with 24 additions and 17 deletions

View file

@@ -30,7 +30,7 @@ from transformers.file_utils import cached_property
 from transformers.testing_utils import (
     require_timm,
     require_torch,
-    require_torch_gpu,
+    require_torch_accelerator,
     require_vision,
     slow,
     torch_device,
@@ -676,7 +676,7 @@ class GroundingDinoModelIntegrationTests(unittest.TestCase):
         self.assertTrue(torch.allclose(results["boxes"][0, :], expected_slice_boxes, atol=1e-2))
         self.assertListEqual(results["labels"], expected_labels)
 
-    @require_torch_gpu
+    @require_torch_accelerator
     def test_inference_object_detection_head_equivalence_cpu_gpu(self):
         processor = self.default_processor
         image = prepare_img()
@@ -690,8 +690,8 @@ class GroundingDinoModelIntegrationTests(unittest.TestCase):
             cpu_outputs = model(**encoding)
 
         # 2. run model on GPU
-        model.to("cuda")
-        encoding = encoding.to("cuda")
+        model.to(torch_device)
+        encoding = encoding.to(torch_device)
 
         with torch.no_grad():
             gpu_outputs = model(**encoding)

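This pattern is the core of the PR: `model.to("cuda")` hard-codes CUDA, while `torch_device` (exported by `transformers.testing_utils`) resolves to whichever backend the test session is running on. A minimal sketch of the same CPU/accelerator equivalence check, using a toy module in place of the real Grounding DINO model and processor:

    import torch

    from transformers.testing_utils import require_torch_accelerator, torch_device

    @require_torch_accelerator
    def test_cpu_accelerator_equivalence():
        # Toy stand-in for the real model and inputs used in the diff above.
        model = torch.nn.Linear(4, 4).eval()
        inputs = torch.randn(2, 4)

        with torch.no_grad():
            cpu_outputs = model(inputs)

        model.to(torch_device)            # instead of model.to("cuda")
        inputs = inputs.to(torch_device)  # instead of encoding.to("cuda")
        with torch.no_grad():
            accelerator_outputs = model(inputs)

        torch.testing.assert_close(cpu_outputs, accelerator_outputs.cpu(), atol=1e-4, rtol=1e-4)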
View file

@@ -24,10 +24,12 @@ from parameterized import parameterized
 from transformers import AutoTokenizer, LlamaConfig, StaticCache, is_torch_available, set_seed
 from transformers.testing_utils import (
+    backend_empty_cache,
     require_bitsandbytes,
     require_flash_attn,
     require_read_token,
     require_torch,
+    require_torch_accelerator,
     require_torch_gpu,
     require_torch_sdpa,
     slow,
@@ -899,11 +901,11 @@ class LlamaIntegrationTest(unittest.TestCase):
 
 @slow
-@require_torch_gpu
+@require_torch_accelerator
 class Mask4DTestHard(unittest.TestCase):
     def tearDown(self):
         gc.collect()
-        torch.cuda.empty_cache()
+        backend_empty_cache(torch_device)
 
     def setUp(self):
         model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"

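`backend_empty_cache(torch_device)` replaces the CUDA-only `torch.cuda.empty_cache()` with a dispatch on the active backend. A rough sketch of such a helper, under a hypothetical name (the real implementation in `transformers.testing_utils` covers more backends and edge cases):

    import torch

    def backend_empty_cache_sketch(device: str) -> None:
        # Release cached allocator memory on whichever backend `device` names.
        if device == "cuda":
            torch.cuda.empty_cache()
        elif device == "xpu":
            torch.xpu.empty_cache()
        elif device == "mps":
            torch.mps.empty_cache()
        # "cpu" has no device allocator cache to clear.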
View file

@@ -29,6 +29,7 @@ from transformers.testing_utils import (
     require_flash_attn,
     require_read_token,
     require_torch,
+    require_torch_accelerator,
     require_torch_gpu,
     require_torch_sdpa,
     slow,
@@ -719,14 +720,14 @@ class MistralIntegrationTest(unittest.TestCase):
 
 @slow
-@require_torch_gpu
+@require_torch_accelerator
 class Mask4DTestHard(unittest.TestCase):
     model_name = "mistralai/Mistral-7B-v0.1"
     _model = None
 
     def tearDown(self):
         gc.collect()
-        torch.cuda.empty_cache()
+        backend_empty_cache(torch_device)
 
     @property
     def model(self):

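Mistral's `Mask4DTestHard` gets the same two changes as Llama's: the class-level decorator and the teardown. Pulled out on its own, the device-agnostic teardown pattern looks like this (sketch only; the real classes also load and cache a full checkpoint):

    import gc
    import unittest

    from transformers.testing_utils import (
        backend_empty_cache,
        require_torch_accelerator,
        torch_device,
    )

    @require_torch_accelerator
    class Mask4DStyleTest(unittest.TestCase):
        def tearDown(self):
            # Drop Python references first, then free the backend's cache.
            gc.collect()
            backend_empty_cache(torch_device)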
View file

@@ -21,7 +21,7 @@ from transformers.testing_utils import (
     require_bitsandbytes,
     require_read_token,
     require_torch,
-    require_torch_gpu,
+    require_torch_accelerator,
     slow,
     torch_device,
 )
@@ -418,7 +418,7 @@ class RecurrentGemmaModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineT
         pass
 
-@require_torch_gpu
+@require_torch_accelerator
 @slow
 class RecurrentGemmaIntegrationTest(unittest.TestCase):
     input_text = ["Hello I am doing", "Hi today"]

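`require_torch_accelerator` is a skip marker, not a device switch: it only controls whether the test runs at all. A hedged sketch of how such a marker can be built on `unittest` (hypothetical name; the real helper in `transformers.testing_utils` is more thorough):

    import unittest

    from transformers.testing_utils import torch_device

    def require_accelerator_sketch(test_case):
        # Skip unless `torch_device` resolved to something other than "cpu".
        return unittest.skipUnless(torch_device != "cpu", "test requires an accelerator")(test_case)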
View file

@@ -21,9 +21,10 @@ from datasets import Audio, load_dataset
 from transformers import UnivNetConfig, UnivNetFeatureExtractor
 from transformers.testing_utils import (
+    backend_empty_cache,
     is_torch_available,
     require_torch,
-    require_torch_gpu,
+    require_torch_accelerator,
     slow,
     torch_device,
 )
@@ -207,13 +208,13 @@ class UnivNetModelTest(ModelTesterMixin, unittest.TestCase):
         self.assertTrue(outputs.shape[0] == 1, msg="Unbatched input should create batched output with bsz = 1")
 
 
-@require_torch_gpu
+@require_torch_accelerator
 @slow
 class UnivNetModelIntegrationTests(unittest.TestCase):
     def tearDown(self):
         super().tearDown()
         gc.collect()
-        torch.cuda.empty_cache()
+        backend_empty_cache(torch_device)
 
     def _load_datasamples(self, num_samples, sampling_rate=24000):
         ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")

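Everything above hinges on how `torch_device` is chosen. A simplified sketch of the resolution logic (hypothetical function name; transformers also honors the `TRANSFORMERS_TEST_DEVICE` environment variable, which is what lets CI point the same suite at CUDA or XPU):

    import os

    import torch

    def resolve_test_device_sketch() -> str:
        override = os.environ.get("TRANSFORMERS_TEST_DEVICE")
        if override:
            return override  # e.g. TRANSFORMERS_TEST_DEVICE=xpu pytest tests/models/univnet
        if torch.cuda.is_available():
            return "cuda"
        if hasattr(torch, "xpu") and torch.xpu.is_available():
            return "xpu"
        return "cpu"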
View file

@@ -34,10 +34,12 @@ from transformers.testing_utils import (
     is_flaky,
     is_pt_flax_cross_test,
     require_flash_attn,
+    require_non_xpu,
     require_torch,
+    require_torch_accelerator,
     require_torch_fp16,
     require_torch_gpu,
-    require_torch_multi_gpu,
+    require_torch_multi_accelerator,
     require_torchaudio,
     slow,
     torch_device,
@@ -2612,6 +2614,7 @@ class WhisperModelIntegrationTests(unittest.TestCase):
         self.assertTrue(prompt in text)
 
+    @require_non_xpu
     @slow
     @require_torch_gpu
     def test_speculative_decoding_distil(self):
@@ -3239,7 +3242,7 @@ class WhisperModelIntegrationTests(unittest.TestCase):
         for i in range(num_samples):
             assert decoded_all[i] == EXPECTED_TEXT[i]
 
-    @require_torch_gpu
+    @require_torch_accelerator
     @slow
     def test_whisper_empty_longform(self):
         processor = WhisperProcessor.from_pretrained("openai/whisper-tiny")
@@ -3278,7 +3281,7 @@ class WhisperModelIntegrationTests(unittest.TestCase):
         torch.manual_seed(0)
         model.generate(**inputs, **gen_kwargs)
 
-    @require_torch_multi_gpu
+    @require_torch_multi_accelerator
     @slow
     def test_whisper_empty_longform_multi_gpu(self):
         processor = WhisperProcessor.from_pretrained("openai/whisper-tiny")

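The Whisper file shows both directions of the migration: most tests become accelerator-generic, while `test_speculative_decoding_distil` gains `@require_non_xpu` and stays CUDA-gated, marking it as not yet working on XPU. A negative marker like that can be sketched as the mirror image of the positive one (hypothetical name; the real helper lives in `transformers.testing_utils`):

    import unittest

    from transformers.testing_utils import torch_device

    def require_non_xpu_sketch(test_case):
        # Skip only when the active backend is an Intel XPU.
        return unittest.skipUnless(torch_device != "xpu", "test requires a non-XPU device")(test_case)

`require_torch_multi_accelerator` follows the same idea as `require_torch_accelerator`, but additionally requires more than one device of the active backend.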
View file

@@ -4751,7 +4751,7 @@ class ModelTesterMixin:
     # For now, Let's focus only on GPU for `torch.compile`
     @slow
-    @require_torch_gpu
+    @require_torch_accelerator
     @require_read_token
     def test_torch_compile(self):
         if version.parse(torch.__version__) < version.parse("2.3"):
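
With the decorator swapped, the shared `torch.compile` test is gated on any accelerator rather than CUDA specifically; the torch >= 2.3 version check stays, since compile support is version-dependent. A self-contained smoke test in the same spirit (sketch only; the real `test_torch_compile` compiles full pretrained models):

    import torch

    from transformers.testing_utils import require_torch_accelerator, torch_device

    @require_torch_accelerator
    def test_compile_smoke():
        model = torch.nn.Linear(8, 8).to(torch_device).eval()
        compiled = torch.compile(model)
        x = torch.randn(2, 8, device=torch_device)
        with torch.no_grad():
            # Compiled and eager outputs should agree within tolerance.
            torch.testing.assert_close(compiled(x), model(x))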