From 8bd1f2f33888ec8a86cecd6b9d448d8e26940b63 Mon Sep 17 00:00:00 2001
From: Fanli Lin
Date: Fri, 20 Sep 2024 17:16:43 +0800
Subject: [PATCH] [tests] make more tests device-agnostic (#33580)

* enable

* fix

* add xpu skip

* add marker

* skip for xpu

* add more

* enable on accelerator

* add more cases

* add more tests

* add more
---
 .../grounding_dino/test_modeling_grounding_dino.py   | 8 ++++----
 tests/models/llama/test_modeling_llama.py            | 6 ++++--
 tests/models/mistral/test_modeling_mistral.py        | 5 +++--
 .../recurrent_gemma/test_modeling_recurrent_gemma.py | 4 ++--
 tests/models/univnet/test_modeling_univnet.py        | 7 ++++---
 tests/models/whisper/test_modeling_whisper.py        | 9 ++++++---
 tests/test_modeling_common.py                        | 2 +-
 7 files changed, 24 insertions(+), 17 deletions(-)

diff --git a/tests/models/grounding_dino/test_modeling_grounding_dino.py b/tests/models/grounding_dino/test_modeling_grounding_dino.py
index 1b4970785..c6e9671dd 100644
--- a/tests/models/grounding_dino/test_modeling_grounding_dino.py
+++ b/tests/models/grounding_dino/test_modeling_grounding_dino.py
@@ -30,7 +30,7 @@ from transformers.file_utils import cached_property
 from transformers.testing_utils import (
     require_timm,
     require_torch,
-    require_torch_gpu,
+    require_torch_accelerator,
     require_vision,
     slow,
     torch_device,
@@ -676,7 +676,7 @@ class GroundingDinoModelIntegrationTests(unittest.TestCase):
         self.assertTrue(torch.allclose(results["boxes"][0, :], expected_slice_boxes, atol=1e-2))
         self.assertListEqual(results["labels"], expected_labels)

-    @require_torch_gpu
+    @require_torch_accelerator
     def test_inference_object_detection_head_equivalence_cpu_gpu(self):
         processor = self.default_processor
         image = prepare_img()
@@ -690,8 +690,8 @@ class GroundingDinoModelIntegrationTests(unittest.TestCase):
             cpu_outputs = model(**encoding)

         # 2. run model on GPU
-        model.to("cuda")
-        encoding = encoding.to("cuda")
+        model.to(torch_device)
+        encoding = encoding.to(torch_device)

         with torch.no_grad():
             gpu_outputs = model(**encoding)
diff --git a/tests/models/llama/test_modeling_llama.py b/tests/models/llama/test_modeling_llama.py
index c99357ff9..a21665c82 100644
--- a/tests/models/llama/test_modeling_llama.py
+++ b/tests/models/llama/test_modeling_llama.py
@@ -24,10 +24,12 @@ from parameterized import parameterized
 from transformers import AutoTokenizer, LlamaConfig, StaticCache, is_torch_available, set_seed
 from transformers.testing_utils import (
+    backend_empty_cache,
     require_bitsandbytes,
     require_flash_attn,
     require_read_token,
     require_torch,
+    require_torch_accelerator,
     require_torch_gpu,
     require_torch_sdpa,
     slow,
     torch_device,
@@ -899,11 +901,11 @@ class LlamaIntegrationTest(unittest.TestCase):


 @slow
-@require_torch_gpu
+@require_torch_accelerator
 class Mask4DTestHard(unittest.TestCase):
     def tearDown(self):
         gc.collect()
-        torch.cuda.empty_cache()
+        backend_empty_cache(torch_device)

     def setUp(self):
         model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
diff --git a/tests/models/mistral/test_modeling_mistral.py b/tests/models/mistral/test_modeling_mistral.py
index 0da7ae72a..0730f8ba4 100644
--- a/tests/models/mistral/test_modeling_mistral.py
+++ b/tests/models/mistral/test_modeling_mistral.py
@@ -29,6 +29,7 @@ from transformers.testing_utils import (
     require_flash_attn,
     require_read_token,
     require_torch,
+    require_torch_accelerator,
     require_torch_gpu,
     require_torch_sdpa,
     slow,
@@ -719,14 +720,14 @@ class MistralIntegrationTest(unittest.TestCase):


 @slow
-@require_torch_gpu
+@require_torch_accelerator
 class Mask4DTestHard(unittest.TestCase):
     model_name = "mistralai/Mistral-7B-v0.1"
     _model = None

     def tearDown(self):
         gc.collect()
-        torch.cuda.empty_cache()
+        backend_empty_cache(torch_device)

     @property
     def model(self):
diff --git a/tests/models/recurrent_gemma/test_modeling_recurrent_gemma.py b/tests/models/recurrent_gemma/test_modeling_recurrent_gemma.py
index 1a58ee297..23dace68c 100644
--- a/tests/models/recurrent_gemma/test_modeling_recurrent_gemma.py
+++ b/tests/models/recurrent_gemma/test_modeling_recurrent_gemma.py
@@ -21,7 +21,7 @@ from transformers.testing_utils import (
     require_bitsandbytes,
     require_read_token,
     require_torch,
-    require_torch_gpu,
+    require_torch_accelerator,
     slow,
     torch_device,
 )
@@ -418,7 +418,7 @@ class RecurrentGemmaModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineT
         pass


-@require_torch_gpu
+@require_torch_accelerator
 @slow
 class RecurrentGemmaIntegrationTest(unittest.TestCase):
     input_text = ["Hello I am doing", "Hi today"]
diff --git a/tests/models/univnet/test_modeling_univnet.py b/tests/models/univnet/test_modeling_univnet.py
index e160c799b..f26a423a1 100644
--- a/tests/models/univnet/test_modeling_univnet.py
+++ b/tests/models/univnet/test_modeling_univnet.py
@@ -21,9 +21,10 @@ from datasets import Audio, load_dataset

 from transformers import UnivNetConfig, UnivNetFeatureExtractor
 from transformers.testing_utils import (
+    backend_empty_cache,
     is_torch_available,
     require_torch,
-    require_torch_gpu,
+    require_torch_accelerator,
     slow,
     torch_device,
 )
@@ -207,13 +208,13 @@ class UnivNetModelTest(ModelTesterMixin, unittest.TestCase):
         self.assertTrue(outputs.shape[0] == 1, msg="Unbatched input should create batched output with bsz = 1")


-@require_torch_gpu
+@require_torch_accelerator
 @slow
 class UnivNetModelIntegrationTests(unittest.TestCase):
     def tearDown(self):
         super().tearDown()
         gc.collect()
-        torch.cuda.empty_cache()
+        backend_empty_cache(torch_device)

     def _load_datasamples(self, num_samples, sampling_rate=24000):
         ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
diff --git a/tests/models/whisper/test_modeling_whisper.py b/tests/models/whisper/test_modeling_whisper.py
index 70b38d3bf..4bcf4252a 100644
--- a/tests/models/whisper/test_modeling_whisper.py
+++ b/tests/models/whisper/test_modeling_whisper.py
@@ -34,10 +34,12 @@ from transformers.testing_utils import (
     is_flaky,
     is_pt_flax_cross_test,
     require_flash_attn,
+    require_non_xpu,
     require_torch,
+    require_torch_accelerator,
     require_torch_fp16,
     require_torch_gpu,
-    require_torch_multi_gpu,
+    require_torch_multi_accelerator,
     require_torchaudio,
     slow,
     torch_device,
@@ -2612,6 +2614,7 @@ class WhisperModelIntegrationTests(unittest.TestCase):

         self.assertTrue(prompt in text)

+    @require_non_xpu
     @slow
     @require_torch_gpu
     def test_speculative_decoding_distil(self):
@@ -3239,7 +3242,7 @@ class WhisperModelIntegrationTests(unittest.TestCase):
         for i in range(num_samples):
             assert decoded_all[i] == EXPECTED_TEXT[i]

-    @require_torch_gpu
+    @require_torch_accelerator
     @slow
     def test_whisper_empty_longform(self):
         processor = WhisperProcessor.from_pretrained("openai/whisper-tiny")
@@ -3278,7 +3281,7 @@ class WhisperModelIntegrationTests(unittest.TestCase):
         torch.manual_seed(0)
         model.generate(**inputs, **gen_kwargs)

-    @require_torch_multi_gpu
+    @require_torch_multi_accelerator
     @slow
     def test_whisper_empty_longform_multi_gpu(self):
         processor = WhisperProcessor.from_pretrained("openai/whisper-tiny")
diff --git a/tests/test_modeling_common.py b/tests/test_modeling_common.py
index 1ad6e93b1..d55399a95 100755
--- a/tests/test_modeling_common.py
+++ b/tests/test_modeling_common.py
@@ -4751,7 +4751,7 @@ class ModelTesterMixin:

     # For now, Let's focus only on GPU for `torch.compile`
     @slow
-    @require_torch_gpu
+    @require_torch_accelerator
     @require_read_token
     def test_torch_compile(self):
         if version.parse(torch.__version__) < version.parse("2.3"):
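
For reference, every hunk above performs the same mechanical substitution: `require_torch_gpu` becomes `require_torch_accelerator`, hard-coded `"cuda"` becomes the `torch_device` constant, and `torch.cuda.empty_cache()` becomes `backend_empty_cache(torch_device)`. A minimal sketch of the resulting device-agnostic test pattern follows; it uses only the `transformers.testing_utils` helpers that already appear in the hunks, while the test class name and tensor shapes are illustrative, not taken from the patch.

import gc
import unittest

import torch

from transformers.testing_utils import (
    backend_empty_cache,  # device-agnostic replacement for torch.cuda.empty_cache()
    require_torch_accelerator,  # passes on any accelerator (e.g. CUDA, XPU), not only CUDA
    torch_device,  # device string such as "cuda", "xpu", or "cpu", resolved by testing_utils
)


@require_torch_accelerator
class ExampleDeviceAgnosticTest(unittest.TestCase):
    def tearDown(self):
        gc.collect()
        # Release cached memory on whichever accelerator backend is active.
        backend_empty_cache(torch_device)

    def test_forward(self):
        # Move tensors with torch_device rather than .to("cuda") so the same
        # test runs unchanged on non-CUDA accelerators.
        x = torch.ones(2, 2).to(torch_device)
        self.assertEqual(x.sum().item(), 4.0)

Tests that genuinely cannot run on a given backend are opted out explicitly instead, as the whisper hunk does with `@require_non_xpu` on `test_speculative_decoding_distil`.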