From 8bd1f2f33888ec8a86cecd6b9d448d8e26940b63 Mon Sep 17 00:00:00 2001
From: Fanli Lin
Date: Fri, 20 Sep 2024 17:16:43 +0800
Subject: [PATCH] [tests] make more tests device-agnostic (#33580)

* enable

* fix

* add xpu skip

* add marker

* skip for xpu

* add more

* enable on accelerator

* add more cases

* add more tests

* add more
---
 .../grounding_dino/test_modeling_grounding_dino.py   | 8 ++++----
 tests/models/llama/test_modeling_llama.py            | 6 ++++--
 tests/models/mistral/test_modeling_mistral.py        | 5 +++--
 .../recurrent_gemma/test_modeling_recurrent_gemma.py | 4 ++--
 tests/models/univnet/test_modeling_univnet.py        | 7 ++++---
 tests/models/whisper/test_modeling_whisper.py        | 9 ++++++---
 tests/test_modeling_common.py                        | 2 +-
 7 files changed, 24 insertions(+), 17 deletions(-)

diff --git a/tests/models/grounding_dino/test_modeling_grounding_dino.py b/tests/models/grounding_dino/test_modeling_grounding_dino.py
index 1b4970785..c6e9671dd 100644
--- a/tests/models/grounding_dino/test_modeling_grounding_dino.py
+++ b/tests/models/grounding_dino/test_modeling_grounding_dino.py
@@ -30,7 +30,7 @@ from transformers.file_utils import cached_property
 from transformers.testing_utils import (
     require_timm,
     require_torch,
-    require_torch_gpu,
+    require_torch_accelerator,
     require_vision,
     slow,
     torch_device,
@@ -676,7 +676,7 @@ class GroundingDinoModelIntegrationTests(unittest.TestCase):
         self.assertTrue(torch.allclose(results["boxes"][0, :], expected_slice_boxes, atol=1e-2))
         self.assertListEqual(results["labels"], expected_labels)

-    @require_torch_gpu
+    @require_torch_accelerator
     def test_inference_object_detection_head_equivalence_cpu_gpu(self):
         processor = self.default_processor
         image = prepare_img()
@@ -690,8 +690,8 @@ class GroundingDinoModelIntegrationTests(unittest.TestCase):
             cpu_outputs = model(**encoding)

         # 2. run model on GPU
-        model.to("cuda")
-        encoding = encoding.to("cuda")
+        model.to(torch_device)
+        encoding = encoding.to(torch_device)

         with torch.no_grad():
             gpu_outputs = model(**encoding)
diff --git a/tests/models/llama/test_modeling_llama.py b/tests/models/llama/test_modeling_llama.py
index c99357ff9..a21665c82 100644
--- a/tests/models/llama/test_modeling_llama.py
+++ b/tests/models/llama/test_modeling_llama.py
@@ -24,10 +24,12 @@ from parameterized import parameterized
 from transformers import AutoTokenizer, LlamaConfig, StaticCache, is_torch_available, set_seed
 from transformers.testing_utils import (
+    backend_empty_cache,
     require_bitsandbytes,
     require_flash_attn,
     require_read_token,
     require_torch,
+    require_torch_accelerator,
     require_torch_gpu,
     require_torch_sdpa,
     slow,
     torch_device,
@@ -899,11 +901,11 @@ class LlamaIntegrationTest(unittest.TestCase):


 @slow
-@require_torch_gpu
+@require_torch_accelerator
 class Mask4DTestHard(unittest.TestCase):
     def tearDown(self):
         gc.collect()
-        torch.cuda.empty_cache()
+        backend_empty_cache(torch_device)

     def setUp(self):
         model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
diff --git a/tests/models/mistral/test_modeling_mistral.py b/tests/models/mistral/test_modeling_mistral.py
index 0da7ae72a..0730f8ba4 100644
--- a/tests/models/mistral/test_modeling_mistral.py
+++ b/tests/models/mistral/test_modeling_mistral.py
@@ -29,6 +29,7 @@ from transformers.testing_utils import (
     require_flash_attn,
     require_read_token,
     require_torch,
+    require_torch_accelerator,
     require_torch_gpu,
     require_torch_sdpa,
     slow,
@@ -719,14 +720,14 @@ class MistralIntegrationTest(unittest.TestCase):


 @slow
-@require_torch_gpu
+@require_torch_accelerator
 class Mask4DTestHard(unittest.TestCase):
     model_name = "mistralai/Mistral-7B-v0.1"
     _model = None

     def tearDown(self):
         gc.collect()
-        torch.cuda.empty_cache()
+        backend_empty_cache(torch_device)

     @property
     def model(self):
diff --git a/tests/models/recurrent_gemma/test_modeling_recurrent_gemma.py b/tests/models/recurrent_gemma/test_modeling_recurrent_gemma.py
index 1a58ee297..23dace68c 100644
--- a/tests/models/recurrent_gemma/test_modeling_recurrent_gemma.py
+++ b/tests/models/recurrent_gemma/test_modeling_recurrent_gemma.py
@@ -21,7 +21,7 @@ from transformers.testing_utils import (
     require_bitsandbytes,
     require_read_token,
     require_torch,
-    require_torch_gpu,
+    require_torch_accelerator,
     slow,
     torch_device,
 )
@@ -418,7 +418,7 @@ class RecurrentGemmaModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineT
         pass


-@require_torch_gpu
+@require_torch_accelerator
 @slow
 class RecurrentGemmaIntegrationTest(unittest.TestCase):
     input_text = ["Hello I am doing", "Hi today"]
diff --git a/tests/models/univnet/test_modeling_univnet.py b/tests/models/univnet/test_modeling_univnet.py
index e160c799b..f26a423a1 100644
--- a/tests/models/univnet/test_modeling_univnet.py
+++ b/tests/models/univnet/test_modeling_univnet.py
@@ -21,9 +21,10 @@ from datasets import Audio, load_dataset

 from transformers import UnivNetConfig, UnivNetFeatureExtractor
 from transformers.testing_utils import (
+    backend_empty_cache,
     is_torch_available,
     require_torch,
-    require_torch_gpu,
+    require_torch_accelerator,
     slow,
     torch_device,
 )
@@ -207,13 +208,13 @@ class UnivNetModelTest(ModelTesterMixin, unittest.TestCase):
         self.assertTrue(outputs.shape[0] == 1, msg="Unbatched input should create batched output with bsz = 1")


-@require_torch_gpu
+@require_torch_accelerator
 @slow
 class UnivNetModelIntegrationTests(unittest.TestCase):
     def tearDown(self):
         super().tearDown()
         gc.collect()
-        torch.cuda.empty_cache()
+        backend_empty_cache(torch_device)

     def _load_datasamples(self, num_samples, sampling_rate=24000):
         ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
diff --git a/tests/models/whisper/test_modeling_whisper.py b/tests/models/whisper/test_modeling_whisper.py
index 70b38d3bf..4bcf4252a 100644
--- a/tests/models/whisper/test_modeling_whisper.py
+++ b/tests/models/whisper/test_modeling_whisper.py
@@ -34,10 +34,12 @@ from transformers.testing_utils import (
     is_flaky,
     is_pt_flax_cross_test,
     require_flash_attn,
+    require_non_xpu,
     require_torch,
+    require_torch_accelerator,
     require_torch_fp16,
     require_torch_gpu,
-    require_torch_multi_gpu,
+    require_torch_multi_accelerator,
     require_torchaudio,
     slow,
     torch_device,
@@ -2612,6 +2614,7 @@ class WhisperModelIntegrationTests(unittest.TestCase):

         self.assertTrue(prompt in text)

+    @require_non_xpu
     @slow
     @require_torch_gpu
     def test_speculative_decoding_distil(self):
@@ -3239,7 +3242,7 @@ class WhisperModelIntegrationTests(unittest.TestCase):
         for i in range(num_samples):
             assert decoded_all[i] == EXPECTED_TEXT[i]

-    @require_torch_gpu
+    @require_torch_accelerator
     @slow
     def test_whisper_empty_longform(self):
         processor = WhisperProcessor.from_pretrained("openai/whisper-tiny")
@@ -3278,7 +3281,7 @@ class WhisperModelIntegrationTests(unittest.TestCase):
         torch.manual_seed(0)
         model.generate(**inputs, **gen_kwargs)

-    @require_torch_multi_gpu
+    @require_torch_multi_accelerator
     @slow
     def test_whisper_empty_longform_multi_gpu(self):
         processor = WhisperProcessor.from_pretrained("openai/whisper-tiny")
diff --git a/tests/test_modeling_common.py b/tests/test_modeling_common.py
index 1ad6e93b1..d55399a95 100755
--- a/tests/test_modeling_common.py
+++ b/tests/test_modeling_common.py
@@ -4751,7 +4751,7 @@ class ModelTesterMixin:

     # For now, Let's focus only on GPU for `torch.compile`
     @slow
-    @require_torch_gpu
+    @require_torch_accelerator
     @require_read_token
     def test_torch_compile(self):
         if version.parse(torch.__version__) < version.parse("2.3"):
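
For reference, every hunk above performs the same mechanical substitution: `require_torch_gpu` becomes `require_torch_accelerator`, hard-coded `"cuda"` becomes the `torch_device` constant, and `torch.cuda.empty_cache()` becomes `backend_empty_cache(torch_device)`. A minimal sketch of the resulting device-agnostic test pattern follows; it uses only the `transformers.testing_utils` helpers that already appear in the hunks, while the test class name and tensor shapes are illustrative, not taken from the patch.

import gc
import unittest

import torch

from transformers.testing_utils import (
    backend_empty_cache,  # device-agnostic replacement for torch.cuda.empty_cache()
    require_torch_accelerator,  # passes on any accelerator (e.g. CUDA, XPU), not only CUDA
    torch_device,  # device string such as "cuda", "xpu", or "cpu", resolved by testing_utils
)


@require_torch_accelerator
class ExampleDeviceAgnosticTest(unittest.TestCase):
    def tearDown(self):
        gc.collect()
        # Release cached memory on whichever accelerator backend is active.
        backend_empty_cache(torch_device)

    def test_forward(self):
        # Move tensors with torch_device rather than .to("cuda") so the same
        # test runs unchanged on non-CUDA accelerators.
        x = torch.ones(2, 2).to(torch_device)
        self.assertEqual(x.sum().item(), 4.0)

Tests that genuinely cannot run on a given backend are opted out explicitly instead, as the whisper hunk does with `@require_non_xpu` on `test_speculative_decoding_distil`.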