mirror of
https://github.com/saymrwulf/transformers.git
synced 2026-05-14 20:58:08 +00:00
[tests] make cuda-only tests device-agnostic (#35607)
* initial commit * remove unrelated files * further remove * Update test_trainer.py * fix style
This commit is contained in:
parent
e6f9b03464
commit
2fa876d2d8
18 changed files with 57 additions and 47 deletions
|
|
@ -32,7 +32,6 @@ from transformers.testing_utils import (
|
|||
require_accelerate,
|
||||
require_fsdp,
|
||||
require_torch_accelerator,
|
||||
require_torch_gpu,
|
||||
require_torch_multi_accelerator,
|
||||
slow,
|
||||
torch_device,
|
||||
|
|
@ -288,7 +287,7 @@ class TrainerIntegrationFSDP(TestCasePlus, TrainerIntegrationCommon):
|
|||
|
||||
@require_torch_multi_accelerator
|
||||
@slow
|
||||
@require_torch_gpu
|
||||
@require_torch_accelerator
|
||||
@require_fsdp
|
||||
def test_fsdp_cpu_offloading(self):
|
||||
try:
|
||||
|
|
|
|||
|
|
@ -33,6 +33,7 @@ from transformers.testing_utils import (
|
|||
require_flash_attn,
|
||||
require_optimum_quanto,
|
||||
require_torch,
|
||||
require_torch_accelerator,
|
||||
require_torch_gpu,
|
||||
require_torch_multi_accelerator,
|
||||
require_torch_multi_gpu,
|
||||
|
|
@ -2043,7 +2044,7 @@ class GenerationTesterMixin:
|
|||
model.generate(**generation_kwargs, **inputs_dict)
|
||||
|
||||
@pytest.mark.generate
|
||||
@require_torch_gpu
|
||||
@require_torch_accelerator
|
||||
@slow
|
||||
def test_generate_compile_model_forward(self):
|
||||
"""
|
||||
|
|
@ -3791,10 +3792,12 @@ class GenerationIntegrationTests(unittest.TestCase, GenerationIntegrationTestsMi
|
|||
self.assertTrue(input_length <= out.shape[-1] <= input_length + 20)
|
||||
|
||||
@slow
|
||||
@require_torch_gpu
|
||||
@require_torch_accelerator
|
||||
def test_assisted_decoding_model_in_gpu_assistant_in_cpu(self):
|
||||
# PT-only test: TF doesn't support assisted decoding yet.
|
||||
model = AutoModelForCausalLM.from_pretrained("hf-internal-testing/tiny-random-MistralForCausalLM").to("cuda")
|
||||
model = AutoModelForCausalLM.from_pretrained("hf-internal-testing/tiny-random-MistralForCausalLM").to(
|
||||
torch_device
|
||||
)
|
||||
assistant = AutoModelForCausalLM.from_pretrained("hf-internal-testing/tiny-random-MistralForCausalLM").to(
|
||||
"cpu"
|
||||
)
|
||||
|
|
|
|||
|
|
@ -27,6 +27,7 @@ from parameterized import parameterized
|
|||
from transformers import CONFIG_MAPPING, Blip2Config, Blip2QFormerConfig, Blip2VisionConfig
|
||||
from transformers.testing_utils import (
|
||||
require_torch,
|
||||
require_torch_accelerator,
|
||||
require_torch_fp16,
|
||||
require_torch_gpu,
|
||||
require_torch_multi_accelerator,
|
||||
|
|
@ -1565,7 +1566,7 @@ class Blip2TextModelWithProjectionTest(ModelTesterMixin, unittest.TestCase):
|
|||
self.assertListEqual(arg_names[: len(expected_arg_names)], expected_arg_names)
|
||||
|
||||
@slow
|
||||
@require_torch_gpu
|
||||
@require_torch_accelerator
|
||||
def test_model_from_pretrained(self):
|
||||
model_name = "Salesforce/blip2-itm-vit-g"
|
||||
model = Blip2TextModelWithProjection.from_pretrained(model_name)
|
||||
|
|
@ -2191,7 +2192,7 @@ class Blip2ModelIntegrationTest(unittest.TestCase):
|
|||
|
||||
self.assertTrue(generated_text_expanded == generated_text)
|
||||
|
||||
@require_torch_gpu
|
||||
@require_torch_accelerator
|
||||
def test_inference_itm(self):
|
||||
model_name = "Salesforce/blip2-itm-vit-g"
|
||||
processor = Blip2Processor.from_pretrained(model_name)
|
||||
|
|
@ -2210,7 +2211,7 @@ class Blip2ModelIntegrationTest(unittest.TestCase):
|
|||
self.assertTrue(torch.allclose(torch.nn.Softmax()(out_itm[0].cpu()), expected_scores, rtol=1e-3, atol=1e-3))
|
||||
self.assertTrue(torch.allclose(out[0].cpu(), torch.Tensor([[0.4406]]), rtol=1e-3, atol=1e-3))
|
||||
|
||||
@require_torch_gpu
|
||||
@require_torch_accelerator
|
||||
@require_torch_fp16
|
||||
def test_inference_itm_fp16(self):
|
||||
model_name = "Salesforce/blip2-itm-vit-g"
|
||||
|
|
@ -2232,7 +2233,7 @@ class Blip2ModelIntegrationTest(unittest.TestCase):
|
|||
)
|
||||
self.assertTrue(torch.allclose(out[0].cpu().float(), torch.Tensor([[0.4406]]), rtol=1e-3, atol=1e-3))
|
||||
|
||||
@require_torch_gpu
|
||||
@require_torch_accelerator
|
||||
@require_torch_fp16
|
||||
def test_inference_vision_with_projection_fp16(self):
|
||||
model_name = "Salesforce/blip2-itm-vit-g"
|
||||
|
|
@ -2256,7 +2257,7 @@ class Blip2ModelIntegrationTest(unittest.TestCase):
|
|||
]
|
||||
self.assertTrue(np.allclose(out.image_embeds[0][0][:6].tolist(), expected_image_embeds, atol=1e-3))
|
||||
|
||||
@require_torch_gpu
|
||||
@require_torch_accelerator
|
||||
@require_torch_fp16
|
||||
def test_inference_text_with_projection_fp16(self):
|
||||
model_name = "Salesforce/blip2-itm-vit-g"
|
||||
|
|
|
|||
|
|
@ -676,7 +676,7 @@ class DiffLlamaModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTester
|
|||
)
|
||||
|
||||
|
||||
@require_torch_gpu
|
||||
@require_torch_accelerator
|
||||
class DiffLlamaIntegrationTest(unittest.TestCase):
|
||||
# This variable is used to determine which CUDA device are we using for our runners (A10 or T4)
|
||||
# Depending on the hardware we get different logits / generations
|
||||
|
|
@ -689,7 +689,7 @@ class DiffLlamaIntegrationTest(unittest.TestCase):
|
|||
cls.cuda_compute_capability_major_version = torch.cuda.get_device_capability()[0]
|
||||
|
||||
@slow
|
||||
@require_torch_gpu
|
||||
@require_torch_accelerator
|
||||
@require_read_token
|
||||
def test_compile_static_cache(self):
|
||||
# `torch==2.2` will throw an error on this test (as in other compilation tests), but torch==2.1.2 and torch>2.2
|
||||
|
|
|
|||
|
|
@ -23,7 +23,7 @@ from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
|
|||
from transformers.testing_utils import (
|
||||
require_bitsandbytes,
|
||||
require_torch,
|
||||
require_torch_gpu,
|
||||
require_torch_accelerator,
|
||||
require_torch_multi_gpu,
|
||||
slow,
|
||||
torch_device,
|
||||
|
|
@ -426,7 +426,7 @@ class FalconMambaModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTest
|
|||
|
||||
|
||||
@require_torch
|
||||
@require_torch_gpu
|
||||
@require_torch_accelerator
|
||||
@slow
|
||||
class FalconMambaIntegrationTests(unittest.TestCase):
|
||||
def setUp(self):
|
||||
|
|
|
|||
|
|
@ -22,7 +22,7 @@ import requests
|
|||
from parameterized import parameterized
|
||||
|
||||
from transformers import FuyuConfig, is_torch_available, is_vision_available
|
||||
from transformers.testing_utils import require_torch, require_torch_gpu, slow, torch_device
|
||||
from transformers.testing_utils import require_torch, require_torch_accelerator, slow, torch_device
|
||||
from transformers.utils import cached_property
|
||||
|
||||
from ...generation.test_utils import GenerationTesterMixin
|
||||
|
|
@ -327,7 +327,7 @@ class FuyuModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixin
|
|||
|
||||
|
||||
@slow
|
||||
@require_torch_gpu
|
||||
@require_torch_accelerator
|
||||
class FuyuModelIntegrationTest(unittest.TestCase):
|
||||
@cached_property
|
||||
def default_processor(self):
|
||||
|
|
|
|||
|
|
@ -26,7 +26,6 @@ from transformers.testing_utils import (
|
|||
require_read_token,
|
||||
require_torch,
|
||||
require_torch_accelerator,
|
||||
require_torch_gpu,
|
||||
slow,
|
||||
torch_device,
|
||||
)
|
||||
|
|
@ -541,7 +540,7 @@ class LlamaModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixi
|
|||
config = _reinitialize_config(base_config, {"rope_scaling": {"rope_type": "linear"}}) # missing "factor"
|
||||
|
||||
|
||||
@require_torch_gpu
|
||||
@require_torch_accelerator
|
||||
class LlamaIntegrationTest(unittest.TestCase):
|
||||
# This variable is used to determine which CUDA device are we using for our runners (A10 or T4)
|
||||
# Depending on the hardware we get different logits / generations
|
||||
|
|
@ -695,7 +694,7 @@ class LlamaIntegrationTest(unittest.TestCase):
|
|||
self.assertEqual(EXPECTED_TEXT_COMPLETION, text)
|
||||
|
||||
@slow
|
||||
@require_torch_gpu
|
||||
@require_torch_accelerator
|
||||
@require_read_token
|
||||
def test_compile_static_cache(self):
|
||||
# `torch==2.2` will throw an error on this test (as in other compilation tests), but torch==2.1.2 and torch>2.2
|
||||
|
|
|
|||
|
|
@ -424,7 +424,7 @@ class MistralModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMi
|
|||
self.skipTest(reason="Mistral flash attention does not support right padding")
|
||||
|
||||
|
||||
@require_torch_gpu
|
||||
@require_torch_accelerator
|
||||
class MistralIntegrationTest(unittest.TestCase):
|
||||
# This variable is used to determine which CUDA device are we using for our runners (A10 or T4)
|
||||
# Depending on the hardware we get different logits / generations
|
||||
|
|
|
|||
|
|
@ -22,6 +22,7 @@ from transformers import MixtralConfig, is_torch_available
|
|||
from transformers.testing_utils import (
|
||||
require_flash_attn,
|
||||
require_torch,
|
||||
require_torch_accelerator,
|
||||
require_torch_gpu,
|
||||
slow,
|
||||
torch_device,
|
||||
|
|
@ -471,7 +472,7 @@ class MixtralIntegrationTest(unittest.TestCase):
|
|||
cls.cuda_compute_capability_major_version = torch.cuda.get_device_capability()[0]
|
||||
|
||||
@slow
|
||||
@require_torch_gpu
|
||||
@require_torch_accelerator
|
||||
def test_small_model_logits(self):
|
||||
model_id = "hf-internal-testing/Mixtral-tiny"
|
||||
dummy_input = torch.LongTensor([[0, 1, 0], [0, 1, 0]]).to(torch_device)
|
||||
|
|
@ -507,7 +508,7 @@ class MixtralIntegrationTest(unittest.TestCase):
|
|||
)
|
||||
|
||||
@slow
|
||||
@require_torch_gpu
|
||||
@require_torch_accelerator
|
||||
def test_small_model_logits_batched(self):
|
||||
model_id = "hf-internal-testing/Mixtral-tiny"
|
||||
dummy_input = torch.LongTensor([[0, 0, 0, 0, 0, 0, 1, 2, 3], [1, 1, 2, 3, 4, 5, 6, 7, 8]]).to(torch_device)
|
||||
|
|
|
|||
|
|
@ -26,6 +26,7 @@ from transformers.testing_utils import (
|
|||
require_flash_attn,
|
||||
require_read_token,
|
||||
require_torch,
|
||||
require_torch_accelerator,
|
||||
require_torch_gpu,
|
||||
require_torch_sdpa,
|
||||
slow,
|
||||
|
|
@ -103,7 +104,7 @@ class NemotronModelTest(GemmaModelTest):
|
|||
pass
|
||||
|
||||
@require_torch_sdpa
|
||||
@require_torch_gpu
|
||||
@require_torch_accelerator
|
||||
@slow
|
||||
def test_sdpa_equivalence(self):
|
||||
for model_class in self.all_model_classes:
|
||||
|
|
|
|||
|
|
@ -26,7 +26,7 @@ from transformers.file_utils import cached_property
|
|||
from transformers.testing_utils import (
|
||||
require_timm,
|
||||
require_torch,
|
||||
require_torch_gpu,
|
||||
require_torch_accelerator,
|
||||
require_vision,
|
||||
slow,
|
||||
torch_device,
|
||||
|
|
@ -865,7 +865,7 @@ class OmDetTurboModelIntegrationTests(unittest.TestCase):
|
|||
]
|
||||
self.assertListEqual([result["classes"] for result in results], expected_classes)
|
||||
|
||||
@require_torch_gpu
|
||||
@require_torch_accelerator
|
||||
def test_inference_object_detection_head_equivalence_cpu_gpu(self):
|
||||
processor = self.default_processor
|
||||
image = prepare_img()
|
||||
|
|
@ -878,8 +878,8 @@ class OmDetTurboModelIntegrationTests(unittest.TestCase):
|
|||
cpu_outputs = model(**encoding)
|
||||
|
||||
# 2. run model on GPU
|
||||
model.to("cuda")
|
||||
encoding = encoding.to("cuda")
|
||||
model.to(torch_device)
|
||||
encoding = encoding.to(torch_device)
|
||||
with torch.no_grad():
|
||||
gpu_outputs = model(**encoding)
|
||||
|
||||
|
|
|
|||
|
|
@ -28,7 +28,13 @@ from transformers import (
|
|||
is_torch_available,
|
||||
is_vision_available,
|
||||
)
|
||||
from transformers.testing_utils import require_torch, require_torch_gpu, require_vision, slow, torch_device
|
||||
from transformers.testing_utils import (
|
||||
require_torch,
|
||||
require_torch_accelerator,
|
||||
require_vision,
|
||||
slow,
|
||||
torch_device,
|
||||
)
|
||||
from transformers.utils import cached_property
|
||||
|
||||
from ...test_configuration_common import ConfigTester
|
||||
|
|
@ -631,7 +637,7 @@ class RTDetrModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
|
|||
self.assertTrue(not failed_cases, message)
|
||||
|
||||
@parameterized.expand(["float32", "float16", "bfloat16"])
|
||||
@require_torch_gpu
|
||||
@require_torch_accelerator
|
||||
@slow
|
||||
def test_inference_with_different_dtypes(self, torch_dtype_str):
|
||||
torch_dtype = {
|
||||
|
|
@ -653,7 +659,7 @@ class RTDetrModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
|
|||
_ = model(**self._prepare_for_class(inputs_dict, model_class))
|
||||
|
||||
@parameterized.expand(["float32", "float16", "bfloat16"])
|
||||
@require_torch_gpu
|
||||
@require_torch_accelerator
|
||||
@slow
|
||||
def test_inference_equivalence_for_static_and_dynamic_anchors(self, torch_dtype_str):
|
||||
torch_dtype = {
|
||||
|
|
|
|||
|
|
@ -23,6 +23,7 @@ from transformers.testing_utils import (
|
|||
require_bitsandbytes,
|
||||
require_flash_attn,
|
||||
require_torch,
|
||||
require_torch_accelerator,
|
||||
require_torch_gpu,
|
||||
slow,
|
||||
torch_device,
|
||||
|
|
@ -412,7 +413,7 @@ class Starcoder2ModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTeste
|
|||
|
||||
|
||||
@slow
|
||||
@require_torch_gpu
|
||||
@require_torch_accelerator
|
||||
class Starcoder2IntegrationTest(unittest.TestCase):
|
||||
def test_starcoder2_batched_generation_sdpa(self):
|
||||
EXPECTED_TEXT = [
|
||||
|
|
|
|||
|
|
@ -27,7 +27,7 @@ from transformers.testing_utils import (
|
|||
require_sentencepiece,
|
||||
require_tokenizers,
|
||||
require_torch,
|
||||
require_torch_gpu,
|
||||
require_torch_accelerator,
|
||||
slow,
|
||||
torch_device,
|
||||
)
|
||||
|
|
@ -1646,7 +1646,7 @@ class T5ModelIntegrationTests(unittest.TestCase):
|
|||
)
|
||||
|
||||
@slow
|
||||
@require_torch_gpu
|
||||
@require_torch_accelerator
|
||||
def test_compile_static_cache(self):
|
||||
NUM_TOKENS_TO_GENERATE = 40
|
||||
EXPECTED_TEXT_COMPLETION = [
|
||||
|
|
@ -1686,7 +1686,7 @@ class T5ModelIntegrationTests(unittest.TestCase):
|
|||
self.assertEqual(EXPECTED_TEXT_COMPLETION, static_compiled_text)
|
||||
|
||||
@slow
|
||||
@require_torch_gpu
|
||||
@require_torch_accelerator
|
||||
def test_compile_static_cache_encoder(self):
|
||||
prompts = [
|
||||
"summarize: Simply put, the theory of relativity states that 1) the speed of light is constant in all inertial "
|
||||
|
|
|
|||
|
|
@ -28,7 +28,6 @@ from transformers.testing_utils import (
|
|||
require_tf,
|
||||
require_torch,
|
||||
require_torch_accelerator,
|
||||
require_torch_gpu,
|
||||
require_torch_or_tf,
|
||||
torch_device,
|
||||
)
|
||||
|
|
@ -553,7 +552,7 @@ class TextGenerationPipelineTests(unittest.TestCase):
|
|||
|
||||
@require_torch
|
||||
@require_accelerate
|
||||
@require_torch_gpu
|
||||
@require_torch_accelerator
|
||||
def test_small_model_pt_bloom_accelerate(self):
|
||||
import torch
|
||||
|
||||
|
|
|
|||
|
|
@ -21,6 +21,7 @@ from transformers.testing_utils import (
|
|||
require_accelerate,
|
||||
require_optimum_quanto,
|
||||
require_read_token,
|
||||
require_torch_accelerator,
|
||||
require_torch_gpu,
|
||||
slow,
|
||||
torch_device,
|
||||
|
|
@ -123,7 +124,7 @@ class QuantoTestIntegration(unittest.TestCase):
|
|||
|
||||
|
||||
@slow
|
||||
@require_torch_gpu
|
||||
@require_torch_accelerator
|
||||
@require_optimum_quanto
|
||||
@require_accelerate
|
||||
class QuantoQuantizationTest(unittest.TestCase):
|
||||
|
|
@ -268,7 +269,7 @@ class QuantoQuantizationTest(unittest.TestCase):
|
|||
quantize(model.transformer, weights=w_mapping[self.weights])
|
||||
freeze(model.transformer)
|
||||
self.check_same_model(model, self.quantized_model)
|
||||
self.check_inference_correctness(model, device="cuda")
|
||||
self.check_inference_correctness(model, device=torch_device)
|
||||
|
||||
@unittest.skip
|
||||
def test_load_from_quanto_saved(self):
|
||||
|
|
|
|||
|
|
@ -1862,7 +1862,6 @@ class ModelTesterMixin:
|
|||
def test_resize_tokens_embeddings(self):
|
||||
if not self.test_resize_embeddings:
|
||||
self.skipTest(reason="test_resize_embeddings is set to `False`")
|
||||
|
||||
(
|
||||
original_config,
|
||||
inputs_dict,
|
||||
|
|
@ -2017,7 +2016,7 @@ class ModelTesterMixin:
|
|||
torch.testing.assert_close(old_embeddings_mean, new_embeddings_mean, atol=1e-3, rtol=1e-1)
|
||||
|
||||
@require_deepspeed
|
||||
@require_torch_gpu
|
||||
@require_torch_accelerator
|
||||
def test_resize_tokens_embeddings_with_deepspeed(self):
|
||||
ds_config = {
|
||||
"zero_optimization": {
|
||||
|
|
@ -2123,7 +2122,7 @@ class ModelTesterMixin:
|
|||
model(**self._prepare_for_class(inputs_dict, model_class))
|
||||
|
||||
@require_deepspeed
|
||||
@require_torch_gpu
|
||||
@require_torch_accelerator
|
||||
def test_resize_embeddings_untied_with_deepspeed(self):
|
||||
ds_config = {
|
||||
"zero_optimization": {
|
||||
|
|
@ -3202,7 +3201,7 @@ class ModelTesterMixin:
|
|||
|
||||
@require_accelerate
|
||||
@mark.accelerate_tests
|
||||
@require_torch_gpu
|
||||
@require_torch_accelerator
|
||||
def test_disk_offload_bin(self):
|
||||
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
|
||||
|
||||
|
|
@ -3243,7 +3242,7 @@ class ModelTesterMixin:
|
|||
|
||||
@require_accelerate
|
||||
@mark.accelerate_tests
|
||||
@require_torch_gpu
|
||||
@require_torch_accelerator
|
||||
def test_disk_offload_safetensors(self):
|
||||
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
|
||||
|
||||
|
|
@ -3278,7 +3277,7 @@ class ModelTesterMixin:
|
|||
|
||||
@require_accelerate
|
||||
@mark.accelerate_tests
|
||||
@require_torch_gpu
|
||||
@require_torch_accelerator
|
||||
def test_cpu_offload(self):
|
||||
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
|
||||
|
||||
|
|
@ -4746,7 +4745,7 @@ class ModelTesterMixin:
|
|||
torch.testing.assert_close(normalized_0, normalized_1, rtol=1e-3, atol=1e-4)
|
||||
|
||||
@slow
|
||||
@require_torch_gpu
|
||||
@require_torch_accelerator
|
||||
def test_torch_compile_for_training(self):
|
||||
if version.parse(torch.__version__) < version.parse("2.3"):
|
||||
self.skipTest(reason="This test requires torch >= 2.3 to run.")
|
||||
|
|
|
|||
|
|
@ -1831,7 +1831,7 @@ class TrainerIntegrationTest(TestCasePlus, TrainerIntegrationCommon):
|
|||
_ = trainer.train()
|
||||
|
||||
@require_grokadamw
|
||||
@require_torch_gpu
|
||||
@require_torch_accelerator
|
||||
def test_grokadamw(self):
|
||||
config = LlamaConfig(vocab_size=100, hidden_size=32, num_hidden_layers=3, num_attention_heads=4)
|
||||
tiny_llama = LlamaForCausalLM(config)
|
||||
|
|
@ -1852,7 +1852,7 @@ class TrainerIntegrationTest(TestCasePlus, TrainerIntegrationCommon):
|
|||
_ = trainer.train()
|
||||
|
||||
@require_schedulefree
|
||||
@require_torch_gpu
|
||||
@require_torch_accelerator
|
||||
def test_schedulefree_adam(self):
|
||||
config = LlamaConfig(vocab_size=100, hidden_size=32, num_hidden_layers=3, num_attention_heads=4)
|
||||
tiny_llama = LlamaForCausalLM(config)
|
||||
|
|
|
|||
Loading…
Reference in a new issue