diff --git a/src/transformers/integrations/fp8.py b/src/transformers/integrations/fp8.py
index a3a369059..2e11b889f 100644
--- a/src/transformers/integrations/fp8.py
+++ b/src/transformers/integrations/fp8.py
@@ -28,7 +28,6 @@
 if is_accelerate_available():
     from accelerate import init_empty_weights
 
 
-
 logger = logging.get_logger(__name__)
 
diff --git a/tests/quantization/fp8_integration/test_fp8.py b/tests/quantization/fp8_integration/test_fp8.py
index f60d6bb81..f02dfc484 100644
--- a/tests/quantization/fp8_integration/test_fp8.py
+++ b/tests/quantization/fp8_integration/test_fp8.py
@@ -33,6 +33,7 @@ if is_torch_available():
 if is_accelerate_available():
     from accelerate import init_empty_weights
 
+
 @require_torch_gpu
 class FP8ConfigTest(unittest.TestCase):
     def test_to_dict(self):
@@ -200,6 +201,7 @@ class FP8QuantizerTest(unittest.TestCase):
 @require_torch_gpu
 class FP8LinearTest(unittest.TestCase):
     device = "cuda"
+
     def test_linear_preserves_shape(self):
         """
         Test that FP8Linear preserves shape when in_features == out_features.
@@ -218,7 +220,6 @@ class FP8LinearTest(unittest.TestCase):
         """
         from transformers.integrations import FP8Linear
 
-
         linear = FP8Linear(128, 256, device=self.device)
         x = torch.rand((1, 5, 128)).to(self.device)
 
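
The final hunk truncates mid-test. For reference, a minimal self-contained sketch of the behavior those visible lines appear to exercise, assuming FP8Linear follows the usual nn.Linear shape contract; the no_grad guard and the assertion are illustrative assumptions, not copied from the test file:

# Sketch only: reconstructs what the truncated test hunk appears to check.
# The diff itself shows that FP8Linear is importable from
# transformers.integrations; the assertion below is an assumption.
import torch

from transformers.integrations import FP8Linear

linear = FP8Linear(128, 256, device="cuda")
x = torch.rand((1, 5, 128)).to("cuda")
with torch.no_grad():
    y = linear(x)
# A linear layer maps only the trailing dimension, in_features -> out_features,
# so a (1, 5, 128) input should yield a (1, 5, 256) output.
assert y.shape == (1, 5, 256)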