From d41e11b3f18bd040ead336332ff0e2c7136851fd Mon Sep 17 00:00:00 2001 From: MekkCyber Date: Sat, 8 Feb 2025 14:37:19 +0000 Subject: [PATCH] update --- src/transformers/integrations/fp8.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/transformers/integrations/fp8.py b/src/transformers/integrations/fp8.py index 6e912a107..33ca6f675 100644 --- a/src/transformers/integrations/fp8.py +++ b/src/transformers/integrations/fp8.py @@ -56,7 +56,7 @@ def act_quant(x: torch.Tensor, block_size: int = 128) -> Tuple[torch.Tensor, tor act_quant_kernel[grid](x, y, s, BLOCK_SIZE=block_size) return y, s - +# Adapted from https://github.com/sgl-project/sglang/blob/main/python/sglang/srt/layers/quantization/fp8_kernel.py @triton.jit def _w8a8_block_fp8_matmul( # Pointers to inputs and output