update

2026-05-14 20:58:08 +00:00 · 2025-02-08 14:37:19 +00:00 · 2025-02-08 14:37:19 +00:00 · d41e11b3f1
commit d41e11b3f1
parent 68a3234aa8
1 changed files with 1 additions and 1 deletions
--- a/src/transformers/integrations/fp8.py
+++ b/src/transformers/integrations/fp8.py
@@ -56,7 +56,7 @@ def act_quant(x: torch.Tensor, block_size: int = 128) -> Tuple[torch.Tensor, tor
    act_quant_kernel[grid](x, y, s, BLOCK_SIZE=block_size)
    return y, s

-
+# Adapted from https://github.com/sgl-project/sglang/blob/main/python/sglang/srt/layers/quantization/fp8_kernel.py 
 @triton.jit
 def _w8a8_block_fp8_matmul(
    # Pointers to inputs and output