This commit is contained in:
MekkCyber 2025-02-08 14:37:19 +00:00
parent 68a3234aa8
commit d41e11b3f1

View file

@@ -56,7 +56,7 @@ def act_quant(x: torch.Tensor, block_size: int = 128) -> Tuple[torch.Tensor, tor
act_quant_kernel[grid](x, y, s, BLOCK_SIZE=block_size)
return y, s
# Adapted from https://github.com/sgl-project/sglang/blob/main/python/sglang/srt/layers/quantization/fp8_kernel.py
@triton.jit
def _w8a8_block_fp8_matmul(
# Pointers to inputs and output