This commit is contained in:
MekkCyber 2025-02-10 07:06:53 +00:00
parent 1a5bfc1de5
commit 820ae396a4

View file

@@ -227,7 +227,7 @@ def w8a8_block_fp8_matmul_triton(
return C
# Python version of the above triton function
# Python version of the above Triton function; it is much slower than the Triton version.
@torch.compile
def w8a8_block_fp8_matmul_compile(
input_q: torch.Tensor, # [batch, seq_len, hidden_dim]