From d2876b84f13cf3797c57810eb282614118bf2523 Mon Sep 17 00:00:00 2001
From: Guenther Schmuelling
Date: Fri, 7 Feb 2025 13:12:03 -0800
Subject: [PATCH] works

---
 onnxruntime/contrib_ops/webgpu/quantization/matmul_nbits.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/onnxruntime/contrib_ops/webgpu/quantization/matmul_nbits.h b/onnxruntime/contrib_ops/webgpu/quantization/matmul_nbits.h
index a2470d9268..1a32ef4280 100644
--- a/onnxruntime/contrib_ops/webgpu/quantization/matmul_nbits.h
+++ b/onnxruntime/contrib_ops/webgpu/quantization/matmul_nbits.h
@@ -60,7 +60,7 @@ class MatMulNBits final : public WebGpuKernel {
     N_ = info.GetAttr("N");
     block_size_ = info.GetAttr("block_size");
     int64_t bits = info.GetAttr("bits");
-    accuracy_level_ = info.GetAttrOrDefault("accuracy_level", 4);
+    accuracy_level_ = 4; // info.GetAttrOrDefault("accuracy_level", 4);
     ORT_ENFORCE(bits == 4,
                 "Only 4b quantization is supported for MatMulNBits op, additional bits support is planned.");
   }