diff --git a/onnxruntime/contrib_ops/webgpu/quantization/matmul_nbits.h b/onnxruntime/contrib_ops/webgpu/quantization/matmul_nbits.h index a2470d9268..1a32ef4280 100644 --- a/onnxruntime/contrib_ops/webgpu/quantization/matmul_nbits.h +++ b/onnxruntime/contrib_ops/webgpu/quantization/matmul_nbits.h @@ -60,7 +60,7 @@ class MatMulNBits final : public WebGpuKernel { N_ = info.GetAttr("N"); block_size_ = info.GetAttr("block_size"); int64_t bits = info.GetAttr("bits"); - accuracy_level_ = info.GetAttrOrDefault("accuracy_level", 4); + accuracy_level_ = 4; // info.GetAttrOrDefault("accuracy_level", 4); ORT_ENFORCE(bits == 4, "Only 4b quantization is supported for MatMulNBits op, additional bits support is planned."); }