From 2b45410e52e55dd16ac45802f8683c8bd64de6c2 Mon Sep 17 00:00:00 2001
From: Adrian Lizarraga
Date: Wed, 4 Jan 2023 19:32:06 -0800
Subject: [PATCH] Fix Prefast warning in CUDA contrib op (#14074)

### Description
Fixes Prefast C26814

```shell
onnxruntime::contrib::cuda::QAttention::ComputeInternal
onnxruntime/contrib_ops/cuda/quantization/attention_quantization.cc
The const variable 'element_size' can be computed at compile-time. Consider using constexpr (con.5).
```
---
 .../contrib_ops/cuda/quantization/attention_quantization.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/onnxruntime/contrib_ops/cuda/quantization/attention_quantization.cc b/onnxruntime/contrib_ops/cuda/quantization/attention_quantization.cc
index 664d01ab42..5757c4b388 100644
--- a/onnxruntime/contrib_ops/cuda/quantization/attention_quantization.cc
+++ b/onnxruntime/contrib_ops/cuda/quantization/attention_quantization.cc
@@ -132,7 +132,7 @@ Status QAttention::ComputeInternal(OpKernelContext* context) const {
   Tensor* output = context->Output(0, output_shape);
 
   cublasHandle_t cublas = GetCublasHandle(context);
-  const size_t element_size = sizeof(T);
+  constexpr size_t element_size = sizeof(T);
 
   // Use GEMM for fully connection.
   int m = batch_size * sequence_length;