Fix Prefast warning in CUDA contrib op (#14074)

### Description
Fixes Prefast warning C26814:

```shell
onnxruntime::contrib::cuda::QAttention<onnxruntime::MLFloat16,signed char>::ComputeInternal
onnxruntime/contrib_ops/cuda/quantization/attention_quantization.cc
The const variable 'element_size' can be computed at compile-time. Consider using constexpr (con.5).
```
This commit is contained in:
Adrian Lizarraga 2023-01-04 19:32:06 -08:00 committed by GitHub
parent 68794d0ac1
commit 2b45410e52
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23

View file

@@ -132,7 +132,7 @@ Status QAttention<T, int8_t>::ComputeInternal(OpKernelContext* context) const {
Tensor* output = context->Output(0, output_shape);
cublasHandle_t cublas = GetCublasHandle(context);
-const size_t element_size = sizeof(T);
+constexpr size_t element_size = sizeof(T);
// Use GEMM for fully connection.
int m = batch_size * sequence_length;