Fix a prefast warning (#15343)

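### Description
Build the shape of the `present_key`/`present_value` outputs from the attention parameters instead of reading it from `past_key`/`past_value`, and enforce that both past tensors are non-null in the branch that requires `past_present_share_buffer_`.

### Motivation and Context
https://aiinfra.visualstudio.com/ONNX%20Runtime/_workitems/edit/14272/?triage=true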
2026-07-13 18:08:13 +00:00 · 2023-04-03 18:25:25 -07:00 · 2023-04-03 18:25:25 -07:00 · dec11afb83
commit dec11afb83
parent 44027797b0
1 changed file with 8 additions and 3 deletions
--- a/onnxruntime/contrib_ops/cuda/decoder/decoder_masked_multihead_attention.cc
+++ b/onnxruntime/contrib_ops/cuda/decoder/decoder_masked_multihead_attention.cc
@@ -107,9 +107,13 @@ Status DecoderMaskedMultiHeadAttention<T1, T2>::ComputeInternal(OpKernelContext*
  output_shape[2] = static_cast<int64_t>(parameters.v_hidden_size);
  Tensor* output = context->Output(0, output_shape);

-  // Present input will have the same shape as the past input
-  Tensor* present_key = context->Output(kPresentOutputIndex, past_key->Shape());
-  Tensor* present_value = context->Output(kPresentOutputIndex + 1, past_value->Shape());
+  std::vector<int64_t> present_dims{
+    parameters.batch_size, parameters.num_heads,
+    past_present_share_buffer_ ? parameters.max_sequence_length : parameters.total_sequence_length,
+    parameters.head_size};
+  TensorShape present_shape(present_dims);
+  Tensor* present_key = context->Output(kPresentOutputIndex, present_shape);
+  Tensor* present_value = context->Output(kPresentOutputIndex + 1, present_shape);

  auto cuda_stream = Stream(context);

@@ -139,6 +143,7 @@ Status DecoderMaskedMultiHeadAttention<T1, T2>::ComputeInternal(OpKernelContext*
  } else {
    // Sanity check
    ORT_ENFORCE(past_present_share_buffer_);
+    ORT_ENFORCE(past_key != nullptr && past_value != nullptr);

    auto* present_key_data = present_key->MutableData<T1>();
    auto* present_value_data = present_value->MutableData<T1>();