mirror of
https://github.com/saymrwulf/onnxruntime.git
synced 2026-05-26 22:35:43 +00:00
Fix a prefast warning (#15343)
### Description
<!-- Describe your changes. -->

### Motivation and Context
<!-- - Why is this change required? What problem does it solve?
- If it fixes an open issue, please link to the issue here. -->
https://aiinfra.visualstudio.com/ONNX%20Runtime/_workitems/edit/14272/?triage=true
This commit is contained in:
parent
44027797b0
commit
dec11afb83
1 changed files with 8 additions and 3 deletions
|
|
@ -107,9 +107,13 @@ Status DecoderMaskedMultiHeadAttention<T1, T2>::ComputeInternal(OpKernelContext*
|
|||
output_shape[2] = static_cast<int64_t>(parameters.v_hidden_size);
|
||||
Tensor* output = context->Output(0, output_shape);
|
||||
|
||||
// Present input will have the same shape as the past input
|
||||
Tensor* present_key = context->Output(kPresentOutputIndex, past_key->Shape());
|
||||
Tensor* present_value = context->Output(kPresentOutputIndex + 1, past_value->Shape());
|
||||
std::vector<int64_t> present_dims{
|
||||
parameters.batch_size, parameters.num_heads,
|
||||
past_present_share_buffer_ ? parameters.max_sequence_length : parameters.total_sequence_length,
|
||||
parameters.head_size};
|
||||
TensorShape present_shape(present_dims);
|
||||
Tensor* present_key = context->Output(kPresentOutputIndex, present_shape);
|
||||
Tensor* present_value = context->Output(kPresentOutputIndex + 1, present_shape);
|
||||
|
||||
auto cuda_stream = Stream(context);
|
||||
|
||||
|
|
@ -139,6 +143,7 @@ Status DecoderMaskedMultiHeadAttention<T1, T2>::ComputeInternal(OpKernelContext*
|
|||
} else {
|
||||
// Sanity check
|
||||
ORT_ENFORCE(past_present_share_buffer_);
|
||||
ORT_ENFORCE(past_key != nullptr && past_value != nullptr);
|
||||
|
||||
auto* present_key_data = present_key->MutableData<T1>();
|
||||
auto* present_value_data = present_value->MutableData<T1>();
|
||||
|
|
|
|||
Loading…
Reference in a new issue