diff --git a/onnxruntime/contrib_ops/cpu/bert/attention.cc b/onnxruntime/contrib_ops/cpu/bert/attention.cc index f81b3d109f..2e04de2161 100644 --- a/onnxruntime/contrib_ops/cpu/bert/attention.cc +++ b/onnxruntime/contrib_ops/cpu/bert/attention.cc @@ -319,7 +319,7 @@ Status Attention::Compute(OpKernelContext* context) const { for (std::ptrdiff_t i = begin; i != end; ++i) { T* current_tmp_data = reinterpret_cast(out_tmp_data) + sequence_length_mul_head_size * i; math::MatMul(sequence_length, head_size, sequence_length, - reinterpret_cast(scratch_data) + sequence_length_mul_head_size * i, + reinterpret_cast(scratch_data) + sequence_length * sequence_length * i, V + sequence_length_mul_head_size * i, current_tmp_data, nullptr); // transpose: out(B, S, N, H) = transpose out_tmp(B, N, S, H)