From 33578cc76efc19b50c9fc011215b2777de193cd1 Mon Sep 17 00:00:00 2001 From: Yi-Hong Lyu Date: Thu, 7 Mar 2024 13:54:16 -0800 Subject: [PATCH] Remove memset for the case no any mask (#19823) Improved OCR model speed by 1.034 end-to-end, by eliminating unnecessary memset when no mask is present. --- .../contrib_ops/cpu/bert/attention_cpu_base.h | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) diff --git a/onnxruntime/contrib_ops/cpu/bert/attention_cpu_base.h b/onnxruntime/contrib_ops/cpu/bert/attention_cpu_base.h index b761b1afd8..c617533319 100644 --- a/onnxruntime/contrib_ops/cpu/bert/attention_cpu_base.h +++ b/onnxruntime/contrib_ops/cpu/bert/attention_cpu_base.h @@ -140,17 +140,6 @@ class AttentionCPUBase : public AttentionBase { if (mask_data != nullptr) { PrepareMask(mask_index, mask_index_dims, mask_data, causal, batch_size, sequence_length, past_sequence_length, mask_filter_value_); - } else { // no any mask - const int memset_loop_len = batch_size * num_heads_; - const double memset_cost = static_cast<double>(sequence_length) * total_sequence_length; - - ThreadPool::TryParallelFor(tp, memset_loop_len, memset_cost, [&](std::ptrdiff_t begin, std::ptrdiff_t end) { - for (std::ptrdiff_t i = begin; i != end; ++i) { - const int output_offset = static_cast<int>(i) * sequence_length * total_sequence_length; - T* output = attention_probs + output_offset; - memset(output, 0, static_cast<size_t>(sequence_length) * total_sequence_length * sizeof(T)); - } - }); } const int loop_len = batch_size * num_heads_; @@ -188,7 +177,7 @@ class AttentionCPUBase : public AttentionBase { // B: K' (B x N x) T x H (B x N x) H x T H x T // C: attention_probs (B x N x) S x T (B x N x) S x T S x T math::Gemm<T, ThreadPool>(CblasNoTrans, CblasTrans, sequence_length, total_sequence_length, head_size, alpha, - Q + q_input_chunk_length * i, k, 1.0, + Q + q_input_chunk_length * i, k, mask_data != nullptr ? 1.0f : 0.0f, output, nullptr); if (relative_position_bias_data != nullptr) {