From 7a96cfc8f5a3ecf93bfa9c6517c9ebaaec04f74a Mon Sep 17 00:00:00 2001 From: Tracy Sharpe <42477615+tracysh@users.noreply.github.com> Date: Sat, 13 Jun 2020 14:47:44 -0700 Subject: [PATCH] operator code cleanup (#4228) Search/replace of the pattern "const auto foo = tensor.Shape()" to "const auto& foo = tensor.Shape()" to avoid unneeded copies at runtime and reduce code size (8KB drop for onnxruntime.dll). Remove some unnecessary header includes. --- .../cpu/attnlstm/deep_cpu_attn_lstm.cc | 1 - onnxruntime/contrib_ops/cpu/bert/attention.cc | 24 +++++++++---------- .../contrib_ops/cpu/bert/bias_gelu_helper.cc | 9 +++---- .../contrib_ops/cpu/bert/embed_layer_norm.cc | 13 +++------- .../cpu/bert/embed_layer_norm_helper.cc | 12 +++++----- onnxruntime/contrib_ops/cpu/crop.h | 4 ++-- .../contrib_ops/cpu/crop_and_resize.cc | 1 - onnxruntime/contrib_ops/cpu/image_scaler.h | 2 +- .../cpu/quantization/attention_quant.cc | 11 ++++----- .../contrib_ops/cpu/skip_layer_norm.cc | 10 ++++---- .../contrib_ops/cpu/transpose_matmul.cc | 2 -- .../contrib_ops/cpu/word_conv_embedding.cc | 1 - .../contrib_ops/cuda/bert/attention.cc | 11 ++++----- .../contrib_ops/cuda/bert/embed_layer_norm.cc | 15 ++++-------- .../contrib_ops/cuda/bert/skip_layer_norm.cc | 8 +++---- .../quantization/attention_quantization.cc | 11 ++++----- onnxruntime/contrib_ops/cuda/tensor/crop.cc | 2 +- .../contrib_ops/cuda/tensor/image_scaler.cc | 2 +- .../core/providers/cpu/tensor/nonzero_op.cc | 2 +- 19 files changed, 57 insertions(+), 84 deletions(-) diff --git a/onnxruntime/contrib_ops/cpu/attnlstm/deep_cpu_attn_lstm.cc b/onnxruntime/contrib_ops/cpu/attnlstm/deep_cpu_attn_lstm.cc index 96396308a8..f105856445 100644 --- a/onnxruntime/contrib_ops/cpu/attnlstm/deep_cpu_attn_lstm.cc +++ b/onnxruntime/contrib_ops/cpu/attnlstm/deep_cpu_attn_lstm.cc @@ -10,7 +10,6 @@ #include "core/common/logging/logging.h" #include "core/platform/threadpool.h" #include "core/framework/allocator.h" -#include "core/framework/op_kernel_context_internal.h" namespace onnxruntime { namespace contrib { diff --git a/onnxruntime/contrib_ops/cpu/bert/attention.cc b/onnxruntime/contrib_ops/cpu/bert/attention.cc index e6ed0c644b..7698832874 100644 --- a/onnxruntime/contrib_ops/cpu/bert/attention.cc +++ b/onnxruntime/contrib_ops/cpu/bert/attention.cc @@ -9,7 +9,6 @@ #include "core/util/math_cpuonly.h" #include "core/common/safeint.h" #include "core/platform/threadpool.h" -#include "core/mlas/inc/mlas.h" using onnxruntime::concurrency::ThreadPool; @@ -43,7 +42,7 @@ Status AttentionBase::CheckInputs(const Tensor* input, // mask_index : (batch_size) if presented // past : (2, batch_size, num_heads, past_sequence_length, head_size) - const auto dims = input->Shape().GetDims(); + const auto& dims = input->Shape().GetDims(); if (dims.size() != 3) { return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, "Input 0 is expected to have 3 dimensions, got ", dims.size()); @@ -55,7 +54,7 @@ Status AttentionBase::CheckInputs(const Tensor* input, "Input 0 dimension 2 should be divisiable by value of the num_heads attribute."); } - const auto weights_dims = weights->Shape().GetDims(); + const auto& weights_dims = weights->Shape().GetDims(); if (weights_dims.size() != 2) { return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, "Input 1 is expected to have 2 dimensions, got ", weights_dims.size()); @@ -68,7 +67,7 @@ Status AttentionBase::CheckInputs(const Tensor* input, return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, "Input 1 dimension 1 should be 3 times of dimension 0"); } - const auto bias_dims = bias->Shape().GetDims(); + const auto& bias_dims = bias->Shape().GetDims(); if (bias_dims.size() != 1) { return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, "Input 2 is expected to have 1 dimension, got ", bias_dims.size()); @@ -84,7 +83,7 @@ Status AttentionBase::CheckInputs(const Tensor* input, return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, "Input 3 (mask_index) is not allowed for unidirectional"); } - const auto mask_dims = mask_index->Shape().GetDims(); + const auto& mask_dims = mask_index->Shape().GetDims(); if (mask_dims.size() != 1) { return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, "Input 3 is expected to have 1 dimension, got ", mask_dims.size()); @@ -99,7 +98,7 @@ Status AttentionBase::CheckInputs(const Tensor* input, return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, "Input 4 (past) is only allowed for unidirectional"); } - const auto past_dims = past->Shape().GetDims(); + const auto& past_dims = past->Shape().GetDims(); if (past_dims.size() != 5) { return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, "Input 4 is expected to have 5 dimension, got ", past_dims.size()); @@ -133,7 +132,7 @@ Tensor* AttentionBase::GetPresent(OpKernelContext* context, std::vector present_dims{2, batch_size, num_heads_, sequence_length, head_size}; if (nullptr != past) { - const auto past_dims = past->Shape().GetDims(); + const auto& past_dims = past->Shape().GetDims(); past_sequence_length = static_cast(past_dims[3]); present_dims[3] += past_dims[3]; } @@ -161,14 +160,13 @@ Status Attention::Compute(OpKernelContext* context) const { ORT_RETURN_IF_ERROR(CheckInputs(input, weights, bias, mask_index, past)); - const auto dims = input->Shape().GetDims(); - const int batch_size = static_cast(dims[0]); - const int sequence_length = static_cast(dims[1]); - const int hidden_size = static_cast(dims[2]); + const auto& shape = input->Shape().GetDims(); + const int batch_size = static_cast(shape[0]); + const int sequence_length = static_cast(shape[1]); + const int hidden_size = static_cast(shape[2]); const int head_size = hidden_size / num_heads_; - TensorShape output_shape(dims); - Tensor* output = context->Output(0, output_shape); + Tensor* output = context->Output(0, shape); int past_sequence_length = 0; Tensor* present = GetPresent(context, past, batch_size, head_size, sequence_length, past_sequence_length); diff --git a/onnxruntime/contrib_ops/cpu/bert/bias_gelu_helper.cc b/onnxruntime/contrib_ops/cpu/bert/bias_gelu_helper.cc index 90079c8cca..a79695cca7 100644 --- a/onnxruntime/contrib_ops/cpu/bert/bias_gelu_helper.cc +++ b/onnxruntime/contrib_ops/cpu/bert/bias_gelu_helper.cc @@ -6,10 +6,7 @@ #include "onnx/defs/tensor_proto_util.h" #include "core/common/safeint.h" #include "core/framework/tensor.h" -#include "core/platform/threadpool.h" #include "core/providers/common.h" -#include "core/util/math_cpuonly.h" -#include "core/mlas/inc/mlas.h" namespace onnxruntime { namespace contrib { @@ -19,14 +16,14 @@ Status CheckInputs(const OpKernelContext* context) { const Tensor* input = context->Input(0); const Tensor* bias = context->Input(1); - const auto input_dims = input->Shape().GetDims(); + const auto& input_dims = input->Shape().GetDims(); if (input_dims.size() < 1) { return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, "Input 0 is expected to have 1 or more dimensions, got ", input_dims.size()); } if (nullptr != bias) { - const auto bias_dims = bias->Shape().GetDims(); + const auto& bias_dims = bias->Shape().GetDims(); if (bias_dims.size() != 1) { return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, "Input 1 is expected to have 1 dimensions, got ", bias_dims.size()); @@ -40,6 +37,6 @@ Status CheckInputs(const OpKernelContext* context) { return Status::OK(); } -} // namespace bias_gelu +} // namespace bias_gelu_helper } // namespace contrib } // namespace onnxruntime diff --git a/onnxruntime/contrib_ops/cpu/bert/embed_layer_norm.cc b/onnxruntime/contrib_ops/cpu/bert/embed_layer_norm.cc index 2ebd9484aa..cdf484fb07 100644 --- a/onnxruntime/contrib_ops/cpu/bert/embed_layer_norm.cc +++ b/onnxruntime/contrib_ops/cpu/bert/embed_layer_norm.cc @@ -42,20 +42,13 @@ Status EmbedLayerNorm::Compute(OpKernelContext* context) const { const Tensor* beta = context->Input(6); const Tensor* mask = context->Input(7); // optional. nullptr if not provided - const auto input_dims = input_ids->Shape().GetDims(); + const auto& input_dims = input_ids->Shape().GetDims(); int64_t hidden_size = word_embedding->Shape()[1]; - std::vector out_dims; - out_dims.reserve(3); - out_dims.push_back(input_dims[0]); - out_dims.push_back(input_dims[1]); - out_dims.push_back(hidden_size); - TensorShape output_shape(out_dims); + TensorShape output_shape({input_dims[0], input_dims[1], hidden_size}); Tensor* output = context->Output(0, output_shape); - std::vector mask_index_dims; - mask_index_dims.push_back(input_dims[0]); - TensorShape mask_index_shape(mask_index_dims); + TensorShape mask_index_shape({input_dims[0]}); Tensor* mask_index = context->Output(1, mask_index_shape); int batch_size = static_cast(input_dims[0]); diff --git a/onnxruntime/contrib_ops/cpu/bert/embed_layer_norm_helper.cc b/onnxruntime/contrib_ops/cpu/bert/embed_layer_norm_helper.cc index 5b10547a14..0b5bdda71f 100644 --- a/onnxruntime/contrib_ops/cpu/bert/embed_layer_norm_helper.cc +++ b/onnxruntime/contrib_ops/cpu/bert/embed_layer_norm_helper.cc @@ -30,25 +30,25 @@ Status CheckInputs(const OpKernelContext* context) { } - const auto input_dims = input_ids->Shape().GetDims(); + const auto& input_dims = input_ids->Shape().GetDims(); if (input_dims.size() != 2) { return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, "input_ids is expected to have 2 dimensions, got ", input_dims.size()); } - const auto word_embedding_dims = word_embedding->Shape().GetDims(); + const auto& word_embedding_dims = word_embedding->Shape().GetDims(); if (word_embedding_dims.size() != 2) { return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, "word_embedding is expected to have 2 dimensions, got ", word_embedding_dims.size()); } - const auto position_embedding_dims = position_embedding->Shape().GetDims(); + const auto& position_embedding_dims = position_embedding->Shape().GetDims(); if (position_embedding_dims.size() != 2) { return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, "position_embedding is expected to have 2 dimensions, got ", position_embedding_dims.size()); } - const auto segment_embedding_dims = segment_embedding->Shape().GetDims(); + const auto& segment_embedding_dims = segment_embedding->Shape().GetDims(); if (segment_embedding_dims.size() != 2) { return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, "segment_embedding is expected to have 2 dimensions, got ", segment_embedding_dims.size()); @@ -64,7 +64,7 @@ Status CheckInputs(const OpKernelContext* context) { "word_embedding and segment_embedding shall have same dimension 1"); } - const auto beta_dims = beta->Shape().GetDims(); + const auto& beta_dims = beta->Shape().GetDims(); if (beta_dims.size() != 1) { return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, "beta is expected to have 1 dimensions, got ", beta_dims.size()); @@ -75,7 +75,7 @@ Status CheckInputs(const OpKernelContext* context) { "beta is expected to have size of ", word_embedding_dims[1], ", got ", beta_dims[0]); } - const auto gamma_dims = gamma->Shape().GetDims(); + const auto& gamma_dims = gamma->Shape().GetDims(); if (gamma_dims.size() != 1) { return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, "gamma is expected to have 1 dimensions, got ", gamma_dims.size()); diff --git a/onnxruntime/contrib_ops/cpu/crop.h b/onnxruntime/contrib_ops/cpu/crop.h index 4f69e7c70c..7d6e2566a2 100644 --- a/onnxruntime/contrib_ops/cpu/crop.h +++ b/onnxruntime/contrib_ops/cpu/crop.h @@ -24,7 +24,7 @@ class CropBase { "Attribute border needs to be specified with four border elements, got ", border_.size()); } - const auto dims = X->Shape().GetDims(); + const auto& dims = X->Shape().GetDims(); if (dims.size() != 4) { return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, @@ -83,7 +83,7 @@ class Crop final : public CropBase, public OpKernel { const Tensor* X = context->Input(0); ORT_RETURN_IF_ERROR(ValidateInput(X)); - const auto dims = X->Shape().GetDims(); + const auto& dims = X->Shape().GetDims(); const int64_t N = dims[0]; const int64_t C = dims[1]; const int64_t H = dims[2]; diff --git a/onnxruntime/contrib_ops/cpu/crop_and_resize.cc b/onnxruntime/contrib_ops/cpu/crop_and_resize.cc index 8bca4ed912..413eaf130b 100644 --- a/onnxruntime/contrib_ops/cpu/crop_and_resize.cc +++ b/onnxruntime/contrib_ops/cpu/crop_and_resize.cc @@ -20,7 +20,6 @@ limitations under the License. #include "core/util/math_cpuonly.h" #include "core/common/common.h" #include "core/framework/tensor.h" -#include "core/framework/op_kernel_context_internal.h" #include "core/platform/threadpool.h" #include "core/providers/cpu/object_detection/roialign.h" diff --git a/onnxruntime/contrib_ops/cpu/image_scaler.h b/onnxruntime/contrib_ops/cpu/image_scaler.h index 66ee084474..392624dc8b 100644 --- a/onnxruntime/contrib_ops/cpu/image_scaler.h +++ b/onnxruntime/contrib_ops/cpu/image_scaler.h @@ -22,7 +22,7 @@ class ImageScaler final : public OpKernel { const Tensor* X = context->Input(0); if (X == nullptr) return Status(common::ONNXRUNTIME, common::FAIL, "input count mismatch"); - const auto dims = X->Shape().GetDims(); + const auto& dims = X->Shape().GetDims(); if (dims.size() < 4) { return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, diff --git a/onnxruntime/contrib_ops/cpu/quantization/attention_quant.cc b/onnxruntime/contrib_ops/cpu/quantization/attention_quant.cc index c6f313322e..ef344d8db4 100644 --- a/onnxruntime/contrib_ops/cpu/quantization/attention_quant.cc +++ b/onnxruntime/contrib_ops/cpu/quantization/attention_quant.cc @@ -86,14 +86,13 @@ Status QAttention::Compute(OpKernelContext* context) const { weight_zero_point = *w_zp_tensor->template Data(); } - const auto dims = input->Shape().GetDims(); - const int batch_size = static_cast(dims[0]); - const int sequence_length = static_cast(dims[1]); - const int hidden_size = static_cast(dims[2]); + const auto& shape = input->Shape(); + const int batch_size = static_cast(shape[0]); + const int sequence_length = static_cast(shape[1]); + const int hidden_size = static_cast(shape[2]); const int head_size = hidden_size / num_heads_; - TensorShape output_shape(dims); - Tensor* output = context->Output(0, output_shape); + Tensor* output = context->Output(0, shape); AllocatorPtr allocator; ORT_RETURN_IF_ERROR(context->GetTempSpaceAllocator(&allocator)); diff --git a/onnxruntime/contrib_ops/cpu/skip_layer_norm.cc b/onnxruntime/contrib_ops/cpu/skip_layer_norm.cc index d1be62cc0b..29d244d64b 100644 --- a/onnxruntime/contrib_ops/cpu/skip_layer_norm.cc +++ b/onnxruntime/contrib_ops/cpu/skip_layer_norm.cc @@ -40,7 +40,7 @@ Status SkipLayerNorm::Compute(OpKernelContext* p_ctx) const { const Tensor* bias = p_ctx->Input(4); Tensor* output = p_ctx->Output(0, input->Shape()); - const auto input_dims = input->Shape().GetDims(); + const auto& input_dims = input->Shape().GetDims(); if (input_dims.size() != 3) { return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, "input is expected to have 3 dimensions, got ", input_dims.size()); @@ -51,7 +51,7 @@ Status SkipLayerNorm::Compute(OpKernelContext* p_ctx) const { "skip is expected to have same shape as input"); } - const auto gamma_dims = gamma->Shape().GetDims(); + const auto& gamma_dims = gamma->Shape().GetDims(); if (gamma_dims.size() != 1) { return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, "gamma is expected to have 1 dimension, got ", gamma_dims.size()); @@ -61,7 +61,7 @@ Status SkipLayerNorm::Compute(OpKernelContext* p_ctx) const { "Last dimension of gamma and input does not match"); } - const auto beta_dims = beta->Shape().GetDims(); + const auto& beta_dims = beta->Shape().GetDims(); if (beta_dims.size() != 1) { return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, "beta is expected to have 1 dimension, got ", beta_dims.size()); @@ -72,7 +72,7 @@ Status SkipLayerNorm::Compute(OpKernelContext* p_ctx) const { } if (nullptr != bias) { - const auto bias_dims = bias->Shape().GetDims(); + const auto& bias_dims = bias->Shape().GetDims(); if (bias_dims.size() != 1) { return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, "bias is expected to have 1 dimension, got ", bias_dims.size()); @@ -124,7 +124,7 @@ Status SkipLayerNorm::Compute(OpKernelContext* p_ctx) const { }, 0); return Status::OK(); -} // namespace contrib +} } // namespace contrib } // namespace onnxruntime diff --git a/onnxruntime/contrib_ops/cpu/transpose_matmul.cc b/onnxruntime/contrib_ops/cpu/transpose_matmul.cc index d12e6355fb..10b3e11588 100644 --- a/onnxruntime/contrib_ops/cpu/transpose_matmul.cc +++ b/onnxruntime/contrib_ops/cpu/transpose_matmul.cc @@ -2,8 +2,6 @@ // Licensed under the MIT License. #include "transpose_matmul.h" - -#include "core/framework/op_kernel_context_internal.h" #include "core/providers/cpu/math/matmul_helper.h" #include "core/util/math.h" diff --git a/onnxruntime/contrib_ops/cpu/word_conv_embedding.cc b/onnxruntime/contrib_ops/cpu/word_conv_embedding.cc index a0e6d7c33d..f3a8a03684 100644 --- a/onnxruntime/contrib_ops/cpu/word_conv_embedding.cc +++ b/onnxruntime/contrib_ops/cpu/word_conv_embedding.cc @@ -6,7 +6,6 @@ #include "core/util/math.h" #include "core/util/math_cpuonly.h" #include "core/mlas/inc/mlas.h" -#include "core/framework/op_kernel_context_internal.h" namespace onnxruntime { namespace contrib { diff --git a/onnxruntime/contrib_ops/cuda/bert/attention.cc b/onnxruntime/contrib_ops/cuda/bert/attention.cc index b203a08345..158d0e8cc0 100644 --- a/onnxruntime/contrib_ops/cuda/bert/attention.cc +++ b/onnxruntime/contrib_ops/cuda/bert/attention.cc @@ -44,14 +44,13 @@ Status Attention::ComputeInternal(OpKernelContext* context) const { // Input and output shapes: // Input 0 - input : (batch_size, sequence_length, hidden_size) // Output 0 - output : (batch_size, sequence_length, hidden_size) - const auto dims = input->Shape().GetDims(); - int batch_size = static_cast(dims[0]); - int sequence_length = static_cast(dims[1]); - int hidden_size = static_cast(dims[2]); + const auto& shape = input->Shape(); + int batch_size = static_cast(shape[0]); + int sequence_length = static_cast(shape[1]); + int hidden_size = static_cast(shape[2]); int head_size = hidden_size / num_heads_; - TensorShape output_shape(dims); - Tensor* output = context->Output(0, output_shape); + Tensor* output = context->Output(0, shape); int past_sequence_length = 0; Tensor* present = GetPresent(context, past, batch_size, head_size, sequence_length, past_sequence_length); diff --git a/onnxruntime/contrib_ops/cuda/bert/embed_layer_norm.cc b/onnxruntime/contrib_ops/cuda/bert/embed_layer_norm.cc index 73cc52c822..38e1166a58 100644 --- a/onnxruntime/contrib_ops/cuda/bert/embed_layer_norm.cc +++ b/onnxruntime/contrib_ops/cuda/bert/embed_layer_norm.cc @@ -47,20 +47,13 @@ Status EmbedLayerNorm::ComputeInternal(OpKernelContext* context) const { const Tensor* beta = context->Input(6); const Tensor* mask = context->Input(7); // optional. nullptr if not provided - const auto input_dims = input_ids->Shape().GetDims(); + const auto& input_dims = input_ids->Shape().GetDims(); int64_t hidden_size = word_embedding->Shape()[1]; - std::vector out_dims; - out_dims.reserve(3); - out_dims.push_back(input_dims[0]); - out_dims.push_back(input_dims[1]); - out_dims.push_back(hidden_size); - TensorShape output_shape(out_dims); + TensorShape output_shape({input_dims[0], input_dims[1], hidden_size}); Tensor* output = context->Output(0, output_shape); - std::vector mask_index_dims; - mask_index_dims.push_back(input_dims[0]); - TensorShape mask_index_shape(mask_index_dims); + TensorShape mask_index_shape({input_dims[0]}); Tensor* mask_index = context->Output(1, mask_index_shape); int batch_size = static_cast(input_dims[0]); @@ -79,7 +72,7 @@ Status EmbedLayerNorm::ComputeInternal(OpKernelContext* context) const { position_embedding->template Data(), segment_embedding->template Data(), epsilon_, - static_cast(hidden_size), + static_cast(hidden_size), batch_size, sequence_length, element_size)) { diff --git a/onnxruntime/contrib_ops/cuda/bert/skip_layer_norm.cc b/onnxruntime/contrib_ops/cuda/bert/skip_layer_norm.cc index dc7ed4e592..f8f6c2ad49 100644 --- a/onnxruntime/contrib_ops/cuda/bert/skip_layer_norm.cc +++ b/onnxruntime/contrib_ops/cuda/bert/skip_layer_norm.cc @@ -44,7 +44,7 @@ Status SkipLayerNorm::ComputeInternal(OpKernelContext* ctx) const { Tensor* output = ctx->Output(0, input->Shape()); - const auto input_dims = input->Shape().GetDims(); + const auto& input_dims = input->Shape().GetDims(); if (input_dims.size() != 3) { return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, "input is expected to have 3 dimensions, got ", input_dims.size()); @@ -55,7 +55,7 @@ Status SkipLayerNorm::ComputeInternal(OpKernelContext* ctx) const { "skip is expected to have same shape as input"); } - const auto gamma_dims = gamma->Shape().GetDims(); + const auto& gamma_dims = gamma->Shape().GetDims(); if (gamma_dims.size() != 1) { return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, "gamma is expected to have 1 dimension, got ", gamma_dims.size()); @@ -65,7 +65,7 @@ Status SkipLayerNorm::ComputeInternal(OpKernelContext* ctx) const { "Last dimension of gamma and input does not match"); } - const auto beta_dims = beta->Shape().GetDims(); + const auto& beta_dims = beta->Shape().GetDims(); if (beta_dims.size() != 1) { return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, "beta is expected to have 1 dimension, got ", beta_dims.size()); @@ -76,7 +76,7 @@ Status SkipLayerNorm::ComputeInternal(OpKernelContext* ctx) const { } if (nullptr != bias) { - const auto bias_dims = bias->Shape().GetDims(); + const auto& bias_dims = bias->Shape().GetDims(); if (bias_dims.size() != 1) { return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, "bias is expected to have 1 dimension, got ", bias_dims.size()); diff --git a/onnxruntime/contrib_ops/cuda/quantization/attention_quantization.cc b/onnxruntime/contrib_ops/cuda/quantization/attention_quantization.cc index 6d4ed08d38..c1a5a251eb 100644 --- a/onnxruntime/contrib_ops/cuda/quantization/attention_quantization.cc +++ b/onnxruntime/contrib_ops/cuda/quantization/attention_quantization.cc @@ -116,14 +116,13 @@ Status QAttention::ComputeInternal(OpKernelContext* context) const { i_zp_tensor, w_zp_tensor)); - const auto dims = input->Shape().GetDims(); - int batch_size = static_cast(dims[0]); - int sequence_length = static_cast(dims[1]); - int hidden_size = static_cast(dims[2]); + const auto& shape = input->Shape(); + int batch_size = static_cast(shape[0]); + int sequence_length = static_cast(shape[1]); + int hidden_size = static_cast(shape[2]); int head_size = hidden_size / num_heads_; - TensorShape output_shape(dims); - Tensor* output = context->Output(0, output_shape); + Tensor* output = context->Output(0, shape); cublasHandle_t cublas = CublasHandle(); const size_t element_size = sizeof(T); diff --git a/onnxruntime/contrib_ops/cuda/tensor/crop.cc b/onnxruntime/contrib_ops/cuda/tensor/crop.cc index bf16889f01..66e022e3c4 100644 --- a/onnxruntime/contrib_ops/cuda/tensor/crop.cc +++ b/onnxruntime/contrib_ops/cuda/tensor/crop.cc @@ -28,7 +28,7 @@ Status Crop::ComputeInternal(OpKernelContext* context) const { const Tensor* X = context->Input(0); ORT_RETURN_IF_ERROR(ValidateInput(X)); - const auto dims = X->Shape().GetDims(); + const auto& dims = X->Shape().GetDims(); const int64_t N = dims[0]; const int64_t C = dims[1]; const int64_t H = dims[2]; diff --git a/onnxruntime/contrib_ops/cuda/tensor/image_scaler.cc b/onnxruntime/contrib_ops/cuda/tensor/image_scaler.cc index 5efcdd046a..bf1f33e84a 100644 --- a/onnxruntime/contrib_ops/cuda/tensor/image_scaler.cc +++ b/onnxruntime/contrib_ops/cuda/tensor/image_scaler.cc @@ -36,7 +36,7 @@ ImageScaler::ImageScaler(const OpKernelInfo& info) : CudaKernel(info) { template Status ImageScaler::ComputeInternal(OpKernelContext* context) const { const Tensor* X = context->Input(0); - const auto dims = X->Shape().GetDims(); + const auto& dims = X->Shape().GetDims(); if (dims.size() != 4) { return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, diff --git a/onnxruntime/core/providers/cpu/tensor/nonzero_op.cc b/onnxruntime/core/providers/cpu/tensor/nonzero_op.cc index ef16693dc7..ce96337aea 100644 --- a/onnxruntime/core/providers/cpu/tensor/nonzero_op.cc +++ b/onnxruntime/core/providers/cpu/tensor/nonzero_op.cc @@ -45,7 +45,7 @@ Status NonZero::Compute(OpKernelContext* context) const { const auto X = context->Input(0); ORT_ENFORCE(X, "X input is required!"); - const auto X_shape = X->Shape(); + const auto& X_shape = X->Shape(); assert(X_shape.Size() >= 0); const Eigen::Index coordinate_size = X_shape.IsScalar() ? 1 : X_shape.NumDimensions();