operator code cleanup (#4228)

Search/replace of the pattern "const auto foo = tensor.Shape()" with "const auto& foo = tensor.Shape()" to avoid unneeded copies at runtime and to reduce code size (an 8KB drop for onnxruntime.dll). Also removes some unnecessary header includes.
This commit is contained in:
Tracy Sharpe 2020-06-13 14:47:44 -07:00 committed by GitHub
parent c55f6d76be
commit 7a96cfc8f5
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
19 changed files with 57 additions and 84 deletions

View file

@ -10,7 +10,6 @@
#include "core/common/logging/logging.h"
#include "core/platform/threadpool.h"
#include "core/framework/allocator.h"
#include "core/framework/op_kernel_context_internal.h"
namespace onnxruntime {
namespace contrib {

View file

@ -9,7 +9,6 @@
#include "core/util/math_cpuonly.h"
#include "core/common/safeint.h"
#include "core/platform/threadpool.h"
#include "core/mlas/inc/mlas.h"
using onnxruntime::concurrency::ThreadPool;
@ -43,7 +42,7 @@ Status AttentionBase::CheckInputs(const Tensor* input,
// mask_index : (batch_size) if presented
// past : (2, batch_size, num_heads, past_sequence_length, head_size)
const auto dims = input->Shape().GetDims();
const auto& dims = input->Shape().GetDims();
if (dims.size() != 3) {
return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, "Input 0 is expected to have 3 dimensions, got ",
dims.size());
@ -55,7 +54,7 @@ Status AttentionBase::CheckInputs(const Tensor* input,
"Input 0 dimension 2 should be divisiable by value of the num_heads attribute.");
}
const auto weights_dims = weights->Shape().GetDims();
const auto& weights_dims = weights->Shape().GetDims();
if (weights_dims.size() != 2) {
return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, "Input 1 is expected to have 2 dimensions, got ",
weights_dims.size());
@ -68,7 +67,7 @@ Status AttentionBase::CheckInputs(const Tensor* input,
return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, "Input 1 dimension 1 should be 3 times of dimension 0");
}
const auto bias_dims = bias->Shape().GetDims();
const auto& bias_dims = bias->Shape().GetDims();
if (bias_dims.size() != 1) {
return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, "Input 2 is expected to have 1 dimension, got ",
bias_dims.size());
@ -84,7 +83,7 @@ Status AttentionBase::CheckInputs(const Tensor* input,
return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, "Input 3 (mask_index) is not allowed for unidirectional");
}
const auto mask_dims = mask_index->Shape().GetDims();
const auto& mask_dims = mask_index->Shape().GetDims();
if (mask_dims.size() != 1) {
return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, "Input 3 is expected to have 1 dimension, got ",
mask_dims.size());
@ -99,7 +98,7 @@ Status AttentionBase::CheckInputs(const Tensor* input,
return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, "Input 4 (past) is only allowed for unidirectional");
}
const auto past_dims = past->Shape().GetDims();
const auto& past_dims = past->Shape().GetDims();
if (past_dims.size() != 5) {
return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, "Input 4 is expected to have 5 dimension, got ",
past_dims.size());
@ -133,7 +132,7 @@ Tensor* AttentionBase::GetPresent(OpKernelContext* context,
std::vector<int64_t> present_dims{2, batch_size, num_heads_, sequence_length, head_size};
if (nullptr != past) {
const auto past_dims = past->Shape().GetDims();
const auto& past_dims = past->Shape().GetDims();
past_sequence_length = static_cast<int>(past_dims[3]);
present_dims[3] += past_dims[3];
}
@ -161,14 +160,13 @@ Status Attention<T>::Compute(OpKernelContext* context) const {
ORT_RETURN_IF_ERROR(CheckInputs(input, weights, bias, mask_index, past));
const auto dims = input->Shape().GetDims();
const int batch_size = static_cast<int>(dims[0]);
const int sequence_length = static_cast<int>(dims[1]);
const int hidden_size = static_cast<int>(dims[2]);
const auto& shape = input->Shape().GetDims();
const int batch_size = static_cast<int>(shape[0]);
const int sequence_length = static_cast<int>(shape[1]);
const int hidden_size = static_cast<int>(shape[2]);
const int head_size = hidden_size / num_heads_;
TensorShape output_shape(dims);
Tensor* output = context->Output(0, output_shape);
Tensor* output = context->Output(0, shape);
int past_sequence_length = 0;
Tensor* present = GetPresent(context, past, batch_size, head_size, sequence_length, past_sequence_length);

View file

@ -6,10 +6,7 @@
#include "onnx/defs/tensor_proto_util.h"
#include "core/common/safeint.h"
#include "core/framework/tensor.h"
#include "core/platform/threadpool.h"
#include "core/providers/common.h"
#include "core/util/math_cpuonly.h"
#include "core/mlas/inc/mlas.h"
namespace onnxruntime {
namespace contrib {
@ -19,14 +16,14 @@ Status CheckInputs(const OpKernelContext* context) {
const Tensor* input = context->Input<Tensor>(0);
const Tensor* bias = context->Input<Tensor>(1);
const auto input_dims = input->Shape().GetDims();
const auto& input_dims = input->Shape().GetDims();
if (input_dims.size() < 1) {
return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
"Input 0 is expected to have 1 or more dimensions, got ", input_dims.size());
}
if (nullptr != bias) {
const auto bias_dims = bias->Shape().GetDims();
const auto& bias_dims = bias->Shape().GetDims();
if (bias_dims.size() != 1) {
return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
"Input 1 is expected to have 1 dimensions, got ", bias_dims.size());
@ -40,6 +37,6 @@ Status CheckInputs(const OpKernelContext* context) {
return Status::OK();
}
} // namespace bias_gelu
} // namespace bias_gelu_helper
} // namespace contrib
} // namespace onnxruntime

View file

@ -42,20 +42,13 @@ Status EmbedLayerNorm<T>::Compute(OpKernelContext* context) const {
const Tensor* beta = context->Input<Tensor>(6);
const Tensor* mask = context->Input<Tensor>(7); // optional. nullptr if not provided
const auto input_dims = input_ids->Shape().GetDims();
const auto& input_dims = input_ids->Shape().GetDims();
int64_t hidden_size = word_embedding->Shape()[1];
std::vector<int64_t> out_dims;
out_dims.reserve(3);
out_dims.push_back(input_dims[0]);
out_dims.push_back(input_dims[1]);
out_dims.push_back(hidden_size);
TensorShape output_shape(out_dims);
TensorShape output_shape({input_dims[0], input_dims[1], hidden_size});
Tensor* output = context->Output(0, output_shape);
std::vector<int64_t> mask_index_dims;
mask_index_dims.push_back(input_dims[0]);
TensorShape mask_index_shape(mask_index_dims);
TensorShape mask_index_shape({input_dims[0]});
Tensor* mask_index = context->Output(1, mask_index_shape);
int batch_size = static_cast<int>(input_dims[0]);

View file

@ -30,25 +30,25 @@ Status CheckInputs(const OpKernelContext* context) {
}
const auto input_dims = input_ids->Shape().GetDims();
const auto& input_dims = input_ids->Shape().GetDims();
if (input_dims.size() != 2) {
return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
"input_ids is expected to have 2 dimensions, got ", input_dims.size());
}
const auto word_embedding_dims = word_embedding->Shape().GetDims();
const auto& word_embedding_dims = word_embedding->Shape().GetDims();
if (word_embedding_dims.size() != 2) {
return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
"word_embedding is expected to have 2 dimensions, got ", word_embedding_dims.size());
}
const auto position_embedding_dims = position_embedding->Shape().GetDims();
const auto& position_embedding_dims = position_embedding->Shape().GetDims();
if (position_embedding_dims.size() != 2) {
return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
"position_embedding is expected to have 2 dimensions, got ", position_embedding_dims.size());
}
const auto segment_embedding_dims = segment_embedding->Shape().GetDims();
const auto& segment_embedding_dims = segment_embedding->Shape().GetDims();
if (segment_embedding_dims.size() != 2) {
return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
"segment_embedding is expected to have 2 dimensions, got ", segment_embedding_dims.size());
@ -64,7 +64,7 @@ Status CheckInputs(const OpKernelContext* context) {
"word_embedding and segment_embedding shall have same dimension 1");
}
const auto beta_dims = beta->Shape().GetDims();
const auto& beta_dims = beta->Shape().GetDims();
if (beta_dims.size() != 1) {
return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
"beta is expected to have 1 dimensions, got ", beta_dims.size());
@ -75,7 +75,7 @@ Status CheckInputs(const OpKernelContext* context) {
"beta is expected to have size of ", word_embedding_dims[1], ", got ", beta_dims[0]);
}
const auto gamma_dims = gamma->Shape().GetDims();
const auto& gamma_dims = gamma->Shape().GetDims();
if (gamma_dims.size() != 1) {
return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
"gamma is expected to have 1 dimensions, got ", gamma_dims.size());

View file

@ -24,7 +24,7 @@ class CropBase {
"Attribute border needs to be specified with four border elements, got ", border_.size());
}
const auto dims = X->Shape().GetDims();
const auto& dims = X->Shape().GetDims();
if (dims.size() != 4) {
return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
@ -83,7 +83,7 @@ class Crop final : public CropBase, public OpKernel {
const Tensor* X = context->Input<Tensor>(0);
ORT_RETURN_IF_ERROR(ValidateInput(X));
const auto dims = X->Shape().GetDims();
const auto& dims = X->Shape().GetDims();
const int64_t N = dims[0];
const int64_t C = dims[1];
const int64_t H = dims[2];

View file

@ -20,7 +20,6 @@ limitations under the License.
#include "core/util/math_cpuonly.h"
#include "core/common/common.h"
#include "core/framework/tensor.h"
#include "core/framework/op_kernel_context_internal.h"
#include "core/platform/threadpool.h"
#include "core/providers/cpu/object_detection/roialign.h"

View file

@ -22,7 +22,7 @@ class ImageScaler final : public OpKernel {
const Tensor* X = context->Input<Tensor>(0);
if (X == nullptr) return Status(common::ONNXRUNTIME, common::FAIL, "input count mismatch");
const auto dims = X->Shape().GetDims();
const auto& dims = X->Shape().GetDims();
if (dims.size() < 4) {
return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,

View file

@ -86,14 +86,13 @@ Status QAttention<T, QInput, QWeight>::Compute(OpKernelContext* context) const {
weight_zero_point = *w_zp_tensor->template Data<QWeight>();
}
const auto dims = input->Shape().GetDims();
const int batch_size = static_cast<int>(dims[0]);
const int sequence_length = static_cast<int>(dims[1]);
const int hidden_size = static_cast<int>(dims[2]);
const auto& shape = input->Shape();
const int batch_size = static_cast<int>(shape[0]);
const int sequence_length = static_cast<int>(shape[1]);
const int hidden_size = static_cast<int>(shape[2]);
const int head_size = hidden_size / num_heads_;
TensorShape output_shape(dims);
Tensor* output = context->Output(0, output_shape);
Tensor* output = context->Output(0, shape);
AllocatorPtr allocator;
ORT_RETURN_IF_ERROR(context->GetTempSpaceAllocator(&allocator));

View file

@ -40,7 +40,7 @@ Status SkipLayerNorm<T>::Compute(OpKernelContext* p_ctx) const {
const Tensor* bias = p_ctx->Input<Tensor>(4);
Tensor* output = p_ctx->Output(0, input->Shape());
const auto input_dims = input->Shape().GetDims();
const auto& input_dims = input->Shape().GetDims();
if (input_dims.size() != 3) {
return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
"input is expected to have 3 dimensions, got ", input_dims.size());
@ -51,7 +51,7 @@ Status SkipLayerNorm<T>::Compute(OpKernelContext* p_ctx) const {
"skip is expected to have same shape as input");
}
const auto gamma_dims = gamma->Shape().GetDims();
const auto& gamma_dims = gamma->Shape().GetDims();
if (gamma_dims.size() != 1) {
return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
"gamma is expected to have 1 dimension, got ", gamma_dims.size());
@ -61,7 +61,7 @@ Status SkipLayerNorm<T>::Compute(OpKernelContext* p_ctx) const {
"Last dimension of gamma and input does not match");
}
const auto beta_dims = beta->Shape().GetDims();
const auto& beta_dims = beta->Shape().GetDims();
if (beta_dims.size() != 1) {
return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
"beta is expected to have 1 dimension, got ", beta_dims.size());
@ -72,7 +72,7 @@ Status SkipLayerNorm<T>::Compute(OpKernelContext* p_ctx) const {
}
if (nullptr != bias) {
const auto bias_dims = bias->Shape().GetDims();
const auto& bias_dims = bias->Shape().GetDims();
if (bias_dims.size() != 1) {
return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
"bias is expected to have 1 dimension, got ", bias_dims.size());
@ -124,7 +124,7 @@ Status SkipLayerNorm<T>::Compute(OpKernelContext* p_ctx) const {
}, 0);
return Status::OK();
} // namespace contrib
}
} // namespace contrib
} // namespace onnxruntime

View file

@ -2,8 +2,6 @@
// Licensed under the MIT License.
#include "transpose_matmul.h"
#include "core/framework/op_kernel_context_internal.h"
#include "core/providers/cpu/math/matmul_helper.h"
#include "core/util/math.h"

View file

@ -6,7 +6,6 @@
#include "core/util/math.h"
#include "core/util/math_cpuonly.h"
#include "core/mlas/inc/mlas.h"
#include "core/framework/op_kernel_context_internal.h"
namespace onnxruntime {
namespace contrib {

View file

@ -44,14 +44,13 @@ Status Attention<T>::ComputeInternal(OpKernelContext* context) const {
// Input and output shapes:
// Input 0 - input : (batch_size, sequence_length, hidden_size)
// Output 0 - output : (batch_size, sequence_length, hidden_size)
const auto dims = input->Shape().GetDims();
int batch_size = static_cast<int>(dims[0]);
int sequence_length = static_cast<int>(dims[1]);
int hidden_size = static_cast<int>(dims[2]);
const auto& shape = input->Shape();
int batch_size = static_cast<int>(shape[0]);
int sequence_length = static_cast<int>(shape[1]);
int hidden_size = static_cast<int>(shape[2]);
int head_size = hidden_size / num_heads_;
TensorShape output_shape(dims);
Tensor* output = context->Output(0, output_shape);
Tensor* output = context->Output(0, shape);
int past_sequence_length = 0;
Tensor* present = GetPresent(context, past, batch_size, head_size, sequence_length, past_sequence_length);

View file

@ -47,20 +47,13 @@ Status EmbedLayerNorm<T>::ComputeInternal(OpKernelContext* context) const {
const Tensor* beta = context->Input<Tensor>(6);
const Tensor* mask = context->Input<Tensor>(7); // optional. nullptr if not provided
const auto input_dims = input_ids->Shape().GetDims();
const auto& input_dims = input_ids->Shape().GetDims();
int64_t hidden_size = word_embedding->Shape()[1];
std::vector<int64_t> out_dims;
out_dims.reserve(3);
out_dims.push_back(input_dims[0]);
out_dims.push_back(input_dims[1]);
out_dims.push_back(hidden_size);
TensorShape output_shape(out_dims);
TensorShape output_shape({input_dims[0], input_dims[1], hidden_size});
Tensor* output = context->Output(0, output_shape);
std::vector<int64_t> mask_index_dims;
mask_index_dims.push_back(input_dims[0]);
TensorShape mask_index_shape(mask_index_dims);
TensorShape mask_index_shape({input_dims[0]});
Tensor* mask_index = context->Output(1, mask_index_shape);
int batch_size = static_cast<int>(input_dims[0]);
@ -79,7 +72,7 @@ Status EmbedLayerNorm<T>::ComputeInternal(OpKernelContext* context) const {
position_embedding->template Data<T>(),
segment_embedding->template Data<T>(),
epsilon_,
static_cast<int>(hidden_size),
static_cast<int>(hidden_size),
batch_size,
sequence_length,
element_size)) {

View file

@ -44,7 +44,7 @@ Status SkipLayerNorm<T>::ComputeInternal(OpKernelContext* ctx) const {
Tensor* output = ctx->Output(0, input->Shape());
const auto input_dims = input->Shape().GetDims();
const auto& input_dims = input->Shape().GetDims();
if (input_dims.size() != 3) {
return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
"input is expected to have 3 dimensions, got ", input_dims.size());
@ -55,7 +55,7 @@ Status SkipLayerNorm<T>::ComputeInternal(OpKernelContext* ctx) const {
"skip is expected to have same shape as input");
}
const auto gamma_dims = gamma->Shape().GetDims();
const auto& gamma_dims = gamma->Shape().GetDims();
if (gamma_dims.size() != 1) {
return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
"gamma is expected to have 1 dimension, got ", gamma_dims.size());
@ -65,7 +65,7 @@ Status SkipLayerNorm<T>::ComputeInternal(OpKernelContext* ctx) const {
"Last dimension of gamma and input does not match");
}
const auto beta_dims = beta->Shape().GetDims();
const auto& beta_dims = beta->Shape().GetDims();
if (beta_dims.size() != 1) {
return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
"beta is expected to have 1 dimension, got ", beta_dims.size());
@ -76,7 +76,7 @@ Status SkipLayerNorm<T>::ComputeInternal(OpKernelContext* ctx) const {
}
if (nullptr != bias) {
const auto bias_dims = bias->Shape().GetDims();
const auto& bias_dims = bias->Shape().GetDims();
if (bias_dims.size() != 1) {
return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
"bias is expected to have 1 dimension, got ", bias_dims.size());

View file

@ -116,14 +116,13 @@ Status QAttention<T, int8_t>::ComputeInternal(OpKernelContext* context) const {
i_zp_tensor,
w_zp_tensor));
const auto dims = input->Shape().GetDims();
int batch_size = static_cast<int>(dims[0]);
int sequence_length = static_cast<int>(dims[1]);
int hidden_size = static_cast<int>(dims[2]);
const auto& shape = input->Shape();
int batch_size = static_cast<int>(shape[0]);
int sequence_length = static_cast<int>(shape[1]);
int hidden_size = static_cast<int>(shape[2]);
int head_size = hidden_size / num_heads_;
TensorShape output_shape(dims);
Tensor* output = context->Output(0, output_shape);
Tensor* output = context->Output(0, shape);
cublasHandle_t cublas = CublasHandle();
const size_t element_size = sizeof(T);

View file

@ -28,7 +28,7 @@ Status Crop<T>::ComputeInternal(OpKernelContext* context) const {
const Tensor* X = context->Input<Tensor>(0);
ORT_RETURN_IF_ERROR(ValidateInput(X));
const auto dims = X->Shape().GetDims();
const auto& dims = X->Shape().GetDims();
const int64_t N = dims[0];
const int64_t C = dims[1];
const int64_t H = dims[2];

View file

@ -36,7 +36,7 @@ ImageScaler<T>::ImageScaler(const OpKernelInfo& info) : CudaKernel(info) {
template <typename T>
Status ImageScaler<T>::ComputeInternal(OpKernelContext* context) const {
const Tensor* X = context->Input<Tensor>(0);
const auto dims = X->Shape().GetDims();
const auto& dims = X->Shape().GetDims();
if (dims.size() != 4) {
return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,

View file

@ -45,7 +45,7 @@ Status NonZero<T>::Compute(OpKernelContext* context) const {
const auto X = context->Input<Tensor>(0);
ORT_ENFORCE(X, "X input is required!");
const auto X_shape = X->Shape();
const auto& X_shape = X->Shape();
assert(X_shape.Size() >= 0);
const Eigen::Index coordinate_size = X_shape.IsScalar() ? 1 : X_shape.NumDimensions();