diff --git a/onnxruntime/contrib_ops/cuda/math/bias_softmax_impl.cu b/onnxruntime/contrib_ops/cuda/math/bias_softmax_impl.cu index 5a0dcb3805..05ba62a193 100644 --- a/onnxruntime/contrib_ops/cuda/math/bias_softmax_impl.cu +++ b/onnxruntime/contrib_ops/cuda/math/bias_softmax_impl.cu @@ -250,29 +250,31 @@ Status DispatchBiasSoftMaxForwardViaDnnLibraryImpl( const auto* B_data = reinterpret_cast(B->template Data()); auto* Y_data = reinterpret_cast(Y->template MutableData()); + int X_num_dim = static_cast(X_shape.NumDimensions()); + // binary elementise kernel requires input pitches - TArray lhs_padded_strides(static_cast(X_shape.NumDimensions())); + TArray lhs_padded_strides(X_num_dim); int64_t lhs_pitch = 1, rhs_pitch = 1; - for (int i = -1; i >= -(int)X_shape.NumDimensions(); i--) { - size_t positive_i = X_shape.NumDimensions() + i; - lhs_padded_strides[static_cast(positive_i)] = lhs_pitch; + for (int i = -1; i >= -X_num_dim; i--) { + int positive_i = X_num_dim + i; + lhs_padded_strides[positive_i] = lhs_pitch; lhs_pitch *= X_shape[positive_i]; } // set pitches for bias so it broadcasts along relevant dimensions - TArray rhs_padded_strides(static_cast(X_shape.NumDimensions())); - for (int i = -1; i >= -(int)X_shape.NumDimensions(); i--) { - size_t positive_ix = X_shape.NumDimensions() + i; - size_t positive_ib = B_shape.NumDimensions() + i; + TArray rhs_padded_strides(X_num_dim); + for (int i = -1; i >= -X_num_dim; i--) { + int positive_ix = X_num_dim + i; + int positive_ib = static_cast(B_shape.NumDimensions()) + i; if (broadcast_axis <= positive_ix && positive_ix < softmax_axis) { - rhs_padded_strides[static_cast(positive_ix)] = 0; + rhs_padded_strides[positive_ix] = 0; continue; } - rhs_padded_strides[static_cast(positive_ix)] = rhs_pitch; + rhs_padded_strides[positive_ix] = rhs_pitch; rhs_pitch *= B_shape[positive_ib]; } - TArray fdm_output_strides(static_cast(X_shape.NumDimensions())); + TArray fdm_output_strides(X_num_dim); //TODO: fast_divmod only supports int32 for (int i = 0; i < fdm_output_strides.Size(); i++) fdm_output_strides[i] = fast_divmod(static_cast(lhs_padded_strides[i])); @@ -281,7 +283,7 @@ Status DispatchBiasSoftMaxForwardViaDnnLibraryImpl( // invoke elementwise add with broadcast kernel ::onnxruntime::cuda::BinaryElementWiseImpl( stream, - (int32_t)X_shape.NumDimensions(), + (int32_t)X_num_dim, &lhs_padded_strides, X_data, &rhs_padded_strides, diff --git a/onnxruntime/contrib_ops/cuda/math/fft_ops.cc b/onnxruntime/contrib_ops/cuda/math/fft_ops.cc index d808c00bd5..31dfa99b0d 100644 --- a/onnxruntime/contrib_ops/cuda/math/fft_ops.cc +++ b/onnxruntime/contrib_ops/cuda/math/fft_ops.cc @@ -19,7 +19,7 @@ void SetFFTState(FFTState* state, cudaDataType exec_type) { memset(state, 0, sizeof(FFTState)); state->signal_ndim = signal_ndim; - for (int32_t i = 0; i < signal_dims.size(); ++i) { + for (int64_t i = 0; i < static_cast(signal_dims.size()); ++i) { state->signal_dims[i] = signal_dims[i]; } state->itype = itype; @@ -82,12 +82,12 @@ Status FFTBase::DoFFT(OpKernelContext* context, const Tensor* X, bool complex return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "cuFFT does not support tensor type: ", X->DataType()); } - //calculate batch size + // calculate batch size int64_t batch_ndim = input_ndim - signal_tensor_ndim; int64_t batch_size = (batch_ndim == 0 ? 1 : input_shape.SizeToDimension(batch_ndim)); - //infer output shape - //copy the input shape up to the second last dimention + // infer output shape + // copy the input shape up to the second last dimention std::vector output_dims, signal_dims; int i = 0; for (; i < batch_ndim + signal_ndim_ - 1; ++i) { @@ -97,9 +97,9 @@ Status FFTBase::DoFFT(OpKernelContext* context, const Tensor* X, bool complex } } - //process the last dim(s) + // process the last dim(s) if (onesided_) { - if (complex_input && !complex_output) { //IRFFT + if (complex_input && !complex_output) { // IRFFT int64_t inferred_size = input_shape[i] * 2 - 1; output_dims.push_back(inferred_size); signal_dims.push_back(inferred_size); diff --git a/onnxruntime/contrib_ops/rocm/math/bias_softmax_impl.cu b/onnxruntime/contrib_ops/rocm/math/bias_softmax_impl.cu index 1974b3961d..133afebc8b 100644 --- a/onnxruntime/contrib_ops/rocm/math/bias_softmax_impl.cu +++ b/onnxruntime/contrib_ops/rocm/math/bias_softmax_impl.cu @@ -253,19 +253,21 @@ void DispatchBiasSoftMaxForwardViaDnnLibraryImpl( const auto* B_data = reinterpret_cast(B->template Data()); auto* Y_data = reinterpret_cast(Y->template MutableData()); + int X_num_dim = static_cast(X_shape.NumDimensions()); + // binary elementise kernel requires input pitches - TArray lhs_padded_strides(X_shape.NumDimensions()); - for (int i = -1, lhs_pitch = 1; i >= -(int)X_shape.NumDimensions(); i--) { - size_t positive_i = X_shape.NumDimensions() + i; + TArray lhs_padded_strides(X_num_dim); + for (int i = -1, lhs_pitch = 1; i >= -X_num_dim; i--) { + int positive_i = X_num_dim + i; lhs_padded_strides[positive_i] = lhs_pitch; lhs_pitch *= X_shape[positive_i]; } // set pitches for bias so it broadcasts along relevant dimensions - TArray rhs_padded_strides(X_shape.NumDimensions()); - for (int i = -1, rhs_pitch = 1; i >= -(int)X_shape.NumDimensions(); i--) { - size_t positive_ix = X_shape.NumDimensions() + i; - size_t positive_ib = B_shape.NumDimensions() + i; + TArray rhs_padded_strides(X_num_dim); + for (int i = -1, rhs_pitch = 1; i >= -X_num_dim; i--) { + int positive_ix = X_num_dim + i; + int positive_ib = static_cast(B_shape.NumDimensions()) + i; if (broadcast_axis <= positive_ix && positive_ix < softmax_axis) { rhs_padded_strides[positive_ix] = 0; continue; @@ -274,7 +276,7 @@ void DispatchBiasSoftMaxForwardViaDnnLibraryImpl( rhs_pitch *= B_shape[positive_ib]; } - TArray fdm_output_strides(X_shape.NumDimensions()); + TArray fdm_output_strides(X_num_dim); for (int i = 0; i < fdm_output_strides.Size(); i++) fdm_output_strides[i] = fast_divmod(lhs_padded_strides[i]); fast_divmod fdm_H, fdm_C; @@ -282,7 +284,7 @@ void DispatchBiasSoftMaxForwardViaDnnLibraryImpl( // invoke elementwise add with broadcast kernel ::onnxruntime::rocm::BinaryElementWiseImpl( stream, - (int32_t)X_shape.NumDimensions(), + (int32_t)X_num_dim, &lhs_padded_strides, X_data, &rhs_padded_strides, diff --git a/onnxruntime/core/providers/cuda/math/einsum_utils/einsum_auxiliary_ops.cc b/onnxruntime/core/providers/cuda/math/einsum_utils/einsum_auxiliary_ops.cc index 22e7cbfe7c..033217df99 100644 --- a/onnxruntime/core/providers/cuda/math/einsum_utils/einsum_auxiliary_ops.cc +++ b/onnxruntime/core/providers/cuda/math/einsum_utils/einsum_auxiliary_ops.cc @@ -119,8 +119,8 @@ std::unique_ptr Diagonal(const Tensor& input, int64_t dim_1, int64_t dim TensorPitches input_strides(input.Shape().GetDims()); cuda::TArray gpu_input_strides(input_strides); - auto output_rank = output_dims.size(); - cuda::TArray gpu_output_strides(static_cast(output_rank)); + auto output_rank = static_cast(output_dims.size()); + cuda::TArray gpu_output_strides(output_rank); TensorPitches output_strides(output_dims); for (auto i = 0; i < output_rank; i++) { gpu_output_strides[i] = cuda::fast_divmod(static_cast(output_strides[i])); diff --git a/onnxruntime/core/providers/cuda/tensor/pad.cc b/onnxruntime/core/providers/cuda/tensor/pad.cc index e869d55a47..9472ca2c09 100644 --- a/onnxruntime/core/providers/cuda/tensor/pad.cc +++ b/onnxruntime/core/providers/cuda/tensor/pad.cc @@ -135,7 +135,7 @@ Status Pad::ComputeInternal(OpKernelContext* ctx) const { TArray input_strides(input_pitches); auto output_dims(input_shape.AsShapeVector()); - ORT_ENFORCE(dimension_count * 2 == p_pads->size(), "'pads' attribute has wrong number of values"); + ORT_ENFORCE(static_cast(dimension_count * 2) == p_pads->size(), "'pads' attribute has wrong number of values"); // Calculate output dimensions, and handle any negative padding TArray lower_pads(dimension_count); diff --git a/onnxruntime/core/providers/cuda/tensor/upsample.cc b/onnxruntime/core/providers/cuda/tensor/upsample.cc index 82a1251f48..b943d0fd57 100644 --- a/onnxruntime/core/providers/cuda/tensor/upsample.cc +++ b/onnxruntime/core/providers/cuda/tensor/upsample.cc @@ -46,11 +46,11 @@ Status Upsample::BaseCompute(OpKernelContext* context, auto X_dims = X->Shape().GetDims(); int32_t rank = static_cast(X_dims.size()); - ORT_ENFORCE(output_dims.size() == rank, "Rank of input and output tensor should be same."); + ORT_ENFORCE(static_cast(output_dims.size()) == rank, "Rank of input and output tensor should be same."); if (rank == 0) return Status(ONNXRUNTIME, INVALID_ARGUMENT, is_resize_ ? "Resize: input tensor cannot be scalar." : "Upsample: input tensor cannot be scalar."); - if (rank != scales.size()) + if (rank != static_cast(scales.size())) return Status(ONNXRUNTIME, INVALID_ARGUMENT, is_resize_ ? "Resize: input tensor's dimension does not match the scales." : "Upsample: input tensor's dimension does not match the scales."); if (roi.size() != 2 * X->Shape().GetDims().size()) diff --git a/orttraining/orttraining/training_ops/cpu/controlflow/group.cc b/orttraining/orttraining/training_ops/cpu/controlflow/group.cc index 93616d87b9..a14716583f 100644 --- a/orttraining/orttraining/training_ops/cpu/controlflow/group.cc +++ b/orttraining/orttraining/training_ops/cpu/controlflow/group.cc @@ -29,7 +29,7 @@ Status PassThrough::Compute(OpKernelContext* context) const { const auto* X = context->Input(i); ORT_ENFORCE(X != nullptr); Tensor* Y = context->Output(i, X->Shape()); - ORT_ENFORCE(X->DataRaw() == Y->DataRaw(), "PassThrough input and outpu are not sharing the same buffer."); + ORT_ENFORCE(X->DataRaw() == Y->DataRaw(), "PassThrough input and output are not sharing the same buffer."); } return Status::OK(); }