mirror of
https://github.com/saymrwulf/onnxruntime.git
synced 2026-05-18 21:21:17 +00:00
parent
0bad5b1b5a
commit
9765ef8b4e
7 changed files with 37 additions and 33 deletions
|
|
@ -250,29 +250,31 @@ Status DispatchBiasSoftMaxForwardViaDnnLibraryImpl(
|
|||
const auto* B_data = reinterpret_cast<const CudaT*>(B->template Data<T>());
|
||||
auto* Y_data = reinterpret_cast<CudaT*>(Y->template MutableData<T>());
|
||||
|
||||
int X_num_dim = static_cast<int>(X_shape.NumDimensions());
|
||||
|
||||
// binary elementwise kernel requires input pitches
|
||||
TArray<int64_t> lhs_padded_strides(static_cast<int>(X_shape.NumDimensions()));
|
||||
TArray<int64_t> lhs_padded_strides(X_num_dim);
|
||||
int64_t lhs_pitch = 1, rhs_pitch = 1;
|
||||
for (int i = -1; i >= -(int)X_shape.NumDimensions(); i--) {
|
||||
size_t positive_i = X_shape.NumDimensions() + i;
|
||||
lhs_padded_strides[static_cast<int>(positive_i)] = lhs_pitch;
|
||||
for (int i = -1; i >= -X_num_dim; i--) {
|
||||
int positive_i = X_num_dim + i;
|
||||
lhs_padded_strides[positive_i] = lhs_pitch;
|
||||
lhs_pitch *= X_shape[positive_i];
|
||||
}
|
||||
|
||||
// set pitches for bias so it broadcasts along relevant dimensions
|
||||
TArray<int64_t> rhs_padded_strides(static_cast<int>(X_shape.NumDimensions()));
|
||||
for (int i = -1; i >= -(int)X_shape.NumDimensions(); i--) {
|
||||
size_t positive_ix = X_shape.NumDimensions() + i;
|
||||
size_t positive_ib = B_shape.NumDimensions() + i;
|
||||
TArray<int64_t> rhs_padded_strides(X_num_dim);
|
||||
for (int i = -1; i >= -X_num_dim; i--) {
|
||||
int positive_ix = X_num_dim + i;
|
||||
int positive_ib = static_cast<int>(B_shape.NumDimensions()) + i;
|
||||
if (broadcast_axis <= positive_ix && positive_ix < softmax_axis) {
|
||||
rhs_padded_strides[static_cast<int>(positive_ix)] = 0;
|
||||
rhs_padded_strides[positive_ix] = 0;
|
||||
continue;
|
||||
}
|
||||
rhs_padded_strides[static_cast<int>(positive_ix)] = rhs_pitch;
|
||||
rhs_padded_strides[positive_ix] = rhs_pitch;
|
||||
rhs_pitch *= B_shape[positive_ib];
|
||||
}
|
||||
|
||||
TArray<fast_divmod> fdm_output_strides(static_cast<int>(X_shape.NumDimensions()));
|
||||
TArray<fast_divmod> fdm_output_strides(X_num_dim);
|
||||
//TODO: fast_divmod only supports int32
|
||||
for (int i = 0; i < fdm_output_strides.Size(); i++)
|
||||
fdm_output_strides[i] = fast_divmod(static_cast<int>(lhs_padded_strides[i]));
|
||||
|
|
@ -281,7 +283,7 @@ Status DispatchBiasSoftMaxForwardViaDnnLibraryImpl(
|
|||
// invoke elementwise add with broadcast kernel
|
||||
::onnxruntime::cuda::BinaryElementWiseImpl(
|
||||
stream,
|
||||
(int32_t)X_shape.NumDimensions(),
|
||||
(int32_t)X_num_dim,
|
||||
&lhs_padded_strides,
|
||||
X_data,
|
||||
&rhs_padded_strides,
|
||||
|
|
|
|||
|
|
@ -19,7 +19,7 @@ void SetFFTState(FFTState* state,
|
|||
cudaDataType exec_type) {
|
||||
memset(state, 0, sizeof(FFTState));
|
||||
state->signal_ndim = signal_ndim;
|
||||
for (int32_t i = 0; i < signal_dims.size(); ++i) {
|
||||
for (int64_t i = 0; i < static_cast<int64_t>(signal_dims.size()); ++i) {
|
||||
state->signal_dims[i] = signal_dims[i];
|
||||
}
|
||||
state->itype = itype;
|
||||
|
|
@ -82,12 +82,12 @@ Status FFTBase<T>::DoFFT(OpKernelContext* context, const Tensor* X, bool complex
|
|||
return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "cuFFT does not support tensor type: ", X->DataType());
|
||||
}
|
||||
|
||||
//calculate batch size
|
||||
// calculate batch size
|
||||
int64_t batch_ndim = input_ndim - signal_tensor_ndim;
|
||||
int64_t batch_size = (batch_ndim == 0 ? 1 : input_shape.SizeToDimension(batch_ndim));
|
||||
|
||||
//infer output shape
|
||||
//copy the input shape up to the second last dimension
|
||||
// infer output shape
|
||||
// copy the input shape up to the second last dimension
|
||||
std::vector<int64_t> output_dims, signal_dims;
|
||||
int i = 0;
|
||||
for (; i < batch_ndim + signal_ndim_ - 1; ++i) {
|
||||
|
|
@ -97,9 +97,9 @@ Status FFTBase<T>::DoFFT(OpKernelContext* context, const Tensor* X, bool complex
|
|||
}
|
||||
}
|
||||
|
||||
//process the last dim(s)
|
||||
// process the last dim(s)
|
||||
if (onesided_) {
|
||||
if (complex_input && !complex_output) { //IRFFT
|
||||
if (complex_input && !complex_output) { // IRFFT
|
||||
int64_t inferred_size = input_shape[i] * 2 - 1;
|
||||
output_dims.push_back(inferred_size);
|
||||
signal_dims.push_back(inferred_size);
|
||||
|
|
|
|||
|
|
@ -253,19 +253,21 @@ void DispatchBiasSoftMaxForwardViaDnnLibraryImpl(
|
|||
const auto* B_data = reinterpret_cast<const HipT*>(B->template Data<T>());
|
||||
auto* Y_data = reinterpret_cast<HipT*>(Y->template MutableData<T>());
|
||||
|
||||
int X_num_dim = static_cast<int>(X_shape.NumDimensions());
|
||||
|
||||
// binary elementwise kernel requires input pitches
|
||||
TArray<int64_t> lhs_padded_strides(X_shape.NumDimensions());
|
||||
for (int i = -1, lhs_pitch = 1; i >= -(int)X_shape.NumDimensions(); i--) {
|
||||
size_t positive_i = X_shape.NumDimensions() + i;
|
||||
TArray<int64_t> lhs_padded_strides(X_num_dim);
|
||||
for (int i = -1, lhs_pitch = 1; i >= -X_num_dim; i--) {
|
||||
int positive_i = X_num_dim + i;
|
||||
lhs_padded_strides[positive_i] = lhs_pitch;
|
||||
lhs_pitch *= X_shape[positive_i];
|
||||
}
|
||||
|
||||
// set pitches for bias so it broadcasts along relevant dimensions
|
||||
TArray<int64_t> rhs_padded_strides(X_shape.NumDimensions());
|
||||
for (int i = -1, rhs_pitch = 1; i >= -(int)X_shape.NumDimensions(); i--) {
|
||||
size_t positive_ix = X_shape.NumDimensions() + i;
|
||||
size_t positive_ib = B_shape.NumDimensions() + i;
|
||||
TArray<int64_t> rhs_padded_strides(X_num_dim);
|
||||
for (int i = -1, rhs_pitch = 1; i >= -X_num_dim; i--) {
|
||||
int positive_ix = X_num_dim + i;
|
||||
int positive_ib = static_cast<int>(B_shape.NumDimensions()) + i;
|
||||
if (broadcast_axis <= positive_ix && positive_ix < softmax_axis) {
|
||||
rhs_padded_strides[positive_ix] = 0;
|
||||
continue;
|
||||
|
|
@ -274,7 +276,7 @@ void DispatchBiasSoftMaxForwardViaDnnLibraryImpl(
|
|||
rhs_pitch *= B_shape[positive_ib];
|
||||
}
|
||||
|
||||
TArray<fast_divmod> fdm_output_strides(X_shape.NumDimensions());
|
||||
TArray<fast_divmod> fdm_output_strides(X_num_dim);
|
||||
for (int i = 0; i < fdm_output_strides.Size(); i++)
|
||||
fdm_output_strides[i] = fast_divmod(lhs_padded_strides[i]);
|
||||
fast_divmod fdm_H, fdm_C;
|
||||
|
|
@ -282,7 +284,7 @@ void DispatchBiasSoftMaxForwardViaDnnLibraryImpl(
|
|||
// invoke elementwise add with broadcast kernel
|
||||
::onnxruntime::rocm::BinaryElementWiseImpl(
|
||||
stream,
|
||||
(int32_t)X_shape.NumDimensions(),
|
||||
(int32_t)X_num_dim,
|
||||
&lhs_padded_strides,
|
||||
X_data,
|
||||
&rhs_padded_strides,
|
||||
|
|
|
|||
|
|
@ -119,8 +119,8 @@ std::unique_ptr<Tensor> Diagonal(const Tensor& input, int64_t dim_1, int64_t dim
|
|||
TensorPitches input_strides(input.Shape().GetDims());
|
||||
cuda::TArray<int64_t> gpu_input_strides(input_strides);
|
||||
|
||||
auto output_rank = output_dims.size();
|
||||
cuda::TArray<cuda::fast_divmod> gpu_output_strides(static_cast<int32_t>(output_rank));
|
||||
auto output_rank = static_cast<int32_t>(output_dims.size());
|
||||
cuda::TArray<cuda::fast_divmod> gpu_output_strides(output_rank);
|
||||
TensorPitches output_strides(output_dims);
|
||||
for (auto i = 0; i < output_rank; i++) {
|
||||
gpu_output_strides[i] = cuda::fast_divmod(static_cast<int>(output_strides[i]));
|
||||
|
|
|
|||
|
|
@ -135,7 +135,7 @@ Status Pad<T>::ComputeInternal(OpKernelContext* ctx) const {
|
|||
TArray<int64_t> input_strides(input_pitches);
|
||||
|
||||
auto output_dims(input_shape.AsShapeVector());
|
||||
ORT_ENFORCE(dimension_count * 2 == p_pads->size(), "'pads' attribute has wrong number of values");
|
||||
ORT_ENFORCE(static_cast<size_t>(dimension_count * 2) == p_pads->size(), "'pads' attribute has wrong number of values");
|
||||
|
||||
// Calculate output dimensions, and handle any negative padding
|
||||
TArray<int64_t> lower_pads(dimension_count);
|
||||
|
|
|
|||
|
|
@ -46,11 +46,11 @@ Status Upsample<T>::BaseCompute(OpKernelContext* context,
|
|||
auto X_dims = X->Shape().GetDims();
|
||||
int32_t rank = static_cast<int32_t>(X_dims.size());
|
||||
|
||||
ORT_ENFORCE(output_dims.size() == rank, "Rank of input and output tensor should be same.");
|
||||
ORT_ENFORCE(static_cast<int32_t>(output_dims.size()) == rank, "Rank of input and output tensor should be same.");
|
||||
if (rank == 0)
|
||||
return Status(ONNXRUNTIME, INVALID_ARGUMENT,
|
||||
is_resize_ ? "Resize: input tensor cannot be scalar." : "Upsample: input tensor cannot be scalar.");
|
||||
if (rank != scales.size())
|
||||
if (rank != static_cast<int32_t>(scales.size()))
|
||||
return Status(ONNXRUNTIME, INVALID_ARGUMENT,
|
||||
is_resize_ ? "Resize: input tensor's dimension does not match the scales." : "Upsample: input tensor's dimension does not match the scales.");
|
||||
if (roi.size() != 2 * X->Shape().GetDims().size())
|
||||
|
|
|
|||
|
|
@ -29,7 +29,7 @@ Status PassThrough::Compute(OpKernelContext* context) const {
|
|||
const auto* X = context->Input<Tensor>(i);
|
||||
ORT_ENFORCE(X != nullptr);
|
||||
Tensor* Y = context->Output(i, X->Shape());
|
||||
ORT_ENFORCE(X->DataRaw() == Y->DataRaw(), "PassThrough input and outpu are not sharing the same buffer.");
|
||||
ORT_ENFORCE(X->DataRaw() == Y->DataRaw(), "PassThrough input and output are not sharing the same buffer.");
|
||||
}
|
||||
return Status::OK();
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in a new issue