diff --git a/include/onnxruntime/core/framework/tensor_shape.h b/include/onnxruntime/core/framework/tensor_shape.h index acf39638fe..c280f61eb1 100644 --- a/include/onnxruntime/core/framework/tensor_shape.h +++ b/include/onnxruntime/core/framework/tensor_shape.h @@ -34,13 +34,14 @@ class TensorShape : private std::vector { TensorShape(TensorShape&& /*other*/) = default; TensorShape& operator=(TensorShape&& /*other*/) = default; + TensorShape(const std::vector& dims) : std::vector(dims) {} + + TensorShape(std::vector&& dims) : std::vector(std::move(dims)) {} + + TensorShape(const std::initializer_list& dims) : std::vector(dims) {} + TensorShape(const int64_t* dimension_sizes, size_t dimension_count); - TensorShape(const std::vector& dims); - TensorShape(std::vector&& dims); - - TensorShape(const std::initializer_list& dims); - TensorShape(const std::vector& dims, size_t start, size_t end); /** diff --git a/onnxruntime/core/framework/execution_frame.cc b/onnxruntime/core/framework/execution_frame.cc index c44bb3e049..09f2f1ad55 100644 --- a/onnxruntime/core/framework/execution_frame.cc +++ b/onnxruntime/core/framework/execution_frame.cc @@ -402,54 +402,54 @@ Status ExecutionFrame::AllocateAsPerAllocationPlan(OrtValue& ort_value, int ort_ const auto& alloc_info = per_alloc_plan.location; const auto* ml_type = per_alloc_plan.value_type; - if (ml_type == nullptr) + if (ml_type == nullptr) { return Status( ONNXRUNTIME, INVALID_ARGUMENT, "Tried to allocate without valid type information, ort_value index=" + std::to_string(ort_value_index)); + } - if (ml_type->IsSparseTensorType()) { + if (ml_type->IsTensorType()) { + ORT_ENFORCE(shape, "Allocation of tensor types requires a shape."); + + // tensors + const auto* ml_data_type = static_cast(ml_type)->GetElementType(); + + AllocKind alloc_kind = per_alloc_plan.alloc_kind; + switch (alloc_kind) { + // Right now for kAllocate and kAllocateOutput we are using same approach. + // In the future we may want to have different way to handle it. + case AllocKind::kAllocateOutput: + case AllocKind::kAllocate: { + ORT_RETURN_IF_ERROR(AllocateMLValueTensorSelfOwnBuffer(ort_value, ort_value_index, ml_data_type, alloc_info, + *shape, per_alloc_plan.create_fence_if_async)); + break; + } + case AllocKind::kReuse: { + int reuse_mlvalue_index = per_alloc_plan.reused_buffer; + ORT_RETURN_IF_ERROR(AllocateMLValueTensorPreAllocateBuffer( + ort_value, reuse_mlvalue_index, ml_data_type, alloc_info, *shape, per_alloc_plan.create_fence_if_async)); + break; + } + case AllocKind::kShare: { + int reuse_mlvalue_index = per_alloc_plan.reused_buffer; + // copy at the OrtValue level so the shared_ptr for the data is shared between the two OrtValue instances + ort_value = GetMutableMLValue(reuse_mlvalue_index); + break; + } + default: { + std::ostringstream ostr; + ostr << "Invalid allocation kind: " << static_cast::type>(alloc_kind); + return Status(ONNXRUNTIME, FAIL, ostr.str()); + } + } + + return Status::OK(); + } else if (ml_type->IsSparseTensorType()) { return AllocateSparseTensor(ort_value, *ml_type, GetAllocator(alloc_info), *shape, nnz, per_alloc_plan.create_fence_if_async, session_state_); - } - if (!ml_type->IsTensorType()) { + } else { return AllocateTraditionalMLValue(ort_value, *static_cast(ml_type)); } - - ORT_ENFORCE(shape, "Allocation of tensor types requires a shape."); - - // tensors - const auto* ml_data_type = static_cast(ml_type)->GetElementType(); - - AllocKind alloc_kind = per_alloc_plan.alloc_kind; - switch (alloc_kind) { - // Right now for kAllocate and kAllocateOutput we are using same approach. - // In the future we may want to have different way to handle it. - case AllocKind::kAllocateOutput: - case AllocKind::kAllocate: { - ORT_RETURN_IF_ERROR(AllocateMLValueTensorSelfOwnBuffer(ort_value, ort_value_index, ml_data_type, alloc_info, - *shape, per_alloc_plan.create_fence_if_async)); - break; - } - case AllocKind::kReuse: { - int reuse_mlvalue_index = per_alloc_plan.reused_buffer; - ORT_RETURN_IF_ERROR(AllocateMLValueTensorPreAllocateBuffer( - ort_value, reuse_mlvalue_index, ml_data_type, alloc_info, *shape, per_alloc_plan.create_fence_if_async)); - break; - } - case AllocKind::kShare: { - int reuse_mlvalue_index = per_alloc_plan.reused_buffer; - // copy at the OrtValue level so the shared_ptr for the data is shared between the two OrtValue instances - ort_value = GetMutableMLValue(reuse_mlvalue_index); - break; - } - default: { - std::ostringstream ostr; - ostr << "Invalid allocation kind: " << static_cast::type>(alloc_kind); - return Status(ONNXRUNTIME, FAIL, ostr.str()); - } - } - - return Status::OK(); } AllocatorPtr ExecutionFrame::GetAllocatorImpl(const OrtAllocatorInfo& info) const { diff --git a/onnxruntime/core/framework/tensor_shape.cc b/onnxruntime/core/framework/tensor_shape.cc index b37c2e9499..72acfd7921 100644 --- a/onnxruntime/core/framework/tensor_shape.cc +++ b/onnxruntime/core/framework/tensor_shape.cc @@ -8,16 +8,8 @@ namespace onnxruntime { -TensorShape::TensorShape(const std::vector& dims) : std::vector(dims) { -} - -TensorShape::TensorShape(std::vector&& dims) : std::vector(std::move(dims)) { -} - -TensorShape::TensorShape(const std::initializer_list& dims) : std::vector(dims) { -} - -TensorShape::TensorShape(const int64_t* dimension_sizes, size_t dimension_count) : std::vector(dimension_count) { +TensorShape::TensorShape(const int64_t* dimension_sizes, size_t dimension_count) + : std::vector(dimension_count) { for (size_t i = 0; i < dimension_count; ++i) { (*this)[i] = dimension_sizes[i]; } diff --git a/onnxruntime/core/providers/cpu/tensor/concat.cc b/onnxruntime/core/providers/cpu/tensor/concat.cc index afca4d421e..0a26ea2a0d 100644 --- a/onnxruntime/core/providers/cpu/tensor/concat.cc +++ b/onnxruntime/core/providers/cpu/tensor/concat.cc @@ -34,16 +34,17 @@ Status ConcatBase::PrepareForCompute(OpKernelContext* ctx, int input_count, Prep auto& inputs_n = *tensor_pointer; const auto& inputs_n_dims = inputs_n.Shape().GetDims(); const size_t inputs_n_rank = inputs_n_dims.size(); - ORT_ENFORCE(inputs_n_rank == inputs_0_rank, "Ranks of input data are different, cannot concatenate them, " - "expected rank: ", std::to_string(inputs_0_rank), " got: ", std::to_string(inputs_n_rank)); + ORT_ENFORCE(inputs_n_rank == inputs_0_rank, + "Ranks of input data are different, cannot concatenate them. expected rank: ", + inputs_0_rank, " got: ", inputs_n_rank); // Ensure all the other (non-concat) axes match for (size_t axis_index = 0; axis_index < inputs_0_rank; ++axis_index) { num_elements *= inputs_n_dims[axis_index]; if (axis_index == p.axis) continue; ORT_RETURN_IF_NOT(inputs_n_dims[axis_index] == inputs_0_dims[axis_index], - "Non concat axis dimensions must match: Axis ", - axis_index, " has mismatched dimensions of ", inputs_n_dims[axis_index], + "Non concat axis dimensions must match: Axis ", + axis_index, " has mismatched dimensions of ", inputs_n_dims[axis_index], " and ", inputs_0_dims[axis_index]); } tensor_num_elements[index] = num_elements; @@ -58,7 +59,7 @@ Status ConcatBase::PrepareForCompute(OpKernelContext* ctx, int input_count, Prep // Calculate the shape of the output tensor std::vector dims(inputs_0_rank); - size_t num_elements = 1; // cache size of the first input along the way + size_t num_elements = 1; // cache size of the first input along the way for (size_t dimension_index = 0; dimension_index < inputs_0_rank; dimension_index++) { dims[dimension_index] = inputs_0_dims[dimension_index]; num_elements *= inputs_0_dims[dimension_index]; @@ -66,7 +67,7 @@ Status ConcatBase::PrepareForCompute(OpKernelContext* ctx, int input_count, Prep tensor_num_elements[0] = num_elements; dims[p.axis] = concat_axis_size; TensorShape output_shape(dims); - + auto& concat_result = *ctx->Output(0, output_shape); p.output_tensor = &concat_result; p.output_num_elements = output_shape.Size(); @@ -75,7 +76,7 @@ Status ConcatBase::PrepareForCompute(OpKernelContext* ctx, int input_count, Prep // there is no need to proceed further if (p.output_num_elements == 0) return Status::OK(); - + // The output_axis_pitch is the number of elements to add to move to the next split axis in the output p.output_axis_pitch = 1; for (size_t i = inputs_0_rank; i-- > p.axis;) p.output_axis_pitch *= dims[i]; @@ -110,7 +111,7 @@ Status Concat::Compute(OpKernelContext* ctx) const { auto is_string_type = ctx->Input(0)->DataType() == DataTypeImpl::GetType(); - int64_t output_offset = 0; + int64_t initial_output_offset = 0; // initial offset for each input auto element_bytes = p.output_tensor->DataType()->Size(); for (int input_index = 0; input_index < input_count; input_index++) { const auto& prep = p.inputs[input_index]; @@ -124,19 +125,29 @@ Status Concat::Compute(OpKernelContext* ctx) const { // Copy the data across. For every 'input_axis_pitch' values copied, we move over by the 'output_axis_pitch' uint8_t* output = static_cast(p.output_tensor->MutableDataRaw()); - for (size_t idxCopy = 0; idxCopy < input_size / input_axis_pitch; ++idxCopy) { + int64_t cur_out_offset = 0; + int64_t cur_in_offset = 0; + for (size_t idx_copy = 0, end = input_size / input_axis_pitch; idx_copy < end; ++idx_copy) { if (is_string_type) { - for (int idxItem = 0; idxItem < input_axis_pitch; ++idxItem) - reinterpret_cast(output)[output_offset + idxCopy * p.output_axis_pitch + idxItem] = - reinterpret_cast(input)[idxCopy * input_axis_pitch + idxItem]; - } else + size_t out = initial_output_offset + cur_out_offset; + for (int idx_item = 0; idx_item < input_axis_pitch; ++idx_item) { + reinterpret_cast(output)[out + idx_item] = + reinterpret_cast(input)[cur_in_offset + idx_item]; + } + } else { memcpy( - output + (output_offset + idxCopy * p.output_axis_pitch) * element_bytes, - input + idxCopy * input_axis_pitch * element_bytes, + output + (initial_output_offset + cur_out_offset) * element_bytes, + input + cur_in_offset * element_bytes, input_axis_pitch * element_bytes); + } + + cur_out_offset += p.output_axis_pitch; + cur_in_offset += input_axis_pitch; } - output_offset += input_axis_pitch; + + initial_output_offset += input_axis_pitch; } + return Status::OK(); } diff --git a/onnxruntime/core/providers/cpu/tensor/nonzero_op.cc b/onnxruntime/core/providers/cpu/tensor/nonzero_op.cc index 7c725bba0f..3d3a7fed69 100644 --- a/onnxruntime/core/providers/cpu/tensor/nonzero_op.cc +++ b/onnxruntime/core/providers/cpu/tensor/nonzero_op.cc @@ -40,24 +40,6 @@ NONZERO_TYPED_KERNEL(float) #undef NONZERO_TYPED_KERNEL_WITH_TYPE_NAME #undef NONZERO_TYPED_KERNEL -namespace { -void IncrementCoordinate(const TensorShape& shape, std::vector* coordinate) { - assert(coordinate->size() == shape.NumDimensions()); - - size_t i = 0; - const size_t i_end = coordinate->size(); - for (; i < i_end; ++i) { - const size_t i_from_back = i_end - i - 1; - if ((*coordinate)[i_from_back] != shape[i_from_back] - 1) break; - (*coordinate)[i_from_back] = 0; - } - - if (i < i_end) { - ++(*coordinate)[i_end - i - 1]; - } -} -} // namespace - template Status NonZero::Compute(OpKernelContext* context) const { const auto X = context->Input(0); @@ -71,19 +53,37 @@ Status NonZero::Compute(OpKernelContext* context) const { // reserve enough space for indices for every element of X non_zero_indices_buffer.reserve(X_shape.Size() * coordinate_size); + const T* data = X->Data(); + if (X_shape.IsScalar()) { - const T& value = *(X->Data()); + const T& value = *data; if (value != T{}) { non_zero_indices_buffer.push_back(0); } } else { std::vector coordinate(coordinate_size, 0); - for (const T& value : X->DataAsSpan()) { + + // as we iterate the entries, increment the coordinate for the current entry + // e.g. if shape is {2,2}, we start with 0,0 increment to 0,1 increment to 1,0 and finally 1,1 + auto increment_coordinate = [&coordinate, &coordinate_size, &X_shape]() { + for (int64_t idx = coordinate_size - 1; idx >= 0; --idx) { + int64_t& cur_coord = coordinate[idx]; + if (cur_coord != X_shape[idx] - 1) { + ++cur_coord; + break; + } + cur_coord = 0; + } + }; + + for (size_t i = 0, end = X_shape.Size(); i < end; ++i) { + const T& value = *data++; if (value != T{}) { non_zero_indices_buffer.insert(non_zero_indices_buffer.end(), coordinate.begin(), coordinate.end()); } - IncrementCoordinate(X_shape, &coordinate); + + increment_coordinate(); } } diff --git a/onnxruntime/test/providers/cpu/tensor/nonzero_op_test.cc b/onnxruntime/test/providers/cpu/tensor/nonzero_op_test.cc index 75afeca8a7..cccab727af 100644 --- a/onnxruntime/test/providers/cpu/tensor/nonzero_op_test.cc +++ b/onnxruntime/test/providers/cpu/tensor/nonzero_op_test.cc @@ -48,6 +48,25 @@ TEST(NonZeroOpTest, BasicBool) { test.Run(); } +TEST(NonZeroOpTest, ThreeDims) { + OpTester test{kOpName, kOpVersion}; + + std::vector X_dims{2, 2, 2}; + std::vector X{0, 1, + 1, 0, + + 1, 0, + 1, 0}; + test.AddInput("X", X_dims, std::vector{X.begin(), X.end()}); + test.AddOutput( + "Y", {3, 4}, + {0, 0, 1, 1, + 0, 1, 0, 1, + 1, 0, 0, 0}); + + test.Run(); +} + TEST(NonZeroOpTest, Scalar) { { OpTester test{kOpName, kOpVersion};