// Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. #if !defined(DISABLE_SPARSE_TENSORS) #include "core/framework/sparse_utils.h" #include "core/common/span_utils.h" #include "core/common/status.h" #include "core/framework/tensor.h" #include "core/framework/data_types_internal.h" #include "core/framework/data_transfer_manager.h" #include "core/framework/sparse_tensor.h" #include "core/util/math_cpuonly.h" namespace onnxruntime { namespace sparse_utils { // Copy element using CopyElementFunc = void (*)(void* dest, const void* src, int64_t dest_index, int64_t src_index); template inline void CopyElement(void* dst, const void* src, int64_t dst_index, int64_t src_index) { reinterpret_cast(dst)[dst_index] = reinterpret_cast(src)[src_index]; } void CopyString(void* dst, const void* src, int64_t dst_index, int64_t src_index) { reinterpret_cast(dst)[dst_index] = reinterpret_cast(src)[src_index]; } template struct NotZero { bool operator()(T v) const { return v != T{0}; } }; template <> struct NotZero { bool operator()(const std::string& s) const { return !s.empty(); } }; #if !defined(ORT_MINIMAL_BUILD) template void ScanAndRecordCsr(gsl::span src_span, int64_t cols, std::vector& inner, std::vector& outer, ValueRecorder recorder) { int64_t row = 0; int64_t index = 0; outer.push_back(0); NotZero not_zero; for (const auto& v : src_span) { auto cur_row = index / cols; if (cur_row != row) { outer.push_back(static_cast(inner.size())); row = cur_row; } if (not_zero(v)) { auto cur_col = index - cur_row * cols; inner.push_back(cur_col); recorder(v); } ++index; } outer.push_back(static_cast(inner.size())); } Status DenseTensorToSparseCsr(const DataTransferManager& data_manager, const Tensor& src, const AllocatorPtr& cpu_allocator, const AllocatorPtr& dst_allocator, SparseTensor& dst) { const auto& src_dims = src.Shape().GetDims(); if (src_dims.size() != 2) { return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, "Currently do not support dims higher than 2 dimensions: ", src_dims.size()); } const bool is_string = src.IsDataTypeString(); if (is_string && dst_allocator->Info().device.Type() != OrtDevice::CPU) { return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, "Unable to convert strings tensor to a sparse tensor that not on CPU"); } const IDataTransfer* data_transfer = data_manager.GetDataTransfer(cpu_allocator->Info().device, dst_allocator->Info().device); ORT_RETURN_IF_NOT(data_transfer != nullptr, "Unable to find a data transfer for copying from device type: ", cpu_allocator->Info().device.Type(), " to device type: ", dst_allocator->Info().device.Type()); const auto element_size = src.DataType()->Size(); gsl::span src_span; Tensor src_cpu; if (src.Location().device.Type() != OrtDevice::CPU) { Tensor t(src.DataType(), src.Shape(), cpu_allocator); ORT_RETURN_IF_ERROR(data_manager.CopyTensor(src, t)); src_cpu = std::move(t); src_span = gsl::make_span(reinterpret_cast(src_cpu.DataRaw()), src_cpu.SizeInBytes()); } else { src_span = gsl::make_span(reinterpret_cast(src.DataRaw()), src.SizeInBytes()); } const auto rows = src_dims[0]; const auto cols = src_dims[1]; std::vector inner_indices; inner_indices.reserve(static_cast(src.Shape().Size() / 2)); std::vector outer_indices; outer_indices.reserve(static_cast(rows) + 1); std::vector values_8; std::vector values_16; std::vector values_32; std::vector values_64; std::vector> values_str; Tensor nnz_tensor; if (is_string) { auto str_span = src.DataAsSpan(); ScanAndRecordCsr(str_span, cols, inner_indices, outer_indices, [&](const std::string& s) { values_str.push_back(std::cref(s)); }); } else { switch (element_size) { case sizeof(uint8_t): { ScanAndRecordCsr(src_span, cols, inner_indices, outer_indices, [&](uint8_t v) { values_8.push_back(v); }); Tensor t(src.DataType(), {static_cast(values_8.size())}, values_8.data(), cpu_allocator->Info()); nnz_tensor = std::move(t); } break; case sizeof(uint16_t): { // MFFloat16 and BFloat16 are handled fine auto span16 = ReinterpretAsSpan(src_span); ScanAndRecordCsr(span16, cols, inner_indices, outer_indices, [&](uint16_t v) { values_16.push_back(v); }); Tensor t(src.DataType(), {static_cast(values_16.size())}, values_16.data(), cpu_allocator->Info()); nnz_tensor = std::move(t); } break; case sizeof(uint32_t): { auto span32 = ReinterpretAsSpan(src_span); ScanAndRecordCsr(span32, cols, inner_indices, outer_indices, [&](uint32_t v) { values_32.push_back(v); }); Tensor t(src.DataType(), {static_cast(values_32.size())}, values_32.data(), cpu_allocator->Info()); nnz_tensor = std::move(t); } break; case sizeof(uint64_t): { auto span64 = ReinterpretAsSpan(src_span); ScanAndRecordCsr(span64, cols, inner_indices, outer_indices, [&](uint64_t v) { values_64.push_back(v); }); Tensor t(src.DataType(), {static_cast(values_64.size())}, values_64.data(), cpu_allocator->Info()); nnz_tensor = std::move(t); } break; default: return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, "Unsupported element size: ", element_size); } } const auto nnz = inner_indices.size(); const size_t outer_size = (nnz > 0) ? outer_indices.size() : 0U; SparseTensor dst_tensor(src.DataType(), src.Shape(), dst_allocator); auto mutator = dst_tensor.MakeCsrData(nnz, nnz, outer_size); if (nnz > 0) { if (is_string) { auto dst_span = mutator.Values().MutableDataAsSpan(); std::copy(values_str.cbegin(), values_str.cend(), dst_span.begin()); } else { ORT_RETURN_IF_ERROR(data_transfer->CopyTensor(nnz_tensor, mutator.Values())); } auto index_type = DataTypeImpl::GetType(); Tensor inner(index_type, {static_cast(nnz)}, inner_indices.data(), cpu_allocator->Info()); ORT_RETURN_IF_ERROR(data_transfer->CopyTensor(inner, mutator.Inner())); Tensor outer(index_type, {static_cast(outer_size)}, outer_indices.data(), cpu_allocator->Info()); ORT_RETURN_IF_ERROR(data_transfer->CopyTensor(outer, mutator.Outer())); } dst = std::move(dst_tensor); return Status::OK(); } Status SparseCsrToDenseTensor(const DataTransferManager& data_manager, const SparseTensor& src, const AllocatorPtr& cpu_allocator, const AllocatorPtr& dst_allocator, Tensor& dst) { const auto& src_dims = src.DenseShape().GetDims(); if (src_dims.size() != 2) { return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, "Support 2-D matrices only"); } if (!(src.Format() == SparseFormat::kCsrc)) { return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, "Input must be of CSR format"); } const bool is_string = src.IsDataTypeString(); if (is_string && dst_allocator->Info().device.Type() != OrtDevice::CPU) { return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, "Unable to convert strings tensor to a sparse tensor that is not on CPU"); } const AllocatorPtr& conversion_allocator = (dst_allocator->Info().device.Type() == OrtDevice::CPU) ? dst_allocator : cpu_allocator; Tensor cpu_result(src.DataType(), src.DenseShape(), conversion_allocator); if (!is_string) { memset(cpu_result.MutableDataRaw(), 0, cpu_result.SizeInBytes()); } if (src.NumValues() > 0) { const auto rows = src_dims[0]; const auto cols = src_dims[1]; { auto csr_view = src.AsCsr(); const auto inner_num = csr_view.Inner().Shape().Size(); const auto outer_num = csr_view.Outer().Shape().Size(); ORT_ENFORCE(inner_num == src.Values().Shape().Size(), "Expecting inner indices to be same as nnz. Got: ", inner_num); ORT_ENFORCE(outer_num == (rows + 1), "Outer indices must be M + 1. Got: ", outer_num); } CopyElementFunc copy_func; if (is_string) { copy_func = CopyString; } else { const auto element_size = src.DataType()->Size(); switch (element_size) { case sizeof(uint8_t): copy_func = CopyElement; break; case sizeof(uint16_t): copy_func = CopyElement; break; case sizeof(uint32_t): copy_func = CopyElement; break; case sizeof(uint64_t): copy_func = CopyElement; break; default: return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, "Unsupported element size: ", element_size); } } SparseTensor cpu_src; const void* values = nullptr; gsl::span inner_span; gsl::span outer_span; if (src.Location().device.Type() != OrtDevice::CPU) { SparseTensor t(src.DataType(), src.DenseShape(), cpu_allocator); ORT_RETURN_IF_ERROR(data_manager.CopySparseTensor(src, t)); cpu_src = std::move(t); values = cpu_src.Values().DataRaw(); inner_span = cpu_src.AsCsr().Inner().DataAsSpan(); outer_span = cpu_src.AsCsr().Outer().DataAsSpan(); } else { values = src.Values().DataRaw(); inner_span = src.AsCsr().Inner().DataAsSpan(); outer_span = src.AsCsr().Outer().DataAsSpan(); } void* output = cpu_result.MutableDataRaw(); size_t src_idx = 0; size_t inner_idx = 0; for (size_t out_i = 1; out_i < outer_span.size(); ++out_i) { auto row_size = outer_span[out_i] - outer_span[out_i - 1]; for (int64_t cnt = 0; cnt < row_size; ++cnt, ++inner_idx) { assert(inner_idx < inner_span.size()); auto col = inner_span[inner_idx]; auto dst_idx = (out_i - 1) * cols + col; copy_func(output, values, dst_idx, src_idx); } } } if (dst_allocator->Info().device.Type() != OrtDevice::CPU) { Tensor dest_tensor(src.DataType(), src.DenseShape(), dst_allocator); ORT_RETURN_IF_ERROR(data_manager.CopyTensor(cpu_result, dest_tensor)); dst = std::move(dest_tensor); } else { dst = std::move(cpu_result); } return Status::OK(); } Status SparseCooToDenseTensor(const DataTransferManager& data_manager, const SparseTensor& src, const AllocatorPtr& cpu_allocator, const AllocatorPtr& dst_allocator, Tensor& dst) { const auto& src_dims = src.DenseShape().GetDims(); if (src_dims.size() != 2) { return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, "Currently do not support dims higher than 2 dimensions: ", src_dims.size()); } if (!(src.Format() == SparseFormat::kCoo)) { return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, "Input must be of COO format"); } const bool is_string = src.IsDataTypeString(); if (is_string && dst_allocator->Info().device.Type() != OrtDevice::CPU) { return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, "Unable to convert strings tensor to a sparse tensor that is not on CPU"); } const AllocatorPtr& conversion_allocator = (dst_allocator->Info().device.Type() == OrtDevice::CPU) ? dst_allocator : cpu_allocator; Tensor cpu_result(src.DataType(), src.DenseShape(), conversion_allocator); if (!is_string) { memset(cpu_result.MutableDataRaw(), 0, cpu_result.SizeInBytes()); } if (src.NumValues() > 0) { const void* values = nullptr; const int64_t* indices = nullptr; const auto num_values = src.Values().Shape().Size(); const auto num_indices = src.AsCoo().Indices().Shape().Size(); ORT_RETURN_IF_NOT((num_values == num_indices || 2 * num_values == num_indices), "Expecting indices to be equal the number of values or be twice as many"); SparseTensor src_cpu; if (src.Location().device.Type() != OrtDevice::CPU) { SparseTensor t(src.DataType(), src.DenseShape(), cpu_allocator); ORT_RETURN_IF_ERROR(data_manager.CopySparseTensor(src, t)); src_cpu = std::move(t); values = src_cpu.Values().DataRaw(); indices = src_cpu.AsCoo().Indices().Data(); } else { values = src.Values().DataRaw(); indices = src.AsCoo().Indices().Data(); } const auto element_size = src.DataType()->Size(); CopyElementFunc copy_func = nullptr; if (src.IsDataTypeString()) { copy_func = CopyString; } else { switch (element_size) { case sizeof(uint8_t): copy_func = CopyElement; break; case sizeof(uint16_t): { copy_func = CopyElement; } break; case sizeof(uint32_t): { copy_func = CopyElement; } break; case sizeof(uint64_t): { copy_func = CopyElement; } break; default: return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, "Unsupported element size: ", element_size); } } const auto dense_size = src.DenseShape().Size(); void* output = cpu_result.MutableDataRaw(); // Linear index if (num_indices == num_values) { for (int64_t src_idx = 0; src_idx < num_values; ++src_idx) { auto dst_idx = indices[src_idx]; ORT_RETURN_IF_NOT(dst_idx < dense_size, "Invalid index: ", dst_idx, " > dense_size: ", dense_size); copy_func(output, values, dst_idx, src_idx); } } else { const auto cols = src_dims[1]; for (int64_t src_idx = 0; src_idx < num_values; ++src_idx) { auto tuple_idx = src_idx * 2; auto dst_idx = indices[tuple_idx] * cols + indices[tuple_idx + 1]; ORT_RETURN_IF_NOT(dst_idx < dense_size, "Invalid index: ", dst_idx, " > dense_size: ", dense_size); copy_func(output, values, dst_idx, src_idx); } } } if (dst_allocator->Info().device.Type() != OrtDevice::CPU) { Tensor t(src.DataType(), src.DenseShape(), dst_allocator); ORT_RETURN_IF_ERROR(data_manager.CopyTensor(cpu_result, t)); dst = std::move(t); } else { dst = std::move(cpu_result); } return Status::OK(); } #endif // ORT_MINIMAL_BUILD template void ScanAndRecordCoo(gsl::span src_span, int64_t cols, bool linear, std::vector& indices, ValueRecorder recorder) { int64_t index = 0; NotZero not_zero; for (const auto& v : src_span) { if (not_zero(v)) { recorder(v); if (linear) { indices.push_back(index); } else { auto row = index / cols; auto col = index - row * cols; indices.push_back(row); indices.push_back(col); } } ++index; } } Status DenseTensorToSparseCoo(const DataTransferManager& data_manager, const Tensor& src, const AllocatorPtr& cpu_allocator, const AllocatorPtr& dst_allocator, bool linear_index, SparseTensor& dst) { const IDataTransfer* data_transfer = data_manager.GetDataTransfer(cpu_allocator->Info().device, dst_allocator->Info().device); ORT_RETURN_IF_NOT(data_transfer != nullptr, "Unable to find a data transfer for copying from device type: ", cpu_allocator->Info().device.Type(), " to device type: ", dst_allocator->Info().device.Type()); const auto& src_dims = src.Shape().GetDims(); if (src_dims.size() != 2) { return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, "Currently do not support dims higher than 2 dimensions: ", src_dims.size()); } const bool is_string = src.IsDataTypeString(); if (is_string && dst_allocator->Info().device.Type() != OrtDevice::CPU) { return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, "Unable to convert strings tensor to a sparse tensor that is not on CPU"); } gsl::span src_span; Tensor src_cpu; if (src.Location().device.Type() != OrtDevice::CPU) { Tensor t(src.DataType(), src.Shape(), cpu_allocator); ORT_RETURN_IF_ERROR(data_manager.CopyTensor(src, t)); src_cpu = std::move(t); src_span = gsl::make_span(reinterpret_cast(src_cpu.DataRaw()), src_cpu.SizeInBytes()); } else { src_span = gsl::make_span(reinterpret_cast(src.DataRaw()), src.SizeInBytes()); } std::vector gathered_indices; gathered_indices.reserve(static_cast(src.Shape().Size() / 2)); const auto cols = src_dims[1]; std::vector values_8; std::vector values_16; std::vector values_32; std::vector values_64; std::vector> values_str; Tensor nnz_tensor; if (is_string) { auto str_span = src.DataAsSpan(); ScanAndRecordCoo(str_span, cols, linear_index, gathered_indices, [&](const std::string& s) { values_str.push_back(std::cref(s)); }); } else { const auto element_size = src.DataType()->Size(); switch (element_size) { case sizeof(uint8_t): { ScanAndRecordCoo(src_span, cols, linear_index, gathered_indices, [&](int8_t v) { values_8.push_back(v); }); Tensor t(src.DataType(), TensorShape{static_cast(values_8.size())}, values_8.data(), cpu_allocator->Info()); nnz_tensor = std::move(t); } break; case sizeof(uint16_t): { // MFFloat16 and BFloat16 are handled fine auto span16 = ReinterpretAsSpan(src_span); ScanAndRecordCoo(span16, cols, linear_index, gathered_indices, [&](int16_t v) { values_16.push_back(v); }); Tensor t(src.DataType(), TensorShape{static_cast(values_16.size())}, values_16.data(), cpu_allocator->Info()); nnz_tensor = std::move(t); } break; case sizeof(uint32_t): { auto span32 = ReinterpretAsSpan(src_span); ScanAndRecordCoo(span32, cols, linear_index, gathered_indices, [&](int32_t v) { values_32.push_back(v); }); Tensor t(src.DataType(), TensorShape{static_cast(values_32.size())}, values_32.data(), cpu_allocator->Info()); nnz_tensor = std::move(t); } break; case sizeof(uint64_t): { auto span64 = ReinterpretAsSpan(src_span); ScanAndRecordCoo(span64, cols, linear_index, gathered_indices, [&](int64_t v) { values_64.push_back(v); }); Tensor t(src.DataType(), TensorShape{static_cast(values_64.size())}, values_64.data(), cpu_allocator->Info()); nnz_tensor = std::move(t); } break; default: return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, "Unsupported element size: ", element_size); } } const auto nnz = (linear_index) ? gathered_indices.size() : gathered_indices.size() / 2; assert(static_cast(nnz) == nnz_tensor.Shape().Size() || nnz == values_str.size()); SparseTensor dst_result(src.DataType(), src.Shape(), dst_allocator); auto mutator = dst_result.MakeCooData(nnz, gathered_indices.size()); if (nnz > 0) { if (is_string) { auto dst_iter = mutator.Values().MutableData(); std::copy(values_str.cbegin(), values_str.cend(), dst_iter); } else { ORT_RETURN_IF_ERROR(data_transfer->CopyTensor(nnz_tensor, mutator.Values())); } Tensor indices_tensor(DataTypeImpl::GetType(), mutator.Indices().Shape(), gathered_indices.data(), cpu_allocator->Info()); ORT_RETURN_IF_ERROR(data_transfer->CopyTensor(indices_tensor, mutator.Indices())); } dst = std::move(dst_result); return Status::OK(); } } // namespace sparse_utils } // namespace onnxruntime #endif // !defined(DISABLE_SPARSE_TENSORS)