diff --git a/onnxruntime/core/framework/tensorprotoutils.cc b/onnxruntime/core/framework/tensorprotoutils.cc index 6071bfd022..1619402cab 100644 --- a/onnxruntime/core/framework/tensorprotoutils.cc +++ b/onnxruntime/core/framework/tensorprotoutils.cc @@ -842,37 +842,93 @@ common::Status ConstantNodeProtoToTensorProto(const ONNX_NAMESPACE::NodeProto& n } #if !defined(DISABLE_SPARSE_TENSORS) -template static Status CopySparseData(size_t n_sparse_elements, const ONNX_NAMESPACE::TensorProto& indices, + const Path& model_path, gsl::span dims, std::function copier) { Status status = Status::OK(); TensorShape indices_shape(indices.dims().data(), indices.dims().size()); + const auto elements = gsl::narrow(indices_shape.Size()); - ORT_RETURN_IF_NOT(indices.data_type() == ONNX_NAMESPACE ::TensorProto_DataType_INT64, "Indicies expected to be INT64"); - + std::vector indices_values; // used for conversion of smaller size indices + std::vector unpack_buffer; gsl::span indices_data; - const auto elements = static_cast(indices_shape.Size()); - if (indices.int64_data_size() > 0) { - indices_data = gsl::make_span(indices.int64_data().data(), elements); - } else if (indices.has_raw_data()) { - ORT_RETURN_IF_NOT(indices.raw_data().size() == (elements * sizeof(int64_t)), - "Sparse Indicies raw data size does not match expected."); - indices_data = gsl::make_span(reinterpret_cast(indices.raw_data().data()), elements); - } else { - return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_GRAPH, "Invalid SparseTensor indices. Should either have raw or int64 data"); + const bool has_raw_data = indices.has_raw_data(); + switch (indices.data_type()) { + case ONNX_NAMESPACE::TensorProto_DataType_INT64: + if (has_raw_data) { + ORT_RETURN_IF_NOT(indices.raw_data().size() == (elements * sizeof(int64_t)), + "Sparse Indices raw data size does not match expected."); + ORT_RETURN_IF_ERROR(UnpackInitializerData(indices, model_path, unpack_buffer)); + indices_data = gsl::make_span(unpack_buffer).as_span(); + } else { + ORT_RETURN_IF_NOT(indices.int64_data_size() == static_cast(elements), "Sparse indices int64 data size does not match expected"); + indices_data = gsl::make_span(indices.int64_data().data(), elements); + } + break; + case ONNX_NAMESPACE::TensorProto_DataType_INT32: { + if (has_raw_data) { + ORT_RETURN_IF_NOT(indices.raw_data().size() == (elements * sizeof(int32_t)), + "Sparse Indices raw data size does not match expected."); + ORT_RETURN_IF_ERROR(UnpackInitializerData(indices, model_path, unpack_buffer)); + auto int32_span = gsl::make_span(unpack_buffer).as_span(); + indices_values.insert(indices_values.cend(), int32_span.cbegin(), int32_span.cend()); + unpack_buffer.clear(); + unpack_buffer.shrink_to_fit(); + } else { + ORT_RETURN_IF_NOT(indices.int32_data_size() == static_cast(elements), "Sparse indices int32 data size does not match expected"); + indices_values.insert(indices_values.cend(), indices.int32_data().cbegin(), indices.int32_data().cend()); + } + indices_data = gsl::make_span(indices_values); + break; + } + case ONNX_NAMESPACE::TensorProto_DataType_INT16: { + if (has_raw_data) { + ORT_RETURN_IF_NOT(indices.raw_data().size() == (elements * sizeof(int16_t)), + "Sparse Indices raw data size does not match expected."); + ORT_RETURN_IF_ERROR(UnpackInitializerData(indices, model_path, unpack_buffer)); + auto int16_span = gsl::make_span(unpack_buffer).as_span(); + indices_values.insert(indices_values.cend(), int16_span.cbegin(), int16_span.cend()); + indices_data = gsl::make_span(indices_values); + unpack_buffer.clear(); + unpack_buffer.shrink_to_fit(); + } else { + return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_GRAPH, + "Invalid SparseTensor indices. INT16 indices must be in the raw data of indices tensor"); + } + break; + } + case ONNX_NAMESPACE::TensorProto_DataType_INT8: { + if (has_raw_data) { + ORT_RETURN_IF_NOT(indices.raw_data().size() == elements, + "Sparse Indices raw data size does not match expected."); + ORT_RETURN_IF_ERROR(UnpackInitializerData(indices, model_path, unpack_buffer)); + auto int8_span = gsl::make_span(unpack_buffer).as_span(); + indices_values.insert(indices_values.cend(), int8_span.cbegin(), int8_span.cend()); + indices_data = gsl::make_span(indices_values); + unpack_buffer.clear(); + unpack_buffer.shrink_to_fit(); + } else { + return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_GRAPH, + "Invalid SparseTensor indices. INT8 indices must be in the raw data of indices tensor"); + } + break; + } + default: + return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_GRAPH, + "Invalid SparseTensor indices. Should one of the following types: int8, int16, int32 or int64"); } if (indices_shape.NumDimensions() == 1) { // flattened indexes for (size_t i = 0; i < n_sparse_elements; ++i) { - copier(i, static_cast(indices_data[i])); + copier(i, gsl::narrow(indices_data[i])); } } else if (indices_shape.NumDimensions() == 2) { // entries in format {NNZ, rank} - size_t rank = static_cast(indices_shape[1]); - ORT_ENFORCE(rank == dims.size() && rank > 0); + ORT_ENFORCE(indices_shape[1] > 0 && static_cast(indices_shape[1]) == dims.size()); + auto rank = static_cast(indices_shape[1]); const int64_t* cur_index = indices_data.data(); std::vector multipliers; multipliers.resize(rank); @@ -880,20 +936,20 @@ static Status CopySparseData(size_t n_sparse_elements, // calculate sum of inner dimension elements for each dimension. // e.g. if shape {2,3,4}, the result should be {3*4, 4, 1} multipliers[rank - 1] = 1; - for (int32_t r = static_cast(rank) - 2; r >= 0; --r) { - multipliers[r] = static_cast(dims[r + 1]) * multipliers[r + 1]; + for (auto r = rank - 1; r > 0; --r) { + multipliers[r - 1] = SafeInt(dims[r]) * multipliers[r]; } // calculate the offset for the entry // e.g. if shape was {2,3,4} and entry was (1, 0, 2) the offset is 14 // as there are 2 rows, each with 12 entries per row for (size_t i = 0; i < n_sparse_elements; ++i) { - size_t idx = 0; + SafeInt idx = 0; for (size_t j = 0; j < rank; ++j) { - idx += static_cast(cur_index[j]) * multipliers[j]; + idx += SafeInt(cur_index[j]) * multipliers[j]; } - copier(i, idx); + copier(i, static_cast(idx)); cur_index += rank; } @@ -905,29 +961,7 @@ static Status CopySparseData(size_t n_sparse_elements, return status; } -#endif // !defined(DISABLE_SPARSE_TENSORS) -namespace conversion_internal { -#if !defined(DISABLE_SPARSE_TENSORS) -struct UnsupportedSparseDataType { - void operator()(int32_t dt_type, Status& status) const { - status = ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "Unsupported sparse tensor data type of ", dt_type); - } -}; -#endif -template -struct GetElementSize { - Status operator()(size_t& element_size) const { - element_size = sizeof(T); - return Status::OK(); - } -}; - -using SupportedConversionTypeList = onnxruntime::TypeList; -} // namespace conversion_internal - -#if !defined(DISABLE_SPARSE_TENSORS) common::Status SparseTensorProtoToDenseTensorProto(const ONNX_NAMESPACE::SparseTensorProto& sparse, const Path& model_path, ONNX_NAMESPACE::TensorProto& dense) { @@ -953,74 +987,70 @@ common::Status SparseTensorProtoToDenseTensorProto(const ONNX_NAMESPACE::SparseT auto dims = gsl::make_span(dense.dims().data(), dense.dims().size()); if (type != TensorProto_DataType_STRING) { + auto ml_data = DataTypeImpl::TensorTypeFromONNXEnum(type)->GetElementType(); + size_t element_size = ml_data->Size(); + // need to read in sparse data first as it could be in a type specific field, in raw data, or in external data std::vector sparse_data_storage; ORT_RETURN_IF_ERROR(UnpackInitializerData(sparse_values, model_path, sparse_data_storage)); void* sparse_data = sparse_data_storage.data(); - size_t element_size = 0; - // We want to this list to match the one used below in DenseTensorToSparseTensorProto() - MLTypeCallDispatcherFromTypeList type_disp(type); - ORT_RETURN_IF_ERROR( - (type_disp.InvokeRetWithUnsupportedPolicy(element_size))); // by putting the data into a std::string we can avoid a copy as set_raw_data can do a std::move - // into the TensorProto. however to actually write to the buffer we have created in the std::string we need - // this somewhat dirty hack to get a mutable pointer. we could alternatively use &dense_data_storage.front() - // but using const_cast makes it more obvious we're doing something ugly. - // C++17 add non-const data() where we could remove const_cast + // into the TensorProto. std::string dense_data_storage(n_dense_elements * element_size, 0); if (n_sparse_elements > 0) { - void* dense_data = const_cast(dense_data_storage.data()); + void* dense_data = dense_data_storage.data(); switch (element_size) { case 1: { - auto dense_data_span = gsl::make_span(static_cast(dense_data), n_dense_elements); - status = CopySparseData( + status = CopySparseData( n_sparse_elements, - indices, dims, - [sparse_data, dense_data_span](size_t from_idx, size_t to_idx) { - dense_data_span[to_idx] = static_cast(sparse_data)[from_idx]; + indices, model_path, dims, + [sparse_data, dense_data](size_t from_idx, size_t to_idx) { + static_cast(dense_data)[to_idx] = static_cast(sparse_data)[from_idx]; }); break; } case 2: { - auto dense_data_span = gsl::make_span(static_cast(dense_data), n_dense_elements); - status = CopySparseData( + status = CopySparseData( n_sparse_elements, - indices, dims, - [sparse_data, dense_data_span](size_t from_idx, size_t to_idx) { - dense_data_span[to_idx] = static_cast(sparse_data)[from_idx]; + indices, model_path, dims, + [sparse_data, dense_data](size_t from_idx, size_t to_idx) { + const auto* src = static_cast(sparse_data) + from_idx; + auto* dst = static_cast(dense_data) + to_idx; + memcpy(dst, src, sizeof(uint16_t)); }); break; } case 4: { - auto dense_data_span = gsl::make_span(static_cast(dense_data), n_dense_elements); - status = CopySparseData( + status = CopySparseData( n_sparse_elements, - indices, dims, - [sparse_data, dense_data_span](size_t from_idx, size_t to_idx) { - dense_data_span[to_idx] = static_cast(sparse_data)[from_idx]; + indices, model_path, dims, + [sparse_data, dense_data](size_t from_idx, size_t to_idx) { + const auto* src = static_cast(sparse_data) + from_idx; + auto* dst = static_cast(dense_data) + to_idx; + memcpy(dst, src, sizeof(uint32_t)); }); break; } case 8: { - auto dense_data_span = gsl::make_span(static_cast(dense_data), n_dense_elements); - status = CopySparseData( + status = CopySparseData( n_sparse_elements, - indices, dims, - [sparse_data, dense_data_span](size_t from_idx, size_t to_idx) { - dense_data_span[to_idx] = static_cast(sparse_data)[from_idx]; + indices, model_path, dims, + [sparse_data, dense_data](size_t from_idx, size_t to_idx) { + const auto* src = static_cast(sparse_data) + from_idx; + auto* dst = static_cast(dense_data) + to_idx; + memcpy(dst, src, sizeof(uint64_t)); }); break; } default: return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, - " BUG! Report to onnxruntime team. element_size of: ", - element_size, " is not supported.", " type: ", type); + "Element_size of: ", element_size, " is not supported.", " type: ", type); } ORT_RETURN_IF_ERROR(status); @@ -1029,7 +1059,8 @@ common::Status SparseTensorProtoToDenseTensorProto(const ONNX_NAMESPACE::SparseT } else { // No request for std::string - conversion_internal::UnsupportedSparseDataType()(ONNX_NAMESPACE::TensorProto_DataType_STRING, status); + status = ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "Unsupported sparse tensor data type of ", + ONNX_NAMESPACE::TensorProto_DataType_STRING); } return status; } @@ -1040,36 +1071,6 @@ using IsZeroFunc = bool (*)(const void*); // Copy element using CopyElementFunc = void (*)(void* dest, const void* src, int64_t dest_index, int64_t src_index); -static void SparsifyGeneric(const void* dense_raw_data, size_t n_dense_elements, size_t element_size, - IsZeroFunc is_zero, CopyElementFunc copy, - TensorProto& values, TensorProto& indices) { - auto advance = [element_size](const void* start, size_t elements) -> const void* { - return (reinterpret_cast(start) + elements * element_size); - }; - - const auto* cbegin = dense_raw_data; - const auto* const cend = advance(cbegin, n_dense_elements); - auto& indices_data = *indices.mutable_int64_data(); - int64_t index = 0; - while (cbegin != cend) { - if (!is_zero(cbegin)) { - indices_data.Add(index); - } - ++index; - cbegin = advance(cbegin, 1U); - } - - auto& raw_data = *values.mutable_raw_data(); - raw_data.resize(indices.int64_data_size() * element_size); - void* data_dest = const_cast(raw_data.data()); - - int64_t dest_index = 0; - for (auto src_index : indices.int64_data()) { - copy(data_dest, dense_raw_data, dest_index, src_index); - ++dest_index; - } -} - // Here we are not using tolerance for FP types since these dense tensors were // created from sparse initializers where zeros were absolute template @@ -1079,7 +1080,85 @@ inline bool IsZero(const void* p) { template inline void CopyElement(void* dst, const void* src, int64_t dst_index, int64_t src_index) { - reinterpret_cast(dst)[dst_index] = reinterpret_cast(src)[src_index]; + const auto* src_p = reinterpret_cast(src) + src_index; + auto* dst_p = reinterpret_cast(dst) + dst_index; + memcpy(dst_p, src_p, sizeof(T)); +} + +template <> +inline void CopyElement(void* dst, const void* src, int64_t dst_index, int64_t src_index) { + reinterpret_cast(dst)[dst_index] = reinterpret_cast(src)[src_index]; +} + + +template +static void SetIndices(gsl::span gathered_indices, + std::string& raw_indices, + TensorProto& indices) { + raw_indices.resize(gathered_indices.size() * sizeof(T)); + auto* ind_dest = reinterpret_cast(raw_indices.data()); + size_t dest_index = 0; + for (auto src_index : gathered_indices) { + ORT_IF_CONSTEXPR(sizeof(T) == sizeof(int8_t)) { + ind_dest[dest_index] = static_cast(src_index); + } else { + auto* dst = ind_dest + dest_index; + T v = static_cast(src_index); + memcpy(dst, &v, sizeof(T)); + } + ++dest_index; + } + indices.set_data_type(utils::ToTensorProtoElementType()); +} + +static void SparsifyGeneric(const void* dense_raw_data, size_t n_dense_elements, size_t element_size, + IsZeroFunc is_zero, CopyElementFunc copy, + TensorProto& values, TensorProto& indices, + size_t& nnz) { + auto advance = [element_size](const void* start, size_t elements) -> const void* { + return (reinterpret_cast(start) + elements * element_size); + }; + + const auto* cbegin = dense_raw_data; + const auto* const cend = advance(cbegin, n_dense_elements); + std::vector gathered_indices; + int64_t index = 0; + while (cbegin != cend) { + if (!is_zero(cbegin)) { + gathered_indices.push_back(index); + } + ++index; + cbegin = advance(cbegin, 1U); + } + + if (!gathered_indices.empty()) { + auto& raw_data = *values.mutable_raw_data(); + raw_data.resize(gathered_indices.size() * element_size); + void* data_dest = raw_data.data(); + + int64_t dest_index = 0; + for (auto src_index : gathered_indices) { + copy(data_dest, dense_raw_data, dest_index, src_index); + ++dest_index; + } + + auto gathered_span = gsl::make_span(gathered_indices); + auto& raw_indices = *indices.mutable_raw_data(); + const auto max_index = gathered_indices.back(); + if (max_index <= std::numeric_limits::max()) { + SetIndices(gathered_span, raw_indices, indices); + } else if (max_index <= std::numeric_limits::max()) { + SetIndices(gathered_span, raw_indices, indices); + } else if (max_index <= std::numeric_limits::max()) { + SetIndices(gathered_span, raw_indices, indices); + } else { + SetIndices(gathered_span, raw_indices, indices); + } + } else { + indices.set_data_type(ONNX_NAMESPACE::TensorProto_DataType_INT8); + indices.set_raw_data(std::string()); + } + nnz = gathered_indices.size(); } common::Status DenseTensorToSparseTensorProto(const ONNX_NAMESPACE::TensorProto& dense_proto, @@ -1087,11 +1166,9 @@ common::Status DenseTensorToSparseTensorProto(const ONNX_NAMESPACE::TensorProto& ONNX_NAMESPACE::SparseTensorProto& result) { ORT_ENFORCE(HasDataType(dense_proto), "Must have a valid data type"); - const bool is_string_data = dense_proto.data_type() == ONNX_NAMESPACE::TensorProto_DataType_STRING; - if (is_string_data) { - Status status{}; - conversion_internal::UnsupportedSparseDataType()(ONNX_NAMESPACE::TensorProto_DataType_STRING, status); - return status; + if (dense_proto.data_type() == ONNX_NAMESPACE::TensorProto_DataType_STRING) { + return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "Unsupported sparse tensor data type of ", + ONNX_NAMESPACE::TensorProto_DataType_STRING); } const auto data_type = dense_proto.data_type(); @@ -1101,51 +1178,47 @@ common::Status DenseTensorToSparseTensorProto(const ONNX_NAMESPACE::TensorProto& values.set_data_type(data_type); auto& indices = *sparse_proto.mutable_indices(); - indices.set_data_type(ONNX_NAMESPACE::TensorProto_DataType_INT64); SafeInt n_dense_elements = 1; for (auto dim : dense_proto.dims()) { n_dense_elements *= dim; } + auto ml_data = DataTypeImpl::TensorTypeFromONNXEnum(data_type)->GetElementType(); + size_t element_size = ml_data->Size(); + std::vector dense_raw_data; ORT_RETURN_IF_ERROR(UnpackInitializerData(dense_proto, model_path, dense_raw_data)); - size_t element_size = 0; - // We want this type list to match the one above in SparseTensorProtoToDenseTensorProto - MLTypeCallDispatcherFromTypeList type_disp(data_type); - ORT_RETURN_IF_ERROR( - (type_disp.InvokeRetWithUnsupportedPolicy(element_size))); + size_t nnz = 0; void* dense_data = dense_raw_data.data(); switch (element_size) { case 1: { SparsifyGeneric(dense_data, n_dense_elements, element_size, - IsZero, CopyElement, values, indices); + IsZero, CopyElement, values, indices, nnz); break; } case 2: { SparsifyGeneric(dense_data, n_dense_elements, element_size, - IsZero, CopyElement, values, indices); + IsZero, CopyElement, values, indices, nnz); break; } case 4: { SparsifyGeneric(dense_data, n_dense_elements, element_size, - IsZero, CopyElement, values, indices); + IsZero, CopyElement, values, indices, nnz); break; } case 8: { SparsifyGeneric(dense_data, n_dense_elements, element_size, - IsZero, CopyElement, values, indices); + IsZero, CopyElement, values, indices, nnz); break; } default: return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, - " BUG! Report to onnxruntime team. element_size of: ", - element_size, " is not supported.", " data_type: ", data_type); + "Element_size of: ", element_size, " is not supported.", " data_type: ", data_type); } // Fix up shapes - const auto nnz = indices.int64_data_size(); values.add_dims(nnz); indices.add_dims(nnz); diff --git a/onnxruntime/test/framework/sparse_kernels_test.cc b/onnxruntime/test/framework/sparse_kernels_test.cc index 83aa35101f..4e4afd979f 100644 --- a/onnxruntime/test/framework/sparse_kernels_test.cc +++ b/onnxruntime/test/framework/sparse_kernels_test.cc @@ -669,8 +669,51 @@ static void CreateTensorWithExternalData( tensor_proto.set_data_type(type); } +namespace { + +void insert_indices_data(bool indices_1D, + size_t values_size, size_t shape_size, + std::vector& indices_data, + TensorProto& indices_tp) { + if (indices_1D) { + indices_data = {2, 5, 6, 10}; + indices_tp.add_dims(indices_data.size()); + } else { + // indices are shape {NNZ, rank} so convert flattened values of 2, 5, 6 and 10 to rank 3 values + indices_tp.add_dims(values_size); + indices_tp.add_dims(shape_size); + indices_data = { + 0, 1, 0, + 0, 2, 1, + 1, 0, 0, + 1, 2, 0}; + } +} + template -static NodeProto CreateConstantNode(bool indices_1D, +struct InsertIndices { + void operator()(bool indices_1D, size_t values_size, size_t shape_size, TensorProto& indices_tp) const { + static_assert(std::is_integral_v, "indices data must be integral data type"); + static_assert(std::is_signed_v, "indices must be signed data type"); + std::vector indices_data; + insert_indices_data(indices_1D, values_size, shape_size, indices_data, indices_tp); + indices_tp.set_data_type(utils::ToTensorProtoElementType()); + ORT_IF_CONSTEXPR (sizeof(T) == sizeof(int8_t)) { + indices_tp.mutable_raw_data()->assign(reinterpret_cast(indices_data.data()), indices_data.size()); + } else { + // Conversion on the fly to the target data type + std::vector indices(indices_data.cbegin(), indices_data.cend()); + indices_tp.mutable_raw_data()->assign(reinterpret_cast(indices.data()), indices.size() * sizeof(T)); + } + } +}; + +using SupportedIndicesTypeList = onnxruntime::TypeList; + +} // namespace + +template +static NodeProto CreateConstantNode(bool indices_1D, int32_t indices_type, std::function& values, TensorProto& tp)> inserter, std::vector& expected_data) { NodeProto constant_node; @@ -678,7 +721,6 @@ static NodeProto CreateConstantNode(bool indices_1D, constant_node.add_output("dense_tensor_output"); std::vector values = CreateValues(); - std::vector indices; std::vector shape{2, 3, 2}; AttributeProto& attrib = *constant_node.mutable_attribute()->Add(); @@ -686,26 +728,11 @@ static NodeProto CreateConstantNode(bool indices_1D, attrib.set_type(AttributeProto_AttributeType_SPARSE_TENSOR); SparseTensorProto& stp = *attrib.mutable_sparse_tensor(); - TensorProto& indices_tp = *stp.mutable_indices(); - stp.mutable_dims()->Add(shape.cbegin(), shape.cend()); - if (indices_1D) { - indices = {2, 5, 6, 10}; - indices_tp.add_dims(indices.size()); - } else { - // indices are shape {NNZ, rank} so convert flattened values of 2, 5, 6 and 10 to rank 3 values - indices_tp.add_dims(values.size()); - indices_tp.add_dims(shape.size()); - indices = { - 0, 1, 0, - 0, 2, 1, - 1, 0, 0, - 1, 2, 0}; - } - - indices_tp.set_data_type(ONNX_NAMESPACE::TensorProto_DataType_INT64); - indices_tp.mutable_int64_data()->Add(indices.cbegin(), indices.cend()); + TensorProto& indices_tp = *stp.mutable_indices(); + utils::MLTypeCallDispatcherFromTypeList type_disp(indices_type); + type_disp.Invoke(indices_1D, values.size(), shape.size(), indices_tp); expected_data.resize(2 * 3 * 2); expected_data[2] = values[0]; @@ -733,10 +760,9 @@ static NodeProto CreateConstantNodeAllZeros(bool indices_1D, std::vector& exp attrib.set_type(AttributeProto_AttributeType_SPARSE_TENSOR); SparseTensorProto& stp = *attrib.mutable_sparse_tensor(); - TensorProto& indices_tp = *stp.mutable_indices(); - stp.mutable_dims()->Add(shape.cbegin(), shape.cend()); + TensorProto& indices_tp = *stp.mutable_indices(); if (indices_1D) { indices_tp.add_dims(0); } else { @@ -759,11 +785,11 @@ static NodeProto CreateConstantNodeAllZeros(bool indices_1D, std::vector& exp } template -static void TestConversion(bool use_1D_indices, +static void TestConversion(bool use_1D_indices, int32_t indices_type, std::function& values, TensorProto& tp)> inserter, std::function expected, const TensorProto& actual)> checker) { std::vector expected; - auto node = CreateConstantNode(use_1D_indices, inserter, expected); + auto node = CreateConstantNode(use_1D_indices, indices_type, inserter, expected); TensorProto dense; // Path is required for loading external data (if any) @@ -793,8 +819,17 @@ template static void TestConversion( std::function& values, TensorProto& tp)> inserter, std::function expected, const TensorProto& actual)> checker) { - TestConversion(true, inserter, checker); - TestConversion(false, inserter, checker); + std::vector indices_types{ + TensorProto_DataType_INT8, + TensorProto_DataType_INT16, + TensorProto_DataType_INT32, + TensorProto_DataType_INT64 + }; + + for (auto dt : indices_types) { + TestConversion(true, dt, inserter, checker); + TestConversion(false, dt, inserter, checker); + } TestConversionAllZeros(true, checker); TestConversionAllZeros(false, checker); } @@ -820,7 +855,7 @@ static void RawDataChecker(gsl::span expected, const TensorProto& actua const T* raw_data = reinterpret_cast(actual.raw_data().data()); auto actual_span = gsl::make_span(raw_data, actual_size); - EXPECT_THAT(actual_span, testing::ContainerEq(expected)); + ASSERT_THAT(actual_span, testing::ContainerEq(expected)); } template <> @@ -831,7 +866,7 @@ void RawDataChecker(gsl::span expected_bfloat, const const uint16_t* raw_data = reinterpret_cast(actual.raw_data().data()); auto actual_span = gsl::make_span(raw_data, actual_size); - EXPECT_THAT(actual_span, testing::ContainerEq(expected)); + ASSERT_THAT(actual_span, testing::ContainerEq(expected)); } template <> @@ -842,7 +877,7 @@ void RawDataChecker(gsl::span expected_bfloat, const T const uint16_t* raw_data = reinterpret_cast(actual.raw_data().data()); auto actual_span = gsl::make_span(raw_data, actual_size); - EXPECT_THAT(actual_span, testing::ContainerEq(expected)); + ASSERT_THAT(actual_span, testing::ContainerEq(expected)); } TEST(SparseTensorConversionTests, TestConstantNodeConversion) { @@ -938,6 +973,7 @@ TEST(SparseTensorConversionTests, TestConstantNodeConversion) { PathString tensor_filename(ORT_TSTR("tensor_XXXXXX")); TestConversion( true, + TensorProto_DataType_INT64, [&tensor_filename](const std::vector& values, TensorProto& tp) { CreateTensorWithExternalData(TensorProto_DataType_FLOAT, values, tensor_filename, tp); }, @@ -950,8 +986,11 @@ TEST(SparseTensorConversionTests, TestConstantNodeConversion) { #if !defined(ORT_MINIMAL_BUILD) template -static std::vector CreateSparseValues() { - return {0, 2, 3, 0}; +static std::vector CreateSparseValues(size_t indices_start) { + std::vector result(indices_start + 2); + result[indices_start] = 2; + result[indices_start + 1] = 3; + return result; } /* std::string support in the future @@ -962,13 +1001,19 @@ std::vector CreateSparseValues() { */ template <> -std::vector CreateSparseValues() { - return {BFloat16(0.f), BFloat16(2.f), BFloat16(3.f), BFloat16(0.f)}; +std::vector CreateSparseValues(size_t indices_start) { + std::vector result(indices_start + 2); + result[indices_start] = BFloat16(2.f); + result[indices_start + 1] = BFloat16(3.f); + return result; } template <> -std::vector CreateSparseValues() { - return {MLFloat16(0.f), MLFloat16(2.f), MLFloat16(3.f), MLFloat16(0.f)}; +std::vector CreateSparseValues(size_t indices_start) { + std::vector result(indices_start + 2); + result[indices_start] = MLFloat16(2.f); + result[indices_start + 1] = MLFloat16(3.f); + return result; } template @@ -987,11 +1032,13 @@ std::vector CreateSparseValuesAllZeros() { } template -TensorProto CreateDenseTensor(std::function& values, TensorProto& tp)> inserter, +TensorProto CreateDenseTensor(size_t indices_start, + std::function& values, TensorProto& tp)> inserter, std::vector& expected_values, std::vector& expected_indicies) { TensorProto result; - std::vector values = CreateSparseValues(); - expected_indicies = {1, 2}; + std::vector values = CreateSparseValues(indices_start); + auto ind_start = static_cast(indices_start); + expected_indicies = {ind_start, ind_start + 1}; for (const auto& ind : expected_indicies) { expected_values.push_back(values[ind]); } @@ -1026,12 +1073,9 @@ static void RawSparseDataChecker(gsl::span expected_values, const T* raw_data = reinterpret_cast(actual.values().raw_data().data()); auto actual_span = gsl::make_span(raw_data, actual_size); - EXPECT_THAT(actual_span, testing::ContainerEq(expected_values)); + ASSERT_THAT(actual_span, testing::ContainerEq(expected_values)); - // Check indicies - EXPECT_THAT(actual.indices().data_type(), ONNX_NAMESPACE::TensorProto_DataType_INT64); - auto actual_indicies = gsl::make_span(actual.indices().int64_data().data(), actual.indices().int64_data_size()); - EXPECT_THAT(actual_indicies, testing::ContainerEq(expected_indicies)); + SparseIndicesChecker(actual.indices(), expected_indicies); } template <> @@ -1045,11 +1089,8 @@ void RawSparseDataChecker(gsl::span expected_bfloat, const uint16_t* raw_data = reinterpret_cast(actual.values().raw_data().data()); auto actual_span = gsl::make_span(raw_data, actual_size); - EXPECT_THAT(actual_span, testing::ContainerEq(expected)); - // Check indicies - EXPECT_THAT(actual.indices().data_type(), ONNX_NAMESPACE::TensorProto_DataType_INT64); - auto actual_indicies = gsl::make_span(actual.indices().int64_data().data(), actual.indices().int64_data_size()); - EXPECT_THAT(actual_indicies, testing::ContainerEq(expected_indicies)); + ASSERT_THAT(actual_span, testing::ContainerEq(expected)); + SparseIndicesChecker(actual.indices(), expected_indicies); } template <> @@ -1063,15 +1104,12 @@ void RawSparseDataChecker(gsl::span expected_bfloat, const uint16_t* raw_data = reinterpret_cast(actual.values().raw_data().data()); auto actual_span = gsl::make_span(raw_data, actual_size); - EXPECT_THAT(actual_span, testing::ContainerEq(expected)); - // Check indicies - EXPECT_THAT(actual.indices().data_type(), ONNX_NAMESPACE::TensorProto_DataType_INT64); - auto actual_indicies = gsl::make_span(actual.indices().int64_data().data(), actual.indices().int64_data_size()); - EXPECT_THAT(actual_indicies, testing::ContainerEq(expected_indicies)); + ASSERT_THAT(actual_span, testing::ContainerEq(expected)); + SparseIndicesChecker(actual.indices(), expected_indicies); } template -static void TestDenseToSparseConversionValues( +static void TestDenseToSparseConversionValues(size_t indices_start, std::function& values, TensorProto& tp)> inserter, std::function expected, gsl::span expected_indicies, @@ -1082,7 +1120,7 @@ static void TestDenseToSparseConversionValues( // Path is required for loading external data // Using empty path here since the data is not external Path model_path; - TensorProto dense_tensor = CreateDenseTensor(inserter, expected_values, expected_indicies); + TensorProto dense_tensor = CreateDenseTensor(indices_start, inserter, expected_values, expected_indicies); SparseTensorProto sparse_tensor; utils::DenseTensorToSparseTensorProto(dense_tensor, model_path, sparse_tensor); @@ -1117,17 +1155,21 @@ static void TestDenseAllZerosToSparseConversion( } template -static void TestDenseToSparseConversion(std::function& values, TensorProto& tp)> inserter, +static void TestDenseToSparseConversion(size_t indices_start, + std::function& values, TensorProto& tp)> inserter, std::function expected, gsl::span expected_indicies, const SparseTensorProto& actual)> checker) { - TestDenseToSparseConversionValues(inserter, checker); + TestDenseToSparseConversionValues(indices_start, inserter, checker); TestDenseAllZerosToSparseConversion(inserter, checker); } TEST(SparseTensorConversionTests, TestDenseToSparseConversion) { + // This one will test indices that are less than max int8 value + // which should result in int8 indices TestDenseToSparseConversion( + 20U, [](const std::vector& values, TensorProto& tp) { tp.set_data_type(TensorProto_DataType_FLOAT); tp.set_name("dense_float"); @@ -1135,7 +1177,10 @@ TEST(SparseTensorConversionTests, TestDenseToSparseConversion) { }, RawSparseDataChecker); + // This one will test indices that are max(int8) < ind < max(int16) value + // which should result in int16 indices TestDenseToSparseConversion( + static_cast(std::numeric_limits::max()) + 20U, [](const std::vector& values, TensorProto& tp) { tp.set_data_type(TensorProto_DataType_DOUBLE); tp.set_name("dense_double"); @@ -1143,7 +1188,10 @@ TEST(SparseTensorConversionTests, TestDenseToSparseConversion) { }, RawSparseDataChecker); + // This one will test indices that are max(int16) < ind < max(int32) value + // which should result in int32 indices TestDenseToSparseConversion( + static_cast(std::numeric_limits::max()) + 20U, [](const std::vector& values, TensorProto& tp) { tp.set_data_type(TensorProto_DataType_BFLOAT16); tp.set_name("dense_bfloat16"); @@ -1153,7 +1201,11 @@ TEST(SparseTensorConversionTests, TestDenseToSparseConversion) { }, RawSparseDataChecker); + // Protobuf can not hold anything more than 2Gb and it overflows. Can't test 64-bit indices + // on conversion unless explicitly created. + // which should result in int32 indices TestDenseToSparseConversion( + 20U, [](const std::vector& values, TensorProto& tp) { tp.set_data_type(TensorProto_DataType_FLOAT16); tp.set_name("dense_float16"); @@ -1164,6 +1216,7 @@ TEST(SparseTensorConversionTests, TestDenseToSparseConversion) { RawSparseDataChecker); TestDenseToSparseConversion( + 20U, [](const std::vector& values, TensorProto& tp) { tp.set_name("dense_int16"); tp.set_data_type(TensorProto_DataType_INT16); @@ -1172,6 +1225,7 @@ TEST(SparseTensorConversionTests, TestDenseToSparseConversion) { RawSparseDataChecker); TestDenseToSparseConversion( + 20U, [](const std::vector& values, TensorProto& tp) { tp.set_name("dense_uint16"); tp.set_data_type(TensorProto_DataType_UINT16); @@ -1180,6 +1234,7 @@ TEST(SparseTensorConversionTests, TestDenseToSparseConversion) { RawSparseDataChecker); TestDenseToSparseConversion( + 20U, [](const std::vector& values, TensorProto& tp) { tp.set_name("dense_int32"); tp.set_data_type(TensorProto_DataType_INT32); @@ -1188,6 +1243,7 @@ TEST(SparseTensorConversionTests, TestDenseToSparseConversion) { RawSparseDataChecker); TestDenseToSparseConversion( + 20U, [](const std::vector& values, TensorProto& tp) { tp.set_name("dense_uint32"); tp.set_data_type(TensorProto_DataType_UINT32); @@ -1196,6 +1252,7 @@ TEST(SparseTensorConversionTests, TestDenseToSparseConversion) { RawSparseDataChecker); TestDenseToSparseConversion( + 20U, [](const std::vector& values, TensorProto& tp) { tp.set_name("dense_int64"); tp.set_data_type(TensorProto_DataType_INT64); @@ -1204,6 +1261,7 @@ TEST(SparseTensorConversionTests, TestDenseToSparseConversion) { RawSparseDataChecker); TestDenseToSparseConversion( + 20U, [](const std::vector& values, TensorProto& tp) { tp.set_name("dense_uint64"); tp.set_data_type(TensorProto_DataType_UINT64); @@ -1212,6 +1270,7 @@ TEST(SparseTensorConversionTests, TestDenseToSparseConversion) { RawSparseDataChecker); TestDenseToSparseConversion( + 20U, [](const std::vector& values, TensorProto& tp) { tp.set_name("dense_int8"); tp.set_data_type(TensorProto_DataType_INT8); @@ -1220,6 +1279,7 @@ TEST(SparseTensorConversionTests, TestDenseToSparseConversion) { RawSparseDataChecker); TestDenseToSparseConversion( + 20U, [](const std::vector& values, TensorProto& tp) { tp.set_name("dense_int64"); RawDataWriter(values, tp, TensorProto_DataType_UINT8); diff --git a/onnxruntime/test/framework/test_utils.h b/onnxruntime/test/framework/test_utils.h index 9d5633ea34..c206492961 100644 --- a/onnxruntime/test/framework/test_utils.h +++ b/onnxruntime/test/framework/test_utils.h @@ -97,5 +97,9 @@ void AllocateMLValue(AllocatorPtr alloc, const std::vector& dims, OrtVa // Helper function to check that the graph transformations have been successfully applied. std::map CountOpsInGraph(const Graph& graph, bool recurse_into_subgraphs = true); +#if !defined(DISABLE_SPARSE_TENSORS) +void SparseIndicesChecker(const ONNX_NAMESPACE::TensorProto& indices_proto, gsl::span expected_indicies); +#endif // DISABLE_SPARSE_TENSORS + } // namespace test } // namespace onnxruntime diff --git a/onnxruntime/test/ir/graph_test.cc b/onnxruntime/test/ir/graph_test.cc index 2c2fc774cf..8bdce400fd 100644 --- a/onnxruntime/test/ir/graph_test.cc +++ b/onnxruntime/test/ir/graph_test.cc @@ -10,6 +10,7 @@ #include "gmock/gmock.h" #include "onnx/defs/function.h" #include "core/graph/function_impl.h" +#include "test/framework/test_utils.h" #ifdef __GNUC__ #define UNUSED __attribute__((unused)) @@ -233,6 +234,7 @@ static void ConstructSparseTensor(const std::string& name, std::copy(values.cbegin(), values.cend(), dest_span.begin()); const std::vector& indices = sparse_details::indices; // Not to exceed 59 + auto& m_indicies = *sparse_proto.mutable_indices(); m_indicies.set_data_type(ONNX_NAMESPACE::TensorProto_DataType_INT64); *m_indicies.mutable_dims()->Add() = static_cast(indices.size()); @@ -264,10 +266,9 @@ static void ValidateSparseTensorProto(const SparseTensorProto& proto) { ++expected_begin; } // Check indices - EXPECT_EQ(proto.indices().data_type(), ONNX_NAMESPACE::TensorProto_DataType_INT64); + const auto& indices = proto.indices(); auto expected_indices = gsl::make_span(sparse_details::indices); - auto actual_indices = gsl::make_span(proto.indices().int64_data().data(), proto.indices().int64_data_size()); - EXPECT_THAT(actual_indices, testing::ContainerEq(expected_indices)); + SparseIndicesChecker(indices, expected_indices); // check shape const auto& dims = proto.dims(); auto actual_shape = gsl::make_span(dims.data(), dims.size()); diff --git a/onnxruntime/test/util/test_utils.cc b/onnxruntime/test/util/test_utils.cc index 152e44bec3..a413adb733 100644 --- a/onnxruntime/test/util/test_utils.cc +++ b/onnxruntime/test/util/test_utils.cc @@ -4,7 +4,9 @@ #include "test/util/include/test_utils.h" #include "core/framework/ort_value.h" +#include "core/graph/onnx_protobuf.h" #include "core/session/inference_session.h" +#include "core/framework/tensorprotoutils.h" #include "test/util/include/asserts.h" #include "test/util/include/test/test_environment.h" @@ -115,5 +117,68 @@ void RunAndVerifyOutputsWithEP(const ORTCHAR_T* model_path, const char* log_id, VerifyOutputs(output_names, expected_fetches, fetches); } +#if !defined(DISABLE_SPARSE_TENSORS) +void SparseIndicesChecker(const ONNX_NAMESPACE::TensorProto& indices_proto, gsl::span expected_indicies) { + using namespace ONNX_NAMESPACE; + Path model_path; + std::vector unpack_buffer; + gsl::span ind_span; + std::vector converted_indices; + TensorShape ind_shape(indices_proto.dims().data(), indices_proto.dims().size()); + const auto elements = gsl::narrow(ind_shape.Size()); + const bool has_raw_data = indices_proto.has_raw_data(); + switch (indices_proto.data_type()) { + case ONNX_NAMESPACE::TensorProto_DataType_INT64: { + if (has_raw_data) { + const auto& rd = indices_proto.raw_data(); + ASSERT_EQ(rd.size(), elements * sizeof(int64_t)); + ASSERT_STATUS_OK(utils::UnpackInitializerData(indices_proto, model_path, unpack_buffer)); + ind_span = gsl::make_span(unpack_buffer).as_span(); + } else { + ind_span = gsl::make_span(indices_proto.int64_data().cbegin(), indices_proto.int64_data().cend()); + } + break; + } + case ONNX_NAMESPACE::TensorProto_DataType_INT32: { + if (has_raw_data) { + const auto& rd = indices_proto.raw_data(); + ASSERT_EQ(rd.size(), elements * sizeof(int32_t)); + ASSERT_STATUS_OK(utils::UnpackInitializerData(indices_proto, model_path, unpack_buffer)); + auto int32_span = gsl::make_span(unpack_buffer).as_span(); + converted_indices.insert(converted_indices.cend(), int32_span.cbegin(), int32_span.cend()); + } else { + converted_indices.insert(converted_indices.cend(), indices_proto.int32_data().cbegin(), indices_proto.int32_data().cend()); + } + ind_span = gsl::make_span(converted_indices); + break; + } + case ONNX_NAMESPACE::TensorProto_DataType_INT16: { + ASSERT_TRUE(has_raw_data); + const auto& rd = indices_proto.raw_data(); + ASSERT_EQ(rd.size(), elements * sizeof(int16_t)); + ASSERT_STATUS_OK(utils::UnpackInitializerData(indices_proto, model_path, unpack_buffer)); + auto int16_span = gsl::make_span(unpack_buffer).as_span(); + converted_indices.insert(converted_indices.cend(), int16_span.cbegin(), int16_span.cend()); + ind_span = gsl::make_span(converted_indices); + break; + } + case ONNX_NAMESPACE::TensorProto_DataType_INT8: { + ASSERT_TRUE(has_raw_data); + const auto& rd = indices_proto.raw_data(); + ASSERT_EQ(rd.size(), elements); + ASSERT_STATUS_OK(utils::UnpackInitializerData(indices_proto, model_path, unpack_buffer)); + auto int8_span = gsl::make_span(unpack_buffer).as_span(); + converted_indices.insert(converted_indices.cend(), int8_span.cbegin(), int8_span.cend()); + ind_span = gsl::make_span(converted_indices); + break; + } + default: + ASSERT_TRUE(false); + } + ASSERT_THAT(ind_span, testing::ContainerEq(expected_indicies)); +} + +#endif // DISABLE_SPARSE_TENSORS + } // namespace test } // namespace onnxruntime diff --git a/tools/python/sparsify_initializers.py b/tools/python/sparsify_initializers.py index c81d25c78a..df461f1a92 100644 --- a/tools/python/sparsify_initializers.py +++ b/tools/python/sparsify_initializers.py @@ -26,7 +26,7 @@ def parse_arguments(): parser.add_argument('--exclude', required=False, type=str, help='semicolon separated list of initializer names to exclude') parser.add_argument('--tolerance', required=False, type=float, default=1e-6, - help='FP absolute tolerance. If not given simple compare to 0') + help='FP absolute tolerance.') parser.add_argument('--sparsity_threshold', required=False, type=float, default=0.5, help='convert to sparse initializers if sparsity is at least this much') @@ -49,11 +49,13 @@ def setup_logging(verbose): # type: (bool) -> None logger.setLevel(logging_level) -def convert_tensor_to_sparse(tensor, tolerance): # type: (TensorProto) -> Tuple[SparseTensorProto, float] +def convert_tensor_to_sparse(tensor, + sparsity_threshold, + tolerance): # type: (TensorProto, float, float) -> Tuple[SparseTensorProto, float] """ returns a tuple of sparse_tensor and sparsity level """ values = [] - indicies = [] + indices = [] nnz_count = 0 tensor_data = numpy_helper.to_array(tensor).flatten() data_len = len(tensor_data) @@ -62,25 +64,76 @@ def convert_tensor_to_sparse(tensor, tolerance): # type: (TensorProto) -> Tuple el = tensor_data[index] if abs(el) <= tolerance: values.append(el) - indicies.append(index) + indices.append(index) nnz_count += 1 else: for index in range(data_len): el = tensor_data[index] if el != 0: values.append(el) - indicies.append(index) + indices.append(index) nnz_count += 1 sparsity = float(1.) - float(nnz_count)/data_len - logger.debug(f"initializer={tensor.name}, dtype={tensor_data.dtype}, \ - len={data_len}, nnz={nnz_count}, sparsity={sparsity}") - values_tensor = onnx.helper.make_tensor(tensor.name, tensor.data_type, - [len(values)], np.array(values).astype(tensor_data.dtype)) + ind_data_type = TensorProto.INT8 + ind_dtype = np.int8 + ind_len = len(indices) + max_indices_value = 0 + if ind_len > 0: + max_indices_value = indices[-1] + if max_indices_value <= np.iinfo(np.int8).max: + ind_data_type = TensorProto.INT8 + ind_dtype = np.int8 + elif max_indices_value <= np.iinfo(np.int16).max: + ind_data_type = TensorProto.INT16 + ind_dtype = np.int16 + elif max_indices_value <= np.iinfo(np.int32).max: + ind_data_type = TensorProto.INT32 + ind_dtype = np.int32 + else: + ind_data_type = TensorProto.INT64 + ind_dtype = np.int64 + + logger.debug(f"initializer={tensor.name}, dtype={tensor_data.dtype}, \ + data_len={data_len}, nnz={nnz_count}, sparsity={sparsity}, \ + max_indices_value={max_indices_value}, sparse_indices_type={ind_dtype}") + + if sparsity < sparsity_threshold: + return (object(), sparsity) + + tensor_data_bytes = tensor_data.nbytes + # create np array and cast data to the appropriate type + np_values = np.array(values).astype(tensor_data.dtype) + # create np array and cast data to the inferred index type + np_indices = np.array(indices).astype(ind_dtype) + total_sparse_bytes = np_values.nbytes + np_indices.nbytes + + logger.debug(f"initializer={tensor.name}, initializer_bytes={tensor_data_bytes}, \ + sparse_initializer_bytes={total_sparse_bytes}") + + # This check is usually useful for sparsity_threshold=0.5 where much + # depends on the size of the indices entries and the size of the original tensor. + # Big dense tensors command larger indices data type and for large float32 tensors + # int32 indices are often selected, thus we really want to guard against loosing + # rather than winning. + if tensor_data_bytes <= total_sparse_bytes: + sparsity = float(1.) - float(tensor_data_bytes)/total_sparse_bytes + logger.debug(f"initializer={tensor.name}, adjusted_sparsity={sparsity}") + return (object(), sparsity) + + values_tensor = onnx.helper.make_tensor(tensor.name, + tensor.data_type, + [len(values)], + np_values.tobytes(), + raw=True) + indicies_tensor = onnx.helper.make_tensor(tensor.name + '_indicies', - TensorProto.INT64, - [len(indicies)], np.array(indicies).astype(np.int64)) + ind_data_type, + [ind_len], + np_indices.tobytes(), + raw=True) + sparse_tensor = onnx.helper.make_sparse_tensor(values_tensor, indicies_tensor, tensor.dims) return (sparse_tensor, sparsity) @@ -88,7 +141,7 @@ def convert_tensor_to_sparse(tensor, tolerance): # type: (TensorProto) -> Tuple def convert_initializers(model, exclude_names, sparsity_threshold, - tolerance): # type: (ModelProto, List[str], float) -> None + tolerance): # type: (ModelProto, List[str], float, float) -> None graph = model.graph converted_sparse = [] remaining_initializers = [] @@ -100,7 +153,7 @@ def convert_initializers(model, logger.info(f"initializer={initializer.name} contains bool, not converted") remaining_initializers.append(initializer) continue - sparse_tensor, sparsity = convert_tensor_to_sparse(initializer, tolerance) + sparse_tensor, sparsity = convert_tensor_to_sparse(initializer, sparsity_threshold, tolerance) if sparsity >= sparsity_threshold: logger.info(f"initializer={initializer.name} converted. sparsity={sparsity}") converted_sparse.append(sparse_tensor)