diff --git a/onnxruntime/core/framework/tensorprotoutils.cc b/onnxruntime/core/framework/tensorprotoutils.cc index c149fda6e9..14eb498249 100644 --- a/onnxruntime/core/framework/tensorprotoutils.cc +++ b/onnxruntime/core/framework/tensorprotoutils.cc @@ -698,7 +698,6 @@ Status TensorProtoToTensor(const Env& env, const ORTCHAR_T* model_path, return Status::OK(); } - #ifdef _MSC_VER #pragma warning(push) #pragma warning(disable : 6239) @@ -910,6 +909,7 @@ static Status CopySparseData(size_t n_sparse_elements, return status; } +namespace conversion_internal { struct UnsupportedSparseDataType { void operator()(int32_t dt_type, Status& status) const { status = ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "Unsupported sparse tensor data type of ", dt_type); @@ -924,6 +924,10 @@ struct GetElementSize { } }; +using SupportedConversionTypeList = onnxruntime::TypeList; +} // namespace conversion_internal + common::Status SparseTensorProtoToDenseTensorProto(const ONNX_NAMESPACE::SparseTensorProto& sparse, const Path& model_path, ONNX_NAMESPACE::TensorProto& dense) { @@ -956,9 +960,9 @@ common::Status SparseTensorProtoToDenseTensorProto(const ONNX_NAMESPACE::SparseT void* sparse_data = sparse_data_storage.get(); size_t element_size = 0; // We want to this list to match the one used below in DenseTensorToSparseTensorProto() - MLTypeCallDispatcher type_disp(type); + MLTypeCallDispatcherFromTypeList type_disp(type); ORT_RETURN_IF_ERROR( - (type_disp.InvokeRetWithUnsupportedPolicy(element_size))); + (type_disp.InvokeRetWithUnsupportedPolicy(element_size))); // by putting the data into a std::string we can avoid a copy as set_raw_data can do a std::move // into the TensorProto. however to actually write to the buffer we have created in the std::string we need @@ -966,41 +970,67 @@ common::Status SparseTensorProtoToDenseTensorProto(const ONNX_NAMESPACE::SparseT // but using const_cast makes it more obvious we're doing something ugly. 
// C++17 add non-const data() where we could remove const_cast std::string dense_data_storage(n_dense_elements * element_size, 0); - void* dense_data = const_cast(dense_data_storage.data()); + if (n_sparse_elements > 0) { + void* dense_data = const_cast(dense_data_storage.data()); - switch (element_size) { - case 1: { - auto dense_data_span = gsl::make_span(static_cast(dense_data), n_dense_elements); - status = CopySparseData( - n_sparse_elements, - indices, dims, - [sparse_data, dense_data_span](size_t from_idx, size_t to_idx) { - dense_data_span[to_idx] = static_cast(sparse_data)[from_idx]; - }); + switch (element_size) { + case 1: { + auto dense_data_span = gsl::make_span(static_cast(dense_data), n_dense_elements); + status = CopySparseData( + n_sparse_elements, + indices, dims, + [sparse_data, dense_data_span](size_t from_idx, size_t to_idx) { + dense_data_span[to_idx] = static_cast(sparse_data)[from_idx]; + }); - break; - } - case 4: { - auto dense_data_span = gsl::make_span(static_cast(dense_data), n_dense_elements); - status = CopySparseData( - n_sparse_elements, - indices, dims, - [sparse_data, dense_data_span](size_t from_idx, size_t to_idx) { - dense_data_span[to_idx] = static_cast(sparse_data)[from_idx]; - }); + break; + } + case 2: { + auto dense_data_span = gsl::make_span(static_cast(dense_data), n_dense_elements); + status = CopySparseData( + n_sparse_elements, + indices, dims, + [sparse_data, dense_data_span](size_t from_idx, size_t to_idx) { + dense_data_span[to_idx] = static_cast(sparse_data)[from_idx]; + }); - break; - } - default: - ORT_THROW(false, "BUG! 
Report to onnxruntime team."); + break; + } + case 4: { + auto dense_data_span = gsl::make_span(static_cast(dense_data), n_dense_elements); + status = CopySparseData( + n_sparse_elements, + indices, dims, + [sparse_data, dense_data_span](size_t from_idx, size_t to_idx) { + dense_data_span[to_idx] = static_cast(sparse_data)[from_idx]; + }); + + break; + } + case 8: { + auto dense_data_span = gsl::make_span(static_cast(dense_data), n_dense_elements); + status = CopySparseData( + n_sparse_elements, + indices, dims, + [sparse_data, dense_data_span](size_t from_idx, size_t to_idx) { + dense_data_span[to_idx] = static_cast(sparse_data)[from_idx]; + }); + break; + } + + default: + return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, + " BUG! Report to onnxruntime team. element_size of: ", + element_size, " is not supported.", " type: ", type); + } + + ORT_RETURN_IF_ERROR(status); } - - ORT_RETURN_IF_ERROR(status); dense.set_raw_data(std::move(dense_data_storage)); } else { // No request for std::string - UnsupportedSparseDataType()(ONNX_NAMESPACE::TensorProto_DataType_STRING, status); + conversion_internal::UnsupportedSparseDataType()(ONNX_NAMESPACE::TensorProto_DataType_STRING, status); } return status; } @@ -1041,13 +1071,15 @@ static void SparsifyGeneric(const void* dense_raw_data, size_t n_dense_elements, } } +// Here we are not using tolerance for FP types since these dense tensors were +// created from sparse initializers where zeros were absolute template -bool IsZero(const void* p) { +inline bool IsZero(const void* p) { return (static_cast(0) == *reinterpret_cast(p)); } template -void CopyElement(void* dst, const void* src, int64_t dst_index, int64_t src_index) { +inline void CopyElement(void* dst, const void* src, int64_t dst_index, int64_t src_index) { reinterpret_cast(dst)[dst_index] = reinterpret_cast(src)[src_index]; } @@ -1059,7 +1091,7 @@ common::Status DenseTensorToSparseTensorProto(const ONNX_NAMESPACE::TensorProto& const bool is_string_data = 
dense_proto.data_type() == ONNX_NAMESPACE::TensorProto_DataType_STRING; if (is_string_data) { Status status{}; - UnsupportedSparseDataType()(ONNX_NAMESPACE::TensorProto_DataType_STRING, status); + conversion_internal::UnsupportedSparseDataType()(ONNX_NAMESPACE::TensorProto_DataType_STRING, status); return status; } @@ -1081,25 +1113,36 @@ common::Status DenseTensorToSparseTensorProto(const ONNX_NAMESPACE::TensorProto& std::unique_ptr dense_raw_data; ORT_RETURN_IF_ERROR(UnpackInitializerData(dense_proto, model_path, dense_raw_data, tensor_bytes_size)); size_t element_size = 0; - MLTypeCallDispatcher type_disp(data_type); + // We want this type list to match the one above in SparseTensorProtoToDenseTensorProto + MLTypeCallDispatcherFromTypeList type_disp(data_type); ORT_RETURN_IF_ERROR( - (type_disp.InvokeRetWithUnsupportedPolicy(element_size))); + (type_disp.InvokeRetWithUnsupportedPolicy(element_size))); switch (element_size) { case 1: { - // bytes SparsifyGeneric(dense_raw_data.get(), n_dense_elements, element_size, IsZero, CopyElement, values, indices); break; } + case 2: { + SparsifyGeneric(dense_raw_data.get(), n_dense_elements, element_size, + IsZero, CopyElement, values, indices); + break; + } case 4: { - // float SparsifyGeneric(dense_raw_data.get(), n_dense_elements, element_size, IsZero, CopyElement, values, indices); break; } + case 8: { + SparsifyGeneric(dense_raw_data.get(), n_dense_elements, element_size, + IsZero, CopyElement, values, indices); + break; + } default: - ORT_THROW(false, "BUG! Report to onnxruntime team."); + return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, + " BUG! Report to onnxruntime team. 
element_size of: ", + element_size, " is not supported.", " data_type: ", data_type); } // Fix up shapes diff --git a/onnxruntime/test/framework/sparse_kernels_test.cc b/onnxruntime/test/framework/sparse_kernels_test.cc index 332f585f76..84493ada0d 100644 --- a/onnxruntime/test/framework/sparse_kernels_test.cc +++ b/onnxruntime/test/framework/sparse_kernels_test.cc @@ -499,24 +499,28 @@ TEST_F(SparseTensorTests, Test2) { RunTest(); } +// Code below depends on the values being size 4 template static std::vector CreateValues() { return {1, 2, 3, 4}; } -/* std::string suport in the future +/* std::string support in the future template <> std::vector CreateValues() { return {"one", "two", "three", "four"}; } */ -/* BFloat16 support in the future template <> std::vector CreateValues() { return {BFloat16(1.f), BFloat16(2.f), BFloat16(3.f), BFloat16(4.f)}; } -*/ + +template <> +std::vector CreateValues() { + return {MLFloat16(1.f), MLFloat16(2.f), MLFloat16(3.f), MLFloat16(4.f)}; +} template static void CreateTensorWithExternalData( @@ -591,6 +595,45 @@ static NodeProto CreateConstantNode(bool indices_1D, return constant_node; } +template +static NodeProto CreateConstantNodeAllZeros(bool indices_1D, std::vector& expected_data) { + NodeProto constant_node; + constant_node.set_op_type("Constant"); + constant_node.add_output("dense_tensor_output"); + + std::vector indices; + std::vector shape{2, 3, 2}; + + AttributeProto& attrib = *constant_node.mutable_attribute()->Add(); + attrib.set_name("sparse_value_all_zeros"); + attrib.set_type(AttributeProto_AttributeType_SPARSE_TENSOR); + + SparseTensorProto& stp = *attrib.mutable_sparse_tensor(); + TensorProto& indices_tp = *stp.mutable_indices(); + + stp.mutable_dims()->Add(shape.cbegin(), shape.cend()); + + if (indices_1D) { + indices_tp.add_dims(0); + } else { + // indices are shape {NNZ, rank} so convert flattened values of 2, 5, 6 and 10 to rank 3 values + indices_tp.add_dims(0); + indices_tp.add_dims(0); + } + + 
indices_tp.set_data_type(ONNX_NAMESPACE::TensorProto_DataType_INT64); + + // Must be all zeros + expected_data.resize(2 * 3 * 2); + + auto& mutable_values = *stp.mutable_values(); + mutable_values.set_name("all_zeros"); + mutable_values.set_data_type(utils::ToTensorProtoElementType()); + mutable_values.add_dims(0); + + return constant_node; +} + template static void TestConversion(bool use_1D_indices, std::function& values, TensorProto& tp)> inserter, @@ -607,12 +650,29 @@ static void TestConversion(bool use_1D_indices, checker(expected_span, dense); } +template +static void TestConversionAllZeros(bool use_1D_indices, + std::function expected, const TensorProto& actual)> checker) { + std::vector expected; + auto node = CreateConstantNodeAllZeros(use_1D_indices, expected); + + TensorProto dense; + // Path is required for loading external data (if any) + // When path is empty it will look for the data in current dir + utils::ConstantNodeProtoToTensorProto(node, Path(), dense); + + gsl::span expected_span = gsl::make_span(expected.data(), expected.size()); + checker(expected_span, dense); +} + template static void TestConversion( std::function& values, TensorProto& tp)> inserter, std::function expected, const TensorProto& actual)> checker) { TestConversion(true, inserter, checker); TestConversion(false, inserter, checker); + TestConversionAllZeros(true, checker); + TestConversionAllZeros(false, checker); } template @@ -621,12 +681,17 @@ static void RawDataWriter(const std::vector& values, TensorProto& tp, TensorP tp.set_raw_data(values.data(), values.size() * sizeof(T)); } -template -static void RawDataChecker(gsl::span expected, const TensorProto& actual) { +int64_t ActualSize(const TensorProto& actual) { int64_t actual_size = 1; for (const auto dim : actual.dims()) { actual_size *= dim; } + return actual_size; +} + +template +static void RawDataChecker(gsl::span expected, const TensorProto& actual) { + int64_t actual_size = ActualSize(actual); const T* raw_data = 
reinterpret_cast(actual.raw_data().data()); auto actual_span = gsl::make_span(raw_data, actual_size); @@ -634,13 +699,20 @@ static void RawDataChecker(gsl::span expected, const TensorProto& actua EXPECT_THAT(actual_span, testing::ContainerEq(expected)); } -/* For BFloat16 support in the future. template <> -void RawDataChecker(gsl::span expected_bfloat, const TensorProto& actual) { - int64_t actual_size = 1; - for (const auto dim : actual.dims()) { - actual_size *= dim; - } +void RawDataChecker(gsl::span expected_bfloat, const TensorProto& actual) { + int64_t actual_size = ActualSize(actual); + + auto expected = expected_bfloat.as_span(); + const uint16_t* raw_data = reinterpret_cast(actual.raw_data().data()); + auto actual_span = gsl::make_span(raw_data, actual_size); + + EXPECT_THAT(actual_span, testing::ContainerEq(expected)); +} + +template <> +void RawDataChecker(gsl::span expected_bfloat, const TensorProto& actual) { + int64_t actual_size = ActualSize(actual); auto expected = expected_bfloat.as_span(); const uint16_t* raw_data = reinterpret_cast(actual.raw_data().data()); @@ -648,7 +720,6 @@ void RawDataChecker(gsl::span expected_bfloat, const T EXPECT_THAT(actual_span, testing::ContainerEq(expected)); } -*/ TEST(SparseTensorConversionTests, TestConstantNodeConversion) { TestConversion( @@ -658,6 +729,74 @@ TEST(SparseTensorConversionTests, TestConstantNodeConversion) { }, RawDataChecker); + TestConversion( + [](const std::vector& values, TensorProto& tp) { + tp.set_data_type(TensorProto_DataType_DOUBLE); + tp.mutable_double_data()->Add(values.cbegin(), values.cend()); + }, + RawDataChecker); + + TestConversion( + [](const std::vector& values, TensorProto& tp) { + tp.set_data_type(TensorProto_DataType_BFLOAT16); + for (auto v : values) { + tp.mutable_int32_data()->Add(v.val); + } + }, + RawDataChecker); + + TestConversion( + [](const std::vector& values, TensorProto& tp) { + tp.set_data_type(TensorProto_DataType_FLOAT16); + for (auto v : values) { + 
tp.mutable_int32_data()->Add(v.val); + } + }, + RawDataChecker); + + TestConversion( + [](const std::vector& values, TensorProto& tp) { + tp.set_data_type(TensorProto_DataType_INT16); + tp.mutable_int32_data()->Add(values.cbegin(), values.cend()); + }, + RawDataChecker); + + TestConversion( + [](const std::vector& values, TensorProto& tp) { + tp.set_data_type(TensorProto_DataType_UINT16); + tp.mutable_int32_data()->Add(values.cbegin(), values.cend()); + }, + RawDataChecker); + + TestConversion( + [](const std::vector& values, TensorProto& tp) { + tp.set_data_type(TensorProto_DataType_INT32); + tp.mutable_int32_data()->Add(values.cbegin(), values.cend()); + }, + RawDataChecker); + + TestConversion( + [](const std::vector& values, TensorProto& tp) { + tp.set_data_type(TensorProto_DataType_UINT32); + tp.mutable_uint64_data()->Add(values.cbegin(), values.cend()); + }, + RawDataChecker); + + // Test all zeros case + TestConversion( + [](const std::vector& values, TensorProto& tp) { + tp.set_data_type(TensorProto_DataType_INT64); + tp.mutable_int64_data()->Add(values.cbegin(), values.cend()); + }, + RawDataChecker); + + TestConversion( + [](const std::vector& values, TensorProto& tp) { + tp.set_data_type(TensorProto_DataType_UINT64); + tp.mutable_uint64_data()->Add(values.cbegin(), values.cend()); + }, + RawDataChecker); + TestConversion( [](const std::vector& values, TensorProto& tp) { tp.set_data_type(TensorProto_DataType_INT8); @@ -673,11 +812,12 @@ TEST(SparseTensorConversionTests, TestConstantNodeConversion) { // Test constant node conversion for SparseTensor with external data PathString tensor_filename(ORT_TSTR("tensor_XXXXXX")); - TestConversion(true, - [&tensor_filename](const std::vector& values, TensorProto& tp) { - CreateTensorWithExternalData(TensorProto_DataType_FLOAT, values, tensor_filename, tp); - }, - RawDataChecker); + TestConversion( + true, + [&tensor_filename](const std::vector& values, TensorProto& tp) { + 
CreateTensorWithExternalData(TensorProto_DataType_FLOAT, values, tensor_filename, tp); + }, + RawDataChecker); DeleteFileFromDisk(tensor_filename.c_str()); } @@ -696,12 +836,30 @@ std::vector CreateSparseValues() { } */ -/* BFloat16 support in the future template <> std::vector CreateSparseValues() { return {BFloat16(0.f), BFloat16(2.f), BFloat16(3.f), BFloat16(0.f)}; } -*/ + +template <> +std::vector CreateSparseValues() { + return {MLFloat16(0.f), MLFloat16(2.f), MLFloat16(3.f), MLFloat16(0.f)}; +} + +template +static std::vector CreateSparseValuesAllZeros() { + return {0, 0, 0, 0}; +} + +template <> +std::vector CreateSparseValuesAllZeros() { + return {BFloat16(0.f), BFloat16(0.f), BFloat16(0.f), BFloat16(0.f)}; +} + +template <> +std::vector CreateSparseValuesAllZeros() { + return {MLFloat16(0.f), MLFloat16(0.f), MLFloat16(0.f), MLFloat16(0.f)}; +} template TensorProto CreateDenseTensor(std::function& values, TensorProto& tp)> inserter, @@ -718,13 +876,27 @@ TensorProto CreateDenseTensor(std::function& values, T } template -static void RawSparseDataChecker(gsl::span expected_values, - gsl::span expected_indicies, - const SparseTensorProto& actual) { +TensorProto CreateDenseTensorAllZeros(std::function& values, TensorProto& tp)> inserter) { + TensorProto result; + std::vector values = CreateSparseValuesAllZeros(); + inserter(values, result); + result.add_dims(static_cast(values.size())); + return result; +} + +int64_t ActualSize(const SparseTensorProto& actual) { int64_t actual_size = 1; for (const auto dim : actual.values().dims()) { actual_size *= dim; } + return actual_size; +} + +template +static void RawSparseDataChecker(gsl::span expected_values, + gsl::span expected_indicies, + const SparseTensorProto& actual) { + const int64_t actual_size = ActualSize(actual); const T* raw_data = reinterpret_cast(actual.values().raw_data().data()); auto actual_span = gsl::make_span(raw_data, actual_size); @@ -737,15 +909,11 @@ static void RawSparseDataChecker(gsl::span 
expected_values, EXPECT_THAT(actual_indicies, testing::ContainerEq(expected_indicies)); } -/* When we support BFloat16 template <> void RawSparseDataChecker(gsl::span expected_bfloat, gsl::span expected_indicies, const SparseTensorProto& actual) { - int64_t actual_size = 1; - for (const auto dim : actual.values().dims()) { - actual_size *= dim; - } + const int64_t actual_size = ActualSize(actual); static_assert(sizeof(uint16_t) == sizeof(BFloat16), "Expecting equal sizes"); auto expected = expected_bfloat.as_span(); @@ -758,10 +926,27 @@ void RawSparseDataChecker(gsl::span expected_bfloat, auto actual_indicies = gsl::make_span(actual.indices().int64_data().data(), actual.indices().int64_data_size()); EXPECT_THAT(actual_indicies, testing::ContainerEq(expected_indicies)); } -*/ + +template <> +void RawSparseDataChecker(gsl::span expected_bfloat, + gsl::span expected_indicies, + const SparseTensorProto& actual) { + const int64_t actual_size = ActualSize(actual); + + static_assert(sizeof(uint16_t) == sizeof(MLFloat16), "Expecting equal sizes"); + auto expected = expected_bfloat.as_span(); + const uint16_t* raw_data = reinterpret_cast(actual.values().raw_data().data()); + auto actual_span = gsl::make_span(raw_data, actual_size); + + EXPECT_THAT(actual_span, testing::ContainerEq(expected)); + // Check indicies + EXPECT_THAT(actual.indices().data_type(), ONNX_NAMESPACE::TensorProto_DataType_INT64); + auto actual_indicies = gsl::make_span(actual.indices().int64_data().data(), actual.indices().int64_data_size()); + EXPECT_THAT(actual_indicies, testing::ContainerEq(expected_indicies)); +} template -static void TestDenseToSparseConversion( +static void TestDenseToSparseConversionValues( std::function& values, TensorProto& tp)> inserter, std::function expected, gsl::span expected_indicies, @@ -783,6 +968,40 @@ static void TestDenseToSparseConversion( checker(expected_values_span, expected_ind_span, sparse_tensor); } +template +static void TestDenseAllZerosToSparseConversion( + 
std::function& values, TensorProto& tp)> inserter, + std::function expected, + gsl::span expected_indicies, + const SparseTensorProto& actual)> + checker) { + std::vector expected_values; + std::vector expected_indicies; + // Path is required for loading external data + // Using empty path here since the data is not external + Path model_path; + TensorProto dense_tensor = CreateDenseTensorAllZeros(inserter); + + SparseTensorProto sparse_tensor; + utils::DenseTensorToSparseTensorProto(dense_tensor, model_path, sparse_tensor); + + gsl::span + expected_values_span = gsl::make_span(expected_values.data(), expected_values.size()); + gsl::span expected_ind_span = gsl::make_span(expected_indicies.data(), expected_indicies.size()); + checker(expected_values_span, expected_ind_span, sparse_tensor); +} + +template +static void TestDenseToSparseConversion(std::function& values, TensorProto& tp)> inserter, + std::function expected, + gsl::span expected_indicies, + const SparseTensorProto& actual)> + checker) { + TestDenseToSparseConversionValues(inserter, checker); + TestDenseAllZerosToSparseConversion(inserter, checker); +} + + TEST(SparseTensorConversionTests, TestDenseToSparseConversion) { TestDenseToSparseConversion( [](const std::vector& values, TensorProto& tp) { @@ -792,6 +1011,82 @@ TEST(SparseTensorConversionTests, TestDenseToSparseConversion) { }, RawSparseDataChecker); + TestDenseToSparseConversion( + [](const std::vector& values, TensorProto& tp) { + tp.set_data_type(TensorProto_DataType_DOUBLE); + tp.set_name("dense_double"); + tp.mutable_double_data()->Add(values.cbegin(), values.cend()); + }, + RawSparseDataChecker); + + TestDenseToSparseConversion( + [](const std::vector& values, TensorProto& tp) { + tp.set_data_type(TensorProto_DataType_BFLOAT16); + tp.set_name("dense_bfloat16"); + for (auto v : values) { + tp.mutable_int32_data()->Add(v.val); + } + }, + RawSparseDataChecker); + + TestDenseToSparseConversion( + [](const std::vector& values, TensorProto& tp) { 
+ tp.set_data_type(TensorProto_DataType_FLOAT16); + tp.set_name("dense_float16"); + for (auto v : values) { + tp.mutable_int32_data()->Add(v.val); + } + }, + RawSparseDataChecker); + + TestDenseToSparseConversion( + [](const std::vector& values, TensorProto& tp) { + tp.set_name("dense_int16"); + tp.set_data_type(TensorProto_DataType_INT16); + tp.mutable_int32_data()->Add(values.cbegin(), values.cend()); + }, + RawSparseDataChecker); + + TestDenseToSparseConversion( + [](const std::vector& values, TensorProto& tp) { + tp.set_name("dense_uint16"); + tp.set_data_type(TensorProto_DataType_UINT16); + tp.mutable_int32_data()->Add(values.cbegin(), values.cend()); + }, + RawSparseDataChecker); + + TestDenseToSparseConversion( + [](const std::vector& values, TensorProto& tp) { + tp.set_name("dense_int32"); + tp.set_data_type(TensorProto_DataType_INT32); + tp.mutable_int32_data()->Add(values.cbegin(), values.cend()); + }, + RawSparseDataChecker); + + TestDenseToSparseConversion( + [](const std::vector& values, TensorProto& tp) { + tp.set_name("dense_uint32"); + tp.set_data_type(TensorProto_DataType_UINT32); + tp.mutable_uint64_data()->Add(values.cbegin(), values.cend()); + }, + RawSparseDataChecker); + + TestDenseToSparseConversion( + [](const std::vector& values, TensorProto& tp) { + tp.set_name("dense_int64"); + tp.set_data_type(TensorProto_DataType_INT64); + tp.mutable_int64_data()->Add(values.cbegin(), values.cend()); + }, + RawSparseDataChecker); + + TestDenseToSparseConversion( + [](const std::vector& values, TensorProto& tp) { + tp.set_name("dense_uint64"); + tp.set_data_type(TensorProto_DataType_UINT64); + tp.mutable_uint64_data()->Add(values.cbegin(), values.cend()); + }, + RawSparseDataChecker); + TestDenseToSparseConversion( [](const std::vector& values, TensorProto& tp) { tp.set_name("dense_int8"); @@ -808,7 +1103,7 @@ TEST(SparseTensorConversionTests, TestDenseToSparseConversion) { RawSparseDataChecker); } -#endif // !ORT_MINIMAL_BUILD +#endif // 
!ORT_MINIMAL_BUILD } // namespace test } // namespace onnxruntime
diff --git a/tools/python/sparsify_initializers.py b/tools/python/sparsify_initializers.py
new file mode 100644
index 0000000000..61e9476dcf
--- /dev/null
+++ b/tools/python/sparsify_initializers.py
@@ -0,0 +1,144 @@
+# -------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+# --------------------------------------------------------------------------
+# This script opens an existing model in onnx format and attempts to
+# move initializers from model.graph.initializer field to model.graph.sparse_initializer field
+# and convert them into ONNX COO flat index format.
+
+import argparse
+import logging
+import numpy as np
+import sys
+from typing import Collection, List, Tuple
+import onnx
+from onnx import ModelProto, SparseTensorProto, TensorProto, numpy_helper
+
+logger = logging.getLogger(__name__)
+
+# numpy scalar types whose zeros are detected with an absolute tolerance rather than an exact compare
+real_types = set((np.float32, np.float64, np.double))
+
+
+def parse_arguments():
+    """Parse and return the command line arguments of the script."""
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--input', required=True, type=str, help='input model path')
+    parser.add_argument('--output', required=True, type=str, help='output model path')
+    parser.add_argument('--exclude', required=False, type=str,
+                        help='semicolon separated list of initializer names to exclude')
+    parser.add_argument('--tolerance', required=False, type=float, default=1e-6,
+                        help='FP absolute tolerance. Values with magnitude <= tolerance are treated as 0')
+    parser.add_argument('--sparsity_threshold', required=False,
+                        type=float, default=0.5,
+                        help='convert to sparse initializers if sparsity is at least this much')
+    parser.add_argument('--verbose', required=False, action='store_true')
+    parser.set_defaults(verbose=False)
+    args = parser.parse_args()
+    return args
+
+
+def setup_logging(verbose: bool) -> None:
+    """Send this module's log records to stdout; verbose=True enables DEBUG level and a detailed format."""
+    log_handler = logging.StreamHandler(sys.stdout)
+    if verbose:
+        log_handler.setFormatter(logging.Formatter('[%(filename)s:%(lineno)s - %(funcName)20s()] %(message)s'))
+        logging_level = logging.DEBUG
+    else:
+        log_handler.setFormatter(logging.Formatter('%(filename)20s: %(message)s'))
+        logging_level = logging.INFO
+    log_handler.setLevel(logging_level)
+    logger.addHandler(log_handler)
+    logger.setLevel(logging_level)
+
+
+def convert_tensor_to_sparse(tensor: TensorProto, tolerance: float) -> Tuple[SparseTensorProto, float]:
+    """Convert a dense initializer into a sparse tensor with flattened (1-D) indices.
+
+    Only the non-zero elements are stored. For float32/float64 data an element counts
+    as zero when its magnitude is <= tolerance; every other dtype is compared to 0 exactly.
+
+    Returns a tuple of (sparse_tensor, sparsity), where sparsity is the fraction of
+    zero elements in the dense tensor (0.0 for an empty tensor).
+    """
+    tensor_data = numpy_helper.to_array(tensor).flatten()
+    data_len = len(tensor_data)
+    # tensor_data.dtype is a np.dtype instance, while real_types holds scalar type
+    # classes, so the membership test has to go through dtype.type.
+    if tensor_data.dtype.type in real_types:
+        # Phrased as "not (|x| <= tolerance)" so that NaN is kept as a non-zero value.
+        nonzero_mask = ~(np.abs(tensor_data) <= tolerance)
+    else:
+        nonzero_mask = tensor_data != 0
+    indicies = np.flatnonzero(nonzero_mask).astype(np.int64)
+    values = tensor_data[indicies]
+    nnz_count = len(indicies)
+
+    # An empty tensor has nothing to sparsify; report 0 sparsity instead of dividing by zero.
+    sparsity = 1.0 - float(nnz_count) / data_len if data_len > 0 else 0.0
+    logger.debug(f"initializer={tensor.name}, dtype={tensor_data.dtype}, "
+                 f"len={data_len}, nnz={nnz_count}, sparsity={sparsity}")
+
+    values_tensor = onnx.helper.make_tensor(tensor.name, tensor.data_type, [nnz_count], values)
+    indicies_tensor = onnx.helper.make_tensor(tensor.name + '_indicies',
+                                              TensorProto.INT64,
+                                              [nnz_count], indicies)
+    sparse_tensor = onnx.helper.make_sparse_tensor(values_tensor, indicies_tensor, tensor.dims)
+    return (sparse_tensor, sparsity)
+
+
+def convert_initializers(model: ModelProto,
+                         exclude_names: Collection[str],
+                         sparsity_threshold: float,
+                         tolerance: float) -> None:
+    """Move sufficiently sparse initializers of model.graph to graph.sparse_initializer, in place.
+
+    Initializers named in exclude_names and BOOL initializers are kept dense. Any other
+    initializer is converted when its sparsity is at least sparsity_threshold.
+    """
+    graph = model.graph
+    converted_sparse: List[SparseTensorProto] = []
+    remaining_initializers: List[TensorProto] = []
+    for initializer in graph.initializer:
+        if initializer.name in exclude_names:
+            logger.info(f"initializer={initializer.name} was excluded")
+            # Excluded means "leave dense", so it has to survive the rebuild of graph.initializer below.
+            remaining_initializers.append(initializer)
+            continue
+        if initializer.data_type == TensorProto.BOOL:
+            logger.info(f"initializer={initializer.name} contains bool, not converted")
+            remaining_initializers.append(initializer)
+            continue
+        sparse_tensor, sparsity = convert_tensor_to_sparse(initializer, tolerance)
+        if sparsity >= sparsity_threshold:
+            logger.info(f"initializer={initializer.name} converted. sparsity={sparsity}")
+            converted_sparse.append(sparse_tensor)
+        else:
+            remaining_initializers.append(initializer)
+            logger.info(f"initializer={initializer.name} is not converted. sparsity={sparsity}")
+
+    graph.sparse_initializer.extend(converted_sparse)
+    del graph.initializer[:]
+    graph.initializer.extend(remaining_initializers)
+
+
+def main():
+    """Load the input model, sparsify its initializers and write the result to the output path."""
+    args = parse_arguments()
+    setup_logging(args.verbose)
+
+    exclude_names = set() if args.exclude is None else set(args.exclude.split(';'))
+
+    model = ModelProto()
+    with open(args.input, "rb") as input_file:
+        model.ParseFromString(input_file.read())
+
+    convert_initializers(model, exclude_names, args.sparsity_threshold, args.tolerance)
+
+    with open(args.output, "wb") as output_file:
+        s = model.SerializeToString()
+        output_file.write(s)
+
+
+if __name__ == "__main__":
+    main()