Add types support for Sparse Initializer in Onnxruntime (#7004)

Add types support for DenseToSparse and SparseToDense conversions
  Address the case of empty sparse values and indices when the initializer does
  not contain any NNZ.
  Add sparsify script.
This commit is contained in:
Dmitri Smirnov 2021-03-22 10:06:11 -07:00 committed by GitHub
parent 4a3d1176d7
commit 3b58fc7b97
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 540 additions and 68 deletions

View file

@ -698,7 +698,6 @@ Status TensorProtoToTensor(const Env& env, const ORTCHAR_T* model_path,
return Status::OK();
}
#ifdef _MSC_VER
#pragma warning(push)
#pragma warning(disable : 6239)
@ -910,6 +909,7 @@ static Status CopySparseData(size_t n_sparse_elements,
return status;
}
namespace conversion_internal {
struct UnsupportedSparseDataType {
void operator()(int32_t dt_type, Status& status) const {
status = ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "Unsupported sparse tensor data type of ", dt_type);
@ -924,6 +924,10 @@ struct GetElementSize {
}
};
using SupportedConversionTypeList = onnxruntime::TypeList<float, double, MLFloat16, BFloat16,
int8_t, uint8_t, int16_t, uint16_t, int32_t, uint32_t, int64_t, uint64_t>;
} // namespace conversion_internal
common::Status SparseTensorProtoToDenseTensorProto(const ONNX_NAMESPACE::SparseTensorProto& sparse,
const Path& model_path,
ONNX_NAMESPACE::TensorProto& dense) {
@ -956,9 +960,9 @@ common::Status SparseTensorProtoToDenseTensorProto(const ONNX_NAMESPACE::SparseT
void* sparse_data = sparse_data_storage.get();
size_t element_size = 0;
// We want to this list to match the one used below in DenseTensorToSparseTensorProto()
MLTypeCallDispatcher<float, int8_t, uint8_t> type_disp(type);
MLTypeCallDispatcherFromTypeList<conversion_internal::SupportedConversionTypeList> type_disp(type);
ORT_RETURN_IF_ERROR(
(type_disp.InvokeRetWithUnsupportedPolicy<Status, GetElementSize, UnsupportedSparseDataType>(element_size)));
(type_disp.InvokeRetWithUnsupportedPolicy<Status, conversion_internal::GetElementSize, conversion_internal::UnsupportedSparseDataType>(element_size)));
// by putting the data into a std::string we can avoid a copy as set_raw_data can do a std::move
// into the TensorProto. however to actually write to the buffer we have created in the std::string we need
@ -966,6 +970,7 @@ common::Status SparseTensorProtoToDenseTensorProto(const ONNX_NAMESPACE::SparseT
// but using const_cast makes it more obvious we're doing something ugly.
// C++17 add non-const data() where we could remove const_cast
std::string dense_data_storage(n_dense_elements * element_size, 0);
if (n_sparse_elements > 0) {
void* dense_data = const_cast<char*>(dense_data_storage.data());
switch (element_size) {
@ -980,6 +985,17 @@ common::Status SparseTensorProtoToDenseTensorProto(const ONNX_NAMESPACE::SparseT
break;
}
case 2: {
auto dense_data_span = gsl::make_span<uint16_t>(static_cast<uint16_t*>(dense_data), n_dense_elements);
status = CopySparseData<uint16_t>(
n_sparse_elements,
indices, dims,
[sparse_data, dense_data_span](size_t from_idx, size_t to_idx) {
dense_data_span[to_idx] = static_cast<const uint16_t*>(sparse_data)[from_idx];
});
break;
}
case 4: {
auto dense_data_span = gsl::make_span<uint32_t>(static_cast<uint32_t*>(dense_data), n_dense_elements);
status = CopySparseData<uint32_t>(
@ -991,16 +1007,30 @@ common::Status SparseTensorProtoToDenseTensorProto(const ONNX_NAMESPACE::SparseT
break;
}
case 8: {
auto dense_data_span = gsl::make_span<uint64_t>(static_cast<uint64_t*>(dense_data), n_dense_elements);
status = CopySparseData<uint64_t>(
n_sparse_elements,
indices, dims,
[sparse_data, dense_data_span](size_t from_idx, size_t to_idx) {
dense_data_span[to_idx] = static_cast<const uint64_t*>(sparse_data)[from_idx];
});
break;
}
default:
ORT_THROW(false, "BUG! Report to onnxruntime team.");
return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL,
" BUG! Report to onnxruntime team. element_size of: ",
element_size, " is not supported.", " type: ", type);
}
ORT_RETURN_IF_ERROR(status);
}
dense.set_raw_data(std::move(dense_data_storage));
} else {
// No request for std::string
UnsupportedSparseDataType()(ONNX_NAMESPACE::TensorProto_DataType_STRING, status);
conversion_internal::UnsupportedSparseDataType()(ONNX_NAMESPACE::TensorProto_DataType_STRING, status);
}
return status;
}
@ -1041,13 +1071,15 @@ static void SparsifyGeneric(const void* dense_raw_data, size_t n_dense_elements,
}
}
// Here we are not using tolerance for FP types since these dense tensors were
// created from sparse initializers where zeros were absolute
template <typename T>
bool IsZero(const void* p) {
inline bool IsZero(const void* p) {
return (static_cast<T>(0) == *reinterpret_cast<const T*>(p));
}
template <typename T>
void CopyElement(void* dst, const void* src, int64_t dst_index, int64_t src_index) {
inline void CopyElement(void* dst, const void* src, int64_t dst_index, int64_t src_index) {
reinterpret_cast<T*>(dst)[dst_index] = reinterpret_cast<const T*>(src)[src_index];
}
@ -1059,7 +1091,7 @@ common::Status DenseTensorToSparseTensorProto(const ONNX_NAMESPACE::TensorProto&
const bool is_string_data = dense_proto.data_type() == ONNX_NAMESPACE::TensorProto_DataType_STRING;
if (is_string_data) {
Status status{};
UnsupportedSparseDataType()(ONNX_NAMESPACE::TensorProto_DataType_STRING, status);
conversion_internal::UnsupportedSparseDataType()(ONNX_NAMESPACE::TensorProto_DataType_STRING, status);
return status;
}
@ -1081,25 +1113,36 @@ common::Status DenseTensorToSparseTensorProto(const ONNX_NAMESPACE::TensorProto&
std::unique_ptr<uint8_t[]> dense_raw_data;
ORT_RETURN_IF_ERROR(UnpackInitializerData(dense_proto, model_path, dense_raw_data, tensor_bytes_size));
size_t element_size = 0;
MLTypeCallDispatcher<float, int8_t, uint8_t> type_disp(data_type);
// We want this type list to match the one above in SparseTensorProtoToDenseTensorProto
MLTypeCallDispatcherFromTypeList<conversion_internal::SupportedConversionTypeList> type_disp(data_type);
ORT_RETURN_IF_ERROR(
(type_disp.InvokeRetWithUnsupportedPolicy<Status, GetElementSize, UnsupportedSparseDataType>(element_size)));
(type_disp.InvokeRetWithUnsupportedPolicy<Status, conversion_internal::GetElementSize, conversion_internal::UnsupportedSparseDataType>(element_size)));
switch (element_size) {
case 1: {
// bytes
SparsifyGeneric(dense_raw_data.get(), n_dense_elements, element_size,
IsZero<uint8_t>, CopyElement<uint8_t>, values, indices);
break;
}
case 2: {
SparsifyGeneric(dense_raw_data.get(), n_dense_elements, element_size,
IsZero<uint16_t>, CopyElement<uint16_t>, values, indices);
break;
}
case 4: {
// float
SparsifyGeneric(dense_raw_data.get(), n_dense_elements, element_size,
IsZero<uint32_t>, CopyElement<uint32_t>, values, indices);
break;
}
case 8: {
SparsifyGeneric(dense_raw_data.get(), n_dense_elements, element_size,
IsZero<uint64_t>, CopyElement<uint64_t>, values, indices);
break;
}
default:
ORT_THROW(false, "BUG! Report to onnxruntime team.");
return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL,
" BUG! Report to onnxruntime team. element_size of: ",
element_size, " is not supported.", " data_type: ", data_type);
}
// Fix up shapes

View file

@ -499,24 +499,28 @@ TEST_F(SparseTensorTests, Test2) {
RunTest();
}
// Code below depends on the values being size 4
// Produces the canonical four-element test payload {1, 2, 3, 4}.
// Code below depends on the values being size 4.
template <typename T>
static std::vector<T> CreateValues() {
  return std::vector<T>{static_cast<T>(1), static_cast<T>(2),
                        static_cast<T>(3), static_cast<T>(4)};
}
/* std::string suport in the future
/* std::string support in the future
template <>
std::vector<std::string> CreateValues<std::string>() {
return {"one", "two", "three", "four"};
}
*/
/* BFloat16 support in the future
template <>
std::vector<BFloat16> CreateValues<BFloat16>() {
return {BFloat16(1.f), BFloat16(2.f), BFloat16(3.f), BFloat16(4.f)};
}
*/
// MLFloat16 has no integer constructor, so the generic {1, 2, 3, 4} body
// cannot be used; build the four values from float literals instead.
template <>
std::vector<MLFloat16> CreateValues<MLFloat16>() {
  std::vector<MLFloat16> vals;
  vals.reserve(4);
  for (float f : {1.f, 2.f, 3.f, 4.f}) {
    vals.emplace_back(f);
  }
  return vals;
}
template <typename T>
static void CreateTensorWithExternalData(
@ -591,6 +595,45 @@ static NodeProto CreateConstantNode(bool indices_1D,
return constant_node;
}
// Builds a Constant node whose SPARSE_TENSOR attribute contains zero NNZ
// entries (empty values and empty indices) for a dense shape of {2, 3, 2}.
// expected_data is resized to the dense element count and left
// zero-initialized: it is the dense tensor the conversion should produce.
template <typename T>
static NodeProto CreateConstantNodeAllZeros(bool indices_1D, std::vector<T>& expected_data) {
  NodeProto constant_node;
  constant_node.set_op_type("Constant");
  constant_node.add_output("dense_tensor_output");

  // NOTE(review): 'indices' local is unused in the all-zeros variant --
  // retained from the sibling CreateConstantNode; confirm it can be removed.
  std::vector<int64_t> indices;
  std::vector<int64_t> shape{2, 3, 2};

  AttributeProto& attrib = *constant_node.mutable_attribute()->Add();
  attrib.set_name("sparse_value_all_zeros");
  attrib.set_type(AttributeProto_AttributeType_SPARSE_TENSOR);

  SparseTensorProto& stp = *attrib.mutable_sparse_tensor();
  TensorProto& indices_tp = *stp.mutable_indices();

  stp.mutable_dims()->Add(shape.cbegin(), shape.cend());

  if (indices_1D) {
    // COO flat-index layout: indices shape is {NNZ}, i.e. {0} here.
    indices_tp.add_dims(0);
  } else {
    // indices are shape {NNZ, rank} so convert flattened values of 2, 5, 6 and 10 to rank 3 values
    // NOTE(review): with zero NNZ both dims are set to 0 rather than {0, rank}
    // -- presumably equivalent for an empty tensor; confirm against the spec.
    indices_tp.add_dims(0);
    indices_tp.add_dims(0);
  }
  indices_tp.set_data_type(ONNX_NAMESPACE::TensorProto_DataType_INT64);

  // Must be all zeros
  expected_data.resize(2 * 3 * 2);

  // Empty values tensor: correct element type, zero elements (dims == {0}).
  auto& mutable_values = *stp.mutable_values();
  mutable_values.set_name("all_zeros");
  mutable_values.set_data_type(utils::ToTensorProtoElementType<T>());
  mutable_values.add_dims(0);

  return constant_node;
}
template <typename T>
static void TestConversion(bool use_1D_indices,
std::function<void(const std::vector<T>& values, TensorProto& tp)> inserter,
@ -607,12 +650,29 @@ static void TestConversion(bool use_1D_indices,
checker(expected_span, dense);
}
template <typename T>
static void TestConversionAllZeros(bool use_1D_indices,
std::function<void(gsl::span<const T> expected, const TensorProto& actual)> checker) {
std::vector<T> expected;
auto node = CreateConstantNodeAllZeros<T>(use_1D_indices, expected);
TensorProto dense;
// Path is required for loading external data (if any)
// When path is empty it will look for the data in current dir
utils::ConstantNodeProtoToTensorProto(node, Path(), dense);
gsl::span<const T> expected_span = gsl::make_span<const T>(expected.data(), expected.size());
checker(expected_span, dense);
}
// Convenience overload: exercises both indices layouts (1-D flat and N-D)
// for the regular scenario first, then for the all-zeros (zero NNZ) scenario.
template <typename T>
static void TestConversion(
    std::function<void(const std::vector<T>& values, TensorProto& tp)> inserter,
    std::function<void(gsl::span<const T> expected, const TensorProto& actual)> checker) {
  for (bool use_1D_indices : {true, false}) {
    TestConversion(use_1D_indices, inserter, checker);
  }
  for (bool use_1D_indices : {true, false}) {
    TestConversionAllZeros(use_1D_indices, checker);
  }
}
template <typename T>
@ -621,12 +681,17 @@ static void RawDataWriter(const std::vector<T>& values, TensorProto& tp, TensorP
tp.set_raw_data(values.data(), values.size() * sizeof(T));
}
template <typename T>
static void RawDataChecker(gsl::span<const T> expected, const TensorProto& actual) {
int64_t ActualSize(const TensorProto& actual) {
int64_t actual_size = 1;
for (const auto dim : actual.dims()) {
actual_size *= dim;
}
return actual_size;
}
template <typename T>
static void RawDataChecker(gsl::span<const T> expected, const TensorProto& actual) {
int64_t actual_size = ActualSize(actual);
const T* raw_data = reinterpret_cast<const T*>(actual.raw_data().data());
auto actual_span = gsl::make_span<const T>(raw_data, actual_size);
@ -634,13 +699,20 @@ static void RawDataChecker(gsl::span<const T> expected, const TensorProto& actua
EXPECT_THAT(actual_span, testing::ContainerEq(expected));
}
/* For BFloat16 support in the future.
template <>
void RawDataChecker<BFloat16>(gsl::span<const BFloat16> expected_bfloat, const TensorProto& actual) {
int64_t actual_size = 1;
for (const auto dim : actual.dims()) {
actual_size *= dim;
}
void RawDataChecker<MLFloat16>(gsl::span<const MLFloat16> expected_bfloat, const TensorProto& actual) {
int64_t actual_size = ActualSize(actual);
auto expected = expected_bfloat.as_span<const uint16_t>();
const uint16_t* raw_data = reinterpret_cast<const uint16_t*>(actual.raw_data().data());
auto actual_span = gsl::make_span<const uint16_t>(raw_data, actual_size);
EXPECT_THAT(actual_span, testing::ContainerEq(expected));
}
template <>
void RawDataChecker<BFloat16>(gsl::span<const BFloat16> expected_bfloat, const TensorProto& actual) {
int64_t actual_size = ActualSize(actual);
auto expected = expected_bfloat.as_span<const uint16_t>();
const uint16_t* raw_data = reinterpret_cast<const uint16_t*>(actual.raw_data().data());
@ -648,7 +720,6 @@ void RawDataChecker<BFloat16>(gsl::span<const BFloat16> expected_bfloat, const T
EXPECT_THAT(actual_span, testing::ContainerEq(expected));
}
*/
TEST(SparseTensorConversionTests, TestConstantNodeConversion) {
TestConversion<float>(
@ -658,6 +729,74 @@ TEST(SparseTensorConversionTests, TestConstantNodeConversion) {
},
RawDataChecker<float>);
TestConversion<double>(
[](const std::vector<double>& values, TensorProto& tp) {
tp.set_data_type(TensorProto_DataType_DOUBLE);
tp.mutable_double_data()->Add(values.cbegin(), values.cend());
},
RawDataChecker<double>);
TestConversion<BFloat16>(
[](const std::vector<BFloat16>& values, TensorProto& tp) {
tp.set_data_type(TensorProto_DataType_BFLOAT16);
for (auto v : values) {
tp.mutable_int32_data()->Add(v.val);
}
},
RawDataChecker<BFloat16>);
TestConversion<MLFloat16>(
[](const std::vector<MLFloat16>& values, TensorProto& tp) {
tp.set_data_type(TensorProto_DataType_FLOAT16);
for (auto v : values) {
tp.mutable_int32_data()->Add(v.val);
}
},
RawDataChecker<MLFloat16>);
TestConversion<int16_t>(
[](const std::vector<int16_t>& values, TensorProto& tp) {
tp.set_data_type(TensorProto_DataType_INT16);
tp.mutable_int32_data()->Add(values.cbegin(), values.cend());
},
RawDataChecker<int16_t>);
TestConversion<uint16_t>(
[](const std::vector<uint16_t>& values, TensorProto& tp) {
tp.set_data_type(TensorProto_DataType_UINT16);
tp.mutable_int32_data()->Add(values.cbegin(), values.cend());
},
RawDataChecker<uint16_t>);
TestConversion<int32_t>(
[](const std::vector<int32_t>& values, TensorProto& tp) {
tp.set_data_type(TensorProto_DataType_INT32);
tp.mutable_int32_data()->Add(values.cbegin(), values.cend());
},
RawDataChecker<int32_t>);
TestConversion<uint32_t>(
[](const std::vector<uint32_t>& values, TensorProto& tp) {
tp.set_data_type(TensorProto_DataType_UINT32);
tp.mutable_uint64_data()->Add(values.cbegin(), values.cend());
},
RawDataChecker<uint32_t>);
// Test all zeros case
TestConversion<int64_t>(
[](const std::vector<int64_t>& values, TensorProto& tp) {
tp.set_data_type(TensorProto_DataType_INT64);
tp.mutable_int64_data()->Add(values.cbegin(), values.cend());
},
RawDataChecker<int64_t>);
TestConversion<uint64_t>(
[](const std::vector<uint64_t>& values, TensorProto& tp) {
tp.set_data_type(TensorProto_DataType_UINT64);
tp.mutable_uint64_data()->Add(values.cbegin(), values.cend());
},
RawDataChecker<uint64_t>);
TestConversion<int8_t>(
[](const std::vector<int8_t>& values, TensorProto& tp) {
tp.set_data_type(TensorProto_DataType_INT8);
@ -673,7 +812,8 @@ TEST(SparseTensorConversionTests, TestConstantNodeConversion) {
// Test constant node conversion for SparseTensor with external data
PathString tensor_filename(ORT_TSTR("tensor_XXXXXX"));
TestConversion<float>(true,
TestConversion<float>(
true,
[&tensor_filename](const std::vector<float>& values, TensorProto& tp) {
CreateTensorWithExternalData<float>(TensorProto_DataType_FLOAT, values, tensor_filename, tp);
},
@ -696,12 +836,30 @@ std::vector<std::string> CreateSparseValues<std::string>() {
}
*/
/* BFloat16 support in the future
template <>
std::vector<BFloat16> CreateSparseValues<BFloat16>() {
return {BFloat16(0.f), BFloat16(2.f), BFloat16(3.f), BFloat16(0.f)};
}
*/
template <>
std::vector<MLFloat16> CreateSparseValues<MLFloat16>() {
return {MLFloat16(0.f), MLFloat16(2.f), MLFloat16(3.f), MLFloat16(0.f)};
}
// Four zero entries: sparsifying this buffer must yield zero NNZ.
template <typename T>
static std::vector<T> CreateSparseValuesAllZeros() {
  return std::vector<T>(4, static_cast<T>(0));
}
// BFloat16 is not constructible from an int literal, hence the explicit
// specialization built from a float zero.
template <>
std::vector<BFloat16> CreateSparseValuesAllZeros<BFloat16>() {
  return std::vector<BFloat16>(4, BFloat16(0.f));
}
// MLFloat16 is not constructible from an int literal, hence the explicit
// specialization built from a float zero.
template <>
std::vector<MLFloat16> CreateSparseValuesAllZeros<MLFloat16>() {
  return std::vector<MLFloat16>(4, MLFloat16(0.f));
}
template <typename T>
TensorProto CreateDenseTensor(std::function<void(const std::vector<T>& values, TensorProto& tp)> inserter,
@ -718,13 +876,27 @@ TensorProto CreateDenseTensor(std::function<void(const std::vector<T>& values, T
}
template <typename T>
static void RawSparseDataChecker(gsl::span<const T> expected_values,
gsl::span<const int64_t> expected_indicies,
const SparseTensorProto& actual) {
TensorProto CreateDenseTensorAllZeros(std::function<void(const std::vector<T>& values, TensorProto& tp)> inserter) {
TensorProto result;
std::vector<T> values = CreateSparseValuesAllZeros<T>();
inserter(values, result);
result.add_dims(static_cast<int64_t>(values.size()));
return result;
}
// Element count of the sparse proto's values tensor: product of its dims
// (an empty dims list yields 1, i.e. a scalar).
int64_t ActualSize(const SparseTensorProto& actual) {
  int64_t element_count = 1;
  for (const auto dim : actual.values().dims()) {
    element_count *= dim;
  }
  return element_count;
}
template <typename T>
static void RawSparseDataChecker(gsl::span<const T> expected_values,
gsl::span<const int64_t> expected_indicies,
const SparseTensorProto& actual) {
const int64_t actual_size = ActualSize(actual);
const T* raw_data = reinterpret_cast<const T*>(actual.values().raw_data().data());
auto actual_span = gsl::make_span<const T>(raw_data, actual_size);
@ -737,15 +909,11 @@ static void RawSparseDataChecker(gsl::span<const T> expected_values,
EXPECT_THAT(actual_indicies, testing::ContainerEq(expected_indicies));
}
/* When we support BFloat16
template <>
void RawSparseDataChecker<BFloat16>(gsl::span<const BFloat16> expected_bfloat,
gsl::span<const int64_t> expected_indicies,
const SparseTensorProto& actual) {
int64_t actual_size = 1;
for (const auto dim : actual.values().dims()) {
actual_size *= dim;
}
const int64_t actual_size = ActualSize(actual);
static_assert(sizeof(uint16_t) == sizeof(BFloat16), "Expecting equal sizes");
auto expected = expected_bfloat.as_span<const uint16_t>();
@ -758,10 +926,27 @@ void RawSparseDataChecker<BFloat16>(gsl::span<const BFloat16> expected_bfloat,
auto actual_indicies = gsl::make_span<const int64_t>(actual.indices().int64_data().data(), actual.indices().int64_data_size());
EXPECT_THAT(actual_indicies, testing::ContainerEq(expected_indicies));
}
*/
// MLFloat16 raw data is bit-compatible with uint16_t, so the comparison is
// done on the raw bit patterns rather than on the wrapper type.
template <>
void RawSparseDataChecker<MLFloat16>(gsl::span<const MLFloat16> expected_fp16,
                                     gsl::span<const int64_t> expected_indicies,
                                     const SparseTensorProto& actual) {
  static_assert(sizeof(uint16_t) == sizeof(MLFloat16), "Expecting equal sizes");
  const int64_t actual_size = ActualSize(actual);

  const auto expected = expected_fp16.as_span<const uint16_t>();
  const auto* raw_data = reinterpret_cast<const uint16_t*>(actual.values().raw_data().data());
  const auto actual_span = gsl::make_span<const uint16_t>(raw_data, actual_size);
  EXPECT_THAT(actual_span, testing::ContainerEq(expected));

  // Indices must always be INT64 regardless of the value type.
  EXPECT_THAT(actual.indices().data_type(), ONNX_NAMESPACE::TensorProto_DataType_INT64);
  const auto actual_indicies = gsl::make_span<const int64_t>(actual.indices().int64_data().data(),
                                                             actual.indices().int64_data_size());
  EXPECT_THAT(actual_indicies, testing::ContainerEq(expected_indicies));
}
template <typename T>
static void TestDenseToSparseConversion(
static void TestDenseToSparseConversionValues(
std::function<void(const std::vector<T>& values, TensorProto& tp)> inserter,
std::function<void(gsl::span<const T> expected,
gsl::span<const int64_t> expected_indicies,
@ -783,6 +968,40 @@ static void TestDenseToSparseConversion(
checker(expected_values_span, expected_ind_span, sparse_tensor);
}
// Converts an all-zeros dense tensor to sparse and verifies via the checker
// that the result has empty values and empty indices (zero NNZ).
template <typename T>
static void TestDenseAllZerosToSparseConversion(
    std::function<void(const std::vector<T>& values, TensorProto& tp)> inserter,
    std::function<void(gsl::span<const T> expected,
                       gsl::span<const int64_t> expected_indicies,
                       const SparseTensorProto& actual)>
        checker) {
  // Path is required for loading external data; empty here since the data
  // is inline.
  Path model_path;
  TensorProto dense_tensor = CreateDenseTensorAllZeros(inserter);

  SparseTensorProto sparse_tensor;
  utils::DenseTensorToSparseTensorProto(dense_tensor, model_path, sparse_tensor);

  // Both expectations are empty spans: nothing should survive sparsification.
  std::vector<T> expected_values;
  std::vector<int64_t> expected_indicies;
  checker(gsl::make_span<const T>(expected_values.data(), expected_values.size()),
          gsl::make_span<const int64_t>(expected_indicies.data(), expected_indicies.size()),
          sparse_tensor);
}
// Dispatcher that runs both dense-to-sparse scenarios for type T:
// first the regular case (some NNZ values), then the all-zeros case
// (zero NNZ, empty values/indices).
template <typename T>
static void TestDenseToSparseConversion(std::function<void(const std::vector<T>& values, TensorProto& tp)> inserter,
                                        std::function<void(gsl::span<const T> expected,
                                                           gsl::span<const int64_t> expected_indicies,
                                                           const SparseTensorProto& actual)>
                                            checker) {
  TestDenseToSparseConversionValues<T>(inserter, checker);
  TestDenseAllZerosToSparseConversion<T>(inserter, checker);
}
TEST(SparseTensorConversionTests, TestDenseToSparseConversion) {
TestDenseToSparseConversion<float>(
[](const std::vector<float>& values, TensorProto& tp) {
@ -792,6 +1011,82 @@ TEST(SparseTensorConversionTests, TestDenseToSparseConversion) {
},
RawSparseDataChecker<float>);
TestDenseToSparseConversion<double>(
[](const std::vector<double>& values, TensorProto& tp) {
tp.set_data_type(TensorProto_DataType_DOUBLE);
tp.set_name("dense_double");
tp.mutable_double_data()->Add(values.cbegin(), values.cend());
},
RawSparseDataChecker<double>);
TestDenseToSparseConversion<BFloat16>(
[](const std::vector<BFloat16>& values, TensorProto& tp) {
tp.set_data_type(TensorProto_DataType_BFLOAT16);
tp.set_name("dense_bfloat16");
for (auto v : values) {
tp.mutable_int32_data()->Add(v.val);
}
},
RawSparseDataChecker<BFloat16>);
TestDenseToSparseConversion<MLFloat16>(
[](const std::vector<MLFloat16>& values, TensorProto& tp) {
tp.set_data_type(TensorProto_DataType_FLOAT16);
tp.set_name("dense_float16");
for (auto v : values) {
tp.mutable_int32_data()->Add(v.val);
}
},
RawSparseDataChecker<MLFloat16>);
TestDenseToSparseConversion<int16_t>(
[](const std::vector<int16_t>& values, TensorProto& tp) {
tp.set_name("dense_int16");
tp.set_data_type(TensorProto_DataType_INT16);
tp.mutable_int32_data()->Add(values.cbegin(), values.cend());
},
RawSparseDataChecker<int16_t>);
TestDenseToSparseConversion<uint16_t>(
[](const std::vector<uint16_t>& values, TensorProto& tp) {
tp.set_name("dense_uint16");
tp.set_data_type(TensorProto_DataType_UINT16);
tp.mutable_int32_data()->Add(values.cbegin(), values.cend());
},
RawSparseDataChecker<uint16_t>);
TestDenseToSparseConversion<int32_t>(
[](const std::vector<int32_t>& values, TensorProto& tp) {
tp.set_name("dense_int32");
tp.set_data_type(TensorProto_DataType_INT32);
tp.mutable_int32_data()->Add(values.cbegin(), values.cend());
},
RawSparseDataChecker<int32_t>);
TestDenseToSparseConversion<uint32_t>(
[](const std::vector<uint32_t>& values, TensorProto& tp) {
tp.set_name("dense_uint32");
tp.set_data_type(TensorProto_DataType_UINT32);
tp.mutable_uint64_data()->Add(values.cbegin(), values.cend());
},
RawSparseDataChecker<uint32_t>);
TestDenseToSparseConversion<int64_t>(
[](const std::vector<int64_t>& values, TensorProto& tp) {
tp.set_name("dense_int64");
tp.set_data_type(TensorProto_DataType_INT64);
tp.mutable_int64_data()->Add(values.cbegin(), values.cend());
},
RawSparseDataChecker<int64_t>);
TestDenseToSparseConversion<uint64_t>(
[](const std::vector<uint64_t>& values, TensorProto& tp) {
tp.set_name("dense_uint64");
tp.set_data_type(TensorProto_DataType_UINT64);
tp.mutable_uint64_data()->Add(values.cbegin(), values.cend());
},
RawSparseDataChecker<uint64_t>);
TestDenseToSparseConversion<int8_t>(
[](const std::vector<int8_t>& values, TensorProto& tp) {
tp.set_name("dense_int8");

View file

@ -0,0 +1,134 @@
# -------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.
# --------------------------------------------------------------------------
# This script opens an existing model in onnx format and attempts to
# move initializers from model.graph.initializer field to model.graph.sparse_initializer field
# and convert them into ONNX COO flat index format.
import argparse
import logging
import numpy as np
import sys
from typing import Tuple, List
import onnx
from onnx import ModelProto, SparseTensorProto, TensorProto, numpy_helper
logger = logging.getLogger(__name__)
real_types = set((np.float32, np.float64, np.double))
def parse_arguments():  # type: () -> argparse.Namespace
    """Parse and return the command line arguments for the sparsify script."""
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument('--input', required=True, type=str, help='input model path')
    arg_parser.add_argument('--output', required=True, type=str, help='output model path')
    arg_parser.add_argument('--exclude', required=False, type=str,
                            help='semicolon separated list of initializer names to exclude')
    arg_parser.add_argument('--tolerance', required=False, type=float, default=1e-6,
                            help='FP absolute tolerance. If not given simple compare to 0')
    arg_parser.add_argument('--sparsity_threshold', required=False,
                            type=float, default=0.5,
                            help='convert to sparse initializers if sparsity is at least this much')
    # store_true defaults to False, which replaces the separate set_defaults() call
    arg_parser.add_argument('--verbose', required=False, action='store_true', default=False)
    return arg_parser.parse_args()
def setup_logging(verbose):  # type: (bool) -> None
    """Attach a stdout handler to the module logger.

    Verbose mode enables DEBUG level with a file/line/function prefix;
    otherwise INFO level with a short filename prefix is used.
    """
    handler = logging.StreamHandler(sys.stdout)
    if verbose:
        fmt = '[%(filename)s:%(lineno)s - %(funcName)20s()] %(message)s'
        level = logging.DEBUG
    else:
        fmt = '%(filename)20s: %(message)s'
        level = logging.INFO
    handler.setFormatter(logging.Formatter(fmt))
    handler.setLevel(level)
    logger.addHandler(handler)
    logger.setLevel(level)
def convert_tensor_to_sparse(tensor, tolerance):  # type: (TensorProto, float) -> Tuple[SparseTensorProto, float]
    """Convert a dense initializer into ONNX COO (flat index) sparse format.

    Collects the non-zero (NNZ) elements and their flattened indices.
    For real (floating point) tensors an element counts as zero when its
    absolute value is within `tolerance`; for all other types only exact
    zeros are dropped.

    Returns a tuple of (sparse_tensor, sparsity) where sparsity is the
    fraction of zero elements, in [0, 1].
    """
    values = []
    indicies = []
    nnz_count = 0
    tensor_data = numpy_helper.to_array(tensor).flatten()
    data_len = len(tensor_data)
    if tensor_data.dtype in real_types:
        for index in range(data_len):
            el = tensor_data[index]
            # Keep only elements whose magnitude exceeds the tolerance:
            # these are the NNZ entries the sparse format must store.
            if abs(el) > tolerance:
                values.append(el)
                indicies.append(index)
                nnz_count += 1
    else:
        for index in range(data_len):
            el = tensor_data[index]
            if el != 0:
                values.append(el)
                indicies.append(index)
                nnz_count += 1
    # Fraction of zeros; guard empty tensors against division by zero
    # (an empty tensor is trivially fully sparse).
    sparsity = (float(1.) - float(nnz_count) / data_len) if data_len > 0 else float(1.)
    logger.debug(f"initializer={tensor.name}, dtype={tensor_data.dtype}, \
len={data_len}, nnz={nnz_count}, sparsity={sparsity}")

    values_tensor = onnx.helper.make_tensor(tensor.name, tensor.data_type,
                                            [len(values)], np.array(values).astype(tensor_data.dtype))
    indicies_tensor = onnx.helper.make_tensor(tensor.name + '_indicies',
                                              TensorProto.INT64,
                                              [len(indicies)], np.array(indicies).astype(np.int64))
    sparse_tensor = onnx.helper.make_sparse_tensor(values_tensor, indicies_tensor, tensor.dims)
    return (sparse_tensor, sparsity)
def convert_initializers(model,
                         exclude_names,
                         sparsity_threshold,
                         tolerance):  # type: (ModelProto, Set[str], float, float) -> None
    """Move sufficiently sparse initializers into graph.sparse_initializer.

    Mutates `model` in place: initializers whose sparsity is at least
    `sparsity_threshold` are converted to COO sparse form; the rest
    (plus excluded and bool-typed ones) stay dense.
    """
    graph = model.graph
    converted_sparse = []
    remaining_initializers = []
    for initializer in graph.initializer:
        # Skip initializers the user explicitly excluded (dropped entirely
        # from the remaining list only because they are re-added below? No:
        # NOTE(review): excluded initializers are NOT appended to
        # remaining_initializers, so they are removed from the model -- confirm
        # this is intended rather than `remaining_initializers.append(...)`.
        if initializer.name in exclude_names:
            logger.info(f"initializer={initializer.name} was excluded")
            continue
        # bool tensors are not supported by the sparse conversion; keep dense.
        if initializer.data_type == TensorProto.BOOL:
            logger.info(f"initializer={initializer.name} contains bool, not converted")
            remaining_initializers.append(initializer)
            continue
        sparse_tensor, sparsity = convert_tensor_to_sparse(initializer, tolerance)
        if sparsity >= sparsity_threshold:
            logger.info(f"initializer={initializer.name} converted. sparsity={sparsity}")
            converted_sparse.append(sparse_tensor)
        else:
            remaining_initializers.append(initializer)
            logger.info(f"initializer={initializer.name} is not converted. sparsity={sparsity}")
    # Rewrite both repeated fields in place on the graph.
    graph.sparse_initializer.extend(converted_sparse)
    del graph.initializer[:]
    graph.initializer.extend(remaining_initializers)
def main():
    """Entry point: load the model, sparsify eligible initializers, save it."""
    args = parse_arguments()
    setup_logging(args.verbose)
    # --exclude is a semicolon-separated list of initializer names.
    exclude = set() if args.exclude is None else set(args.exclude.split(';'))

    model = ModelProto()
    with open(args.input, "rb") as input_file:
        model.ParseFromString(input_file.read())

    convert_initializers(model, exclude, args.sparsity_threshold, args.tolerance)

    with open(args.output, "wb") as output_file:
        output_file.write(model.SerializeToString())
if __name__ == "__main__":
main()