mirror of
https://github.com/saymrwulf/onnxruntime.git
synced 2026-05-14 20:48:00 +00:00
Add types support for Sparse Initializer in Onnxruntime (#7004)
Add types support for DenseToSparse and SparseToDense conversions. Address the case of empty sparse values and indices when the initializer does not contain any NNZ. Add sparsify script.
This commit is contained in:
parent
4a3d1176d7
commit
3b58fc7b97
3 changed files with 540 additions and 68 deletions
|
|
@ -698,7 +698,6 @@ Status TensorProtoToTensor(const Env& env, const ORTCHAR_T* model_path,
|
|||
return Status::OK();
|
||||
}
|
||||
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#pragma warning(push)
|
||||
#pragma warning(disable : 6239)
|
||||
|
|
@ -910,6 +909,7 @@ static Status CopySparseData(size_t n_sparse_elements,
|
|||
return status;
|
||||
}
|
||||
|
||||
namespace conversion_internal {
|
||||
struct UnsupportedSparseDataType {
|
||||
void operator()(int32_t dt_type, Status& status) const {
|
||||
status = ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "Unsupported sparse tensor data type of ", dt_type);
|
||||
|
|
@ -924,6 +924,10 @@ struct GetElementSize {
|
|||
}
|
||||
};
|
||||
|
||||
using SupportedConversionTypeList = onnxruntime::TypeList<float, double, MLFloat16, BFloat16,
|
||||
int8_t, uint8_t, int16_t, uint16_t, int32_t, uint32_t, int64_t, uint64_t>;
|
||||
} // namespace conversion_internal
|
||||
|
||||
common::Status SparseTensorProtoToDenseTensorProto(const ONNX_NAMESPACE::SparseTensorProto& sparse,
|
||||
const Path& model_path,
|
||||
ONNX_NAMESPACE::TensorProto& dense) {
|
||||
|
|
@ -956,9 +960,9 @@ common::Status SparseTensorProtoToDenseTensorProto(const ONNX_NAMESPACE::SparseT
|
|||
void* sparse_data = sparse_data_storage.get();
|
||||
size_t element_size = 0;
|
||||
// We want to this list to match the one used below in DenseTensorToSparseTensorProto()
|
||||
MLTypeCallDispatcher<float, int8_t, uint8_t> type_disp(type);
|
||||
MLTypeCallDispatcherFromTypeList<conversion_internal::SupportedConversionTypeList> type_disp(type);
|
||||
ORT_RETURN_IF_ERROR(
|
||||
(type_disp.InvokeRetWithUnsupportedPolicy<Status, GetElementSize, UnsupportedSparseDataType>(element_size)));
|
||||
(type_disp.InvokeRetWithUnsupportedPolicy<Status, conversion_internal::GetElementSize, conversion_internal::UnsupportedSparseDataType>(element_size)));
|
||||
|
||||
// by putting the data into a std::string we can avoid a copy as set_raw_data can do a std::move
|
||||
// into the TensorProto. however to actually write to the buffer we have created in the std::string we need
|
||||
|
|
@ -966,6 +970,7 @@ common::Status SparseTensorProtoToDenseTensorProto(const ONNX_NAMESPACE::SparseT
|
|||
// but using const_cast makes it more obvious we're doing something ugly.
|
||||
// C++17 add non-const data() where we could remove const_cast
|
||||
std::string dense_data_storage(n_dense_elements * element_size, 0);
|
||||
if (n_sparse_elements > 0) {
|
||||
void* dense_data = const_cast<char*>(dense_data_storage.data());
|
||||
|
||||
switch (element_size) {
|
||||
|
|
@ -980,6 +985,17 @@ common::Status SparseTensorProtoToDenseTensorProto(const ONNX_NAMESPACE::SparseT
|
|||
|
||||
break;
|
||||
}
|
||||
case 2: {
|
||||
auto dense_data_span = gsl::make_span<uint16_t>(static_cast<uint16_t*>(dense_data), n_dense_elements);
|
||||
status = CopySparseData<uint16_t>(
|
||||
n_sparse_elements,
|
||||
indices, dims,
|
||||
[sparse_data, dense_data_span](size_t from_idx, size_t to_idx) {
|
||||
dense_data_span[to_idx] = static_cast<const uint16_t*>(sparse_data)[from_idx];
|
||||
});
|
||||
|
||||
break;
|
||||
}
|
||||
case 4: {
|
||||
auto dense_data_span = gsl::make_span<uint32_t>(static_cast<uint32_t*>(dense_data), n_dense_elements);
|
||||
status = CopySparseData<uint32_t>(
|
||||
|
|
@ -991,16 +1007,30 @@ common::Status SparseTensorProtoToDenseTensorProto(const ONNX_NAMESPACE::SparseT
|
|||
|
||||
break;
|
||||
}
|
||||
case 8: {
|
||||
auto dense_data_span = gsl::make_span<uint64_t>(static_cast<uint64_t*>(dense_data), n_dense_elements);
|
||||
status = CopySparseData<uint64_t>(
|
||||
n_sparse_elements,
|
||||
indices, dims,
|
||||
[sparse_data, dense_data_span](size_t from_idx, size_t to_idx) {
|
||||
dense_data_span[to_idx] = static_cast<const uint64_t*>(sparse_data)[from_idx];
|
||||
});
|
||||
break;
|
||||
}
|
||||
|
||||
default:
|
||||
ORT_THROW(false, "BUG! Report to onnxruntime team.");
|
||||
return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL,
|
||||
" BUG! Report to onnxruntime team. element_size of: ",
|
||||
element_size, " is not supported.", " type: ", type);
|
||||
}
|
||||
|
||||
ORT_RETURN_IF_ERROR(status);
|
||||
}
|
||||
dense.set_raw_data(std::move(dense_data_storage));
|
||||
|
||||
} else {
|
||||
// No request for std::string
|
||||
UnsupportedSparseDataType()(ONNX_NAMESPACE::TensorProto_DataType_STRING, status);
|
||||
conversion_internal::UnsupportedSparseDataType()(ONNX_NAMESPACE::TensorProto_DataType_STRING, status);
|
||||
}
|
||||
return status;
|
||||
}
|
||||
|
|
@ -1041,13 +1071,15 @@ static void SparsifyGeneric(const void* dense_raw_data, size_t n_dense_elements,
|
|||
}
|
||||
}
|
||||
|
||||
// Here we are not using tolerance for FP types since these dense tensors were
|
||||
// created from sparse initializers where zeros were absolute
|
||||
template <typename T>
|
||||
bool IsZero(const void* p) {
|
||||
inline bool IsZero(const void* p) {
|
||||
return (static_cast<T>(0) == *reinterpret_cast<const T*>(p));
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void CopyElement(void* dst, const void* src, int64_t dst_index, int64_t src_index) {
|
||||
inline void CopyElement(void* dst, const void* src, int64_t dst_index, int64_t src_index) {
|
||||
reinterpret_cast<T*>(dst)[dst_index] = reinterpret_cast<const T*>(src)[src_index];
|
||||
}
|
||||
|
||||
|
|
@ -1059,7 +1091,7 @@ common::Status DenseTensorToSparseTensorProto(const ONNX_NAMESPACE::TensorProto&
|
|||
const bool is_string_data = dense_proto.data_type() == ONNX_NAMESPACE::TensorProto_DataType_STRING;
|
||||
if (is_string_data) {
|
||||
Status status{};
|
||||
UnsupportedSparseDataType()(ONNX_NAMESPACE::TensorProto_DataType_STRING, status);
|
||||
conversion_internal::UnsupportedSparseDataType()(ONNX_NAMESPACE::TensorProto_DataType_STRING, status);
|
||||
return status;
|
||||
}
|
||||
|
||||
|
|
@ -1081,25 +1113,36 @@ common::Status DenseTensorToSparseTensorProto(const ONNX_NAMESPACE::TensorProto&
|
|||
std::unique_ptr<uint8_t[]> dense_raw_data;
|
||||
ORT_RETURN_IF_ERROR(UnpackInitializerData(dense_proto, model_path, dense_raw_data, tensor_bytes_size));
|
||||
size_t element_size = 0;
|
||||
MLTypeCallDispatcher<float, int8_t, uint8_t> type_disp(data_type);
|
||||
// We want this type list to match the one above in SparseTensorProtoToDenseTensorProto
|
||||
MLTypeCallDispatcherFromTypeList<conversion_internal::SupportedConversionTypeList> type_disp(data_type);
|
||||
ORT_RETURN_IF_ERROR(
|
||||
(type_disp.InvokeRetWithUnsupportedPolicy<Status, GetElementSize, UnsupportedSparseDataType>(element_size)));
|
||||
(type_disp.InvokeRetWithUnsupportedPolicy<Status, conversion_internal::GetElementSize, conversion_internal::UnsupportedSparseDataType>(element_size)));
|
||||
|
||||
switch (element_size) {
|
||||
case 1: {
|
||||
// bytes
|
||||
SparsifyGeneric(dense_raw_data.get(), n_dense_elements, element_size,
|
||||
IsZero<uint8_t>, CopyElement<uint8_t>, values, indices);
|
||||
break;
|
||||
}
|
||||
case 2: {
|
||||
SparsifyGeneric(dense_raw_data.get(), n_dense_elements, element_size,
|
||||
IsZero<uint16_t>, CopyElement<uint16_t>, values, indices);
|
||||
break;
|
||||
}
|
||||
case 4: {
|
||||
// float
|
||||
SparsifyGeneric(dense_raw_data.get(), n_dense_elements, element_size,
|
||||
IsZero<uint32_t>, CopyElement<uint32_t>, values, indices);
|
||||
break;
|
||||
}
|
||||
case 8: {
|
||||
SparsifyGeneric(dense_raw_data.get(), n_dense_elements, element_size,
|
||||
IsZero<uint64_t>, CopyElement<uint64_t>, values, indices);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
ORT_THROW(false, "BUG! Report to onnxruntime team.");
|
||||
return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL,
|
||||
" BUG! Report to onnxruntime team. element_size of: ",
|
||||
element_size, " is not supported.", " data_type: ", data_type);
|
||||
}
|
||||
|
||||
// Fix up shapes
|
||||
|
|
|
|||
|
|
@ -499,24 +499,28 @@ TEST_F(SparseTensorTests, Test2) {
|
|||
RunTest();
|
||||
}
|
||||
|
||||
// Code below depends on the values being size 4
|
||||
template <typename T>
|
||||
static std::vector<T> CreateValues() {
|
||||
return {1, 2, 3, 4};
|
||||
}
|
||||
|
||||
/* std::string suport in the future
|
||||
/* std::string support in the future
|
||||
template <>
|
||||
std::vector<std::string> CreateValues<std::string>() {
|
||||
return {"one", "two", "three", "four"};
|
||||
}
|
||||
*/
|
||||
|
||||
/* BFloat16 support in the future
|
||||
template <>
|
||||
std::vector<BFloat16> CreateValues<BFloat16>() {
|
||||
return {BFloat16(1.f), BFloat16(2.f), BFloat16(3.f), BFloat16(4.f)};
|
||||
}
|
||||
*/
|
||||
|
||||
template <>
|
||||
std::vector<MLFloat16> CreateValues<MLFloat16>() {
|
||||
return {MLFloat16(1.f), MLFloat16(2.f), MLFloat16(3.f), MLFloat16(4.f)};
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
static void CreateTensorWithExternalData(
|
||||
|
|
@ -591,6 +595,45 @@ static NodeProto CreateConstantNode(bool indices_1D,
|
|||
return constant_node;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
static NodeProto CreateConstantNodeAllZeros(bool indices_1D, std::vector<T>& expected_data) {
|
||||
NodeProto constant_node;
|
||||
constant_node.set_op_type("Constant");
|
||||
constant_node.add_output("dense_tensor_output");
|
||||
|
||||
std::vector<int64_t> indices;
|
||||
std::vector<int64_t> shape{2, 3, 2};
|
||||
|
||||
AttributeProto& attrib = *constant_node.mutable_attribute()->Add();
|
||||
attrib.set_name("sparse_value_all_zeros");
|
||||
attrib.set_type(AttributeProto_AttributeType_SPARSE_TENSOR);
|
||||
|
||||
SparseTensorProto& stp = *attrib.mutable_sparse_tensor();
|
||||
TensorProto& indices_tp = *stp.mutable_indices();
|
||||
|
||||
stp.mutable_dims()->Add(shape.cbegin(), shape.cend());
|
||||
|
||||
if (indices_1D) {
|
||||
indices_tp.add_dims(0);
|
||||
} else {
|
||||
// indices are shape {NNZ, rank} so convert flattened values of 2, 5, 6 and 10 to rank 3 values
|
||||
indices_tp.add_dims(0);
|
||||
indices_tp.add_dims(0);
|
||||
}
|
||||
|
||||
indices_tp.set_data_type(ONNX_NAMESPACE::TensorProto_DataType_INT64);
|
||||
|
||||
// Must be all zeros
|
||||
expected_data.resize(2 * 3 * 2);
|
||||
|
||||
auto& mutable_values = *stp.mutable_values();
|
||||
mutable_values.set_name("all_zeros");
|
||||
mutable_values.set_data_type(utils::ToTensorProtoElementType<T>());
|
||||
mutable_values.add_dims(0);
|
||||
|
||||
return constant_node;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
static void TestConversion(bool use_1D_indices,
|
||||
std::function<void(const std::vector<T>& values, TensorProto& tp)> inserter,
|
||||
|
|
@ -607,12 +650,29 @@ static void TestConversion(bool use_1D_indices,
|
|||
checker(expected_span, dense);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
static void TestConversionAllZeros(bool use_1D_indices,
|
||||
std::function<void(gsl::span<const T> expected, const TensorProto& actual)> checker) {
|
||||
std::vector<T> expected;
|
||||
auto node = CreateConstantNodeAllZeros<T>(use_1D_indices, expected);
|
||||
|
||||
TensorProto dense;
|
||||
// Path is required for loading external data (if any)
|
||||
// When path is empty it will look for the data in current dir
|
||||
utils::ConstantNodeProtoToTensorProto(node, Path(), dense);
|
||||
|
||||
gsl::span<const T> expected_span = gsl::make_span<const T>(expected.data(), expected.size());
|
||||
checker(expected_span, dense);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
static void TestConversion(
|
||||
std::function<void(const std::vector<T>& values, TensorProto& tp)> inserter,
|
||||
std::function<void(gsl::span<const T> expected, const TensorProto& actual)> checker) {
|
||||
TestConversion(true, inserter, checker);
|
||||
TestConversion(false, inserter, checker);
|
||||
TestConversionAllZeros(true, checker);
|
||||
TestConversionAllZeros(false, checker);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
|
|
@ -621,12 +681,17 @@ static void RawDataWriter(const std::vector<T>& values, TensorProto& tp, TensorP
|
|||
tp.set_raw_data(values.data(), values.size() * sizeof(T));
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
static void RawDataChecker(gsl::span<const T> expected, const TensorProto& actual) {
|
||||
int64_t ActualSize(const TensorProto& actual) {
|
||||
int64_t actual_size = 1;
|
||||
for (const auto dim : actual.dims()) {
|
||||
actual_size *= dim;
|
||||
}
|
||||
return actual_size;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
static void RawDataChecker(gsl::span<const T> expected, const TensorProto& actual) {
|
||||
int64_t actual_size = ActualSize(actual);
|
||||
|
||||
const T* raw_data = reinterpret_cast<const T*>(actual.raw_data().data());
|
||||
auto actual_span = gsl::make_span<const T>(raw_data, actual_size);
|
||||
|
|
@ -634,13 +699,20 @@ static void RawDataChecker(gsl::span<const T> expected, const TensorProto& actua
|
|||
EXPECT_THAT(actual_span, testing::ContainerEq(expected));
|
||||
}
|
||||
|
||||
/* For BFloat16 support in the future.
|
||||
template <>
|
||||
void RawDataChecker<BFloat16>(gsl::span<const BFloat16> expected_bfloat, const TensorProto& actual) {
|
||||
int64_t actual_size = 1;
|
||||
for (const auto dim : actual.dims()) {
|
||||
actual_size *= dim;
|
||||
}
|
||||
void RawDataChecker<MLFloat16>(gsl::span<const MLFloat16> expected_bfloat, const TensorProto& actual) {
|
||||
int64_t actual_size = ActualSize(actual);
|
||||
|
||||
auto expected = expected_bfloat.as_span<const uint16_t>();
|
||||
const uint16_t* raw_data = reinterpret_cast<const uint16_t*>(actual.raw_data().data());
|
||||
auto actual_span = gsl::make_span<const uint16_t>(raw_data, actual_size);
|
||||
|
||||
EXPECT_THAT(actual_span, testing::ContainerEq(expected));
|
||||
}
|
||||
|
||||
template <>
|
||||
void RawDataChecker<BFloat16>(gsl::span<const BFloat16> expected_bfloat, const TensorProto& actual) {
|
||||
int64_t actual_size = ActualSize(actual);
|
||||
|
||||
auto expected = expected_bfloat.as_span<const uint16_t>();
|
||||
const uint16_t* raw_data = reinterpret_cast<const uint16_t*>(actual.raw_data().data());
|
||||
|
|
@ -648,7 +720,6 @@ void RawDataChecker<BFloat16>(gsl::span<const BFloat16> expected_bfloat, const T
|
|||
|
||||
EXPECT_THAT(actual_span, testing::ContainerEq(expected));
|
||||
}
|
||||
*/
|
||||
|
||||
TEST(SparseTensorConversionTests, TestConstantNodeConversion) {
|
||||
TestConversion<float>(
|
||||
|
|
@ -658,6 +729,74 @@ TEST(SparseTensorConversionTests, TestConstantNodeConversion) {
|
|||
},
|
||||
RawDataChecker<float>);
|
||||
|
||||
TestConversion<double>(
|
||||
[](const std::vector<double>& values, TensorProto& tp) {
|
||||
tp.set_data_type(TensorProto_DataType_DOUBLE);
|
||||
tp.mutable_double_data()->Add(values.cbegin(), values.cend());
|
||||
},
|
||||
RawDataChecker<double>);
|
||||
|
||||
TestConversion<BFloat16>(
|
||||
[](const std::vector<BFloat16>& values, TensorProto& tp) {
|
||||
tp.set_data_type(TensorProto_DataType_BFLOAT16);
|
||||
for (auto v : values) {
|
||||
tp.mutable_int32_data()->Add(v.val);
|
||||
}
|
||||
},
|
||||
RawDataChecker<BFloat16>);
|
||||
|
||||
TestConversion<MLFloat16>(
|
||||
[](const std::vector<MLFloat16>& values, TensorProto& tp) {
|
||||
tp.set_data_type(TensorProto_DataType_FLOAT16);
|
||||
for (auto v : values) {
|
||||
tp.mutable_int32_data()->Add(v.val);
|
||||
}
|
||||
},
|
||||
RawDataChecker<MLFloat16>);
|
||||
|
||||
TestConversion<int16_t>(
|
||||
[](const std::vector<int16_t>& values, TensorProto& tp) {
|
||||
tp.set_data_type(TensorProto_DataType_INT16);
|
||||
tp.mutable_int32_data()->Add(values.cbegin(), values.cend());
|
||||
},
|
||||
RawDataChecker<int16_t>);
|
||||
|
||||
TestConversion<uint16_t>(
|
||||
[](const std::vector<uint16_t>& values, TensorProto& tp) {
|
||||
tp.set_data_type(TensorProto_DataType_UINT16);
|
||||
tp.mutable_int32_data()->Add(values.cbegin(), values.cend());
|
||||
},
|
||||
RawDataChecker<uint16_t>);
|
||||
|
||||
TestConversion<int32_t>(
|
||||
[](const std::vector<int32_t>& values, TensorProto& tp) {
|
||||
tp.set_data_type(TensorProto_DataType_INT32);
|
||||
tp.mutable_int32_data()->Add(values.cbegin(), values.cend());
|
||||
},
|
||||
RawDataChecker<int32_t>);
|
||||
|
||||
TestConversion<uint32_t>(
|
||||
[](const std::vector<uint32_t>& values, TensorProto& tp) {
|
||||
tp.set_data_type(TensorProto_DataType_UINT32);
|
||||
tp.mutable_uint64_data()->Add(values.cbegin(), values.cend());
|
||||
},
|
||||
RawDataChecker<uint32_t>);
|
||||
|
||||
// Test all zeros case
|
||||
TestConversion<int64_t>(
|
||||
[](const std::vector<int64_t>& values, TensorProto& tp) {
|
||||
tp.set_data_type(TensorProto_DataType_INT64);
|
||||
tp.mutable_int64_data()->Add(values.cbegin(), values.cend());
|
||||
},
|
||||
RawDataChecker<int64_t>);
|
||||
|
||||
TestConversion<uint64_t>(
|
||||
[](const std::vector<uint64_t>& values, TensorProto& tp) {
|
||||
tp.set_data_type(TensorProto_DataType_UINT64);
|
||||
tp.mutable_uint64_data()->Add(values.cbegin(), values.cend());
|
||||
},
|
||||
RawDataChecker<uint64_t>);
|
||||
|
||||
TestConversion<int8_t>(
|
||||
[](const std::vector<int8_t>& values, TensorProto& tp) {
|
||||
tp.set_data_type(TensorProto_DataType_INT8);
|
||||
|
|
@ -673,7 +812,8 @@ TEST(SparseTensorConversionTests, TestConstantNodeConversion) {
|
|||
|
||||
// Test constant node conversion for SparseTensor with external data
|
||||
PathString tensor_filename(ORT_TSTR("tensor_XXXXXX"));
|
||||
TestConversion<float>(true,
|
||||
TestConversion<float>(
|
||||
true,
|
||||
[&tensor_filename](const std::vector<float>& values, TensorProto& tp) {
|
||||
CreateTensorWithExternalData<float>(TensorProto_DataType_FLOAT, values, tensor_filename, tp);
|
||||
},
|
||||
|
|
@ -696,12 +836,30 @@ std::vector<std::string> CreateSparseValues<std::string>() {
|
|||
}
|
||||
*/
|
||||
|
||||
/* BFloat16 support in the future
|
||||
template <>
|
||||
std::vector<BFloat16> CreateSparseValues<BFloat16>() {
|
||||
return {BFloat16(0.f), BFloat16(2.f), BFloat16(3.f), BFloat16(0.f)};
|
||||
}
|
||||
*/
|
||||
|
||||
template <>
|
||||
std::vector<MLFloat16> CreateSparseValues<MLFloat16>() {
|
||||
return {MLFloat16(0.f), MLFloat16(2.f), MLFloat16(3.f), MLFloat16(0.f)};
|
||||
}
|
||||
|
||||
// All-zero counterpart of CreateSparseValues(); size 4 to match the code
// that depends on four input values.
template <typename T>
static std::vector<T> CreateSparseValuesAllZeros() {
  return {0, 0, 0, 0};
}

// BFloat16 has no implicit conversion from int, so it needs a specialization.
template <>
std::vector<BFloat16> CreateSparseValuesAllZeros<BFloat16>() {
  return {BFloat16(0.f), BFloat16(0.f), BFloat16(0.f), BFloat16(0.f)};
}

// MLFloat16 likewise requires explicit construction from float.
template <>
std::vector<MLFloat16> CreateSparseValuesAllZeros<MLFloat16>() {
  return {MLFloat16(0.f), MLFloat16(0.f), MLFloat16(0.f), MLFloat16(0.f)};
}
|
||||
|
||||
template <typename T>
|
||||
TensorProto CreateDenseTensor(std::function<void(const std::vector<T>& values, TensorProto& tp)> inserter,
|
||||
|
|
@ -718,13 +876,27 @@ TensorProto CreateDenseTensor(std::function<void(const std::vector<T>& values, T
|
|||
}
|
||||
|
||||
template <typename T>
|
||||
static void RawSparseDataChecker(gsl::span<const T> expected_values,
|
||||
gsl::span<const int64_t> expected_indicies,
|
||||
const SparseTensorProto& actual) {
|
||||
TensorProto CreateDenseTensorAllZeros(std::function<void(const std::vector<T>& values, TensorProto& tp)> inserter) {
|
||||
TensorProto result;
|
||||
std::vector<T> values = CreateSparseValuesAllZeros<T>();
|
||||
inserter(values, result);
|
||||
result.add_dims(static_cast<int64_t>(values.size()));
|
||||
return result;
|
||||
}
|
||||
|
||||
int64_t ActualSize(const SparseTensorProto& actual) {
|
||||
int64_t actual_size = 1;
|
||||
for (const auto dim : actual.values().dims()) {
|
||||
actual_size *= dim;
|
||||
}
|
||||
return actual_size;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
static void RawSparseDataChecker(gsl::span<const T> expected_values,
|
||||
gsl::span<const int64_t> expected_indicies,
|
||||
const SparseTensorProto& actual) {
|
||||
const int64_t actual_size = ActualSize(actual);
|
||||
|
||||
const T* raw_data = reinterpret_cast<const T*>(actual.values().raw_data().data());
|
||||
auto actual_span = gsl::make_span<const T>(raw_data, actual_size);
|
||||
|
|
@ -737,15 +909,11 @@ static void RawSparseDataChecker(gsl::span<const T> expected_values,
|
|||
EXPECT_THAT(actual_indicies, testing::ContainerEq(expected_indicies));
|
||||
}
|
||||
|
||||
/* When we support BFloat16
|
||||
template <>
|
||||
void RawSparseDataChecker<BFloat16>(gsl::span<const BFloat16> expected_bfloat,
|
||||
gsl::span<const int64_t> expected_indicies,
|
||||
const SparseTensorProto& actual) {
|
||||
int64_t actual_size = 1;
|
||||
for (const auto dim : actual.values().dims()) {
|
||||
actual_size *= dim;
|
||||
}
|
||||
const int64_t actual_size = ActualSize(actual);
|
||||
|
||||
static_assert(sizeof(uint16_t) == sizeof(BFloat16), "Expecting equal sizes");
|
||||
auto expected = expected_bfloat.as_span<const uint16_t>();
|
||||
|
|
@ -758,10 +926,27 @@ void RawSparseDataChecker<BFloat16>(gsl::span<const BFloat16> expected_bfloat,
|
|||
auto actual_indicies = gsl::make_span<const int64_t>(actual.indices().int64_data().data(), actual.indices().int64_data_size());
|
||||
EXPECT_THAT(actual_indicies, testing::ContainerEq(expected_indicies));
|
||||
}
|
||||
*/
|
||||
|
||||
template <>
|
||||
void RawSparseDataChecker<MLFloat16>(gsl::span<const MLFloat16> expected_bfloat,
|
||||
gsl::span<const int64_t> expected_indicies,
|
||||
const SparseTensorProto& actual) {
|
||||
const int64_t actual_size = ActualSize(actual);
|
||||
|
||||
static_assert(sizeof(uint16_t) == sizeof(MLFloat16), "Expecting equal sizes");
|
||||
auto expected = expected_bfloat.as_span<const uint16_t>();
|
||||
const uint16_t* raw_data = reinterpret_cast<const uint16_t*>(actual.values().raw_data().data());
|
||||
auto actual_span = gsl::make_span<const uint16_t>(raw_data, actual_size);
|
||||
|
||||
EXPECT_THAT(actual_span, testing::ContainerEq(expected));
|
||||
// Check indicies
|
||||
EXPECT_THAT(actual.indices().data_type(), ONNX_NAMESPACE::TensorProto_DataType_INT64);
|
||||
auto actual_indicies = gsl::make_span<const int64_t>(actual.indices().int64_data().data(), actual.indices().int64_data_size());
|
||||
EXPECT_THAT(actual_indicies, testing::ContainerEq(expected_indicies));
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
static void TestDenseToSparseConversion(
|
||||
static void TestDenseToSparseConversionValues(
|
||||
std::function<void(const std::vector<T>& values, TensorProto& tp)> inserter,
|
||||
std::function<void(gsl::span<const T> expected,
|
||||
gsl::span<const int64_t> expected_indicies,
|
||||
|
|
@ -783,6 +968,40 @@ static void TestDenseToSparseConversion(
|
|||
checker(expected_values_span, expected_ind_span, sparse_tensor);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
static void TestDenseAllZerosToSparseConversion(
|
||||
std::function<void(const std::vector<T>& values, TensorProto& tp)> inserter,
|
||||
std::function<void(gsl::span<const T> expected,
|
||||
gsl::span<const int64_t> expected_indicies,
|
||||
const SparseTensorProto& actual)>
|
||||
checker) {
|
||||
std::vector<T> expected_values;
|
||||
std::vector<int64_t> expected_indicies;
|
||||
// Path is required for loading external data
|
||||
// Using empty path here since the data is not external
|
||||
Path model_path;
|
||||
TensorProto dense_tensor = CreateDenseTensorAllZeros(inserter);
|
||||
|
||||
SparseTensorProto sparse_tensor;
|
||||
utils::DenseTensorToSparseTensorProto(dense_tensor, model_path, sparse_tensor);
|
||||
|
||||
gsl::span<const T>
|
||||
expected_values_span = gsl::make_span(expected_values.data(), expected_values.size());
|
||||
gsl::span<const int64_t> expected_ind_span = gsl::make_span(expected_indicies.data(), expected_indicies.size());
|
||||
checker(expected_values_span, expected_ind_span, sparse_tensor);
|
||||
}
|
||||
|
||||
// Runs both the NNZ-values and the all-zeros variants of the
// dense -> sparse conversion test for type T.
template <typename T>
static void TestDenseToSparseConversion(std::function<void(const std::vector<T>& values, TensorProto& tp)> inserter,
                                        std::function<void(gsl::span<const T> expected,
                                                           gsl::span<const int64_t> expected_indicies,
                                                           const SparseTensorProto& actual)>
                                            checker) {
  TestDenseToSparseConversionValues<T>(inserter, checker);
  TestDenseAllZerosToSparseConversion<T>(inserter, checker);
}
|
||||
|
||||
|
||||
TEST(SparseTensorConversionTests, TestDenseToSparseConversion) {
|
||||
TestDenseToSparseConversion<float>(
|
||||
[](const std::vector<float>& values, TensorProto& tp) {
|
||||
|
|
@ -792,6 +1011,82 @@ TEST(SparseTensorConversionTests, TestDenseToSparseConversion) {
|
|||
},
|
||||
RawSparseDataChecker<float>);
|
||||
|
||||
TestDenseToSparseConversion<double>(
|
||||
[](const std::vector<double>& values, TensorProto& tp) {
|
||||
tp.set_data_type(TensorProto_DataType_DOUBLE);
|
||||
tp.set_name("dense_double");
|
||||
tp.mutable_double_data()->Add(values.cbegin(), values.cend());
|
||||
},
|
||||
RawSparseDataChecker<double>);
|
||||
|
||||
TestDenseToSparseConversion<BFloat16>(
|
||||
[](const std::vector<BFloat16>& values, TensorProto& tp) {
|
||||
tp.set_data_type(TensorProto_DataType_BFLOAT16);
|
||||
tp.set_name("dense_bfloat16");
|
||||
for (auto v : values) {
|
||||
tp.mutable_int32_data()->Add(v.val);
|
||||
}
|
||||
},
|
||||
RawSparseDataChecker<BFloat16>);
|
||||
|
||||
TestDenseToSparseConversion<MLFloat16>(
|
||||
[](const std::vector<MLFloat16>& values, TensorProto& tp) {
|
||||
tp.set_data_type(TensorProto_DataType_FLOAT16);
|
||||
tp.set_name("dense_float16");
|
||||
for (auto v : values) {
|
||||
tp.mutable_int32_data()->Add(v.val);
|
||||
}
|
||||
},
|
||||
RawSparseDataChecker<MLFloat16>);
|
||||
|
||||
TestDenseToSparseConversion<int16_t>(
|
||||
[](const std::vector<int16_t>& values, TensorProto& tp) {
|
||||
tp.set_name("dense_int16");
|
||||
tp.set_data_type(TensorProto_DataType_INT16);
|
||||
tp.mutable_int32_data()->Add(values.cbegin(), values.cend());
|
||||
},
|
||||
RawSparseDataChecker<int16_t>);
|
||||
|
||||
TestDenseToSparseConversion<uint16_t>(
|
||||
[](const std::vector<uint16_t>& values, TensorProto& tp) {
|
||||
tp.set_name("dense_uint16");
|
||||
tp.set_data_type(TensorProto_DataType_UINT16);
|
||||
tp.mutable_int32_data()->Add(values.cbegin(), values.cend());
|
||||
},
|
||||
RawSparseDataChecker<uint16_t>);
|
||||
|
||||
TestDenseToSparseConversion<int32_t>(
|
||||
[](const std::vector<int32_t>& values, TensorProto& tp) {
|
||||
tp.set_name("dense_int32");
|
||||
tp.set_data_type(TensorProto_DataType_INT32);
|
||||
tp.mutable_int32_data()->Add(values.cbegin(), values.cend());
|
||||
},
|
||||
RawSparseDataChecker<int32_t>);
|
||||
|
||||
TestDenseToSparseConversion<uint32_t>(
|
||||
[](const std::vector<uint32_t>& values, TensorProto& tp) {
|
||||
tp.set_name("dense_uint32");
|
||||
tp.set_data_type(TensorProto_DataType_UINT32);
|
||||
tp.mutable_uint64_data()->Add(values.cbegin(), values.cend());
|
||||
},
|
||||
RawSparseDataChecker<uint32_t>);
|
||||
|
||||
TestDenseToSparseConversion<int64_t>(
|
||||
[](const std::vector<int64_t>& values, TensorProto& tp) {
|
||||
tp.set_name("dense_int64");
|
||||
tp.set_data_type(TensorProto_DataType_INT64);
|
||||
tp.mutable_int64_data()->Add(values.cbegin(), values.cend());
|
||||
},
|
||||
RawSparseDataChecker<int64_t>);
|
||||
|
||||
TestDenseToSparseConversion<uint64_t>(
|
||||
[](const std::vector<uint64_t>& values, TensorProto& tp) {
|
||||
tp.set_name("dense_uint64");
|
||||
tp.set_data_type(TensorProto_DataType_UINT64);
|
||||
tp.mutable_uint64_data()->Add(values.cbegin(), values.cend());
|
||||
},
|
||||
RawSparseDataChecker<uint64_t>);
|
||||
|
||||
TestDenseToSparseConversion<int8_t>(
|
||||
[](const std::vector<int8_t>& values, TensorProto& tp) {
|
||||
tp.set_name("dense_int8");
|
||||
|
|
|
|||
134
tools/python/sparsify_initializers.py
Normal file
134
tools/python/sparsify_initializers.py
Normal file
|
|
@ -0,0 +1,134 @@
|
|||
# -------------------------------------------------------------------------
|
||||
# Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
# Licensed under the MIT License.
|
||||
# --------------------------------------------------------------------------
|
||||
# This script opens an existing model in onnx format and attempts to
|
||||
# move initializers from model.graph.initializer field to model.graph.sparse_initializer field
|
||||
# and convert them into ONNX COO flat index format.
|
||||
|
||||
import argparse
|
||||
import logging
|
||||
import numpy as np
|
||||
import sys
|
||||
from typing import Tuple, List
|
||||
import onnx
|
||||
from onnx import ModelProto, SparseTensorProto, TensorProto, numpy_helper
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
real_types = set((np.float32, np.float64, np.double))
|
||||
|
||||
|
||||
def parse_arguments():
    """Build the command line interface for the sparsifier tool and return the parsed arguments."""
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument('--input', required=True, type=str, help='input model path')
    arg_parser.add_argument('--output', required=True, type=str, help='output model path')
    arg_parser.add_argument('--exclude', required=False, type=str,
                            help='semicolon separated list of initializer names to exclude')
    arg_parser.add_argument('--tolerance', required=False, type=float, default=1e-6,
                            help='FP absolute tolerance. If not given simple compare to 0')
    arg_parser.add_argument('--sparsity_threshold', required=False,
                            type=float, default=0.5,
                            help='convert to sparse initializers if sparsity is at least this much')
    arg_parser.add_argument('--verbose', required=False, action='store_true')
    arg_parser.set_defaults(verbose=False)
    return arg_parser.parse_args()
|
||||
|
||||
|
||||
def setup_logging(verbose):  # type: (bool) -> None
    """Configure the module logger to write to stdout.

    Verbose mode selects DEBUG level with a detailed location-aware format;
    otherwise INFO level with a compact format is used.
    """
    handler = logging.StreamHandler(sys.stdout)
    if verbose:
        fmt = '[%(filename)s:%(lineno)s - %(funcName)20s()] %(message)s'
        level = logging.DEBUG
    else:
        fmt = '%(filename)20s: %(message)s'
        level = logging.INFO
    handler.setFormatter(logging.Formatter(fmt))
    handler.setLevel(level)
    logger.addHandler(handler)
    logger.setLevel(level)
|
||||
|
||||
|
||||
def convert_tensor_to_sparse(tensor, tolerance):  # type: (TensorProto, float) -> Tuple[SparseTensorProto, float]
    """ returns a tuple of sparse_tensor and sparsity level

    The tensor data is flattened and the non-zero (NNZ) entries are collected
    together with their flat (COO) indices. For real types an entry counts as
    non-zero when its absolute value exceeds `tolerance`; for integer types an
    exact comparison with 0 is used. Sparsity is the fraction of zero elements.
    """
    values = []
    indicies = []
    nnz_count = 0
    tensor_data = numpy_helper.to_array(tensor).flatten()
    data_len = len(tensor_data)
    if tensor_data.dtype in real_types:
        for index in range(data_len):
            el = tensor_data[index]
            # BUG FIX: keep the elements *outside* the tolerance band. The
            # previous condition (abs(el) <= tolerance) collected the zeros
            # instead, which inverted the reported sparsity and stored zeros
            # as the sparse values.
            if abs(el) > tolerance:
                values.append(el)
                indicies.append(index)
                nnz_count += 1
    else:
        for index in range(data_len):
            el = tensor_data[index]
            # BUG FIX: same inversion for integer types (was `el == 0`).
            if el != 0:
                values.append(el)
                indicies.append(index)
                nnz_count += 1

    # Guard against empty tensors: treat them as fully sparse.
    sparsity = float(1.) - float(nnz_count)/data_len if data_len > 0 else float(1.)
    logger.debug(f"initializer={tensor.name}, dtype={tensor_data.dtype}, \
len={data_len}, nnz={nnz_count}, sparsity={sparsity}")

    values_tensor = onnx.helper.make_tensor(tensor.name, tensor.data_type,
                                            [len(values)], np.array(values).astype(tensor_data.dtype))
    indicies_tensor = onnx.helper.make_tensor(tensor.name + '_indicies',
                                              TensorProto.INT64,
                                              [len(indicies)], np.array(indicies).astype(np.int64))
    sparse_tensor = onnx.helper.make_sparse_tensor(values_tensor, indicies_tensor, tensor.dims)
    return (sparse_tensor, sparsity)
|
||||
|
||||
|
||||
def convert_initializers(model,
                         exclude_names,
                         sparsity_threshold,
                         tolerance):  # type: (ModelProto, List[str], float, float) -> None
    """Move sufficiently sparse dense initializers of `model` into
    graph.sparse_initializer, converting them to COO format in place.

    Initializers named in `exclude_names` and bool initializers are left as
    dense; everything else is converted when its sparsity level reaches
    `sparsity_threshold`.
    """
    graph = model.graph
    sparse_ones = []
    dense_ones = []
    for initializer in graph.initializer:
        if initializer.name in exclude_names:
            logger.info(f"initializer={initializer.name} was excluded")
            continue
        if initializer.data_type == TensorProto.BOOL:
            # bool is not handled by the sparse conversion
            logger.info(f"initializer={initializer.name} contains bool, not converted")
            dense_ones.append(initializer)
            continue
        sparse_tensor, sparsity = convert_tensor_to_sparse(initializer, tolerance)
        if sparsity >= sparsity_threshold:
            logger.info(f"initializer={initializer.name} converted. sparsity={sparsity}")
            sparse_ones.append(sparse_tensor)
        else:
            dense_ones.append(initializer)
            logger.info(f"initializer={initializer.name} is not converted. sparsity={sparsity}")

    graph.sparse_initializer.extend(sparse_ones)
    del graph.initializer[:]
    graph.initializer.extend(dense_ones)
|
||||
|
||||
|
||||
def main():
    """Entry point: read the input model, sparsify eligible initializers and write the result."""
    args = parse_arguments()
    setup_logging(args.verbose)

    excluded = set(args.exclude.split(';')) if args.exclude is not None else set()

    model = ModelProto()
    with open(args.input, "rb") as model_file:
        model.ParseFromString(model_file.read())

    convert_initializers(model, excluded, args.sparsity_threshold, args.tolerance)

    with open(args.output, "wb") as out_file:
        out_file.write(model.SerializeToString())
|
||||
|
||||
|
||||
# Allow the sparsifier to be run as a standalone script.
if __name__ == "__main__":
    main()
|
||||
Loading…
Reference in a new issue