Support of sparse initializers with smaller indices data type (#8834)

Support of sparse initializers with smaller indices data type to save space.
Make the script more efficient by selecting indices data type and checking resulting sparse bytes
Exclude new code from SPARSE_TENSORS
This commit is contained in:
Dmitri Smirnov 2021-08-27 14:02:48 -07:00 committed by GitHub
parent 775f862067
commit f3083f4bf3
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 453 additions and 197 deletions

View file

@ -842,37 +842,93 @@ common::Status ConstantNodeProtoToTensorProto(const ONNX_NAMESPACE::NodeProto& n
}
#if !defined(DISABLE_SPARSE_TENSORS)
template <typename T>
static Status CopySparseData(size_t n_sparse_elements,
const ONNX_NAMESPACE::TensorProto& indices,
const Path& model_path,
gsl::span<const int64_t> dims,
std::function<void(size_t from_idx, size_t to_idx)> copier) {
Status status = Status::OK();
TensorShape indices_shape(indices.dims().data(), indices.dims().size());
const auto elements = gsl::narrow<size_t>(indices_shape.Size());
ORT_RETURN_IF_NOT(indices.data_type() == ONNX_NAMESPACE ::TensorProto_DataType_INT64, "Indicies expected to be INT64");
std::vector<int64_t> indices_values; // used for conversion of smaller size indices
std::vector<uint8_t> unpack_buffer;
gsl::span<const int64_t> indices_data;
const auto elements = static_cast<size_t>(indices_shape.Size());
if (indices.int64_data_size() > 0) {
indices_data = gsl::make_span<const int64_t>(indices.int64_data().data(), elements);
} else if (indices.has_raw_data()) {
ORT_RETURN_IF_NOT(indices.raw_data().size() == (elements * sizeof(int64_t)),
"Sparse Indicies raw data size does not match expected.");
indices_data = gsl::make_span<const int64_t>(reinterpret_cast<const int64_t*>(indices.raw_data().data()), elements);
} else {
return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_GRAPH, "Invalid SparseTensor indices. Should either have raw or int64 data");
const bool has_raw_data = indices.has_raw_data();
switch (indices.data_type()) {
case ONNX_NAMESPACE::TensorProto_DataType_INT64:
if (has_raw_data) {
ORT_RETURN_IF_NOT(indices.raw_data().size() == (elements * sizeof(int64_t)),
"Sparse Indices raw data size does not match expected.");
ORT_RETURN_IF_ERROR(UnpackInitializerData(indices, model_path, unpack_buffer));
indices_data = gsl::make_span(unpack_buffer).as_span<const int64_t>();
} else {
ORT_RETURN_IF_NOT(indices.int64_data_size() == static_cast<int64_t>(elements), "Sparse indices int64 data size does not match expected");
indices_data = gsl::make_span(indices.int64_data().data(), elements);
}
break;
case ONNX_NAMESPACE::TensorProto_DataType_INT32: {
if (has_raw_data) {
ORT_RETURN_IF_NOT(indices.raw_data().size() == (elements * sizeof(int32_t)),
"Sparse Indices raw data size does not match expected.");
ORT_RETURN_IF_ERROR(UnpackInitializerData(indices, model_path, unpack_buffer));
auto int32_span = gsl::make_span(unpack_buffer).as_span<const int32_t>();
indices_values.insert(indices_values.cend(), int32_span.cbegin(), int32_span.cend());
unpack_buffer.clear();
unpack_buffer.shrink_to_fit();
} else {
ORT_RETURN_IF_NOT(indices.int32_data_size() == static_cast<int64_t>(elements), "Sparse indices int32 data size does not match expected");
indices_values.insert(indices_values.cend(), indices.int32_data().cbegin(), indices.int32_data().cend());
}
indices_data = gsl::make_span(indices_values);
break;
}
case ONNX_NAMESPACE::TensorProto_DataType_INT16: {
if (has_raw_data) {
ORT_RETURN_IF_NOT(indices.raw_data().size() == (elements * sizeof(int16_t)),
"Sparse Indices raw data size does not match expected.");
ORT_RETURN_IF_ERROR(UnpackInitializerData(indices, model_path, unpack_buffer));
auto int16_span = gsl::make_span(unpack_buffer).as_span<const int16_t>();
indices_values.insert(indices_values.cend(), int16_span.cbegin(), int16_span.cend());
indices_data = gsl::make_span(indices_values);
unpack_buffer.clear();
unpack_buffer.shrink_to_fit();
} else {
return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_GRAPH,
"Invalid SparseTensor indices. INT16 indices must be in the raw data of indices tensor");
}
break;
}
case ONNX_NAMESPACE::TensorProto_DataType_INT8: {
if (has_raw_data) {
ORT_RETURN_IF_NOT(indices.raw_data().size() == elements,
"Sparse Indices raw data size does not match expected.");
ORT_RETURN_IF_ERROR(UnpackInitializerData(indices, model_path, unpack_buffer));
auto int8_span = gsl::make_span(unpack_buffer).as_span<const int8_t>();
indices_values.insert(indices_values.cend(), int8_span.cbegin(), int8_span.cend());
indices_data = gsl::make_span(indices_values);
unpack_buffer.clear();
unpack_buffer.shrink_to_fit();
} else {
return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_GRAPH,
"Invalid SparseTensor indices. INT8 indices must be in the raw data of indices tensor");
}
break;
}
default:
return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_GRAPH,
"Invalid SparseTensor indices. Should one of the following types: int8, int16, int32 or int64");
}
if (indices_shape.NumDimensions() == 1) {
// flattened indexes
for (size_t i = 0; i < n_sparse_elements; ++i) {
copier(i, static_cast<size_t>(indices_data[i]));
copier(i, gsl::narrow<size_t>(indices_data[i]));
}
} else if (indices_shape.NumDimensions() == 2) {
// entries in format {NNZ, rank}
size_t rank = static_cast<size_t>(indices_shape[1]);
ORT_ENFORCE(rank == dims.size() && rank > 0);
ORT_ENFORCE(indices_shape[1] > 0 && static_cast<size_t>(indices_shape[1]) == dims.size());
auto rank = static_cast<size_t>(indices_shape[1]);
const int64_t* cur_index = indices_data.data();
std::vector<size_t> multipliers;
multipliers.resize(rank);
@ -880,20 +936,20 @@ static Status CopySparseData(size_t n_sparse_elements,
// calculate sum of inner dimension elements for each dimension.
// e.g. if shape {2,3,4}, the result should be {3*4, 4, 1}
multipliers[rank - 1] = 1;
for (int32_t r = static_cast<int32_t>(rank) - 2; r >= 0; --r) {
multipliers[r] = static_cast<size_t>(dims[r + 1]) * multipliers[r + 1];
for (auto r = rank - 1; r > 0; --r) {
multipliers[r - 1] = SafeInt<size_t>(dims[r]) * multipliers[r];
}
// calculate the offset for the entry
// e.g. if shape was {2,3,4} and entry was (1, 0, 2) the offset is 14
// as there are 2 rows, each with 12 entries per row
for (size_t i = 0; i < n_sparse_elements; ++i) {
size_t idx = 0;
SafeInt<int64_t> idx = 0;
for (size_t j = 0; j < rank; ++j) {
idx += static_cast<size_t>(cur_index[j]) * multipliers[j];
idx += SafeInt<int64_t>(cur_index[j]) * multipliers[j];
}
copier(i, idx);
copier(i, static_cast<size_t>(idx));
cur_index += rank;
}
@ -905,29 +961,7 @@ static Status CopySparseData(size_t n_sparse_elements,
return status;
}
#endif // !defined(DISABLE_SPARSE_TENSORS)
// Helpers used by SparseTensorProtoToDenseTensorProto() and DenseTensorToSparseTensorProto()
// to resolve the byte size of an element from its ONNX data type via a type dispatcher.
namespace conversion_internal {
#if !defined(DISABLE_SPARSE_TENSORS)
// "Unsupported type" policy for the dispatcher: instead of throwing, it stores a FAIL
// status in `status` whose message names the rejected data type enum value `dt_type`.
struct UnsupportedSparseDataType {
void operator()(int32_t dt_type, Status& status) const {
status = ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "Unsupported sparse tensor data type of ", dt_type);
}
};
#endif
// Dispatch target: reports sizeof(T) through the `element_size` out-parameter and
// always returns Status::OK().
template <typename T>
struct GetElementSize {
Status operator()(size_t& element_size) const {
element_size = sizeof(T);
return Status::OK();
}
};
// Element types accepted by the sparse <-> dense conversions. Both conversion routines
// build their dispatcher from this single list so that they accept the same set of types.
using SupportedConversionTypeList = onnxruntime::TypeList<float, double, MLFloat16, BFloat16,
int8_t, uint8_t, int16_t, uint16_t, int32_t, uint32_t, int64_t, uint64_t>;
} // namespace conversion_internal
#if !defined(DISABLE_SPARSE_TENSORS)
common::Status SparseTensorProtoToDenseTensorProto(const ONNX_NAMESPACE::SparseTensorProto& sparse,
const Path& model_path,
ONNX_NAMESPACE::TensorProto& dense) {
@ -953,74 +987,70 @@ common::Status SparseTensorProtoToDenseTensorProto(const ONNX_NAMESPACE::SparseT
auto dims = gsl::make_span<const int64_t>(dense.dims().data(), dense.dims().size());
if (type != TensorProto_DataType_STRING) {
auto ml_data = DataTypeImpl::TensorTypeFromONNXEnum(type)->GetElementType();
size_t element_size = ml_data->Size();
// need to read in sparse data first as it could be in a type specific field, in raw data, or in external data
std::vector<uint8_t> sparse_data_storage;
ORT_RETURN_IF_ERROR(UnpackInitializerData(sparse_values, model_path, sparse_data_storage));
void* sparse_data = sparse_data_storage.data();
size_t element_size = 0;
// We want this list to match the one used below in DenseTensorToSparseTensorProto()
MLTypeCallDispatcherFromTypeList<conversion_internal::SupportedConversionTypeList> type_disp(type);
ORT_RETURN_IF_ERROR(
(type_disp.InvokeRetWithUnsupportedPolicy<Status, conversion_internal::GetElementSize, conversion_internal::UnsupportedSparseDataType>(element_size)));
// by putting the data into a std::string we can avoid a copy as set_raw_data can do a std::move
// into the TensorProto. however to actually write to the buffer we have created in the std::string we need
// this somewhat dirty hack to get a mutable pointer. we could alternatively use &dense_data_storage.front()
// but using const_cast makes it more obvious we're doing something ugly.
// C++17 add non-const data() where we could remove const_cast
// into the TensorProto.
std::string dense_data_storage(n_dense_elements * element_size, 0);
if (n_sparse_elements > 0) {
void* dense_data = const_cast<char*>(dense_data_storage.data());
void* dense_data = dense_data_storage.data();
switch (element_size) {
case 1: {
auto dense_data_span = gsl::make_span<uint8_t>(static_cast<uint8_t*>(dense_data), n_dense_elements);
status = CopySparseData<uint8_t>(
status = CopySparseData(
n_sparse_elements,
indices, dims,
[sparse_data, dense_data_span](size_t from_idx, size_t to_idx) {
dense_data_span[to_idx] = static_cast<const uint8_t*>(sparse_data)[from_idx];
indices, model_path, dims,
[sparse_data, dense_data](size_t from_idx, size_t to_idx) {
static_cast<uint8_t*>(dense_data)[to_idx] = static_cast<const uint8_t*>(sparse_data)[from_idx];
});
break;
}
case 2: {
auto dense_data_span = gsl::make_span<uint16_t>(static_cast<uint16_t*>(dense_data), n_dense_elements);
status = CopySparseData<uint16_t>(
status = CopySparseData(
n_sparse_elements,
indices, dims,
[sparse_data, dense_data_span](size_t from_idx, size_t to_idx) {
dense_data_span[to_idx] = static_cast<const uint16_t*>(sparse_data)[from_idx];
indices, model_path, dims,
[sparse_data, dense_data](size_t from_idx, size_t to_idx) {
const auto* src = static_cast<const uint16_t*>(sparse_data) + from_idx;
auto* dst = static_cast<uint16_t*>(dense_data) + to_idx;
memcpy(dst, src, sizeof(uint16_t));
});
break;
}
case 4: {
auto dense_data_span = gsl::make_span<uint32_t>(static_cast<uint32_t*>(dense_data), n_dense_elements);
status = CopySparseData<uint32_t>(
status = CopySparseData(
n_sparse_elements,
indices, dims,
[sparse_data, dense_data_span](size_t from_idx, size_t to_idx) {
dense_data_span[to_idx] = static_cast<const uint32_t*>(sparse_data)[from_idx];
indices, model_path, dims,
[sparse_data, dense_data](size_t from_idx, size_t to_idx) {
const auto* src = static_cast<const uint32_t*>(sparse_data) + from_idx;
auto* dst = static_cast<uint32_t*>(dense_data) + to_idx;
memcpy(dst, src, sizeof(uint32_t));
});
break;
}
case 8: {
auto dense_data_span = gsl::make_span<uint64_t>(static_cast<uint64_t*>(dense_data), n_dense_elements);
status = CopySparseData<uint64_t>(
status = CopySparseData(
n_sparse_elements,
indices, dims,
[sparse_data, dense_data_span](size_t from_idx, size_t to_idx) {
dense_data_span[to_idx] = static_cast<const uint64_t*>(sparse_data)[from_idx];
indices, model_path, dims,
[sparse_data, dense_data](size_t from_idx, size_t to_idx) {
const auto* src = static_cast<const uint64_t*>(sparse_data) + from_idx;
auto* dst = static_cast<uint64_t*>(dense_data) + to_idx;
memcpy(dst, src, sizeof(uint64_t));
});
break;
}
default:
return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL,
" BUG! Report to onnxruntime team. element_size of: ",
element_size, " is not supported.", " type: ", type);
"Element_size of: ", element_size, " is not supported.", " type: ", type);
}
ORT_RETURN_IF_ERROR(status);
@ -1029,7 +1059,8 @@ common::Status SparseTensorProtoToDenseTensorProto(const ONNX_NAMESPACE::SparseT
} else {
// No request for std::string
conversion_internal::UnsupportedSparseDataType()(ONNX_NAMESPACE::TensorProto_DataType_STRING, status);
status = ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "Unsupported sparse tensor data type of ",
ONNX_NAMESPACE::TensorProto_DataType_STRING);
}
return status;
}
@ -1040,36 +1071,6 @@ using IsZeroFunc = bool (*)(const void*);
// Copy element
using CopyElementFunc = void (*)(void* dest, const void* src, int64_t dest_index, int64_t src_index);
// Builds the sparse representation of a dense buffer using 64-bit indices.
//
// dense_raw_data   - start of the dense element buffer
// n_dense_elements - number of elements in that buffer
// element_size     - size of one element in bytes
// is_zero          - predicate deciding whether the element at a given address is zero
// copy             - copies element `src_index` of the dense buffer into slot `dest_index`
//                    of the destination buffer
// values           - receives the non-zero elements, packed into its raw data
// indices          - receives the flat position of every non-zero element as int64 data
static void SparsifyGeneric(const void* dense_raw_data, size_t n_dense_elements, size_t element_size,
                            IsZeroFunc is_zero, CopyElementFunc copy,
                            TensorProto& values, TensorProto& indices) {
  const auto* cursor = reinterpret_cast<const uint8_t*>(dense_raw_data);
  const auto* const stop = cursor + n_dense_elements * element_size;

  // Pass 1: record the flat position of each non-zero element.
  auto& nonzero_positions = *indices.mutable_int64_data();
  for (int64_t position = 0; cursor != stop; cursor += element_size, ++position) {
    if (!is_zero(cursor)) {
      nonzero_positions.Add(position);
    }
  }

  // Pass 2: pack the non-zero elements contiguously into the values tensor.
  auto& packed_values = *values.mutable_raw_data();
  packed_values.resize(indices.int64_data_size() * element_size);
  void* const packed_dest = const_cast<char*>(packed_values.data());

  int64_t slot = 0;
  for (const auto dense_position : indices.int64_data()) {
    copy(packed_dest, dense_raw_data, slot, dense_position);
    ++slot;
  }
}
// Here we are not using tolerance for FP types since these dense tensors were
// created from sparse initializers where zeros were absolute
template <typename T>
@ -1079,7 +1080,85 @@ inline bool IsZero(const void* p) {
// Copies a single element of type T from position `src_index` of the buffer at `src`
// to position `dst_index` of the buffer at `dst`. Indices are in elements, not bytes.
//
// The buffers are not required to be aligned for T (e.g. storage borrowed from a
// std::string), so the element is moved with a single memcpy and the addresses are
// computed with byte arithmetic rather than through a T* that could be misaligned.
template <typename T>
inline void CopyElement(void* dst, const void* src, int64_t dst_index, int64_t src_index) {
  constexpr auto kElementBytes = static_cast<int64_t>(sizeof(T));
  const auto* src_bytes = static_cast<const unsigned char*>(src) + src_index * kElementBytes;
  auto* dst_bytes = static_cast<unsigned char*>(dst) + dst_index * kElementBytes;
  memcpy(dst_bytes, src_bytes, sizeof(T));
}

// Single-byte elements have no alignment requirement, so a plain indexed store is enough.
template <>
inline void CopyElement<uint8_t>(void* dst, const void* src, int64_t dst_index, int64_t src_index) {
  static_cast<uint8_t*>(dst)[dst_index] = static_cast<const uint8_t*>(src)[src_index];
}
// Converts every gathered 64-bit index to T and writes the results back to back into
// `raw_indices`, which is resized to hold exactly gathered_indices.size() values of T.
// Finally tags `indices` with the ONNX element type that corresponds to T.
//
// No range check is done here: each index is converted with static_cast<T>.
template <typename T>
static void SetIndices(gsl::span<int64_t> gathered_indices,
                       std::string& raw_indices,
                       TensorProto& indices) {
  raw_indices.resize(gathered_indices.size() * sizeof(T));

  T* cursor = reinterpret_cast<T*>(raw_indices.data());
  for (const auto wide_index : gathered_indices) {
    ORT_IF_CONSTEXPR(sizeof(T) == sizeof(int8_t)) {
      // One-byte values are stored directly.
      *cursor = static_cast<T>(wide_index);
    } else {
      // Wider values are stored via memcpy (presumably because the string's buffer
      // is not guaranteed to be aligned for T).
      const T narrowed = static_cast<T>(wide_index);
      memcpy(cursor, &narrowed, sizeof(T));
    }
    ++cursor;
  }

  indices.set_data_type(utils::ToTensorProtoElementType<T>());
}
// Builds the sparse representation of a dense buffer, storing the indices in the
// narrowest signed integer type that can hold the largest index.
//
// dense_raw_data   - start of the dense element buffer
// n_dense_elements - number of elements in that buffer
// element_size     - size of one element in bytes
// is_zero          - predicate deciding whether the element at a given address is zero
// copy             - copies element `src_index` of the dense buffer into slot `dest_index`
//                    of the destination buffer
// values           - receives the non-zero elements in its raw data (left untouched when
//                    there are none)
// indices          - receives the flat positions of the non-zero elements as raw data and
//                    has its data type set to int8/int16/int32/int64 accordingly
// nnz              - out: number of non-zero elements found
static void SparsifyGeneric(const void* dense_raw_data, size_t n_dense_elements, size_t element_size,
                            IsZeroFunc is_zero, CopyElementFunc copy,
                            TensorProto& values, TensorProto& indices,
                            size_t& nnz) {
  const auto* cursor = reinterpret_cast<const uint8_t*>(dense_raw_data);
  const auto* const stop = cursor + n_dense_elements * element_size;

  // Pass 1: record the flat position of each non-zero element (ascending order).
  std::vector<int64_t> nonzero_positions;
  for (int64_t position = 0; cursor != stop; cursor += element_size, ++position) {
    if (!is_zero(cursor)) {
      nonzero_positions.push_back(position);
    }
  }

  // All zeros: emit an empty int8 indices tensor and leave the values alone.
  if (nonzero_positions.empty()) {
    indices.set_data_type(ONNX_NAMESPACE::TensorProto_DataType_INT8);
    indices.set_raw_data(std::string());
    nnz = nonzero_positions.size();
    return;
  }

  // Pass 2: pack the non-zero elements contiguously into the values tensor.
  auto& packed_values = *values.mutable_raw_data();
  packed_values.resize(nonzero_positions.size() * element_size);
  void* const packed_dest = packed_values.data();
  int64_t slot = 0;
  for (const auto dense_position : nonzero_positions) {
    copy(packed_dest, dense_raw_data, slot, dense_position);
    ++slot;
  }

  // Positions were gathered in ascending order, so the last one is the largest and
  // alone determines how wide the stored indices have to be.
  auto positions_span = gsl::make_span(nonzero_positions);
  auto& raw_indices = *indices.mutable_raw_data();
  const auto largest = nonzero_positions.back();
  if (largest > std::numeric_limits<int32_t>::max()) {
    SetIndices<int64_t>(positions_span, raw_indices, indices);
  } else if (largest > std::numeric_limits<int16_t>::max()) {
    SetIndices<int32_t>(positions_span, raw_indices, indices);
  } else if (largest > std::numeric_limits<int8_t>::max()) {
    SetIndices<int16_t>(positions_span, raw_indices, indices);
  } else {
    SetIndices<int8_t>(positions_span, raw_indices, indices);
  }

  nnz = nonzero_positions.size();
}
common::Status DenseTensorToSparseTensorProto(const ONNX_NAMESPACE::TensorProto& dense_proto,
@ -1087,11 +1166,9 @@ common::Status DenseTensorToSparseTensorProto(const ONNX_NAMESPACE::TensorProto&
ONNX_NAMESPACE::SparseTensorProto& result) {
ORT_ENFORCE(HasDataType(dense_proto), "Must have a valid data type");
const bool is_string_data = dense_proto.data_type() == ONNX_NAMESPACE::TensorProto_DataType_STRING;
if (is_string_data) {
Status status{};
conversion_internal::UnsupportedSparseDataType()(ONNX_NAMESPACE::TensorProto_DataType_STRING, status);
return status;
if (dense_proto.data_type() == ONNX_NAMESPACE::TensorProto_DataType_STRING) {
return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "Unsupported sparse tensor data type of ",
ONNX_NAMESPACE::TensorProto_DataType_STRING);
}
const auto data_type = dense_proto.data_type();
@ -1101,51 +1178,47 @@ common::Status DenseTensorToSparseTensorProto(const ONNX_NAMESPACE::TensorProto&
values.set_data_type(data_type);
auto& indices = *sparse_proto.mutable_indices();
indices.set_data_type(ONNX_NAMESPACE::TensorProto_DataType_INT64);
SafeInt<size_t> n_dense_elements = 1;
for (auto dim : dense_proto.dims()) {
n_dense_elements *= dim;
}
auto ml_data = DataTypeImpl::TensorTypeFromONNXEnum(data_type)->GetElementType();
size_t element_size = ml_data->Size();
std::vector<uint8_t> dense_raw_data;
ORT_RETURN_IF_ERROR(UnpackInitializerData(dense_proto, model_path, dense_raw_data));
size_t element_size = 0;
// We want this type list to match the one above in SparseTensorProtoToDenseTensorProto
MLTypeCallDispatcherFromTypeList<conversion_internal::SupportedConversionTypeList> type_disp(data_type);
ORT_RETURN_IF_ERROR(
(type_disp.InvokeRetWithUnsupportedPolicy<Status, conversion_internal::GetElementSize, conversion_internal::UnsupportedSparseDataType>(element_size)));
size_t nnz = 0;
void* dense_data = dense_raw_data.data();
switch (element_size) {
case 1: {
SparsifyGeneric(dense_data, n_dense_elements, element_size,
IsZero<uint8_t>, CopyElement<uint8_t>, values, indices);
IsZero<uint8_t>, CopyElement<uint8_t>, values, indices, nnz);
break;
}
case 2: {
SparsifyGeneric(dense_data, n_dense_elements, element_size,
IsZero<uint16_t>, CopyElement<uint16_t>, values, indices);
IsZero<uint16_t>, CopyElement<uint16_t>, values, indices, nnz);
break;
}
case 4: {
SparsifyGeneric(dense_data, n_dense_elements, element_size,
IsZero<uint32_t>, CopyElement<uint32_t>, values, indices);
IsZero<uint32_t>, CopyElement<uint32_t>, values, indices, nnz);
break;
}
case 8: {
SparsifyGeneric(dense_data, n_dense_elements, element_size,
IsZero<uint64_t>, CopyElement<uint64_t>, values, indices);
IsZero<uint64_t>, CopyElement<uint64_t>, values, indices, nnz);
break;
}
default:
return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL,
" BUG! Report to onnxruntime team. element_size of: ",
element_size, " is not supported.", " data_type: ", data_type);
"Element_size of: ", element_size, " is not supported.", " data_type: ", data_type);
}
// Fix up shapes
const auto nnz = indices.int64_data_size();
values.add_dims(nnz);
indices.add_dims(nnz);

View file

@ -669,8 +669,51 @@ static void CreateTensorWithExternalData(
tensor_proto.set_data_type(type);
}
namespace {
// Produces the fixed set of test indices and appends the matching dims to `indices_tp`.
//
// indices_1D   - true: flattened indices {2, 5, 6, 10} with a single dim of that length;
//                false: the same four positions expressed as rank-3 coordinates, with
//                dims {values_size, shape_size}
// values_size  - first dim (NNZ) used for the 2-D form
// shape_size   - second dim (rank) used for the 2-D form
// indices_data - out: receives the index values
// indices_tp   - indices tensor whose dims are appended to
void insert_indices_data(bool indices_1D,
                         size_t values_size, size_t shape_size,
                         std::vector<int8_t>& indices_data,
                         TensorProto& indices_tp) {
  if (!indices_1D) {
    // indices are shape {NNZ, rank} so convert flattened values of 2, 5, 6 and 10 to rank 3 values
    indices_tp.add_dims(values_size);
    indices_tp.add_dims(shape_size);
    indices_data = {
        0, 1, 0,
        0, 2, 1,
        1, 0, 0,
        1, 2, 0};
    return;
  }

  indices_data = {2, 5, 6, 10};
  indices_tp.add_dims(indices_data.size());
}
template <typename T>
static NodeProto CreateConstantNode(bool indices_1D,
// Dispatch target that fills `indices_tp` with the test indices stored as raw data of type T.
// T must be a signed integral type (enforced by the static_asserts below).
struct InsertIndices {
// indices_1D, values_size and shape_size are forwarded to insert_indices_data(), which
// supplies the index values as int8 and appends the dims to `indices_tp`.
void operator()(bool indices_1D, size_t values_size, size_t shape_size, TensorProto& indices_tp) const {
static_assert(std::is_integral_v<T>, "indices data must be integral data type");
static_assert(std::is_signed_v<T>, "indices must be signed data type");
std::vector<int8_t> indices_data;
insert_indices_data(indices_1D, values_size, shape_size, indices_data, indices_tp);
indices_tp.set_data_type(utils::ToTensorProtoElementType<T>());
ORT_IF_CONSTEXPR (sizeof(T) == sizeof(int8_t)) {
// Same width as the int8 source values: the bytes are copied as they are.
indices_tp.mutable_raw_data()->assign(reinterpret_cast<const char*>(indices_data.data()), indices_data.size());
} else {
// Conversion on the fly to the target data type
std::vector<T> indices(indices_data.cbegin(), indices_data.cend());
// The byte count is the element count scaled by the width of T.
indices_tp.mutable_raw_data()->assign(reinterpret_cast<const char*>(indices.data()), indices.size() * sizeof(T));
}
}
};
using SupportedIndicesTypeList = onnxruntime::TypeList<int8_t, int16_t, int32_t, int64_t>;
} // namespace
template <typename T>
static NodeProto CreateConstantNode(bool indices_1D, int32_t indices_type,
std::function<void(const std::vector<T>& values, TensorProto& tp)> inserter,
std::vector<T>& expected_data) {
NodeProto constant_node;
@ -678,7 +721,6 @@ static NodeProto CreateConstantNode(bool indices_1D,
constant_node.add_output("dense_tensor_output");
std::vector<T> values = CreateValues<T>();
std::vector<int64_t> indices;
std::vector<int64_t> shape{2, 3, 2};
AttributeProto& attrib = *constant_node.mutable_attribute()->Add();
@ -686,26 +728,11 @@ static NodeProto CreateConstantNode(bool indices_1D,
attrib.set_type(AttributeProto_AttributeType_SPARSE_TENSOR);
SparseTensorProto& stp = *attrib.mutable_sparse_tensor();
TensorProto& indices_tp = *stp.mutable_indices();
stp.mutable_dims()->Add(shape.cbegin(), shape.cend());
if (indices_1D) {
indices = {2, 5, 6, 10};
indices_tp.add_dims(indices.size());
} else {
// indices are shape {NNZ, rank} so convert flattened values of 2, 5, 6 and 10 to rank 3 values
indices_tp.add_dims(values.size());
indices_tp.add_dims(shape.size());
indices = {
0, 1, 0,
0, 2, 1,
1, 0, 0,
1, 2, 0};
}
indices_tp.set_data_type(ONNX_NAMESPACE::TensorProto_DataType_INT64);
indices_tp.mutable_int64_data()->Add(indices.cbegin(), indices.cend());
TensorProto& indices_tp = *stp.mutable_indices();
utils::MLTypeCallDispatcherFromTypeList<SupportedIndicesTypeList> type_disp(indices_type);
type_disp.Invoke<InsertIndices>(indices_1D, values.size(), shape.size(), indices_tp);
expected_data.resize(2 * 3 * 2);
expected_data[2] = values[0];
@ -733,10 +760,9 @@ static NodeProto CreateConstantNodeAllZeros(bool indices_1D, std::vector<T>& exp
attrib.set_type(AttributeProto_AttributeType_SPARSE_TENSOR);
SparseTensorProto& stp = *attrib.mutable_sparse_tensor();
TensorProto& indices_tp = *stp.mutable_indices();
stp.mutable_dims()->Add(shape.cbegin(), shape.cend());
TensorProto& indices_tp = *stp.mutable_indices();
if (indices_1D) {
indices_tp.add_dims(0);
} else {
@ -759,11 +785,11 @@ static NodeProto CreateConstantNodeAllZeros(bool indices_1D, std::vector<T>& exp
}
template <typename T>
static void TestConversion(bool use_1D_indices,
static void TestConversion(bool use_1D_indices, int32_t indices_type,
std::function<void(const std::vector<T>& values, TensorProto& tp)> inserter,
std::function<void(gsl::span<const T> expected, const TensorProto& actual)> checker) {
std::vector<T> expected;
auto node = CreateConstantNode<T>(use_1D_indices, inserter, expected);
auto node = CreateConstantNode<T>(use_1D_indices, indices_type, inserter, expected);
TensorProto dense;
// Path is required for loading external data (if any)
@ -793,8 +819,17 @@ template <typename T>
static void TestConversion(
std::function<void(const std::vector<T>& values, TensorProto& tp)> inserter,
std::function<void(gsl::span<const T> expected, const TensorProto& actual)> checker) {
TestConversion(true, inserter, checker);
TestConversion(false, inserter, checker);
std::vector<TensorProto_DataType> indices_types{
TensorProto_DataType_INT8,
TensorProto_DataType_INT16,
TensorProto_DataType_INT32,
TensorProto_DataType_INT64
};
for (auto dt : indices_types) {
TestConversion(true, dt, inserter, checker);
TestConversion(false, dt, inserter, checker);
}
TestConversionAllZeros(true, checker);
TestConversionAllZeros(false, checker);
}
@ -820,7 +855,7 @@ static void RawDataChecker(gsl::span<const T> expected, const TensorProto& actua
const T* raw_data = reinterpret_cast<const T*>(actual.raw_data().data());
auto actual_span = gsl::make_span<const T>(raw_data, actual_size);
EXPECT_THAT(actual_span, testing::ContainerEq(expected));
ASSERT_THAT(actual_span, testing::ContainerEq(expected));
}
template <>
@ -831,7 +866,7 @@ void RawDataChecker<MLFloat16>(gsl::span<const MLFloat16> expected_bfloat, const
const uint16_t* raw_data = reinterpret_cast<const uint16_t*>(actual.raw_data().data());
auto actual_span = gsl::make_span<const uint16_t>(raw_data, actual_size);
EXPECT_THAT(actual_span, testing::ContainerEq(expected));
ASSERT_THAT(actual_span, testing::ContainerEq(expected));
}
template <>
@ -842,7 +877,7 @@ void RawDataChecker<BFloat16>(gsl::span<const BFloat16> expected_bfloat, const T
const uint16_t* raw_data = reinterpret_cast<const uint16_t*>(actual.raw_data().data());
auto actual_span = gsl::make_span<const uint16_t>(raw_data, actual_size);
EXPECT_THAT(actual_span, testing::ContainerEq(expected));
ASSERT_THAT(actual_span, testing::ContainerEq(expected));
}
TEST(SparseTensorConversionTests, TestConstantNodeConversion) {
@ -938,6 +973,7 @@ TEST(SparseTensorConversionTests, TestConstantNodeConversion) {
PathString tensor_filename(ORT_TSTR("tensor_XXXXXX"));
TestConversion<float>(
true,
TensorProto_DataType_INT64,
[&tensor_filename](const std::vector<float>& values, TensorProto& tp) {
CreateTensorWithExternalData<float>(TensorProto_DataType_FLOAT, values, tensor_filename, tp);
},
@ -950,8 +986,11 @@ TEST(SparseTensorConversionTests, TestConstantNodeConversion) {
#if !defined(ORT_MINIMAL_BUILD)
template <typename T>
static std::vector<T> CreateSparseValues() {
return {0, 2, 3, 0};
static std::vector<T> CreateSparseValues(size_t indices_start) {
std::vector<T> result(indices_start + 2);
result[indices_start] = 2;
result[indices_start + 1] = 3;
return result;
}
/* std::string support in the future
@ -962,13 +1001,19 @@ std::vector<std::string> CreateSparseValues<std::string>() {
*/
template <>
std::vector<BFloat16> CreateSparseValues<BFloat16>() {
return {BFloat16(0.f), BFloat16(2.f), BFloat16(3.f), BFloat16(0.f)};
std::vector<BFloat16> CreateSparseValues<BFloat16>(size_t indices_start) {
std::vector<BFloat16> result(indices_start + 2);
result[indices_start] = BFloat16(2.f);
result[indices_start + 1] = BFloat16(3.f);
return result;
}
template <>
std::vector<MLFloat16> CreateSparseValues<MLFloat16>() {
return {MLFloat16(0.f), MLFloat16(2.f), MLFloat16(3.f), MLFloat16(0.f)};
std::vector<MLFloat16> CreateSparseValues<MLFloat16>(size_t indices_start) {
std::vector<MLFloat16> result(indices_start + 2);
result[indices_start] = MLFloat16(2.f);
result[indices_start + 1] = MLFloat16(3.f);
return result;
}
template <typename T>
@ -987,11 +1032,13 @@ std::vector<MLFloat16> CreateSparseValuesAllZeros<MLFloat16>() {
}
template <typename T>
TensorProto CreateDenseTensor(std::function<void(const std::vector<T>& values, TensorProto& tp)> inserter,
TensorProto CreateDenseTensor(size_t indices_start,
std::function<void(const std::vector<T>& values, TensorProto& tp)> inserter,
std::vector<T>& expected_values, std::vector<int64_t>& expected_indicies) {
TensorProto result;
std::vector<T> values = CreateSparseValues<T>();
expected_indicies = {1, 2};
std::vector<T> values = CreateSparseValues<T>(indices_start);
auto ind_start = static_cast<int64_t>(indices_start);
expected_indicies = {ind_start, ind_start + 1};
for (const auto& ind : expected_indicies) {
expected_values.push_back(values[ind]);
}
@ -1026,12 +1073,9 @@ static void RawSparseDataChecker(gsl::span<const T> expected_values,
const T* raw_data = reinterpret_cast<const T*>(actual.values().raw_data().data());
auto actual_span = gsl::make_span<const T>(raw_data, actual_size);
EXPECT_THAT(actual_span, testing::ContainerEq(expected_values));
ASSERT_THAT(actual_span, testing::ContainerEq(expected_values));
// Check indicies
EXPECT_THAT(actual.indices().data_type(), ONNX_NAMESPACE::TensorProto_DataType_INT64);
auto actual_indicies = gsl::make_span<const int64_t>(actual.indices().int64_data().data(), actual.indices().int64_data_size());
EXPECT_THAT(actual_indicies, testing::ContainerEq(expected_indicies));
SparseIndicesChecker(actual.indices(), expected_indicies);
}
template <>
@ -1045,11 +1089,8 @@ void RawSparseDataChecker<BFloat16>(gsl::span<const BFloat16> expected_bfloat,
const uint16_t* raw_data = reinterpret_cast<const uint16_t*>(actual.values().raw_data().data());
auto actual_span = gsl::make_span<const uint16_t>(raw_data, actual_size);
EXPECT_THAT(actual_span, testing::ContainerEq(expected));
// Check indicies
EXPECT_THAT(actual.indices().data_type(), ONNX_NAMESPACE::TensorProto_DataType_INT64);
auto actual_indicies = gsl::make_span<const int64_t>(actual.indices().int64_data().data(), actual.indices().int64_data_size());
EXPECT_THAT(actual_indicies, testing::ContainerEq(expected_indicies));
ASSERT_THAT(actual_span, testing::ContainerEq(expected));
SparseIndicesChecker(actual.indices(), expected_indicies);
}
template <>
@ -1063,15 +1104,12 @@ void RawSparseDataChecker<MLFloat16>(gsl::span<const MLFloat16> expected_bfloat,
const uint16_t* raw_data = reinterpret_cast<const uint16_t*>(actual.values().raw_data().data());
auto actual_span = gsl::make_span<const uint16_t>(raw_data, actual_size);
EXPECT_THAT(actual_span, testing::ContainerEq(expected));
// Check indicies
EXPECT_THAT(actual.indices().data_type(), ONNX_NAMESPACE::TensorProto_DataType_INT64);
auto actual_indicies = gsl::make_span<const int64_t>(actual.indices().int64_data().data(), actual.indices().int64_data_size());
EXPECT_THAT(actual_indicies, testing::ContainerEq(expected_indicies));
ASSERT_THAT(actual_span, testing::ContainerEq(expected));
SparseIndicesChecker(actual.indices(), expected_indicies);
}
template <typename T>
static void TestDenseToSparseConversionValues(
static void TestDenseToSparseConversionValues(size_t indices_start,
std::function<void(const std::vector<T>& values, TensorProto& tp)> inserter,
std::function<void(gsl::span<const T> expected,
gsl::span<const int64_t> expected_indicies,
@ -1082,7 +1120,7 @@ static void TestDenseToSparseConversionValues(
// Path is required for loading external data
// Using empty path here since the data is not external
Path model_path;
TensorProto dense_tensor = CreateDenseTensor(inserter, expected_values, expected_indicies);
TensorProto dense_tensor = CreateDenseTensor(indices_start, inserter, expected_values, expected_indicies);
SparseTensorProto sparse_tensor;
utils::DenseTensorToSparseTensorProto(dense_tensor, model_path, sparse_tensor);
@ -1117,17 +1155,21 @@ static void TestDenseAllZerosToSparseConversion(
}
template <typename T>
static void TestDenseToSparseConversion(std::function<void(const std::vector<T>& values, TensorProto& tp)> inserter,
static void TestDenseToSparseConversion(size_t indices_start,
std::function<void(const std::vector<T>& values, TensorProto& tp)> inserter,
std::function<void(gsl::span<const T> expected,
gsl::span<const int64_t> expected_indicies,
const SparseTensorProto& actual)>
checker) {
TestDenseToSparseConversionValues<T>(inserter, checker);
TestDenseToSparseConversionValues<T>(indices_start, inserter, checker);
TestDenseAllZerosToSparseConversion<T>(inserter, checker);
}
TEST(SparseTensorConversionTests, TestDenseToSparseConversion) {
// This one will test indices that are less than max int8 value
// which should result in int8 indices
TestDenseToSparseConversion<float>(
20U,
[](const std::vector<float>& values, TensorProto& tp) {
tp.set_data_type(TensorProto_DataType_FLOAT);
tp.set_name("dense_float");
@ -1135,7 +1177,10 @@ TEST(SparseTensorConversionTests, TestDenseToSparseConversion) {
},
RawSparseDataChecker<float>);
// This one will test indices that are max(int8) < ind < max(int16) value
// which should result in int16 indices
TestDenseToSparseConversion<double>(
static_cast<size_t>(std::numeric_limits<int8_t>::max()) + 20U,
[](const std::vector<double>& values, TensorProto& tp) {
tp.set_data_type(TensorProto_DataType_DOUBLE);
tp.set_name("dense_double");
@ -1143,7 +1188,10 @@ TEST(SparseTensorConversionTests, TestDenseToSparseConversion) {
},
RawSparseDataChecker<double>);
// This one will test indices that are max(int16) < ind < max(int32) value
// which should result in int32 indices
TestDenseToSparseConversion<BFloat16>(
static_cast<size_t>(std::numeric_limits<int16_t>::max()) + 20U,
[](const std::vector<BFloat16>& values, TensorProto& tp) {
tp.set_data_type(TensorProto_DataType_BFLOAT16);
tp.set_name("dense_bfloat16");
@ -1153,7 +1201,11 @@ TEST(SparseTensorConversionTests, TestDenseToSparseConversion) {
},
RawSparseDataChecker<BFloat16>);
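// Protobuf cannot hold more than 2GB of data, so indices large enough to require int64
// cannot be produced by conversion; 64-bit indices can only be tested when created explicitly.
// NOTE(review): the case below starts at index 20, like the first (int8) case above; the
// original remark "which should result in int32 indices" looks like a copy-paste leftover -- confirm.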
TestDenseToSparseConversion<MLFloat16>(
20U,
[](const std::vector<MLFloat16>& values, TensorProto& tp) {
tp.set_data_type(TensorProto_DataType_FLOAT16);
tp.set_name("dense_float16");
@ -1164,6 +1216,7 @@ TEST(SparseTensorConversionTests, TestDenseToSparseConversion) {
RawSparseDataChecker<MLFloat16>);
TestDenseToSparseConversion<int16_t>(
20U,
[](const std::vector<int16_t>& values, TensorProto& tp) {
tp.set_name("dense_int16");
tp.set_data_type(TensorProto_DataType_INT16);
@ -1172,6 +1225,7 @@ TEST(SparseTensorConversionTests, TestDenseToSparseConversion) {
RawSparseDataChecker<int16_t>);
TestDenseToSparseConversion<uint16_t>(
20U,
[](const std::vector<uint16_t>& values, TensorProto& tp) {
tp.set_name("dense_uint16");
tp.set_data_type(TensorProto_DataType_UINT16);
@ -1180,6 +1234,7 @@ TEST(SparseTensorConversionTests, TestDenseToSparseConversion) {
RawSparseDataChecker<uint16_t>);
TestDenseToSparseConversion<int32_t>(
20U,
[](const std::vector<int32_t>& values, TensorProto& tp) {
tp.set_name("dense_int32");
tp.set_data_type(TensorProto_DataType_INT32);
@ -1188,6 +1243,7 @@ TEST(SparseTensorConversionTests, TestDenseToSparseConversion) {
RawSparseDataChecker<int32_t>);
TestDenseToSparseConversion<uint32_t>(
20U,
[](const std::vector<uint32_t>& values, TensorProto& tp) {
tp.set_name("dense_uint32");
tp.set_data_type(TensorProto_DataType_UINT32);
@ -1196,6 +1252,7 @@ TEST(SparseTensorConversionTests, TestDenseToSparseConversion) {
RawSparseDataChecker<uint32_t>);
TestDenseToSparseConversion<int64_t>(
20U,
[](const std::vector<int64_t>& values, TensorProto& tp) {
tp.set_name("dense_int64");
tp.set_data_type(TensorProto_DataType_INT64);
@ -1204,6 +1261,7 @@ TEST(SparseTensorConversionTests, TestDenseToSparseConversion) {
RawSparseDataChecker<int64_t>);
TestDenseToSparseConversion<uint64_t>(
20U,
[](const std::vector<uint64_t>& values, TensorProto& tp) {
tp.set_name("dense_uint64");
tp.set_data_type(TensorProto_DataType_UINT64);
@ -1212,6 +1270,7 @@ TEST(SparseTensorConversionTests, TestDenseToSparseConversion) {
RawSparseDataChecker<uint64_t>);
TestDenseToSparseConversion<int8_t>(
20U,
[](const std::vector<int8_t>& values, TensorProto& tp) {
tp.set_name("dense_int8");
tp.set_data_type(TensorProto_DataType_INT8);
@ -1220,6 +1279,7 @@ TEST(SparseTensorConversionTests, TestDenseToSparseConversion) {
RawSparseDataChecker<int8_t>);
TestDenseToSparseConversion<uint8_t>(
20U,
[](const std::vector<uint8_t>& values, TensorProto& tp) {
tp.set_name("dense_int64");
RawDataWriter(values, tp, TensorProto_DataType_UINT8);

View file

@ -97,5 +97,9 @@ void AllocateMLValue(AllocatorPtr alloc, const std::vector<int64_t>& dims, OrtVa
// Helper function to check that the graph transformations have been successfully applied.
std::map<std::string, int> CountOpsInGraph(const Graph& graph, bool recurse_into_subgraphs = true);
#if !defined(DISABLE_SPARSE_TENSORS)
// Test helper: asserts that the indices stored in `indices_proto` are equal to `expected_indicies`.
// The implementation accepts indices stored as INT64, INT32, INT16 or INT8 and widens the
// narrower types to int64_t before comparing; any other data type fails the assertion.
void SparseIndicesChecker(const ONNX_NAMESPACE::TensorProto& indices_proto, gsl::span<const int64_t> expected_indicies);
#endif // DISABLE_SPARSE_TENSORS
} // namespace test
} // namespace onnxruntime

View file

@ -10,6 +10,7 @@
#include "gmock/gmock.h"
#include "onnx/defs/function.h"
#include "core/graph/function_impl.h"
#include "test/framework/test_utils.h"
#ifdef __GNUC__
#define UNUSED __attribute__((unused))
@ -233,6 +234,7 @@ static void ConstructSparseTensor(const std::string& name,
std::copy(values.cbegin(), values.cend(), dest_span.begin());
const std::vector<int64_t>& indices = sparse_details::indices; // Not to exceed 59
auto& m_indicies = *sparse_proto.mutable_indices();
m_indicies.set_data_type(ONNX_NAMESPACE::TensorProto_DataType_INT64);
*m_indicies.mutable_dims()->Add() = static_cast<int64_t>(indices.size());
@ -264,10 +266,9 @@ static void ValidateSparseTensorProto(const SparseTensorProto& proto) {
++expected_begin;
}
// Check indices
EXPECT_EQ(proto.indices().data_type(), ONNX_NAMESPACE::TensorProto_DataType_INT64);
const auto& indices = proto.indices();
auto expected_indices = gsl::make_span(sparse_details::indices);
auto actual_indices = gsl::make_span<const int64_t>(proto.indices().int64_data().data(), proto.indices().int64_data_size());
EXPECT_THAT(actual_indices, testing::ContainerEq(expected_indices));
SparseIndicesChecker(indices, expected_indices);
// check shape
const auto& dims = proto.dims();
auto actual_shape = gsl::make_span<const int64_t>(dims.data(), dims.size());

View file

@ -4,7 +4,9 @@
#include "test/util/include/test_utils.h"
#include "core/framework/ort_value.h"
#include "core/graph/onnx_protobuf.h"
#include "core/session/inference_session.h"
#include "core/framework/tensorprotoutils.h"
#include "test/util/include/asserts.h"
#include "test/util/include/test/test_environment.h"
@ -115,5 +117,68 @@ void RunAndVerifyOutputsWithEP(const ORTCHAR_T* model_path, const char* log_id,
VerifyOutputs(output_names, expected_fetches, fetches);
}
#if !defined(DISABLE_SPARSE_TENSORS)
// Asserts that the indices stored in `indices_proto` are equal to `expected_indicies`.
//
// INT64 and INT32 indices are read either from raw data or from the typed repeated field.
// INT16 and INT8 indices are required to be present as raw data. Indices narrower than
// 64 bits are widened to int64_t before the comparison. Any other data type fails the
// assertion. Every ASSERT_* below is fatal and returns from this function on failure.
void SparseIndicesChecker(const ONNX_NAMESPACE::TensorProto& indices_proto, gsl::span<const int64_t> expected_indicies) {
  using namespace ONNX_NAMESPACE;

  Path empty_path;                 // default-constructed path handed to UnpackInitializerData
  std::vector<uint8_t> raw_bytes;  // receives the unpacked raw data
  gsl::span<const int64_t> ind_span;
  std::vector<int64_t> widened;    // backing storage for indices narrower than int64_t

  TensorShape ind_shape(indices_proto.dims().data(), indices_proto.dims().size());
  const auto elements = gsl::narrow<size_t>(ind_shape.Size());
  const bool has_raw_data = indices_proto.has_raw_data();
  const auto data_type = indices_proto.data_type();

  if (data_type == ONNX_NAMESPACE::TensorProto_DataType_INT64) {
    if (!has_raw_data) {
      // Typed storage already holds int64 values; view it directly without copying.
      ind_span = gsl::make_span(indices_proto.int64_data().cbegin(), indices_proto.int64_data().cend());
    } else {
      const auto& rd = indices_proto.raw_data();
      ASSERT_EQ(rd.size(), elements * sizeof(int64_t));
      ASSERT_STATUS_OK(utils::UnpackInitializerData(indices_proto, empty_path, raw_bytes));
      ind_span = gsl::make_span(raw_bytes).as_span<const int64_t>();
    }
  } else if (data_type == ONNX_NAMESPACE::TensorProto_DataType_INT32) {
    if (!has_raw_data) {
      widened.assign(indices_proto.int32_data().cbegin(), indices_proto.int32_data().cend());
    } else {
      const auto& rd = indices_proto.raw_data();
      ASSERT_EQ(rd.size(), elements * sizeof(int32_t));
      ASSERT_STATUS_OK(utils::UnpackInitializerData(indices_proto, empty_path, raw_bytes));
      auto int32_span = gsl::make_span(raw_bytes).as_span<const int32_t>();
      widened.assign(int32_span.cbegin(), int32_span.cend());
    }
    ind_span = gsl::make_span(widened);
  } else if (data_type == ONNX_NAMESPACE::TensorProto_DataType_INT16) {
    // 16-bit indices are only accepted as raw data.
    ASSERT_TRUE(has_raw_data);
    const auto& rd = indices_proto.raw_data();
    ASSERT_EQ(rd.size(), elements * sizeof(int16_t));
    ASSERT_STATUS_OK(utils::UnpackInitializerData(indices_proto, empty_path, raw_bytes));
    auto int16_span = gsl::make_span(raw_bytes).as_span<const int16_t>();
    widened.assign(int16_span.cbegin(), int16_span.cend());
    ind_span = gsl::make_span(widened);
  } else if (data_type == ONNX_NAMESPACE::TensorProto_DataType_INT8) {
    // 8-bit indices are only accepted as raw data; one byte per element.
    ASSERT_TRUE(has_raw_data);
    const auto& rd = indices_proto.raw_data();
    ASSERT_EQ(rd.size(), elements);
    ASSERT_STATUS_OK(utils::UnpackInitializerData(indices_proto, empty_path, raw_bytes));
    auto int8_span = gsl::make_span(raw_bytes).as_span<const int8_t>();
    widened.assign(int8_span.cbegin(), int8_span.cend());
    ind_span = gsl::make_span(widened);
  } else {
    // Unsupported indices data type.
    ASSERT_TRUE(false);
  }

  ASSERT_THAT(ind_span, testing::ContainerEq(expected_indicies));
}
#endif // DISABLE_SPARSE_TENSORS
} // namespace test
} // namespace onnxruntime

View file

@ -26,7 +26,7 @@ def parse_arguments():
parser.add_argument('--exclude', required=False, type=str,
help='semicolon separated list of initializer names to exclude')
parser.add_argument('--tolerance', required=False, type=float, default=1e-6,
help='FP absolute tolerance. If not given simple compare to 0')
help='FP absolute tolerance.')
parser.add_argument('--sparsity_threshold', required=False,
type=float, default=0.5,
help='convert to sparse initializers if sparsity is at least this much')
@ -49,11 +49,13 @@ def setup_logging(verbose): # type: (bool) -> None
logger.setLevel(logging_level)
def convert_tensor_to_sparse(tensor, tolerance): # type: (TensorProto) -> Tuple[SparseTensorProto, float]
def convert_tensor_to_sparse(tensor,
sparsity_threshold,
tolerance): # type: (TensorProto, float, float) -> Tuple[SparseTensorProto, float]
""" returns a tuple of sparse_tensor and sparsity level
"""
values = []
indicies = []
indices = []
nnz_count = 0
tensor_data = numpy_helper.to_array(tensor).flatten()
data_len = len(tensor_data)
@ -62,25 +64,76 @@ def convert_tensor_to_sparse(tensor, tolerance): # type: (TensorProto) -> Tuple
el = tensor_data[index]
if abs(el) <= tolerance:
values.append(el)
indicies.append(index)
indices.append(index)
nnz_count += 1
else:
for index in range(data_len):
el = tensor_data[index]
if el != 0:
values.append(el)
indicies.append(index)
indices.append(index)
nnz_count += 1
sparsity = float(1.) - float(nnz_count)/data_len
logger.debug(f"initializer={tensor.name}, dtype={tensor_data.dtype}, \
len={data_len}, nnz={nnz_count}, sparsity={sparsity}")
values_tensor = onnx.helper.make_tensor(tensor.name, tensor.data_type,
[len(values)], np.array(values).astype(tensor_data.dtype))
ind_data_type = TensorProto.INT8
ind_dtype = np.int8
ind_len = len(indices)
max_indices_value = 0
if ind_len > 0:
max_indices_value = indices[-1]
if max_indices_value <= np.iinfo(np.int8).max:
ind_data_type = TensorProto.INT8
ind_dtype = np.int8
elif max_indices_value <= np.iinfo(np.int16).max:
ind_data_type = TensorProto.INT16
ind_dtype = np.int16
elif max_indices_value <= np.iinfo(np.int32).max:
ind_data_type = TensorProto.INT32
ind_dtype = np.int32
else:
ind_data_type = TensorProto.INT64
ind_dtype = np.int64
logger.debug(f"initializer={tensor.name}, dtype={tensor_data.dtype}, \
data_len={data_len}, nnz={nnz_count}, sparsity={sparsity}, \
max_indices_value={max_indices_value}, sparse_indices_type={ind_dtype}")
if sparsity < sparsity_threshold:
return (object(), sparsity)
tensor_data_bytes = tensor_data.nbytes
# create np array and cast data to the appropriate type
np_values = np.array(values).astype(tensor_data.dtype)
# create np array and cast data to the inferred index type
np_indices = np.array(indices).astype(ind_dtype)
total_sparse_bytes = np_values.nbytes + np_indices.nbytes
logger.debug(f"initializer={tensor.name}, initializer_bytes={tensor_data_bytes}, \
sparse_initializer_bytes={total_sparse_bytes}")
# This check is usually useful for sparsity_threshold=0.5 where much
# depends on the size of the indices entries and the size of the original tensor.
# Big dense tensors command larger indices data type and for large float32 tensors
# int32 indices are often selected, thus we really want to guard against losing
# rather than winning.
if tensor_data_bytes <= total_sparse_bytes:
sparsity = float(1.) - float(tensor_data_bytes)/total_sparse_bytes
logger.debug(f"initializer={tensor.name}, adjusted_sparsity={sparsity}")
return (object(), sparsity)
values_tensor = onnx.helper.make_tensor(tensor.name,
tensor.data_type,
[len(values)],
np_values.tobytes(),
raw=True)
indicies_tensor = onnx.helper.make_tensor(tensor.name + '_indicies',
TensorProto.INT64,
[len(indicies)], np.array(indicies).astype(np.int64))
ind_data_type,
[ind_len],
np_indices.tobytes(),
raw=True)
sparse_tensor = onnx.helper.make_sparse_tensor(values_tensor, indicies_tensor, tensor.dims)
return (sparse_tensor, sparsity)
@ -88,7 +141,7 @@ def convert_tensor_to_sparse(tensor, tolerance): # type: (TensorProto) -> Tuple
def convert_initializers(model,
exclude_names,
sparsity_threshold,
tolerance): # type: (ModelProto, List[str], float) -> None
tolerance): # type: (ModelProto, List[str], float, float) -> None
graph = model.graph
converted_sparse = []
remaining_initializers = []
@ -100,7 +153,7 @@ def convert_initializers(model,
logger.info(f"initializer={initializer.name} contains bool, not converted")
remaining_initializers.append(initializer)
continue
sparse_tensor, sparsity = convert_tensor_to_sparse(initializer, tolerance)
sparse_tensor, sparsity = convert_tensor_to_sparse(initializer, sparsity_threshold, tolerance)
if sparsity >= sparsity_threshold:
logger.info(f"initializer={initializer.name} converted. sparsity={sparsity}")
converted_sparse.append(sparse_tensor)