mirror of
https://github.com/saymrwulf/onnxruntime.git
synced 2026-07-05 04:17:53 +00:00
* Move UnpackInitializerData to use vector * minor update * minor update * Update getclipminmax * Change uint8_t -> std::byte * fix build break * Revert "fix build break" This reverts commit 1ffa284ac54fd605c0651954ea4fb2cab0464526. * Revert "Change uint8_t -> std::byte" This reverts commit 764a656ebac6610cdf1f25e63770330c3aedece6. * Add todo notes for extra vector alignment * add check result size
1222 lines
51 KiB
C++
1222 lines
51 KiB
C++
// Copyright (c) Microsoft Corporation. All rights reserved.
|
|
// Licensed under the MIT License.
|
|
|
|
#include "core/framework/tensorprotoutils.h"
|
|
|
|
#include <memory>
|
|
#include <algorithm>
|
|
#include <limits>
|
|
#include <gsl/gsl>
|
|
|
|
#include "core/common/logging/logging.h"
|
|
#include "core/graph/onnx_protobuf.h"
|
|
#include "core/framework/endian_utils.h"
|
|
#include "core/framework/op_kernel.h"
|
|
#include "core/framework/tensor.h"
|
|
#include "core/framework/ort_value_pattern_planner.h"
|
|
#include "core/framework/allocator.h"
|
|
#include "core/framework/callback.h"
|
|
#include "core/framework/data_types.h"
|
|
#include "core/platform/path_lib.h"
|
|
#include "core/session/ort_apis.h"
|
|
#include "onnx/defs/tensor_proto_util.h"
|
|
|
|
using namespace ONNX_NAMESPACE;
|
|
using namespace ::onnxruntime::common;
|
|
|
|
// Provide template specializations for onnxruntime-specific types.
|
|
namespace ONNX_NAMESPACE {
|
|
template <>
|
|
TensorProto ToTensor<onnxruntime::MLFloat16>(const onnxruntime::MLFloat16& value) {
|
|
TensorProto t;
|
|
t.set_data_type(TensorProto_DataType_FLOAT16);
|
|
t.add_int32_data(value.val);
|
|
return t;
|
|
}
|
|
|
|
template <>
|
|
TensorProto ToTensor<onnxruntime::MLFloat16>(const std::vector<onnxruntime::MLFloat16>& values) {
|
|
TensorProto t;
|
|
t.clear_int32_data();
|
|
t.set_data_type(TensorProto_DataType_FLOAT16);
|
|
for (const onnxruntime::MLFloat16& val : values) {
|
|
t.add_int32_data(val.val);
|
|
}
|
|
return t;
|
|
}
|
|
|
|
template <>
|
|
TensorProto ToTensor<onnxruntime::BFloat16>(const onnxruntime::BFloat16& value) {
|
|
TensorProto t;
|
|
t.set_data_type(TensorProto_DataType_BFLOAT16);
|
|
t.add_int32_data(value.val);
|
|
return t;
|
|
}
|
|
|
|
template <>
|
|
TensorProto ToTensor<onnxruntime::BFloat16>(const std::vector<onnxruntime::BFloat16>& values) {
|
|
TensorProto t;
|
|
t.clear_int32_data();
|
|
t.set_data_type(TensorProto_DataType_BFLOAT16);
|
|
for (const onnxruntime::BFloat16& val : values) {
|
|
t.add_int32_data(val.val);
|
|
}
|
|
return t;
|
|
}
|
|
|
|
// Equality for shape dimensions.
//  - l has a concrete value : equal iff r has a concrete value and the values match.
//  - l has a symbolic param : equal iff r has a param, the names match and the name is non-empty.
//  - l has neither (unknown): never equal to anything.
bool operator==(const ONNX_NAMESPACE::TensorShapeProto_Dimension& l,
                const ONNX_NAMESPACE::TensorShapeProto_Dimension& r) {
  if (l.has_dim_value()) {
    if (!r.has_dim_value()) return false;
    return l.dim_value() == r.dim_value();
  }

  if (l.has_dim_param()) {
    if (!r.has_dim_param()) return false;
    if (!(l.dim_param() == r.dim_param())) return false;
    return !l.dim_param().empty();
  }

  // l is unknown - has neither dim_value nor dim_param
  return false;
}
|
|
|
|
// Inequality for shape dimensions: the exact negation of operator== above.
bool operator!=(const ONNX_NAMESPACE::TensorShapeProto_Dimension& l,
                const ONNX_NAMESPACE::TensorShapeProto_Dimension& r) {
  const bool dims_equal = (l == r);
  return !dims_equal;
}
|
|
|
|
} // namespace ONNX_NAMESPACE
|
|
|
|
namespace {
|
|
|
|
// This function doesn't support string tensors
|
|
static Status UnpackTensorWithRawDataImpl(const void* raw_data, size_t raw_data_len,
|
|
size_t expected_num_elements, size_t element_size,
|
|
/*out*/ unsigned char* p_data) {
|
|
auto src = gsl::make_span<const unsigned char>(static_cast<const unsigned char*>(raw_data), raw_data_len);
|
|
auto dst = gsl::make_span<unsigned char>(p_data, expected_num_elements * element_size);
|
|
|
|
size_t expected_size_in_bytes;
|
|
if (!onnxruntime::IAllocator::CalcMemSizeForArray(expected_num_elements, element_size, &expected_size_in_bytes)) {
|
|
return Status(onnxruntime::common::ONNXRUNTIME, onnxruntime::common::INVALID_ARGUMENT, "size overflow");
|
|
}
|
|
|
|
if (dst.size_bytes() != expected_size_in_bytes) {
|
|
return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
|
|
"UnpackTensor: the pre-allocated size does not match the raw data size, expected ",
|
|
expected_size_in_bytes, ", got ", dst.size_bytes());
|
|
}
|
|
|
|
// ReadLittleEndian checks src and dst buffers are the same size
|
|
return onnxruntime::utils::ReadLittleEndian(element_size, src, dst);
|
|
}
|
|
|
|
template <typename T>
|
|
Status UnpackTensorWithRawData(const void* raw_data, size_t raw_data_len, size_t expected_num_elements,
|
|
/*out*/ T* p_data) {
|
|
static_assert(std::is_trivially_copyable<T>::value, "T must be trivially copyable");
|
|
|
|
return UnpackTensorWithRawDataImpl(raw_data, raw_data_len, expected_num_elements, sizeof(T),
|
|
reinterpret_cast<unsigned char*>(p_data));
|
|
}
|
|
|
|
// Resolves where the external data of `tensor_proto` lives and how large it must be.
//
// tensor_proto       : tensor that must carry external data and must not be a STRING tensor.
// tensor_proto_dir   : directory prepended to the relative external path; when nullptr the relative
//                      path is used unchanged.
// external_file_path : [out] path of the external data file.
// file_offset        : [out] offset of the tensor data inside that file.
// tensor_byte_size   : [out] byte size computed from the tensor's type and dims.
//
// Fails if the proto has no external data, is a STRING tensor, its external_data entries cannot be
// parsed, its size cannot be computed, or a non-zero declared length differs from the computed size.
static Status GetExternalDataInfo(const ONNX_NAMESPACE::TensorProto& tensor_proto,
                                  const ORTCHAR_T* tensor_proto_dir,
                                  std::basic_string<ORTCHAR_T>& external_file_path,
                                  onnxruntime::FileOffsetType& file_offset,
                                  SafeInt<size_t>& tensor_byte_size) {
  ORT_RETURN_IF_NOT(onnxruntime::utils::HasExternalData(tensor_proto),
                    "Tensor does not have external data to read from.");

  ORT_RETURN_IF_NOT(tensor_proto.data_type() != ONNX_NAMESPACE::TensorProto_DataType_STRING,
                    "External data type cannot be UNDEFINED or STRING.");

  std::unique_ptr<onnxruntime::ExternalDataInfo> info;
  ORT_RETURN_IF_ERROR(onnxruntime::ExternalDataInfo::Create(tensor_proto.external_data(), info));

  // Without a base directory the relative path is taken as-is.
  if (tensor_proto_dir == nullptr) {
    external_file_path = info->GetRelPath();
  } else {
    external_file_path = onnxruntime::ConcatPathComponent<ORTCHAR_T>(tensor_proto_dir, info->GetRelPath());
  }

  file_offset = info->GetOffset();

  ORT_RETURN_IF_ERROR(onnxruntime::utils::GetSizeInBytesFromTensorProto<0>(tensor_proto, &tensor_byte_size));

  // A declared length of 0 is accepted without comparison; any other value must match the computed size.
  const size_t declared_length = info->GetLength();
  ORT_RETURN_IF_NOT(declared_length == 0 || declared_length == tensor_byte_size,
                    "TensorProto external data size mismatch. Computed size: ", *&tensor_byte_size,
                    ", external_data.length: ", declared_length);

  return Status::OK();
}
|
|
|
|
// Read external data for tensor in unint8_t* form and return Status::OK() if the data is read successfully.
|
|
// Uses the tensor_proto_dir to construct the full path for external data. If tensor_proto_dir == nullptr
|
|
// then uses the current directory instead.
|
|
// This function does not unpack string_data of an initializer tensor
|
|
static Status ReadExternalDataForTensor(const ONNX_NAMESPACE::TensorProto& tensor_proto,
|
|
const ORTCHAR_T* tensor_proto_dir,
|
|
std::vector<uint8_t>& unpacked_tensor) {
|
|
std::basic_string<ORTCHAR_T> external_file_path;
|
|
onnxruntime::FileOffsetType file_offset;
|
|
SafeInt<size_t> tensor_byte_size;
|
|
ORT_RETURN_IF_ERROR(GetExternalDataInfo(
|
|
tensor_proto,
|
|
tensor_proto_dir,
|
|
external_file_path,
|
|
file_offset,
|
|
tensor_byte_size));
|
|
|
|
unpacked_tensor.resize(tensor_byte_size);
|
|
ORT_RETURN_IF_ERROR(onnxruntime::Env::Default().ReadFileIntoBuffer(
|
|
external_file_path.c_str(),
|
|
file_offset,
|
|
tensor_byte_size,
|
|
gsl::make_span(reinterpret_cast<char*>(unpacked_tensor.data()), tensor_byte_size)));
|
|
|
|
return Status::OK();
|
|
}
|
|
} // namespace
|
|
|
|
namespace onnxruntime {
|
|
namespace utils {
|
|
#if !defined(ORT_MINIMAL_BUILD)
|
|
// Loads the external data of `tensor` and copies it into `p_data` through ReadLittleEndian.
//
// tensor                : tensor whose data lives in an external file.
// tensor_proto_dir      : directory used to resolve the external file (may be nullptr).
// expected_num_elements : number of elements the destination holds.
// element_size          : size in bytes of one element.
// p_data                : destination buffer; must not be null.
//
// Returns an error if p_data is null, if expected_num_elements * element_size overflows size_t,
// if the external data cannot be read, or if ReadLittleEndian rejects the buffers.
static Status UnpackTensorWithExternalDataImpl(const ONNX_NAMESPACE::TensorProto& tensor,
                                               const ORTCHAR_T* tensor_proto_dir,
                                               size_t expected_num_elements, size_t element_size,
                                               /*out*/ unsigned char* p_data) {
  ORT_RETURN_IF(nullptr == p_data, "nullptr == p_data");

  // Same overflow guard as the raw-data path: never describe the destination with a wrapped product.
  size_t expected_size_in_bytes = 0;
  if (!IAllocator::CalcMemSizeForArray(expected_num_elements, element_size, &expected_size_in_bytes)) {
    return Status(common::ONNXRUNTIME, common::INVALID_ARGUMENT, "size overflow");
  }

  std::vector<uint8_t> unpacked_tensor;
  ORT_RETURN_IF_ERROR(ReadExternalDataForTensor(tensor, tensor_proto_dir, unpacked_tensor));

  // ReadLittleEndian checks src and dst buffers are the same size
  auto src_span = gsl::make_span(unpacked_tensor.data(), unpacked_tensor.size());
  auto dst_span = gsl::make_span(p_data, expected_size_in_bytes);

  return onnxruntime::utils::ReadLittleEndian(element_size, src_span, dst_span);
}
|
|
|
|
template <typename T>
|
|
Status UnpackTensorWithExternalData(const ONNX_NAMESPACE::TensorProto& tensor,
|
|
const ORTCHAR_T* tensor_proto_dir, size_t expected_num_elements,
|
|
/*out*/ T* p_data) {
|
|
static_assert(std::is_trivially_copyable<T>::value, "T must be trivially copyable");
|
|
|
|
return UnpackTensorWithExternalDataImpl(tensor, tensor_proto_dir, expected_num_elements, sizeof(T),
|
|
reinterpret_cast<unsigned char*>(p_data));
|
|
}
|
|
|
|
#define INSTANTIATE_UNPACK_EXTERNAL_TENSOR(type) \
|
|
template Status UnpackTensorWithExternalData(const ONNX_NAMESPACE::TensorProto&, const ORTCHAR_T*, size_t, type*);
|
|
|
|
INSTANTIATE_UNPACK_EXTERNAL_TENSOR(float)
|
|
INSTANTIATE_UNPACK_EXTERNAL_TENSOR(double)
|
|
INSTANTIATE_UNPACK_EXTERNAL_TENSOR(uint8_t)
|
|
INSTANTIATE_UNPACK_EXTERNAL_TENSOR(int8_t)
|
|
INSTANTIATE_UNPACK_EXTERNAL_TENSOR(int16_t)
|
|
INSTANTIATE_UNPACK_EXTERNAL_TENSOR(uint16_t)
|
|
INSTANTIATE_UNPACK_EXTERNAL_TENSOR(int32_t)
|
|
INSTANTIATE_UNPACK_EXTERNAL_TENSOR(int64_t)
|
|
INSTANTIATE_UNPACK_EXTERNAL_TENSOR(uint64_t)
|
|
INSTANTIATE_UNPACK_EXTERNAL_TENSOR(uint32_t)
|
|
INSTANTIATE_UNPACK_EXTERNAL_TENSOR(bool)
|
|
INSTANTIATE_UNPACK_EXTERNAL_TENSOR(MLFloat16)
|
|
INSTANTIATE_UNPACK_EXTERNAL_TENSOR(BFloat16)
|
|
|
|
template <>
|
|
Status UnpackTensorWithExternalData(const ONNX_NAMESPACE::TensorProto& /*tensor*/,
|
|
const ORTCHAR_T* /*tensor_proto_dir*/, size_t /*expected_num_elements*/,
|
|
/*out*/ std::string* /*p_data*/) {
|
|
return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "External data type cannot be STRING.");
|
|
}
|
|
#endif //!defined(ORT_MINIMAL_BUILD)
|
|
|
|
// implementation of type specific unpack of data contained within the TensorProto
|
|
template <typename T>
|
|
Status UnpackTensor(const ONNX_NAMESPACE::TensorProto& tensor, const void* raw_data, size_t raw_data_len,
|
|
/*out*/ T* p_data, size_t expected_num_elements);
|
|
|
|
#define DEFINE_UNPACK_TENSOR_IMPL(T, Type, field_name, field_size) \
|
|
template <> \
|
|
Status UnpackTensor(const ONNX_NAMESPACE::TensorProto& tensor, const void* raw_data, size_t raw_data_len, \
|
|
/*out*/ T* p_data, size_t expected_num_elements) { \
|
|
if (nullptr == p_data) { \
|
|
const size_t size = raw_data != nullptr ? raw_data_len : tensor.field_size(); \
|
|
if (size == 0) return Status::OK(); \
|
|
return Status(common::ONNXRUNTIME, common::INVALID_ARGUMENT); \
|
|
} \
|
|
if (nullptr == p_data || Type != tensor.data_type()) { \
|
|
return Status(common::ONNXRUNTIME, common::INVALID_ARGUMENT); \
|
|
} \
|
|
if (raw_data != nullptr) { \
|
|
return UnpackTensorWithRawData(raw_data, raw_data_len, expected_num_elements, p_data); \
|
|
} \
|
|
if (static_cast<size_t>(tensor.field_size()) != expected_num_elements) \
|
|
return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, \
|
|
"corrupted protobuf data: tensor shape size(", expected_num_elements, \
|
|
") does not match the data size(", tensor.field_size(), ") in proto"); \
|
|
auto& data = tensor.field_name(); \
|
|
for (auto data_iter = data.cbegin(); data_iter != data.cend(); ++data_iter) \
|
|
*p_data++ = *reinterpret_cast<const T*>(data_iter); \
|
|
return Status::OK(); \
|
|
}
|
|
|
|
// TODO: complex64 complex128
|
|
DEFINE_UNPACK_TENSOR_IMPL(float, ONNX_NAMESPACE::TensorProto_DataType_FLOAT, float_data, float_data_size)
|
|
DEFINE_UNPACK_TENSOR_IMPL(double, ONNX_NAMESPACE::TensorProto_DataType_DOUBLE, double_data, double_data_size);
|
|
DEFINE_UNPACK_TENSOR_IMPL(uint8_t, ONNX_NAMESPACE::TensorProto_DataType_UINT8, int32_data, int32_data_size)
|
|
DEFINE_UNPACK_TENSOR_IMPL(int8_t, ONNX_NAMESPACE::TensorProto_DataType_INT8, int32_data, int32_data_size)
|
|
DEFINE_UNPACK_TENSOR_IMPL(int16_t, ONNX_NAMESPACE::TensorProto_DataType_INT16, int32_data, int32_data_size)
|
|
DEFINE_UNPACK_TENSOR_IMPL(uint16_t, ONNX_NAMESPACE::TensorProto_DataType_UINT16, int32_data, int32_data_size)
|
|
DEFINE_UNPACK_TENSOR_IMPL(int32_t, ONNX_NAMESPACE::TensorProto_DataType_INT32, int32_data, int32_data_size)
|
|
DEFINE_UNPACK_TENSOR_IMPL(int64_t, ONNX_NAMESPACE::TensorProto_DataType_INT64, int64_data, int64_data_size)
|
|
DEFINE_UNPACK_TENSOR_IMPL(uint64_t, ONNX_NAMESPACE::TensorProto_DataType_UINT64, uint64_data, uint64_data_size)
|
|
DEFINE_UNPACK_TENSOR_IMPL(uint32_t, ONNX_NAMESPACE::TensorProto_DataType_UINT32, uint64_data, uint64_data_size)
|
|
|
|
//
|
|
// Specializations of UnpackTensor that need custom handling for the input type
|
|
//
|
|
|
|
// UnpackTensor<std::string>. Note: doesn't support raw data
|
|
template <>
|
|
Status UnpackTensor(const ONNX_NAMESPACE::TensorProto& tensor, const void* /*raw_data*/, size_t /*raw_data_len*/,
|
|
/*out*/ std::string* p_data, size_t expected_size) {
|
|
if (nullptr == p_data) {
|
|
if (tensor.string_data_size() == 0) return Status::OK();
|
|
return Status(common::ONNXRUNTIME, common::INVALID_ARGUMENT);
|
|
}
|
|
if (ONNX_NAMESPACE::TensorProto_DataType_STRING != tensor.data_type()) {
|
|
return Status(common::ONNXRUNTIME, common::INVALID_ARGUMENT);
|
|
}
|
|
|
|
if (static_cast<size_t>(tensor.string_data_size()) != expected_size)
|
|
return Status(common::ONNXRUNTIME, common::INVALID_ARGUMENT,
|
|
"UnpackTensor: the pre-allocate size does not match the size in proto");
|
|
|
|
auto& string_data = tensor.string_data();
|
|
for (const auto& iter : string_data) {
|
|
*p_data++ = iter;
|
|
}
|
|
|
|
return Status::OK();
|
|
}
|
|
|
|
// UnpackTensor<bool>
|
|
template <>
|
|
Status UnpackTensor(const ONNX_NAMESPACE::TensorProto& tensor, const void* raw_data, size_t raw_data_len,
|
|
/*out*/ bool* p_data, size_t expected_size) {
|
|
if (nullptr == p_data) {
|
|
const size_t size = raw_data != nullptr ? raw_data_len : tensor.int32_data_size();
|
|
if (size == 0) return Status::OK();
|
|
return Status(common::ONNXRUNTIME, common::INVALID_ARGUMENT);
|
|
}
|
|
if (ONNX_NAMESPACE::TensorProto_DataType_BOOL != tensor.data_type()) {
|
|
return Status(common::ONNXRUNTIME, common::INVALID_ARGUMENT);
|
|
}
|
|
|
|
if (raw_data != nullptr) {
|
|
return UnpackTensorWithRawData(raw_data, raw_data_len, expected_size, p_data);
|
|
}
|
|
|
|
if (static_cast<size_t>(tensor.int32_data_size()) != expected_size)
|
|
return Status(common::ONNXRUNTIME, common::INVALID_ARGUMENT,
|
|
"UnpackTensor: the pre-allocate size does not match the size in proto");
|
|
for (int iter : tensor.int32_data()) {
|
|
*p_data++ = static_cast<bool>(iter);
|
|
}
|
|
|
|
return Status::OK();
|
|
}
|
|
|
|
// UnpackTensor<MLFloat16>
|
|
template <>
|
|
Status UnpackTensor(const ONNX_NAMESPACE::TensorProto& tensor, const void* raw_data, size_t raw_data_len,
|
|
/*out*/ MLFloat16* p_data, size_t expected_size) {
|
|
if (nullptr == p_data) {
|
|
const size_t size = raw_data != nullptr ? raw_data_len : tensor.int32_data_size();
|
|
if (size == 0) return Status::OK();
|
|
return Status(common::ONNXRUNTIME, common::INVALID_ARGUMENT);
|
|
}
|
|
if (ONNX_NAMESPACE::TensorProto_DataType_FLOAT16 != tensor.data_type()) {
|
|
return Status(common::ONNXRUNTIME, common::INVALID_ARGUMENT);
|
|
}
|
|
|
|
if (raw_data != nullptr) {
|
|
return UnpackTensorWithRawData(raw_data, raw_data_len, expected_size, p_data);
|
|
}
|
|
|
|
if (static_cast<size_t>(tensor.int32_data_size()) != expected_size)
|
|
return Status(common::ONNXRUNTIME, common::INVALID_ARGUMENT,
|
|
"UnpackTensor: the pre-allocate size does not match the size in proto");
|
|
|
|
constexpr int max_value = std::numeric_limits<uint16_t>::max();
|
|
for (int i = 0; i < static_cast<int>(expected_size); i++) {
|
|
int v = tensor.int32_data()[i];
|
|
if (v < 0 || v > max_value) {
|
|
return Status(common::ONNXRUNTIME, common::INVALID_ARGUMENT, "data overflow");
|
|
}
|
|
p_data[i] = MLFloat16(static_cast<uint16_t>(v));
|
|
}
|
|
|
|
return Status::OK();
|
|
}
|
|
|
|
// UnpackTensor<BFloat16>
|
|
template <>
|
|
Status UnpackTensor(const ONNX_NAMESPACE::TensorProto& tensor, const void* raw_data, size_t raw_data_len,
|
|
/*out*/ BFloat16* p_data, size_t expected_size) {
|
|
if (nullptr == p_data) {
|
|
const size_t size = raw_data != nullptr ? raw_data_len : tensor.int32_data_size();
|
|
if (size == 0)
|
|
return Status::OK();
|
|
|
|
return Status(common::ONNXRUNTIME, common::INVALID_ARGUMENT);
|
|
}
|
|
if (ONNX_NAMESPACE::TensorProto_DataType_BFLOAT16 != tensor.data_type()) {
|
|
return Status(common::ONNXRUNTIME, common::INVALID_ARGUMENT);
|
|
}
|
|
|
|
if (raw_data != nullptr) {
|
|
return UnpackTensorWithRawData(raw_data, raw_data_len, expected_size, p_data);
|
|
}
|
|
|
|
if (static_cast<size_t>(tensor.int32_data_size()) != expected_size)
|
|
return Status(common::ONNXRUNTIME, common::INVALID_ARGUMENT,
|
|
"UnpackTensor: the pre-allocate size does not match the size in proto");
|
|
|
|
constexpr int max_value = std::numeric_limits<uint16_t>::max();
|
|
for (int i = 0; i < static_cast<int>(expected_size); i++) {
|
|
int v = tensor.int32_data()[i];
|
|
if (v < 0 || v > max_value) {
|
|
return Status(common::ONNXRUNTIME, common::INVALID_ARGUMENT, "data overflow");
|
|
}
|
|
p_data[i] = BFloat16(static_cast<uint16_t>(v));
|
|
}
|
|
|
|
return Status::OK();
|
|
}
|
|
|
|
// UnpackTensor from raw data, external data or the type specific data field.
|
|
// Uses the model path to construct the full path for loading external data. In case when model_path is empty
|
|
// it uses current directory.
|
|
template <typename T>
|
|
Status UnpackTensor(const ONNX_NAMESPACE::TensorProto& tensor, const Path& model_path,
|
|
/*out*/ T* p_data, size_t expected_num_elements) {
|
|
#if !defined(ORT_MINIMAL_BUILD)
|
|
if (HasExternalData(tensor)) {
|
|
return UnpackTensorWithExternalData(
|
|
tensor,
|
|
model_path.IsEmpty() ? nullptr : model_path.ParentPath().ToPathString().c_str(),
|
|
expected_num_elements,
|
|
p_data);
|
|
}
|
|
#else
|
|
ORT_UNUSED_PARAMETER(model_path);
|
|
ORT_RETURN_IF(HasExternalData(tensor), "TensorProto with external data is not supported in ORT minimal build.");
|
|
#endif
|
|
|
|
return HasRawData(tensor)
|
|
? UnpackTensor(tensor, tensor.raw_data().data(), tensor.raw_data().size(), p_data, expected_num_elements)
|
|
: UnpackTensor(tensor, nullptr, 0, p_data, expected_num_elements);
|
|
}
|
|
|
|
// instantiate the UnpackTensor variant that supports external data
|
|
#define INSTANTIATE_UNPACK_TENSOR(type) \
|
|
template Status UnpackTensor(const ONNX_NAMESPACE::TensorProto&, const Path&, type* p_data, size_t);
|
|
|
|
INSTANTIATE_UNPACK_TENSOR(float)
|
|
INSTANTIATE_UNPACK_TENSOR(double)
|
|
INSTANTIATE_UNPACK_TENSOR(uint8_t)
|
|
INSTANTIATE_UNPACK_TENSOR(int8_t)
|
|
INSTANTIATE_UNPACK_TENSOR(int16_t)
|
|
INSTANTIATE_UNPACK_TENSOR(uint16_t)
|
|
INSTANTIATE_UNPACK_TENSOR(int32_t)
|
|
INSTANTIATE_UNPACK_TENSOR(int64_t)
|
|
INSTANTIATE_UNPACK_TENSOR(uint64_t)
|
|
INSTANTIATE_UNPACK_TENSOR(uint32_t)
|
|
INSTANTIATE_UNPACK_TENSOR(bool)
|
|
INSTANTIATE_UNPACK_TENSOR(MLFloat16)
|
|
INSTANTIATE_UNPACK_TENSOR(BFloat16)
|
|
INSTANTIATE_UNPACK_TENSOR(std::string)
|
|
|
|
#define CASE_PROTO_TRACE(X, Y) \
|
|
case ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_##X: \
|
|
if (!IAllocator::CalcMemSizeForArrayWithAlignment<alignment>(size, sizeof(Y), out)) { \
|
|
return common::Status(common::ONNXRUNTIME, common::INVALID_ARGUMENT, "Invalid TensorProto"); \
|
|
} \
|
|
break;
|
|
|
|
template <size_t alignment>
|
|
common::Status GetSizeInBytesFromTensorProto(const ONNX_NAMESPACE::TensorProto& tensor_proto, size_t* out) {
|
|
const auto& dims = tensor_proto.dims();
|
|
size_t size = 1;
|
|
for (google::protobuf::int64 dim : dims) {
|
|
if (dim < 0 || static_cast<uint64_t>(dim) >= std::numeric_limits<size_t>::max()) {
|
|
return common::Status(common::ONNXRUNTIME, common::INVALID_ARGUMENT, "Invalid TensorProto");
|
|
}
|
|
if (!IAllocator::CalcMemSizeForArray(size, static_cast<size_t>(dim), &size)) {
|
|
return common::Status(common::ONNXRUNTIME, common::INVALID_ARGUMENT, "Invalid TensorProto");
|
|
}
|
|
}
|
|
switch (tensor_proto.data_type()) {
|
|
CASE_PROTO_TRACE(FLOAT, float);
|
|
CASE_PROTO_TRACE(DOUBLE, double);
|
|
CASE_PROTO_TRACE(BOOL, bool);
|
|
CASE_PROTO_TRACE(INT8, int8_t);
|
|
CASE_PROTO_TRACE(INT16, int16_t);
|
|
CASE_PROTO_TRACE(INT32, int32_t);
|
|
CASE_PROTO_TRACE(INT64, int64_t);
|
|
CASE_PROTO_TRACE(UINT8, uint8_t);
|
|
CASE_PROTO_TRACE(UINT16, uint16_t);
|
|
CASE_PROTO_TRACE(UINT32, uint32_t);
|
|
CASE_PROTO_TRACE(UINT64, uint64_t);
|
|
CASE_PROTO_TRACE(FLOAT16, MLFloat16);
|
|
CASE_PROTO_TRACE(BFLOAT16, BFloat16);
|
|
CASE_PROTO_TRACE(STRING, std::string);
|
|
default:
|
|
return common::Status(common::ONNXRUNTIME, common::NOT_IMPLEMENTED);
|
|
}
|
|
return Status::OK();
|
|
}
|
|
|
|
// Converts a TensorShapeProto into a TensorShape.
// Dimensions with a concrete value keep it; every other dimension becomes -1.
TensorShape GetTensorShapeFromTensorShapeProto(const ONNX_NAMESPACE::TensorShapeProto& tensor_shape_proto) {
  const auto& proto_dims = tensor_shape_proto.dim();

  std::vector<int64_t> shape_values;
  shape_values.reserve(static_cast<size_t>(proto_dims.size()));
  for (const auto& proto_dim : proto_dims) {
    if (HasDimValue(proto_dim)) {
      shape_values.push_back(proto_dim.dim_value());
    } else {
      shape_values.push_back(-1); /* symbolic dimensions are represented as -1 in onnxruntime*/
    }
  }

  return TensorShape(std::move(shape_values));
}
|
|
|
|
// Returns the dims of `tensor_proto` as a vector, in the order they are stored.
std::vector<int64_t> GetTensorShapeFromTensorProto(const ONNX_NAMESPACE::TensorProto& tensor_proto) {
  const auto& proto_dims = tensor_proto.dims();

  std::vector<int64_t> shape_values;
  shape_values.reserve(static_cast<size_t>(proto_dims.size()));
  for (const auto dim : proto_dims) {
    shape_values.push_back(dim);
  }

  return shape_values;
}
|
|
|
|
struct UnInitializeParam {
|
|
void* preallocated;
|
|
size_t preallocated_size;
|
|
ONNXTensorElementDataType ele_type;
|
|
};
|
|
|
|
// Prepares a raw buffer to be used as tensor storage.
// Only string tensors need work: a std::string is constructed in place in each of the
// input_len / sizeof(std::string) slots of `input`. For any other element type, or a null buffer,
// nothing is done. Returns nullptr on success, or a runtime-exception status if construction throws.
ORT_API_STATUS_IMPL(OrtInitializeBufferForTensor, _In_opt_ void* input, size_t input_len,
                    enum ONNXTensorElementDataType type) {
  OrtStatus* status = nullptr;
  ORT_TRY {
    const bool needs_construction = (type == ONNX_TENSOR_ELEMENT_DATA_TYPE_STRING) && (input != nullptr);
    if (!needs_construction) return nullptr;

    std::string* const first = static_cast<std::string*>(input);
    std::string* const last = first + input_len / sizeof(std::string);
    for (std::string* slot = first; slot != last; ++slot) {
      new (slot) std::string();
    }
  }
  ORT_CATCH(const std::exception& ex) {
    ORT_HANDLE_EXCEPTION([&]() {
      status = OrtApis::CreateStatus(ORT_RUNTIME_EXCEPTION, ex.what());
    });
  }

  return status;
}
|
|
|
|
// Counterpart of OrtInitializeBufferForTensor: for a string tensor buffer, runs the destructor of
// each of the input_len / sizeof(std::string) strings stored in `input`. The memory itself is not
// released here. Does nothing for other element types or a null buffer.
ORT_API(void, OrtUninitializeBuffer, _In_opt_ void* input, size_t input_len, enum ONNXTensorElementDataType type) {
  const bool holds_strings = (type == ONNX_TENSOR_ELEMENT_DATA_TYPE_STRING) && (input != nullptr);
  if (!holds_strings) return;

  using std::string;
  string* const first = static_cast<string*>(input);
  string* const last = first + input_len / sizeof(string);
  for (string* slot = first; slot != last; ++slot) {
    slot->~string();
  }
}
|
|
|
|
class AutoDelete {
|
|
public:
|
|
OrtCallback d{nullptr, nullptr};
|
|
AutoDelete() = default;
|
|
ORT_DISALLOW_COPY_ASSIGNMENT_AND_MOVE(AutoDelete);
|
|
~AutoDelete() {
|
|
if (d.f != nullptr) {
|
|
d.f(d.param);
|
|
}
|
|
}
|
|
};
|
|
|
|
// Deleter callback for buffers allocated as char[]: releases `param` with delete[].
static void DeleteCharArray(void* param) noexcept {
  delete[] static_cast<char*>(param);
}
|
|
|
|
// Makes `length` bytes of `file_path`, starting at `offset`, available in memory.
//   length     : number of bytes wanted; 0 means "ask the environment for the file length"
//   raw_buffer : [out] start of the data
//   deleter    : [out] callback that releases raw_buffer
// The file is memory-mapped when possible; if mapping fails the bytes are read into a heap buffer
// that is released with DeleteCharArray.
static Status GetFileContent(
    const Env& env, const ORTCHAR_T* file_path, FileOffsetType offset, size_t length,
    void*& raw_buffer, OrtCallback& deleter) {
  // query length if it is 0
  if (length == 0) {
    ORT_RETURN_IF_ERROR(env.GetFileLength(file_path, length));
  }

  // first, try to map into memory
  {
    Env::MappedMemoryPtr mapping{};
    if (env.MapFileIntoMemory(file_path, offset, length, mapping).IsOK()) {
      // Hand both the mapping and its release callback over to the caller.
      deleter = mapping.get_deleter().callback;
      raw_buffer = mapping.release();
      return Status::OK();
    }
  }

  // if that fails, try to copy
  std::unique_ptr<char[]> heap_copy = std::make_unique<char[]>(length);
  char* const heap_bytes = heap_copy.get();
  ORT_RETURN_IF_ERROR(env.ReadFileIntoBuffer(
      file_path, offset, length, gsl::make_span(heap_bytes, length)));

  // Ownership moves to the caller only after the read succeeded.
  deleter = OrtCallback{DeleteCharArray, heap_bytes};
  raw_buffer = heap_copy.release();
  return Status::OK();
}
|
|
|
|
// One switch case of TensorProtoToTensor: for proto data type X, unpacks the tensor data into the
// pre-allocated buffer viewed as an array of C++ type Y, returning early on failure.
// Relies on `tensor_proto`, `raw_data`, `raw_data_len`, `preallocated` and `tensor_size` being in scope.
#define CASE_PROTO(X, Y) \
  case ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_##X: \
    ORT_RETURN_IF_ERROR( \
        UnpackTensor<Y>(tensor_proto, raw_data, raw_data_len, \
                        static_cast<Y*>(preallocated), static_cast<size_t>(tensor_size))); \
    break;
|
|
|
|
/**
|
|
* @brief Convert tensor_proto to tensor format and store it to pre-allocated tensor
|
|
* @param env
|
|
* @param model_path
|
|
* @param tensor_proto tensor data in protobuf format
|
|
* @param tensorp pre-allocated tensor object, where we store the data
|
|
* @return
|
|
*/
|
|
Status TensorProtoToTensor(const Env& env, const ORTCHAR_T* model_path,
|
|
const ONNX_NAMESPACE::TensorProto& tensor_proto,
|
|
Tensor& tensor) {
|
|
// Validate tensor compatibility
|
|
std::vector<int64_t> tensor_shape_vec = GetTensorShapeFromTensorProto(tensor_proto);
|
|
if (tensor_shape_vec != tensor.Shape().GetDims()) {
|
|
return Status(common::ONNXRUNTIME, common::INVALID_ARGUMENT, "TensorProtoToTensor() tensor shape mismatch!");
|
|
}
|
|
const DataTypeImpl* const source_type = DataTypeImpl::TensorTypeFromONNXEnum(tensor_proto.data_type())->GetElementType();
|
|
if (source_type->Size() > tensor.DataType()->Size()) {
|
|
return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, "TensorProto type ", DataTypeImpl::ToString(source_type),
|
|
" can not be writen into Tensor type ", DataTypeImpl::ToString(tensor.DataType()));
|
|
}
|
|
|
|
// find raw data in proto buf
|
|
void* raw_data = nullptr;
|
|
SafeInt<size_t> raw_data_len = 0;
|
|
AutoDelete deleter_for_file_data;
|
|
|
|
if (utils::HasExternalData(tensor_proto)) {
|
|
// Get the external data info
|
|
std::basic_string<ORTCHAR_T> external_data_file_path;
|
|
FileOffsetType file_offset;
|
|
std::basic_string<ORTCHAR_T> tensor_proto_dir;
|
|
if (model_path != nullptr) {
|
|
ORT_RETURN_IF_ERROR(GetDirNameFromFilePath(model_path, tensor_proto_dir));
|
|
}
|
|
ORT_RETURN_IF_ERROR(GetExternalDataInfo(
|
|
tensor_proto,
|
|
tensor_proto_dir.size() == 0 ? nullptr : tensor_proto_dir.c_str(),
|
|
external_data_file_path, file_offset, raw_data_len));
|
|
|
|
// load the file
|
|
ORT_RETURN_IF_ERROR(GetFileContent(
|
|
env, external_data_file_path.c_str(), file_offset, raw_data_len,
|
|
raw_data, deleter_for_file_data.d));
|
|
} else if (utils::HasRawData(tensor_proto)) {
|
|
raw_data = const_cast<char*>(tensor_proto.raw_data().data());
|
|
// TODO The line above has const-correctness issues. Below is a possible fix which copies the tensor_proto data
|
|
// into a writeable buffer. However, it requires extra memory which may exceed the limit for certain tests.
|
|
//auto buffer = std::make_unique<char[]>(tensor_proto.raw_data().size());
|
|
//std::memcpy(buffer.get(), tensor_proto.raw_data().data(), tensor_proto.raw_data().size());
|
|
//deleter_for_file_data.d = OrtCallback{DeleteCharArray, buffer.get()};
|
|
//raw_data = buffer.release();
|
|
raw_data_len = tensor_proto.raw_data().size();
|
|
}
|
|
|
|
if (nullptr != raw_data && utils::IsPrimitiveDataType<std::string>(source_type)) {
|
|
return Status(common::ONNXRUNTIME, common::INVALID_ARGUMENT, "string tensor can not have raw data");
|
|
}
|
|
|
|
// unpacking tensor_proto data to preallocated tensor
|
|
void* preallocated = tensor.MutableDataRaw();
|
|
int64_t tensor_size = 1;
|
|
{
|
|
for (auto i : tensor_proto.dims()) {
|
|
if (i < 0) {
|
|
return Status(common::ONNXRUNTIME, common::INVALID_ARGUMENT, "tensor can't contain negative dims");
|
|
}
|
|
tensor_size *= i;
|
|
}
|
|
}
|
|
// tensor_size could be zero. see test_slice_start_out_of_bounds\test_data_set_0\output_0.pb
|
|
if (static_cast<uint64_t>(tensor_size) > SIZE_MAX) {
|
|
return Status(common::ONNXRUNTIME, common::INVALID_ARGUMENT, "size overflow");
|
|
}
|
|
switch (tensor_proto.data_type()) {
|
|
CASE_PROTO(FLOAT, float);
|
|
CASE_PROTO(DOUBLE, double);
|
|
CASE_PROTO(BOOL, bool);
|
|
CASE_PROTO(INT8, int8_t);
|
|
CASE_PROTO(INT16, int16_t);
|
|
CASE_PROTO(INT32, int32_t);
|
|
CASE_PROTO(INT64, int64_t);
|
|
CASE_PROTO(UINT8, uint8_t);
|
|
CASE_PROTO(UINT16, uint16_t);
|
|
CASE_PROTO(UINT32, uint32_t);
|
|
CASE_PROTO(UINT64, uint64_t);
|
|
CASE_PROTO(FLOAT16, MLFloat16);
|
|
CASE_PROTO(BFLOAT16, BFloat16);
|
|
case ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_STRING:
|
|
ORT_RETURN_IF_ERROR(UnpackTensor<std::string>(tensor_proto, raw_data, raw_data_len,
|
|
static_cast<std::string*>(preallocated),
|
|
static_cast<size_t>(tensor_size)));
|
|
break;
|
|
default: {
|
|
std::ostringstream ostr;
|
|
ostr << "Initialized tensor with unexpected type: " << tensor_proto.data_type();
|
|
return common::Status(common::ONNXRUNTIME, common::INVALID_ARGUMENT, ostr.str());
|
|
}
|
|
}
|
|
|
|
return Status::OK();
|
|
}
|
|
|
|
#ifdef _MSC_VER
|
|
#pragma warning(push)
|
|
#pragma warning(disable : 6239)
|
|
#endif
|
|
// TODO: Change the current interface to take Path object for model path
|
|
// so that validating and manipulating path for reading external data becomes easy
|
|
Status TensorProtoToMLValue(const Env& env, const ORTCHAR_T* model_path,
|
|
const ONNX_NAMESPACE::TensorProto& tensor_proto,
|
|
const MemBuffer& m, OrtValue& value) {
|
|
if (m.GetBuffer() == nullptr) {
|
|
return Status(common::ONNXRUNTIME, common::INVALID_ARGUMENT,
|
|
"TensorProtoToMLValue() must take a pre-allocated MemBuffer!");
|
|
}
|
|
|
|
ONNXTensorElementDataType ele_type = utils::GetTensorElementType(tensor_proto);
|
|
if (ele_type == ONNX_TENSOR_ELEMENT_DATA_TYPE_STRING) {
|
|
return Status(common::ONNXRUNTIME, common::INVALID_ARGUMENT, "string tensor can not use pre-allocated buffer");
|
|
}
|
|
|
|
// Note: We permit an empty tensor_shape_vec, and treat it as a scalar (a tensor of size 1).
|
|
TensorShape tensor_shape{GetTensorShapeFromTensorProto(tensor_proto)};
|
|
const DataTypeImpl* const type = DataTypeImpl::TensorTypeFromONNXEnum(tensor_proto.data_type())->GetElementType();
|
|
std::unique_ptr<Tensor> tensorp = std::make_unique<Tensor>(type, tensor_shape, m.GetBuffer(), m.GetAllocInfo());
|
|
if (tensorp->SizeInBytes() > m.GetLen()) {
|
|
return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, "The preallocated buffer is too small. Requires ",
|
|
tensorp->SizeInBytes(), ", Got ", m.GetLen());
|
|
}
|
|
|
|
TensorProtoToTensor(env, model_path, tensor_proto, *tensorp);
|
|
|
|
auto ml_tensor = DataTypeImpl::GetType<Tensor>();
|
|
value.Init(tensorp.release(), ml_tensor, ml_tensor->GetDeleteFunc());
|
|
return Status::OK();
|
|
}
|
|
#ifdef _MSC_VER
|
|
#pragma warning(pop)
|
|
#pragma warning(disable : 6239)
|
|
#endif
|
|
#define CASE_TYPE(X) \
|
|
case ONNX_NAMESPACE::TensorProto_DataType_##X: \
|
|
return ONNX_TENSOR_ELEMENT_DATA_TYPE_##X;
|
|
|
|
ONNXTensorElementDataType CApiElementTypeFromProtoType(int type) {
|
|
switch (type) {
|
|
CASE_TYPE(FLOAT)
|
|
CASE_TYPE(UINT8)
|
|
CASE_TYPE(INT8)
|
|
CASE_TYPE(UINT16)
|
|
CASE_TYPE(INT16)
|
|
CASE_TYPE(INT32)
|
|
CASE_TYPE(INT64)
|
|
CASE_TYPE(STRING)
|
|
CASE_TYPE(BOOL)
|
|
CASE_TYPE(FLOAT16)
|
|
CASE_TYPE(DOUBLE)
|
|
CASE_TYPE(UINT32)
|
|
CASE_TYPE(UINT64)
|
|
CASE_TYPE(COMPLEX64)
|
|
CASE_TYPE(COMPLEX128)
|
|
CASE_TYPE(BFLOAT16)
|
|
default:
|
|
return ONNX_TENSOR_ELEMENT_DATA_TYPE_UNDEFINED;
|
|
}
|
|
}
|
|
|
|
// Returns the C API element type corresponding to the data type declared by `tensor_proto`.
ONNXTensorElementDataType GetTensorElementType(const ONNX_NAMESPACE::TensorProto& tensor_proto) {
  const auto proto_data_type = tensor_proto.data_type();
  return CApiElementTypeFromProtoType(proto_data_type);
}
|
|
|
|
// Serializes `tensor` into a TensorProto named `tensor_proto_name`.
// Name, dims and data type are always set. String tensors are copied element by element into
// string_data; every other type is stored as raw bytes in raw_data.
// Throws on a big-endian host.
ONNX_NAMESPACE::TensorProto TensorToTensorProto(const Tensor& tensor, const std::string& tensor_proto_name) {
  // Given we are using the raw_data field in the protobuf, this will work only for little-endian format.
  ORT_IF_CONSTEXPR(endian::native != endian::little) {
    ORT_THROW("Big endian not supported");
  }

  // Set name, dimensions, type, and data of the TensorProto.
  ONNX_NAMESPACE::TensorProto result;
  result.set_name(tensor_proto_name);

  for (const auto dim : tensor.Shape().GetDims()) {
    result.add_dims(dim);
  }

  result.set_data_type(tensor.GetElementType());

  if (!tensor.IsDataTypeString()) {
    result.set_raw_data(tensor.DataRaw(), tensor.SizeInBytes());
    return result;
  }

  // String tensor: append a copy of every element.
  auto* strings_out = result.mutable_string_data();
  const std::string* src = tensor.Data<std::string>();
  const std::string* const src_end = src + tensor.Shape().Size();
  while (src < src_end) {
    *strings_out->Add() = *src;
    ++src;
  }

  return result;
}
|
|
|
|
// Converts the value attribute of a 'Constant' node into a TensorProto.
//
// node        the Constant node; its first attribute holds the value and its first output names the result.
// model_path  forwarded to SparseTensorProtoToDenseTensorProto for sparse_tensor attributes.
// tensor      receives the converted value; its name is set to the node's first output.
//
// Scalar attributes (FLOAT / INT / STRING) produce a tensor without dims. List attributes
// (FLOATS / INTS / STRINGS) produce a 1-D tensor whose single dim is the list length, so that the
// declared shape agrees with the number of stored elements.
//
// Returns a failure Status if the node has no attribute or no output, or if the sparse conversion fails.
// Throws (ORT_THROW) for attribute types that are not handled below.
common::Status ConstantNodeProtoToTensorProto(const ONNX_NAMESPACE::NodeProto& node,
                                              const Path& model_path,
                                              ONNX_NAMESPACE::TensorProto& tensor) {
  // attribute(0) / output(0) are only valid when the repeated fields are non-empty.
  ORT_RETURN_IF_NOT(node.attribute_size() > 0, "'Constant' node '", node.name(), "' has no value attribute");
  ORT_RETURN_IF_NOT(node.output_size() > 0, "'Constant' node '", node.name(), "' has no output");

  const AttributeProto& constant_attribute = node.attribute(0);

  switch (constant_attribute.type()) {
    case AttributeProto_AttributeType_TENSOR:
      tensor = constant_attribute.t();
      break;
    case AttributeProto_AttributeType_FLOAT:
      tensor.set_data_type(TensorProto_DataType_FLOAT);
      tensor.add_float_data(constant_attribute.f());
      break;
    case AttributeProto_AttributeType_FLOATS:
      tensor.set_data_type(TensorProto_DataType_FLOAT);
      *tensor.mutable_float_data() = constant_attribute.floats();
      // list attributes describe a 1-D tensor, so record its length as the shape
      tensor.add_dims(constant_attribute.floats_size());
      break;
    case AttributeProto_AttributeType_INT:
      tensor.set_data_type(TensorProto_DataType_INT64);
      tensor.add_int64_data(constant_attribute.i());
      break;
    case AttributeProto_AttributeType_INTS:
      tensor.set_data_type(TensorProto_DataType_INT64);
      *tensor.mutable_int64_data() = constant_attribute.ints();
      tensor.add_dims(constant_attribute.ints_size());
      break;
    case AttributeProto_AttributeType_STRING:
      tensor.set_data_type(TensorProto_DataType_STRING);
      tensor.add_string_data(constant_attribute.s());
      break;
    case AttributeProto_AttributeType_STRINGS: {
      tensor.set_data_type(TensorProto_DataType_STRING);
      *tensor.mutable_string_data() = constant_attribute.strings();
      tensor.add_dims(constant_attribute.strings_size());
      break;
    }
    case AttributeProto_AttributeType_SPARSE_TENSOR: {
      auto& s = constant_attribute.sparse_tensor();
      ORT_RETURN_IF_ERROR(SparseTensorProtoToDenseTensorProto(s, model_path, tensor));
      break;
    }
    default:
      ORT_THROW("Unsupported attribute value type of ", constant_attribute.type(),
                " in 'Constant' node '", node.name(), "'");
  }

  // set name last in case attribute type was tensor (would copy over name)
  *(tensor.mutable_name()) = node.output(0);

  return Status::OK();
}
|
|
|
|
template <typename T>
|
|
static Status CopySparseData(size_t n_sparse_elements,
|
|
const ONNX_NAMESPACE::TensorProto& indices,
|
|
gsl::span<const int64_t> dims,
|
|
std::function<void(size_t from_idx, size_t to_idx)> copier) {
|
|
Status status = Status::OK();
|
|
TensorShape indices_shape(indices.dims().data(), indices.dims().size());
|
|
|
|
ORT_RETURN_IF_NOT(indices.data_type() == ONNX_NAMESPACE ::TensorProto_DataType_INT64, "Indicies expected to be INT64");
|
|
|
|
gsl::span<const int64_t> indices_data;
|
|
const auto elements = static_cast<size_t>(indices_shape.Size());
|
|
if (indices.int64_data_size() > 0) {
|
|
indices_data = gsl::make_span<const int64_t>(indices.int64_data().data(), elements);
|
|
} else if (indices.has_raw_data()) {
|
|
ORT_RETURN_IF_NOT(indices.raw_data().size() == (elements * sizeof(int64_t)),
|
|
"Sparse Indicies raw data size does not match expected.");
|
|
indices_data = gsl::make_span<const int64_t>(reinterpret_cast<const int64_t*>(indices.raw_data().data()), elements);
|
|
} else {
|
|
return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_GRAPH, "Invalid SparseTensor indices. Should either have raw or int64 data");
|
|
}
|
|
|
|
if (indices_shape.NumDimensions() == 1) {
|
|
// flattened indexes
|
|
for (size_t i = 0; i < n_sparse_elements; ++i) {
|
|
copier(i, static_cast<size_t>(indices_data[i]));
|
|
}
|
|
} else if (indices_shape.NumDimensions() == 2) {
|
|
// entries in format {NNZ, rank}
|
|
size_t rank = static_cast<size_t>(indices_shape[1]);
|
|
ORT_ENFORCE(rank == dims.size() && rank > 0);
|
|
const int64_t* cur_index = indices_data.data();
|
|
std::vector<size_t> multipliers;
|
|
multipliers.resize(rank);
|
|
|
|
// calculate sum of inner dimension elements for each dimension.
|
|
// e.g. if shape {2,3,4}, the result should be {3*4, 4, 1}
|
|
multipliers[rank - 1] = 1;
|
|
for (int32_t r = static_cast<int32_t>(rank) - 2; r >= 0; --r) {
|
|
multipliers[r] = static_cast<size_t>(dims[r + 1]) * multipliers[r + 1];
|
|
}
|
|
|
|
// calculate the offset for the entry
|
|
// e.g. if shape was {2,3,4} and entry was (1, 0, 2) the offset is 14
|
|
// as there are 2 rows, each with 12 entries per row
|
|
for (size_t i = 0; i < n_sparse_elements; ++i) {
|
|
size_t idx = 0;
|
|
for (size_t j = 0; j < rank; ++j) {
|
|
idx += static_cast<size_t>(cur_index[j]) * multipliers[j];
|
|
}
|
|
|
|
copier(i, idx);
|
|
cur_index += rank;
|
|
}
|
|
|
|
ORT_ENFORCE(cur_index == &*indices_data.cend());
|
|
} else {
|
|
status = ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_GRAPH, "Invalid SparseTensor indices. Should be rank 0 or 1. Got:",
|
|
indices_shape);
|
|
}
|
|
|
|
return status;
|
|
}
|
|
|
|
namespace conversion_internal {
|
|
struct UnsupportedSparseDataType {
|
|
void operator()(int32_t dt_type, Status& status) const {
|
|
status = ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "Unsupported sparse tensor data type of ", dt_type);
|
|
}
|
|
};
|
|
|
|
template <typename T>
|
|
struct GetElementSize {
|
|
Status operator()(size_t& element_size) const {
|
|
element_size = sizeof(T);
|
|
return Status::OK();
|
|
}
|
|
};
|
|
|
|
using SupportedConversionTypeList = onnxruntime::TypeList<float, double, MLFloat16, BFloat16,
|
|
int8_t, uint8_t, int16_t, uint16_t, int32_t, uint32_t, int64_t, uint64_t>;
|
|
} // namespace conversion_internal
|
|
|
|
common::Status SparseTensorProtoToDenseTensorProto(const ONNX_NAMESPACE::SparseTensorProto& sparse,
|
|
const Path& model_path,
|
|
ONNX_NAMESPACE::TensorProto& dense) {
|
|
Status status = Status::OK();
|
|
|
|
const auto& sparse_values = sparse.values();
|
|
auto type = sparse_values.data_type();
|
|
dense.set_data_type(type);
|
|
*dense.mutable_name() = sparse_values.name();
|
|
|
|
SafeInt<size_t> n_sparse_elements = 1;
|
|
for (auto dim : sparse_values.dims()) {
|
|
n_sparse_elements *= dim;
|
|
}
|
|
|
|
SafeInt<size_t> n_dense_elements = 1;
|
|
for (auto dim : sparse.dims()) {
|
|
n_dense_elements *= dim;
|
|
dense.add_dims(dim);
|
|
}
|
|
|
|
const auto& indices = sparse.indices();
|
|
auto dims = gsl::make_span<const int64_t>(dense.dims().data(), dense.dims().size());
|
|
|
|
if (type != TensorProto_DataType_STRING) {
|
|
// need to read in sparse data first as it could be in a type specific field, in raw data, or in external data
|
|
std::vector<uint8_t> sparse_data_storage;
|
|
ORT_RETURN_IF_ERROR(UnpackInitializerData(sparse_values, model_path, sparse_data_storage));
|
|
void* sparse_data = sparse_data_storage.data();
|
|
size_t element_size = 0;
|
|
// We want to this list to match the one used below in DenseTensorToSparseTensorProto()
|
|
MLTypeCallDispatcherFromTypeList<conversion_internal::SupportedConversionTypeList> type_disp(type);
|
|
ORT_RETURN_IF_ERROR(
|
|
(type_disp.InvokeRetWithUnsupportedPolicy<Status, conversion_internal::GetElementSize, conversion_internal::UnsupportedSparseDataType>(element_size)));
|
|
|
|
// by putting the data into a std::string we can avoid a copy as set_raw_data can do a std::move
|
|
// into the TensorProto. however to actually write to the buffer we have created in the std::string we need
|
|
// this somewhat dirty hack to get a mutable pointer. we could alternatively use &dense_data_storage.front()
|
|
// but using const_cast makes it more obvious we're doing something ugly.
|
|
// C++17 add non-const data() where we could remove const_cast
|
|
std::string dense_data_storage(n_dense_elements * element_size, 0);
|
|
if (n_sparse_elements > 0) {
|
|
void* dense_data = const_cast<char*>(dense_data_storage.data());
|
|
|
|
switch (element_size) {
|
|
case 1: {
|
|
auto dense_data_span = gsl::make_span<uint8_t>(static_cast<uint8_t*>(dense_data), n_dense_elements);
|
|
status = CopySparseData<uint8_t>(
|
|
n_sparse_elements,
|
|
indices, dims,
|
|
[sparse_data, dense_data_span](size_t from_idx, size_t to_idx) {
|
|
dense_data_span[to_idx] = static_cast<const uint8_t*>(sparse_data)[from_idx];
|
|
});
|
|
|
|
break;
|
|
}
|
|
case 2: {
|
|
auto dense_data_span = gsl::make_span<uint16_t>(static_cast<uint16_t*>(dense_data), n_dense_elements);
|
|
status = CopySparseData<uint16_t>(
|
|
n_sparse_elements,
|
|
indices, dims,
|
|
[sparse_data, dense_data_span](size_t from_idx, size_t to_idx) {
|
|
dense_data_span[to_idx] = static_cast<const uint16_t*>(sparse_data)[from_idx];
|
|
});
|
|
|
|
break;
|
|
}
|
|
case 4: {
|
|
auto dense_data_span = gsl::make_span<uint32_t>(static_cast<uint32_t*>(dense_data), n_dense_elements);
|
|
status = CopySparseData<uint32_t>(
|
|
n_sparse_elements,
|
|
indices, dims,
|
|
[sparse_data, dense_data_span](size_t from_idx, size_t to_idx) {
|
|
dense_data_span[to_idx] = static_cast<const uint32_t*>(sparse_data)[from_idx];
|
|
});
|
|
|
|
break;
|
|
}
|
|
case 8: {
|
|
auto dense_data_span = gsl::make_span<uint64_t>(static_cast<uint64_t*>(dense_data), n_dense_elements);
|
|
status = CopySparseData<uint64_t>(
|
|
n_sparse_elements,
|
|
indices, dims,
|
|
[sparse_data, dense_data_span](size_t from_idx, size_t to_idx) {
|
|
dense_data_span[to_idx] = static_cast<const uint64_t*>(sparse_data)[from_idx];
|
|
});
|
|
break;
|
|
}
|
|
|
|
default:
|
|
return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL,
|
|
" BUG! Report to onnxruntime team. element_size of: ",
|
|
element_size, " is not supported.", " type: ", type);
|
|
}
|
|
|
|
ORT_RETURN_IF_ERROR(status);
|
|
}
|
|
dense.set_raw_data(std::move(dense_data_storage));
|
|
|
|
} else {
|
|
// No request for std::string
|
|
conversion_internal::UnsupportedSparseDataType()(ONNX_NAMESPACE::TensorProto_DataType_STRING, status);
|
|
}
|
|
return status;
|
|
}
|
|
|
|
#if !defined(ORT_MINIMAL_BUILD)
|
|
// Determines if this is a type specific zero
|
|
using IsZeroFunc = bool (*)(const void*);
|
|
// Copy element
|
|
using CopyElementFunc = void (*)(void* dest, const void* src, int64_t dest_index, int64_t src_index);
|
|
|
|
static void SparsifyGeneric(const void* dense_raw_data, size_t n_dense_elements, size_t element_size,
|
|
IsZeroFunc is_zero, CopyElementFunc copy,
|
|
TensorProto& values, TensorProto& indices) {
|
|
auto advance = [element_size](const void* start, size_t elements) -> const void* {
|
|
return (reinterpret_cast<const uint8_t*>(start) + elements * element_size);
|
|
};
|
|
|
|
const auto* cbegin = dense_raw_data;
|
|
const auto* const cend = advance(cbegin, n_dense_elements);
|
|
auto& indices_data = *indices.mutable_int64_data();
|
|
int64_t index = 0;
|
|
while (cbegin != cend) {
|
|
if (!is_zero(cbegin)) {
|
|
indices_data.Add(index);
|
|
}
|
|
++index;
|
|
cbegin = advance(cbegin, 1U);
|
|
}
|
|
|
|
auto& raw_data = *values.mutable_raw_data();
|
|
raw_data.resize(indices.int64_data_size() * element_size);
|
|
void* data_dest = const_cast<char*>(raw_data.data());
|
|
|
|
int64_t dest_index = 0;
|
|
for (auto src_index : indices.int64_data()) {
|
|
copy(data_dest, dense_raw_data, dest_index, src_index);
|
|
++dest_index;
|
|
}
|
|
}
|
|
|
|
// Exact comparison against zero: no tolerance is applied for FP types because these dense
// tensors were created from sparse initializers where zeros were absolute.
template <typename T>
inline bool IsZero(const void* p) {
  const T& element = *reinterpret_cast<const T*>(p);
  return element == static_cast<T>(0);
}
|
|
|
|
// Treats both buffers as arrays of T and copies element src[src_index] into dst[dst_index].
template <typename T>
inline void CopyElement(void* dst, const void* src, int64_t dst_index, int64_t src_index) {
  const T* const source = reinterpret_cast<const T*>(src);
  T* const target = reinterpret_cast<T*>(dst);
  target[dst_index] = source[src_index];
}
|
|
|
|
// Converts a dense TensorProto into a SparseTensorProto holding only the non-zero elements.
//
// dense_proto  the dense tensor; must have a data type set.
// model_path   forwarded to UnpackInitializerData when unpacking the dense data.
// result       replaced with the sparse tensor on success: values carry the dense tensor's name
//              and data type, indices are INT64 flattened positions, both have shape {NNZ}, and
//              the dense dims are stored as the sparse tensor's dims.
//
// Returns a failure Status for string tensors, for element types outside
// conversion_internal::SupportedConversionTypeList, when the data cannot be unpacked, or when the
// tensor holds less data than its shape declares. Throws (ORT_ENFORCE) if no data type is set.
common::Status DenseTensorToSparseTensorProto(const ONNX_NAMESPACE::TensorProto& dense_proto,
                                              const Path& model_path,
                                              ONNX_NAMESPACE::SparseTensorProto& result) {
  ORT_ENFORCE(HasDataType(dense_proto), "Must have a valid data type");

  const bool is_string_data = dense_proto.data_type() == ONNX_NAMESPACE::TensorProto_DataType_STRING;
  if (is_string_data) {
    Status status{};
    conversion_internal::UnsupportedSparseDataType()(ONNX_NAMESPACE::TensorProto_DataType_STRING, status);
    return status;
  }

  const auto data_type = dense_proto.data_type();
  SparseTensorProto sparse_proto;
  auto& values = *sparse_proto.mutable_values();
  values.set_name(dense_proto.name());
  values.set_data_type(data_type);

  auto& indices = *sparse_proto.mutable_indices();
  indices.set_data_type(ONNX_NAMESPACE::TensorProto_DataType_INT64);

  SafeInt<size_t> n_dense_elements = 1;
  for (auto dim : dense_proto.dims()) {
    n_dense_elements *= dim;
  }

  std::vector<uint8_t> dense_raw_data;
  ORT_RETURN_IF_ERROR(UnpackInitializerData(dense_proto, model_path, dense_raw_data));
  size_t element_size = 0;
  // We want this type list to match the one above in SparseTensorProtoToDenseTensorProto
  MLTypeCallDispatcherFromTypeList<conversion_internal::SupportedConversionTypeList> type_disp(data_type);
  ORT_RETURN_IF_ERROR(
      (type_disp.InvokeRetWithUnsupportedPolicy<Status, conversion_internal::GetElementSize, conversion_internal::UnsupportedSparseDataType>(element_size)));

  // SparsifyGeneric scans n_dense_elements elements, a count derived from the declared shape.
  // Make sure the unpacked buffer really is that large before scanning it.
  const size_t required_dense_bytes = n_dense_elements * element_size;
  ORT_RETURN_IF_NOT(dense_raw_data.size() >= required_dense_bytes,
                    "Dense tensor contains less data than its shape requires.");

  // Elements are compared and copied as opaque integers of the matching width.
  void* dense_data = dense_raw_data.data();
  switch (element_size) {
    case 1: {
      SparsifyGeneric(dense_data, n_dense_elements, element_size,
                      IsZero<uint8_t>, CopyElement<uint8_t>, values, indices);
      break;
    }
    case 2: {
      SparsifyGeneric(dense_data, n_dense_elements, element_size,
                      IsZero<uint16_t>, CopyElement<uint16_t>, values, indices);
      break;
    }
    case 4: {
      SparsifyGeneric(dense_data, n_dense_elements, element_size,
                      IsZero<uint32_t>, CopyElement<uint32_t>, values, indices);
      break;
    }
    case 8: {
      SparsifyGeneric(dense_data, n_dense_elements, element_size,
                      IsZero<uint64_t>, CopyElement<uint64_t>, values, indices);
      break;
    }
    default:
      return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL,
                             " BUG! Report to onnxruntime team. element_size of: ",
                             element_size, " is not supported.", " data_type: ", data_type);
  }

  // Fix up shapes
  const auto nnz = indices.int64_data_size();
  values.add_dims(nnz);
  indices.add_dims(nnz);

  // Save dense shape
  *sparse_proto.mutable_dims() = dense_proto.dims();
  swap(result, sparse_proto);
  return Status::OK();
}
|
|
|
|
#endif // !ORT_MINIMAL_BUILD
|
|
|
|
// Explicit instantiations of GetSizeInBytesFromTensorProto for alignment 0 and kAllocAlignment.
template common::Status GetSizeInBytesFromTensorProto<0>(const ONNX_NAMESPACE::TensorProto& tensor_proto, size_t* out);
template common::Status GetSizeInBytesFromTensorProto<kAllocAlignment>(const ONNX_NAMESPACE::TensorProto& tensor_proto,
                                                                       size_t* out);
|
|
|
|
#define CASE_UNPACK(TYPE, ELEMENT_TYPE, DATA_SIZE) \
|
|
case ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_##TYPE: { \
|
|
SafeInt<size_t> tensor_byte_size; \
|
|
size_t element_count = 0; \
|
|
if (initializer.has_raw_data()) { \
|
|
tensor_byte_size = initializer.raw_data().size(); \
|
|
element_count = tensor_byte_size / sizeof(ELEMENT_TYPE); \
|
|
} else { \
|
|
element_count = initializer.DATA_SIZE(); \
|
|
tensor_byte_size = element_count * sizeof(ELEMENT_TYPE); \
|
|
} \
|
|
unpacked_tensor.resize(tensor_byte_size); \
|
|
return onnxruntime::utils::UnpackTensor( \
|
|
initializer, \
|
|
initializer.has_raw_data() ? initializer.raw_data().data() : nullptr, \
|
|
initializer.has_raw_data() ? initializer.raw_data().size() : 0, \
|
|
reinterpret_cast<ELEMENT_TYPE*>(unpacked_tensor.data()), element_count); \
|
|
break; \
|
|
}
|
|
|
|
Status UnpackInitializerData(const onnx::TensorProto& initializer,
|
|
const Path& model_path,
|
|
std::vector<uint8_t>& unpacked_tensor) {
|
|
// TODO, if std::vector does not use a custom allocator, the default std::allocator will
|
|
// allocation the memory aligned to std::max_align_t, need look into allocating
|
|
// forced aligned memory (align as 16 or larger)for unpacked_tensor
|
|
if (initializer.data_location() == TensorProto_DataLocation_EXTERNAL) {
|
|
ORT_RETURN_IF_ERROR(ReadExternalDataForTensor(
|
|
initializer,
|
|
model_path.IsEmpty() ? nullptr : model_path.ParentPath().ToPathString().c_str(),
|
|
unpacked_tensor));
|
|
return Status::OK();
|
|
}
|
|
|
|
switch (initializer.data_type()) {
|
|
CASE_UNPACK(FLOAT, float, float_data_size);
|
|
CASE_UNPACK(DOUBLE, double, double_data_size);
|
|
CASE_UNPACK(BOOL, bool, int32_data_size);
|
|
CASE_UNPACK(INT8, int8_t, int32_data_size);
|
|
CASE_UNPACK(INT16, int16_t, int32_data_size);
|
|
CASE_UNPACK(INT32, int32_t, int32_data_size);
|
|
CASE_UNPACK(INT64, int64_t, int64_data_size);
|
|
CASE_UNPACK(UINT8, uint8_t, int32_data_size);
|
|
CASE_UNPACK(UINT16, uint16_t, int32_data_size);
|
|
CASE_UNPACK(UINT32, uint32_t, uint64_data_size);
|
|
CASE_UNPACK(UINT64, uint64_t, uint64_data_size);
|
|
CASE_UNPACK(FLOAT16, onnxruntime::MLFloat16, int32_data_size);
|
|
CASE_UNPACK(BFLOAT16, onnxruntime::BFloat16, int32_data_size);
|
|
default:
|
|
break;
|
|
}
|
|
return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
|
|
"Unsupported type: ", initializer.data_type());
|
|
}
|
|
#undef CASE_UNPACK
|
|
|
|
// Overload for initializers whose data is stored inside the proto itself.
// Returns a failure Status if the initializer refers to external data; otherwise forwards to the
// three-argument overload with an empty model path.
Status UnpackInitializerData(const ONNX_NAMESPACE::TensorProto& initializer,
                             std::vector<uint8_t>& unpacked_tensor) {
  ORT_RETURN_IF(initializer.data_location() == TensorProto_DataLocation_EXTERNAL,
                "The given initializer contains external data");

  const Path no_model_path{};
  return UnpackInitializerData(initializer, no_model_path, unpacked_tensor);
}
|
|
|
|
} // namespace utils
|
|
} // namespace onnxruntime
|