onnxruntime/onnxruntime/core/framework/tensorprotoutils.h
Guoyu Wang 3406b7b528
Simplify UnpackInitializerData API (#8736)
* Move UnpackInitializerData to use vector

* minor update

* minor update

* Update getclipminmax

* Change uint8_t -> std::byte

* fix build break

* Revert "fix build break"

This reverts commit 1ffa284ac54fd605c0651954ea4fb2cab0464526.

* Revert "Change uint8_t -> std::byte"

This reverts commit 764a656ebac6610cdf1f25e63770330c3aedece6.

* Add todo notes for extra vector alignment

* add check result size
2021-08-17 18:11:46 -07:00

326 lines
14 KiB
C++

// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
#pragma once
#include <vector>
#include <type_traits>
#ifndef SHARED_PROVIDER
#include "core/common/common.h"
#include "core/common/path.h"
#include "core/common/status.h"
#include "core/framework/endian_utils.h"
#include "core/framework/allocator.h"
#include "core/framework/ort_value.h"
#include "core/framework/mem_buffer.h"
#include "core/framework/tensor_external_data_info.h"
#include "core/graph/onnx_protobuf.h"
#include "core/platform/env.h"
namespace ONNX_NAMESPACE {
class TensorProto;
class TensorShapeProto;
/** Test if two TensorShapeProto dimensions are equal. */
bool operator==(const TensorShapeProto_Dimension& l, const TensorShapeProto_Dimension& r);
bool operator!=(const TensorShapeProto_Dimension& l, const TensorShapeProto_Dimension& r);
} // namespace ONNX_NAMESPACE
#endif
namespace onnxruntime {
namespace utils {
#ifndef SHARED_PROVIDER
TensorShape GetTensorShapeFromTensorShapeProto(const ONNX_NAMESPACE::TensorShapeProto& tensor_shape_proto);
std::vector<int64_t> GetTensorShapeFromTensorProto(const ONNX_NAMESPACE::TensorProto& tensor_proto);
/**
* deserialize a TensorProto into a preallocated memory buffer.
* \param tensor_proto_path A local file path of where the 'input' was loaded from. Can be NULL if the tensor proto doesn't
* have any external data or it was loaded from current working dir. This path could be either a
* relative path or an absolute path.
*/
common::Status TensorProtoToMLValue(const Env& env, const ORTCHAR_T* tensor_proto_path,
const ONNX_NAMESPACE::TensorProto& input, const MemBuffer& m, OrtValue& value);
/**
* @brief Deserialize a TensorProto into a preallocated empty Tensor
* @param env
* @param model_path
* @param tensor_proto source data
* @param tensorp destination empty tensor
* @return
*/
common::Status TensorProtoToTensor(const Env& env, const ORTCHAR_T* model_path,
const ONNX_NAMESPACE::TensorProto& tensor_proto,
Tensor& tensor);
/** Creates a TensorProto from a Tensor.
@param[in] tensor the Tensor whose data and shape will be used to create the TensorProto.
@param[in] tensor_proto_name the name of the TensorProto.
@return the TensorProto.
Note: Method currently requires that data is in little-endian format.
*/
ONNX_NAMESPACE::TensorProto TensorToTensorProto(const Tensor& tensor, const std::string& tensor_proto_name);
ONNXTensorElementDataType CApiElementTypeFromProtoType(int type);
ONNXTensorElementDataType GetTensorElementType(const ONNX_NAMESPACE::TensorProto& tensor_proto);
// How much memory it will need for putting the content of this tensor into a plain array
// complex64/complex128 tensors are not supported.
// The output value could be zero or -1.
template <size_t alignment>
common::Status GetSizeInBytesFromTensorProto(const ONNX_NAMESPACE::TensorProto& tensor_proto, size_t* out);
// Convert the AttributeProto from a Constant node into a TensorProto that can be used as an initializer
// If AttributeProto contains a TensorProto, this tensor proto is converted as is including the case when the
// the data location is external. i.e. it does not load the external data.
// However if AttributeProto contains SparseTensorProto then it converts the data into dense tensor proto
// (including loading external data when applicable).
// model_path is used for contructing full path for external_data
common::Status ConstantNodeProtoToTensorProto(const ONNX_NAMESPACE::NodeProto& node,
const Path& model_path,
ONNX_NAMESPACE::TensorProto& tensor);
// Convert a SparseTensorProto to a dense TensorProto
// If the SparseTensorProto contains external data then it loads the data and converts to dense tensor proto
// The resulting TensorProto will contain the data as raw data.
// model_path is used for contructing full path for external_data
common::Status SparseTensorProtoToDenseTensorProto(const ONNX_NAMESPACE::SparseTensorProto& sparse,
const Path& model_path,
ONNX_NAMESPACE::TensorProto& dense);
#if !defined(ORT_MINIMAL_BUILD)
// Convert a TensorProto to a SparseTensorProto
// If the tensorproto contains external data then it loads the data and converts to sparse tensor
// The resulting SparseTensorProto will contain the data as raw data
// model_path is used for contructing full path for external_data
common::Status DenseTensorToSparseTensorProto(const ONNX_NAMESPACE::TensorProto& dense,
const Path& model_path,
ONNX_NAMESPACE::SparseTensorProto& sparse);
#endif // !ORT_MINIMAL_BUILD
#endif
inline bool HasDimValue(const ONNX_NAMESPACE::TensorShapeProto_Dimension& dim) {
return dim.value_case() == ONNX_NAMESPACE::TensorShapeProto_Dimension::kDimValue;
}
inline bool HasDimParam(const ONNX_NAMESPACE::TensorShapeProto_Dimension& dim) {
return dim.value_case() == ONNX_NAMESPACE::TensorShapeProto_Dimension::kDimParam;
}
inline bool HasTensorType(const ONNX_NAMESPACE::TypeProto& type_proto) {
return type_proto.value_case() == ONNX_NAMESPACE::TypeProto::kTensorType;
}
inline bool HasElemType(const ONNX_NAMESPACE::TypeProto_Tensor& ten_proto) {
return ten_proto.elem_type() != ONNX_NAMESPACE::TensorProto::UNDEFINED;
}
inline bool HasShape(const ONNX_NAMESPACE::TypeProto_Tensor& ten_proto) {
// XXX: Figure out how do in proto3
return ten_proto.has_shape();
}
inline bool HasSparseTensorType(const ONNX_NAMESPACE::TypeProto& type_proto) {
return type_proto.value_case() == ONNX_NAMESPACE::TypeProto::kSparseTensorType;
}
inline bool HasShape(const ONNX_NAMESPACE::TypeProto_SparseTensor& ten_proto) {
// XXX: Figure out how do in proto3
return ten_proto.has_shape();
}
inline bool HasElemType(const ONNX_NAMESPACE::TypeProto_SparseTensor& ten_proto) {
return ten_proto.elem_type() != ONNX_NAMESPACE::TensorProto::UNDEFINED;
}
inline bool HasShape(const ONNX_NAMESPACE::TypeProto& type_proto) {
if (HasTensorType(type_proto) && HasShape(type_proto.tensor_type())) {
return true;
}
return HasSparseTensorType(type_proto) && HasShape(type_proto.sparse_tensor_type());
}
inline bool HasElementType(const ONNX_NAMESPACE::TypeProto& type_proto) {
if (HasTensorType(type_proto) && HasElemType(type_proto.tensor_type())) {
return true;
}
return HasSparseTensorType(type_proto) && HasElemType(type_proto.sparse_tensor_type());
}
inline const ONNX_NAMESPACE::TensorShapeProto& GetShape(const ONNX_NAMESPACE::TypeProto& type_proto) {
if (HasTensorType(type_proto) && HasShape(type_proto.tensor_type())) {
return type_proto.tensor_type().shape();
}
if (HasSparseTensorType(type_proto) && HasShape(type_proto.sparse_tensor_type())) {
return type_proto.sparse_tensor_type().shape();
}
ORT_THROW("TypeProto must have shape for this to run");
}
inline bool HasRawData(const ONNX_NAMESPACE::TensorProto& ten_proto) {
// Can not be UNDEFINED and can not be STRING but test for STRING is usually performed separately
// to return an error
return ten_proto.data_type() != ONNX_NAMESPACE::TensorProto::UNDEFINED &&
ten_proto.has_raw_data(); // XXX: Figure out how to do in proto3
}
inline bool HasExternalData(const ONNX_NAMESPACE::TensorProto& ten_proto) {
// Can not be UNDEFINED and can not be STRING but test for STRING is usually performed separately
// to return an error
return ten_proto.data_type() != ONNX_NAMESPACE::TensorProto::UNDEFINED &&
ten_proto.data_location() == ONNX_NAMESPACE::TensorProto_DataLocation_EXTERNAL;
}
inline bool HasDataType(const ONNX_NAMESPACE::TensorProto& ten_proto) {
return ten_proto.data_type() != ONNX_NAMESPACE::TensorProto::UNDEFINED;
}
#ifndef SHARED_PROVIDER
inline bool HasName(const ONNX_NAMESPACE::TensorProto& ten_proto) {
return ten_proto.has_name(); // XXX
}
inline bool HasElemType(const ONNX_NAMESPACE::TypeProto_Sequence& seq_proto) {
return seq_proto.elem_type().value_case() != ONNX_NAMESPACE::TypeProto::VALUE_NOT_SET;
}
inline bool HasName(const ONNX_NAMESPACE::SparseTensorProto& ten_proto) {
return ten_proto.values().has_name(); // XXX
}
inline bool HasKeyType(const ONNX_NAMESPACE::TypeProto_Map& map_proto) {
return map_proto.key_type() != ONNX_NAMESPACE::TensorProto::UNDEFINED;
}
inline bool HasValueType(const ONNX_NAMESPACE::TypeProto_Map& map_proto) {
return map_proto.value_type().value_case() != ONNX_NAMESPACE::TypeProto::VALUE_NOT_SET;
}
#endif
inline bool HasType(const ONNX_NAMESPACE::ValueInfoProto& vi_proto) {
return vi_proto.type().value_case() != ONNX_NAMESPACE::TypeProto::VALUE_NOT_SET;
}
#ifndef SHARED_PROVIDER
inline bool HasName(const ONNX_NAMESPACE::ValueInfoProto& vi_proto) {
return vi_proto.has_name(); // XXX: Figure out proto3 way
}
inline bool HasDomain(const ONNX_NAMESPACE::TypeProto_Opaque& op_proto) {
return !op_proto.domain().empty();
}
inline bool HasName(const ONNX_NAMESPACE::TypeProto_Opaque& op_proto) {
return !op_proto.name().empty();
}
#endif
inline bool HasType(const ONNX_NAMESPACE::AttributeProto& at_proto) {
return at_proto.type() != ONNX_NAMESPACE::AttributeProto::AttributeType::AttributeProto_AttributeType_UNDEFINED;
}
inline bool HasFloat(const ONNX_NAMESPACE::AttributeProto& at_proto) {
return at_proto.type() == ONNX_NAMESPACE::AttributeProto::AttributeType::AttributeProto_AttributeType_FLOAT;
}
inline bool HasFloats(const ONNX_NAMESPACE::AttributeProto& at_proto) {
return at_proto.type() == ONNX_NAMESPACE::AttributeProto::AttributeType::AttributeProto_AttributeType_FLOATS;
}
inline bool HasInt(const ONNX_NAMESPACE::AttributeProto& at_proto) {
return at_proto.type() == ONNX_NAMESPACE::AttributeProto::AttributeType::AttributeProto_AttributeType_INT;
}
inline bool HasInts(const ONNX_NAMESPACE::AttributeProto& at_proto) {
return at_proto.type() == ONNX_NAMESPACE::AttributeProto::AttributeType::AttributeProto_AttributeType_INTS;
}
inline bool HasString(const ONNX_NAMESPACE::AttributeProto& at_proto) {
return at_proto.type() == ONNX_NAMESPACE::AttributeProto::AttributeType::AttributeProto_AttributeType_STRING;
}
inline bool HasStrings(const ONNX_NAMESPACE::AttributeProto& at_proto) {
return at_proto.type() == ONNX_NAMESPACE::AttributeProto::AttributeType::AttributeProto_AttributeType_STRINGS;
}
inline bool HasTensor(const ONNX_NAMESPACE::AttributeProto& at_proto) {
return at_proto.type() == ONNX_NAMESPACE::AttributeProto::AttributeType::AttributeProto_AttributeType_TENSOR;
}
inline bool HasTensors(const ONNX_NAMESPACE::AttributeProto& at_proto) {
return at_proto.type() == ONNX_NAMESPACE::AttributeProto::AttributeType::AttributeProto_AttributeType_TENSORS;
}
inline bool HasGraph(const ONNX_NAMESPACE::AttributeProto& at_proto) {
return at_proto.type() == ONNX_NAMESPACE::AttributeProto::AttributeType::AttributeProto_AttributeType_GRAPH;
}
inline bool HasGraphs(const ONNX_NAMESPACE::AttributeProto& at_proto) {
return at_proto.type() == ONNX_NAMESPACE::AttributeProto::AttributeType::AttributeProto_AttributeType_GRAPHS;
}
#ifndef SHARED_PROVIDER
inline bool HasName(const ONNX_NAMESPACE::AttributeProto& at_proto) {
return at_proto.has_name(); // XXX: Fugure out proto3
}
inline bool HasGraph(const ONNX_NAMESPACE::ModelProto& m_proto) {
return m_proto.has_graph(); // XXX proto3
}
inline bool HasIrVersion(const ONNX_NAMESPACE::ModelProto& m_proto) {
return m_proto.has_ir_version(); // XXX proto3
}
inline bool HasModelVersion(const ONNX_NAMESPACE::ModelProto& m_proto) {
return m_proto.has_model_version(); // XXX proto3
}
inline bool HasName(const ONNX_NAMESPACE::NodeProto& node_proto) {
//XXX: Figure out proto3 style
return node_proto.has_name();
}
#endif
// UnpackTensor from raw data or the type specific data field. Does not handle external data.
// If the tensor does not contain raw data then raw_data should be nullptr and raw_data_len should be 0.
template <typename T>
Status UnpackTensor(const ONNX_NAMESPACE::TensorProto& tensor, const void* raw_data, size_t raw_data_len,
/*out*/ T* p_data, size_t expected_size);
// UnpackTensor from raw data, external data or the type specific data field.
// Uses the model path to construct the full path for loading external data. In case when model_path is empty
// it uses current directory.
template <typename T>
Status UnpackTensor(const ONNX_NAMESPACE::TensorProto& tensor, const Path& model_path,
/*out*/ T* p_data, size_t expected_size);
/**
* Unpack the data from an initializer tensor
* Please note, this function does not unpack string_data of an initializer tensor
* @param initializer given initializer tensor
* @param model_path model_path to construct external data dir path. When this is empty, current dir is used.
* @param unpacked_tensor the vector holds data from the initializer in byte form
* @returns Status::OK() if data is unpacked successfully
*/
common::Status UnpackInitializerData(const ONNX_NAMESPACE::TensorProto& initializer,
const Path& model_path,
std::vector<uint8_t>& unpacked_tensor);
/**
* Unpack the data from an internal initializer tensor, will return error when the given initializer
* contains external data
* Please note, this function does not unpack string_data of an initializer tensor
* @param initializer given initializer tensor
* @param unpacked_tensor the vector holds data from the initializer in byte form
* @returns Status::OK() if data is unpacked successfully
*/
common::Status UnpackInitializerData(const ONNX_NAMESPACE::TensorProto& initializer,
std::vector<uint8_t>& unpacked_tensor);
} // namespace utils
} // namespace onnxruntime