mirror of
https://github.com/saymrwulf/onnxruntime.git
synced 2026-06-06 00:03:22 +00:00
Add tag types for Ort::Float16_t and Ort::BFloat16_t structs (#5716)
Add tag types for Ort::Float16_t and Ort::BFloat16_t structs that contain uint16_t values for float16 and bfloat16. These will serve as type dispatching types for the C++ API. They are of uint16_t size, and arrays of these types can be used to create Tensors of the corresponding types. Make documentation Doxygen compliant.
This commit is contained in:
parent
fff85a6a35
commit
2bf5046d4e
5 changed files with 129 additions and 7 deletions
|
|
@ -90,7 +90,7 @@ extern "C" {
|
|||
#endif
|
||||
|
||||
// Copied from TensorProto::DataType
|
||||
// Currently, Ort doesn't support complex64, complex128, bfloat16 types
|
||||
// Currently, Ort doesn't support complex64, complex128
|
||||
typedef enum ONNXTensorElementDataType {
|
||||
ONNX_TENSOR_ELEMENT_DATA_TYPE_UNDEFINED,
|
||||
ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT, // maps to c type float
|
||||
|
|
|
|||
|
|
@ -99,6 +99,77 @@ ORT_DEFINE_RELEASE(ModelMetadata);
|
|||
ORT_DEFINE_RELEASE(ThreadingOptions);
|
||||
ORT_DEFINE_RELEASE(IoBinding);
|
||||
|
||||
/*! \class Ort::Float16_t
|
||||
* \brief it is a structure that represents float16 data.
|
||||
* \details It is necessary for type dispatching to make use of C++ API
|
||||
* The type is implicitly convertible to/from uint16_t.
|
||||
* The size of the structure should align with uint16_t and one can freely cast
|
||||
* uint16_t buffers to/from Ort::Float16_t to feed and retrieve data.
|
||||
*
|
||||
* Generally, you can feed any of your types as float16/bfloat16 data to create a tensor
|
||||
* on top of it, provided it can form a contiguous buffer of 16-bit elements with no padding.
|
||||
* You can also feed an array of uint16_t elements directly. For example,
|
||||
*
|
||||
* \code{.unparsed}
|
||||
* uint16_t values[] = { 15360, 16384, 16896, 17408, 17664};
|
||||
* constexpr size_t values_length = sizeof(values) / sizeof(values[0]);
|
||||
* std::vector<int64_t> dims = {values_length}; // one dimensional example
|
||||
* Ort::MemoryInfo info("Cpu", OrtDeviceAllocator, 0, OrtMemTypeDefault);
|
||||
* // Note we are passing bytes count in this api, not number of elements -> sizeof(values)
|
||||
* auto float16_tensor = Ort::Value::CreateTensor(info, values, sizeof(values),
|
||||
* dims.data(), dims.size(), ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT16);
|
||||
* \endcode
|
||||
*
|
||||
* Here is another example, a little bit more elaborate. Let's assume that you use your own float16 type and you want to use
|
||||
* a templated version of the API above so the type is automatically set based on your type. You will need to supply an extra
|
||||
* template specialization.
|
||||
*
|
||||
* \code{.unparsed}
|
||||
* namespace yours { struct half {}; } // assume this is your type, define this:
|
||||
* namespace Ort {
|
||||
* template<>
|
||||
* struct TypeToTensorType<yours::half> { static constexpr ONNXTensorElementDataType type = ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT16; };
|
||||
* } //namespace Ort
|
||||
*
|
||||
* std::vector<yours::half> values;
|
||||
* std::vector<int64_t> dims = {static_cast<int64_t>(values.size())}; // one dimensional example (explicit cast avoids a narrowing error)
|
||||
* Ort::MemoryInfo info("Cpu", OrtDeviceAllocator, 0, OrtMemTypeDefault);
|
||||
* // Here we are passing element count -> values.size()
|
||||
* auto float16_tensor = Ort::Value::CreateTensor<yours::half>(info, values.data(), values.size(), dims.data(), dims.size());
|
||||
*
|
||||
* \endcode
|
||||
*/
|
||||
struct Float16_t {
  // Raw 16-bit pattern of the float16 value. This type performs no
  // floating point arithmetic or conversion; it only carries the bits.
  uint16_t value;

  // Default-constructs with an all-zero bit pattern.
  constexpr Float16_t() noexcept : value(0) {}

  // Intentionally non-explicit: the type is meant to be implicitly
  // convertible from uint16_t so raw 16-bit buffers can be used directly.
  constexpr Float16_t(uint16_t v) noexcept : value(v) {}

  // Intentionally non-explicit: yields the stored 16-bit pattern.
  constexpr operator uint16_t() const noexcept { return value; }

  // Equality compares the stored bit patterns, not the numeric values
  // those patterns encode.
  constexpr bool operator==(const Float16_t& rhs) const noexcept { return value == rhs.value; }
  constexpr bool operator!=(const Float16_t& rhs) const noexcept { return value != rhs.value; }
};

// Arrays of Float16_t must be layout-compatible with arrays of uint16_t.
static_assert(sizeof(Float16_t) == sizeof(uint16_t), "Sizes must match");
|
||||
|
||||
/*! \class Ort::BFloat16_t
|
||||
* \brief It is a structure that represents bfloat16 data.
|
||||
* \details It is necessary for type dispatching to make use of C++ API
|
||||
* The type is implicitly convertible to/from uint16_t.
|
||||
* The size of the structure should align with uint16_t and one can freely cast
|
||||
* uint16_t buffers to/from Ort::BFloat16_t to feed and retrieve data.
|
||||
*
|
||||
* See also code examples for Float16_t above.
|
||||
*/
|
||||
struct BFloat16_t {
  // Raw 16-bit pattern of the bfloat16 value. This type performs no
  // floating point arithmetic or conversion; it only carries the bits.
  uint16_t value;

  // Default-constructs with an all-zero bit pattern.
  constexpr BFloat16_t() noexcept : value(0) {}

  // Intentionally non-explicit: the type is meant to be implicitly
  // convertible from uint16_t so raw 16-bit buffers can be used directly.
  constexpr BFloat16_t(uint16_t v) noexcept : value(v) {}

  // Intentionally non-explicit: yields the stored 16-bit pattern.
  constexpr operator uint16_t() const noexcept { return value; }

  // Equality compares the stored bit patterns, not the numeric values
  // those patterns encode.
  constexpr bool operator==(const BFloat16_t& rhs) const noexcept { return value == rhs.value; }
  constexpr bool operator!=(const BFloat16_t& rhs) const noexcept { return value != rhs.value; }
};

// Arrays of BFloat16_t must be layout-compatible with arrays of uint16_t.
static_assert(sizeof(BFloat16_t) == sizeof(uint16_t), "Sizes must match");
|
||||
|
||||
// This is used internally by the C++ API. This is the common base class used by the wrapper objects.
|
||||
template <typename T>
|
||||
struct Base {
|
||||
|
|
@ -252,7 +323,6 @@ struct SessionOptions : Base<OrtSessionOptions> {
|
|||
SessionOptions& AddConfigEntry(const char* config_key, const char* config_value);
|
||||
SessionOptions& AddInitializer(const char* name, const OrtValue* ort_val);
|
||||
OrtStatus* OrtSessionOptionsAppendExecutionProvider_CUDA(OrtSessionOptions* options, OrtCUDAProviderOptions* cuda_options);
|
||||
|
||||
};
|
||||
|
||||
struct ModelMetadata : Base<OrtModelMetadata> {
|
||||
|
|
@ -518,8 +588,7 @@ template <typename TOp, typename TKernel>
|
|||
struct CustomOpBase : OrtCustomOp {
|
||||
CustomOpBase() {
|
||||
OrtCustomOp::version = ORT_API_VERSION;
|
||||
OrtCustomOp::CreateKernel = [](const OrtCustomOp* this_, const OrtApi* api, const OrtKernelInfo* info) {
|
||||
return static_cast<const TOp*>(this_)->CreateKernel(*api, info); };
|
||||
OrtCustomOp::CreateKernel = [](const OrtCustomOp* this_, const OrtApi* api, const OrtKernelInfo* info) { return static_cast<const TOp*>(this_)->CreateKernel(*api, info); };
|
||||
OrtCustomOp::GetName = [](const OrtCustomOp* this_) { return static_cast<const TOp*>(this_)->GetName(); };
|
||||
|
||||
OrtCustomOp::GetExecutionProviderType = [](const OrtCustomOp* this_) { return static_cast<const TOp*>(this_)->GetExecutionProviderType(); };
|
||||
|
|
|
|||
|
|
@ -27,6 +27,10 @@ template <typename T>
|
|||
struct TypeToTensorType;
|
||||
template <>
|
||||
struct TypeToTensorType<float> { static constexpr ONNXTensorElementDataType type = ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT; };
|
||||
template<>
|
||||
struct TypeToTensorType<Float16_t> { static constexpr ONNXTensorElementDataType type = ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT16; };
|
||||
template<>
|
||||
struct TypeToTensorType<BFloat16_t> { static constexpr ONNXTensorElementDataType type = ONNX_TENSOR_ELEMENT_DATA_TYPE_BFLOAT16; };
|
||||
template <>
|
||||
struct TypeToTensorType<double> { static constexpr ONNXTensorElementDataType type = ONNX_TENSOR_ELEMENT_DATA_TYPE_DOUBLE; };
|
||||
template <>
|
||||
|
|
|
|||
|
|
@ -9,6 +9,9 @@
|
|||
#include "core/graph/onnx_protobuf.h"
|
||||
#include "gtest/gtest.h"
|
||||
|
||||
#include "core/util/math.h"
|
||||
#include <ostream>
|
||||
|
||||
#ifdef __GNUC__
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Wignored-qualifiers"
|
||||
|
|
@ -433,7 +436,7 @@ TEST_F(DataTypeTest, BFloat16Test) {
|
|||
FloatToBFloat16(sample, converted, sizeof(sample) / sizeof(float));
|
||||
for (size_t i = 0; i < sizeof(sample) / sizeof(float); ++i) {
|
||||
const double diff = std::fabs(sample[i] - converted[i].ToFloat());
|
||||
if (diff > FLT_EPSILON || (std::isnan(diff) && !std::isnan(sample[i]))) {
|
||||
if ((std::isnan(diff) && !std::isnan(sample[i])) || diff > FLT_EPSILON) {
|
||||
EXPECT_TRUE(false);
|
||||
}
|
||||
}
|
||||
|
|
@ -442,7 +445,7 @@ TEST_F(DataTypeTest, BFloat16Test) {
|
|||
BFloat16ToFloat(converted, back_converted, sizeof(sample) / sizeof(float));
|
||||
for (size_t i = 0; i < sizeof(sample) / sizeof(float); ++i) {
|
||||
const double diff = std::fabs(sample[i] - back_converted[i]);
|
||||
if (diff > FLT_EPSILON || (std::isnan(diff) && !std::isnan(sample[i]))) {
|
||||
if ((std::isnan(diff) && !std::isnan(sample[i])) || diff > FLT_EPSILON) {
|
||||
EXPECT_TRUE(false);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -780,7 +780,7 @@ TEST(CApiTest, create_tensor_with_data) {
|
|||
std::vector<int64_t> dims = {4};
|
||||
Ort::Value tensor = Ort::Value::CreateTensor<float>(info, values, values_length, dims.data(), dims.size());
|
||||
|
||||
float* new_pointer = tensor.GetTensorMutableData<float>();
|
||||
const float* new_pointer = tensor.GetTensorData<float>();
|
||||
ASSERT_EQ(new_pointer, values);
|
||||
|
||||
auto type_info = tensor.GetTypeInfo();
|
||||
|
|
@ -790,6 +790,52 @@ TEST(CApiTest, create_tensor_with_data) {
|
|||
ASSERT_EQ(1u, tensor_info.GetDimensionsCount());
|
||||
}
|
||||
|
||||
TEST(CApiTest, create_tensor_with_data_float16) {
  // C++ usage example. The memory handed to the tensor is simply a
  // contiguous run of 16-bit elements; Ort::Float16_t only tags the type.
  // To produce float16 bit patterns from float/double, use a third-party
  // library such as Eigen.
  Ort::Float16_t half_values[] = {15360, 16384, 16896, 17408, 17664};  // 1.f, 2.f, 3.f, 4.f, 5.f
  constexpr size_t element_count = sizeof(half_values) / sizeof(half_values[0]);

  std::vector<int64_t> shape = {element_count};
  Ort::MemoryInfo cpu_info("Cpu", OrtDeviceAllocator, 0, OrtMemTypeDefault);

  // The templated overload takes an element count (not a byte count).
  auto tensor = Ort::Value::CreateTensor<Ort::Float16_t>(cpu_info, half_values, element_count,
                                                         shape.data(), shape.size());

  // The tensor must expose the very same buffer that was passed in.
  const auto* tensor_data = tensor.GetTensorData<Ort::Float16_t>();
  ASSERT_EQ(tensor_data, half_values);

  auto type_info = tensor.GetTypeInfo();
  auto shape_info = type_info.GetTensorTypeAndShapeInfo();
  ASSERT_NE(shape_info, nullptr);
  ASSERT_EQ(1u, shape_info.GetDimensionsCount());
  ASSERT_EQ(shape_info.GetElementType(), ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT16);

  // Element access by index must return the value stored at that position.
  const std::vector<int64_t> second_element = {1};
  Ort::Float16_t fetched = tensor.At<Ort::Float16_t>(second_element);
  ASSERT_EQ(half_values[1], fetched);
}
|
||||
|
||||
TEST(CApiTest, create_tensor_with_data_bfloat16) {
  // C++ usage example. The memory handed to the tensor is simply a
  // contiguous run of 16-bit elements; Ort::BFloat16_t only tags the type.
  // Converting float to bfloat16 is simple: strip off half of the bytes
  // of the float.
  Ort::BFloat16_t bf16_values[] = {16256, 16384, 16448, 16512, 16544};  // 1.f, 2.f, 3.f, 4.f, 5.f
  constexpr size_t element_count = sizeof(bf16_values) / sizeof(bf16_values[0]);
  std::vector<int64_t> shape = {element_count};

  Ort::MemoryInfo cpu_info("Cpu", OrtDeviceAllocator, 0, OrtMemTypeDefault);

  // The templated overload takes an element count (not a byte count).
  auto tensor = Ort::Value::CreateTensor<Ort::BFloat16_t>(cpu_info, bf16_values, element_count,
                                                          shape.data(), shape.size());

  // The tensor must expose the very same buffer that was passed in.
  const auto* tensor_data = tensor.GetTensorData<Ort::BFloat16_t>();
  ASSERT_EQ(tensor_data, bf16_values);

  auto type_info = tensor.GetTypeInfo();
  auto shape_info = type_info.GetTensorTypeAndShapeInfo();
  ASSERT_NE(shape_info, nullptr);
  ASSERT_EQ(1u, shape_info.GetDimensionsCount());
  ASSERT_EQ(shape_info.GetElementType(), ONNX_TENSOR_ELEMENT_DATA_TYPE_BFLOAT16);

  // Element access by index must return the value stored at that position.
  const std::vector<int64_t> second_element = {1};
  Ort::BFloat16_t fetched = tensor.At<Ort::BFloat16_t>(second_element);
  ASSERT_EQ(bf16_values[1], fetched);
}
|
||||
|
||||
TEST(CApiTest, access_tensor_data_elements) {
|
||||
/**
|
||||
* Create a 2x3 data blob that looks like:
|
||||
|
|
|
|||
Loading…
Reference in a new issue