diff --git a/include/onnxruntime/core/session/onnxruntime_c_api.h b/include/onnxruntime/core/session/onnxruntime_c_api.h index 309b21e9b3..32487aca1a 100644 --- a/include/onnxruntime/core/session/onnxruntime_c_api.h +++ b/include/onnxruntime/core/session/onnxruntime_c_api.h @@ -90,7 +90,7 @@ extern "C" { #endif // Copied from TensorProto::DataType -// Currently, Ort doesn't support complex64, complex128, bfloat16 types +// Currently, Ort doesn't support complex64, complex128 typedef enum ONNXTensorElementDataType { ONNX_TENSOR_ELEMENT_DATA_TYPE_UNDEFINED, ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT, // maps to c type float diff --git a/include/onnxruntime/core/session/onnxruntime_cxx_api.h b/include/onnxruntime/core/session/onnxruntime_cxx_api.h index 890d4f6112..5026cef60f 100644 --- a/include/onnxruntime/core/session/onnxruntime_cxx_api.h +++ b/include/onnxruntime/core/session/onnxruntime_cxx_api.h @@ -99,6 +99,77 @@ ORT_DEFINE_RELEASE(ModelMetadata); ORT_DEFINE_RELEASE(ThreadingOptions); ORT_DEFINE_RELEASE(IoBinding); +/*! \class Ort::Float16_t + * \brief it is a structure that represents float16 data. + * \details It is necessary for type dispatching to make use of C++ API + * The type is implicitly convertible to/from uint16_t. + * The size of the structure should align with uint16_t and one can freely cast + * uint16_t buffers to/from Ort::Float16_t to feed and retrieve data. + * + * Generally, you can feed any of your types as float16/bfloat16 data to create a tensor + * on top of it, providing it can form a contiguous buffer with 16-bit elements with no padding. + * And you can also feed an array of uint16_t elements directly. 
For example, + * + * \code{.unparsed} + * uint16_t values[] = { 15360, 16384, 16896, 17408, 17664}; + * constexpr size_t values_length = sizeof(values) / sizeof(values[0]); + * std::vector dims = {values_length}; // one dimensional example + * Ort::MemoryInfo info("Cpu", OrtDeviceAllocator, 0, OrtMemTypeDefault); + * // Note we are passing bytes count in this api, not number of elements -> sizeof(values) + * auto float16_tensor = Ort::Value::CreateTensor(info, values, sizeof(values), + * dims.data(), dims.size(), ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT16); + * \endcode + * + * Here is another example, a little bit more elaborate. Let's assume that you use your own float16 type and you want to use + * a templated version of the API above so the type is automatically set based on your type. You will need to supply an extra + * template specialization. + * + * \code{.unparsed} + * namespace yours { struct half {}; } // assume this is your type, define this: + * namespace Ort { + * template<> + * struct TypeToTensorType { static constexpr ONNXTensorElementDataType type = ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT16; }; + * } //namespace Ort + * + * std::vector values; + * std::vector dims = {values.size()}; // one dimensional example + * Ort::MemoryInfo info("Cpu", OrtDeviceAllocator, 0, OrtMemTypeDefault); + * // Here we are passing element count -> values.size() + * auto float16_tensor = Ort::Value::CreateTensor(info, values.data(), values.size(), dims.data(), dims.size()); + * + * \endcode + */ +struct Float16_t { + uint16_t value; + constexpr Float16_t() noexcept : value(0) {} + constexpr Float16_t(uint16_t v) noexcept : value(v) {} + constexpr operator uint16_t() const noexcept { return value; } + constexpr bool operator==(const Float16_t& rhs) const noexcept { return value == rhs.value; }; + constexpr bool operator!=(const Float16_t& rhs) const noexcept { return value != rhs.value; }; +}; + +static_assert(sizeof(Float16_t) == sizeof(uint16_t), "Sizes must match"); + +/*! 
\class Ort::BFloat16_t + * \brief is a structure that represents bfloat16 data. + * \details It is necessary for type dispatching to make use of C++ API + * The type is implicitly convertible to/from uint16_t. + * The size of the structure should align with uint16_t and one can freely cast + * uint16_t buffers to/from Ort::BFloat16_t to feed and retrieve data. + * + * See also code examples for Float16_t above. + */ +struct BFloat16_t { + uint16_t value; + constexpr BFloat16_t() noexcept : value(0) {} + constexpr BFloat16_t(uint16_t v) noexcept : value(v) {} + constexpr operator uint16_t() const noexcept { return value; } + constexpr bool operator==(const BFloat16_t& rhs) const noexcept { return value == rhs.value; }; + constexpr bool operator!=(const BFloat16_t& rhs) const noexcept { return value != rhs.value; }; +}; + +static_assert(sizeof(BFloat16_t) == sizeof(uint16_t), "Sizes must match"); + // This is used internally by the C++ API. This is the common base class used by the wrapper objects. 
template struct Base { @@ -252,7 +323,6 @@ struct SessionOptions : Base { SessionOptions& AddConfigEntry(const char* config_key, const char* config_value); SessionOptions& AddInitializer(const char* name, const OrtValue* ort_val); OrtStatus* OrtSessionOptionsAppendExecutionProvider_CUDA(OrtSessionOptions* options, OrtCUDAProviderOptions* cuda_options); - }; struct ModelMetadata : Base { @@ -518,8 +588,7 @@ template struct CustomOpBase : OrtCustomOp { CustomOpBase() { OrtCustomOp::version = ORT_API_VERSION; - OrtCustomOp::CreateKernel = [](const OrtCustomOp* this_, const OrtApi* api, const OrtKernelInfo* info) { - return static_cast(this_)->CreateKernel(*api, info); }; + OrtCustomOp::CreateKernel = [](const OrtCustomOp* this_, const OrtApi* api, const OrtKernelInfo* info) { return static_cast(this_)->CreateKernel(*api, info); }; OrtCustomOp::GetName = [](const OrtCustomOp* this_) { return static_cast(this_)->GetName(); }; OrtCustomOp::GetExecutionProviderType = [](const OrtCustomOp* this_) { return static_cast(this_)->GetExecutionProviderType(); }; diff --git a/include/onnxruntime/core/session/onnxruntime_cxx_inline.h b/include/onnxruntime/core/session/onnxruntime_cxx_inline.h index e0fe933d9d..0172dc7867 100644 --- a/include/onnxruntime/core/session/onnxruntime_cxx_inline.h +++ b/include/onnxruntime/core/session/onnxruntime_cxx_inline.h @@ -27,6 +27,10 @@ template struct TypeToTensorType; template <> struct TypeToTensorType { static constexpr ONNXTensorElementDataType type = ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT; }; +template<> +struct TypeToTensorType { static constexpr ONNXTensorElementDataType type = ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT16; }; +template<> +struct TypeToTensorType { static constexpr ONNXTensorElementDataType type = ONNX_TENSOR_ELEMENT_DATA_TYPE_BFLOAT16; }; template <> struct TypeToTensorType { static constexpr ONNXTensorElementDataType type = ONNX_TENSOR_ELEMENT_DATA_TYPE_DOUBLE; }; template <> diff --git 
a/onnxruntime/test/framework/data_types_test.cc b/onnxruntime/test/framework/data_types_test.cc index b764062037..222271c6e3 100644 --- a/onnxruntime/test/framework/data_types_test.cc +++ b/onnxruntime/test/framework/data_types_test.cc @@ -9,6 +9,9 @@ #include "core/graph/onnx_protobuf.h" #include "gtest/gtest.h" +#include "core/util/math.h" +#include + #ifdef __GNUC__ #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wignored-qualifiers" @@ -433,7 +436,7 @@ TEST_F(DataTypeTest, BFloat16Test) { FloatToBFloat16(sample, converted, sizeof(sample) / sizeof(float)); for (size_t i = 0; i < sizeof(sample) / sizeof(float); ++i) { const double diff = std::fabs(sample[i] - converted[i].ToFloat()); - if (diff > FLT_EPSILON || (std::isnan(diff) && !std::isnan(sample[i]))) { + if ((std::isnan(diff) && !std::isnan(sample[i])) || diff > FLT_EPSILON) { EXPECT_TRUE(false); } } @@ -442,7 +445,7 @@ TEST_F(DataTypeTest, BFloat16Test) { BFloat16ToFloat(converted, back_converted, sizeof(sample) / sizeof(float)); for (size_t i = 0; i < sizeof(sample) / sizeof(float); ++i) { const double diff = std::fabs(sample[i] - back_converted[i]); - if (diff > FLT_EPSILON || (std::isnan(diff) && !std::isnan(sample[i]))) { + if ((std::isnan(diff) && !std::isnan(sample[i])) || diff > FLT_EPSILON) { EXPECT_TRUE(false); } } diff --git a/onnxruntime/test/shared_lib/test_inference.cc b/onnxruntime/test/shared_lib/test_inference.cc index 4108176e83..12524be636 100644 --- a/onnxruntime/test/shared_lib/test_inference.cc +++ b/onnxruntime/test/shared_lib/test_inference.cc @@ -780,7 +780,7 @@ TEST(CApiTest, create_tensor_with_data) { std::vector dims = {4}; Ort::Value tensor = Ort::Value::CreateTensor(info, values, values_length, dims.data(), dims.size()); - float* new_pointer = tensor.GetTensorMutableData(); + const float* new_pointer = tensor.GetTensorData(); ASSERT_EQ(new_pointer, values); auto type_info = tensor.GetTypeInfo(); @@ -790,6 +790,52 @@ TEST(CApiTest, create_tensor_with_data) { 
ASSERT_EQ(1u, tensor_info.GetDimensionsCount()); } +TEST(CApiTest, create_tensor_with_data_float16) { + // Example with C++. However, what we are feeding underneath is really + // a continuous buffer of uint16_t + // Use 3rd party libraries such as Eigen to convert floats and doubles to float16 types. + Ort::Float16_t values[] = { 15360, 16384, 16896, 17408, 17664}; // 1.f, 2.f, 3.f, 4.f, 5.f + constexpr size_t values_length = sizeof(values) / sizeof(values[0]); + + std::vector dims = {values_length}; + Ort::MemoryInfo info("Cpu", OrtDeviceAllocator, 0, OrtMemTypeDefault); + + Ort::Value tensor = Ort::Value::CreateTensor(info, values, values_length, dims.data(), dims.size()); + const auto* new_pointer = tensor.GetTensorData(); + ASSERT_EQ(new_pointer, values); + auto type_info = tensor.GetTypeInfo(); + auto tensor_info = type_info.GetTensorTypeAndShapeInfo(); + ASSERT_NE(tensor_info, nullptr); + ASSERT_EQ(1u, tensor_info.GetDimensionsCount()); + ASSERT_EQ(tensor_info.GetElementType(), ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT16); + + Ort::Float16_t value_at_1 = tensor.At({1}); + ASSERT_EQ(values[1], value_at_1); +} + +TEST(CApiTest, create_tensor_with_data_bfloat16) { + // Example with C++. However, what we are feeding underneath is really + // a continuous buffer of uint16_t + // Conversion from float to bfloat16 is simple. Strip off half of the bytes from float. 
+ Ort::BFloat16_t values[] = {16256, 16384, 16448, 16512, 16544}; // 1.f, 2.f, 3.f, 4.f, 5.f + constexpr size_t values_length = sizeof(values) / sizeof(values[0]); + std::vector dims = {values_length}; + + Ort::MemoryInfo info("Cpu", OrtDeviceAllocator, 0, OrtMemTypeDefault); + + Ort::Value tensor = Ort::Value::CreateTensor(info, values, values_length, dims.data(), dims.size()); + const auto* new_pointer = tensor.GetTensorData(); + ASSERT_EQ(new_pointer, values); + auto type_info = tensor.GetTypeInfo(); + auto tensor_info = type_info.GetTensorTypeAndShapeInfo(); + ASSERT_NE(tensor_info, nullptr); + ASSERT_EQ(1u, tensor_info.GetDimensionsCount()); + ASSERT_EQ(tensor_info.GetElementType(), ONNX_TENSOR_ELEMENT_DATA_TYPE_BFLOAT16); + + Ort::BFloat16_t value_at_1 = tensor.At({1}); + ASSERT_EQ(values[1], value_at_1); +} + TEST(CApiTest, access_tensor_data_elements) { /** * Create a 2x3 data blob that looks like: