Add tag types Ort::Float16_t and Ort::BFloat16_t (#5716)

Add tag types Ort::Float16_t and Ort::BFloat16_t: structs that wrap a uint16_t value holding float16 and bfloat16 data, respectively. They serve as type-dispatching types for the C++ API. They have the same size as uint16_t, and arrays of these types can be used to create Tensors of the corresponding element types. Make the documentation Doxygen compliant.
2026-07-26 19:52:38 +00:00 · 2020-11-06 16:41:26 -08:00 · 2020-11-06 16:41:26 -08:00 · 2bf5046d4e
commit 2bf5046d4e
parent fff85a6a35
5 changed files with 129 additions and 7 deletions
--- a/include/onnxruntime/core/session/onnxruntime_c_api.h
+++ b/include/onnxruntime/core/session/onnxruntime_c_api.h
@ -90,7 +90,7 @@ extern "C" {
 #endif

 // Copied from TensorProto::DataType
-// Currently, Ort doesn't support complex64, complex128, bfloat16 types
+// Currently, Ort doesn't support complex64, complex128
 typedef enum ONNXTensorElementDataType {
  ONNX_TENSOR_ELEMENT_DATA_TYPE_UNDEFINED,
  ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT,   // maps to c type float
--- a/include/onnxruntime/core/session/onnxruntime_cxx_api.h
+++ b/include/onnxruntime/core/session/onnxruntime_cxx_api.h
@ -99,6 +99,77 @@ ORT_DEFINE_RELEASE(ModelMetadata);
 ORT_DEFINE_RELEASE(ThreadingOptions);
 ORT_DEFINE_RELEASE(IoBinding);

+/*! \class Ort::Float16_t
+  * \brief A structure that represents float16 data.
+  * \details It is necessary for type dispatching to make use of the C++ API.
+  * The type is implicitly convertible to/from uint16_t.
+  * The size of the structure matches that of uint16_t, so one can freely cast
+  * uint16_t buffers to/from Ort::Float16_t to feed and retrieve data.
+  * 
+  * Generally, you can feed any of your own types as float16/bfloat16 data to create a tensor
+  * on top of it, provided it forms a contiguous buffer of 16-bit elements with no padding.
+  * You can also feed an array of uint16_t elements directly. For example,
+  * 
+  * \code{.unparsed}
+  * uint16_t values[] = { 15360, 16384, 16896, 17408, 17664};
+  * constexpr size_t values_length = sizeof(values) / sizeof(values[0]);
+  * std::vector<int64_t> dims = {values_length};  // one dimensional example
+  * Ort::MemoryInfo info("Cpu", OrtDeviceAllocator, 0, OrtMemTypeDefault);
+  * // Note we are passing bytes count in this api, not number of elements -> sizeof(values)
+  * auto float16_tensor = Ort::Value::CreateTensor(info, values, sizeof(values), 
+  *                                                dims.data(), dims.size(), ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT16);
+  * \endcode
+  * 
+  * Here is another example, a little bit more elaborate. Let's assume that you use your own float16 type and you want to use
+  * a templated version of the API above so the type is automatically set based on your type. You will need to supply an extra
+  * template specialization.
+  * 
+  * \code{.unparsed}
+  * namespace yours { struct half {}; } // assume this is your type, define this:
+  * namespace Ort { 
+  * template<>
+  * struct TypeToTensorType<yours::half> { static constexpr ONNXTensorElementDataType type = ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT16; };
+  * } //namespace Ort
+  * 
+  * std::vector<yours::half> values;
+  * std::vector<int64_t> dims = {static_cast<int64_t>(values.size())}; // one dimensional example; the cast avoids a size_t -> int64_t narrowing error in braces
+  * Ort::MemoryInfo info("Cpu", OrtDeviceAllocator, 0, OrtMemTypeDefault);
+  * // Here we are passing element count -> values.size()
+  * auto float16_tensor = Ort::Value::CreateTensor<yours::half>(info, values.data(), values.size(), dims.data(), dims.size());
+  * 
+  *  \endcode
+  */
+struct Float16_t {
+  uint16_t value;  // raw 16-bit pattern of the float16 number; no float conversion is performed by this type
+  constexpr Float16_t() noexcept : value(0) {}  // default: all bits zero
+  constexpr Float16_t(uint16_t v) noexcept : value(v) {}  // deliberately non-explicit: allows implicit conversion from uint16_t
+  constexpr operator uint16_t() const noexcept { return value; }  // implicit conversion back to the stored bits
+  constexpr bool operator==(const Float16_t& rhs) const noexcept { return value == rhs.value; };  // compares the stored bits, not numeric values
+  constexpr bool operator!=(const Float16_t& rhs) const noexcept { return value != rhs.value; };  // true when the stored bits differ
+};
+
+static_assert(sizeof(Float16_t) == sizeof(uint16_t), "Sizes must match");
+
+/*! \class Ort::BFloat16_t
+  * \brief A structure that represents bfloat16 data.
+  * \details It is necessary for type dispatching to make use of the C++ API.
+  * The type is implicitly convertible to/from uint16_t.
+  * The size of the structure matches that of uint16_t, so one can freely cast
+  * uint16_t buffers to/from Ort::BFloat16_t to feed and retrieve data.
+  * 
+  * See also code examples for Float16_t above.
+  */
+struct BFloat16_t {
+  uint16_t value;  // raw 16-bit pattern of the bfloat16 number; no float conversion is performed by this type
+  constexpr BFloat16_t() noexcept : value(0) {}  // default: all bits zero
+  constexpr BFloat16_t(uint16_t v) noexcept : value(v) {}  // deliberately non-explicit: allows implicit conversion from uint16_t
+  constexpr operator uint16_t() const noexcept { return value; }  // implicit conversion back to the stored bits
+  constexpr bool operator==(const BFloat16_t& rhs) const noexcept { return value == rhs.value; };  // compares the stored bits, not numeric values
+  constexpr bool operator!=(const BFloat16_t& rhs) const noexcept { return value != rhs.value; };  // true when the stored bits differ
+};
+
+static_assert(sizeof(BFloat16_t) == sizeof(uint16_t), "Sizes must match");
+
 // This is used internally by the C++ API. This is the common base class used by the wrapper objects.
 template <typename T>
 struct Base {
@ -252,7 +323,6 @@ struct SessionOptions : Base<OrtSessionOptions> {
  SessionOptions& AddConfigEntry(const char* config_key, const char* config_value);
  SessionOptions& AddInitializer(const char* name, const OrtValue* ort_val);
  OrtStatus* OrtSessionOptionsAppendExecutionProvider_CUDA(OrtSessionOptions* options, OrtCUDAProviderOptions* cuda_options);
-
 };

 struct ModelMetadata : Base<OrtModelMetadata> {
@ -518,8 +588,7 @@ template <typename TOp, typename TKernel>
 struct CustomOpBase : OrtCustomOp {
  CustomOpBase() {
    OrtCustomOp::version = ORT_API_VERSION;
-    OrtCustomOp::CreateKernel = [](const OrtCustomOp* this_, const OrtApi* api, const OrtKernelInfo* info) {
-      return static_cast<const TOp*>(this_)->CreateKernel(*api, info); };
+    OrtCustomOp::CreateKernel = [](const OrtCustomOp* this_, const OrtApi* api, const OrtKernelInfo* info) { return static_cast<const TOp*>(this_)->CreateKernel(*api, info); };
    OrtCustomOp::GetName = [](const OrtCustomOp* this_) { return static_cast<const TOp*>(this_)->GetName(); };

    OrtCustomOp::GetExecutionProviderType = [](const OrtCustomOp* this_) { return static_cast<const TOp*>(this_)->GetExecutionProviderType(); };
--- a/include/onnxruntime/core/session/onnxruntime_cxx_inline.h
+++ b/include/onnxruntime/core/session/onnxruntime_cxx_inline.h
@ -27,6 +27,10 @@ template <typename T>
 struct TypeToTensorType;
 template <>
 struct TypeToTensorType<float> { static constexpr ONNXTensorElementDataType type = ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT; };
+template<>
+struct TypeToTensorType<Float16_t> { static constexpr ONNXTensorElementDataType type = ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT16; };  // maps Ort::Float16_t to the FLOAT16 element type
+template<>
+struct TypeToTensorType<BFloat16_t> { static constexpr ONNXTensorElementDataType type = ONNX_TENSOR_ELEMENT_DATA_TYPE_BFLOAT16; };  // maps Ort::BFloat16_t to the BFLOAT16 element type
 template <>
 struct TypeToTensorType<double> { static constexpr ONNXTensorElementDataType type = ONNX_TENSOR_ELEMENT_DATA_TYPE_DOUBLE; };
 template <>
--- a/onnxruntime/test/framework/data_types_test.cc
+++ b/onnxruntime/test/framework/data_types_test.cc
@ -9,6 +9,9 @@
 #include "core/graph/onnx_protobuf.h"
 #include "gtest/gtest.h"

+#include "core/util/math.h"
+#include <ostream>
+
 #ifdef __GNUC__
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wignored-qualifiers"
@ -433,7 +436,7 @@ TEST_F(DataTypeTest, BFloat16Test) {
    FloatToBFloat16(sample, converted, sizeof(sample) / sizeof(float));
    for (size_t i = 0; i < sizeof(sample) / sizeof(float); ++i) {
      const double diff = std::fabs(sample[i] - converted[i].ToFloat());
-      if (diff > FLT_EPSILON || (std::isnan(diff) && !std::isnan(sample[i]))) {
+      if ((std::isnan(diff) && !std::isnan(sample[i])) || diff > FLT_EPSILON) {
        EXPECT_TRUE(false);
      }
    }
@ -442,7 +445,7 @@ TEST_F(DataTypeTest, BFloat16Test) {
    BFloat16ToFloat(converted, back_converted, sizeof(sample) / sizeof(float));
    for (size_t i = 0; i < sizeof(sample) / sizeof(float); ++i) {
      const double diff = std::fabs(sample[i] - back_converted[i]);
-      if (diff > FLT_EPSILON || (std::isnan(diff) && !std::isnan(sample[i]))) {
+      if ((std::isnan(diff) && !std::isnan(sample[i])) || diff > FLT_EPSILON) {
        EXPECT_TRUE(false);
      }
    }
--- a/onnxruntime/test/shared_lib/test_inference.cc
+++ b/onnxruntime/test/shared_lib/test_inference.cc
@ -780,7 +780,7 @@ TEST(CApiTest, create_tensor_with_data) {
  std::vector<int64_t> dims = {4};
  Ort::Value tensor = Ort::Value::CreateTensor<float>(info, values, values_length, dims.data(), dims.size());

-  float* new_pointer = tensor.GetTensorMutableData<float>();
+  const float* new_pointer = tensor.GetTensorData<float>();
  ASSERT_EQ(new_pointer, values);

  auto type_info = tensor.GetTypeInfo();
@ -790,6 +790,52 @@ TEST(CApiTest, create_tensor_with_data) {
  ASSERT_EQ(1u, tensor_info.GetDimensionsCount());
 }

+TEST(CApiTest, create_tensor_with_data_float16) {
+  // Example with C++. However, what we are feeding underneath is really
+  // a contiguous buffer of uint16_t holding the raw float16 bit patterns.
+  // Use 3rd party libraries such as Eigen to convert floats and doubles to float16 types.
+  Ort::Float16_t values[] = { 15360, 16384, 16896, 17408, 17664}; // 1.f, 2.f, 3.f, 4.f, 5.f
+  constexpr size_t values_length = sizeof(values) / sizeof(values[0]);  // element count, not byte count
+
+  std::vector<int64_t> dims = {values_length};  // one-dimensional shape
+  Ort::MemoryInfo info("Cpu", OrtDeviceAllocator, 0, OrtMemTypeDefault);
+
+  Ort::Value tensor = Ort::Value::CreateTensor<Ort::Float16_t>(info, values, values_length, dims.data(), dims.size());  // element type comes from the template argument
+  const auto* new_pointer = tensor.GetTensorData<Ort::Float16_t>();
+  ASSERT_EQ(new_pointer, values);  // the tensor is expected to reference the caller's buffer, not a copy
+  auto type_info = tensor.GetTypeInfo();
+  auto tensor_info = type_info.GetTensorTypeAndShapeInfo();
+  ASSERT_NE(tensor_info, nullptr);
+  ASSERT_EQ(1u, tensor_info.GetDimensionsCount());
+  ASSERT_EQ(tensor_info.GetElementType(), ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT16);  // Float16_t must dispatch to FLOAT16
+
+  Ort::Float16_t value_at_1 = tensor.At<Ort::Float16_t>({1});  // read the element at index 1 through the typed accessor
+  ASSERT_EQ(values[1], value_at_1);
+}
+
+TEST(CApiTest, create_tensor_with_data_bfloat16) {
+  // Example with C++. However, what we are feeding underneath is really
+  // a contiguous buffer of uint16_t holding the raw bfloat16 bit patterns.
+  // Conversion from float to bfloat16 is simple: keep the upper 16 bits of the 32-bit float (truncation).
+  Ort::BFloat16_t values[] =  {16256, 16384, 16448, 16512, 16544}; // 1.f, 2.f, 3.f, 4.f, 5.f
+  constexpr size_t values_length = sizeof(values) / sizeof(values[0]);  // element count, not byte count
+  std::vector<int64_t> dims = {values_length};  // one-dimensional shape
+
+  Ort::MemoryInfo info("Cpu", OrtDeviceAllocator, 0, OrtMemTypeDefault);
+
+  Ort::Value tensor = Ort::Value::CreateTensor<Ort::BFloat16_t>(info, values, values_length, dims.data(), dims.size());  // element type comes from the template argument
+  const auto* new_pointer = tensor.GetTensorData<Ort::BFloat16_t>();
+  ASSERT_EQ(new_pointer, values);  // the tensor is expected to reference the caller's buffer, not a copy
+  auto type_info = tensor.GetTypeInfo();
+  auto tensor_info = type_info.GetTensorTypeAndShapeInfo();
+  ASSERT_NE(tensor_info, nullptr);
+  ASSERT_EQ(1u, tensor_info.GetDimensionsCount());
+  ASSERT_EQ(tensor_info.GetElementType(), ONNX_TENSOR_ELEMENT_DATA_TYPE_BFLOAT16);  // BFloat16_t must dispatch to BFLOAT16
+
+  Ort::BFloat16_t value_at_1 = tensor.At<Ort::BFloat16_t>({1});  // read the element at index 1 through the typed accessor
+  ASSERT_EQ(values[1], value_at_1);
+}
+
 TEST(CApiTest, access_tensor_data_elements) {
  /**
   * Create a 2x3 data blob that looks like: