Reduce tensorprotoutils binary size (#6634)

* Move type-agnostic code out of UnpackInitializerData. Refactor the unpack tensor logic to switch on data size. Add test cases. * Remove templatization of more parts.
2026-07-27 20:02:15 +00:00 · 2021-02-12 16:48:13 +10:00 · 2021-02-12 16:48:13 +10:00 · 1916e35bea
commit 1916e35bea
parent fba46a76bc
7 changed files with 345 additions and 224 deletions
--- a/onnxruntime/core/framework/endian_utils.cc
+++ b/onnxruntime/core/framework/endian_utils.cc
@ -26,9 +26,9 @@ OutputIt ReverseCopy(BidirIt first, BidirIt last, OutputIt d_first) {

 }  // namespace

-void SwapByteOrderCopy(
-    size_t element_size_in_bytes,
-    gsl::span<const char> source_bytes, gsl::span<char> destination_bytes) {
+void SwapByteOrderCopy(size_t element_size_in_bytes,
+                       gsl::span<const unsigned char> source_bytes,
+                       gsl::span<unsigned char> destination_bytes) {
  assert(element_size_in_bytes > 0);
  assert(source_bytes.size_bytes() % element_size_in_bytes == 0);
  assert(source_bytes.size_bytes() == destination_bytes.size_bytes());
@ -40,28 +40,38 @@ void SwapByteOrderCopy(
  for (size_t element_offset = 0, element_offset_end = source_bytes.size_bytes();
       element_offset < element_offset_end;
       element_offset += element_size_in_bytes) {
-    const auto source_element_bytes =
-        source_bytes.subspan(element_offset, element_size_in_bytes);
-    const auto dest_element_bytes =
-        destination_bytes.subspan(element_offset, element_size_in_bytes);
-    ReverseCopy(
-        source_element_bytes.data(),
-        source_element_bytes.data() + source_element_bytes.size_bytes(),
-        dest_element_bytes.data());
+    const auto source_element_bytes = source_bytes.subspan(element_offset, element_size_in_bytes);
+    const auto dest_element_bytes = destination_bytes.subspan(element_offset, element_size_in_bytes);
+    ReverseCopy(source_element_bytes.data(),
+                source_element_bytes.data() + source_element_bytes.size_bytes(),
+                dest_element_bytes.data());
  }
 }

 namespace detail {

-void CopyLittleEndian(size_t element_size_in_bytes, gsl::span<const char> source_bytes, gsl::span<char> destination_bytes) {
+Status CopyLittleEndian(size_t element_size_in_bytes,
+                        gsl::span<const unsigned char> source_bytes,
+                        gsl::span<unsigned char> destination_bytes) {
+  ORT_RETURN_IF(source_bytes.size_bytes() != destination_bytes.size_bytes(),
+                "source and destination buffer size mismatch");
+
  if (endian::native == endian::little) {
    std::memcpy(destination_bytes.data(), source_bytes.data(), source_bytes.size_bytes());
  } else {
    SwapByteOrderCopy(element_size_in_bytes, source_bytes, destination_bytes);
  }
+
+  return Status::OK();
 }

 }  // namespace detail

+common::Status ReadLittleEndian(size_t element_size,
+                                gsl::span<const unsigned char> source_bytes,
+                                gsl::span<unsigned char> destination_bytes) {
+  return detail::CopyLittleEndian(element_size, source_bytes, destination_bytes);
+}
+
 }  // namespace utils
 }  // namespace onnxruntime
--- a/onnxruntime/core/framework/endian_utils.h
+++ b/onnxruntime/core/framework/endian_utils.h
@ -27,8 +27,9 @@ namespace utils {
 * @param source_bytes The source byte span.
 * @param destination_bytes The destination byte span.
 */
-void SwapByteOrderCopy(
-    size_t element_size_in_bytes, gsl::span<const char> source_bytes, gsl::span<char> destination_bytes);
+void SwapByteOrderCopy(size_t element_size_in_bytes,
+                       gsl::span<const unsigned char> source_bytes,
+                       gsl::span<unsigned char> destination_bytes);

 namespace detail {

@ -36,43 +37,45 @@ namespace detail {
 * Copies between two buffers where one is little-endian and the other has
 * native endian-ness.
 */
-void CopyLittleEndian(
-    size_t element_size_in_bytes, gsl::span<const char> source_bytes, gsl::span<char> destination_bytes);
+Status CopyLittleEndian(size_t element_size_in_bytes,
+                        gsl::span<const unsigned char> source_bytes,
+                        gsl::span<unsigned char> destination_bytes);

 }  // namespace detail

 /**
 * Reads from a little-endian source.
 */
+common::Status ReadLittleEndian(size_t element_size,
+                                gsl::span<const unsigned char> source_bytes,
+                                gsl::span<unsigned char> destination_bytes);
+
+/**
+ * Reads from a little-endian source, with a compile-time check that T is trivially copyable.
+ * @remarks The check is skipped when building with GCC versions older than 5.
+ */
 template <typename T>
-common::Status ReadLittleEndian(gsl::span<const char> source_bytes, gsl::span<T> destination) {
+common::Status ReadLittleEndian(gsl::span<const unsigned char> source_bytes, gsl::span<T> destination) {
 // std::is_trivially_copyable is not implemented in older versions of GCC
 #if !defined(__GNUC__) || __GNUC__ >= 5
  static_assert(std::is_trivially_copyable<T>::value, "T must be trivially copyable");
 #endif
-  ORT_RETURN_IF_NOT(source_bytes.size_bytes() == destination.size_bytes(),
-                    "source and destination buffer size mismatch");
-  const auto destination_bytes = gsl::make_span(
-      reinterpret_cast<char*>(destination.data()), destination.size_bytes());
-  detail::CopyLittleEndian(sizeof(T), source_bytes, destination_bytes);
-  return common::Status::OK();
+  const auto destination_bytes = gsl::make_span(reinterpret_cast<unsigned char*>(destination.data()),
+                                                destination.size_bytes());
+  return ReadLittleEndian(sizeof(T), source_bytes, destination_bytes);
 }

 /**
 * Writes to a little-endian destination.
 */
 template <typename T>
-common::Status WriteLittleEndian(gsl::span<const T> source, gsl::span<char> destination_bytes) {
+common::Status WriteLittleEndian(gsl::span<const T> source, gsl::span<unsigned char> destination_bytes) {
 // std::is_trivially_copyable is not implemented in older versions of GCC
 #if !defined(__GNUC__) || __GNUC__ >= 5
  static_assert(std::is_trivially_copyable<T>::value, "T must be trivially copyable");
 #endif
-  ORT_RETURN_IF_NOT(source.size_bytes() == destination_bytes.size_bytes(),
-                    "source and destination buffer size mismatch");
-  const auto source_bytes = gsl::make_span(
-      reinterpret_cast<const char*>(source.data()), source.size_bytes());
-  detail::CopyLittleEndian(sizeof(T), source_bytes, destination_bytes);
-  return common::Status::OK();
+  const auto source_bytes = gsl::make_span(reinterpret_cast<const unsigned char*>(source.data()), source.size_bytes());
+  return detail::CopyLittleEndian(sizeof(T), source_bytes, destination_bytes);
 }

 }  // namespace utils
--- a/onnxruntime/core/framework/tensorprotoutils.cc
+++ b/onnxruntime/core/framework/tensorprotoutils.cc
@ -97,29 +97,44 @@ std::vector<int64_t> GetTensorShapeFromTensorProto(const ONNX_NAMESPACE::TensorP
 }

 // This function doesn't support string tensors
-template <typename T>
-static Status UnpackTensorWithRawData(const void* raw_data, size_t raw_data_length, size_t expected_size,
-                                      /*out*/ T* p_data) {
+static Status UnpackTensorWithRawDataImpl(const void* raw_data, size_t raw_data_len,
+                                          size_t expected_num_elements, size_t element_size,
+                                          /*out*/ unsigned char* p_data) {
+  auto src = gsl::make_span<const unsigned char>(static_cast<const unsigned char*>(raw_data), raw_data_len);
+  auto dst = gsl::make_span<unsigned char>(p_data, expected_num_elements * element_size);
+
  size_t expected_size_in_bytes;
-  if (!onnxruntime::IAllocator::CalcMemSizeForArray(expected_size, sizeof(T), &expected_size_in_bytes)) {
+  if (!onnxruntime::IAllocator::CalcMemSizeForArray(expected_num_elements, element_size, &expected_size_in_bytes)) {
    return Status(onnxruntime::common::ONNXRUNTIME, onnxruntime::common::INVALID_ARGUMENT, "size overflow");
  }
-  if (raw_data_length != expected_size_in_bytes)
+
+  if (dst.size_bytes() != expected_size_in_bytes) {
    return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
                           "UnpackTensor: the pre-allocated size does not match the raw data size, expected ",
-                           expected_size_in_bytes, ", got ", raw_data_length);
+                           expected_size_in_bytes, ", got ", dst.size_bytes());
+  }

-  const char* const raw_data_bytes = reinterpret_cast<const char*>(raw_data);
-  ORT_RETURN_IF_ERROR(onnxruntime::utils::ReadLittleEndian(
-      gsl::make_span(raw_data_bytes, raw_data_length), gsl::make_span(p_data, expected_size)));
-  return Status::OK();
+  // ReadLittleEndian checks src and dst buffers are the same size
+  return onnxruntime::utils::ReadLittleEndian(element_size, src, dst);
+}
+
+template <typename T>
+Status UnpackTensorWithRawData(const void* raw_data, size_t raw_data_len, size_t expected_num_elements,
+                               /*out*/ T* p_data) {
+  // std::is_trivially_copyable is not implemented in older versions of GCC
+#if !defined(__GNUC__) || __GNUC__ >= 5
+  static_assert(std::is_trivially_copyable<T>::value, "T must be trivially copyable");
+#endif
+
+  return UnpackTensorWithRawDataImpl(raw_data, raw_data_len, expected_num_elements, sizeof(T),
+                                     reinterpret_cast<unsigned char*>(p_data));
 }

 static Status GetExternalDataInfo(const ONNX_NAMESPACE::TensorProto& tensor_proto,
                                  const ORTCHAR_T* tensor_proto_dir,
                                  std::basic_string<ORTCHAR_T>& external_file_path,
                                  onnxruntime::FileOffsetType& file_offset,
-                                  SafeInt<size_t>& tensor_data_length) {
+                                  SafeInt<size_t>& tensor_byte_size) {
  ORT_RETURN_IF_NOT(onnxruntime::utils::HasExternalData(tensor_proto),
                    "Tensor does not have external data to read from.");

@ -137,16 +152,12 @@ static Status GetExternalDataInfo(const ONNX_NAMESPACE::TensorProto& tensor_prot

  file_offset = external_data_info->GetOffset();

-  ORT_RETURN_IF_ERROR(onnxruntime::utils::GetSizeInBytesFromTensorProto<0>(
-      tensor_proto, &tensor_data_length));
+  ORT_RETURN_IF_ERROR(onnxruntime::utils::GetSizeInBytesFromTensorProto<0>(tensor_proto, &tensor_byte_size));
  const size_t external_data_length = external_data_info->GetLength();

-  ORT_RETURN_IF_NOT(
-      external_data_length == 0 ||
-          external_data_length == tensor_data_length,
-      "TensorProto external data size mismatch. ",
-      "Computed size: ", *&tensor_data_length,
-      ", external_data.length: ", external_data_length);
+  ORT_RETURN_IF_NOT(external_data_length == 0 || external_data_length == tensor_byte_size,
+                    "TensorProto external data size mismatch. Computed size: ", *&tensor_byte_size,
+                    ", external_data.length: ", external_data_length);

  return Status::OK();
 }
@ -157,8 +168,8 @@ static Status GetExternalDataInfo(const ONNX_NAMESPACE::TensorProto& tensor_prot
 // This function does not unpack string_data of an initializer tensor
 static Status ReadExternalDataForTensor(const ONNX_NAMESPACE::TensorProto& tensor_proto,
                                        const ORTCHAR_T* tensor_proto_dir,
-                                        std::unique_ptr<uint8_t[]>& unpacked_tensor,
-                                        SafeInt<size_t>& tensor_data_length) {
+                                        std::unique_ptr<unsigned char[]>& unpacked_tensor,
+                                        SafeInt<size_t>& tensor_byte_size) {
  std::basic_string<ORTCHAR_T> external_file_path;
  onnxruntime::FileOffsetType file_offset;
  ORT_RETURN_IF_ERROR(GetExternalDataInfo(
@ -166,14 +177,14 @@ static Status ReadExternalDataForTensor(const ONNX_NAMESPACE::TensorProto& tenso
      tensor_proto_dir,
      external_file_path,
      file_offset,
-      tensor_data_length));
+      tensor_byte_size));

-  unpacked_tensor.reset(new uint8_t[*&tensor_data_length]);
+  unpacked_tensor.reset(new unsigned char[*&tensor_byte_size]);
  ORT_RETURN_IF_ERROR(onnxruntime::Env::Default().ReadFileIntoBuffer(
      external_file_path.c_str(),
      file_offset,
-      tensor_data_length,
-      gsl::make_span(reinterpret_cast<char*>(unpacked_tensor.get()), tensor_data_length)));
+      tensor_byte_size,
+      gsl::make_span(reinterpret_cast<char*>(unpacked_tensor.get()), tensor_byte_size)));

  return Status::OK();
 }
@ -182,91 +193,108 @@ static Status ReadExternalDataForTensor(const ONNX_NAMESPACE::TensorProto& tenso
 namespace onnxruntime {
 namespace utils {
 #if !defined(ORT_MINIMAL_BUILD)
-#define DEFINE_UNPACK_EXTERNAL_TENSOR(T)                                                                                   \
-  template <>                                                                                                              \
-  Status UnpackTensorWithExternalData(const ONNX_NAMESPACE::TensorProto& tensor,                                           \
-                                      const ORTCHAR_T* tensor_proto_dir, size_t expected_size,                             \
-                                      /*out*/ T* p_data) {                                                                 \
-    ORT_RETURN_IF(nullptr == p_data, "nullptr == p_data");                                                                 \
-                                                                                                                           \
-    std::unique_ptr<uint8_t[]> unpacked_tensor;                                                                            \
-    SafeInt<size_t> tensor_byte_size = 0;                                                                                  \
-    ORT_RETURN_IF_ERROR(ReadExternalDataForTensor(                                                                         \
-        tensor,                                                                                                            \
-        tensor_proto_dir,                                                                                                  \
-        unpacked_tensor,                                                                                                   \
-        tensor_byte_size));                                                                                                \
-                                                                                                                           \
-    size_t element_count = tensor_byte_size / sizeof(T);                                                                   \
-    ORT_RETURN_IF_NOT(expected_size == element_count, "Expected data size does not match the actual external data size."); \
-    ORT_RETURN_IF_ERROR(onnxruntime::utils::ReadLittleEndian(                                                              \
-        gsl::make_span(reinterpret_cast<char*>(unpacked_tensor.get()), tensor_byte_size),                                  \
-        gsl::make_span(p_data, expected_size)));                                                                           \
-                                                                                                                           \
-    return Status::OK();                                                                                                   \
-  }
+static Status UnpackTensorWithExternalDataImpl(const ONNX_NAMESPACE::TensorProto& tensor,
+                                               const ORTCHAR_T* tensor_proto_dir,
+                                               size_t expected_num_elements, size_t element_size,
+                                               /*out*/ unsigned char* p_data) {
+  ORT_RETURN_IF(nullptr == p_data, "nullptr == p_data");

-DEFINE_UNPACK_EXTERNAL_TENSOR(float)
-DEFINE_UNPACK_EXTERNAL_TENSOR(double)
-DEFINE_UNPACK_EXTERNAL_TENSOR(uint8_t)
-DEFINE_UNPACK_EXTERNAL_TENSOR(int8_t)
-DEFINE_UNPACK_EXTERNAL_TENSOR(int16_t)
-DEFINE_UNPACK_EXTERNAL_TENSOR(uint16_t)
-DEFINE_UNPACK_EXTERNAL_TENSOR(int32_t)
-DEFINE_UNPACK_EXTERNAL_TENSOR(int64_t)
-DEFINE_UNPACK_EXTERNAL_TENSOR(uint64_t)
-DEFINE_UNPACK_EXTERNAL_TENSOR(uint32_t)
-DEFINE_UNPACK_EXTERNAL_TENSOR(bool)
-DEFINE_UNPACK_EXTERNAL_TENSOR(MLFloat16)
-DEFINE_UNPACK_EXTERNAL_TENSOR(BFloat16)
+  std::unique_ptr<unsigned char[]> unpacked_tensor;
+  SafeInt<size_t> tensor_byte_size = 0;
+  ORT_RETURN_IF_ERROR(ReadExternalDataForTensor(tensor, tensor_proto_dir, unpacked_tensor, tensor_byte_size));
+
+  // ReadLittleEndian checks src and dst buffers are the same size
+  auto src_span = gsl::make_span(unpacked_tensor.get(), tensor_byte_size);
+  auto dst_span = gsl::make_span(p_data, expected_num_elements * element_size);
+
+  return onnxruntime::utils::ReadLittleEndian(element_size, src_span, dst_span);
+}
+
+template <typename T>
+Status UnpackTensorWithExternalData(const ONNX_NAMESPACE::TensorProto& tensor,
+                                    const ORTCHAR_T* tensor_proto_dir, size_t expected_num_elements,
+                                    /*out*/ T* p_data) {
+  // std::is_trivially_copyable is not implemented in older versions of GCC
+#if !defined(__GNUC__) || __GNUC__ >= 5
+  static_assert(std::is_trivially_copyable<T>::value, "T must be trivially copyable");
+#endif
+
+  return UnpackTensorWithExternalDataImpl(tensor, tensor_proto_dir, expected_num_elements, sizeof(T),
+                                          reinterpret_cast<unsigned char*>(p_data));
+}
+
+#define INSTANTIATE_UNPACK_EXTERNAL_TENSOR(type) \
+  template Status UnpackTensorWithExternalData(const ONNX_NAMESPACE::TensorProto&, const ORTCHAR_T*, size_t, type*);
+
+INSTANTIATE_UNPACK_EXTERNAL_TENSOR(float)
+INSTANTIATE_UNPACK_EXTERNAL_TENSOR(double)
+INSTANTIATE_UNPACK_EXTERNAL_TENSOR(uint8_t)
+INSTANTIATE_UNPACK_EXTERNAL_TENSOR(int8_t)
+INSTANTIATE_UNPACK_EXTERNAL_TENSOR(int16_t)
+INSTANTIATE_UNPACK_EXTERNAL_TENSOR(uint16_t)
+INSTANTIATE_UNPACK_EXTERNAL_TENSOR(int32_t)
+INSTANTIATE_UNPACK_EXTERNAL_TENSOR(int64_t)
+INSTANTIATE_UNPACK_EXTERNAL_TENSOR(uint64_t)
+INSTANTIATE_UNPACK_EXTERNAL_TENSOR(uint32_t)
+INSTANTIATE_UNPACK_EXTERNAL_TENSOR(bool)
+INSTANTIATE_UNPACK_EXTERNAL_TENSOR(MLFloat16)
+INSTANTIATE_UNPACK_EXTERNAL_TENSOR(BFloat16)

 template <>
 Status UnpackTensorWithExternalData(const ONNX_NAMESPACE::TensorProto& /*tensor*/,
-                                    const ORTCHAR_T* /*tensor_proto_dir*/, size_t /*expected_size*/,
+                                    const ORTCHAR_T* /*tensor_proto_dir*/, size_t /*expected_num_elements*/,
                                    /*out*/ std::string* /*p_data*/) {
-  return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL,
-                         "External data type cannot be STRING.");
+  return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "External data type cannot be STRING.");
 }
 #endif  //!defined(ORT_MINIMAL_BUILD)

-// This macro doesn't work for Float16/bool/string tensors
-#define DEFINE_UNPACK_TENSOR(T, Type, field_name, field_size)                                                      \
-  template <>                                                                                                      \
-  Status UnpackTensor(const ONNX_NAMESPACE::TensorProto& tensor, const void* raw_data, size_t raw_data_len,        \
-                      /*out*/ T* p_data, size_t expected_size) {                                                   \
-    if (nullptr == p_data) {                                                                                       \
-      const size_t size = raw_data != nullptr ? raw_data_len : tensor.field_size();                                \
-      if (size == 0) return Status::OK();                                                                          \
-      return Status(common::ONNXRUNTIME, common::INVALID_ARGUMENT);                                                \
-    }                                                                                                              \
-    if (nullptr == p_data || Type != tensor.data_type()) {                                                         \
-      return Status(common::ONNXRUNTIME, common::INVALID_ARGUMENT);                                                \
-    }                                                                                                              \
-    if (raw_data != nullptr) {                                                                                     \
-      return UnpackTensorWithRawData(raw_data, raw_data_len, expected_size, p_data);                               \
-    }                                                                                                              \
-    if (static_cast<size_t>(tensor.field_size()) != expected_size)                                                 \
-      return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, "corrupted protobuf data: tensor shape size(",         \
-                             expected_size, ") does not match the data size(", tensor.field_size(), ") in proto"); \
-    auto& data = tensor.field_name();                                                                              \
-    for (auto data_iter = data.cbegin(); data_iter != data.cend(); ++data_iter)                                    \
-      *p_data++ = *reinterpret_cast<const T*>(data_iter);                                                          \
-    return Status::OK();                                                                                           \
+// implementation of type specific unpack of data contained within the TensorProto
+template <typename T>
+Status UnpackTensor(const ONNX_NAMESPACE::TensorProto& tensor, const void* raw_data, size_t raw_data_len,
+                    /*out*/ T* p_data, size_t expected_num_elements);
+
+#define DEFINE_UNPACK_TENSOR_IMPL(T, Type, field_name, field_size)                                          \
+  template <>                                                                                               \
+  Status UnpackTensor(const ONNX_NAMESPACE::TensorProto& tensor, const void* raw_data, size_t raw_data_len, \
+                      /*out*/ T* p_data, size_t expected_num_elements) {                                    \
+    if (nullptr == p_data) {                                                                                \
+      const size_t size = raw_data != nullptr ? raw_data_len : tensor.field_size();                         \
+      if (size == 0) return Status::OK();                                                                   \
+      return Status(common::ONNXRUNTIME, common::INVALID_ARGUMENT);                                         \
+    }                                                                                                       \
+    if (nullptr == p_data || Type != tensor.data_type()) {                                                  \
+      return Status(common::ONNXRUNTIME, common::INVALID_ARGUMENT);                                         \
+    }                                                                                                       \
+    if (raw_data != nullptr) {                                                                              \
+      return UnpackTensorWithRawData(raw_data, raw_data_len, expected_num_elements, p_data);                \
+    }                                                                                                       \
+    if (static_cast<size_t>(tensor.field_size()) != expected_num_elements)                                  \
+      return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,                                                 \
+                             "corrupted protobuf data: tensor shape size(", expected_num_elements,          \
+                             ") does not match the data size(", tensor.field_size(), ") in proto");         \
+    auto& data = tensor.field_name();                                                                       \
+    for (auto data_iter = data.cbegin(); data_iter != data.cend(); ++data_iter)                             \
+      *p_data++ = *reinterpret_cast<const T*>(data_iter);                                                   \
+    return Status::OK();                                                                                    \
  }

 // TODO: complex64 complex128
-DEFINE_UNPACK_TENSOR(float, ONNX_NAMESPACE::TensorProto_DataType_FLOAT, float_data, float_data_size)
-DEFINE_UNPACK_TENSOR(double, ONNX_NAMESPACE::TensorProto_DataType_DOUBLE, double_data, double_data_size);
-DEFINE_UNPACK_TENSOR(uint8_t, ONNX_NAMESPACE::TensorProto_DataType_UINT8, int32_data, int32_data_size)
-DEFINE_UNPACK_TENSOR(int8_t, ONNX_NAMESPACE::TensorProto_DataType_INT8, int32_data, int32_data_size)
-DEFINE_UNPACK_TENSOR(int16_t, ONNX_NAMESPACE::TensorProto_DataType_INT16, int32_data, int32_data_size)
-DEFINE_UNPACK_TENSOR(uint16_t, ONNX_NAMESPACE::TensorProto_DataType_UINT16, int32_data, int32_data_size)
-DEFINE_UNPACK_TENSOR(int32_t, ONNX_NAMESPACE::TensorProto_DataType_INT32, int32_data, int32_data_size)
-DEFINE_UNPACK_TENSOR(int64_t, ONNX_NAMESPACE::TensorProto_DataType_INT64, int64_data, int64_data_size)
-DEFINE_UNPACK_TENSOR(uint64_t, ONNX_NAMESPACE::TensorProto_DataType_UINT64, uint64_data, uint64_data_size)
-DEFINE_UNPACK_TENSOR(uint32_t, ONNX_NAMESPACE::TensorProto_DataType_UINT32, uint64_data, uint64_data_size)
+DEFINE_UNPACK_TENSOR_IMPL(float, ONNX_NAMESPACE::TensorProto_DataType_FLOAT, float_data, float_data_size)
+DEFINE_UNPACK_TENSOR_IMPL(double, ONNX_NAMESPACE::TensorProto_DataType_DOUBLE, double_data, double_data_size);
+DEFINE_UNPACK_TENSOR_IMPL(uint8_t, ONNX_NAMESPACE::TensorProto_DataType_UINT8, int32_data, int32_data_size)
+DEFINE_UNPACK_TENSOR_IMPL(int8_t, ONNX_NAMESPACE::TensorProto_DataType_INT8, int32_data, int32_data_size)
+DEFINE_UNPACK_TENSOR_IMPL(int16_t, ONNX_NAMESPACE::TensorProto_DataType_INT16, int32_data, int32_data_size)
+DEFINE_UNPACK_TENSOR_IMPL(uint16_t, ONNX_NAMESPACE::TensorProto_DataType_UINT16, int32_data, int32_data_size)
+DEFINE_UNPACK_TENSOR_IMPL(int32_t, ONNX_NAMESPACE::TensorProto_DataType_INT32, int32_data, int32_data_size)
+DEFINE_UNPACK_TENSOR_IMPL(int64_t, ONNX_NAMESPACE::TensorProto_DataType_INT64, int64_data, int64_data_size)
+DEFINE_UNPACK_TENSOR_IMPL(uint64_t, ONNX_NAMESPACE::TensorProto_DataType_UINT64, uint64_data, uint64_data_size)
+DEFINE_UNPACK_TENSOR_IMPL(uint32_t, ONNX_NAMESPACE::TensorProto_DataType_UINT32, uint64_data, uint64_data_size)

-// doesn't support raw data
+//
+// Specializations of UnpackTensor that need custom handling for the input type
+//
+
+// UnpackTensor<std::string>. Note: doesn't support raw data
 template <>
 Status UnpackTensor(const ONNX_NAMESPACE::TensorProto& tensor, const void* /*raw_data*/, size_t /*raw_data_len*/,
                    /*out*/ std::string* p_data, size_t expected_size) {
@ -289,6 +317,8 @@ Status UnpackTensor(const ONNX_NAMESPACE::TensorProto& tensor, const void* /*raw

  return Status::OK();
 }
+
+// UnpackTensor<bool>
 template <>
 Status UnpackTensor(const ONNX_NAMESPACE::TensorProto& tensor, const void* raw_data, size_t raw_data_len,
                    /*out*/ bool* p_data, size_t expected_size) {
@ -314,6 +344,8 @@ Status UnpackTensor(const ONNX_NAMESPACE::TensorProto& tensor, const void* raw_d

  return Status::OK();
 }
+
+// UnpackTensor<MLFloat16>
 template <>
 Status UnpackTensor(const ONNX_NAMESPACE::TensorProto& tensor, const void* raw_data, size_t raw_data_len,
                    /*out*/ MLFloat16* p_data, size_t expected_size) {
@ -346,6 +378,7 @@ Status UnpackTensor(const ONNX_NAMESPACE::TensorProto& tensor, const void* raw_d
  return Status::OK();
 }

+// UnpackTensor<BFloat16>
 template <>
 Status UnpackTensor(const ONNX_NAMESPACE::TensorProto& tensor, const void* raw_data, size_t raw_data_len,
                    /*out*/ BFloat16* p_data, size_t expected_size) {
@ -380,6 +413,49 @@ Status UnpackTensor(const ONNX_NAMESPACE::TensorProto& tensor, const void* raw_d
  return Status::OK();
 }

+// UnpackTensor from raw data, external data or the type specific data field.
+// Uses the model path to construct the full path for loading external data. If model_path is empty,
+// the current directory is used.
+template <typename T>
+Status UnpackTensor(const ONNX_NAMESPACE::TensorProto& tensor, const Path& model_path,
+                    /*out*/ T* p_data, size_t expected_num_elements) {
+#if !defined(ORT_MINIMAL_BUILD)
+  if (HasExternalData(tensor)) {
+    return UnpackTensorWithExternalData(
+        tensor,
+        model_path.IsEmpty() ? nullptr : model_path.ParentPath().ToPathString().c_str(),
+        expected_num_elements,
+        p_data);
+  }
+#else
+  ORT_UNUSED_PARAMETER(model_path);
+  ORT_RETURN_IF(HasExternalData(tensor), "TensorProto with external data is not supported in ORT minimal build.");
+#endif
+
+  return HasRawData(tensor)
+             ? UnpackTensor(tensor, tensor.raw_data().data(), tensor.raw_data().size(), p_data, expected_num_elements)
+             : UnpackTensor(tensor, nullptr, 0, p_data, expected_num_elements);
+}
+
+// instantiate the UnpackTensor variant that supports external data
+#define INSTANTIATE_UNPACK_TENSOR(type) \
+  template Status UnpackTensor(const ONNX_NAMESPACE::TensorProto&, const Path&, type* p_data, size_t);
+
+INSTANTIATE_UNPACK_TENSOR(float)
+INSTANTIATE_UNPACK_TENSOR(double)
+INSTANTIATE_UNPACK_TENSOR(uint8_t)
+INSTANTIATE_UNPACK_TENSOR(int8_t)
+INSTANTIATE_UNPACK_TENSOR(int16_t)
+INSTANTIATE_UNPACK_TENSOR(uint16_t)
+INSTANTIATE_UNPACK_TENSOR(int32_t)
+INSTANTIATE_UNPACK_TENSOR(int64_t)
+INSTANTIATE_UNPACK_TENSOR(uint64_t)
+INSTANTIATE_UNPACK_TENSOR(uint32_t)
+INSTANTIATE_UNPACK_TENSOR(bool)
+INSTANTIATE_UNPACK_TENSOR(MLFloat16)
+INSTANTIATE_UNPACK_TENSOR(BFloat16)
+INSTANTIATE_UNPACK_TENSOR(std::string)
+
 #define CASE_PROTO_TRACE(X, Y)                                                                     \
  case ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_##X:                             \
    if (!IAllocator::CalcMemSizeForArrayWithAlignment<alignment>(size, sizeof(Y), out)) {          \
@ -472,12 +548,6 @@ static void UnInitTensor(void* param) noexcept {
  delete p;
 }

-#define CASE_PROTO(X, Y)                                                                                            \
-  case ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_##X:                                              \
-    ORT_RETURN_IF_ERROR(                                                                                            \
-        UnpackTensor<Y>(tensor_proto, raw_data, raw_data_len, (Y*)preallocated, static_cast<size_t>(tensor_size))); \
-    break;
-
 class AutoDelete {
 public:
  OrtCallback d{nullptr, nullptr};
@ -530,6 +600,14 @@ static void MoveOrtCallback(OrtCallback& from, OrtCallback& to) {
  from.f = nullptr;
  from.param = nullptr;
 }
+
+#define CASE_PROTO(X, Y)                                                      \
+  case ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_##X:        \
+    ORT_RETURN_IF_ERROR(                                                      \
+        UnpackTensor<Y>(tensor_proto, raw_data, raw_data_len,                 \
+                        (Y*)preallocated, static_cast<size_t>(tensor_size))); \
+    break;
+
 #ifdef _MSC_VER
 #pragma warning(push)
 #pragma warning(disable : 6239)
@ -632,7 +710,8 @@ Status TensorProtoToMLValue(const Env& env, const ORTCHAR_T* model_path,
            deleter.param = new UnInitializeParam{preallocated, preallocated_size, ele_type};
          }
          ORT_RETURN_IF_ERROR(UnpackTensor<std::string>(tensor_proto, raw_data, raw_data_len,
-                                                        (std::string*)preallocated, static_cast<size_t>(tensor_size)));
+                                                        static_cast<std::string*>(preallocated),
+                                                        static_cast<size_t>(tensor_size)));
          break;
        default: {
          std::ostringstream ostr;
@ -1034,41 +1113,42 @@ template common::Status GetSizeInBytesFromTensorProto<kAllocAlignment>(const ONN
                                                                       size_t* out);
 template common::Status GetSizeInBytesFromTensorProto<0>(const ONNX_NAMESPACE::TensorProto& tensor_proto, size_t* out);

-#define CASE_UNPACK(TYPE, ELEMENT_TYPE, DATA_SIZE)                                         \
-  case ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_##TYPE: {                \
-    if (initializer.data_location() == TensorProto_DataLocation_EXTERNAL) {                \
-      ORT_RETURN_IF_ERROR(ReadExternalDataForTensor(                                       \
-          initializer,                                                                     \
-          model_path.IsEmpty() ? nullptr : model_path.ParentPath().ToPathString().c_str(), \
-          unpacked_tensor,                                                                 \
-          tensor_byte_size));                                                              \
-      tensor_data_length = tensor_byte_size;                                               \
-      return Status::OK();                                                                 \
-    } else {                                                                               \
-      size_t element_count = 0;                                                            \
-      if (initializer.has_raw_data()) {                                                    \
-        tensor_byte_size = initializer.raw_data().size();                                  \
-        element_count = tensor_byte_size / sizeof(ELEMENT_TYPE);                           \
-      } else {                                                                             \
-        element_count = initializer.DATA_SIZE();                                           \
-        tensor_byte_size = element_count * sizeof(ELEMENT_TYPE);                           \
-      }                                                                                    \
-      tensor_data_length = tensor_byte_size;                                               \
-      unpacked_tensor.reset(new uint8_t[tensor_data_length]);                              \
-      return onnxruntime::utils::UnpackTensor(                                             \
-          initializer,                                                                     \
-          initializer.has_raw_data() ? initializer.raw_data().data() : nullptr,            \
-          initializer.has_raw_data() ? initializer.raw_data().size() : 0,                  \
-          reinterpret_cast<ELEMENT_TYPE*>(unpacked_tensor.get()), element_count);          \
-    }                                                                                      \
-    break;                                                                                 \
+// Emits a `case` for TensorProto data type TYPE that unpacks `initializer` into a newly allocated
+// byte buffer, treating the payload as elements of ELEMENT_TYPE. DATA_SIZE names the TensorProto
+// accessor that returns the element count of the type-specific field (e.g. float_data_size).
+// With raw data the byte size is the raw buffer size and the element count is derived from it;
+// otherwise the element count comes from DATA_SIZE() and the byte size is derived from that.
+// Every path returns, so no `break` is needed. The names initializer, tensor_byte_size,
+// tensor_byte_size_out and unpacked_tensor must be in scope where the macro is expanded.
+#define CASE_UNPACK(TYPE, ELEMENT_TYPE, DATA_SIZE)                              \
+  case ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_##TYPE: {     \
+    size_t element_count = 0;                                                   \
+    if (initializer.has_raw_data()) {                                           \
+      tensor_byte_size = initializer.raw_data().size();                         \
+      element_count = tensor_byte_size / sizeof(ELEMENT_TYPE);                  \
+    } else {                                                                    \
+      element_count = initializer.DATA_SIZE();                                  \
+      tensor_byte_size = element_count * sizeof(ELEMENT_TYPE);                  \
+    }                                                                           \
+    tensor_byte_size_out = tensor_byte_size;                                    \
+    unpacked_tensor.reset(new unsigned char[tensor_byte_size_out]);             \
+    return onnxruntime::utils::UnpackTensor(                                    \
+        initializer,                                                            \
+        initializer.has_raw_data() ? initializer.raw_data().data() : nullptr,   \
+        initializer.has_raw_data() ? initializer.raw_data().size() : 0,         \
+        reinterpret_cast<ELEMENT_TYPE*>(unpacked_tensor.get()), element_count); \
  }

 Status UnpackInitializerData(const onnx::TensorProto& initializer,
                             const Path& model_path,
-                             std::unique_ptr<uint8_t[]>& unpacked_tensor,
-                             size_t& tensor_data_length) {
-  SafeInt<size_t> tensor_byte_size = tensor_data_length;
+                             std::unique_ptr<unsigned char[]>& unpacked_tensor,
+                             size_t& tensor_byte_size_out) {
+  SafeInt<size_t> tensor_byte_size;
+
+  if (initializer.data_location() == TensorProto_DataLocation_EXTERNAL) {
+    ORT_RETURN_IF_ERROR(ReadExternalDataForTensor(
+        initializer,
+        model_path.IsEmpty() ? nullptr : model_path.ParentPath().ToPathString().c_str(),
+        unpacked_tensor,
+        tensor_byte_size));
+    tensor_byte_size_out = tensor_byte_size;
+    return Status::OK();
+  }
+
  switch (initializer.data_type()) {
    CASE_UNPACK(FLOAT, float, float_data_size);
    CASE_UNPACK(DOUBLE, double, double_data_size);
--- a/onnxruntime/core/framework/tensorprotoutils.h
+++ b/onnxruntime/core/framework/tensorprotoutils.h
@ -238,16 +238,6 @@ inline bool HasName(const ONNX_NAMESPACE::NodeProto& node_proto) {
  return node_proto.has_name();
 }

-#if !defined(ORT_MINIMAL_BUILD)
-// Unpack tensor which contains external data. Uses the tensor_proto_dir to construct the full path for external data.
-// If tensor_proto_dir == nullptr then uses the current directory instead.
-// This function does not unpack string_data of a tensor
-template <typename T>
-Status UnpackTensorWithExternalData(const ONNX_NAMESPACE::TensorProto& tensor,
-                                    const ORTCHAR_T* tensor_proto_dir, size_t expected_size,
-                                    /*out*/ T* p_data);
-#endif  // !defined(ORT_MINIMAL_BUILD)
-
 // UnpackTensor from raw data or the type specific data field. Does not handle external data.
 // If the tensor does not contain raw data then raw_data should be nullptr and raw_data_len should be 0.
 template <typename T>
@ -258,37 +248,21 @@ Status UnpackTensor(const ONNX_NAMESPACE::TensorProto& tensor, const void* raw_d
 // Uses the model path to construct the full path for loading external data. In case when model_path is empty
 // it uses current directory.
 template <typename T>
-Status UnpackTensor(const ONNX_NAMESPACE::TensorProto& tensor, const Path& model_path, /*out*/ T* p_data, size_t expected_size) {
-#if !defined(ORT_MINIMAL_BUILD)
-  if (HasExternalData(tensor)) {
-    return UnpackTensorWithExternalData(
-        tensor,
-        model_path.IsEmpty() ? nullptr : model_path.ParentPath().ToPathString().c_str(),
-        expected_size,
-        p_data);
-  }
-#else
-  ORT_UNUSED_PARAMETER(model_path);
-  ORT_RETURN_IF(HasExternalData(tensor), "TensorProto with external data is not supported in ORT minimal build.");
-#endif
-
-  return HasRawData(tensor)
-             ? UnpackTensor(tensor, tensor.raw_data().data(), tensor.raw_data().size(), p_data, expected_size)
-             : UnpackTensor(tensor, nullptr, 0, p_data, expected_size);
-}
+Status UnpackTensor(const ONNX_NAMESPACE::TensorProto& tensor, const Path& model_path,
+                    /*out*/ T* p_data, size_t expected_size);

 /**
 * Unpack the data from an initializer tensor
 * Please note, this function does not unpack string_data of an initializer tensor
 * @param initializer       given initializer tensor
 * @param model_path        path of the model, used to construct the external data dir path. When this is empty, current dir is used.
- * @param unpacked_tensor   the data from the initaizlier in uint8_t* form
+ * @param unpacked_tensor   the data from the initializer in byte form
 * @param tensor_byte_size  the byte size of the unpacked_tensor
 * @returns                 Status::OK() if data is unpacked successfully
 */
 common::Status UnpackInitializerData(const ONNX_NAMESPACE::TensorProto& initializer,
                                     const Path& model_path,
-                                     std::unique_ptr<uint8_t[]>& unpacked_tensor,
+                                     std::unique_ptr<unsigned char[]>& unpacked_tensor,
                                     size_t& tensor_byte_size) ORT_MUST_USE_RESULT;

 }  // namespace utils
--- a/onnxruntime/test/framework/endian_test.cc
+++ b/onnxruntime/test/framework/endian_test.cc
@ -14,7 +14,7 @@ namespace test {

 TEST(EndianTest, EndiannessDetection) {
  const uint16_t test_value = 0x1234;
-  const char* test_value_first_byte = reinterpret_cast<const char*>(&test_value);
+  const unsigned char* test_value_first_byte = reinterpret_cast<const unsigned char*>(&test_value);
  if (endian::native == endian::little) {
    EXPECT_EQ(*test_value_first_byte, 0x34);
  } else if (endian::native == endian::big) {
@ -23,13 +23,13 @@ TEST(EndianTest, EndiannessDetection) {
 }

 TEST(EndianTest, SwapByteOrderCopy) {
-  const auto src = std::vector<char>{
+  const auto src = std::vector<unsigned char>{
      'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l'};

-  auto result = std::vector<char>(src.size());
+  auto result = std::vector<unsigned char>(src.size());
  {
    SwapByteOrderCopy(3, gsl::make_span(src), gsl::make_span(result));
-    const auto expected = std::vector<char>{
+    const auto expected = std::vector<unsigned char>{
        'c', 'b', 'a',
        'f', 'e', 'd',
        'i', 'h', 'g',
@ -39,7 +39,7 @@ TEST(EndianTest, SwapByteOrderCopy) {

  {
    SwapByteOrderCopy(4, gsl::make_span(src), gsl::make_span(result));
-    const auto expected = std::vector<char>{
+    const auto expected = std::vector<unsigned char>{
        'd', 'c', 'b', 'a',
        'h', 'g', 'f', 'e',
        'l', 'k', 'j', 'i'};
--- a/onnxruntime/test/framework/tensorutils_test.cc
+++ b/onnxruntime/test/framework/tensorutils_test.cc
@ -76,8 +76,9 @@ TEST(TensorProtoUtilsTest, UnpackTensor) {
  EXPECT_FALSE(status.IsOK());
 }

+namespace {
 template <typename T>
-static std::vector<T> CreateValues() {
+std::vector<T> CreateValues() {
  return {1, 2, 3, 4};
 }

@ -86,17 +87,49 @@ std::vector<std::string> CreateValues<std::string>() {
  return {"one", "two", "three", "four"};
 }

+// Test values for bool tensors; the generic {1, 2, 3, 4} list is replaced by explicit true/false values.
+template <>
+std::vector<bool> CreateValues<bool>() {
+  return {true, false, false, true};
+}
+
+// Test values for MLFloat16 tensors, built explicitly from float.
+template <>
+std::vector<MLFloat16> CreateValues<MLFloat16>() {
+  return {MLFloat16(0.f), MLFloat16(1.f), MLFloat16(2.f), MLFloat16(3.f)};
+}
+
+// Test values for BFloat16 tensors, built explicitly from float.
+template <>
+std::vector<BFloat16> CreateValues<BFloat16>() {
+  return {BFloat16(0.f), BFloat16(1.f), BFloat16(2.f), BFloat16(3.f)};
+}
+
 template <typename T>
-static void CreateTensorWithExternalData(
-    TensorProto_DataType type,
-    const std::vector<T>& test_data,
-    std::basic_string<ORTCHAR_T>& filename,
-    TensorProto& tensor_proto) {
+// Writes the elements of test_data to fp as one contiguous run of raw bytes and asserts that every
+// byte was written. std::vector<bool> is handled by a dedicated specialization.
+void WriteDataToFile(FILE* fp, const std::vector<T>& test_data) {
+  const size_t size_in_bytes = sizeof(T) * test_data.size();
+  ASSERT_EQ(size_in_bytes, fwrite(test_data.data(), 1, size_in_bytes, fp));
+}
+
+// Copies test_data element by element into a newly allocated plain bool array of the same length.
+std::unique_ptr<bool[]> BoolDataFromVector(const std::vector<bool>& test_data) {
+  const size_t count = test_data.size();
+  auto unpacked = onnxruntime::make_unique<bool[]>(count);
+  for (size_t i = 0; i < count; ++i) {
+    unpacked[i] = test_data[i];
+  }
+  return unpacked;
+}
+
+// std::vector<bool> stores its elements as bits, so expand them into a plain bool array first and
+// write that array's bytes to fp instead.
+template <>
+void WriteDataToFile<bool>(FILE* fp, const std::vector<bool>& test_data) {
+  const size_t size_in_bytes = sizeof(bool) * test_data.size();
+  const auto arr = BoolDataFromVector(test_data);
+  ASSERT_EQ(size_in_bytes, fwrite(arr.get(), 1, size_in_bytes, fp));
+}
+
+template <typename T>
+void CreateTensorWithExternalData(TensorProto_DataType type, const std::vector<T>& test_data,
+                                  std::basic_string<ORTCHAR_T>& filename,
+                                  TensorProto& tensor_proto) {
  // Create external data
  FILE* fp;
  CreateTestFile(fp, filename);
-  size_t size_in_bytes = test_data.size() * sizeof(T);
-  ASSERT_EQ(size_in_bytes, fwrite(test_data.data(), 1, size_in_bytes, fp));
+  WriteDataToFile(fp, test_data);
  ASSERT_EQ(0, fclose(fp));

  // set the tensor_proto to reference this external data
@ -109,15 +142,7 @@ static void CreateTensorWithExternalData(
 }

 template <typename T>
-static void TestUnpackExternalTensor(TensorProto_DataType type, const Path& model_path) {
-  // Create external data
-  std::basic_string<ORTCHAR_T> filename(ORT_TSTR("tensor_XXXXXX"));
-  TensorProto tensor_proto;
-  auto test_data = CreateValues<T>();
-  CreateTensorWithExternalData<T>(type, test_data, filename, tensor_proto);
-  std::unique_ptr<ORTCHAR_T, decltype(&DeleteFileFromDisk)> file_deleter(const_cast<ORTCHAR_T*>(filename.c_str()),
-                                                                         DeleteFileFromDisk);
-
+void UnpackAndValidate(const TensorProto& tensor_proto, const Path& model_path, const std::vector<T>& test_data) {
  // Unpack tensor with external data
  std::vector<T> val(test_data.size());
  auto st = utils::UnpackTensor(tensor_proto, model_path, val.data(), test_data.size());
@ -125,15 +150,45 @@ static void TestUnpackExternalTensor(TensorProto_DataType type, const Path& mode

  // Validate data
  for (size_t i = 0; i < test_data.size(); i++) {
-    ASSERT_EQ(val[i], test_data[i]);
+    ASSERT_TRUE(val[i] == test_data[i]);  // need to use ASSERT_TRUE with '==' to handle MLFloat16 and BFloat16
  }
 }

+// bool variant of UnpackAndValidate: the tensor is unpacked into a heap-allocated bool array rather
+// than a std::vector<bool>, then compared element by element against test_data.
+template <>
+void UnpackAndValidate<bool>(const TensorProto& tensor_proto, const Path& model_path,
+                             const std::vector<bool>& test_data) {
+  const size_t expected_size = test_data.size();
+  auto arr = onnxruntime::make_unique<bool[]>(expected_size);
+
+  // Unpack tensor with external data
+  auto st = utils::UnpackTensor(tensor_proto, model_path, arr.get(), expected_size);
+  ASSERT_TRUE(st.IsOK()) << st.ErrorMessage();
+
+  // Validate data
+  for (size_t i = 0; i != expected_size; ++i) {
+    ASSERT_TRUE(arr[i] == test_data[i]);
+  }
+}
+
+// Round trip for one element type: writes CreateValues<T>() to an external data file, builds a
+// TensorProto that references it, then checks that unpacking reproduces the same values.
+template <typename T>
+void TestUnpackExternalTensor(TensorProto_DataType type, const Path& model_path) {
+  // Create external data
+  const auto test_data = CreateValues<T>();
+  TensorProto tensor_proto;
+  std::basic_string<ORTCHAR_T> filename(ORT_TSTR("tensor_XXXXXX"));
+  CreateTensorWithExternalData<T>(type, test_data, filename, tensor_proto);
+
+  // file_deleter runs DeleteFileFromDisk on the filename when this scope exits
+  std::unique_ptr<ORTCHAR_T, decltype(&DeleteFileFromDisk)> file_deleter(const_cast<ORTCHAR_T*>(filename.c_str()),
+                                                                         DeleteFileFromDisk);
+  UnpackAndValidate(tensor_proto, model_path, test_data);
+}
+}  // namespace
 TEST(TensorProtoUtilsTest, UnpackTensorWithExternalData) {
+  // Default-constructed model path (presumably empty); the UnpackTensor header comment says an empty
+  // model_path makes external data load from the current directory.
   Path model_path;
   TestUnpackExternalTensor<float>(TensorProto_DataType_FLOAT, model_path);
   TestUnpackExternalTensor<double>(TensorProto_DataType_DOUBLE, model_path);
   TestUnpackExternalTensor<int32_t>(TensorProto_DataType_INT32, model_path);
+  TestUnpackExternalTensor<int8_t>(TensorProto_DataType_INT8, model_path);
+  TestUnpackExternalTensor<MLFloat16>(TensorProto_DataType_FLOAT16, model_path);
+  TestUnpackExternalTensor<BFloat16>(TensorProto_DataType_BFLOAT16, model_path);
+  TestUnpackExternalTensor<bool>(TensorProto_DataType_BOOL, model_path);
 }

 template <typename T>
--- a/onnxruntime/test/optimizer/initializer_test.cc
+++ b/onnxruntime/test/optimizer/initializer_test.cc
@ -21,18 +21,17 @@ namespace test {
 namespace {
 template <typename T>
 Status WriteExternalDataFile(gsl::span<const T> data, const PathString& path, ScopedFileDeleter& file_deleter) {
-  std::vector<char> data_bytes(data.size_bytes());
+  std::vector<unsigned char> data_bytes(data.size_bytes());
  ORT_RETURN_IF_ERROR(onnxruntime::utils::WriteLittleEndian(data, gsl::make_span(data_bytes)));
  std::ofstream out{path, std::ios::binary | std::ios::trunc};
-  ORT_RETURN_IF_NOT(out && out.write(data_bytes.data(), data_bytes.size()),
+  ORT_RETURN_IF_NOT(out && out.write(reinterpret_cast<const char*>(data_bytes.data()), data_bytes.size()),
                    "out && out.write(data_bytes.data(), data_bytes.size()) was false");
  file_deleter = ScopedFileDeleter{path};
  return Status::OK();
 }

-void SetTensorProtoExternalData(
-    const std::string& key, const std::string& value,
-    ONNX_NAMESPACE::TensorProto& tensor_proto) {
+void SetTensorProtoExternalData(const std::string& key, const std::string& value,
+                                ONNX_NAMESPACE::TensorProto& tensor_proto) {
  auto* external_data = tensor_proto.mutable_external_data();
  auto kvp_it = std::find_if(
      external_data->begin(), external_data->end(),