Enable -Wshorten-64-to-32 warning if available. (#16524)

- Fix some warnings from the Xcode build (`-Wshorten-64-to-32`).
- Enable the `-Wshorten-64-to-32` warning if available. It is not yet fully enabled for `onnxruntime_test_all` and `onnxruntime_providers_xnnpack`.
- Some cleanup in build.py, including setting the CMake generator more consistently.
2026-05-14 20:48:00 +00:00 · 2023-07-07 08:11:44 -07:00 · 2023-07-07 08:11:44 -07:00 · 6be7b03e53
commit 6be7b03e53
parent e22b0836e7
24 changed files with 127 additions and 93 deletions
--- a/cmake/CMakeLists.txt
+++ b/cmake/CMakeLists.txt
@ -610,6 +610,11 @@ else()
      target_compile_options(libprotobuf-lite PRIVATE "-Wno-enum-constexpr-conversion")
    endif()
  endif()
+
+  # enable warning(s) that may not be on by default
+  if (HAS_SHORTEN_64_TO_32)
+    list(APPEND ORT_WARNING_FLAGS -Wshorten-64-to-32)
+  endif()
 endif()

 #names in this var must match the directory names under onnxruntime/core/providers
--- a/cmake/onnxruntime_config.h.in
+++ b/cmake/onnxruntime_config.h.in
@ -3,23 +3,24 @@

 #pragma once

+#cmakedefine HAS_BITWISE_INSTEAD_OF_LOGICAL
+#cmakedefine HAS_CAST_FUNCTION_TYPE
+#cmakedefine HAS_CATCH_VALUE
+#cmakedefine HAS_CLASS_MEMACCESS
+#cmakedefine HAS_DEPRECATED_COPY
+#cmakedefine HAS_DEPRECATED_DECLARATIONS
+#cmakedefine HAS_FORMAT_TRUNCATION
+#cmakedefine HAS_IGNORED_ATTRIBUTES
+#cmakedefine HAS_MAYBE_UNINITIALIZED
+#cmakedefine HAS_MISSING_BRACES
+#cmakedefine HAS_NONNULL_COMPARE
+#cmakedefine HAS_PARENTHESES
+#cmakedefine HAS_REALLOCARRAY
+#cmakedefine HAS_SHORTEN_64_TO_32
+#cmakedefine HAS_TAUTOLOGICAL_POINTER_COMPARE
 #cmakedefine HAS_UNUSED_BUT_SET_PARAMETER
 #cmakedefine HAS_UNUSED_BUT_SET_VARIABLE
 #cmakedefine HAS_UNUSED_VARIABLE
-#cmakedefine HAS_CAST_FUNCTION_TYPE
-#cmakedefine HAS_PARENTHESES
 #cmakedefine HAS_USELESS_CAST
-#cmakedefine HAS_NONNULL_COMPARE
-#cmakedefine HAS_TAUTOLOGICAL_POINTER_COMPARE
-#cmakedefine HAS_CATCH_VALUE
-#cmakedefine HAS_MISSING_BRACES
-#cmakedefine HAS_IGNORED_ATTRIBUTES
-#cmakedefine HAS_DEPRECATED_COPY
-#cmakedefine HAS_CLASS_MEMACCESS
-#cmakedefine HAS_MAYBE_UNINITIALIZED
-#cmakedefine HAS_DEPRECATED_DECLARATIONS
-#cmakedefine HAS_FORMAT_TRUNCATION
-#cmakedefine HAS_BITWISE_INSTEAD_OF_LOGICAL
-#cmakedefine HAS_REALLOCARRAY
-#cmakedefine ORT_VERSION u8"@ORT_VERSION@"
 #cmakedefine ORT_BUILD_INFO u8"@ORT_BUILD_INFO@"
+#cmakedefine ORT_VERSION u8"@ORT_VERSION@"
--- a/cmake/onnxruntime_providers.cmake
+++ b/cmake/onnxruntime_providers.cmake
@ -1785,6 +1785,12 @@ if (onnxruntime_USE_XNNPACK)
            RUNTIME   DESTINATION ${CMAKE_INSTALL_BINDIR}
            FRAMEWORK DESTINATION ${CMAKE_INSTALL_BINDIR})
  endif()
+
+  # TODO: fix the shorten-64-to-32 warnings in this target.
+  # They occur in builds where sizeof(size_t) != sizeof(int64_t), e.g., in the 'ONNX Runtime Web CI Pipeline'; until they are fixed, do not treat them as errors in such builds.
+  if (HAS_SHORTEN_64_TO_32 AND NOT CMAKE_SIZEOF_VOID_P EQUAL 8)
+    target_compile_options(onnxruntime_providers_xnnpack PRIVATE -Wno-error=shorten-64-to-32)
+  endif()
 endif()

 if (onnxruntime_USE_CANN)
--- a/cmake/onnxruntime_python.cmake
+++ b/cmake/onnxruntime_python.cmake
@ -112,7 +112,7 @@ if (onnxruntime_USE_NCCL)
 endif()

 if(APPLE)
-  set(ONNXRUNTIME_SO_LINK_FLAG "-Xlinker -exported_symbols_list ${ONNXRUNTIME_ROOT}/python/exported_symbols.lst")
+  set(ONNXRUNTIME_SO_LINK_FLAG "-Xlinker -exported_symbols_list -Xlinker ${ONNXRUNTIME_ROOT}/python/exported_symbols.lst")
 elseif(UNIX)
  if (onnxruntime_ENABLE_EXTERNAL_CUSTOM_OP_SCHEMAS)
    set(ONNXRUNTIME_SO_LINK_FLAG "-Xlinker --version-script=${ONNXRUNTIME_ROOT}/python/version_script_expose_onnx_protobuf.lds -Xlinker --gc-sections")
@ -223,7 +223,7 @@ if (MSVC)
  # Explicitly use the release version of the python library to make the project file consistent with this.
  target_link_libraries(onnxruntime_pybind11_state PRIVATE ${Python_LIBRARY_RELEASE})
 elseif (APPLE)
-  set_target_properties(onnxruntime_pybind11_state PROPERTIES LINK_FLAGS "${ONNXRUNTIME_SO_LINK_FLAG} -undefined dynamic_lookup")
+  set_target_properties(onnxruntime_pybind11_state PROPERTIES LINK_FLAGS "${ONNXRUNTIME_SO_LINK_FLAG} -Xlinker -undefined -Xlinker dynamic_lookup")
  set_target_properties(onnxruntime_pybind11_state PROPERTIES
    INSTALL_RPATH "@loader_path"
    BUILD_WITH_INSTALL_RPATH TRUE
--- a/cmake/onnxruntime_unittests.cmake
+++ b/cmake/onnxruntime_unittests.cmake
@ -839,6 +839,12 @@ else()
  target_compile_options(onnxruntime_test_all PRIVATE "-Wno-parentheses")
 endif()

+# TODO: fix the shorten-64-to-32 warnings in this target.
+# They occur in builds where sizeof(size_t) != sizeof(int64_t), e.g., in the 'ONNX Runtime Web CI Pipeline'; until they are fixed, do not treat them as errors in such builds.
+if (HAS_SHORTEN_64_TO_32 AND NOT CMAKE_SIZEOF_VOID_P EQUAL 8)
+  target_compile_options(onnxruntime_test_all PRIVATE -Wno-error=shorten-64-to-32)
+endif()
+
 if (UNIX AND onnxruntime_USE_TENSORRT)
    set_property(TARGET onnxruntime_test_all APPEND_STRING PROPERTY COMPILE_FLAGS "-Wno-deprecated-declarations")
 endif()
--- a/include/onnxruntime/core/common/eigen_common_wrapper.h
+++ b/include/onnxruntime/core/common/eigen_common_wrapper.h
@ -41,6 +41,14 @@
 #pragma GCC diagnostic ignored "-Wunused-but-set-variable"
 #endif

+// eigen-src/unsupported/Eigen/CXX11/src/ThreadPool/EventCount.h:231:56: error: implicit conversion loses integer
+//   precision: 'uint64_t' (aka 'unsigned long long') to 'size_t' (aka 'unsigned long') [-Werror,-Wshorten-64-to-32]
+// next = wnext == kStackMask ? nullptr : &waiters_[wnext];
+//                                         ~~~~~~~~ ^~~~~
+#ifdef HAS_SHORTEN_64_TO_32
+#pragma GCC diagnostic ignored "-Wshorten-64-to-32"
+#endif
+
 #elif defined(_MSC_VER)
 // build\windows\debug\external\eigen3\unsupported\eigen\cxx11\src/Tensor/Tensor.h(76):
 // warning C4554: '&': check operator precedence for possible error; use parentheses to clarify precedence
--- a/include/onnxruntime/core/platform/EigenNonBlockingThreadPool.h
+++ b/include/onnxruntime/core/platform/EigenNonBlockingThreadPool.h
@ -27,6 +27,13 @@
 #ifdef HAS_CLASS_MEMACCESS
 #pragma GCC diagnostic ignored "-Wclass-memaccess"
 #endif
+// eigen-src/unsupported/Eigen/CXX11/src/ThreadPool/EventCount.h:231:56: error: implicit conversion loses integer
+//   precision: 'uint64_t' (aka 'unsigned long long') to 'size_t' (aka 'unsigned long') [-Werror,-Wshorten-64-to-32]
+// next = wnext == kStackMask ? nullptr : &waiters_[wnext];
+//                                         ~~~~~~~~ ^~~~~
+#ifdef HAS_SHORTEN_64_TO_32
+#pragma GCC diagnostic ignored "-Wshorten-64-to-32"
+#endif
 #elif defined(_MSC_VER)
 #pragma warning(push)
 #pragma warning(disable : 4127)
--- a/onnxruntime/contrib_ops/cpu/transformers/beam_search_impl_gpt.h
+++ b/onnxruntime/contrib_ops/cpu/transformers/beam_search_impl_gpt.h
@ -224,7 +224,7 @@ Status BeamSearchGpt<T>::Execute(const FeedsFetchesManager* init_run_feeds_fetch
                                         gpt_subgraph_.has_decoder_masked_attention_));

  if (gpt_subgraph_.past_present_share_buffer_) {  // Reuse past and present
-    fetches.reserve(static_cast<int64_t>(gpt_subgraph_.GetFirstPresentOutputIndex()) + gpt_subgraph_.num_layers);
+    fetches.reserve(static_cast<size_t>(gpt_subgraph_.GetFirstPresentOutputIndex()) + gpt_subgraph_.num_layers);
    fetches.resize(gpt_subgraph_.GetFirstPresentOutputIndex(), OrtValue());
    for (int layer = 0; layer < gpt_subgraph_.num_layers; layer++) {
      int feed_idx = gpt_subgraph_.GetFirstPastInputIndex() + layer;
--- a/onnxruntime/contrib_ops/cpu/transformers/beam_search_impl_t5.h
+++ b/onnxruntime/contrib_ops/cpu/transformers/beam_search_impl_t5.h
@ -259,7 +259,8 @@ Status BeamSearchT5<T>::Execute(const FeedsFetchesManager& encoder_feeds_fetches
                                                             decoder_subgraph_.has_decoder_masked_attention_));

    if (decoder_subgraph_.past_present_share_buffer_) {
-      decoder_fetches.reserve(static_cast<int64_t>(decoder_subgraph_.GetFirstPresentOutputIndex()) + 2 * static_cast<int64_t>(decoder_subgraph_.num_layers));
+      decoder_fetches.reserve(static_cast<size_t>(decoder_subgraph_.GetFirstPresentOutputIndex()) +
+                              2 * static_cast<size_t>(decoder_subgraph_.num_layers));
      decoder_fetches.resize(decoder_subgraph_.GetFirstPresentOutputIndex(), OrtValue());
      for (int layer = 0; layer < 2 * decoder_subgraph_.num_layers; layer++) {
        int feed_idx = decoder_subgraph_.GetFirstPastInputIndex() + layer;
--- a/onnxruntime/contrib_ops/cpu/transformers/beam_search_impl_whisper.h
+++ b/onnxruntime/contrib_ops/cpu/transformers/beam_search_impl_whisper.h
@ -251,7 +251,8 @@ Status BeamSearchWhisper<T>::Execute(const FeedsFetchesManager& encoder_feeds_fe
                                                             decoder_subgraph_.has_decoder_masked_attention_));

    if (decoder_subgraph_.past_present_share_buffer_) {
-      decoder_fetches.reserve(static_cast<int64_t>(decoder_subgraph_.GetFirstPresentOutputIndex()) + 2 * static_cast<int64_t>(decoder_subgraph_.num_layers));
+      decoder_fetches.reserve(static_cast<size_t>(decoder_subgraph_.GetFirstPresentOutputIndex()) +
+                              2 * static_cast<size_t>(decoder_subgraph_.num_layers));
      decoder_fetches.resize(decoder_subgraph_.GetFirstPresentOutputIndex(), OrtValue());
      for (int layer = 0; layer < 2 * decoder_subgraph_.num_layers; layer++) {
        int feed_idx = decoder_subgraph_.GetFirstPastInputIndex() + layer;
--- a/onnxruntime/contrib_ops/cpu/transformers/beam_search_scorer.cc
+++ b/onnxruntime/contrib_ops/cpu/transformers/beam_search_scorer.cc
@ -224,7 +224,8 @@ void BeamSearchScorer::Finalize(ISequences& sequences,
    if (!sequence_scores.empty())
      sequence_scores_buffer = sequence_scores.subspan(batch_index * num_return_sequences_, num_return_sequences_);

-    beam_hyp.Output(num_return_sequences_, max_length_, batch_output, sequence_scores_buffer);
+    beam_hyp.Output(narrow<int>(num_return_sequences_), narrow<int>(max_length_), batch_output,
+                    sequence_scores_buffer);
  }
 }

--- a/onnxruntime/contrib_ops/cpu/transformers/greedy_search_impl_gpt.h
+++ b/onnxruntime/contrib_ops/cpu/transformers/greedy_search_impl_gpt.h
@ -229,7 +229,7 @@ Status GreedySearchGpt<T, ParametersT>::Execute(const FeedsFetchesManager* init_
  ORT_RETURN_IF_ERROR(CreateInitialFeeds(greedy_state.sequence_lengths, expanded_input_ids_in_cpu, feeds, buffer));

  if (gpt_subgraph_.past_present_share_buffer_) {  // Reuse past and present
-    fetches.reserve((int64_t)gpt_subgraph_.GetFirstPresentOutputIndex() + gpt_subgraph_.num_layers);
+    fetches.reserve(static_cast<size_t>(gpt_subgraph_.GetFirstPresentOutputIndex()) + gpt_subgraph_.num_layers);
    fetches.resize(gpt_subgraph_.GetFirstPresentOutputIndex(), OrtValue());
    for (int layer = 0; layer < gpt_subgraph_.num_layers; layer++) {
      int feed_idx = gpt_subgraph_.GetFirstPastInputIndex() + layer;
--- a/onnxruntime/core/framework/print_tensor_utils.h
+++ b/onnxruntime/core/framework/print_tensor_utils.h
@ -139,9 +139,9 @@ void PrintCpuTensor(const Tensor& tensor, int threshold = kDefaultSnippetThresho
  bool is_snippet = (threshold > 0 && static_cast<int64_t>(threshold) < num_items);
  size_t num_dims = shape.NumDimensions();
  if (num_dims >= 3) {
-    int dim0 = static_cast<int>(shape.SizeToDimension(num_dims - 2));
-    int dim1 = static_cast<int>(shape[num_dims - 2]);
-    int dim2 = static_cast<int>(shape[num_dims - 1]);
+    int64_t dim0 = shape.SizeToDimension(num_dims - 2);
+    int64_t dim1 = shape[num_dims - 2];
+    int64_t dim2 = shape[num_dims - 1];
    if (is_snippet) {
      PrintCpuTensorSnippet<T>(data, dim0, dim1, dim2, edge_items);
    } else {
@ -150,11 +150,11 @@ void PrintCpuTensor(const Tensor& tensor, int threshold = kDefaultSnippetThresho
    return;
  }

-  size_t num_rows = 1;
+  int64_t num_rows = 1;
  if (num_dims > 1) {
-    num_rows = static_cast<size_t>(shape[0]);
+    num_rows = shape[0];
  }
-  size_t row_size = num_items / num_rows;
+  int64_t row_size = num_items / num_rows;

  if (is_snippet) {
    PrintCpuTensorSnippet<T>(data, num_rows, row_size, edge_items);
--- a/onnxruntime/core/framework/stream_execution_context.h
+++ b/onnxruntime/core/framework/stream_execution_context.h
@ -46,7 +46,9 @@ class StreamExecutionContext {
      return v_.fetch_sub(1, std::memory_order_relaxed) == 1;
    }

-    int32_t Get() { return v_.load(std::memory_order_relaxed); }
+    int32_t Get() {
+      return gsl::narrow_cast<int32_t>(v_.load(std::memory_order_relaxed));
+    }

    void Inc() {
      ++v_;
--- a/onnxruntime/core/platform/posix/env.cc
+++ b/onnxruntime/core/platform/posix/env.cc
@ -355,7 +355,7 @@ class PosixEnv : public Env {
        micros -= static_cast<int64_t>(sleep_time.tv_sec) * OneMillion;
      }
      if (micros < OneMillion) {
-        sleep_time.tv_nsec = 1000 * micros;
+        sleep_time.tv_nsec = static_cast<decltype(timespec::tv_nsec)>(1000 * micros);
        micros = 0;
      }
      while (nanosleep(&sleep_time, &sleep_time) != 0 && errno == EINTR) {
@ -457,9 +457,9 @@ class PosixEnv : public Env {
      return Status::OK();
    }

-    static const long page_size = sysconf(_SC_PAGESIZE);
+    static const size_t page_size = narrow<size_t>(sysconf(_SC_PAGESIZE));
    const FileOffsetType offset_to_page = offset % static_cast<FileOffsetType>(page_size);
-    const size_t mapped_length = length + offset_to_page;
+    const size_t mapped_length = length + static_cast<size_t>(offset_to_page);
    const FileOffsetType mapped_offset = offset - offset_to_page;
    void* const mapped_base =
        mmap(nullptr, mapped_length, PROT_READ | PROT_WRITE, MAP_PRIVATE, file_descriptor.Get(), mapped_offset);
--- a/onnxruntime/core/providers/cpu/ml/linearclassifier.cc
+++ b/onnxruntime/core/providers/cpu/ml/linearclassifier.cc
@ -35,7 +35,7 @@ LinearClassifier::LinearClassifier(const OpKernelInfo& info)
    ORT_ENFORCE(!coefficients_.empty());

  using_strings_ = !classlabels_strings_.empty();
-  class_count_ = static_cast<int64_t>(intercepts_.size());
+  class_count_ = static_cast<ptrdiff_t>(intercepts_.size());
 }

 // Use GEMM for the calculations, with broadcasting of intercepts
--- a/onnxruntime/core/providers/cpu/nn/dropout_op.h
+++ b/onnxruntime/core/providers/cpu/nn/dropout_op.h
@ -3,6 +3,7 @@

 #pragma once

+#include "core/common/narrow.h"
 #include "core/framework/op_kernel.h"
 #include "core/framework/random_generator.h"
 #include <chrono>
@ -56,10 +57,10 @@ Status Dropout<T1, T2>::Compute(OpKernelContext* context) const {
  auto Y_span = Y->MutableDataAsSpan<T1>();
  Tensor* mask = context->Output(1, X_shape);  // optional
  std::unique_ptr<bool[]> temp_mask_buffer{};  // temporary buffer to use if mask input is not provided
-  auto mask_span = [&X_shape, mask, &temp_mask_buffer]() {
+  auto mask_span = [X_size = narrow<size_t>(X_shape.Size()), mask, &temp_mask_buffer]() {
    if (mask) return mask->MutableDataAsSpan<bool>();
-    temp_mask_buffer = std::make_unique<bool[]>(X_shape.Size());
-    return gsl::make_span(temp_mask_buffer.get(), X_shape.Size());
+    temp_mask_buffer = std::make_unique<bool[]>(X_size);
+    return gsl::make_span(temp_mask_buffer.get(), X_size);
  }();

  ORT_ENFORCE(!mask || mask->Shape() == X_shape, "X and mask should have the same shape");
--- a/onnxruntime/core/providers/cpu/quantization/qlinearconv.cc
+++ b/onnxruntime/core/providers/cpu/quantization/qlinearconv.cc
@ -805,7 +805,7 @@ Status QLinearConv<ActType>::Compute(OpKernelContext* context) const {
            strides.data(),
            dilations.data(),
            pads.data(),
-            static_cast<int64_t>(kernel_rank),
+            static_cast<ptrdiff_t>(kernel_rank),
            static_cast<ActType*>(col_buffer.get()) + group_id * col_buffer_size,
            X_zero_point_value);
      }
--- a/onnxruntime/python/onnxruntime_pybind_state.cc
+++ b/onnxruntime/python/onnxruntime_pybind_state.cc
@ -12,6 +12,7 @@
 #include "core/common/inlined_containers.h"
 #include "core/common/logging/logging.h"
 #include "core/common/logging/severity.h"
+#include "core/common/narrow.h"
 #include "core/common/optional.h"
 #include "core/common/path_string.h"
 #include "core/framework/arena_extend_strategy.h"
@ -95,7 +96,7 @@ void GetPyObjFromTensor(const Tensor& rtensor, py::object& obj,
  MLDataType dtype = rtensor.DataType();
  const int numpy_type = OnnxRuntimeTensorToNumpyType(dtype);
  obj = py::reinterpret_steal<py::object>(PyArray_SimpleNew(
-      shape.NumDimensions(), npy_dims.data(), numpy_type));
+      narrow<int>(shape.NumDimensions()), npy_dims.data(), numpy_type));

  void* out_ptr = static_cast<void*>(
      PyArray_DATA(reinterpret_cast<PyArrayObject*>(obj.ptr())));
@ -1604,7 +1605,7 @@ including arg name, arg type (contains both type and shape).)pbdoc")
          if (is_arg_file_name) {
            OrtPybindThrowIfError(sess->GetSessionHandle()->Load(arg));
          } else {
-            OrtPybindThrowIfError(sess->GetSessionHandle()->Load(arg.data(), arg.size()));
+            OrtPybindThrowIfError(sess->GetSessionHandle()->Load(arg.data(), narrow<int>(arg.size())));
          }
        }

--- a/onnxruntime/test/onnx/tensorprotoutils.cc
+++ b/onnxruntime/test/onnx/tensorprotoutils.cc
@ -79,7 +79,7 @@ static void UnpackTensorWithRawData(const void* raw_data, size_t raw_data_length
 #define DEFINE_UNPACK_TENSOR(T, Type, field_name, field_size)                                             \
  template <>                                                                                             \
  void UnpackTensor(const onnx::TensorProto& tensor, const void* raw_data, size_t raw_data_len,           \
-                    /*out*/ T* p_data, int64_t expected_size) {                                           \
+                    /*out*/ T* p_data, size_t expected_size) {                                            \
    if (nullptr == p_data) {                                                                              \
      const size_t size = raw_data != nullptr ? raw_data_len : tensor.field_size();                       \
      if (size == 0) return;                                                                              \
@ -92,7 +92,7 @@ static void UnpackTensorWithRawData(const void* raw_data, size_t raw_data_length
      UnpackTensorWithRawData(raw_data, raw_data_len, expected_size, p_data);                             \
      return;                                                                                             \
    }                                                                                                     \
-    if (tensor.field_size() != expected_size)                                                             \
+    if (static_cast<size_t>(tensor.field_size()) != expected_size)                                        \
      ORT_CXX_API_THROW(MakeString("corrupted protobuf data: tensor shape size(", expected_size,          \
                                   ") does not match the data size(", tensor.field_size(), ") in proto"), \
                        OrtErrorCode::ORT_FAIL);                                                          \
@ -117,7 +117,7 @@ DEFINE_UNPACK_TENSOR(uint32_t, onnx::TensorProto_DataType_UINT32, uint64_data, u
 // doesn't support raw data
 template <>
 void UnpackTensor(const onnx::TensorProto& tensor, const void* /*raw_data*/, size_t /*raw_data_len*/,
-                  /*out*/ std::string* p_data, int64_t expected_size) {
+                  /*out*/ std::string* p_data, size_t expected_size) {
  if (nullptr == p_data) {
    if (tensor.string_data_size() == 0) return;
    ORT_CXX_API_THROW("", OrtErrorCode::ORT_INVALID_ARGUMENT);
@ -126,7 +126,7 @@ void UnpackTensor(const onnx::TensorProto& tensor, const void* /*raw_data*/, siz
    ORT_CXX_API_THROW("", OrtErrorCode::ORT_INVALID_ARGUMENT);
  }

-  if (tensor.string_data_size() != expected_size)
+  if (static_cast<size_t>(tensor.string_data_size()) != expected_size)
    ORT_CXX_API_THROW(
        "UnpackTensor: the pre-allocate size does not match the size in proto", OrtErrorCode::ORT_FAIL);

@ -139,7 +139,7 @@ void UnpackTensor(const onnx::TensorProto& tensor, const void* /*raw_data*/, siz
 }
 template <>
 void UnpackTensor(const onnx::TensorProto& tensor, const void* raw_data, size_t raw_data_len,
-                  /*out*/ bool* p_data, int64_t expected_size) {
+                  /*out*/ bool* p_data, size_t expected_size) {
  if (nullptr == p_data) {
    const size_t size = raw_data != nullptr ? raw_data_len : tensor.int32_data_size();
    if (size == 0) return;
@ -153,7 +153,7 @@ void UnpackTensor(const onnx::TensorProto& tensor, const void* raw_data, size_t
    return UnpackTensorWithRawData(raw_data, raw_data_len, expected_size, p_data);
  }

-  if (tensor.int32_data_size() != expected_size)
+  if (static_cast<size_t>(tensor.int32_data_size()) != expected_size)
    ORT_CXX_API_THROW(
        "UnpackTensor: the pre-allocate size does not match the size in proto", OrtErrorCode::ORT_FAIL);
  for (int iter : tensor.int32_data()) {
@ -164,7 +164,7 @@ void UnpackTensor(const onnx::TensorProto& tensor, const void* raw_data, size_t
 }
 template <>
 void UnpackTensor(const onnx::TensorProto& tensor, const void* raw_data, size_t raw_data_len,
-                  /*out*/ MLFloat16* p_data, int64_t expected_size) {
+                  /*out*/ MLFloat16* p_data, size_t expected_size) {
  if (nullptr == p_data) {
    const size_t size = raw_data != nullptr ? raw_data_len : tensor.int32_data_size();
    if (size == 0) return;
@ -178,7 +178,7 @@ void UnpackTensor(const onnx::TensorProto& tensor, const void* raw_data, size_t
    return UnpackTensorWithRawData(raw_data, raw_data_len, expected_size, p_data);
  }

-  if (tensor.int32_data_size() != expected_size)
+  if (static_cast<size_t>(tensor.int32_data_size()) != expected_size)
    ORT_CXX_API_THROW(
        "UnpackTensor: the pre-allocate size does not match the size in proto", OrtErrorCode::ORT_FAIL);

@ -197,7 +197,7 @@ void UnpackTensor(const onnx::TensorProto& tensor, const void* raw_data, size_t

 template <>
 void UnpackTensor(const onnx::TensorProto& tensor, const void* raw_data, size_t raw_data_len,
-                  /*out*/ BFloat16* p_data, int64_t expected_size) {
+                  /*out*/ BFloat16* p_data, size_t expected_size) {
  if (nullptr == p_data) {
    const size_t size = raw_data != nullptr ? raw_data_len : tensor.int32_data_size();
    if (size == 0)
@ -213,7 +213,7 @@ void UnpackTensor(const onnx::TensorProto& tensor, const void* raw_data, size_t
    return UnpackTensorWithRawData(raw_data, raw_data_len, expected_size, p_data);
  }

-  if (tensor.int32_data_size() != expected_size)
+  if (static_cast<size_t>(tensor.int32_data_size()) != expected_size)
    ORT_CXX_API_THROW(
        "UnpackTensor: the pre-allocate size does not match the size in proto", OrtErrorCode::ORT_FAIL);

@ -233,7 +233,7 @@ void UnpackTensor(const onnx::TensorProto& tensor, const void* raw_data, size_t
 #define DEFINE_UNPACK_TENSOR_FLOAT8(TYPE, ONNX_TYPE)                                                       \
  template <>                                                                                              \
  void UnpackTensor(const onnx::TensorProto& tensor, const void* raw_data, size_t raw_data_len,            \
-                    /*out*/ TYPE* p_data, int64_t expected_size) {                                         \
+                    /*out*/ TYPE* p_data, size_t expected_size) {                                          \
    if (nullptr == p_data) {                                                                               \
      const size_t size = raw_data != nullptr ? raw_data_len : tensor.int32_data_size();                   \
      if (size == 0)                                                                                       \
@ -246,7 +246,7 @@ void UnpackTensor(const onnx::TensorProto& tensor, const void* raw_data, size_t
    if (raw_data != nullptr) {                                                                             \
      return UnpackTensorWithRawData(raw_data, raw_data_len, expected_size, p_data);                       \
    }                                                                                                      \
-    if (tensor.int32_data_size() != expected_size)                                                         \
+    if (static_cast<size_t>(tensor.int32_data_size()) != expected_size)                                    \
      ORT_CXX_API_THROW(                                                                                   \
          "UnpackTensor: the pre-allocate size does not match the size in proto", OrtErrorCode::ORT_FAIL); \
    constexpr int max_value = std::numeric_limits<uint8_t>::max();                                         \
@ -360,9 +360,10 @@ ORT_API(void, OrtUninitializeBuffer, _In_opt_ void* input, size_t input_len, enu
  }
 }

-#define CASE_PROTO(X, Y)                                                                                       \
-  case onnx::TensorProto_DataType::TensorProto_DataType_##X:                                                   \
-    ::onnxruntime::test::UnpackTensor<Y>(tensor_proto, raw_data, raw_data_len, (Y*)preallocated, tensor_size); \
+#define CASE_PROTO(X, Y)                                                                      \
+  case onnx::TensorProto_DataType::TensorProto_DataType_##X:                                  \
+    ::onnxruntime::test::UnpackTensor<Y>(tensor_proto, raw_data, raw_data_len,                \
+                                         (Y*)preallocated, static_cast<size_t>(tensor_size)); \
    break;

 #define CASE_TYPE(X)                   \
@ -466,7 +467,7 @@ Status TensorProtoToMLValue(const onnx::TensorProto& tensor_proto, const MemBuff
            deleter.param = new UnInitializeParam{preallocated, preallocated_size, ele_type};
          }
          ::onnxruntime::test::UnpackTensor<std::string>(tensor_proto, raw_data, raw_data_len,
-                                                         (std::string*)preallocated, tensor_size);
+                                                         (std::string*)preallocated, static_cast<size_t>(tensor_size));
          break;
        default: {
          std::ostringstream ostr;
--- a/onnxruntime/test/onnx/tensorprotoutils.h
+++ b/onnxruntime/test/onnx/tensorprotoutils.h
@ -38,9 +38,9 @@ common::Status TensorProtoToMLValue(const onnx::TensorProto& input, const MemBuf

 template <typename T>
 void UnpackTensor(const onnx::TensorProto& tensor, const void* raw_data, size_t raw_data_len,
-                  /*out*/ T* p_data, int64_t expected_size);
+                  /*out*/ T* p_data, size_t expected_size);

 ONNXTensorElementDataType CApiElementTypeFromProtoType(int type);
 ONNXTensorElementDataType GetTensorElementType(const onnx::TensorProto& tensor_proto);
 }  // namespace test
-}  // namespace onnxruntime
+}  // namespace onnxruntime
--- a/onnxruntime/test/perftest/performance_runner.cc
+++ b/onnxruntime/test/perftest/performance_runner.cc
@ -32,6 +32,13 @@ using onnxruntime::Status;
 #ifdef HAS_CLASS_MEMACCESS
 #pragma GCC diagnostic ignored "-Wclass-memaccess"
 #endif
+// eigen-src/unsupported/Eigen/CXX11/src/ThreadPool/EventCount.h:231:56: error: implicit conversion loses integer
+//   precision: 'uint64_t' (aka 'unsigned long long') to 'size_t' (aka 'unsigned long') [-Werror,-Wshorten-64-to-32]
+// next = wnext == kStackMask ? nullptr : &waiters_[wnext];
+//                                         ~~~~~~~~ ^~~~~
+#ifdef HAS_SHORTEN_64_TO_32
+#pragma GCC diagnostic ignored "-Wshorten-64-to-32"
+#endif
 #endif
 #include <unsupported/Eigen/CXX11/ThreadPool>
 #if defined(__GNUC__)
--- a/orttraining/orttraining/core/framework/tensorboard/event_writer.cc
+++ b/orttraining/orttraining/core/framework/tensorboard/event_writer.cc
@ -2,6 +2,8 @@
 // Licensed under the MIT License.

 #include "orttraining/core/framework/tensorboard/event_writer.h"
+
+#include "onnxruntime_config.h"
 #include "orttraining/core/framework/tensorboard/crc32c.h"
 #include "core/platform/env.h"

@ -13,6 +15,9 @@
 #if defined(__GNUC__)
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wunused-parameter"
+#if defined(HAS_SHORTEN_64_TO_32)
+#pragma GCC diagnostic ignored "-Wshorten-64-to-32"
+#endif
 #endif
 #include "tensorboard/compat/proto/event.pb.h"
 #if defined(__GNUC__)
--- a/tools/ci_build/build.py
+++ b/tools/ci_build/build.py
@ -13,13 +13,6 @@ import subprocess
 import sys
 from pathlib import Path

-try:
-    from packaging.version import Version as LooseVersion
-except ImportError:
-    # This is deprecated and will be removed in Python 3.12.
-    # See https://docs.python.org/3/library/distutils.html.
-    from distutils.version import LooseVersion  # pylint: disable=W4901
-
 SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__))
 REPO_DIR = os.path.normpath(os.path.join(SCRIPT_DIR, "..", ".."))

@ -53,13 +46,10 @@ class UsageError(BaseError):


 def _check_python_version():
-    # According to the BUILD.md, python 3.5+ is required:
-    # Python 2 is definitely not supported and it should be safer to consider
-    # it won't run with python 4:
-    if sys.version_info[0] != 3:  # noqa: YTT201
-        raise BuildError(f"Bad python major version: expecting python 3, found version '{sys.version}'")
-    if sys.version_info[1] < 6:  # noqa: YTT203
-        raise BuildError(f"Bad python minor version: expecting python 3.6+, found version '{sys.version}'")
+    if (sys.version_info.major, sys.version_info.minor) < (3, 7):
+        raise UsageError(
+            f"Invalid Python version. At least Python 3.7 is required. Actual Python version: {sys.version}"
+        )


 def _str_to_bool(s):
@ -382,7 +372,11 @@ def parse_arguments():
        "--xcode_code_signing_identity", default="", help="The development identity used for code signing in Xcode"
    )
    parser.add_argument(
-        "--use_xcode", action="store_true", help="Use Xcode as cmake generator, this is only supported on MacOS."
+        "--use_xcode",
+        action="store_const",
+        const="Xcode",
+        dest="cmake_generator",
+        help="Use Xcode as cmake generator, this is only supported on MacOS. Equivalent to '--cmake_generator Xcode'.",
    )
    parser.add_argument(
        "--osx_arch",
@ -551,7 +545,7 @@ def parse_arguments():
            "Xcode",
        ],
        default=None,
-        help="Specify the generator that CMake invokes. ",
+        help="Specify the generator that CMake invokes.",
    )
    parser.add_argument(
        "--enable_multi_device_test",
@ -1183,19 +1177,6 @@ def generate_build_tree(

    if is_macOS() and not args.android:
        cmake_args += ["-DCMAKE_OSX_ARCHITECTURES=" + args.osx_arch]
-        if args.use_xcode:
-            cmake_ver = LooseVersion(subprocess.check_output(["cmake", "--version"]).decode("utf-8").split()[2])
-            xcode_ver = LooseVersion(
-                subprocess.check_output(["xcrun", "xcodebuild", "-version"]).decode("utf-8").split()[1]
-            )
-            # Requires Cmake 3.21.1+ for XCode 13+
-            # The legacy build system is not longer supported on XCode 13+
-            if xcode_ver >= LooseVersion("13") and cmake_ver < LooseVersion("3.21.1"):
-                raise BuildError("CMake 3.21.1+ required to use XCode 13+")
-            # Use legacy build system for old CMake [3.19, 3.21.1) which uses new build system by default
-            # CMake 3.18- use the legacy build system by default
-            if cmake_ver >= LooseVersion("3.19.0") and cmake_ver < LooseVersion("3.21.1"):
-                cmake_args += ["-T", "buildsystem=1"]
        if args.apple_deploy_target:
            cmake_args += ["-DCMAKE_OSX_DEPLOYMENT_TARGET=" + args.apple_deploy_target]
        # Code sign the binaries, if the code signing development identity and/or team id are provided
@ -1225,13 +1206,14 @@ def generate_build_tree(
        cmake_args += ["-Donnxruntime_USE_SNPE=ON"]

    if args.ios:
+        if not args.cmake_generator == "Xcode":
+            raise BuildError("iOS build requires use of the Xcode CMake generator ('--cmake_generator Xcode').")
+
        needed_args = [
-            args.use_xcode,
            args.ios_sysroot,
            args.apple_deploy_target,
        ]
        arg_names = [
-            "--use_xcode            " + "<need use xcode to cross build iOS on MacOS>",  # noqa: ISC003
            "--ios_sysroot          " + "<the location or name of the macOS platform SDK>",  # noqa: ISC003
            "--apple_deploy_target  " + "<the minimum version of the target platform>",  # noqa: ISC003
        ]
@ -1437,7 +1419,7 @@ def build_targets(args, cmake_path, build_dir, configs, num_parallel_jobs, targe
                    "/nodeReuse:False",
                    f"/p:CL_MPCount={num_parallel_jobs}",
                ]
-            elif is_macOS() and args.use_xcode:
+            elif args.cmake_generator == "Xcode":
                # CMake will generate correct build tool args for Xcode
                cmd_args += ["--parallel", str(num_parallel_jobs)]
            else:
@ -2456,11 +2438,10 @@ def main():
                cmake_extra_args = ["-A", target_arch, "-T", toolset, "-G", args.cmake_generator]
            if args.enable_wcos:
                cmake_extra_defines.append("CMAKE_USER_MAKE_RULES_OVERRIDE=wcos_rules_override.cmake")
-        elif args.cmake_generator is not None and not (is_macOS() and args.use_xcode):
+        elif args.cmake_generator is not None:
            cmake_extra_args += ["-G", args.cmake_generator]
-        elif is_macOS():
-            if args.use_xcode:
-                cmake_extra_args += ["-G", "Xcode"]
+
+        if is_macOS():
            if not args.ios and not args.android and args.osx_arch == "arm64" and platform.machine() == "x86_64":
                if args.test:
                    log.warning("Cannot test ARM64 build on X86_64. Will skip test running after build.")