Fix some overly common warnings. (#3578)

Some pointless and noisy warnings are either fixed or disabled.
2026-07-09 17:28:58 +00:00 · 2020-04-18 17:05:05 -07:00 · 2020-04-18 17:05:05 -07:00 · 38a18023c7
commit 38a18023c7
parent d68245853e
7 changed files with 21 additions and 11 deletions
--- a/cmake/CMakeLists.txt
+++ b/cmake/CMakeLists.txt
@ -583,6 +583,8 @@ if (WIN32)
    string(APPEND CMAKE_CXX_FLAGS " /wd4127")
    # class needs to have dll-interface to be used by clients
    string(APPEND CMAKE_CXX_FLAGS " /wd4251")
+    # nonstandard extension used: nameless struct/union (issued by thrust)
+    string(APPEND CMAKE_CXX_FLAGS " /wd4201")
    if (onnxruntime_ENABLE_STATIC_ANALYSIS)
        string(APPEND CMAKE_CXX_FLAGS
            " /analyze:stacksize 131072"
@ -795,6 +797,10 @@ if (onnxruntime_USE_CUDA)
  if (NOT WIN32)
    set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} --expt-relaxed-constexpr --compiler-options -fPIC")
  endif()
+  # Options passed to cudafe
+  set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xcudafe \"--diag_suppress=bad_friend_decl\"")
+  set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xcudafe \"--diag_suppress=unsigned_compare_with_zero\"")
+  set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xcudafe \"--diag_suppress=expr_has_no_effect\"")
 endif()

 if (onnxruntime_USE_TENSORRT)
--- a/onnxruntime/core/common/threadpool.cc
+++ b/onnxruntime/core/common/threadpool.cc
@ -224,7 +224,7 @@ static ParallelForBlock CalculateParallelForBlock(const ptrdiff_t n, const Eigen
  // Calculate parallel efficiency as fraction of total CPU time used for
  // computations:
  double max_efficiency =
-      static_cast<double>(block_count) / (Eigen::divup<int>(block_count, num_threads) * num_threads);
+      static_cast<double>(block_count) / (Eigen::divup<ptrdiff_t>(block_count, num_threads) * num_threads);

  // Now try to increase block size up to max_block_size as long as it
  // doesn't decrease parallel efficiency.
@ -245,7 +245,7 @@ static ParallelForBlock CalculateParallelForBlock(const ptrdiff_t n, const Eigen
    assert(coarser_block_count < prev_block_count);
    prev_block_count = coarser_block_count;
    const double coarser_efficiency =
-        static_cast<double>(coarser_block_count) / (Eigen::divup<int>(coarser_block_count, num_threads) * num_threads);
+        static_cast<double>(coarser_block_count) / (Eigen::divup<ptrdiff_t>(coarser_block_count, num_threads) * num_threads);
    if (coarser_efficiency + 0.01 >= max_efficiency) {
      // Taking it.
      block_size = coarser_block_size;
--- a/onnxruntime/core/framework/data_transfer.h
+++ b/onnxruntime/core/framework/data_transfer.h
@ -23,6 +23,8 @@ class IDataTransfer {
 class CPUDataTransfer : public IDataTransfer {
 public:
  CPUDataTransfer() = default;
+  // Dampen MSVC warning about not fully overriding CopyTensor
+  using IDataTransfer::CopyTensor;
  bool CanCopy(const OrtDevice& src_device, const OrtDevice& dst_device) const override;
  common::Status CopyTensor(const Tensor& src, Tensor& dst, int exec_queue_id) const override;
 };
--- a/onnxruntime/core/graph/contrib_ops/contrib_defs.cc
+++ b/onnxruntime/core/graph/contrib_ops/contrib_defs.cc
@ -40,7 +40,7 @@ void convTransposeWithDynamicPadsShapeInference(InferenceContext& ctx) {
  }

  // first dim is the batch axis and the next is the number of channels.
-  size_t n_input_dims = static_cast<size_t>(input_shape.dim_size() - 2);
+  size_t n_input_dims = static_cast<size_t>(input_shape.dim_size() - size_t{2});

  std::vector<int64_t> dilations;
  if (getRepeatedAttribute(ctx, "dilations", dilations)) {
@ -2046,7 +2046,7 @@ Example 4:

          // fill with zeros if needed to reach appropriate size
          if (pads_data.size() != 2 * static_cast<size_t>(input_rank))
-            pads_data.resize(2 * input_rank, 0);
+            pads_data.resize(size_t{2} * input_rank, 0);

          const auto& output_shape =
              ctx.getOutputType(0)->mutable_tensor_type()->mutable_shape();
--- a/onnxruntime/core/graph/contrib_ops/range_schema_defs.cc
+++ b/onnxruntime/core/graph/contrib_ops/range_schema_defs.cc
@ -69,10 +69,10 @@ template <typename T>
 static int64_t CalcRangeDim(const TensorProto* startShapeInitializer,
                            const TensorProto* limitShapeInitializer,
                            const TensorProto* deltaShapeInitializer) {
-    T start = GetFirstElement<T>(startShapeInitializer);
-    T limit = GetFirstElement<T>(limitShapeInitializer);
-    T delta = GetFirstElement<T>(deltaShapeInitializer);
-    if (delta == T{0}) {
+    auto start = static_cast<double>(GetFirstElement<T>(startShapeInitializer));
+    auto limit = static_cast<double>(GetFirstElement<T>(limitShapeInitializer));
+    auto delta = static_cast<double>(GetFirstElement<T>(deltaShapeInitializer));
+    if (delta == 0) {
        fail_shape_inference("delta in Range operator can not be zero!");
    }
    return static_cast<int64_t>(ceil((1.0 * (limit - start)) / delta));
--- a/onnxruntime/core/providers/cuda/gpu_data_transfer.h
+++ b/onnxruntime/core/providers/cuda/gpu_data_transfer.h
@ -22,6 +22,8 @@ class GPUDataTransfer : public IDataTransfer {

  bool CanCopy(const OrtDevice& src_device, const OrtDevice& dst_device) const override;

+  // Dampen MSVC warning about not fully overriding CopyTensor
+  using IDataTransfer::CopyTensor;
  common::Status CopyTensor(const Tensor& src, Tensor& dst, int exec_queue_id) const override;

  cudaStream_t GetStream(int queue_id) const {
--- a/onnxruntime/core/providers/cuda/tensor/resize_impl.cu
+++ b/onnxruntime/core/providers/cuda/tensor/resize_impl.cu
@ -461,9 +461,9 @@ void ResizeNearestImpl(
    float cubic_coeff_a,
    CudaFunctionOriginalCoordinate transform_coordinate,
    CudaFunctionNearestPixel calc_nearest_pixel,
-    int64_t* prefix_dim_sum,
+    int64_t* /* prefix_dim_sum */,
    NearestMappingInfo* dims_mapping) {
-  int blocksPerGrid = (int)(ceil(static_cast<float>(N) / GridDim::maxThreadsPerBlock));
+  int blocksPerGrid = static_cast<int>(ceil(static_cast<float>(N) / GridDim::maxThreadsPerBlock));

  bool could2d = rank >= 2 &&
                 transform_coordinate != GetDeviceOriginalCoordinateFunc(ResizeCoordinateTransformationMode::TF_CROP_AND_RESIZE) &&
@ -472,7 +472,7 @@ void ResizeNearestImpl(
    int64_t output_height = output_shape[rank - 2];
    int64_t output_width = output_shape[rank - 1];
    fast_divmod div_output_image = (rank > 2) ? output_div_pitches[rank - 3] : fast_divmod(output_height * output_width);
-    int blocksPerDimsMappingGrid = (int)(ceil((output_height + output_width) / 32.0));
+    int blocksPerDimsMappingGrid = static_cast<int>(ceil((output_height + output_width) / 32.0));

    _ResizeNearestMappingKernel2D<T><<<blocksPerDimsMappingGrid, 32, 0>>>(
        input_shape[rank - 2], input_shape[rank - 1],