mirror of
https://github.com/saymrwulf/onnxruntime.git
synced 2026-05-22 22:01:08 +00:00
Fix some too popular warnings. (#3578)
Some pointless and noisy warnings were either fixed or disabled.
This commit is contained in:
parent
d68245853e
commit
38a18023c7
7 changed files with 21 additions and 11 deletions
|
|
@ -583,6 +583,8 @@ if (WIN32)
|
|||
string(APPEND CMAKE_CXX_FLAGS " /wd4127")
|
||||
# class needs to have dll-interface to be used by clients
|
||||
string(APPEND CMAKE_CXX_FLAGS " /wd4251")
|
||||
# issued by thrust: nonstandard extension used: nameless struct/union
|
||||
string(APPEND CMAKE_CXX_FLAGS " /wd4201")
|
||||
if (onnxruntime_ENABLE_STATIC_ANALYSIS)
|
||||
string(APPEND CMAKE_CXX_FLAGS
|
||||
" /analyze:stacksize 131072"
|
||||
|
|
@ -795,6 +797,10 @@ if (onnxruntime_USE_CUDA)
|
|||
if (NOT WIN32)
|
||||
set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} --expt-relaxed-constexpr --compiler-options -fPIC")
|
||||
endif()
|
||||
# Options passed to cudafe
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xcudafe \"--diag_suppress=bad_friend_decl\"")
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xcudafe \"--diag_suppress=unsigned_compare_with_zero\"")
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xcudafe \"--diag_suppress=expr_has_no_effect\"")
|
||||
endif()
|
||||
|
||||
if (onnxruntime_USE_TENSORRT)
|
||||
|
|
|
|||
|
|
@ -224,7 +224,7 @@ static ParallelForBlock CalculateParallelForBlock(const ptrdiff_t n, const Eigen
|
|||
// Calculate parallel efficiency as fraction of total CPU time used for
|
||||
// computations:
|
||||
double max_efficiency =
|
||||
static_cast<double>(block_count) / (Eigen::divup<int>(block_count, num_threads) * num_threads);
|
||||
static_cast<double>(block_count) / (Eigen::divup<ptrdiff_t>(block_count, num_threads) * num_threads);
|
||||
|
||||
// Now try to increase block size up to max_block_size as long as it
|
||||
// doesn't decrease parallel efficiency.
|
||||
|
|
@ -245,7 +245,7 @@ static ParallelForBlock CalculateParallelForBlock(const ptrdiff_t n, const Eigen
|
|||
assert(coarser_block_count < prev_block_count);
|
||||
prev_block_count = coarser_block_count;
|
||||
const double coarser_efficiency =
|
||||
static_cast<double>(coarser_block_count) / (Eigen::divup<int>(coarser_block_count, num_threads) * num_threads);
|
||||
static_cast<double>(coarser_block_count) / (Eigen::divup<ptrdiff_t>(coarser_block_count, num_threads) * num_threads);
|
||||
if (coarser_efficiency + 0.01 >= max_efficiency) {
|
||||
// Taking it.
|
||||
block_size = coarser_block_size;
|
||||
|
|
|
|||
|
|
@ -23,6 +23,8 @@ class IDataTransfer {
|
|||
class CPUDataTransfer : public IDataTransfer {
|
||||
public:
|
||||
CPUDataTransfer() = default;
|
||||
// Dampen MSVC warning about not fully overriding CopyTensor
|
||||
using IDataTransfer::CopyTensor;
|
||||
bool CanCopy(const OrtDevice& src_device, const OrtDevice& dst_device) const override;
|
||||
common::Status CopyTensor(const Tensor& src, Tensor& dst, int exec_queue_id) const override;
|
||||
};
|
||||
|
|
|
|||
|
|
@ -40,7 +40,7 @@ void convTransposeWithDynamicPadsShapeInference(InferenceContext& ctx) {
|
|||
}
|
||||
|
||||
// first dim is the batch axis and the next is the number of channels.
|
||||
size_t n_input_dims = static_cast<size_t>(input_shape.dim_size() - 2);
|
||||
size_t n_input_dims = static_cast<size_t>(input_shape.dim_size() - size_t{2});
|
||||
|
||||
std::vector<int64_t> dilations;
|
||||
if (getRepeatedAttribute(ctx, "dilations", dilations)) {
|
||||
|
|
@ -2046,7 +2046,7 @@ Example 4:
|
|||
|
||||
// fill with zeros if needed to reach appropriate size
|
||||
if (pads_data.size() != 2 * static_cast<size_t>(input_rank))
|
||||
pads_data.resize(2 * input_rank, 0);
|
||||
pads_data.resize(size_t{2} * input_rank, 0);
|
||||
|
||||
const auto& output_shape =
|
||||
ctx.getOutputType(0)->mutable_tensor_type()->mutable_shape();
|
||||
|
|
|
|||
|
|
@ -69,10 +69,10 @@ template <typename T>
|
|||
static int64_t CalcRangeDim(const TensorProto* startShapeInitializer,
|
||||
const TensorProto* limitShapeInitializer,
|
||||
const TensorProto* deltaShapeInitializer) {
|
||||
T start = GetFirstElement<T>(startShapeInitializer);
|
||||
T limit = GetFirstElement<T>(limitShapeInitializer);
|
||||
T delta = GetFirstElement<T>(deltaShapeInitializer);
|
||||
if (delta == T{0}) {
|
||||
auto start = static_cast<double>(GetFirstElement<T>(startShapeInitializer));
|
||||
auto limit = static_cast<double>(GetFirstElement<T>(limitShapeInitializer));
|
||||
auto delta = static_cast<double>(GetFirstElement<T>(deltaShapeInitializer));
|
||||
if (delta == 0) {
|
||||
fail_shape_inference("delta in Range operator can not be zero!");
|
||||
}
|
||||
return static_cast<int64_t>(ceil((1.0 * (limit - start)) / delta));
|
||||
|
|
|
|||
|
|
@ -22,6 +22,8 @@ class GPUDataTransfer : public IDataTransfer {
|
|||
|
||||
bool CanCopy(const OrtDevice& src_device, const OrtDevice& dst_device) const override;
|
||||
|
||||
// Dampen MSVC warning about not fully overriding CopyTensor
|
||||
using IDataTransfer::CopyTensor;
|
||||
common::Status CopyTensor(const Tensor& src, Tensor& dst, int exec_queue_id) const override;
|
||||
|
||||
cudaStream_t GetStream(int queue_id) const {
|
||||
|
|
|
|||
|
|
@ -461,9 +461,9 @@ void ResizeNearestImpl(
|
|||
float cubic_coeff_a,
|
||||
CudaFunctionOriginalCoordinate transform_coordinate,
|
||||
CudaFunctionNearestPixel calc_nearest_pixel,
|
||||
int64_t* prefix_dim_sum,
|
||||
int64_t* /* prefix_dim_sum */,
|
||||
NearestMappingInfo* dims_mapping) {
|
||||
int blocksPerGrid = (int)(ceil(static_cast<float>(N) / GridDim::maxThreadsPerBlock));
|
||||
int blocksPerGrid = static_cast<int>(ceil(static_cast<float>(N) / GridDim::maxThreadsPerBlock));
|
||||
|
||||
bool could2d = rank >= 2 &&
|
||||
transform_coordinate != GetDeviceOriginalCoordinateFunc(ResizeCoordinateTransformationMode::TF_CROP_AND_RESIZE) &&
|
||||
|
|
@ -472,7 +472,7 @@ void ResizeNearestImpl(
|
|||
int64_t output_height = output_shape[rank - 2];
|
||||
int64_t output_width = output_shape[rank - 1];
|
||||
fast_divmod div_output_image = (rank > 2) ? output_div_pitches[rank - 3] : fast_divmod(output_height * output_width);
|
||||
int blocksPerDimsMappingGrid = (int)(ceil((output_height + output_width) / 32.0));
|
||||
int blocksPerDimsMappingGrid = static_cast<int>(ceil((output_height + output_width) / 32.0));
|
||||
|
||||
_ResizeNearestMappingKernel2D<T><<<blocksPerDimsMappingGrid, 32, 0>>>(
|
||||
input_shape[rank - 2], input_shape[rank - 1],
|
||||
|
|
|
|||
Loading…
Reference in a new issue