From b83ea3c2ff305069db779c4b5f980cfd38915207 Mon Sep 17 00:00:00 2001 From: Baiju Meswani Date: Mon, 29 Aug 2022 10:09:32 -0700 Subject: [PATCH] Address prefast static analysis warnings (#12756) --- include/onnxruntime/core/session/onnxruntime_c_api.h | 2 +- onnxruntime/core/session/ort_apis.h | 3 +-- orttraining/orttraining/core/graph/optimizer_config.h | 2 +- orttraining/orttraining/training_ops/cuda/gist/gist.cc | 4 ++-- .../cuda/loss/softmax_cross_entropy_loss_impl.cc | 8 ++++---- .../orttraining/training_ops/cuda/optimizer/lamb.cc | 7 +++---- .../orttraining/training_ops/cuda/tensor/concat.cc | 4 ++-- 7 files changed, 14 insertions(+), 16 deletions(-) diff --git a/include/onnxruntime/core/session/onnxruntime_c_api.h b/include/onnxruntime/core/session/onnxruntime_c_api.h index e9a04fc67f..625c42ed42 100644 --- a/include/onnxruntime/core/session/onnxruntime_c_api.h +++ b/include/onnxruntime/core/session/onnxruntime_c_api.h @@ -3494,7 +3494,7 @@ struct OrtApi { * * \since Version 1.13 */ - const OrtTrainingApi*(ORT_API_CALL* GetTrainingApi)(uint32_t version)NO_EXCEPTION ORT_ALL_ARGS_NONNULL; + const OrtTrainingApi*(ORT_API_CALL* GetTrainingApi)(uint32_t version) NO_EXCEPTION; #ifdef __cplusplus OrtApi(const OrtApi&)=delete; // Prevent users from accidentally copying the API structure, it should always be passed as a pointer diff --git a/onnxruntime/core/session/ort_apis.h b/onnxruntime/core/session/ort_apis.h index f1bd212097..5c36c97aa0 100644 --- a/onnxruntime/core/session/ort_apis.h +++ b/onnxruntime/core/session/ort_apis.h @@ -388,7 +388,6 @@ ORT_API_STATUS_IMPL(CopyKernelInfo, _In_ const OrtKernelInfo* info, _Outptr_ Ort ORT_API(void, ReleaseKernelInfo, _Frees_ptr_opt_ OrtKernelInfo* info_copy); -_Check_return_ _Ret_maybenull_ const OrtTrainingApi* ORT_API_CALL GetTrainingApi(uint32_t version) - NO_EXCEPTION ORT_MUST_USE_RESULT; +ORT_API(const OrtTrainingApi*, GetTrainingApi, uint32_t version); } // namespace OrtApis diff --git a/orttraining/orttraining/core/graph/optimizer_config.h b/orttraining/orttraining/core/graph/optimizer_config.h index 4dca9c2ce5..195a84e2d7 100644 --- a/orttraining/orttraining/core/graph/optimizer_config.h +++ b/orttraining/orttraining/core/graph/optimizer_config.h @@ -24,7 +24,7 @@ enum class AdasumReductionType : int64_t { }; // Data types to support for mixed precision training. -enum MixedPrecisionDataType { +enum class MixedPrecisionDataType { FP16, BF16, }; diff --git a/orttraining/orttraining/training_ops/cuda/gist/gist.cc b/orttraining/orttraining/training_ops/cuda/gist/gist.cc index 1591bb8f58..c4c9b7343f 100644 --- a/orttraining/orttraining/training_ops/cuda/gist/gist.cc +++ b/orttraining/orttraining/training_ops/cuda/gist/gist.cc @@ -274,7 +274,7 @@ Status GistPackMsfp15EncoderOp::ComputeInternal(OpKernelContext* context) con const size_t ndims = shape.NumDimensions(); const size_t pre_axis_size = shape.SizeToDimension(ndims - 1); size_t axis_size = shape.SizeFromDimension(ndims - 1); - const size_t tile_size = 8; + constexpr size_t tile_size = 8; if (axis_size % tile_size != 0) axis_size = shape.SizeToDimension(ndims - 2); @@ -313,7 +313,7 @@ Status GistPackMsfp15DecoderOp::ComputeInternal(OpKernelContext* context) con const size_t ndims = shape.NumDimensions(); const size_t pre_axis_size = shape.SizeToDimension(ndims - 1); size_t axis_size = shape.SizeFromDimension(ndims - 1); - const size_t tile_size = 8; + constexpr size_t tile_size = 8; if (axis_size % tile_size != 0) axis_size = shape.SizeToDimension(ndims - 2); typedef typename ToCudaType::MappedType CudaT; diff --git a/orttraining/orttraining/training_ops/cuda/loss/softmax_cross_entropy_loss_impl.cc b/orttraining/orttraining/training_ops/cuda/loss/softmax_cross_entropy_loss_impl.cc index 3f4f09058c..c4ec52e48a 100644 --- a/orttraining/orttraining/training_ops/cuda/loss/softmax_cross_entropy_loss_impl.cc +++ b/orttraining/orttraining/training_ops/cuda/loss/softmax_cross_entropy_loss_impl.cc @@ -28,7 +28,7 @@ OrtValue AllocateTensorInMLValue(const MLDataType data_type, const TensorShape& kCudaExecutionProvider, \ (*KernelDefBuilder::Create()) \ .TypeConstraint("T", DataTypeImpl::GetTensorType()) \ - .TypeConstraint("Tind", DataTypeImpl::GetTensorType()), \ + .TypeConstraint("Tind", DataTypeImpl::GetTensorType()), \ Class); #define REGISTER_KERNEL_TYPED_TWO_TYPES(Class, T, Tin, domain, version) \ @@ -40,7 +40,7 @@ OrtValue AllocateTensorInMLValue(const MLDataType data_type, const TensorShape& kCudaExecutionProvider, \ (*KernelDefBuilder::Create()) \ .TypeConstraint("T", DataTypeImpl::GetTensorType()) \ - .TypeConstraint("Tind", DataTypeImpl::GetTensorType()), \ + .TypeConstraint("Tind", DataTypeImpl::GetTensorType()), \ Class); template @@ -263,7 +263,7 @@ Status SoftmaxCrossEntropyLossGrad::ComputeInternal(OpKernelContext* ctx reduction_buffer.get(), buffer_size)); } else { - const TBuf normalize_factor = static_cast(1.0f); + constexpr TBuf normalize_factor = static_cast(1.0f); CUDA_RETURN_IF_ERROR(cudaMemcpyAsync(normalize_factor_data.get(), &normalize_factor, sizeof(TBuf), cudaMemcpyHostToDevice, Stream())); } @@ -316,7 +316,7 @@ INSTANTIATE_COMPUTE_SPARSE(SoftmaxCrossEntropyLossGrad, BFloat16, int64_t, kMSDo (*KernelDefBuilder::Create()) \ .InputMemoryType(OrtMemTypeCPUInput, CpuInputIndex) \ .TypeConstraint("T", DataTypeImpl::GetTensorType()) \ - .TypeConstraint("Tind", DataTypeImpl::GetTensorType()) \ + .TypeConstraint("Tind", DataTypeImpl::GetTensorType()) \ .TypeConstraint("I", DataTypeImpl::GetTensorType()), \ ClassName); diff --git a/orttraining/orttraining/training_ops/cuda/optimizer/lamb.cc b/orttraining/orttraining/training_ops/cuda/optimizer/lamb.cc index 205db1640f..b36c46d161 100644 --- a/orttraining/orttraining/training_ops/cuda/optimizer/lamb.cc +++ b/orttraining/orttraining/training_ops/cuda/optimizer/lamb.cc @@ -1,7 +1,6 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. - #include "orttraining/training_ops/cuda/optimizer/lamb.h" #include "orttraining/training_ops/cuda/optimizer/lamb_impl.h" @@ -574,9 +573,9 @@ Status LambOptimizer::Compute size_t rbs = compute_reduction_buffer_size(max_tensor_size); // Enlarge reduction buffer to accomodate multi-tensor reduction kernel as well - const int tensor_group_size = 4; // w, d, w_norm, d_norm - const int max_blocks = ChunkGroup::max_block_count; - const size_t multitensor_block_reduce_buffer_size = 2 * max_blocks * sizeof(CudaT2); + constexpr int tensor_group_size = 4; // w, d, w_norm, d_norm + constexpr int max_blocks = ChunkGroup::max_block_count; + constexpr size_t multitensor_block_reduce_buffer_size = 2 * max_blocks * sizeof(CudaT2); rbs = std::max(rbs, multitensor_block_reduce_buffer_size); return rbs; diff --git a/orttraining/orttraining/training_ops/cuda/tensor/concat.cc b/orttraining/orttraining/training_ops/cuda/tensor/concat.cc index 0352679ab5..a2d3c616cf 100644 --- a/orttraining/orttraining/training_ops/cuda/tensor/concat.cc +++ b/orttraining/orttraining/training_ops/cuda/tensor/concat.cc @@ -41,7 +41,7 @@ Status ConcatTraining::ComputeInternal(OpKernelContext* ctx) const { TensorShapeVector axis_dimension_input_output_mapping(p.output_tensor->Shape()[p.axis]); int index = 0; for (int i = 0; i < input_count; ++i) { - auto input = p.inputs[i]; + const auto& input = p.inputs[i]; concat_sizes[i] = input.tensor->Shape()[p.axis]; input_ptr_cpuspan[i] = input.tensor->DataRaw(); for (int j = 0; j < input.tensor->Shape()[p.axis]; ++j) { @@ -56,7 +56,7 @@ Status ConcatTraining::ComputeInternal(OpKernelContext* ctx) const { auto element_bytes = p.output_tensor->DataType()->Size(); int block_size_inside_axis_dim = static_cast(p.output_axis_pitch / p.output_tensor->Shape()[p.axis]); int block_size_including_axis_dim = static_cast(p.output_axis_pitch); - if (std::all_of(concat_sizes.begin(), concat_sizes.end(), [&] (int64_t i) {return i == concat_sizes[0];})) { + if (std::all_of(concat_sizes.begin(), concat_sizes.end(), [&](int64_t i) { return i == concat_sizes[0]; })) { if (input_count <= 32) { // pass by value to avoid host-to-device copy on same stream TArray input_table(input_count);