Address prefast static analysis warnings (#12756)

This commit is contained in:
Baiju Meswani 2022-08-29 10:09:32 -07:00 committed by GitHub
parent 27304d9082
commit b83ea3c2ff
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
7 changed files with 14 additions and 16 deletions

View file

@ -3494,7 +3494,7 @@ struct OrtApi {
*
* \since Version 1.13
*/
const OrtTrainingApi*(ORT_API_CALL* GetTrainingApi)(uint32_t version)NO_EXCEPTION ORT_ALL_ARGS_NONNULL;
const OrtTrainingApi*(ORT_API_CALL* GetTrainingApi)(uint32_t version) NO_EXCEPTION;
#ifdef __cplusplus
OrtApi(const OrtApi&)=delete; // Prevent users from accidentally copying the API structure, it should always be passed as a pointer

View file

@ -388,7 +388,6 @@ ORT_API_STATUS_IMPL(CopyKernelInfo, _In_ const OrtKernelInfo* info, _Outptr_ Ort
ORT_API(void, ReleaseKernelInfo, _Frees_ptr_opt_ OrtKernelInfo* info_copy);
_Check_return_ _Ret_maybenull_ const OrtTrainingApi* ORT_API_CALL GetTrainingApi(uint32_t version)
NO_EXCEPTION ORT_MUST_USE_RESULT;
ORT_API(const OrtTrainingApi*, GetTrainingApi, uint32_t version);
} // namespace OrtApis

View file

@ -24,7 +24,7 @@ enum class AdasumReductionType : int64_t {
};
// Data types to support for mixed precision training.
enum MixedPrecisionDataType {
enum class MixedPrecisionDataType {
FP16,
BF16,
};

View file

@ -274,7 +274,7 @@ Status GistPackMsfp15EncoderOp<T>::ComputeInternal(OpKernelContext* context) con
const size_t ndims = shape.NumDimensions();
const size_t pre_axis_size = shape.SizeToDimension(ndims - 1);
size_t axis_size = shape.SizeFromDimension(ndims - 1);
const size_t tile_size = 8;
constexpr size_t tile_size = 8;
if (axis_size % tile_size != 0)
axis_size = shape.SizeToDimension(ndims - 2);
@ -313,7 +313,7 @@ Status GistPackMsfp15DecoderOp<T>::ComputeInternal(OpKernelContext* context) con
const size_t ndims = shape.NumDimensions();
const size_t pre_axis_size = shape.SizeToDimension(ndims - 1);
size_t axis_size = shape.SizeFromDimension(ndims - 1);
const size_t tile_size = 8;
constexpr size_t tile_size = 8;
if (axis_size % tile_size != 0)
axis_size = shape.SizeToDimension(ndims - 2);
typedef typename ToCudaType<T>::MappedType CudaT;

View file

@ -28,7 +28,7 @@ OrtValue AllocateTensorInMLValue(const MLDataType data_type, const TensorShape&
kCudaExecutionProvider, \
(*KernelDefBuilder::Create()) \
.TypeConstraint("T", DataTypeImpl::GetTensorType<T>()) \
.TypeConstraint("Tind", DataTypeImpl::GetTensorType<Tin>()), \
.TypeConstraint("Tind", DataTypeImpl::GetTensorType<Tin>()), \
Class<T, Tin>);
#define REGISTER_KERNEL_TYPED_TWO_TYPES(Class, T, Tin, domain, version) \
@ -40,7 +40,7 @@ OrtValue AllocateTensorInMLValue(const MLDataType data_type, const TensorShape&
kCudaExecutionProvider, \
(*KernelDefBuilder::Create()) \
.TypeConstraint("T", DataTypeImpl::GetTensorType<T>()) \
.TypeConstraint("Tind", DataTypeImpl::GetTensorType<Tin>()), \
.TypeConstraint("Tind", DataTypeImpl::GetTensorType<Tin>()), \
Class<T, Tin>);
template <typename T, typename Tin>
@ -263,7 +263,7 @@ Status SoftmaxCrossEntropyLossGrad<T, Tin>::ComputeInternal(OpKernelContext* ctx
reduction_buffer.get(),
buffer_size));
} else {
const TBuf normalize_factor = static_cast<TBuf>(1.0f);
constexpr TBuf normalize_factor = static_cast<TBuf>(1.0f);
CUDA_RETURN_IF_ERROR(cudaMemcpyAsync(normalize_factor_data.get(), &normalize_factor, sizeof(TBuf), cudaMemcpyHostToDevice, Stream()));
}
@ -316,7 +316,7 @@ INSTANTIATE_COMPUTE_SPARSE(SoftmaxCrossEntropyLossGrad, BFloat16, int64_t, kMSDo
(*KernelDefBuilder::Create()) \
.InputMemoryType(OrtMemTypeCPUInput, CpuInputIndex) \
.TypeConstraint("T", DataTypeImpl::GetTensorType<T>()) \
.TypeConstraint("Tind", DataTypeImpl::GetTensorType<Tin>()) \
.TypeConstraint("Tind", DataTypeImpl::GetTensorType<Tin>()) \
.TypeConstraint("I", DataTypeImpl::GetTensorType<int64_t>()), \
ClassName<T, Tin>);

View file

@ -1,7 +1,6 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
#include "orttraining/training_ops/cuda/optimizer/lamb.h"
#include "orttraining/training_ops/cuda/optimizer/lamb_impl.h"
@ -574,9 +573,9 @@ Status LambOptimizer<T1, T2, T3, T4, T_GRAD_NORM, T_MIXED_PRECISION_FP>::Compute
size_t rbs = compute_reduction_buffer_size<CudaT2>(max_tensor_size);
// Enlarge reduction buffer to accommodate multi-tensor reduction kernel as well
const int tensor_group_size = 4; // w, d, w_norm, d_norm
const int max_blocks = ChunkGroup<tensor_group_size>::max_block_count;
const size_t multitensor_block_reduce_buffer_size = 2 * max_blocks * sizeof(CudaT2);
constexpr int tensor_group_size = 4; // w, d, w_norm, d_norm
constexpr int max_blocks = ChunkGroup<tensor_group_size>::max_block_count;
constexpr size_t multitensor_block_reduce_buffer_size = 2 * max_blocks * sizeof(CudaT2);
rbs = std::max(rbs, multitensor_block_reduce_buffer_size);
return rbs;

View file

@ -41,7 +41,7 @@ Status ConcatTraining::ComputeInternal(OpKernelContext* ctx) const {
TensorShapeVector axis_dimension_input_output_mapping(p.output_tensor->Shape()[p.axis]);
int index = 0;
for (int i = 0; i < input_count; ++i) {
auto input = p.inputs[i];
const auto& input = p.inputs[i];
concat_sizes[i] = input.tensor->Shape()[p.axis];
input_ptr_cpuspan[i] = input.tensor->DataRaw();
for (int j = 0; j < input.tensor->Shape()[p.axis]; ++j) {
@ -56,7 +56,7 @@ Status ConcatTraining::ComputeInternal(OpKernelContext* ctx) const {
auto element_bytes = p.output_tensor->DataType()->Size();
int block_size_inside_axis_dim = static_cast<int>(p.output_axis_pitch / p.output_tensor->Shape()[p.axis]);
int block_size_including_axis_dim = static_cast<int>(p.output_axis_pitch);
if (std::all_of(concat_sizes.begin(), concat_sizes.end(), [&] (int64_t i) {return i == concat_sizes[0];})) {
if (std::all_of(concat_sizes.begin(), concat_sizes.end(), [&](int64_t i) { return i == concat_sizes[0]; })) {
if (input_count <= 32) {
// pass by value to avoid host-to-device copy on same stream
TArray<const void*, 32> input_table(input_count);