From d01006fc222799f879c8ce70edd58e97f53b7767 Mon Sep 17 00:00:00 2001 From: RandySheriffH <48490400+RandySheriffH@users.noreply.github.com> Date: Wed, 3 Mar 2021 20:18:21 -0800 Subject: [PATCH] Move constants from heap to stack to avoid randomness on cudnn function (#6869) * move const from heap to stack * add namespace * add base prefix * define local type --- onnxruntime/contrib_ops/cuda/fused_conv.cc | 25 +++++++++++----------- onnxruntime/core/providers/cuda/nn/conv.cc | 10 +++++---- onnxruntime/core/providers/cuda/nn/conv.h | 3 +-- 3 files changed, 20 insertions(+), 18 deletions(-) diff --git a/onnxruntime/contrib_ops/cuda/fused_conv.cc b/onnxruntime/contrib_ops/cuda/fused_conv.cc index 6cce365871..a45e784d8d 100644 --- a/onnxruntime/contrib_ops/cuda/fused_conv.cc +++ b/onnxruntime/contrib_ops/cuda/fused_conv.cc @@ -43,11 +43,12 @@ class FusedConv : public onnxruntime::cuda::Conv { } bool has_z = nullptr != Base::s_.z_data; bool has_b = nullptr != Base::s_.b_data; - auto alpha = &(Base::alpha_); - auto beta = &(Base::beta_); + typedef typename onnxruntime::cuda::ToCudaType::MappedType CudaT; + const auto alpha = onnxruntime::cuda::Consts::One; + const auto beta = onnxruntime::cuda::Consts::Zero; IAllocatorUniquePtr workspace = Base::GetWorkSpace(); auto cudnn_status = cudnnConvolutionBiasActivationForward(Base::CudnnHandle(), - alpha, + &alpha, Base::s_.x_tensor, Base::s_.x_data, Base::s_.w_desc, @@ -56,7 +57,7 @@ class FusedConv : public onnxruntime::cuda::Conv { Base::s_.algo, workspace.get(), Base::s_.workspace_bytes, - has_z ? alpha : beta, + has_z ? &alpha : &beta, has_z ? Base::s_.z_tensor : Base::s_.y_tensor, has_z ? Base::s_.z_data : Base::s_.y_data, Base::s_.b_tensor, @@ -66,7 +67,7 @@ class FusedConv : public onnxruntime::cuda::Conv { Base::s_.y_data); if (CUDNN_STATUS_SUCCESS != cudnn_status) { CUDNN_RETURN_IF_ERROR(cudnnConvolutionForward(Base::CudnnHandle(), - alpha, + &alpha, Base::s_.x_tensor, Base::s_.x_data, Base::s_.w_desc, @@ -75,19 +76,19 @@ class FusedConv : public onnxruntime::cuda::Conv { Base::s_.algo, workspace.get(), Base::s_.workspace_bytes, - beta, + &beta, Base::s_.y_tensor, Base::s_.y_data)); if (has_b) { - CUDNN_RETURN_IF_ERROR(cudnnAddTensor(Base::CudnnHandle(), alpha, Base::s_.b_tensor, Base::s_.b_data, - alpha, Base::s_.y_tensor, Base::s_.y_data)); + CUDNN_RETURN_IF_ERROR(cudnnAddTensor(Base::CudnnHandle(), &alpha, Base::s_.b_tensor, Base::s_.b_data, + &alpha, Base::s_.y_tensor, Base::s_.y_data)); } if (has_z) { - CUDNN_RETURN_IF_ERROR(cudnnAddTensor(Base::CudnnHandle(), alpha, Base::s_.z_tensor, Base::s_.z_data, - alpha, Base::s_.y_tensor, Base::s_.y_data)); + CUDNN_RETURN_IF_ERROR(cudnnAddTensor(Base::CudnnHandle(), &alpha, Base::s_.z_tensor, Base::s_.z_data, + &alpha, Base::s_.y_tensor, Base::s_.y_data)); } - CUDNN_RETURN_IF_ERROR(cudnnActivationForward(Base::CudnnHandle(), activation_desc_, alpha, Base::s_.y_tensor, - Base::s_.y_data, beta, Base::s_.y_tensor, Base::s_.y_data)); + CUDNN_RETURN_IF_ERROR(cudnnActivationForward(Base::CudnnHandle(), activation_desc_, &alpha, Base::s_.y_tensor, + Base::s_.y_data, &beta, Base::s_.y_tensor, Base::s_.y_data)); } if (Base::s_.post_slicing_required) { onnxruntime::cuda::SliceOutUnwantedOutputSection(this->Stream(), Base::s_.y_data, Base::s_.y_dims_with_adjusted_pads, Base::s_.Y->MutableDataRaw(), diff --git a/onnxruntime/core/providers/cuda/nn/conv.cc b/onnxruntime/core/providers/cuda/nn/conv.cc index f61f93fab5..4f40a494d1 100644 --- a/onnxruntime/core/providers/cuda/nn/conv.cc +++ b/onnxruntime/core/providers/cuda/nn/conv.cc @@ -286,9 +286,11 @@ Status Conv::ComputeInternal(OpKernelContext* context) const { if (s_.Y->Shape().Size() == 0) { return Status::OK(); } + const auto alpha = Consts::One; + const auto beta = Consts::Zero; IAllocatorUniquePtr workspace = GetWorkSpace(); CUDNN_RETURN_IF_ERROR(cudnnConvolutionForward(CudnnHandle(), - &alpha_, + &alpha, s_.x_tensor, s_.x_data, s_.w_desc, @@ -297,12 +299,12 @@ Status Conv::ComputeInternal(OpKernelContext* context) const { s_.algo, workspace.get(), s_.workspace_bytes, - &beta_, + &beta, s_.y_tensor, s_.y_data)); if (nullptr != s_.b_data) { - CUDNN_RETURN_IF_ERROR(cudnnAddTensor(CudnnHandle(), &alpha_, s_.b_tensor, s_.b_data, - &alpha_, s_.y_tensor, s_.y_data)); + CUDNN_RETURN_IF_ERROR(cudnnAddTensor(CudnnHandle(), &alpha, s_.b_tensor, s_.b_data, + &alpha, s_.y_tensor, s_.y_data)); } // To deal with asymmetric padding, we may have over-padded on one or both sides of the spatial dimensions // This may have lead to extra results that are unnecessary and hence we slice that off here diff --git a/onnxruntime/core/providers/cuda/nn/conv.h b/onnxruntime/core/providers/cuda/nn/conv.h index 04f9865a1a..e562048eea 100644 --- a/onnxruntime/core/providers/cuda/nn/conv.h +++ b/onnxruntime/core/providers/cuda/nn/conv.h @@ -181,8 +181,7 @@ class Conv : public CudaKernel { inline IAllocatorUniquePtr GetWorkSpace() const { return GetScratchBuffer(s_.workspace_bytes); } - const CudaT alpha_ = Consts::One; - const CudaT beta_ = Consts::Zero; + Status UpdateState(OpKernelContext* context, bool bias_expected = false) const; ConvAttributes conv_attrs_; mutable CudnnConvState s_;