Move constants from heap to stack to avoid randomness in cuDNN function calls (#6869)

* move const from heap to stack

* add namespace

* add base prefix

* define local type
This commit is contained in:
RandySheriffH 2021-03-03 20:18:21 -08:00 committed by GitHub
parent ed1883a97c
commit d01006fc22
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 20 additions and 18 deletions

View file

@ -43,11 +43,12 @@ class FusedConv : public onnxruntime::cuda::Conv<T> {
}
bool has_z = nullptr != Base::s_.z_data;
bool has_b = nullptr != Base::s_.b_data;
auto alpha = &(Base::alpha_);
auto beta = &(Base::beta_);
typedef typename onnxruntime::cuda::ToCudaType<T>::MappedType CudaT;
const auto alpha = onnxruntime::cuda::Consts<CudaT>::One;
const auto beta = onnxruntime::cuda::Consts<CudaT>::Zero;
IAllocatorUniquePtr<void> workspace = Base::GetWorkSpace();
auto cudnn_status = cudnnConvolutionBiasActivationForward(Base::CudnnHandle(),
alpha,
&alpha,
Base::s_.x_tensor,
Base::s_.x_data,
Base::s_.w_desc,
@ -56,7 +57,7 @@ class FusedConv : public onnxruntime::cuda::Conv<T> {
Base::s_.algo,
workspace.get(),
Base::s_.workspace_bytes,
has_z ? alpha : beta,
has_z ? &alpha : &beta,
has_z ? Base::s_.z_tensor : Base::s_.y_tensor,
has_z ? Base::s_.z_data : Base::s_.y_data,
Base::s_.b_tensor,
@ -66,7 +67,7 @@ class FusedConv : public onnxruntime::cuda::Conv<T> {
Base::s_.y_data);
if (CUDNN_STATUS_SUCCESS != cudnn_status) {
CUDNN_RETURN_IF_ERROR(cudnnConvolutionForward(Base::CudnnHandle(),
alpha,
&alpha,
Base::s_.x_tensor,
Base::s_.x_data,
Base::s_.w_desc,
@ -75,19 +76,19 @@ class FusedConv : public onnxruntime::cuda::Conv<T> {
Base::s_.algo,
workspace.get(),
Base::s_.workspace_bytes,
beta,
&beta,
Base::s_.y_tensor,
Base::s_.y_data));
if (has_b) {
CUDNN_RETURN_IF_ERROR(cudnnAddTensor(Base::CudnnHandle(), alpha, Base::s_.b_tensor, Base::s_.b_data,
alpha, Base::s_.y_tensor, Base::s_.y_data));
CUDNN_RETURN_IF_ERROR(cudnnAddTensor(Base::CudnnHandle(), &alpha, Base::s_.b_tensor, Base::s_.b_data,
&alpha, Base::s_.y_tensor, Base::s_.y_data));
}
if (has_z) {
CUDNN_RETURN_IF_ERROR(cudnnAddTensor(Base::CudnnHandle(), alpha, Base::s_.z_tensor, Base::s_.z_data,
alpha, Base::s_.y_tensor, Base::s_.y_data));
CUDNN_RETURN_IF_ERROR(cudnnAddTensor(Base::CudnnHandle(), &alpha, Base::s_.z_tensor, Base::s_.z_data,
&alpha, Base::s_.y_tensor, Base::s_.y_data));
}
CUDNN_RETURN_IF_ERROR(cudnnActivationForward(Base::CudnnHandle(), activation_desc_, alpha, Base::s_.y_tensor,
Base::s_.y_data, beta, Base::s_.y_tensor, Base::s_.y_data));
CUDNN_RETURN_IF_ERROR(cudnnActivationForward(Base::CudnnHandle(), activation_desc_, &alpha, Base::s_.y_tensor,
Base::s_.y_data, &beta, Base::s_.y_tensor, Base::s_.y_data));
}
if (Base::s_.post_slicing_required) {
onnxruntime::cuda::SliceOutUnwantedOutputSection(this->Stream(), Base::s_.y_data, Base::s_.y_dims_with_adjusted_pads, Base::s_.Y->MutableDataRaw(),

View file

@ -286,9 +286,11 @@ Status Conv<T>::ComputeInternal(OpKernelContext* context) const {
if (s_.Y->Shape().Size() == 0) {
return Status::OK();
}
const auto alpha = Consts<CudaT>::One;
const auto beta = Consts<CudaT>::Zero;
IAllocatorUniquePtr<void> workspace = GetWorkSpace();
CUDNN_RETURN_IF_ERROR(cudnnConvolutionForward(CudnnHandle(),
&alpha_,
&alpha,
s_.x_tensor,
s_.x_data,
s_.w_desc,
@ -297,12 +299,12 @@ Status Conv<T>::ComputeInternal(OpKernelContext* context) const {
s_.algo,
workspace.get(),
s_.workspace_bytes,
&beta_,
&beta,
s_.y_tensor,
s_.y_data));
if (nullptr != s_.b_data) {
CUDNN_RETURN_IF_ERROR(cudnnAddTensor(CudnnHandle(), &alpha_, s_.b_tensor, s_.b_data,
&alpha_, s_.y_tensor, s_.y_data));
CUDNN_RETURN_IF_ERROR(cudnnAddTensor(CudnnHandle(), &alpha, s_.b_tensor, s_.b_data,
&alpha, s_.y_tensor, s_.y_data));
}
// To deal with asymmetric padding, we may have over-padded on one or both sides of the spatial dimensions
// This may have led to extra results that are unnecessary, and hence we slice them off here

View file

@ -181,8 +181,7 @@ class Conv : public CudaKernel {
inline IAllocatorUniquePtr<void> GetWorkSpace() const {
return GetScratchBuffer<void>(s_.workspace_bytes);
}
const CudaT alpha_ = Consts<CudaT>::One;
const CudaT beta_ = Consts<CudaT>::Zero;
Status UpdateState(OpKernelContext* context, bool bias_expected = false) const;
ConvAttributes conv_attrs_;
mutable CudnnConvState<cudnnConvolutionFwdAlgoPerf_t> s_;