mirror of
https://github.com/saymrwulf/onnxruntime.git
synced 2026-06-02 23:39:58 +00:00
Move constants from heap to stack to avoid randomness on cudnn function (#6869)
* move const from heap to stack * add namespace * add base prefix * define local type
This commit is contained in:
parent
ed1883a97c
commit
d01006fc22
3 changed files with 20 additions and 18 deletions
|
|
@ -43,11 +43,12 @@ class FusedConv : public onnxruntime::cuda::Conv<T> {
|
|||
}
|
||||
bool has_z = nullptr != Base::s_.z_data;
|
||||
bool has_b = nullptr != Base::s_.b_data;
|
||||
auto alpha = &(Base::alpha_);
|
||||
auto beta = &(Base::beta_);
|
||||
typedef typename onnxruntime::cuda::ToCudaType<T>::MappedType CudaT;
|
||||
const auto alpha = onnxruntime::cuda::Consts<CudaT>::One;
|
||||
const auto beta = onnxruntime::cuda::Consts<CudaT>::Zero;
|
||||
IAllocatorUniquePtr<void> workspace = Base::GetWorkSpace();
|
||||
auto cudnn_status = cudnnConvolutionBiasActivationForward(Base::CudnnHandle(),
|
||||
alpha,
|
||||
&alpha,
|
||||
Base::s_.x_tensor,
|
||||
Base::s_.x_data,
|
||||
Base::s_.w_desc,
|
||||
|
|
@ -56,7 +57,7 @@ class FusedConv : public onnxruntime::cuda::Conv<T> {
|
|||
Base::s_.algo,
|
||||
workspace.get(),
|
||||
Base::s_.workspace_bytes,
|
||||
has_z ? alpha : beta,
|
||||
has_z ? &alpha : &beta,
|
||||
has_z ? Base::s_.z_tensor : Base::s_.y_tensor,
|
||||
has_z ? Base::s_.z_data : Base::s_.y_data,
|
||||
Base::s_.b_tensor,
|
||||
|
|
@ -66,7 +67,7 @@ class FusedConv : public onnxruntime::cuda::Conv<T> {
|
|||
Base::s_.y_data);
|
||||
if (CUDNN_STATUS_SUCCESS != cudnn_status) {
|
||||
CUDNN_RETURN_IF_ERROR(cudnnConvolutionForward(Base::CudnnHandle(),
|
||||
alpha,
|
||||
&alpha,
|
||||
Base::s_.x_tensor,
|
||||
Base::s_.x_data,
|
||||
Base::s_.w_desc,
|
||||
|
|
@ -75,19 +76,19 @@ class FusedConv : public onnxruntime::cuda::Conv<T> {
|
|||
Base::s_.algo,
|
||||
workspace.get(),
|
||||
Base::s_.workspace_bytes,
|
||||
beta,
|
||||
&beta,
|
||||
Base::s_.y_tensor,
|
||||
Base::s_.y_data));
|
||||
if (has_b) {
|
||||
CUDNN_RETURN_IF_ERROR(cudnnAddTensor(Base::CudnnHandle(), alpha, Base::s_.b_tensor, Base::s_.b_data,
|
||||
alpha, Base::s_.y_tensor, Base::s_.y_data));
|
||||
CUDNN_RETURN_IF_ERROR(cudnnAddTensor(Base::CudnnHandle(), &alpha, Base::s_.b_tensor, Base::s_.b_data,
|
||||
&alpha, Base::s_.y_tensor, Base::s_.y_data));
|
||||
}
|
||||
if (has_z) {
|
||||
CUDNN_RETURN_IF_ERROR(cudnnAddTensor(Base::CudnnHandle(), alpha, Base::s_.z_tensor, Base::s_.z_data,
|
||||
alpha, Base::s_.y_tensor, Base::s_.y_data));
|
||||
CUDNN_RETURN_IF_ERROR(cudnnAddTensor(Base::CudnnHandle(), &alpha, Base::s_.z_tensor, Base::s_.z_data,
|
||||
&alpha, Base::s_.y_tensor, Base::s_.y_data));
|
||||
}
|
||||
CUDNN_RETURN_IF_ERROR(cudnnActivationForward(Base::CudnnHandle(), activation_desc_, alpha, Base::s_.y_tensor,
|
||||
Base::s_.y_data, beta, Base::s_.y_tensor, Base::s_.y_data));
|
||||
CUDNN_RETURN_IF_ERROR(cudnnActivationForward(Base::CudnnHandle(), activation_desc_, &alpha, Base::s_.y_tensor,
|
||||
Base::s_.y_data, &beta, Base::s_.y_tensor, Base::s_.y_data));
|
||||
}
|
||||
if (Base::s_.post_slicing_required) {
|
||||
onnxruntime::cuda::SliceOutUnwantedOutputSection(this->Stream(), Base::s_.y_data, Base::s_.y_dims_with_adjusted_pads, Base::s_.Y->MutableDataRaw(),
|
||||
|
|
|
|||
|
|
@ -286,9 +286,11 @@ Status Conv<T>::ComputeInternal(OpKernelContext* context) const {
|
|||
if (s_.Y->Shape().Size() == 0) {
|
||||
return Status::OK();
|
||||
}
|
||||
const auto alpha = Consts<CudaT>::One;
|
||||
const auto beta = Consts<CudaT>::Zero;
|
||||
IAllocatorUniquePtr<void> workspace = GetWorkSpace();
|
||||
CUDNN_RETURN_IF_ERROR(cudnnConvolutionForward(CudnnHandle(),
|
||||
&alpha_,
|
||||
&alpha,
|
||||
s_.x_tensor,
|
||||
s_.x_data,
|
||||
s_.w_desc,
|
||||
|
|
@ -297,12 +299,12 @@ Status Conv<T>::ComputeInternal(OpKernelContext* context) const {
|
|||
s_.algo,
|
||||
workspace.get(),
|
||||
s_.workspace_bytes,
|
||||
&beta_,
|
||||
&beta,
|
||||
s_.y_tensor,
|
||||
s_.y_data));
|
||||
if (nullptr != s_.b_data) {
|
||||
CUDNN_RETURN_IF_ERROR(cudnnAddTensor(CudnnHandle(), &alpha_, s_.b_tensor, s_.b_data,
|
||||
&alpha_, s_.y_tensor, s_.y_data));
|
||||
CUDNN_RETURN_IF_ERROR(cudnnAddTensor(CudnnHandle(), &alpha, s_.b_tensor, s_.b_data,
|
||||
&alpha, s_.y_tensor, s_.y_data));
|
||||
}
|
||||
// To deal with asymmetric padding, we may have over-padded on one or both sides of the spatial dimensions
|
||||
// This may have lead to extra results that are unnecessary and hence we slice that off here
|
||||
|
|
|
|||
|
|
@ -181,8 +181,7 @@ class Conv : public CudaKernel {
|
|||
inline IAllocatorUniquePtr<void> GetWorkSpace() const {
|
||||
return GetScratchBuffer<void>(s_.workspace_bytes);
|
||||
}
|
||||
const CudaT alpha_ = Consts<CudaT>::One;
|
||||
const CudaT beta_ = Consts<CudaT>::Zero;
|
||||
|
||||
Status UpdateState(OpKernelContext* context, bool bias_expected = false) const;
|
||||
ConvAttributes conv_attrs_;
|
||||
mutable CudnnConvState<cudnnConvolutionFwdAlgoPerf_t> s_;
|
||||
|
|
|
|||
Loading…
Reference in a new issue