From 01b45fd052d231c6bce3cdb0af6ffe7f205fe894 Mon Sep 17 00:00:00 2001 From: Yangqing Jia Date: Wed, 2 Dec 2015 15:12:04 -0800 Subject: [PATCH] backward support to cudnn R2 for TensorFlow benchmark references --- caffe2/core/common_cudnn.h | 6 ++++-- caffe2/operators/conv_op_cudnn.cc | 32 ++++++++++++++++++++++++++++--- 2 files changed, 33 insertions(+), 5 deletions(-) diff --git a/caffe2/core/common_cudnn.h b/caffe2/core/common_cudnn.h index f6d2cf63152..a4211ee7b70 100644 --- a/caffe2/core/common_cudnn.h +++ b/caffe2/core/common_cudnn.h @@ -13,8 +13,8 @@ #include "caffe2/proto/caffe2.pb.h" #include "caffe2/core/logging.h" -static_assert(CUDNN_VERSION >= 3000, - "Caffe2 requires cudnn version 3.0 or above."); +static_assert(CUDNN_VERSION >= 2000, + "Caffe2 requires cudnn version 2.0 or above."); namespace caffe2 { @@ -78,10 +78,12 @@ template<> class cudnnTypeWrapper { static const cudnnDataType_t type = CUDNN_DATA_DOUBLE; }; +#if CUDNN_VERSION >= 3000 template<> class cudnnTypeWrapper { public: static const cudnnDataType_t type = CUDNN_DATA_HALF; }; +#endif // CUDNN_VERSION >= 3000 /** * A wrapper function to convert the Caffe storage order to cudnn storage order diff --git a/caffe2/operators/conv_op_cudnn.cc b/caffe2/operators/conv_op_cudnn.cc index 0cf0f7349b2..425a40cad31 100644 --- a/caffe2/operators/conv_op_cudnn.cc +++ b/caffe2/operators/conv_op_cudnn.cc @@ -106,9 +106,10 @@ class CudnnConvGradientOp final : public CudnnConvOpBase { bool RunWithCudnnWorkspace(CuDNNWorkspaceWrapper* cudnn_ws_wrapper) override; private: +#if CUDNN_VERSION >= 3000 cudnnConvolutionBwdFilterAlgo_t bwd_filter_algo_; cudnnConvolutionBwdDataAlgo_t bwd_data_algo_; - +#endif // CUDNN_VERSION >= 3000 // input: X, W, dY // output: dW, db, and optionally dX INPUT_TAGS(INPUT, FILTER, OUTPUT_GRAD); @@ -216,9 +217,16 @@ bool CudnnConvOp::RunWithCudnnWorkspace( algo_, cudnn_ws_wrapper->Get(cudnn_ws_nbytes_), cudnn_ws_nbytes_, &kZero, top_desc_, Y->template mutable_data())); // Bias +#if CUDNN_VERSION >= 3000 CUDNN_CHECK(cudnnAddTensor_v3( - cudnn_wrapper_.cudnn_handle(), &kOne, bias_desc_, - bias.template data(), &kOne, top_desc_, Y->template mutable_data())); + cudnn_wrapper_.cudnn_handle(), &kOne, bias_desc_, + bias.template data(), &kOne, top_desc_, Y->template mutable_data())); +#else // CUDNN_VERSION >= 3000 + CUDNN_CHECK(cudnnAddTensor( + cudnn_wrapper_.cudnn_handle(), CUDNN_ADD_SAME_C, + &kOne, bias_desc_, bias.template data(), &kOne, top_desc_, + Y->template mutable_data())); +#endif // Done. return true; } @@ -296,6 +304,7 @@ bool CudnnConvGradientOp::RunWithCudnnWorkspace( size_t bwd_filter_ws_size, bwd_data_ws_size; +#if CUDNN_VERSION >= 3000 // choose backward algorithm for filter CUDNN_CHECK(cudnnGetConvolutionBackwardFilterAlgorithm( cudnn_wrapper_.cudnn_handle(), @@ -320,6 +329,7 @@ bool CudnnConvGradientOp::RunWithCudnnWorkspace( bwd_data_algo_, &bwd_data_ws_size)); cudnn_ws_nbytes_ = std::max(bwd_filter_ws_size, bwd_data_ws_size); CAFFE_VLOG(1) << "CuDNN workspace size: " << cudnn_ws_nbytes_; +#endif // CUDNN_VERSION >= 3000 } // Now, actually run the computation. @@ -328,30 +338,46 @@ bool CudnnConvGradientOp::RunWithCudnnWorkspace( CUDNN_CHECK(cudnnConvolutionBackwardBias( cudnn_wrapper_.cudnn_handle(), &kOne, top_desc_, dY.template data(), &kZero, bias_desc_, dbias->template mutable_data())); +#if CUDNN_VERSION >= 3000 CUDNN_CHECK(cudnnConvolutionBackwardFilter_v3( cudnn_wrapper_.cudnn_handle(), &kOne, bottom_desc_, X.template data(), top_desc_, dY.template data(), conv_desc_, bwd_filter_algo_, cudnn_ws_wrapper->Get(cudnn_ws_nbytes_), cudnn_ws_nbytes_, &kZero, filter_desc_, dfilter->template mutable_data())); +#else // CUDNN_VERSION >= 3000 + CUDNN_CHECK(cudnnConvolutionBackwardFilter( + cudnn_wrapper_.cudnn_handle(), &kOne, bottom_desc_, X.template data(), + top_desc_, dY.template data(), conv_desc_, + &kZero, filter_desc_, dfilter->template mutable_data())); +#endif // CUDNN_VERSION >= 3000 if (OutputSize() == 3) { // Compute the gradient w.r.t. the input. auto *dX = Output(INPUT_GRAD); dX->ReshapeLike(X); +#if CUDNN_VERSION >= 3000 CUDNN_CHECK(cudnnConvolutionBackwardData_v3( cudnn_wrapper_.cudnn_handle(), &kOne, filter_desc_, filter.template data(), top_desc_, dY.template data(), conv_desc_, bwd_data_algo_, cudnn_ws_wrapper->Get(cudnn_ws_nbytes_), cudnn_ws_nbytes_, &kZero, bottom_desc_, dX->template mutable_data())); +#else // CUDNN_VERSION >= 3000 + CUDNN_CHECK(cudnnConvolutionBackwardData( + cudnn_wrapper_.cudnn_handle(), &kOne, filter_desc_, + filter.template data(), top_desc_, dY.template data(), + conv_desc_, &kZero, bottom_desc_, dX->template mutable_data())); +#endif // CUDNN_VERSION >= 3000 } return true; } REGISTER_CUDNN_OPERATOR(Conv, CudnnConvOp) REGISTER_CUDNN_OPERATOR(ConvGradient, CudnnConvGradientOp) +#if CUDNN_VERSION >= 3000 REGISTER_CUDNN_OPERATOR(ConvFp16, CudnnConvOp) REGISTER_CUDNN_OPERATOR(ConvFp16Gradient, CudnnConvGradientOp) +#endif // CUDNN_VERSION >= 3000 } // namespace caffe2