cmake/external_mkldnn.cmake | 1 + cmake/mkldnn_fix_memory.patch | 99 +++++++++++++++++++++++++++++++++++ 2 files changed, 100 insertions(+) create mode 100644 cmake/mkldnn_fix_memory.patch diff --git a/cmake/external_mkldnn.cmake b/cmake/external_mkldnn.cmake index 7874aca76..bbae6d1a4 100644 --- a/cmake/external_mkldnn.cmake +++ b/cmake/external_mkldnn.cmake @@ -194,6 +194,7 @@ if (WIN32) CONFIGURE_COMMAND PATCH_COMMAND ${MKLDNN_PATCH_REVERT_COMMAND} COMMAND git apply --ignore-space-change --ignore-whitespace ${CMAKE_SOURCE_DIR}/cmake/${MKLDNN_PATCH_FILE} + COMMAND git apply --ignore-space-change --ignore-whitespace ${CMAKE_SOURCE_DIR}/cmake/mkldnn_fix_memory.patch CMAKE_GENERATOR ${CMAKE_GENERATOR} CMAKE_GENERATOR_PLATFORM ${CMAKE_GENERATOR_PLATFORM} CMAKE_GENERATOR_TOOLSET ${CMAKE_GENERATOR_TOOLSET} diff --git a/cmake/mkldnn_fix_memory.patch b/cmake/mkldnn_fix_memory.patch new file mode 100644 index 000000000..ea1a3bd61 --- /dev/null +++ b/cmake/mkldnn_fix_memory.patch @@ -0,0 +1,99 @@ + src/cpu/jit_avx2_1x1_convolution.cpp | 6 +++--- + src/cpu/jit_avx512_common_1x1_convolution.cpp | 9 +++++---- + src/cpu/jit_avx512_core_x8s8s32x_1x1_convolution.cpp | 2 +- + src/cpu/jit_uni_1x1_conv_utils.hpp | 1 + + 4 files changed, 10 insertions(+), 8 deletions(-) + +diff --git a/src/cpu/jit_avx2_1x1_convolution.cpp b/src/cpu/jit_avx2_1x1_convolution.cpp +index 46362886..edb2b6fb 100644 +--- a/src/cpu/jit_avx2_1x1_convolution.cpp ++++ b/src/cpu/jit_avx2_1x1_convolution.cpp +@@ -50,7 +50,7 @@ void jit_avx2_1x1_convolution_fwd_t::execute_forward() const { + const memory_desc_wrapper weights_d(pd()->weights_pd(0)); + + const auto &jcp = kernel_->jcp; +- auto rtus_space = scratchpad().get(key_conv_rtus_space); ++ auto rtus_space = pd()->rtus_.reduce_src_?scratchpad().get(key_conv_rtus_space):NULL; + + const int work_amount = jcp.mb * jcp.ngroups * jcp.nb_bcast; + const int ndims = dst_d.ndims(); +@@ -180,7 +180,7 @@ void jit_avx2_1x1_convolution_bwd_data_t::execute_backward_data() const { + const memory_desc_wrapper diff_src_d(pd()->diff_src_pd()); + + const auto &jcp = kernel_->jcp; +- auto rtus_space = scratchpad().get(key_conv_rtus_space); ++ auto rtus_space = pd()->rtus_.reduce_src_?scratchpad().get(key_conv_rtus_space):NULL; + + // TODO (Roma): remove this restriction + assert(jcp.stride_w == 1 && jcp.stride_h == 1); +@@ -306,7 +306,7 @@ void jit_avx2_1x1_convolution_bwd_weights_t::execute_backward_weights() const { + const memory_desc_wrapper diff_bias_d(pd()->diff_weights_pd(1)); + + const auto &jcp = kernel_->jcp; +- auto rtus_space = scratchpad.get(key_conv_rtus_space); ++ auto rtus_space = pd()->rtus_.reduce_src_?scratchpad.get(key_conv_rtus_space):NULL; + + data_t *diff_bias = pd()->wants_padded_bias() + ? scratchpad.get(key_conv_padded_bias) : diff_bias_in; +diff --git a/src/cpu/jit_avx512_common_1x1_convolution.cpp b/src/cpu/jit_avx512_common_1x1_convolution.cpp +index 6879cd91..47cea4f4 100644 +--- a/src/cpu/jit_avx512_common_1x1_convolution.cpp ++++ b/src/cpu/jit_avx512_common_1x1_convolution.cpp +@@ -106,7 +106,7 @@ execute_forward_thr(const int ithr, const int nthr, const src_data_t *src, + const memory_desc_wrapper weights_d(pd()->weights_pd(0)); + + const auto &jcp = kernel_->jcp; +- auto rtus_space = scratchpad.get(key_conv_rtus_space); ++ auto rtus_space = pd()->rtus_.reduce_src_?scratchpad.get(key_conv_rtus_space):NULL; + + const int ndims = src_d.ndims(); + const int stride_h = (ndims == 3) ? 1 : pd()->desc()->strides[0]; +@@ -301,8 +301,9 @@ void jit_avx512_common_1x1_convolution_bwd_data_tdiff_src_pd()); + + const auto &jcp = kernel_->jcp; +- auto rtus_space = scratchpad().template get( +- key_conv_rtus_space); ++ auto rtus_space = pd()->rtus_.reduce_src_ ++ ? scratchpad().template get(key_conv_rtus_space) ++ : NULL; + + const int ndims = diff_src_d.ndims(); + +@@ -470,7 +471,7 @@ void jit_avx512_common_1x1_convolution_bwd_weights_t::execute_backward_weights() + + const auto scratchpad = this->scratchpad(); + +- auto rtus_space = scratchpad.get(key_conv_rtus_space); ++ auto rtus_space = pd()->rtus_.reduce_src_?scratchpad.get(key_conv_rtus_space):NULL; + data_t *diff_bias = pd()->wants_padded_bias() + ? scratchpad.get(key_conv_padded_bias) : diff_bias_in; + auto wei_reduction = scratchpad.get(key_conv_wei_reduction); +diff --git a/src/cpu/jit_avx512_core_x8s8s32x_1x1_convolution.cpp b/src/cpu/jit_avx512_core_x8s8s32x_1x1_convolution.cpp +index de303cd2..8129f2b2 100644 +--- a/src/cpu/jit_avx512_core_x8s8s32x_1x1_convolution.cpp ++++ b/src/cpu/jit_avx512_core_x8s8s32x_1x1_convolution.cpp +@@ -100,7 +100,7 @@ void jit_avx512_core_x8s8s32x_1x1_convolution_fwd_t + ? types::data_type_size(pd()->desc()->bias_desc.data_type) : 0; + + const auto &jcp = kernel_->jcp; +- auto rtus_space = scratchpad.get(key_conv_rtus_space); ++ auto rtus_space = pd()->rtus_.reduce_src_?scratchpad.get(key_conv_rtus_space):NULL; + auto local_scales = scratchpad.get(key_conv_adjusted_scales); + + const int work_amount = jcp.mb * jcp.ngroups * jcp.nb_bcast; +diff --git a/src/cpu/jit_uni_1x1_conv_utils.hpp b/src/cpu/jit_uni_1x1_conv_utils.hpp +index a3ed769a..6d76ba56 100644 +--- a/src/cpu/jit_uni_1x1_conv_utils.hpp ++++ b/src/cpu/jit_uni_1x1_conv_utils.hpp +@@ -94,6 +94,7 @@ inline void rtus_prepare(conv_pd_t *self, const convolution_desc_t *&conv_d, + template + inline void rtus_prepare_space_info(conv_pd_t *self, + memory_tracking::registrar_t &scratchpad) { ++ if (!self->rtus_.reduce_src_) return; + const auto &jcp = self->jcp_; + + const int max_threads = mkldnn_get_max_threads(); +-- +2.20.1.windows.1 + -- 2.20.1.windows.1