onnxruntime/cmake/patches/ngraph/ngraph_fix_memory.patch
R. G. Esteves 93528d9b3c Reduce memory footprint of nGraph (#1296)
* Fix unnecessary memory allocation in MKLDNN 1x1 convolution.

* Remove the patch header.
2019-07-07 20:23:19 -07:00

125 lines
6.1 KiB
Diff

cmake/external_mkldnn.cmake | 1 +
cmake/mkldnn_fix_memory.patch | 99 +++++++++++++++++++++++++++++++++++
2 files changed, 100 insertions(+)
create mode 100644 cmake/mkldnn_fix_memory.patch
diff --git a/cmake/external_mkldnn.cmake b/cmake/external_mkldnn.cmake
index 7874aca76..bbae6d1a4 100644
--- a/cmake/external_mkldnn.cmake
+++ b/cmake/external_mkldnn.cmake
@@ -194,6 +194,7 @@ if (WIN32)
CONFIGURE_COMMAND
PATCH_COMMAND ${MKLDNN_PATCH_REVERT_COMMAND}
COMMAND git apply --ignore-space-change --ignore-whitespace ${CMAKE_SOURCE_DIR}/cmake/${MKLDNN_PATCH_FILE}
+ COMMAND git apply --ignore-space-change --ignore-whitespace ${CMAKE_SOURCE_DIR}/cmake/mkldnn_fix_memory.patch
CMAKE_GENERATOR ${CMAKE_GENERATOR}
CMAKE_GENERATOR_PLATFORM ${CMAKE_GENERATOR_PLATFORM}
CMAKE_GENERATOR_TOOLSET ${CMAKE_GENERATOR_TOOLSET}
diff --git a/cmake/mkldnn_fix_memory.patch b/cmake/mkldnn_fix_memory.patch
new file mode 100644
index 000000000..ea1a3bd61
--- /dev/null
+++ b/cmake/mkldnn_fix_memory.patch
@@ -0,0 +1,99 @@
+ src/cpu/jit_avx2_1x1_convolution.cpp | 6 +++---
+ src/cpu/jit_avx512_common_1x1_convolution.cpp | 9 +++++----
+ src/cpu/jit_avx512_core_x8s8s32x_1x1_convolution.cpp | 2 +-
+ src/cpu/jit_uni_1x1_conv_utils.hpp | 1 +
+ 4 files changed, 10 insertions(+), 8 deletions(-)
+
+diff --git a/src/cpu/jit_avx2_1x1_convolution.cpp b/src/cpu/jit_avx2_1x1_convolution.cpp
+index 46362886..edb2b6fb 100644
+--- a/src/cpu/jit_avx2_1x1_convolution.cpp
++++ b/src/cpu/jit_avx2_1x1_convolution.cpp
+@@ -50,7 +50,7 @@ void jit_avx2_1x1_convolution_fwd_t::execute_forward() const {
+ const memory_desc_wrapper weights_d(pd()->weights_pd(0));
+
+ const auto &jcp = kernel_->jcp;
+- auto rtus_space = scratchpad().get<data_t>(key_conv_rtus_space);
++ auto rtus_space = pd()->rtus_.reduce_src_?scratchpad().get<data_t>(key_conv_rtus_space):NULL;
+
+ const int work_amount = jcp.mb * jcp.ngroups * jcp.nb_bcast;
+ const int ndims = dst_d.ndims();
+@@ -180,7 +180,7 @@ void jit_avx2_1x1_convolution_bwd_data_t::execute_backward_data() const {
+ const memory_desc_wrapper diff_src_d(pd()->diff_src_pd());
+
+ const auto &jcp = kernel_->jcp;
+- auto rtus_space = scratchpad().get<data_t>(key_conv_rtus_space);
++ auto rtus_space = pd()->rtus_.reduce_src_?scratchpad().get<data_t>(key_conv_rtus_space):NULL;
+
+ // TODO (Roma): remove this restriction
+ assert(jcp.stride_w == 1 && jcp.stride_h == 1);
+@@ -306,7 +306,7 @@ void jit_avx2_1x1_convolution_bwd_weights_t::execute_backward_weights() const {
+ const memory_desc_wrapper diff_bias_d(pd()->diff_weights_pd(1));
+
+ const auto &jcp = kernel_->jcp;
+- auto rtus_space = scratchpad.get<data_t>(key_conv_rtus_space);
++ auto rtus_space = pd()->rtus_.reduce_src_?scratchpad.get<data_t>(key_conv_rtus_space):NULL;
+
+ data_t *diff_bias = pd()->wants_padded_bias()
+ ? scratchpad.get<data_t>(key_conv_padded_bias) : diff_bias_in;
+diff --git a/src/cpu/jit_avx512_common_1x1_convolution.cpp b/src/cpu/jit_avx512_common_1x1_convolution.cpp
+index 6879cd91..47cea4f4 100644
+--- a/src/cpu/jit_avx512_common_1x1_convolution.cpp
++++ b/src/cpu/jit_avx512_common_1x1_convolution.cpp
+@@ -106,7 +106,7 @@ execute_forward_thr(const int ithr, const int nthr, const src_data_t *src,
+ const memory_desc_wrapper weights_d(pd()->weights_pd(0));
+
+ const auto &jcp = kernel_->jcp;
+- auto rtus_space = scratchpad.get<src_data_t>(key_conv_rtus_space);
++ auto rtus_space = pd()->rtus_.reduce_src_?scratchpad.get<src_data_t>(key_conv_rtus_space):NULL;
+
+ const int ndims = src_d.ndims();
+ const int stride_h = (ndims == 3) ? 1 : pd()->desc()->strides[0];
+@@ -301,8 +301,9 @@ void jit_avx512_common_1x1_convolution_bwd_data_t<diff_dst_type, wei_type,
+ const memory_desc_wrapper diff_src_d(pd()->diff_src_pd());
+
+ const auto &jcp = kernel_->jcp;
+- auto rtus_space = scratchpad().template get<diff_src_data_t>(
+- key_conv_rtus_space);
++ auto rtus_space = pd()->rtus_.reduce_src_
++ ? scratchpad().template get<diff_src_data_t>(key_conv_rtus_space)
++ : NULL;
+
+ const int ndims = diff_src_d.ndims();
+
+@@ -470,7 +471,7 @@ void jit_avx512_common_1x1_convolution_bwd_weights_t::execute_backward_weights()
+
+ const auto scratchpad = this->scratchpad();
+
+- auto rtus_space = scratchpad.get<data_t>(key_conv_rtus_space);
++ auto rtus_space = pd()->rtus_.reduce_src_?scratchpad.get<data_t>(key_conv_rtus_space):NULL;
+ data_t *diff_bias = pd()->wants_padded_bias()
+ ? scratchpad.get<data_t>(key_conv_padded_bias) : diff_bias_in;
+ auto wei_reduction = scratchpad.get<data_t>(key_conv_wei_reduction);
+diff --git a/src/cpu/jit_avx512_core_x8s8s32x_1x1_convolution.cpp b/src/cpu/jit_avx512_core_x8s8s32x_1x1_convolution.cpp
+index de303cd2..8129f2b2 100644
+--- a/src/cpu/jit_avx512_core_x8s8s32x_1x1_convolution.cpp
++++ b/src/cpu/jit_avx512_core_x8s8s32x_1x1_convolution.cpp
+@@ -100,7 +100,7 @@ void jit_avx512_core_x8s8s32x_1x1_convolution_fwd_t<src_type, dst_type>
+ ? types::data_type_size(pd()->desc()->bias_desc.data_type) : 0;
+
+ const auto &jcp = kernel_->jcp;
+- auto rtus_space = scratchpad.get<src_data_t>(key_conv_rtus_space);
++ auto rtus_space = pd()->rtus_.reduce_src_?scratchpad.get<src_data_t>(key_conv_rtus_space):NULL;
+ auto local_scales = scratchpad.get<float>(key_conv_adjusted_scales);
+
+ const int work_amount = jcp.mb * jcp.ngroups * jcp.nb_bcast;
+diff --git a/src/cpu/jit_uni_1x1_conv_utils.hpp b/src/cpu/jit_uni_1x1_conv_utils.hpp
+index a3ed769a..6d76ba56 100644
+--- a/src/cpu/jit_uni_1x1_conv_utils.hpp
++++ b/src/cpu/jit_uni_1x1_conv_utils.hpp
+@@ -94,6 +94,7 @@ inline void rtus_prepare(conv_pd_t *self, const convolution_desc_t *&conv_d,
+ template <typename conv_pd_t>
+ inline void rtus_prepare_space_info(conv_pd_t *self,
+ memory_tracking::registrar_t &scratchpad) {
++ if (!self->rtus_.reduce_src_) return;
+ const auto &jcp = self->jcp_;
+
+ const int max_threads = mkldnn_get_max_threads();
+--
+2.20.1.windows.1
+
--
2.20.1.windows.1