mirror of
https://github.com/saymrwulf/onnxruntime.git
synced 2026-06-29 03:30:52 +00:00
* MKL-DNN EP memory fix patch * Call default provider for Opset 10 * Opset 10 fix * Removed email header from patch * UseSubgraph method refactored
107 lines
5.1 KiB
Diff
107 lines
5.1 KiB
Diff
|
|
---
|
|
src/cpu/jit_avx2_1x1_convolution.cpp | 6 +++---
|
|
src/cpu/jit_avx512_common_1x1_convolution.cpp | 9 ++++-----
|
|
src/cpu/jit_avx512_core_x8s8s32x_1x1_convolution.cpp | 6 ++++--
|
|
src/cpu/jit_uni_1x1_conv_utils.hpp | 3 ++-
|
|
4 files changed, 13 insertions(+), 11 deletions(-)
|
|
|
|
diff --git a/src/cpu/jit_avx2_1x1_convolution.cpp b/src/cpu/jit_avx2_1x1_convolution.cpp
|
|
index 46362886..edb2b6fb 100644
|
|
--- a/src/cpu/jit_avx2_1x1_convolution.cpp
|
|
+++ b/src/cpu/jit_avx2_1x1_convolution.cpp
|
|
@@ -50,7 +50,7 @@ void jit_avx2_1x1_convolution_fwd_t::execute_forward() const {
|
|
const memory_desc_wrapper weights_d(pd()->weights_pd(0));
|
|
|
|
const auto &jcp = kernel_->jcp;
|
|
- auto rtus_space = scratchpad().get<data_t>(key_conv_rtus_space);
|
|
+ auto rtus_space = pd()->rtus_.reduce_src_?scratchpad().get<data_t>(key_conv_rtus_space):NULL;
|
|
|
|
const int work_amount = jcp.mb * jcp.ngroups * jcp.nb_bcast;
|
|
const int ndims = dst_d.ndims();
|
|
@@ -180,7 +180,7 @@ void jit_avx2_1x1_convolution_bwd_data_t::execute_backward_data() const {
|
|
const memory_desc_wrapper diff_src_d(pd()->diff_src_pd());
|
|
|
|
const auto &jcp = kernel_->jcp;
|
|
- auto rtus_space = scratchpad().get<data_t>(key_conv_rtus_space);
|
|
+ auto rtus_space = pd()->rtus_.reduce_src_?scratchpad().get<data_t>(key_conv_rtus_space):NULL;
|
|
|
|
// TODO (Roma): remove this restriction
|
|
assert(jcp.stride_w == 1 && jcp.stride_h == 1);
|
|
@@ -306,7 +306,7 @@ void jit_avx2_1x1_convolution_bwd_weights_t::execute_backward_weights() const {
|
|
const memory_desc_wrapper diff_bias_d(pd()->diff_weights_pd(1));
|
|
|
|
const auto &jcp = kernel_->jcp;
|
|
- auto rtus_space = scratchpad.get<data_t>(key_conv_rtus_space);
|
|
+ auto rtus_space = pd()->rtus_.reduce_src_?scratchpad.get<data_t>(key_conv_rtus_space):NULL;
|
|
|
|
data_t *diff_bias = pd()->wants_padded_bias()
|
|
? scratchpad.get<data_t>(key_conv_padded_bias) : diff_bias_in;
|
|
diff --git a/src/cpu/jit_avx512_common_1x1_convolution.cpp b/src/cpu/jit_avx512_common_1x1_convolution.cpp
|
|
index 6879cd91..6a32aa49 100644
|
|
--- a/src/cpu/jit_avx512_common_1x1_convolution.cpp
|
|
+++ b/src/cpu/jit_avx512_common_1x1_convolution.cpp
|
|
@@ -106,7 +106,7 @@ execute_forward_thr(const int ithr, const int nthr, const src_data_t *src,
|
|
const memory_desc_wrapper weights_d(pd()->weights_pd(0));
|
|
|
|
const auto &jcp = kernel_->jcp;
|
|
- auto rtus_space = scratchpad.get<src_data_t>(key_conv_rtus_space);
|
|
+ auto rtus_space = pd()->rtus_.reduce_src_?scratchpad.get<src_data_t>(key_conv_rtus_space):NULL;
|
|
|
|
const int ndims = src_d.ndims();
|
|
const int stride_h = (ndims == 3) ? 1 : pd()->desc()->strides[0];
|
|
@@ -301,9 +301,8 @@ void jit_avx512_common_1x1_convolution_bwd_data_t<diff_dst_type, wei_type,
|
|
const memory_desc_wrapper diff_src_d(pd()->diff_src_pd());
|
|
|
|
const auto &jcp = kernel_->jcp;
|
|
- auto rtus_space = scratchpad().template get<diff_src_data_t>(
|
|
- key_conv_rtus_space);
|
|
-
|
|
+ auto rtus_space = pd()->rtus_.reduce_src_? scratchpad().template get<diff_src_data_t>(key_conv_rtus_space): NULL;
|
|
+
|
|
const int ndims = diff_src_d.ndims();
|
|
|
|
// TODO (Roma): remove this restriction
|
|
@@ -470,7 +469,7 @@ void jit_avx512_common_1x1_convolution_bwd_weights_t::execute_backward_weights()
|
|
|
|
const auto scratchpad = this->scratchpad();
|
|
|
|
- auto rtus_space = scratchpad.get<data_t>(key_conv_rtus_space);
|
|
+ auto rtus_space = pd()->rtus_.reduce_src_?scratchpad.get<data_t>(key_conv_rtus_space):NULL;
|
|
data_t *diff_bias = pd()->wants_padded_bias()
|
|
? scratchpad.get<data_t>(key_conv_padded_bias) : diff_bias_in;
|
|
auto wei_reduction = scratchpad.get<data_t>(key_conv_wei_reduction);
|
|
diff --git a/src/cpu/jit_avx512_core_x8s8s32x_1x1_convolution.cpp b/src/cpu/jit_avx512_core_x8s8s32x_1x1_convolution.cpp
|
|
index de303cd2..ec0c54e7 100644
|
|
--- a/src/cpu/jit_avx512_core_x8s8s32x_1x1_convolution.cpp
|
|
+++ b/src/cpu/jit_avx512_core_x8s8s32x_1x1_convolution.cpp
|
|
@@ -100,8 +100,10 @@ void jit_avx512_core_x8s8s32x_1x1_convolution_fwd_t<src_type, dst_type>
|
|
? types::data_type_size(pd()->desc()->bias_desc.data_type) : 0;
|
|
|
|
const auto &jcp = kernel_->jcp;
|
|
- auto rtus_space = scratchpad.get<src_data_t>(key_conv_rtus_space);
|
|
- auto local_scales = scratchpad.get<float>(key_conv_adjusted_scales);
|
|
+
|
|
+ auto rtus_space = pd()->rtus_.reduce_src_?scratchpad.get<src_data_t>(key_conv_rtus_space):NULL;
|
|
+
|
|
+ auto local_scales = scratchpad.get<float>(key_conv_adjusted_scales);
|
|
|
|
const int work_amount = jcp.mb * jcp.ngroups * jcp.nb_bcast;
|
|
|
|
diff --git a/src/cpu/jit_uni_1x1_conv_utils.hpp b/src/cpu/jit_uni_1x1_conv_utils.hpp
|
|
index a3ed769a..5a0e0635 100644
|
|
--- a/src/cpu/jit_uni_1x1_conv_utils.hpp
|
|
+++ b/src/cpu/jit_uni_1x1_conv_utils.hpp
|
|
@@ -94,7 +94,8 @@ inline void rtus_prepare(conv_pd_t *self, const convolution_desc_t *&conv_d,
|
|
template <typename conv_pd_t>
|
|
inline void rtus_prepare_space_info(conv_pd_t *self,
|
|
memory_tracking::registrar_t &scratchpad) {
|
|
- const auto &jcp = self->jcp_;
|
|
+ if (!self->rtus_.reduce_src_) return;
|
|
+ const auto &jcp = self->jcp_;
|
|
|
|
const int max_threads = mkldnn_get_max_threads();
|
|
const size_t factor = utils::pick_by_prop_kind(self->desc()->prop_kind,
|
|
--
|
|
2.17.0.windows.1
|
|
|