From 95fef56dc80806d4409f308639ff17e0fc44fc73 Mon Sep 17 00:00:00 2001 From: Sreekanth Yalachigere <17345104+sreekanth-yalachigere@users.noreply.github.com> Date: Wed, 16 Oct 2019 22:49:45 -0700 Subject: [PATCH] MKL-DNN EP (#2149) * make_unique, make_shared * make_unique, make_shared --- .../mkldnn/mkldnn_execution_provider.cc | 13 +-- .../mkldnn/subgraph/mkldnn_activations.h | 53 +++++---- .../mkldnn/subgraph/mkldnn_batchnorm.h | 68 ++++++------ .../providers/mkldnn/subgraph/mkldnn_conv.h | 102 ++++++++++-------- .../mkldnn/subgraph/mkldnn_conv_batchnorm.h | 73 +++++++------ .../mkldnn/subgraph/mkldnn_func_kernel.cc | 28 ++--- .../mkldnn/subgraph/mkldnn_kernel.cc | 6 +- .../providers/mkldnn/subgraph/mkldnn_lrn.h | 40 +++---- .../providers/mkldnn/subgraph/mkldnn_pool.h | 61 ++++++----- .../providers/mkldnn/subgraph/mkldnn_sum.h | 17 +-- 10 files changed, 246 insertions(+), 215 deletions(-) diff --git a/onnxruntime/core/providers/mkldnn/mkldnn_execution_provider.cc b/onnxruntime/core/providers/mkldnn/mkldnn_execution_provider.cc index cdbafe4fad..8f814eb4b6 100644 --- a/onnxruntime/core/providers/mkldnn/mkldnn_execution_provider.cc +++ b/onnxruntime/core/providers/mkldnn/mkldnn_execution_provider.cc @@ -202,7 +202,8 @@ std::vector> MKLDNNExecutionProvider::GetCapa // There are several identical graphs in Model zoo and only differ in // few attribute values. GetGraphName return graph-name + first-node-output name std::string graph_name = GetGraphName(graph_viewer); - subgraph_ptr.reset(new mkl_dnn::Subgraph(graph_name)); + subgraph_ptr = onnxruntime::make_unique( + mkl_dnn::Subgraph(graph_name)); // output name to node index map. Using it to find sub-graph end nodes // if output of a node is not an input to any node in a sub-graph is end node @@ -221,7 +222,7 @@ std::vector> MKLDNNExecutionProvider::GetCapa node_index++; if (subgraph_ptr->mkldnn_nodes.size() > 0) { CreateMetaDef(graph_viewer, subgraph_attributes, subgraph_ptr, sub_var, result); - subgraph_ptr.reset(new mkl_dnn::Subgraph(graph_name)); + subgraph_ptr = std::make_shared(mkl_dnn::Subgraph(graph_name)); subgraph_attributes.clear(); output_to_source_node_map.clear(); } @@ -281,7 +282,7 @@ std::vector> MKLDNNExecutionProvider::GetCapa if (input_from_subgraph == false) { CreateMetaDef(graph_viewer, subgraph_attributes, subgraph_ptr, sub_var, result); subgraph_attributes.clear(); - subgraph_ptr.reset(new mkl_dnn::Subgraph(graph_name)); + subgraph_ptr = std::make_shared(mkl_dnn::Subgraph(graph_name)); output_to_source_node_map.clear(); } } @@ -320,7 +321,7 @@ std::vector> MKLDNNExecutionProvider::GetCapa } if (create_subgraph) { CreateMetaDef(graph_viewer, subgraph_attributes, subgraph_ptr, sub_var, result); - subgraph_ptr.reset(new mkl_dnn::Subgraph(graph_name)); + subgraph_ptr = std::make_shared(mkl_dnn::Subgraph(graph_name)); subgraph_attributes.clear(); output_to_source_node_map.clear(); } @@ -330,7 +331,7 @@ std::vector> MKLDNNExecutionProvider::GetCapa } else { if (!sub_var.subgraph_node_indexes.empty()) { CreateMetaDef(graph_viewer, subgraph_attributes, subgraph_ptr, sub_var, result); - subgraph_ptr.reset(new mkl_dnn::Subgraph(graph_name)); + subgraph_ptr = std::make_shared(mkl_dnn::Subgraph(graph_name)); subgraph_attributes.clear(); output_to_source_node_map.clear(); } @@ -339,7 +340,7 @@ std::vector> MKLDNNExecutionProvider::GetCapa } // graph_viewer node iterator ends if (!sub_var.subgraph_node_indexes.empty()) { CreateMetaDef(graph_viewer, subgraph_attributes, subgraph_ptr, sub_var, result); - subgraph_ptr.reset(new mkl_dnn::Subgraph(graph_name)); + subgraph_ptr = std::make_shared(mkl_dnn::Subgraph(graph_name)); subgraph_attributes.clear(); output_to_source_node_map.clear(); } diff --git a/onnxruntime/core/providers/mkldnn/subgraph/mkldnn_activations.h b/onnxruntime/core/providers/mkldnn/subgraph/mkldnn_activations.h index cf704f4cf2..8de044b9dc 100644 --- a/onnxruntime/core/providers/mkldnn/subgraph/mkldnn_activations.h +++ b/onnxruntime/core/providers/mkldnn/subgraph/mkldnn_activations.h @@ -27,7 +27,7 @@ class MklDnnRelu : public MklDnnKernel { OrtKernelContext* context, mkldnn::engine& cpu_engine, std::vector& net, - std::vector> &net_args) { + std::vector>& net_args) { Ort::CustomOpApi ort{*api}; int input_index = mklnode_ptr_->input_start_index < 0 ? 0 : mklnode_ptr_->input_start_index; @@ -45,12 +45,12 @@ class MklDnnRelu : public MklDnnKernel { ort_source_format_ = GetSourceFormat(static_cast(xdim)); - x_shape = TensorShape(xshape, xdim); + x_shape = TensorShape(xshape, xdim); - if (x_shape.NumDimensions() == 0) { - primitive_created_ = Status(common::ONNXRUNTIME, common::INVALID_ARGUMENT, "Shape of size zero " + x_shape.ToString()); - return primitive_created_; - } + if (x_shape.NumDimensions() == 0) { + primitive_created_ = Status(common::ONNXRUNTIME, common::INVALID_ARGUMENT, "Shape of size zero " + x_shape.ToString()); + return primitive_created_; + } mkldnn::memory::dims src_dims( x_shape.GetDims().begin(), x_shape.GetDims().end()); @@ -58,13 +58,13 @@ class MklDnnRelu : public MklDnnKernel { ort_source_desc_ = mkldnn::memory::desc( {src_dims}, MklDnnType(), ort_source_format_); source_desc_ = ort_source_desc_; - src_md_.reset(new mkldnn::memory::desc( - {src_dims}, MklDnnType(), ort_source_format_)); - src_mem_.reset( - new mkldnn::memory({{src_dims}, MklDnnType(), ort_source_format_}, cpu_engine, nullptr)); + src_md_ = onnxruntime::make_unique( + mkldnn::memory::desc({src_dims}, MklDnnType(), ort_source_format_)); + src_mem_ = onnxruntime::make_unique( + mkldnn::memory({{src_dims}, MklDnnType(), ort_source_format_}, cpu_engine, nullptr)); } else { - src_md_.reset( - new mkldnn::memory::desc(parents_[0].get()->primitive_dst_desc_)); + src_md_ = onnxruntime::make_unique( + mkldnn::memory::desc(parents_[0].get()->primitive_dst_desc_)); src_mem_ = parents_[0].get()->primitive_dst_mem_; x_shape = parents_[0].get()->primitive_dst_shape_; ort_source_format_ = parents_[0].get()->ort_source_format_; @@ -76,13 +76,12 @@ class MklDnnRelu : public MklDnnKernel { mkldnn::memory::dims dst_dims_mkl(primitive_dst_shape_.GetDims().begin(), primitive_dst_shape_.GetDims().end()); mkldnn::algorithm algo = mkldnn::algorithm::eltwise_relu; - fwd_desc_.reset(new mkldnn::eltwise_forward::desc( - mkldnn::prop_kind::forward_inference, algo, *src_md_, 0)); + fwd_desc_ = onnxruntime::make_unique( + mkldnn::eltwise_forward::desc(mkldnn::prop_kind::forward_inference, algo, *src_md_, 0)); + relu_fwd_pd_ = onnxruntime::make_unique( + mkldnn::eltwise_forward::primitive_desc(*fwd_desc_, cpu_engine)); - relu_fwd_pd_.reset(new mkldnn::eltwise_forward::primitive_desc( - *fwd_desc_, cpu_engine)); - - primitive_src_desc_ = relu_fwd_pd_.get()->src_desc(); + primitive_src_desc_ = relu_fwd_pd_.get()->src_desc(); primitive_dst_desc_ = relu_fwd_pd_.get()->dst_desc(); if (mklnode_ptr_->output_index >= 0) { @@ -90,24 +89,24 @@ class MklDnnRelu : public MklDnnKernel { if (primitive_dst_desc_ != ort_source_desc_) { // reorder neded. Use primitive output as input to reorder and // allocate buffer for reorder output, final output of this subgraph - primitive_dst_mem_.reset(new mkldnn::memory(relu_fwd_pd_.get()->dst_desc(), cpu_engine)); + primitive_dst_mem_ = std::make_shared(mkldnn::memory(relu_fwd_pd_.get()->dst_desc(), cpu_engine)); } else { // Last node but re-order not needed. Allocate buffer to output of this node - primitive_dst_mem_.reset(new mkldnn::memory(relu_fwd_pd_.get()->dst_desc(), cpu_engine, nullptr)); + primitive_dst_mem_ = std::make_shared(mkldnn::memory(relu_fwd_pd_.get()->dst_desc(), cpu_engine, nullptr)); } } else { // Intermediate node. Use mkldnn kernel internal memory for output and // use this as input to next node. - primitive_dst_mem_.reset(new mkldnn::memory(relu_fwd_pd_.get()->dst_desc(), cpu_engine)); + primitive_dst_mem_ = std::make_shared(mkldnn::memory(relu_fwd_pd_.get()->dst_desc(), cpu_engine)); } - relu_fwd_.reset( - new mkldnn::eltwise_forward(*relu_fwd_pd_)); + relu_fwd_ = onnxruntime::make_unique( + mkldnn::eltwise_forward(*relu_fwd_pd_)); net.push_back(*relu_fwd_); - net_args.push_back({{MKLDNN_ARG_SRC, *src_mem_}, - {MKLDNN_ARG_DST, *primitive_dst_mem_}}); + net_args.push_back({{MKLDNN_ARG_SRC, *src_mem_}, + {MKLDNN_ARG_DST, *primitive_dst_mem_}}); if (mklnode_ptr_->output_index >= 0) { // one of the end nodes. Allocate output buffer memory and @@ -122,10 +121,10 @@ class MklDnnRelu : public MklDnnKernel { Status Bind(const OrtCustomOpApi* api, OrtKernelContext* context) override { Ort::CustomOpApi ort{*api}; - if (primitive_created_ != Status::OK()) + if (primitive_created_ != Status::OK()) return primitive_created_; - int input_index = mklnode_ptr_->input_start_index < 0 ? 0 : mklnode_ptr_->input_start_index; + int input_index = mklnode_ptr_->input_start_index < 0 ? 0 : mklnode_ptr_->input_start_index; if (mklnode_ptr_->parent_nodes.empty()) { // Sub-graph's first node. Read input from input buffer diff --git a/onnxruntime/core/providers/mkldnn/subgraph/mkldnn_batchnorm.h b/onnxruntime/core/providers/mkldnn/subgraph/mkldnn_batchnorm.h index cfa843c320..5af64356f8 100644 --- a/onnxruntime/core/providers/mkldnn/subgraph/mkldnn_batchnorm.h +++ b/onnxruntime/core/providers/mkldnn/subgraph/mkldnn_batchnorm.h @@ -124,11 +124,11 @@ class MklDnnBatchNorm : public MklDnnKernel { ort_source_desc_ = mkldnn::memory::desc( {src_dims}, MklDnnType(), ort_source_format_); source_desc_ = ort_source_desc_; - src_md_.reset(new mkldnn::memory::desc( - {src_dims}, MklDnnType(), ort_source_format_)); + src_md_ = onnxruntime::make_unique( + mkldnn::memory::desc({src_dims}, MklDnnType(), ort_source_format_)); } else { - src_md_.reset( - new mkldnn::memory::desc(parents_[0].get()->primitive_dst_desc_)); + src_md_ = onnxruntime::make_unique( + mkldnn::memory::desc(parents_[0].get()->primitive_dst_desc_)); x_shape = parents_[0].get()->primitive_dst_shape_; ort_source_format_ = parents_[0].get()->ort_source_format_; ort_source_desc_ = parents_[0].get()->ort_source_desc_; @@ -195,33 +195,34 @@ class MklDnnBatchNorm : public MklDnnKernel { mkldnn::memory::dims dst_dims_mkl( primitive_dst_shape_.GetDims().begin(), primitive_dst_shape_.GetDims().end()); - scale_shift_md_.reset(new mkldnn::memory::desc( - {2, scale_dims_mkl[0]}, MklDnnType(), mkldnn::memory::format_tag::nc)); - mean_md_.reset(new mkldnn::memory::desc( - {mean_dims_mkl}, MklDnnType(), mkldnn::memory::format_tag::x)); - var_md_.reset(new mkldnn::memory::desc( - {var_dims_mkl}, MklDnnType(), mkldnn::memory::format_tag::x)); - primitive_dst_md_.reset(new mkldnn::memory::desc( - {dst_dims_mkl}, MklDnnType(), mkldnn::memory::format_tag::any)); + scale_shift_md_ = onnxruntime::make_unique( + mkldnn::memory::desc({2, scale_dims_mkl[0]}, MklDnnType(), mkldnn::memory::format_tag::nc)); + mean_md_ = onnxruntime::make_unique( + mkldnn::memory::desc({mean_dims_mkl}, MklDnnType(), mkldnn::memory::format_tag::x)); + var_md_ = onnxruntime::make_unique( + mkldnn::memory::desc({var_dims_mkl}, MklDnnType(), mkldnn::memory::format_tag::x)); + primitive_dst_md_ = onnxruntime::make_unique( + mkldnn::memory::desc({dst_dims_mkl}, MklDnnType(), mkldnn::memory::format_tag::any)); // scale_shift_mem will allocate 2*C*sizeof(float) buffer // - scale_shift_mem_.reset( - new mkldnn::memory({*scale_shift_md_, cpu_engine})); + scale_shift_mem_ = onnxruntime::make_unique( + mkldnn::memory({*scale_shift_md_, cpu_engine})); - mean_mem_.reset( - new mkldnn::memory(*mean_md_, cpu_engine, nullptr)); - var_mem_.reset( - new mkldnn::memory(*var_md_, cpu_engine, nullptr)); + mean_mem_ = onnxruntime::make_unique( + mkldnn::memory(*mean_md_, cpu_engine, nullptr)); + var_mem_ = onnxruntime::make_unique( + mkldnn::memory(*var_md_, cpu_engine, nullptr)); - batchnorm_fwd_.reset(new mkldnn::batch_normalization_forward::desc( - mkldnn::prop_kind::forward_inference, *src_md_, epsilon_, - mkldnn::normalization_flags::use_scale_shift | - mkldnn::normalization_flags::use_global_stats)); + batchnorm_fwd_ = onnxruntime::make_unique( + mkldnn::batch_normalization_forward::desc( + mkldnn::prop_kind::forward_inference, *src_md_, epsilon_, + mkldnn::normalization_flags::use_scale_shift | + mkldnn::normalization_flags::use_global_stats)); if (fuse_relu_) { mkldnn::primitive_attr attr; - // attr.set_int_output_round_mode(mkldnn::round_mode::round_nearest); + // attr.set_int_output_round_mode(mkldnn::round_mode::round_nearest); // Execute RELU as Fuse PostOps const float ops_scale = 1.f; const float ops_alpha = 0.f; // relu negative slope @@ -230,11 +231,11 @@ class MklDnnBatchNorm : public MklDnnKernel { ops.append_eltwise(ops_scale, mkldnn::algorithm::eltwise_relu, ops_alpha, ops_beta); attr.set_post_ops(ops); - batchnorm_fwd_pd_.reset(new mkldnn::batch_normalization_forward::primitive_desc( - *batchnorm_fwd_, attr, cpu_engine)); + batchnorm_fwd_pd_ = onnxruntime::make_unique( + mkldnn::batch_normalization_forward::primitive_desc(*batchnorm_fwd_, attr, cpu_engine)); } else { - batchnorm_fwd_pd_.reset( - new mkldnn::batch_normalization_forward::primitive_desc( + batchnorm_fwd_pd_ = onnxruntime::make_unique( + mkldnn::batch_normalization_forward::primitive_desc( *batchnorm_fwd_, cpu_engine)); } @@ -245,8 +246,8 @@ class MklDnnBatchNorm : public MklDnnKernel { batchnorm_fwd_pd_.get()->dst_desc()); if (mklnode_ptr_->parent_nodes.empty()) { - src_mem_.reset( - new mkldnn::memory(batchnorm_fwd_pd_.get()->src_desc(), cpu_engine, nullptr)); + src_mem_ = onnxruntime::make_unique( + mkldnn::memory(batchnorm_fwd_pd_.get()->src_desc(), cpu_engine, nullptr)); } else { src_mem_ = parents_[0].get()->primitive_dst_mem_; } @@ -254,13 +255,16 @@ class MklDnnBatchNorm : public MklDnnKernel { if (mklnode_ptr_->output_index >= 0) { // Use mkldnn's internal output buffer if (primitive_dst_desc_ != ort_source_desc_) { - primitive_dst_mem_.reset(new mkldnn::memory(batchnorm_fwd_pd_->dst_desc(), cpu_engine)); + primitive_dst_mem_ = onnxruntime::make_unique( + mkldnn::memory(batchnorm_fwd_pd_->dst_desc(), cpu_engine)); } else { - primitive_dst_mem_.reset(new mkldnn::memory(batchnorm_fwd_pd_->dst_desc(), cpu_engine, nullptr)); + primitive_dst_mem_ = onnxruntime::make_unique( + mkldnn::memory(batchnorm_fwd_pd_->dst_desc(), cpu_engine, nullptr)); } } else { // last node of sub-graph. need to allocate memory for output_tensor - primitive_dst_mem_.reset(new mkldnn::memory(batchnorm_fwd_pd_->dst_desc(), cpu_engine)); + primitive_dst_mem_ = onnxruntime::make_unique( + mkldnn::memory(batchnorm_fwd_pd_->dst_desc(), cpu_engine)); } auto bn = mkldnn::batch_normalization_forward( *batchnorm_fwd_pd_); diff --git a/onnxruntime/core/providers/mkldnn/subgraph/mkldnn_conv.h b/onnxruntime/core/providers/mkldnn/subgraph/mkldnn_conv.h index 2dd02ec6ad..bf28c5349f 100644 --- a/onnxruntime/core/providers/mkldnn/subgraph/mkldnn_conv.h +++ b/onnxruntime/core/providers/mkldnn/subgraph/mkldnn_conv.h @@ -76,7 +76,7 @@ class MklDnnConv : public MklDnnKernel { std::vector& net, std::vector>& net_args) override { Ort::CustomOpApi ort{*api}; - stream_.reset(new mkldnn::stream(cpu_engine)); + stream_ = onnxruntime::make_unique(mkldnn::stream(cpu_engine)); int input_index = mklnode_ptr_->input_start_index < 0 ? 0 : mklnode_ptr_->input_start_index; const OrtValue* winput_tensor = ort.KernelContext_GetInput(context, input_index + 1); @@ -108,8 +108,7 @@ class MklDnnConv : public MklDnnKernel { ort_source_desc_ = parents_[0].get()->ort_source_desc_; source_desc_ = parents_[0].get()->primitive_dst_desc_; - mkldnn::memory::dims src_dims_mkl(x_shape.GetDims().begin(), x_shape.GetDims().end()); - src_md_.reset(new mkldnn::memory::desc(source_desc_)); + mkldnn::memory::dims src_dims_mkl(x_shape.GetDims().begin(), x_shape.GetDims().end()); } primitive_created_ = ValidateInputShape(x_shape, w_shape); @@ -165,8 +164,8 @@ class MklDnnConv : public MklDnnKernel { primitive_dst_shape_ = TensorShape(y_dims); TensorShape output_shape = y_shape.Slice(2); mkldnn::memory::dims dst_dims_mkl(y_dims.begin(), y_dims.end()); - primitive_dst_md_.reset(new mkldnn::memory::desc( - {dst_dims_mkl}, MklDnnType(), mkldnn::memory::format_tag::any)); + primitive_dst_md_ = onnxruntime::make_unique( + mkldnn::memory::desc({dst_dims_mkl}, MklDnnType(), mkldnn::memory::format_tag::any)); mkldnn::memory::dims filter_dims_mkl; if (group_mkl == 1) { @@ -221,36 +220,39 @@ class MklDnnConv : public MklDnnKernel { } } + mkldnn::memory::dims src_dims_mkl(x_shape.GetDims().begin(), x_shape.GetDims().end()); if (mklnode_ptr_->parent_nodes.empty()) { - mkldnn::memory::dims src_dims_mkl(x_shape.GetDims().begin(), x_shape.GetDims().end()); - - ort_source_format_ = src_format; - src_md_.reset(new mkldnn::memory::desc({src_dims_mkl}, MklDnnType(), mkldnn::memory::format_tag::any)); + ort_source_format_ = src_format; ort_source_desc_ = mkldnn::memory::desc({src_dims_mkl}, MklDnnType(), src_format); source_desc_ = mkldnn::memory::desc({src_dims_mkl}, MklDnnType(), src_format); } + src_md_ = onnxruntime::make_unique( + mkldnn::memory::desc({src_dims_mkl}, MklDnnType(), mkldnn::memory::format_tag::any)); + // Set the memory descriptors to format::any to allow MKLDNN to decide what the optimal memory layout should be // for the computation given the input - filter_md_.reset(new mkldnn::memory::desc( - {filter_dims_mkl}, MklDnnType(), mkldnn::memory::format_tag::any)); + filter_md_ = onnxruntime::make_unique( + mkldnn::memory::desc({filter_dims_mkl}, MklDnnType(), mkldnn::memory::format_tag::any)); if (!bias_dims_mkl.empty()) - bias_md_.reset(new mkldnn::memory::desc( - {bias_dims_mkl}, MklDnnType(), mkldnn::memory::format_tag::any)); + bias_md_ = onnxruntime::make_unique( + mkldnn::memory::desc({bias_dims_mkl}, MklDnnType(), mkldnn::memory::format_tag::any)); - mkldnn::memory::dims conv_zero_padding = {0, 0}; + mkldnn::memory::dims conv_zero_padding = {0, 0}; if (!bias_dims_mkl.empty()) { - fwd_desc_.reset(new mkldnn::convolution_forward::desc( - mkldnn::prop_kind::forward_inference, mkldnn::algorithm::convolution_direct, *src_md_, - *filter_md_, *bias_md_, *primitive_dst_md_, - strides_mkl, dilations_mkl, padding_left_mkl, - padding_right_mkl)); + fwd_desc_ = onnxruntime::make_unique( + mkldnn::convolution_forward::desc( + mkldnn::prop_kind::forward_inference, mkldnn::algorithm::convolution_direct, *src_md_, + *filter_md_, *bias_md_, *primitive_dst_md_, + strides_mkl, dilations_mkl, padding_left_mkl, + padding_right_mkl)); } else { - fwd_desc_.reset(new mkldnn::convolution_forward::desc( - mkldnn::prop_kind::forward_inference, mkldnn::algorithm::convolution_direct, *src_md_, - *filter_md_, *primitive_dst_md_, strides_mkl, - dilations_mkl, padding_left_mkl, padding_right_mkl)); + fwd_desc_ = onnxruntime::make_unique( + mkldnn::convolution_forward::desc( + mkldnn::prop_kind::forward_inference, mkldnn::algorithm::convolution_direct, *src_md_, + *filter_md_, *primitive_dst_md_, strides_mkl, + dilations_mkl, padding_left_mkl, padding_right_mkl)); } if (fuse_relu_) { @@ -264,17 +266,17 @@ class MklDnnConv : public MklDnnKernel { ops.append_eltwise(ops_scale, mkldnn::algorithm::eltwise_relu, ops_alpha, ops_beta); attr.set_post_ops(ops); - conv_fwd_pd_.reset(new mkldnn::convolution_forward::primitive_desc( - *fwd_desc_, attr, cpu_engine)); + conv_fwd_pd_ = onnxruntime::make_unique( + mkldnn::convolution_forward::primitive_desc(*fwd_desc_, attr, cpu_engine)); } else { - conv_fwd_pd_.reset(new mkldnn::convolution_forward::primitive_desc( - *fwd_desc_, cpu_engine)); + conv_fwd_pd_ = onnxruntime::make_unique( + mkldnn::convolution_forward::primitive_desc(*fwd_desc_, cpu_engine)); } primitive_src_desc_ = static_cast( conv_fwd_pd_.get()->src_desc()); - filter_desc_= static_cast( + filter_desc_ = static_cast( conv_fwd_pd_.get()->weights_desc()); primitive_dst_desc_ = static_cast( @@ -284,25 +286,28 @@ class MklDnnConv : public MklDnnKernel { filter_size_ = conv_fwd_pd_.get()->weights_desc().get_size(); dst_size_ = conv_fwd_pd_.get()->dst_desc().get_size(); - filter_mem_.reset( - new mkldnn::memory(conv_fwd_pd_.get()->weights_desc(), cpu_engine, nullptr)); + filter_mem_ = onnxruntime::make_unique( + mkldnn::memory(conv_fwd_pd_.get()->weights_desc(), cpu_engine, nullptr)); if (primitive_src_desc_ != source_desc_) { mkldnn::memory::dims src_dims(x_shape.GetDims().begin(), x_shape.GetDims().end()); auto pd = mkldnn::memory::desc({{src_dims}, MklDnnType(), ort_source_format_}); if (mklnode_ptr_->parent_nodes.empty()) - src_mem_from_.reset(new mkldnn::memory(pd, cpu_engine, nullptr)); + src_mem_from_ = onnxruntime::make_unique( + mkldnn::memory(pd, cpu_engine, nullptr)); else src_mem_from_ = parents_[0].get()->primitive_dst_mem_; - src_mem_.reset(new mkldnn::memory(conv_fwd_pd_->src_desc(), cpu_engine, nullptr)); + src_mem_ = onnxruntime::make_unique( + mkldnn::memory(conv_fwd_pd_->src_desc(), cpu_engine, nullptr)); net.push_back(mkldnn::reorder(*src_mem_from_, *src_mem_)); net_args.push_back({{MKLDNN_ARG_FROM, *src_mem_from_}, {MKLDNN_ARG_TO, *src_mem_}}); } else { if (mklnode_ptr_->parent_nodes.empty()) { - src_mem_.reset(new mkldnn::memory(conv_fwd_pd_->src_desc(), cpu_engine, nullptr)); + src_mem_ = onnxruntime::make_unique( + mkldnn::memory(conv_fwd_pd_->src_desc(), cpu_engine, nullptr)); } else { src_mem_ = parents_[0].get()->primitive_dst_mem_; } @@ -311,25 +316,31 @@ class MklDnnConv : public MklDnnKernel { if (mklnode_ptr_->output_index >= 0) { // Use mkldnn's internal output buffer if (primitive_dst_desc_ != ort_source_desc_) { - primitive_dst_mem_.reset(new mkldnn::memory(conv_fwd_pd_.get()->dst_desc(), cpu_engine)); + primitive_dst_mem_ = onnxruntime::make_unique( + mkldnn::memory(conv_fwd_pd_.get()->dst_desc(), cpu_engine)); } else { - primitive_dst_mem_.reset(new mkldnn::memory(conv_fwd_pd_.get()->dst_desc(), cpu_engine, nullptr)); + primitive_dst_mem_ = onnxruntime::make_unique( + mkldnn::memory(conv_fwd_pd_.get()->dst_desc(), cpu_engine, nullptr)); } } else { // last node of sub-graph. need to allocate memory for output_tensor - primitive_dst_mem_.reset(new mkldnn::memory(conv_fwd_pd_.get()->dst_desc(), cpu_engine)); + primitive_dst_mem_ = onnxruntime::make_unique( + mkldnn::memory(conv_fwd_pd_.get()->dst_desc(), cpu_engine)); } if (!bias_dims_mkl.empty()) { - bias_mem_.reset(new mkldnn::memory(conv_fwd_pd_.get()->bias_desc(), cpu_engine, nullptr)); - conv_fwd_.reset(new mkldnn::convolution_forward(*conv_fwd_pd_)); + bias_mem_ = onnxruntime::make_unique( + mkldnn::memory(conv_fwd_pd_.get()->bias_desc(), cpu_engine, nullptr)); + conv_fwd_ = onnxruntime::make_unique( + mkldnn::convolution_forward(*conv_fwd_pd_)); net.push_back(*conv_fwd_); net_args.push_back({{MKLDNN_ARG_SRC, *src_mem_}, {MKLDNN_ARG_WEIGHTS, *filter_mem_}, {MKLDNN_ARG_BIAS, *bias_mem_}, {MKLDNN_ARG_DST, *primitive_dst_mem_}}); } else { - conv_fwd_.reset(new mkldnn::convolution_forward(*conv_fwd_pd_)); + conv_fwd_ = onnxruntime::make_unique( + mkldnn::convolution_forward(*conv_fwd_pd_)); net.push_back(*conv_fwd_); net_args.push_back({{MKLDNN_ARG_SRC, *src_mem_}, {MKLDNN_ARG_WEIGHTS, *filter_mem_}, @@ -379,13 +390,13 @@ class MklDnnConv : public MklDnnKernel { mkldnn::memory src = mkldnn::memory({{filter_dims_mkl}, MklDnnType(), filter_format_}, cpu_engine, (void*)filter_data); IAllocatorUniquePtr filter_reorder_buffer = IAllocator::MakeUniquePtr(alloc_, filter_size_); - filter_dst_mem.reset( - new mkldnn::memory(conv_fwd_pd_->weights_desc(), cpu_engine, filter_reorder_buffer.get())); - - mkldnn::reorder(src, *filter_dst_mem) + filter_dst_mem = onnxruntime::make_unique( + mkldnn::memory(conv_fwd_pd_->weights_desc(), cpu_engine, filter_reorder_buffer.get())); + + mkldnn::reorder(src, *filter_dst_mem) .execute(cpu_engine, src, *filter_dst_mem); - provider_->SaveAllocatedMemory(std::move(filter_reorder_buffer)); + provider_->SaveAllocatedMemory(std::move(filter_reorder_buffer)); filter_data = static_cast(filter_dst_mem->get_data_handle()); provider_->SetWeightsMemoryBuffer(mklnode_ptr_->weight_name, filter_dst_mem); } @@ -522,7 +533,7 @@ class MklDnnConv : public MklDnnKernel { } private: - mkldnn::memory::desc filter_desc_; + mkldnn::memory::desc filter_desc_; mkldnn::memory::format_tag filter_format_; std::shared_ptr src_mem_from_; @@ -631,7 +642,6 @@ class MklDnnConv : public MklDnnKernel { } private: - std::unique_ptr stream_; std::vector kernel_shape_; // must use ComputeKernelShape(...), instead of kernel_shape_ AutoPadType auto_pad_; diff --git a/onnxruntime/core/providers/mkldnn/subgraph/mkldnn_conv_batchnorm.h b/onnxruntime/core/providers/mkldnn/subgraph/mkldnn_conv_batchnorm.h index eb18c12f7e..c7d4a89f83 100644 --- a/onnxruntime/core/providers/mkldnn/subgraph/mkldnn_conv_batchnorm.h +++ b/onnxruntime/core/providers/mkldnn/subgraph/mkldnn_conv_batchnorm.h @@ -29,7 +29,7 @@ class MklDnnConvBatchNorm : public MklDnnKernel { std::vector& net, std::vector>& net_args) override { Ort::CustomOpApi ort{*api}; - stream_.reset(new mkldnn::stream(cpu_engine)); + stream_ = onnxruntime::make_unique(mkldnn::stream(cpu_engine)); int input_index = mklnode_ptr_->input_start_index < 0 ? 0 : mklnode_ptr_->input_start_index; const OrtValue* winput_tensor = ort.KernelContext_GetInput(context, input_index + 1); auto wtensor_info = ort.GetTensorTypeAndShape(winput_tensor); @@ -114,8 +114,8 @@ class MklDnnConvBatchNorm : public MklDnnKernel { primitive_dst_shape_ = TensorShape(y_dims); TensorShape output_shape = y_shape.Slice(2); mkldnn::memory::dims dst_dims_mkl(y_dims.begin(), y_dims.end()); - primitive_dst_md_.reset(new mkldnn::memory::desc( - {dst_dims_mkl}, MklDnnType(), mkldnn::memory::format_tag::any)); + primitive_dst_md_ = onnxruntime::make_unique( + mkldnn::memory::desc({dst_dims_mkl}, MklDnnType(), mkldnn::memory::format_tag::any)); mkldnn::memory::dims filter_dims_mkl; if (group_mkl == 1) { @@ -188,23 +188,24 @@ class MklDnnConvBatchNorm : public MklDnnKernel { source_desc_ = mkldnn::memory::desc({src_dims_mkl}, MklDnnType(), src_format); } - src_md_.reset(new mkldnn::memory::desc({src_dims_mkl}, MklDnnType(), mkldnn::memory::format_tag::any)); + src_md_ = onnxruntime::make_unique( + mkldnn::memory::desc({src_dims_mkl}, MklDnnType(), mkldnn::memory::format_tag::any)); // Set the memory descriptors to format::any to allow MKLDNN to decide what the optimal memory layout should be // for the computation given the input - filter_md_.reset(new mkldnn::memory::desc( - {filter_dims_mkl}, MklDnnType(), mkldnn::memory::format_tag::any)); - bias_md_.reset(new mkldnn::memory::desc( - {bias_dims_mkl}, MklDnnType(), mkldnn::memory::format_tag::any)); + filter_md_ = onnxruntime::make_unique( + mkldnn::memory::desc({filter_dims_mkl}, MklDnnType(), mkldnn::memory::format_tag::any)); + bias_md_ = onnxruntime::make_unique( + mkldnn::memory::desc({bias_dims_mkl}, MklDnnType(), mkldnn::memory::format_tag::any)); mkldnn::memory::dims conv_zero_padding = {0, 0}; - fwd_desc_.reset(new mkldnn::convolution_forward::desc( - mkldnn::prop_kind::forward_inference, mkldnn::algorithm::convolution_direct, *src_md_, - *filter_md_, *bias_md_, *primitive_dst_md_, - strides_mkl, dilations_mkl, padding_left_mkl, - padding_right_mkl)); - + fwd_desc_ = onnxruntime::make_unique( + mkldnn::convolution_forward::desc( + mkldnn::prop_kind::forward_inference, mkldnn::algorithm::convolution_direct, *src_md_, + *filter_md_, *bias_md_, *primitive_dst_md_, + strides_mkl, dilations_mkl, padding_left_mkl, + padding_right_mkl)); if (fuse_relu_) { mkldnn::primitive_attr attr; @@ -217,11 +218,11 @@ class MklDnnConvBatchNorm : public MklDnnKernel { ops.append_eltwise(ops_scale, mkldnn::algorithm::eltwise_relu, ops_alpha, ops_beta); attr.set_post_ops(ops); - conv_fwd_pd_.reset(new mkldnn::convolution_forward::primitive_desc( - *fwd_desc_, attr, cpu_engine)); + conv_fwd_pd_ = onnxruntime::make_unique( + mkldnn::convolution_forward::primitive_desc(*fwd_desc_, attr, cpu_engine)); } else { - conv_fwd_pd_.reset(new mkldnn::convolution_forward::primitive_desc( - *fwd_desc_, cpu_engine)); + conv_fwd_pd_ = onnxruntime::make_unique( + mkldnn::convolution_forward::primitive_desc(*fwd_desc_, cpu_engine)); } primitive_src_desc_ = static_cast( @@ -237,25 +238,28 @@ class MklDnnConvBatchNorm : public MklDnnKernel { filter_size_ = conv_fwd_pd_.get()->weights_desc().get_size(); dst_size_ = conv_fwd_pd_.get()->dst_desc().get_size(); - filter_mem_.reset( - new mkldnn::memory(conv_fwd_pd_.get()->weights_desc(), cpu_engine, nullptr)); + filter_mem_ = onnxruntime::make_unique( + mkldnn::memory(conv_fwd_pd_.get()->weights_desc(), cpu_engine, nullptr)); if (primitive_src_desc_ != source_desc_) { mkldnn::memory::dims src_dims(x_shape.GetDims().begin(), x_shape.GetDims().end()); auto pd = mkldnn::memory::desc({{src_dims}, MklDnnType(), ort_source_format_}); if (mklnode_ptr_->parent_nodes.empty()) - src_mem_from_.reset(new mkldnn::memory(pd, cpu_engine, nullptr)); + src_mem_from_ = onnxruntime::make_unique( + mkldnn::memory(pd, cpu_engine, nullptr)); else src_mem_from_ = parents_[0].get()->primitive_dst_mem_; - src_mem_.reset(new mkldnn::memory(conv_fwd_pd_->src_desc(), cpu_engine, nullptr)); + src_mem_ = onnxruntime::make_unique( + mkldnn::memory(conv_fwd_pd_->src_desc(), cpu_engine, nullptr)); net.push_back(mkldnn::reorder(*src_mem_from_, *src_mem_)); net_args.push_back({{MKLDNN_ARG_FROM, *src_mem_from_}, {MKLDNN_ARG_TO, *src_mem_}}); } else { if (mklnode_ptr_->parent_nodes.empty()) { - src_mem_.reset(new mkldnn::memory(conv_fwd_pd_->src_desc(), cpu_engine, nullptr)); + src_mem_ = onnxruntime::make_unique( + mkldnn::memory(conv_fwd_pd_->src_desc(), cpu_engine, nullptr)); } else { src_mem_ = parents_[0].get()->primitive_dst_mem_; } @@ -264,17 +268,22 @@ class MklDnnConvBatchNorm : public MklDnnKernel { if (mklnode_ptr_->output_index >= 0) { // Use mkldnn's internal output buffer if (primitive_dst_desc_ != ort_source_desc_) { - primitive_dst_mem_.reset(new mkldnn::memory(conv_fwd_pd_.get()->dst_desc(), cpu_engine)); + primitive_dst_mem_ = onnxruntime::make_unique( + mkldnn::memory(conv_fwd_pd_.get()->dst_desc(), cpu_engine)); } else { - primitive_dst_mem_.reset(new mkldnn::memory(conv_fwd_pd_.get()->dst_desc(), cpu_engine, nullptr)); + primitive_dst_mem_ = onnxruntime::make_unique( + mkldnn::memory(conv_fwd_pd_.get()->dst_desc(), cpu_engine, nullptr)); } } else { // last node of sub-graph. need to allocate memory for output_tensor - primitive_dst_mem_.reset(new mkldnn::memory(conv_fwd_pd_.get()->dst_desc(), cpu_engine)); + primitive_dst_mem_ = onnxruntime::make_unique( + mkldnn::memory(conv_fwd_pd_.get()->dst_desc(), cpu_engine)); } - bias_mem_.reset(new mkldnn::memory(conv_fwd_pd_.get()->bias_desc(), cpu_engine, nullptr)); - conv_fwd_.reset(new mkldnn::convolution_forward(*conv_fwd_pd_)); + bias_mem_ = onnxruntime::make_unique( + mkldnn::memory(conv_fwd_pd_.get()->bias_desc(), cpu_engine, nullptr)); + conv_fwd_ = onnxruntime::make_unique( + mkldnn::convolution_forward(*conv_fwd_pd_)); net.push_back(*conv_fwd_); net_args.push_back({{MKLDNN_ARG_SRC, *src_mem_}, {MKLDNN_ARG_WEIGHTS, *filter_mem_}, @@ -389,8 +398,8 @@ class MklDnnConvBatchNorm : public MklDnnKernel { mkldnn::memory src = mkldnn::memory({{filter_dims_mkl}, MklDnnType(), filter_format_}, cpu_engine, (void*)weights_scaled_by_axis.data()); IAllocatorUniquePtr filter_reorder_buffer = IAllocator::MakeUniquePtr(alloc_, filter_size_); - filter_dst_mem.reset( - new mkldnn::memory(conv_fwd_pd_->weights_desc(), cpu_engine, filter_reorder_buffer.get())); + filter_dst_mem = onnxruntime::make_unique( + mkldnn::memory(conv_fwd_pd_->weights_desc(), cpu_engine, filter_reorder_buffer.get())); mkldnn::reorder(src, *filter_dst_mem) .execute(cpu_engine, src, *filter_dst_mem); @@ -404,8 +413,8 @@ class MklDnnConvBatchNorm : public MklDnnKernel { auto bias_size = conv_fwd_pd_.get()->bias_desc().get_size(); IAllocatorUniquePtr bias_buffer = IAllocator::MakeUniquePtr(alloc_, bias_size); - bias_mem.reset( - new mkldnn::memory(conv_fwd_pd_->bias_desc(), cpu_engine, bias_buffer.get())); + bias_mem = onnxruntime::make_unique( + mkldnn::memory(conv_fwd_pd_->bias_desc(), cpu_engine, bias_buffer.get())); float* bias_buffer_data = static_cast(bias_buffer.get()); if (mklnode_ptr_->num_inputs == 7) { const OrtValue* conv_bias_tensor = ort.KernelContext_GetInput(context, input_index + 2); diff --git a/onnxruntime/core/providers/mkldnn/subgraph/mkldnn_func_kernel.cc b/onnxruntime/core/providers/mkldnn/subgraph/mkldnn_func_kernel.cc index f1f1a97f21..0bf8d4527f 100644 --- a/onnxruntime/core/providers/mkldnn/subgraph/mkldnn_func_kernel.cc +++ b/onnxruntime/core/providers/mkldnn/subgraph/mkldnn_func_kernel.cc @@ -28,7 +28,7 @@ class SubgraphPrimitive : public PrimitiveBase { OrtKernelContext* context, const SubgraphParams& params) : cpu_engine_(GetEngine()) { - context_.stream.reset(new mkldnn::stream(cpu_engine_)); + context_.stream = onnxruntime::make_unique(mkldnn::stream(cpu_engine_)); if (context_.net.size() == 0) { CreateKernels(params); @@ -67,7 +67,7 @@ class SubgraphPrimitive : public PrimitiveBase { std::ostringstream os; os << "Conv-" << mkldnn_node.node_index << "-"; std::shared_ptr> kernel; - kernel.reset(new MklDnnConv(mkldnn_node, params.provider, params.attributes, os.str())); + kernel = std::make_shared>(mkldnn_node, params.provider, params.attributes, os.str()); for (auto index : mkldnn_node.parent_nodes) { kernel->parents_.push_back(context_.kernels[index]); } @@ -76,7 +76,7 @@ class SubgraphPrimitive : public PrimitiveBase { std::ostringstream os; os << "Conv-" << mkldnn_node.node_index << "-"; std::shared_ptr> kernel; - kernel.reset(new MklDnnConv(mkldnn_node, params.provider, params.attributes, os.str())); + kernel = std::make_shared>(mkldnn_node, params.provider, params.attributes, os.str()); kernel->fuse_relu_ = true; for (auto index : mkldnn_node.parent_nodes) { kernel->parents_.push_back(context_.kernels[index]); @@ -86,7 +86,7 @@ class SubgraphPrimitive : public PrimitiveBase { std::ostringstream os; os << "Relu-" << mkldnn_node.node_index << "-"; std::shared_ptr> kernel; - kernel.reset(new MklDnnRelu(mkldnn_node, params.provider, params.attributes, os.str())); + kernel = std::make_shared>(mkldnn_node, params.provider, params.attributes, os.str()); for (auto index : mkldnn_node.parent_nodes) { kernel->parents_.push_back(context_.kernels[index]); } @@ -95,7 +95,7 @@ class SubgraphPrimitive : public PrimitiveBase { std::ostringstream os; os << "BatchNormalization-" << mkldnn_node.node_index << "-"; std::shared_ptr> kernel; - kernel.reset(new MklDnnBatchNorm(mkldnn_node, params.provider, params.attributes, os.str())); + kernel = std::make_shared>(mkldnn_node, params.provider, params.attributes, os.str()); for (auto index : mkldnn_node.parent_nodes) { kernel->parents_.push_back(context_.kernels[index]); } @@ -104,7 +104,7 @@ class SubgraphPrimitive : public PrimitiveBase { std::ostringstream os; os << "BatchNormalization-" << mkldnn_node.node_index << "-"; std::shared_ptr> kernel; - kernel.reset(new MklDnnBatchNorm(mkldnn_node, params.provider, params.attributes, os.str())); + kernel = std::make_shared>(mkldnn_node, params.provider, params.attributes, os.str()); kernel->fuse_relu_ = true; for (auto index : mkldnn_node.parent_nodes) { kernel->parents_.push_back(context_.kernels[index]); @@ -114,7 +114,7 @@ class SubgraphPrimitive : public PrimitiveBase { std::ostringstream os; os << "Conv-" << mkldnn_node.node_index << "-"; std::shared_ptr> kernel; - kernel.reset(new MklDnnConvBatchNorm(mkldnn_node, params.provider, params.attributes, os.str())); + kernel = std::make_shared>(mkldnn_node, params.provider, params.attributes, os.str()); for (auto index : mkldnn_node.parent_nodes) { kernel->parents_.push_back(context_.kernels[index]); } @@ -123,7 +123,7 @@ class SubgraphPrimitive : public PrimitiveBase { std::ostringstream os; os << "Conv-" << mkldnn_node.node_index << "-"; std::shared_ptr> kernel; - kernel.reset(new MklDnnConvBatchNorm(mkldnn_node, params.provider, params.attributes, os.str())); + kernel = std::make_shared>(mkldnn_node, params.provider, params.attributes, os.str()); kernel->fuse_relu_ = true; for (auto index : mkldnn_node.parent_nodes) { kernel->parents_.push_back(context_.kernels[index]); @@ -133,7 +133,7 @@ class SubgraphPrimitive : public PrimitiveBase { std::ostringstream os; os << "MaxPool-" << mkldnn_node.node_index << "-"; std::shared_ptr> kernel; - kernel.reset(new MklDnnPool(mkldnn_node, params.provider, params.attributes, os.str())); + kernel = std::make_shared>(mkldnn_node, params.provider, params.attributes, os.str()); for (auto index : mkldnn_node.parent_nodes) { kernel->parents_.push_back(context_.kernels[index]); } @@ -142,7 +142,7 @@ class SubgraphPrimitive : public PrimitiveBase { std::ostringstream os; os << "GlobalMaxPool-" << mkldnn_node.node_index << "-"; std::shared_ptr> kernel; - kernel.reset(new MklDnnPool(mkldnn_node, params.provider, params.attributes, os.str())); + kernel = std::make_shared>(mkldnn_node, params.provider, params.attributes, os.str()); for (auto index : mkldnn_node.parent_nodes) { kernel->parents_.push_back(context_.kernels[index]); } @@ -151,7 +151,7 @@ class SubgraphPrimitive : public PrimitiveBase { std::ostringstream os; os << "AveragePool-" << mkldnn_node.node_index << "-"; std::shared_ptr> kernel; - kernel.reset(new MklDnnPool(mkldnn_node, params.provider, params.attributes, os.str())); + kernel = std::make_shared>(mkldnn_node, params.provider, params.attributes, os.str()); for (auto index : mkldnn_node.parent_nodes) { kernel->parents_.push_back(context_.kernels[index]); } @@ -160,7 +160,7 @@ class SubgraphPrimitive : public PrimitiveBase { std::ostringstream os; os << "GlobalAveragePool-" << mkldnn_node.node_index << "-"; std::shared_ptr> kernel; - kernel.reset(new MklDnnPool(mkldnn_node, params.provider, params.attributes, os.str())); + kernel = std::make_shared>(mkldnn_node, params.provider, params.attributes, os.str()); for (auto index : mkldnn_node.parent_nodes) { kernel->parents_.push_back(context_.kernels[index]); } @@ -169,7 +169,7 @@ class SubgraphPrimitive : public PrimitiveBase { std::ostringstream os; os << "LRN-" << mkldnn_node.node_index << "-"; std::shared_ptr> kernel; - kernel.reset(new MklDnnLrn(mkldnn_node, params.provider, params.attributes, os.str())); + kernel = std::make_shared>(mkldnn_node, params.provider, params.attributes, os.str()); for (auto index : mkldnn_node.parent_nodes) { kernel->parents_.push_back(context_.kernels[index]); } @@ -178,7 +178,7 @@ class SubgraphPrimitive : public PrimitiveBase { std::ostringstream os; os << "Sum-" << mkldnn_node.node_index << "-"; std::shared_ptr> kernel; - kernel.reset(new MklDnnSum(mkldnn_node, params.provider, params.attributes, os.str())); + kernel = std::make_shared>(mkldnn_node, params.provider, params.attributes, os.str()); for (auto index : mkldnn_node.parent_nodes) { kernel->parents_.push_back(context_.kernels[index]); } diff --git a/onnxruntime/core/providers/mkldnn/subgraph/mkldnn_kernel.cc b/onnxruntime/core/providers/mkldnn/subgraph/mkldnn_kernel.cc index 05533beceb..7222fed5b9 100644 --- a/onnxruntime/core/providers/mkldnn/subgraph/mkldnn_kernel.cc +++ b/onnxruntime/core/providers/mkldnn/subgraph/mkldnn_kernel.cc @@ -11,14 +11,14 @@ void MklDnnKernel::InitDstReorderOutput(mkldnn::engine& cpu_engine, std::vector& net, std::vector>& net_args) { // Allocate dst buffer if reorder is necessary - if (primitive_dst_desc_ != ort_source_desc_) - { + if (primitive_dst_desc_ != ort_source_desc_) { // reorder to ONNXRuntime format mkldnn::memory::dims dst_dims_mkl( primitive_dst_shape_.GetDims().begin(), primitive_dst_shape_.GetDims().end()); mkldnn::memory::desc dst_des = mkldnn::memory::desc(dst_dims_mkl, data_type, ort_source_format_); - reorder_dst_mem_to_.reset(new mkldnn::memory(dst_des, cpu_engine)); + reorder_dst_mem_to_ = onnxruntime::make_unique( + mkldnn::memory(dst_des, cpu_engine)); net.push_back(mkldnn::reorder(*primitive_dst_mem_, *reorder_dst_mem_to_)); net_args.push_back({{MKLDNN_ARG_FROM, *primitive_dst_mem_}, {MKLDNN_ARG_TO, *reorder_dst_mem_to_}}); diff --git a/onnxruntime/core/providers/mkldnn/subgraph/mkldnn_lrn.h b/onnxruntime/core/providers/mkldnn/subgraph/mkldnn_lrn.h index 2b03717c14..c574290c59 100644 --- a/onnxruntime/core/providers/mkldnn/subgraph/mkldnn_lrn.h +++ b/onnxruntime/core/providers/mkldnn/subgraph/mkldnn_lrn.h @@ -47,13 +47,13 @@ class MklDnnLrn : public MklDnnKernel { ort_source_desc_ = mkldnn::memory::desc( {src_dims}, MklDnnType(), ort_source_format_); - src_md_.reset(new mkldnn::memory::desc( - {src_dims}, MklDnnType(), ort_source_format_)); - src_mem_.reset( - new mkldnn::memory(*src_md_, cpu_engine, nullptr)); + src_md_ = onnxruntime::make_unique( + mkldnn::memory::desc({src_dims}, MklDnnType(), ort_source_format_)); + src_mem_ = onnxruntime::make_unique( + mkldnn::memory(*src_md_, cpu_engine, nullptr)); } else { - src_md_.reset( - new mkldnn::memory::desc(parents_[0].get()->primitive_dst_desc_)); + src_md_ = onnxruntime::make_unique( + mkldnn::memory::desc(parents_[0].get()->primitive_dst_desc_)); src_mem_ = parents_[0].get()->primitive_dst_mem_; x_shape = parents_[0].get()->primitive_dst_shape_; ort_source_format_ = parents_[0].get()->ort_source_format_; @@ -64,12 +64,12 @@ class MklDnnLrn : public MklDnnKernel { primitive_dst_shape_ = TensorShape(x_shape); mkldnn::algorithm algo = mkldnn::algorithm::lrn_across_channels; - fwd_desc_.reset(new mkldnn::lrn_forward::desc( - mkldnn::prop_kind::forward_scoring, algo, *src_md_, - size_, alpha_, beta_, bias_)); + fwd_desc_ = onnxruntime::make_unique( + mkldnn::lrn_forward::desc(mkldnn::prop_kind::forward_scoring, algo, *src_md_, + size_, alpha_, beta_, bias_)); - fwd_primitive_desc_.reset(new mkldnn::lrn_forward::primitive_desc( - *fwd_desc_, cpu_engine)); + fwd_primitive_desc_ = onnxruntime::make_unique( + mkldnn::lrn_forward::primitive_desc(*fwd_desc_, cpu_engine)); primitive_src_desc_ = fwd_primitive_desc_.get()->src_desc(); primitive_dst_desc_ = fwd_primitive_desc_.get()->dst_desc(); @@ -79,22 +79,22 @@ class MklDnnLrn : public MklDnnKernel { if (primitive_dst_desc_ != ort_source_desc_) { // reorder neded. Use primitive output as input to reorder and // allocate buffer for reorder output, final output of this subgraph - primitive_dst_mem_.reset( - new mkldnn::memory(fwd_primitive_desc_.get()->dst_desc(), cpu_engine)); + primitive_dst_mem_ = onnxruntime::make_unique( + mkldnn::memory(fwd_primitive_desc_.get()->dst_desc(), cpu_engine)); } else { // Last node but re-order not needed. Allocate buffer to output of this node - primitive_dst_mem_.reset( - new mkldnn::memory(fwd_primitive_desc_.get()->dst_desc(), cpu_engine, nullptr)); + primitive_dst_mem_ = onnxruntime::make_unique( + mkldnn::memory(fwd_primitive_desc_.get()->dst_desc(), cpu_engine, nullptr)); } } else { // Intermediate node. Use mkldnn kernel internal memory for output and // use this as input to next node. - primitive_dst_mem_.reset( - new mkldnn::memory(fwd_primitive_desc_.get()->dst_desc(), cpu_engine)); + primitive_dst_mem_ = onnxruntime::make_unique( + mkldnn::memory(fwd_primitive_desc_.get()->dst_desc(), cpu_engine)); } - lrn_fwd_.reset( - new mkldnn::lrn_forward(*fwd_primitive_desc_)); + lrn_fwd_ = onnxruntime::make_unique( + mkldnn::lrn_forward(*fwd_primitive_desc_)); net.push_back(*lrn_fwd_); net_args.push_back({{MKLDNN_ARG_SRC, *src_mem_}, {MKLDNN_ARG_DST, *primitive_dst_mem_}}); @@ -126,7 +126,7 @@ class MklDnnLrn : public MklDnnKernel { OrtValue* output = ort.KernelContext_GetOutput(context, mklnode_ptr_->output_index, &y_dims[0], static_cast(primitive_dst_shape_.GetDims().size())); T* dst_data = ort.GetTensorMutableData(output); - if (primitive_dst_desc_!= ort_source_desc_) { + if (primitive_dst_desc_ != ort_source_desc_) { reorder_dst_mem_to_->set_data_handle(dst_data); } else { primitive_dst_mem_->set_data_handle(dst_data); diff --git a/onnxruntime/core/providers/mkldnn/subgraph/mkldnn_pool.h b/onnxruntime/core/providers/mkldnn/subgraph/mkldnn_pool.h index 268c108f3c..9184f4f3f7 100644 --- a/onnxruntime/core/providers/mkldnn/subgraph/mkldnn_pool.h +++ b/onnxruntime/core/providers/mkldnn/subgraph/mkldnn_pool.h @@ -48,7 +48,8 @@ class MklDnnPool : public MklDnnKernel { // reorder for better performance mkldnn::memory::format_tag src_format = GetAVXFormat(src_dims_mkl); - src_md_.reset(new mkldnn::memory::desc({src_dims_mkl}, MklDnnType(), src_format)); + src_md_ = onnxruntime::make_unique( + mkldnn::memory::desc({src_dims_mkl}, MklDnnType(), src_format)); } else { // get the output of previous node (mkldnn block propagation). // TODO Sourcenode will set src of this node. @@ -63,10 +64,11 @@ class MklDnnPool : public MklDnnKernel { if (source_desc_ == ort_source_desc_) { // reorder for better performance mkldnn::memory::format_tag fmt = GetAVXFormat(src_dims_mkl); - src_md_.reset(new mkldnn::memory::desc( - {src_dims_mkl}, MklDnnType(), fmt)); + src_md_ = onnxruntime::make_unique( + mkldnn::memory::desc({src_dims_mkl}, MklDnnType(), fmt)); } else { - src_md_.reset(new mkldnn::memory::desc(parents_[0].get()->primitive_dst_mem_->get_desc())); + src_md_ = onnxruntime::make_unique( + mkldnn::memory::desc(parents_[0].get()->primitive_dst_mem_->get_desc())); } } @@ -95,8 +97,8 @@ class MklDnnPool : public MklDnnKernel { mkldnn::memory::dims padding_left_mkl(pads_.begin(), pads_.begin() + (pads_.size() / 2)); mkldnn::memory::dims padding_right_mkl(pads_.begin() + (pads_.size() / 2), pads_.end()); - primitive_dst_md_.reset(new mkldnn::memory::desc( - {dst_dims_mkl}, MklDnnType(), mkldnn::memory::format_tag::any)); + primitive_dst_md_ = onnxruntime::make_unique( + mkldnn::memory::desc({dst_dims_mkl}, MklDnnType(), mkldnn::memory::format_tag::any)); mkldnn::algorithm algo = mkldnn::algorithm::pooling_max; if (op_name_ == "AveragePool" || op_name_ == "GlobalAveragePool") { @@ -105,47 +107,50 @@ class MklDnnPool : public MklDnnKernel { algo = mkldnn::algorithm::pooling_avg_include_padding; } } - fwd_desc_.reset(new mkldnn::pooling_forward::desc( - mkldnn::prop_kind::forward_inference, algo, - *src_md_, *primitive_dst_md_, - strides_mkl, kernel_mkl, - padding_left_mkl, padding_right_mkl)); + fwd_desc_ = onnxruntime::make_unique( + mkldnn::pooling_forward::desc(mkldnn::prop_kind::forward_inference, algo, + *src_md_, *primitive_dst_md_, + strides_mkl, kernel_mkl, + padding_left_mkl, padding_right_mkl)); - fwd_primitive_desc_.reset(new mkldnn::pooling_forward::primitive_desc( - *fwd_desc_, cpu_engine)); + fwd_primitive_desc_ = onnxruntime::make_unique( + mkldnn::pooling_forward::primitive_desc(*fwd_desc_, cpu_engine)); if (mklnode_ptr_->parent_nodes.empty()) { // Sub-graph's first node. Read input from input buffer - src_mem_.reset(new mkldnn::memory( - fwd_primitive_desc_.get()->src_desc(), cpu_engine, nullptr)); + src_mem_ = onnxruntime::make_unique( + mkldnn::memory(fwd_primitive_desc_.get()->src_desc(), cpu_engine, nullptr)); } else { // Sub-graph's inner node. set input to parent's output src_mem_ = parents_[0].get()->primitive_dst_mem_; } - primitive_src_desc_ = fwd_primitive_desc_.get()->src_desc(); + primitive_src_desc_ = fwd_primitive_desc_.get()->src_desc(); primitive_dst_desc_ = fwd_primitive_desc_.get()->dst_desc(); src_size_ = fwd_primitive_desc_.get()->src_desc().get_size(); dst_size_ = fwd_primitive_desc_.get()->dst_desc().get_size(); - // reorder source memory for best performance (AVX512); + // reorder source memory for best performance (AVX512); if (primitive_src_desc_ != source_desc_) { mkldnn::memory::dims src_dims(x_shape_.GetDims().begin(), x_shape_.GetDims().end()); auto pd = mkldnn::memory::desc(source_desc_); if (mklnode_ptr_->parent_nodes.empty()) - src_mem_from_.reset(new mkldnn::memory(pd, cpu_engine, nullptr)); + src_mem_from_ = onnxruntime::make_unique( + mkldnn::memory(pd, cpu_engine, nullptr)); else src_mem_from_ = parents_[0].get()->primitive_dst_mem_; - src_mem_.reset(new mkldnn::memory(fwd_primitive_desc_->src_desc(), cpu_engine, nullptr)); + src_mem_ = onnxruntime::make_unique( + mkldnn::memory(fwd_primitive_desc_->src_desc(), cpu_engine, nullptr)); net.push_back(mkldnn::reorder(*src_mem_from_, *src_mem_)); net_args.push_back({{MKLDNN_ARG_FROM, *src_mem_from_}, {MKLDNN_ARG_TO, *src_mem_}}); } else { if (mklnode_ptr_->parent_nodes.empty()) { - src_mem_.reset(new mkldnn::memory(fwd_primitive_desc_->src_desc(), cpu_engine, nullptr)); + src_mem_ = onnxruntime::make_unique( + mkldnn::memory(fwd_primitive_desc_->src_desc(), cpu_engine, nullptr)); } else { src_mem_ = parents_[0].get()->primitive_dst_mem_; } @@ -156,21 +161,21 @@ class MklDnnPool : public MklDnnKernel { if (primitive_dst_desc_ != ort_source_desc_) { // reorder neded. Use primitive output as input to reorder and // allocate buffer for reorder output, final output of this subgraph - primitive_dst_mem_.reset( - new mkldnn::memory(fwd_primitive_desc_.get()->dst_desc(), cpu_engine)); + primitive_dst_mem_ = onnxruntime::make_unique( + mkldnn::memory(fwd_primitive_desc_.get()->dst_desc(), cpu_engine)); } else { // Last node but re-order not needed. Allocate buffer to output of this node - primitive_dst_mem_.reset( - new mkldnn::memory(fwd_primitive_desc_.get()->dst_desc(), cpu_engine, nullptr)); + primitive_dst_mem_ = onnxruntime::make_unique( + mkldnn::memory(fwd_primitive_desc_.get()->dst_desc(), cpu_engine, nullptr)); } } else { // Intermediate node. Use mkldnn kernel internal memory for output and // use this as input to next node. - primitive_dst_mem_.reset( - new mkldnn::memory(fwd_primitive_desc_.get()->dst_desc(), cpu_engine)); + primitive_dst_mem_ = onnxruntime::make_unique( + mkldnn::memory(fwd_primitive_desc_.get()->dst_desc(), cpu_engine)); } - pool_fwd_.reset( - new mkldnn::pooling_forward(*fwd_primitive_desc_)); + pool_fwd_ = onnxruntime::make_unique( + mkldnn::pooling_forward(*fwd_primitive_desc_)); net.push_back(*pool_fwd_); net_args.push_back({{MKLDNN_ARG_SRC, *src_mem_}, diff --git a/onnxruntime/core/providers/mkldnn/subgraph/mkldnn_sum.h b/onnxruntime/core/providers/mkldnn/subgraph/mkldnn_sum.h index 3899e5b093..71d1448283 100644 --- a/onnxruntime/core/providers/mkldnn/subgraph/mkldnn_sum.h +++ b/onnxruntime/core/providers/mkldnn/subgraph/mkldnn_sum.h @@ -89,25 +89,28 @@ class MklDnnSum : public MklDnnKernel { } } - primitive_dst_md_.reset(new mkldnn::memory::desc( - {dst_dims_mkl}, MklDnnType(), mkldnn::memory::format_tag::any)); - sum_pd_.reset(new mkldnn::sum::primitive_desc( - *primitive_dst_md_, coeff, srcs_pd_, cpu_engine)); + primitive_dst_md_ = onnxruntime::make_unique( + mkldnn::memory::desc({dst_dims_mkl}, MklDnnType(), mkldnn::memory::format_tag::any)); + sum_pd_ = onnxruntime::make_unique( + mkldnn::sum::primitive_desc(*primitive_dst_md_, coeff, srcs_pd_, cpu_engine)); if (mklnode_ptr_->output_index >= 0) { // last node of sub-graph. need to allocate memory for output_tensor if (primitive_dst_desc_ != ort_source_desc_) { // reorder neded. Use primitive output as input to reorder and // allocate buffer for reorder output, final output of this subgraph - primitive_dst_mem_.reset(new mkldnn::memory(sum_pd_->dst_desc(), cpu_engine)); + primitive_dst_mem_ = onnxruntime::make_unique( + mkldnn::memory(sum_pd_->dst_desc(), cpu_engine)); } else { // Last node but re-order not needed. Allocate buffer to output of this node - primitive_dst_mem_.reset(new mkldnn::memory(sum_pd_->dst_desc(), cpu_engine, nullptr)); + primitive_dst_mem_ = onnxruntime::make_unique( + mkldnn::memory(sum_pd_->dst_desc(), cpu_engine, nullptr)); } } else { // Intermediate node. Use mkldnn kernel internal memory for output and // use this as input to next node. - primitive_dst_mem_.reset(new mkldnn::memory(sum_pd_->dst_desc(), cpu_engine)); + primitive_dst_mem_ = onnxruntime::make_unique( + mkldnn::memory(sum_pd_->dst_desc(), cpu_engine)); } primitive_dst_desc_ = sum_pd_->dst_desc();