MKL-DNN EP (#2149)

* make_unique, make_shared

* make_unique, make_shared
This commit is contained in:
Sreekanth Yalachigere 2019-10-16 22:49:45 -07:00 committed by George Wu
parent 6445e7182c
commit 95fef56dc8
10 changed files with 246 additions and 215 deletions

View file

@ -202,7 +202,8 @@ std::vector<std::unique_ptr<ComputeCapability>> MKLDNNExecutionProvider::GetCapa
// There are several identical graphs in the Model Zoo that differ only in
// a few attribute values. GetGraphName returns graph-name + first-node-output name
std::string graph_name = GetGraphName(graph_viewer);
subgraph_ptr.reset(new mkl_dnn::Subgraph(graph_name));
subgraph_ptr = onnxruntime::make_unique<mkl_dnn::Subgraph>(
mkl_dnn::Subgraph(graph_name));
// output name to node index map. Used to find sub-graph end nodes:
// if the output of a node is not an input to any node in the sub-graph, it is an end node
@ -221,7 +222,7 @@ std::vector<std::unique_ptr<ComputeCapability>> MKLDNNExecutionProvider::GetCapa
node_index++;
if (subgraph_ptr->mkldnn_nodes.size() > 0) {
CreateMetaDef(graph_viewer, subgraph_attributes, subgraph_ptr, sub_var, result);
subgraph_ptr.reset(new mkl_dnn::Subgraph(graph_name));
subgraph_ptr = std::make_shared<mkl_dnn::Subgraph>(mkl_dnn::Subgraph(graph_name));
subgraph_attributes.clear();
output_to_source_node_map.clear();
}
@ -281,7 +282,7 @@ std::vector<std::unique_ptr<ComputeCapability>> MKLDNNExecutionProvider::GetCapa
if (input_from_subgraph == false) {
CreateMetaDef(graph_viewer, subgraph_attributes, subgraph_ptr, sub_var, result);
subgraph_attributes.clear();
subgraph_ptr.reset(new mkl_dnn::Subgraph(graph_name));
subgraph_ptr = std::make_shared<mkl_dnn::Subgraph>(mkl_dnn::Subgraph(graph_name));
output_to_source_node_map.clear();
}
}
@ -320,7 +321,7 @@ std::vector<std::unique_ptr<ComputeCapability>> MKLDNNExecutionProvider::GetCapa
}
if (create_subgraph) {
CreateMetaDef(graph_viewer, subgraph_attributes, subgraph_ptr, sub_var, result);
subgraph_ptr.reset(new mkl_dnn::Subgraph(graph_name));
subgraph_ptr = std::make_shared<mkl_dnn::Subgraph>(mkl_dnn::Subgraph(graph_name));
subgraph_attributes.clear();
output_to_source_node_map.clear();
}
@ -330,7 +331,7 @@ std::vector<std::unique_ptr<ComputeCapability>> MKLDNNExecutionProvider::GetCapa
} else {
if (!sub_var.subgraph_node_indexes.empty()) {
CreateMetaDef(graph_viewer, subgraph_attributes, subgraph_ptr, sub_var, result);
subgraph_ptr.reset(new mkl_dnn::Subgraph(graph_name));
subgraph_ptr = std::make_shared<mkl_dnn::Subgraph>(mkl_dnn::Subgraph(graph_name));
subgraph_attributes.clear();
output_to_source_node_map.clear();
}
@ -339,7 +340,7 @@ std::vector<std::unique_ptr<ComputeCapability>> MKLDNNExecutionProvider::GetCapa
} // graph_viewer node iterator ends
if (!sub_var.subgraph_node_indexes.empty()) {
CreateMetaDef(graph_viewer, subgraph_attributes, subgraph_ptr, sub_var, result);
subgraph_ptr.reset(new mkl_dnn::Subgraph(graph_name));
subgraph_ptr = std::make_shared<mkl_dnn::Subgraph>(mkl_dnn::Subgraph(graph_name));
subgraph_attributes.clear();
output_to_source_node_map.clear();
}

View file

@ -27,7 +27,7 @@ class MklDnnRelu : public MklDnnKernel {
OrtKernelContext* context,
mkldnn::engine& cpu_engine,
std::vector<mkldnn::primitive>& net,
std::vector<std::unordered_map<int, mkldnn::memory>> &net_args) {
std::vector<std::unordered_map<int, mkldnn::memory>>& net_args) {
Ort::CustomOpApi ort{*api};
int input_index = mklnode_ptr_->input_start_index < 0 ? 0 : mklnode_ptr_->input_start_index;
@ -45,12 +45,12 @@ class MklDnnRelu : public MklDnnKernel {
ort_source_format_ = GetSourceFormat(static_cast<int>(xdim));
x_shape = TensorShape(xshape, xdim);
x_shape = TensorShape(xshape, xdim);
if (x_shape.NumDimensions() == 0) {
primitive_created_ = Status(common::ONNXRUNTIME, common::INVALID_ARGUMENT, "Shape of size zero " + x_shape.ToString());
return primitive_created_;
}
if (x_shape.NumDimensions() == 0) {
primitive_created_ = Status(common::ONNXRUNTIME, common::INVALID_ARGUMENT, "Shape of size zero " + x_shape.ToString());
return primitive_created_;
}
mkldnn::memory::dims src_dims(
x_shape.GetDims().begin(), x_shape.GetDims().end());
@ -58,13 +58,13 @@ class MklDnnRelu : public MklDnnKernel {
ort_source_desc_ = mkldnn::memory::desc(
{src_dims}, MklDnnType<T>(), ort_source_format_);
source_desc_ = ort_source_desc_;
src_md_.reset(new mkldnn::memory::desc(
{src_dims}, MklDnnType<T>(), ort_source_format_));
src_mem_.reset(
new mkldnn::memory({{src_dims}, MklDnnType<T>(), ort_source_format_}, cpu_engine, nullptr));
src_md_ = onnxruntime::make_unique<mkldnn::memory::desc>(
mkldnn::memory::desc({src_dims}, MklDnnType<T>(), ort_source_format_));
src_mem_ = onnxruntime::make_unique<mkldnn::memory>(
mkldnn::memory({{src_dims}, MklDnnType<T>(), ort_source_format_}, cpu_engine, nullptr));
} else {
src_md_.reset(
new mkldnn::memory::desc(parents_[0].get()->primitive_dst_desc_));
src_md_ = onnxruntime::make_unique<mkldnn::memory::desc>(
mkldnn::memory::desc(parents_[0].get()->primitive_dst_desc_));
src_mem_ = parents_[0].get()->primitive_dst_mem_;
x_shape = parents_[0].get()->primitive_dst_shape_;
ort_source_format_ = parents_[0].get()->ort_source_format_;
@ -76,13 +76,12 @@ class MklDnnRelu : public MklDnnKernel {
mkldnn::memory::dims dst_dims_mkl(primitive_dst_shape_.GetDims().begin(), primitive_dst_shape_.GetDims().end());
mkldnn::algorithm algo = mkldnn::algorithm::eltwise_relu;
fwd_desc_.reset(new mkldnn::eltwise_forward::desc(
mkldnn::prop_kind::forward_inference, algo, *src_md_, 0));
fwd_desc_ = onnxruntime::make_unique<mkldnn::eltwise_forward::desc>(
mkldnn::eltwise_forward::desc(mkldnn::prop_kind::forward_inference, algo, *src_md_, 0));
relu_fwd_pd_ = onnxruntime::make_unique<mkldnn::eltwise_forward::primitive_desc>(
mkldnn::eltwise_forward::primitive_desc(*fwd_desc_, cpu_engine));
relu_fwd_pd_.reset(new mkldnn::eltwise_forward::primitive_desc(
*fwd_desc_, cpu_engine));
primitive_src_desc_ = relu_fwd_pd_.get()->src_desc();
primitive_src_desc_ = relu_fwd_pd_.get()->src_desc();
primitive_dst_desc_ = relu_fwd_pd_.get()->dst_desc();
if (mklnode_ptr_->output_index >= 0) {
@ -90,24 +89,24 @@ class MklDnnRelu : public MklDnnKernel {
if (primitive_dst_desc_ != ort_source_desc_) {
// reorder needed. Use primitive output as input to reorder and
// allocate buffer for reorder output, final output of this subgraph
primitive_dst_mem_.reset(new mkldnn::memory(relu_fwd_pd_.get()->dst_desc(), cpu_engine));
primitive_dst_mem_ = std::make_shared<mkldnn::memory>(mkldnn::memory(relu_fwd_pd_.get()->dst_desc(), cpu_engine));
} else {
// Last node but re-order not needed. Allocate buffer to output of this node
primitive_dst_mem_.reset(new mkldnn::memory(relu_fwd_pd_.get()->dst_desc(), cpu_engine, nullptr));
primitive_dst_mem_ = std::make_shared<mkldnn::memory>(mkldnn::memory(relu_fwd_pd_.get()->dst_desc(), cpu_engine, nullptr));
}
} else {
// Intermediate node. Use mkldnn kernel internal memory for output and
// use this as input to next node.
primitive_dst_mem_.reset(new mkldnn::memory(relu_fwd_pd_.get()->dst_desc(), cpu_engine));
primitive_dst_mem_ = std::make_shared<mkldnn::memory>(mkldnn::memory(relu_fwd_pd_.get()->dst_desc(), cpu_engine));
}
relu_fwd_.reset(
new mkldnn::eltwise_forward(*relu_fwd_pd_));
relu_fwd_ = onnxruntime::make_unique<mkldnn::eltwise_forward>(
mkldnn::eltwise_forward(*relu_fwd_pd_));
net.push_back(*relu_fwd_);
net_args.push_back({{MKLDNN_ARG_SRC, *src_mem_},
{MKLDNN_ARG_DST, *primitive_dst_mem_}});
net_args.push_back({{MKLDNN_ARG_SRC, *src_mem_},
{MKLDNN_ARG_DST, *primitive_dst_mem_}});
if (mklnode_ptr_->output_index >= 0) {
// one of the end nodes. Allocate output buffer memory and
@ -122,10 +121,10 @@ class MklDnnRelu : public MklDnnKernel {
Status Bind(const OrtCustomOpApi* api, OrtKernelContext* context) override {
Ort::CustomOpApi ort{*api};
if (primitive_created_ != Status::OK())
if (primitive_created_ != Status::OK())
return primitive_created_;
int input_index = mklnode_ptr_->input_start_index < 0 ? 0 : mklnode_ptr_->input_start_index;
int input_index = mklnode_ptr_->input_start_index < 0 ? 0 : mklnode_ptr_->input_start_index;
if (mklnode_ptr_->parent_nodes.empty()) {
// Sub-graph's first node. Read input from input buffer

View file

@ -124,11 +124,11 @@ class MklDnnBatchNorm : public MklDnnKernel {
ort_source_desc_ = mkldnn::memory::desc(
{src_dims}, MklDnnType<T>(), ort_source_format_);
source_desc_ = ort_source_desc_;
src_md_.reset(new mkldnn::memory::desc(
{src_dims}, MklDnnType<T>(), ort_source_format_));
src_md_ = onnxruntime::make_unique<mkldnn::memory::desc>(
mkldnn::memory::desc({src_dims}, MklDnnType<T>(), ort_source_format_));
} else {
src_md_.reset(
new mkldnn::memory::desc(parents_[0].get()->primitive_dst_desc_));
src_md_ = onnxruntime::make_unique<mkldnn::memory::desc>(
mkldnn::memory::desc(parents_[0].get()->primitive_dst_desc_));
x_shape = parents_[0].get()->primitive_dst_shape_;
ort_source_format_ = parents_[0].get()->ort_source_format_;
ort_source_desc_ = parents_[0].get()->ort_source_desc_;
@ -195,33 +195,34 @@ class MklDnnBatchNorm : public MklDnnKernel {
mkldnn::memory::dims dst_dims_mkl(
primitive_dst_shape_.GetDims().begin(), primitive_dst_shape_.GetDims().end());
scale_shift_md_.reset(new mkldnn::memory::desc(
{2, scale_dims_mkl[0]}, MklDnnType<T>(), mkldnn::memory::format_tag::nc));
mean_md_.reset(new mkldnn::memory::desc(
{mean_dims_mkl}, MklDnnType<T>(), mkldnn::memory::format_tag::x));
var_md_.reset(new mkldnn::memory::desc(
{var_dims_mkl}, MklDnnType<T>(), mkldnn::memory::format_tag::x));
primitive_dst_md_.reset(new mkldnn::memory::desc(
{dst_dims_mkl}, MklDnnType<T>(), mkldnn::memory::format_tag::any));
scale_shift_md_ = onnxruntime::make_unique<mkldnn::memory::desc>(
mkldnn::memory::desc({2, scale_dims_mkl[0]}, MklDnnType<T>(), mkldnn::memory::format_tag::nc));
mean_md_ = onnxruntime::make_unique<mkldnn::memory::desc>(
mkldnn::memory::desc({mean_dims_mkl}, MklDnnType<T>(), mkldnn::memory::format_tag::x));
var_md_ = onnxruntime::make_unique<mkldnn::memory::desc>(
mkldnn::memory::desc({var_dims_mkl}, MklDnnType<T>(), mkldnn::memory::format_tag::x));
primitive_dst_md_ = onnxruntime::make_unique<mkldnn::memory::desc>(
mkldnn::memory::desc({dst_dims_mkl}, MklDnnType<T>(), mkldnn::memory::format_tag::any));
// scale_shift_mem will allocate 2*C*sizeof(float) buffer
//
scale_shift_mem_.reset(
new mkldnn::memory({*scale_shift_md_, cpu_engine}));
scale_shift_mem_ = onnxruntime::make_unique<mkldnn::memory>(
mkldnn::memory({*scale_shift_md_, cpu_engine}));
mean_mem_.reset(
new mkldnn::memory(*mean_md_, cpu_engine, nullptr));
var_mem_.reset(
new mkldnn::memory(*var_md_, cpu_engine, nullptr));
mean_mem_ = onnxruntime::make_unique<mkldnn::memory>(
mkldnn::memory(*mean_md_, cpu_engine, nullptr));
var_mem_ = onnxruntime::make_unique<mkldnn::memory>(
mkldnn::memory(*var_md_, cpu_engine, nullptr));
batchnorm_fwd_.reset(new mkldnn::batch_normalization_forward::desc(
mkldnn::prop_kind::forward_inference, *src_md_, epsilon_,
mkldnn::normalization_flags::use_scale_shift |
mkldnn::normalization_flags::use_global_stats));
batchnorm_fwd_ = onnxruntime::make_unique<mkldnn::batch_normalization_forward::desc>(
mkldnn::batch_normalization_forward::desc(
mkldnn::prop_kind::forward_inference, *src_md_, epsilon_,
mkldnn::normalization_flags::use_scale_shift |
mkldnn::normalization_flags::use_global_stats));
if (fuse_relu_) {
mkldnn::primitive_attr attr;
// attr.set_int_output_round_mode(mkldnn::round_mode::round_nearest);
// attr.set_int_output_round_mode(mkldnn::round_mode::round_nearest);
// Execute RELU as Fuse PostOps
const float ops_scale = 1.f;
const float ops_alpha = 0.f; // relu negative slope
@ -230,11 +231,11 @@ class MklDnnBatchNorm : public MklDnnKernel {
ops.append_eltwise(ops_scale, mkldnn::algorithm::eltwise_relu, ops_alpha, ops_beta);
attr.set_post_ops(ops);
batchnorm_fwd_pd_.reset(new mkldnn::batch_normalization_forward::primitive_desc(
*batchnorm_fwd_, attr, cpu_engine));
batchnorm_fwd_pd_ = onnxruntime::make_unique<mkldnn::batch_normalization_forward::primitive_desc>(
mkldnn::batch_normalization_forward::primitive_desc(*batchnorm_fwd_, attr, cpu_engine));
} else {
batchnorm_fwd_pd_.reset(
new mkldnn::batch_normalization_forward::primitive_desc(
batchnorm_fwd_pd_ = onnxruntime::make_unique<mkldnn::batch_normalization_forward::primitive_desc>(
mkldnn::batch_normalization_forward::primitive_desc(
*batchnorm_fwd_, cpu_engine));
}
@ -245,8 +246,8 @@ class MklDnnBatchNorm : public MklDnnKernel {
batchnorm_fwd_pd_.get()->dst_desc());
if (mklnode_ptr_->parent_nodes.empty()) {
src_mem_.reset(
new mkldnn::memory(batchnorm_fwd_pd_.get()->src_desc(), cpu_engine, nullptr));
src_mem_ = onnxruntime::make_unique<mkldnn::memory>(
mkldnn::memory(batchnorm_fwd_pd_.get()->src_desc(), cpu_engine, nullptr));
} else {
src_mem_ = parents_[0].get()->primitive_dst_mem_;
}
@ -254,13 +255,16 @@ class MklDnnBatchNorm : public MklDnnKernel {
if (mklnode_ptr_->output_index >= 0) {
// Use mkldnn's internal output buffer
if (primitive_dst_desc_ != ort_source_desc_) {
primitive_dst_mem_.reset(new mkldnn::memory(batchnorm_fwd_pd_->dst_desc(), cpu_engine));
primitive_dst_mem_ = onnxruntime::make_unique<mkldnn::memory>(
mkldnn::memory(batchnorm_fwd_pd_->dst_desc(), cpu_engine));
} else {
primitive_dst_mem_.reset(new mkldnn::memory(batchnorm_fwd_pd_->dst_desc(), cpu_engine, nullptr));
primitive_dst_mem_ = onnxruntime::make_unique<mkldnn::memory>(
mkldnn::memory(batchnorm_fwd_pd_->dst_desc(), cpu_engine, nullptr));
}
} else {
// last node of sub-graph. need to allocate memory for output_tensor
primitive_dst_mem_.reset(new mkldnn::memory(batchnorm_fwd_pd_->dst_desc(), cpu_engine));
primitive_dst_mem_ = onnxruntime::make_unique<mkldnn::memory>(
mkldnn::memory(batchnorm_fwd_pd_->dst_desc(), cpu_engine));
}
auto bn = mkldnn::batch_normalization_forward(
*batchnorm_fwd_pd_);

View file

@ -76,7 +76,7 @@ class MklDnnConv : public MklDnnKernel {
std::vector<mkldnn::primitive>& net,
std::vector<std::unordered_map<int, mkldnn::memory>>& net_args) override {
Ort::CustomOpApi ort{*api};
stream_.reset(new mkldnn::stream(cpu_engine));
stream_ = onnxruntime::make_unique<mkldnn::stream>(mkldnn::stream(cpu_engine));
int input_index = mklnode_ptr_->input_start_index < 0 ? 0 : mklnode_ptr_->input_start_index;
const OrtValue* winput_tensor = ort.KernelContext_GetInput(context, input_index + 1);
@ -108,8 +108,7 @@ class MklDnnConv : public MklDnnKernel {
ort_source_desc_ = parents_[0].get()->ort_source_desc_;
source_desc_ = parents_[0].get()->primitive_dst_desc_;
mkldnn::memory::dims src_dims_mkl(x_shape.GetDims().begin(), x_shape.GetDims().end());
src_md_.reset(new mkldnn::memory::desc(source_desc_));
mkldnn::memory::dims src_dims_mkl(x_shape.GetDims().begin(), x_shape.GetDims().end());
}
primitive_created_ = ValidateInputShape(x_shape, w_shape);
@ -165,8 +164,8 @@ class MklDnnConv : public MklDnnKernel {
primitive_dst_shape_ = TensorShape(y_dims);
TensorShape output_shape = y_shape.Slice(2);
mkldnn::memory::dims dst_dims_mkl(y_dims.begin(), y_dims.end());
primitive_dst_md_.reset(new mkldnn::memory::desc(
{dst_dims_mkl}, MklDnnType<T>(), mkldnn::memory::format_tag::any));
primitive_dst_md_ = onnxruntime::make_unique<mkldnn::memory::desc>(
mkldnn::memory::desc({dst_dims_mkl}, MklDnnType<T>(), mkldnn::memory::format_tag::any));
mkldnn::memory::dims filter_dims_mkl;
if (group_mkl == 1) {
@ -221,36 +220,39 @@ class MklDnnConv : public MklDnnKernel {
}
}
mkldnn::memory::dims src_dims_mkl(x_shape.GetDims().begin(), x_shape.GetDims().end());
if (mklnode_ptr_->parent_nodes.empty()) {
mkldnn::memory::dims src_dims_mkl(x_shape.GetDims().begin(), x_shape.GetDims().end());
ort_source_format_ = src_format;
src_md_.reset(new mkldnn::memory::desc({src_dims_mkl}, MklDnnType<T>(), mkldnn::memory::format_tag::any));
ort_source_format_ = src_format;
ort_source_desc_ = mkldnn::memory::desc({src_dims_mkl}, MklDnnType<T>(), src_format);
source_desc_ = mkldnn::memory::desc({src_dims_mkl}, MklDnnType<T>(), src_format);
}
src_md_ = onnxruntime::make_unique<mkldnn::memory::desc>(
mkldnn::memory::desc({src_dims_mkl}, MklDnnType<T>(), mkldnn::memory::format_tag::any));
// Set the memory descriptors to format::any to allow MKLDNN to decide what the optimal memory layout should be
// for the computation given the input
filter_md_.reset(new mkldnn::memory::desc(
{filter_dims_mkl}, MklDnnType<T>(), mkldnn::memory::format_tag::any));
filter_md_ = onnxruntime::make_unique<mkldnn::memory::desc>(
mkldnn::memory::desc({filter_dims_mkl}, MklDnnType<T>(), mkldnn::memory::format_tag::any));
if (!bias_dims_mkl.empty())
bias_md_.reset(new mkldnn::memory::desc(
{bias_dims_mkl}, MklDnnType<T>(), mkldnn::memory::format_tag::any));
bias_md_ = onnxruntime::make_unique<mkldnn::memory::desc>(
mkldnn::memory::desc({bias_dims_mkl}, MklDnnType<T>(), mkldnn::memory::format_tag::any));
mkldnn::memory::dims conv_zero_padding = {0, 0};
mkldnn::memory::dims conv_zero_padding = {0, 0};
if (!bias_dims_mkl.empty()) {
fwd_desc_.reset(new mkldnn::convolution_forward::desc(
mkldnn::prop_kind::forward_inference, mkldnn::algorithm::convolution_direct, *src_md_,
*filter_md_, *bias_md_, *primitive_dst_md_,
strides_mkl, dilations_mkl, padding_left_mkl,
padding_right_mkl));
fwd_desc_ = onnxruntime::make_unique<mkldnn::convolution_forward::desc>(
mkldnn::convolution_forward::desc(
mkldnn::prop_kind::forward_inference, mkldnn::algorithm::convolution_direct, *src_md_,
*filter_md_, *bias_md_, *primitive_dst_md_,
strides_mkl, dilations_mkl, padding_left_mkl,
padding_right_mkl));
} else {
fwd_desc_.reset(new mkldnn::convolution_forward::desc(
mkldnn::prop_kind::forward_inference, mkldnn::algorithm::convolution_direct, *src_md_,
*filter_md_, *primitive_dst_md_, strides_mkl,
dilations_mkl, padding_left_mkl, padding_right_mkl));
fwd_desc_ = onnxruntime::make_unique<mkldnn::convolution_forward::desc>(
mkldnn::convolution_forward::desc(
mkldnn::prop_kind::forward_inference, mkldnn::algorithm::convolution_direct, *src_md_,
*filter_md_, *primitive_dst_md_, strides_mkl,
dilations_mkl, padding_left_mkl, padding_right_mkl));
}
if (fuse_relu_) {
@ -264,17 +266,17 @@ class MklDnnConv : public MklDnnKernel {
ops.append_eltwise(ops_scale, mkldnn::algorithm::eltwise_relu, ops_alpha, ops_beta);
attr.set_post_ops(ops);
conv_fwd_pd_.reset(new mkldnn::convolution_forward::primitive_desc(
*fwd_desc_, attr, cpu_engine));
conv_fwd_pd_ = onnxruntime::make_unique<mkldnn::convolution_forward::primitive_desc>(
mkldnn::convolution_forward::primitive_desc(*fwd_desc_, attr, cpu_engine));
} else {
conv_fwd_pd_.reset(new mkldnn::convolution_forward::primitive_desc(
*fwd_desc_, cpu_engine));
conv_fwd_pd_ = onnxruntime::make_unique<mkldnn::convolution_forward::primitive_desc>(
mkldnn::convolution_forward::primitive_desc(*fwd_desc_, cpu_engine));
}
primitive_src_desc_ = static_cast<mkldnn::memory::desc>(
conv_fwd_pd_.get()->src_desc());
filter_desc_= static_cast<mkldnn::memory::desc>(
filter_desc_ = static_cast<mkldnn::memory::desc>(
conv_fwd_pd_.get()->weights_desc());
primitive_dst_desc_ = static_cast<mkldnn::memory::desc>(
@ -284,25 +286,28 @@ class MklDnnConv : public MklDnnKernel {
filter_size_ = conv_fwd_pd_.get()->weights_desc().get_size();
dst_size_ = conv_fwd_pd_.get()->dst_desc().get_size();
filter_mem_.reset(
new mkldnn::memory(conv_fwd_pd_.get()->weights_desc(), cpu_engine, nullptr));
filter_mem_ = onnxruntime::make_unique<mkldnn::memory>(
mkldnn::memory(conv_fwd_pd_.get()->weights_desc(), cpu_engine, nullptr));
if (primitive_src_desc_ != source_desc_) {
mkldnn::memory::dims src_dims(x_shape.GetDims().begin(), x_shape.GetDims().end());
auto pd = mkldnn::memory::desc({{src_dims}, MklDnnType<T>(), ort_source_format_});
if (mklnode_ptr_->parent_nodes.empty())
src_mem_from_.reset(new mkldnn::memory(pd, cpu_engine, nullptr));
src_mem_from_ = onnxruntime::make_unique<mkldnn::memory>(
mkldnn::memory(pd, cpu_engine, nullptr));
else
src_mem_from_ = parents_[0].get()->primitive_dst_mem_;
src_mem_.reset(new mkldnn::memory(conv_fwd_pd_->src_desc(), cpu_engine, nullptr));
src_mem_ = onnxruntime::make_unique<mkldnn::memory>(
mkldnn::memory(conv_fwd_pd_->src_desc(), cpu_engine, nullptr));
net.push_back(mkldnn::reorder(*src_mem_from_, *src_mem_));
net_args.push_back({{MKLDNN_ARG_FROM, *src_mem_from_},
{MKLDNN_ARG_TO, *src_mem_}});
} else {
if (mklnode_ptr_->parent_nodes.empty()) {
src_mem_.reset(new mkldnn::memory(conv_fwd_pd_->src_desc(), cpu_engine, nullptr));
src_mem_ = onnxruntime::make_unique<mkldnn::memory>(
mkldnn::memory(conv_fwd_pd_->src_desc(), cpu_engine, nullptr));
} else {
src_mem_ = parents_[0].get()->primitive_dst_mem_;
}
@ -311,25 +316,31 @@ class MklDnnConv : public MklDnnKernel {
if (mklnode_ptr_->output_index >= 0) {
// Use mkldnn's internal output buffer
if (primitive_dst_desc_ != ort_source_desc_) {
primitive_dst_mem_.reset(new mkldnn::memory(conv_fwd_pd_.get()->dst_desc(), cpu_engine));
primitive_dst_mem_ = onnxruntime::make_unique<mkldnn::memory>(
mkldnn::memory(conv_fwd_pd_.get()->dst_desc(), cpu_engine));
} else {
primitive_dst_mem_.reset(new mkldnn::memory(conv_fwd_pd_.get()->dst_desc(), cpu_engine, nullptr));
primitive_dst_mem_ = onnxruntime::make_unique<mkldnn::memory>(
mkldnn::memory(conv_fwd_pd_.get()->dst_desc(), cpu_engine, nullptr));
}
} else {
// last node of sub-graph. need to allocate memory for output_tensor
primitive_dst_mem_.reset(new mkldnn::memory(conv_fwd_pd_.get()->dst_desc(), cpu_engine));
primitive_dst_mem_ = onnxruntime::make_unique<mkldnn::memory>(
mkldnn::memory(conv_fwd_pd_.get()->dst_desc(), cpu_engine));
}
if (!bias_dims_mkl.empty()) {
bias_mem_.reset(new mkldnn::memory(conv_fwd_pd_.get()->bias_desc(), cpu_engine, nullptr));
conv_fwd_.reset(new mkldnn::convolution_forward(*conv_fwd_pd_));
bias_mem_ = onnxruntime::make_unique<mkldnn::memory>(
mkldnn::memory(conv_fwd_pd_.get()->bias_desc(), cpu_engine, nullptr));
conv_fwd_ = onnxruntime::make_unique<mkldnn::convolution_forward>(
mkldnn::convolution_forward(*conv_fwd_pd_));
net.push_back(*conv_fwd_);
net_args.push_back({{MKLDNN_ARG_SRC, *src_mem_},
{MKLDNN_ARG_WEIGHTS, *filter_mem_},
{MKLDNN_ARG_BIAS, *bias_mem_},
{MKLDNN_ARG_DST, *primitive_dst_mem_}});
} else {
conv_fwd_.reset(new mkldnn::convolution_forward(*conv_fwd_pd_));
conv_fwd_ = onnxruntime::make_unique<mkldnn::convolution_forward>(
mkldnn::convolution_forward(*conv_fwd_pd_));
net.push_back(*conv_fwd_);
net_args.push_back({{MKLDNN_ARG_SRC, *src_mem_},
{MKLDNN_ARG_WEIGHTS, *filter_mem_},
@ -379,13 +390,13 @@ class MklDnnConv : public MklDnnKernel {
mkldnn::memory src = mkldnn::memory({{filter_dims_mkl}, MklDnnType<T>(), filter_format_}, cpu_engine, (void*)filter_data);
IAllocatorUniquePtr<void> filter_reorder_buffer =
IAllocator::MakeUniquePtr<void>(alloc_, filter_size_);
filter_dst_mem.reset(
new mkldnn::memory(conv_fwd_pd_->weights_desc(), cpu_engine, filter_reorder_buffer.get()));
mkldnn::reorder(src, *filter_dst_mem)
filter_dst_mem = onnxruntime::make_unique<mkldnn::memory>(
mkldnn::memory(conv_fwd_pd_->weights_desc(), cpu_engine, filter_reorder_buffer.get()));
mkldnn::reorder(src, *filter_dst_mem)
.execute(cpu_engine, src, *filter_dst_mem);
provider_->SaveAllocatedMemory(std::move(filter_reorder_buffer));
provider_->SaveAllocatedMemory(std::move(filter_reorder_buffer));
filter_data = static_cast<T*>(filter_dst_mem->get_data_handle());
provider_->SetWeightsMemoryBuffer(mklnode_ptr_->weight_name, filter_dst_mem);
}
@ -522,7 +533,7 @@ class MklDnnConv : public MklDnnKernel {
}
private:
mkldnn::memory::desc filter_desc_;
mkldnn::memory::desc filter_desc_;
mkldnn::memory::format_tag filter_format_;
std::shared_ptr<mkldnn::memory> src_mem_from_;
@ -631,7 +642,6 @@ class MklDnnConv : public MklDnnKernel {
}
private:
std::unique_ptr<mkldnn::stream> stream_;
std::vector<int64_t> kernel_shape_; // must use ComputeKernelShape(...), instead of kernel_shape_
AutoPadType auto_pad_;

View file

@ -29,7 +29,7 @@ class MklDnnConvBatchNorm : public MklDnnKernel {
std::vector<mkldnn::primitive>& net,
std::vector<std::unordered_map<int, mkldnn::memory>>& net_args) override {
Ort::CustomOpApi ort{*api};
stream_.reset(new mkldnn::stream(cpu_engine));
stream_ = onnxruntime::make_unique<mkldnn::stream>(mkldnn::stream(cpu_engine));
int input_index = mklnode_ptr_->input_start_index < 0 ? 0 : mklnode_ptr_->input_start_index;
const OrtValue* winput_tensor = ort.KernelContext_GetInput(context, input_index + 1);
auto wtensor_info = ort.GetTensorTypeAndShape(winput_tensor);
@ -114,8 +114,8 @@ class MklDnnConvBatchNorm : public MklDnnKernel {
primitive_dst_shape_ = TensorShape(y_dims);
TensorShape output_shape = y_shape.Slice(2);
mkldnn::memory::dims dst_dims_mkl(y_dims.begin(), y_dims.end());
primitive_dst_md_.reset(new mkldnn::memory::desc(
{dst_dims_mkl}, MklDnnType<T>(), mkldnn::memory::format_tag::any));
primitive_dst_md_ = onnxruntime::make_unique<mkldnn::memory::desc>(
mkldnn::memory::desc({dst_dims_mkl}, MklDnnType<T>(), mkldnn::memory::format_tag::any));
mkldnn::memory::dims filter_dims_mkl;
if (group_mkl == 1) {
@ -188,23 +188,24 @@ class MklDnnConvBatchNorm : public MklDnnKernel {
source_desc_ = mkldnn::memory::desc({src_dims_mkl}, MklDnnType<T>(), src_format);
}
src_md_.reset(new mkldnn::memory::desc({src_dims_mkl}, MklDnnType<T>(), mkldnn::memory::format_tag::any));
src_md_ = onnxruntime::make_unique<mkldnn::memory::desc>(
mkldnn::memory::desc({src_dims_mkl}, MklDnnType<T>(), mkldnn::memory::format_tag::any));
// Set the memory descriptors to format::any to allow MKLDNN to decide what the optimal memory layout should be
// for the computation given the input
filter_md_.reset(new mkldnn::memory::desc(
{filter_dims_mkl}, MklDnnType<T>(), mkldnn::memory::format_tag::any));
bias_md_.reset(new mkldnn::memory::desc(
{bias_dims_mkl}, MklDnnType<T>(), mkldnn::memory::format_tag::any));
filter_md_ = onnxruntime::make_unique<mkldnn::memory::desc>(
mkldnn::memory::desc({filter_dims_mkl}, MklDnnType<T>(), mkldnn::memory::format_tag::any));
bias_md_ = onnxruntime::make_unique<mkldnn::memory::desc>(
mkldnn::memory::desc({bias_dims_mkl}, MklDnnType<T>(), mkldnn::memory::format_tag::any));
mkldnn::memory::dims conv_zero_padding = {0, 0};
fwd_desc_.reset(new mkldnn::convolution_forward::desc(
mkldnn::prop_kind::forward_inference, mkldnn::algorithm::convolution_direct, *src_md_,
*filter_md_, *bias_md_, *primitive_dst_md_,
strides_mkl, dilations_mkl, padding_left_mkl,
padding_right_mkl));
fwd_desc_ = onnxruntime::make_unique<mkldnn::convolution_forward::desc>(
mkldnn::convolution_forward::desc(
mkldnn::prop_kind::forward_inference, mkldnn::algorithm::convolution_direct, *src_md_,
*filter_md_, *bias_md_, *primitive_dst_md_,
strides_mkl, dilations_mkl, padding_left_mkl,
padding_right_mkl));
if (fuse_relu_) {
mkldnn::primitive_attr attr;
@ -217,11 +218,11 @@ class MklDnnConvBatchNorm : public MklDnnKernel {
ops.append_eltwise(ops_scale, mkldnn::algorithm::eltwise_relu, ops_alpha, ops_beta);
attr.set_post_ops(ops);
conv_fwd_pd_.reset(new mkldnn::convolution_forward::primitive_desc(
*fwd_desc_, attr, cpu_engine));
conv_fwd_pd_ = onnxruntime::make_unique<mkldnn::convolution_forward::primitive_desc>(
mkldnn::convolution_forward::primitive_desc(*fwd_desc_, attr, cpu_engine));
} else {
conv_fwd_pd_.reset(new mkldnn::convolution_forward::primitive_desc(
*fwd_desc_, cpu_engine));
conv_fwd_pd_ = onnxruntime::make_unique<mkldnn::convolution_forward::primitive_desc>(
mkldnn::convolution_forward::primitive_desc(*fwd_desc_, cpu_engine));
}
primitive_src_desc_ = static_cast<mkldnn::memory::desc>(
@ -237,25 +238,28 @@ class MklDnnConvBatchNorm : public MklDnnKernel {
filter_size_ = conv_fwd_pd_.get()->weights_desc().get_size();
dst_size_ = conv_fwd_pd_.get()->dst_desc().get_size();
filter_mem_.reset(
new mkldnn::memory(conv_fwd_pd_.get()->weights_desc(), cpu_engine, nullptr));
filter_mem_ = onnxruntime::make_unique<mkldnn::memory>(
mkldnn::memory(conv_fwd_pd_.get()->weights_desc(), cpu_engine, nullptr));
if (primitive_src_desc_ != source_desc_) {
mkldnn::memory::dims src_dims(x_shape.GetDims().begin(), x_shape.GetDims().end());
auto pd = mkldnn::memory::desc({{src_dims}, MklDnnType<T>(), ort_source_format_});
if (mklnode_ptr_->parent_nodes.empty())
src_mem_from_.reset(new mkldnn::memory(pd, cpu_engine, nullptr));
src_mem_from_ = onnxruntime::make_unique<mkldnn::memory>(
mkldnn::memory(pd, cpu_engine, nullptr));
else
src_mem_from_ = parents_[0].get()->primitive_dst_mem_;
src_mem_.reset(new mkldnn::memory(conv_fwd_pd_->src_desc(), cpu_engine, nullptr));
src_mem_ = onnxruntime::make_unique<mkldnn::memory>(
mkldnn::memory(conv_fwd_pd_->src_desc(), cpu_engine, nullptr));
net.push_back(mkldnn::reorder(*src_mem_from_, *src_mem_));
net_args.push_back({{MKLDNN_ARG_FROM, *src_mem_from_},
{MKLDNN_ARG_TO, *src_mem_}});
} else {
if (mklnode_ptr_->parent_nodes.empty()) {
src_mem_.reset(new mkldnn::memory(conv_fwd_pd_->src_desc(), cpu_engine, nullptr));
src_mem_ = onnxruntime::make_unique<mkldnn::memory>(
mkldnn::memory(conv_fwd_pd_->src_desc(), cpu_engine, nullptr));
} else {
src_mem_ = parents_[0].get()->primitive_dst_mem_;
}
@ -264,17 +268,22 @@ class MklDnnConvBatchNorm : public MklDnnKernel {
if (mklnode_ptr_->output_index >= 0) {
// Use mkldnn's internal output buffer
if (primitive_dst_desc_ != ort_source_desc_) {
primitive_dst_mem_.reset(new mkldnn::memory(conv_fwd_pd_.get()->dst_desc(), cpu_engine));
primitive_dst_mem_ = onnxruntime::make_unique<mkldnn::memory>(
mkldnn::memory(conv_fwd_pd_.get()->dst_desc(), cpu_engine));
} else {
primitive_dst_mem_.reset(new mkldnn::memory(conv_fwd_pd_.get()->dst_desc(), cpu_engine, nullptr));
primitive_dst_mem_ = onnxruntime::make_unique<mkldnn::memory>(
mkldnn::memory(conv_fwd_pd_.get()->dst_desc(), cpu_engine, nullptr));
}
} else {
// last node of sub-graph. need to allocate memory for output_tensor
primitive_dst_mem_.reset(new mkldnn::memory(conv_fwd_pd_.get()->dst_desc(), cpu_engine));
primitive_dst_mem_ = onnxruntime::make_unique<mkldnn::memory>(
mkldnn::memory(conv_fwd_pd_.get()->dst_desc(), cpu_engine));
}
bias_mem_.reset(new mkldnn::memory(conv_fwd_pd_.get()->bias_desc(), cpu_engine, nullptr));
conv_fwd_.reset(new mkldnn::convolution_forward(*conv_fwd_pd_));
bias_mem_ = onnxruntime::make_unique<mkldnn::memory>(
mkldnn::memory(conv_fwd_pd_.get()->bias_desc(), cpu_engine, nullptr));
conv_fwd_ = onnxruntime::make_unique<mkldnn::convolution_forward>(
mkldnn::convolution_forward(*conv_fwd_pd_));
net.push_back(*conv_fwd_);
net_args.push_back({{MKLDNN_ARG_SRC, *src_mem_},
{MKLDNN_ARG_WEIGHTS, *filter_mem_},
@ -389,8 +398,8 @@ class MklDnnConvBatchNorm : public MklDnnKernel {
mkldnn::memory src = mkldnn::memory({{filter_dims_mkl}, MklDnnType<T>(), filter_format_}, cpu_engine, (void*)weights_scaled_by_axis.data());
IAllocatorUniquePtr<void> filter_reorder_buffer =
IAllocator::MakeUniquePtr<void>(alloc_, filter_size_);
filter_dst_mem.reset(
new mkldnn::memory(conv_fwd_pd_->weights_desc(), cpu_engine, filter_reorder_buffer.get()));
filter_dst_mem = onnxruntime::make_unique<mkldnn::memory>(
mkldnn::memory(conv_fwd_pd_->weights_desc(), cpu_engine, filter_reorder_buffer.get()));
mkldnn::reorder(src, *filter_dst_mem)
.execute(cpu_engine, src, *filter_dst_mem);
@ -404,8 +413,8 @@ class MklDnnConvBatchNorm : public MklDnnKernel {
auto bias_size = conv_fwd_pd_.get()->bias_desc().get_size();
IAllocatorUniquePtr<void> bias_buffer =
IAllocator::MakeUniquePtr<void>(alloc_, bias_size);
bias_mem.reset(
new mkldnn::memory(conv_fwd_pd_->bias_desc(), cpu_engine, bias_buffer.get()));
bias_mem = onnxruntime::make_unique<mkldnn::memory>(
mkldnn::memory(conv_fwd_pd_->bias_desc(), cpu_engine, bias_buffer.get()));
float* bias_buffer_data = static_cast<float*>(bias_buffer.get());
if (mklnode_ptr_->num_inputs == 7) {
const OrtValue* conv_bias_tensor = ort.KernelContext_GetInput(context, input_index + 2);

View file

@ -28,7 +28,7 @@ class SubgraphPrimitive : public PrimitiveBase {
OrtKernelContext* context,
const SubgraphParams& params)
: cpu_engine_(GetEngine()) {
context_.stream.reset(new mkldnn::stream(cpu_engine_));
context_.stream = onnxruntime::make_unique<mkldnn::stream>(mkldnn::stream(cpu_engine_));
if (context_.net.size() == 0) {
CreateKernels(params);
@ -67,7 +67,7 @@ class SubgraphPrimitive : public PrimitiveBase {
std::ostringstream os;
os << "Conv-" << mkldnn_node.node_index << "-";
std::shared_ptr<MklDnnConv<T>> kernel;
kernel.reset(new MklDnnConv<T>(mkldnn_node, params.provider, params.attributes, os.str()));
kernel = std::make_shared<MklDnnConv<T>>(mkldnn_node, params.provider, params.attributes, os.str());
for (auto index : mkldnn_node.parent_nodes) {
kernel->parents_.push_back(context_.kernels[index]);
}
@ -76,7 +76,7 @@ class SubgraphPrimitive : public PrimitiveBase {
std::ostringstream os;
os << "Conv-" << mkldnn_node.node_index << "-";
std::shared_ptr<MklDnnConv<T>> kernel;
kernel.reset(new MklDnnConv<T>(mkldnn_node, params.provider, params.attributes, os.str()));
kernel = std::make_shared<MklDnnConv<T>>(mkldnn_node, params.provider, params.attributes, os.str());
kernel->fuse_relu_ = true;
for (auto index : mkldnn_node.parent_nodes) {
kernel->parents_.push_back(context_.kernels[index]);
@ -86,7 +86,7 @@ class SubgraphPrimitive : public PrimitiveBase {
std::ostringstream os;
os << "Relu-" << mkldnn_node.node_index << "-";
std::shared_ptr<MklDnnRelu<T>> kernel;
kernel.reset(new MklDnnRelu<T>(mkldnn_node, params.provider, params.attributes, os.str()));
kernel = std::make_shared<MklDnnRelu<T>>(mkldnn_node, params.provider, params.attributes, os.str());
for (auto index : mkldnn_node.parent_nodes) {
kernel->parents_.push_back(context_.kernels[index]);
}
@ -95,7 +95,7 @@ class SubgraphPrimitive : public PrimitiveBase {
std::ostringstream os;
os << "BatchNormalization-" << mkldnn_node.node_index << "-";
std::shared_ptr<MklDnnBatchNorm<T>> kernel;
kernel.reset(new MklDnnBatchNorm<T>(mkldnn_node, params.provider, params.attributes, os.str()));
kernel = std::make_shared<MklDnnBatchNorm<T>>(mkldnn_node, params.provider, params.attributes, os.str());
for (auto index : mkldnn_node.parent_nodes) {
kernel->parents_.push_back(context_.kernels[index]);
}
@ -104,7 +104,7 @@ class SubgraphPrimitive : public PrimitiveBase {
std::ostringstream os;
os << "BatchNormalization-" << mkldnn_node.node_index << "-";
std::shared_ptr<MklDnnBatchNorm<T>> kernel;
kernel.reset(new MklDnnBatchNorm<T>(mkldnn_node, params.provider, params.attributes, os.str()));
kernel = std::make_shared<MklDnnBatchNorm<T>>(mkldnn_node, params.provider, params.attributes, os.str());
kernel->fuse_relu_ = true;
for (auto index : mkldnn_node.parent_nodes) {
kernel->parents_.push_back(context_.kernels[index]);
@ -114,7 +114,7 @@ class SubgraphPrimitive : public PrimitiveBase {
std::ostringstream os;
os << "Conv-" << mkldnn_node.node_index << "-";
std::shared_ptr<MklDnnConvBatchNorm<T>> kernel;
kernel.reset(new MklDnnConvBatchNorm<T>(mkldnn_node, params.provider, params.attributes, os.str()));
kernel = std::make_shared<MklDnnConvBatchNorm<T>>(mkldnn_node, params.provider, params.attributes, os.str());
for (auto index : mkldnn_node.parent_nodes) {
kernel->parents_.push_back(context_.kernels[index]);
}
@ -123,7 +123,7 @@ class SubgraphPrimitive : public PrimitiveBase {
std::ostringstream os;
os << "Conv-" << mkldnn_node.node_index << "-";
std::shared_ptr<MklDnnConvBatchNorm<T>> kernel;
kernel.reset(new MklDnnConvBatchNorm<T>(mkldnn_node, params.provider, params.attributes, os.str()));
kernel = std::make_shared<MklDnnConvBatchNorm<T>>(mkldnn_node, params.provider, params.attributes, os.str());
kernel->fuse_relu_ = true;
for (auto index : mkldnn_node.parent_nodes) {
kernel->parents_.push_back(context_.kernels[index]);
@ -133,7 +133,7 @@ class SubgraphPrimitive : public PrimitiveBase {
std::ostringstream os;
os << "MaxPool-" << mkldnn_node.node_index << "-";
std::shared_ptr<MklDnnPool<T>> kernel;
kernel.reset(new MklDnnPool<T>(mkldnn_node, params.provider, params.attributes, os.str()));
kernel = std::make_shared<MklDnnPool<T>>(mkldnn_node, params.provider, params.attributes, os.str());
for (auto index : mkldnn_node.parent_nodes) {
kernel->parents_.push_back(context_.kernels[index]);
}
@ -142,7 +142,7 @@ class SubgraphPrimitive : public PrimitiveBase {
std::ostringstream os;
os << "GlobalMaxPool-" << mkldnn_node.node_index << "-";
std::shared_ptr<MklDnnPool<T>> kernel;
kernel.reset(new MklDnnPool<T>(mkldnn_node, params.provider, params.attributes, os.str()));
kernel = std::make_shared<MklDnnPool<T>>(mkldnn_node, params.provider, params.attributes, os.str());
for (auto index : mkldnn_node.parent_nodes) {
kernel->parents_.push_back(context_.kernels[index]);
}
@ -151,7 +151,7 @@ class SubgraphPrimitive : public PrimitiveBase {
std::ostringstream os;
os << "AveragePool-" << mkldnn_node.node_index << "-";
std::shared_ptr<MklDnnPool<T>> kernel;
kernel.reset(new MklDnnPool<T>(mkldnn_node, params.provider, params.attributes, os.str()));
kernel = std::make_shared<MklDnnPool<T>>(mkldnn_node, params.provider, params.attributes, os.str());
for (auto index : mkldnn_node.parent_nodes) {
kernel->parents_.push_back(context_.kernels[index]);
}
@ -160,7 +160,7 @@ class SubgraphPrimitive : public PrimitiveBase {
std::ostringstream os;
os << "GlobalAveragePool-" << mkldnn_node.node_index << "-";
std::shared_ptr<MklDnnPool<T>> kernel;
kernel.reset(new MklDnnPool<T>(mkldnn_node, params.provider, params.attributes, os.str()));
kernel = std::make_shared<MklDnnPool<T>>(mkldnn_node, params.provider, params.attributes, os.str());
for (auto index : mkldnn_node.parent_nodes) {
kernel->parents_.push_back(context_.kernels[index]);
}
@ -169,7 +169,7 @@ class SubgraphPrimitive : public PrimitiveBase {
std::ostringstream os;
os << "LRN-" << mkldnn_node.node_index << "-";
std::shared_ptr<MklDnnLrn<T>> kernel;
kernel.reset(new MklDnnLrn<T>(mkldnn_node, params.provider, params.attributes, os.str()));
kernel = std::make_shared<MklDnnLrn<T>>(mkldnn_node, params.provider, params.attributes, os.str());
for (auto index : mkldnn_node.parent_nodes) {
kernel->parents_.push_back(context_.kernels[index]);
}
@ -178,7 +178,7 @@ class SubgraphPrimitive : public PrimitiveBase {
std::ostringstream os;
os << "Sum-" << mkldnn_node.node_index << "-";
std::shared_ptr<MklDnnSum<T>> kernel;
kernel.reset(new MklDnnSum<T>(mkldnn_node, params.provider, params.attributes, os.str()));
kernel = std::make_shared<MklDnnSum<T>>(mkldnn_node, params.provider, params.attributes, os.str());
for (auto index : mkldnn_node.parent_nodes) {
kernel->parents_.push_back(context_.kernels[index]);
}

View file

@ -11,14 +11,14 @@ void MklDnnKernel::InitDstReorderOutput(mkldnn::engine& cpu_engine,
std::vector<mkldnn::primitive>& net,
std::vector<std::unordered_map<int, mkldnn::memory>>& net_args) {
// Allocate dst buffer if reorder is necessary
if (primitive_dst_desc_ != ort_source_desc_)
{
if (primitive_dst_desc_ != ort_source_desc_) {
// reorder to ONNXRuntime format
mkldnn::memory::dims dst_dims_mkl(
primitive_dst_shape_.GetDims().begin(), primitive_dst_shape_.GetDims().end());
mkldnn::memory::desc dst_des = mkldnn::memory::desc(dst_dims_mkl,
data_type, ort_source_format_);
reorder_dst_mem_to_.reset(new mkldnn::memory(dst_des, cpu_engine));
reorder_dst_mem_to_ = onnxruntime::make_unique<mkldnn::memory>(
mkldnn::memory(dst_des, cpu_engine));
net.push_back(mkldnn::reorder(*primitive_dst_mem_, *reorder_dst_mem_to_));
net_args.push_back({{MKLDNN_ARG_FROM, *primitive_dst_mem_},
{MKLDNN_ARG_TO, *reorder_dst_mem_to_}});

View file

@ -47,13 +47,13 @@ class MklDnnLrn : public MklDnnKernel {
ort_source_desc_ = mkldnn::memory::desc(
{src_dims}, MklDnnType<T>(), ort_source_format_);
src_md_.reset(new mkldnn::memory::desc(
{src_dims}, MklDnnType<T>(), ort_source_format_));
src_mem_.reset(
new mkldnn::memory(*src_md_, cpu_engine, nullptr));
src_md_ = onnxruntime::make_unique<mkldnn::memory::desc>(
mkldnn::memory::desc({src_dims}, MklDnnType<T>(), ort_source_format_));
src_mem_ = onnxruntime::make_unique<mkldnn::memory>(
mkldnn::memory(*src_md_, cpu_engine, nullptr));
} else {
src_md_.reset(
new mkldnn::memory::desc(parents_[0].get()->primitive_dst_desc_));
src_md_ = onnxruntime::make_unique<mkldnn::memory::desc>(
mkldnn::memory::desc(parents_[0].get()->primitive_dst_desc_));
src_mem_ = parents_[0].get()->primitive_dst_mem_;
x_shape = parents_[0].get()->primitive_dst_shape_;
ort_source_format_ = parents_[0].get()->ort_source_format_;
@ -64,12 +64,12 @@ class MklDnnLrn : public MklDnnKernel {
primitive_dst_shape_ = TensorShape(x_shape);
mkldnn::algorithm algo = mkldnn::algorithm::lrn_across_channels;
fwd_desc_.reset(new mkldnn::lrn_forward::desc(
mkldnn::prop_kind::forward_scoring, algo, *src_md_,
size_, alpha_, beta_, bias_));
fwd_desc_ = onnxruntime::make_unique<mkldnn::lrn_forward::desc>(
mkldnn::lrn_forward::desc(mkldnn::prop_kind::forward_scoring, algo, *src_md_,
size_, alpha_, beta_, bias_));
fwd_primitive_desc_.reset(new mkldnn::lrn_forward::primitive_desc(
*fwd_desc_, cpu_engine));
fwd_primitive_desc_ = onnxruntime::make_unique<mkldnn::lrn_forward::primitive_desc>(
mkldnn::lrn_forward::primitive_desc(*fwd_desc_, cpu_engine));
primitive_src_desc_ = fwd_primitive_desc_.get()->src_desc();
primitive_dst_desc_ = fwd_primitive_desc_.get()->dst_desc();
@ -79,22 +79,22 @@ class MklDnnLrn : public MklDnnKernel {
if (primitive_dst_desc_ != ort_source_desc_) {
// reorder neded. Use primitive output as input to reorder and
// allocate buffer for reorder output, final output of this subgraph
primitive_dst_mem_.reset(
new mkldnn::memory(fwd_primitive_desc_.get()->dst_desc(), cpu_engine));
primitive_dst_mem_ = onnxruntime::make_unique<mkldnn::memory>(
mkldnn::memory(fwd_primitive_desc_.get()->dst_desc(), cpu_engine));
} else {
// Last node but re-order not needed. Allocate buffer to output of this node
primitive_dst_mem_.reset(
new mkldnn::memory(fwd_primitive_desc_.get()->dst_desc(), cpu_engine, nullptr));
primitive_dst_mem_ = onnxruntime::make_unique<mkldnn::memory>(
mkldnn::memory(fwd_primitive_desc_.get()->dst_desc(), cpu_engine, nullptr));
}
} else {
// Intermediate node. Use mkldnn kernel internal memory for output and
// use this as input to next node.
primitive_dst_mem_.reset(
new mkldnn::memory(fwd_primitive_desc_.get()->dst_desc(), cpu_engine));
primitive_dst_mem_ = onnxruntime::make_unique<mkldnn::memory>(
mkldnn::memory(fwd_primitive_desc_.get()->dst_desc(), cpu_engine));
}
lrn_fwd_.reset(
new mkldnn::lrn_forward(*fwd_primitive_desc_));
lrn_fwd_ = onnxruntime::make_unique<mkldnn::lrn_forward>(
mkldnn::lrn_forward(*fwd_primitive_desc_));
net.push_back(*lrn_fwd_);
net_args.push_back({{MKLDNN_ARG_SRC, *src_mem_},
{MKLDNN_ARG_DST, *primitive_dst_mem_}});
@ -126,7 +126,7 @@ class MklDnnLrn : public MklDnnKernel {
OrtValue* output = ort.KernelContext_GetOutput(context, mklnode_ptr_->output_index, &y_dims[0], static_cast<int>(primitive_dst_shape_.GetDims().size()));
T* dst_data = ort.GetTensorMutableData<T>(output);
if (primitive_dst_desc_!= ort_source_desc_) {
if (primitive_dst_desc_ != ort_source_desc_) {
reorder_dst_mem_to_->set_data_handle(dst_data);
} else {
primitive_dst_mem_->set_data_handle(dst_data);

View file

@ -48,7 +48,8 @@ class MklDnnPool : public MklDnnKernel {
// reorder for better performance
mkldnn::memory::format_tag src_format = GetAVXFormat(src_dims_mkl);
src_md_.reset(new mkldnn::memory::desc({src_dims_mkl}, MklDnnType<T>(), src_format));
src_md_ = onnxruntime::make_unique<mkldnn::memory::desc>(
mkldnn::memory::desc({src_dims_mkl}, MklDnnType<T>(), src_format));
} else {
// get the output of previous node (mkldnn block propagation).
// TODO Sourcenode will set src of this node.
@ -63,10 +64,11 @@ class MklDnnPool : public MklDnnKernel {
if (source_desc_ == ort_source_desc_) {
// reorder for better performance
mkldnn::memory::format_tag fmt = GetAVXFormat(src_dims_mkl);
src_md_.reset(new mkldnn::memory::desc(
{src_dims_mkl}, MklDnnType<T>(), fmt));
src_md_ = onnxruntime::make_unique<mkldnn::memory::desc>(
mkldnn::memory::desc({src_dims_mkl}, MklDnnType<T>(), fmt));
} else {
src_md_.reset(new mkldnn::memory::desc(parents_[0].get()->primitive_dst_mem_->get_desc()));
src_md_ = onnxruntime::make_unique<mkldnn::memory::desc>(
mkldnn::memory::desc(parents_[0].get()->primitive_dst_mem_->get_desc()));
}
}
@ -95,8 +97,8 @@ class MklDnnPool : public MklDnnKernel {
mkldnn::memory::dims padding_left_mkl(pads_.begin(), pads_.begin() + (pads_.size() / 2));
mkldnn::memory::dims padding_right_mkl(pads_.begin() + (pads_.size() / 2), pads_.end());
primitive_dst_md_.reset(new mkldnn::memory::desc(
{dst_dims_mkl}, MklDnnType<T>(), mkldnn::memory::format_tag::any));
primitive_dst_md_ = onnxruntime::make_unique<mkldnn::memory::desc>(
mkldnn::memory::desc({dst_dims_mkl}, MklDnnType<T>(), mkldnn::memory::format_tag::any));
mkldnn::algorithm algo = mkldnn::algorithm::pooling_max;
if (op_name_ == "AveragePool" || op_name_ == "GlobalAveragePool") {
@ -105,47 +107,50 @@ class MklDnnPool : public MklDnnKernel {
algo = mkldnn::algorithm::pooling_avg_include_padding;
}
}
fwd_desc_.reset(new mkldnn::pooling_forward::desc(
mkldnn::prop_kind::forward_inference, algo,
*src_md_, *primitive_dst_md_,
strides_mkl, kernel_mkl,
padding_left_mkl, padding_right_mkl));
fwd_desc_ = onnxruntime::make_unique<mkldnn::pooling_forward::desc>(
mkldnn::pooling_forward::desc(mkldnn::prop_kind::forward_inference, algo,
*src_md_, *primitive_dst_md_,
strides_mkl, kernel_mkl,
padding_left_mkl, padding_right_mkl));
fwd_primitive_desc_.reset(new mkldnn::pooling_forward::primitive_desc(
*fwd_desc_, cpu_engine));
fwd_primitive_desc_ = onnxruntime::make_unique<mkldnn::pooling_forward::primitive_desc>(
mkldnn::pooling_forward::primitive_desc(*fwd_desc_, cpu_engine));
if (mklnode_ptr_->parent_nodes.empty()) {
// Sub-graph's first node. Read input from input buffer
src_mem_.reset(new mkldnn::memory(
fwd_primitive_desc_.get()->src_desc(), cpu_engine, nullptr));
src_mem_ = onnxruntime::make_unique<mkldnn::memory>(
mkldnn::memory(fwd_primitive_desc_.get()->src_desc(), cpu_engine, nullptr));
} else {
// Sub-graph's inner node. set input to parent's output
src_mem_ = parents_[0].get()->primitive_dst_mem_;
}
primitive_src_desc_ = fwd_primitive_desc_.get()->src_desc();
primitive_src_desc_ = fwd_primitive_desc_.get()->src_desc();
primitive_dst_desc_ = fwd_primitive_desc_.get()->dst_desc();
src_size_ = fwd_primitive_desc_.get()->src_desc().get_size();
dst_size_ = fwd_primitive_desc_.get()->dst_desc().get_size();
// reorder source memory for best performance (AVX512);
// reorder source memory for best performance (AVX512);
if (primitive_src_desc_ != source_desc_) {
mkldnn::memory::dims src_dims(x_shape_.GetDims().begin(), x_shape_.GetDims().end());
auto pd = mkldnn::memory::desc(source_desc_);
if (mklnode_ptr_->parent_nodes.empty())
src_mem_from_.reset(new mkldnn::memory(pd, cpu_engine, nullptr));
src_mem_from_ = onnxruntime::make_unique<mkldnn::memory>(
mkldnn::memory(pd, cpu_engine, nullptr));
else
src_mem_from_ = parents_[0].get()->primitive_dst_mem_;
src_mem_.reset(new mkldnn::memory(fwd_primitive_desc_->src_desc(), cpu_engine, nullptr));
src_mem_ = onnxruntime::make_unique<mkldnn::memory>(
mkldnn::memory(fwd_primitive_desc_->src_desc(), cpu_engine, nullptr));
net.push_back(mkldnn::reorder(*src_mem_from_, *src_mem_));
net_args.push_back({{MKLDNN_ARG_FROM, *src_mem_from_},
{MKLDNN_ARG_TO, *src_mem_}});
} else {
if (mklnode_ptr_->parent_nodes.empty()) {
src_mem_.reset(new mkldnn::memory(fwd_primitive_desc_->src_desc(), cpu_engine, nullptr));
src_mem_ = onnxruntime::make_unique<mkldnn::memory>(
mkldnn::memory(fwd_primitive_desc_->src_desc(), cpu_engine, nullptr));
} else {
src_mem_ = parents_[0].get()->primitive_dst_mem_;
}
@ -156,21 +161,21 @@ class MklDnnPool : public MklDnnKernel {
if (primitive_dst_desc_ != ort_source_desc_) {
// reorder neded. Use primitive output as input to reorder and
// allocate buffer for reorder output, final output of this subgraph
primitive_dst_mem_.reset(
new mkldnn::memory(fwd_primitive_desc_.get()->dst_desc(), cpu_engine));
primitive_dst_mem_ = onnxruntime::make_unique<mkldnn::memory>(
mkldnn::memory(fwd_primitive_desc_.get()->dst_desc(), cpu_engine));
} else {
// Last node but re-order not needed. Allocate buffer to output of this node
primitive_dst_mem_.reset(
new mkldnn::memory(fwd_primitive_desc_.get()->dst_desc(), cpu_engine, nullptr));
primitive_dst_mem_ = onnxruntime::make_unique<mkldnn::memory>(
mkldnn::memory(fwd_primitive_desc_.get()->dst_desc(), cpu_engine, nullptr));
}
} else {
// Intermediate node. Use mkldnn kernel internal memory for output and
// use this as input to next node.
primitive_dst_mem_.reset(
new mkldnn::memory(fwd_primitive_desc_.get()->dst_desc(), cpu_engine));
primitive_dst_mem_ = onnxruntime::make_unique<mkldnn::memory>(
mkldnn::memory(fwd_primitive_desc_.get()->dst_desc(), cpu_engine));
}
pool_fwd_.reset(
new mkldnn::pooling_forward(*fwd_primitive_desc_));
pool_fwd_ = onnxruntime::make_unique<mkldnn::pooling_forward>(
mkldnn::pooling_forward(*fwd_primitive_desc_));
net.push_back(*pool_fwd_);
net_args.push_back({{MKLDNN_ARG_SRC, *src_mem_},

View file

@ -89,25 +89,28 @@ class MklDnnSum : public MklDnnKernel {
}
}
primitive_dst_md_.reset(new mkldnn::memory::desc(
{dst_dims_mkl}, MklDnnType<T>(), mkldnn::memory::format_tag::any));
sum_pd_.reset(new mkldnn::sum::primitive_desc(
*primitive_dst_md_, coeff, srcs_pd_, cpu_engine));
primitive_dst_md_ = onnxruntime::make_unique<mkldnn::memory::desc>(
mkldnn::memory::desc({dst_dims_mkl}, MklDnnType<T>(), mkldnn::memory::format_tag::any));
sum_pd_ = onnxruntime::make_unique<mkldnn::sum::primitive_desc>(
mkldnn::sum::primitive_desc(*primitive_dst_md_, coeff, srcs_pd_, cpu_engine));
if (mklnode_ptr_->output_index >= 0) {
// last node of sub-graph. need to allocate memory for output_tensor
if (primitive_dst_desc_ != ort_source_desc_) {
// reorder neded. Use primitive output as input to reorder and
// allocate buffer for reorder output, final output of this subgraph
primitive_dst_mem_.reset(new mkldnn::memory(sum_pd_->dst_desc(), cpu_engine));
primitive_dst_mem_ = onnxruntime::make_unique<mkldnn::memory>(
mkldnn::memory(sum_pd_->dst_desc(), cpu_engine));
} else {
// Last node but re-order not needed. Allocate buffer to output of this node
primitive_dst_mem_.reset(new mkldnn::memory(sum_pd_->dst_desc(), cpu_engine, nullptr));
primitive_dst_mem_ = onnxruntime::make_unique<mkldnn::memory>(
mkldnn::memory(sum_pd_->dst_desc(), cpu_engine, nullptr));
}
} else {
// Intermediate node. Use mkldnn kernel internal memory for output and
// use this as input to next node.
primitive_dst_mem_.reset(new mkldnn::memory(sum_pd_->dst_desc(), cpu_engine));
primitive_dst_mem_ = onnxruntime::make_unique<mkldnn::memory>(
mkldnn::memory(sum_pd_->dst_desc(), cpu_engine));
}
primitive_dst_desc_ = sum_pd_->dst_desc();