mirror of
https://github.com/saymrwulf/onnxruntime.git
synced 2026-05-14 20:48:00 +00:00
MKL-DNN EP (#2149)
* make_unique, make_shared * make_unique, make_shared
This commit is contained in:
parent
6445e7182c
commit
95fef56dc8
10 changed files with 246 additions and 215 deletions
|
|
@ -202,7 +202,8 @@ std::vector<std::unique_ptr<ComputeCapability>> MKLDNNExecutionProvider::GetCapa
|
|||
// There are several identical graphs in Model zoo that differ only in
|
||||
// few attribute values. GetGraphName returns graph-name + first-node-output name
|
||||
std::string graph_name = GetGraphName(graph_viewer);
|
||||
subgraph_ptr.reset(new mkl_dnn::Subgraph(graph_name));
|
||||
subgraph_ptr = onnxruntime::make_unique<mkl_dnn::Subgraph>(
|
||||
mkl_dnn::Subgraph(graph_name));
|
||||
|
||||
// output name to node index map. Using it to find sub-graph end nodes
|
||||
// if the output of a node is not an input to any node in a sub-graph, it is an end node
|
||||
|
|
@ -221,7 +222,7 @@ std::vector<std::unique_ptr<ComputeCapability>> MKLDNNExecutionProvider::GetCapa
|
|||
node_index++;
|
||||
if (subgraph_ptr->mkldnn_nodes.size() > 0) {
|
||||
CreateMetaDef(graph_viewer, subgraph_attributes, subgraph_ptr, sub_var, result);
|
||||
subgraph_ptr.reset(new mkl_dnn::Subgraph(graph_name));
|
||||
subgraph_ptr = std::make_shared<mkl_dnn::Subgraph>(mkl_dnn::Subgraph(graph_name));
|
||||
subgraph_attributes.clear();
|
||||
output_to_source_node_map.clear();
|
||||
}
|
||||
|
|
@ -281,7 +282,7 @@ std::vector<std::unique_ptr<ComputeCapability>> MKLDNNExecutionProvider::GetCapa
|
|||
if (input_from_subgraph == false) {
|
||||
CreateMetaDef(graph_viewer, subgraph_attributes, subgraph_ptr, sub_var, result);
|
||||
subgraph_attributes.clear();
|
||||
subgraph_ptr.reset(new mkl_dnn::Subgraph(graph_name));
|
||||
subgraph_ptr = std::make_shared<mkl_dnn::Subgraph>(mkl_dnn::Subgraph(graph_name));
|
||||
output_to_source_node_map.clear();
|
||||
}
|
||||
}
|
||||
|
|
@ -320,7 +321,7 @@ std::vector<std::unique_ptr<ComputeCapability>> MKLDNNExecutionProvider::GetCapa
|
|||
}
|
||||
if (create_subgraph) {
|
||||
CreateMetaDef(graph_viewer, subgraph_attributes, subgraph_ptr, sub_var, result);
|
||||
subgraph_ptr.reset(new mkl_dnn::Subgraph(graph_name));
|
||||
subgraph_ptr = std::make_shared<mkl_dnn::Subgraph>(mkl_dnn::Subgraph(graph_name));
|
||||
subgraph_attributes.clear();
|
||||
output_to_source_node_map.clear();
|
||||
}
|
||||
|
|
@ -330,7 +331,7 @@ std::vector<std::unique_ptr<ComputeCapability>> MKLDNNExecutionProvider::GetCapa
|
|||
} else {
|
||||
if (!sub_var.subgraph_node_indexes.empty()) {
|
||||
CreateMetaDef(graph_viewer, subgraph_attributes, subgraph_ptr, sub_var, result);
|
||||
subgraph_ptr.reset(new mkl_dnn::Subgraph(graph_name));
|
||||
subgraph_ptr = std::make_shared<mkl_dnn::Subgraph>(mkl_dnn::Subgraph(graph_name));
|
||||
subgraph_attributes.clear();
|
||||
output_to_source_node_map.clear();
|
||||
}
|
||||
|
|
@ -339,7 +340,7 @@ std::vector<std::unique_ptr<ComputeCapability>> MKLDNNExecutionProvider::GetCapa
|
|||
} // graph_viewer node iterator ends
|
||||
if (!sub_var.subgraph_node_indexes.empty()) {
|
||||
CreateMetaDef(graph_viewer, subgraph_attributes, subgraph_ptr, sub_var, result);
|
||||
subgraph_ptr.reset(new mkl_dnn::Subgraph(graph_name));
|
||||
subgraph_ptr = std::make_shared<mkl_dnn::Subgraph>(mkl_dnn::Subgraph(graph_name));
|
||||
subgraph_attributes.clear();
|
||||
output_to_source_node_map.clear();
|
||||
}
|
||||
|
|
|
|||
|
|
@ -27,7 +27,7 @@ class MklDnnRelu : public MklDnnKernel {
|
|||
OrtKernelContext* context,
|
||||
mkldnn::engine& cpu_engine,
|
||||
std::vector<mkldnn::primitive>& net,
|
||||
std::vector<std::unordered_map<int, mkldnn::memory>> &net_args) {
|
||||
std::vector<std::unordered_map<int, mkldnn::memory>>& net_args) {
|
||||
Ort::CustomOpApi ort{*api};
|
||||
int input_index = mklnode_ptr_->input_start_index < 0 ? 0 : mklnode_ptr_->input_start_index;
|
||||
|
||||
|
|
@ -45,12 +45,12 @@ class MklDnnRelu : public MklDnnKernel {
|
|||
|
||||
ort_source_format_ = GetSourceFormat(static_cast<int>(xdim));
|
||||
|
||||
x_shape = TensorShape(xshape, xdim);
|
||||
x_shape = TensorShape(xshape, xdim);
|
||||
|
||||
if (x_shape.NumDimensions() == 0) {
|
||||
primitive_created_ = Status(common::ONNXRUNTIME, common::INVALID_ARGUMENT, "Shape of size zero " + x_shape.ToString());
|
||||
return primitive_created_;
|
||||
}
|
||||
if (x_shape.NumDimensions() == 0) {
|
||||
primitive_created_ = Status(common::ONNXRUNTIME, common::INVALID_ARGUMENT, "Shape of size zero " + x_shape.ToString());
|
||||
return primitive_created_;
|
||||
}
|
||||
|
||||
mkldnn::memory::dims src_dims(
|
||||
x_shape.GetDims().begin(), x_shape.GetDims().end());
|
||||
|
|
@ -58,13 +58,13 @@ class MklDnnRelu : public MklDnnKernel {
|
|||
ort_source_desc_ = mkldnn::memory::desc(
|
||||
{src_dims}, MklDnnType<T>(), ort_source_format_);
|
||||
source_desc_ = ort_source_desc_;
|
||||
src_md_.reset(new mkldnn::memory::desc(
|
||||
{src_dims}, MklDnnType<T>(), ort_source_format_));
|
||||
src_mem_.reset(
|
||||
new mkldnn::memory({{src_dims}, MklDnnType<T>(), ort_source_format_}, cpu_engine, nullptr));
|
||||
src_md_ = onnxruntime::make_unique<mkldnn::memory::desc>(
|
||||
mkldnn::memory::desc({src_dims}, MklDnnType<T>(), ort_source_format_));
|
||||
src_mem_ = onnxruntime::make_unique<mkldnn::memory>(
|
||||
mkldnn::memory({{src_dims}, MklDnnType<T>(), ort_source_format_}, cpu_engine, nullptr));
|
||||
} else {
|
||||
src_md_.reset(
|
||||
new mkldnn::memory::desc(parents_[0].get()->primitive_dst_desc_));
|
||||
src_md_ = onnxruntime::make_unique<mkldnn::memory::desc>(
|
||||
mkldnn::memory::desc(parents_[0].get()->primitive_dst_desc_));
|
||||
src_mem_ = parents_[0].get()->primitive_dst_mem_;
|
||||
x_shape = parents_[0].get()->primitive_dst_shape_;
|
||||
ort_source_format_ = parents_[0].get()->ort_source_format_;
|
||||
|
|
@ -76,13 +76,12 @@ class MklDnnRelu : public MklDnnKernel {
|
|||
|
||||
mkldnn::memory::dims dst_dims_mkl(primitive_dst_shape_.GetDims().begin(), primitive_dst_shape_.GetDims().end());
|
||||
mkldnn::algorithm algo = mkldnn::algorithm::eltwise_relu;
|
||||
fwd_desc_.reset(new mkldnn::eltwise_forward::desc(
|
||||
mkldnn::prop_kind::forward_inference, algo, *src_md_, 0));
|
||||
fwd_desc_ = onnxruntime::make_unique<mkldnn::eltwise_forward::desc>(
|
||||
mkldnn::eltwise_forward::desc(mkldnn::prop_kind::forward_inference, algo, *src_md_, 0));
|
||||
relu_fwd_pd_ = onnxruntime::make_unique<mkldnn::eltwise_forward::primitive_desc>(
|
||||
mkldnn::eltwise_forward::primitive_desc(*fwd_desc_, cpu_engine));
|
||||
|
||||
relu_fwd_pd_.reset(new mkldnn::eltwise_forward::primitive_desc(
|
||||
*fwd_desc_, cpu_engine));
|
||||
|
||||
primitive_src_desc_ = relu_fwd_pd_.get()->src_desc();
|
||||
primitive_src_desc_ = relu_fwd_pd_.get()->src_desc();
|
||||
primitive_dst_desc_ = relu_fwd_pd_.get()->dst_desc();
|
||||
|
||||
if (mklnode_ptr_->output_index >= 0) {
|
||||
|
|
@ -90,24 +89,24 @@ class MklDnnRelu : public MklDnnKernel {
|
|||
if (primitive_dst_desc_ != ort_source_desc_) {
|
||||
// reorder needed. Use primitive output as input to reorder and
|
||||
// allocate buffer for reorder output, final output of this subgraph
|
||||
primitive_dst_mem_.reset(new mkldnn::memory(relu_fwd_pd_.get()->dst_desc(), cpu_engine));
|
||||
primitive_dst_mem_ = std::make_shared<mkldnn::memory>(mkldnn::memory(relu_fwd_pd_.get()->dst_desc(), cpu_engine));
|
||||
} else {
|
||||
// Last node but re-order not needed. Allocate buffer to output of this node
|
||||
primitive_dst_mem_.reset(new mkldnn::memory(relu_fwd_pd_.get()->dst_desc(), cpu_engine, nullptr));
|
||||
primitive_dst_mem_ = std::make_shared<mkldnn::memory>(mkldnn::memory(relu_fwd_pd_.get()->dst_desc(), cpu_engine, nullptr));
|
||||
}
|
||||
} else {
|
||||
// Intermediate node. Use mkldnn kernel internal memory for output and
|
||||
// use this as input to next node.
|
||||
primitive_dst_mem_.reset(new mkldnn::memory(relu_fwd_pd_.get()->dst_desc(), cpu_engine));
|
||||
primitive_dst_mem_ = std::make_shared<mkldnn::memory>(mkldnn::memory(relu_fwd_pd_.get()->dst_desc(), cpu_engine));
|
||||
}
|
||||
|
||||
relu_fwd_.reset(
|
||||
new mkldnn::eltwise_forward(*relu_fwd_pd_));
|
||||
relu_fwd_ = onnxruntime::make_unique<mkldnn::eltwise_forward>(
|
||||
mkldnn::eltwise_forward(*relu_fwd_pd_));
|
||||
|
||||
net.push_back(*relu_fwd_);
|
||||
|
||||
net_args.push_back({{MKLDNN_ARG_SRC, *src_mem_},
|
||||
{MKLDNN_ARG_DST, *primitive_dst_mem_}});
|
||||
net_args.push_back({{MKLDNN_ARG_SRC, *src_mem_},
|
||||
{MKLDNN_ARG_DST, *primitive_dst_mem_}});
|
||||
|
||||
if (mklnode_ptr_->output_index >= 0) {
|
||||
// one of the end nodes. Allocate output buffer memory and
|
||||
|
|
@ -122,10 +121,10 @@ class MklDnnRelu : public MklDnnKernel {
|
|||
Status Bind(const OrtCustomOpApi* api, OrtKernelContext* context) override {
|
||||
Ort::CustomOpApi ort{*api};
|
||||
|
||||
if (primitive_created_ != Status::OK())
|
||||
if (primitive_created_ != Status::OK())
|
||||
return primitive_created_;
|
||||
|
||||
int input_index = mklnode_ptr_->input_start_index < 0 ? 0 : mklnode_ptr_->input_start_index;
|
||||
int input_index = mklnode_ptr_->input_start_index < 0 ? 0 : mklnode_ptr_->input_start_index;
|
||||
|
||||
if (mklnode_ptr_->parent_nodes.empty()) {
|
||||
// Sub-graph's first node. Read input from input buffer
|
||||
|
|
|
|||
|
|
@ -124,11 +124,11 @@ class MklDnnBatchNorm : public MklDnnKernel {
|
|||
ort_source_desc_ = mkldnn::memory::desc(
|
||||
{src_dims}, MklDnnType<T>(), ort_source_format_);
|
||||
source_desc_ = ort_source_desc_;
|
||||
src_md_.reset(new mkldnn::memory::desc(
|
||||
{src_dims}, MklDnnType<T>(), ort_source_format_));
|
||||
src_md_ = onnxruntime::make_unique<mkldnn::memory::desc>(
|
||||
mkldnn::memory::desc({src_dims}, MklDnnType<T>(), ort_source_format_));
|
||||
} else {
|
||||
src_md_.reset(
|
||||
new mkldnn::memory::desc(parents_[0].get()->primitive_dst_desc_));
|
||||
src_md_ = onnxruntime::make_unique<mkldnn::memory::desc>(
|
||||
mkldnn::memory::desc(parents_[0].get()->primitive_dst_desc_));
|
||||
x_shape = parents_[0].get()->primitive_dst_shape_;
|
||||
ort_source_format_ = parents_[0].get()->ort_source_format_;
|
||||
ort_source_desc_ = parents_[0].get()->ort_source_desc_;
|
||||
|
|
@ -195,33 +195,34 @@ class MklDnnBatchNorm : public MklDnnKernel {
|
|||
mkldnn::memory::dims dst_dims_mkl(
|
||||
primitive_dst_shape_.GetDims().begin(), primitive_dst_shape_.GetDims().end());
|
||||
|
||||
scale_shift_md_.reset(new mkldnn::memory::desc(
|
||||
{2, scale_dims_mkl[0]}, MklDnnType<T>(), mkldnn::memory::format_tag::nc));
|
||||
mean_md_.reset(new mkldnn::memory::desc(
|
||||
{mean_dims_mkl}, MklDnnType<T>(), mkldnn::memory::format_tag::x));
|
||||
var_md_.reset(new mkldnn::memory::desc(
|
||||
{var_dims_mkl}, MklDnnType<T>(), mkldnn::memory::format_tag::x));
|
||||
primitive_dst_md_.reset(new mkldnn::memory::desc(
|
||||
{dst_dims_mkl}, MklDnnType<T>(), mkldnn::memory::format_tag::any));
|
||||
scale_shift_md_ = onnxruntime::make_unique<mkldnn::memory::desc>(
|
||||
mkldnn::memory::desc({2, scale_dims_mkl[0]}, MklDnnType<T>(), mkldnn::memory::format_tag::nc));
|
||||
mean_md_ = onnxruntime::make_unique<mkldnn::memory::desc>(
|
||||
mkldnn::memory::desc({mean_dims_mkl}, MklDnnType<T>(), mkldnn::memory::format_tag::x));
|
||||
var_md_ = onnxruntime::make_unique<mkldnn::memory::desc>(
|
||||
mkldnn::memory::desc({var_dims_mkl}, MklDnnType<T>(), mkldnn::memory::format_tag::x));
|
||||
primitive_dst_md_ = onnxruntime::make_unique<mkldnn::memory::desc>(
|
||||
mkldnn::memory::desc({dst_dims_mkl}, MklDnnType<T>(), mkldnn::memory::format_tag::any));
|
||||
|
||||
// scale_shift_mem will allocate 2*C*sizeof(float) buffer
|
||||
//
|
||||
scale_shift_mem_.reset(
|
||||
new mkldnn::memory({*scale_shift_md_, cpu_engine}));
|
||||
scale_shift_mem_ = onnxruntime::make_unique<mkldnn::memory>(
|
||||
mkldnn::memory({*scale_shift_md_, cpu_engine}));
|
||||
|
||||
mean_mem_.reset(
|
||||
new mkldnn::memory(*mean_md_, cpu_engine, nullptr));
|
||||
var_mem_.reset(
|
||||
new mkldnn::memory(*var_md_, cpu_engine, nullptr));
|
||||
mean_mem_ = onnxruntime::make_unique<mkldnn::memory>(
|
||||
mkldnn::memory(*mean_md_, cpu_engine, nullptr));
|
||||
var_mem_ = onnxruntime::make_unique<mkldnn::memory>(
|
||||
mkldnn::memory(*var_md_, cpu_engine, nullptr));
|
||||
|
||||
batchnorm_fwd_.reset(new mkldnn::batch_normalization_forward::desc(
|
||||
mkldnn::prop_kind::forward_inference, *src_md_, epsilon_,
|
||||
mkldnn::normalization_flags::use_scale_shift |
|
||||
mkldnn::normalization_flags::use_global_stats));
|
||||
batchnorm_fwd_ = onnxruntime::make_unique<mkldnn::batch_normalization_forward::desc>(
|
||||
mkldnn::batch_normalization_forward::desc(
|
||||
mkldnn::prop_kind::forward_inference, *src_md_, epsilon_,
|
||||
mkldnn::normalization_flags::use_scale_shift |
|
||||
mkldnn::normalization_flags::use_global_stats));
|
||||
|
||||
if (fuse_relu_) {
|
||||
mkldnn::primitive_attr attr;
|
||||
// attr.set_int_output_round_mode(mkldnn::round_mode::round_nearest);
|
||||
// attr.set_int_output_round_mode(mkldnn::round_mode::round_nearest);
|
||||
// Execute RELU as Fuse PostOps
|
||||
const float ops_scale = 1.f;
|
||||
const float ops_alpha = 0.f; // relu negative slope
|
||||
|
|
@ -230,11 +231,11 @@ class MklDnnBatchNorm : public MklDnnKernel {
|
|||
ops.append_eltwise(ops_scale, mkldnn::algorithm::eltwise_relu, ops_alpha, ops_beta);
|
||||
attr.set_post_ops(ops);
|
||||
|
||||
batchnorm_fwd_pd_.reset(new mkldnn::batch_normalization_forward::primitive_desc(
|
||||
*batchnorm_fwd_, attr, cpu_engine));
|
||||
batchnorm_fwd_pd_ = onnxruntime::make_unique<mkldnn::batch_normalization_forward::primitive_desc>(
|
||||
mkldnn::batch_normalization_forward::primitive_desc(*batchnorm_fwd_, attr, cpu_engine));
|
||||
} else {
|
||||
batchnorm_fwd_pd_.reset(
|
||||
new mkldnn::batch_normalization_forward::primitive_desc(
|
||||
batchnorm_fwd_pd_ = onnxruntime::make_unique<mkldnn::batch_normalization_forward::primitive_desc>(
|
||||
mkldnn::batch_normalization_forward::primitive_desc(
|
||||
*batchnorm_fwd_, cpu_engine));
|
||||
}
|
||||
|
||||
|
|
@ -245,8 +246,8 @@ class MklDnnBatchNorm : public MklDnnKernel {
|
|||
batchnorm_fwd_pd_.get()->dst_desc());
|
||||
|
||||
if (mklnode_ptr_->parent_nodes.empty()) {
|
||||
src_mem_.reset(
|
||||
new mkldnn::memory(batchnorm_fwd_pd_.get()->src_desc(), cpu_engine, nullptr));
|
||||
src_mem_ = onnxruntime::make_unique<mkldnn::memory>(
|
||||
mkldnn::memory(batchnorm_fwd_pd_.get()->src_desc(), cpu_engine, nullptr));
|
||||
} else {
|
||||
src_mem_ = parents_[0].get()->primitive_dst_mem_;
|
||||
}
|
||||
|
|
@ -254,13 +255,16 @@ class MklDnnBatchNorm : public MklDnnKernel {
|
|||
if (mklnode_ptr_->output_index >= 0) {
|
||||
// Use mkldnn's internal output buffer
|
||||
if (primitive_dst_desc_ != ort_source_desc_) {
|
||||
primitive_dst_mem_.reset(new mkldnn::memory(batchnorm_fwd_pd_->dst_desc(), cpu_engine));
|
||||
primitive_dst_mem_ = onnxruntime::make_unique<mkldnn::memory>(
|
||||
mkldnn::memory(batchnorm_fwd_pd_->dst_desc(), cpu_engine));
|
||||
} else {
|
||||
primitive_dst_mem_.reset(new mkldnn::memory(batchnorm_fwd_pd_->dst_desc(), cpu_engine, nullptr));
|
||||
primitive_dst_mem_ = onnxruntime::make_unique<mkldnn::memory>(
|
||||
mkldnn::memory(batchnorm_fwd_pd_->dst_desc(), cpu_engine, nullptr));
|
||||
}
|
||||
} else {
|
||||
// last node of sub-graph. need to allocate memory for output_tensor
|
||||
primitive_dst_mem_.reset(new mkldnn::memory(batchnorm_fwd_pd_->dst_desc(), cpu_engine));
|
||||
primitive_dst_mem_ = onnxruntime::make_unique<mkldnn::memory>(
|
||||
mkldnn::memory(batchnorm_fwd_pd_->dst_desc(), cpu_engine));
|
||||
}
|
||||
auto bn = mkldnn::batch_normalization_forward(
|
||||
*batchnorm_fwd_pd_);
|
||||
|
|
|
|||
|
|
@ -76,7 +76,7 @@ class MklDnnConv : public MklDnnKernel {
|
|||
std::vector<mkldnn::primitive>& net,
|
||||
std::vector<std::unordered_map<int, mkldnn::memory>>& net_args) override {
|
||||
Ort::CustomOpApi ort{*api};
|
||||
stream_.reset(new mkldnn::stream(cpu_engine));
|
||||
stream_ = onnxruntime::make_unique<mkldnn::stream>(mkldnn::stream(cpu_engine));
|
||||
|
||||
int input_index = mklnode_ptr_->input_start_index < 0 ? 0 : mklnode_ptr_->input_start_index;
|
||||
const OrtValue* winput_tensor = ort.KernelContext_GetInput(context, input_index + 1);
|
||||
|
|
@ -108,8 +108,7 @@ class MklDnnConv : public MklDnnKernel {
|
|||
ort_source_desc_ = parents_[0].get()->ort_source_desc_;
|
||||
source_desc_ = parents_[0].get()->primitive_dst_desc_;
|
||||
|
||||
mkldnn::memory::dims src_dims_mkl(x_shape.GetDims().begin(), x_shape.GetDims().end());
|
||||
src_md_.reset(new mkldnn::memory::desc(source_desc_));
|
||||
mkldnn::memory::dims src_dims_mkl(x_shape.GetDims().begin(), x_shape.GetDims().end());
|
||||
}
|
||||
|
||||
primitive_created_ = ValidateInputShape(x_shape, w_shape);
|
||||
|
|
@ -165,8 +164,8 @@ class MklDnnConv : public MklDnnKernel {
|
|||
primitive_dst_shape_ = TensorShape(y_dims);
|
||||
TensorShape output_shape = y_shape.Slice(2);
|
||||
mkldnn::memory::dims dst_dims_mkl(y_dims.begin(), y_dims.end());
|
||||
primitive_dst_md_.reset(new mkldnn::memory::desc(
|
||||
{dst_dims_mkl}, MklDnnType<T>(), mkldnn::memory::format_tag::any));
|
||||
primitive_dst_md_ = onnxruntime::make_unique<mkldnn::memory::desc>(
|
||||
mkldnn::memory::desc({dst_dims_mkl}, MklDnnType<T>(), mkldnn::memory::format_tag::any));
|
||||
|
||||
mkldnn::memory::dims filter_dims_mkl;
|
||||
if (group_mkl == 1) {
|
||||
|
|
@ -221,36 +220,39 @@ class MklDnnConv : public MklDnnKernel {
|
|||
}
|
||||
}
|
||||
|
||||
mkldnn::memory::dims src_dims_mkl(x_shape.GetDims().begin(), x_shape.GetDims().end());
|
||||
if (mklnode_ptr_->parent_nodes.empty()) {
|
||||
mkldnn::memory::dims src_dims_mkl(x_shape.GetDims().begin(), x_shape.GetDims().end());
|
||||
|
||||
ort_source_format_ = src_format;
|
||||
src_md_.reset(new mkldnn::memory::desc({src_dims_mkl}, MklDnnType<T>(), mkldnn::memory::format_tag::any));
|
||||
ort_source_format_ = src_format;
|
||||
ort_source_desc_ = mkldnn::memory::desc({src_dims_mkl}, MklDnnType<T>(), src_format);
|
||||
source_desc_ = mkldnn::memory::desc({src_dims_mkl}, MklDnnType<T>(), src_format);
|
||||
}
|
||||
|
||||
src_md_ = onnxruntime::make_unique<mkldnn::memory::desc>(
|
||||
mkldnn::memory::desc({src_dims_mkl}, MklDnnType<T>(), mkldnn::memory::format_tag::any));
|
||||
|
||||
// Set the memory descriptors to format::any to allow MKLDNN to decide what the optimal memory layout should be
|
||||
// for the computation given the input
|
||||
filter_md_.reset(new mkldnn::memory::desc(
|
||||
{filter_dims_mkl}, MklDnnType<T>(), mkldnn::memory::format_tag::any));
|
||||
filter_md_ = onnxruntime::make_unique<mkldnn::memory::desc>(
|
||||
mkldnn::memory::desc({filter_dims_mkl}, MklDnnType<T>(), mkldnn::memory::format_tag::any));
|
||||
if (!bias_dims_mkl.empty())
|
||||
bias_md_.reset(new mkldnn::memory::desc(
|
||||
{bias_dims_mkl}, MklDnnType<T>(), mkldnn::memory::format_tag::any));
|
||||
bias_md_ = onnxruntime::make_unique<mkldnn::memory::desc>(
|
||||
mkldnn::memory::desc({bias_dims_mkl}, MklDnnType<T>(), mkldnn::memory::format_tag::any));
|
||||
|
||||
mkldnn::memory::dims conv_zero_padding = {0, 0};
|
||||
mkldnn::memory::dims conv_zero_padding = {0, 0};
|
||||
|
||||
if (!bias_dims_mkl.empty()) {
|
||||
fwd_desc_.reset(new mkldnn::convolution_forward::desc(
|
||||
mkldnn::prop_kind::forward_inference, mkldnn::algorithm::convolution_direct, *src_md_,
|
||||
*filter_md_, *bias_md_, *primitive_dst_md_,
|
||||
strides_mkl, dilations_mkl, padding_left_mkl,
|
||||
padding_right_mkl));
|
||||
fwd_desc_ = onnxruntime::make_unique<mkldnn::convolution_forward::desc>(
|
||||
mkldnn::convolution_forward::desc(
|
||||
mkldnn::prop_kind::forward_inference, mkldnn::algorithm::convolution_direct, *src_md_,
|
||||
*filter_md_, *bias_md_, *primitive_dst_md_,
|
||||
strides_mkl, dilations_mkl, padding_left_mkl,
|
||||
padding_right_mkl));
|
||||
} else {
|
||||
fwd_desc_.reset(new mkldnn::convolution_forward::desc(
|
||||
mkldnn::prop_kind::forward_inference, mkldnn::algorithm::convolution_direct, *src_md_,
|
||||
*filter_md_, *primitive_dst_md_, strides_mkl,
|
||||
dilations_mkl, padding_left_mkl, padding_right_mkl));
|
||||
fwd_desc_ = onnxruntime::make_unique<mkldnn::convolution_forward::desc>(
|
||||
mkldnn::convolution_forward::desc(
|
||||
mkldnn::prop_kind::forward_inference, mkldnn::algorithm::convolution_direct, *src_md_,
|
||||
*filter_md_, *primitive_dst_md_, strides_mkl,
|
||||
dilations_mkl, padding_left_mkl, padding_right_mkl));
|
||||
}
|
||||
|
||||
if (fuse_relu_) {
|
||||
|
|
@ -264,17 +266,17 @@ class MklDnnConv : public MklDnnKernel {
|
|||
ops.append_eltwise(ops_scale, mkldnn::algorithm::eltwise_relu, ops_alpha, ops_beta);
|
||||
attr.set_post_ops(ops);
|
||||
|
||||
conv_fwd_pd_.reset(new mkldnn::convolution_forward::primitive_desc(
|
||||
*fwd_desc_, attr, cpu_engine));
|
||||
conv_fwd_pd_ = onnxruntime::make_unique<mkldnn::convolution_forward::primitive_desc>(
|
||||
mkldnn::convolution_forward::primitive_desc(*fwd_desc_, attr, cpu_engine));
|
||||
} else {
|
||||
conv_fwd_pd_.reset(new mkldnn::convolution_forward::primitive_desc(
|
||||
*fwd_desc_, cpu_engine));
|
||||
conv_fwd_pd_ = onnxruntime::make_unique<mkldnn::convolution_forward::primitive_desc>(
|
||||
mkldnn::convolution_forward::primitive_desc(*fwd_desc_, cpu_engine));
|
||||
}
|
||||
|
||||
primitive_src_desc_ = static_cast<mkldnn::memory::desc>(
|
||||
conv_fwd_pd_.get()->src_desc());
|
||||
|
||||
filter_desc_= static_cast<mkldnn::memory::desc>(
|
||||
filter_desc_ = static_cast<mkldnn::memory::desc>(
|
||||
conv_fwd_pd_.get()->weights_desc());
|
||||
|
||||
primitive_dst_desc_ = static_cast<mkldnn::memory::desc>(
|
||||
|
|
@ -284,25 +286,28 @@ class MklDnnConv : public MklDnnKernel {
|
|||
filter_size_ = conv_fwd_pd_.get()->weights_desc().get_size();
|
||||
dst_size_ = conv_fwd_pd_.get()->dst_desc().get_size();
|
||||
|
||||
filter_mem_.reset(
|
||||
new mkldnn::memory(conv_fwd_pd_.get()->weights_desc(), cpu_engine, nullptr));
|
||||
filter_mem_ = onnxruntime::make_unique<mkldnn::memory>(
|
||||
mkldnn::memory(conv_fwd_pd_.get()->weights_desc(), cpu_engine, nullptr));
|
||||
|
||||
if (primitive_src_desc_ != source_desc_) {
|
||||
mkldnn::memory::dims src_dims(x_shape.GetDims().begin(), x_shape.GetDims().end());
|
||||
auto pd = mkldnn::memory::desc({{src_dims}, MklDnnType<T>(), ort_source_format_});
|
||||
|
||||
if (mklnode_ptr_->parent_nodes.empty())
|
||||
src_mem_from_.reset(new mkldnn::memory(pd, cpu_engine, nullptr));
|
||||
src_mem_from_ = onnxruntime::make_unique<mkldnn::memory>(
|
||||
mkldnn::memory(pd, cpu_engine, nullptr));
|
||||
else
|
||||
src_mem_from_ = parents_[0].get()->primitive_dst_mem_;
|
||||
|
||||
src_mem_.reset(new mkldnn::memory(conv_fwd_pd_->src_desc(), cpu_engine, nullptr));
|
||||
src_mem_ = onnxruntime::make_unique<mkldnn::memory>(
|
||||
mkldnn::memory(conv_fwd_pd_->src_desc(), cpu_engine, nullptr));
|
||||
net.push_back(mkldnn::reorder(*src_mem_from_, *src_mem_));
|
||||
net_args.push_back({{MKLDNN_ARG_FROM, *src_mem_from_},
|
||||
{MKLDNN_ARG_TO, *src_mem_}});
|
||||
} else {
|
||||
if (mklnode_ptr_->parent_nodes.empty()) {
|
||||
src_mem_.reset(new mkldnn::memory(conv_fwd_pd_->src_desc(), cpu_engine, nullptr));
|
||||
src_mem_ = onnxruntime::make_unique<mkldnn::memory>(
|
||||
mkldnn::memory(conv_fwd_pd_->src_desc(), cpu_engine, nullptr));
|
||||
} else {
|
||||
src_mem_ = parents_[0].get()->primitive_dst_mem_;
|
||||
}
|
||||
|
|
@ -311,25 +316,31 @@ class MklDnnConv : public MklDnnKernel {
|
|||
if (mklnode_ptr_->output_index >= 0) {
|
||||
// Use mkldnn's internal output buffer
|
||||
if (primitive_dst_desc_ != ort_source_desc_) {
|
||||
primitive_dst_mem_.reset(new mkldnn::memory(conv_fwd_pd_.get()->dst_desc(), cpu_engine));
|
||||
primitive_dst_mem_ = onnxruntime::make_unique<mkldnn::memory>(
|
||||
mkldnn::memory(conv_fwd_pd_.get()->dst_desc(), cpu_engine));
|
||||
} else {
|
||||
primitive_dst_mem_.reset(new mkldnn::memory(conv_fwd_pd_.get()->dst_desc(), cpu_engine, nullptr));
|
||||
primitive_dst_mem_ = onnxruntime::make_unique<mkldnn::memory>(
|
||||
mkldnn::memory(conv_fwd_pd_.get()->dst_desc(), cpu_engine, nullptr));
|
||||
}
|
||||
} else {
|
||||
// last node of sub-graph. need to allocate memory for output_tensor
|
||||
primitive_dst_mem_.reset(new mkldnn::memory(conv_fwd_pd_.get()->dst_desc(), cpu_engine));
|
||||
primitive_dst_mem_ = onnxruntime::make_unique<mkldnn::memory>(
|
||||
mkldnn::memory(conv_fwd_pd_.get()->dst_desc(), cpu_engine));
|
||||
}
|
||||
|
||||
if (!bias_dims_mkl.empty()) {
|
||||
bias_mem_.reset(new mkldnn::memory(conv_fwd_pd_.get()->bias_desc(), cpu_engine, nullptr));
|
||||
conv_fwd_.reset(new mkldnn::convolution_forward(*conv_fwd_pd_));
|
||||
bias_mem_ = onnxruntime::make_unique<mkldnn::memory>(
|
||||
mkldnn::memory(conv_fwd_pd_.get()->bias_desc(), cpu_engine, nullptr));
|
||||
conv_fwd_ = onnxruntime::make_unique<mkldnn::convolution_forward>(
|
||||
mkldnn::convolution_forward(*conv_fwd_pd_));
|
||||
net.push_back(*conv_fwd_);
|
||||
net_args.push_back({{MKLDNN_ARG_SRC, *src_mem_},
|
||||
{MKLDNN_ARG_WEIGHTS, *filter_mem_},
|
||||
{MKLDNN_ARG_BIAS, *bias_mem_},
|
||||
{MKLDNN_ARG_DST, *primitive_dst_mem_}});
|
||||
} else {
|
||||
conv_fwd_.reset(new mkldnn::convolution_forward(*conv_fwd_pd_));
|
||||
conv_fwd_ = onnxruntime::make_unique<mkldnn::convolution_forward>(
|
||||
mkldnn::convolution_forward(*conv_fwd_pd_));
|
||||
net.push_back(*conv_fwd_);
|
||||
net_args.push_back({{MKLDNN_ARG_SRC, *src_mem_},
|
||||
{MKLDNN_ARG_WEIGHTS, *filter_mem_},
|
||||
|
|
@ -379,13 +390,13 @@ class MklDnnConv : public MklDnnKernel {
|
|||
mkldnn::memory src = mkldnn::memory({{filter_dims_mkl}, MklDnnType<T>(), filter_format_}, cpu_engine, (void*)filter_data);
|
||||
IAllocatorUniquePtr<void> filter_reorder_buffer =
|
||||
IAllocator::MakeUniquePtr<void>(alloc_, filter_size_);
|
||||
filter_dst_mem.reset(
|
||||
new mkldnn::memory(conv_fwd_pd_->weights_desc(), cpu_engine, filter_reorder_buffer.get()));
|
||||
|
||||
mkldnn::reorder(src, *filter_dst_mem)
|
||||
filter_dst_mem = onnxruntime::make_unique<mkldnn::memory>(
|
||||
mkldnn::memory(conv_fwd_pd_->weights_desc(), cpu_engine, filter_reorder_buffer.get()));
|
||||
|
||||
mkldnn::reorder(src, *filter_dst_mem)
|
||||
.execute(cpu_engine, src, *filter_dst_mem);
|
||||
|
||||
provider_->SaveAllocatedMemory(std::move(filter_reorder_buffer));
|
||||
provider_->SaveAllocatedMemory(std::move(filter_reorder_buffer));
|
||||
filter_data = static_cast<T*>(filter_dst_mem->get_data_handle());
|
||||
provider_->SetWeightsMemoryBuffer(mklnode_ptr_->weight_name, filter_dst_mem);
|
||||
}
|
||||
|
|
@ -522,7 +533,7 @@ class MklDnnConv : public MklDnnKernel {
|
|||
}
|
||||
|
||||
private:
|
||||
mkldnn::memory::desc filter_desc_;
|
||||
mkldnn::memory::desc filter_desc_;
|
||||
mkldnn::memory::format_tag filter_format_;
|
||||
|
||||
std::shared_ptr<mkldnn::memory> src_mem_from_;
|
||||
|
|
@ -631,7 +642,6 @@ class MklDnnConv : public MklDnnKernel {
|
|||
}
|
||||
|
||||
private:
|
||||
|
||||
std::unique_ptr<mkldnn::stream> stream_;
|
||||
std::vector<int64_t> kernel_shape_; // must use ComputeKernelShape(...), instead of kernel_shape_
|
||||
AutoPadType auto_pad_;
|
||||
|
|
|
|||
|
|
@ -29,7 +29,7 @@ class MklDnnConvBatchNorm : public MklDnnKernel {
|
|||
std::vector<mkldnn::primitive>& net,
|
||||
std::vector<std::unordered_map<int, mkldnn::memory>>& net_args) override {
|
||||
Ort::CustomOpApi ort{*api};
|
||||
stream_.reset(new mkldnn::stream(cpu_engine));
|
||||
stream_ = onnxruntime::make_unique<mkldnn::stream>(mkldnn::stream(cpu_engine));
|
||||
int input_index = mklnode_ptr_->input_start_index < 0 ? 0 : mklnode_ptr_->input_start_index;
|
||||
const OrtValue* winput_tensor = ort.KernelContext_GetInput(context, input_index + 1);
|
||||
auto wtensor_info = ort.GetTensorTypeAndShape(winput_tensor);
|
||||
|
|
@ -114,8 +114,8 @@ class MklDnnConvBatchNorm : public MklDnnKernel {
|
|||
primitive_dst_shape_ = TensorShape(y_dims);
|
||||
TensorShape output_shape = y_shape.Slice(2);
|
||||
mkldnn::memory::dims dst_dims_mkl(y_dims.begin(), y_dims.end());
|
||||
primitive_dst_md_.reset(new mkldnn::memory::desc(
|
||||
{dst_dims_mkl}, MklDnnType<T>(), mkldnn::memory::format_tag::any));
|
||||
primitive_dst_md_ = onnxruntime::make_unique<mkldnn::memory::desc>(
|
||||
mkldnn::memory::desc({dst_dims_mkl}, MklDnnType<T>(), mkldnn::memory::format_tag::any));
|
||||
|
||||
mkldnn::memory::dims filter_dims_mkl;
|
||||
if (group_mkl == 1) {
|
||||
|
|
@ -188,23 +188,24 @@ class MklDnnConvBatchNorm : public MklDnnKernel {
|
|||
source_desc_ = mkldnn::memory::desc({src_dims_mkl}, MklDnnType<T>(), src_format);
|
||||
}
|
||||
|
||||
src_md_.reset(new mkldnn::memory::desc({src_dims_mkl}, MklDnnType<T>(), mkldnn::memory::format_tag::any));
|
||||
src_md_ = onnxruntime::make_unique<mkldnn::memory::desc>(
|
||||
mkldnn::memory::desc({src_dims_mkl}, MklDnnType<T>(), mkldnn::memory::format_tag::any));
|
||||
|
||||
// Set the memory descriptors to format::any to allow MKLDNN to decide what the optimal memory layout should be
|
||||
// for the computation given the input
|
||||
filter_md_.reset(new mkldnn::memory::desc(
|
||||
{filter_dims_mkl}, MklDnnType<T>(), mkldnn::memory::format_tag::any));
|
||||
bias_md_.reset(new mkldnn::memory::desc(
|
||||
{bias_dims_mkl}, MklDnnType<T>(), mkldnn::memory::format_tag::any));
|
||||
filter_md_ = onnxruntime::make_unique<mkldnn::memory::desc>(
|
||||
mkldnn::memory::desc({filter_dims_mkl}, MklDnnType<T>(), mkldnn::memory::format_tag::any));
|
||||
bias_md_ = onnxruntime::make_unique<mkldnn::memory::desc>(
|
||||
mkldnn::memory::desc({bias_dims_mkl}, MklDnnType<T>(), mkldnn::memory::format_tag::any));
|
||||
|
||||
mkldnn::memory::dims conv_zero_padding = {0, 0};
|
||||
|
||||
fwd_desc_.reset(new mkldnn::convolution_forward::desc(
|
||||
mkldnn::prop_kind::forward_inference, mkldnn::algorithm::convolution_direct, *src_md_,
|
||||
*filter_md_, *bias_md_, *primitive_dst_md_,
|
||||
strides_mkl, dilations_mkl, padding_left_mkl,
|
||||
padding_right_mkl));
|
||||
|
||||
fwd_desc_ = onnxruntime::make_unique<mkldnn::convolution_forward::desc>(
|
||||
mkldnn::convolution_forward::desc(
|
||||
mkldnn::prop_kind::forward_inference, mkldnn::algorithm::convolution_direct, *src_md_,
|
||||
*filter_md_, *bias_md_, *primitive_dst_md_,
|
||||
strides_mkl, dilations_mkl, padding_left_mkl,
|
||||
padding_right_mkl));
|
||||
|
||||
if (fuse_relu_) {
|
||||
mkldnn::primitive_attr attr;
|
||||
|
|
@ -217,11 +218,11 @@ class MklDnnConvBatchNorm : public MklDnnKernel {
|
|||
ops.append_eltwise(ops_scale, mkldnn::algorithm::eltwise_relu, ops_alpha, ops_beta);
|
||||
attr.set_post_ops(ops);
|
||||
|
||||
conv_fwd_pd_.reset(new mkldnn::convolution_forward::primitive_desc(
|
||||
*fwd_desc_, attr, cpu_engine));
|
||||
conv_fwd_pd_ = onnxruntime::make_unique<mkldnn::convolution_forward::primitive_desc>(
|
||||
mkldnn::convolution_forward::primitive_desc(*fwd_desc_, attr, cpu_engine));
|
||||
} else {
|
||||
conv_fwd_pd_.reset(new mkldnn::convolution_forward::primitive_desc(
|
||||
*fwd_desc_, cpu_engine));
|
||||
conv_fwd_pd_ = onnxruntime::make_unique<mkldnn::convolution_forward::primitive_desc>(
|
||||
mkldnn::convolution_forward::primitive_desc(*fwd_desc_, cpu_engine));
|
||||
}
|
||||
|
||||
primitive_src_desc_ = static_cast<mkldnn::memory::desc>(
|
||||
|
|
@ -237,25 +238,28 @@ class MklDnnConvBatchNorm : public MklDnnKernel {
|
|||
filter_size_ = conv_fwd_pd_.get()->weights_desc().get_size();
|
||||
dst_size_ = conv_fwd_pd_.get()->dst_desc().get_size();
|
||||
|
||||
filter_mem_.reset(
|
||||
new mkldnn::memory(conv_fwd_pd_.get()->weights_desc(), cpu_engine, nullptr));
|
||||
filter_mem_ = onnxruntime::make_unique<mkldnn::memory>(
|
||||
mkldnn::memory(conv_fwd_pd_.get()->weights_desc(), cpu_engine, nullptr));
|
||||
|
||||
if (primitive_src_desc_ != source_desc_) {
|
||||
mkldnn::memory::dims src_dims(x_shape.GetDims().begin(), x_shape.GetDims().end());
|
||||
auto pd = mkldnn::memory::desc({{src_dims}, MklDnnType<T>(), ort_source_format_});
|
||||
|
||||
if (mklnode_ptr_->parent_nodes.empty())
|
||||
src_mem_from_.reset(new mkldnn::memory(pd, cpu_engine, nullptr));
|
||||
src_mem_from_ = onnxruntime::make_unique<mkldnn::memory>(
|
||||
mkldnn::memory(pd, cpu_engine, nullptr));
|
||||
else
|
||||
src_mem_from_ = parents_[0].get()->primitive_dst_mem_;
|
||||
|
||||
src_mem_.reset(new mkldnn::memory(conv_fwd_pd_->src_desc(), cpu_engine, nullptr));
|
||||
src_mem_ = onnxruntime::make_unique<mkldnn::memory>(
|
||||
mkldnn::memory(conv_fwd_pd_->src_desc(), cpu_engine, nullptr));
|
||||
net.push_back(mkldnn::reorder(*src_mem_from_, *src_mem_));
|
||||
net_args.push_back({{MKLDNN_ARG_FROM, *src_mem_from_},
|
||||
{MKLDNN_ARG_TO, *src_mem_}});
|
||||
} else {
|
||||
if (mklnode_ptr_->parent_nodes.empty()) {
|
||||
src_mem_.reset(new mkldnn::memory(conv_fwd_pd_->src_desc(), cpu_engine, nullptr));
|
||||
src_mem_ = onnxruntime::make_unique<mkldnn::memory>(
|
||||
mkldnn::memory(conv_fwd_pd_->src_desc(), cpu_engine, nullptr));
|
||||
} else {
|
||||
src_mem_ = parents_[0].get()->primitive_dst_mem_;
|
||||
}
|
||||
|
|
@ -264,17 +268,22 @@ class MklDnnConvBatchNorm : public MklDnnKernel {
|
|||
if (mklnode_ptr_->output_index >= 0) {
|
||||
// Use mkldnn's internal output buffer
|
||||
if (primitive_dst_desc_ != ort_source_desc_) {
|
||||
primitive_dst_mem_.reset(new mkldnn::memory(conv_fwd_pd_.get()->dst_desc(), cpu_engine));
|
||||
primitive_dst_mem_ = onnxruntime::make_unique<mkldnn::memory>(
|
||||
mkldnn::memory(conv_fwd_pd_.get()->dst_desc(), cpu_engine));
|
||||
} else {
|
||||
primitive_dst_mem_.reset(new mkldnn::memory(conv_fwd_pd_.get()->dst_desc(), cpu_engine, nullptr));
|
||||
primitive_dst_mem_ = onnxruntime::make_unique<mkldnn::memory>(
|
||||
mkldnn::memory(conv_fwd_pd_.get()->dst_desc(), cpu_engine, nullptr));
|
||||
}
|
||||
} else {
|
||||
// last node of sub-graph. need to allocate memory for output_tensor
|
||||
primitive_dst_mem_.reset(new mkldnn::memory(conv_fwd_pd_.get()->dst_desc(), cpu_engine));
|
||||
primitive_dst_mem_ = onnxruntime::make_unique<mkldnn::memory>(
|
||||
mkldnn::memory(conv_fwd_pd_.get()->dst_desc(), cpu_engine));
|
||||
}
|
||||
|
||||
bias_mem_.reset(new mkldnn::memory(conv_fwd_pd_.get()->bias_desc(), cpu_engine, nullptr));
|
||||
conv_fwd_.reset(new mkldnn::convolution_forward(*conv_fwd_pd_));
|
||||
bias_mem_ = onnxruntime::make_unique<mkldnn::memory>(
|
||||
mkldnn::memory(conv_fwd_pd_.get()->bias_desc(), cpu_engine, nullptr));
|
||||
conv_fwd_ = onnxruntime::make_unique<mkldnn::convolution_forward>(
|
||||
mkldnn::convolution_forward(*conv_fwd_pd_));
|
||||
net.push_back(*conv_fwd_);
|
||||
net_args.push_back({{MKLDNN_ARG_SRC, *src_mem_},
|
||||
{MKLDNN_ARG_WEIGHTS, *filter_mem_},
|
||||
|
|
@ -389,8 +398,8 @@ class MklDnnConvBatchNorm : public MklDnnKernel {
|
|||
mkldnn::memory src = mkldnn::memory({{filter_dims_mkl}, MklDnnType<T>(), filter_format_}, cpu_engine, (void*)weights_scaled_by_axis.data());
|
||||
IAllocatorUniquePtr<void> filter_reorder_buffer =
|
||||
IAllocator::MakeUniquePtr<void>(alloc_, filter_size_);
|
||||
filter_dst_mem.reset(
|
||||
new mkldnn::memory(conv_fwd_pd_->weights_desc(), cpu_engine, filter_reorder_buffer.get()));
|
||||
filter_dst_mem = onnxruntime::make_unique<mkldnn::memory>(
|
||||
mkldnn::memory(conv_fwd_pd_->weights_desc(), cpu_engine, filter_reorder_buffer.get()));
|
||||
|
||||
mkldnn::reorder(src, *filter_dst_mem)
|
||||
.execute(cpu_engine, src, *filter_dst_mem);
|
||||
|
|
@ -404,8 +413,8 @@ class MklDnnConvBatchNorm : public MklDnnKernel {
|
|||
auto bias_size = conv_fwd_pd_.get()->bias_desc().get_size();
|
||||
IAllocatorUniquePtr<void> bias_buffer =
|
||||
IAllocator::MakeUniquePtr<void>(alloc_, bias_size);
|
||||
bias_mem.reset(
|
||||
new mkldnn::memory(conv_fwd_pd_->bias_desc(), cpu_engine, bias_buffer.get()));
|
||||
bias_mem = onnxruntime::make_unique<mkldnn::memory>(
|
||||
mkldnn::memory(conv_fwd_pd_->bias_desc(), cpu_engine, bias_buffer.get()));
|
||||
float* bias_buffer_data = static_cast<float*>(bias_buffer.get());
|
||||
if (mklnode_ptr_->num_inputs == 7) {
|
||||
const OrtValue* conv_bias_tensor = ort.KernelContext_GetInput(context, input_index + 2);
|
||||
|
|
|
|||
|
|
@ -28,7 +28,7 @@ class SubgraphPrimitive : public PrimitiveBase {
|
|||
OrtKernelContext* context,
|
||||
const SubgraphParams& params)
|
||||
: cpu_engine_(GetEngine()) {
|
||||
context_.stream.reset(new mkldnn::stream(cpu_engine_));
|
||||
context_.stream = onnxruntime::make_unique<mkldnn::stream>(mkldnn::stream(cpu_engine_));
|
||||
|
||||
if (context_.net.size() == 0) {
|
||||
CreateKernels(params);
|
||||
|
|
@ -67,7 +67,7 @@ class SubgraphPrimitive : public PrimitiveBase {
|
|||
std::ostringstream os;
|
||||
os << "Conv-" << mkldnn_node.node_index << "-";
|
||||
std::shared_ptr<MklDnnConv<T>> kernel;
|
||||
kernel.reset(new MklDnnConv<T>(mkldnn_node, params.provider, params.attributes, os.str()));
|
||||
kernel = std::make_shared<MklDnnConv<T>>(mkldnn_node, params.provider, params.attributes, os.str());
|
||||
for (auto index : mkldnn_node.parent_nodes) {
|
||||
kernel->parents_.push_back(context_.kernels[index]);
|
||||
}
|
||||
|
|
@ -76,7 +76,7 @@ class SubgraphPrimitive : public PrimitiveBase {
|
|||
std::ostringstream os;
|
||||
os << "Conv-" << mkldnn_node.node_index << "-";
|
||||
std::shared_ptr<MklDnnConv<T>> kernel;
|
||||
kernel.reset(new MklDnnConv<T>(mkldnn_node, params.provider, params.attributes, os.str()));
|
||||
kernel = std::make_shared<MklDnnConv<T>>(mkldnn_node, params.provider, params.attributes, os.str());
|
||||
kernel->fuse_relu_ = true;
|
||||
for (auto index : mkldnn_node.parent_nodes) {
|
||||
kernel->parents_.push_back(context_.kernels[index]);
|
||||
|
|
@ -86,7 +86,7 @@ class SubgraphPrimitive : public PrimitiveBase {
|
|||
std::ostringstream os;
|
||||
os << "Relu-" << mkldnn_node.node_index << "-";
|
||||
std::shared_ptr<MklDnnRelu<T>> kernel;
|
||||
kernel.reset(new MklDnnRelu<T>(mkldnn_node, params.provider, params.attributes, os.str()));
|
||||
kernel = std::make_shared<MklDnnRelu<T>>(mkldnn_node, params.provider, params.attributes, os.str());
|
||||
for (auto index : mkldnn_node.parent_nodes) {
|
||||
kernel->parents_.push_back(context_.kernels[index]);
|
||||
}
|
||||
|
|
@ -95,7 +95,7 @@ class SubgraphPrimitive : public PrimitiveBase {
|
|||
std::ostringstream os;
|
||||
os << "BatchNormalization-" << mkldnn_node.node_index << "-";
|
||||
std::shared_ptr<MklDnnBatchNorm<T>> kernel;
|
||||
kernel.reset(new MklDnnBatchNorm<T>(mkldnn_node, params.provider, params.attributes, os.str()));
|
||||
kernel = std::make_shared<MklDnnBatchNorm<T>>(mkldnn_node, params.provider, params.attributes, os.str());
|
||||
for (auto index : mkldnn_node.parent_nodes) {
|
||||
kernel->parents_.push_back(context_.kernels[index]);
|
||||
}
|
||||
|
|
@ -104,7 +104,7 @@ class SubgraphPrimitive : public PrimitiveBase {
|
|||
std::ostringstream os;
|
||||
os << "BatchNormalization-" << mkldnn_node.node_index << "-";
|
||||
std::shared_ptr<MklDnnBatchNorm<T>> kernel;
|
||||
kernel.reset(new MklDnnBatchNorm<T>(mkldnn_node, params.provider, params.attributes, os.str()));
|
||||
kernel = std::make_shared<MklDnnBatchNorm<T>>(mkldnn_node, params.provider, params.attributes, os.str());
|
||||
kernel->fuse_relu_ = true;
|
||||
for (auto index : mkldnn_node.parent_nodes) {
|
||||
kernel->parents_.push_back(context_.kernels[index]);
|
||||
|
|
@ -114,7 +114,7 @@ class SubgraphPrimitive : public PrimitiveBase {
|
|||
std::ostringstream os;
|
||||
os << "Conv-" << mkldnn_node.node_index << "-";
|
||||
std::shared_ptr<MklDnnConvBatchNorm<T>> kernel;
|
||||
kernel.reset(new MklDnnConvBatchNorm<T>(mkldnn_node, params.provider, params.attributes, os.str()));
|
||||
kernel = std::make_shared<MklDnnConvBatchNorm<T>>(mkldnn_node, params.provider, params.attributes, os.str());
|
||||
for (auto index : mkldnn_node.parent_nodes) {
|
||||
kernel->parents_.push_back(context_.kernels[index]);
|
||||
}
|
||||
|
|
@ -123,7 +123,7 @@ class SubgraphPrimitive : public PrimitiveBase {
|
|||
std::ostringstream os;
|
||||
os << "Conv-" << mkldnn_node.node_index << "-";
|
||||
std::shared_ptr<MklDnnConvBatchNorm<T>> kernel;
|
||||
kernel.reset(new MklDnnConvBatchNorm<T>(mkldnn_node, params.provider, params.attributes, os.str()));
|
||||
kernel = std::make_shared<MklDnnConvBatchNorm<T>>(mkldnn_node, params.provider, params.attributes, os.str());
|
||||
kernel->fuse_relu_ = true;
|
||||
for (auto index : mkldnn_node.parent_nodes) {
|
||||
kernel->parents_.push_back(context_.kernels[index]);
|
||||
|
|
@ -133,7 +133,7 @@ class SubgraphPrimitive : public PrimitiveBase {
|
|||
std::ostringstream os;
|
||||
os << "MaxPool-" << mkldnn_node.node_index << "-";
|
||||
std::shared_ptr<MklDnnPool<T>> kernel;
|
||||
kernel.reset(new MklDnnPool<T>(mkldnn_node, params.provider, params.attributes, os.str()));
|
||||
kernel = std::make_shared<MklDnnPool<T>>(mkldnn_node, params.provider, params.attributes, os.str());
|
||||
for (auto index : mkldnn_node.parent_nodes) {
|
||||
kernel->parents_.push_back(context_.kernels[index]);
|
||||
}
|
||||
|
|
@ -142,7 +142,7 @@ class SubgraphPrimitive : public PrimitiveBase {
|
|||
std::ostringstream os;
|
||||
os << "GlobalMaxPool-" << mkldnn_node.node_index << "-";
|
||||
std::shared_ptr<MklDnnPool<T>> kernel;
|
||||
kernel.reset(new MklDnnPool<T>(mkldnn_node, params.provider, params.attributes, os.str()));
|
||||
kernel = std::make_shared<MklDnnPool<T>>(mkldnn_node, params.provider, params.attributes, os.str());
|
||||
for (auto index : mkldnn_node.parent_nodes) {
|
||||
kernel->parents_.push_back(context_.kernels[index]);
|
||||
}
|
||||
|
|
@ -151,7 +151,7 @@ class SubgraphPrimitive : public PrimitiveBase {
|
|||
std::ostringstream os;
|
||||
os << "AveragePool-" << mkldnn_node.node_index << "-";
|
||||
std::shared_ptr<MklDnnPool<T>> kernel;
|
||||
kernel.reset(new MklDnnPool<T>(mkldnn_node, params.provider, params.attributes, os.str()));
|
||||
kernel = std::make_shared<MklDnnPool<T>>(mkldnn_node, params.provider, params.attributes, os.str());
|
||||
for (auto index : mkldnn_node.parent_nodes) {
|
||||
kernel->parents_.push_back(context_.kernels[index]);
|
||||
}
|
||||
|
|
@ -160,7 +160,7 @@ class SubgraphPrimitive : public PrimitiveBase {
|
|||
std::ostringstream os;
|
||||
os << "GlobalAveragePool-" << mkldnn_node.node_index << "-";
|
||||
std::shared_ptr<MklDnnPool<T>> kernel;
|
||||
kernel.reset(new MklDnnPool<T>(mkldnn_node, params.provider, params.attributes, os.str()));
|
||||
kernel = std::make_shared<MklDnnPool<T>>(mkldnn_node, params.provider, params.attributes, os.str());
|
||||
for (auto index : mkldnn_node.parent_nodes) {
|
||||
kernel->parents_.push_back(context_.kernels[index]);
|
||||
}
|
||||
|
|
@ -169,7 +169,7 @@ class SubgraphPrimitive : public PrimitiveBase {
|
|||
std::ostringstream os;
|
||||
os << "LRN-" << mkldnn_node.node_index << "-";
|
||||
std::shared_ptr<MklDnnLrn<T>> kernel;
|
||||
kernel.reset(new MklDnnLrn<T>(mkldnn_node, params.provider, params.attributes, os.str()));
|
||||
kernel = std::make_shared<MklDnnLrn<T>>(mkldnn_node, params.provider, params.attributes, os.str());
|
||||
for (auto index : mkldnn_node.parent_nodes) {
|
||||
kernel->parents_.push_back(context_.kernels[index]);
|
||||
}
|
||||
|
|
@ -178,7 +178,7 @@ class SubgraphPrimitive : public PrimitiveBase {
|
|||
std::ostringstream os;
|
||||
os << "Sum-" << mkldnn_node.node_index << "-";
|
||||
std::shared_ptr<MklDnnSum<T>> kernel;
|
||||
kernel.reset(new MklDnnSum<T>(mkldnn_node, params.provider, params.attributes, os.str()));
|
||||
kernel = std::make_shared<MklDnnSum<T>>(mkldnn_node, params.provider, params.attributes, os.str());
|
||||
for (auto index : mkldnn_node.parent_nodes) {
|
||||
kernel->parents_.push_back(context_.kernels[index]);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -11,14 +11,14 @@ void MklDnnKernel::InitDstReorderOutput(mkldnn::engine& cpu_engine,
|
|||
std::vector<mkldnn::primitive>& net,
|
||||
std::vector<std::unordered_map<int, mkldnn::memory>>& net_args) {
|
||||
// Allocate dst buffer if reorder is necessary
|
||||
if (primitive_dst_desc_ != ort_source_desc_)
|
||||
{
|
||||
if (primitive_dst_desc_ != ort_source_desc_) {
|
||||
// reorder to ONNXRuntime format
|
||||
mkldnn::memory::dims dst_dims_mkl(
|
||||
primitive_dst_shape_.GetDims().begin(), primitive_dst_shape_.GetDims().end());
|
||||
mkldnn::memory::desc dst_des = mkldnn::memory::desc(dst_dims_mkl,
|
||||
data_type, ort_source_format_);
|
||||
reorder_dst_mem_to_.reset(new mkldnn::memory(dst_des, cpu_engine));
|
||||
reorder_dst_mem_to_ = onnxruntime::make_unique<mkldnn::memory>(
|
||||
mkldnn::memory(dst_des, cpu_engine));
|
||||
net.push_back(mkldnn::reorder(*primitive_dst_mem_, *reorder_dst_mem_to_));
|
||||
net_args.push_back({{MKLDNN_ARG_FROM, *primitive_dst_mem_},
|
||||
{MKLDNN_ARG_TO, *reorder_dst_mem_to_}});
|
||||
|
|
|
|||
|
|
@ -47,13 +47,13 @@ class MklDnnLrn : public MklDnnKernel {
|
|||
|
||||
ort_source_desc_ = mkldnn::memory::desc(
|
||||
{src_dims}, MklDnnType<T>(), ort_source_format_);
|
||||
src_md_.reset(new mkldnn::memory::desc(
|
||||
{src_dims}, MklDnnType<T>(), ort_source_format_));
|
||||
src_mem_.reset(
|
||||
new mkldnn::memory(*src_md_, cpu_engine, nullptr));
|
||||
src_md_ = onnxruntime::make_unique<mkldnn::memory::desc>(
|
||||
mkldnn::memory::desc({src_dims}, MklDnnType<T>(), ort_source_format_));
|
||||
src_mem_ = onnxruntime::make_unique<mkldnn::memory>(
|
||||
mkldnn::memory(*src_md_, cpu_engine, nullptr));
|
||||
} else {
|
||||
src_md_.reset(
|
||||
new mkldnn::memory::desc(parents_[0].get()->primitive_dst_desc_));
|
||||
src_md_ = onnxruntime::make_unique<mkldnn::memory::desc>(
|
||||
mkldnn::memory::desc(parents_[0].get()->primitive_dst_desc_));
|
||||
src_mem_ = parents_[0].get()->primitive_dst_mem_;
|
||||
x_shape = parents_[0].get()->primitive_dst_shape_;
|
||||
ort_source_format_ = parents_[0].get()->ort_source_format_;
|
||||
|
|
@ -64,12 +64,12 @@ class MklDnnLrn : public MklDnnKernel {
|
|||
primitive_dst_shape_ = TensorShape(x_shape);
|
||||
|
||||
mkldnn::algorithm algo = mkldnn::algorithm::lrn_across_channels;
|
||||
fwd_desc_.reset(new mkldnn::lrn_forward::desc(
|
||||
mkldnn::prop_kind::forward_scoring, algo, *src_md_,
|
||||
size_, alpha_, beta_, bias_));
|
||||
fwd_desc_ = onnxruntime::make_unique<mkldnn::lrn_forward::desc>(
|
||||
mkldnn::lrn_forward::desc(mkldnn::prop_kind::forward_scoring, algo, *src_md_,
|
||||
size_, alpha_, beta_, bias_));
|
||||
|
||||
fwd_primitive_desc_.reset(new mkldnn::lrn_forward::primitive_desc(
|
||||
*fwd_desc_, cpu_engine));
|
||||
fwd_primitive_desc_ = onnxruntime::make_unique<mkldnn::lrn_forward::primitive_desc>(
|
||||
mkldnn::lrn_forward::primitive_desc(*fwd_desc_, cpu_engine));
|
||||
|
||||
primitive_src_desc_ = fwd_primitive_desc_.get()->src_desc();
|
||||
primitive_dst_desc_ = fwd_primitive_desc_.get()->dst_desc();
|
||||
|
|
@ -79,22 +79,22 @@ class MklDnnLrn : public MklDnnKernel {
|
|||
if (primitive_dst_desc_ != ort_source_desc_) {
|
||||
// reorder neded. Use primitive output as input to reorder and
|
||||
// allocate buffer for reorder output, final output of this subgraph
|
||||
primitive_dst_mem_.reset(
|
||||
new mkldnn::memory(fwd_primitive_desc_.get()->dst_desc(), cpu_engine));
|
||||
primitive_dst_mem_ = onnxruntime::make_unique<mkldnn::memory>(
|
||||
mkldnn::memory(fwd_primitive_desc_.get()->dst_desc(), cpu_engine));
|
||||
} else {
|
||||
// Last node but re-order not needed. Allocate buffer to output of this node
|
||||
primitive_dst_mem_.reset(
|
||||
new mkldnn::memory(fwd_primitive_desc_.get()->dst_desc(), cpu_engine, nullptr));
|
||||
primitive_dst_mem_ = onnxruntime::make_unique<mkldnn::memory>(
|
||||
mkldnn::memory(fwd_primitive_desc_.get()->dst_desc(), cpu_engine, nullptr));
|
||||
}
|
||||
} else {
|
||||
// Intermediate node. Use mkldnn kernel internal memory for output and
|
||||
// use this as input to next node.
|
||||
primitive_dst_mem_.reset(
|
||||
new mkldnn::memory(fwd_primitive_desc_.get()->dst_desc(), cpu_engine));
|
||||
primitive_dst_mem_ = onnxruntime::make_unique<mkldnn::memory>(
|
||||
mkldnn::memory(fwd_primitive_desc_.get()->dst_desc(), cpu_engine));
|
||||
}
|
||||
|
||||
lrn_fwd_.reset(
|
||||
new mkldnn::lrn_forward(*fwd_primitive_desc_));
|
||||
lrn_fwd_ = onnxruntime::make_unique<mkldnn::lrn_forward>(
|
||||
mkldnn::lrn_forward(*fwd_primitive_desc_));
|
||||
net.push_back(*lrn_fwd_);
|
||||
net_args.push_back({{MKLDNN_ARG_SRC, *src_mem_},
|
||||
{MKLDNN_ARG_DST, *primitive_dst_mem_}});
|
||||
|
|
@ -126,7 +126,7 @@ class MklDnnLrn : public MklDnnKernel {
|
|||
OrtValue* output = ort.KernelContext_GetOutput(context, mklnode_ptr_->output_index, &y_dims[0], static_cast<int>(primitive_dst_shape_.GetDims().size()));
|
||||
T* dst_data = ort.GetTensorMutableData<T>(output);
|
||||
|
||||
if (primitive_dst_desc_!= ort_source_desc_) {
|
||||
if (primitive_dst_desc_ != ort_source_desc_) {
|
||||
reorder_dst_mem_to_->set_data_handle(dst_data);
|
||||
} else {
|
||||
primitive_dst_mem_->set_data_handle(dst_data);
|
||||
|
|
|
|||
|
|
@ -48,7 +48,8 @@ class MklDnnPool : public MklDnnKernel {
|
|||
|
||||
// reorder for better performance
|
||||
mkldnn::memory::format_tag src_format = GetAVXFormat(src_dims_mkl);
|
||||
src_md_.reset(new mkldnn::memory::desc({src_dims_mkl}, MklDnnType<T>(), src_format));
|
||||
src_md_ = onnxruntime::make_unique<mkldnn::memory::desc>(
|
||||
mkldnn::memory::desc({src_dims_mkl}, MklDnnType<T>(), src_format));
|
||||
} else {
|
||||
// get the output of previous node (mkldnn block propagation).
|
||||
// TODO Sourcenode will set src of this node.
|
||||
|
|
@ -63,10 +64,11 @@ class MklDnnPool : public MklDnnKernel {
|
|||
if (source_desc_ == ort_source_desc_) {
|
||||
// reorder for better performance
|
||||
mkldnn::memory::format_tag fmt = GetAVXFormat(src_dims_mkl);
|
||||
src_md_.reset(new mkldnn::memory::desc(
|
||||
{src_dims_mkl}, MklDnnType<T>(), fmt));
|
||||
src_md_ = onnxruntime::make_unique<mkldnn::memory::desc>(
|
||||
mkldnn::memory::desc({src_dims_mkl}, MklDnnType<T>(), fmt));
|
||||
} else {
|
||||
src_md_.reset(new mkldnn::memory::desc(parents_[0].get()->primitive_dst_mem_->get_desc()));
|
||||
src_md_ = onnxruntime::make_unique<mkldnn::memory::desc>(
|
||||
mkldnn::memory::desc(parents_[0].get()->primitive_dst_mem_->get_desc()));
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -95,8 +97,8 @@ class MklDnnPool : public MklDnnKernel {
|
|||
mkldnn::memory::dims padding_left_mkl(pads_.begin(), pads_.begin() + (pads_.size() / 2));
|
||||
mkldnn::memory::dims padding_right_mkl(pads_.begin() + (pads_.size() / 2), pads_.end());
|
||||
|
||||
primitive_dst_md_.reset(new mkldnn::memory::desc(
|
||||
{dst_dims_mkl}, MklDnnType<T>(), mkldnn::memory::format_tag::any));
|
||||
primitive_dst_md_ = onnxruntime::make_unique<mkldnn::memory::desc>(
|
||||
mkldnn::memory::desc({dst_dims_mkl}, MklDnnType<T>(), mkldnn::memory::format_tag::any));
|
||||
|
||||
mkldnn::algorithm algo = mkldnn::algorithm::pooling_max;
|
||||
if (op_name_ == "AveragePool" || op_name_ == "GlobalAveragePool") {
|
||||
|
|
@ -105,47 +107,50 @@ class MklDnnPool : public MklDnnKernel {
|
|||
algo = mkldnn::algorithm::pooling_avg_include_padding;
|
||||
}
|
||||
}
|
||||
fwd_desc_.reset(new mkldnn::pooling_forward::desc(
|
||||
mkldnn::prop_kind::forward_inference, algo,
|
||||
*src_md_, *primitive_dst_md_,
|
||||
strides_mkl, kernel_mkl,
|
||||
padding_left_mkl, padding_right_mkl));
|
||||
fwd_desc_ = onnxruntime::make_unique<mkldnn::pooling_forward::desc>(
|
||||
mkldnn::pooling_forward::desc(mkldnn::prop_kind::forward_inference, algo,
|
||||
*src_md_, *primitive_dst_md_,
|
||||
strides_mkl, kernel_mkl,
|
||||
padding_left_mkl, padding_right_mkl));
|
||||
|
||||
fwd_primitive_desc_.reset(new mkldnn::pooling_forward::primitive_desc(
|
||||
*fwd_desc_, cpu_engine));
|
||||
fwd_primitive_desc_ = onnxruntime::make_unique<mkldnn::pooling_forward::primitive_desc>(
|
||||
mkldnn::pooling_forward::primitive_desc(*fwd_desc_, cpu_engine));
|
||||
|
||||
if (mklnode_ptr_->parent_nodes.empty()) {
|
||||
// Sub-graph's first node. Read input from input buffer
|
||||
src_mem_.reset(new mkldnn::memory(
|
||||
fwd_primitive_desc_.get()->src_desc(), cpu_engine, nullptr));
|
||||
src_mem_ = onnxruntime::make_unique<mkldnn::memory>(
|
||||
mkldnn::memory(fwd_primitive_desc_.get()->src_desc(), cpu_engine, nullptr));
|
||||
} else {
|
||||
// Sub-graph's inner node. set input to parent's output
|
||||
src_mem_ = parents_[0].get()->primitive_dst_mem_;
|
||||
}
|
||||
|
||||
primitive_src_desc_ = fwd_primitive_desc_.get()->src_desc();
|
||||
primitive_src_desc_ = fwd_primitive_desc_.get()->src_desc();
|
||||
primitive_dst_desc_ = fwd_primitive_desc_.get()->dst_desc();
|
||||
|
||||
src_size_ = fwd_primitive_desc_.get()->src_desc().get_size();
|
||||
dst_size_ = fwd_primitive_desc_.get()->dst_desc().get_size();
|
||||
|
||||
// reorder source memory for best performance (AVX512);
|
||||
// reorder source memory for best performance (AVX512);
|
||||
if (primitive_src_desc_ != source_desc_) {
|
||||
mkldnn::memory::dims src_dims(x_shape_.GetDims().begin(), x_shape_.GetDims().end());
|
||||
auto pd = mkldnn::memory::desc(source_desc_);
|
||||
|
||||
if (mklnode_ptr_->parent_nodes.empty())
|
||||
src_mem_from_.reset(new mkldnn::memory(pd, cpu_engine, nullptr));
|
||||
src_mem_from_ = onnxruntime::make_unique<mkldnn::memory>(
|
||||
mkldnn::memory(pd, cpu_engine, nullptr));
|
||||
else
|
||||
src_mem_from_ = parents_[0].get()->primitive_dst_mem_;
|
||||
|
||||
src_mem_.reset(new mkldnn::memory(fwd_primitive_desc_->src_desc(), cpu_engine, nullptr));
|
||||
src_mem_ = onnxruntime::make_unique<mkldnn::memory>(
|
||||
mkldnn::memory(fwd_primitive_desc_->src_desc(), cpu_engine, nullptr));
|
||||
net.push_back(mkldnn::reorder(*src_mem_from_, *src_mem_));
|
||||
net_args.push_back({{MKLDNN_ARG_FROM, *src_mem_from_},
|
||||
{MKLDNN_ARG_TO, *src_mem_}});
|
||||
} else {
|
||||
if (mklnode_ptr_->parent_nodes.empty()) {
|
||||
src_mem_.reset(new mkldnn::memory(fwd_primitive_desc_->src_desc(), cpu_engine, nullptr));
|
||||
src_mem_ = onnxruntime::make_unique<mkldnn::memory>(
|
||||
mkldnn::memory(fwd_primitive_desc_->src_desc(), cpu_engine, nullptr));
|
||||
} else {
|
||||
src_mem_ = parents_[0].get()->primitive_dst_mem_;
|
||||
}
|
||||
|
|
@ -156,21 +161,21 @@ class MklDnnPool : public MklDnnKernel {
|
|||
if (primitive_dst_desc_ != ort_source_desc_) {
|
||||
// reorder neded. Use primitive output as input to reorder and
|
||||
// allocate buffer for reorder output, final output of this subgraph
|
||||
primitive_dst_mem_.reset(
|
||||
new mkldnn::memory(fwd_primitive_desc_.get()->dst_desc(), cpu_engine));
|
||||
primitive_dst_mem_ = onnxruntime::make_unique<mkldnn::memory>(
|
||||
mkldnn::memory(fwd_primitive_desc_.get()->dst_desc(), cpu_engine));
|
||||
} else {
|
||||
// Last node but re-order not needed. Allocate buffer to output of this node
|
||||
primitive_dst_mem_.reset(
|
||||
new mkldnn::memory(fwd_primitive_desc_.get()->dst_desc(), cpu_engine, nullptr));
|
||||
primitive_dst_mem_ = onnxruntime::make_unique<mkldnn::memory>(
|
||||
mkldnn::memory(fwd_primitive_desc_.get()->dst_desc(), cpu_engine, nullptr));
|
||||
}
|
||||
} else {
|
||||
// Intermediate node. Use mkldnn kernel internal memory for output and
|
||||
// use this as input to next node.
|
||||
primitive_dst_mem_.reset(
|
||||
new mkldnn::memory(fwd_primitive_desc_.get()->dst_desc(), cpu_engine));
|
||||
primitive_dst_mem_ = onnxruntime::make_unique<mkldnn::memory>(
|
||||
mkldnn::memory(fwd_primitive_desc_.get()->dst_desc(), cpu_engine));
|
||||
}
|
||||
pool_fwd_.reset(
|
||||
new mkldnn::pooling_forward(*fwd_primitive_desc_));
|
||||
pool_fwd_ = onnxruntime::make_unique<mkldnn::pooling_forward>(
|
||||
mkldnn::pooling_forward(*fwd_primitive_desc_));
|
||||
|
||||
net.push_back(*pool_fwd_);
|
||||
net_args.push_back({{MKLDNN_ARG_SRC, *src_mem_},
|
||||
|
|
|
|||
|
|
@ -89,25 +89,28 @@ class MklDnnSum : public MklDnnKernel {
|
|||
}
|
||||
}
|
||||
|
||||
primitive_dst_md_.reset(new mkldnn::memory::desc(
|
||||
{dst_dims_mkl}, MklDnnType<T>(), mkldnn::memory::format_tag::any));
|
||||
sum_pd_.reset(new mkldnn::sum::primitive_desc(
|
||||
*primitive_dst_md_, coeff, srcs_pd_, cpu_engine));
|
||||
primitive_dst_md_ = onnxruntime::make_unique<mkldnn::memory::desc>(
|
||||
mkldnn::memory::desc({dst_dims_mkl}, MklDnnType<T>(), mkldnn::memory::format_tag::any));
|
||||
sum_pd_ = onnxruntime::make_unique<mkldnn::sum::primitive_desc>(
|
||||
mkldnn::sum::primitive_desc(*primitive_dst_md_, coeff, srcs_pd_, cpu_engine));
|
||||
|
||||
if (mklnode_ptr_->output_index >= 0) {
|
||||
// last node of sub-graph. need to allocate memory for output_tensor
|
||||
if (primitive_dst_desc_ != ort_source_desc_) {
|
||||
// reorder neded. Use primitive output as input to reorder and
|
||||
// allocate buffer for reorder output, final output of this subgraph
|
||||
primitive_dst_mem_.reset(new mkldnn::memory(sum_pd_->dst_desc(), cpu_engine));
|
||||
primitive_dst_mem_ = onnxruntime::make_unique<mkldnn::memory>(
|
||||
mkldnn::memory(sum_pd_->dst_desc(), cpu_engine));
|
||||
} else {
|
||||
// Last node but re-order not needed. Allocate buffer to output of this node
|
||||
primitive_dst_mem_.reset(new mkldnn::memory(sum_pd_->dst_desc(), cpu_engine, nullptr));
|
||||
primitive_dst_mem_ = onnxruntime::make_unique<mkldnn::memory>(
|
||||
mkldnn::memory(sum_pd_->dst_desc(), cpu_engine, nullptr));
|
||||
}
|
||||
} else {
|
||||
// Intermediate node. Use mkldnn kernel internal memory for output and
|
||||
// use this as input to next node.
|
||||
primitive_dst_mem_.reset(new mkldnn::memory(sum_pd_->dst_desc(), cpu_engine));
|
||||
primitive_dst_mem_ = onnxruntime::make_unique<mkldnn::memory>(
|
||||
mkldnn::memory(sum_pd_->dst_desc(), cpu_engine));
|
||||
}
|
||||
primitive_dst_desc_ = sum_pd_->dst_desc();
|
||||
|
||||
|
|
|
|||
Loading…
Reference in a new issue