diff --git a/onnxruntime/core/providers/dnnl/dnnl_node_capability.cc b/onnxruntime/core/providers/dnnl/dnnl_node_capability.cc index d4c72840dd..ab6fbb5f5d 100644 --- a/onnxruntime/core/providers/dnnl/dnnl_node_capability.cc +++ b/onnxruntime/core/providers/dnnl/dnnl_node_capability.cc @@ -191,34 +191,31 @@ bool DnnlBatchNormalizationNodeCapability::IsDimensionSupported(const Node* node return true; } -// DnnlReduceMeanNodeCapability class +// DnnlReduceNodeCapability class //------------------------------------- -bool DnnlReduceMeanNodeCapability::Supported(const Node* node, const GraphViewer& graph_viewer) const { - ORT_UNUSED_PARAMETER(graph_viewer); +bool DnnlReduceNodeCapability::Supported(const Node* node, const GraphViewer& graph_viewer) const { + // These reduction operators use elementwise ops so elementwise operators must also be supported. + if(node->OpType() == "ReduceLogSum" || + node->OpType() == "ReduceLogSumExp" || + node->OpType() == "ReduceSumSquare") { + if(!_eltwise.Supported(node, graph_viewer)) return false; + } if (!IsTypeSupported(node)) return false; - if (!IsAttributeSupported(node)) return false; if (!IsDimensionSupported(node)) return false; return true; } -bool DnnlReduceMeanNodeCapability::IsAttributeSupported(const Node* node) const { - const NodeAttributes& attributes = node->GetAttributes(); - auto attr = attributes.find("keepdims"); - if (attr != attributes.end() && attr->second().i() == 0) { - return false; - } - return true; -} - -bool DnnlReduceMeanNodeCapability::IsDimensionSupported(const Node* node) const { +bool DnnlReduceNodeCapability::IsDimensionSupported(const Node* node) const { auto node_inputs = node->InputDefs(); if (node_inputs[0]->Shape() != nullptr && node_inputs[0]->Shape()->dim_size() == 0) { + LOGS_DEFAULT(INFO) << "Reduction op not supported because input data is a scalar\n"; return false; } return true; } // DnnlSoftmaxNodeCapability class +//------------------------------------- bool 
DnnlSoftmaxNodeCapability::Supported(const Node* node, const GraphViewer& graph_viewer) const { ORT_UNUSED_PARAMETER(graph_viewer); if (!IsTypeSupported(node)) return false; diff --git a/onnxruntime/core/providers/dnnl/dnnl_node_capability.h b/onnxruntime/core/providers/dnnl/dnnl_node_capability.h index 0e3a3ef131..e81f80f320 100644 --- a/onnxruntime/core/providers/dnnl/dnnl_node_capability.h +++ b/onnxruntime/core/providers/dnnl/dnnl_node_capability.h @@ -145,22 +145,6 @@ class DnnlBatchNormalizationNodeCapability : public DnnlDefaultNodeCapability { bool IsDimensionSupported(const Node* node) const; }; -/** - * Decide if a ReduceMean op is supported by DnnlExecutionProvider - * - * Dnnl does not support the "keepdims" attribute when it is `0` - */ -class DnnlReduceMeanNodeCapability : public DnnlDefaultNodeCapability { - public: - DnnlReduceMeanNodeCapability() : DnnlDefaultNodeCapability({type_float32}) {} - - bool Supported(const Node* node, const GraphViewer& graph_viewer) const override; - - private: - bool IsAttributeSupported(const Node* node) const; - bool IsDimensionSupported(const Node* node) const; -}; - /** * Decide if a Softmax op is supported by DnnlExecutionProvider * @@ -249,6 +233,21 @@ class DnnlElementwiseCapability : public DnnlDefaultNodeCapability { bool IsDimensionSupported(const Node* node) const; }; +/** + * Decide if a Reduce op is supported by DnnlExecutionProvider + */ +class DnnlReduceNodeCapability : public DnnlDefaultNodeCapability { + public: + DnnlReduceNodeCapability() : DnnlDefaultNodeCapability({type_float32}) {} + + bool Supported(const Node* node, const GraphViewer& graph_viewer) const override; + + private: + bool IsDimensionSupported(const Node* node) const; + DnnlElementwiseCapability _eltwise; + +}; + class DnnlPowNodeCapability : public DnnlDefaultMultiInputNodeCapability { public: DnnlPowNodeCapability() diff --git a/onnxruntime/core/providers/dnnl/dnnl_op_manager.cc b/onnxruntime/core/providers/dnnl/dnnl_op_manager.cc 
index fa58933a47..43e3dc1508 100644 --- a/onnxruntime/core/providers/dnnl/dnnl_op_manager.cc +++ b/onnxruntime/core/providers/dnnl/dnnl_op_manager.cc @@ -31,7 +31,16 @@ DnnlOpManager::DnnlOpManager() { dnnl_ops_map_.emplace(std::make_pair("Mul", std::unique_ptr(new DnnlBinaryNodeCapability()))); dnnl_ops_map_.emplace(std::make_pair("Pow", std::unique_ptr(new DnnlPowNodeCapability()))); dnnl_ops_map_.emplace(std::make_pair("QAttention", std::unique_ptr(new DnnlQAttentionNodeCapability()))); - dnnl_ops_map_.emplace(std::make_pair("ReduceMean", std::unique_ptr(new DnnlReduceMeanNodeCapability()))); + dnnl_ops_map_.emplace(std::make_pair("ReduceL1", std::unique_ptr(new DnnlReduceNodeCapability()))); + dnnl_ops_map_.emplace(std::make_pair("ReduceL2", std::unique_ptr(new DnnlReduceNodeCapability()))); + dnnl_ops_map_.emplace(std::make_pair("ReduceLogSum", std::unique_ptr(new DnnlReduceNodeCapability()))); + dnnl_ops_map_.emplace(std::make_pair("ReduceLogSumExp", std::unique_ptr(new DnnlReduceNodeCapability()))); + dnnl_ops_map_.emplace(std::make_pair("ReduceMax", std::unique_ptr(new DnnlReduceNodeCapability()))); + dnnl_ops_map_.emplace(std::make_pair("ReduceMean", std::unique_ptr(new DnnlReduceNodeCapability()))); + dnnl_ops_map_.emplace(std::make_pair("ReduceMin", std::unique_ptr(new DnnlReduceNodeCapability()))); + dnnl_ops_map_.emplace(std::make_pair("ReduceProd", std::unique_ptr(new DnnlReduceNodeCapability()))); + dnnl_ops_map_.emplace(std::make_pair("ReduceSum", std::unique_ptr(new DnnlReduceNodeCapability()))); + dnnl_ops_map_.emplace(std::make_pair("ReduceSumSquare", std::unique_ptr(new DnnlReduceNodeCapability()))); dnnl_ops_map_.emplace(std::make_pair("Relu", std::unique_ptr(new DnnlElementwiseCapability()))); dnnl_ops_map_.emplace(std::make_pair("Reshape", std::unique_ptr(new DnnlReshapeNodeCapability()))); dnnl_ops_map_.emplace(std::make_pair("Round", std::unique_ptr(new DnnlElementwiseCapability()))); diff --git 
a/onnxruntime/core/providers/dnnl/subgraph/dnnl_qattention.cc b/onnxruntime/core/providers/dnnl/subgraph/dnnl_qattention.cc index d3f2361a0b..b08c1d1bb9 100644 --- a/onnxruntime/core/providers/dnnl/subgraph/dnnl_qattention.cc +++ b/onnxruntime/core/providers/dnnl/subgraph/dnnl_qattention.cc @@ -45,97 +45,53 @@ dnnl::memory DnnlQAttention::ComputeTotalScale(DnnlSubgraphPrimitive& sp, DnnlNo } /* -input_tensor weight_tensor - + input_tensor weight_tensor \ / - \ / - \ / - \ / - - matmulinteger + matmulinteger with input and weight zero point, input and weight scale and bias | - | - | QKV - | - slice - - / | \ - / | \ - / | \ - / | \ - + / | \ |Q |K |V - | | | - reshape reshape reshape - | | | - permute permute permute - | | | - | transpose | - - \ | | - \ | | - \ | | - \ | | - - matmul | - - | | - - | | - - sqrt(head_dim) | | - - \ | | - - \ | | - \ | | - + matmul | + | | + | | + sqrt(head_dim) | | + \ | | + \ | | + \ | | div | - - | | - - (mask) | - - | / - - softmax / - - | / - + | | + (mask) / + | / + softmax / + | / matmul - | - permute - | - reshape - | - output */ /* diff --git a/onnxruntime/core/providers/dnnl/subgraph/dnnl_reduce.cc b/onnxruntime/core/providers/dnnl/subgraph/dnnl_reduce.cc new file mode 100644 index 0000000000..7be31bb49c --- /dev/null +++ b/onnxruntime/core/providers/dnnl/subgraph/dnnl_reduce.cc @@ -0,0 +1,342 @@ +// Copyright(C) 2021 Intel Corporation +// Licensed under the MIT License +#include "dnnl_reduce.h" +#include "dnnl_subgraph.h" +#include "dnnl_subgraph_primitive.h" +#include "core/providers/common.h" + +namespace onnxruntime { +namespace ort_dnnl { + +DnnlReduce::DnnlReduce() {} + +// assume all dims are available +void DnnlReduce::CreatePrimitive(DnnlSubgraphPrimitive& sp, DnnlNode& node) { + + using namespace dnnl; + + // get the engine, currently only support either single gpu or single cpu device + auto dnnl_engine = sp.GetEngine(); + + enum ReduceOp { + ReduceL1, + ReduceL2, + ReduceLogSum, + ReduceLogSumExp, + ReduceMax, + 
ReduceMean, + ReduceMin, + ReduceProd, + ReduceSum, + ReduceSumSquare + }; + + ReduceOp reduce_op = ReduceSum; + dnnl::algorithm algo = dnnl::algorithm::reduction_sum; + if (node.OpType() == "ReduceL1") { + reduce_op = ReduceL1; + algo = dnnl::algorithm::reduction_norm_lp_power_p_sum; + } else if (node.OpType() == "ReduceL2") { + reduce_op = ReduceL2; + algo = dnnl::algorithm::reduction_norm_lp_sum; + } else if(node.OpType() == "ReduceLogSum") { + reduce_op = ReduceLogSum; + algo = dnnl::algorithm::reduction_sum; + } else if(node.OpType() == "ReduceLogSumExp") { + reduce_op = ReduceLogSumExp; + algo = dnnl::algorithm::reduction_sum; + } else if (node.OpType() == "ReduceMax") { + reduce_op = ReduceMax; + algo = dnnl::algorithm::reduction_max; + } else if (node.OpType() == "ReduceMean") { + reduce_op = ReduceMean; + algo = dnnl::algorithm::reduction_mean; + } else if (node.OpType() == "ReduceMin") { + reduce_op = ReduceMin; + algo = dnnl::algorithm::reduction_min; + } else if (node.OpType() == "ReduceProd") { + reduce_op = ReduceProd; + algo = dnnl::algorithm::reduction_mul; + } else if (node.OpType() == "ReduceSum") { + reduce_op = ReduceSum; + algo = dnnl::algorithm::reduction_sum; + } else if (node.OpType() == "ReduceSumSquare") { + reduce_op = ReduceSumSquare; + algo = dnnl::algorithm::reduction_sum; + } + + + + auto opset = node.SinceVersion(); + dnnl::memory::dims axes; + if (reduce_op == ReduceSum) { + // in ReduceSum opset older than version 13 the Axes came in as an attribute + // after version 13 the axis is an optional tensor input. 
+ if (opset < 13) { + axes = ReadAxes(node); + } else { + if (node.Input(IN_AXES).Exists()) { + auto axes_mem = sp.GetMemory(node.Input(IN_AXES)); + dnnl::memory::dims axes_dims = axes_mem.get_desc().dims(); + int64_t* p_axes_data = (int64_t*)axes_mem.get_data_handle(); + axes = std::vector(p_axes_data, p_axes_data + axes_dims[0]); + } + } + } else { + axes = ReadAxes(node); + } + + auto src_mem = sp.GetMemoryInOrtFormat(node.Input(IN_DATA), dnnl_engine); + auto src_md = src_mem.get_desc(); + + if (reduce_op == ReduceSum) { + // If axes is empty and the noop_with_empty_axes != 0 return the IN_DATA as the output. + if (axes.empty()) { + if (NoOpWithEmptyAxes(node)) { + sp.SetMemory(node.Output(OUT_REDUCED), src_mem, true); + return; + } + } + } + + //We need to calculate output tensor shape + //First we initialize it with input shape and then we modify it based on the attribute values + //This is because the DNNL primitive functionality is determined by the input and output shapes. + auto src_dims = src_md.dims(); + auto ndim = src_dims.size(); + + // convert negative axis values to the positive axis + for (size_t i = 0; i < axes.size(); ++i) { + axes[i] = HandleNegativeAxis(axes[i], ndim); + } + // Handle out of order and repeating dims. + std::sort(axes.begin(), axes.end()); + axes.erase(std::unique(axes.begin(), axes.end()), axes.end()); + + // if axes is empty change all non-zero shape dims to 1 + if (axes.size() == 0) { + for (size_t i = 0; i < ndim; ++i) { + if (src_dims[i] != 0) + src_dims[i] = 1; + } + //If there is axis, then make the respective dimensions 1, keeping the other dimension values untouched. 
+ } else { + for (size_t i = 0; i < axes.size(); i++) { + if (src_dims[axes[i]] != 0) + src_dims[axes[i]] = 1; + } + } + + auto dst_shape = TensorShape(src_dims.data(), ndim); + dnnl::memory::dims dst_dims_mkl(dst_shape.GetDims().begin(), dst_shape.GetDims().end()); + auto dst_md = dnnl::memory::desc({dst_dims_mkl}, src_md.data_type(), dnnl::memory::format_tag::any); + + // Check to see if the destination shape and source shape are the same. + bool src_and_dst_dims_equal = true; + if (src_md.dims().size() == dst_md.dims().size()) { + for (size_t i = 0; i < src_md.dims().size(); ++i) { + if (src_md.dims()[i] != dst_md.dims()[i]) { + src_and_dst_dims_equal = false; + break; + } + } + } + + /* + * OneDNN will return an error if a reduction algorithm is called that does not result in a + * shape reduction. For this reason we have code paths that are taken if the source dimensions and + * destination dimensions are equal that will not call the reduction op. + * + * "ReduceLogSum" is equivalent to Log(ReduceSum(input)) + * - if the reduction op is called then the eltwise_log post op will be added to the reduction primitive. + * - if the reduction op is not called then the eltwise_log primitive is added as its own primitive + * - NOTE "ReduceLogSum" follows the code flow of "All other reduce ops" with the exception of the added + * post op and an extra check if src_dims == dest_dims.
+ * "ReduceLogSumExp" is equivalent to Log(ReduceSum(Exp(input))) + * - if the reduction op is called then the eltwise_exp primitive is added before the reduction op + * the eltwise_log post op will be added to the reduction primitive + * - if the reduction op is not called then the input is not modified since Log(Exp(input)) == input + * "ReduceSumSquare" is equivalent to ReduceSum(Square(input)) + * - the eltwise_square primitive is added before the reduction op + * - if the source and destination dimensions are not equal the reduction op is called + * All other reduce ops + * - if the source and destination dimensions are not equal call the reduction op + * - otherwise don't modify the input. + * + * After the Reduction, check the "KeepDims" attribute + * - if KeepDims == 1 the output is the result of the reduction op + * - if KeepDims == 0 we perform a squeeze operation on the output of the reduction op + * - NOTE: Even if reduction op is not called KeepDims attribute can result in the output being modified + */ + dnnl::memory reduce_src_mem; + dnnl::memory reduce_dst_mem; + dnnl::primitive_attr dnnl_primitive_attr; + if ((reduce_op == ReduceLogSum || reduce_op == ReduceLogSumExp ) && !src_and_dst_dims_equal) { + dnnl::post_ops eltwise_post_op; + eltwise_post_op.append_eltwise(1.0f, dnnl::algorithm::eltwise_log, 1.0f, 1.0f); + dnnl_primitive_attr.set_post_ops(eltwise_post_op); + } + + if (reduce_op == ReduceLogSumExp) { + if (!src_and_dst_dims_equal) { + auto elementwise_desc = dnnl::eltwise_forward::desc(dnnl::prop_kind::forward_inference, dnnl::algorithm::eltwise_exp, src_md); + auto elementwise_pd = dnnl::eltwise_forward::primitive_desc(elementwise_desc, dnnl_engine); + + auto elementwise_dst_mem = dnnl::memory(elementwise_pd.dst_desc(), dnnl_engine); + + auto elemenwise_primitive = dnnl::eltwise_forward(elementwise_pd); + sp.AddPrimitive(elemenwise_primitive, {{DNNL_ARG_SRC, src_mem}, + {DNNL_ARG_DST, elementwise_dst_mem}}); + auto reduce_desc =
dnnl::reduction::desc(algo, src_md, dst_md, 0.f, 0.f); + auto reduce_pd = dnnl::reduction::primitive_desc(reduce_desc, dnnl_primitive_attr, dnnl_engine); + + reduce_dst_mem = dnnl::memory(reduce_pd.dst_desc(), dnnl_engine); + + auto reducemean_op = dnnl::reduction(reduce_pd); + sp.AddPrimitive(reducemean_op, {{DNNL_ARG_SRC, elementwise_dst_mem}, + {DNNL_ARG_DST, reduce_dst_mem}}); + } else { + reduce_dst_mem = src_mem; + } + } else if(reduce_op == ReduceSumSquare) { + auto elementwise_desc = dnnl::eltwise_forward::desc(dnnl::prop_kind::forward_inference, dnnl::algorithm::eltwise_square, src_md); + auto elementwise_pd = dnnl::eltwise_forward::primitive_desc(elementwise_desc, dnnl_engine); + + auto elementwise_dst_mem = dnnl::memory(elementwise_pd.dst_desc(), dnnl_engine); + + auto elemenwise_primitive = dnnl::eltwise_forward(elementwise_pd); + sp.AddPrimitive(elemenwise_primitive, {{DNNL_ARG_SRC, src_mem}, + {DNNL_ARG_DST, elementwise_dst_mem}}); + if (!src_and_dst_dims_equal) { + auto reduce_desc = dnnl::reduction::desc(algo, src_md, dst_md, 0.f, 0.f); + auto reduce_pd = dnnl::reduction::primitive_desc(reduce_desc, dnnl_engine); + + reduce_dst_mem = dnnl::memory(reduce_pd.dst_desc(), dnnl_engine); + + auto reducemean_op = dnnl::reduction(reduce_pd); + sp.AddPrimitive(reducemean_op, {{DNNL_ARG_SRC, elementwise_dst_mem}, + {DNNL_ARG_DST, reduce_dst_mem}}); + } else { + reduce_dst_mem = elementwise_dst_mem; + } + } else { + // If calculated source and destination shape are the same do not do the reduction operation. + if (!src_and_dst_dims_equal) { + float p_val = 0.f; + if (reduce_op == ReduceL1) { + p_val = 1.0f; + } else if (reduce_op == ReduceL2) { + p_val = 2.0f; + } + + auto reduce_desc = dnnl::reduction::desc(algo, src_md, dst_md, p_val, 0.f); + auto reduce_pd = dnnl::reduction::primitive_desc(reduce_desc, dnnl_primitive_attr, dnnl_engine); + + // If using GPU this will move the memory from the CPU to the GPU. 
+ reduce_src_mem = sp.GetMemoryAndReshape(node.Input(IN_DATA), reduce_pd.src_desc(), dnnl_engine); + reduce_dst_mem = dnnl::memory(reduce_pd.dst_desc(), dnnl_engine); + + auto reducemean_op = dnnl::reduction(reduce_pd); + sp.AddPrimitive(reducemean_op, {{DNNL_ARG_SRC, reduce_src_mem}, + {DNNL_ARG_DST, reduce_dst_mem}}); + } else { + if (reduce_op == ReduceLogSum) { + auto elementwise_desc = dnnl::eltwise_forward::desc(dnnl::prop_kind::forward_inference, dnnl::algorithm::eltwise_log, src_md); + auto elementwise_pd = dnnl::eltwise_forward::primitive_desc(elementwise_desc, dnnl_engine); + + reduce_dst_mem = dnnl::memory(elementwise_pd.dst_desc(), dnnl_engine); + + auto elemenwise_primitive = dnnl::eltwise_forward(elementwise_pd); + sp.AddPrimitive(elemenwise_primitive, {{DNNL_ARG_SRC, src_mem}, + {DNNL_ARG_DST, reduce_dst_mem}}); + } else { + reduce_dst_mem = src_mem; + } + } + } + + + // If keepdims != 0 set the output to the reduce op results + auto keepdims = Keepdims(node); + if (keepdims) { + if (src_and_dst_dims_equal) { + sp.SetMemory(node.Output(OUT_REDUCED), reduce_dst_mem, true); + } else { + sp.SetMemory(node.Output(OUT_REDUCED), reduce_dst_mem); + } + // if keepdims == 0 we do a squeeze operation on reduce output shape. + } else { + std::vector output_shape; + size_t j = 0; + for (size_t i = 0; i < ndim; ++i) { + if ((j < axes.size() && axes[j] == static_cast(i)) || + (axes.size() == 0 && src_dims[i] == 1)) { + ORT_ENFORCE(src_dims[i] == 1, "Dimension of input ", i, " must be 1 instead of ", src_dims[i], + ". shape=", src_dims); + ++j; + continue; + } + + if ((j < axes.size() && axes[j] == static_cast(i) && src_dims[i] == 0) || + (axes.size() == 0 && src_dims[i] == 0)) { + ORT_ENFORCE(keepdims, + "Can't reduce on dim with value of 0 if 'keepdims' is false. " + "Invalid output shape would be produced. 
input_shape:", + TensorShape(src_md.dims())); + } + output_shape.push_back(src_dims[i]); + } + + // OneDNN does not support scalar output; if the output shape is {} change it to {1} + bool is_scalar_output = false; + if (output_shape.empty()) { + output_shape.push_back(1); + is_scalar_output = true; + } + dnnl::memory::desc squeeze_md(output_shape, node.Input(IN_DATA).Type(), sp.GetDnnlFormat(output_shape.size())); + dnnl::memory squeeze_mem = dnnl::memory(squeeze_md, dnnl_engine, nullptr); + // If the src and dst dims are not equal, reduce_dst_mem was allocated above and has a valid data handle here. + // Otherwise we must get the data handle at runtime using the AddReshape function. + // Reading the data handle directly is more efficient when it is possible. + if (!src_and_dst_dims_equal) { + squeeze_mem.set_data_handle(reduce_dst_mem.get_data_handle()); + } else { + sp.AddReshape(reduce_dst_mem, squeeze_mem); + } + sp.SetMemory(node.Output(OUT_REDUCED), squeeze_mem, true, is_scalar_output); + } +} + +std::vector DnnlReduce::ReadAxes(DnnlNode& node) { + auto attr = node.Attributes().find("axes"); + std::vector axes; + if (attr != node.Attributes().end()) { + auto& proto = attr->second(); + axes.reserve(proto.ints_size()); + for (int i = 0; i < proto.ints_size(); i++) { + axes.push_back(proto.ints(i)); + } + } + return axes; +} + +bool DnnlReduce::Keepdims(DnnlNode& node) { + auto attr = node.Attributes().find("keepdims"); + if (attr != node.Attributes().end() && + attr->second().i() == 0) { + return false; + } + return true; +} + +bool DnnlReduce::NoOpWithEmptyAxes(DnnlNode& node) { + auto attr = node.Attributes().find("noop_with_empty_axes"); + if (attr != node.Attributes().end() && + attr->second().i() != 0) { + return true; + } + return false; +} + +} // namespace ort_dnnl +} // namespace onnxruntime diff --git a/onnxruntime/core/providers/dnnl/subgraph/dnnl_reducemean.h b/onnxruntime/core/providers/dnnl/subgraph/dnnl_reduce.h similarity index 73% rename from
onnxruntime/core/providers/dnnl/subgraph/dnnl_reducemean.h rename to onnxruntime/core/providers/dnnl/subgraph/dnnl_reduce.h index 675fa0f0ec..b5a89fd53a 100644 --- a/onnxruntime/core/providers/dnnl/subgraph/dnnl_reducemean.h +++ b/onnxruntime/core/providers/dnnl/subgraph/dnnl_reduce.h @@ -8,18 +8,21 @@ namespace onnxruntime { namespace ort_dnnl { -class DnnlReduceMean { +class DnnlReduce { public: enum InputTensors : int { - IN_X = 0 + IN_DATA = 0, + IN_AXES = 1 }; enum OutputTensors : int { - OUT_Y = 0 + OUT_REDUCED = 0 }; - DnnlReduceMean(); + DnnlReduce(); void CreatePrimitive(DnnlSubgraphPrimitive& sp, DnnlNode& node); std::vector ReadAxes(DnnlNode& node); + bool Keepdims(DnnlNode& node); + bool NoOpWithEmptyAxes(DnnlNode& node); }; } // namespace ort_dnnl diff --git a/onnxruntime/core/providers/dnnl/subgraph/dnnl_reducemean.cc b/onnxruntime/core/providers/dnnl/subgraph/dnnl_reducemean.cc deleted file mode 100644 index 8f6f7c12c6..0000000000 --- a/onnxruntime/core/providers/dnnl/subgraph/dnnl_reducemean.cc +++ /dev/null @@ -1,75 +0,0 @@ -// Copyright(C) 2021 Intel Corporation -// Licensed under the MIT License -#include "dnnl_reducemean.h" -#include "dnnl_subgraph.h" -#include "dnnl_subgraph_primitive.h" - -namespace onnxruntime { -namespace ort_dnnl { - - -DnnlReduceMean::DnnlReduceMean() {} - -// assume all dims are available -void DnnlReduceMean::CreatePrimitive(DnnlSubgraphPrimitive& sp, DnnlNode& node) { - - using namespace dnnl; - - // get the engine, currently only support either single gpu or single cpu device - auto dnnl_engine = sp.GetEngine(); - - auto axes = ReadAxes(node); - - auto reducemean_src_mem = sp.GetMemory(node.Input(IN_X)); - auto src_md = reducemean_src_mem.get_desc(); - - //We need to calculate output tensor shape - //First we initialize it with input shape and then we modify it based on the attribute values - //This is because the DNNL primitive functionality is determined by the input and output shapes. 
- - auto src_dims = src_md.dims(); - auto ndim = src_dims.size(); - for (unsigned long int i = 0; i < ndim; i++) { - if (axes.size() == 0) - src_dims[i] = 1; //If no axis is specified, then output shape is just all 1's - else if (i < axes.size()) { - if (axes[i] < 0) - src_dims[ndim + axes[i]] = 1; - else - src_dims[axes[i]] = 1; - } //If there is axis, then make the respective dimensions 1, keeping the other dimension values untouched. - } - - auto dst_shape = TensorShape(src_dims.data(), ndim); - dnnl::memory::dims dst_dims_mkl(dst_shape.GetDims().begin(), dst_shape.GetDims().end()); - auto dst_md = dnnl::memory::desc({dst_dims_mkl}, src_md.data_type(), dnnl::memory::format_tag::any); - - auto reducemean_desc = dnnl::reduction::desc(dnnl::algorithm::reduction_mean, src_md, dst_md, 0.f, 0.f); - auto reducemean_pd = dnnl::reduction::primitive_desc(reducemean_desc, dnnl_engine); - - // If using GPU this will move the memory from the CPU to the GPU. - reducemean_src_mem = sp.GetMemoryAndReshape(node.Input(IN_X), reducemean_pd.src_desc(), dnnl_engine); - auto reducemean_dst_mem = dnnl::memory(reducemean_pd.dst_desc(), dnnl_engine); - - auto reducemean_op = dnnl::reduction(reducemean_pd); - sp.AddPrimitive(reducemean_op, {{DNNL_ARG_SRC, reducemean_src_mem}, - {DNNL_ARG_DST, reducemean_dst_mem}}); - - sp.SetMemory(node.Output(OUT_Y), reducemean_dst_mem); -} - -std::vector DnnlReduceMean::ReadAxes(DnnlNode& node) { - auto attr = node.Attributes().find("axes"); - std::vector axes; - if (attr != node.Attributes().end()) { - auto& proto = attr->second(); - axes.reserve(proto.ints_size()); - for (int i = 0; i < proto.ints_size(); i++) { - axes.push_back(proto.ints(i)); - } - } - return axes; -} - -} // namespace ort_dnnl -} // namespace onnxruntime diff --git a/onnxruntime/core/providers/dnnl/subgraph/dnnl_subgraph.cc b/onnxruntime/core/providers/dnnl/subgraph/dnnl_subgraph.cc index 2b56943bdc..b32402f1d0 100644 --- a/onnxruntime/core/providers/dnnl/subgraph/dnnl_subgraph.cc 
+++ b/onnxruntime/core/providers/dnnl/subgraph/dnnl_subgraph.cc @@ -175,6 +175,10 @@ NodeAttributes& DnnlNode::Attributes() { return *attr_; } +int DnnlNode::SinceVersion() { + return onnx_node_->SinceVersion(); +} + DnnlSubgraph::DnnlSubgraph(const GraphViewer& graph_viewer) : graph_viewer_(graph_viewer) { Build(); is_dynamic_ = false; diff --git a/onnxruntime/core/providers/dnnl/subgraph/dnnl_subgraph.h b/onnxruntime/core/providers/dnnl/subgraph/dnnl_subgraph.h index 6283bcda5d..963ed815c3 100644 --- a/onnxruntime/core/providers/dnnl/subgraph/dnnl_subgraph.h +++ b/onnxruntime/core/providers/dnnl/subgraph/dnnl_subgraph.h @@ -76,6 +76,7 @@ class DnnlNode { NodeAttributes& Attributes(); std::vector& Inputs(); std::vector& Outputs(); + int SinceVersion(); private: const Node* onnx_node_ = nullptr; diff --git a/onnxruntime/core/providers/dnnl/subgraph/dnnl_subgraph_primitive.cc b/onnxruntime/core/providers/dnnl/subgraph/dnnl_subgraph_primitive.cc index 583e6151b5..6860b1039d 100644 --- a/onnxruntime/core/providers/dnnl/subgraph/dnnl_subgraph_primitive.cc +++ b/onnxruntime/core/providers/dnnl/subgraph/dnnl_subgraph_primitive.cc @@ -16,7 +16,7 @@ #include "dnnl_pool.h" #include "dnnl_pow.h" #include "dnnl_qattention.h" -#include "dnnl_reducemean.h" +#include "dnnl_reduce.h" #include "dnnl_reshape.h" #include "dnnl_softmax.h" #include "dnnl_softmaxgrad.h" @@ -126,6 +126,7 @@ void DnnlSubgraphPrimitive::AddKernels() { std::unordered_set binary_ops = {"Add", "Div", "Mul", "Sub"}; std::unordered_set elementwise_ops = {"Abs", "Elu", "Exp", "LeakyRelu", "Log", "Relu", "Round", "Sigmoid", "Softplus", "Sqrt", "Tanh"}; std::unordered_set pool_ops = {"AveragePool", "GlobalAveragePool", "GlobalMaxPool", "MaxPool"}; + std::unordered_set reduce_ops = {"ReduceL1", "ReduceL2", "ReduceLogSum", "ReduceLogSumExp", "ReduceMax", "ReduceMean", "ReduceMin", "ReduceProd", "ReduceSum", "ReduceSumSquare"}; auto indices = subgraph_->GetDnnlNodesInTopologicalOrder(); for (auto index : indices) { 
@@ -158,8 +159,8 @@ void DnnlSubgraphPrimitive::AddKernels() { DnnlPow().CreatePrimitive(*this, node); } else if (node.OpType() == "QAttention") { DnnlQAttention().CreatePrimitive(*this, node); - } else if (node.OpType() == "ReduceMean") { - DnnlReduceMean().CreatePrimitive(*this, node); + } else if (reduce_ops.count(node.OpType())) { + DnnlReduce().CreatePrimitive(*this, node); } else if (node.OpType() == "Reshape") { DnnlReshape().CreatePrimitive(*this, node); } else if (node.OpType() == "Softmax") { diff --git a/onnxruntime/test/providers/cpu/reduction/reduction_ops_test.cc b/onnxruntime/test/providers/cpu/reduction/reduction_ops_test.cc index c841f28f79..55f516bed7 100644 --- a/onnxruntime/test/providers/cpu/reduction/reduction_ops_test.cc +++ b/onnxruntime/test/providers/cpu/reduction/reduction_ops_test.cc @@ -557,6 +557,28 @@ TEST(ReductionOpTest, ReduceLogSumExp_double) { test.Run(); } +TEST(ReductionOpTest, ReduceLogSumExp_float_no_reduction) { + OpTester test("ReduceLogSumExp"); + test.AddAttribute("axes", std::vector{0}); + test.AddAttribute("keepdims", (int64_t)0); + test.AddInput("data", {1, 2, 2}, + {1.0f, 2.0f, + 3.0f, 4.0f}); + test.AddOutput("reduced", {2, 2}, {1.f, 2.f, 3.f, 4.f}); + test.Run(); +} + +TEST(ReductionOpTest, ReduceLogSumExp_float_no_reduction_keepdims) { + OpTester test("ReduceLogSumExp"); + test.AddAttribute("axes", std::vector{0}); + test.AddAttribute("keepdims", (int64_t)1); + test.AddInput("data", {1, 2, 2}, + {1.0f, 2.0f, + 3.0f, 4.0f}); + test.AddOutput("reduced", {1, 2, 2}, {1.f, 2.f, 3.f, 4.f}); + test.Run(); +} + #if defined(USE_CUDA) || defined(USE_ROCM) TEST(ReductionOpTest, ReduceLogSumExp_half) { OpTester test("ReduceLogSumExp"); @@ -1082,6 +1104,24 @@ TEST(ReductionOpTest, ReduceMean0DTensor_double) { } #endif // !(defined USE_TVM) +TEST(ReductionOpTest, ReduceMean_keepdims_results_in_noop) { + OpTester test("ReduceMean"); + test.AddAttribute("axes", std::vector{0}); + test.AddAttribute("keepdims", (int64_t)1); + 
test.AddInput("data", {1, 3}, {1.0, 2.0, 3.0}); + test.AddOutput("reduced", {1, 3}, {1.0, 2.0, 3.0}); + test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); +} + +TEST(ReductionOpTest, ReduceMean_keepdims_results_in_shape_change) { + OpTester test("ReduceMean"); + test.AddAttribute("axes", std::vector{0}); + test.AddAttribute("keepdims", (int64_t)0); + test.AddInput("data", {1, 3}, {1.0, 2.0, 3.0}); + test.AddOutput("reduced", {3}, {1.0, 2.0, 3.0}); + test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); +} + TEST(ReductionOpTest, ReduceMin_default_axes_keepdims) { OpTester test("ReduceMin"); test.AddAttribute("keepdims", (int64_t)1); @@ -2450,7 +2490,7 @@ TEST(ReductionOpTest, OptimizeShapeForFastReduce_ReduceDimWithZero3) { ASSERT_EQ(fast_kind, FastReduceKind::kKR); } -TEST(ReductionOpTest, ReduceDimWithZero3) { +TEST(ReductionOpTest, ReduceSum_ReduceDimWithZero3) { auto run = [](OpTester& tester, const std::string& error_msg = "") { auto expect = error_msg.empty() ? OpTester::ExpectResult::kExpectSuccess : OpTester::ExpectResult::kExpectFailure; diff --git a/orttraining/orttraining/test/gradient/gradient_ops_test.cc b/orttraining/orttraining/test/gradient/gradient_ops_test.cc index 84e35add8c..9b95925410 100644 --- a/orttraining/orttraining/test/gradient/gradient_ops_test.cc +++ b/orttraining/orttraining/test/gradient/gradient_ops_test.cc @@ -443,7 +443,7 @@ TEST(GradientCheckerTest, LogGrad) { float max_error; #ifdef USE_DNNL - float error_tolerance = 3e-3f; + float error_tolerance = 4e-3f; #else float error_tolerance = 1e-3f; #endif