diff --git a/dockerfiles/Dockerfile.openvino b/dockerfiles/Dockerfile.openvino index 7f1238818c..958d31bc15 100644 --- a/dockerfiles/Dockerfile.openvino +++ b/dockerfiles/Dockerfile.openvino @@ -7,7 +7,7 @@ FROM ubuntu:16.04 RUN apt update && \ apt -y install git sudo wget \ - zip x11-apps lsb-core cpio libboost-python-dev libpng-dev zlib1g-dev libnuma1 ocl-icd-libopencl1 clinfo libboost-filesystem1.58.0 libboost-thread1.58.0 protobuf-compiler libprotoc-dev libusb-1.0-0-dev + zip x11-apps lsb-core cpio libboost-python-dev libpng-dev zlib1g-dev libnuma1 ocl-icd-libopencl1 clinfo libboost-filesystem1.58.0 libboost-thread1.58.0 protobuf-compiler libprotoc-dev libusb-1.0-0-dev autoconf automake libtool ARG DEVICE=CPU_FP32 ARG ONNXRUNTIME_REPO=https://github.com/microsoft/onnxruntime diff --git a/include/onnxruntime/core/graph/graph.h b/include/onnxruntime/core/graph/graph.h index f08df584ba..12245aeb51 100644 --- a/include/onnxruntime/core/graph/graph.h +++ b/include/onnxruntime/core/graph/graph.h @@ -1106,6 +1106,14 @@ class Graph { // Graph value_info. std::vector value_info_; + // Strings which have been used as node names. + // New node name should not conflict with this set. + std::unordered_set generated_node_names_; + + // Strings which have been used as node_arg names. + // New node_arg name should not conflict with this set. + std::unordered_set generated_node_arg_names_; + // All node args owned by <*this> graph. Key is node arg name. std::unordered_map> node_args_; diff --git a/onnxruntime/core/graph/graph.cc b/onnxruntime/core/graph/graph.cc index a5e4c3c73f..d939d25876 100644 --- a/onnxruntime/core/graph/graph.cc +++ b/onnxruntime/core/graph/graph.cc @@ -2378,28 +2378,57 @@ Node& Graph::AddNode(const NodeProto& node_proto, } std::string Graph::GenerateNodeArgName(const std::string& base_name) { - std::string new_name; - do { + std::string new_name = base_name; + // Check if new_name has been used as any of node_args_' names. 
+ // Check if new_name has been generated by this function. + // If both are not, add new_name into name set and return the new_name + // as the generated name. Otherwise, keep generating new names. + while (node_args_.find(new_name) != node_args_.end() || + generated_node_arg_names_.find(new_name) != generated_node_arg_names_.end()) { std::ostringstream str; - str << base_name << "_" << name_generator_++; + str << base_name << "_token_" << name_generator_++; new_name = str.str(); - } while (node_args_.find(new_name) != node_args_.end()); + } + + generated_node_arg_names_.insert(new_name); return new_name; } std::string Graph::GenerateNodeName(const std::string& base_name) { - std::string new_name; - bool keep_going = true; + // Define name-checking function for node name. + // Return true if the input name hasn't been used. Otherwise, return false. + auto name_is_ok = [&] (const std::string name) { + for (auto it = nodes_.begin(); it != nodes_.end(); ++it) { + if (*it == nullptr) { + continue; + } + if (it->get()->Name() != name) { + continue; + } + // Find a matched name so we cannot reuse the input name. + return false; + } - do { + if (generated_node_names_.find(name) != generated_node_names_.end()) { + // Find a matched name so we cannot reuse the input name. + return false; + } + + // The input name can be reused. + return true; + }; + + // Start with the input name. + std::string new_name = base_name; + + while (!name_is_ok(new_name)) { std::ostringstream str; - str << base_name << "_" << name_generator_++; + str << base_name << "_token_" << name_generator_++; new_name = str.str(); + } - keep_going = std::find_if(nodes_.cbegin(), nodes_.cend(), [&new_name](const std::unique_ptr& n) { - return (n != nullptr) && (n->Name() == new_name); - }) != nodes_.end(); - } while (keep_going); + // Make sure this new_name is not going to be reused. 
+ generated_node_names_.insert(new_name); return new_name; } diff --git a/onnxruntime/core/optimizer/fast_gelu_fusion.cc b/onnxruntime/core/optimizer/fast_gelu_fusion.cc index ca122f79ba..c71555bb83 100644 --- a/onnxruntime/core/optimizer/fast_gelu_fusion.cc +++ b/onnxruntime/core/optimizer/fast_gelu_fusion.cc @@ -108,7 +108,7 @@ MatchResult FastGeluFusion::CheckFirstFormula(Graph& graph, Node& mul1_node, MatchResult FastGeluFusion::CheckSecondFormula(Graph& graph, Node& pow1_node, std::vector>& nodes_to_fuse) const { MatchResult matchResult{false, nullptr, nullptr}; - if (!graph_utils::IsSupportedOptypeVersionAndDomain(pow1_node, "Pow", {7}) || + if (!graph_utils::IsSupportedOptypeVersionAndDomain(pow1_node, "Pow", {7, 12}) || !graph_utils::IsSupportedProvider(pow1_node, GetCompatibleExecutionProviders()) || pow1_node.GetOutputEdgesCount() != 1 || !IsSupportedDataType(pow1_node)) { diff --git a/onnxruntime/core/optimizer/layer_norm_fusion.cc b/onnxruntime/core/optimizer/layer_norm_fusion.cc index 327639f425..dddb62bf80 100644 --- a/onnxruntime/core/optimizer/layer_norm_fusion.cc +++ b/onnxruntime/core/optimizer/layer_norm_fusion.cc @@ -169,10 +169,10 @@ Status LayerNormFusion::ApplyImpl(Graph& graph, bool& modified, int graph_level, continue; } nodes_to_remove.push_back(reduce_mean2_node); - + // Traceback the reduceMean node to find pow --> reduceMean Node& pow_node = *graph.GetNode(reduce_mean2_node.InputNodesBegin()->Index()); - if (!graph_utils::IsSupportedOptypeVersionAndDomain(pow_node, "Pow", {7}) || + if (!graph_utils::IsSupportedOptypeVersionAndDomain(pow_node, "Pow", {7, 12}) || pow_node.GetExecutionProviderType() != reduce_mean_node.GetExecutionProviderType() || pow_node.GetOutputEdgesCount() != 1 || !IsSupportedDataType(pow_node)) { diff --git a/onnxruntime/core/optimizer/reshape_fusion.cc b/onnxruntime/core/optimizer/reshape_fusion.cc index 5d39939d30..399dcf744c 100644 --- a/onnxruntime/core/optimizer/reshape_fusion.cc +++ 
b/onnxruntime/core/optimizer/reshape_fusion.cc @@ -46,7 +46,7 @@ each of which is a constant initializer or a Shape->Gather->Unsqueeze chain with index corresponding to the index of the argument.) Before fusion: - [Sub-graph Root Node ] + [Sub-graph Root] | / \ | Shape Shape | | | @@ -61,13 +61,14 @@ Before fusion: Reshape After fusion: - [Sub-graph Root Node] (Constant Initializer) + [Sub-graph Root] (Constant Initializer) \ [0, a, 0, b] \ / Reshape */ bool ReshapeFusion::Fuse_Subgraph1(Node& reshape, Graph& graph, const logging::Logger& logger) { - const Node* p_root = graph_utils::GetInputNode(reshape, 0); + // The root could be either a graph input or a node so use node arg to compare. + const NodeArg& root_input = *(reshape.InputDefs()[0]); const Node* p_concat = graph_utils::GetInputNode(reshape, 1); if (nullptr == p_concat) { @@ -90,11 +91,8 @@ bool ReshapeFusion::Fuse_Subgraph1(Node& reshape, Graph& graph, const logging::L enum class NodeType { Unsqueeze, Gather, Shape }; std::set> candidates_for_removal; for (int i = 0; i < concat_input_count; ++i) { - // First check if the i-th argument is an initializer. - // We do not check whether the initializer is constant. - // Some model uses constant initializer and some does not. - // Here we assume that no one will override the initializer using graph input. - if (optimizer_utils::AppendTensorFromInitializer(graph, *(concat.InputDefs()[i]), shape_value)) { + // First check if the i-th argument is a constant initializer. 
+ if (optimizer_utils::AppendTensorFromInitializer(graph, *(concat.InputDefs()[i]), shape_value, true)) { continue; } @@ -113,7 +111,8 @@ bool ReshapeFusion::Fuse_Subgraph1(Node& reshape, Graph& graph, const logging::L const Node& gather = edges[1]->GetNode(); const Node& shape = edges[2]->GetNode(); - if (graph_utils::GetInputNode(shape, 0) != p_root) { + const NodeArg& shape_input = *(shape.InputDefs()[0]); + if (shape_input.Name() != root_input.Name()) { return false; } diff --git a/onnxruntime/core/optimizer/utils.cc b/onnxruntime/core/optimizer/utils.cc index 4aff66352a..052f5ff67f 100644 --- a/onnxruntime/core/optimizer/utils.cc +++ b/onnxruntime/core/optimizer/utils.cc @@ -141,7 +141,11 @@ bool IsAttributeWithExpectedValues(const Node& node, const std::string& attr_nam return true; } -bool AppendTensorFromInitializer(const Graph& graph, const NodeArg& input_arg, std::vector& data) { +bool AppendTensorFromInitializer(const Graph& graph, const NodeArg& input_arg, std::vector& data, bool require_constant) { + if (require_constant && !graph_utils::IsConstantInitializer(graph, input_arg.Name(), true)) { + return false; + } + const ONNX_NAMESPACE::TensorProto* tensor_proto = nullptr; if (!graph.GetInitializedTensor(input_arg.Name(), tensor_proto)) { return false; diff --git a/onnxruntime/core/optimizer/utils.h b/onnxruntime/core/optimizer/utils.h index 7887bfd360..0cafe0d49c 100644 --- a/onnxruntime/core/optimizer/utils.h +++ b/onnxruntime/core/optimizer/utils.h @@ -42,7 +42,7 @@ bool IsAttributeWithExpectedValues(const Node& node, const std::string& attr_nam /** Get values of an integer tensor from initializer, and append them to a vector. @remarks only support int32 and int64 tensor. This function does not clear vector before appending. 
*/ -bool AppendTensorFromInitializer(const Graph& graph, const NodeArg& input_arg, std::vector& data); +bool AppendTensorFromInitializer(const Graph& graph, const NodeArg& input_arg, std::vector& data, bool require_constant = true); /** Check Shape of node input or output. @remarks when expected dim value > 0, the dim is expected to known and match the dim value. diff --git a/onnxruntime/core/providers/acl/nn/conv.cc b/onnxruntime/core/providers/acl/nn/conv.cc index f95d6cd6ef..c7ccaf5f04 100644 --- a/onnxruntime/core/providers/acl/nn/conv.cc +++ b/onnxruntime/core/providers/acl/nn/conv.cc @@ -208,7 +208,7 @@ Status Conv::Compute(OpKernelContext* context) const { if(optimizable) { //optimized depthwise convolution #if defined(ACL_1902) || defined(ACL_1905) - auto layer = std::make_shared(); + auto layer = std::make_shared(); #endif #ifdef ACL_1908 auto layer = std::make_shared(); diff --git a/onnxruntime/core/providers/cpu/math/clip.cc b/onnxruntime/core/providers/cpu/math/clip.cc index 75ae6d4cdd..92c7590d53 100644 --- a/onnxruntime/core/providers/cpu/math/clip.cc +++ b/onnxruntime/core/providers/cpu/math/clip.cc @@ -3,6 +3,7 @@ #include "core/providers/cpu/math/clip.h" #include "core/framework/data_types_internal.h" +#include "core/util/math_cpuonly.h" namespace onnxruntime { @@ -31,6 +32,17 @@ ONNX_CPU_OPERATOR_VERSIONED_KERNEL( REG_KERNEL_NONTEMPL(Clip, 12, Clip, float, double, int8_t, uint8_t, int64_t, uint64_t); +template +Status Clip_6::Compute(OpKernelContext* ctx) const { + const auto* X = ctx->Input(0); + Tensor* Y = ctx->Output(0, X->Shape()); + EigenVectorMap(Y->template MutableData(), Y->Shape().Size()) = + ConstEigenVectorMap(X->template Data(), X->Shape().Size()) + .cwiseMax(this->min_) + .cwiseMin(this->max_); + return Status::OK(); +} + template struct Clip::ComputeImpl { void operator()(const Tensor* X, const Tensor* min, const Tensor* max, Tensor* Y) const { diff --git a/onnxruntime/core/providers/cpu/math/clip.h 
b/onnxruntime/core/providers/cpu/math/clip.h index b365aa33fd..b7c6032c6b 100644 --- a/onnxruntime/core/providers/cpu/math/clip.h +++ b/onnxruntime/core/providers/cpu/math/clip.h @@ -5,7 +5,6 @@ #include "core/common/common.h" #include "core/framework/op_kernel.h" -#include "core/util/math_cpuonly.h" namespace onnxruntime { @@ -33,15 +32,7 @@ class Clip_6 final : public clip_internal::Clip_6Base, public OpKernel { explicit Clip_6(const OpKernelInfo& info) : clip_internal::Clip_6Base(info), OpKernel(info) { } - Status Compute(OpKernelContext* ctx) const override { - const auto* X = ctx->Input(0); - Tensor* Y = ctx->Output(0, X->Shape()); - EigenVectorMap(Y->template MutableData(), Y->Shape().Size()) = - ConstEigenVectorMap(X->template Data(), X->Shape().Size()) - .cwiseMax(this->min_) - .cwiseMin(this->max_); - return Status::OK(); - } + Status Compute(OpKernelContext* ctx) const override; }; // Since version 11. Min and Max are inputs diff --git a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/GraphDescBuilder.cpp b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/GraphDescBuilder.cpp index f815793d97..d806303fa0 100644 --- a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/GraphDescBuilder.cpp +++ b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/GraphDescBuilder.cpp @@ -14,8 +14,21 @@ namespace Dml::GraphDescBuilder // mismatch is fixed (WindowsAI: 21114358, Lotus: 1953), this workaround should be removed. static std::string GetFusedNodeArgNameMatchingGraph(const std::string& fusedNodeArgeName) { - // The suffix used when inserting mem copies is equal to the below, followed by an incrementing number. - const char* suffix = strstr(fusedNodeArgeName.c_str(), "_DmlExecutionProvider_"); + const char* suffix = nullptr; + + // The suffix used when inserting mem copies is equal to the below, probably followed by an incrementing number. 
+ if (!suffix) { + suffix = strstr(fusedNodeArgeName.c_str(), "_DmlExecutionProvider_"); + } + + // The suffix used when inserting mem copies is equal to the below, not followed by an incrementing number. + if (!suffix) { + suffix = strstr(fusedNodeArgeName.c_str(), "_DmlExecutionProvider"); + } + + if (!suffix) { + suffix = strstr(fusedNodeArgeName.c_str(), "_token_"); + } if (suffix) { @@ -23,9 +36,9 @@ namespace Dml::GraphDescBuilder fusedNodeArgeName.begin(), fusedNodeArgeName.begin() + (suffix - fusedNodeArgeName.c_str()) ); + } else { + return fusedNodeArgeName; } - - return fusedNodeArgeName; } const std::string& GetUniqueNodeName(const onnxruntime::Node& node) diff --git a/onnxruntime/test/onnx/main.cc b/onnxruntime/test/onnx/main.cc index 67f8a2b9dd..92db7addb4 100644 --- a/onnxruntime/test/onnx/main.cc +++ b/onnxruntime/test/onnx/main.cc @@ -495,7 +495,6 @@ int real_main(int argc, char* argv[], Ort::Env& env) { {"bitshift_left_uint16", "BitShift(11) uint16 support not enabled currently"}, {"dropout_default", "result differs", {"onnxtip"}}, {"dropout_random", "result differs", {"onnxtip"}}, - {"celu", "invalid model", {"onnxtip"}}, {"maxunpool_export_with_output_shape", "Invalid output in ONNX test. 
See https://github.com/onnx/onnx/issues/2398"} }; diff --git a/onnxruntime/test/optimizer/graph_transform_test.cc b/onnxruntime/test/optimizer/graph_transform_test.cc index 7ff3f75e3d..6aae2911c0 100644 --- a/onnxruntime/test/optimizer/graph_transform_test.cc +++ b/onnxruntime/test/optimizer/graph_transform_test.cc @@ -1116,6 +1116,27 @@ TEST_F(GraphTransformationTests, ReshapeFusionInternalReuseTest) { } } + +TEST_F(GraphTransformationTests, ReshapeFusionGraphInputsTest) { + auto model_uri = MODEL_FOLDER "fusion/reshape_fusion_with_graph_inputs.onnx"; + std::shared_ptr p_model; + ASSERT_STATUS_OK(Model::Load(model_uri, p_model, nullptr, *logger_)); + Graph& graph = p_model->MainGraph(); + + onnxruntime::GraphTransformerManager graph_transformation_mgr{5}; + graph_transformation_mgr.Register(onnxruntime::make_unique(), TransformerLevel::Level1); + auto ret = graph_transformation_mgr.ApplyTransformers(graph, TransformerLevel::Level1, *logger_); + ASSERT_TRUE(ret.IsOK()); + + std::map op_to_count = CountOpsInGraph(graph); + ASSERT_EQ(op_to_count["Shape"], 1); + ASSERT_EQ(op_to_count["Gather"], 1); + ASSERT_EQ(op_to_count["Unsqueeze"], 1); + ASSERT_EQ(op_to_count["Concat"], 1); + ASSERT_EQ(op_to_count["Reshape"], 1); +} + + TEST_F(GraphTransformationTests, ExpandElimination) { auto model_uri = MODEL_FOLDER "expand_elimination.onnx"; std::shared_ptr model; diff --git a/onnxruntime/test/python/onnx_backend_test_series.py b/onnxruntime/test/python/onnx_backend_test_series.py index de036c8b1f..5b9d8760ae 100644 --- a/onnxruntime/test/python/onnx_backend_test_series.py +++ b/onnxruntime/test/python/onnx_backend_test_series.py @@ -91,7 +91,6 @@ def create_backend_test(testname=None): '^test_batchnorm_epsilon_training_mode_cpu', '^test_batchnorm_example_old_cpu', '^test_batchnorm_example_training_mode_cpu', - '^test_celu_cpu', '^test_dropout_default_cpu', '^test_dropout_random_cpu', '^test_einsum_batch_diagonal_cpu', diff --git 
a/onnxruntime/test/testdata/transform/fusion/reshape_fusion_gen.py b/onnxruntime/test/testdata/transform/fusion/reshape_fusion_gen.py index 3f01ffea8f..156d9a2147 100644 --- a/onnxruntime/test/testdata/transform/fusion/reshape_fusion_gen.py +++ b/onnxruntime/test/testdata/transform/fusion/reshape_fusion_gen.py @@ -70,3 +70,30 @@ graph = helper.make_graph( save_model(graph, 'reshape_fusion_internal_node_is_graph_output.onnx') + + +graph = helper.make_graph( + [ # nodes + helper.make_node("Shape", ["query"], ["shape0_out"], "shape0"), + helper.make_node("Gather", ["shape0_out", "indices0"], ["gather0_out"], "gather0", axis=0), + helper.make_node("Unsqueeze", ["gather0_out"], ["unsqueeze0_out"], "unsqueeze0", axes=[0]), + helper.make_node("Concat", ["a", "unsqueeze0_out"], ["concat_out"], "concat", axis=0), + helper.make_node("Reshape", ["doc_word_mask", "concat_out"], ["Result"], "reshape"), + ], + "Reshape_Fusion", #name + [ # inputs + helper.make_tensor_value_info('query', TensorProto.FLOAT, [1, 50]), + helper.make_tensor_value_info('doc_word_mask', TensorProto.FLOAT, [1, 200, 50]), + ], + [ # outputs + helper.make_tensor_value_info('Result', TensorProto.FLOAT, [10, 20, 'unk']), + ], + [ # initializers + helper.make_tensor('a', TensorProto.INT64, [1], [-1]), + helper.make_tensor('indices0', TensorProto.INT64, [], [1]), + ] +) + +save_model(graph, 'reshape_fusion_with_graph_inputs.onnx') + + diff --git a/onnxruntime/test/testdata/transform/fusion/reshape_fusion_with_graph_inputs.onnx b/onnxruntime/test/testdata/transform/fusion/reshape_fusion_with_graph_inputs.onnx new file mode 100644 index 0000000000..e3609a566d Binary files /dev/null and b/onnxruntime/test/testdata/transform/fusion/reshape_fusion_with_graph_inputs.onnx differ diff --git a/orttraining/orttraining/core/optimizer/insert_output_rewriter.cc b/orttraining/orttraining/core/optimizer/insert_output_rewriter.cc index b1ef4b18e5..ceff7c3f77 100644 --- 
a/orttraining/orttraining/core/optimizer/insert_output_rewriter.cc +++ b/orttraining/orttraining/core/optimizer/insert_output_rewriter.cc @@ -25,7 +25,7 @@ Status InsertMaxPoolOutput::Apply(Graph& graph, Node& node, RewriteRuleEffect& r } bool InsertMaxPoolOutput::SatisfyCondition(const Graph& /*graph*/, const Node& node, const logging::Logger& /*logger*/) const { - if (graph_utils::IsSupportedOptypeVersionAndDomain(node, "MaxPool", {8}) && + if (graph_utils::IsSupportedOptypeVersionAndDomain(node, "MaxPool", {8, 10, 11, 12}) && node.OutputDefs().size() == 1) { return true; }