From 92b8a7a2be76de41e55b6026affc2af487f3e66f Mon Sep 17 00:00:00 2001 From: pengwa Date: Fri, 21 Feb 2020 18:25:43 +0800 Subject: [PATCH] GPT2 Gelu Fusion & Test (#3009) * GPT2 Gelu Fusion & Test * change header path * Refine code & add missing test onnx file * Fix builds & refine float/double/fp16 compare. * Fix builds * Add Bias Check and UTs * Fix build and uts * Fuse with second formula & test * minor change * disable FastGelu to see whether the builds can pass * Verify where is wrong * disable for debugging * Revert "disable for debugging" This reverts commit 535c0817fb36fb95a75773a7f00c8b969dd5362c. * Revert "Verify where is wrong" This reverts commit ffc43ec1d136636ba2cee30df49f563a75e84676. * disable the transformer for inference currently * Enable FastGeluFusion and fix segement fault when run bertsquad10.onnx test * Add more Unit tests convering Gelu subgraph use graph input/output (cherry picked from commit 0739ab985240c6d9acdb8f0afd40c5fb316166af) * Mode Bias Fusion in BiasGelu.cc Co-authored-by: Changming Sun --- .../core/optimizer/bias_gelu_fusion.cc | 14 +- .../core/optimizer/fast_gelu_fusion.cc | 244 ++++++++++++++++++ onnxruntime/core/optimizer/fast_gelu_fusion.h | 39 +++ .../core/optimizer/graph_transformer_utils.cc | 2 + onnxruntime/core/optimizer/utils.cc | 44 +++- onnxruntime/core/optimizer/utils.h | 10 + .../test/optimizer/graph_transform_test.cc | 158 ++++++++++++ .../testdata/transform/fusion/fast_gelu.onnx | Bin 0 -> 687 bytes .../testdata/transform/fusion/fast_gelu.py | 172 ++++++++++++ .../testdata/transform/fusion/fast_gelu2.onnx | Bin 0 -> 642 bytes .../testdata/transform/fusion/fast_gelu2.py | 163 ++++++++++++ .../fusion/fast_gelu2_use_graph_input.onnx | Bin 0 -> 554 bytes .../fusion/fast_gelu2_with_bias.onnx | Bin 0 -> 933 bytes .../fast_gelu2_with_bias_use_graph_input.onnx | Bin 0 -> 867 bytes .../fusion/fast_gelu_use_graph_input.onnx | Bin 0 -> 588 bytes .../transform/fusion/fast_gelu_with_bias.onnx | Bin 0 -> 966 bytes 
.../fast_gelu_with_bias_use_graph_input.onnx | Bin 0 -> 900 bytes 17 files changed, 837 insertions(+), 9 deletions(-) create mode 100644 onnxruntime/core/optimizer/fast_gelu_fusion.cc create mode 100644 onnxruntime/core/optimizer/fast_gelu_fusion.h create mode 100644 onnxruntime/test/testdata/transform/fusion/fast_gelu.onnx create mode 100644 onnxruntime/test/testdata/transform/fusion/fast_gelu.py create mode 100644 onnxruntime/test/testdata/transform/fusion/fast_gelu2.onnx create mode 100644 onnxruntime/test/testdata/transform/fusion/fast_gelu2.py create mode 100644 onnxruntime/test/testdata/transform/fusion/fast_gelu2_use_graph_input.onnx create mode 100644 onnxruntime/test/testdata/transform/fusion/fast_gelu2_with_bias.onnx create mode 100644 onnxruntime/test/testdata/transform/fusion/fast_gelu2_with_bias_use_graph_input.onnx create mode 100644 onnxruntime/test/testdata/transform/fusion/fast_gelu_use_graph_input.onnx create mode 100644 onnxruntime/test/testdata/transform/fusion/fast_gelu_with_bias.onnx create mode 100644 onnxruntime/test/testdata/transform/fusion/fast_gelu_with_bias_use_graph_input.onnx diff --git a/onnxruntime/core/optimizer/bias_gelu_fusion.cc b/onnxruntime/core/optimizer/bias_gelu_fusion.cc index 370e8b144b..5bdad45b9c 100644 --- a/onnxruntime/core/optimizer/bias_gelu_fusion.cc +++ b/onnxruntime/core/optimizer/bias_gelu_fusion.cc @@ -64,20 +64,28 @@ Status BiasGelu::ApplyImpl(Graph& graph, bool& modified, int graph_level, const } const Node& next_node = (*next_node_itr); - if (!graph_utils::IsSupportedOptypeVersionAndDomain(next_node, "Gelu", {1}, kMSDomain) || + if (!(graph_utils::IsSupportedOptypeVersionAndDomain(next_node, "Gelu", {1}, kMSDomain) || + graph_utils::IsSupportedOptypeVersionAndDomain(next_node, "FastGelu", {1}, kMSDomain)) || next_node.GetExecutionProviderType() != node.GetExecutionProviderType()) { continue; } + bool is_fast_gelu = next_node.OpType().compare("FastGelu") == 0; + if (is_fast_gelu && 
next_node.InputDefs().size() > 1) { + continue; + } + if (!graph.GetNodeOutputsInGraphOutputs(node).empty()) { continue; } Node& add_node = node; Node& gelu_node = const_cast(next_node); + std::string op_type = "BiasGelu"; + if (is_fast_gelu) op_type = "FastGelu"; - Node& gelu_add_fusion_node = graph.AddNode(graph.GenerateNodeName("BiasGelu"), - "BiasGelu", + Node& gelu_add_fusion_node = graph.AddNode(graph.GenerateNodeName(op_type), + op_type, "fused Add and Gelu", gelu_input, {}, diff --git a/onnxruntime/core/optimizer/fast_gelu_fusion.cc b/onnxruntime/core/optimizer/fast_gelu_fusion.cc new file mode 100644 index 0000000000..d0394a0da5 --- /dev/null +++ b/onnxruntime/core/optimizer/fast_gelu_fusion.cc @@ -0,0 +1,244 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +#include "core/optimizer/initializer.h" +#include "core/optimizer/fast_gelu_fusion.h" +#include "core/optimizer/utils.h" +#include "core/graph/graph_utils.h" +#include "float.h" +#include + +using namespace ONNX_NAMESPACE; +using namespace onnxruntime::common; +namespace onnxruntime { + +// FastGelu supports limited data types. 
+static std::vector supported_data_types{"tensor(float16)", "tensor(float)"}; + +static bool CheckNode(const Node& node, const std::string& op_name, int32_t opset_version, ProviderType provider, + bool require_single_output=false){ + return graph_utils::IsSupportedOptypeVersionAndDomain(node, op_name, {opset_version}) && + node.GetExecutionProviderType() == provider && + optimizer_utils::IsSupportedDataType(node, supported_data_types) && + (!require_single_output || node.GetOutputEdgesCount() == 1); +} + +MatchResult FastGeluFusion::CheckFirstFormula(Graph& graph, Node& mul1_node, + std::vector>& nodes_to_fuse) const { + MatchResult matchResult{false, nullptr, nullptr}; + if (!graph_utils::IsSupportedOptypeVersionAndDomain(mul1_node, "Mul", {7}) || + !graph_utils::IsSupportedProvider(mul1_node, GetCompatibleExecutionProviders()) || + mul1_node.GetOutputEdgesCount() != 1 || + !optimizer_utils::IsSupportedDataType(mul1_node, supported_data_types)) { + return matchResult; + } + + int32_t input_index = -1; + const float mul_val = 0.044715f; + for (auto i = 0; i < 2; i++) { + if (optimizer_utils::IsInitializerWithExpectedValue(graph, *(mul1_node.InputDefs()[i]), mul_val, true)){ + input_index = i; + break; + } + } + + if (input_index == -1) return matchResult; + + NodeArg* gelu_without_bias_input_arg = mul1_node.MutableInputDefs()[(input_index + 1) % 2]; + nodes_to_fuse.push_back(mul1_node); + + + Node& mul2_node = *graph.GetNode(mul1_node.OutputNodesBegin()->Index()); + input_index = optimizer_utils::IndexOfNodeInput(mul2_node, *mul1_node.MutableOutputDefs()[0]); + if (!CheckNode(mul2_node, "Mul", 7, mul1_node.GetExecutionProviderType(), true) || + mul2_node.MutableInputDefs()[(input_index + 1) % 2]->Name() != gelu_without_bias_input_arg->Name()) { + return matchResult;; + } + nodes_to_fuse.push_back(mul2_node); + + + Node& add1_node = *graph.GetNode(mul2_node.OutputNodesBegin()->Index()); + input_index = optimizer_utils::IndexOfNodeInput(add1_node, 
*mul2_node.MutableOutputDefs()[0]); + if (!CheckNode(add1_node, "Add", 7, mul1_node.GetExecutionProviderType(), true) || + !optimizer_utils::IsInitializerWithExpectedValue(graph, *(add1_node.InputDefs()[(input_index + 1) % 2]), 1.0f, true)) { + return matchResult; + } + nodes_to_fuse.push_back(add1_node); + + + Node& mul3_node = *graph.GetNode(add1_node.OutputNodesBegin()->Index()); + if (!CheckNode(mul3_node, "Mul", 7, mul1_node.GetExecutionProviderType(), true)) { + return matchResult; + } + nodes_to_fuse.push_back(mul3_node); + + input_index = optimizer_utils::IndexOfNodeInput(mul3_node, *add1_node.MutableOutputDefs()[0]); + const Node* p_mul3_input_node = graph_utils::GetInputNode(mul3_node, (input_index + 1) % 2); + if (p_mul3_input_node == nullptr) return matchResult; + Node& mul4_node = const_cast(*p_mul3_input_node); + if (!CheckNode(mul4_node, "Mul", 7, mul1_node.GetExecutionProviderType(), true)) { + return matchResult; + } + + input_index = -1; + const float mul4_val = 0.7978845834732056f; + for (auto i = 0; i < 2; i++) { + if (optimizer_utils::IsInitializerWithExpectedValue(graph, *(mul4_node.InputDefs()[i]), mul4_val, true)){ + input_index = i; + break; + } + } + + if (input_index == -1 || mul4_node.InputDefs()[(input_index + 1) % 2]->Name() != gelu_without_bias_input_arg->Name()) + return matchResult; + nodes_to_fuse.push_back(mul4_node); + + matchResult.matched = true; + matchResult.gelu_without_bias_input_arg = gelu_without_bias_input_arg; + matchResult.tanh_input_node = &mul3_node; + return matchResult; +} + +MatchResult FastGeluFusion::CheckSecondFormula(Graph& graph, Node& pow1_node, + std::vector>& nodes_to_fuse) const { + MatchResult matchResult{false, nullptr, nullptr}; + if (!graph_utils::IsSupportedOptypeVersionAndDomain(pow1_node, "Pow", {7}) || + !graph_utils::IsSupportedProvider(pow1_node, GetCompatibleExecutionProviders()) || + pow1_node.GetOutputEdgesCount() != 1 || + !optimizer_utils::IsSupportedDataType(pow1_node, 
supported_data_types)) { + return matchResult; + } + + if (!optimizer_utils::IsInitializerWithExpectedValue(graph, *(pow1_node.InputDefs()[1]), 3.0f, true)){ + return matchResult; + } + + NodeArg* pow_input_arg = pow1_node.MutableInputDefs()[0]; + nodes_to_fuse.push_back(pow1_node); + + Node& mul1_node = *graph.GetNode(pow1_node.OutputNodesBegin()->Index()); + auto input_index = optimizer_utils::IndexOfNodeInput(mul1_node, *pow1_node.MutableOutputDefs()[0]); + if (!CheckNode(mul1_node, "Mul", 7, pow1_node.GetExecutionProviderType(), true) || + !optimizer_utils::IsInitializerWithExpectedValue(graph, *(mul1_node.InputDefs()[(input_index + 1) % 2]), + 0.044714998453855515f, true)) { + return matchResult; + } + nodes_to_fuse.push_back(mul1_node); + + + Node& add1_node = *graph.GetNode(mul1_node.OutputNodesBegin()->Index()); + input_index = optimizer_utils::IndexOfNodeInput(add1_node, *mul1_node.MutableOutputDefs()[0]); + if (!CheckNode(add1_node, "Add", 7, pow1_node.GetExecutionProviderType(), true) || + add1_node.MutableInputDefs()[(input_index + 1) % 2]->Name() != pow_input_arg->Name()) { + return matchResult; + } + nodes_to_fuse.push_back(add1_node); + + + Node& mul2_node = *graph.GetNode(add1_node.OutputNodesBegin()->Index()); + input_index = optimizer_utils::IndexOfNodeInput(mul2_node, *add1_node.MutableOutputDefs()[0]); + if (!CheckNode(mul2_node, "Mul", 7, pow1_node.GetExecutionProviderType(), true) || + !optimizer_utils::IsInitializerWithExpectedValue(graph, *(mul2_node.InputDefs()[(input_index + 1) % 2]), + 0.7978845834732056f, true)) { + return matchResult; + } + nodes_to_fuse.push_back(mul2_node); + + matchResult.matched = true; + matchResult.gelu_without_bias_input_arg = pow_input_arg; + matchResult.tanh_input_node = &mul2_node; + return matchResult; +} + +Status FastGeluFusion::ApplyImpl(Graph& graph, bool& modified, int graph_level, const logging::Logger& logger) const { + GraphViewer graph_viewer(graph); + const auto& node_topology_list = 
graph_viewer.GetNodesInTopologicalOrder(); + + for (auto node_index : node_topology_list) { + auto* p_node = graph.GetNode(node_index); + if (p_node == nullptr) + continue; + + Node& node = *p_node; + ORT_RETURN_IF_ERROR(Recurse(node, modified, graph_level, logger)); + + std::vector> nodes_to_fuse; + MatchResult matchRet = CheckFirstFormula(graph, node, nodes_to_fuse); + if (!matchRet.matched) { + nodes_to_fuse.clear(); + matchRet = CheckSecondFormula(graph, node, nodes_to_fuse); + + if(!matchRet.matched) continue; + }; + + Node& tanh_node = *graph.GetNode(matchRet.tanh_input_node->OutputNodesBegin()->Index()); + if (!CheckNode(tanh_node, "Tanh", 6, node.GetExecutionProviderType(), true)) { + continue; + } + + + Node& add2_node = *graph.GetNode(tanh_node.OutputNodesBegin()->Index()); + if (!CheckNode(add2_node, "Add", 7, node.GetExecutionProviderType(), true)) { + continue; + } + + auto input_index = optimizer_utils::IndexOfNodeInput(add2_node, *tanh_node.MutableOutputDefs()[0]); + if (!optimizer_utils::IsInitializerWithExpectedValue(graph, *(add2_node.InputDefs()[(input_index + 1) % 2]), 1.0f, true)) { + continue; + } + + Node& mul5_node = *graph.GetNode(add2_node.OutputNodesBegin()->Index()); + // This is the output of the Gelu subgraph, so we don't need to check that it has a single output edge. + if (!CheckNode(mul5_node, "Mul", 7, node.GetExecutionProviderType(), false)) { + continue; + } + + // Skip the fusion if the Gelu subgraph's output is also a graph output. 
+ if (!graph.GetNodeOutputsInGraphOutputs(mul5_node).empty()) { + continue; + } + + input_index = optimizer_utils::IndexOfNodeInput(mul5_node, *add2_node.MutableOutputDefs()[0]); + const Node* p_mul5_input_node = graph_utils::GetInputNode(mul5_node, (input_index + 1) % 2); + if (p_mul5_input_node == nullptr) continue; + Node& mul6_node = const_cast(*p_mul5_input_node); + if (!CheckNode(mul6_node, "Mul", 7, node.GetExecutionProviderType(), false)) { + continue; + } + + input_index = -1; + for (auto i = 0; i < 2; i++) { + if (optimizer_utils::IsInitializerWithExpectedValue(graph, *(mul6_node.InputDefs()[i]), 0.5f, true)){ + input_index = i; + break; + } + } + + if (input_index == -1 || mul6_node.InputDefs()[(input_index + 1) % 2]->Name() != matchRet.gelu_without_bias_input_arg->Name()) + continue; + + std::vector gelu_input_defs{matchRet.gelu_without_bias_input_arg}; + nodes_to_fuse.insert(nodes_to_fuse.end(), {tanh_node, add2_node, mul6_node, mul5_node}); + + auto type_info = *node.MutableOutputDefs()[0]->TypeAsProto(); + auto& shape_output = graph.GetOrCreateNodeArg(graph.GenerateNodeArgName("fast_gelu_output"), &type_info); + Node& fast_gelu_node = graph.AddNode(graph.GenerateNodeName("GPT2Gelu"), + "FastGelu", + "fused GPT2Gelu subgraphs ", + gelu_input_defs, + {&shape_output}, {}, kMSDomain); + + // assign provider to this new node, provider should be same as the provider for old node. + fast_gelu_node.SetExecutionProviderType(node.GetExecutionProviderType()); + + // move input edges to node (first in list) across to the fast_gelu_node. + // move output definitions and output edges from mul5_node (last in list) to fast_gelu_node. + // remove all nodes. 
+ graph_utils::FinalizeNodeFusion(graph, nodes_to_fuse, fast_gelu_node); + + modified = true; + } + + return Status::OK(); +} +} // namespace onnxruntime diff --git a/onnxruntime/core/optimizer/fast_gelu_fusion.h b/onnxruntime/core/optimizer/fast_gelu_fusion.h new file mode 100644 index 0000000000..e2d70c18a1 --- /dev/null +++ b/onnxruntime/core/optimizer/fast_gelu_fusion.h @@ -0,0 +1,39 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +#pragma once + +#include "core/optimizer/graph_transformer.h" + +namespace onnxruntime { + +struct MatchResult { + public: + bool matched; + NodeArg* gelu_without_bias_input_arg; // The Gelu input arg if not considering bias node. + Node* tanh_input_node; +}; + +/** +@Class FastGeluFusion + +Rewrite the graph, fusing a Gelu activation subgraph into a single FastGelu node. + +The formula corresponding to Gelu activation subgraph: +x * 0.5 * (1.0 + tanh(0.7978845608028654 * x * (1.0 + 0.044715 * x * x))) or +x * 0.5 * (1.0 + tanh((sqrt(2 / pi) * (x + 0.044715 * pow(x, 3))))), where x is the input. 
+ +*/ +class FastGeluFusion : public GraphTransformer { + public: + FastGeluFusion(const std::unordered_set& compatible_execution_providers = {}) noexcept + : GraphTransformer("FastGeluFusion", compatible_execution_providers) {} + + Status ApplyImpl(Graph& graph, bool& modified, int graph_level, const logging::Logger& logger) const override; + + MatchResult CheckFirstFormula(Graph& graph, Node& node, std::vector>& nodes_to_fuse) const; + + MatchResult CheckSecondFormula(Graph& graph, Node& nodes, std::vector>& nodes_to_fuse) const; +}; + +} // namespace onnxruntime diff --git a/onnxruntime/core/optimizer/graph_transformer_utils.cc b/onnxruntime/core/optimizer/graph_transformer_utils.cc index 31e57907cc..fe972c1ecf 100644 --- a/onnxruntime/core/optimizer/graph_transformer_utils.cc +++ b/onnxruntime/core/optimizer/graph_transformer_utils.cc @@ -21,6 +21,7 @@ #include "core/optimizer/bias_gelu_fusion.h" #include "core/optimizer/gelu_fusion.h" #include "core/optimizer/gelu_approximation.h" +#include "core/optimizer/fast_gelu_fusion.h" #include "core/optimizer/layer_norm_fusion.h" #include "core/optimizer/skip_layer_norm_fusion.h" #include "core/optimizer/embed_layer_norm_fusion.h" @@ -135,6 +136,7 @@ std::vector> GenerateTransformers(TransformerL std::unordered_set cuda_execution_providers = {onnxruntime::kCudaExecutionProvider}; transformers.emplace_back(onnxruntime::make_unique(cuda_execution_providers)); + transformers.emplace_back(onnxruntime::make_unique(cuda_execution_providers)); #endif } break; diff --git a/onnxruntime/core/optimizer/utils.cc b/onnxruntime/core/optimizer/utils.cc index c58bac0498..9af9fa4269 100644 --- a/onnxruntime/core/optimizer/utils.cc +++ b/onnxruntime/core/optimizer/utils.cc @@ -36,6 +36,8 @@ bool IsInitializerWithExpectedValue(const Graph& graph, const NodeArg& input_arg return false; } + const float atol = 1e-8f; + const float rtol = 1e-5f; const ONNX_NAMESPACE::TensorProto* tensor_proto = nullptr; if (is_constant) { tensor_proto = 
graph_utils::GetConstantInitializer(graph, input_arg.Name()); @@ -51,20 +53,28 @@ bool IsInitializerWithExpectedValue(const Graph& graph, const NodeArg& input_arg const auto data_type = tensor_proto->data_type(); if (data_type == ONNX_NAMESPACE::TensorProto_DataType_FLOAT) { const float* val = init_const->data(); - float diff = std::abs(val[0] - static_cast(expected_value)); - if (diff > FLT_EPSILON) { + if (std::isnan(val[0]) || std::isinf(val[0])) return false; + + float diff = std::abs(val[0] - expected_value); + if (diff > (atol + rtol * std::abs(expected_value))) { return false; } } else if (data_type == ONNX_NAMESPACE::TensorProto_DataType_DOUBLE) { const double* val = init_const->data(); - double diff = std::abs(val[0] - static_cast(expected_value)); - if (diff > DBL_EPSILON) { + if (std::isnan(val[0]) || std::isinf(val[0])) return false; + + const double expected_val = static_cast(expected_value); + double diff = std::abs(val[0] - expected_val); + if (diff > (atol + rtol * std::abs(expected_value))) { return false; } } else if (data_type == ONNX_NAMESPACE::TensorProto_DataType_FLOAT16) { const MLFloat16* val = init_const->data(); - float diff = std::abs(math::halfToFloat(val[0].val) - math::halfToFloat(math::floatToHalf(expected_value))); - if (diff > FLT_EPSILON) { + const float flt_val = math::halfToFloat(val[0].val); + if (std::isnan(flt_val) || std::isinf(flt_val)) return false; + const float expected_val = math::halfToFloat(math::floatToHalf(expected_value)); + float diff = std::abs(flt_val - expected_val); + if (diff > (atol + rtol * std::abs(expected_value))) { return false; } } else { @@ -176,5 +186,27 @@ bool IsShapeKnownOnAllDims(const NodeArg& node_arg, int expected_dim_size) { return true; } +int32_t IndexOfNodeInput(const Node& node, const NodeArg& node_arg) { + int32_t index = 0; + for (auto& input_arg : node.InputDefs()) { + if (input_arg->Name().compare(node_arg.Name()) == 0) { + return index; + } + index++; + } + + return -1; +} + +bool 
IsSupportedDataType(const Node& node, const std::vector& supported_data_types) { + for (const auto& input_arg : node.InputDefs()) { + if (std::find(supported_data_types.begin(), supported_data_types.end(), + *(input_arg->Type())) == supported_data_types.end()) { + return false; + } + } + return true; +} + } // namespace optimizer_utils } // namespace onnxruntime diff --git a/onnxruntime/core/optimizer/utils.h b/onnxruntime/core/optimizer/utils.h index 0fd434993f..89d56a81f0 100644 --- a/onnxruntime/core/optimizer/utils.h +++ b/onnxruntime/core/optimizer/utils.h @@ -52,5 +52,15 @@ bool ValidateShape(const NodeArg& node_arg, const std::initializer_list */ bool IsShapeKnownOnAllDims(const NodeArg& node_arg, int expected_dim_size); +/** Get the index of node_arg among all of the node's inputs. +@remarks Returns -1 when node_arg is not one of the node's inputs. +*/ +int32_t IndexOfNodeInput(const Node& node, const NodeArg& node_arg); + +/** Check whether all of the node's input data types are in the supported data type list. +@param supported_data_types The list of supported data types. 
+*/ +bool IsSupportedDataType(const Node& node, const std::vector& supported_data_types); + } // namespace optimizer_utils } // namespace onnxruntime diff --git a/onnxruntime/test/optimizer/graph_transform_test.cc b/onnxruntime/test/optimizer/graph_transform_test.cc index f4bc9ce8f9..242f18dc52 100644 --- a/onnxruntime/test/optimizer/graph_transform_test.cc +++ b/onnxruntime/test/optimizer/graph_transform_test.cc @@ -33,6 +33,7 @@ #include "core/optimizer/unsqueeze_elimination.h" #include "core/optimizer/reshape_fusion.h" #include "core/optimizer/attention_fusion.h" +#include "core/optimizer/fast_gelu_fusion.h" #include "core/optimizer/utils.h" #include "core/platform/env.h" #include "core/util/math.h" @@ -1201,6 +1202,163 @@ TEST(GraphTransformationTests, GeluApproximation_Gelu_Add_MatMul) { EXPECT_EQ(op_to_count["FastGelu"], 1); } +TEST(GraphTransformationTests, FastGeluFusionTest) { + auto model_uri = MODEL_FOLDER "fusion/fast_gelu.onnx"; + std::shared_ptr p_model; + auto load_ret = Model::Load(model_uri, p_model, nullptr, DefaultLoggingManager().DefaultLogger()); + ASSERT_TRUE(load_ret.IsOK()); + Graph& graph = p_model->MainGraph(); + + onnxruntime::GraphTransformerManager graph_transformation_mgr{5}; + graph_transformation_mgr.Register(onnxruntime::make_unique(), TransformerLevel::Level2); + auto ret = graph_transformation_mgr.ApplyTransformers(graph, TransformerLevel::Level2, DefaultLoggingManager().DefaultLogger()); + ASSERT_TRUE(ret.IsOK()); + + std::map op_to_count = CountOpsInGraph(graph); + ASSERT_TRUE(op_to_count["Identity"] == 2); + ASSERT_TRUE(op_to_count["Add"] == 0); + ASSERT_TRUE(op_to_count["Tanh"] == 0); + ASSERT_TRUE(op_to_count["Mul"] == 0); + ASSERT_TRUE(op_to_count["FastGelu"] == 1); +} + +TEST(GraphTransformationTests, FastGeluUseGraphInputFusionTest) { + auto model_uri = MODEL_FOLDER "fusion/fast_gelu_use_graph_input.onnx"; + std::shared_ptr p_model; + auto load_ret = Model::Load(model_uri, p_model, nullptr, 
DefaultLoggingManager().DefaultLogger()); + ASSERT_TRUE(load_ret.IsOK()); + Graph& graph = p_model->MainGraph(); + + onnxruntime::GraphTransformerManager graph_transformation_mgr{5}; + graph_transformation_mgr.Register(onnxruntime::make_unique(), TransformerLevel::Level2); + auto ret = graph_transformation_mgr.ApplyTransformers(graph, TransformerLevel::Level2, DefaultLoggingManager().DefaultLogger()); + ASSERT_TRUE(ret.IsOK()); + + std::map op_to_count = CountOpsInGraph(graph); + ASSERT_TRUE(op_to_count["Add"] == 0); + ASSERT_TRUE(op_to_count["Tanh"] == 0); + ASSERT_TRUE(op_to_count["Mul"] == 0); + ASSERT_TRUE(op_to_count["FastGelu"] == 1); +} + +TEST(GraphTransformationTests, FastGeluWithBiasFusionTest) { + auto model_uri = MODEL_FOLDER "fusion/fast_gelu_with_bias.onnx"; + std::shared_ptr p_model; + auto load_ret = Model::Load(model_uri, p_model, nullptr, DefaultLoggingManager().DefaultLogger()); + ASSERT_TRUE(load_ret.IsOK()); + Graph& graph = p_model->MainGraph(); + + onnxruntime::GraphTransformerManager graph_transformation_mgr{5}; + graph_transformation_mgr.Register(onnxruntime::make_unique(), TransformerLevel::Level2); + graph_transformation_mgr.Register(onnxruntime::make_unique(), TransformerLevel::Level2); + auto ret = graph_transformation_mgr.ApplyTransformers(graph, TransformerLevel::Level2, DefaultLoggingManager().DefaultLogger()); + ASSERT_TRUE(ret.IsOK()); + + std::map op_to_count = CountOpsInGraph(graph); + ASSERT_TRUE(op_to_count["Add"] == 0); + ASSERT_TRUE(op_to_count["Tanh"] == 0); + ASSERT_TRUE(op_to_count["Mul"] == 0); + ASSERT_TRUE(op_to_count["FastGelu"] == 1); +} + +TEST(GraphTransformationTests, FastGeluWithBiasUseGraphInputFusionTest) { + auto model_uri = MODEL_FOLDER "fusion/fast_gelu_with_bias_use_graph_input.onnx"; + std::shared_ptr p_model; + auto load_ret = Model::Load(model_uri, p_model, nullptr, DefaultLoggingManager().DefaultLogger()); + ASSERT_TRUE(load_ret.IsOK()); + Graph& graph = p_model->MainGraph(); + + 
onnxruntime::GraphTransformerManager graph_transformation_mgr{5}; + graph_transformation_mgr.Register(onnxruntime::make_unique(), TransformerLevel::Level2); + graph_transformation_mgr.Register(onnxruntime::make_unique(), TransformerLevel::Level2); + auto ret = graph_transformation_mgr.ApplyTransformers(graph, TransformerLevel::Level2, DefaultLoggingManager().DefaultLogger()); + ASSERT_TRUE(ret.IsOK()); + + std::map op_to_count = CountOpsInGraph(graph); + ASSERT_TRUE(op_to_count["Add"] == 0); + ASSERT_TRUE(op_to_count["Tanh"] == 0); + ASSERT_TRUE(op_to_count["Mul"] == 0); + ASSERT_TRUE(op_to_count["FastGelu"] == 1); +} + +TEST(GraphTransformationTests, FastGeluFusionTest2) { + auto model_uri = MODEL_FOLDER "fusion/fast_gelu2.onnx"; + std::shared_ptr p_model; + auto load_ret = Model::Load(model_uri, p_model, nullptr, DefaultLoggingManager().DefaultLogger()); + ASSERT_TRUE(load_ret.IsOK()); + Graph& graph = p_model->MainGraph(); + + onnxruntime::GraphTransformerManager graph_transformation_mgr{5}; + graph_transformation_mgr.Register(onnxruntime::make_unique(), TransformerLevel::Level2); + auto ret = graph_transformation_mgr.ApplyTransformers(graph, TransformerLevel::Level2, DefaultLoggingManager().DefaultLogger()); + ASSERT_TRUE(ret.IsOK()); + + std::map op_to_count = CountOpsInGraph(graph); + ASSERT_TRUE(op_to_count["Add"] == 0); + ASSERT_TRUE(op_to_count["Tanh"] == 0); + ASSERT_TRUE(op_to_count["Mul"] == 0); + ASSERT_TRUE(op_to_count["FastGelu"] == 1); +} + +TEST(GraphTransformationTests, FastGeluUseGraphInputFusionTest2) { + auto model_uri = MODEL_FOLDER "fusion/fast_gelu2_use_graph_input.onnx"; + std::shared_ptr p_model; + auto load_ret = Model::Load(model_uri, p_model, nullptr, DefaultLoggingManager().DefaultLogger()); + ASSERT_TRUE(load_ret.IsOK()); + Graph& graph = p_model->MainGraph(); + + onnxruntime::GraphTransformerManager graph_transformation_mgr{5}; + graph_transformation_mgr.Register(onnxruntime::make_unique(), TransformerLevel::Level2); + auto ret = 
graph_transformation_mgr.ApplyTransformers(graph, TransformerLevel::Level2, DefaultLoggingManager().DefaultLogger()); + ASSERT_TRUE(ret.IsOK()); + + std::map op_to_count = CountOpsInGraph(graph); + ASSERT_TRUE(op_to_count["Add"] == 0); + ASSERT_TRUE(op_to_count["Tanh"] == 0); + ASSERT_TRUE(op_to_count["Mul"] == 0); + ASSERT_TRUE(op_to_count["FastGelu"] == 1); +} + +TEST(GraphTransformationTests, FastGeluWithBiasFusionTest2) { + auto model_uri = MODEL_FOLDER "fusion/fast_gelu2_with_bias.onnx"; + std::shared_ptr p_model; + auto load_ret = Model::Load(model_uri, p_model, nullptr, DefaultLoggingManager().DefaultLogger()); + ASSERT_TRUE(load_ret.IsOK()); + Graph& graph = p_model->MainGraph(); + + onnxruntime::GraphTransformerManager graph_transformation_mgr{5}; + graph_transformation_mgr.Register(onnxruntime::make_unique(), TransformerLevel::Level2); + graph_transformation_mgr.Register(onnxruntime::make_unique(), TransformerLevel::Level2); + auto ret = graph_transformation_mgr.ApplyTransformers(graph, TransformerLevel::Level2, DefaultLoggingManager().DefaultLogger()); + ASSERT_TRUE(ret.IsOK()); + + std::map op_to_count = CountOpsInGraph(graph); + ASSERT_TRUE(op_to_count["Add"] == 0); + ASSERT_TRUE(op_to_count["Tanh"] == 0); + ASSERT_TRUE(op_to_count["Mul"] == 0); + ASSERT_TRUE(op_to_count["FastGelu"] == 1); +} + +TEST(GraphTransformationTests, FastGeluWithBiasUseGraphInputFusionTest2) { + auto model_uri = MODEL_FOLDER "fusion/fast_gelu2_with_bias_use_graph_input.onnx"; + std::shared_ptr p_model; + auto load_ret = Model::Load(model_uri, p_model, nullptr, DefaultLoggingManager().DefaultLogger()); + ASSERT_TRUE(load_ret.IsOK()); + Graph& graph = p_model->MainGraph(); + + onnxruntime::GraphTransformerManager graph_transformation_mgr{5}; + graph_transformation_mgr.Register(onnxruntime::make_unique(), TransformerLevel::Level2); + graph_transformation_mgr.Register(onnxruntime::make_unique(), TransformerLevel::Level2); + auto ret = 
graph_transformation_mgr.ApplyTransformers(graph, TransformerLevel::Level2, DefaultLoggingManager().DefaultLogger()); + ASSERT_TRUE(ret.IsOK()); + + std::map op_to_count = CountOpsInGraph(graph); + ASSERT_TRUE(op_to_count["Add"] == 0); + ASSERT_TRUE(op_to_count["Tanh"] == 0); + ASSERT_TRUE(op_to_count["Mul"] == 0); + ASSERT_TRUE(op_to_count["FastGelu"] == 1); +} + TEST(GraphTransformationTests, LayerNormFusionTest) { auto model_uri = MODEL_FOLDER "fusion/layer_norm.onnx"; std::shared_ptr p_model; diff --git a/onnxruntime/test/testdata/transform/fusion/fast_gelu.onnx b/onnxruntime/test/testdata/transform/fusion/fast_gelu.onnx new file mode 100644 index 0000000000000000000000000000000000000000..d94dede7e7494685a5f7413ec4af0621a354bfe4 GIT binary patch literal 687 zcmaJj^IqS)*;l(xk1{EjJ-&?7dBSghY7jzA$a$`)BN&%TsIPItViPv6{}nru7B?NVtRp`+m}f c)dh4vF(P0&5S_yDG)u2iG5yRI*_(1IzvD%~NB{r; literal 0 HcmV?d00001 diff --git a/onnxruntime/test/testdata/transform/fusion/fast_gelu.py b/onnxruntime/test/testdata/transform/fusion/fast_gelu.py new file mode 100644 index 0000000000..bfd5c785f0 --- /dev/null +++ b/onnxruntime/test/testdata/transform/fusion/fast_gelu.py @@ -0,0 +1,172 @@ +import onnx +from onnx import helper +from onnx import AttributeProto, TensorProto, GraphProto, OperatorSetIdProto +from onnx import numpy_helper +import numpy as np + +# Gelu formula: x * 0.5 * (1.0 + tanh(0.7978845608028654 * x * (1.0 + 0.044715 * x * x))) + +has_bias = True # change it to True to generate fast_gelu_with_bias.onnx +gelu_use_graph_input = True # change it to False to let Gelu don't have graph inputs as inputs. 
+ +X = helper.make_tensor_value_info('input', TensorProto.FLOAT, ["batch", "seqlen", 64]) +Y = helper.make_tensor_value_info('output', TensorProto.FLOAT, ["batch", "seqlen", 64]) + +bias_np_vals = (0.01 * np.arange(64)).astype(np.float32).reshape((64)) +bias_initializer = numpy_helper.from_array(bias_np_vals, "input_bias") + +a_weight_np_vals = np.asarray([0.044714998453855515]).astype(np.float32).reshape(()) +a_weight_initializer = numpy_helper.from_array(a_weight_np_vals, "mul1_init") + +b_weight_np_vals = np.asarray([0.7978845834732056]).astype(np.float32).reshape(()) +b_weight_initializer = numpy_helper.from_array(b_weight_np_vals, "mul2_init") + +c_weight_np_vals = np.asarray([0.5]).astype(np.float32).reshape(()) +c_weight_initializer = numpy_helper.from_array(c_weight_np_vals, "mul3_init") + +a_bias_np_vals = np.asarray([1.0]).astype(np.float32).reshape(()) +a_bias_initializer = numpy_helper.from_array(a_bias_np_vals, "add1_init") + +b_bias_np_vals = np.asarray([1.0]).astype(np.float32).reshape(()) +b_bias_initializer = numpy_helper.from_array(b_bias_np_vals, "add2_init") + +nodes = [] +gelu_input = "input" +if not gelu_use_graph_input: + leading_identity = helper.make_node( + 'Identity', + [gelu_input], + ['identity_leading'], + name="identity_leading" + ) + gelu_input = "identity_leading" + nodes.append(leading_identity) + +mul_input_name = gelu_input +if has_bias: + add0 = helper.make_node( + 'Add', + [gelu_input, bias_initializer.name], + ['add0'], + name="add0" + ) + mul_input_name = "add0" + nodes.append(add0) + + +mul1 = helper.make_node( + 'Mul', + [mul_input_name, a_weight_initializer.name], + ['mul1'], + name="mul1" +) +nodes.append(mul1) + +mul2 = helper.make_node( + 'Mul', + [mul_input_name, 'mul1'], + ['mul2'], + name="mul2" +) +nodes.append(mul2) + +add1 = helper.make_node( + 'Add', + ['mul2', a_bias_initializer.name], + ['add1'], + name="add1" +) +nodes.append(add1) + +mul3 = helper.make_node( + 'Mul', + [mul_input_name, 
b_weight_initializer.name], + ['mul3'], + name="mul3" +) +nodes.append(mul3) + +mul4 = helper.make_node( + 'Mul', + ['mul3', 'add1'], + ['mul4'], + name="mul4" +) +nodes.append(mul4) + +tanh = helper.make_node( + 'Tanh', + ['mul4'], + ['tanh'], + name="tanh" +) +nodes.append(tanh) + +add2 = helper.make_node( + 'Add', + ['tanh', b_bias_initializer.name], + ['add2'], + name="add2" +) +nodes.append(add2) + +mul5 = helper.make_node( + 'Mul', + [mul_input_name, c_weight_initializer.name], + ['mul5'], + name="mul5" +) +nodes.append(mul5) + +mul6 = helper.make_node( + 'Mul', + ['mul5', 'add2'], + ['mul6'], + name="mul6" +) +ending_identity = helper.make_node( + 'Identity', + ['mul6'], + ['output'], + name="identity_ending" +) +nodes.extend([mul6, ending_identity]) + +initializers = [] +if has_bias: + initializers = [bias_initializer] + +initializers.extend([a_weight_initializer, a_bias_initializer, b_weight_initializer, b_bias_initializer, c_weight_initializer]) +# Create the graph (GraphProto) +graph_def = helper.make_graph( + nodes, + 'test-model', + [X], + [Y], + initializers +) + +opsets = [] +onnxdomain = OperatorSetIdProto() +onnxdomain.version = 10 +onnxdomain.domain = "" # The empty string ("") or absence of this field implies the operator set that is defined as part of the ONNX specification. 
+opsets.append(onnxdomain) + +msdomain = OperatorSetIdProto() +msdomain.version = 1 +msdomain.domain = "com.microsoft" + +opsets.append(msdomain) +kwargs={} +kwargs["opset_imports"] = opsets + +model_def = helper.make_model(graph_def, producer_name='onnx-example', **kwargs) + +file_name = "fast_gelu" +if has_bias: + file_name += "_with_bias" + +if gelu_use_graph_input: + file_name += "_use_graph_input" +onnx.save(model_def, file_name + ".onnx") + diff --git a/onnxruntime/test/testdata/transform/fusion/fast_gelu2.onnx b/onnxruntime/test/testdata/transform/fusion/fast_gelu2.onnx new file mode 100644 index 0000000000000000000000000000000000000000..2239de14bcdb317cfd6e5712bb2b96e5d5909795 GIT binary patch literal 642 zcmaJ>|-P&zI09W(3{Hjw8o&NIErbV8U;126$S^E(aYafYJpJ; zGPRCT3(Q_hQ+19^c~snHAlwwGRA#$plxvG;h3A#C7$n`uej*5sd)TRkrK?k9S(8bx zt+2|*DQb7`9!fAfXNr2^^Ed;`8};~44wi>#&>FvA7oVU5S9T3wz%g#jBe__Djhh>~ kjwy#Ddou4regOWzF+^ZF5T3*7LL`HPExyD?d`PGE2X-#AWB>pF literal 0 HcmV?d00001 diff --git a/onnxruntime/test/testdata/transform/fusion/fast_gelu2.py b/onnxruntime/test/testdata/transform/fusion/fast_gelu2.py new file mode 100644 index 0000000000..648336eeb2 --- /dev/null +++ b/onnxruntime/test/testdata/transform/fusion/fast_gelu2.py @@ -0,0 +1,163 @@ +import onnx +from onnx import helper +from onnx import AttributeProto, TensorProto, GraphProto, OperatorSetIdProto +from onnx import numpy_helper +import numpy as np + +# Gelu formula: x * 0.5 * (1.0 + tanh((sqrt(2 / pi) * (x + 0.044715 * pow(x, 3))))) +has_bias = False # change it to True to generate fast_gelu2_with_bias.onnx (or fast_gelu2_with_bias_use_graph_input.onnx) +gelu_use_graph_input = True # change it to False so that Gelu does not take the graph input directly as its input.
+
+# Graph input/output: float tensors of shape [batch, seqlen, 64] (first two dims symbolic).
+X, Y = (
+    helper.make_tensor_value_info(tensor_name, TensorProto.FLOAT, ["batch", "seqlen", 64])
+    for tensor_name in ('input', 'output')
+)
+
+
+def _scalar_initializer(value, name):
+    """Return `value` as a 0-d float32 initializer tensor called `name`."""
+    return numpy_helper.from_array(np.asarray([value]).astype(np.float32).reshape(()), name)
+
+
+# Bias for the optional leading Add: 64 float32 values 0.00, 0.01, ..., 0.63.
+bias_initializer = numpy_helper.from_array(
+    (0.01 * np.arange(64)).astype(np.float32).reshape((64)), "input_bias")
+
+# Scalar constants of the pattern. The "*_init" names are stored in the generated model,
+# so they are kept as-is even where they do not match the consuming node.
+pow_initializer = _scalar_initializer(3, "pow_init")
+a_weight_initializer = _scalar_initializer(0.044714998453855515, "mul1_init")
+b_weight_initializer = _scalar_initializer(0.7978845834732056, "mul2_init")
+c_weight_initializer = _scalar_initializer(0.5, "mul3_init")
+b_bias_initializer = _scalar_initializer(1.0, "add2_init")
+
+nodes = []
+
+
+def _emit(op_type, inputs, output, name=None):
+    """Append a one-output node to `nodes` and return its output name.
+
+    The node is named after its output unless `name` is given.
+    """
+    nodes.append(helper.make_node(op_type, inputs, [output], name=name or output))
+    return output
+
+
+# With gelu_use_graph_input off, an Identity sits between the graph input and the pattern.
+gelu_input = "input"
+if not gelu_use_graph_input:
+    gelu_input = _emit('Identity', [gelu_input], 'identity_leading')
+
+# With has_bias on, the pattern consumes (input + bias) instead of the raw input.
+mul_input_name = gelu_input
+if has_bias:
+    mul_input_name = _emit('Add', [gelu_input, bias_initializer.name], 'add0')
+
+# Pattern: 0.5 * x * (1 + tanh(0.79788 * (x + 0.044715 * pow(x, 3)))), x = mul_input_name.
+_emit('Pow', [mul_input_name, pow_initializer.name], 'pow1')  # pow(x, 3)
+_emit('Mul', ['pow1', a_weight_initializer.name], 'mul1')  # 0.044715 * pow(x, 3)
+_emit('Add', [mul_input_name, "mul1"], 'add1')  # x + 0.044715 * pow(x, 3)
+_emit('Mul', ['add1', b_weight_initializer.name], 'mul2')  # argument of tanh
+_emit('Tanh', ['mul2'], 'tanh')
+_emit('Add', ['tanh', b_bias_initializer.name], 'add2')  # 1 + tanh(...)
+_emit('Mul', [mul_input_name, c_weight_initializer.name], 'mul5')  # 0.5 * x
+_emit('Mul', ['mul5', 'add2'], 'mul6')
+# The trailing Identity is always emitted and produces the graph output.
+_emit('Identity', ['mul6'], 'output', name="ending_identity")
+
+# The bias comes first when it is used; the scalar constants always follow in this order.
+initializers = [bias_initializer] if has_bias else []
+initializers.extend([pow_initializer, a_weight_initializer, b_weight_initializer, b_bias_initializer, c_weight_initializer])
+
+# Create the graph (GraphProto)
+graph_def = helper.make_graph(nodes, 'test-model', [X], [Y], initializers)
+
+# Operator set of the default ONNX domain, version 10. The empty string ("") or absence
+# of the domain field implies the operator set that is defined as part of the ONNX
+# specification.
+opsets = []
+onnxdomain = OperatorSetIdProto()
+onnxdomain.version = 10
+onnxdomain.domain = ""
+opsets.append(onnxdomain) + +msdomain = OperatorSetIdProto() +msdomain.version = 1 +msdomain.domain = "com.microsoft" + +opsets.append(msdomain) +kwargs={} +kwargs["opset_imports"] = opsets + +model_def = helper.make_model(graph_def, producer_name='onnx-example', **kwargs) + +file_name = "fast_gelu2" +if has_bias: + file_name += "_with_bias" + +if gelu_use_graph_input: + file_name += "_use_graph_input" +onnx.save(model_def, file_name + ".onnx") + diff --git a/onnxruntime/test/testdata/transform/fusion/fast_gelu2_use_graph_input.onnx b/onnxruntime/test/testdata/transform/fusion/fast_gelu2_use_graph_input.onnx new file mode 100644 index 0000000000000000000000000000000000000000..e609f74134b61dda3d2f6e96d76fa9a1def4db14 GIT binary patch literal 554 zcmah_!AiqG5KYatrh{VL9+XHuM6g(>&`2W)(wdt^!Gk9+CGD~}< zJROuXnEC``;6CRmoNsw7MoGe$>0=Ds2gZ;C3(FDiI6})2796L5%ONE?kqh6(fV`R< zw-JM;;n;3tY8A{jDqL(Yz%y@l5<@F7EF?NsVrY*sLS{0u?Wky9*>*%5D%a*I{t=f< z$d9RDTxy}e1MjiSC}_UY{Y){Q_GqgXSO1$3#~F`%ZG-I|HX-fq?R^Cn9www0KaC2o zGKT9vC0H3j&>6j5mKJFT&b%5ufqhz^C3?O9k2Y5PGvyNYyz#6HrGbA2V**YK;$t|R QtL!=x^Dnhh@48j{0|{J`iU0rr literal 0 HcmV?d00001 diff --git a/onnxruntime/test/testdata/transform/fusion/fast_gelu2_with_bias.onnx b/onnxruntime/test/testdata/transform/fusion/fast_gelu2_with_bias.onnx new file mode 100644 index 0000000000000000000000000000000000000000..332435ebc32aaab87028bb30e910f1ce4bd3b97d GIT binary patch literal 933 zcmaJ=F>DfH6h7df^rfcsN>W9ODNSrk(%RC1#`NBgEGVgoZA?rI2|0R)y$E+yj;(~(<9h_KPSX}7fLKB^Q_d~&?i8uWB{_nl-`(EC=^LnHy(=d)! 
zwIiia%xm|4dmsvK-6)nV$)~HDVd>VZOkPt|-FP0jgoL~ghc$2;E@c6@P$sJ@B}r6N zbv?k(kn6sx!UFIMc#Gy?MmKaz5-HjU@H6CkY#u@g_yr~l<^0C5#ts9beTH0*%6XU{ z%5aFgP@23bZM%?!ec=+3u|$MRLoT(IqsI}m&9 zu+AMe#|}~M5FIs14pH6<`A{6=>DVB>!W)Q591Vb42rv4HWIC9t%TwOTM$ zEgydC^u~RT#6MF`Rh$An&{^DqPJOG^>eG8sK5cKywN_bPN!Mh;<+Gh8VQsmQ>=NFW zn?X0Oq^I%y$u*>UeC=Puvz@z`t|gGTD8o~1Bo(|_eSxHdf>6O`u!;xr8{*Zld;E^L zC%D`EK>Rv>?SCST25xSCCY>gJEPf%a7Wy0CNUx36-gnaM;Q8_o((Pg{_mi}H*s1>_ z{XRbE{vi*6rE5+`y)l1df*r`fPTNzA`efDK4C@~InGAD`dS>U#H`s17$QsE77M5rv zqi-rD3PL;{o+WJj5v9c0r91Kn94s~^h7;Rt0FJh8(eNF#}ij%)(kh-+O76%$((O`~`=M BBD??q literal 0 HcmV?d00001 diff --git a/onnxruntime/test/testdata/transform/fusion/fast_gelu2_with_bias_use_graph_input.onnx b/onnxruntime/test/testdata/transform/fusion/fast_gelu2_with_bias_use_graph_input.onnx new file mode 100644 index 0000000000000000000000000000000000000000..a83905e34a67cb00d696b623d4da7ecb41bd89a3 GIT binary patch literal 867 zcmah{F>F#%6#c-5(wmyLza&+(m{Ma4Nt+e|8q$`y>1h@=0r5 zSXf*bbg6?24o)l%vbf;jLI)=n7Zw*fxX{6=_dgKae8YYBo%_x?H}5{TOPVuu{diM7 z&J;@p_1+g3+ybYjm#QWJ-_ltvQ<21sqHKG4^gHe=3akP@fV*TIr8QkMC6TfnFOPo5 zW8(zpr`en^txCd{2ytxI(EYIltNHS#w`0SX01sDnZQ;5+mgB0V(pG15ayR4f-R zR*yWDJ(LMFh6&cc)WjKo}(=mi0OLnKVm8q!&I^pk8F# zq}fD2w&h4xV)dv3zh6Jn@gom&{&(c9E M=I>day{MM`4Fu4d-~a#s literal 0 HcmV?d00001 diff --git a/onnxruntime/test/testdata/transform/fusion/fast_gelu_with_bias.onnx b/onnxruntime/test/testdata/transform/fusion/fast_gelu_with_bias.onnx new file mode 100644 index 0000000000000000000000000000000000000000..a7e212ac95736c2b59e4838741faa93c55dbc18b GIT binary patch literal 966 zcmaJ=J!}$T7zTR$^i56am86OmBTZ}}X_a4%>Ajz_prj6MOiT<(b8v^f2nWcK)Vg$G zabeJ<9b9m5VsT+{!NG+NPAo1gE_86AgOl%l{56tz!*}03@B2Q_bN77ba)=8>T|eAZ z59LBBuRb_+LJaJhUaA_RTT@is(2O_xc~w?4{iSyX@w=Xkdf*PsQ~@~Aenyikq97~E zT`yn#bB`1S{Ja&yIog zBA%XHL+Z!3o-I7zy^ra70%;Z{c#h4aj5lkqka*ByImVW+hGpp;&DF7Y^nqqiaIf`| z<{S8J@R>N87>Rr#o)&&8eI>3odYa#fuYw)Df17(VDh=}S%Nm86OmBQ>^=v=I)_nBM=C1toQ8V`5@R$iW@)7dklk--DLM4&LzJyYK)1@B7~S-v#`_tfA|> 
zYwE6Ct5?-~Km6n-@o9R!WfH<(#gZmB1zuK^+hG=?Qx6n{L<##KGqqMVQPgzJ6nJ+T zW-&VTxK$-f;~ECBbdsf$qcl=rN0Q7S_bG+DWHO11GRcXI$ausv8RQX}Sc1uVhG*d{QH;`rqlFUGcr8A>+j2XxX zexqgLxrXO7Mb%Bsd{tC+Mbn>01CPdlKuoo1uGI`ht;XMa1NVcT?71oA_q`lGq_@05 zdYujHaER|^>8QIYS%)ntRkDj{_g; zUy$n%LiRW0>q2|*9XWe&vicKw`%tO;Lhb?dI=_*B2oL*zP=lid%S+K44cXJQArX4i zou%lLwg>aHzZaqsS)}Nho~&M@+fIbqxf?XTfJZX9b9sJY<&HE0&u&qk#~n;6N0ZI- z@$BXpbf?Qk`A_9&zAnhoXSc?`fv`lb20X$dnHGE{*(~pnfZ%VcjjE~>oKR_njOY5F a!Yog?Aj!^?OJ$?BUen4ijiymCgPy