From fda0aa14c8b172d03a94dfe59663d17db064e325 Mon Sep 17 00:00:00 2001 From: Sunghoon <35605090+hanbitmyths@users.noreply.github.com> Date: Thu, 13 Apr 2023 23:07:47 -0700 Subject: [PATCH] SkipLayerNorm fusion with different input and output type (#15500) SkipLayerNorm fusion fuses LayerNorm and one or more Add kernels now. While the LayerNormalization kernel allows different input and output types by definition, SkipLayerNormalization must have the same input and output type. This graph is valid as the output of the Add node is float16 and two inputs from initializers are float. ![image](https://user-images.githubusercontent.com/35605090/231874079-3f3b03cc-f751-4ad9-a002-31116a35117f.png) But, when Add and LayerNormalization are fused, it fails because two inputs of the Add node are float16 type and SkipLayerNormalization must have the same input types. To avoid this failure, this PR adds a Cast node before the inputs of SkipLayerNormalization when the input and output types are different and the output type is float. The above graph is fused as follows, ![image](https://user-images.githubusercontent.com/35605090/231874097-6405713a-7c95-4b5b-a293-1305976edc94.png) For performance, it would be better for SkipLayerNormalization to support different input and output types, but this PR is to unblock Turing NLR v5 base mode in Babel. When we have more cases, we can support it. 
--- .../core/optimizer/skip_layer_norm_fusion.cc | 38 +++++ .../test/optimizer/graph_transform_test.cc | 68 ++++++--- ...r_norm_format1_graph_output_with_cast.onnx | Bin 0 -> 848 bytes ..._layer_norm_format1_partial_with_cast.onnx | Bin 0 -> 847 bytes .../skip_layer_norm_format1_with_cast.onnx | Bin 0 -> 816 bytes ...r_norm_format2_graph_output_with_cast.onnx | Bin 0 -> 848 bytes ..._layer_norm_format2_partial_with_cast.onnx | Bin 0 -> 847 bytes .../skip_layer_norm_format2_with_cast.onnx | Bin 0 -> 816 bytes ...r_norm_format3_graph_output_with_cast.onnx | Bin 0 -> 784 bytes ...ayer_norm_format3_no_fusion_with_cast.onnx | Bin 0 -> 783 bytes .../skip_layer_norm_format3_with_cast.onnx | Bin 0 -> 755 bytes .../transform/fusion/skip_layer_norm_gen.py | 143 ++++++++++++------ ...yer_norm_input_output_with_cast_check.onnx | Bin 0 -> 966 bytes .../skip_layer_norm_no_beta_with_cast.onnx | Bin 0 -> 274 bytes 14 files changed, 182 insertions(+), 67 deletions(-) create mode 100644 onnxruntime/test/testdata/transform/fusion/skip_layer_norm_format1_graph_output_with_cast.onnx create mode 100644 onnxruntime/test/testdata/transform/fusion/skip_layer_norm_format1_partial_with_cast.onnx create mode 100644 onnxruntime/test/testdata/transform/fusion/skip_layer_norm_format1_with_cast.onnx create mode 100644 onnxruntime/test/testdata/transform/fusion/skip_layer_norm_format2_graph_output_with_cast.onnx create mode 100644 onnxruntime/test/testdata/transform/fusion/skip_layer_norm_format2_partial_with_cast.onnx create mode 100644 onnxruntime/test/testdata/transform/fusion/skip_layer_norm_format2_with_cast.onnx create mode 100644 onnxruntime/test/testdata/transform/fusion/skip_layer_norm_format3_graph_output_with_cast.onnx create mode 100644 onnxruntime/test/testdata/transform/fusion/skip_layer_norm_format3_no_fusion_with_cast.onnx create mode 100644 onnxruntime/test/testdata/transform/fusion/skip_layer_norm_format3_with_cast.onnx create mode 100644 
onnxruntime/test/testdata/transform/fusion/skip_layer_norm_input_output_with_cast_check.onnx create mode 100644 onnxruntime/test/testdata/transform/fusion/skip_layer_norm_no_beta_with_cast.onnx diff --git a/onnxruntime/core/optimizer/skip_layer_norm_fusion.cc b/onnxruntime/core/optimizer/skip_layer_norm_fusion.cc index 903563d364..f265dfc7f5 100644 --- a/onnxruntime/core/optimizer/skip_layer_norm_fusion.cc +++ b/onnxruntime/core/optimizer/skip_layer_norm_fusion.cc @@ -90,6 +90,36 @@ static bool CheckSecondAdd(Graph& graph, Node& add, ProviderType providertype) { add_input1_shape->dim(2).dim_value() == add_input2_shape->dim(0).dim_value(); } +// Add a Cast to convert input from float16/bfloat16 to float when input type is different from output type +static NodeArg* CastToFloat(Graph& graph, NodeArg* input, int32_t output_data_type, ProviderType provider_type) { + if (nullptr == input->Type() || + input->TypeAsProto()->tensor_type().elem_type() == output_data_type || + output_data_type != ONNX_NAMESPACE::TensorProto_DataType_FLOAT) { + return input; + } + + auto input_shape = input->Shape(); + TypeProto input_float; + input_float.mutable_tensor_type()->set_elem_type(TensorProto_DataType_FLOAT); + for (auto i = 0; i < input_shape->dim_size(); ++i) { + auto dim = input_float.mutable_tensor_type()->mutable_shape()->add_dim(); + *dim = input_shape->dim(i); + } + auto& cast_float = graph.GetOrCreateNodeArg(graph.GenerateNodeArgName(input->Name() + "_Float"), &input_float); + + auto& node = graph.AddNode(graph.GenerateNodeName(input->Name() + "_Cast"), + "Cast", + "Cast Input to float", + std::array{input}, + std::array{&cast_float}, + nullptr, + kOnnxDomain); + + node.AddAttribute("to", int64_t{ONNX_NAMESPACE::TensorProto_DataType_FLOAT}); + node.SetExecutionProviderType(provider_type); + return &cast_float; +} + /** Skip Layer Normalization will fuse Add + LayerNormalization into one node, and another Add if applicable @@ -243,6 +273,14 @@ Status 
SkipLayerNormFusion::ApplyImpl(Graph& graph, bool& modified, int graph_le nodes_to_remove.push_back(*p_add1); nodes_to_remove.push_back(ln_node); + // If input types are different than output type and output type is float, insert cast node after inputs. + for (auto& input_def: skip_layer_norm_input_defs) { + input_def = CastToFloat(graph, + input_def, + ln_node.MutableOutputDefs()[0]->TypeAsProto()->tensor_type().elem_type(), + ln_node.GetExecutionProviderType()); + } + Node& skip_layer_norm_node = graph.AddNode(graph.GenerateNodeName("SkipLayerNormalization"), "SkipLayerNormalization", "fused SkipLayerNorm subgraphs ", diff --git a/onnxruntime/test/optimizer/graph_transform_test.cc b/onnxruntime/test/optimizer/graph_transform_test.cc index da9cd3caac..0c4a685371 100755 --- a/onnxruntime/test/optimizer/graph_transform_test.cc +++ b/onnxruntime/test/optimizer/graph_transform_test.cc @@ -4906,7 +4906,7 @@ TEST_F(GraphTransformationTests, SimplifiedLayerNormWithCastsFusionTestCudaEp) { } static void TestSkipLayerNormFusion(const std::basic_string& file_path, int add_count, int ln_count, - int skip_ln_count, logging::Logger* logger) { + int skip_ln_count, int cast_count, logging::Logger* logger) { std::shared_ptr p_model; ASSERT_TRUE(Model::Load(file_path, p_model, nullptr, *logger).IsOK()); Graph& graph = p_model->MainGraph(); @@ -4925,43 +4925,57 @@ static void TestSkipLayerNormFusion(const std::basic_string& file_pat ASSERT_TRUE(op_to_count["Sqrt"] == 0); ASSERT_TRUE(op_to_count["LayerNormalization"] == ln_count); ASSERT_TRUE(op_to_count["com.microsoft.SkipLayerNormalization"] == skip_ln_count); + ASSERT_TRUE(op_to_count["Cast"] == cast_count); } TEST_F(GraphTransformationTests, SkipLayerNormFusionTest) { - TestSkipLayerNormFusion(MODEL_FOLDER "fusion/skip_layer_norm_format1.onnx", 0, 0, 1, logger_.get()); - TestSkipLayerNormFusion(MODEL_FOLDER "fusion/skip_layer_norm_format2.onnx", 0, 0, 1, logger_.get()); - TestSkipLayerNormFusion(MODEL_FOLDER 
"fusion/skip_layer_norm_format3.onnx", 0, 0, 1, logger_.get()); + TestSkipLayerNormFusion(MODEL_FOLDER "fusion/skip_layer_norm_format1.onnx", 0, 0, 1, 0, logger_.get()); + TestSkipLayerNormFusion(MODEL_FOLDER "fusion/skip_layer_norm_format2.onnx", 0, 0, 1, 0, logger_.get()); + TestSkipLayerNormFusion(MODEL_FOLDER "fusion/skip_layer_norm_format3.onnx", 0, 0, 1, 0, logger_.get()); - TestSkipLayerNormFusion(MODEL_FOLDER "fusion/skip_layer_norm_format1_partial.onnx", 1, 0, 1, logger_.get()); - TestSkipLayerNormFusion(MODEL_FOLDER "fusion/skip_layer_norm_format2_partial.onnx", 1, 0, 1, logger_.get()); - TestSkipLayerNormFusion(MODEL_FOLDER "fusion/skip_layer_norm_format3_no_fusion.onnx", 1, 1, 0, logger_.get()); + TestSkipLayerNormFusion(MODEL_FOLDER "fusion/skip_layer_norm_format1_partial.onnx", 1, 0, 1, 0, logger_.get()); + TestSkipLayerNormFusion(MODEL_FOLDER "fusion/skip_layer_norm_format2_partial.onnx", 1, 0, 1, 0, logger_.get()); + TestSkipLayerNormFusion(MODEL_FOLDER "fusion/skip_layer_norm_format3_no_fusion.onnx", 1, 1, 0, 0, logger_.get()); - TestSkipLayerNormFusion(MODEL_FOLDER "fusion/skip_layer_norm_format1_graph_output.onnx", 1, 0, 1, logger_.get()); - TestSkipLayerNormFusion(MODEL_FOLDER "fusion/skip_layer_norm_format2_graph_output.onnx", 1, 0, 1, logger_.get()); - TestSkipLayerNormFusion(MODEL_FOLDER "fusion/skip_layer_norm_format3_graph_output.onnx", 1, 1, 0, logger_.get()); + TestSkipLayerNormFusion(MODEL_FOLDER "fusion/skip_layer_norm_format1_graph_output.onnx", 1, 0, 1, 0, logger_.get()); + TestSkipLayerNormFusion(MODEL_FOLDER "fusion/skip_layer_norm_format2_graph_output.onnx", 1, 0, 1, 0, logger_.get()); + TestSkipLayerNormFusion(MODEL_FOLDER "fusion/skip_layer_norm_format3_graph_output.onnx", 1, 1, 0, 0, logger_.get()); } -TEST_F(GraphTransformationTests, SkipLayerNormFusion_Input_Output_Check) { - constexpr const ORTCHAR_T* model_uri = MODEL_FOLDER "fusion/skip_layer_norm_input_output_check.onnx"; +TEST_F(GraphTransformationTests, 
SkipLayerNormFusionWithCastTest) { + TestSkipLayerNormFusion(MODEL_FOLDER "fusion/skip_layer_norm_format1_with_cast.onnx", 0, 0, 1, 3, logger_.get()); + TestSkipLayerNormFusion(MODEL_FOLDER "fusion/skip_layer_norm_format2_with_cast.onnx", 0, 0, 1, 3, logger_.get()); + TestSkipLayerNormFusion(MODEL_FOLDER "fusion/skip_layer_norm_format3_with_cast.onnx", 0, 0, 1, 2, logger_.get()); + + TestSkipLayerNormFusion(MODEL_FOLDER "fusion/skip_layer_norm_format1_partial_with_cast.onnx", 1, 0, 1, 2, logger_.get()); + TestSkipLayerNormFusion(MODEL_FOLDER "fusion/skip_layer_norm_format2_partial_with_cast.onnx", 1, 0, 1, 2, logger_.get()); + TestSkipLayerNormFusion(MODEL_FOLDER "fusion/skip_layer_norm_format3_no_fusion_with_cast.onnx", 1, 1, 0, 0, logger_.get()); + + TestSkipLayerNormFusion(MODEL_FOLDER "fusion/skip_layer_norm_format1_graph_output_with_cast.onnx", 1, 0, 1, 2, logger_.get()); + TestSkipLayerNormFusion(MODEL_FOLDER "fusion/skip_layer_norm_format2_graph_output_with_cast.onnx", 1, 0, 1, 2, logger_.get()); + TestSkipLayerNormFusion(MODEL_FOLDER "fusion/skip_layer_norm_format3_graph_output_with_cast.onnx", 1, 1, 0, 0, logger_.get()); +} + +static void TestSkipLayerNormFusionInputOutputCheck(const std::basic_string& model_uri, bool with_cast, logging::Logger* logger) { std::shared_ptr p_model; - ASSERT_STATUS_OK(Model::Load(model_uri, p_model, nullptr, *logger_)); + ASSERT_STATUS_OK(Model::Load(model_uri, p_model, nullptr, *logger)); Graph& graph = p_model->MainGraph(); onnxruntime::GraphTransformerManager graph_transformation_mgr{5}; ASSERT_STATUS_OK(graph_transformation_mgr.Register(std::make_unique(), TransformerLevel::Level2)); ASSERT_STATUS_OK(graph_transformation_mgr.Register(std::make_unique(), TransformerLevel::Level2)); - ASSERT_STATUS_OK(graph_transformation_mgr.ApplyTransformers(graph, TransformerLevel::Level2, *logger_)); + ASSERT_STATUS_OK(graph_transformation_mgr.ApplyTransformers(graph, TransformerLevel::Level2, *logger)); for (Node& node : graph.Nodes()) 
{ if (node.OpType() == "SkipLayerNormalization") { // check inputs std::vector& input_defs = node.MutableInputDefs(); EXPECT_EQ(input_defs.size(), 5u) << "SkipLayerNormalization number of inputs does not equal to 5. Got:" << node.InputDefs().size(); - EXPECT_EQ(input_defs[0]->Name(), "input.1"); - EXPECT_EQ(input_defs[1]->Name(), "6"); + EXPECT_EQ(input_defs[0]->Name(), ((with_cast) ? "input.1_Float" : "input.1")); + EXPECT_EQ(input_defs[1]->Name(), ((with_cast) ? "6_Float" : "6")); EXPECT_EQ(input_defs[2]->Name(), "1"); EXPECT_EQ(input_defs[3]->Name(), "2"); - EXPECT_EQ(input_defs[4]->Name(), "4"); + EXPECT_EQ(input_defs[4]->Name(), ((with_cast) ? "4_Float" : "4")); // check outputs std::vector& output_defs = node.MutableOutputDefs(); @@ -4971,26 +4985,38 @@ TEST_F(GraphTransformationTests, SkipLayerNormFusion_Input_Output_Check) { EXPECT_EQ(node.OutputDefs().size(), 1u) << "SkipLayerNormalization number of outputs does not equal to 1. Got:" << node.OutputDefs().size(); #endif EXPECT_EQ(output_defs[0]->Name(), "19"); + } else if (node.OpType() == "Cast") { + EXPECT_TRUE(with_cast) << "Unexpected node: " << node.OpType() << "," << node.Name(); } else { EXPECT_EQ(node.OpType(), "MatMul") << "Unexpected node: " << node.OpType() << "," << node.Name(); } } } -TEST_F(GraphTransformationTests, SkipLayerNormFusion_NoBeta) { - constexpr const ORTCHAR_T* model_uri = MODEL_FOLDER "fusion/skip_layer_norm_no_beta.onnx"; +TEST_F(GraphTransformationTests, SkipLayerNormFusion_Input_Output_Check) { + TestSkipLayerNormFusionInputOutputCheck(MODEL_FOLDER "fusion/skip_layer_norm_input_output_check.onnx", false, logger_.get()); + TestSkipLayerNormFusionInputOutputCheck(MODEL_FOLDER "fusion/skip_layer_norm_input_output_with_cast_check.onnx", true, logger_.get()); +} + +static void TestSkipLayerNormFusionNoBeta(const std::basic_string& model_uri, bool with_cast, logging::Logger* logger) { std::shared_ptr p_model; - ASSERT_STATUS_OK(Model::Load(model_uri, p_model, nullptr, *logger_)); + 
ASSERT_STATUS_OK(Model::Load(model_uri, p_model, nullptr, *logger)); Graph& graph = p_model->MainGraph(); onnxruntime::GraphTransformerManager graph_transformation_mgr{5}; ASSERT_STATUS_OK(graph_transformation_mgr.Register(std::make_unique(), TransformerLevel::Level2)); - ASSERT_STATUS_OK(graph_transformation_mgr.ApplyTransformers(graph, TransformerLevel::Level2, *logger_)); + ASSERT_STATUS_OK(graph_transformation_mgr.ApplyTransformers(graph, TransformerLevel::Level2, *logger)); std::map op_to_count = CountOpsInGraph(graph); ASSERT_TRUE(op_to_count["Add"] == 0); ASSERT_TRUE(op_to_count["LayerNormalization"] == 0); ASSERT_TRUE(op_to_count["com.microsoft.SkipLayerNormalization"] == 1); + ASSERT_TRUE(op_to_count["Cast"] == ((with_cast) ? 2 : 0)); +} + +TEST_F(GraphTransformationTests, SkipLayerNormFusion_NoBeta) { + TestSkipLayerNormFusionNoBeta(MODEL_FOLDER "fusion/skip_layer_norm_no_beta.onnx", false, logger_.get()); + TestSkipLayerNormFusionNoBeta(MODEL_FOLDER "fusion/skip_layer_norm_no_beta_with_cast.onnx", true, logger_.get()); } TEST_F(GraphTransformationTests, EmbedLayerNormFusionFormat1) { diff --git a/onnxruntime/test/testdata/transform/fusion/skip_layer_norm_format1_graph_output_with_cast.onnx b/onnxruntime/test/testdata/transform/fusion/skip_layer_norm_format1_graph_output_with_cast.onnx new file mode 100644 index 0000000000000000000000000000000000000000..043c0545eb0d70094e5e9730acd7129b20eb54e3 GIT binary patch literal 848 zcmbVK&2G~`5VqqD-j0fJm!hJImdZd8u7rqlt3*g0j%kI+4Ix?UpCYSqoOA{D3xR8WD6ZiQ z8d7@DZI&MV|MBoj4zKUsh>MA+!nETv%9G`Oh(xY+k|b~dE1FSpSyoFAh+Hzl2rs1x zwE$QpFgwYvbS469^*+0TXVCt_^PEE?GCQxw+A0FqI<3dB2Mq}%N-NIH!LA(VjUHzo z%o7wcF4VYPeFy4Ytv^-cDjLjy>HkADz?GvP_GomGO^5tjTAmeU9-d?1Ro7#V=@P>* z`oZc$cn>8v`~CeWxLcC9M+wr7Vcfiv>GfkxRL!=O_6CxA_N-+Pei1zR7uPY1)7|v#} S1^443f1PJ>S%~7ia?HOJnCKq> literal 0 HcmV?d00001 diff --git a/onnxruntime/test/testdata/transform/fusion/skip_layer_norm_format1_partial_with_cast.onnx 
b/onnxruntime/test/testdata/transform/fusion/skip_layer_norm_format1_partial_with_cast.onnx new file mode 100644 index 0000000000000000000000000000000000000000..58a5417aca9cdcc1433dac9bb76f99ac915a5f60 GIT binary patch literal 847 zcmbVK%Wl&^6t&|Co?aDUrbVENKxJOSl@M`uNQBg3nMR205Rx^qr;*jz4t}(->j&@y zWyLSB=M$3O$Zx=%nK%_vS6H6W(K%=C+;eS8-~74-&!9ce!X#tuD9r2XfJD5kTr$Mn z+ksY&X|!Jp>A!?t$^=$R0UI7v%g z?dhf(G}V}0PIXyQ*3+(QP3kG!=<;=!=<>DeIpA5v0$t=67$x+XEs>04A#kk^`2~!j zA!P<#W|P7HpA0W$^ZMSMD9_3&6kVTD9&hGDByz3eIEF*m(u|g0ifWSqkxM4H!wVTg z4FI+VX2;37b|S!1?~`+Q0__=3Qx1*D?6jWis0du^q@F_u8WPA)h zq~R%Uyy|((F~mdm%R z<*5JT>Z*^pAE>C_KC0l+_2+fGO4f5^G3TfI7@09}D6m~j$u6 zS5|xopOE}Uegp2z#6d`1VR=SJ=bX87&$TIi{rv_!h3<3~Wi!@IqN1sHNg}G+CnMbX z7If<3zWvkgz!;3q+%oNHqg%?fNo3E54;mavG2=Tx#1~l&f42`FcKI!!r&7+-EU!$p zC+lj^R%31zo3f;8;-2qJni#Hi1*S`M1;+IZ@T_Km&Wj6-5(UgvNX2mw_|E&{0*;|2 zRR&XLogw(23@=pk=H9KOm{oNodjX>&UC)Oo#Sf9L`@a?Hc-PEND=SbUY`aZ%>c32vh9bLQDTu`FvC zZaxIRqqxPdZwKMuA@My*koGL=@~yJti|cY2nrQrik(Z0bo8@9W_;GbLKs*R_7QTZj zaJu@mN|)JciYx~Ccn>o%1|9{r4IS#?+Q&td5MC$5D2%jV|9c`NV0qi&ZMc~f`O7>@ L%AzVxYR~=yCwSw0 literal 0 HcmV?d00001 diff --git a/onnxruntime/test/testdata/transform/fusion/skip_layer_norm_format2_graph_output_with_cast.onnx b/onnxruntime/test/testdata/transform/fusion/skip_layer_norm_format2_graph_output_with_cast.onnx new file mode 100644 index 0000000000000000000000000000000000000000..96683f2cd44a7336533dc8549f3fefc2e536de59 GIT binary patch literal 848 zcmbVK%Wl&^6t&|Co?aE<4n;*3EtP>HTnQ0pSBa21EYk{+9YV6kk0PsaoO)KJcIUl5@r+Dj>Dp^c1g@d<&pvJ z-UhU);-UH7Y{3vT&+Ibks7AX?lR8d2F1%OZH02X-8wmfLivF+W!Naz959mdjPLnJb zTJ7ml4H`9Oi%827QAZuu8r2c3?eeut?DBQjA>dh61v)LRF-qt&TOt|9Lf~5OifcH7 zrYJLLnPmq5e=@v~=JmZBaWN59n09egV9Ac9r7<}c~+Epc#a#dx*l^(ml%f8 z4^|(-yDw|ApI?uHKST2ND8aI07&mWZJHEJX`hkw-J<{}iHhVpv4SU~iZ+nP)fy&}- zBMTl~d|V{+Y%#_v2Klgqo*4s&0^5WZb#NWvB9aK!5fKstMVSAdi1z==zD6-1U^tt> T7Tk@C{AHfSWg&|5$}xWf4om1C literal 0 HcmV?d00001 diff --git 
a/onnxruntime/test/testdata/transform/fusion/skip_layer_norm_format2_partial_with_cast.onnx b/onnxruntime/test/testdata/transform/fusion/skip_layer_norm_format2_partial_with_cast.onnx new file mode 100644 index 0000000000000000000000000000000000000000..ff19625a273f2026f6236f68aa6afeb7a3f775ec GIT binary patch literal 847 zcmbVK%Wl&^6t&|Co?aDUrbVENKxJOSl@M`uNQBg3nMR205Rx^qr;*jz4t}(->j&@y zWyLSB=M$3O$Zx=%nK*#d6_#gobk3PO_gtIOS3hpR6KKz~Fv(au3iG-;AQ3Msmke?D zcA!<|_ss8R3r3)IW*2c+8SO&EbtJkjyjS2v@XXr>!as|0@T+<7u zNSQ&G*<|qlC&P2uyuNoQ%CoWxMb~GP$D8>OiCpVAj^PltG^6FGqS|CYQ5Nod~eh+vFS`L3_s2ltUvjJFVwBDgxI!sprsvh6FMd6=&w~K(_NzxAOqZ z0EL8?s@AsbctaYgJAoeygkV+e||j*{>;$pP=d5$7?*FP9ba6RgFq+u?rV9qT)ti{ zNBwVCSAE3&Kt=WTQ3a2#Kd$3dvYsQ0IX~RRNQ{9)f$c(zI=BvT5h(=M2~!k?N-+OD Z5fCt(-QYIdjPmqFnnXoj=BJfo{sy08?1umV literal 0 HcmV?d00001 diff --git a/onnxruntime/test/testdata/transform/fusion/skip_layer_norm_format2_with_cast.onnx b/onnxruntime/test/testdata/transform/fusion/skip_layer_norm_format2_with_cast.onnx new file mode 100644 index 0000000000000000000000000000000000000000..566db31e9b5e492fc8226aab8e41bc0b9eea5017 GIT binary patch literal 816 zcmbVK%Wl&^6t&|Co?aDUh9Xc!p)xNql@M`uNQBg3nO2DG5Rx^vr^sp?r+&1s>j$u6 zS5|xopOE}Uegp2z#6d`1VR=SJ=iE7S&$TIi{rv_!h3<3~Wi!@IqN1sHNg}G+CnMbX z7If<3zWvkgz!;3q+%oOyLbsG@lgOSAA2c|UV#arXh%d4l{%#*U?DAVcPoQ12W45!i>iW34dre`o_7Ip$$^C#TtbEWXO}xG3}J1UFIlIrD6vSe7*m zHy?uEQQYF!w}bHSkoX=YNPCub`Bs(Vi|cY2nrQrik(Z0bo8@9W_;GbLKs*R_7QTZj zaJu@mN|)JciYx~Ccn^DK3_J>K8#>g(wU3J^A-qnAQ5b2#{`W*k!1A`k+i)`}@|Ss* LltopX)SmqZAuHp2 literal 0 HcmV?d00001 diff --git a/onnxruntime/test/testdata/transform/fusion/skip_layer_norm_format3_graph_output_with_cast.onnx b/onnxruntime/test/testdata/transform/fusion/skip_layer_norm_format3_graph_output_with_cast.onnx new file mode 100644 index 0000000000000000000000000000000000000000..840fc7ecda9bddc8120057522c1c53690059dea3 GIT binary patch literal 784 zcmbVK%TC)s6t&}tJX}%LOsPOTBxG7Dj8rx54yhqHECWJh$Hp4lLuCA-6tjY}%^P+S~A9rsF 
zTBTe!pUf5vK=aJblTIzPbCFcB=(uoEgHyrB-YO7&CyL&m=E1|Z_ZH~25R)X$3f=9) ztQ$1lm|aA=Eh(y~<66Tif`uwyyF`_*ReOME>nhNRyu~P?&uoQM91DSKoy%J|gQlo5 z=r*$q{{LjySLXG1mtr|C%20HCMtL%u4^haqj*|p-U|wdl_>q^h42WDZ#2r4V5cL3H zE?{<&-e@HPY;}^}z%I1E^DN`gkj&1ik&cSMwN9%MY(rB75{Z&Cv%jJ2e9?B+!8`^t zVEV_`49*gbviHO5bTZ&SMSdppEWE;vmtQ^Rm@Y94qZiC4?7dU9>F-|$!N2t0HYHeg z4CDSt$??T?-wU*7Z%fn9)9JV8>7e`g@X$rv4Qi8@YiNrxa44{4Xi*2(4lbgIU=a}^ k(XR>f^@(V+xzr2V1q2LdIaq-=vCKYaX`IVKUX_md7m3Bx@Bjb+ literal 0 HcmV?d00001 diff --git a/onnxruntime/test/testdata/transform/fusion/skip_layer_norm_format3_no_fusion_with_cast.onnx b/onnxruntime/test/testdata/transform/fusion/skip_layer_norm_format3_no_fusion_with_cast.onnx new file mode 100644 index 0000000000000000000000000000000000000000..173be529cd9cf622d525bae0de5e9ab4f5536590 GIT binary patch literal 783 zcmbVK%Wl&^6t&}tJiRKy3?QnAO4aZZMyiOrLn5RO%QQk{$Hp4l)5vPB~=p-xifEKxKkCFE>RU2)l0y$Z58NBU1F3dV6H|wj)Qi`$gM0ghwBQa?S``;5G0n6JBAHcmt<%fBel&VsvwP*hU0D#$8 literal 0 HcmV?d00001 diff --git a/onnxruntime/test/testdata/transform/fusion/skip_layer_norm_format3_with_cast.onnx b/onnxruntime/test/testdata/transform/fusion/skip_layer_norm_format3_with_cast.onnx new file mode 100644 index 0000000000000000000000000000000000000000..5646faf09e08d9646d8529b279dca4809aeac7f5 GIT binary patch literal 755 zcmbVJ%TC)s6t&}tJX}%X4xl1bBxIl!Mnc5wDm4ub%YZ7fV`Gi&p|XfG!4DogKA=B9 zSN)KFLRj)W^o}PELSlvG86Dko?)5p>lzw|^!w$4Z<1ihwb{xv8TqiLvN|*HU_7?FO`N(4CSIK74~X#e6_#-SmZomC?p6@hD=R3q4crU)bwC1+-TUD-L%cGkc=0yALx z$JY$*5{&8$~+4%@Z#m6#~jlohGF!A`G&n^Rh$0%y%)Ts_ckcOvSS$c zKa?C_Jomjod-gtS`f)n_{x}_U|2#Z&5qE>y|`DsEaDGQT1&T!~OcT?B=Dt zGkP{Rc#-v?_Gm|uf*hoVGvj%^9YL=oG9bp%Jt@5>Kc~*M^|G~DlnNE%cprd1d@J_$J03n{ICI% z8pIG`N^GDvJEg@qA8}A6S&3xjE%KCY$a7WTe^jXdRk8T8S*fa!Nk2@O(aMhEl^u~$ zQIuq3EY)`@r)x;${B8jE5Wp24BcQ?tyN4$=M3zo`c_sw{cO;b!y7z?0#)&T@Dg^FW zaW+`KC%Mw+AOtEyw#rgnsR_~sbAC7AYoJ%pHju4*nNayvw2chwZRzRpY)BGt#X1#(wII3NP?s17i(~CLLwO z(c%@nAC)8^@r$>tRo*Xdm_GzdJ|>SCVe76uZ#;9RTN>LDu3fq#G!*XR>kEwCYrm3i 
zq6>Jo)w@)3)}>S7=ZR*%ZATBDdn1%g)jExleLxug*9>IQHS{npS6CO@69^?ZG>+u z4+U_t?z^zs^v!rJesng>U4^ekeesCX-}v{}VHg;qAyW&zH27n1z4~|yi9(Fd zQrtd?m8nH}`9--vl|o>SACQxnlUbEml9`{UCBns$Sdm#Q@gD{l7cequ32?Ec78C>J zMb{ivH($WWC?pe{oml|aAD;#?q{JjXFFz@@BvDJ2gGGQ*Nq~W&!JdJE!2yVXxWT~* t;*cnDpzDQ%xCA)3gm}1^I0S%L0f<>Z5>D78l7JG