From eabf6dc9eebffaf4aa6b2ae2191453b0540bb29e Mon Sep 17 00:00:00 2001 From: Tianlei Wu Date: Mon, 6 Jul 2020 19:37:37 -0700 Subject: [PATCH] Add Fusion for GPT Attention with both past state and attention mask (#4437) Add Fusion for GPT Attention with past state and attention mask --- .../core/optimizer/attention_fusion.cc | 250 +--- .../core/optimizer/attention_fusion_helper.h | 1142 +++++++++++++++++ .../test/optimizer/graph_transform_test.cc | 18 + .../fusion/gpt2_past_mask_one_layer.onnx | Bin 0 -> 10363 bytes 4 files changed, 1204 insertions(+), 206 deletions(-) create mode 100644 onnxruntime/core/optimizer/attention_fusion_helper.h create mode 100644 onnxruntime/test/testdata/transform/fusion/gpt2_past_mask_one_layer.onnx diff --git a/onnxruntime/core/optimizer/attention_fusion.cc b/onnxruntime/core/optimizer/attention_fusion.cc index bffe2372c6..fb8d17b43d 100644 --- a/onnxruntime/core/optimizer/attention_fusion.cc +++ b/onnxruntime/core/optimizer/attention_fusion.cc @@ -5,12 +5,9 @@ #include "core/optimizer/initializer.h" #include "core/optimizer/attention_fusion.h" #include "core/optimizer/utils.h" +#include "core/optimizer/attention_fusion_helper.h" #include -#define DEBUG_LOG(x) LOGS(logger, VERBOSE) << x - -using namespace ONNX_NAMESPACE; -using namespace onnxruntime::common; namespace onnxruntime { static bool ValidateMatMulInitializer(const Graph& graph, const Node& matmul, int64_t hidden_size) { @@ -31,7 +28,7 @@ static bool ValidateAddBiasInitializer(const Graph& graph, const Node& add, int6 return optimizer_utils::ValidateShape(input_b, {hidden_size}); } -// Merge 1-D weights (q, k and v) by concanating them one by one. +// Merge 1-D weights (q, k and v) by concatenating them one by one. 
template void MergeWeights(const T* q, const T* k, const T* v, std::vector& result, int64_t element_count) { for (int64_t i = 0; i < element_count; i++) { @@ -50,7 +47,7 @@ void MergeWeights(const T* q, const T* k, const T* v, std::vector& result, in } } -// Merge 2-D weights (q, k and v) by concanating them row by row. +// Merge 2-D weights (q, k and v) by concatenating them row by row. template void MergeMatMulWeights(const T* q_weight, const T* k_weight, const T* v_weight, std::vector& result, int64_t hidden_size) { const T* q = q_weight; @@ -146,36 +143,6 @@ static NodeArg& MergeQkvWeights(Graph& graph, int64_t hidden_size, return graph_utils::AddInitializer(graph, initializer); } -// Add a Cast to convert Mask from int64 to int32. -static NodeArg& CastMaskToInt32(Graph& graph, NodeArg* mask_input, ProviderType provider_type) { - const TensorShapeProto* mask_shape = mask_input->Shape(); - TypeProto mask_int32; - mask_int32.mutable_tensor_type()->set_elem_type(TensorProto_DataType_INT32); - auto dim0 = mask_int32.mutable_tensor_type()->mutable_shape()->add_dim(); - *dim0 = mask_shape->dim(0); - auto dim1 = mask_int32.mutable_tensor_type()->mutable_shape()->add_dim(); - *dim1 = mask_shape->dim(1); - auto& cast32 = graph.GetOrCreateNodeArg(graph.GenerateNodeArgName("Mask_Int32"), &mask_int32); - - Node& node = graph.AddNode(graph.GenerateNodeName("MaskCast"), - "Cast", - "Cast mask from int64 to int32", - {mask_input}, - {&cast32}, - nullptr, - kOnnxDomain); - - // Add attribute: "to" = 6 - ONNX_NAMESPACE::AttributeProto to; - to.set_name("to"); - to.set_type(ONNX_NAMESPACE::AttributeProto_AttributeType::AttributeProto_AttributeType_INT); - to.set_i(static_cast(ONNX_NAMESPACE::TensorProto_DataType_INT32)); - node.AddAttribute("to", to); - - node.SetExecutionProviderType(provider_type); - return cast32; -} - static NodeArg& AddMaskReduceSum(Graph& graph, NodeArg* reduce_sum_input, TypeProto& output_type, ProviderType provider_type) { NodeArg& reduce_sum_output = 
graph.GetOrCreateNodeArg(graph.GenerateNodeArgName("MaskIndex_Int32"), &output_type); @@ -229,7 +196,7 @@ static NodeArg* ProcessMask(Graph& graph, NodeArg* mask_input, ProviderType prov NodeArg* reduce_sum_input = mask_input; if (data_type == ONNX_NAMESPACE::TensorProto_DataType_INT64 || data_type == ONNX_NAMESPACE::TensorProto_DataType_FLOAT) { - NodeArg& cast_int32 = CastMaskToInt32(graph, mask_input, provider_type); + NodeArg& cast_int32 = AttentionFusionHelper::CastMaskToInt32(graph, mask_input, provider_type); reduce_sum_input = &cast_int32; } @@ -272,6 +239,9 @@ Status AttentionFusion::ApplyImpl(Graph& graph, bool& modified, int graph_level, // A map from mask input arg name to mask index output. std::map mask_index_map; + // A map from mask input arg name to the one casted to int32 + std::map mask_int32_map; + int fused_count = 0; for (auto node_index : node_topology_list) { auto* p_node = graph.GetNode(node_index); @@ -296,23 +266,31 @@ Status AttentionFusion::ApplyImpl(Graph& graph, bool& modified, int graph_level, const Node* add_node = nullptr; int add_count = 0; int matmul_count = 0; + int shape_count = 0; + int reshape_count = 0; for (auto it = node.OutputNodesBegin(); it != node.OutputNodesEnd(); ++it) { if ((*it).OpType().compare("Add") == 0) { add_count++; add_node = &(*it); } else if ((*it).OpType().compare("MatMul") == 0) { matmul_count++; + } else if ((*it).OpType().compare("Shape") == 0) { + shape_count++; + } else if ((*it).OpType().compare("Reshape") == 0) { + reshape_count++; } } - if (add_count != 1 || matmul_count != 3) { - DEBUG_LOG("Attention subgraph expects 1 Add and 3 MatMul as children of LayerNormalization."); - continue; - } - - if (AttentionFusion::FuseSubGraph(node, *add_node, graph, hidden_size, mask_index_map, logger)) { - fused_count++; - modified = true; + if (add_count == 1 && matmul_count == 3) { // BERT + if (AttentionFusion::FuseSubGraph(node, *add_node, graph, hidden_size, mask_index_map, logger)) { + fused_count++; + 
modified = true; + } + } else if (reshape_count == 1 && shape_count == 3) { // GPT + if (AttentionFusionHelper::FuseGptAttention(node, graph, hidden_size, mask_int32_map, logger)) { + fused_count++; + modified = true; + } } } } @@ -345,9 +323,9 @@ Status AttentionFusion::ApplyImpl(Graph& graph, bool& modified, int graph_level, | (0,2,1,3) (0,2,3,1) (perm=0,2,1,3) | | \ / | mask_Unsqueeze(axes=2) | qk_MatMul | | - | | [B=2] | ([A=1] mask_Cast(to=1)) + | | [B=2] | ([A=1.0] mask_Cast(to=1)) | | / | \ / - | qk_Div | mask_Sub [A=1000] + | qk_Div | mask_Sub [B=-10000.0] | \ | \ / | mask_Add <-------- /---------------------mask_Mul | | / @@ -413,46 +391,16 @@ bool AttentionFusion::FuseSubGraph(Node& layer_norm, const Node& add_after_layer return false; } - // Internal nodes of attention subgraph only allow edges within the subgraph, and no graph output is allowed. - // No constraints for four nodes: reshape node is last node of Attention; and add, matmul and v_root are not in attention subgraph. - if (!optimizer_utils::CheckOutputEdges(graph, transpose, 1) || - !optimizer_utils::CheckOutputEdges(graph, qkv_matmul, 1) || - !optimizer_utils::CheckOutputEdges(graph, v_transpose, 1) || - !optimizer_utils::CheckOutputEdges(graph, v_reshape, 1) || - !optimizer_utils::CheckOutputEdges(graph, v_add, 1) || + if (!optimizer_utils::CheckOutputEdges(graph, v_add, 1) || !optimizer_utils::CheckOutputEdges(graph, v_matmul, 1)) { - DEBUG_LOG("Output edge count not expected for nodes in path v"); + DEBUG_LOG("Output edge count not expected for Add or MatMul in path v"); return false; } - std::vector perm; - if (!(graph_utils::GetRepeatedNodeAttributeValues(transpose, "perm", perm) && perm.size() == 4 && perm[0] == 0 && perm[1] == 2 && perm[2] == 1 && perm[3] == 3)) { - DEBUG_LOG("Failed in match Transpose attribute perm. 
Expected: 0, 2, 1, 3"); - return false; - } - if (!(graph_utils::GetRepeatedNodeAttributeValues(v_transpose, "perm", perm) && perm.size() == 4 && perm[0] == 0 && perm[1] == 2 && perm[2] == 1 && perm[3] == 3)) { - DEBUG_LOG("Failed in match v_transpose attribute perm. Expected: 0, 2, 1, 3"); - return false; - } - - std::vector v_reshape_shape; - if (!optimizer_utils::AppendTensorFromInitializer(graph, *(v_reshape.InputDefs()[1]), v_reshape_shape) || - v_reshape_shape.size() != 4 || - v_reshape_shape[2] <= 0 || - v_reshape_shape[3] <= 0 || - hidden_size != v_reshape_shape[2] * v_reshape_shape[3]) { - DEBUG_LOG("v_reshape initializer value is not expected"); - return false; - } - - const int64_t num_attention_head = v_reshape_shape[2]; - const int64_t attention_head_size = v_reshape_shape[3]; - - std::vector reshape_shape; - if (!optimizer_utils::AppendTensorFromInitializer(graph, *(reshape.InputDefs()[1]), reshape_shape) || - reshape_shape.size() != 3 || - reshape_shape[2] != hidden_size) { - DEBUG_LOG("reshape initializer value is not expected"); + int64_t num_heads = 0; // will be updated in CheckNodesInPathV + int64_t head_size = 0; // will be updated in CheckNodesInPathV + if (!AttentionFusionHelper::CheckNodesInPathV(graph, reshape, transpose, qkv_matmul, v_transpose, v_reshape, num_heads, head_size, hidden_size, logger)) { + DEBUG_LOG("CheckNodesInPathV return false"); return false; } @@ -465,86 +413,11 @@ bool AttentionFusion::FuseSubGraph(Node& layer_norm, const Node& add_after_layer return false; } - // path 2 to find mask. Unsqueeze -> Unsqueeze -> (Cast) -> Sub -> Mul -> Add -> Softmax - // The "Cast" node in parentheses is optional. 
- std::vector mask_path{ - {0, 0, "Softmax", {1, 11}, kOnnxDomain}, - {0, 0, "Add", {7}, kOnnxDomain}, - {0, 1, "Mul", {7}, kOnnxDomain}, - {0, 0, "Sub", {7}, kOnnxDomain}}; - - if (!graph_utils::FindPath(qkv_matmul, true, mask_path, edges, logger)) { - DEBUG_LOG("Failed to find path for mask"); - return false; - } - - const Node& softmax = edges[0]->GetNode(); - const Node& mask_add = edges[1]->GetNode(); - const Node& mask_mul = edges[2]->GetNode(); - const Node& mask_sub = edges[3]->GetNode(); - - // Match optional mask cast node - Node* p_mask_cast = nullptr; - Node* p_mask_unsqueeze_2 = nullptr; - Node* p_mask_unsqueeze_1 = nullptr; - std::vector mask_path_format_1{ - {0, 1, "Cast", {9}, kOnnxDomain}, - {0, 0, "Unsqueeze", {1, 11}, kOnnxDomain}, - {0, 0, "Unsqueeze", {1, 11}, kOnnxDomain}}; - - std::vector mask_path_format_2{ - {0, 1, "Unsqueeze", {1, 11}, kOnnxDomain}, - {0, 0, "Unsqueeze", {1, 11}, kOnnxDomain}}; - - if (graph_utils::FindPath(mask_sub, true, mask_path_format_1, edges, logger)) { - p_mask_cast = const_cast(&edges[0]->GetNode()); - p_mask_unsqueeze_2 = const_cast(&edges[1]->GetNode()); - p_mask_unsqueeze_1 = const_cast(&edges[2]->GetNode()); - } else if (graph_utils::FindPath(mask_sub, true, mask_path_format_2, edges, logger)) { - p_mask_unsqueeze_2 = const_cast(&edges[0]->GetNode()); - p_mask_unsqueeze_1 = const_cast(&edges[1]->GetNode()); - } else { - DEBUG_LOG("Failed to find path for mask"); - return false; - } - - const Node& mask_unsqueeze_2 = *p_mask_unsqueeze_2; - const Node& mask_unsqueeze_1 = *p_mask_unsqueeze_1; - - - if (!optimizer_utils::CheckOutputEdges(graph, softmax, 1) || - !optimizer_utils::CheckOutputEdges(graph, mask_add, 1) || - !optimizer_utils::CheckOutputEdges(graph, mask_sub, 1) || - (p_mask_cast != nullptr && !optimizer_utils::CheckOutputEdges(graph, *p_mask_cast, 1)) || - !optimizer_utils::CheckOutputEdges(graph, mask_unsqueeze_2, 1) || - !optimizer_utils::CheckOutputEdges(graph, mask_unsqueeze_1, 1)) { - 
DEBUG_LOG("Output edge count not expected for mask nodes"); - return false; - } - - if (!optimizer_utils::IsAttributeWithExpectedValue(softmax, "axis", 3)) { - DEBUG_LOG("Softmax attribute axis is expected to be 3"); - return false; - } - - std::vector axes; - if (!(graph_utils::GetRepeatedNodeAttributeValues(mask_unsqueeze_1, "axes", axes) && axes.size() == 1 && axes[0] == 1)) { - DEBUG_LOG("mask_unsqueeze_1 axes not matched. Expect: 1"); - return false; - } - - if (!(graph_utils::GetRepeatedNodeAttributeValues(mask_unsqueeze_2, "axes", axes) && axes.size() == 1 && axes[0] == 2)) { - DEBUG_LOG("mask_unsqueeze_2 axes not matched. Expect: 2"); - return false; - } - - if (!optimizer_utils::IsInitializerWithExpectedValue(graph, *(mask_sub.InputDefs()[0]), float(1), false)) { - DEBUG_LOG("mask_sub const input not matched"); - return false; - } - - if (!optimizer_utils::IsInitializerWithExpectedValue(graph, *(mask_mul.InputDefs()[1]), float(-10000), false)) { - DEBUG_LOG("mask_mul const input not matched"); + // Find mask nodes: Unsqueeze -> Unsqueeze -> (Cast) -> Sub -> Mul -> Add -> Softmax --> [MatMul] + // The "Cast" node in parentheses is optional. 
+ AttentionFusionHelper::AttentionMaskNodes mask_nodes; + if (!AttentionFusionHelper::MatchInputMaskSubgraph(graph, qkv_matmul, mask_nodes, logger)) { + DEBUG_LOG("Failed in match input mask subgraph"); return false; } @@ -558,7 +431,7 @@ bool AttentionFusion::FuseSubGraph(Node& layer_norm, const Node& add_after_layer {0, 0, "MatMul", {1, 9}, kOnnxDomain}, {0, 0, "LayerNormalization", {1}, kOnnxDomain}}; - if (!graph_utils::FindPath(mask_add, true, q_path, edges, logger)) { + if (!graph_utils::FindPath(*(mask_nodes.add), true, q_path, edges, logger)) { DEBUG_LOG("Failed to find path for q"); return false; } @@ -575,23 +448,8 @@ bool AttentionFusion::FuseSubGraph(Node& layer_norm, const Node& add_after_layer return false; } - std::vector q_reshape_shape; - if (!optimizer_utils::AppendTensorFromInitializer(graph, *(q_reshape.InputDefs()[1]), q_reshape_shape) || - q_reshape_shape.size() != 4 || - q_reshape_shape[2] != num_attention_head || - q_reshape_shape[3] != attention_head_size) { - DEBUG_LOG("q_reshape const not matched"); - return false; - } - - float expected_value = std::sqrt(static_cast(attention_head_size)); - if (!optimizer_utils::IsInitializerWithExpectedValue(graph, *(qk_div.InputDefs()[1]), expected_value, false)) { - DEBUG_LOG("qk_div const not matched."); - return false; - } - - if (!(graph_utils::GetRepeatedNodeAttributeValues(q_transpose, "perm", perm) && perm.size() == 4 && perm[0] == 0 && perm[1] == 2 && perm[2] == 1 && perm[3] == 3)) { - DEBUG_LOG("q_transpose perm attribute not matched"); + if (!AttentionFusionHelper::CheckNodesInPathQ(graph, qk_div, q_reshape, q_transpose, num_heads, head_size, logger)) { + DEBUG_LOG("CheckNodesInPathQ returns false"); return false; } @@ -624,8 +482,8 @@ bool AttentionFusion::FuseSubGraph(Node& layer_norm, const Node& add_after_layer return false; } - if (!(graph_utils::GetRepeatedNodeAttributeValues(k_transpose, "perm", perm) && perm.size() == 4 && perm[0] == 0 && perm[1] == 2 && perm[2] == 3 && perm[3] == 1)) 
{ - DEBUG_LOG("k_transpose perm attribute not matched"); + if (!AttentionFusionHelper::CheckNodesInPathK(graph, k_reshape, k_transpose, num_heads, head_size, logger)) { + DEBUG_LOG("CheckNodesInPathK returns false"); return false; } @@ -635,15 +493,6 @@ bool AttentionFusion::FuseSubGraph(Node& layer_norm, const Node& add_after_layer return false; } - std::vector k_reshape_shape; - if (!optimizer_utils::AppendTensorFromInitializer(graph, *(k_reshape.InputDefs()[1]), k_reshape_shape) || - k_reshape_shape.size() != 4 || - k_reshape_shape[2] != num_attention_head || - k_reshape_shape[3] != attention_head_size) { - DEBUG_LOG("k_reshape const not matched"); - return false; - } - // Load q, k and v weights const ONNX_NAMESPACE::TensorProto* q_weight_tensor = nullptr; const ONNX_NAMESPACE::TensorProto* k_weight_tensor = nullptr; @@ -662,7 +511,7 @@ bool AttentionFusion::FuseSubGraph(Node& layer_norm, const Node& add_after_layer } // Now everything is ready, we will start fusing subgraph. - NodeArg* mask_input = graph.GetNode(mask_unsqueeze_1.Index())->MutableInputDefs()[0]; + NodeArg* mask_input = graph.GetNode(mask_nodes.unsqueeze_1->Index())->MutableInputDefs()[0]; NodeArg* mask_index = GetOrCreateMaskIndex(graph, mask_input, mask_index_map, layer_norm.GetExecutionProviderType(), logger); if (nullptr == mask_index) { DEBUG_LOG("Failed to create mask index"); @@ -684,7 +533,7 @@ bool AttentionFusion::FuseSubGraph(Node& layer_norm, const Node& add_after_layer output_defs, nullptr, kMSDomain); - attention_node.AddAttribute("num_heads", num_attention_head); + attention_node.AddAttribute("num_heads", num_heads); // Assign provider to this new node. 
attention_node.SetExecutionProviderType(layer_norm.GetExecutionProviderType()); @@ -698,8 +547,6 @@ bool AttentionFusion::FuseSubGraph(Node& layer_norm, const Node& add_after_layer v_reshape.Index(), v_add.Index(), v_matmul.Index(), - softmax.Index(), - mask_add.Index(), qk_div.Index(), qk_matmul.Index(), q_transpose.Index(), @@ -711,16 +558,7 @@ bool AttentionFusion::FuseSubGraph(Node& layer_norm, const Node& add_after_layer k_add.Index(), k_matmul.Index()}; - // When the last Attention node is fused. Original mask processing nodes can be removed safely. - if (optimizer_utils::CheckOutputEdges(graph, mask_mul, 1)) { - nodes_to_remove.push_back(mask_mul.Index()); - nodes_to_remove.push_back(mask_sub.Index()); - if (p_mask_cast != nullptr) { - nodes_to_remove.push_back((*p_mask_cast).Index()); - } - nodes_to_remove.push_back(mask_unsqueeze_2.Index()); - nodes_to_remove.push_back(mask_unsqueeze_1.Index()); - } + AttentionFusionHelper::SetMaskNodesToRemove(graph, mask_nodes, nodes_to_remove); for (const auto& node_index : nodes_to_remove) { Node* node = graph.GetNode(node_index); diff --git a/onnxruntime/core/optimizer/attention_fusion_helper.h b/onnxruntime/core/optimizer/attention_fusion_helper.h new file mode 100644 index 0000000000..b588efcc10 --- /dev/null +++ b/onnxruntime/core/optimizer/attention_fusion_helper.h @@ -0,0 +1,1142 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +#pragma once + +#define DEBUG_LOG(x) LOGS(logger, VERBOSE) << x + +using namespace ONNX_NAMESPACE; +using namespace onnxruntime::common; +namespace onnxruntime { + +// This file is for helping attention fusion for GPT models. +namespace AttentionFusionHelper { + +struct MatchGemmResult { + const Node* gemm; // the Gemm node. + const Node* input_node; // one node in the subgraph that accept the input. + const Node* output_node; // the node that have output of the subgraph. + std::vector node_indices; // id of all nodes. 
+}; + +// Compare the expected parameters (starts, ends, axes and step) +bool CheckSliceParameters(const Graph& graph, const Node& slice, const std::vector& input_indices, const std::vector& expected_values, const logging::Logger& logger) { + ORT_ENFORCE(input_indices.size() == expected_values.size() && input_indices.size() > 0); + + // Here assumes that the last element of input_indices is the maximum one. + if (slice.InputDefs().size() <= static_cast(input_indices[input_indices.size() - 1])) { + DEBUG_LOG("Slice does not have enough number of inputs"); + return false; + } + + for (size_t i = 0; i < expected_values.size(); i++) { + const NodeArg& input = *(slice.InputDefs()[input_indices[i]]); + if (expected_values[i] >= static_cast(INT_MAX)) { + std::vector ends; + if (!(optimizer_utils::AppendTensorFromInitializer(graph, input, ends, true) && ends.size() == 1 && ends[0] >= INT_MAX)) { + DEBUG_LOG("Slice ends is less than INT_MAX"); + return false; + } + } else if (!optimizer_utils::IsInitializerWithExpectedValue(graph, input, expected_values[i], true)) { + DEBUG_LOG("Slice parameter is not expected. 
Input index:" << input_indices[i] << "Expected value:" << expected_values[i]); + return false; + } + } + + return true; +} +/** Match GEMM subgraph: + +-----------------------------------------------------------------------------------------+ + | | + | (*,-1,max,0) v +[Input]--> Shape --> Slice ---------> Squeeze --> Unsqueeze (axes=0) --> Concat (-1, *) --> Reshape-->Gemm (B:W*4W, C:4W, or B:W*W, C:W, or B:4W*W, C:W) + | | + | Concat ( , , 4W or W)-------------Reshape ----> [Output] + | ^ ^ + | | | + +----> Shape --> Gather (indices=0) --> Unsqueeze (axes=0) -----------+ | + | | + +----> Shape --> Gather (indices=1) --> Unsqueeze (axes=0) --------------+ +*/ +bool MatchGemmSubgraph(Graph& graph, + Node& node_after_gemm_reshape, + int dst_arg_index, + MatchGemmResult& result, + const logging::Logger& logger) { + DEBUG_LOG("Start MatchGemmSubgraph"); + // GPT Attention fusion supports opset version 9 or later. + std::vector parent_path{ + {0, dst_arg_index, "Reshape", {5, 13}, kOnnxDomain}, + {0, 0, "Gemm", {9, 11, 13}, kOnnxDomain}, + {0, 0, "Reshape", {5, 13}, kOnnxDomain}, + {0, 1, "Concat", {4, 11, 13}, kOnnxDomain}, + {0, 1, "Unsqueeze", {1, 11, 13}, kOnnxDomain}, + {0, 0, "Squeeze", {1, 11, 13}, kOnnxDomain}, + {0, 0, "Slice", {1, 10, 11, 13}, kOnnxDomain}, + {0, 0, "Shape", {1, 13}, kOnnxDomain}}; + + std::vector edges; + if (!graph_utils::FindPath(node_after_gemm_reshape, true, parent_path, edges, logger)) { + DEBUG_LOG("Faild to match gemm path"); + return false; + } + + const Node& reshape_after_gemm = edges[0]->GetNode(); + const Node& gemm = edges[1]->GetNode(); + const Node& reshape_before_gemm = edges[2]->GetNode(); + const Node& concat = edges[3]->GetNode(); + const Node& unsqueeze = edges[4]->GetNode(); + const Node& squeeze = edges[5]->GetNode(); + const Node& slice = edges[6]->GetNode(); + const Node& shape_before_slice = edges[7]->GetNode(); + + const auto& subgraph_input = shape_before_slice.InputDefs()[0]; + if 
(reshape_before_gemm.InputDefs()[0]->Name() != subgraph_input->Name()) { + DEBUG_LOG("Input of reshape_before_gemm is not the input of subgraph"); + return false; + } + + if (!optimizer_utils::CheckOutputEdges(graph, shape_before_slice, 1) || + !optimizer_utils::CheckOutputEdges(graph, slice, 1) || + !optimizer_utils::CheckOutputEdges(graph, squeeze, 1) || + !optimizer_utils::CheckOutputEdges(graph, unsqueeze, 1) || + !optimizer_utils::CheckOutputEdges(graph, concat, 1) || + !optimizer_utils::CheckOutputEdges(graph, reshape_before_gemm, 1) || + !optimizer_utils::CheckOutputEdges(graph, gemm, 1) || + !optimizer_utils::CheckOutputEdges(graph, reshape_after_gemm, 1)) { + DEBUG_LOG("Output edge count not expected for nodes in gemm path"); + return false; + } + + if (gemm.InputDefs().size() != 3) { + DEBUG_LOG("Gemm does not have 3 inputs"); + return false; + } + + // Get the shape of bias, to be compared with the last input value of Concat + if (!graph_utils::IsInitializer(graph, gemm.InputDefs()[2]->Name(), true)) { + DEBUG_LOG("Gemm bias is not constant"); + return false; + } + auto bias_shape = gemm.InputDefs()[2]->Shape(); + if (bias_shape == nullptr || static_cast(bias_shape->dim_size()) != 1 || !utils::HasDimValue(bias_shape->dim(0))) { + DEBUG_LOG("Gemm bias shape not expected"); + return false; + } + + if (!CheckSliceParameters(graph, slice, {1, 2, 3}, {-1, INT_MAX, 0}, logger)) { + DEBUG_LOG("CheckSliceParameters return false"); + return false; + } + + if (!optimizer_utils::IsInitializerWithExpectedValue(graph, *(concat.InputDefs()[0]), (int64_t)-1, true)) { + DEBUG_LOG("concat first input value is not -1"); + return false; + } + + // Find the concat node for Gather paths. 
+ std::vector edge_to_match{{0, 1, "Concat", {4, 11, 13}, kOnnxDomain}}; + if (!graph_utils::FindPath(reshape_after_gemm, true, edge_to_match, edges, logger)) { + DEBUG_LOG("Faild to match concat node for Gather paths"); + return false; + } + + const Node& concat_after_gather = edges[0]->GetNode(); + if (concat_after_gather.InputDefs().size() != 3 || + !optimizer_utils::CheckOutputEdges(graph, concat_after_gather, 1)) { + DEBUG_LOG("concat_after_gather does not have expected number of inputs or output edges"); + return false; + } + + if (!optimizer_utils::IsInitializerWithExpectedValue(graph, *(concat_after_gather.InputDefs()[2]), bias_shape->dim(0).dim_value(), true)) { + DEBUG_LOG("concat_after_gather input 2 does not have expected value"); + return false; + } + + result.node_indices.reserve(15); + + // Match: [Input] ----> Shape --> Gather (indices=0 or 1) --> Unsqueeze (axes=0) ----> Concat ( , , ) + for (int i = 0; i < 2; i++) { + std::vector gather_path1{ + {0, i, "Unsqueeze", {1, 11, 13}, kOnnxDomain}, + {0, 0, "Gather", {1, 11, 13}, kOnnxDomain}, + {0, 0, "Shape", {1, 13}, kOnnxDomain}}; + + if (!graph_utils::FindPath(concat_after_gather, true, gather_path1, edges, logger)) { + DEBUG_LOG("Faild to match gemm gather path"); + return false; + } + + const Node& unsqueeze_after_gather = edges[0]->GetNode(); + const Node& gather = edges[1]->GetNode(); + const Node& shape = edges[2]->GetNode(); + + if (!optimizer_utils::CheckOutputEdges(graph, unsqueeze_after_gather, 1) || + !optimizer_utils::CheckOutputEdges(graph, gather, 1) || + !optimizer_utils::CheckOutputEdges(graph, shape, 1)) { //TODO: deal with shared Shape node which has output edges > 1 + DEBUG_LOG("Output edge count not expected for nodes in gemm gather path"); + return false; + } + + result.node_indices.push_back(unsqueeze_after_gather.Index()); + result.node_indices.push_back(gather.Index()); + result.node_indices.push_back(shape.Index()); + + if (shape.InputDefs()[0]->Name() != 
subgraph_input->Name()) { + return false; + } + + std::vector axes; + if (!(graph_utils::GetRepeatedNodeAttributeValues(unsqueeze_after_gather, "axes", axes) && axes.size() == 1 && axes[0] == 0)) { + DEBUG_LOG("unsqueeze_after_gather axes value not expected"); + return false; + } + + if (!optimizer_utils::IsAttributeWithExpectedValue(gather, "axis", (int64_t)0)) { + DEBUG_LOG("gather axis value not expected"); + return false; + } + + if (!optimizer_utils::IsInitializerWithExpectedValue(graph, *(gather.InputDefs()[1]), static_cast(i), true)) { + DEBUG_LOG("gather input 1 value is not expected"); + return false; + } + } + + result.gemm = &gemm; + result.input_node = &shape_before_slice; + result.output_node = &reshape_after_gemm; + result.node_indices.insert(result.node_indices.end(), + {reshape_after_gemm.Index(), + gemm.Index(), + reshape_before_gemm.Index(), + concat.Index(), + unsqueeze.Index(), + squeeze.Index(), + slice.Index(), + shape_before_slice.Index(), + concat_after_gather.Index()}); + + DEBUG_LOG("Pass MatchGemmSubgraph"); + return true; +} + +bool ValidateGemmInitializer(const Graph& graph, const Node& gemm, int64_t hidden_size, bool is_before_split, const logging::Logger& logger) { + DEBUG_LOG("Start ValidateGemmInitializer"); + const NodeArg& bias = *(gemm.InputDefs()[2]); + if (!graph_utils::IsInitializer(graph, bias.Name(), true)) { + DEBUG_LOG("Gemm bias is not constant initializer"); + return false; + } + + int64_t bias_length = (is_before_split ? 
3 : 1) * hidden_size; + if (!optimizer_utils::ValidateShape(bias, {bias_length})) { + DEBUG_LOG("Gemm bias shape is not expected"); + return false; + } + + const NodeArg& weights = *(gemm.InputDefs()[1]); + if (!graph_utils::IsInitializer(graph, weights.Name(), true)) { + DEBUG_LOG("Gemm weight is not constant initializer"); + return false; + } + + if (!optimizer_utils::ValidateShape(weights, {hidden_size, bias_length})) { + DEBUG_LOG("Gemm weight shape is not expected"); + return false; + } + + DEBUG_LOG("Pass ValidateGemmInitializer"); + return true; +} + +struct MatchUnidirMaskResult { + const Node* div_node; // the root node (Div) of the subgraph + std::vector node_indices; // id of all nodes in the subgraph for removing later. +}; + +/** Match Unidirectional Mask subgraph. + In the below graph, ':' is followed by variable name in code. * means the input on the left side. + + + (axes=0) + +---------------------Unsqueeze----------------------------------------+ + | :unsqueeze2 | + | (axes=0) | + +---------------------Unsqueeze-------------------+ | + | :unsqueeze3 | | + (*,-1,max,0) | (axes=0) A (axes=0) starts |ends | + [Div] --> Shape --> Slice ---------> Squeeze -----> Sub --> Unsqueeze ----------------+ | |ends + | :shape1 :slice1 :squeeze1 ^ :unsqueeze1 v v v + | |B Slice(1x1xWxW, , ,2,1) --> Slice(*,0, ,3, 1) :last_slice + | | :mask_slice | + | (*, -2, -1, 0) (axes=0) | Cast(9) + +----> Shape --> Slice ---------> Squeeze-------+ | + | :shape2 :slice2 :squeeze2 v condition + +----------------------------------------------------------------------------------------->Where( ,*,-10000)--->[Add] +*/ +bool MatchUnidirMaskSubgraph(const Graph& graph, const Node& add_node, MatchUnidirMaskResult& result, const logging::Logger& logger) { + DEBUG_LOG("Start MatchUnidirMaskSubgraph"); + std::vector root_path{ + {0, 0, "Where", {9}, kOnnxDomain}, + {0, 1, "Div", {7, 13}, kOnnxDomain}}; + + std::vector edges; + if (!graph_utils::FindPath(add_node, true, root_path, edges, 
logger)) { + DEBUG_LOG("Faild to match the path (Div-->Where-->Add) for unidirectional mask"); + return false; + } + + const Node& where_node = edges[0]->GetNode(); + const Node& div_node = edges[1]->GetNode(); + + const float expected_value = -10000.0f; + if (!optimizer_utils::IsInitializerWithExpectedValue(graph, *(where_node.InputDefs()[2]), expected_value, true)) { + return false; + } + + std::vector path1{ + {0, 0, "Cast", {9, 13}, kOnnxDomain}, + {0, 0, "Slice", {10, 11, 13}, kOnnxDomain}, // Last Slice + {0, 0, "Slice", {10, 11, 13}, kOnnxDomain}, // Mask Slice + {0, 1, "Unsqueeze", {9, 11, 13}, kOnnxDomain}, + {0, 0, "Sub", {7, 13}, kOnnxDomain}, + {0, 0, "Squeeze", {1, 11, 13}, kOnnxDomain}, + {0, 0, "Slice", {10, 11, 13}, kOnnxDomain}, // Slice 1 + {0, 0, "Shape", {1, 13}, kOnnxDomain}}; + + if (!graph_utils::FindPath(where_node, true, path1, edges, logger)) { + DEBUG_LOG("Faild to match path 1 for unidirectional mask"); + return false; + } + + const Node& cast = edges[0]->GetNode(); + const Node& last_slice = edges[1]->GetNode(); + const Node& mask_slice = edges[2]->GetNode(); + const Node& unsqueeze1 = edges[3]->GetNode(); + const Node& sub = edges[4]->GetNode(); + const Node& squeeze1 = edges[5]->GetNode(); + const Node& slice1 = edges[6]->GetNode(); + const Node& shape1 = edges[7]->GetNode(); + + if (!optimizer_utils::CheckOutputEdges(graph, where_node, 1) || + !optimizer_utils::CheckOutputEdges(graph, cast, 1) || + !optimizer_utils::CheckOutputEdges(graph, last_slice, 1) || + !optimizer_utils::CheckOutputEdges(graph, mask_slice, 1) || + !optimizer_utils::CheckOutputEdges(graph, unsqueeze1, 1) || + !optimizer_utils::CheckOutputEdges(graph, sub, 1) || + !optimizer_utils::CheckOutputEdges(graph, squeeze1, 3) || + !optimizer_utils::CheckOutputEdges(graph, slice1, 1) || + !optimizer_utils::CheckOutputEdges(graph, shape1, 1) || + !optimizer_utils::CheckOutputEdges(graph, mask_slice, 1)) { + DEBUG_LOG("Output edge count not expected for nodes in path 1 of 
unidirectional mask"); + return false; + } + + if (div_node.OutputDefs()[0]->Name() != shape1.InputDefs()[0]->Name()) { + DEBUG_LOG("Div and Shape1 does not have edge"); + return false; + } + + if (!CheckSliceParameters(graph, last_slice, {1, 3, 4}, {0, 3, 1}, logger)) { + DEBUG_LOG("CheckSliceParameters returns false for last_slice"); + return false; + } + + if (!CheckSliceParameters(graph, mask_slice, {3, 4}, {2, 1}, logger)) { + DEBUG_LOG("CheckSliceParameters returns false for mask_slice"); + return false; + } + + if (!CheckSliceParameters(graph, slice1, {1, 2, 3}, {-1, INT_MAX, 0}, logger)) { + DEBUG_LOG("CheckSliceParameters returns false for slice1"); + return false; + } + + std::vector slice_ends_path{ + {0, 2, "Unsqueeze", {9, 11, 13}, kOnnxDomain}, + {0, 0, "Squeeze", {1, 11, 13}, kOnnxDomain}}; + + if (!graph_utils::FindPath(last_slice, true, slice_ends_path, edges, logger) || + edges[1]->GetNode().Index() != squeeze1.Index()) { + DEBUG_LOG("Faild to match path 2 for unidirectional mask"); + return false; + } + + const Node& unsqueeze2 = edges[0]->GetNode(); + if (!optimizer_utils::CheckOutputEdges(graph, unsqueeze2, 1)) { + DEBUG_LOG("Output edge count not expected for unsqueeze2 of unidirectional mask"); + return false; + } + + if (!graph_utils::FindPath(mask_slice, true, slice_ends_path, edges, logger) || + edges[1]->GetNode().Index() != squeeze1.Index()) { + DEBUG_LOG("Faild to match path 3 for unidirectional mask"); + return false; + } + + const Node& unsqueeze3 = edges[0]->GetNode(); + if (!optimizer_utils::CheckOutputEdges(graph, unsqueeze3, 1)) { + DEBUG_LOG("Output edge count not expected for unsqueeze3 of unidirectional mask"); + return false; + } + + std::vector path4{ + {0, 1, "Squeeze", {1, 11, 13}, kOnnxDomain}, + {0, 0, "Slice", {10, 11, 13}, kOnnxDomain}, // Slice 2 + {0, 0, "Shape", {1, 13}, kOnnxDomain}}; + + if (!graph_utils::FindPath(sub, true, path4, edges, logger)) { + DEBUG_LOG("Faild to match path 4 for unidirectional mask"); + 
return false; + } + + if (div_node.OutputDefs()[0]->Name() != edges[2]->GetNode().InputDefs()[0]->Name()) { + DEBUG_LOG("Div and Shape does not have edge"); + return false; + } + + const Node& squeeze2 = edges[0]->GetNode(); + const Node& slice2 = edges[1]->GetNode(); + const Node& shape2 = edges[2]->GetNode(); + if (!optimizer_utils::CheckOutputEdges(graph, squeeze2, 1) || + !optimizer_utils::CheckOutputEdges(graph, slice2, 1) || + !optimizer_utils::CheckOutputEdges(graph, shape2, 1)) { + DEBUG_LOG("Output edge count not expected for squeeze_2/slices2/shape2 of unidirectional mask"); + return false; + } + + if (!CheckSliceParameters(graph, slice2, {1, 2, 3}, {-2, -1, 0}, logger)) { + DEBUG_LOG("CheckSliceParameters return false for slice2"); + return false; + } + + result.div_node = &div_node; + result.node_indices = { + where_node.Index(), + cast.Index(), + last_slice.Index(), + mask_slice.Index(), + unsqueeze1.Index(), + sub.Index(), + squeeze1.Index(), + slice1.Index(), + shape1.Index(), + unsqueeze2.Index(), + unsqueeze3.Index(), + squeeze2.Index(), + slice2.Index(), + shape2.Index()}; + + DEBUG_LOG("Pass MatchUnidirMaskSubgraph"); + return true; +} + +struct AttentionMaskNodes { + const Node* softmax; + const Node* add; + const Node* mul; + const Node* sub; + const Node* cast; + const Node* unsqueeze_2; + const Node* unsqueeze_1; +}; + +void SetMaskNodesToRemove(const Graph& graph, AttentionMaskNodes& mask_nodes, std::vector& nodes_to_remove) { + nodes_to_remove.push_back(mask_nodes.softmax->Index()); + nodes_to_remove.push_back(mask_nodes.add->Index()); + + // When the last Attention node is fused. Original mask processing nodes can be removed safely. 
+ if (optimizer_utils::CheckOutputEdges(graph, *(mask_nodes.mul), 1)) { + nodes_to_remove.push_back(mask_nodes.mul->Index()); + nodes_to_remove.push_back(mask_nodes.sub->Index()); + if (mask_nodes.cast != nullptr) { + nodes_to_remove.push_back(mask_nodes.cast->Index()); + } + nodes_to_remove.push_back(mask_nodes.unsqueeze_2->Index()); + nodes_to_remove.push_back(mask_nodes.unsqueeze_1->Index()); + } +} + +/** Match Input Mask subgraph: + {UnidirMask Subgraph} + | + (optional) v +[Attention_mask] --> Unsqueeze (axes=1) --> Unsqueeze (axes=2) --> Cast ---->Sub(1,*) --> Mul(*, -10000.0) --> Add( ,*)--->SoftMax -->[MatMul] +*/ +bool MatchInputMaskSubgraph(const Graph& graph, const Node& qkv_matmul, AttentionMaskNodes& result, const logging::Logger& logger) { + DEBUG_LOG("Start MatchInputMaskSubgraph"); + std::vector mask_path{ + {0, 0, "Softmax", {1, 11, 13}, kOnnxDomain}, + {0, 0, "Add", {7, 13}, kOnnxDomain}, + {0, 1, "Mul", {7, 13}, kOnnxDomain}, + {0, 0, "Sub", {7, 13}, kOnnxDomain}}; + + std::vector edges; + if (!graph_utils::FindPath(qkv_matmul, true, mask_path, edges, logger)) { + DEBUG_LOG("Failed to find path for mask"); + return false; + } + + const Node& softmax = edges[0]->GetNode(); + const Node& mask_add = edges[1]->GetNode(); + const Node& mask_mul = edges[2]->GetNode(); + const Node& mask_sub = edges[3]->GetNode(); + + // Match optional mask cast node + Node* p_mask_cast = nullptr; + Node* p_mask_unsqueeze_2 = nullptr; + Node* p_mask_unsqueeze_1 = nullptr; + std::vector mask_path_format_1{ + {0, 1, "Cast", {9}, kOnnxDomain}, + {0, 0, "Unsqueeze", {1, 11}, kOnnxDomain}, + {0, 0, "Unsqueeze", {1, 11}, kOnnxDomain}}; + + std::vector mask_path_format_2{ + {0, 1, "Unsqueeze", {1, 11}, kOnnxDomain}, + {0, 0, "Unsqueeze", {1, 11}, kOnnxDomain}}; + + if (graph_utils::FindPath(mask_sub, true, mask_path_format_1, edges, logger)) { + p_mask_cast = const_cast(&edges[0]->GetNode()); + p_mask_unsqueeze_2 = const_cast(&edges[1]->GetNode()); + p_mask_unsqueeze_1 = 
const_cast(&edges[2]->GetNode()); + } else if (graph_utils::FindPath(mask_sub, true, mask_path_format_2, edges, logger)) { + p_mask_unsqueeze_2 = const_cast(&edges[0]->GetNode()); + p_mask_unsqueeze_1 = const_cast(&edges[1]->GetNode()); + } else { + DEBUG_LOG("Failed to find path for mask"); + return false; + } + + const Node& mask_unsqueeze_2 = *p_mask_unsqueeze_2; + const Node& mask_unsqueeze_1 = *p_mask_unsqueeze_1; + + if (!optimizer_utils::CheckOutputEdges(graph, softmax, 1) || + !optimizer_utils::CheckOutputEdges(graph, mask_add, 1) || + !optimizer_utils::CheckOutputEdges(graph, mask_sub, 1) || + (p_mask_cast != nullptr && !optimizer_utils::CheckOutputEdges(graph, *p_mask_cast, 1)) || + !optimizer_utils::CheckOutputEdges(graph, mask_unsqueeze_2, 1) || + !optimizer_utils::CheckOutputEdges(graph, mask_unsqueeze_1, 1)) { + DEBUG_LOG("Output edge count not expected for mask nodes"); + return false; + } + + if (!optimizer_utils::IsAttributeWithExpectedValue(softmax, "axis", 3)) { + DEBUG_LOG("Softmax attribute axis is expected to be 3"); + return false; + } + + std::vector axes; + if (!(graph_utils::GetRepeatedNodeAttributeValues(mask_unsqueeze_1, "axes", axes) && axes.size() == 1 && axes[0] == 1)) { + DEBUG_LOG("mask_unsqueeze_1 axes not matched. Expect: 1"); + return false; + } + + if (!(graph_utils::GetRepeatedNodeAttributeValues(mask_unsqueeze_2, "axes", axes) && axes.size() == 1 && axes[0] == 2)) { + DEBUG_LOG("mask_unsqueeze_2 axes not matched. 
Expect: 2"); + return false; + } + + if (!optimizer_utils::IsInitializerWithExpectedValue(graph, *(mask_sub.InputDefs()[0]), float(1), false)) { + DEBUG_LOG("mask_sub const input not matched"); + return false; + } + + if (!optimizer_utils::IsInitializerWithExpectedValue(graph, *(mask_mul.InputDefs()[1]), float(-10000), false)) { + DEBUG_LOG("mask_mul const input not matched"); + return false; + } + + result.softmax = &softmax; + result.add = &mask_add; + result.mul = &mask_mul; + result.sub = &mask_sub; + result.cast = p_mask_cast; + result.unsqueeze_2 = p_mask_unsqueeze_2; + result.unsqueeze_1 = p_mask_unsqueeze_1; + DEBUG_LOG("Pass MatchInputMaskSubgraph"); + return true; +} + +struct MatchPastResult { + NodeArg* past; + NodeArg* present; + std::vector node_indices; +}; + +/** Match Past Subgraph + --> Gather (indices=1) --> v_Concat(*, ) --> Unsqueeze(axes=0)--------------------------------------------------------------------+ + / v + [Past] --> Gather (indices=0) --> Transpose (perm=0,1,3,2) --> k_Concat(*, )--> Transpose(perm=0,1,3,2) --> Unsqueeze(axes=0)-->Concat(*, ) --> [Present] +*/ +bool MatchPastSubgraph(Graph& graph, const Node& k_concat, const Node& v_concat, MatchPastResult& result, const logging::Logger& logger) { + DEBUG_LOG("Start MatchPastSubgraph"); + std::vector past_k_path{ + {0, 0, "Transpose", {1, 13}, kOnnxDomain}, + {0, 0, "Gather", {1, 11, 13}, kOnnxDomain}}; + + std::vector edges; + if (!graph_utils::FindPath(k_concat, true, past_k_path, edges, logger)) { + DEBUG_LOG("Failed to find path for past_k"); + return false; + } + const Node& past_k_transpose = edges[0]->GetNode(); + const Node& past_k_gather = edges[1]->GetNode(); + + std::vector present_k_path{ + {0, 0, "Transpose", {1, 13}, kOnnxDomain}, + {0, 0, "Unsqueeze", {1, 11, 13}, kOnnxDomain}, + {0, 0, "Concat", {4, 11, 13}, kOnnxDomain}}; + if (!graph_utils::FindPath(k_concat, false, present_k_path, edges, logger)) { + DEBUG_LOG("Failed to find path for present_k"); + return false; 
+ } + const Node& present_k_transpose = edges[0]->GetNode(); + const Node& present_k_unsqueeze = edges[1]->GetNode(); + const Node& present_concat = edges[2]->GetNode(); + + std::vector present_past_v_path{ + {0, 1, "Unsqueeze", {1, 11, 13}, kOnnxDomain}, + {0, 0, "Concat", {4, 11, 13}, kOnnxDomain}, + {0, 0, "Gather", {1, 11, 13}, kOnnxDomain}}; + if (!graph_utils::FindPath(present_concat, true, present_past_v_path, edges, logger)) { + DEBUG_LOG("Failed to find path for present_v and past_v"); + return false; + } + const Node& present_v_unsqueeze = edges[0]->GetNode(); + const Node& past_v_concat = edges[1]->GetNode(); + const Node& past_v_gather = edges[2]->GetNode(); + if (past_v_concat.Index() != v_concat.Index()) { + DEBUG_LOG("Failed to match v_concat"); + return false; + } + + std::vector perm; + if (!(graph_utils::GetRepeatedNodeAttributeValues(past_k_transpose, "perm", perm) && perm.size() == 4 && perm[0] == 0 && perm[1] == 1 && perm[2] == 3 && perm[3] == 2)) { + DEBUG_LOG("past_k_transpose perm attribute not matched"); + return false; + } + + if (!(graph_utils::GetRepeatedNodeAttributeValues(present_k_transpose, "perm", perm) && perm.size() == 4 && perm[0] == 0 && perm[1] == 1 && perm[2] == 3 && perm[3] == 2)) { + DEBUG_LOG("present_k_transpose perm attribute not matched"); + return false; + } + + std::vector axes; + if (!(graph_utils::GetRepeatedNodeAttributeValues(present_k_unsqueeze, "axes", axes) && axes.size() == 1 && axes[0] == 0)) { + DEBUG_LOG("present_k_unsqueeze axes value not expected"); + return false; + } + + if (!(graph_utils::GetRepeatedNodeAttributeValues(present_v_unsqueeze, "axes", axes) && axes.size() == 1 && axes[0] == 0)) { + DEBUG_LOG("present_v_unsqueeze axes value not expected"); + return false; + } + + // Check Gather for past_v has indices == 1 + if (!optimizer_utils::IsInitializerWithExpectedValue(graph, *(past_v_gather.InputDefs()[1]), int64_t(1), true)) { + DEBUG_LOG("past_v_gather indices != 1"); + return false; + } + + // 
Check Gather for past_v has indices == 0 + if (!optimizer_utils::IsInitializerWithExpectedValue(graph, *(past_k_gather.InputDefs()[1]), int64_t(0), true)) { + DEBUG_LOG("past_k_gather indices != 0"); + return false; + } + + if (past_v_gather.InputDefs()[0]->Name() != past_k_gather.InputDefs()[0]->Name()) { + DEBUG_LOG("past_v_gather and past_k_gather does not have same past input"); + return false; + } + + if (!optimizer_utils::CheckOutputEdges(graph, k_concat, 2) || + !optimizer_utils::CheckOutputEdges(graph, past_k_transpose, 1) || + !optimizer_utils::CheckOutputEdges(graph, past_k_gather, 1) || + !optimizer_utils::CheckOutputEdges(graph, present_k_transpose, 1) || + !optimizer_utils::CheckOutputEdges(graph, present_k_unsqueeze, 1) || + present_concat.GetOutputEdgesCount() != 0 || // present_concat only has a graph output, but no output edges to other nodes. + !optimizer_utils::CheckOutputEdges(graph, present_v_unsqueeze, 1) || + !optimizer_utils::CheckOutputEdges(graph, past_v_concat, 2) || + !optimizer_utils::CheckOutputEdges(graph, past_v_gather, 1)) { + DEBUG_LOG("Output edge count not expected for nodes in past subgraph"); + return false; + } + result.node_indices = { + k_concat.Index(), + past_k_transpose.Index(), + past_k_gather.Index(), + present_k_transpose.Index(), + present_k_unsqueeze.Index(), + present_concat.Index(), + present_v_unsqueeze.Index(), + past_v_concat.Index(), + past_v_gather.Index()}; + + result.past = graph.GetNode(past_v_gather.Index())->MutableInputDefs()[0]; + result.present = graph.GetNode(present_concat.Index())->MutableOutputDefs()[0]; + + DEBUG_LOG("Pass MatchPastSubgraph"); + return true; +} + +/** Check the following nodes (optional Concat is excluded) for path v: + v_Reshape (shape=0,0,H,-1) + | + v_Transpose (perm=0,2,1,3) + | + [p_Concat?] 
+ \ / + qkv_MatMul + | + Transpose (perm=0,2,1,3) + | + Reshape---[shape=0,0,-1] +*/ + +bool CheckNodesInPathV(const Graph& graph, const Node& reshape, const Node& transpose, const Node& qkv_matmul, const Node& v_transpose, const Node& v_reshape, + int64_t& num_heads, int64_t& head_size, int64_t hidden_size, const logging::Logger& logger) { + DEBUG_LOG("Start CheckNodesInPathV"); + // Internal nodes of attention subgraph only allow edges within the subgraph, and no graph output is allowed. + // No constraints for reshape node since it is the last node of Attention. + if (!optimizer_utils::CheckOutputEdges(graph, transpose, 1) || + !optimizer_utils::CheckOutputEdges(graph, qkv_matmul, 1) || + !optimizer_utils::CheckOutputEdges(graph, v_transpose, 1) || + !optimizer_utils::CheckOutputEdges(graph, v_reshape, 1)) { + DEBUG_LOG("Output edge count not expected for nodes in path v"); + return false; + } + + std::vector perm; + if (!(graph_utils::GetRepeatedNodeAttributeValues(transpose, "perm", perm) && perm.size() == 4 && perm[0] == 0 && perm[1] == 2 && perm[2] == 1 && perm[3] == 3)) { + DEBUG_LOG("Failed in match Transpose attribute perm. Expected: 0, 2, 1, 3"); + return false; + } + + if (!(graph_utils::GetRepeatedNodeAttributeValues(v_transpose, "perm", perm) && perm.size() == 4 && perm[0] == 0 && perm[1] == 2 && perm[2] == 1 && perm[3] == 3)) { + DEBUG_LOG("Failed in match v_transpose attribute perm. 
Expected: 0, 2, 1, 3"); + return false; + } + + if (num_heads > 0 && head_size > 0 && head_size != num_heads * head_size) { + DEBUG_LOG("hidden_size != num_heads * head_size"); + return false; + } + + // Check reshape for q, k or v has shape input (0, 0, N, -1) or (0, 0, N, H) + std::vector v_reshape_shape; + if (!optimizer_utils::AppendTensorFromInitializer(graph, *(v_reshape.InputDefs()[1]), v_reshape_shape) || + v_reshape_shape.size() != 4 || + v_reshape_shape[0] != 0 || + v_reshape_shape[1] != 0 || + v_reshape_shape[2] <= 0 || + v_reshape_shape[2] > hidden_size || + (head_size < 0 && v_reshape_shape[3] != -1) || + (head_size == 0 && v_reshape_shape[2] * v_reshape_shape[3] != hidden_size)) { + DEBUG_LOG("v_reshape initializer value is not expected"); + return false; + } + + num_heads = v_reshape_shape[2]; + head_size = v_reshape_shape[3]; + + // Check reshape for attention output has shape input (0, 0, -1) or (0, 0, N*H) + std::vector reshape_shape; + if (!optimizer_utils::AppendTensorFromInitializer(graph, *(reshape.InputDefs()[1]), reshape_shape) || + reshape_shape.size() != 3 || + reshape_shape[0] != 0 || + reshape_shape[1] != 0 || + (reshape_shape[2] != num_heads * head_size && reshape_shape[2] != -1)) { + DEBUG_LOG("reshape initializer value is not expected"); + return false; + } + + DEBUG_LOG("Pass CheckNodesInPathV"); + return true; +} + +bool CheckNodesInPathQ(const Graph& graph, const Node& qk_div, const Node& q_reshape, const Node& q_transpose, int64_t num_heads, int64_t head_size, const logging::Logger& logger) { + DEBUG_LOG("Start CheckNodesInPathQ"); + std::vector q_reshape_shape; + if (!optimizer_utils::AppendTensorFromInitializer(graph, *(q_reshape.InputDefs()[1]), q_reshape_shape) || + q_reshape_shape.size() != 4 || + q_reshape_shape[0] != 0 || + q_reshape_shape[1] != 0 || + q_reshape_shape[2] != num_heads || + q_reshape_shape[3] != head_size) { + DEBUG_LOG("q_reshape const not matched"); + return false; + } + + float expected_value = 
std::sqrt(static_cast(head_size)); + if (!optimizer_utils::IsInitializerWithExpectedValue(graph, *(qk_div.InputDefs()[1]), expected_value, false)) { + DEBUG_LOG("qk_div const not matched."); + return false; + } + + std::vector perm; + if (!(graph_utils::GetRepeatedNodeAttributeValues(q_transpose, "perm", perm) && perm.size() == 4 && perm[0] == 0 && perm[1] == 2 && perm[2] == 1 && perm[3] == 3)) { + DEBUG_LOG("q_transpose perm attribute not matched"); + return false; + } + DEBUG_LOG("Pass CheckNodesInPathQ"); + return true; +} + +bool CheckNodesInPathK(const Graph& graph, const Node& k_reshape, const Node& k_transpose, int64_t num_heads, int64_t head_size, const logging::Logger& logger) { + DEBUG_LOG("Start CheckNodesInPathK"); + std::vector perm; + if (!(graph_utils::GetRepeatedNodeAttributeValues(k_transpose, "perm", perm) && perm.size() == 4 && perm[0] == 0 && perm[1] == 2 && perm[2] == 3 && perm[3] == 1)) { + DEBUG_LOG("k_transpose perm attribute not matched"); + return false; + } + + std::vector k_reshape_shape; + if (!optimizer_utils::AppendTensorFromInitializer(graph, *(k_reshape.InputDefs()[1]), k_reshape_shape) || + k_reshape_shape.size() != 4 || + k_reshape_shape[0] != 0 || + k_reshape_shape[1] != 0 || + k_reshape_shape[2] != num_heads || + k_reshape_shape[3] != head_size) { + DEBUG_LOG("k_reshape const not matched"); + return false; + } + DEBUG_LOG("Pass CheckNodesInPathK"); + return true; +} + +// Add a Cast to convert Mask from int64 to int32. 
+NodeArg& CastMaskToInt32(Graph& graph, NodeArg* mask_input, ProviderType provider_type) { + // Derive int32 shape info from mask_input + TypeProto mask_int32; + mask_int32.mutable_tensor_type()->set_elem_type(TensorProto_DataType_INT32); + auto dim0 = mask_int32.mutable_tensor_type()->mutable_shape()->add_dim(); + auto dim1 = mask_int32.mutable_tensor_type()->mutable_shape()->add_dim(); + const TensorShapeProto* mask_shape = mask_input->Shape(); + if (mask_shape != nullptr && static_cast(mask_shape->dim_size()) == 2) { + *dim0 = mask_shape->dim(0); + *dim1 = mask_shape->dim(1); + } + + NodeArg& cast32 = graph.GetOrCreateNodeArg(graph.GenerateNodeArgName("Mask_Int32"), &mask_int32); + const std::vector input_defs{mask_input}; + const std::vector output_defs{&cast32}; + Node& node = graph.AddNode(graph.GenerateNodeName("MaskCast"), + "Cast", + "Cast mask from int64 to int32", + input_defs, + output_defs, + nullptr, + kOnnxDomain); + + // Add attribute: "to" = 6 + ONNX_NAMESPACE::AttributeProto to; + to.set_name("to"); + to.set_type(ONNX_NAMESPACE::AttributeProto_AttributeType::AttributeProto_AttributeType_INT); + to.set_i(static_cast(ONNX_NAMESPACE::TensorProto_DataType_INT32)); + node.AddAttribute("to", to); + + node.SetExecutionProviderType(provider_type); + return cast32; +} + +NodeArg* GetOrCreateMaskInt32( + Graph& graph, + NodeArg* mask_input, + std::map& mask_int32_map, + ProviderType provider_type) { + // Lookup in cache map + auto search = mask_int32_map.find(mask_input->Name()); + if (search != mask_int32_map.end()) { + return search->second; + } + + NodeArg& cast32 = CastMaskToInt32(graph, mask_input, provider_type); + + // Add it to cache map. + mask_int32_map.insert(std::pair(mask_input->Name(), &cast32)); + return &cast32; +} + +/** Fuse Attention SubGraph. +@remark add_after_layer_norm is the Add node in the bottom of sub-graph. 
+ Abbreviations: B is batch_size, S is sequence_length, W is hidden_size, P is past sequence length, + N is number of attention heads, H is head size, and W=N*H, h=Sqrt(H) + B and S could be symbolic. ? means it is optional. + Graph before Fusion (q_, k_, v_, qk_, qkv_ and mask_ prefix is added before Operator type): + Add + / \ [Input](BxSxW) + / \ + / LayerNormalization + / | + / {Gemm_Subgraph} <---[weights](Wx3W); [Bias](3W) + | | + | Split + | / | \ + | / | \ + | q_Reshape k_Reshape v_Reshape (shape=0,0,N,-1) + | | | | + |q_Transpose k_Transpose v_Transpose + | (0,2,1,3) (0,2,3,1) (perm=0,2,1,3) + | \ / | [Past]? + \ / | | + | \ p_Concat? <------|---------------------{Past_Subgraph}? + | \ / | | + | qk_MatMul | | + | | [B=h] | | + | | / | / + | qk_Div p_Concat? <------------------ + | | | + | {Unidir_Mask_Subgraph} | [Mask]? + | | / | + | mask_Add? <--------/---------------------{Attention_Mask_Subgraph}? + | | / + | Softmax / + | \ / + | \ / + | qkv_MatMul + | | + | Transpose (perm=0,2,1,3) + | | + | Reshape---[shape=0,0,-1] + | | + | {Gemm_Subgraph} <---[weights](WxW); [Bias](W) + | / + +--------------> Add + +After Fusion: + + Add + | \ + | LayerNormalization [Weights] [Bias] [Mask]? [Past]? + | \ | / / / + \ \ | / / / + \ Attention <------------------ + \ | | + \ {Gemm_Subgraph} v + \ | [Present]? 
+ \ | + \ / + --------> Add +TODO: replace Gemm_Subgraph by MatMul + Add +*/ +bool FuseGptAttention(Node& layer_norm, Graph& graph, int64_t hidden_size, std::map& mask_int32_map, const logging::Logger& logger) { + DEBUG_LOG("Start FuseGptAttention"); + const Node* parent_node = graph_utils::GetInputNode(layer_norm, 0); + if (nullptr == parent_node || !graph_utils::IsSupportedOptypeVersionAndDomain(*parent_node, "Add", {7, 13}, kOnnxDomain)) { + return false; + } + + const Node* add_after_gemm = graph_utils::FirstChildByType(*graph.GetNode(parent_node->Index()), "Add"); + if (nullptr == add_after_gemm) { + return false; + } + + MatchGemmResult gemm1_result; + if (!MatchGemmSubgraph(graph, *graph.GetNode(add_after_gemm->Index()), 1, gemm1_result, logger) || + !ValidateGemmInitializer(graph, *gemm1_result.gemm, hidden_size, false, logger)) { + return false; + } + + std::vector path1{ + {0, 0, "Reshape", {5, 13}, kOnnxDomain}, + {0, 0, "Transpose", {1, 13}, kOnnxDomain}, + {0, 0, "MatMul", {1, 9}, kOnnxDomain}}; + + std::vector edges; + if (!graph_utils::FindPath(*gemm1_result.input_node, true, path1, edges, logger)) { + DEBUG_LOG("Faild to find path to qkv_matmul"); + return false; + } + + const Node& reshape = edges[0]->GetNode(); + const Node& transpose = edges[1]->GetNode(); + const Node& qkv_matmul = edges[2]->GetNode(); + + const Node* v_concat = graph_utils::GetInputNode(qkv_matmul, 1); + if (v_concat == nullptr) { + return false; + } + + bool has_past = graph_utils::IsSupportedOptypeVersionAndDomain(*v_concat, "Concat", {4, 11, 13}, kOnnxDomain); + + std::vector path2{ + {0, 1, "Transpose", {1, 13}, kOnnxDomain}, + {0, 0, "Reshape", {5, 13}, kOnnxDomain}, + {2, 0, "Split", {2, 11, 13}, kOnnxDomain}}; + + if (!graph_utils::FindPath(has_past ? 
*v_concat : qkv_matmul, true, path2, edges, logger)) { + DEBUG_LOG("Faild to find path v to Split"); + return false; + } + + const Node& v_transpose = edges[0]->GetNode(); + const Node& v_reshape = edges[1]->GetNode(); + const Node& v_split = edges[2]->GetNode(); + + MatchGemmResult gemm0_result; + if (!MatchGemmSubgraph(graph, *graph.GetNode(v_split.Index()), 0, gemm0_result, logger) || + !ValidateGemmInitializer(graph, *gemm0_result.gemm, hidden_size, true, logger)) { + return false; + } + + const Node* gemm0_parent = graph_utils::GetInputNode(*gemm0_result.input_node, 0); + if (gemm0_parent == nullptr || gemm0_parent->Index() != layer_norm.Index()) { + return false; + } + + int64_t num_heads = 0; // will be updated in CheckNodesInPathV + int64_t head_size = -1; + if (!CheckNodesInPathV(graph, reshape, transpose, qkv_matmul, v_transpose, v_reshape, num_heads, head_size, hidden_size, logger)) { + DEBUG_LOG("CheckNodesInPathV return false"); + return false; + } + + if (!optimizer_utils::CheckOutputEdges(graph, v_split, 3)) { + DEBUG_LOG("Output edge count not expected for nodes in path v"); + return false; + } + + // Find input mask. 
Unsqueeze -> Unsqueeze -> (Cast) -> Sub -> Mul -> Add -> Softmax + AttentionMaskNodes mask_nodes; + if (!MatchInputMaskSubgraph(graph, qkv_matmul, mask_nodes, logger)) { + DEBUG_LOG("MatchInputMaskSubgraph returns false"); + return false; + } + + MatchUnidirMaskResult unidir_mask_result; + if (!MatchUnidirMaskSubgraph(graph, *(mask_nodes.add), unidir_mask_result, logger)) { + DEBUG_LOG("MatchUnidirMaskSubgraph returns NULL"); + return false; + } + + // path to q + std::vector q_path{ + {0, 0, "MatMul", {1, 9, 13}, kOnnxDomain}, + {0, 0, "Transpose", {1, 13}, kOnnxDomain}, + {0, 0, "Reshape", {5, 13}, kOnnxDomain}, + {0, 0, "Split", {2, 11, 13}, kOnnxDomain}}; + + const Node* qk_div = unidir_mask_result.div_node; + if (!graph_utils::FindPath(*qk_div, true, q_path, edges, logger)) { + DEBUG_LOG("Failed to find path for q"); + return false; + } + + const Node& qk_matmul = edges[0]->GetNode(); + const Node& q_transpose = edges[1]->GetNode(); + const Node& q_reshape = edges[2]->GetNode(); + const Node& q_split = edges[3]->GetNode(); + if (q_split.Index() != v_split.Index()) { + DEBUG_LOG("q and v are not from same Split node"); + return false; + } + + if (!CheckNodesInPathQ(graph, *qk_div, q_reshape, q_transpose, num_heads, head_size, logger)) { + DEBUG_LOG("CheckNodesInPathQ returns false"); + return false; + } + + const Node* k_concat = nullptr; + if (has_past) { + k_concat = graph_utils::GetInputNode(qk_matmul, 1); + if (k_concat == nullptr || !graph_utils::IsSupportedOptypeVersionAndDomain(*k_concat, "Concat", {4, 11, 13}, kOnnxDomain)) { + return false; + } + } + + // path to k + std::vector k_path{ + {0, 1, "Transpose", {1, 13}, kOnnxDomain}, + {0, 0, "Reshape", {5, 13}, kOnnxDomain}, + {1, 0, "Split", {2, 11, 13}, kOnnxDomain}}; + + if (!graph_utils::FindPath(has_past ? 
*k_concat : qk_matmul, true, k_path, edges, logger)) { + DEBUG_LOG("Failed to find path for k"); + return false; + } + + const Node& k_transpose = edges[0]->GetNode(); + const Node& k_reshape = edges[1]->GetNode(); + const Node& k_split = edges[2]->GetNode(); + if (k_split.Index() != v_split.Index()) { + DEBUG_LOG("k and v are not from same Split node"); + return false; + } + + if (!CheckNodesInPathK(graph, k_reshape, k_transpose, num_heads, head_size, logger)) { + DEBUG_LOG("CheckNodesInPathK returns false"); + return false; + } + + MatchPastResult past_result; + if (has_past && !MatchPastSubgraph(graph, *k_concat, *v_concat, past_result, logger)) { + DEBUG_LOG("MatchPastSubgraph returns false"); + return false; + } + + // Now everything is ready, we will start fusing subgraph. + NodeArg* mask_input = graph.GetNode(mask_nodes.unsqueeze_1->Index())->MutableInputDefs()[0]; + NodeArg* mask_int32 = GetOrCreateMaskInt32(graph, mask_input, mask_int32_map, layer_norm.GetExecutionProviderType()); + + NodeArg* qkv_weights = graph.GetNode(gemm0_result.gemm->Index())->MutableInputDefs()[1]; + NodeArg* qkv_bias = graph.GetNode(gemm0_result.gemm->Index())->MutableInputDefs()[2]; + + // Create Attention Node. + std::vector input_defs{layer_norm.MutableOutputDefs()[0], qkv_weights, qkv_bias, mask_int32}; + std::vector output_defs{graph.GetNode(reshape.Index())->MutableOutputDefs()[0]}; + + if (has_past) { + input_defs.push_back(past_result.past); + output_defs.push_back(past_result.present); + } + + Node& attention_node = graph.AddNode( + graph.GenerateNodeName("Attention"), + "Attention", + "Fused Attention subgraphs ", + input_defs, + output_defs, + nullptr, + kMSDomain); + attention_node.AddAttribute("num_heads", num_heads); + attention_node.AddAttribute("unidirectional", (int64_t)1); + + // Assign provider to this new node. + attention_node.SetExecutionProviderType(layer_norm.GetExecutionProviderType()); + + // Remove nodes that are not used anymore. 
+ std::vector nodes_to_remove{ + reshape.Index(), + transpose.Index(), + qkv_matmul.Index(), + v_transpose.Index(), + v_reshape.Index(), + v_split.Index(), + qk_div->Index(), + qk_matmul.Index(), + q_transpose.Index(), + q_reshape.Index(), + k_transpose.Index(), + k_reshape.Index()}; + + nodes_to_remove.insert(nodes_to_remove.end(), unidir_mask_result.node_indices.begin(), unidir_mask_result.node_indices.end()); + nodes_to_remove.insert(nodes_to_remove.end(), gemm0_result.node_indices.begin(), gemm0_result.node_indices.end()); + if (has_past) { + nodes_to_remove.insert(nodes_to_remove.end(), past_result.node_indices.begin(), past_result.node_indices.end()); + } + SetMaskNodesToRemove(graph, mask_nodes, nodes_to_remove); + + for (const auto& node_index : nodes_to_remove) { + Node* node = graph.GetNode(node_index); + graph_utils::RemoveNodeOutputEdges(graph, *node); + graph.RemoveNode(node->Index()); + } + + DEBUG_LOG("Fused an attention node for GPT."); + return true; +} + +}; // namespace AttentionFusionHelper + +} // namespace onnxruntime diff --git a/onnxruntime/test/optimizer/graph_transform_test.cc b/onnxruntime/test/optimizer/graph_transform_test.cc index 37e8f95465..6ab3de3888 100644 --- a/onnxruntime/test/optimizer/graph_transform_test.cc +++ b/onnxruntime/test/optimizer/graph_transform_test.cc @@ -1718,6 +1718,24 @@ TEST_F(GraphTransformationTests, AttentionFusionFloat32Test) { ValidateAttention(graph); } +// Test GPT-2 Attention Fusion with float32 mask +TEST_F(GraphTransformationTests, AttentionFusionGPTWithPastAndMaskTest) { + auto model_uri = MODEL_FOLDER "fusion/gpt2_past_mask_one_layer.onnx"; + std::shared_ptr p_model; + ASSERT_STATUS_OK(Model::Load(model_uri, p_model, nullptr, *logger_)); + Graph& graph = p_model->MainGraph(); + + onnxruntime::GraphTransformerManager graph_transformation_mgr{5}; + graph_transformation_mgr.Register(onnxruntime::make_unique(), TransformerLevel::Level2); + auto ret = graph_transformation_mgr.ApplyTransformers(graph, 
TransformerLevel::Level2, *logger_); + ASSERT_TRUE(ret.IsOK()); + + std::map op_to_count = CountOpsInGraph(graph); + EXPECT_EQ(op_to_count["Transpose"], 0); + EXPECT_EQ(op_to_count["Softmax"], 0); + EXPECT_EQ(op_to_count["Attention"], 1); +} + TEST_F(GraphTransformationTests, GeluFusionTest) { auto model_uri = MODEL_FOLDER "fusion/gelu.onnx"; std::shared_ptr p_model; diff --git a/onnxruntime/test/testdata/transform/fusion/gpt2_past_mask_one_layer.onnx b/onnxruntime/test/testdata/transform/fusion/gpt2_past_mask_one_layer.onnx new file mode 100644 index 0000000000000000000000000000000000000000..bba15177ce30378af6c38a089ebe542e054ab8ff GIT binary patch literal 10363 zcmcIqd3;k<_9y9*^wHK5DMDDp00jb+x95fCzLtXGR2Byi{h;&7CoRE7yM!iHK-41Q z2tQnY;({ZNjEs(gxZ(nR_fc`g1yM#FWn^Sz+&35%#%1QGzjI#lUS8US;SZ<%e3F-Y z&%O7Yd(QW4iM!b2iZ4sHcQiG73%F`&${TYSo+*w;63H;j6bM4V122lVWJV;}9PJ46 zystR*=Lc z@T}l1J+CdXq%#^_4m--e66;fn`)HyLxOXuuQz%G5k-;UnPV|;cYj10cB*T2bSDbpx zmUj|U3IfKGvGz7(9aoCDQt~o$qluIRiqDmPW`4m8PvWI4ZtrM~c2r*;k5*qEja}TF z40l8mvE@-*FTr{)n2|1+TROk(#_&>Pyp%xZSb@!dY_iX>uoREYC|KZfA3F?K

}f z!UCT47EfKYC@k>40(fV795BPPjL$A*b2VGt(iY}=i8?YTO=B#QNM8o*F9f`mGb78Q z9kb!WA}z7y5#y!>!RP5a40h#;#uKrY_O{B0o*x_70rj7F_^UncR%6k`gCZdZ* zT3d`vbT(ex5s5b^MlwS|0^2DFKp~=CiGrmiA-@d*8PH0K-3EpHHfWs> zXiH;22-q;d3wZki-oD6Y&R24)3orm=czz)3{9s-TNU%Z*;EQ6Q0k1-agVI0{+1x;| z*cL^TZILtC_9GVvD-rwyFXHYZnz@J~w#1sEVUY(~d}1czBzO>B40@S)Y0Qd(&y{}e zjaVc^`FOj0qL^2{6vmMc33Vue4o%TOhq*958fXRMMhr!Y8H|Z?24fGQ=}RIrzPDLN zlF7E}rm*pr4fe4oBFzSZBzavkqOGlAG2ko22mdKdQKTi_9I5Qrud5bKy__j*j3zUm z%a~$g|EY#&_M0E;Q;r&il2F$&T8GTpgtGaoDyHbt=(1|Cc!2{tfwR#`HV1=lp98ln zaG-|ymfM%O+-~0qZXbZ?V!)IkQs83Hg&=}YPwPU0_>$H|P6#vO7#G+^hrt`Kfb-)O zoAZ;vUQC~hs#eg76@Z|PH!uB!l|$M9Pe4mY0+f!Wi)wLo~D*Nx&BXzJOR0 zz=+eL$%0)p34DPd$r;gPF^_0?-!hN;fyM<{@D|VOYz)hiuK?a5eB&5SNLga!ceb;b zK2gZPsSSL%C*TCG2Hc^{qFAz_r=TSa>eTRE#$?c-LnZ@)@raiYKoJA7wZu{_;1I@f zGT?Ph!*ojy`U>&EU&1((?So4&t*SyQ!&uOL$(UmZ0bt;K$i<@J0P8C$n( zQwb78HyFbS@TBwF7bjaIOT!8W=AL@CvP*D5&a4a^qqqP$iJ5^bd6Wi@@v=xuXVh@o z0vd{7OGcG!#|U#s0I0WIVOaT3j?wUUXzBvmEha%}9V?jtYC7#7?RgivR1h3qFeMTc zaQA7Ks>qg0#ZaQ4w@|=)*|-92S)X#bYXp)4P!n0;N(76w5^%*9&;TGX2Lxa}>Fi1g zntlra0^|4s=Zjn>y9+|nldIYR%K|c><+HN8;0gU^>U`D$XwdSTcs>iaDwCC|?$GgZZ;p&XCHG5_QN6UM6jv9GfR| z9Rn<}#+4YF6ute;%8g?Im}VNW4e~pY&!6ALJK8VlRpI2z?+j8~h(SgnG#dFG$MQc< zf`?-9co6~`h``y!vN?g_;HjWsLkcA0y)LZhI95#gt7lPafzMgc@x8?D6H1}zY+$+)wiZ?O*9t;srv>6?-2O@Iw0N5p(jl+chQloNBJ zFvnr_jE~GDoP~%aLL%7TEbTZBf_qN%qmY;yicmXe37Iq08~2=oKWYwIvSK-QuQRU>Kd3E*lV-?x%pyLEiUs zRCXwkjCUs+1`@Cd#M?@?nMi?=5K@%VvmoD`5pC%Vb3EsBo1ge0sXob!bLaCoYipBC zDQjwPt!|Aqb+jiSbHS_!Hp1O91`a*Kw3VQNC}`jcs#CEUI5YXN!g>7=YiQeZMT>4P`u?H5Mv73v;t|Rh%Hc2LF*2poGFY!v(JVxY>3obsVbD#$l>DMjc~WI77L;j&Y)HIv*9cV*&1jV)ojljpuJ}a1pyWVC{iF9^l^*;3q3uf zV~Ta6kqv{}FSi8~9@;DUG+-p)pfmw`ZRJ){4A{-rZ?W3!#%Wj+h*Gc9^6MKc56JOF z_LRh-PYuOd7~G`kSfPcMTkEqSwScaL*{AWOT1g(WJb~me-V#fOMKpALvM>QPnA2au z6eV!9t`PsBA>*kqzMhzLNh~=jx-{P2k@OFAJ1SiDM$f*%ZB`n#Q7PxBoO3kL;4bZ# z`m=U`a=*dt>^sUgxYL9EY_vjyyTGI^uW$fgP^dQ)_NyDV$3~TlC&^Q!1GS zp87~7vr2=zqAzVQ7cIxy_MPA^gxi8%8{054z215=g|v1%XX5%ks!iFSsWQsK})Zp@SDOXYyIhCp@(H#W(|VY%_28LDcY1_QG(uj%Ljdu 
z9F#IJlUBsl)}Asfw>UKS48WaPgN7tfJKBy(4RJZH8%i`AU3x#L;H;B>GEatz&KkVh zpV~1?SLvY~{6Q|5;}E<@%g)S`CAddR=G2ag{l3#fc6rUEN{)+#SZVE-ktArp*ir5R zv~lPcGz>QNV!m7K4Rt$HK#&_M%t4dLG(ABOPr~#bak?Gw4}TR7Ul-u5tJd5o_dCv? zh{l}HM`NGQMbmq}H%+d2J59cMljcP8UA6Uns>R-oSAW=D3RlGz96he~TvNR3<`)m@ z@`rQv`(JVDm$pAgCv4fS@7%DF9xV*fE1w=h6W2zxrvgN;&sTQ$IpCPXU1*%qG zwZ7|5tM&P_UZjtYenOX?jH;LKn@mIDx72qx@Ty}^o!;?7jT*jh;I0u%p3xRCPY}o9 zLF(AHq`LaS_3C;4s@e~CPEwEmYb9;GeZLyL<&{v)w`q5#xe%@J2$&SBA)FTrs=%vd-`riGG>Z0Ry`g0o= zYq#(Ci#qkt)#_)vHtD){in`2qi+cURJ*0W*Ub0}+3{5}0O07Tnubuq05CNEcwu3uO+NWY?BFU@;zy|XEQ0gmQ*`wu7k%NTRoeV7;@Yh827TbZf0LFak?z(vkJfbFHi8!4P^+#!|9|K! zXK8fovm59LZ;MucQbnl$F(>E~5A1Rt`=W=;;9nveuPmU~U%Q6Xw0%LpyvASi^4g6x z%`&5JeEWWFLf2F(|7w^z;Lr=?p6jO2KZZ&|YhIM-FJ8Ts^sI2`kF>0zv%mj7v}f}) z?Y60euK4}T?nUQ5LNxhtx?|@&+P~@>Qqd77Yron^D#y>DUzM$+^vE=__>vn#L#1;_ z+3vwK{^OZ++oPMi8>{|J=)^L$=AYx#bJkU9$8K=atFGTiznd2)<>iBR2Clo8ZY+O| zK0k5`b^T#|_sknr?Sq+Ts8?KdjXL11)AY_!Yt%RE0(#*+tMq$|_R@8S=IJlre1Kj* z=P_;0_Ca(`Nm6~cc%6E9=kN4pWrbd!r=aLv-uu;*P0hVTA=9f0$*l9Aq#tz`Y5CUo zfzT69cZIP9B?(SU(3Ly4kx~7FnycV$`fYPb$WyYL z{(1Hq@>hkD$DeVKQIFP=S%a35e>^;d9vridl z;}4#y4yfL*y}7!QupLL~sL$`BJwvZ0`_COgZiqcZPdVn)3)$DYSA8&=TsW{x8#L{{ z(5fk=x-ar6rDgY#>sP%Rn)=*V+Uz|yk;y%ky8ht-ZRN=K$)7&@j!b{}Y100}2JP;a z`@1!=P@D0^E###K?j&d5x|K}1Qy^dOI7W`|+M*R4_?U{z_teZ47tmPaRC-(O3*@GS zZ)>}vM`-;?mwWyPFn*%v>2iEJ={K5pT!a4GuUFMf`Xm@ye-_x`c^6S_&Te{e2Pz+>5&wa5ABm51q=w?E5F@atan?q->dY^v1E zWsRfh)aeVzd*gl}cYfDF_jdWHR`6%KqTo&PsP}aJ?u$a9*QQVHe&fPI^17>$c;i=) zp6ys*VUMa_ z>j!xT+Sg7n&z_w9d$Iq>Y~z_srsno2j0|V-dHBt9CQ>&RVFN;{cUJVTKaDvxt9Mu3 z*3^7?6BKf%Geff{T3Z%JBgxJV_+eT{0tm_(CK9V|Z);l$UC#<<{XnL|{0tqNSX->^ cVxStv3^qrhTX0z{dU>^Nb^w#zIjnH}KX(CzIsgCw literal 0 HcmV?d00001