From e745575187acd64a1f7ce2b321e05ed6985363f6 Mon Sep 17 00:00:00 2001
From: Tianlei Wu
Date: Fri, 1 Sep 2023 08:18:50 -0700
Subject: [PATCH] fix assert error in attention fusion script (#17375)

Add a check of num_heads and hidden_size to avoid assert error
(https://github.com/microsoft/onnxruntime/issues/17254)
---
 onnxruntime/python/tools/transformers/fusion_attention.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/onnxruntime/python/tools/transformers/fusion_attention.py b/onnxruntime/python/tools/transformers/fusion_attention.py
index 31496c5052..5bcbce1df8 100644
--- a/onnxruntime/python/tools/transformers/fusion_attention.py
+++ b/onnxruntime/python/tools/transformers/fusion_attention.py
@@ -1166,6 +1166,13 @@ class FusionAttention(Fusion):
         attention_last_node = reshape_qkv if einsum_node is None else transpose_qkv
 
         q_num_heads, q_hidden_size = self.get_num_heads_and_hidden_size(reshape_q)
+        if q_num_heads <= 0 or q_hidden_size <= 0:
+            logger.warning(
+                "Failed to detect num_heads and hidden_size for Attention fusion. "
+                "Please specify those parameters in argument."
+            )
+            return
+
         # number of heads are same for all the paths, hence to create attention node, we pass the q_num_heads
         # the input_hidden_size represents the input hidden size, this is used as needed but hidden sizes for Q, K are extracted appropriately
         new_node = self.create_attention_node(