From de7a868d5f3390d7c095a53c26abd39f402f3f93 Mon Sep 17 00:00:00 2001 From: Ye Wang <52801275+wangyems@users.noreply.github.com> Date: Fri, 20 Jan 2023 15:03:50 -0800 Subject: [PATCH] Update quantization_defs.cc (#14380) ### Description ### Motivation and Context --- docs/ContribOperators.md | 2 ++ onnxruntime/core/graph/contrib_ops/quantization_defs.cc | 4 ++++ 2 files changed, 6 insertions(+) diff --git a/docs/ContribOperators.md b/docs/ContribOperators.md index 40f70e0b6b..1e6d46963c 100644 --- a/docs/ContribOperators.md +++ b/docs/ContribOperators.md @@ -2408,6 +2408,8 @@ This version of the operator has been available since version 1 of the 'com.micr #### Attributes
 <dl>
+<dt><tt>mask_filter_value</tt> : float</dt>
+<dd>The value to be filled in the attention mask. Default value is -10000.0f</dd>
 <dt><tt>num_heads</tt> : int (required)</dt>
 <dd>Number of attention heads</dd>
 <dt><tt>past_present_share_buffer</tt> : int</dt>
diff --git a/onnxruntime/core/graph/contrib_ops/quantization_defs.cc b/onnxruntime/core/graph/contrib_ops/quantization_defs.cc index c45b5a79e5..6111afbd5d 100644 --- a/onnxruntime/core/graph/contrib_ops/quantization_defs.cc +++ b/onnxruntime/core/graph/contrib_ops/quantization_defs.cc @@ -952,6 +952,10 @@ ONNX_MS_OPERATOR_SET_SCHEMA( .Attr("past_present_share_buffer", "Corresponding past and present are same tensor, its shape is " "(2, batch_size, num_heads, max_sequence_length, head_size)", AttributeProto::INT, OPTIONAL_VALUE) + .Attr("mask_filter_value", + "The value to be filled in the attention mask. Default value is -10000.0f", + AttributeProto::FLOAT, + OPTIONAL_VALUE) .Attr("scale", "Custom scale will be used if specified. Default value is 1/sqrt(head_size)", AttributeProto::FLOAT,