diff --git a/docs/ContribOperators.md b/docs/ContribOperators.md index 40f70e0b6b..1e6d46963c 100644 --- a/docs/ContribOperators.md +++ b/docs/ContribOperators.md @@ -2408,6 +2408,8 @@ This version of the operator has been available since version 1 of the 'com.micr #### Attributes
+
mask_filter_value : float
+
The value to be filled in the attention mask. Default value is -10000.0f
num_heads : int (required)
Number of attention heads
past_present_share_buffer : int
diff --git a/onnxruntime/core/graph/contrib_ops/quantization_defs.cc b/onnxruntime/core/graph/contrib_ops/quantization_defs.cc index c45b5a79e5..6111afbd5d 100644 --- a/onnxruntime/core/graph/contrib_ops/quantization_defs.cc +++ b/onnxruntime/core/graph/contrib_ops/quantization_defs.cc @@ -952,6 +952,10 @@ ONNX_MS_OPERATOR_SET_SCHEMA( .Attr("past_present_share_buffer", "Corresponding past and present are same tensor, its shape is " "(2, batch_size, num_heads, max_sequence_length, head_size)", AttributeProto::INT, OPTIONAL_VALUE) + .Attr("mask_filter_value", + "The value to be filled in the attention mask. Default value is -10000.0f", + AttributeProto::FLOAT, + OPTIONAL_VALUE) .Attr("scale", "Custom scale will be used if specified. Default value is 1/sqrt(head_size)", AttributeProto::FLOAT,