Update quantization_defs.cc (#14380)

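### Description
Add the optional float attribute `mask_filter_value` to the operator schema in `onnxruntime/core/graph/contrib_ops/quantization_defs.cc` and document it in `docs/ContribOperators.md`.

### Motivation and Context
The attribute exposes the value to be filled in the attention mask (documented default: -10000.0f) as part of the operator schema.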
2026-07-04 04:07:22 +00:00 · 2023-01-20 15:03:50 -08:00 · 2023-01-20 15:03:50 -08:00 · de7a868d5f
commit de7a868d5f
parent 2d8ee5251c
2 changed files with 6 additions and 0 deletions
--- a/docs/ContribOperators.md
+++ b/docs/ContribOperators.md
@@ -2408,6 +2408,8 @@ This version of the operator has been available since version 1 of the 'com.micr
 #### Attributes

 <dl>
+<dt><tt>mask_filter_value</tt> : float</dt>
+<dd>The value to be filled in the attention mask. Default value is -10000.0f</dd>
 <dt><tt>num_heads</tt> : int (required)</dt>
 <dd>Number of attention heads</dd>
 <dt><tt>past_present_share_buffer</tt> : int</dt>
--- a/onnxruntime/core/graph/contrib_ops/quantization_defs.cc
+++ b/onnxruntime/core/graph/contrib_ops/quantization_defs.cc
@@ -952,6 +952,10 @@ ONNX_MS_OPERATOR_SET_SCHEMA(
        .Attr("past_present_share_buffer", "Corresponding past and present are same tensor, its shape is "
              "(2, batch_size, num_heads, max_sequence_length, head_size)",
              AttributeProto::INT, OPTIONAL_VALUE)
+        .Attr("mask_filter_value",
+              "The value to be filled in the attention mask. Default value is -10000.0f",
+              AttributeProto::FLOAT,
+              OPTIONAL_VALUE)
        .Attr("scale",
              "Custom scale will be used if specified. Default value is 1/sqrt(head_size)",
              AttributeProto::FLOAT,