From de7a868d5f3390d7c095a53c26abd39f402f3f93 Mon Sep 17 00:00:00 2001
From: Ye Wang <52801275+wangyems@users.noreply.github.com>
Date: Fri, 20 Jan 2023 15:03:50 -0800
Subject: [PATCH] Update quantization_defs.cc (#14380)

### Description
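Add an optional float attribute `mask_filter_value` ("The value to be filled in the attention mask", documented default -10000.0f) to the operator schema in `onnxruntime/core/graph/contrib_ops/quantization_defs.cc` that already declares `past_present_share_buffer` and `scale`, and add the matching attribute entry to `docs/ContribOperators.md`.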



### Motivation and Context
Before this change the operator schema did not declare `mask_filter_value`, and the attribute was not listed for the operator in `docs/ContribOperators.md`. This patch declares it as an optional attribute and documents it.
---
 docs/ContribOperators.md                                | 2 ++
 onnxruntime/core/graph/contrib_ops/quantization_defs.cc | 4 ++++
 2 files changed, 6 insertions(+)
diff --git a/docs/ContribOperators.md b/docs/ContribOperators.md
index 40f70e0b6b..1e6d46963c 100644
--- a/docs/ContribOperators.md
+++ b/docs/ContribOperators.md
@@ -2408,6 +2408,8 @@ This version of the operator has been available since version 1 of the 'com.micr
 #### Attributes
 
 <dl>
+<dt><tt>mask_filter_value</tt> : float</dt>
+<dd>The value to be filled in the attention mask. Default value is -10000.0f</dd>
 <dt><tt>num_heads</tt> : int (required)</dt>
 <dd>Number of attention heads</dd>
 <dt><tt>past_present_share_buffer</tt> : int</dt>
diff --git a/onnxruntime/core/graph/contrib_ops/quantization_defs.cc b/onnxruntime/core/graph/contrib_ops/quantization_defs.cc
index c45b5a79e5..6111afbd5d 100644
--- a/onnxruntime/core/graph/contrib_ops/quantization_defs.cc
+++ b/onnxruntime/core/graph/contrib_ops/quantization_defs.cc
@@ -952,6 +952,10 @@ ONNX_MS_OPERATOR_SET_SCHEMA(
         .Attr("past_present_share_buffer", "Corresponding past and present are same tensor, its shape is "
               "(2, batch_size, num_heads, max_sequence_length, head_size)",
               AttributeProto::INT, OPTIONAL_VALUE)
+        .Attr("mask_filter_value",
+              "The value to be filled in the attention mask. Default value is -10000.0f",
+              AttributeProto::FLOAT,
+              OPTIONAL_VALUE)
         .Attr("scale",
               "Custom scale will be used if specified. Default value is 1/sqrt(head_size)",
               AttributeProto::FLOAT,