diff --git a/onnxruntime/python/tools/quantization/execution_providers/qnn/quant_config.py b/onnxruntime/python/tools/quantization/execution_providers/qnn/quant_config.py
index 3c9b319c78..184fe54a3f 100644
--- a/onnxruntime/python/tools/quantization/execution_providers/qnn/quant_config.py
+++ b/onnxruntime/python/tools/quantization/execution_providers/qnn/quant_config.py
@@ -165,10 +165,13 @@ def get_qnn_qdq_config(
         "WeightSymmetric": weight_symmetric,
     }
 
-    # TODO: Remove this extra option once ORT uses an ONNX version that supports 16-bit Q/DQ ops.
-    overrides_have_int16 = any(t in Q16_TYPES for t in overrides_helper.get_quant_types())
-    if activation_type in Q16_TYPES or weight_type in Q16_TYPES or overrides_have_int16:
-        extra_options["UseQDQContribOps"] = True
+    # ONNX opset < 21 does not support 16-bit quantization, so must use 'com.microsoft' domain
+    # on Q/DQ operators if using 16-bit quantization.
+    onnx_opset = next(x for x in model.opset_import if x.domain == "" or x.domain == "ai.onnx")
+    if onnx_opset.version < 21:
+        overrides_have_int16 = any(t in Q16_TYPES for t in overrides_helper.get_quant_types())
+        if activation_type in Q16_TYPES or weight_type in Q16_TYPES or overrides_have_int16:
+            extra_options["UseQDQContribOps"] = True
 
     return StaticQuantConfig(
         calibration_data_reader,
diff --git a/onnxruntime/python/tools/quantization/qdq_quantizer.py b/onnxruntime/python/tools/quantization/qdq_quantizer.py
index 2416cf970e..724c3549e2 100644
--- a/onnxruntime/python/tools/quantization/qdq_quantizer.py
+++ b/onnxruntime/python/tools/quantization/qdq_quantizer.py
@@ -187,20 +187,22 @@ class QDQQuantizer(BaseQuantizer):
 
         self.qdq_op_domain = ms_domain if extra_options.get("UseQDQContribOps", False) else None
 
-        # The ONNX spec does not yet support 16-bit Q/DQ ops. So, must override the Q/DQ op domain to 'com.microsoft'
-        # if the activation or weight types are 16-bit integers.
-        # TODO: Remove this override (and use only the 'UseQDQContribOps' option) if/when ONNX adds 16-bit support.
-        int16_types = (TensorProto.UINT16, TensorProto.INT16)
-        overrides_have_int16 = any(t.tensor_type in int16_types for t in self.tensor_quant_override_qtypes)
-        if not self.qdq_op_domain and (
-            self.activation_qType in int16_types or self.weight_qType in int16_types or overrides_have_int16
-        ):
-            logging.warning(
-                "ONNX QuantizeLinear and DequantizeLinear operators do not support 16-bit integer quantization types. "
-                f"The domain of QuantizeLinear and DequantizeLinear operators will be set to '{ms_domain}' to "
-                "enable support."
-            )
-            self.qdq_op_domain = ms_domain
+        # The ONNX spec did not support 16-bit Q/DQ ops before opset 21.
+        # So, may have to override the Q/DQ op domain to 'com.microsoft' if the activation or weight types
+        # are 16-bit integers.
+        if self.opset_version < 21:
+            int16_types = (TensorProto.UINT16, TensorProto.INT16)
+            overrides_have_int16 = any(t.tensor_type in int16_types for t in self.tensor_quant_override_qtypes)
+            if not self.qdq_op_domain and (
+                self.activation_qType in int16_types or self.weight_qType in int16_types or overrides_have_int16
+            ):
+                logging.warning(
+                    "ONNX QuantizeLinear and DequantizeLinear operators do not support "
+                    "16-bit integer quantization types prior to opset 21. "
+                    f"The domain of QuantizeLinear and DequantizeLinear operators will be set to '{ms_domain}' to "
+                    "enable support."
+                )
+                self.qdq_op_domain = ms_domain
 
         self.quantization_params = self.calc_graph_quant_params()
 
diff --git a/onnxruntime/test/python/quantization/test_tensor_quant_overrides_option.py b/onnxruntime/test/python/quantization/test_tensor_quant_overrides_option.py
index ff97e04fb7..8691471b04 100644
--- a/onnxruntime/test/python/quantization/test_tensor_quant_overrides_option.py
+++ b/onnxruntime/test/python/quantization/test_tensor_quant_overrides_option.py
@@ -52,7 +52,7 @@ class TestTensorQuantOverridesOption(unittest.TestCase):
             "OUT": (0, np.float32(0.005075461231172085)),
         }
 
-    def build_float32_model(self):
+    def build_float32_model(self, opset=13):
         #    (input)
         #       |
         #    Sigmoid
@@ -72,11 +72,13 @@ class TestTensorQuantOverridesOption(unittest.TestCase):
         graph = onnx.helper.make_graph(
             [sigmoid_node, conv_node], "test", [inp], [out], initializer=[wgt_init, bias_init]
         )
-        model = onnx.helper.make_model(graph, opset_imports=[onnx.helper.make_opsetid("", 13)])
+        model = onnx.helper.make_model(graph, opset_imports=[onnx.helper.make_opsetid("", opset)])
         onnx.save(model, "model.onnx")
 
-    def perform_qdq_quantization(self, output_model_name, extra_options=None, per_channel=False, activation_type=None):
-        self.build_float32_model()
+    def perform_qdq_quantization(
+        self, output_model_name, extra_options=None, per_channel=False, activation_type=None, opset=13
+    ):
+        self.build_float32_model(opset)
 
         if activation_type is None:
             activation_type = self.default_act_qtype
@@ -428,8 +430,9 @@ class TestTensorQuantOverridesOption(unittest.TestCase):
 
     def test_16bit_overrides_set_ms_domain(self):
         """
-        Test that overriding a tensor to 16bit (when default is 8bit) automatically sets the 'com.microsoft'
-        domain on DQ and Q ops.
+        Test that overriding a tensor to 16bit (when default is 8bit) automatically
+        sets the 'com.microsoft' domain on DQ and Q ops for opset < 21.
+        Before ONNX 1.16.0, we had to use the 'com.microsoft' domain to be able to use 16-bit quantization.
         """
         qdq_model_name = "model_quant_overrides_to_16bit.onnx"
         inp_zp, _, sig_out_zp, _, _, _, _, _, out_zp, _ = self.perform_qdq_quantization(
@@ -441,6 +444,7 @@ class TestTensorQuantOverridesOption(unittest.TestCase):
                     "SIG_OUT": [{"quant_type": QuantType.QUInt16}],
                 }
             },
+            opset=19,
         )
 
         # Input and Sigmoid's output should be overridden to 16bit
@@ -456,6 +460,38 @@ class TestTensorQuantOverridesOption(unittest.TestCase):
             if node.op_type in {"QuantizeLinear", "DequantizeLinear"}:
                 self.assertEqual(node.domain, ms_domain)
 
+    def test_16bit_overrides_not_set_ms_domain(self):
+        """
+        Test that overriding a tensor to 16bit (when default is 8bit) no longer automatically
+        sets the 'com.microsoft' domain on DQ and Q ops for opset >= 21.
+        Before ONNX 1.16.0, we had to use the 'com.microsoft' domain to be able to use 16-bit quantization.
+        """
+        qdq_model_name = "model_quant_overrides_to_16bit.onnx"
+        inp_zp, _, sig_out_zp, _, _, _, _, _, out_zp, _ = self.perform_qdq_quantization(
+            qdq_model_name,
+            activation_type=onnx.TensorProto.UINT8,  # Default to 8bit activations
+            extra_options={
+                "TensorQuantOverrides": {
+                    "INP": [{"quant_type": QuantType.QUInt16}],
+                    "SIG_OUT": [{"quant_type": QuantType.QUInt16}],
+                }
+            },
+            opset=21,
+        )
+
+        # Input and Sigmoid's output should be overridden to 16bit
+        self.assertEqual(inp_zp.data_type, onnx.TensorProto.UINT16)
+        self.assertEqual(sig_out_zp.data_type, onnx.TensorProto.UINT16)
+
+        # Output should be the default uint8 type
+        self.assertEqual(out_zp.data_type, onnx.TensorProto.UINT8)
+
+        # Q/DQ ops should NOT have the 'com.microsoft' domain (opset >= 21 supports 16-bit Q/DQ natively)
+        qdq_model = onnx.load_model(qdq_model_name)
+        for node in qdq_model.graph.node:
+            if node.op_type in {"QuantizeLinear", "DequantizeLinear"}:
+                self.assertNotEqual(node.domain, ms_domain)
+
     def test_override_validation_nonexisting_tensor(self):
         """
         Test that specifying a non-existing tensor should fail.