diff --git a/onnxruntime/python/tools/quantization/calibrate.py b/onnxruntime/python/tools/quantization/calibrate.py
index 7f1cd8fc7f..7a9e9d9197 100644
--- a/onnxruntime/python/tools/quantization/calibrate.py
+++ b/onnxruntime/python/tools/quantization/calibrate.py
@@ -39,12 +39,13 @@ class CalibrationDataReader(metaclass=abc.ABCMeta):
 
 
 class CalibraterBase:
-    def __init__(self, model, op_types_to_calibrate=[], augmented_model_path='augmented_model.onnx', symmetric=False):
+    def __init__(self, model, op_types_to_calibrate=[], augmented_model_path='augmented_model.onnx', symmetric=False, use_external_data_format=False):
         '''
         :param model: ONNX model to calibrate. It can be a ModelProto or a model path
         :param op_types_to_calibrate: operator types to calibrate. By default, calibrate all the float32/float16 tensors.
         :param augmented_model_path: save augmented model to this path.
         :param symmetric: make range of tensor symmetric (central point is 0).
+        :param use_external_data_format: use external data format to store model which size is >= 2Gb
         '''
         if isinstance(model, str):
             self.model = onnx.load(model)
@@ -56,6 +57,7 @@ class CalibraterBase:
         self.op_types_to_calibrate = op_types_to_calibrate
         self.augmented_model_path = augmented_model_path
         self.symmetric = symmetric
+        self.use_external_data_format = use_external_data_format
 
         # augment graph
         self.augment_model = None
@@ -138,14 +140,15 @@ class CalibraterBase:
 
 
 class MinMaxCalibrater(CalibraterBase):
-    def __init__(self, model, op_types_to_calibrate=[], augmented_model_path='augmented_model.onnx', symmetric=False):
+    def __init__(self, model, op_types_to_calibrate=[], augmented_model_path='augmented_model.onnx', symmetric=False, use_external_data_format=False):
         '''
         :param model: ONNX model to calibrate. It can be a ModelProto or a model path
         :param op_types_to_calibrate: operator types to calibrate. By default, calibrate all the float32/float16 tensors.
         :param augmented_model_path: save augmented model to this path.
         :param symmetric: make range of tensor symmetric (central point is 0).
+        :param use_external_data_format: use external data format to store model which size is >= 2Gb
         '''
-        super(MinMaxCalibrater, self).__init__(model, op_types_to_calibrate, augmented_model_path, symmetric)
+        super(MinMaxCalibrater, self).__init__(model, op_types_to_calibrate, augmented_model_path, symmetric, use_external_data_format)
         self.intermediate_outputs = []
         self.calibrate_tensors_range = None
         self.num_model_outputs = len(self.model.graph.output)
@@ -195,7 +198,7 @@ class MinMaxCalibrater(CalibraterBase):
 
         model.graph.node.extend(added_nodes)
         model.graph.output.extend(added_outputs)
-        onnx.save(model, self.augmented_model_path)
+        onnx.save(model, self.augmented_model_path, save_as_external_data=self.use_external_data_format)
         self.augment_model = model
 
     def clear_collected_data(self):
@@ -268,7 +271,6 @@ class MinMaxCalibrater(CalibraterBase):
             else:
                 pairs.append(tuple([min_value, max_value]))
 
-
         new_calibrate_tensors_range = dict(zip(calibrate_tensor_names, pairs))
         if self.calibrate_tensors_range:
             self.calibrate_tensors_range = self.merge_range(self.calibrate_tensors_range, new_calibrate_tensors_range)
@@ -282,6 +284,7 @@ class HistogramCalibrater(CalibraterBase):
                  model,
                  op_types_to_calibrate=[],
                  augmented_model_path='augmented_model.onnx',
+                 use_external_data_format=False,
                  method='percentile',
                  symmetric=False,
                  num_bins=128,
@@ -291,13 +294,14 @@ class HistogramCalibrater(CalibraterBase):
         :param model: ONNX model to calibrate. It can be a ModelProto or a model path
         :param op_types_to_calibrate: operator types to calibrate. By default, calibrate all the float32/float16 tensors.
         :param augmented_model_path: save augmented model to this path.
+        :param use_external_data_format: use external data format to store model which size is >= 2Gb
         :param method: A string. One of ['entropy', 'percentile'].
         :param symmetric: make range of tensor symmetric (central point is 0).
         :param num_bins: number of bins to create a new histogram for collecting tensor values.
         :param num_quantized_bins: number of quantized bins. Default 128.
         :param percentile: A float number between [0, 100]. Default 99.99.
         '''
-        super(HistogramCalibrater, self).__init__(model, op_types_to_calibrate, augmented_model_path)
+        super(HistogramCalibrater, self).__init__(model, op_types_to_calibrate, augmented_model_path, use_external_data_format)
         self.intermediate_outputs = []
         self.calibrate_tensors_range = None
         self.num_model_outputs = len(self.model.graph.output)
@@ -327,7 +331,7 @@ class HistogramCalibrater(CalibraterBase):
 
         model.graph.node.extend(added_nodes)
         model.graph.output.extend(added_outputs)
-        onnx.save(model, self.augmented_model_path)
+        onnx.save(model, self.augmented_model_path, save_as_external_data=self.use_external_data_format)
         self.augment_model = model
 
     def clear_collected_data(self):
@@ -343,7 +347,6 @@ class HistogramCalibrater(CalibraterBase):
                 break
             self.intermediate_outputs.append(self.infer_session.run(None, inputs))
 
-
         if len(self.intermediate_outputs) == 0:
             raise ValueError("No data is collected.")
 
@@ -384,6 +387,7 @@ class EntropyCalibrater(HistogramCalibrater):
                  model,
                  op_types_to_calibrate=[],
                  augmented_model_path='augmented_model.onnx',
+                 use_external_data_format=False,
                  method='entropy',
                  symmetric=False,
                  num_bins=128,
@@ -392,19 +396,21 @@ class EntropyCalibrater(HistogramCalibrater):
         :param model: ONNX model to calibrate. It can be a ModelProto or a model path
         :param op_types_to_calibrate: operator types to calibrate. By default, calibrate all the float32/float16 tensors.
         :param augmented_model_path: save augmented model to this path.
+        :param use_external_data_format: use external data format to store model which size is >= 2Gb
         :param method: A string. One of ['entropy', 'percentile'].
         :param symmetric: make range of tensor symmetric (central point is 0).
         :param num_bins: number of bins to create a new histogram for collecting tensor values.
         :param num_quantized_bins: number of quantized bins. Default 128.
         '''
-        super(EntropyCalibrater, self).__init__(model, op_types_to_calibrate, augmented_model_path, method=method,
-                                                symmetric=symmetric, num_bins=num_bins, num_quantized_bins=num_quantized_bins)
+        super(EntropyCalibrater, self).__init__(model, op_types_to_calibrate, augmented_model_path, use_external_data_format,
+                                                method=method, symmetric=symmetric, num_bins=num_bins, num_quantized_bins=num_quantized_bins)
 
 class PercentileCalibrater(HistogramCalibrater):
     def __init__(self,
                  model,
                  op_types_to_calibrate=[],
                  augmented_model_path='augmented_model.onnx',
+                 use_external_data_format=False,
                  method='percentile',
                  symmetric=False,
                  num_bins=2048,
@@ -413,13 +419,14 @@ class PercentileCalibrater(HistogramCalibrater):
         :param model: ONNX model to calibrate. It can be a ModelProto or a model path
         :param op_types_to_calibrate: operator types to calibrate. By default, calibrate all the float32/float16 tensors.
         :param augmented_model_path: save augmented model to this path.
+        :param use_external_data_format: use external data format to store model which size is >= 2Gb
         :param method: A string. One of ['entropy', 'percentile'].
         :param symmetric: make range of tensor symmetric (central point is 0).
         :param num_quantized_bins: number of quantized bins. Default 128.
         :param percentile: A float number between [0, 100]. Default 99.99.
         '''
-        super(PercentileCalibrater, self).__init__(model, op_types_to_calibrate, augmented_model_path, method=method,
-                                                   symmetric=symmetric, num_bins=num_bins, percentile=percentile)
+        super(PercentileCalibrater, self).__init__(model, op_types_to_calibrate, augmented_model_path, use_external_data_format,
+                                                   method=method, symmetric=symmetric, num_bins=num_bins, percentile=percentile)
 
 class CalibrationDataCollector(metaclass=abc.ABCMeta):
     """
@@ -635,13 +642,13 @@ class HistogramCollector(CalibrationDataCollector):
         #        <--- quantized bins ---->
         # |======|===========|===========|=======|
         #              zero bin index
-        #        ^                       ^        
+        #        ^                       ^
         #        |                       |
-        #   start index               end index          (start of iteration) 
-        #     ^                             ^        
+        #   start index               end index          (start of iteration)
+        #     ^                             ^
         #     |                             |
         #  start index                  end index               ...
-        # ^                                      ^        
+        # ^                                      ^
         # |                                      |
         # start index                    end index       (end of iteration)
 
@@ -703,23 +710,40 @@ def create_calibrator(model,
                       op_types_to_calibrate=[],
                       augmented_model_path='augmented_model.onnx',
                       calibrate_method=CalibrationMethod.MinMax,
+                      use_external_data_format=False,
                       extra_options={}):
 
     if calibrate_method == CalibrationMethod.MinMax:
         # default settings for min-max algorithm
         symmetric = False if 'symmetric' not in extra_options else extra_options['symmetric']
-        return MinMaxCalibrater(model, op_types_to_calibrate, augmented_model_path, symmetric=symmetric)
+        return MinMaxCalibrater(
+            model, op_types_to_calibrate, augmented_model_path,
+            use_external_data_format=use_external_data_format,
+            symmetric=symmetric
+        )
     elif calibrate_method == CalibrationMethod.Entropy:
         # default settings for entropy algorithm
         num_bins = 128 if 'num_bins' not in extra_options else extra_options['num_bins']
         num_quantized_bins = 128 if 'num_quantized_bins' not in extra_options else extra_options['num_quantized_bins']
         symmetric = False if 'symmetric' not in extra_options else extra_options['symmetric']
-        return EntropyCalibrater(model, op_types_to_calibrate, augmented_model_path, symmetric=symmetric, num_bins=num_bins, num_quantized_bins=num_quantized_bins)
+        return EntropyCalibrater(
+            model, op_types_to_calibrate, augmented_model_path,
+            use_external_data_format=use_external_data_format,
+            symmetric=symmetric,
+            num_bins=num_bins,
+            num_quantized_bins=num_quantized_bins
+        )
     elif calibrate_method == CalibrationMethod.Percentile:
         # default settings for percentile algorithm
         num_bins = 2048 if 'num_bins' not in extra_options else extra_options['num_bins']
         percentile = 99.999 if 'percentile' not in extra_options else extra_options['percentile']
         symmetric = True if 'symmetric' not in extra_options else extra_options['symmetric']
-        return PercentileCalibrater(model, op_types_to_calibrate, augmented_model_path, symmetric=symmetric, num_bins=num_bins, percentile=percentile)
+        return PercentileCalibrater(
+            model, op_types_to_calibrate, augmented_model_path,
+            use_external_data_format=use_external_data_format,
+            symmetric=symmetric,
+            num_bins=num_bins,
+            percentile=percentile
+        )
 
     raise ValueError('Unsupported calibration method {}'.format(calibrate_method))
diff --git a/onnxruntime/python/tools/quantization/quantize.py b/onnxruntime/python/tools/quantization/quantize.py
index 96bc6ebc32..fe099cdf37 100644
--- a/onnxruntime/python/tools/quantization/quantize.py
+++ b/onnxruntime/python/tools/quantization/quantize.py
@@ -236,7 +236,13 @@ def quantize_static(model_input,
     model = load_model(Path(model_input), optimize_model, False)
 
     calib_extra_options = {} if 'CalibTensorRangeSymmetric' not in extra_options else {'symmetric': extra_options['CalibTensorRangeSymmetric']} 
-    calibrator = create_calibrator(model, op_types_to_quantize, calibrate_method=calibrate_method, extra_options=calib_extra_options)
+    calibrator = create_calibrator(
+        model,
+        op_types_to_quantize,
+        calibrate_method=calibrate_method,
+        use_external_data_format=use_external_data_format,
+        extra_options=calib_extra_options
+    )
     calibrator.collect_data(calibration_data_reader)
     tensors_range = calibrator.compute_range()