diff --git a/onnxruntime/python/tools/quantization/calibrate.py b/onnxruntime/python/tools/quantization/calibrate.py
index 0762506e62..dc95cbabbb 100644
--- a/onnxruntime/python/tools/quantization/calibrate.py
+++ b/onnxruntime/python/tools/quantization/calibrate.py
@@ -21,21 +21,24 @@ import re
 import subprocess
 import json
 
-
-def augment_graph(model):
+def augment_graph(model, quantization_candidates=['Conv', 'MatMul'], black_nodes=[], white_nodes=[]):
     '''
-    Adds ReduceMin and ReduceMax nodes to all Conv and MatMul nodes in
+    Adds ReduceMin and ReduceMax nodes to all quantization_candidates op type nodes in
     model and ensures their outputs are stored as part of the graph output
         parameter model: loaded FP32 ONNX model to quantize
+        parameter quantization_candidates: node op types for nodes to be quantized.
+                                           Calibration will be done for them.
+        parameter black_nodes: nodes with these names will be forcibly ignored by this
+                               calibration augmentation, no matter what their op type is.
+        parameter white_nodes: nodes with these names will be forced to be calibration augmented.
         return: augmented ONNX model
     '''
-    # Candidate nodes for quantization. Calibration will be done for these nodes only
-    # When more nodes are extended to support quantization, add them to this list
-    quantization_candidates = ['Conv', 'MatMul']
+
     added_nodes = []
     added_outputs = []
     for node in model.graph.node:
-        if node.op_type in quantization_candidates:
+        should_be_calibrate = ((node.op_type in quantization_candidates) and (node.name not in black_nodes)) or (node.name in white_nodes)
+        if should_be_calibrate:
             input_name = node.output[0]
             # Adding ReduceMin nodes
             reduce_min_name = ''
@@ -217,6 +220,15 @@ def main():
     parser = argparse.ArgumentParser(description='parsing model and test data set paths')
     parser.add_argument('--model_path', required=True)
     parser.add_argument('--dataset_path', required=True)
+    parser.add_argument('--force_fusions', default=False, action='store_true')
+    parser.add_argument('--op_types', type=str, default='Conv,MatMul',
+                        help='comma delimited operator types to be calibrated and quantized')
+    parser.add_argument('--black_nodes', type=str, default='',
+                        help='comma delimited operator names that should not be quantized')
+    parser.add_argument('--white_nodes', type=str, default='',
+                        help='comma delimited operator names force to be quantized')
+    parser.add_argument('--augmented_model_path', type=str, default = 'augmented_model.onnx',
+                        help='save augmented model to this file for verification purpose')
     parser.add_argument('--output_model_path', type=str, default='calibrated_quantized_model.onnx')
     parser.add_argument('--dataset_size',
                         type=int,
@@ -228,6 +240,9 @@ def main():
                         choices=['preprocess_method1', 'preprocess_method2', 'None'],
                         help="Refer to Readme.md for guidance on choosing this option.")
     args = parser.parse_args()
+    calibrate_op_types = [op for op in args.op_types.split(',') if op]
+    black_nodes = [name for name in args.black_nodes.split(',') if name]  # ''.split(',') is [''], which would match unnamed nodes
+    white_nodes = [name for name in args.white_nodes.split(',') if name]  # drop '' so unnamed nodes are not force-calibrated
     model_path = args.model_path
     output_model_path = args.output_model_path
     images_folder = args.dataset_path
@@ -235,25 +250,25 @@ def main():
     size_limit = args.dataset_size
 
     # Generating augmented ONNX model
-    augmented_model_path = 'augmented_model.onnx'
     model = onnx.load(model_path)
-    augmented_model = augment_graph(model)
-    onnx.save(augmented_model, augmented_model_path)
+    augmented_model = augment_graph(model, calibrate_op_types, black_nodes, white_nodes)
+    onnx.save(augmented_model, args.augmented_model_path)
 
     # Conducting inference
-    session = onnxruntime.InferenceSession(augmented_model_path, None)
+    session = onnxruntime.InferenceSession(args.augmented_model_path, None)
     (samples, channels, height, width) = session.get_inputs()[0].shape
 
     # Generating inputs for quantization
     if args.data_preprocess == "None":
         inputs = load_pb_file(images_folder, args.dataset_size, samples, channels, height, width)
     else:
-        inputs = load_batch(images_folder, height, width, size_limit, args.data_preprocess)
+        inputs = load_batch(images_folder, height, width, args.data_preprocess, size_limit)
     print(inputs.shape)
     dict_for_quantization = get_intermediate_outputs(model_path, session, inputs, calib_mode)
     quantization_params_dict = calculate_quantization_params(model, quantization_thresholds=dict_for_quantization)
     calibrated_quantized_model = quantize(onnx.load(model_path),
                                           quantization_mode=QuantizationMode.QLinearOps,
+                                          force_fusions=args.force_fusions,
                                           quantization_params=quantization_params_dict)
     onnx.save(calibrated_quantized_model, output_model_path)
 
diff --git a/onnxruntime/python/tools/quantization/data_preprocess.py b/onnxruntime/python/tools/quantization/data_preprocess.py
index 1e73010360..8b1551e8a0 100644
--- a/onnxruntime/python/tools/quantization/data_preprocess.py
+++ b/onnxruntime/python/tools/quantization/data_preprocess.py
@@ -30,7 +30,8 @@ def preprocess_method1(image_filepath, height, width):
         parameter width: image width in pixels
         return: matrix characterizing image
     '''
-    pillow_img = Image.open(image_filepath).resize((width, height))
+    pillow_img = Image.new("RGB", (width, height))
+    pillow_img.paste(Image.open(image_filepath).resize((width, height)))
     input_data = np.float32(pillow_img) / 127.5 - 1.0  # normalization
     input_data -= np.mean(input_data)  # normalization
     nhwc_data = np.expand_dims(input_data, axis=0)
@@ -47,7 +48,8 @@ def preprocess_method2(image_filepath, height, width):
         parameter width: image width in pixels
         return: matrix characterizing image
     '''
-    pillow_img = Image.open(image_filepath).resize((width, height))
+    pillow_img = Image.new("RGB", (width, height))
+    pillow_img.paste(Image.open(image_filepath).resize((width, height)))
     input_data = np.float32(pillow_img) - \
         np.array([123.68, 116.78, 103.94], dtype=np.float32)
     nhwc_data = np.expand_dims(input_data, axis=0)
diff --git a/onnxruntime/python/tools/quantization/quantize.py b/onnxruntime/python/tools/quantization/quantize.py
index 5983de3ac7..83f1ae711b 100644
--- a/onnxruntime/python/tools/quantization/quantize.py
+++ b/onnxruntime/python/tools/quantization/quantize.py
@@ -15,6 +15,7 @@ from onnx import shape_inference
 __producer__ = "onnx.quantize"
 __version__ = "0.1.0"
 onnx_domain = "ai.onnx"
+ms_domain = "com.microsoft"
 onnx_op_set_version = 11
 
 type_to_name = {
@@ -314,6 +315,8 @@ class ONNXQuantizer:
                     new_list += self._quantize_matmul(node, new_list)
                 elif node.op_type == 'Gather' and self._is_valid_quantize_value(node.input[0]):
                     new_list += self._quantize_gather_ops(node, new_list)
+                elif node.op_type == 'Add' or node.op_type == 'Mul':
+                    new_list += self._quantize_binary_math_ops(node, new_list)
                 elif node.op_type == 'Relu' or node.op_type == 'Clip':
                     new_list += self._handle_activation_ops(node, new_list)
                 else:
@@ -875,7 +878,6 @@ class ONNXQuantizer:
                      List of scale names used for input quantization,
                      List of new QuantizeLinear nodes created)
         '''
-        assert (node.op_type == "Conv" or node.op_type == "MatMul" or node.op_type == "Gather")
 
         quantized_input_names = []
         zero_point_names = []
@@ -1018,6 +1020,61 @@ class ONNXQuantizer:
 
         return []
 
+    def _quantize_binary_math_ops(self, node, new_nodes_list):
+        '''
+        Replace a binary math node (Add, Mul) with its QLinear counterpart
+        (QLinearAdd, QLinearMul) when self.mode is QuantizationMode.QLinearOps.
+        Defers to self._handle_other_ops when the mode differs or when no
+        quantization parameters are found for the node's first output.
+
+            parameter node: Current binary math node
+            parameter new_nodes_list: List of new nodes created before processing current node
+            return: List of nodes in topological order that represents quantized binary math node
+        '''
+        if self.mode is not QuantizationMode.QLinearOps:
+            return self._handle_other_ops(node, new_nodes_list)
+
+        orig_output = node.output[0]
+        params_found, out_scale, out_zero_point, _, _ = self._get_quantization_params(orig_output)
+        if not params_found:
+            # Only quantize when quantization parameters were supplied for the output.
+            return self._handle_other_ops(node, new_nodes_list)
+
+        # Quantize both operands (input indices 0 and 1).
+        q_inputs, zero_points, scales, nodes = self._quantize_inputs(
+            node, [0, 1], new_nodes_list)
+
+        # Carry over the original attributes and place the new node in ms_domain.
+        op_kwargs = {}
+        for attr in node.attribute:
+            op_kwargs.update(_attribute_to_kwarg(attr))
+        op_kwargs["domain"] = ms_domain
+
+        quantized_output = orig_output + "_quantized"
+        quantized_name = node.name + "_quant" if node.name != "" else ""
+
+        # Operand order: (tensor, scale, zero point) per input, then output scale and zero point.
+        qlinear_inputs = [
+            q_inputs[0], scales[0], zero_points[0],
+            q_inputs[1], scales[1], zero_points[1],
+            out_scale, out_zero_point,
+        ]
+        qlinear_node = onnx.helper.make_node(
+            "QLinear" + node.op_type,
+            qlinear_inputs,
+            [quantized_output],
+            quantized_name,
+            **op_kwargs)
+        nodes.append(qlinear_node)
+
+        # Record the quantized value under the original output name.
+        q_output = QuantizedValue(
+            orig_output, quantized_output, out_scale, out_zero_point,
+            QuantizedValueType.Input)
+        self.quantized_value_map[orig_output] = q_output
+
+        return nodes
+
     def _quantize_gather_ops(self, node, new_nodes_list):
         assert (node.op_type == "Gather")
         (quantized_input_names, zero_point_names, scale_names, nodes) = \