Add support of generating symmetric/asymmetric tensor's range for calibration (#10663)

* add support of symmetric/asymmetric range of value

* modify comment

* Update calibrate.py

* update quantize.py

* remove newline at end of file
This commit is contained in:
Chi Lo 2022-02-28 16:33:45 -08:00 committed by GitHub
parent ffde44cd09
commit d2d22f2195
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 99 additions and 23 deletions

View file

@ -15,7 +15,7 @@ from onnx import onnx_pb as onnx_proto
from six import string_types
from enum import Enum
from .quant_utils import QuantType, smooth_distribution
from .quant_utils import QuantType, smooth_distribution, apply_plot
from .registry import QLinearOpsRegistry
import abc
@ -39,11 +39,12 @@ class CalibrationDataReader(metaclass=abc.ABCMeta):
class CalibraterBase:
def __init__(self, model, op_types_to_calibrate=[], augmented_model_path='augmented_model.onnx'):
def __init__(self, model, op_types_to_calibrate=[], augmented_model_path='augmented_model.onnx', symmetric=False):
'''
:param model: ONNX model to calibrate. It can be a ModelProto or a model path
:param op_types_to_calibrate: operator types to calibrate. By default, calibrate all the float32/float16 tensors.
:param augmented_model_path: save augmented model to this path.
:param symmetric: make range of tensor symmetric (central point is 0).
'''
if isinstance(model, str):
self.model = onnx.load(model)
@ -54,6 +55,7 @@ class CalibraterBase:
self.op_types_to_calibrate = op_types_to_calibrate
self.augmented_model_path = augmented_model_path
self.symmetric = symmetric
# augment graph
self.augment_model = None
@ -136,13 +138,14 @@ class CalibraterBase:
class MinMaxCalibrater(CalibraterBase):
def __init__(self, model, op_types_to_calibrate=[], augmented_model_path='augmented_model.onnx'):
def __init__(self, model, op_types_to_calibrate=[], augmented_model_path='augmented_model.onnx', symmetric=False):
'''
:param model: ONNX model to calibrate. It can be a ModelProto or a model path
:param op_types_to_calibrate: operator types to calibrate. By default, calibrate all the float32/float16 tensors.
:param augmented_model_path: save augmented model to this path.
:param symmetric: make range of tensor symmetric (central point is 0).
'''
super(MinMaxCalibrater, self).__init__(model, op_types_to_calibrate, augmented_model_path)
super(MinMaxCalibrater, self).__init__(model, op_types_to_calibrate, augmented_model_path, symmetric)
self.intermediate_outputs = []
self.calibrate_tensors_range = None
self.num_model_outputs = len(self.model.graph.output)
@ -259,7 +262,12 @@ class MinMaxCalibrater(CalibraterBase):
if type(max_value_array) == int or max_value_array.size > 0:
max_value = float(max_value_array)
pairs.append(tuple([min_value, max_value]))
if self.symmetric:
max_absolute_value = max(abs(min_value), abs(max_value))
pairs.append(tuple([-max_absolute_value, max_absolute_value]))
else:
pairs.append(tuple([min_value, max_value]))
new_calibrate_tensors_range = dict(zip(calibrate_tensor_names, pairs))
if self.calibrate_tensors_range:
@ -275,6 +283,7 @@ class HistogramCalibrater(CalibraterBase):
op_types_to_calibrate=[],
augmented_model_path='augmented_model.onnx',
method='percentile',
symmetric=False,
num_bins=128,
num_quantized_bins=2048,
percentile=99.999):
@ -283,6 +292,8 @@ class HistogramCalibrater(CalibraterBase):
:param op_types_to_calibrate: operator types to calibrate. By default, calibrate all the float32/float16 tensors.
:param augmented_model_path: save augmented model to this path.
:param method: A string. One of ['entropy', 'percentile'].
:param symmetric: make range of tensor symmetric (central point is 0).
:param num_bins: number of bins to create a new histogram for collecting tensor values.
:param num_quantized_bins: number of quantized bins. Default 2048.
:param percentile: A float number between [0, 100]. Default 99.999.
'''
@ -293,6 +304,7 @@ class HistogramCalibrater(CalibraterBase):
self.model_original_outputs = set(output.name for output in self.model.graph.output)
self.collector = None
self.method = method
self.symmetric = symmetric
self.num_bins = num_bins
self.num_quantized_bins = num_quantized_bins
self.percentile = percentile
@ -349,6 +361,7 @@ class HistogramCalibrater(CalibraterBase):
if not self.collector:
self.collector = HistogramCollector(method=self.method,
symmetric=self.symmetric,
num_bins=self.num_bins,
num_quantized_bins=self.num_quantized_bins,
percentile=self.percentile)
@ -359,7 +372,7 @@ class HistogramCalibrater(CalibraterBase):
def compute_range(self):
'''
Compute the min-max range of tensor
:return: dictionary mapping: {added node names: (ReduceMin, ReduceMax) pairs }
:return: dictionary mapping: {tensor name: (min value, max value)}
'''
if not self.collector:
raise ValueError("No collector created and can't generate calibration data.")
@ -372,6 +385,7 @@ class EntropyCalibrater(HistogramCalibrater):
op_types_to_calibrate=[],
augmented_model_path='augmented_model.onnx',
method='entropy',
symmetric=False,
num_bins=128,
num_quantized_bins=128):
'''
@ -379,11 +393,12 @@ class EntropyCalibrater(HistogramCalibrater):
:param op_types_to_calibrate: operator types to calibrate. By default, calibrate all the float32/float16 tensors.
:param augmented_model_path: save augmented model to this path.
:param method: A string. One of ['entropy', 'percentile'].
:param symmetric: make range of tensor symmetric (central point is 0).
:param num_bins: number of bins to create a new histogram for collecting tensor values.
:param num_quantized_bins: number of quantized bins. Default 128.
'''
super(EntropyCalibrater, self).__init__(model, op_types_to_calibrate, augmented_model_path,
method=method, num_bins=num_bins, num_quantized_bins=num_quantized_bins)
super(EntropyCalibrater, self).__init__(model, op_types_to_calibrate, augmented_model_path, method=method,
symmetric=symmetric, num_bins=num_bins, num_quantized_bins=num_quantized_bins)
class PercentileCalibrater(HistogramCalibrater):
def __init__(self,
@ -391,6 +406,7 @@ class PercentileCalibrater(HistogramCalibrater):
op_types_to_calibrate=[],
augmented_model_path='augmented_model.onnx',
method='percentile',
symmetric=False,
num_bins=2048,
percentile=99.999):
'''
@ -398,12 +414,12 @@ class PercentileCalibrater(HistogramCalibrater):
:param op_types_to_calibrate: operator types to calibrate. By default, calibrate all the float32/float16 tensors.
:param augmented_model_path: save augmented model to this path.
:param method: A string. One of ['entropy', 'percentile'].
:param symmetric: make range of tensor symmetric (central point is 0).
:param num_bins: number of bins to create a new histogram for collecting tensor values. Default 2048.
:param percentile: A float number between [0, 100]. Default 99.999.
'''
super(PercentileCalibrater, self).__init__(model, op_types_to_calibrate, augmented_model_path,
method=method, num_bins=num_bins,
percentile=percentile)
super(PercentileCalibrater, self).__init__(model, op_types_to_calibrate, augmented_model_path, method=method,
symmetric=symmetric, num_bins=num_bins, percentile=percentile)
class CalibrationDataCollector(metaclass=abc.ABCMeta):
"""
@ -434,9 +450,10 @@ class HistogramCollector(CalibrationDataCollector):
ref: https://docs.nvidia.com/deeplearning/tensorrt/pytorch-quantization-toolkit/docs/_modules/
pytorch_quantization/calib/histogram.html
"""
def __init__(self, method, num_bins, num_quantized_bins, percentile):
def __init__(self, method, symmetric, num_bins, num_quantized_bins, percentile):
self.histogram_dict = {}
self.method = method
self.symmetric = symmetric
self.num_bins = num_bins
self.num_quantized_bins= num_quantized_bins
self.percentile = percentile
@ -450,13 +467,19 @@ class HistogramCollector(CalibrationDataCollector):
# TODO: Currently we have different collect() for entropy and percentile method respectively.
# Need unified collect in the future.
if self.method == 'entropy':
return self.collect_for_entropy(name_to_arr)
return self.collect_value(name_to_arr)
elif self.method == 'percentile':
return self.collect_for_percentile(name_to_arr)
if self.symmetric:
return self.collect_absolute_value(name_to_arr)
else:
return self.collect_value(name_to_arr)
else:
raise ValueError('Only \'entropy\' or \'percentile\' method are supported')
def collect_for_percentile(self, name_to_arr):
def collect_absolute_value(self, name_to_arr):
'''
Collect histogram on absolute value
'''
for tensor, data_arr in name_to_arr.items():
data_arr = np.asarray(data_arr)
data_arr = data_arr.flatten()
@ -481,7 +504,10 @@ class HistogramCollector(CalibrationDataCollector):
hist[:len(old_hist)] += old_hist
self.histogram_dict[tensor] = (hist, hist_edges)
def collect_for_entropy(self, name_to_arr):
def collect_value(self, name_to_arr):
'''
Collect histogram on real value
'''
for tensor, data_arr in name_to_arr.items():
data_arr = np.asarray(data_arr)
data_arr = data_arr.flatten()
@ -553,8 +579,17 @@ class HistogramCollector(CalibrationDataCollector):
hist_edges = histogram[1]
total = hist.sum()
cdf = np.cumsum(hist/total)
idx = np.searchsorted(cdf, percentile/100)
thresholds_dict[tensor] = (float(hist_edges[idx]), float(hist_edges[idx]))
if self.symmetric:
idx_right = np.searchsorted(cdf, percentile/100)
thresholds_dict[tensor] = (-float(hist_edges[idx_right]), float(hist_edges[idx_right]))
else:
idx_right = np.searchsorted(cdf, percentile/200)
idx_left = np.searchsorted(cdf, (1.0 - percentile/200))
thresholds_dict[tensor] = (float(hist_edges[idx_left]), float(hist_edges[idx_right]))
# Plot histogram for debug only
if False:
apply_plot(hist, hist_edges)
return thresholds_dict
@ -572,6 +607,10 @@ class HistogramCollector(CalibrationDataCollector):
optimal_threshold = self.get_entropy_threshold(histogram, num_quantized_bins)
thresholds_dict[tensor] = optimal_threshold
# Plot histogram for debug only
if False:
apply_plot(histogram[0], histogram[1])
return thresholds_dict
def get_entropy_threshold(self, histogram, num_quantized_bins):
@ -583,7 +622,8 @@ class HistogramCollector(CalibrationDataCollector):
from scipy.stats import entropy
import copy
hist, hist_edges, _, _, _ = histogram
hist = histogram[0]
hist_edges = histogram[1]
num_bins = hist.size
zero_bin_index = num_bins // 2
num_half_quantized_bin = num_quantized_bins // 2
@ -591,6 +631,20 @@ class HistogramCollector(CalibrationDataCollector):
kl_divergence = np.zeros(zero_bin_index - num_half_quantized_bin + 1)
thresholds = [(0, 0) for i in range(kl_divergence.size)]
# <------------ num bins ---------------->
# <--- quantized bins ---->
# |======|===========|===========|=======|
# zero bin index
# ^ ^
# | |
# start index end index (start of iteration)
# ^ ^
# | |
# start index end index ...
# ^ ^
# | |
# start index end index (end of iteration)
for i in range(num_half_quantized_bin, zero_bin_index + 1, 1):
start_index = zero_bin_index - i
end_index = zero_bin_index + i + 1 if (zero_bin_index + i + 1) <= num_bins else num_bins
@ -650,17 +704,22 @@ def create_calibrator(model,
augmented_model_path='augmented_model.onnx',
calibrate_method=CalibrationMethod.MinMax,
extra_options={}):
if calibrate_method == CalibrationMethod.MinMax:
return MinMaxCalibrater(model, op_types_to_calibrate, augmented_model_path)
# default settings for min-max algorithm
symmetric = False if 'symmetric' not in extra_options else extra_options['symmetric']
return MinMaxCalibrater(model, op_types_to_calibrate, augmented_model_path, symmetric=symmetric)
elif calibrate_method == CalibrationMethod.Entropy:
# default settings for entropy algorithm
num_bins = 128 if 'num_bins' not in extra_options else extra_options['num_bins']
num_quantized_bins = 128 if 'num_quantized_bins' not in extra_options else extra_options['num_quantized_bins']
return EntropyCalibrater(model, op_types_to_calibrate, augmented_model_path, num_bins=num_bins, num_quantized_bins=num_quantized_bins)
symmetric = False if 'symmetric' not in extra_options else extra_options['symmetric']
return EntropyCalibrater(model, op_types_to_calibrate, augmented_model_path, symmetric=symmetric, num_bins=num_bins, num_quantized_bins=num_quantized_bins)
elif calibrate_method == CalibrationMethod.Percentile:
# default settings for percentile algorithm
num_bins = 2048 if 'num_bins' not in extra_options else extra_options['num_bins']
percentile = 99.999 if 'percentile' not in extra_options else extra_options['percentile']
return PercentileCalibrater(model, op_types_to_calibrate, augmented_model_path, num_bins=num_bins, percentile=percentile)
symmetric = True if 'symmetric' not in extra_options else extra_options['symmetric']
return PercentileCalibrater(model, op_types_to_calibrate, augmented_model_path, symmetric=symmetric, num_bins=num_bins, percentile=percentile)
raise ValueError('Unsupported calibration method {}'.format(calibrate_method))

View file

@ -343,6 +343,21 @@ def generate_identified_filename(filename: Path, identifier: str) -> Path:
'''
return filename.parent.joinpath(filename.stem + identifier).with_suffix(filename.suffix)
def apply_plot(hist, hist_edges):
    '''
    Debug helper: print a histogram and its edges in full, then plot it with matplotlib.
    :param hist: histogram values; printed and passed to plt.stairs() as the step heights.
    :param hist_edges: histogram edges; printed and passed to plt.stairs() as the step edges.
    '''
    import sys
    import numpy
    import matplotlib.pyplot as plt

    # Lift numpy's summarization threshold only while printing, so the
    # process-wide print options are restored once the arrays are dumped.
    with numpy.printoptions(threshold=sys.maxsize):
        print("Histogram:")
        print(hist)
        print("Histogram Edges:")
        print(hist_edges)

    plt.stairs(hist, hist_edges, fill=True)
    plt.xlabel('Tensor value')
    plt.ylabel('Counts')
    plt.title('Tensor value V.S. Counts')
    # Hands control to matplotlib's display backend.
    plt.show()
def write_calibration_table(calibration_cache):
'''
Helper function to write calibration table to files.

View file

@ -205,6 +205,7 @@ def quantize_static(model_input,
and it's effective only when per channel quantization is supported and per_channel is True.
If specific op type supports per channel quantization but not explicitly specified with channel axis,
default channel axis will be used.
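CalibTensorRangeSymmetric = True/False : If enabled, the final range of each tensor computed during calibration is made symmetric around the central point "0". When not specified, the MinMax and Entropy calibration methods default to False and the Percentile method defaults to True.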
'''
mode = QuantizationMode.QLinearOps
@ -214,7 +215,8 @@ def quantize_static(model_input,
model = load_model(Path(model_input), optimize_model, False)
calibrator = create_calibrator(model, op_types_to_quantize, calibrate_method=calibrate_method)
calib_extra_options = {} if 'CalibTensorRangeSymmetric' not in extra_options else {'symmetric': extra_options['CalibTensorRangeSymmetric']}
calibrator = create_calibrator(model, op_types_to_quantize, calibrate_method=calibrate_method, extra_options=calib_extra_options)
calibrator.collect_data(calibration_data_reader)
tensors_range = calibrator.compute_range()