mirror of
https://github.com/saymrwulf/onnxruntime.git
synced 2026-05-31 23:27:43 +00:00
Add support of generating symmetric/asymmetric tensor's range for calibration (#10663)
* add support of symmetric/asymmetric range of value
* modify comment
* Update calibrate.py
* update quantize.py
* remove newline at end of file
This commit is contained in:
parent
ffde44cd09
commit
d2d22f2195
3 changed files with 99 additions and 23 deletions
|
|
@ -15,7 +15,7 @@ from onnx import onnx_pb as onnx_proto
|
|||
from six import string_types
|
||||
from enum import Enum
|
||||
|
||||
from .quant_utils import QuantType, smooth_distribution
|
||||
from .quant_utils import QuantType, smooth_distribution, apply_plot
|
||||
from .registry import QLinearOpsRegistry
|
||||
|
||||
import abc
|
||||
|
|
@ -39,11 +39,12 @@ class CalibrationDataReader(metaclass=abc.ABCMeta):
|
|||
|
||||
|
||||
class CalibraterBase:
|
||||
def __init__(self, model, op_types_to_calibrate=[], augmented_model_path='augmented_model.onnx'):
|
||||
def __init__(self, model, op_types_to_calibrate=[], augmented_model_path='augmented_model.onnx', symmetric=False):
|
||||
'''
|
||||
:param model: ONNX model to calibrate. It can be a ModelProto or a model path
|
||||
:param op_types_to_calibrate: operator types to calibrate. By default, calibrate all the float32/float16 tensors.
|
||||
:param augmented_model_path: save augmented model to this path.
|
||||
:param symmetric: make range of tensor symmetric (central point is 0).
|
||||
'''
|
||||
if isinstance(model, str):
|
||||
self.model = onnx.load(model)
|
||||
|
|
@ -54,6 +55,7 @@ class CalibraterBase:
|
|||
|
||||
self.op_types_to_calibrate = op_types_to_calibrate
|
||||
self.augmented_model_path = augmented_model_path
|
||||
self.symmetric = symmetric
|
||||
|
||||
# augment graph
|
||||
self.augment_model = None
|
||||
|
|
@ -136,13 +138,14 @@ class CalibraterBase:
|
|||
|
||||
|
||||
class MinMaxCalibrater(CalibraterBase):
|
||||
def __init__(self, model, op_types_to_calibrate=[], augmented_model_path='augmented_model.onnx'):
|
||||
def __init__(self, model, op_types_to_calibrate=[], augmented_model_path='augmented_model.onnx', symmetric=False):
|
||||
'''
|
||||
:param model: ONNX model to calibrate. It can be a ModelProto or a model path
|
||||
:param op_types_to_calibrate: operator types to calibrate. By default, calibrate all the float32/float16 tensors.
|
||||
:param augmented_model_path: save augmented model to this path.
|
||||
:param symmetric: make range of tensor symmetric (central point is 0).
|
||||
'''
|
||||
super(MinMaxCalibrater, self).__init__(model, op_types_to_calibrate, augmented_model_path)
|
||||
super(MinMaxCalibrater, self).__init__(model, op_types_to_calibrate, augmented_model_path, symmetric)
|
||||
self.intermediate_outputs = []
|
||||
self.calibrate_tensors_range = None
|
||||
self.num_model_outputs = len(self.model.graph.output)
|
||||
|
|
@ -259,7 +262,12 @@ class MinMaxCalibrater(CalibraterBase):
|
|||
if type(max_value_array) == int or max_value_array.size > 0:
|
||||
max_value = float(max_value_array)
|
||||
|
||||
pairs.append(tuple([min_value, max_value]))
|
||||
if self.symmetric:
|
||||
max_absolute_value = max(abs(min_value), abs(max_value))
|
||||
pairs.append(tuple([-max_absolute_value, max_absolute_value]))
|
||||
else:
|
||||
pairs.append(tuple([min_value, max_value]))
|
||||
|
||||
|
||||
new_calibrate_tensors_range = dict(zip(calibrate_tensor_names, pairs))
|
||||
if self.calibrate_tensors_range:
|
||||
|
|
@ -275,6 +283,7 @@ class HistogramCalibrater(CalibraterBase):
|
|||
op_types_to_calibrate=[],
|
||||
augmented_model_path='augmented_model.onnx',
|
||||
method='percentile',
|
||||
symmetric=False,
|
||||
num_bins=128,
|
||||
num_quantized_bins=2048,
|
||||
percentile=99.999):
|
||||
|
|
@ -283,6 +292,8 @@ class HistogramCalibrater(CalibraterBase):
|
|||
:param op_types_to_calibrate: operator types to calibrate. By default, calibrate all the float32/float16 tensors.
|
||||
:param augmented_model_path: save augmented model to this path.
|
||||
:param method: A string. One of ['entropy', 'percentile'].
|
||||
:param symmetric: make range of tensor symmetric (central point is 0).
|
||||
:param num_bins: number of bins to create a new histogram for collecting tensor values.
|
||||
:param num_quantized_bins: number of quantized bins. Default 2048.
|
||||
:param percentile: A float number between [0, 100]. Default 99.999.
|
||||
'''
|
||||
|
|
@ -293,6 +304,7 @@ class HistogramCalibrater(CalibraterBase):
|
|||
self.model_original_outputs = set(output.name for output in self.model.graph.output)
|
||||
self.collector = None
|
||||
self.method = method
|
||||
self.symmetric = symmetric
|
||||
self.num_bins = num_bins
|
||||
self.num_quantized_bins = num_quantized_bins
|
||||
self.percentile = percentile
|
||||
|
|
@ -349,6 +361,7 @@ class HistogramCalibrater(CalibraterBase):
|
|||
|
||||
if not self.collector:
|
||||
self.collector = HistogramCollector(method=self.method,
|
||||
symmetric=self.symmetric,
|
||||
num_bins=self.num_bins,
|
||||
num_quantized_bins=self.num_quantized_bins,
|
||||
percentile=self.percentile)
|
||||
|
|
@ -359,7 +372,7 @@ class HistogramCalibrater(CalibraterBase):
|
|||
def compute_range(self):
|
||||
'''
|
||||
Compute the min-max range of tensor
|
||||
:return: dictionary mapping: {added node names: (ReduceMin, ReduceMax) pairs }
|
||||
:return: dictionary mapping: {tensor name: (min value, max value)}
|
||||
'''
|
||||
if not self.collector:
|
||||
raise ValueError("No collector created and can't generate calibration data.")
|
||||
|
|
@ -372,6 +385,7 @@ class EntropyCalibrater(HistogramCalibrater):
|
|||
op_types_to_calibrate=[],
|
||||
augmented_model_path='augmented_model.onnx',
|
||||
method='entropy',
|
||||
symmetric=False,
|
||||
num_bins=128,
|
||||
num_quantized_bins=128):
|
||||
'''
|
||||
|
|
@ -379,11 +393,12 @@ class EntropyCalibrater(HistogramCalibrater):
|
|||
:param op_types_to_calibrate: operator types to calibrate. By default, calibrate all the float32/float16 tensors.
|
||||
:param augmented_model_path: save augmented model to this path.
|
||||
:param method: A string. One of ['entropy', 'percentile'].
|
||||
:param symmetric: make range of tensor symmetric (central point is 0).
|
||||
:param num_bins: number of bins to create a new histogram for collecting tensor values.
|
||||
:param num_quantized_bins: number of quantized bins. Default 128.
|
||||
'''
|
||||
super(EntropyCalibrater, self).__init__(model, op_types_to_calibrate, augmented_model_path,
|
||||
method=method, num_bins=num_bins, num_quantized_bins=num_quantized_bins)
|
||||
super(EntropyCalibrater, self).__init__(model, op_types_to_calibrate, augmented_model_path, method=method,
|
||||
symmetric=symmetric, num_bins=num_bins, num_quantized_bins=num_quantized_bins)
|
||||
|
||||
class PercentileCalibrater(HistogramCalibrater):
|
||||
def __init__(self,
|
||||
|
|
@ -391,6 +406,7 @@ class PercentileCalibrater(HistogramCalibrater):
|
|||
op_types_to_calibrate=[],
|
||||
augmented_model_path='augmented_model.onnx',
|
||||
method='percentile',
|
||||
symmetric=False,
|
||||
num_bins=2048,
|
||||
percentile=99.999):
|
||||
'''
|
||||
|
|
@ -398,12 +414,12 @@ class PercentileCalibrater(HistogramCalibrater):
|
|||
:param op_types_to_calibrate: operator types to calibrate. By default, calibrate all the float32/float16 tensors.
|
||||
:param augmented_model_path: save augmented model to this path.
|
||||
:param method: A string. One of ['entropy', 'percentile'].
|
||||
:param symmetric: make range of tensor symmetric (central point is 0).
|
||||
:param num_bins: number of bins to create a new histogram for collecting tensor values. Default 2048.
|
||||
:param percentile: A float number between [0, 100]. Default 99.999.
|
||||
'''
|
||||
super(PercentileCalibrater, self).__init__(model, op_types_to_calibrate, augmented_model_path,
|
||||
method=method, num_bins=num_bins,
|
||||
percentile=percentile)
|
||||
super(PercentileCalibrater, self).__init__(model, op_types_to_calibrate, augmented_model_path, method=method,
|
||||
symmetric=symmetric, num_bins=num_bins, percentile=percentile)
|
||||
|
||||
class CalibrationDataCollector(metaclass=abc.ABCMeta):
|
||||
"""
|
||||
|
|
@ -434,9 +450,10 @@ class HistogramCollector(CalibrationDataCollector):
|
|||
ref: https://docs.nvidia.com/deeplearning/tensorrt/pytorch-quantization-toolkit/docs/_modules/
|
||||
pytorch_quantization/calib/histogram.html
|
||||
"""
|
||||
def __init__(self, method, num_bins, num_quantized_bins, percentile):
|
||||
def __init__(self, method, symmetric, num_bins, num_quantized_bins, percentile):
|
||||
self.histogram_dict = {}
|
||||
self.method = method
|
||||
self.symmetric = symmetric
|
||||
self.num_bins = num_bins
|
||||
self.num_quantized_bins= num_quantized_bins
|
||||
self.percentile = percentile
|
||||
|
|
@ -450,13 +467,19 @@ class HistogramCollector(CalibrationDataCollector):
|
|||
# TODO: Currently we have different collect() for entropy and percentile method respectively.
|
||||
# Need unified collect in the future.
|
||||
if self.method == 'entropy':
|
||||
return self.collect_for_entropy(name_to_arr)
|
||||
return self.collect_value(name_to_arr)
|
||||
elif self.method == 'percentile':
|
||||
return self.collect_for_percentile(name_to_arr)
|
||||
if self.symmetric:
|
||||
return self.collect_absolute_value(name_to_arr)
|
||||
else:
|
||||
return self.collect_value(name_to_arr)
|
||||
else:
|
||||
raise ValueError('Only \'entropy\' or \'percentile\' method are supported')
|
||||
|
||||
def collect_for_percentile(self, name_to_arr):
|
||||
def collect_absolute_value(self, name_to_arr):
|
||||
'''
|
||||
Collect histogram on absolute value
|
||||
'''
|
||||
for tensor, data_arr in name_to_arr.items():
|
||||
data_arr = np.asarray(data_arr)
|
||||
data_arr = data_arr.flatten()
|
||||
|
|
@ -481,7 +504,10 @@ class HistogramCollector(CalibrationDataCollector):
|
|||
hist[:len(old_hist)] += old_hist
|
||||
self.histogram_dict[tensor] = (hist, hist_edges)
|
||||
|
||||
def collect_for_entropy(self, name_to_arr):
|
||||
def collect_value(self, name_to_arr):
|
||||
'''
|
||||
Collect histogram on real value
|
||||
'''
|
||||
for tensor, data_arr in name_to_arr.items():
|
||||
data_arr = np.asarray(data_arr)
|
||||
data_arr = data_arr.flatten()
|
||||
|
|
@ -553,8 +579,17 @@ class HistogramCollector(CalibrationDataCollector):
|
|||
hist_edges = histogram[1]
|
||||
total = hist.sum()
|
||||
cdf = np.cumsum(hist/total)
|
||||
idx = np.searchsorted(cdf, percentile/100)
|
||||
thresholds_dict[tensor] = (float(hist_edges[idx]), float(hist_edges[idx]))
|
||||
if self.symmetric:
|
||||
idx_right = np.searchsorted(cdf, percentile/100)
|
||||
thresholds_dict[tensor] = (-float(hist_edges[idx_right]), float(hist_edges[idx_right]))
|
||||
else:
|
||||
idx_right = np.searchsorted(cdf, percentile/200)
|
||||
idx_left = np.searchsorted(cdf, (1.0 - percentile/200))
|
||||
thresholds_dict[tensor] = (float(hist_edges[idx_left]), float(hist_edges[idx_right]))
|
||||
|
||||
# Plot histogram for debug only
|
||||
if False:
|
||||
apply_plot(hist, hist_edges)
|
||||
|
||||
return thresholds_dict
|
||||
|
||||
|
|
@ -572,6 +607,10 @@ class HistogramCollector(CalibrationDataCollector):
|
|||
optimal_threshold = self.get_entropy_threshold(histogram, num_quantized_bins)
|
||||
thresholds_dict[tensor] = optimal_threshold
|
||||
|
||||
# Plot histogram for debug only
|
||||
if False:
|
||||
apply_plot(histogram[0], histogram[1])
|
||||
|
||||
return thresholds_dict
|
||||
|
||||
def get_entropy_threshold(self, histogram, num_quantized_bins):
|
||||
|
|
@ -583,7 +622,8 @@ class HistogramCollector(CalibrationDataCollector):
|
|||
from scipy.stats import entropy
|
||||
import copy
|
||||
|
||||
hist, hist_edges, _, _, _ = histogram
|
||||
hist = histogram[0]
|
||||
hist_edges = histogram[1]
|
||||
num_bins = hist.size
|
||||
zero_bin_index = num_bins // 2
|
||||
num_half_quantized_bin = num_quantized_bins // 2
|
||||
|
|
@ -591,6 +631,20 @@ class HistogramCollector(CalibrationDataCollector):
|
|||
kl_divergence = np.zeros(zero_bin_index - num_half_quantized_bin + 1)
|
||||
thresholds = [(0, 0) for i in range(kl_divergence.size)]
|
||||
|
||||
# <------------ num bins ---------------->
|
||||
# <--- quantized bins ---->
|
||||
# |======|===========|===========|=======|
|
||||
# zero bin index
|
||||
# ^ ^
|
||||
# | |
|
||||
# start index end index (start of iteration)
|
||||
# ^ ^
|
||||
# | |
|
||||
# start index end index ...
|
||||
# ^ ^
|
||||
# | |
|
||||
# start index end index (end of iteration)
|
||||
|
||||
for i in range(num_half_quantized_bin, zero_bin_index + 1, 1):
|
||||
start_index = zero_bin_index - i
|
||||
end_index = zero_bin_index + i + 1 if (zero_bin_index + i + 1) <= num_bins else num_bins
|
||||
|
|
@ -650,17 +704,22 @@ def create_calibrator(model,
|
|||
augmented_model_path='augmented_model.onnx',
|
||||
calibrate_method=CalibrationMethod.MinMax,
|
||||
extra_options={}):
|
||||
|
||||
if calibrate_method == CalibrationMethod.MinMax:
|
||||
return MinMaxCalibrater(model, op_types_to_calibrate, augmented_model_path)
|
||||
# default settings for min-max algorithm
|
||||
symmetric = False if 'symmetric' not in extra_options else extra_options['symmetric']
|
||||
return MinMaxCalibrater(model, op_types_to_calibrate, augmented_model_path, symmetric=symmetric)
|
||||
elif calibrate_method == CalibrationMethod.Entropy:
|
||||
# default settings for entropy algorithm
|
||||
num_bins = 128 if 'num_bins' not in extra_options else extra_options['num_bins']
|
||||
num_quantized_bins = 128 if 'num_quantized_bins' not in extra_options else extra_options['num_quantized_bins']
|
||||
return EntropyCalibrater(model, op_types_to_calibrate, augmented_model_path, num_bins=num_bins, num_quantized_bins=num_quantized_bins)
|
||||
symmetric = False if 'symmetric' not in extra_options else extra_options['symmetric']
|
||||
return EntropyCalibrater(model, op_types_to_calibrate, augmented_model_path, symmetric=symmetric, num_bins=num_bins, num_quantized_bins=num_quantized_bins)
|
||||
elif calibrate_method == CalibrationMethod.Percentile:
|
||||
# default settings for percentile algorithm
|
||||
num_bins = 2048 if 'num_bins' not in extra_options else extra_options['num_bins']
|
||||
percentile = 99.999 if 'percentile' not in extra_options else extra_options['percentile']
|
||||
return PercentileCalibrater(model, op_types_to_calibrate, augmented_model_path, num_bins=num_bins, percentile=percentile)
|
||||
symmetric = True if 'symmetric' not in extra_options else extra_options['symmetric']
|
||||
return PercentileCalibrater(model, op_types_to_calibrate, augmented_model_path, symmetric=symmetric, num_bins=num_bins, percentile=percentile)
|
||||
|
||||
raise ValueError('Unsupported calibration method {}'.format(calibrate_method))
|
||||
|
|
|
|||
|
|
@ -343,6 +343,21 @@ def generate_identified_filename(filename: Path, identifier: str) -> Path:
|
|||
'''
|
||||
return filename.parent.joinpath(filename.stem + identifier).with_suffix(filename.suffix)
|
||||
|
||||
def apply_plot(hist, hist_edges):
|
||||
import sys
|
||||
import numpy
|
||||
import matplotlib.pyplot as plt
|
||||
numpy.set_printoptions(threshold=sys.maxsize)
|
||||
print("Histogram:")
|
||||
print(hist)
|
||||
print("Histogram Edges:")
|
||||
print(hist_edges)
|
||||
plt.stairs(hist, hist_edges, fill=True)
|
||||
plt.xlabel('Tensor value')
|
||||
plt.ylabel('Counts')
|
||||
plt.title('Tensor value V.S. Counts')
|
||||
plt.show()
|
||||
|
||||
def write_calibration_table(calibration_cache):
|
||||
'''
|
||||
Helper function to write calibration table to files.
|
||||
|
|
|
|||
|
|
@ -205,6 +205,7 @@ def quantize_static(model_input,
|
|||
and it's effective only when per channel quantization is supported and per_channel is True.
|
||||
If specific op type supports per channel quantization but not explicitly specified with channel axis,
|
||||
default channel axis will be used.
|
||||
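CalibTensorRangeSymmetric = True/False : Default is False. If enabled, the final tensor range computed during calibration is explicitly made symmetric around the central point "0". Note: when this option is not set, the Percentile calibration method still defaults to a symmetric range.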
|
||||
'''
|
||||
|
||||
mode = QuantizationMode.QLinearOps
|
||||
|
|
@ -214,7 +215,8 @@ def quantize_static(model_input,
|
|||
|
||||
model = load_model(Path(model_input), optimize_model, False)
|
||||
|
||||
calibrator = create_calibrator(model, op_types_to_quantize, calibrate_method=calibrate_method)
|
||||
calib_extra_options = {} if 'CalibTensorRangeSymmetric' not in extra_options else {'symmetric': extra_options['CalibTensorRangeSymmetric']}
|
||||
calibrator = create_calibrator(model, op_types_to_quantize, calibrate_method=calibrate_method, extra_options=calib_extra_options)
|
||||
calibrator.collect_data(calibration_data_reader)
|
||||
tensors_range = calibrator.compute_range()
|
||||
|
||||
|
|
|
|||
Loading…
Reference in a new issue