mirror of
https://github.com/saymrwulf/onnxruntime.git
synced 2026-06-03 23:49:44 +00:00
Fixes for quantizing a BERT from HuggingFace (#3939)
* Fixes for quantizing a BERT from HuggingFace
* Address CR and some other minor fixes
This commit is contained in:
parent
33208c9f6b
commit
132ce3a561
2 changed files with 36 additions and 30 deletions
|
|
@ -53,7 +53,7 @@ class QuantizeConfig:
|
|||
('QuantizationType', 'Signed' if self.sign_bit_ else 'Unsigned'),
|
||||
('ReservedBit', self.reserved_bits_)])
|
||||
|
||||
def quantize_matmul_2d_with_weight(in_node, in_graph, nf, converted_weights, quantized_inputs, qcfg_dict, update_qcfg_dict, default_qcfg):
|
||||
def quantize_matmul_2d_with_weight(in_node, in_graph, nf, converted_weights, quantized_inputs, qcfg_dict, update_qcfg_dict, default_qcfg, onnx_opset_ver):
|
||||
assert in_node.op_type == 'MatMul'
|
||||
|
||||
# quantize weight
|
||||
|
|
@ -158,7 +158,11 @@ def quantize_matmul_2d_with_weight(in_node, in_graph, nf, converted_weights, qua
|
|||
Q_Xf = nf.make_node('Mul', [norm_X, np.asarray(x_qcfg.q_range()).astype(np.float32)])
|
||||
Q_Xf = nf.make_node('Add', [Q_Xf, np.asarray(0.5).astype(np.float32)])
|
||||
Q_Xf = nf.make_node('Floor', Q_Xf)
|
||||
Q_Xf = nf.make_node('Clip', Q_Xf, {'max':x_qcfg.q_max(), 'min':x_qcfg.q_min()})
|
||||
if onnx_opset_ver < 11:
|
||||
Q_Xf = nf.make_node('Clip', Q_Xf, {'max':x_qcfg.q_max(), 'min':x_qcfg.q_min()})
|
||||
else:
|
||||
# Clip changed min max to inputs in opset 11
|
||||
Q_Xf = nf.make_node('Clip', [Q_Xf, np.asarray(x_qcfg.q_min()).astype(np.float32), np.asarray(x_qcfg.q_max()).astype(np.float32)])
|
||||
Q_X = nf.make_node('Cast', Q_Xf, {'to':int({np.uint8 : onnx.TensorProto.UINT8,
|
||||
np.int8 : onnx.TensorProto.INT8,
|
||||
np.uint16 : onnx.TensorProto.UINT16,
|
||||
|
|
@ -238,7 +242,7 @@ def convert_matmul_model(input_model, output_model, only_for_scan=False, share_i
|
|||
out_mp = onnx.ModelProto()
|
||||
out_mp.CopyFrom(in_mp)
|
||||
out_mp.ir_version = 5 # update ir version to avoid requirement of initializer in graph input
|
||||
ensure_opset(out_mp, 10) # bump up to ONNX opset 10, which is required for MatMulInteger
|
||||
onnx_opset_ver = ensure_opset(out_mp, 10) # bump up to ONNX opset 10, which is required for MatMulInteger
|
||||
ensure_opset(out_mp, 1, 'com.microsoft') # add MS domain for MatMulInteger16
|
||||
out_mp.graph.ClearField('node')
|
||||
nf = NodeFactory(out_mp.graph)
|
||||
|
|
@ -249,7 +253,7 @@ def convert_matmul_model(input_model, output_model, only_for_scan=False, share_i
|
|||
continue
|
||||
|
||||
if in_n.op_type == 'MatMul' and not only_for_scan:
|
||||
if quantize_matmul_2d_with_weight(in_n, in_mp.graph, nf, converted_weights, quantized_inputs, qcfg_dict, export_qcfg_json, default_qcfg):
|
||||
if quantize_matmul_2d_with_weight(in_n, in_mp.graph, nf, converted_weights, quantized_inputs, qcfg_dict, export_qcfg_json, default_qcfg, onnx_opset_ver):
|
||||
continue
|
||||
|
||||
out_n = out_mp.graph.node.add()
|
||||
|
|
@ -262,7 +266,7 @@ def convert_matmul_model(input_model, output_model, only_for_scan=False, share_i
|
|||
subgraph_quantized_inputs = {} if share_input_quantization else None # remember quantized inputs that might be able to share between MatMuls
|
||||
for in_sn in in_subgraph.node:
|
||||
if in_sn.op_type == 'MatMul':
|
||||
if quantize_matmul_2d_with_weight(in_sn, in_subgraph, scan_nf, converted_weights, subgraph_quantized_inputs, qcfg_dict, export_qcfg_json, default_qcfg):
|
||||
if quantize_matmul_2d_with_weight(in_sn, in_subgraph, scan_nf, converted_weights, subgraph_quantized_inputs, qcfg_dict, export_qcfg_json, default_qcfg, onnx_opset_ver):
|
||||
continue
|
||||
|
||||
if upgrade_op(scan_nf, in_sn):
|
||||
|
|
|
|||
|
|
@ -230,12 +230,16 @@ class SymbolicShapeInference:
|
|||
if self.auto_merge_:
|
||||
assert len(dims) == 2 # only allow symbol->int merge in binary ops for now
|
||||
is_int = [is_literal(d) for d in dims]
|
||||
assert sum(is_int) == 1
|
||||
int_dim = is_int.index(1)
|
||||
if self.verbose_ > 0:
|
||||
print('dim {} has been merged with value {}'.format(dims[1 - int_dim], dims[int_dim]))
|
||||
self._check_merged_dims(dims, allow_broadcast=False)
|
||||
return dims[int_dim]
|
||||
if sum(is_int) == 1:
|
||||
int_dim = is_int.index(1)
|
||||
if self.verbose_ > 0:
|
||||
print('dim {} has been merged with value {}'.format(dims[1 - int_dim], dims[int_dim]))
|
||||
self._check_merged_dims(dims, allow_broadcast=False)
|
||||
return dims[int_dim]
|
||||
else:
|
||||
if self.verbose_ > 0:
|
||||
print('dim {} has been mergd with dim {}'.format(dims[0], dims[1]))
|
||||
return dims[0]
|
||||
else:
|
||||
return None
|
||||
if all([d == dims[0] for d in dims]):
|
||||
|
|
@ -645,12 +649,10 @@ class SymbolicShapeInference:
|
|||
def _infer_Expand(self, node):
|
||||
expand_to_shape = self._try_get_value(node, 1)
|
||||
if expand_to_shape is not None:
|
||||
sympy_shape = self._get_sympy_shape(node, 0)
|
||||
new_sympy_shape = self._broadcast_shapes(sympy_shape, expand_to_shape)
|
||||
|
||||
# new_shape's dim can come from 'Expand' computation
|
||||
self._update_computed_dims(new_sympy_shape)
|
||||
new_shape = get_shape_from_sympy_shape(new_sympy_shape)
|
||||
# new_shape's dim can come from shape value
|
||||
self._update_computed_dims(expand_to_shape)
|
||||
shape = self._get_shape(node, 0)
|
||||
new_shape = self._broadcast_shapes(shape, get_shape_from_sympy_shape(expand_to_shape))
|
||||
vi = self.known_vi_[node.output[0]]
|
||||
vi.CopyFrom(helper.make_tensor_value_info(node.output[0], self.known_vi_[node.input[0]].type.tensor_type.elem_type, new_shape))
|
||||
|
||||
|
|
@ -780,13 +782,13 @@ class SymbolicShapeInference:
|
|||
rank = len(sympy_shape)
|
||||
if pads is not None:
|
||||
assert len(pads) == 2*rank
|
||||
new_shape = [d + pad_up + pad_down for d, pad_up, pad_down in zip(sympy_shape, pads[:rank], pads[rank:])]
|
||||
self._update_computed_dims(new_shape)
|
||||
new_sympy_shape = [d + pad_up + pad_down for d, pad_up, pad_down in zip(sympy_shape, pads[:rank], pads[rank:])]
|
||||
self._update_computed_dims(new_sympy_shape)
|
||||
else:
|
||||
# dynamic pads, create new symbolic dimensions
|
||||
new_shape = self._new_symbolic_shape(rank, node)
|
||||
new_sympy_shape = self._new_symbolic_shape(rank, node)
|
||||
output_tp = self.known_vi_[node.input[0]].type.tensor_type.elem_type
|
||||
vi.CopyFrom(helper.make_tensor_value_info(node.output[0], output_tp, get_shape_from_sympy_shape(new_shape)))
|
||||
vi.CopyFrom(helper.make_tensor_value_info(node.output[0], output_tp, get_shape_from_sympy_shape(new_sympy_shape)))
|
||||
|
||||
def _infer_Pool(self, node):
|
||||
sympy_shape = self._compute_conv_pool_shape(node)
|
||||
|
|
@ -804,12 +806,12 @@ class SymbolicShapeInference:
|
|||
start = as_scalar(input_data[0])
|
||||
limit = as_scalar(input_data[1])
|
||||
delta = as_scalar(input_data[2])
|
||||
new_shape = [sympy.Max(sympy.ceiling((limit - start)/delta), 0)]
|
||||
new_sympy_shape = [sympy.Max(sympy.ceiling((limit - start)/delta), 0)]
|
||||
else:
|
||||
new_dim = self._new_symbolic_dim_from_output(node)
|
||||
new_shape = [self.symbolic_dims_[new_dim]]
|
||||
self._update_computed_dims(new_shape)
|
||||
vi.CopyFrom(helper.make_tensor_value_info(node.output[0], self.known_vi_[node.input[0]].type.tensor_type.elem_type, get_shape_from_sympy_shape(new_shape)))
|
||||
new_sympy_shape = [self.symbolic_dims_[new_dim]]
|
||||
self._update_computed_dims(new_sympy_shape)
|
||||
vi.CopyFrom(helper.make_tensor_value_info(node.output[0], self.known_vi_[node.input[0]].type.tensor_type.elem_type, get_shape_from_sympy_shape(new_sympy_shape)))
|
||||
|
||||
def _infer_ReduceProd(self, node):
|
||||
axes = get_attribute(node, 'axes')
|
||||
|
|
@ -1042,15 +1044,15 @@ class SymbolicShapeInference:
|
|||
def _infer_Tile(self, node):
|
||||
repeats_value = self._get_value(node, 1)
|
||||
input_sympy_shape = self._get_sympy_shape(node, 0)
|
||||
new_shape = []
|
||||
new_sympy_shape = []
|
||||
for i,d in enumerate(input_sympy_shape):
|
||||
new_dim = d * repeats_value[i]
|
||||
new_shape.append(new_dim)
|
||||
self._update_computed_dims(new_shape)
|
||||
new_sympy_shape.append(new_dim)
|
||||
self._update_computed_dims(new_sympy_shape)
|
||||
vi = self.known_vi_[node.output[0]]
|
||||
vi.CopyFrom(helper.make_tensor_value_info(node.output[0],
|
||||
vi.type.tensor_type.elem_type,
|
||||
get_shape_from_sympy_shape(new_shape)))
|
||||
get_shape_from_sympy_shape(new_sympy_shape)))
|
||||
|
||||
def _infer_TopK(self, node):
|
||||
rank = self._get_shape_rank(node, 0)
|
||||
|
|
@ -1268,4 +1270,4 @@ if __name__ == '__main__':
|
|||
print('output model ' + args.output)
|
||||
print('Doing symbolic shape inference...')
|
||||
out_mp = SymbolicShapeInference.infer_shapes(args.input, args.output, args.int_max, args.auto_merge, args.guess_output_rank, args.verbose)
|
||||
print('Done!')
|
||||
print('Done!')
|
||||
Loading…
Reference in a new issue