QDQ debugger - activations compare (#12544)

Debugger for QDQ loss - activation matching

This is the first part of the QDQ debugger tool: activation matching, where we identify and match corresponding activations from the float model and the qdq model. The idea is that during quantization, we have an original float model and a qdq model. The debugger can run the two models side by side using the same input data. By comparing intermediate activations, we can help the model author figure out where the values differ, and take steps to reduce precision loss.
This commit is contained in:
Chen Fu 2022-08-15 17:03:28 -07:00 committed by GitHub
parent 30ee5a4f79
commit eb6aa861cf
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 177 additions and 42 deletions

View file

@ -10,7 +10,7 @@ import itertools
import uuid
from enum import Enum
from pathlib import Path
from typing import Dict, List, Optional, Sequence
from typing import Optional, Sequence
import numpy as np
import onnx

View file

@ -50,7 +50,7 @@ from onnx import ModelProto, TensorProto, helper, numpy_helper
import onnxruntime
from .calibrate import CalibraterBase, CalibrationDataReader
from .quant_utils import clone_model_with_shape_infer
from .quant_utils import DEQUANT_OUTPUT_SUFFIX, QUANT_INPUT_SUFFIX, clone_model_with_shape_infer
_TENSOR_SAVE_POSTFIX = "_ReshapedSavedOutput"
_TENSOR_SAVE_POSTFIX_LEN = len(_TENSOR_SAVE_POSTFIX)
@ -145,3 +145,80 @@ def collect_activations(
output_dict.setdefault(output_name, []).append(output_data)
return output_dict
# Alternative post-QDQ tensor-name suffix: the DequantizeLinear output suffix
# followed by "_1". Tensor names ending in it are treated as post-QDQ values.
# NOTE(review): presumably emitted when the quantizer has to create a second,
# de-duplicated dequantize output for the same activation -- confirm.
_POST_QDQ_POSTFIX1 = DEQUANT_OUTPUT_SUFFIX + "_1"
def _add_pre_post_qdq_pair(
qdq_cmp: Dict[str, Dict[str, Sequence[numpy.ndarray]]],
activation_name: str,
pre_qdq_tensors: Optional[Sequence[numpy.ndarray]],
post_qdq_tensors: Optional[Sequence[numpy.ndarray]],
) -> None:
if post_qdq_tensors and pre_qdq_tensors:
qdq_cmp[activation_name] = {}
qdq_cmp[activation_name]["pre_qdq"] = pre_qdq_tensors
qdq_cmp[activation_name]["post_qdq"] = post_qdq_tensors
def create_activation_matching(
    qdq_activations: Dict[str, Sequence[numpy.ndarray]],
    float_activations: Optional[Dict[str, Sequence[numpy.ndarray]]] = None,
) -> Dict[str, Dict[str, Sequence[numpy.ndarray]]]:
    """Match activation values to help debug accuracy loss due to quantization.

    This function takes saved activations from the QDQ model and (optionally)
    the float point model, and provides a data structure for comparing:

    * from the QDQ model, activation values before and after the QDQ operation
    * across both models, activations from the original model vs the
      corresponding activations in the QDQ model

    Args:
        qdq_activations: Output of `collect_activations`. This must be from a
            quantized model with QDQ format.
        float_activations: Output of `collect_activations`. This must be from
            the float point model. When omitted or empty, no "float" entries
            are added.

    Returns:
        Dict for comparing pre and post quantized activation tensors, keyed by
        activation name. Only activations for which both the pre-QDQ and the
        post-QDQ values were found are included. E.g.
        ```
        qdq_cmp = create_activation_matching(qdq_activations)
        print(qdq_cmp['activation1']['pre_qdq'][0])
        print(qdq_cmp['activation1']['post_qdq'][0])

        qdq_cmp = create_activation_matching(qdq_activations, float_activations)
        print(qdq_cmp['activation1']['float'][0])
        print(qdq_cmp['activation1']['pre_qdq'][0])
        print(qdq_cmp['activation1']['post_qdq'][0])
        ```
    """
    qdq_cmp: Dict[str, Dict[str, Sequence[numpy.ndarray]]] = {}
    for tensor_name, tensors in qdq_activations.items():
        if tensor_name.endswith(QUANT_INPUT_SUFFIX):
            # The suffixed tensor holds the pre-QDQ values; the tensor under
            # the bare activation name holds the post-QDQ values.
            act_name = tensor_name[: -len(QUANT_INPUT_SUFFIX)]
            _add_pre_post_qdq_pair(qdq_cmp, act_name, tensors, qdq_activations.get(act_name))
            continue

        # Otherwise a dequantize-output style suffix marks the post-QDQ
        # values, and the bare activation name holds the pre-QDQ values.
        for post_suffix in (DEQUANT_OUTPUT_SUFFIX, _POST_QDQ_POSTFIX1):
            if tensor_name.endswith(post_suffix):
                act_name = tensor_name[: -len(post_suffix)]
                _add_pre_post_qdq_pair(qdq_cmp, act_name, qdq_activations.get(act_name), tensors)
                break

    if not float_activations:
        return qdq_cmp

    # Attach the float model's values to every activation that was matched.
    for act_name, act_values in qdq_cmp.items():
        float_acts = float_activations.get(act_name)
        # len() instead of truthiness so a stacked numpy array is accepted.
        if float_acts is not None and len(float_acts) > 0:
            act_values["float"] = float_acts

    return qdq_cmp

View file

@ -15,7 +15,10 @@ __version__ = "0.1.0"
# Domain strings.
# NOTE(review): presumably the operator domains for standard ONNX ops and
# Microsoft contrib ops -- confirm against the call sites.
onnx_domain = "ai.onnx"
ms_domain = "com.microsoft"
# Quantize operator name, and the suffix that add_quant_input_suffix()
# appends to a tensor name.
QUANT_OP_NAME = "QuantizeLinear"
QUANT_INPUT_SUFFIX = "_QuantizeLinear_Input"
# Dequantize operator name, and the suffix that add_dequant_output_suffix()
# appends to a tensor name.
DEQUANT_OP_NAME = "DequantizeLinear"
DEQUANT_OUTPUT_SUFFIX = "_DequantizeLinear_Output"
type_to_name = {
1: "FLOAT",
@ -573,7 +576,7 @@ def add_quant_suffix(tensor_name):
def add_quant_input_suffix(tensor_name):
    """Return ``tensor_name`` with the QuantizeLinear input suffix appended.

    The suffix is taken from the module-level ``QUANT_INPUT_SUFFIX`` constant
    so that code matching on the suffix stays in sync with this function.
    """
    return tensor_name + QUANT_INPUT_SUFFIX
def add_quant_output_suffix(tensor_name):
@ -589,4 +592,4 @@ def add_dequant_input_suffix(tensor_name):
def add_dequant_output_suffix(tensor_name):
    """Return ``tensor_name`` with the DequantizeLinear output suffix appended.

    The suffix is taken from the module-level ``DEQUANT_OUTPUT_SUFFIX``
    constant so that code matching on the suffix stays in sync with this
    function.
    """
    return tensor_name + DEQUANT_OUTPUT_SUFFIX

View file

@ -9,14 +9,20 @@
import tempfile
import unittest
from pathlib import Path
from typing import Dict, List
import numpy as np
import onnx
from onnx import TensorProto, helper, numpy_helper
import onnxruntime
from onnxruntime.quantization import QuantFormat, QuantType, quantize_static
from onnxruntime.quantization.calibrate import CalibrationDataReader
from onnxruntime.quantization.save_activations import collect_activations, modify_model_output_intermediate_tensors
from onnxruntime.quantization.qdq_loss_debug import (
collect_activations,
create_activation_matching,
modify_model_output_intermediate_tensors,
)
def generate_input_initializer(tensor_shape, tensor_dtype, input_name):
@ -28,53 +34,53 @@ def generate_input_initializer(tensor_shape, tensor_dtype, input_name):
return init
def construct_test_model1(test_model_path):
def construct_test_model1(test_model_path: str, activations_as_outputs=False):
""" Create an ONNX model shaped as:
```
(input)
|
Relu
/ \
Conv \
| \
Relu Conv
| |
Conv |
\ /
Relu1
/ \
Conv1 \
| \
Relu2 Conv3
| |
Conv2 |
\ /
Add
|
(X6)
(AddOut)
```
We are keeping all intermediate tensors as output, just for test verification
purposes
"""
input_vi = helper.make_tensor_value_info("input", TensorProto.FLOAT, [1, 3, 1, 3])
x1_output = helper.make_tensor_value_info("X1", TensorProto.FLOAT, [1, 3, 1, 3])
x2_output = helper.make_tensor_value_info("X2", TensorProto.FLOAT, [1, 3, 1, 3])
x3_output = helper.make_tensor_value_info("X3", TensorProto.FLOAT, [1, 3, 1, 3])
x4_output = helper.make_tensor_value_info("X4", TensorProto.FLOAT, [1, 3, 1, 3])
x5_output = helper.make_tensor_value_info("X5", TensorProto.FLOAT, [1, 3, 1, 3])
x6_output = helper.make_tensor_value_info("X6", TensorProto.FLOAT, [1, 3, 1, 3])
x1_output = helper.make_tensor_value_info("Relu1Out", TensorProto.FLOAT, [1, 3, 1, 3])
x2_output = helper.make_tensor_value_info("Conv1Out", TensorProto.FLOAT, [1, 3, 1, 3])
x3_output = helper.make_tensor_value_info("Relu2Out", TensorProto.FLOAT, [1, 3, 1, 3])
x4_output = helper.make_tensor_value_info("Conv2Out", TensorProto.FLOAT, [1, 3, 1, 3])
x5_output = helper.make_tensor_value_info("Conv3Out", TensorProto.FLOAT, [1, 3, 1, 3])
x6_output = helper.make_tensor_value_info("AddOut", TensorProto.FLOAT, [1, 3, 1, 3])
w1 = generate_input_initializer([3, 3, 1, 1], np.float32, "W1")
b1 = generate_input_initializer([3], np.float32, "B1")
w3 = generate_input_initializer([3, 3, 1, 1], np.float32, "W3")
b3 = generate_input_initializer([3], np.float32, "B3")
w5 = generate_input_initializer([3, 3, 1, 1], np.float32, "W5")
b5 = generate_input_initializer([3], np.float32, "B5")
relu_node_1 = helper.make_node("Relu", ["input"], ["X1"], name="Relu1")
conv_node_1 = helper.make_node("Conv", ["X1", "W1", "B1"], ["X2"], name="Conv1")
relu_node_2 = helper.make_node("Relu", ["X2"], ["X3"], name="Relu2")
conv_node_2 = helper.make_node("Conv", ["X3", "W3", "B3"], ["X4"], name="Conv2")
conv_node_3 = helper.make_node("Conv", ["X1", "W5", "B5"], ["X5"], name="Conv3")
add_node = helper.make_node("Add", ["X4", "X5"], ["X6"], name="Add")
relu_node_1 = helper.make_node("Relu", ["input"], ["Relu1Out"], name="Relu1")
conv_node_1 = helper.make_node("Conv", ["Relu1Out", "W1", "B1"], ["Conv1Out"], name="Conv1")
relu_node_2 = helper.make_node("Relu", ["Conv1Out"], ["Relu2Out"], name="Relu2")
conv_node_2 = helper.make_node("Conv", ["Relu2Out", "W3", "B3"], ["Conv2Out"], name="Conv2")
conv_node_3 = helper.make_node("Conv", ["Relu1Out", "W5", "B5"], ["Conv3Out"], name="Conv3")
add_node = helper.make_node("Add", ["Conv2Out", "Conv3Out"], ["AddOut"], name="Add")
# we are keeping all tensors in the output anyway for verification purpose
outputs = [x6_output]
if activations_as_outputs:
outputs.extend([x1_output, x2_output, x3_output, x4_output, x5_output])
graph = helper.make_graph(
[relu_node_1, conv_node_1, relu_node_2, conv_node_2, conv_node_3, add_node],
"test_graph_4",
[input_vi],
[x1_output, x2_output, x3_output, x4_output, x5_output, x6_output],
[relu_node_1, conv_node_1, relu_node_2, conv_node_2, conv_node_3, add_node], "test_graph_4", [input_vi], outputs
)
graph.initializer.add().CopyFrom(w1)
graph.initializer.add().CopyFrom(b1)
@ -108,6 +114,21 @@ class TestDataReader(CalibrationDataReader):
self.preprocess_flag = True
def augment_model_collect_activations(
    model_path: str, augmented_model_path: str, data_reader: TestDataReader
) -> Dict[str, List[np.ndarray]]:
    """Augment a model, save it, and collect its activations.

    The model at ``model_path`` is passed through
    ``modify_model_output_intermediate_tensors``, the result is written to
    ``augmented_model_path`` (without external data), and the saved model is
    then run through ``collect_activations`` with ``data_reader``.

    Returns:
        Whatever ``collect_activations`` produces for the augmented model.
    """
    onnx.save(
        modify_model_output_intermediate_tensors(model_path),
        augmented_model_path,
        save_as_external_data=False,
    )
    return collect_activations(augmented_model_path, data_reader)
class TestSaveActivations(unittest.TestCase):
@classmethod
def setUpClass(cls):
@ -118,20 +139,12 @@ class TestSaveActivations(unittest.TestCase):
cls._tmp_model_dir.cleanup()
def test_saved_tensors_match_internal_tensors(self):
test_model_path = str(Path(self._tmp_model_dir.name) / "augmented_model.onnx")
construct_test_model1(test_model_path)
test_model_path = str(Path(self._tmp_model_dir.name) / "test_model1.onnx")
construct_test_model1(test_model_path, activations_as_outputs=True)
data_reader = TestDataReader()
aug_model = modify_model_output_intermediate_tensors(test_model_path)
augmented_model_path = str(Path(self._tmp_model_dir.name).joinpath("augmented_test_model_1.onnx"))
onnx.save(
aug_model,
augmented_model_path,
save_as_external_data=False,
)
tensor_dict = collect_activations(augmented_model_path, data_reader)
tensor_dict = augment_model_collect_activations(test_model_path, augmented_model_path, data_reader)
# run original model and compare the tensors
sess_options = onnxruntime.SessionOptions()
@ -160,6 +173,48 @@ class TestSaveActivations(unittest.TestCase):
act = actual.reshape(-1)
np.testing.assert_equal(exp, act)
def test_create_activation_matching_present(self):
    """Matched activations expose float, pre-QDQ and post-QDQ values.

    Builds the float test model, quantizes it to QDQ format, collects the
    activations of both models and checks the matching result.
    """
    model_dir = Path(self._tmp_model_dir.name)

    float_model_path = str(model_dir / "float_model2.onnx")
    construct_test_model1(float_model_path, activations_as_outputs=False)

    data_reader = TestDataReader()
    qdq_model_path = str(model_dir / "qdq_model2.onnx")
    quantize_static(
        float_model_path,
        qdq_model_path,
        data_reader,
        quant_format=QuantFormat.QDQ,
        per_channel=False,
        reduce_range=False,
        activation_type=QuantType.QInt8,
        weight_type=QuantType.QInt8,
    )

    # The reader is rewound before each further pass over the data.
    data_reader.rewind()
    float_activations = augment_model_collect_activations(
        float_model_path,
        str(model_dir / "augmented_float_model2.onnx"),
        data_reader,
    )

    data_reader.rewind()
    qdq_activations = augment_model_collect_activations(
        qdq_model_path,
        str(model_dir / "augmented_qdq_model2.onnx"),
        data_reader,
    )

    compare_dict = create_activation_matching(qdq_activations, float_activations)

    # 'Conv1Out' is combined with 'Relu2Out'
    expected_names = ("Relu1Out", "Relu2Out", "Conv2Out", "Conv3Out", "AddOut")
    for name in expected_names:
        matched = compare_dict[name]
        for source in ("float", "pre_qdq", "post_qdq"):
            self.assertTrue(matched[source])

    self.assertFalse(compare_dict.get("Conv1Out"))
# Run the unittest test runner when this file is executed directly as a script.
if __name__ == "__main__":
unittest.main()