diff --git a/cmake/onnxruntime_python.cmake b/cmake/onnxruntime_python.cmake
index e5cc605aa4..7c75f2deb1 100644
--- a/cmake/onnxruntime_python.cmake
+++ b/cmake/onnxruntime_python.cmake
@@ -186,6 +186,11 @@ file(GLOB onnxruntime_python_tools_srcs CONFIGURE_DEPENDS
 file(GLOB onnxruntime_python_tools_featurizers_src CONFIGURE_DEPENDS
     "${ONNXRUNTIME_ROOT}/python/tools/featurizer_ops/*.py"
 )
+file(GLOB onnxruntime_python_quantization_src CONFIGURE_DEPENDS
+    "${ONNXRUNTIME_ROOT}/python/tools/quantization/*.py"
+)
+list(REMOVE_ITEM onnxruntime_python_quantization_src
+  "${ONNXRUNTIME_ROOT}/python/tools/quantization/test_calibrate.py")
 file(GLOB onnxruntime_python_datasets_srcs CONFIGURE_DEPENDS
     "${ONNXRUNTIME_ROOT}/python/datasets/*.py"
 )
@@ -204,6 +209,7 @@ add_custom_command(
   COMMAND ${CMAKE_COMMAND} -E make_directory $/onnxruntime/datasets
   COMMAND ${CMAKE_COMMAND} -E make_directory $/onnxruntime/tools
   COMMAND ${CMAKE_COMMAND} -E make_directory $/onnxruntime/tools/featurizer_ops
+  COMMAND ${CMAKE_COMMAND} -E make_directory $/onnxruntime/quantization
   COMMAND ${CMAKE_COMMAND} -E copy
       ${ONNXRUNTIME_ROOT}/__init__.py
       $/onnxruntime/
@@ -243,6 +249,9 @@ add_custom_command(
   COMMAND ${CMAKE_COMMAND} -E copy
       ${onnxruntime_python_tools_featurizers_src}
       $/onnxruntime/tools/featurizer_ops/
+  COMMAND ${CMAKE_COMMAND} -E copy
+      ${onnxruntime_python_quantization_src}
+      $/onnxruntime/quantization/
   COMMAND ${CMAKE_COMMAND} -E copy
       ${REPO_ROOT}/VERSION_NUMBER
       $
diff --git a/onnxruntime/python/tools/quantization/__init__.py b/onnxruntime/python/tools/quantization/__init__.py
new file mode 100644
index 0000000000..97bfa3d69e
--- /dev/null
+++ b/onnxruntime/python/tools/quantization/__init__.py
@@ -0,0 +1,2 @@
+from onnxruntime.quantization.quantize import quantize
+from onnxruntime.quantization.quantize import QuantizationMode
\ No newline at end of file
diff --git a/onnxruntime/python/tools/quantization/quantize.py b/onnxruntime/python/tools/quantization/quantize.py
index 1458421a2c..c493abbb35 100644
--- a/onnxruntime/python/tools/quantization/quantize.py
+++ b/onnxruntime/python/tools/quantization/quantize.py
@@ -295,7 +295,60 @@ class ONNXQuantizer:
         # Map of all original value names to quantized value names
         self.quantized_value_map = {}
 
+    def replace_gemm_with_matmul(self):
+        '''
+        Rewrite every plain Gemm node (alpha == beta == 1.0 and transA == transB == 0)
+        as MatMul, followed by Add when the optional bias input C is present.
+        Gemm nodes with any other attribute values are left untouched.
+        '''
+        nodes_to_remove = []
+        nodes_to_add = []
+        for node in self.model.graph.node:
+            if node.op_type == 'Gemm':
+                alpha = 1.0
+                beta = 1.0
+                transA = 0
+                transB = 0
+                for attr in node.attribute:
+                    if attr.name == 'alpha':
+                        alpha = onnx.helper.get_attribute_value(attr)
+                    elif attr.name == 'beta':
+                        beta = onnx.helper.get_attribute_value(attr)
+                    elif attr.name == 'transA':
+                        transA = onnx.helper.get_attribute_value(attr)
+                    elif attr.name == 'transB':
+                        transB = onnx.helper.get_attribute_value(attr)
+                if alpha == 1.0 and beta == 1.0 and transA == 0 and transB == 0:
+                    # C is an optional Gemm input: it may be missing or an empty name,
+                    # in which case MatMul alone produces the Gemm output.
+                    has_bias = len(node.input) > 2 and node.input[2] != ''
+                    matmul_output = node.output[0]
+                    if has_bias:
+                        matmul_output += '_MatMul'
+
+                    matmul_node = onnx.helper.make_node(
+                        'MatMul',
+                        [node.input[0], node.input[1]],
+                        [matmul_output],
+                        name=node.output[0]+'_MatMul')
+                    nodes_to_add.append(matmul_node)
+
+                    if has_bias:
+                        add_node = onnx.helper.make_node(
+                            'Add',
+                            inputs=[matmul_output, node.input[2]],
+                            outputs=node.output,
+                            name=node.output[0]+'_Add')
+                        nodes_to_add.append(add_node)
+
+                    nodes_to_remove.append(node)
+
+        self.model.graph.node.extend(nodes_to_add)
+        for node in nodes_to_remove:
+            self.model.graph.node.remove(node)
+
     def quantize_model(self):
+        self.replace_gemm_with_matmul()
         # Create a new topologically sorted list for quantizing a model
         new_list = []
         for node in self.model.graph.node:
diff --git a/setup.py b/setup.py
index 3cbe88e8c5..072f6d15d5 100644
--- a/setup.py
+++ b/setup.py
@@ -222,6 +222,7 @@ packages = [
     'onnxruntime.capi.training',
     'onnxruntime.datasets',
     'onnxruntime.tools',
+    'onnxruntime.quantization',
 ]
 
 package_data = {}