mirror of
https://github.com/saymrwulf/onnxruntime.git
synced 2026-05-17 21:10:43 +00:00
Update the python wrapper script to support weight sharing case (#22341)
Update the python wrapper script to support weight sharing case ### Description Update the script to support a json file that comes from the QNN converter, or one extracted from a QNN context binary file, for the weight sharing scenario
This commit is contained in:
parent
a732f7a4b3
commit
d2a5ee2e5e
1 changed files with 260 additions and 115 deletions
|
|
@ -20,135 +20,158 @@ class QnnTensorStruct:
|
|||
self.dim = []
|
||||
|
||||
|
||||
def is_quantized_data_type(qnn_data_type):
    """Tell whether a numeric QNN data type code is an 8/16-bit fixed-point type.

    Args:
        qnn_data_type: Numeric QNN data type code.

    Returns:
        True for QNN_DATATYPE_UFIXED_POINT_8 (0x0408),
        QNN_DATATYPE_UFIXED_POINT_16 (0x0416), QNN_DATATYPE_FIXED_POINT_8
        (0x0308) and QNN_DATATYPE_FIXED_POINT_16 (0x0316); False otherwise.
    """
    # A tuple (not a set) keeps plain equality semantics, so unhashable inputs
    # simply compare unequal instead of raising.
    return qnn_data_type in (0x0408, 0x0416, 0x0308, 0x0316)
|
||||
|
||||
|
||||
def qnn_data_type_to_onnx_data_type(qnn_data_type):
    """Translate a numeric QNN data type code into an ONNX ``TensorProto`` data type.

    Fixed-point QNN types are mapped to the same ONNX integer type as the
    plain integer QNN type of equal width and signedness.

    Args:
        qnn_data_type: Numeric QNN data type code (for example ``0x0408`` for
            QNN_DATATYPE_UFIXED_POINT_8).

    Returns:
        The matching ``TensorProto`` data type, or ``TensorProto.UNDEFINED``
        when the code is not listed below.
    """
    # Built inside the function so TensorProto is only looked up when called.
    code_groups = (
        ((0x0408, 0x0108), TensorProto.UINT8),  # QNN_DATATYPE_UFIXED_POINT_8 QNN_DATATYPE_UINT_8
        ((0x0416, 0x0116), TensorProto.UINT16),  # QNN_DATATYPE_UFIXED_POINT_16 QNN_DATATYPE_UINT_16
        ((0x0432, 0x0132), TensorProto.UINT32),  # QNN_DATATYPE_UFIXED_POINT_32 QNN_DATATYPE_UINT_32
        ((0x0164,), TensorProto.UINT64),  # QNN_DATATYPE_UINT_64
        ((0x0308, 0x0008), TensorProto.INT8),  # QNN_DATATYPE_FIXED_POINT_8 QNN_DATATYPE_INT_8
        ((0x0316, 0x0016), TensorProto.INT16),  # QNN_DATATYPE_FIXED_POINT_16 QNN_DATATYPE_INT_16
        ((0x0332, 0x0032), TensorProto.INT32),  # QNN_DATATYPE_FIXED_POINT_32 QNN_DATATYPE_INT_32
        ((0x0064,), TensorProto.INT64),  # QNN_DATATYPE_INT_64
        ((0x0216,), TensorProto.FLOAT16),  # QNN_DATATYPE_FLOAT_16
        ((0x0232,), TensorProto.FLOAT),  # QNN_DATATYPE_FLOAT_32
        ((0x0508,), TensorProto.BOOL),  # QNN_DATATYPE_BOOL_8
    )
    for codes, onnx_data_type in code_groups:
        if qnn_data_type in codes:
            return onnx_data_type
    # Unknown code: report UNDEFINED rather than falling through with None.
    return TensorProto.UNDEFINED
|
||||
def is_quantized_data_type(qnn_data_type, is_converter_json):
    """Tell whether a QNN data type is an 8/16-bit fixed-point (quantized) type.

    Args:
        qnn_data_type: The QNN data type. A numeric code when
            ``is_converter_json`` is true, otherwise the QNN enumerator name
            as a string (for example ``"QNN_DATATYPE_UFIXED_POINT_8"``).
        is_converter_json: Selects which of the two representations
            ``qnn_data_type`` is compared against.

    Returns:
        True for QNN_DATATYPE_UFIXED_POINT_8, QNN_DATATYPE_UFIXED_POINT_16,
        QNN_DATATYPE_FIXED_POINT_8 and QNN_DATATYPE_FIXED_POINT_16 in the
        selected representation; False otherwise.
    """
    if is_converter_json:
        # QNN_DATATYPE_UFIXED_POINT_8 QNN_DATATYPE_UFIXED_POINT_16 QNN_DATATYPE_FIXED_POINT_8 QNN_DATATYPE_FIXED_POINT_16
        return qnn_data_type in (0x0408, 0x0416, 0x0308, 0x0316)
    return qnn_data_type in (
        "QNN_DATATYPE_UFIXED_POINT_8",
        "QNN_DATATYPE_UFIXED_POINT_16",
        "QNN_DATATYPE_FIXED_POINT_8",
        "QNN_DATATYPE_FIXED_POINT_16",
    )
|
||||
|
||||
|
||||
def parse_qnn_json_file(qnn_json_file_path, qnn_input_tensor_dic, qnn_output_tensor_dic):
|
||||
with open(qnn_json_file_path) as qnn_json_file:
|
||||
qnn_json = json.load(qnn_json_file)
|
||||
assert "graph" in qnn_json, "QNN converted json file not valid. Can't find graph."
|
||||
assert "tensors" in qnn_json["graph"], "QNN converted json file not valid. Can't find tensors."
|
||||
for qnn_tensor_name, qnn_tensor_attribute in qnn_json["graph"]["tensors"].items():
|
||||
# type:0 - QNN input tensor, type:1 - QNN output tensor
|
||||
assert (
|
||||
"type" in qnn_tensor_attribute
|
||||
and "data_type" in qnn_tensor_attribute
|
||||
and "dims" in qnn_tensor_attribute
|
||||
), "QNN converted json file not valid. Can't find some keys from tensors"
|
||||
def qnn_data_type_to_onnx_data_type(qnn_data_type, is_converter_json):
    """Translate a QNN data type into an ONNX ``TensorProto`` data type.

    Fixed-point QNN types are mapped to the same ONNX integer type as the
    plain integer QNN type of equal width and signedness.

    Args:
        qnn_data_type: The QNN data type. A numeric code when
            ``is_converter_json`` is true, otherwise the QNN enumerator name
            as a string (for example ``"QNN_DATATYPE_FLOAT_32"``).
        is_converter_json: Selects which of the two representations
            ``qnn_data_type`` is compared against.

    Returns:
        The matching ``TensorProto`` data type, or ``TensorProto.UNDEFINED``
        when the value is not listed for the selected representation.
    """
    # Both tables are built inside the function so TensorProto is only looked
    # up when the function is called.
    if is_converter_json:
        type_groups = (
            ((0x0408, 0x0108), TensorProto.UINT8),  # QNN_DATATYPE_UFIXED_POINT_8 QNN_DATATYPE_UINT_8
            ((0x0416, 0x0116), TensorProto.UINT16),  # QNN_DATATYPE_UFIXED_POINT_16 QNN_DATATYPE_UINT_16
            ((0x0432, 0x0132), TensorProto.UINT32),  # QNN_DATATYPE_UFIXED_POINT_32 QNN_DATATYPE_UINT_32
            ((0x0164,), TensorProto.UINT64),  # QNN_DATATYPE_UINT_64
            ((0x0308, 0x0008), TensorProto.INT8),  # QNN_DATATYPE_FIXED_POINT_8 QNN_DATATYPE_INT_8
            ((0x0316, 0x0016), TensorProto.INT16),  # QNN_DATATYPE_FIXED_POINT_16 QNN_DATATYPE_INT_16
            ((0x0332, 0x0032), TensorProto.INT32),  # QNN_DATATYPE_FIXED_POINT_32 QNN_DATATYPE_INT_32
            ((0x0064,), TensorProto.INT64),  # QNN_DATATYPE_INT_64
            ((0x0216,), TensorProto.FLOAT16),  # QNN_DATATYPE_FLOAT_16
            ((0x0232,), TensorProto.FLOAT),  # QNN_DATATYPE_FLOAT_32
            ((0x0508,), TensorProto.BOOL),  # QNN_DATATYPE_BOOL_8
        )
    else:
        type_groups = (
            (("QNN_DATATYPE_UFIXED_POINT_8", "QNN_DATATYPE_UINT_8"), TensorProto.UINT8),
            (("QNN_DATATYPE_UFIXED_POINT_16", "QNN_DATATYPE_UINT_16"), TensorProto.UINT16),
            (("QNN_DATATYPE_UFIXED_POINT_32", "QNN_DATATYPE_UINT_32"), TensorProto.UINT32),
            (("QNN_DATATYPE_UINT_64",), TensorProto.UINT64),
            (("QNN_DATATYPE_FIXED_POINT_8", "QNN_DATATYPE_INT_8"), TensorProto.INT8),
            (("QNN_DATATYPE_FIXED_POINT_16", "QNN_DATATYPE_INT_16"), TensorProto.INT16),
            (("QNN_DATATYPE_FIXED_POINT_32", "QNN_DATATYPE_INT_32"), TensorProto.INT32),
            (("QNN_DATATYPE_INT_64",), TensorProto.INT64),
            (("QNN_DATATYPE_FLOAT_16",), TensorProto.FLOAT16),
            (("QNN_DATATYPE_FLOAT_32",), TensorProto.FLOAT),
            (("QNN_DATATYPE_BOOL_8",), TensorProto.BOOL),
        )

    for qnn_types, onnx_data_type in type_groups:
        if qnn_data_type in qnn_types:
            return onnx_data_type
    return TensorProto.UNDEFINED
|
||||
|
||||
# Get all graph inputs
|
||||
if qnn_tensor_attribute["type"] == 0:
|
||||
qnn_tensor = QnnTensorStruct()
|
||||
qnn_tensor.name = qnn_tensor_name
|
||||
qnn_tensor.onnx_data_type = qnn_data_type_to_onnx_data_type(qnn_tensor_attribute["data_type"])
|
||||
qnn_tensor.is_quantized = is_quantized_data_type(qnn_tensor_attribute["data_type"])
|
||||
qnn_tensor.dim = qnn_tensor_attribute["dims"]
|
||||
if (
|
||||
qnn_tensor_attribute["quant_params"]["definition"] == 1
|
||||
and qnn_tensor_attribute["quant_params"]["encoding"] == 0
|
||||
):
|
||||
qnn_tensor.scale = qnn_tensor_attribute["quant_params"]["scale_offset"]["scale"]
|
||||
qnn_tensor.offset = 0 - qnn_tensor_attribute["quant_params"]["scale_offset"]["offset"]
|
||||
qnn_input_tensor_dic[qnn_tensor_name] = qnn_tensor
|
||||
|
||||
# Get all graph outputs
|
||||
if qnn_tensor_attribute["type"] == 1:
|
||||
qnn_tensor = QnnTensorStruct()
|
||||
qnn_tensor.name = qnn_tensor_name
|
||||
qnn_tensor.onnx_data_type = qnn_data_type_to_onnx_data_type(qnn_tensor_attribute["data_type"])
|
||||
qnn_tensor.is_quantized = is_quantized_data_type(qnn_tensor_attribute["data_type"])
|
||||
qnn_tensor.dim = qnn_tensor_attribute["dims"]
|
||||
if (
|
||||
qnn_tensor_attribute["quant_params"]["definition"] == 1
|
||||
and qnn_tensor_attribute["quant_params"]["encoding"] == 0
|
||||
):
|
||||
qnn_tensor.scale = qnn_tensor_attribute["quant_params"]["scale_offset"]["scale"]
|
||||
qnn_tensor.offset = 0 - qnn_tensor_attribute["quant_params"]["scale_offset"]["offset"]
|
||||
qnn_output_tensor_dic[qnn_tensor_name] = qnn_tensor
|
||||
def _qnn_converter_tensor(qnn_tensor_name, qnn_tensor_attribute):
    """Build a QnnTensorStruct from one entry of the converter json "tensors" map."""
    is_qnn_converter_json = True
    qnn_tensor = QnnTensorStruct()
    qnn_tensor.name = qnn_tensor_name
    qnn_tensor.onnx_data_type = qnn_data_type_to_onnx_data_type(
        qnn_tensor_attribute["data_type"], is_qnn_converter_json
    )
    qnn_tensor.is_quantized = is_quantized_data_type(qnn_tensor_attribute["data_type"], is_qnn_converter_json)
    qnn_tensor.dim = qnn_tensor_attribute["dims"]

    quant_params = qnn_tensor_attribute["quant_params"]
    # NOTE(review): definition == 1 / encoding == 0 look like the numeric forms of
    # "QNN_DEFINITION_DEFINED" / "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET" that
    # parse_qnn_graph checks by name -- confirm against the QNN headers.
    if quant_params["definition"] == 1 and quant_params["encoding"] == 0:
        qnn_tensor.scale = quant_params["scale_offset"]["scale"]
        # The tensor stores the negated json offset.
        qnn_tensor.offset = 0 - quant_params["scale_offset"]["offset"]
    return qnn_tensor


def parse_qnn_converter_json_file(qnn_convert_json, qnn_input_tensor_dic, qnn_output_tensor_dic):
    """Collect the graph inputs and outputs described by a QNN converter json object.

    Args:
        qnn_convert_json: Parsed json object; its ``["graph"]["tensors"]`` map
            is read, keyed by tensor name.
        qnn_input_tensor_dic: Dict filled in place with one QnnTensorStruct per
            tensor whose "type" is 0, keyed by tensor name.
        qnn_output_tensor_dic: Dict filled in place with one QnnTensorStruct per
            tensor whose "type" is 1, keyed by tensor name.

    Raises:
        AssertionError: If a tensor lacks "type", "data_type" or "dims", or if
            either dict is still empty after all tensors were processed.
        KeyError: If a type 0/1 tensor has no "quant_params" entry.
    """
    for qnn_tensor_name, qnn_tensor_attribute in qnn_convert_json["graph"]["tensors"].items():
        # type:0 - QNN input tensor, type:1 - QNN output tensor
        assert (
            "type" in qnn_tensor_attribute and "data_type" in qnn_tensor_attribute and "dims" in qnn_tensor_attribute
        ), "QNN converted json file not valid. Can't find some keys from tensors"

        tensor_type = qnn_tensor_attribute["type"]
        if tensor_type == 0:
            # Graph input
            qnn_input_tensor_dic[qnn_tensor_name] = _qnn_converter_tensor(qnn_tensor_name, qnn_tensor_attribute)
        elif tensor_type == 1:
            # Graph output
            qnn_output_tensor_dic[qnn_tensor_name] = _qnn_converter_tensor(qnn_tensor_name, qnn_tensor_attribute)

    assert (
        len(qnn_input_tensor_dic) >= 1 and len(qnn_output_tensor_dic) >= 1
    ), "Converted QNN model not valid. It should have at least 1 input & 1 output."
|
||||
|
||||
|
||||
# Onnxruntime QNN EP can support context binary file generated by QNN tool chain. However QNN generated context binary file
|
||||
# uses channel last data layout and 8 bits or 16 bits for input and output.
|
||||
# This script gets the QNN model input & output information from QNN converted model_net.json file, compare them with Onnx model
|
||||
# and inserts Cast, Transpose nodes to Onnx model if required
|
||||
def main():
|
||||
parser = ArgumentParser("Generate Onnx model which includes the QNN context binary.")
|
||||
parser.add_argument("-b", "--qnn_bin", help="Required. Path to Qnn context binary file.", required=True, type=str)
|
||||
parser.add_argument(
|
||||
"-q", "--qnn_json", help="Required. Path to Qnn converted model_net.json file.", required=True, type=str
|
||||
)
|
||||
parser.add_argument(
|
||||
"--disable_embed_mode",
|
||||
action="store_true",
|
||||
default=False,
|
||||
help="Set embed_mode=1 which mean embed Qnn context binary into the onnx model. Otherwise, set context binary file path in the onnx model",
|
||||
)
|
||||
args = parser.parse_args()
|
||||
|
||||
# Parse Qnn model_net.json file to get the graph input output information
|
||||
qnn_input_tensor_dic = {}
|
||||
qnn_output_tensor_dic = {}
|
||||
parse_qnn_json_file(args.qnn_json, qnn_input_tensor_dic, qnn_output_tensor_dic)
|
||||
|
||||
if args.disable_embed_mode:
|
||||
ep_cache_context_content = args.qnn_bin
|
||||
ctx_embed_mode = 0
|
||||
else:
|
||||
with open(args.qnn_bin, "rb") as file:
|
||||
ep_cache_context_content = file.read()
|
||||
ctx_embed_mode = 1
|
||||
|
||||
def generate_wrapper_onnx_file(
|
||||
grap_name,
|
||||
model_file_name,
|
||||
qnn_input_tensor_dic,
|
||||
qnn_output_tensor_dic,
|
||||
disable_embed_mode,
|
||||
qnn_ctx_file,
|
||||
quantized_IO,
|
||||
qnn_sdk_version="unknown",
|
||||
):
|
||||
graph_nodes = []
|
||||
ini_list = []
|
||||
value_infos = []
|
||||
|
||||
model_inputs = []
|
||||
for qnn_input in qnn_input_tensor_dic.values():
|
||||
if qnn_input.is_quantized:
|
||||
if qnn_input.is_quantized and not quantized_IO:
|
||||
q_scale_input_name = qnn_input.name + "_scale"
|
||||
q_offset_input_name = qnn_input.name + "_zp"
|
||||
q_scale = helper.make_tensor(q_scale_input_name, TensorProto.FLOAT, [], [qnn_input.scale])
|
||||
|
|
@ -170,13 +193,22 @@ def main():
|
|||
else:
|
||||
model_inputs.append(helper.make_tensor_value_info(qnn_input.name, qnn_input.onnx_data_type, qnn_input.dim))
|
||||
|
||||
if disable_embed_mode:
|
||||
ep_cache_context_content = qnn_ctx_file
|
||||
ctx_embed_mode = 0
|
||||
else:
|
||||
with open(qnn_ctx_file, "rb") as file:
|
||||
ep_cache_context_content = file.read()
|
||||
ctx_embed_mode = 1
|
||||
|
||||
qnn_ep_context_node = helper.make_node(
|
||||
"EPContext",
|
||||
name="QnnContext",
|
||||
name=grap_name,
|
||||
inputs=qnn_input_tensor_dic.keys(),
|
||||
outputs=qnn_output_tensor_dic.keys(),
|
||||
ep_cache_context=ep_cache_context_content,
|
||||
embed_mode=ctx_embed_mode,
|
||||
ep_sdk_version=qnn_sdk_version,
|
||||
source="Qnn",
|
||||
domain="com.microsoft",
|
||||
)
|
||||
|
|
@ -184,7 +216,7 @@ def main():
|
|||
|
||||
model_outputs = []
|
||||
for qnn_output in qnn_output_tensor_dic.values():
|
||||
if qnn_output.is_quantized:
|
||||
if qnn_output.is_quantized and not quantized_IO:
|
||||
dq_scale_input_name = qnn_output.name + "_scale"
|
||||
dq_offset_input_name = qnn_output.name + "_zp"
|
||||
dq_scale = helper.make_tensor(dq_scale_input_name, TensorProto.FLOAT, [], [qnn_output.scale])
|
||||
|
|
@ -214,7 +246,120 @@ def main():
|
|||
|
||||
model_def = helper.make_model(graph_def, producer_name="MS")
|
||||
|
||||
onnx.save(model_def, args.qnn_json.replace(".json", "_qnn_ctx.onnx"))
|
||||
onnx.save(model_def, model_file_name)
|
||||
|
||||
|
||||
# Parse a Qnn graph from the json file that was extracted from a context binary file
|
||||
def _qnn_graph_tensor(tensor_info):
    """Build a QnnTensorStruct from the "info" object of one graph input/output."""
    is_qnn_converter_json = False
    qnn_tensor = QnnTensorStruct()
    qnn_tensor.name = tensor_info["name"]
    qnn_tensor.onnx_data_type = qnn_data_type_to_onnx_data_type(tensor_info["dataType"], is_qnn_converter_json)
    qnn_tensor.is_quantized = is_quantized_data_type(tensor_info["dataType"], is_qnn_converter_json)
    qnn_tensor.dim = tensor_info["dimensions"]

    quantize_params = tensor_info["quantizeParams"]
    if (
        quantize_params["definition"] == "QNN_DEFINITION_DEFINED"
        and quantize_params["quantizationEncoding"] == "QNN_QUANTIZATION_ENCODING_SCALE_OFFSET"
    ):
        qnn_tensor.scale = quantize_params["scaleOffset"]["scale"]
        # The tensor stores the negated json offset.
        qnn_tensor.offset = 0 - quantize_params["scaleOffset"]["offset"]
    return qnn_tensor


def parse_qnn_graph(qnn_graph, qnn_input_tensor_dic, qnn_output_tensor_dic):
    """Collect the inputs and outputs of one graph from a context-binary json.

    Args:
        qnn_graph: One graph object; its ``["info"]`` entry must provide
            "graphName", "graphInputs" and "graphOutputs".
        qnn_input_tensor_dic: Dict filled in place with one QnnTensorStruct per
            graph input, keyed by tensor name.
        qnn_output_tensor_dic: Dict filled in place with one QnnTensorStruct per
            graph output, keyed by tensor name.

    Returns:
        The graph name read from ``qnn_graph["info"]["graphName"]``.

    Raises:
        AssertionError: If either dict is still empty after parsing.
        KeyError: If an expected json key is missing.
    """
    graph_info = qnn_graph["info"]
    graph_name = graph_info["graphName"]
    raw_inputs = graph_info["graphInputs"]
    raw_outputs = graph_info["graphOutputs"]

    for raw_input in raw_inputs:
        qnn_tensor = _qnn_graph_tensor(raw_input["info"])
        qnn_input_tensor_dic[qnn_tensor.name] = qnn_tensor

    for raw_output in raw_outputs:
        qnn_tensor = _qnn_graph_tensor(raw_output["info"])
        qnn_output_tensor_dic[qnn_tensor.name] = qnn_tensor

    assert (
        len(qnn_input_tensor_dic) >= 1 and len(qnn_output_tensor_dic) >= 1
    ), "Converted QNN model not valid. It should have at least 1 input & 1 output."

    return graph_name
|
||||
|
||||
|
||||
# Onnxruntime QNN EP can support context binary file generated by QNN tool chain. However QNN generated context binary file
|
||||
# uses channel last data layout and 8 bits or 16 bits for input and output.
|
||||
# This script gets the QNN model input & output information from QNN converted model_net.json file, compare them with Onnx model
|
||||
# and inserts Cast, Transpose nodes to Onnx model if required
|
||||
def main():
    """Command-line entry point: wrap a QNN context binary in an Onnx model.

    The json file given with ``-q`` is inspected to decide how to read it:

    * A json with ``["graph"]["tensors"]`` is treated as QNN converter output.
      One model is written, with node name "QnnContext", to the json path with
      ".json" replaced by "_qnn_ctx.onnx".
    * A json with ``["info"]["graphs"]`` is treated as extracted from a QNN
      context binary. One model is written per graph, named
      "<graphName>_qnn_ctx.onnx", or "<graphName>_qnn_ctx_fp32_io.onnx" when
      ``--quantized_IO`` is not set. The json "buildId" is passed on as the
      QNN SDK version.

    Any other json only produces a message on stdout.
    """
    parser = ArgumentParser("Generate Onnx model which includes the QNN context binary.")
    parser.add_argument("-b", "--qnn_bin", help="Required. Path to Qnn context binary file.", required=True, type=str)
    parser.add_argument(
        "-q", "--qnn_json", help="Required. Path to Qnn converted model_net.json file.", required=True, type=str
    )
    parser.add_argument(
        "--disable_embed_mode",
        action="store_true",
        default=False,
        # The previous help text described the default (embed_mode=1), not what
        # passing this flag does.
        help="Set embed_mode=0 which means only the Qnn context binary file path is set in the onnx model. Otherwise (default), embed_mode=1 and the Qnn context binary is embedded into the onnx model.",
    )
    parser.add_argument(
        "--quantized_IO",
        action="store_true",
        default=False,
        help="QNN converted context binary use quantized data as graph inputs and outputs. Will keep it if quantized_IO=True, otherwise, will insert Q and DQ nodes accordingly to make the graph inputs & outputs as float32 data type.",
    )
    args = parser.parse_args()

    # Parse Qnn model_net.json file to get the graph input output information
    with open(args.qnn_json) as qnn_json_file:
        qnn_json_obj = json.load(qnn_json_file)

    if "graph" in qnn_json_obj and "tensors" in qnn_json_obj["graph"]:
        print("This json file is from Qnn converter")
        qnn_input_tensor_dic = {}
        qnn_output_tensor_dic = {}
        parse_qnn_converter_json_file(qnn_json_obj, qnn_input_tensor_dic, qnn_output_tensor_dic)

        generate_wrapper_onnx_file(
            "QnnContext",
            args.qnn_json.replace(".json", "_qnn_ctx.onnx"),
            qnn_input_tensor_dic,
            qnn_output_tensor_dic,
            args.disable_embed_mode,
            args.qnn_bin,
            args.quantized_IO,
        )
    elif "info" in qnn_json_obj and "graphs" in qnn_json_obj["info"]:
        print("This json file is extracted from QNN context binary file")
        qnn_version = qnn_json_obj["info"]["buildId"]
        # One wrapper model per graph; every wrapper refers to the same context binary.
        for qnn_graph in qnn_json_obj["info"]["graphs"]:
            qnn_input_tensor_dic = {}
            qnn_output_tensor_dic = {}
            graph_name = parse_qnn_graph(qnn_graph, qnn_input_tensor_dic, qnn_output_tensor_dic)

            ctx_file_name = graph_name + "_qnn_ctx.onnx"
            if not args.quantized_IO:
                ctx_file_name = ctx_file_name.replace(".onnx", "_fp32_io.onnx")

            generate_wrapper_onnx_file(
                graph_name,
                ctx_file_name,
                qnn_input_tensor_dic,
                qnn_output_tensor_dic,
                args.disable_embed_mode,
                args.qnn_bin,
                args.quantized_IO,
                qnn_version,
            )
    else:
        print("json file unrecognized.")
||||
|
||||
if __name__ == "__main__":
|
||||
|
|
|
|||
Loading…
Reference in a new issue