mirror of
https://github.com/saymrwulf/onnxruntime.git
synced 2026-06-06 00:03:22 +00:00
[QNN EP] Clean-up todo for OnnxInputInfo (#18416)
### Description
Renames the `OnnxInputInfo` struct to `TensorInfo`, and `QnnModelWrapper::GetOnnxInputInfo()` to `GetTensorInfo()`, because the struct can describe both input and output tensors.

### Motivation and Context
Cleans up a TODO item.
This commit is contained in:
parent
5aeed62630
commit
c9d5345c46
9 changed files with 53 additions and 54 deletions
|
|
@ -56,8 +56,8 @@ Status BaseOpBuilder::ProcessInput(QnnModelWrapper& qnn_model_wrapper,
|
|||
return Status::OK();
|
||||
}
|
||||
|
||||
OnnxInputInfo input_info = {};
|
||||
ORT_RETURN_IF_ERROR(qnn_model_wrapper.GetOnnxInputInfo(input, input_info));
|
||||
TensorInfo input_info = {};
|
||||
ORT_RETURN_IF_ERROR(qnn_model_wrapper.GetTensorInfo(input, input_info));
|
||||
|
||||
std::vector<uint8_t> unpacked_tensor;
|
||||
if (input_info.is_initializer) {
|
||||
|
|
@ -126,8 +126,8 @@ Status BaseOpBuilder::ProcessOutputs(QnnModelWrapper& qnn_model_wrapper,
|
|||
for (size_t output_i = 0; output_i < output_count; ++output_i) {
|
||||
const auto& output_name = outputs[output_i].node_arg.Name();
|
||||
|
||||
OnnxInputInfo output_info = {};
|
||||
ORT_RETURN_IF_ERROR(qnn_model_wrapper.GetOnnxInputInfo(outputs[output_i], output_info));
|
||||
TensorInfo output_info = {};
|
||||
ORT_RETURN_IF_ERROR(qnn_model_wrapper.GetTensorInfo(outputs[output_i], output_info));
|
||||
|
||||
if (output_info.quant_param.encodingDefinition == QNN_DEFINITION_DEFINED) {
|
||||
ORT_RETURN_IF_ERROR(OverrideOutputQuantParam(qnn_model_wrapper, node_unit, logger, input_names,
|
||||
|
|
|
|||
|
|
@ -251,7 +251,7 @@ class BatchNormOpBuilder : public BaseOpBuilder {
|
|||
return Status::OK();
|
||||
}
|
||||
|
||||
Status PreprocessMean(const OnnxInputInfo& mean_info,
|
||||
Status PreprocessMean(const TensorInfo& mean_info,
|
||||
const bool is_npu_backend,
|
||||
const uint8_t* mean_raw_ptr,
|
||||
const size_t mean_raw_ptr_length,
|
||||
|
|
@ -273,7 +273,7 @@ class BatchNormOpBuilder : public BaseOpBuilder {
|
|||
return Status::OK();
|
||||
}
|
||||
|
||||
Status PreprocessStd(const OnnxInputInfo& var_info,
|
||||
Status PreprocessStd(const TensorInfo& var_info,
|
||||
const bool is_npu_backend,
|
||||
const uint8_t* var_raw_ptr,
|
||||
const size_t var_raw_ptr_length,
|
||||
|
|
@ -297,7 +297,7 @@ class BatchNormOpBuilder : public BaseOpBuilder {
|
|||
return Status::OK();
|
||||
}
|
||||
|
||||
Status PreprocessScale(const OnnxInputInfo& scale_info,
|
||||
Status PreprocessScale(const TensorInfo& scale_info,
|
||||
const bool is_npu_backend,
|
||||
const uint8_t* scale_raw_ptr,
|
||||
const size_t scale_raw_ptr_length,
|
||||
|
|
@ -325,7 +325,7 @@ class BatchNormOpBuilder : public BaseOpBuilder {
|
|||
return Status::OK();
|
||||
}
|
||||
|
||||
Status PreprocessBias(const OnnxInputInfo& bias_info,
|
||||
Status PreprocessBias(const TensorInfo& bias_info,
|
||||
const bool is_npu_backend,
|
||||
const uint8_t* bias_raw_ptr,
|
||||
const size_t bias_raw_ptr_length,
|
||||
|
|
@ -354,7 +354,7 @@ class BatchNormOpBuilder : public BaseOpBuilder {
|
|||
return Status::OK();
|
||||
}
|
||||
|
||||
Status Postprocess(const OnnxInputInfo& info,
|
||||
Status Postprocess(const TensorInfo& info,
|
||||
const bool is_npu_backend,
|
||||
const std::vector<double>& double_tensor,
|
||||
const double rmax,
|
||||
|
|
@ -476,14 +476,14 @@ Status BatchNormOpBuilder::ProcessInputs(QnnModelWrapper& qnn_model_wrapper,
|
|||
{
|
||||
const std::string& scale_name = inputs[1].node_arg.Name();
|
||||
const std::string& bias_name = inputs[2].node_arg.Name();
|
||||
OnnxInputInfo var_info = {};
|
||||
OnnxInputInfo mean_info = {};
|
||||
OnnxInputInfo scale_info = {};
|
||||
OnnxInputInfo bias_info = {};
|
||||
ORT_RETURN_IF_ERROR(qnn_model_wrapper.GetOnnxInputInfo(inputs[1], scale_info));
|
||||
ORT_RETURN_IF_ERROR(qnn_model_wrapper.GetOnnxInputInfo(inputs[2], bias_info));
|
||||
ORT_RETURN_IF_ERROR(qnn_model_wrapper.GetOnnxInputInfo(inputs[3], mean_info));
|
||||
ORT_RETURN_IF_ERROR(qnn_model_wrapper.GetOnnxInputInfo(inputs[4], var_info));
|
||||
TensorInfo var_info = {};
|
||||
TensorInfo mean_info = {};
|
||||
TensorInfo scale_info = {};
|
||||
TensorInfo bias_info = {};
|
||||
ORT_RETURN_IF_ERROR(qnn_model_wrapper.GetTensorInfo(inputs[1], scale_info));
|
||||
ORT_RETURN_IF_ERROR(qnn_model_wrapper.GetTensorInfo(inputs[2], bias_info));
|
||||
ORT_RETURN_IF_ERROR(qnn_model_wrapper.GetTensorInfo(inputs[3], mean_info));
|
||||
ORT_RETURN_IF_ERROR(qnn_model_wrapper.GetTensorInfo(inputs[4], var_info));
|
||||
|
||||
// scale, bias, mean, and var must be initializers
|
||||
ORT_RETURN_IF_NOT(scale_info.is_initializer, "scale must be initializers");
|
||||
|
|
|
|||
|
|
@ -84,8 +84,8 @@ Status ClipOpBuilder::ProcessAttributesAndOutputs(QnnModelWrapper& qnn_model_wra
|
|||
std::vector<uint8_t> min_val_bytes;
|
||||
|
||||
if (num_inputs > 1 && !inputs[1].node_arg.Name().empty()) {
|
||||
OnnxInputInfo min_input_info = {};
|
||||
ORT_RETURN_IF_ERROR(qnn_model_wrapper.GetOnnxInputInfo(inputs[1], min_input_info));
|
||||
TensorInfo min_input_info = {};
|
||||
ORT_RETURN_IF_ERROR(qnn_model_wrapper.GetTensorInfo(inputs[1], min_input_info));
|
||||
ORT_RETURN_IF_NOT(min_input_info.qnn_data_type == qnn_data_type,
|
||||
"QNN EP: The 'min' input of the Clip operator must be of type float32.");
|
||||
assert(min_input_info.is_initializer); // Checked by ExplicitOpCheck().
|
||||
|
|
@ -106,8 +106,8 @@ Status ClipOpBuilder::ProcessAttributesAndOutputs(QnnModelWrapper& qnn_model_wra
|
|||
std::vector<uint8_t> max_val_bytes;
|
||||
|
||||
if (num_inputs > 2 && !inputs[2].node_arg.Name().empty()) {
|
||||
OnnxInputInfo max_input_info = {};
|
||||
ORT_RETURN_IF_ERROR(qnn_model_wrapper.GetOnnxInputInfo(inputs[2], max_input_info));
|
||||
TensorInfo max_input_info = {};
|
||||
ORT_RETURN_IF_ERROR(qnn_model_wrapper.GetTensorInfo(inputs[2], max_input_info));
|
||||
ORT_RETURN_IF_NOT(max_input_info.qnn_data_type == qnn_data_type,
|
||||
"QNN EP: The 'max' input of the Clip operator must of type float32.");
|
||||
assert(max_input_info.is_initializer); // Checked by ExplicitOpCheck().
|
||||
|
|
|
|||
|
|
@ -175,8 +175,8 @@ Status ConvOpBuilder::ProcessConv2DInputs(QnnModelWrapper& qnn_model_wrapper,
|
|||
//
|
||||
{
|
||||
const std::string& input1_name = inputs[1].node_arg.Name();
|
||||
OnnxInputInfo input_info = {};
|
||||
ORT_RETURN_IF_ERROR(qnn_model_wrapper.GetOnnxInputInfo(inputs[1], input_info));
|
||||
TensorInfo input_info = {};
|
||||
ORT_RETURN_IF_ERROR(qnn_model_wrapper.GetTensorInfo(inputs[1], input_info));
|
||||
|
||||
std::string actual_name = input_info.is_initializer ? input1_name : input1_name + "_ort_qnn_ep_transpose";
|
||||
input_names.push_back(actual_name);
|
||||
|
|
@ -267,8 +267,8 @@ Status ConvOpBuilder::ProcessConv1DInputs(QnnModelWrapper& qnn_model_wrapper,
|
|||
|
||||
{
|
||||
const std::string& input0_name = inputs[0].node_arg.Name();
|
||||
OnnxInputInfo input0_info = {};
|
||||
ORT_RETURN_IF_ERROR(qnn_model_wrapper.GetOnnxInputInfo(inputs[0], input0_info));
|
||||
TensorInfo input0_info = {};
|
||||
ORT_RETURN_IF_ERROR(qnn_model_wrapper.GetTensorInfo(inputs[0], input0_info));
|
||||
|
||||
const std::string conv_input0_name = input0_info.is_initializer ? input0_name
|
||||
: input0_name + "_ort_qnn_ep_reshape";
|
||||
|
|
@ -318,8 +318,8 @@ Status ConvOpBuilder::ProcessConv1DInputs(QnnModelWrapper& qnn_model_wrapper,
|
|||
//
|
||||
{
|
||||
const std::string& input1_name = inputs[1].node_arg.Name();
|
||||
OnnxInputInfo input_info = {};
|
||||
ORT_RETURN_IF_ERROR(qnn_model_wrapper.GetOnnxInputInfo(inputs[1], input_info));
|
||||
TensorInfo input_info = {};
|
||||
ORT_RETURN_IF_ERROR(qnn_model_wrapper.GetTensorInfo(inputs[1], input_info));
|
||||
|
||||
std::string conv_weight_input_name = input_info.is_initializer ? input1_name : input1_name + "_ort_qnn_ep_transpose";
|
||||
input_names.push_back(conv_weight_input_name);
|
||||
|
|
|
|||
|
|
@ -94,8 +94,8 @@ Status InstanceNormOpBuilder::ProcessInputs(QnnModelWrapper& qnn_model_wrapper,
|
|||
bool do_op_validation) const {
|
||||
const auto& inputs = node_unit.Inputs();
|
||||
|
||||
OnnxInputInfo input0_info = {};
|
||||
ORT_RETURN_IF_ERROR(qnn_model_wrapper.GetOnnxInputInfo(inputs[0], input0_info));
|
||||
TensorInfo input0_info = {};
|
||||
ORT_RETURN_IF_ERROR(qnn_model_wrapper.GetTensorInfo(inputs[0], input0_info));
|
||||
|
||||
// HTP backend can only handle rank 3 inputs if the batch size is 1. If the batch size is not 1,
|
||||
// QNN EP must reshape the input and output to (N, 1, W, C) and process the InstanceNorm as rank 4.
|
||||
|
|
@ -168,8 +168,8 @@ Status InstanceNormOpBuilder::ProcessAttributesAndOutputs(QnnModelWrapper& qnn_m
|
|||
|
||||
const auto& outputs = node_unit.Outputs();
|
||||
|
||||
OnnxInputInfo output_info = {};
|
||||
ORT_RETURN_IF_ERROR(qnn_model_wrapper.GetOnnxInputInfo(outputs[0], output_info));
|
||||
TensorInfo output_info = {};
|
||||
ORT_RETURN_IF_ERROR(qnn_model_wrapper.GetTensorInfo(outputs[0], output_info));
|
||||
|
||||
// HTP backend can only handle rank 3 inputs/outputs if the batch size is 1. If the batch size is not 1,
|
||||
// QNN EP must reshape the input and output to (N, 1, W, C) and process the InstanceNorm as rank 4.
|
||||
|
|
|
|||
|
|
@ -67,8 +67,8 @@ Status ProcessConstantValue(QnnModelWrapper& qnn_model_wrapper,
|
|||
std::vector<std::string>& param_tensor_names,
|
||||
const NodeUnit& node_unit,
|
||||
const NodeUnitIODef& input) {
|
||||
OnnxInputInfo input_info = {};
|
||||
ORT_RETURN_IF_ERROR(qnn_model_wrapper.GetOnnxInputInfo(input, input_info));
|
||||
TensorInfo input_info = {};
|
||||
ORT_RETURN_IF_ERROR(qnn_model_wrapper.GetTensorInfo(input, input_info));
|
||||
std::vector<uint8_t> unpacked_tensor;
|
||||
// Already confirmed constant_value input is initializer in ProcessInputs()
|
||||
ORT_RETURN_IF_ERROR(qnn_model_wrapper.UnpackInitializerData(*input_info.initializer_tensor, unpacked_tensor));
|
||||
|
|
|
|||
|
|
@ -97,8 +97,8 @@ Status SoftmaxOpBuilder::ProcessInputs(QnnModelWrapper& qnn_model_wrapper,
|
|||
Qnn_Scalar_t axis_qnn_scalar = QNN_SCALAR_INIT;
|
||||
ORT_RETURN_IF_ERROR(ProcessAxisAttribute(qnn_model_wrapper, node_unit, axis_qnn_scalar, axis));
|
||||
|
||||
OnnxInputInfo input_info = {};
|
||||
ORT_RETURN_IF_ERROR(qnn_model_wrapper.GetOnnxInputInfo(inputs[0], input_info));
|
||||
TensorInfo input_info = {};
|
||||
ORT_RETURN_IF_ERROR(qnn_model_wrapper.GetTensorInfo(inputs[0], input_info));
|
||||
const size_t input_rank = input_info.shape.size();
|
||||
|
||||
// If the axis attribute refers to the last dimension, then process the input as normal.
|
||||
|
|
@ -161,8 +161,8 @@ Status SoftmaxOpBuilder::ProcessAttributesAndOutputs(QnnModelWrapper& qnn_model_
|
|||
Qnn_Scalar_t axis_qnn_scalar = QNN_SCALAR_INIT;
|
||||
ORT_RETURN_IF_ERROR(ProcessAxisAttribute(qnn_model_wrapper, node_unit, axis_qnn_scalar, axis));
|
||||
|
||||
OnnxInputInfo output_info = {};
|
||||
ORT_RETURN_IF_ERROR(qnn_model_wrapper.GetOnnxInputInfo(outputs[0], output_info));
|
||||
TensorInfo output_info = {};
|
||||
ORT_RETURN_IF_ERROR(qnn_model_wrapper.GetTensorInfo(outputs[0], output_info));
|
||||
const size_t output_rank = output_info.shape.size();
|
||||
const bool axis_is_last_dim = static_cast<size_t>(axis) == output_rank - 1;
|
||||
|
||||
|
|
|
|||
|
|
@ -365,33 +365,33 @@ bool QnnModelWrapper::ProcessQuantizationParameter(const std::optional<NodeUnitI
|
|||
return true;
|
||||
}
|
||||
|
||||
Status QnnModelWrapper::GetOnnxInputInfo(const NodeUnitIODef& input,
|
||||
OnnxInputInfo& input_info) const {
|
||||
Status QnnModelWrapper::GetTensorInfo(const NodeUnitIODef& input, TensorInfo& tensor_info) const {
|
||||
const std::string& name = input.node_arg.Name();
|
||||
|
||||
// Fill in quantization param info.
|
||||
input_info.quant_param = QNN_QUANTIZE_PARAMS_INIT;
|
||||
tensor_info.quant_param = QNN_QUANTIZE_PARAMS_INIT;
|
||||
bool is_quantized_tensor = input.quant_param.has_value();
|
||||
utils::InitializeQuantizeParam(input_info.quant_param, is_quantized_tensor);
|
||||
utils::InitializeQuantizeParam(tensor_info.quant_param, is_quantized_tensor);
|
||||
|
||||
if (is_quantized_tensor) {
|
||||
ORT_RETURN_IF_NOT(ProcessQuantizationParameter(input.quant_param,
|
||||
input_info.quant_param.scaleOffsetEncoding.scale,
|
||||
input_info.quant_param.scaleOffsetEncoding.offset),
|
||||
tensor_info.quant_param.scaleOffsetEncoding.scale,
|
||||
tensor_info.quant_param.scaleOffsetEncoding.offset),
|
||||
"QNN EP: Cannot get quantization parameters for input ", name.c_str());
|
||||
}
|
||||
|
||||
// Fill in QNN data type.
|
||||
input_info.qnn_data_type = QNN_DATATYPE_FLOAT_32;
|
||||
ORT_RETURN_IF_ERROR(utils::GetQnnDataType(is_quantized_tensor, input.node_arg.TypeAsProto(), input_info.qnn_data_type));
|
||||
tensor_info.qnn_data_type = QNN_DATATYPE_FLOAT_32;
|
||||
ORT_RETURN_IF_ERROR(utils::GetQnnDataType(is_quantized_tensor, input.node_arg.TypeAsProto(),
|
||||
tensor_info.qnn_data_type));
|
||||
|
||||
// Fill in shape.
|
||||
ORT_RETURN_IF_NOT(GetOnnxShape(input.node_arg, input_info.shape), "Cannot get shape");
|
||||
ORT_RETURN_IF_NOT(GetOnnxShape(input.node_arg, tensor_info.shape), "Cannot get shape");
|
||||
|
||||
// Fill in initializer info.
|
||||
input_info.is_initializer = IsInitializerInput(name);
|
||||
if (input_info.is_initializer) {
|
||||
input_info.initializer_tensor = GetInitializerTensors().at(name);
|
||||
tensor_info.is_initializer = IsInitializerInput(name);
|
||||
if (tensor_info.is_initializer) {
|
||||
tensor_info.initializer_tensor = GetInitializerTensors().at(name);
|
||||
}
|
||||
|
||||
return Status::OK();
|
||||
|
|
|
|||
|
|
@ -18,9 +18,9 @@
|
|||
namespace onnxruntime {
|
||||
namespace qnn {
|
||||
|
||||
// POD struct that stores information about an ONNX input.
|
||||
// Filled out by QnnModelWrapper::GetOnnxInputInfo()
|
||||
struct OnnxInputInfo {
|
||||
// Stores information about an ONNX input or output tensor.
|
||||
// Filled out by QnnModelWrapper::GetTensorInfo()
|
||||
struct TensorInfo {
|
||||
std::vector<uint32_t> shape;
|
||||
Qnn_DataType_t qnn_data_type;
|
||||
Qnn_QuantizeParams_t quant_param;
|
||||
|
|
@ -117,8 +117,7 @@ class QnnModelWrapper {
|
|||
return input_index_map_.find(tensor_name) != input_index_map_.end();
|
||||
}
|
||||
|
||||
// TODO(hecli) rename to GetTensorInfo
|
||||
Status GetOnnxInputInfo(const NodeUnitIODef& input, OnnxInputInfo& input_info) const;
|
||||
// Fills `tensor_info` for the given node-unit input or output: quantization
// parameters, QNN data type, shape, and (if the tensor is an initializer) the
// initializer tensor. Returns a non-OK Status if any of these cannot be obtained.
Status GetTensorInfo(const NodeUnitIODef& input, TensorInfo& tensor_info) const;
|
||||
|
||||
Status AddReshapeNode(const std::string& input_name,
|
||||
const std::string& output_name,
|
||||
|
|
|
|||
Loading…
Reference in a new issue