[QNN EP] Clean-up todo for OnnxInputInfo (#18416)

### Description
Renames the `OnnxInputInfo` struct to `TensorInfo` because the struct can
describe both input and output tensors.

### Motivation and Context
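Cleans up the `TODO(hecli) rename to GetTensorInfo` item in `QnnModelWrapper`: `GetOnnxInputInfo()` is renamed to `GetTensorInfo()` to match the new struct name.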
This commit is contained in:
Adrian Lizarraga 2023-11-14 08:14:40 -08:00 committed by GitHub
parent 5aeed62630
commit c9d5345c46
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
9 changed files with 53 additions and 54 deletions

View file

@ -56,8 +56,8 @@ Status BaseOpBuilder::ProcessInput(QnnModelWrapper& qnn_model_wrapper,
return Status::OK();
}
OnnxInputInfo input_info = {};
ORT_RETURN_IF_ERROR(qnn_model_wrapper.GetOnnxInputInfo(input, input_info));
TensorInfo input_info = {};
ORT_RETURN_IF_ERROR(qnn_model_wrapper.GetTensorInfo(input, input_info));
std::vector<uint8_t> unpacked_tensor;
if (input_info.is_initializer) {
@ -126,8 +126,8 @@ Status BaseOpBuilder::ProcessOutputs(QnnModelWrapper& qnn_model_wrapper,
for (size_t output_i = 0; output_i < output_count; ++output_i) {
const auto& output_name = outputs[output_i].node_arg.Name();
OnnxInputInfo output_info = {};
ORT_RETURN_IF_ERROR(qnn_model_wrapper.GetOnnxInputInfo(outputs[output_i], output_info));
TensorInfo output_info = {};
ORT_RETURN_IF_ERROR(qnn_model_wrapper.GetTensorInfo(outputs[output_i], output_info));
if (output_info.quant_param.encodingDefinition == QNN_DEFINITION_DEFINED) {
ORT_RETURN_IF_ERROR(OverrideOutputQuantParam(qnn_model_wrapper, node_unit, logger, input_names,

View file

@ -251,7 +251,7 @@ class BatchNormOpBuilder : public BaseOpBuilder {
return Status::OK();
}
Status PreprocessMean(const OnnxInputInfo& mean_info,
Status PreprocessMean(const TensorInfo& mean_info,
const bool is_npu_backend,
const uint8_t* mean_raw_ptr,
const size_t mean_raw_ptr_length,
@ -273,7 +273,7 @@ class BatchNormOpBuilder : public BaseOpBuilder {
return Status::OK();
}
Status PreprocessStd(const OnnxInputInfo& var_info,
Status PreprocessStd(const TensorInfo& var_info,
const bool is_npu_backend,
const uint8_t* var_raw_ptr,
const size_t var_raw_ptr_length,
@ -297,7 +297,7 @@ class BatchNormOpBuilder : public BaseOpBuilder {
return Status::OK();
}
Status PreprocessScale(const OnnxInputInfo& scale_info,
Status PreprocessScale(const TensorInfo& scale_info,
const bool is_npu_backend,
const uint8_t* scale_raw_ptr,
const size_t scale_raw_ptr_length,
@ -325,7 +325,7 @@ class BatchNormOpBuilder : public BaseOpBuilder {
return Status::OK();
}
Status PreprocessBias(const OnnxInputInfo& bias_info,
Status PreprocessBias(const TensorInfo& bias_info,
const bool is_npu_backend,
const uint8_t* bias_raw_ptr,
const size_t bias_raw_ptr_length,
@ -354,7 +354,7 @@ class BatchNormOpBuilder : public BaseOpBuilder {
return Status::OK();
}
Status Postprocess(const OnnxInputInfo& info,
Status Postprocess(const TensorInfo& info,
const bool is_npu_backend,
const std::vector<double>& double_tensor,
const double rmax,
@ -476,14 +476,14 @@ Status BatchNormOpBuilder::ProcessInputs(QnnModelWrapper& qnn_model_wrapper,
{
const std::string& scale_name = inputs[1].node_arg.Name();
const std::string& bias_name = inputs[2].node_arg.Name();
OnnxInputInfo var_info = {};
OnnxInputInfo mean_info = {};
OnnxInputInfo scale_info = {};
OnnxInputInfo bias_info = {};
ORT_RETURN_IF_ERROR(qnn_model_wrapper.GetOnnxInputInfo(inputs[1], scale_info));
ORT_RETURN_IF_ERROR(qnn_model_wrapper.GetOnnxInputInfo(inputs[2], bias_info));
ORT_RETURN_IF_ERROR(qnn_model_wrapper.GetOnnxInputInfo(inputs[3], mean_info));
ORT_RETURN_IF_ERROR(qnn_model_wrapper.GetOnnxInputInfo(inputs[4], var_info));
TensorInfo var_info = {};
TensorInfo mean_info = {};
TensorInfo scale_info = {};
TensorInfo bias_info = {};
ORT_RETURN_IF_ERROR(qnn_model_wrapper.GetTensorInfo(inputs[1], scale_info));
ORT_RETURN_IF_ERROR(qnn_model_wrapper.GetTensorInfo(inputs[2], bias_info));
ORT_RETURN_IF_ERROR(qnn_model_wrapper.GetTensorInfo(inputs[3], mean_info));
ORT_RETURN_IF_ERROR(qnn_model_wrapper.GetTensorInfo(inputs[4], var_info));
// scale, bias, mean, and var must be initializers
ORT_RETURN_IF_NOT(scale_info.is_initializer, "scale must be initializers");

View file

@ -84,8 +84,8 @@ Status ClipOpBuilder::ProcessAttributesAndOutputs(QnnModelWrapper& qnn_model_wra
std::vector<uint8_t> min_val_bytes;
if (num_inputs > 1 && !inputs[1].node_arg.Name().empty()) {
OnnxInputInfo min_input_info = {};
ORT_RETURN_IF_ERROR(qnn_model_wrapper.GetOnnxInputInfo(inputs[1], min_input_info));
TensorInfo min_input_info = {};
ORT_RETURN_IF_ERROR(qnn_model_wrapper.GetTensorInfo(inputs[1], min_input_info));
ORT_RETURN_IF_NOT(min_input_info.qnn_data_type == qnn_data_type,
"QNN EP: The 'min' input of the Clip operator must be of type float32.");
assert(min_input_info.is_initializer); // Checked by ExplicitOpCheck().
@ -106,8 +106,8 @@ Status ClipOpBuilder::ProcessAttributesAndOutputs(QnnModelWrapper& qnn_model_wra
std::vector<uint8_t> max_val_bytes;
if (num_inputs > 2 && !inputs[2].node_arg.Name().empty()) {
OnnxInputInfo max_input_info = {};
ORT_RETURN_IF_ERROR(qnn_model_wrapper.GetOnnxInputInfo(inputs[2], max_input_info));
TensorInfo max_input_info = {};
ORT_RETURN_IF_ERROR(qnn_model_wrapper.GetTensorInfo(inputs[2], max_input_info));
ORT_RETURN_IF_NOT(max_input_info.qnn_data_type == qnn_data_type,
"QNN EP: The 'max' input of the Clip operator must of type float32.");
assert(max_input_info.is_initializer); // Checked by ExplicitOpCheck().

View file

@ -175,8 +175,8 @@ Status ConvOpBuilder::ProcessConv2DInputs(QnnModelWrapper& qnn_model_wrapper,
//
{
const std::string& input1_name = inputs[1].node_arg.Name();
OnnxInputInfo input_info = {};
ORT_RETURN_IF_ERROR(qnn_model_wrapper.GetOnnxInputInfo(inputs[1], input_info));
TensorInfo input_info = {};
ORT_RETURN_IF_ERROR(qnn_model_wrapper.GetTensorInfo(inputs[1], input_info));
std::string actual_name = input_info.is_initializer ? input1_name : input1_name + "_ort_qnn_ep_transpose";
input_names.push_back(actual_name);
@ -267,8 +267,8 @@ Status ConvOpBuilder::ProcessConv1DInputs(QnnModelWrapper& qnn_model_wrapper,
{
const std::string& input0_name = inputs[0].node_arg.Name();
OnnxInputInfo input0_info = {};
ORT_RETURN_IF_ERROR(qnn_model_wrapper.GetOnnxInputInfo(inputs[0], input0_info));
TensorInfo input0_info = {};
ORT_RETURN_IF_ERROR(qnn_model_wrapper.GetTensorInfo(inputs[0], input0_info));
const std::string conv_input0_name = input0_info.is_initializer ? input0_name
: input0_name + "_ort_qnn_ep_reshape";
@ -318,8 +318,8 @@ Status ConvOpBuilder::ProcessConv1DInputs(QnnModelWrapper& qnn_model_wrapper,
//
{
const std::string& input1_name = inputs[1].node_arg.Name();
OnnxInputInfo input_info = {};
ORT_RETURN_IF_ERROR(qnn_model_wrapper.GetOnnxInputInfo(inputs[1], input_info));
TensorInfo input_info = {};
ORT_RETURN_IF_ERROR(qnn_model_wrapper.GetTensorInfo(inputs[1], input_info));
std::string conv_weight_input_name = input_info.is_initializer ? input1_name : input1_name + "_ort_qnn_ep_transpose";
input_names.push_back(conv_weight_input_name);

View file

@ -94,8 +94,8 @@ Status InstanceNormOpBuilder::ProcessInputs(QnnModelWrapper& qnn_model_wrapper,
bool do_op_validation) const {
const auto& inputs = node_unit.Inputs();
OnnxInputInfo input0_info = {};
ORT_RETURN_IF_ERROR(qnn_model_wrapper.GetOnnxInputInfo(inputs[0], input0_info));
TensorInfo input0_info = {};
ORT_RETURN_IF_ERROR(qnn_model_wrapper.GetTensorInfo(inputs[0], input0_info));
// HTP backend can only handle rank 3 inputs if the batch size is 1. If the batch size is not 1,
// QNN EP must reshape the input and output to (N, 1, W, C) and process the InstanceNorm as rank 4.
@ -168,8 +168,8 @@ Status InstanceNormOpBuilder::ProcessAttributesAndOutputs(QnnModelWrapper& qnn_m
const auto& outputs = node_unit.Outputs();
OnnxInputInfo output_info = {};
ORT_RETURN_IF_ERROR(qnn_model_wrapper.GetOnnxInputInfo(outputs[0], output_info));
TensorInfo output_info = {};
ORT_RETURN_IF_ERROR(qnn_model_wrapper.GetTensorInfo(outputs[0], output_info));
// HTP backend can only handle rank 3 inputs/outputs if the batch size is 1. If the batch size is not 1,
// QNN EP must reshape the input and output to (N, 1, W, C) and process the InstanceNorm as rank 4.

View file

@ -67,8 +67,8 @@ Status ProcessConstantValue(QnnModelWrapper& qnn_model_wrapper,
std::vector<std::string>& param_tensor_names,
const NodeUnit& node_unit,
const NodeUnitIODef& input) {
OnnxInputInfo input_info = {};
ORT_RETURN_IF_ERROR(qnn_model_wrapper.GetOnnxInputInfo(input, input_info));
TensorInfo input_info = {};
ORT_RETURN_IF_ERROR(qnn_model_wrapper.GetTensorInfo(input, input_info));
std::vector<uint8_t> unpacked_tensor;
// Already confirmed constant_value input is initializer in ProcessInputs()
ORT_RETURN_IF_ERROR(qnn_model_wrapper.UnpackInitializerData(*input_info.initializer_tensor, unpacked_tensor));

View file

@ -97,8 +97,8 @@ Status SoftmaxOpBuilder::ProcessInputs(QnnModelWrapper& qnn_model_wrapper,
Qnn_Scalar_t axis_qnn_scalar = QNN_SCALAR_INIT;
ORT_RETURN_IF_ERROR(ProcessAxisAttribute(qnn_model_wrapper, node_unit, axis_qnn_scalar, axis));
OnnxInputInfo input_info = {};
ORT_RETURN_IF_ERROR(qnn_model_wrapper.GetOnnxInputInfo(inputs[0], input_info));
TensorInfo input_info = {};
ORT_RETURN_IF_ERROR(qnn_model_wrapper.GetTensorInfo(inputs[0], input_info));
const size_t input_rank = input_info.shape.size();
// If the axis attribute refers to the last dimension, then process the input as normal.
@ -161,8 +161,8 @@ Status SoftmaxOpBuilder::ProcessAttributesAndOutputs(QnnModelWrapper& qnn_model_
Qnn_Scalar_t axis_qnn_scalar = QNN_SCALAR_INIT;
ORT_RETURN_IF_ERROR(ProcessAxisAttribute(qnn_model_wrapper, node_unit, axis_qnn_scalar, axis));
OnnxInputInfo output_info = {};
ORT_RETURN_IF_ERROR(qnn_model_wrapper.GetOnnxInputInfo(outputs[0], output_info));
TensorInfo output_info = {};
ORT_RETURN_IF_ERROR(qnn_model_wrapper.GetTensorInfo(outputs[0], output_info));
const size_t output_rank = output_info.shape.size();
const bool axis_is_last_dim = static_cast<size_t>(axis) == output_rank - 1;

View file

@ -365,33 +365,33 @@ bool QnnModelWrapper::ProcessQuantizationParameter(const std::optional<NodeUnitI
return true;
}
Status QnnModelWrapper::GetOnnxInputInfo(const NodeUnitIODef& input,
OnnxInputInfo& input_info) const {
Status QnnModelWrapper::GetTensorInfo(const NodeUnitIODef& input, TensorInfo& tensor_info) const {
const std::string& name = input.node_arg.Name();
// Fill in quantization param info.
input_info.quant_param = QNN_QUANTIZE_PARAMS_INIT;
tensor_info.quant_param = QNN_QUANTIZE_PARAMS_INIT;
bool is_quantized_tensor = input.quant_param.has_value();
utils::InitializeQuantizeParam(input_info.quant_param, is_quantized_tensor);
utils::InitializeQuantizeParam(tensor_info.quant_param, is_quantized_tensor);
if (is_quantized_tensor) {
ORT_RETURN_IF_NOT(ProcessQuantizationParameter(input.quant_param,
input_info.quant_param.scaleOffsetEncoding.scale,
input_info.quant_param.scaleOffsetEncoding.offset),
tensor_info.quant_param.scaleOffsetEncoding.scale,
tensor_info.quant_param.scaleOffsetEncoding.offset),
"QNN EP: Cannot get quantization parameters for input ", name.c_str());
}
// Fill in QNN data type.
input_info.qnn_data_type = QNN_DATATYPE_FLOAT_32;
ORT_RETURN_IF_ERROR(utils::GetQnnDataType(is_quantized_tensor, input.node_arg.TypeAsProto(), input_info.qnn_data_type));
tensor_info.qnn_data_type = QNN_DATATYPE_FLOAT_32;
ORT_RETURN_IF_ERROR(utils::GetQnnDataType(is_quantized_tensor, input.node_arg.TypeAsProto(),
tensor_info.qnn_data_type));
// Fill in shape.
ORT_RETURN_IF_NOT(GetOnnxShape(input.node_arg, input_info.shape), "Cannot get shape");
ORT_RETURN_IF_NOT(GetOnnxShape(input.node_arg, tensor_info.shape), "Cannot get shape");
// Fill in initializer info.
input_info.is_initializer = IsInitializerInput(name);
if (input_info.is_initializer) {
input_info.initializer_tensor = GetInitializerTensors().at(name);
tensor_info.is_initializer = IsInitializerInput(name);
if (tensor_info.is_initializer) {
tensor_info.initializer_tensor = GetInitializerTensors().at(name);
}
return Status::OK();

View file

@ -18,9 +18,9 @@
namespace onnxruntime {
namespace qnn {
// POD struct that stores information about an ONNX input.
// Filled out by QnnModelWrapper::GetOnnxInputInfo()
struct OnnxInputInfo {
// Stores information about an ONNX input or output tensor.
// Filled out by QnnModelWrapper::GetTensorInfo()
struct TensorInfo {
std::vector<uint32_t> shape;
Qnn_DataType_t qnn_data_type;
Qnn_QuantizeParams_t quant_param;
@ -117,8 +117,7 @@ class QnnModelWrapper {
return input_index_map_.find(tensor_name) != input_index_map_.end();
}
// TODO(hecli) rename to GetTensorInfo
Status GetOnnxInputInfo(const NodeUnitIODef& input, OnnxInputInfo& input_info) const;
Status GetTensorInfo(const NodeUnitIODef& input, TensorInfo& input_info) const;
Status AddReshapeNode(const std::string& input_name,
const std::string& output_name,