[QNN EP] Support Resize with 'asymmetric' transformation mode on HTP backend (#16060)

### Description
- Adds support for Resize with the `asymmetric` coordinate
transformation mode on the QNN HTP backend.
- Adds unit tests showing that the resulting translation is only correct
when `nearest_mode` is `"floor"`.

### Motivation and Context
This is needed to enable more models to run on the QNN HTP backend.

Note:
QNN's ONNX converter tool translates an ONNX Resize op with `{mode:
"nearest", coordinate_transformation_mode: "asymmetric", "nearest_mode":
<ANYTHING>}` to QNN's ResizeNearestNeighbor with `{align_corners: 0,
half_pixel: 0}`.
Unit tests show that this is only accurate if the ONNX attribute
nearest_mode is "floor". Need to investigate how to handle other
rounding modes. Ideally, we would use QNN's own Resize operator (instead
of ResizeNearestNeighbor), but that doesn't support the "asymmetric"
coordinate transformation mode on the HTP backend.
This commit is contained in:
Adrian Lizarraga 2023-05-23 16:04:19 -07:00 committed by GitHub
parent 55c3f4b28f
commit 96ee72d7f8
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
16 changed files with 110 additions and 69 deletions

View file

@ -55,7 +55,7 @@ Status ArgMaxMinOpBuilder::ProcessAttributesAndOutputs(QnnModelWrapper& qnn_mode
ORT_RETURN_IF_ERROR(ProcessOutputs(qnn_model_wrapper, node_unit,
std::move(input_names),
std::move(param_tensor_names),
logger, is_quantized_model, do_op_validation));
logger, is_quantized_model, do_op_validation, GetQnnOpType(node_unit.OpType())));
return Status::OK();
}

View file

@ -165,7 +165,8 @@ Status BaseOpBuilder::ProcessAttributesAndOutputs(QnnModelWrapper& qnn_model_wra
}
ORT_RETURN_IF_ERROR(ProcessOutputs(qnn_model_wrapper, node_unit, std::move(input_names), {},
logger, is_quantized_model, do_op_validation));
logger, is_quantized_model, do_op_validation,
GetQnnOpType(node_unit.OpType())));
return Status::OK();
}
@ -175,7 +176,8 @@ Status BaseOpBuilder::ProcessOutputs(QnnModelWrapper& qnn_model_wrapper,
std::vector<std::string>&& param_tensor_names,
const logging::Logger& logger,
bool is_quantized_model,
bool do_op_validation) const {
bool do_op_validation,
const std::string& qnn_op_type) const {
ORT_UNUSED_PARAMETER(logger);
// Add output
// Output part is common for all Ops, only difference is the Op attribute
@ -234,7 +236,7 @@ Status BaseOpBuilder::ProcessOutputs(QnnModelWrapper& qnn_model_wrapper,
ORT_RETURN_IF_NOT(qnn_model_wrapper.CreateQnnNode(GetNodeName(node_unit),
qnn_def::package_name,
GetQnnOpType(node_unit.OpType()),
qnn_op_type, // Typically GetQnnOpType(), but can be overridden.
std::move(input_names),
std::move(output_names),
std::move(param_tensor_names),

View file

@ -60,7 +60,8 @@ class BaseOpBuilder : public IOpBuilder {
std::vector<std::string>&& param_tensor_names,
const logging::Logger& logger,
bool is_quantized_model,
bool do_op_validation) const ORT_MUST_USE_RESULT;
bool do_op_validation,
const std::string& qnn_op_type) const ORT_MUST_USE_RESULT;
Status ProcessInput(QnnModelWrapper& qnn_model_wrapper,
const NodeUnitIODef& input,

View file

@ -142,7 +142,7 @@ Status ClipOpBuilder::ProcessAttributesAndOutputs(QnnModelWrapper& qnn_model_wra
ORT_RETURN_IF_ERROR(ProcessOutputs(qnn_model_wrapper, node_unit,
std::move(input_names),
std::move(param_tensor_names),
logger, is_quantized_model, do_op_validation));
logger, is_quantized_model, do_op_validation, GetQnnOpType(node_unit.OpType())));
return Status::OK();
}

View file

@ -160,7 +160,7 @@ Status GemmOpBuilder::ProcessAttributesAndOutputs(QnnModelWrapper& qnn_model_wra
bool is_quantized_model,
bool do_op_validation) const {
ORT_RETURN_IF_ERROR(ProcessOutputs(qnn_model_wrapper, node_unit, std::move(input_names), {},
logger, is_quantized_model, do_op_validation));
logger, is_quantized_model, do_op_validation, GetQnnOpType(node_unit.OpType())));
return Status::OK();
}

View file

@ -113,7 +113,7 @@ Status InstanceNormOpBuilder::ProcessAttributesAndOutputs(QnnModelWrapper& qnn_m
ORT_RETURN_IF_ERROR(ProcessOutputs(qnn_model_wrapper, node_unit,
std::move(input_names),
std::move(param_tensor_names),
logger, is_quantized_model, do_op_validation));
logger, is_quantized_model, do_op_validation, GetQnnOpType(node_unit.OpType())));
return Status::OK();
}

View file

@ -174,7 +174,7 @@ Status LRNOpBuilder::ProcessAttributesAndOutputs(QnnModelWrapper& qnn_model_wrap
}
return ProcessOutputs(qnn_model_wrapper, node_unit, std::move(input_names), std::move(param_tensor_names),
logger, is_quantized_model, do_op_validation);
logger, is_quantized_model, do_op_validation, GetQnnOpType(node_unit.OpType()));
}
void CreateLRNOpBuilder(const std::string& op_type, OpBuilderRegistrations& op_registrations) {

View file

@ -229,7 +229,7 @@ Status PoolOpBuilder::ProcessAttributesAndOutputs(QnnModelWrapper& qnn_model_wra
ORT_RETURN_IF_ERROR(ProcessOutputs(qnn_model_wrapper, node_unit,
std::move(input_names),
std::move(param_tensor_names),
logger, is_quantized_model, do_op_validation));
logger, is_quantized_model, do_op_validation, GetQnnOpType(node_unit.OpType())));
return Status::OK();
}

View file

@ -253,7 +253,7 @@ Status ReduceOpBuilder::ProcessAttributesAndOutputs(QnnModelWrapper& qnn_model_w
ORT_RETURN_IF_ERROR(ProcessOutputs(qnn_model_wrapper, node_unit,
std::move(input_names),
std::move(param_tensor_names),
logger, is_quantized_model, do_op_validation));
logger, is_quantized_model, do_op_validation, GetQnnOpType(node_unit.OpType())));
return Status::OK();
}

View file

@ -270,6 +270,17 @@ Status ResizeOpBuilder::ValidateQDQOp(QnnModelWrapper& qnn_model_wrapper, const
const std::string nearest_mode = GetOnnxAttr(node_helper, onnx_nearest_mode_attr);
ORT_RETURN_IF_NOT(ArrayHasString(supported_nearest_modes, nearest_mode),
"QNN EP: Resize does not support nearest_mode ", nearest_mode.c_str());
// TODO: Support 'asymmetric' transformation mode with nearest_mode != 'floor'.
//
// QNN's ONNX converter tool translates 'nearest' + 'asymmetric' (regardless of rounding mode)
// to QNN's ResizeNearestNeighbor with {align_corners: 0, half_pixel: 0}.
// This is only accurate if the rounding mode is "floor". Need to investigate how to handle
// other rounding modes with Qualcomm. Ideally, we would use QNN's Resize operator, but it doesn't support
// the "asymmetric" coordinate transformation mode on HTP.
ORT_RETURN_IF(transformation_mode == "asymmetric" && nearest_mode != "floor",
"QNN EP: Resize with coordinate_transformation_mode 'asymmetric' and nearest_mode '", nearest_mode,
"' is not currently supported on the HTP backend.");
}
// Check that input shape has at least a rank of 3.
@ -356,40 +367,8 @@ Status ResizeOpBuilder::ProcessOpAttrsAndOutputs(QnnModelWrapper& qnn_model_wrap
param_tensor_names.push_back(qnn_half_pixel_param.GetParamTensorName());
qnn_model_wrapper.AddParamWrapper(std::move(qnn_half_pixel_param));
const auto& resize_output = node_unit.Outputs()[0];
const auto& output_name = resize_output.node_arg.Name();
Qnn_QuantizeParams_t quantize_param = QNN_QUANTIZE_PARAMS_INIT;
InitializeQuantizeParam(quantize_param, false);
const auto* type_proto = resize_output.node_arg.TypeAsProto();
Qnn_DataType_t qnn_data_type = QNN_DATATYPE_FLOAT_32;
ORT_RETURN_IF_ERROR(GetQnnDataType(false, type_proto, qnn_data_type));
std::vector<uint32_t> output_shape;
ORT_RETURN_IF_NOT(qnn_model_wrapper.GetOnnxShape(resize_output.node_arg, output_shape),
"Cannot get shape");
bool is_graph_output = qnn_model_wrapper.IsGraphOutput(output_name);
Qnn_TensorType_t tensor_type = is_graph_output ? QNN_TENSOR_TYPE_APP_READ : QNN_TENSOR_TYPE_NATIVE;
QnnTensorWrapper output_tensorwrapper(output_name,
tensor_type,
qnn_data_type,
quantize_param,
std::move(output_shape));
ORT_RETURN_IF_NOT(qnn_model_wrapper.AddTensorWrapper(std::move(output_tensorwrapper)), "Failed to add tensor.");
ORT_RETURN_IF_NOT(qnn_model_wrapper.CreateQnnNode(GetNodeName(node_unit),
qnn_def::package_name,
qnn_node_type,
std::move(input_names),
{output_name},
std::move(param_tensor_names),
do_op_validation),
"Failed to add node.");
return Status::OK();
return ProcessOutputs(qnn_model_wrapper, node_unit, std::move(input_names), std::move(param_tensor_names),
logger, false, do_op_validation, qnn_node_type);
}
Status ResizeOpBuilder::ProcessQDQOpAttrsAndOutputs(QnnModelWrapper& qnn_model_wrapper,
@ -400,21 +379,39 @@ Status ResizeOpBuilder::ProcessQDQOpAttrsAndOutputs(QnnModelWrapper& qnn_model_w
std::vector<std::string> param_tensor_names;
NodeAttrHelper node_helper(node_unit);
// Parameter 'exclude_outside'
{
Qnn_Scalar_t qnn_exclude_outside = QNN_SCALAR_INIT;
qnn_exclude_outside.dataType = QNN_DATATYPE_BOOL_8;
qnn_exclude_outside.bool8Value = static_cast<uint8_t>(GetOnnxAttr(node_helper, onnx_exclude_outside_attr) != 0);
const std::string interp_mode = GetOnnxAttr(node_helper, onnx_mode_attr);
const std::string transformation_mode = GetOnnxAttr(node_helper, onnx_coord_transf_mode_attr);
std::string qnn_op_type = "Resize";
QnnParamWrapper qnn_exclude_outside_param(node_unit.Index(), node_unit.Name(), qnn_def::exclude_outside,
qnn_exclude_outside);
param_tensor_names.push_back(qnn_exclude_outside_param.GetParamTensorName());
qnn_model_wrapper.AddParamWrapper(std::move(qnn_exclude_outside_param));
}
// Handle Resize with {mode: "nearest", coordinate_transformation_mode: "asymmetric"} uniquely.
// QNN's ONNX converter tool translates this configuration (regardless of rounding mode)
// to QNN's ResizeNearestNeighbor with {align_corners: 0, half_pixel: 0}.
//
// NOTE: This is only accurate if the rounding mode is "floor". Need to investigate how to handle
// other rounding modes with Qualcomm. Ideally, we would use QNN's Resize operator, but it doesn't support
// the "asymmetric" coordinate transformation mode on HTP.
if (interp_mode == "nearest" && transformation_mode == "asymmetric") {
qnn_op_type = "ResizeNearestNeighbor";
// Parameter 'transformation_mode'
{
const std::string transformation_mode = GetOnnxAttr(node_helper, onnx_coord_transf_mode_attr);
// Set parameter 'align_corners' to 0
Qnn_Scalar_t qnn_align_corners = QNN_SCALAR_INIT;
qnn_align_corners.dataType = QNN_DATATYPE_BOOL_8;
qnn_align_corners.bool8Value = static_cast<uint8_t>(0);
QnnParamWrapper qnn_align_corners_param(node_unit.Index(), node_unit.Name(),
qnn_def::align_corners, qnn_align_corners);
param_tensor_names.push_back(qnn_align_corners_param.GetParamTensorName());
qnn_model_wrapper.AddParamWrapper(std::move(qnn_align_corners_param));
// Set parameter 'half_pixel_centers' to 0
Qnn_Scalar_t qnn_half_pixel = QNN_SCALAR_INIT;
qnn_half_pixel.dataType = QNN_DATATYPE_BOOL_8;
qnn_half_pixel.bool8Value = static_cast<uint8_t>(0);
QnnParamWrapper qnn_half_pixel_param(node_unit.Index(), node_unit.Name(),
qnn_def::half_pixel_centers, qnn_half_pixel);
param_tensor_names.push_back(qnn_half_pixel_param.GetParamTensorName());
qnn_model_wrapper.AddParamWrapper(std::move(qnn_half_pixel_param));
} else {
// Parameter 'transformation_mode'
Qnn_Scalar_t qnn_transformation_mode = QNN_SCALAR_INIT;
qnn_transformation_mode.dataType = QNN_DATATYPE_UINT_32;
ORT_RETURN_IF_ERROR(GetQnnModeFromString(supported_coord_transf_modes, transformation_mode,
@ -424,11 +421,18 @@ Status ResizeOpBuilder::ProcessQDQOpAttrsAndOutputs(QnnModelWrapper& qnn_model_w
qnn_transformation_mode);
param_tensor_names.push_back(qnn_transformation_mode_param.GetParamTensorName());
qnn_model_wrapper.AddParamWrapper(std::move(qnn_transformation_mode_param));
}
// Parameter 'interpolation_mode'
{
const std::string interp_mode = GetOnnxAttr(node_helper, onnx_mode_attr);
// Parameter 'exclude_outside'
Qnn_Scalar_t qnn_exclude_outside = QNN_SCALAR_INIT;
qnn_exclude_outside.dataType = QNN_DATATYPE_BOOL_8;
qnn_exclude_outside.bool8Value = static_cast<uint8_t>(GetOnnxAttr(node_helper, onnx_exclude_outside_attr) != 0);
QnnParamWrapper qnn_exclude_outside_param(node_unit.Index(), node_unit.Name(), qnn_def::exclude_outside,
qnn_exclude_outside);
param_tensor_names.push_back(qnn_exclude_outside_param.GetParamTensorName());
qnn_model_wrapper.AddParamWrapper(std::move(qnn_exclude_outside_param));
// Parameter 'interpolation_mode'
Qnn_Scalar_t qnn_interp_mode = QNN_SCALAR_INIT;
qnn_interp_mode.dataType = QNN_DATATYPE_UINT_32;
ORT_RETURN_IF_ERROR(GetQnnModeFromString(supported_modes, interp_mode, "mode", qnn_interp_mode.uint32Value));
@ -454,7 +458,7 @@ Status ResizeOpBuilder::ProcessQDQOpAttrsAndOutputs(QnnModelWrapper& qnn_model_w
}
return ProcessOutputs(qnn_model_wrapper, node_unit, std::move(input_names), std::move(param_tensor_names),
logger, true, do_op_validation);
logger, true, do_op_validation, qnn_op_type);
}
void CreateResizeOpBuilder(const std::string& op_type, OpBuilderRegistrations& op_registrations) {

View file

@ -252,7 +252,7 @@ Status SimpleOpBuilder::ProcessAttributesAndOutputs(QnnModelWrapper& qnn_model_w
ORT_RETURN_IF_ERROR(ProcessOutputs(qnn_model_wrapper, node_unit,
std::move(input_names),
std::move(param_tensor_names),
logger, is_quantized_model, do_op_validation));
logger, is_quantized_model, do_op_validation, GetQnnOpType(node_unit.OpType())));
return Status::OK();
}

View file

@ -217,7 +217,7 @@ Status SliceOpBuilder::ProcessAttributesAndOutputs(QnnModelWrapper& qnn_model_wr
{param_tensor_name},
logger,
is_quantized_model,
do_op_validation));
do_op_validation, GetQnnOpType(node_unit.OpType())));
return Status::OK();
}

View file

@ -120,7 +120,7 @@ Status SplitOpBuilder::ProcessAttributesAndOutputs(QnnModelWrapper& qnn_model_wr
ORT_RETURN_IF_ERROR(ProcessOutputs(qnn_model_wrapper, node_unit,
std::move(input_names),
std::move(param_tensor_names),
logger, is_quantized_model, do_op_validation));
logger, is_quantized_model, do_op_validation, GetQnnOpType(node_unit.OpType())));
return Status::OK();
}

View file

@ -85,7 +85,7 @@ Status TileOpBuilder::ProcessAttributesAndOutputs(QnnModelWrapper& qnn_model_wra
ORT_RETURN_IF_ERROR(ProcessOutputs(qnn_model_wrapper, node_unit,
std::move(input_names),
std::move(param_tensor_names),
logger, is_quantized_model, do_op_validation));
logger, is_quantized_model, do_op_validation, GetQnnOpType(node_unit.OpType())));
return Status::OK();
}

View file

@ -112,8 +112,9 @@ Status TopKOpBuilder::ProcessAttributesAndOutputs(QnnModelWrapper& qnn_model_wra
std::string k_param_name = k_param.GetParamTensorName();
qnn_model_wrapper.AddParamWrapper(std::move(k_param));
std::vector<std::string> param_tensor_names{k_param_name};
ORT_RETURN_IF_ERROR(ProcessOutputs(qnn_model_wrapper, node_unit, std::move(input_names), std::move(param_tensor_names),
logger, is_quantized_model, do_op_validation));
ORT_RETURN_IF_ERROR(ProcessOutputs(qnn_model_wrapper, node_unit, std::move(input_names),
std::move(param_tensor_names), logger, is_quantized_model, do_op_validation,
GetQnnOpType(node_unit.OpType())));
return Status::OK();
}

View file

@ -258,6 +258,39 @@ TEST_F(QnnHTPBackendTests, TestQDQU8Resize2xNearestHalfPixelRoundPreferFloor) {
"TestQDQU8Resize2xNearestHalfPixelRoundPreferFloor");
}
// QDQ (uint8) Resize on the HTP backend with mode "nearest",
// coordinate_transformation_mode "asymmetric" and nearest_mode "floor".
// The first brace list is the input shape; the second is presumably the requested
// output sizes (a 2x spatial upsample of the 4x4 input) -- confirm against RunQDQResizeOpTest.
// The final string argument matches this test's name and appears to be a description
// label for the helper -- confirm against RunQDQResizeOpTest.
TEST_F(QnnHTPBackendTests, TestQDQU8Resize2xNearestAsymmetricFloor) {
RunQDQResizeOpTest<uint8_t>({1, 3, 4, 4}, {1, 3, 8, 8}, "nearest", "asymmetric", "floor",
ExpectedEPNodeAssignment::All, 1e-5f,
"TestQDQU8Resize2xNearestAsymmetricFloor");
}
// TODO: Investigate with Qualcomm. The qnn-onnx-converter tool translates ONNX Resize [nearest, asymmetric, ceil] to
// QNN ResizeNearestNeighbor {align_corners: 0, half_pixel: 0}, which is NOT equivalent. It would be better to use
// QNN's own Resize operator (instead of ResizeNearestNeighbor), but it doesn't support the "asymmetric" coordinate
// transform mode.
//
// Failure output observed while this test was enabled:
// Expected: contains 192 values, where each value and its corresponding value in 16-byte object
// <C0-00 00-00 00-00 00-00 40-05 D6-27 BB-01 00-00> are an almost-equal pair
// Actual: 16-byte object <C0-00 00-00 00-00 00-00 40-04 E9-1B BB-01 00-00>,
// where the value pair (0.15, 0.501) at index #1 don't match, which is 0.351 from 0.15
//
// Disabled (DISABLED_ prefix) until the rounding-mode mismatch above is resolved.
// The description string now names this test rather than the "Floor" variant it was copied from.
TEST_F(QnnHTPBackendTests, DISABLED_TestQDQU8Resize2xNearestAsymmetricCeil) {
  RunQDQResizeOpTest<uint8_t>({1, 3, 4, 4}, {1, 3, 8, 8}, "nearest", "asymmetric", "ceil",
                              ExpectedEPNodeAssignment::All, 1e-5f,
                              "TestQDQU8Resize2xNearestAsymmetricCeil");
}
// QDQ (uint8) Resize on the HTP backend with mode "nearest",
// coordinate_transformation_mode "asymmetric" and nearest_mode "floor",
// going from a 4x4 input to a 12x12 target (3x on the last two dimensions).
// The description string now names this test rather than the 2x variant it was copied from.
TEST_F(QnnHTPBackendTests, TestQDQU8Resize3xNearestAsymmetricFloor) {
  RunQDQResizeOpTest<uint8_t>({1, 3, 4, 4}, {1, 3, 12, 12}, "nearest", "asymmetric", "floor",
                              ExpectedEPNodeAssignment::All, 1e-5f,
                              "TestQDQU8Resize3xNearestAsymmetricFloor");
}
// QDQ (uint8) Resize on the HTP backend with mode "nearest",
// coordinate_transformation_mode "asymmetric" and nearest_mode "floor",
// going from a 4x4 input to a 2x2 target (0.5x on the last two dimensions).
// The description string now names this test rather than the 2x variant it was copied from.
TEST_F(QnnHTPBackendTests, TestQDQU8ResizeHalfNearestAsymmetricFloor) {
  RunQDQResizeOpTest<uint8_t>({1, 3, 4, 4}, {1, 3, 2, 2}, "nearest", "asymmetric", "floor",
                              ExpectedEPNodeAssignment::All, 1e-5f,
                              "TestQDQU8ResizeHalfNearestAsymmetricFloor");
}
#endif // defined(__aarch64__) || defined(_M_ARM64) || defined(__linux__)
} // namespace test