[QNN EP]Fix issue in LeakyRelu Opbuilder for HTP backend. (#15356)

### Description
Fix issue in LeakyRelu Opbuilder for HTP backend.
QNN Prelu (the op that ONNX LeakyRelu is mapped to) requires the alpha data
as its 2nd input, whereas ONNX sets it as an attribute. The HTP backend
requires inputs to be quantized, so setting the 2nd input with the float32
data type caused QNN op validation to fail.
Fix:
Need to set the 2nd input as quantized input for HTP backend. Calculate
the quantization parameter and quantize the alpha data into uint8.

### Motivation and Context
Unblock execution of models that contain LeakyRelu on the Qualcomm HTP backend.
This commit is contained in:
Hector Li 2023-04-07 09:15:07 -07:00 committed by GitHub
parent 16f5909f2d
commit bb21031cbb
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 120 additions and 14 deletions

View file

@ -7,6 +7,7 @@
#include "core/providers/qnn/builder/qnn_model_wrapper.h"
#include "core/providers/qnn/builder/op_builder_factory.h"
#include "core/common/safeint.h"
#include "core/util/qmath.h"
#include "base_op_builder.h"
@ -34,7 +35,8 @@ class SimpleOpBuilder : public BaseOpBuilder {
std::vector<std::string>& param_tensor_names) const;
Status ProcessAlphaAttribute(QnnModelWrapper& qnn_model_wrapper,
const NodeUnit& node_unit,
const std::string input_name) const;
const std::string input_name,
bool is_quantized_model) const;
Status HandleSingleTransposeNode(QnnModelWrapper& qnn_model_wrapper,
const NodeUnit& node_unit,
std::vector<std::string>&& input_names,
@ -90,18 +92,33 @@ Status SimpleOpBuilder::ProcessPermAttribute(QnnModelWrapper& qnn_model_wrapper,
return Status::OK();
}
Status SimpleOpBuilder::ProcessAlphaAttribute(QnnModelWrapper& qnn_model_wrapper, const NodeUnit& node_unit, const std::string input_name) const {
Status SimpleOpBuilder::ProcessAlphaAttribute(QnnModelWrapper& qnn_model_wrapper,
const NodeUnit& node_unit,
const std::string input_name,
bool is_quantized_model) const {
NodeAttrHelper node_helper(node_unit);
Qnn_QuantizeParams_t quantize_param = QNN_QUANTIZE_PARAMS_INIT;
Qnn_DataType_t qnn_data_type = QNN_DATATYPE_FLOAT_32;
union {
float alpha;
uint8_t unpack[sizeof(float)];
} tensor_data;
tensor_data.alpha = node_helper.Get("alpha", 0.01f);
std::vector<uint8_t> unpacked_data(tensor_data.unpack, tensor_data.unpack + sizeof(float));
Qnn_QuantizeParams_t quantize_param = QNN_QUANTIZE_PARAMS_INIT;
InitializeQuantizeParam(quantize_param, false);
std::vector<uint8_t> unpacked_data;
if (is_quantized_model) {
float scale;
uint8_t zero_point;
int64_t num_of_elements = 1;
concurrency::ThreadPool* thread_pool = nullptr;
GetQuantizationParameter(&tensor_data.alpha, num_of_elements, scale, zero_point, thread_pool);
unpacked_data.resize(1);
ParQuantizeLinear(&tensor_data.alpha, unpacked_data.data(), num_of_elements, scale, zero_point, thread_pool);
InitializeQuantizeParam(quantize_param, is_quantized_model, scale, static_cast<int32_t>(zero_point));
qnn_data_type = QNN_DATATYPE_UFIXED_POINT_8;
} else {
unpacked_data.assign(tensor_data.unpack, tensor_data.unpack + sizeof(float));
}
std::vector<uint32_t> input_shape{1};
Qnn_DataType_t qnn_data_type = QNN_DATATYPE_FLOAT_32;
Qnn_TensorType_t tensor_type = QNN_TENSOR_TYPE_STATIC;
QnnTensorWrapper input_tensorwrapper(input_name, tensor_type, qnn_data_type, quantize_param,
std::move(input_shape), std::move(unpacked_data));
@ -205,7 +222,7 @@ Status SimpleOpBuilder::ProcessAttributesAndOutputs(QnnModelWrapper& qnn_model_w
if (node_unit.OpType() == "LeakyRelu") {
std::string input_name = "alpha";
ORT_RETURN_IF_ERROR(ProcessAlphaAttribute(qnn_model_wrapper, node_unit, input_name));
ORT_RETURN_IF_ERROR(ProcessAlphaAttribute(qnn_model_wrapper, node_unit, input_name, is_quantized_model));
input_names.push_back(input_name);
}

View file

@ -214,9 +214,6 @@ GetQDQTestCaseFn BuildQDQGatherOpTestCase(const std::vector<int64_t>& input_shap
// input_data -> Q/DQ ->
auto* input_qdq_output = AddQDQNodePair<QuantType>(builder, input_data, .003f, 1);
std::vector<NodeArg*> gather_op_inputs;
gather_op_inputs.push_back(input_qdq_output);
auto* indices_input = builder.MakeInitializer<IndicesType>(indices_shape, indices);
auto* gather_output = builder.MakeIntermediate();
@ -250,9 +247,6 @@ GetQDQTestCaseFn BuildQDQGatherOpScalarIndicesTestCase(const std::vector<int64_t
// input_data -> Q/DQ ->
auto* input_qdq_output = AddQDQNodePair<QuantType>(builder, input_data, .003f, 1);
std::vector<NodeArg*> gather_op_inputs;
gather_op_inputs.push_back(input_qdq_output);
auto* indices_input = builder.MakeScalarInitializer<IndicesType>(indices);
auto* gather_output = builder.MakeIntermediate();
@ -269,6 +263,35 @@ GetQDQTestCaseFn BuildQDQGatherOpScalarIndicesTestCase(const std::vector<int64_t
};
}
// Returns a test-case function that builds this graph:
//
//   input (f32) -> Q -> DQ -> LeakyRelu(alpha = 0.2) -> Q -> DQ -> output (f32)
//
// QuantType is the quantized element type used by every Q/DQ node.
// input_shape is the shape of the float input, whose values are requested in [-1.0, 1.0].
template <typename QuantType>
GetQDQTestCaseFn BuildQDQLeakyReluOpTestCase(const std::vector<int64_t>& input_shape) {
  return [input_shape](ModelTestBuilder& builder) {
    // Quantization parameters for the Q/DQ pair feeding LeakyRelu ...
    constexpr float input_scale = 0.0473f;
    constexpr QuantType input_zero_point = static_cast<QuantType>(137);
    // ... and for the Q/DQ pair consuming its result.
    constexpr float output_scale = 0.02696f;
    constexpr QuantType output_zero_point = static_cast<QuantType>(48);

    NodeArg* graph_input = builder.MakeInput<float>(input_shape, -1.0f, 1.0f);
    NodeArg* graph_output = builder.MakeOutput();

    // graph_input -> Q -> DQ -> dequantized_input
    NodeArg* dequantized_input = AddQDQNodePair<QuantType>(builder, graph_input, input_scale, input_zero_point);

    // dequantized_input -> LeakyRelu -> activation
    NodeArg* activation = builder.MakeIntermediate();
    builder.AddNode("LeakyRelu", {dequantized_input}, {activation}).AddAttribute("alpha", 0.2f);

    // activation -> Q -> DQ -> graph_output
    NodeArg* quantized_activation = builder.MakeIntermediate();
    builder.AddQuantizeLinearNode<QuantType>(activation, output_scale, output_zero_point,
                                             quantized_activation);
    builder.AddDequantizeLinearNode<QuantType>(quantized_activation, output_scale, output_zero_point,
                                               graph_output);
  };
}
template <typename InputType, typename WeightType, typename BiasType, typename OutputType>
GetQDQTestCaseFn BuildQDQConvTestCase(const std::vector<int64_t>& input_shape, const std::vector<int64_t>& weights_shape) {
return [input_shape, weights_shape](ModelTestBuilder& builder) {

View file

@ -19,9 +19,9 @@ namespace test {
* Runs a Gather op model on the QNN HTP backend. Checks the graph node assignment, and that inference
* outputs for QNN and CPU match.
*
* \param op_type The Gather op type (e.g., ReduceSum).
* \param opset The opset version.
* \param test_description Description of the test for error reporting.
* \param scalar_indices Whether the indices input is a scalar or not.
* \param expected_ep_assignment How many nodes are expected to be assigned to QNN (All, Some, or None)
*/
template <typename QuantType, typename IndicesType>

View file

@ -0,0 +1,66 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
#if !defined(ORT_MINIMAL_BUILD)
#include <string>
#include "core/graph/graph.h"
#include "test/optimizer/qdq_test_utils.h"
#include "test/providers/qnn/qnn_test_utils.h"
#include "gtest/gtest.h"
namespace onnxruntime {
namespace test {
#if defined(__aarch64__) || defined(_M_ARM64) || defined(__linux__)
/**
* Runs a LeakyRelu op model on the QNN HTP backend. Checks the graph node assignment, and that inference
* outputs for QNN and CPU match.
*
* \param op_type The LeakyRelu op type (e.g., ReduceSum).
* \param opset The opset version.
* \param test_description Description of the test for error reporting.
* \param expected_ep_assignment How many nodes are expected to be assigned to QNN (All, Some, or None)
*/
template <typename QuantType>
static void RunLeakyReluOpQDQTest(int opset, const char* test_description,
ExpectedEPNodeAssignment expected_ep_assignment = ExpectedEPNodeAssignment::All) {
ProviderOptions provider_options;
#if defined(_WIN32)
provider_options["backend_path"] = "QnnHtp.dll";
#else
provider_options["backend_path"] = "libQnnHtp.so";
#endif
constexpr int expected_nodes_in_partition = 1;
RunQnnModelTest(BuildQDQLeakyReluOpTestCase<QuantType>({2, 3, 4}),
provider_options,
opset,
expected_ep_assignment,
expected_nodes_in_partition,
test_description);
}
// Test creates a Q -> DQ -> LeakyRelu -> Q -> DQ graph with opset 15, and checks that all
// nodes are supported by the QNN EP, and that the inference results match the CPU EP results.
//
// - Uses uint8 as the quantization type.
TEST_F(QnnHTPBackendTests, TestQDQLeakyReluOpSet15) {
RunLeakyReluOpQDQTest<uint8_t>(15, "TestQDQLeakyReluOpSet15");
}
// Test creates a Q -> DQ -> LeakyRelu -> Q -> DQ graph with opset 16, and checks that all
// nodes are supported by the QNN EP, and that the inference results match the CPU EP results.
//
// - Uses uint8 as the quantization type.
TEST_F(QnnHTPBackendTests, TestQDQLeakyReluOpSet16) {
RunLeakyReluOpQDQTest<uint8_t>(16, "TestQDQLeakyReluOpSet16");
}
#endif // defined(__aarch64__) || defined(_M_ARM64) || defined(__linux__)
} // namespace test
} // namespace onnxruntime
#endif