[QNN EP]Fix issue in LeakyRelu Opbuilder for HTP backend. (#15356)

### Description Fix issue in LeakyRelu Opbuilder for HTP backend. Qnn Prelu (Onnx LeakyRelu) requires the alpha data as the 2nd input, while Onnx sets it as an attribute. The HTP backend requires inputs to be quantized, so setting the 2nd input as float32 data type caused Qnn Op validation to fail. Fix: set the 2nd input as a quantized input for the HTP backend, i.e. calculate the quantization parameters and quantize the alpha data into uint8. ### Motivation and Context Unblock models that execute LeakyRelu on the Qualcomm HTP backend.
2026-07-13 18:08:13 +00:00 · 2023-04-07 09:15:07 -07:00 · 2023-04-07 09:15:07 -07:00 · bb21031cbb
commit bb21031cbb
parent 16f5909f2d
4 changed files with 120 additions and 14 deletions
--- a/onnxruntime/core/providers/qnn/builder/opbuilder/simple_op_builder.cc
+++ b/onnxruntime/core/providers/qnn/builder/opbuilder/simple_op_builder.cc
@ -7,6 +7,7 @@
 #include "core/providers/qnn/builder/qnn_model_wrapper.h"
 #include "core/providers/qnn/builder/op_builder_factory.h"
 #include "core/common/safeint.h"
+#include "core/util/qmath.h"

 #include "base_op_builder.h"

@ -34,7 +35,8 @@ class SimpleOpBuilder : public BaseOpBuilder {
                              std::vector<std::string>& param_tensor_names) const;
  Status ProcessAlphaAttribute(QnnModelWrapper& qnn_model_wrapper,
                               const NodeUnit& node_unit,
-                               const std::string input_name) const;
+                               const std::string input_name,
+                               bool is_quantized_model) const;
  Status HandleSingleTransposeNode(QnnModelWrapper& qnn_model_wrapper,
                                   const NodeUnit& node_unit,
                                   std::vector<std::string>&& input_names,
@ -90,18 +92,33 @@ Status SimpleOpBuilder::ProcessPermAttribute(QnnModelWrapper& qnn_model_wrapper,
  return Status::OK();
 }

-Status SimpleOpBuilder::ProcessAlphaAttribute(QnnModelWrapper& qnn_model_wrapper, const NodeUnit& node_unit, const std::string input_name) const {
+Status SimpleOpBuilder::ProcessAlphaAttribute(QnnModelWrapper& qnn_model_wrapper,
+                                              const NodeUnit& node_unit,
+                                              const std::string input_name,
+                                              bool is_quantized_model) const {
  NodeAttrHelper node_helper(node_unit);
+  Qnn_QuantizeParams_t quantize_param = QNN_QUANTIZE_PARAMS_INIT;
+  Qnn_DataType_t qnn_data_type = QNN_DATATYPE_FLOAT_32;
  union {
    float alpha;
    uint8_t unpack[sizeof(float)];
  } tensor_data;
  tensor_data.alpha = node_helper.Get("alpha", 0.01f);
-  std::vector<uint8_t> unpacked_data(tensor_data.unpack, tensor_data.unpack + sizeof(float));
-  Qnn_QuantizeParams_t quantize_param = QNN_QUANTIZE_PARAMS_INIT;
-  InitializeQuantizeParam(quantize_param, false);
+  std::vector<uint8_t> unpacked_data;
+  if (is_quantized_model) {
+    float scale;
+    uint8_t zero_point;
+    int64_t num_of_elements = 1;
+    concurrency::ThreadPool* thread_pool = nullptr;
+    GetQuantizationParameter(&tensor_data.alpha, num_of_elements, scale, zero_point, thread_pool);
+    unpacked_data.resize(1);
+    ParQuantizeLinear(&tensor_data.alpha, unpacked_data.data(), num_of_elements, scale, zero_point, thread_pool);
+    InitializeQuantizeParam(quantize_param, is_quantized_model, scale, static_cast<int32_t>(zero_point));
+    qnn_data_type = QNN_DATATYPE_UFIXED_POINT_8;
+  } else {
+    unpacked_data.assign(tensor_data.unpack, tensor_data.unpack + sizeof(float));
+  }
  std::vector<uint32_t> input_shape{1};
-  Qnn_DataType_t qnn_data_type = QNN_DATATYPE_FLOAT_32;
  Qnn_TensorType_t tensor_type = QNN_TENSOR_TYPE_STATIC;
  QnnTensorWrapper input_tensorwrapper(input_name, tensor_type, qnn_data_type, quantize_param,
                                       std::move(input_shape), std::move(unpacked_data));
@ -205,7 +222,7 @@ Status SimpleOpBuilder::ProcessAttributesAndOutputs(QnnModelWrapper& qnn_model_w

  if (node_unit.OpType() == "LeakyRelu") {
    std::string input_name = "alpha";
-    ORT_RETURN_IF_ERROR(ProcessAlphaAttribute(qnn_model_wrapper, node_unit, input_name));
+    ORT_RETURN_IF_ERROR(ProcessAlphaAttribute(qnn_model_wrapper, node_unit, input_name, is_quantized_model));
    input_names.push_back(input_name);
  }

--- a/onnxruntime/test/optimizer/qdq_test_utils.h
+++ b/onnxruntime/test/optimizer/qdq_test_utils.h
@ -214,9 +214,6 @@ GetQDQTestCaseFn BuildQDQGatherOpTestCase(const std::vector<int64_t>& input_shap
    // input_data -> Q/DQ ->
    auto* input_qdq_output = AddQDQNodePair<QuantType>(builder, input_data, .003f, 1);

-    std::vector<NodeArg*> gather_op_inputs;
-    gather_op_inputs.push_back(input_qdq_output);
-
    auto* indices_input = builder.MakeInitializer<IndicesType>(indices_shape, indices);

    auto* gather_output = builder.MakeIntermediate();
@ -250,9 +247,6 @@ GetQDQTestCaseFn BuildQDQGatherOpScalarIndicesTestCase(const std::vector<int64_t
    // input_data -> Q/DQ ->
    auto* input_qdq_output = AddQDQNodePair<QuantType>(builder, input_data, .003f, 1);

-    std::vector<NodeArg*> gather_op_inputs;
-    gather_op_inputs.push_back(input_qdq_output);
-
    auto* indices_input = builder.MakeScalarInitializer<IndicesType>(indices);

    auto* gather_output = builder.MakeIntermediate();
@ -269,6 +263,35 @@ GetQDQTestCaseFn BuildQDQGatherOpScalarIndicesTestCase(const std::vector<int64_t
  };
 }

+// Creates the following QDQ graph containing a single LeakyRelu node (alpha = 0.2):
+//                                _______________________
+//                               |                       |
+//    input (f32) -> Q -> DQ ->  |       LeakyRelu       | -> Q -> DQ -> output (f32)
+//                               |_______________________|
+// Input Q/DQ is built with (0.0473, 137) and output Q/DQ with (0.02696, 48); presumably (scale, zero point) - confirm against the helpers.
+template <typename QuantType>
+GetQDQTestCaseFn BuildQDQLeakyReluOpTestCase(const std::vector<int64_t>& input_shape) {
+  return [input_shape](ModelTestBuilder& builder) {
+    auto* input_data = builder.MakeInput<float>(input_shape, -1.0f, 1.0f);
+    auto* final_output = builder.MakeOutput();
+
+    // input_data -> Q/DQ -> (feeds the LeakyRelu node below)
+    auto* input_qdq_output = AddQDQNodePair<QuantType>(builder, input_data, 0.0473f, 137);
+
+    auto* leakyrelu_output = builder.MakeIntermediate();
+    Node& leakyrelu_node = builder.AddNode("LeakyRelu", {input_qdq_output}, {leakyrelu_output});
+    leakyrelu_node.AddAttribute("alpha", 0.2f);
+
+    // leakyrelu_output -> Q/DQ -> final_output (the Q and DQ nodes are given the same quantization arguments)
+    auto* q_output = builder.MakeIntermediate();
+    builder.AddQuantizeLinearNode<QuantType>(leakyrelu_output, 0.02696f, 48,
+                                             q_output);
+
+    builder.AddDequantizeLinearNode<QuantType>(q_output, 0.02696f, 48,
+                                               final_output);
+  };
+}
+
 template <typename InputType, typename WeightType, typename BiasType, typename OutputType>
 GetQDQTestCaseFn BuildQDQConvTestCase(const std::vector<int64_t>& input_shape, const std::vector<int64_t>& weights_shape) {
  return [input_shape, weights_shape](ModelTestBuilder& builder) {
--- a/onnxruntime/test/providers/qnn/gather_op_htp_test.cc
+++ b/onnxruntime/test/providers/qnn/gather_op_htp_test.cc
@ -19,9 +19,9 @@ namespace test {
 * Runs a Gather op model on the QNN HTP backend. Checks the graph node assignment, and that inference
 * outputs for QNN and CPU match.
 *
- * \param op_type The Gather op type (e.g., ReduceSum).
 * \param opset The opset version.
 * \param test_description Description of the test for error reporting.
+ * \param scalar_indices Whether the indices input is a scalar or not.
 * \param expected_ep_assignment How many nodes are expected to be assigned to QNN (All, Some, or None)
 */
 template <typename QuantType, typename IndicesType>
--- a/onnxruntime/test/providers/qnn/leakyrelu_op_htp_test.cc
+++ b/onnxruntime/test/providers/qnn/leakyrelu_op_htp_test.cc
@ -0,0 +1,66 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+#if !defined(ORT_MINIMAL_BUILD)
+
+#include <string>
+#include "core/graph/graph.h"
+
+#include "test/optimizer/qdq_test_utils.h"
+#include "test/providers/qnn/qnn_test_utils.h"
+
+#include "gtest/gtest.h"
+
+namespace onnxruntime {
+namespace test {
+#if defined(__aarch64__) || defined(_M_ARM64) || defined(__linux__)
+
+/**
+ * Runs a LeakyRelu op model on the QNN HTP backend. Checks the graph node assignment, and that inference
+ * outputs for QNN and CPU match.
+ *
+ * \tparam QuantType The quantization type forwarded to BuildQDQLeakyReluOpTestCase (e.g., uint8_t).
+ * \param opset The opset version.
+ * \param test_description Description of the test for error reporting.
+ * \param expected_ep_assignment How many nodes are expected to be assigned to QNN (All, Some, or None)
+ */
+template <typename QuantType>
+static void RunLeakyReluOpQDQTest(int opset, const char* test_description,
+                                  ExpectedEPNodeAssignment expected_ep_assignment = ExpectedEPNodeAssignment::All) {
+  ProviderOptions provider_options;
+#if defined(_WIN32)
+  provider_options["backend_path"] = "QnnHtp.dll";
+#else
+  provider_options["backend_path"] = "libQnnHtp.so";
+#endif
+
+  constexpr int expected_nodes_in_partition = 1;
+  RunQnnModelTest(BuildQDQLeakyReluOpTestCase<QuantType>({2, 3, 4}),
+                  provider_options,
+                  opset,
+                  expected_ep_assignment,
+                  expected_nodes_in_partition,
+                  test_description);
+}
+
+// Test creates a Q -> DQ -> LeakyRelu -> Q -> DQ graph (opset 15), and checks that all
+// nodes are supported by the QNN EP, and that the inference results match the CPU EP results.
+//
+// - Uses uint8 as the quantization type.
+TEST_F(QnnHTPBackendTests, TestQDQLeakyReluOpSet15) {
+  RunLeakyReluOpQDQTest<uint8_t>(15, "TestQDQLeakyReluOpSet15");
+}
+
+// Test creates a Q -> DQ -> LeakyRelu -> Q -> DQ graph (opset 16), and checks that all
+// nodes are supported by the QNN EP, and that the inference results match the CPU EP results.
+//
+// - Uses uint8 as the quantization type.
+TEST_F(QnnHTPBackendTests, TestQDQLeakyReluOpSet16) {
+  RunLeakyReluOpQDQTest<uint8_t>(16, "TestQDQLeakyReluOpSet16");
+}
+
+#endif  // defined(__aarch64__) || defined(_M_ARM64) || defined(__linux__)
+}  // namespace test
+}  // namespace onnxruntime
+
+#endif