[QNN EP] Enable Qnn EP op support Elu, HardSwish, Atan (#15681)

### Description
Enable some ops for QNN EP: Elu, HardSwish, Atan.

### Motivation and Context
Unblock more models.
2026-07-19 19:00:47 +00:00 · 2023-04-25 20:11:06 -07:00 · 2023-04-25 20:11:06 -07:00 · 3dc9720cfc
commit 3dc9720cfc
parent 1524f73a09
6 changed files with 72 additions and 14 deletions
--- a/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/shared/utils.cc
+++ b/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/shared/utils.cc
@ -50,10 +50,13 @@ static const OpVersionsAndSelector::OpVersionsMap GetUnaryOpVersionsMap() {
          {"ReduceSum", {}},
          {"Relu", {}},
          {"Gelu", {}},
+          {"Elu", {}},
+          {"HardSwish", {}},
          {"Sigmoid", {}},
          {"Slice", {}},
          {"Softmax", {}},
          {"Sqrt", {}},
+          {"Atan", {}},
          {"Tanh", {}},
          {"Exp", {}}};
 }
--- a/onnxruntime/core/providers/qnn/builder/op_builder_factory.cc
+++ b/onnxruntime/core/providers/qnn/builder/op_builder_factory.cc
@ -15,6 +15,7 @@ namespace qnn {
 OpBuilderRegistrations::OpBuilderRegistrations() {
  {
    CreateSimpleOpBuilder("Add", *this);
+    CreateSimpleOpBuilder("Atan", *this);
    CreateSimpleOpBuilder("Mul", *this);
    CreateSimpleOpBuilder("Abs", *this);
    CreateSimpleOpBuilder("And", *this);
@ -39,6 +40,7 @@ OpBuilderRegistrations::OpBuilderRegistrations() {
    CreateSimpleOpBuilder("PRelu", *this);
    CreateSimpleOpBuilder("Relu", *this);
    CreateSimpleOpBuilder("Gelu", *this);
+    CreateSimpleOpBuilder("Elu", *this);
    CreateSimpleOpBuilder("Round", *this);
    CreateSimpleOpBuilder("Where", *this);
    CreateSimpleOpBuilder("Sigmoid", *this);
@ -55,6 +57,8 @@ OpBuilderRegistrations::OpBuilderRegistrations() {

    CreateSimpleOpBuilder("QuantizeLinear", *this);
    CreateSimpleOpBuilder("DequantizeLinear", *this);
+
+    CreateSimpleOpBuilder("HardSwish", *this);
  }

  {
--- a/onnxruntime/core/providers/qnn/builder/opbuilder/base_op_builder.h
+++ b/onnxruntime/core/providers/qnn/builder/opbuilder/base_op_builder.h
@ -96,6 +96,7 @@ class BaseOpBuilder : public IOpBuilder {
        {"Mul", "ElementWiseMultiply"},
        {"Abs", "ElementWiseAbs"},
        {"And", "ElementWiseAnd"},
+        {"Atan", "ElementWiseAtan"},
        {"Ceil", "ElementWiseCeil"},
        {"Cast", "Cast"},
        {"Clip", "ReluMinMax"},
@ -140,10 +141,13 @@ class BaseOpBuilder : public IOpBuilder {

        {"MatMul", "MatMul"},

+        {"Elu", "Elu"},
        {"Relu", "Relu"},
        {"Gelu", "Gelu"},
        {"Sigmoid", "Sigmoid"},

+        {"HardSwish", "HardSwish"},
+
        {"Conv", "Conv2d"},

        {"GlobalAveragePool", "PoolAvg2d"},
--- a/onnxruntime/core/providers/qnn/builder/opbuilder/simple_op_builder.cc
+++ b/onnxruntime/core/providers/qnn/builder/opbuilder/simple_op_builder.cc
@ -35,8 +35,11 @@ class SimpleOpBuilder : public BaseOpBuilder {
                              std::vector<std::string>& param_tensor_names) const;
  Status ProcessAlphaAttribute(QnnModelWrapper& qnn_model_wrapper,
                               const NodeUnit& node_unit,
-                               const std::string input_name,
-                               bool is_quantized_model) const;
+                               std::vector<std::string>& param_tensor_names) const;
+  Status ProcessAlphaAttributeAsInput(QnnModelWrapper& qnn_model_wrapper,
+                                      const NodeUnit& node_unit,
+                                      const std::string input_name,
+                                      bool is_quantized_model) const;
  Status HandleSingleTransposeNode(QnnModelWrapper& qnn_model_wrapper,
                                   const NodeUnit& node_unit,
                                   std::vector<std::string>&& input_names,
@ -94,8 +97,24 @@ Status SimpleOpBuilder::ProcessPermAttribute(QnnModelWrapper& qnn_model_wrapper,

 Status SimpleOpBuilder::ProcessAlphaAttribute(QnnModelWrapper& qnn_model_wrapper,
                                              const NodeUnit& node_unit,
-                                              const std::string input_name,
-                                              bool is_quantized_model) const {
+                                              std::vector<std::string>& param_tensor_names) const {
+  NodeAttrHelper node_helper(node_unit);
+  float alpha = node_helper.Get("alpha", 1.0f);
+  Qnn_Scalar_t alpha_qnn_scalar = QNN_SCALAR_INIT;
+  alpha_qnn_scalar.dataType = QNN_DATATYPE_FLOAT_32;
+  alpha_qnn_scalar.floatValue = alpha;
+
+  QnnParamWrapper alpha_param(node_unit.Index(), node_unit.Name(), qnn_def::alpha, alpha_qnn_scalar);
+  param_tensor_names.push_back(alpha_param.GetParamTensorName());
+  qnn_model_wrapper.AddParamWrapper(std::move(alpha_param));
+
+  return Status::OK();
+}
+
+Status SimpleOpBuilder::ProcessAlphaAttributeAsInput(QnnModelWrapper& qnn_model_wrapper,
+                                                     const NodeUnit& node_unit,
+                                                     const std::string input_name,
+                                                     bool is_quantized_model) const {
  NodeAttrHelper node_helper(node_unit);
  Qnn_QuantizeParams_t quantize_param = QNN_QUANTIZE_PARAMS_INIT;
  Qnn_DataType_t qnn_data_type = QNN_DATATYPE_FLOAT_32;
@ -222,10 +241,14 @@ Status SimpleOpBuilder::ProcessAttributesAndOutputs(QnnModelWrapper& qnn_model_w

  if (node_unit.OpType() == "LeakyRelu") {
    std::string input_name = "alpha";
-    ORT_RETURN_IF_ERROR(ProcessAlphaAttribute(qnn_model_wrapper, node_unit, input_name, is_quantized_model));
+    ORT_RETURN_IF_ERROR(ProcessAlphaAttributeAsInput(qnn_model_wrapper, node_unit, input_name, is_quantized_model));
    input_names.push_back(input_name);
  }

+  if (node_unit.OpType() == "Elu") {
+    ORT_RETURN_IF_ERROR(ProcessAlphaAttribute(qnn_model_wrapper, node_unit, param_tensor_names));
+  }
+
  ORT_RETURN_IF_ERROR(ProcessOutputs(qnn_model_wrapper, node_unit,
                                     std::move(input_names),
                                     std::move(param_tensor_names),
--- a/onnxruntime/core/providers/qnn/builder/qnn_def.h
+++ b/onnxruntime/core/providers/qnn/builder/qnn_def.h
@ -427,6 +427,7 @@ const std::string rounding_mode = "rounding_mode";
 const std::string topk = "k";
 const std::string multiples = "multiples";
 const std::string epsilon = "epsilon";
+const std::string alpha = "alpha";
 }  // namespace qnn_def

 }  // namespace qnn
--- a/onnxruntime/test/providers/qnn/simple_op_htp_test.cc
+++ b/onnxruntime/test/providers/qnn/simple_op_htp_test.cc
@ -23,8 +23,10 @@ namespace test {
 //
 // Currently used to test QNN EP.
 template <typename InputQType>
-GetQDQTestCaseFn BuildQDQSingleInputOpTestCase(const std::vector<int64_t>& input_shape, const std::string& op_type) {
-  return [input_shape, op_type](ModelTestBuilder& builder) {
+GetQDQTestCaseFn BuildQDQSingleInputOpTestCase(const std::vector<int64_t>& input_shape,
+                                               const std::string& op_type,
+                                               const std::string& domain = kOnnxDomain) {
+  return [input_shape, op_type, domain](ModelTestBuilder& builder) {
    const InputQType quant_zero_point = 0;
    const float quant_scale = 1.0f;

@ -34,7 +36,7 @@ GetQDQTestCaseFn BuildQDQSingleInputOpTestCase(const std::vector<int64_t>& input
    builder.AddDequantizeLinearNode<InputQType>(input, quant_scale, quant_zero_point, dq_input);

    auto* op_output = builder.MakeIntermediate();
-    builder.AddNode(op_type, {dq_input}, {op_output}, kMSDomain);
+    builder.AddNode(op_type, {dq_input}, {op_output}, domain);

    auto* q_output = builder.MakeIntermediate();
    builder.AddQuantizeLinearNode<InputQType>(op_output, quant_scale, quant_zero_point, q_output);
@ -45,7 +47,7 @@ GetQDQTestCaseFn BuildQDQSingleInputOpTestCase(const std::vector<int64_t>& input
 }

 /**
- * Runs an BatchNormalization model on the QNN HTP backend. Checks the graph node assignment, and that inference
+ * Runs a simple op model on the QNN HTP backend. Checks the graph node assignment, and that inference
 * outputs for QNN and CPU match.
 *
 * \param input_shape The input's shape.
@ -55,7 +57,10 @@ GetQDQTestCaseFn BuildQDQSingleInputOpTestCase(const std::vector<int64_t>& input
 */
 static void RunQDQSingleInputOpTest(const std::vector<int64_t>& input_shape, const std::string& op_type,
                                    const char* test_description,
-                                    ExpectedEPNodeAssignment expected_ep_assignment, int num_nodes_in_graph) {
+                                    int opset_version,
+                                    ExpectedEPNodeAssignment expected_ep_assignment,
+                                    int num_nodes_in_graph,
+                                    const std::string& domain = kOnnxDomain) {
  ProviderOptions provider_options;
 #if defined(_WIN32)
  provider_options["backend_path"] = "QnnHtp.dll";
@ -64,18 +69,36 @@ static void RunQDQSingleInputOpTest(const std::vector<int64_t>& input_shape, con
 #endif

  // Runs model with DQ-> InstanceNorm -> Q and compares the outputs of the CPU and QNN EPs.
-  RunQnnModelTest(BuildQDQSingleInputOpTestCase<uint8_t>(input_shape, op_type),
+  RunQnnModelTest(BuildQDQSingleInputOpTestCase<uint8_t>(input_shape, op_type, domain),
                  provider_options,
-                  11,
+                  opset_version,
                  expected_ep_assignment,
                  num_nodes_in_graph,
                  test_description);
 }

-// Check that QNN compiles DQ -> BatchNormalization -> Q as a single unit.
+// Check that QNN compiles DQ -> Gelu -> Q as a single unit.
 // Use an input of rank 3.
 TEST_F(QnnHTPBackendTests, TestQDQGeluTest) {
-  RunQDQSingleInputOpTest({1, 2, 3}, "Gelu", "TestQDQGeluTest", ExpectedEPNodeAssignment::All, 1);
+  RunQDQSingleInputOpTest({1, 2, 3}, "Gelu", "TestQDQGeluTest", 11, ExpectedEPNodeAssignment::All, 1, kMSDomain);
+}
+
+// Check that QNN compiles DQ -> Elu -> Q as a single unit.
+// Use an input of rank 3.
+TEST_F(QnnHTPBackendTests, TestQDQEluTest) {
+  RunQDQSingleInputOpTest({1, 2, 3}, "Elu", "TestQDQEluTest", 11, ExpectedEPNodeAssignment::All, 1);
+}
+
+// Check that QNN compiles DQ -> HardSwish -> Q as a single unit.
+// Use an input of rank 3. HardSwish requires ONNX opset 14.
+TEST_F(QnnHTPBackendTests, TestQDQHardSwishTest) {
+  RunQDQSingleInputOpTest({1, 2, 3}, "HardSwish", "TestQDQHardSwishTest", 14, ExpectedEPNodeAssignment::All, 1);
+}
+
+// Check that QNN compiles DQ -> Atan -> Q as a single unit.
+// Use an input of rank 3.
+TEST_F(QnnHTPBackendTests, TestQDQAtanTest) {
+  RunQDQSingleInputOpTest({1, 2, 3}, "Atan", "TestQDQAtanTest", 11, ExpectedEPNodeAssignment::All, 1);
 }

 #endif  // defined(__aarch64__) || defined(_M_ARM64) || defined(__linux__)