From 3dc9720cfc9df7e11359ddbae77a0cb00f71f904 Mon Sep 17 00:00:00 2001
From: Hector Li <hecli@microsoft.com>
Date: Tue, 25 Apr 2023 20:11:06 -0700
Subject: [PATCH] [QNN EP] Enable Qnn EP op support Elu, HardSwish, Atan
 (#15681)

### Description
Enable some Ops for QNN EP: Elu, HardSwish, Atan

### Motivation and Context
unblock more models
---
 .../selectors_actions/shared/utils.cc         |  3 ++
 .../qnn/builder/op_builder_factory.cc         |  4 ++
 .../qnn/builder/opbuilder/base_op_builder.h   |  4 ++
 .../builder/opbuilder/simple_op_builder.cc    | 33 ++++++++++++---
 .../core/providers/qnn/builder/qnn_def.h      |  1 +
 .../test/providers/qnn/simple_op_htp_test.cc  | 41 +++++++++++++++----
 6 files changed, 72 insertions(+), 14 deletions(-)

diff --git a/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/shared/utils.cc b/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/shared/utils.cc
index 132db0ae49..895524aea3 100644
--- a/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/shared/utils.cc
+++ b/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/shared/utils.cc
@@ -50,10 +50,13 @@ static const OpVersionsAndSelector::OpVersionsMap GetUnaryOpVersionsMap() {
           {"ReduceSum", {}},
           {"Relu", {}},
           {"Gelu", {}},
+          {"Elu", {}},
+          {"HardSwish", {}},
           {"Sigmoid", {}},
           {"Slice", {}},
           {"Softmax", {}},
           {"Sqrt", {}},
+          {"Atan", {}},
           {"Tanh", {}},
           {"Exp", {}}};
 }
diff --git a/onnxruntime/core/providers/qnn/builder/op_builder_factory.cc b/onnxruntime/core/providers/qnn/builder/op_builder_factory.cc
index fa2d5c9733..36767f5ed3 100644
--- a/onnxruntime/core/providers/qnn/builder/op_builder_factory.cc
+++ b/onnxruntime/core/providers/qnn/builder/op_builder_factory.cc
@@ -15,6 +15,7 @@ namespace qnn {
 OpBuilderRegistrations::OpBuilderRegistrations() {
   {
     CreateSimpleOpBuilder("Add", *this);
+    CreateSimpleOpBuilder("Atan", *this);
     CreateSimpleOpBuilder("Mul", *this);
     CreateSimpleOpBuilder("Abs", *this);
     CreateSimpleOpBuilder("And", *this);
@@ -39,6 +40,7 @@ OpBuilderRegistrations::OpBuilderRegistrations() {
     CreateSimpleOpBuilder("PRelu", *this);
     CreateSimpleOpBuilder("Relu", *this);
     CreateSimpleOpBuilder("Gelu", *this);
+    CreateSimpleOpBuilder("Elu", *this);
     CreateSimpleOpBuilder("Round", *this);
     CreateSimpleOpBuilder("Where", *this);
     CreateSimpleOpBuilder("Sigmoid", *this);
@@ -55,6 +57,8 @@ OpBuilderRegistrations::OpBuilderRegistrations() {
 
     CreateSimpleOpBuilder("QuantizeLinear", *this);
     CreateSimpleOpBuilder("DequantizeLinear", *this);
+
+    CreateSimpleOpBuilder("HardSwish", *this);
   }
 
   {
diff --git a/onnxruntime/core/providers/qnn/builder/opbuilder/base_op_builder.h b/onnxruntime/core/providers/qnn/builder/opbuilder/base_op_builder.h
index 7e1a6024bb..d428fe49ba 100644
--- a/onnxruntime/core/providers/qnn/builder/opbuilder/base_op_builder.h
+++ b/onnxruntime/core/providers/qnn/builder/opbuilder/base_op_builder.h
@@ -96,6 +96,7 @@ class BaseOpBuilder : public IOpBuilder {
         {"Mul", "ElementWiseMultiply"},
         {"Abs", "ElementWiseAbs"},
         {"And", "ElementWiseAnd"},
+        {"Atan", "ElementWiseAtan"},
         {"Ceil", "ElementWiseCeil"},
         {"Cast", "Cast"},
         {"Clip", "ReluMinMax"},
@@ -140,10 +141,13 @@ class BaseOpBuilder : public IOpBuilder {
 
         {"MatMul", "MatMul"},
 
+        {"Elu", "Elu"},
         {"Relu", "Relu"},
         {"Gelu", "Gelu"},
         {"Sigmoid", "Sigmoid"},
 
+        {"HardSwish", "HardSwish"},
+
         {"Conv", "Conv2d"},
 
         {"GlobalAveragePool", "PoolAvg2d"},
diff --git a/onnxruntime/core/providers/qnn/builder/opbuilder/simple_op_builder.cc b/onnxruntime/core/providers/qnn/builder/opbuilder/simple_op_builder.cc
index 6730d3b7a5..4ab8169028 100644
--- a/onnxruntime/core/providers/qnn/builder/opbuilder/simple_op_builder.cc
+++ b/onnxruntime/core/providers/qnn/builder/opbuilder/simple_op_builder.cc
@@ -35,8 +35,11 @@ class SimpleOpBuilder : public BaseOpBuilder {
                               std::vector<std::string>& param_tensor_names) const;
   Status ProcessAlphaAttribute(QnnModelWrapper& qnn_model_wrapper,
                                const NodeUnit& node_unit,
-                               const std::string input_name,
-                               bool is_quantized_model) const;
+                               std::vector<std::string>& param_tensor_names) const;
+  Status ProcessAlphaAttributeAsInput(QnnModelWrapper& qnn_model_wrapper,
+                                      const NodeUnit& node_unit,
+                                      const std::string input_name,
+                                      bool is_quantized_model) const;
   Status HandleSingleTransposeNode(QnnModelWrapper& qnn_model_wrapper,
                                    const NodeUnit& node_unit,
                                    std::vector<std::string>&& input_names,
@@ -94,8 +97,24 @@ Status SimpleOpBuilder::ProcessPermAttribute(QnnModelWrapper& qnn_model_wrapper,
 
 Status SimpleOpBuilder::ProcessAlphaAttribute(QnnModelWrapper& qnn_model_wrapper,
                                               const NodeUnit& node_unit,
-                                              const std::string input_name,
-                                              bool is_quantized_model) const {
+                                              std::vector<std::string>& param_tensor_names) const {
+  NodeAttrHelper node_helper(node_unit);
+  float alpha = node_helper.Get("alpha", 1.0f);
+  Qnn_Scalar_t alpha_qnn_scalar = QNN_SCALAR_INIT;
+  alpha_qnn_scalar.dataType = QNN_DATATYPE_FLOAT_32;
+  alpha_qnn_scalar.floatValue = alpha;
+
+  QnnParamWrapper alpha_param(node_unit.Index(), node_unit.Name(), qnn_def::alpha, alpha_qnn_scalar);
+  param_tensor_names.push_back(alpha_param.GetParamTensorName());
+  qnn_model_wrapper.AddParamWrapper(std::move(alpha_param));
+
+  return Status::OK();
+}
+
+Status SimpleOpBuilder::ProcessAlphaAttributeAsInput(QnnModelWrapper& qnn_model_wrapper,
+                                                     const NodeUnit& node_unit,
+                                                     const std::string input_name,
+                                                     bool is_quantized_model) const {
   NodeAttrHelper node_helper(node_unit);
   Qnn_QuantizeParams_t quantize_param = QNN_QUANTIZE_PARAMS_INIT;
   Qnn_DataType_t qnn_data_type = QNN_DATATYPE_FLOAT_32;
@@ -222,10 +241,14 @@ Status SimpleOpBuilder::ProcessAttributesAndOutputs(QnnModelWrapper& qnn_model_w
 
   if (node_unit.OpType() == "LeakyRelu") {
     std::string input_name = "alpha";
-    ORT_RETURN_IF_ERROR(ProcessAlphaAttribute(qnn_model_wrapper, node_unit, input_name, is_quantized_model));
+    ORT_RETURN_IF_ERROR(ProcessAlphaAttributeAsInput(qnn_model_wrapper, node_unit, input_name, is_quantized_model));
     input_names.push_back(input_name);
   }
 
+  if (node_unit.OpType() == "Elu") {
+    ORT_RETURN_IF_ERROR(ProcessAlphaAttribute(qnn_model_wrapper, node_unit, param_tensor_names));
+  }
+
   ORT_RETURN_IF_ERROR(ProcessOutputs(qnn_model_wrapper, node_unit,
                                      std::move(input_names),
                                      std::move(param_tensor_names),
diff --git a/onnxruntime/core/providers/qnn/builder/qnn_def.h b/onnxruntime/core/providers/qnn/builder/qnn_def.h
index d6127ed0b0..767931b93f 100644
--- a/onnxruntime/core/providers/qnn/builder/qnn_def.h
+++ b/onnxruntime/core/providers/qnn/builder/qnn_def.h
@@ -427,6 +427,7 @@ const std::string rounding_mode = "rounding_mode";
 const std::string topk = "k";
 const std::string multiples = "multiples";
 const std::string epsilon = "epsilon";
+const std::string alpha = "alpha";
 }  // namespace qnn_def
 
 }  // namespace qnn
diff --git a/onnxruntime/test/providers/qnn/simple_op_htp_test.cc b/onnxruntime/test/providers/qnn/simple_op_htp_test.cc
index ea7f718b79..96b340295b 100644
--- a/onnxruntime/test/providers/qnn/simple_op_htp_test.cc
+++ b/onnxruntime/test/providers/qnn/simple_op_htp_test.cc
@@ -23,8 +23,10 @@ namespace test {
 //
 // Currently used to test QNN EP.
 template <typename InputQType>
-GetQDQTestCaseFn BuildQDQSingleInputOpTestCase(const std::vector<int64_t>& input_shape, const std::string& op_type) {
-  return [input_shape, op_type](ModelTestBuilder& builder) {
+GetQDQTestCaseFn BuildQDQSingleInputOpTestCase(const std::vector<int64_t>& input_shape,
+                                               const std::string& op_type,
+                                               const std::string& domain = kOnnxDomain) {
+  return [input_shape, op_type, domain](ModelTestBuilder& builder) {
     const InputQType quant_zero_point = 0;
     const float quant_scale = 1.0f;
 
@@ -34,7 +36,7 @@ GetQDQTestCaseFn BuildQDQSingleInputOpTestCase(const std::vector<int64_t>& input
     builder.AddDequantizeLinearNode<InputQType>(input, quant_scale, quant_zero_point, dq_input);
 
     auto* op_output = builder.MakeIntermediate();
-    builder.AddNode(op_type, {dq_input}, {op_output}, kMSDomain);
+    builder.AddNode(op_type, {dq_input}, {op_output}, domain);
 
     auto* q_output = builder.MakeIntermediate();
     builder.AddQuantizeLinearNode<InputQType>(op_output, quant_scale, quant_zero_point, q_output);
@@ -45,7 +47,7 @@ GetQDQTestCaseFn BuildQDQSingleInputOpTestCase(const std::vector<int64_t>& input
 }
 
 /**
- * Runs an BatchNormalization model on the QNN HTP backend. Checks the graph node assignment, and that inference
+ * Runs a simple op model on the QNN HTP backend. Checks the graph node assignment, and that inference
  * outputs for QNN and CPU match.
  *
  * \param input_shape The input's shape.
@@ -55,7 +57,10 @@ GetQDQTestCaseFn BuildQDQSingleInputOpTestCase(const std::vector<int64_t>& input
  */
 static void RunQDQSingleInputOpTest(const std::vector<int64_t>& input_shape, const std::string& op_type,
                                     const char* test_description,
-                                    ExpectedEPNodeAssignment expected_ep_assignment, int num_nodes_in_graph) {
+                                    int opset_version,
+                                    ExpectedEPNodeAssignment expected_ep_assignment,
+                                    int num_nodes_in_graph,
+                                    const std::string& domain = kOnnxDomain) {
   ProviderOptions provider_options;
 #if defined(_WIN32)
   provider_options["backend_path"] = "QnnHtp.dll";
@@ -64,18 +69,36 @@ static void RunQDQSingleInputOpTest(const std::vector<int64_t>& input_shape, con
 #endif
 
   // Runs model with DQ-> InstanceNorm -> Q and compares the outputs of the CPU and QNN EPs.
-  RunQnnModelTest(BuildQDQSingleInputOpTestCase<uint8_t>(input_shape, op_type),
+  RunQnnModelTest(BuildQDQSingleInputOpTestCase<uint8_t>(input_shape, op_type, domain),
                   provider_options,
-                  11,
+                  opset_version,
                   expected_ep_assignment,
                   num_nodes_in_graph,
                   test_description);
 }
 
-// Check that QNN compiles DQ -> BatchNormalization -> Q as a single unit.
+// Check that QNN compiles DQ -> Gelu -> Q as a single unit.
 // Use an input of rank 3.
 TEST_F(QnnHTPBackendTests, TestQDQGeluTest) {
-  RunQDQSingleInputOpTest({1, 2, 3}, "Gelu", "TestQDQGeluTest", ExpectedEPNodeAssignment::All, 1);
+  RunQDQSingleInputOpTest({1, 2, 3}, "Gelu", "TestQDQGeluTest", 11, ExpectedEPNodeAssignment::All, 1, kMSDomain);
+}
+
+// Check that QNN compiles DQ -> Elu -> Q as a single unit.
+// Use an input of rank 3.
+TEST_F(QnnHTPBackendTests, TestQDQEluTest) {
+  RunQDQSingleInputOpTest({1, 2, 3}, "Elu", "TestQDQEluTest", 11, ExpectedEPNodeAssignment::All, 1);
+}
+
+// Check that QNN compiles DQ -> HardSwish -> Q as a single unit.
+// Use an input of rank 3.
+TEST_F(QnnHTPBackendTests, TestQDQHardSwishTest) {
+  RunQDQSingleInputOpTest({1, 2, 3}, "HardSwish", "TestQDQHardSwishTest", 14, ExpectedEPNodeAssignment::All, 1);
+}
+
+// Check that QNN compiles DQ -> Atan -> Q as a single unit.
+// Use an input of rank 3.
+TEST_F(QnnHTPBackendTests, TestQDQAtanTest) {
+  RunQDQSingleInputOpTest({1, 2, 3}, "Atan", "TestQDQAtanTest", 11, ExpectedEPNodeAssignment::All, 1);
 }
 
 #endif  // defined(__aarch64__) || defined(_M_ARM64) || defined(__linux__)