diff --git a/onnxruntime/core/providers/qnn/builder/opbuilder/simple_op_builder.cc b/onnxruntime/core/providers/qnn/builder/opbuilder/simple_op_builder.cc index de05c78f89..a7bf0c19d8 100644 --- a/onnxruntime/core/providers/qnn/builder/opbuilder/simple_op_builder.cc +++ b/onnxruntime/core/providers/qnn/builder/opbuilder/simple_op_builder.cc @@ -7,6 +7,7 @@ #include "core/providers/qnn/builder/qnn_model_wrapper.h" #include "core/providers/qnn/builder/op_builder_factory.h" #include "core/common/safeint.h" +#include "core/util/qmath.h" #include "base_op_builder.h" @@ -34,7 +35,8 @@ class SimpleOpBuilder : public BaseOpBuilder { std::vector& param_tensor_names) const; Status ProcessAlphaAttribute(QnnModelWrapper& qnn_model_wrapper, const NodeUnit& node_unit, - const std::string input_name) const; + const std::string input_name, + bool is_quantized_model) const; Status HandleSingleTransposeNode(QnnModelWrapper& qnn_model_wrapper, const NodeUnit& node_unit, std::vector&& input_names, @@ -90,18 +92,33 @@ Status SimpleOpBuilder::ProcessPermAttribute(QnnModelWrapper& qnn_model_wrapper, return Status::OK(); } -Status SimpleOpBuilder::ProcessAlphaAttribute(QnnModelWrapper& qnn_model_wrapper, const NodeUnit& node_unit, const std::string input_name) const { +Status SimpleOpBuilder::ProcessAlphaAttribute(QnnModelWrapper& qnn_model_wrapper, + const NodeUnit& node_unit, + const std::string input_name, + bool is_quantized_model) const { NodeAttrHelper node_helper(node_unit); + Qnn_QuantizeParams_t quantize_param = QNN_QUANTIZE_PARAMS_INIT; + Qnn_DataType_t qnn_data_type = QNN_DATATYPE_FLOAT_32; union { float alpha; uint8_t unpack[sizeof(float)]; } tensor_data; tensor_data.alpha = node_helper.Get("alpha", 0.01f); - std::vector unpacked_data(tensor_data.unpack, tensor_data.unpack + sizeof(float)); - Qnn_QuantizeParams_t quantize_param = QNN_QUANTIZE_PARAMS_INIT; - InitializeQuantizeParam(quantize_param, false); + std::vector unpacked_data; + if (is_quantized_model) { + float scale; + 
uint8_t zero_point; + int64_t num_of_elements = 1; + concurrency::ThreadPool* thread_pool = nullptr; + GetQuantizationParameter(&tensor_data.alpha, num_of_elements, scale, zero_point, thread_pool); + unpacked_data.resize(1); + ParQuantizeLinear(&tensor_data.alpha, unpacked_data.data(), num_of_elements, scale, zero_point, thread_pool); + InitializeQuantizeParam(quantize_param, is_quantized_model, scale, static_cast(zero_point)); + qnn_data_type = QNN_DATATYPE_UFIXED_POINT_8; + } else { + unpacked_data.assign(tensor_data.unpack, tensor_data.unpack + sizeof(float)); + } std::vector input_shape{1}; - Qnn_DataType_t qnn_data_type = QNN_DATATYPE_FLOAT_32; Qnn_TensorType_t tensor_type = QNN_TENSOR_TYPE_STATIC; QnnTensorWrapper input_tensorwrapper(input_name, tensor_type, qnn_data_type, quantize_param, std::move(input_shape), std::move(unpacked_data)); @@ -205,7 +222,7 @@ Status SimpleOpBuilder::ProcessAttributesAndOutputs(QnnModelWrapper& qnn_model_w if (node_unit.OpType() == "LeakyRelu") { std::string input_name = "alpha"; - ORT_RETURN_IF_ERROR(ProcessAlphaAttribute(qnn_model_wrapper, node_unit, input_name)); + ORT_RETURN_IF_ERROR(ProcessAlphaAttribute(qnn_model_wrapper, node_unit, input_name, is_quantized_model)); input_names.push_back(input_name); } diff --git a/onnxruntime/test/optimizer/qdq_test_utils.h b/onnxruntime/test/optimizer/qdq_test_utils.h index e870572e50..d86d27d1ff 100644 --- a/onnxruntime/test/optimizer/qdq_test_utils.h +++ b/onnxruntime/test/optimizer/qdq_test_utils.h @@ -214,9 +214,6 @@ GetQDQTestCaseFn BuildQDQGatherOpTestCase(const std::vector& input_shap // input_data -> Q/DQ -> auto* input_qdq_output = AddQDQNodePair(builder, input_data, .003f, 1); - std::vector gather_op_inputs; - gather_op_inputs.push_back(input_qdq_output); - auto* indices_input = builder.MakeInitializer(indices_shape, indices); auto* gather_output = builder.MakeIntermediate(); @@ -250,9 +247,6 @@ GetQDQTestCaseFn BuildQDQGatherOpScalarIndicesTestCase(const std::vector Q/DQ -> 
auto* input_qdq_output = AddQDQNodePair(builder, input_data, .003f, 1); - std::vector gather_op_inputs; - gather_op_inputs.push_back(input_qdq_output); - auto* indices_input = builder.MakeScalarInitializer(indices); auto* gather_output = builder.MakeIntermediate(); @@ -269,6 +263,35 @@ GetQDQTestCaseFn BuildQDQGatherOpScalarIndicesTestCase(const std::vector Q -> DQ -> | LeakyRelu | -> Q -> DQ -> output (f32) +// |_______________________| +// +template +GetQDQTestCaseFn BuildQDQLeakyReluOpTestCase(const std::vector& input_shape) { + return [input_shape](ModelTestBuilder& builder) { + auto* input_data = builder.MakeInput(input_shape, -1.0f, 1.0f); + auto* final_output = builder.MakeOutput(); + + // input_data -> Q/DQ -> + auto* input_qdq_output = AddQDQNodePair(builder, input_data, 0.0473f, 137); + + auto* leakyrelu_output = builder.MakeIntermediate(); + Node& leakyrelu_node = builder.AddNode("LeakyRelu", {input_qdq_output}, {leakyrelu_output}); + leakyrelu_node.AddAttribute("alpha", 0.2f); + + // -> Q/DQ -> final_output + auto* q_output = builder.MakeIntermediate(); + builder.AddQuantizeLinearNode(leakyrelu_output, 0.02696f, 48, + q_output); + + builder.AddDequantizeLinearNode(q_output, 0.02696f, 48, + final_output); + }; +} + template GetQDQTestCaseFn BuildQDQConvTestCase(const std::vector& input_shape, const std::vector& weights_shape) { return [input_shape, weights_shape](ModelTestBuilder& builder) { diff --git a/onnxruntime/test/providers/qnn/gather_op_htp_test.cc b/onnxruntime/test/providers/qnn/gather_op_htp_test.cc index 09d98fab0e..b482e5acc1 100644 --- a/onnxruntime/test/providers/qnn/gather_op_htp_test.cc +++ b/onnxruntime/test/providers/qnn/gather_op_htp_test.cc @@ -19,9 +19,9 @@ namespace test { * Runs a Gather op model on the QNN HTP backend. Checks the graph node assignment, and that inference * outputs for QNN and CPU match. * - * \param op_type The Gather op type (e.g., ReduceSum). * \param opset The opset version. 
* \param test_description Description of the test for error reporting. + * \param scalar_indices Whether the indices input is scalar or not. * \param expected_ep_assignment How many nodes are expected to be assigned to QNN (All, Some, or None) */ template diff --git a/onnxruntime/test/providers/qnn/leakyrelu_op_htp_test.cc b/onnxruntime/test/providers/qnn/leakyrelu_op_htp_test.cc new file mode 100644 index 0000000000..18f2e11337 --- /dev/null +++ b/onnxruntime/test/providers/qnn/leakyrelu_op_htp_test.cc @@ -0,0 +1,66 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +#if !defined(ORT_MINIMAL_BUILD) + +#include +#include "core/graph/graph.h" + +#include "test/optimizer/qdq_test_utils.h" +#include "test/providers/qnn/qnn_test_utils.h" + +#include "gtest/gtest.h" + +namespace onnxruntime { +namespace test { +#if defined(__aarch64__) || defined(_M_ARM64) || defined(__linux__) + +/** + * Runs a LeakyRelu op model on the QNN HTP backend. Checks the graph node assignment, and that inference + * outputs for QNN and CPU match. + * + * \param op_type The LeakyRelu op type (e.g., ReduceSum). + * \param opset The opset version. + * \param test_description Description of the test for error reporting.
+ * \param expected_ep_assignment How many nodes are expected to be assigned to QNN (All, Some, or None) + */ +template +static void RunLeakyReluOpQDQTest(int opset, const char* test_description, + ExpectedEPNodeAssignment expected_ep_assignment = ExpectedEPNodeAssignment::All) { + ProviderOptions provider_options; +#if defined(_WIN32) + provider_options["backend_path"] = "QnnHtp.dll"; +#else + provider_options["backend_path"] = "libQnnHtp.so"; +#endif + + constexpr int expected_nodes_in_partition = 1; + RunQnnModelTest(BuildQDQLeakyReluOpTestCase({2, 3, 4}), + provider_options, + opset, + expected_ep_assignment, + expected_nodes_in_partition, + test_description); +} + +// Test creates a Q -> DQ -> LeakyRelu -> Q -> DQ graph, and checks that all +// nodes are supported by the QNN EP, and that the inference results match the CPU EP results. +// +// - Uses uint8 as the quantization type. +TEST_F(QnnHTPBackendTests, TestQDQLeakyReluOpSet15) { + RunLeakyReluOpQDQTest(15, "TestQDQLeakyReluOpSet15"); +} + +// Test creates a Q -> DQ -> LeakyRelu -> Q -> DQ graph, and checks that all +// nodes are supported by the QNN EP, and that the inference results match the CPU EP results. +// +// - Uses uint8 as the quantization type. +TEST_F(QnnHTPBackendTests, TestQDQLeakyReluOpSet16) { + RunLeakyReluOpQDQTest(16, "TestQDQLeakyReluOpSet16"); +} + +#endif // defined(__aarch64__) || defined(_M_ARM64) || defined(__linux__) +} // namespace test +} // namespace onnxruntime + +#endif \ No newline at end of file