From 76d6345f0b8be602ff509aa49a715180e0e370fa Mon Sep 17 00:00:00 2001 From: Hector Li Date: Wed, 8 Jan 2025 12:52:08 -0800 Subject: [PATCH] Fix the issue for Gather int64 indices handling (#23274) ### Description Fix the issue for Gather int64 indices handling. Make it still insert Cast node if it's non-quantized Gather node. --- .../builder/opbuilder/gather_op_builder.cc | 38 +++++----------- .../providers/qnn/builder/qnn_model_wrapper.h | 18 ++++++++ .../test/providers/qnn/gather_op_htp_test.cc | 44 +++++++++++++++++++ 3 files changed, 72 insertions(+), 28 deletions(-) diff --git a/onnxruntime/core/providers/qnn/builder/opbuilder/gather_op_builder.cc b/onnxruntime/core/providers/qnn/builder/opbuilder/gather_op_builder.cc index 3737fcb54f..5549716751 100644 --- a/onnxruntime/core/providers/qnn/builder/opbuilder/gather_op_builder.cc +++ b/onnxruntime/core/providers/qnn/builder/opbuilder/gather_op_builder.cc @@ -126,23 +126,10 @@ static Status ProcessIndicesInput(QnnModelWrapper& qnn_model_wrapper, std::vector& input_names, bool do_op_validation) { const auto& input_name = indices_input.node_arg.Name(); - if (qnn_model_wrapper.IsQnnTensorWrapperExist(input_name)) { - LOGS(logger, VERBOSE) << "Tensor already added, skip it: " << input_name; - input_names.push_back(input_name); - return Status::OK(); - } TensorInfo indices_info = {}; ORT_RETURN_IF_ERROR(qnn_model_wrapper.GetTensorInfo(indices_input, indices_info)); - const bool is_npu_backend = IsNpuBackend(qnn_model_wrapper.GetQnnBackendType()); - const bool is_graph_input = qnn_model_wrapper.IsGraphInput(input_name); - ORT_RETURN_IF(is_npu_backend && - (indices_info.qnn_data_type == QNN_DATATYPE_INT_64) && - !(indices_info.is_initializer || is_graph_input), - "HTP backend doesn't support a Gather* op with a dynamic int64 input activation ", - "unless it is a graph input."); - std::vector qnn_indices_bytes; // Get raw bytes for static indices. @@ -165,27 +152,22 @@ static Status ProcessIndicesInput(QnnModelWrapper& qnn_model_wrapper, Qnn_TensorType_t tensor_type = qnn_model_wrapper.GetTensorType(input_name); std::vector cast_output_shape(indices_info.shape); - QnnTensorWrapper input_tensorwrapper(input_name, tensor_type, indices_info.qnn_data_type, QnnQuantParamsWrapper(), - std::move(indices_info.shape), std::move(qnn_indices_bytes)); - ORT_RETURN_IF_NOT(qnn_model_wrapper.AddTensorWrapper(std::move(input_tensorwrapper)), "Failed to add tensor."); + if (qnn_model_wrapper.IsQnnTensorWrapperExist(input_name)) { + LOGS(logger, VERBOSE) << "Tensor already added, skip it: " << input_name; + } else { + QnnTensorWrapper input_tensorwrapper(input_name, tensor_type, indices_info.qnn_data_type, QnnQuantParamsWrapper(), + std::move(indices_info.shape), std::move(qnn_indices_bytes)); + ORT_RETURN_IF_NOT(qnn_model_wrapper.AddTensorWrapper(std::move(input_tensorwrapper)), "Failed to add tensor."); + } // Insert QNN Cast op to convert dynamic indices from int64 to int32. std::string indices_input_name(input_name); if (indices_info.qnn_data_type == QNN_DATATYPE_INT_64) { assert(!indices_info.is_initializer); - indices_input_name = input_name + "_ort_qnn_ep_cast"; - QnnTensorWrapper cast_output(indices_input_name, QNN_TENSOR_TYPE_NATIVE, QNN_DATATYPE_INT_32, - QnnQuantParamsWrapper(), std::move(cast_output_shape)); - ORT_RETURN_IF_NOT(qnn_model_wrapper.AddTensorWrapper(std::move(cast_output)), "Failed to add tensor."); - ORT_RETURN_IF_NOT(qnn_model_wrapper.CreateQnnNode(indices_input_name, - QNN_OP_PACKAGE_NAME_QTI_AISW, - "Cast", - {input_name}, - {indices_input_name}, - {}, - do_op_validation), - "Failed to add node."); + ORT_RETURN_IF_ERROR(qnn_model_wrapper.AddInt64CastNode(input_name, indices_input_name, + std::move(cast_output_shape), + do_op_validation)); } input_names.push_back(indices_input_name); diff --git a/onnxruntime/core/providers/qnn/builder/qnn_model_wrapper.h b/onnxruntime/core/providers/qnn/builder/qnn_model_wrapper.h index 19d0f05811..875c93a686 100644 --- a/onnxruntime/core/providers/qnn/builder/qnn_model_wrapper.h +++ b/onnxruntime/core/providers/qnn/builder/qnn_model_wrapper.h @@ -224,6 +224,24 @@ class QnnModelWrapper { tensor_data_type, quantize_param, do_op_validation, is_for_input, is_for_output); } + Status AddInt64CastNode(const std::string& input_name, std::string& cast_output_name, + std::vector&& cast_output_shape, bool do_op_validation) { + cast_output_name = input_name + "_ort_qnn_ep_cast"; + QnnTensorWrapper cast_output(cast_output_name, QNN_TENSOR_TYPE_NATIVE, QNN_DATATYPE_INT_32, + QnnQuantParamsWrapper(), std::move(cast_output_shape)); + ORT_RETURN_IF_NOT(AddTensorWrapper(std::move(cast_output)), "Failed to add tensor."); + ORT_RETURN_IF_NOT(CreateQnnNode(cast_output_name, + QNN_OP_PACKAGE_NAME_QTI_AISW, + "Cast", + {input_name}, + {cast_output_name}, + {}, + do_op_validation), + "Failed to add node."); + + return Status::OK(); + } + Status UnpackInitializerData(const ONNX_NAMESPACE::TensorProto& initializer, std::vector& unpacked_tensor) const; diff --git a/onnxruntime/test/providers/qnn/gather_op_htp_test.cc b/onnxruntime/test/providers/qnn/gather_op_htp_test.cc index 55177cc7ed..21c3732321 100644 --- a/onnxruntime/test/providers/qnn/gather_op_htp_test.cc +++ b/onnxruntime/test/providers/qnn/gather_op_htp_test.cc @@ -119,6 +119,15 @@ TEST_F(QnnHTPBackendTests, GatherOp_IndicesStaticInt32_Axis0) { ExpectedEPNodeAssignment::All); } +// negative indices +TEST_F(QnnHTPBackendTests, GatherOp_IndicesStaticInt32_NegativeIndices) { + RunQDQGatherOpTest(TestInputDef({3, 2}, false, {1.0f, 1.2f, 2.3f, 3.4f, 4.5f, 5.7f}), + TestInputDef({2, 2}, true, {-1, 1, 1, 2}), + {utils::MakeAttribute("axis", static_cast(0))}, + 13, + ExpectedEPNodeAssignment::All); +} + // Test creates a DQ -> Gather -> Q -> DQ graph, and checks that all // nodes are supported by the QNN EP, and that the inference results are as accurate as CPU EP. // @@ -148,6 +157,41 @@ TEST_F(QnnHTPBackendTests, DISABLED_GatherOp_IndicesStaticInt32_Axis1) { ExpectedEPNodeAssignment::All); } +// Runs a non-QDQ model on HTP and compares output to CPU EP. +template +static void RunOpTest(const std::string& op_type, + const TestInputDef& input_def_1, + const TestInputDef& input_defs_2, + const std::vector& attrs, + int opset_version, + ExpectedEPNodeAssignment expected_ep_assignment, + const std::string& op_domain = kOnnxDomain, + float fp32_abs_err = 1e-3f) { + ProviderOptions provider_options; +#if defined(_WIN32) + provider_options["backend_path"] = "QnnHtp.dll"; +#else + provider_options["backend_path"] = "libQnnHtp.so"; +#endif + + // Runs model with a Q/DQ binary op and compares the outputs of the CPU and QNN EPs. + RunQnnModelTest(BuildOpTestCase(op_type, {input_def_1}, {input_defs_2}, attrs, op_domain), + provider_options, + opset_version, + expected_ep_assignment, + fp32_abs_err); +} + +// Non-QDQ model, Gather with static input and dynamic int64 indices +TEST_F(QnnHTPBackendTests, GatherOp_IndicesStaticInt64) { + RunOpTest("Gather", + TestInputDef({3, 2}, true, {1.0f, 1.2f, 2.3f, 3.4f, 4.5f, 5.7f}), + TestInputDef({2, 2}, false, {0, 1, 1, 2}), + {utils::MakeAttribute("axis", static_cast(0))}, + 13, + ExpectedEPNodeAssignment::All); +} + #endif // defined(__aarch64__) || defined(_M_ARM64) || defined(__linux__) } // namespace test } // namespace onnxruntime