From 9768a727e1006b84673f818924fee20b5c4288e1 Mon Sep 17 00:00:00 2001
From: Hector Li
Date: Wed, 6 Dec 2023 13:07:09 -0800
Subject: [PATCH] [QNN EP] Fix a bug where a context binary cannot be created
 if the model has inputs/outputs with different data types (#18722)

Fix a bug where a context binary cannot be created if the model has
inputs/outputs with different data types.

### Description
Update the EPContext op schema to unblock nodes with different data types
among their inputs and outputs.
---
 docs/ContribOperators.md                      |  4 +-
 .../core/graph/contrib_ops/contrib_defs.cc    | 10 +--
 .../test/providers/qnn/qnn_basic_test.cc      | 72 +++++++++++++++++++
 .../test/providers/qnn/qnn_test_utils.cc      |  4 +-
 .../test/providers/qnn/qnn_test_utils.h       |  4 +-
 onnxruntime/test/util/include/test_utils.h    |  3 +-
 onnxruntime/test/util/test_utils.cc           |  7 +-
 7 files changed, 89 insertions(+), 15 deletions(-)

diff --git a/docs/ContribOperators.md b/docs/ContribOperators.md
index c73f978bdf..e5b43ddba8 100644
--- a/docs/ContribOperators.md
+++ b/docs/ContribOperators.md
@@ -1599,14 +1599,14 @@ This version of the operator has been available since version 1 of the 'com.micr
 #### Inputs (1 - ∞)
 
 <dl>
-
inputs (variadic) : T
+
inputs (variadic, heterogeneous) : T
List of tensors for inputs
#### Outputs (1 - ∞)
-
outputs (variadic) : T
+
outputs (variadic, heterogeneous) : T
One or more outputs, list of tensors for outputs
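Note: ContribOperators.md is generated from the operator schemas, so the "heterogeneous" wording above simply reflects the schema change in contrib_defs.cc below. The argument flipped from `true` to `false` in that hunk is OpSchema's `is_homogeneous` parameter for variadic inputs/outputs. As an abridged sketch (not the full registration, which also sets the doc string, the op's attributes, and the `T` type constraint shown in the hunk), the schema after this patch looks roughly like:

```cpp
// Abridged sketch of the EPContext schema after this change. Passing
// `false` for OpSchema's is_homogeneous parameter lets each tensor in the
// variadic list bind the type constraint "T" independently, so an EPContext
// node wrapping a precompiled QNN graph may mix e.g. int32 and float32.
ONNX_CONTRIB_OPERATOR_SCHEMA(EPContext)
    .SetDomain(kMSDomain)
    .SinceVersion(1)
    .Input(0, "inputs", "List of tensors for inputs", "T",
           OpSchema::Variadic,
           /*is_homogeneous=*/false,
           /*min_arity=*/1,
           OpSchema::NonDifferentiable)
    .Output(0, "outputs", "One or more outputs, list of tensors for outputs", "T",
            OpSchema::Variadic,
            /*is_homogeneous=*/false,
            /*min_arity=*/1,
            OpSchema::NonDifferentiable);
```

Because the outputs can no longer be assumed to share input 0's type, the patch also drops the `TypeAndShapeInferenceFunction` that propagated input 0's element type to output 0 (see the third hunk below).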
diff --git a/onnxruntime/core/graph/contrib_ops/contrib_defs.cc b/onnxruntime/core/graph/contrib_ops/contrib_defs.cc
index 4c0d78f0ee..26fca454c9 100644
--- a/onnxruntime/core/graph/contrib_ops/contrib_defs.cc
+++ b/onnxruntime/core/graph/contrib_ops/contrib_defs.cc
@@ -3248,7 +3248,7 @@ void RegisterContribSchemas() {
           "List of tensors for inputs",
           "T",
           OpSchema::Variadic,
-          true,
+          false,
           1,
           OpSchema::NonDifferentiable)
       .Output(
@@ -3257,7 +3257,7 @@ void RegisterContribSchemas() {
           "One or more outputs, list of tensors for outputs",
           "T",
           OpSchema::Variadic,
-          true,
+          false,
           1,
           OpSchema::NonDifferentiable)
       .TypeConstraint(
@@ -3273,11 +3273,7 @@ void RegisterContribSchemas() {
            "tensor(float16)",
            "tensor(float)",
            "tensor(double)"},
-          "Constrain input and output types.")
-      .TypeAndShapeInferenceFunction([](ONNX_NAMESPACE::InferenceContext& ctx) {
-        // Type inference
-        propagateElemTypeFromInputToOutput(ctx, 0, 0);
-      });
+          "Constrain input and output types.");
 
 static const char* BitmaskDropout_ver1_doc = R"DOC(
 BitmaskDropout takes an input floating-point tensor, an optional input ratio (floating-point scalar) and an optional input training_mode (boolean scalar).
diff --git a/onnxruntime/test/providers/qnn/qnn_basic_test.cc b/onnxruntime/test/providers/qnn/qnn_basic_test.cc
index 2e2acb36e8..e30c79eca3 100644
--- a/onnxruntime/test/providers/qnn/qnn_basic_test.cc
+++ b/onnxruntime/test/providers/qnn/qnn_basic_test.cc
@@ -336,6 +336,78 @@ TEST_F(QnnHTPBackendTests, QnnContextPriorityHigh) {
                    "high");  // qnn_context_priority
 }
 
+// Create a model with Cast + Add (quantized)
+// cast_input -> Cast -> Q -> DQ \
+//                                Add -> Q -> DQ -> output
+//             input2 -> Q -> DQ /
+static GetTestModelFn BuildCastAddTestCase() {
+  return [](ModelTestBuilder& builder) {
+    // Create Cast node int32 -> float32
+    NodeArg* cast_input = MakeTestInput(builder, TestInputDef<int32_t>({2, 3}, false, {0, 1, 0, 1, 0, 1}));
+
+    auto* cast_output = builder.MakeIntermediate();
+    Node& cast_node = builder.AddNode("Cast", {cast_input}, {cast_output});
+    cast_node.AddAttribute("to", static_cast<int64_t>(ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT));
+
+    // Create Add node
+    std::vector<float> data = {0.0f, 0.0f, 1.0f, 0.0f, 1.0f, 0.0f};
+    gsl::span<float> data_range = gsl::make_span(data);
+    QuantParams<uint8_t> q_parameter = GetDataQuantParams<uint8_t>(data_range);
+    auto* add_input1_qdq = AddQDQNodePair<uint8_t>(builder, cast_output, q_parameter.scale, q_parameter.zero_point);
+
+    NodeArg* add_input2 = MakeTestInput(builder, TestInputDef<float>({2, 3}, false, data));
+    auto* add_input2_qdq = AddQDQNodePair<uint8_t>(builder, add_input2, q_parameter.scale, q_parameter.zero_point);
+
+    auto* add_output = builder.MakeIntermediate();
+
+    builder.AddNode("Add", {add_input1_qdq, add_input2_qdq}, {add_output});
+
+    // add_output -> Q -> DQ -> output
+    AddQDQNodePairWithOutputAsGraphOutput<uint8_t>(builder, add_output, q_parameter.scale, q_parameter.zero_point);
+  };
+}
+
+// Test that a model with 2 inputs that have different data types can still generate the context binary
+TEST_F(QnnHTPBackendTests, QnnContextBinaryGeneration2InputTypes) {
+  ProviderOptions provider_options;
+#if defined(_WIN32)
+  provider_options["backend_path"] = "QnnHtp.dll";
+#else
+  provider_options["backend_path"] = "libQnnHtp.so";
+#endif
+  provider_options["qnn_context_cache_enable"] = "1";
+  const std::string context_binary_file = "./qnn_context_binary_int32_fp32_inputs_test.onnx";
+  provider_options["qnn_context_cache_path"] = context_binary_file;
+
+  RunQnnModelTest(BuildCastAddTestCase(),
+                  provider_options,
+                  13,  // opset
+                  ExpectedEPNodeAssignment::All,
+                  1e-5f,
+                  logging::Severity::kERROR,
+                  false);
+
+  // Make sure the Qnn context cache binary file is generated
+  EXPECT_TRUE(std::filesystem::exists(context_binary_file.c_str()));
+}
+
+// A repro of QC case 06838696, accuracy issue for Cast + Op (quantized)
+// the value pair(1, 0.00392156886) at index #1 don't match,
+// which is -0.996078 from 1
+TEST_F(QnnHTPBackendTests, DISABLED_CastAddHTPAccuracyTest) {
+  ProviderOptions provider_options;
+#if defined(_WIN32)
+  provider_options["backend_path"] = "QnnHtp.dll";
+#else
+  provider_options["backend_path"] = "libQnnHtp.so";
+#endif
+
+  RunQnnModelTest(BuildCastAddTestCase(),
+                  provider_options,
+                  13,  // opset
+                  ExpectedEPNodeAssignment::All);
+}
+
 #endif  // defined(__aarch64__) || defined(_M_ARM64) || defined(__linux__)
 
 #endif  // !defined(ORT_MINIMAL_BUILD)
diff --git a/onnxruntime/test/providers/qnn/qnn_test_utils.cc b/onnxruntime/test/providers/qnn/qnn_test_utils.cc
index 665a838b43..4c38109d30 100644
--- a/onnxruntime/test/providers/qnn/qnn_test_utils.cc
+++ b/onnxruntime/test/providers/qnn/qnn_test_utils.cc
@@ -81,7 +81,7 @@ void TryEnableQNNSaver(ProviderOptions& qnn_options) {
 void RunQnnModelTest(const GetTestModelFn& build_test_case, ProviderOptions provider_options,
                      int opset_version, ExpectedEPNodeAssignment expected_ep_assignment,
-                     float fp32_abs_err, logging::Severity log_severity) {
+                     float fp32_abs_err, logging::Severity log_severity, bool verify_outputs) {
   EPVerificationParams verification_params;
   verification_params.ep_node_assignment = expected_ep_assignment;
   verification_params.fp32_abs_err = fp32_abs_err;
@@ -106,7 +106,7 @@ void RunQnnModelTest(const GetTestModelFn& build_test_case, ProviderOptions prov
   TryEnableQNNSaver(provider_options);
   RunAndVerifyOutputsWithEP(AsByteSpan(model_data.data(), model_data.size()), "QNN_EP_TestLogID",
                             QnnExecutionProviderWithOptions(provider_options),
-                            helper.feeds_, verification_params);
+                            helper.feeds_, verification_params, {}, verify_outputs);
 }
 
 void InferenceModel(const std::string& model_data, const char* log_id,
diff --git a/onnxruntime/test/providers/qnn/qnn_test_utils.h b/onnxruntime/test/providers/qnn/qnn_test_utils.h
index fe77c6bdba..9ec0985e81 100644
--- a/onnxruntime/test/providers/qnn/qnn_test_utils.h
+++ b/onnxruntime/test/providers/qnn/qnn_test_utils.h
@@ -633,7 +633,9 @@ inline GetTestQDQModelFn<QuantType> BuildQDQOpTestCase(const std::string& op_typ
  */
 void RunQnnModelTest(const GetTestModelFn& build_test_case, ProviderOptions provider_options,
                      int opset_version, ExpectedEPNodeAssignment expected_ep_assignment,
-                     float fp32_abs_err = 1e-5f, logging::Severity log_severity = logging::Severity::kERROR);
+                     float fp32_abs_err = 1e-5f,
+                     logging::Severity log_severity = logging::Severity::kERROR,
+                     bool verify_outputs = true);
 
 enum class BackendSupport {
   SUPPORT_UNKNOWN,
diff --git a/onnxruntime/test/util/include/test_utils.h b/onnxruntime/test/util/include/test_utils.h
index 48a71b8acb..48f0d7c2ab 100644
--- a/onnxruntime/test/util/include/test_utils.h
+++ b/onnxruntime/test/util/include/test_utils.h
@@ -69,7 +69,8 @@ void RunAndVerifyOutputsWithEP(ModelPathOrBytes model_path_or_bytes,
                                std::unique_ptr<IExecutionProvider> execution_provider,
                                const NameMLValMap& feeds,
                                const EPVerificationParams& params = EPVerificationParams(),
-                               const std::function<void(SessionOptions&)>& session_options_updater = {});
+                               const std::function<void(SessionOptions&)>& session_options_updater = {},
+                               bool verify_outputs = true);
 
 // Tests model loading only.
 // This can be used to test EPs in builds where only loading (and not running) of a model is supported.
diff --git a/onnxruntime/test/util/test_utils.cc b/onnxruntime/test/util/test_utils.cc
index 5f1fdae72f..598147b81d 100644
--- a/onnxruntime/test/util/test_utils.cc
+++ b/onnxruntime/test/util/test_utils.cc
@@ -133,7 +133,8 @@ void RunAndVerifyOutputsWithEP(ModelPathOrBytes model_path_or_bytes, std::string
                                std::unique_ptr<IExecutionProvider> execution_provider,
                                const NameMLValMap& feeds,
                                const EPVerificationParams& params,
-                               const std::function<void(SessionOptions&)>& session_options_updater) {
+                               const std::function<void(SessionOptions&)>& session_options_updater,
+                               bool verify_outputs) {
   std::vector<std::byte> model_data_buffer{};
   const auto model_data = GetModelBytes(model_path_or_bytes, model_data_buffer);
 
@@ -184,7 +185,9 @@ void RunAndVerifyOutputsWithEP(ModelPathOrBytes model_path_or_bytes, std::string
   // Run with EP and verify the result
   std::vector<OrtValue> fetches;
   ASSERT_STATUS_OK(session_object2.Run(run_options, feeds, output_names, &fetches));
-  VerifyOutputs(output_names, expected_fetches, fetches, params);
+  if (verify_outputs) {
+    VerifyOutputs(output_names, expected_fetches, fetches, params);
+  }
 
   if (params.graph_verifier) {
     (*params.graph_verifier)(graph2);
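
A note on the test-utility change: `verify_outputs` defaults to `true`, so every existing caller of `RunQnnModelTest` and `RunAndVerifyOutputsWithEP` keeps its current compare-against-CPU behavior. Only the new context-binary-generation test passes `false`, because its quantized Cast + Add model trips the known HTP accuracy issue captured in the disabled repro above, and the test only needs to prove the context binary gets written. A call-site sketch (names taken from the patch):

```cpp
// Default: run the model on the QNN EP and compare its outputs against the
// CPU EP baseline (verify_outputs defaults to true).
RunQnnModelTest(BuildCastAddTestCase(), provider_options, /*opset_version=*/13,
                ExpectedEPNodeAssignment::All);

// Opt out of output verification: exercise compilation and context-binary
// generation only, e.g. while a known accuracy issue is still outstanding.
RunQnnModelTest(BuildCastAddTestCase(), provider_options, /*opset_version=*/13,
                ExpectedEPNodeAssignment::All,
                /*fp32_abs_err=*/1e-5f,
                logging::Severity::kERROR,
                /*verify_outputs=*/false);
```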