mirror of
https://github.com/saymrwulf/onnxruntime.git
synced 2026-05-14 20:48:00 +00:00
[QNN EP] Fix a bug that can't create context binary if the model has inputs/outputs with different data type (#18722)
Fix a bug that prevented creating a context binary when the model has inputs/outputs with different data types ### Description Update the EPContext op schema to unblock nodes whose inputs & outputs have differing data types
This commit is contained in:
parent
559bd52252
commit
9768a727e1
7 changed files with 89 additions and 15 deletions
|
|
@ -1599,14 +1599,14 @@ This version of the operator has been available since version 1 of the 'com.micr
|
|||
#### Inputs (1 - ∞)
|
||||
|
||||
<dl>
|
||||
<dt><tt>inputs</tt> (variadic) : T</dt>
|
||||
<dt><tt>inputs</tt> (variadic, heterogeneous) : T</dt>
|
||||
<dd>List of tensors for inputs</dd>
|
||||
</dl>
|
||||
|
||||
#### Outputs (1 - ∞)
|
||||
|
||||
<dl>
|
||||
<dt><tt>outputs</tt> (variadic) : T</dt>
|
||||
<dt><tt>outputs</tt> (variadic, heterogeneous) : T</dt>
|
||||
<dd>One or more outputs, list of tensors for outputs</dd>
|
||||
</dl>
|
||||
|
||||
|
|
|
|||
|
|
@ -3248,7 +3248,7 @@ void RegisterContribSchemas() {
|
|||
"List of tensors for inputs",
|
||||
"T",
|
||||
OpSchema::Variadic,
|
||||
true,
|
||||
false,
|
||||
1,
|
||||
OpSchema::NonDifferentiable)
|
||||
.Output(
|
||||
|
|
@ -3257,7 +3257,7 @@ void RegisterContribSchemas() {
|
|||
"One or more outputs, list of tensors for outputs",
|
||||
"T",
|
||||
OpSchema::Variadic,
|
||||
true,
|
||||
false,
|
||||
1,
|
||||
OpSchema::NonDifferentiable)
|
||||
.TypeConstraint(
|
||||
|
|
@ -3273,11 +3273,7 @@ void RegisterContribSchemas() {
|
|||
"tensor(float16)",
|
||||
"tensor(float)",
|
||||
"tensor(double)"},
|
||||
"Constrain input and output types.")
|
||||
.TypeAndShapeInferenceFunction([](ONNX_NAMESPACE::InferenceContext& ctx) {
|
||||
// Type inference
|
||||
propagateElemTypeFromInputToOutput(ctx, 0, 0);
|
||||
});
|
||||
"Constrain input and output types.");
|
||||
|
||||
static const char* BitmaskDropout_ver1_doc = R"DOC(
|
||||
BitmaskDropout takes an input floating-point tensor, an optional input ratio (floating-point scalar) and an optional input training_mode (boolean scalar).
|
||||
|
|
|
|||
|
|
@ -336,6 +336,78 @@ TEST_F(QnnHTPBackendTests, QnnContextPriorityHigh) {
|
|||
"high"); // qnn_context_priority
|
||||
}
|
||||
|
||||
// Create a model with Cast + Add (quantized)
|
||||
// cast_input -> Cast -> Q -> DQ \
|
||||
// Add -> Q -> DQ -> output
|
||||
// input2 -> Q -> DQ /
|
||||
static GetTestModelFn BuildCastAddTestCase() {
|
||||
return [](ModelTestBuilder& builder) {
|
||||
// Create Cast node int32 -> float32
|
||||
NodeArg* cast_input = MakeTestInput(builder, TestInputDef<int32_t>({2, 3}, false, {0, 1, 0, 1, 0, 1}));
|
||||
|
||||
auto* cast_output = builder.MakeIntermediate();
|
||||
Node& cast_node = builder.AddNode("Cast", {cast_input}, {cast_output});
|
||||
cast_node.AddAttribute("to", static_cast<int64_t>(ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT));
|
||||
|
||||
// Create Add node
|
||||
std::vector<float> data = {0.0f, 0.0f, 1.0f, 0.0f, 1.0f, 0.0f};
|
||||
gsl::span<float> data_range = gsl::make_span(data);
|
||||
QuantParams<uint8_t> q_parameter = GetDataQuantParams<uint8_t>(data_range);
|
||||
auto* add_input1_qdq = AddQDQNodePair<uint8_t>(builder, cast_output, q_parameter.scale, q_parameter.zero_point);
|
||||
|
||||
NodeArg* add_input2 = MakeTestInput(builder, TestInputDef<float>({2, 3}, false, data));
|
||||
auto* add_input2_qdq = AddQDQNodePair<uint8_t>(builder, add_input2, q_parameter.scale, q_parameter.zero_point);
|
||||
|
||||
auto* add_output = builder.MakeIntermediate();
|
||||
|
||||
builder.AddNode("Add", {add_input1_qdq, add_input2_qdq}, {add_output});
|
||||
|
||||
// add_output -> Q -> DQ -> output
|
||||
AddQDQNodePairWithOutputAsGraphOutput<uint8_t>(builder, add_output, q_parameter.scale, q_parameter.zero_point);
|
||||
};
|
||||
}
|
||||
|
||||
// Test that models with 2 inputs which have different data types can still generate the context binary
|
||||
TEST_F(QnnHTPBackendTests, QnnContextBinaryGeneration2InputTypes) {
|
||||
ProviderOptions provider_options;
|
||||
#if defined(_WIN32)
|
||||
provider_options["backend_path"] = "QnnHtp.dll";
|
||||
#else
|
||||
provider_options["backend_path"] = "libQnnHtp.so";
|
||||
#endif
|
||||
provider_options["qnn_context_cache_enable"] = "1";
|
||||
const std::string context_binary_file = "./qnn_context_binary_int32_fp32_inputs_test.onnx";
|
||||
provider_options["qnn_context_cache_path"] = context_binary_file;
|
||||
|
||||
RunQnnModelTest(BuildCastAddTestCase(),
|
||||
provider_options,
|
||||
13, // opset
|
||||
ExpectedEPNodeAssignment::All,
|
||||
1e-5f,
|
||||
logging::Severity::kERROR,
|
||||
false);
|
||||
|
||||
// Make sure the Qnn context cache binary file is generated
|
||||
EXPECT_TRUE(std::filesystem::exists(context_binary_file.c_str()));
|
||||
}
|
||||
|
||||
// A repro of QC case 06838696, accuracy issue for Cast + Op (quantized)
|
||||
// the value pair(1, 0.00392156886) at index #1 don't match,
|
||||
// which is -0.996078 from 1
|
||||
TEST_F(QnnHTPBackendTests, DISABLED_CastAddHTPAccuracyTest) {
|
||||
ProviderOptions provider_options;
|
||||
#if defined(_WIN32)
|
||||
provider_options["backend_path"] = "QnnHtp.dll";
|
||||
#else
|
||||
provider_options["backend_path"] = "libQnnHtp.so";
|
||||
#endif
|
||||
|
||||
RunQnnModelTest(BuildCastAddTestCase(),
|
||||
provider_options,
|
||||
13, // opset
|
||||
ExpectedEPNodeAssignment::All);
|
||||
}
|
||||
|
||||
#endif // defined(__aarch64__) || defined(_M_ARM64) || defined(__linux__)
|
||||
#endif // !defined(ORT_MINIMAL_BUILD)
|
||||
|
||||
|
|
|
|||
|
|
@ -81,7 +81,7 @@ void TryEnableQNNSaver(ProviderOptions& qnn_options) {
|
|||
|
||||
void RunQnnModelTest(const GetTestModelFn& build_test_case, ProviderOptions provider_options,
|
||||
int opset_version, ExpectedEPNodeAssignment expected_ep_assignment,
|
||||
float fp32_abs_err, logging::Severity log_severity) {
|
||||
float fp32_abs_err, logging::Severity log_severity, bool verify_outputs) {
|
||||
EPVerificationParams verification_params;
|
||||
verification_params.ep_node_assignment = expected_ep_assignment;
|
||||
verification_params.fp32_abs_err = fp32_abs_err;
|
||||
|
|
@ -106,7 +106,7 @@ void RunQnnModelTest(const GetTestModelFn& build_test_case, ProviderOptions prov
|
|||
TryEnableQNNSaver(provider_options);
|
||||
RunAndVerifyOutputsWithEP(AsByteSpan(model_data.data(), model_data.size()), "QNN_EP_TestLogID",
|
||||
QnnExecutionProviderWithOptions(provider_options),
|
||||
helper.feeds_, verification_params);
|
||||
helper.feeds_, verification_params, {}, verify_outputs);
|
||||
}
|
||||
|
||||
void InferenceModel(const std::string& model_data, const char* log_id,
|
||||
|
|
|
|||
|
|
@ -633,7 +633,9 @@ inline GetTestQDQModelFn<QuantType> BuildQDQOpTestCase(const std::string& op_typ
|
|||
*/
|
||||
void RunQnnModelTest(const GetTestModelFn& build_test_case, ProviderOptions provider_options,
|
||||
int opset_version, ExpectedEPNodeAssignment expected_ep_assignment,
|
||||
float fp32_abs_err = 1e-5f, logging::Severity log_severity = logging::Severity::kERROR);
|
||||
float fp32_abs_err = 1e-5f,
|
||||
logging::Severity log_severity = logging::Severity::kERROR,
|
||||
bool verify_outputs = true);
|
||||
|
||||
enum class BackendSupport {
|
||||
SUPPORT_UNKNOWN,
|
||||
|
|
|
|||
|
|
@ -69,7 +69,8 @@ void RunAndVerifyOutputsWithEP(ModelPathOrBytes model_path_or_bytes,
|
|||
std::unique_ptr<IExecutionProvider> execution_provider,
|
||||
const NameMLValMap& feeds,
|
||||
const EPVerificationParams& params = EPVerificationParams(),
|
||||
const std::function<void(SessionOptions&)>& session_options_updater = {});
|
||||
const std::function<void(SessionOptions&)>& session_options_updater = {},
|
||||
bool verify_outputs = true);
|
||||
|
||||
// Tests model loading only.
|
||||
// This can be used to test EPs in builds where only loading (and not running) of a model is supported.
|
||||
|
|
|
|||
|
|
@ -133,7 +133,8 @@ void RunAndVerifyOutputsWithEP(ModelPathOrBytes model_path_or_bytes, std::string
|
|||
std::unique_ptr<IExecutionProvider> execution_provider,
|
||||
const NameMLValMap& feeds,
|
||||
const EPVerificationParams& params,
|
||||
const std::function<void(SessionOptions&)>& session_options_updater) {
|
||||
const std::function<void(SessionOptions&)>& session_options_updater,
|
||||
bool verify_outputs) {
|
||||
std::vector<std::byte> model_data_buffer{};
|
||||
const auto model_data = GetModelBytes(model_path_or_bytes, model_data_buffer);
|
||||
|
||||
|
|
@ -184,7 +185,9 @@ void RunAndVerifyOutputsWithEP(ModelPathOrBytes model_path_or_bytes, std::string
|
|||
// Run with EP and verify the result
|
||||
std::vector<OrtValue> fetches;
|
||||
ASSERT_STATUS_OK(session_object2.Run(run_options, feeds, output_names, &fetches));
|
||||
VerifyOutputs(output_names, expected_fetches, fetches, params);
|
||||
if (verify_outputs) {
|
||||
VerifyOutputs(output_names, expected_fetches, fetches, params);
|
||||
}
|
||||
|
||||
if (params.graph_verifier) {
|
||||
(*params.graph_verifier)(graph2);
|
||||
|
|
|
|||
Loading…
Reference in a new issue