mirror of
https://github.com/saymrwulf/onnxruntime.git
synced 2026-05-26 22:35:43 +00:00
[QNN EP] Ensure QNN EP rejects nodes with I/O of dynamic shape (#22066)
### Description Updates QNN EP to properly reject nodes that have inputs or outputs with dynamic shapes. ### Motivation and Context Currently, QNN EP does not properly offload subgraphs with dynamic shapes to the CPU EP. This PR ensures that QNN EP rejects nodes that consume or generate I/O with dynamic shapes.
This commit is contained in:
parent
55ab13e7ca
commit
f7bf5a19ba
4 changed files with 68 additions and 3 deletions
|
|
@ -308,8 +308,10 @@ bool QnnModelWrapper::GetOnnxShape(const NodeArg& node_arg, std::vector<uint32_t
|
|||
return true;
|
||||
}
|
||||
|
||||
// We already checked the shape has no dynamic dimension
|
||||
for (const auto& dim : shape_proto->dim()) {
|
||||
if (!dim.has_dim_value()) {
|
||||
return false; // Do not support dynamic shapes.
|
||||
}
|
||||
shape.push_back(SafeInt<uint32_t>(dim.dim_value()));
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -948,6 +948,63 @@ TEST_F(QnnHTPBackendTests, Float32ModelWithFP16PrecisionTest) {
|
|||
0.008f);
|
||||
}
|
||||
|
||||
// Test that QNN EP only handles nodes with static shapes and rejects nodes with dynamic shape I/O.
|
||||
TEST_F(QnnHTPBackendTests, EPRejectsDynamicShapesF32) {
|
||||
// Local function that builds a model in which the last two nodes use dynamic shapes.
|
||||
auto model_build_fn = [](ModelTestBuilder& builder) {
|
||||
NodeArg* input1 = builder.MakeInput<float>(std::vector<int64_t>{1, 2, 8, 8},
|
||||
GetFloatDataInRange(0.0f, 1.0f, 128));
|
||||
NodeArg* input2 = builder.MakeInput<int64_t>(std::vector<int64_t>{3}, std::vector<int64_t>{1, 2, 49});
|
||||
|
||||
// Add a Conv with known shapes. QNN EP should support it.
|
||||
NodeArg* weight = builder.MakeInitializer<float>(std::vector<int64_t>{2, 2, 2, 2},
|
||||
GetFloatDataInRange(-0.3f, 0.3f, 16));
|
||||
NodeArg* bias = builder.MakeInitializer<float>(std::vector<int64_t>{2}, {0.0f, 1.0f});
|
||||
|
||||
auto* conv_output = builder.MakeIntermediate();
|
||||
builder.AddNode("Conv", {input1, weight, bias}, {conv_output});
|
||||
|
||||
// Add a Reshape to a dynamic shape. QNN EP should reject this node.
|
||||
auto* reshape_output = builder.MakeIntermediate();
|
||||
builder.AddNode("Reshape", {conv_output, input2}, {reshape_output});
|
||||
|
||||
// Add a Softmax. QNN EP should reject this node because its input has a dynamic shape.
|
||||
NodeArg* output = builder.MakeOutput();
|
||||
builder.AddNode("Softmax", {reshape_output}, {output});
|
||||
};
|
||||
|
||||
// Local function that checks that the nodes with dynamic shape I/O were assigned to CPU EP.
|
||||
std::function<void(const Graph&)> ep_graph_checker = [](const Graph& graph) {
|
||||
for (const Node& node : graph.Nodes()) {
|
||||
const std::string& ep_name = node.GetExecutionProviderType();
|
||||
const std::string& op_type = node.OpType();
|
||||
if (op_type == "Reshape" || op_type == "Softmax") {
|
||||
EXPECT_EQ(ep_name, kCpuExecutionProvider);
|
||||
} else {
|
||||
EXPECT_EQ(ep_name, kQnnExecutionProvider);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
ProviderOptions provider_options;
|
||||
#if defined(_WIN32)
|
||||
provider_options["backend_path"] = "QnnHtp.dll";
|
||||
#else
|
||||
provider_options["backend_path"] = "libQnnHtp.so";
|
||||
#endif
|
||||
provider_options["enable_htp_fp16_precision"] = "1"; // QNN EP will use fp16 precision.
|
||||
// CPU EP will use fp32, so we can relax accuracy requirements.
|
||||
|
||||
RunQnnModelTest(model_build_fn,
|
||||
provider_options,
|
||||
/*opset*/ 19,
|
||||
ExpectedEPNodeAssignment::Some,
|
||||
/*abs_err*/ 1e-4f,
|
||||
logging::Severity::kERROR,
|
||||
/*verify_output*/ true,
|
||||
&ep_graph_checker);
|
||||
}
|
||||
|
||||
#endif // defined(__aarch64__) || defined(_M_ARM64) || defined(__linux__)
|
||||
#endif // !defined(ORT_MINIMAL_BUILD)
|
||||
|
||||
|
|
|
|||
|
|
@ -98,10 +98,12 @@ void TryEnableQNNSaver(ProviderOptions& qnn_options) {
|
|||
|
||||
void RunQnnModelTest(const GetTestModelFn& build_test_case, ProviderOptions provider_options,
|
||||
int opset_version, ExpectedEPNodeAssignment expected_ep_assignment,
|
||||
float fp32_abs_err, logging::Severity log_severity, bool verify_outputs) {
|
||||
float fp32_abs_err, logging::Severity log_severity, bool verify_outputs,
|
||||
std::function<void(const Graph&)>* ep_graph_checker) {
|
||||
EPVerificationParams verification_params;
|
||||
verification_params.ep_node_assignment = expected_ep_assignment;
|
||||
verification_params.fp32_abs_err = fp32_abs_err;
|
||||
verification_params.graph_verifier = ep_graph_checker;
|
||||
// Add kMSDomain to cover contrib op like Gelu
|
||||
const std::unordered_map<std::string, int> domain_to_version = {{"", opset_version}, {kMSDomain, 1}};
|
||||
|
||||
|
|
|
|||
|
|
@ -1033,12 +1033,16 @@ inline GetTestQDQModelFn<QuantType> BuildQDQOpTestCase(
|
|||
* \param expected_ep_assignment How many nodes are expected to be assigned to QNN (All, Some, or None).
|
||||
* \param fp32_abs_err The acceptable error between CPU EP and QNN EP.
|
||||
* \param log_severity The logger's minimum severity level.
|
||||
* \param verify_outputs True to verify that the outputs match (within tolerance).
|
||||
* \param ep_graph_checker Function called on the Graph generated for the EP's session. Used to check node
|
||||
* EP assignment.
|
||||
*/
|
||||
void RunQnnModelTest(const GetTestModelFn& build_test_case, ProviderOptions provider_options,
|
||||
int opset_version, ExpectedEPNodeAssignment expected_ep_assignment,
|
||||
float fp32_abs_err = 1e-5f,
|
||||
logging::Severity log_severity = logging::Severity::kERROR,
|
||||
bool verify_outputs = true);
|
||||
bool verify_outputs = true,
|
||||
std::function<void(const Graph&)>* ep_graph_checker = nullptr);
|
||||
|
||||
enum class BackendSupport {
|
||||
SUPPORT_UNKNOWN,
|
||||
|
|
|
|||
Loading…
Reference in a new issue