mirror of
https://github.com/saymrwulf/onnxruntime.git
synced 2026-06-03 23:49:44 +00:00
[QNN EP] Disable early termination in GetCapability (#18140)
[QNN EP] Disable early termination in GetCapability if there are multiple partitions and context binary is enabled ### Description The QNN EP context binary cache feature only supports a single partition for now, and we have an early termination in GetCapability for that case. After the PR https://github.com/microsoft/onnxruntime/pull/17764, there is no longer any Level 1 optimization before the 1st GetCapability call, so the graph transformer EnsureUniqueDQForNodeUnit is not applied. As a result, if there is an initializer -> DQ that is shared by multiple node units, the nodes are not identified as a node unit group, and QNN EP reports many unsupported nodes in the 1st GetCapability call. The 2nd GetCapability call still works normally. Disable the early termination in GetCapability and delay the decision to Compile.
This commit is contained in:
parent
9bba990871
commit
be2f72a315
1 changed files with 2 additions and 6 deletions
|
|
@ -404,10 +404,6 @@ QNNExecutionProvider::GetCapability(const onnxruntime::GraphViewer& graph_viewer
|
|||
}
|
||||
}
|
||||
|
||||
if (num_of_partitions > 1) {
|
||||
ORT_ENFORCE(!context_cache_enabled_, "Only support single partition for context cache feature.");
|
||||
}
|
||||
|
||||
const auto summary_msg = MakeString("Number of partitions supported by QNN EP: ", num_of_partitions,
|
||||
", number of nodes in the graph: ", num_nodes_in_graph,
|
||||
", number of nodes supported by QNN: ", num_of_supported_nodes);
|
||||
|
|
@ -485,7 +481,7 @@ Status QNNExecutionProvider::Compile(const std::vector<FusedNodeAndGraph>& fused
|
|||
|
||||
bool is_ctx_file_exist = qnn_cache_model_handler_->GetIsContextCacheFileExists();
|
||||
if (is_qnn_ctx_model || (context_cache_enabled_ && is_ctx_file_exist)) {
|
||||
ORT_ENFORCE(fused_nodes_and_graphs.size() == 1, "Only support single partition for context cache feature.");
|
||||
ORT_RETURN_IF(fused_nodes_and_graphs.size() != 1, "Only support single partition for context cache feature.");
|
||||
std::unique_ptr<qnn::QnnModel> qnn_model = std::make_unique<qnn::QnnModel>(logger, qnn_backend_manager_.get());
|
||||
// Load and execute from cached context if exist
|
||||
ORT_RETURN_IF_ERROR(qnn_cache_model_handler_->LoadQnnCtxFromOnnxModel(graph_viewer,
|
||||
|
|
@ -509,7 +505,7 @@ Status QNNExecutionProvider::Compile(const std::vector<FusedNodeAndGraph>& fused
|
|||
|
||||
ORT_RETURN_IF_ERROR(CompileFromOrtGraph(fused_nodes_and_graphs, node_compute_funcs, logger));
|
||||
if (context_cache_enabled_ && !is_qnn_ctx_model) {
|
||||
ORT_ENFORCE(fused_nodes_and_graphs.size() == 1, "Only support single partition for context cache feature.");
|
||||
ORT_RETURN_IF(fused_nodes_and_graphs.size() != 1, "Only support single partition for context cache feature.");
|
||||
uint64_t buffer_size(0);
|
||||
auto context_buffer = qnn_backend_manager_->GetContextBinaryBuffer(buffer_size);
|
||||
ORT_RETURN_IF_ERROR(qnn_cache_model_handler_->GenerateCtxCacheOnnxModel(context_buffer.get(),
|
||||
|
|
|
|||
Loading…
Reference in a new issue