mirror of
https://github.com/saymrwulf/onnxruntime.git
synced 2026-06-19 02:03:52 +00:00
Remove useless NodeProto serializations (#18791)
## Description
This pull request speeds up inference session creation by eliminating
unnecessary `Node::ToProto` invocations. In `Graph::VerifyNodeAndOpMatch`,
a `NodeProto` was previously serialized for every node; it is now built only
when `node.Op()` is null and the node must be passed to `checker::check_node`,
and output names are read directly from `node.OutputDefs()`. This removes the
superfluous `Node::ToProto` calls along with their subsequent `~NodeProto`
invocations.
## Motivation and Context
The optimization focus of this pull request is on addressing low-hanging
fruit in the inference session creation process. By strategically
removing undesired `Node::ToProto` calls, we aim to streamline the
codebase and enhance the overall performance. The flame graphs
illustrate the notable improvements achieved by reducing the percentage
of `Node::ToProto` calls, thereby optimizing the execution flow.
### Code Snippet
```cpp
// Benchmark-style test: creates an InferenceSession for "big.onnx" (Load +
// Initialize) so that session-creation cost can be profiled externally.
TEST(InferenceSessionTests, Bench) {
  // Initialize logging manager. The sink is created with make_unique (no raw
  // `new`) and converts implicitly to std::unique_ptr<ISink>.
  auto logging_manager = std::make_unique<logging::LoggingManager>(
      std::make_unique<CLogSink>(), logging::Severity::kVERBOSE, false,
      LoggingManager::InstanceType::Temporal);

  // Create environment; the environment takes ownership of the logging manager.
  std::unique_ptr<Environment> env;
  ASSERT_STATUS_OK(Environment::Create(std::move(logging_manager), env));

  // Configure session options: sequential execution, Level2 graph
  // optimizations, and a single intra-op thread.
  SessionOptions so;
  so.execution_mode = ExecutionMode::ORT_SEQUENTIAL;
  so.graph_optimization_level = TransformerLevel::Level2;
  so.intra_op_param.thread_pool_size = 1;

  // Initialize and load the InferenceSession
  InferenceSessionTestGlobalThreadPools session1{so, *env};
  ASSERT_STATUS_OK(session1.Load("big.onnx"));
  ASSERT_STATUS_OK(session1.Initialize());
}
```
### `big.onnx` model creation
```python
# Builds "big.onnx": a long chain of 1000 Mul nodes followed by 3000 pairs of
# LabelEncoder nodes (int64 -> string -> int64), then saves it to disk.
import onnx
import numpy as np
from spox import argument, build, Tensor, Var
from spox.opset.ai.onnx import v17 as op
from spox.opset.ai.onnx.ml.v3 import label_encoder

a = argument(Tensor(np.int64, ('N',)))
c = a

# Chain of Mul nodes, each multiplying by a 10000-element constant of ones.
for _ in range(1000):
    c = op.mul(c, op.const(np.ones(10000, dtype=np.int64)))

# The encoder key/value tables are identical for every iteration, so build
# them once instead of on each pass through the loop.
all_strings = ["random_string" + str(i) for i in range(100)]
all_ints = list(range(len(all_strings)))

for _ in range(3000):
    # int64 -> string
    c = label_encoder(
        c,
        keys_int64s=all_ints,
        values_strings=all_strings
    )
    # string -> int64, so the next iteration again receives int64 input
    c = label_encoder(c, keys_strings=all_strings, values_int64s=all_ints)

model: onnx.ModelProto = build(inputs={'a': a}, outputs={'c': c})
onnx.save(model, "big.onnx")
```
Profiling the test above in a `Release` build with `perf` yields:
Before: 3.3% of time spent in `Node::ToProto`
After: 1.6% of time spent in `Node::ToProto`
---------
Co-authored-by: Atanas Dimitrov <atanasdimitrov@Atanass-MacBook-Pro.local>
This commit is contained in:
parent
d5628f52df
commit
4e2d88b75f
1 changed files with 4 additions and 4 deletions
|
|
@ -2550,14 +2550,14 @@ Status Graph::VerifyNodeAndOpMatch(const ResolveOptions& options) {
|
|||
// Node verification.
|
||||
auto& node = *GetNode(node_index);
|
||||
|
||||
NodeProto node_proto;
|
||||
node.ToProto(node_proto);
|
||||
const auto& node_name = node.Name();
|
||||
|
||||
if (!node.Op()) {
|
||||
{
|
||||
auto status = Status::OK();
|
||||
ORT_TRY {
|
||||
NodeProto node_proto;
|
||||
node.ToProto(node_proto);
|
||||
checker::check_node(node_proto, ctx, lsc);
|
||||
}
|
||||
ORT_CATCH(const std::exception& ex) {
|
||||
|
|
@ -2630,8 +2630,8 @@ Status Graph::VerifyNodeAndOpMatch(const ResolveOptions& options) {
|
|||
NO_CHANGE_ON_SYNC_FLAG(ORT_RETURN_IF_ERROR(InferAndVerifyTypeMatch(node, *p_op, options)));
|
||||
|
||||
// Accumulate output names of the iterated Node
|
||||
for (auto& output_name : node_proto.output()) {
|
||||
lsc.output_names.insert(output_name);
|
||||
for (const auto& output : node.OutputDefs()) {
|
||||
lsc.output_names.insert(output->Name());
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Reference in a new issue