Remove useless NodeProto serializations (#18791)

## Description
This pull request aims to enhance the efficiency of the inference
session creation by eliminating unnecessary `Node::ToProto` invocations.
The current codebase presents opportunities for optimization,
particularly in the removal of superfluous `Node::ToProto` calls, along
with their subsequent `~NodeProto` invocations.

## Motivation and Context
The optimization focus of this pull request is on addressing low-hanging
fruit in the inference session creation process. By strategically
removing undesired `Node::ToProto` calls, we aim to streamline the
codebase and enhance the overall performance. The flame graphs
illustrate the notable improvements achieved by reducing the percentage
of `Node::ToProto` calls, thereby optimizing the execution flow.

### Code Snippet
```cpp
TEST(InferenceSessionTests, Bench) {
  // Initialize logging manager
  auto logging_manager = std::make_unique<logging::LoggingManager>(
      std::unique_ptr<ISink>(new CLogSink()), logging::Severity::kVERBOSE, false,
      LoggingManager::InstanceType::Temporal);

  // Create environment
  std::unique_ptr<Environment> env;
  auto st = Environment::Create(std::move(logging_manager), env);
  ASSERT_TRUE(st.IsOK());

  // Configure session options
  SessionOptions so;
  so.execution_mode = ExecutionMode::ORT_SEQUENTIAL;
  so.graph_optimization_level = TransformerLevel::Level2;
  so.intra_op_param.thread_pool_size = 1;

  // Initialize and load the InferenceSession
  InferenceSessionTestGlobalThreadPools session1{so, *env};
  ASSERT_STATUS_OK(session1.Load("big.onnx"));
  ASSERT_STATUS_OK(session1.Initialize());
}
```

### `big.onnx` model creation
```python
import onnx
import numpy as np
from spox import argument, build, Tensor, Var
from spox.opset.ai.onnx import v17 as op
from spox.opset.ai.onnx.ml.v3 import label_encoder

a = argument(Tensor(np.int64, ('N',)))
c = a

for x in range(1000):
    c = op.mul(c, op.const(np.ones(10000, dtype=np.int64)))

for x in range(3000):
    all_strings = list("random_string" + str(i) for i in range(100))
    all_ints = list(range(len(all_strings)))
    c = label_encoder(
        c,
        keys_int64s=all_ints,
        values_strings=all_strings
    )
    c = label_encoder(c, keys_strings=all_strings, values_int64s=all_ints)

model: onnx.ModelProto = build(inputs={'a': a}, outputs={'c': c})
onnx.save(model, "big.onnx")
```

Testing in `Release` with `perf` yields:
Before: 3.3% spent in `Node::ToProto`
After: 1.6% spent in `Node::ToProto`

---------

Co-authored-by: Atanas Dimitrov <atanasdimitrov@Atanass-MacBook-Pro.local>
This commit is contained in:
Atanas Dimitrov 2024-01-04 09:38:28 +02:00 committed by GitHub
parent d5628f52df
commit 4e2d88b75f
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23

View file

@ -2550,14 +2550,14 @@ Status Graph::VerifyNodeAndOpMatch(const ResolveOptions& options) {
// Node verification.
auto& node = *GetNode(node_index);
NodeProto node_proto;
node.ToProto(node_proto);
const auto& node_name = node.Name();
if (!node.Op()) {
{
auto status = Status::OK();
ORT_TRY {
NodeProto node_proto;
node.ToProto(node_proto);
checker::check_node(node_proto, ctx, lsc);
}
ORT_CATCH(const std::exception& ex) {
@ -2630,8 +2630,8 @@ Status Graph::VerifyNodeAndOpMatch(const ResolveOptions& options) {
NO_CHANGE_ON_SYNC_FLAG(ORT_RETURN_IF_ERROR(InferAndVerifyTypeMatch(node, *p_op, options)));
// Accumulate output names of the iterated Node
for (auto& output_name : node_proto.output()) {
lsc.output_names.insert(output_name);
for (const auto& output : node.OutputDefs()) {
lsc.output_names.insert(output->Name());
}
}