mirror of
https://github.com/saymrwulf/onnxruntime.git
synced 2026-05-26 22:35:43 +00:00
68 lines
2.7 KiB
C++
68 lines
2.7 KiB
C++
// Copyright (c) Microsoft Corporation. All rights reserved.
|
|
// Licensed under the MIT License.
|
|
|
|
#pragma once
|
|
|
|
#include <string>
|
|
#include <vector>
|
|
#include "core/session/onnxruntime_c_api.h"
|
|
#include "core/optimizer/graph_transformer_level.h"
|
|
|
|
namespace onnxruntime {
|
|
struct FreeDimensionOverride {
|
|
std::string dimension_denotation;
|
|
int64_t dimension_override;
|
|
};
|
|
|
|
/**
|
|
* Configuration information for a session.
|
|
*/
|
|
struct SessionOptions {
|
|
ExecutionMode execution_mode = ExecutionMode::ORT_SEQUENTIAL;
|
|
|
|
// enable profiling for this session.
|
|
bool enable_profiling = false;
|
|
|
|
// non empty filepath enables serialization of the transformed optimized model to the specified filepath.
|
|
std::basic_string<ORTCHAR_T> optimized_model_filepath;
|
|
|
|
// enable the memory pattern optimization.
|
|
// The idea is if the input shapes are the same, we could trace the internal memory allocation
|
|
// and generate a memory pattern for future request. So next time we could just do one allocation
|
|
// with a big chunk for all the internal memory allocation.
|
|
// See class 'OrtValuePatternPlanner'.
|
|
bool enable_mem_pattern = true;
|
|
|
|
// enable the memory arena on CPU
|
|
// Arena may pre-allocate memory for future usage.
|
|
// set this option to false if you don't want it.
|
|
bool enable_cpu_mem_arena = true;
|
|
|
|
// the prefix of the profile file. The current time will be appended to the file name.
|
|
std::basic_string<ORTCHAR_T> profile_file_prefix = ORT_TSTR("onnxruntime_profile_");
|
|
|
|
std::string session_logid; ///< logger id to use for session output
|
|
|
|
/// Log severity for the inference session. Applies to session load, initialization, etc.
|
|
/// See https://github.com/microsoft/onnxruntime/blob/master/include/onnxruntime/core/common/logging/severity.h
|
|
/// Default = -1 (use default logger severity)
|
|
int session_log_severity_level = -1;
|
|
int session_log_verbosity_level = 0; ///< VLOG level if debug build and session_log_severity_level is 0 (VERBOSE).
|
|
|
|
unsigned max_num_graph_transformation_steps = 10; // TODO choose a good default here?
|
|
|
|
// set graph optimization level
|
|
TransformerLevel graph_optimization_level = TransformerLevel::Level1;
|
|
|
|
// controls the size of the thread pool used to parallelize the execution of tasks within individual nodes (ops)
|
|
int intra_op_num_threads = 0;
|
|
|
|
// controls the size of the thread pool used to parallelize the execution of nodes (ops)
|
|
// configuring this makes sense only when you're using parallel executor
|
|
int inter_op_num_threads = 0;
|
|
|
|
// For models with free input dimensions (most commonly batch size), specifies a set of values to override those
|
|
// free dimensions with, keyed by dimension denotation.
|
|
std::vector<FreeDimensionOverride> free_dimension_overrides;
|
|
};
|
|
} // namespace onnxruntime
|