onnxruntime/onnxruntime/core/framework/session_options.h

68 lines
2.7 KiB
C++

// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
#pragma once
#include <string>
#include <vector>
#include "core/session/onnxruntime_c_api.h"
#include "core/optimizer/graph_transformer_level.h"
namespace onnxruntime {
struct FreeDimensionOverride {
std::string dimension_denotation;
int64_t dimension_override;
};
/**
* Configuration information for a session.
*/
struct SessionOptions {
ExecutionMode execution_mode = ExecutionMode::ORT_SEQUENTIAL;
// enable profiling for this session.
bool enable_profiling = false;
// non empty filepath enables serialization of the transformed optimized model to the specified filepath.
std::basic_string<ORTCHAR_T> optimized_model_filepath;
// enable the memory pattern optimization.
// The idea is if the input shapes are the same, we could trace the internal memory allocation
// and generate a memory pattern for future request. So next time we could just do one allocation
// with a big chunk for all the internal memory allocation.
// See class 'OrtValuePatternPlanner'.
bool enable_mem_pattern = true;
// enable the memory arena on CPU
// Arena may pre-allocate memory for future usage.
// set this option to false if you don't want it.
bool enable_cpu_mem_arena = true;
// the prefix of the profile file. The current time will be appended to the file name.
std::basic_string<ORTCHAR_T> profile_file_prefix = ORT_TSTR("onnxruntime_profile_");
std::string session_logid; ///< logger id to use for session output
/// Log severity for the inference session. Applies to session load, initialization, etc.
/// See https://github.com/microsoft/onnxruntime/blob/master/include/onnxruntime/core/common/logging/severity.h
/// Default = -1 (use default logger severity)
int session_log_severity_level = -1;
int session_log_verbosity_level = 0; ///< VLOG level if debug build and session_log_severity_level is 0 (VERBOSE).
unsigned max_num_graph_transformation_steps = 10; // TODO choose a good default here?
// set graph optimization level
TransformerLevel graph_optimization_level = TransformerLevel::Level1;
// controls the size of the thread pool used to parallelize the execution of tasks within individual nodes (ops)
int intra_op_num_threads = 0;
// controls the size of the thread pool used to parallelize the execution of nodes (ops)
// configuring this makes sense only when you're using parallel executor
int inter_op_num_threads = 0;
// For models with free input dimensions (most commonly batch size), specifies a set of values to override those
// free dimensions with, keyed by dimension denotation.
std::vector<FreeDimensionOverride> free_dimension_overrides;
};
} // namespace onnxruntime