From 0bf12e9dbf7d5ba434dcaa8254a9495b9d3d7bd7 Mon Sep 17 00:00:00 2001 From: Yufeng Li Date: Mon, 22 Apr 2019 13:49:41 -0700 Subject: [PATCH] Add option to enable/disable memory pattern back (#872) By design, memory pattern does not work with the parallel executor. Enabling memory pattern for the parallel executor logs a warning and degrades performance. Add back the option to enable/disable memory pattern. --- .../onnxruntime/core/session/onnxruntime_c_api.h | 7 +++++-- onnxruntime/core/framework/execution_frame.cc | 11 ++++++----- onnxruntime/core/framework/session_state.cc | 14 +++++++++++--- onnxruntime/core/framework/session_state.h | 12 ++++++++++++ onnxruntime/core/session/abi_session_options.cc | 14 +++++++++++--- onnxruntime/core/session/inference_session.cc | 1 + onnxruntime/core/session/inference_session.h | 6 ++++++ 7 files changed, 52 insertions(+), 13 deletions(-) diff --git a/include/onnxruntime/core/session/onnxruntime_c_api.h b/include/onnxruntime/core/session/onnxruntime_c_api.h index 2dcd728705..599e17cbc7 100644 --- a/include/onnxruntime/core/session/onnxruntime_c_api.h +++ b/include/onnxruntime/core/session/onnxruntime_c_api.h @@ -208,9 +208,12 @@ ORT_API(void, OrtDisableSequentialExecution, _In_ OrtSessionOptions* options); ORT_API(void, OrtEnableProfiling, _In_ OrtSessionOptions* options, _In_ const ORTCHAR_T* profile_file_prefix); ORT_API(void, OrtDisableProfiling, _In_ OrtSessionOptions* options); -// deprecated +// Enable the memory pattern optimization. +// The idea is if the input shapes are the same, we could trace the internal memory allocation +// and generate a memory pattern for future request. So next time we could just do one allocation +// with a big chunk for all the internal memory allocation. +// Note: memory pattern optimization is only available when SequentialExecution enabled. 
ORT_API(void, OrtEnableMemPattern, _In_ OrtSessionOptions* options); -// deprecated ORT_API(void, OrtDisableMemPattern, _In_ OrtSessionOptions* options); // Enable the memory arena on CPU diff --git a/onnxruntime/core/framework/execution_frame.cc b/onnxruntime/core/framework/execution_frame.cc index 30b828b9e1..6a0e0cf21b 100644 --- a/onnxruntime/core/framework/execution_frame.cc +++ b/onnxruntime/core/framework/execution_frame.cc @@ -186,7 +186,7 @@ ExecutionFrame::ExecutionFrame(const std::vector& feed_mlvalue_idxs, // If the session enable memory pattern optimization // and we have execution plan generated, try to setup // memory pattern optimization. - if (session_state.GetExecutionPlan()) { + if (session_state.GetEnableMemoryPattern() && session_state.GetExecutionPlan()) { std::vector input_shapes; bool all_tensors = true; for (const auto& feed : feeds) { @@ -198,7 +198,7 @@ ExecutionFrame::ExecutionFrame(const std::vector& feed_mlvalue_idxs, input_shapes.push_back(tensor.Shape()); } - // if there are some traditional ml value type in inputs disable the memory pattern optimization. + //if there are some traditional ml value type in inputs disable the memory pattern optimization. if (all_tensors) { mem_patterns_ = session_state.GetMemoryPatternGroup(input_shapes); // if no existing patterns, generate one in this executionframe @@ -435,9 +435,10 @@ const AllocPlanPerValue& ExecutionFrame::GetAllocationPlan(int mlvalue_idx) { } void ExecutionFrame::TraceAllocate(int mlvalue_idx, size_t size) { - // don't trace the output tensors. - auto& allocation_plan = GetAllocationPlan(mlvalue_idx); - if (planner_ && allocation_plan.alloc_kind != AllocKind::kAllocateOutput) { + if (planner_) { + // don't trace the output tensors. 
+ auto& allocation_plan = GetAllocationPlan(mlvalue_idx); + if (allocation_plan.alloc_kind == AllocKind::kAllocateOutput) return; auto status = planner_->TraceAllocation(mlvalue_idx, size); if (!status.IsOK()) LOGS(session_state_.Logger(), WARNING) << "TraceAllocation for mlvalue_idx=" << mlvalue_idx << " size=" << size diff --git a/onnxruntime/core/framework/session_state.cc b/onnxruntime/core/framework/session_state.cc index 5be86cd841..62d7932f8e 100644 --- a/onnxruntime/core/framework/session_state.cc +++ b/onnxruntime/core/framework/session_state.cc @@ -96,6 +96,13 @@ Status SessionState::UpdateMemoryPatternGroupCache(const std::vector session_state) { auto entry = subgraph_session_states_.find(index); // make sure this is new. internal logic error if it is not so using ORT_ENFORCE. if (entry != subgraph_session_states_.cend()) { const auto& existing_entries = entry->second; - ORT_ENFORCE(existing_entries.find(attribute_name) == existing_entries.cend(), "Entry exists in node ", index, - " for attribute ", attribute_name); + ORT_ENFORCE(existing_entries.find(attribute_name) == existing_entries.cend(), + "Entry exists in node ", index, " for attribute ", attribute_name); } subgraph_session_states_[index].insert(std::make_pair(attribute_name, std::move(session_state))); diff --git a/onnxruntime/core/framework/session_state.h b/onnxruntime/core/framework/session_state.h index 619f2683df..08a53afb9b 100644 --- a/onnxruntime/core/framework/session_state.h +++ b/onnxruntime/core/framework/session_state.h @@ -121,6 +121,16 @@ class SessionState { Status UpdateMemoryPatternGroupCache(const std::vector& input_shape, std::unique_ptr mem_patterns) const; + /** + Set enable memory pattern flag + */ + void SetEnableMemoryPattern(bool flag); + + /** + Get enable memory pattern flag + */ + bool GetEnableMemoryPattern() const; + struct NodeInfo { /** * @@ -197,6 +207,8 @@ class SessionState { const logging::Logger* logger_ = nullptr; profiling::Profiler* profiler_; + // switch 
for enable memory pattern optimization or not. + bool enable_mem_pattern_ = true; // lock for the mem_patterns_ mutable OrtMutex mem_patterns_lock_; // cache for the generated mem_patterns. key is calculated based on input shapes. diff --git a/onnxruntime/core/session/abi_session_options.cc b/onnxruntime/core/session/abi_session_options.cc index b9680fac48..a0f82d15ec 100644 --- a/onnxruntime/core/session/abi_session_options.cc +++ b/onnxruntime/core/session/abi_session_options.cc @@ -51,8 +51,16 @@ ORT_API(void, OrtDisableProfiling, _In_ OrtSessionOptions* options) { options->value.profile_file_prefix.clear(); } -ORT_API(void, OrtEnableMemPattern, _In_ OrtSessionOptions*) {} -ORT_API(void, OrtDisableMemPattern, _In_ OrtSessionOptions*) {} +// enable the memory pattern optimization. +// The idea is if the input shapes are the same, we could trace the internal memory allocation +// and generate a memory pattern for future request. So next time we could just do one allocation +// with a big chunk for all the internal memory allocation. +ORT_API(void, OrtEnableMemPattern, _In_ OrtSessionOptions* options) { + options->value.enable_mem_pattern = true; +} +ORT_API(void, OrtDisableMemPattern, _In_ OrtSessionOptions* options) { + options->value.enable_mem_pattern = false; +} // enable the memory arena on CPU // Arena may pre-allocate memory for future usage. @@ -79,7 +87,7 @@ ORT_API(void, OrtSetSessionLogVerbosityLevel, _In_ OrtSessionOptions* options, u // Returns 0 on success and -1 otherwise // Available options are : 0, 1, 2. 
ORT_API(int, OrtSetSessionGraphOptimizationLevel, _In_ OrtSessionOptions* options, uint32_t graph_optimization_level) { - if (graph_optimization_level >= static_cast(onnxruntime::TransformerLevel::MaxTransformerLevel)){ + if (graph_optimization_level >= static_cast(onnxruntime::TransformerLevel::MaxTransformerLevel)) { return -1; } options->value.graph_optimization_level = static_cast(graph_optimization_level); diff --git a/onnxruntime/core/session/inference_session.cc b/onnxruntime/core/session/inference_session.cc index 0c4be5e30a..a5ee850204 100644 --- a/onnxruntime/core/session/inference_session.cc +++ b/onnxruntime/core/session/inference_session.cc @@ -110,6 +110,7 @@ InferenceSession::InferenceSession(const SessionOptions& session_options, loggin } session_state_.SetThreadPool(thread_pool_.get()); + session_state_.SetEnableMemoryPattern(session_options.enable_mem_pattern && session_options.enable_sequential_execution); session_profiler_.Initialize(session_logger_); session_state_.SetProfiler(session_profiler_); if (session_options.enable_profiling) { diff --git a/onnxruntime/core/session/inference_session.h b/onnxruntime/core/session/inference_session.h index 3f82872c06..88e627a8ea 100644 --- a/onnxruntime/core/session/inference_session.h +++ b/onnxruntime/core/session/inference_session.h @@ -53,6 +53,12 @@ struct SessionOptions { // enable profiling for this session. bool enable_profiling = false; + // enable the memory pattern optimization. + // The idea is if the input shapes are the same, we could trace the internal memory allocation + // and generate a memory pattern for future request. So next time we could just do one allocation + // with a big chunk for all the internal memory allocation. + bool enable_mem_pattern = true; + // enable the memory arena on CPU // Arena may pre-allocate memory for future usage. // set this option to false if you don't want it.