diff --git a/include/onnxruntime/core/framework/run_options.h b/include/onnxruntime/core/framework/run_options.h index e5a84e7aa7..5444c825d7 100644 --- a/include/onnxruntime/core/framework/run_options.h +++ b/include/onnxruntime/core/framework/run_options.h @@ -27,10 +27,6 @@ struct OrtRunOptions { // So it is possible that only some of the nodes are executed. bool only_execute_path_to_fetches = false; - // Set to 'true' to synchronize execution providers with CPU at the end of session run. - // Taking CUDA EP as an example, it will trigger cudaStreamSynchronize on the compute stream. - bool synchronize_execution_providers = true; - #ifdef ENABLE_TRAINING // Used by onnxruntime::training::TrainingSession. This class is now deprecated. // Delete training_mode when TrainingSession is deleted. diff --git a/include/onnxruntime/core/session/onnxruntime_run_options_config_keys.h b/include/onnxruntime/core/session/onnxruntime_run_options_config_keys.h index 49b46ca077..1f5fcd50e1 100644 --- a/include/onnxruntime/core/session/onnxruntime_run_options_config_keys.h +++ b/include/onnxruntime/core/session/onnxruntime_run_options_config_keys.h @@ -25,3 +25,8 @@ // Example usage: "cpu:0;gpu:0" (or) "gpu:0" // By default, the value for this key is empty (i.e.) no memory arenas are shrunk static const char* const kOrtRunOptionsConfigEnableMemoryArenaShrinkage = "memory.enable_memory_arena_shrinkage"; + +// Set to '1' to not synchronize execution providers with CPU at the end of session run. +// By default, the value for this key is '0' (i.e. synchronization is performed). +// Taking CUDA EP as an example, setting '1' omits the cudaStreamSynchronize call on the compute stream. 
+static const char* const kOrtRunOptionsConfigDisableSynchronizeExecutionProviders = "disable_synchronize_execution_providers"; diff --git a/onnxruntime/core/framework/utils.cc b/onnxruntime/core/framework/utils.cc index 79691d7b51..f88d098454 100644 --- a/onnxruntime/core/framework/utils.cc +++ b/onnxruntime/core/framework/utils.cc @@ -20,6 +20,8 @@ #include "core/framework/tensorprotoutils.h" #include "core/mlas/inc/mlas.h" #include "core/framework/TensorSeq.h" +#include "core/framework/run_options.h" +#include "core/session/onnxruntime_run_options_config_keys.h" #ifdef USE_AZURE #include "core/framework/cloud_executor.h" #endif @@ -793,13 +795,14 @@ common::Status ExecuteGraph(const SessionState& session_state, logger); } #endif + bool synchronize_execution_providers = run_options.config_options.GetConfigOrDefault(kOrtRunOptionsConfigDisableSynchronizeExecutionProviders, "0") == "0"; return ExecuteGraph(session_state, feeds_fetches_manager, feeds, fetches, execution_mode, run_options.terminate, logger, - run_options.synchronize_execution_providers, + synchronize_execution_providers, run_options.only_execute_path_to_fetches); } diff --git a/onnxruntime/core/session/inference_session.cc b/onnxruntime/core/session/inference_session.cc index 43ccfa2962..79068a0271 100644 --- a/onnxruntime/core/session/inference_session.cc +++ b/onnxruntime/core/session/inference_session.cc @@ -1997,7 +1997,8 @@ Status InferenceSession::Run(const RunOptions& run_options, // info all execution providers InferenceSession:Run ended for (auto* xp : exec_providers_to_stop) { - auto status = xp->OnRunEnd(run_options.synchronize_execution_providers); + bool synchronize_execution_providers = run_options.config_options.GetConfigOrDefault(kOrtRunOptionsConfigDisableSynchronizeExecutionProviders, "0") == "0"; + auto status = xp->OnRunEnd(synchronize_execution_providers); ORT_CHECK_AND_SET_RETVAL(status); } diff --git a/onnxruntime/python/onnxruntime_pybind_state.cc 
b/onnxruntime/python/onnxruntime_pybind_state.cc index 7d4fb6d32c..490eb92afc 100644 --- a/onnxruntime/python/onnxruntime_pybind_state.cc +++ b/onnxruntime/python/onnxruntime_pybind_state.cc @@ -1368,8 +1368,6 @@ RunOptions instance. The individual calls will exit gracefully and return an err #endif .def_readwrite("only_execute_path_to_fetches", &RunOptions::only_execute_path_to_fetches, R"pbdoc(Only execute the nodes needed by fetch list)pbdoc") - .def_readwrite("synchronize_execution_providers", &RunOptions::synchronize_execution_providers, - R"pbdoc(Synchronize execution providers after executing session.)pbdoc") .def( "add_run_config_entry", [](RunOptions* options, const char* config_key, const char* config_value) -> void { diff --git a/orttraining/orttraining/python/training/torchdynamo/ort_backend.py b/orttraining/orttraining/python/training/torchdynamo/ort_backend.py index bd397edf67..0319fecf69 100644 --- a/orttraining/orttraining/python/training/torchdynamo/ort_backend.py +++ b/orttraining/orttraining/python/training/torchdynamo/ort_backend.py @@ -396,7 +396,7 @@ def _run_onnx_session_with_ortvaluevector( _nvtx_range_push("run_with_ortvaluevector") run_options = onnxruntime.RunOptions() - run_options.synchronize_execution_providers = True + run_options.add_run_config_entry("disable_synchronize_execution_providers", "1") sess.run_with_ortvaluevector(run_options, input_names, ort_inputs, output_names, ort_outputs, output_devices) _nvtx_range_pop()