mirror of
https://github.com/saymrwulf/onnxruntime.git
synced 2026-05-30 23:18:20 +00:00
### Description Re-work handling of static objects in pybind. Make sure we ref-count Environment from Sessions. The following has been done: - Make global objects function-static. This ensures that the objects are constructed on demand. The first object constructed is destructed last. This is platform-independent. - Make the ownership of global objects shared, as suggested by pybind, since they are not surfaced at the Python level and cannot be referred to by dependent Python objects. Verified that all Python objects are GCed before globals are destroyed. This takes care of the inference session's dependency on the environment and its default logger, and it is also platform-independent. - Utilize the pybind atexit mechanism to clear execution providers and unload CUDA libraries (as suggested by https://github.com/microsoft/onnxruntime/pull/14903). Since this is registered for module exit, it takes place before any other globals are destroyed and clears shared objects' state or even unloads the libraries. This should also work in a platform-independent way. ### Motivation and Context - Global object destruction order is managed manually, and that becomes a source of trouble. We want to make it deterministic and platform-independent. - Frequent hangs in the Python layer due to the static objects' destruction order. Some of the Python session objects are garbage collected after main exits, and they require the ORT environment to be alive (use-after-free).
54 lines
1.9 KiB
C++
54 lines
1.9 KiB
C++
// Copyright (c) Microsoft Corporation. All rights reserved.
|
|
// Licensed under the MIT License.
|
|
|
|
#include "python/onnxruntime_pybind_exceptions.h"
|
|
#include "python/onnxruntime_pybind_mlvalue.h"
|
|
#include "python/onnxruntime_pybind_state_common.h"
|
|
|
|
#include "core/platform/env.h"
|
|
#include <unordered_map>
|
|
#include <cstdlib>
|
|
|
|
namespace onnxruntime {
|
|
namespace python {
|
|
// Short alias for the pybind11 namespace.
namespace py = pybind11;
|
|
|
|
// Makes the names of onnxruntime::logging visible inside onnxruntime::python.
// NOTE(review): if this file is included as a header, the directive also applies to
// every translation unit that includes it - confirm that this is intended.
using namespace onnxruntime::logging;
|
|
|
|
// Maps a string key to a shared IExecutionProvider instance.
// NOTE(review): the key is presumably the value produced by
// ORTTrainingPythonEnv::GetExecutionProviderMapKey - confirm against the definition.
using ExecutionProviderMap = std::unordered_map<std::string, std::shared_ptr<IExecutionProvider>>;
|
|
// Maps a string key to a (string, ProviderOptions) pair.
// NOTE(review): presumably provider type -> (provider library path, default options),
// mirroring the parameters of RegisterExtExecutionProviderInfo - confirm against the definition.
using ExecutionProviderLibInfoMap = std::unordered_map<std::string, std::pair<std::string, ProviderOptions>>;
|
|
|
|
class ORTTrainingPythonEnv {
|
|
public:
|
|
ORTTrainingPythonEnv();
|
|
|
|
std::shared_ptr<Environment> GetORTEnv() const;
|
|
|
|
std::shared_ptr<IExecutionProvider> GetExecutionProviderInstance(const std::string& provider_type,
|
|
size_t hash);
|
|
|
|
void AddExecutionProvider(const std::string& provider_type,
|
|
size_t hash,
|
|
std::unique_ptr<IExecutionProvider> execution_provider);
|
|
|
|
void RegisterExtExecutionProviderInfo(const std::string& provider_type,
|
|
const std::string& provider_lib_path,
|
|
const ProviderOptions& default_options);
|
|
|
|
const std::vector<std::string>& GetAvailableTrainingExecutionProviderTypes();
|
|
|
|
ExecutionProviderLibInfoMap ext_execution_provider_info_map_;
|
|
|
|
void ClearExecutionProviderInstances();
|
|
|
|
private:
|
|
std::string GetExecutionProviderMapKey(const std::string& provider_type,
|
|
size_t hash);
|
|
|
|
std::shared_ptr<Environment> ort_env_;
|
|
ExecutionProviderMap execution_provider_instances_map_;
|
|
std::vector<std::string> available_training_eps_;
|
|
};
|
|
|
|
} // namespace python
|
|
} // namespace onnxruntime
|