Fix ETW trace logging crash in multithreading situations (#21566)

### Description
ETW trace logger is fakely registered as initialized_ is marked as true
before the registration is done, causing crashing issue for Lenovo
camera application.

A prior attempt to address was made here:
https://github.com/microsoft/onnxruntime/pull/21226
It was reverted here:
https://github.com/microsoft/onnxruntime/pull/21360

### Motivation and Context
The problem is that during initialization of TraceLoggingRegisterEx, it
will reinvoke the callback and attempt reinitialization, which is not
allowed. TraceLoggingRegisterEx however can be initialized concurrently
when initialization happens on multiple threads. For these reasons it
needs to be protected by a lock, but the lock cannot naively block
because the callback's reinvocation will cause a deadlock.

To solve this problem another tracking variable is added :
"initializing" which protects against reinitialization during the first
initialization.

---------

Co-authored-by: Sheil Kumar <sheilk@microsoft.com>
This commit is contained in:
Sheil Kumar 2024-07-31 08:59:55 -07:00 committed by GitHub
parent 1d4b161145
commit b341c44c20
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 28 additions and 6 deletions

View file

@ -137,28 +137,45 @@ void NTAPI EtwRegistrationManager::ORT_TL_EtwEnableCallback(
EtwRegistrationManager::~EtwRegistrationManager() {
std::lock_guard<OrtMutex> lock(callbacks_mutex_);
callbacks_.clear();
::TraceLoggingUnregister(etw_provider_handle);
if (initialization_status_ == InitializationStatus::Initialized ||
initialization_status_ == InitializationStatus::Initializing) {
std::lock_guard<OrtMutex> init_lock(init_mutex_);
assert(initialization_status_ != InitializationStatus::Initializing);
if (initialization_status_ == InitializationStatus::Initialized) {
::TraceLoggingUnregister(etw_provider_handle);
initialization_status_ = InitializationStatus::NotInitialized;
}
}
}
EtwRegistrationManager::EtwRegistrationManager() {
}
void EtwRegistrationManager::LazyInitialize() {
if (!initialized_) {
void EtwRegistrationManager::LazyInitialize() try {
if (initialization_status_ == InitializationStatus::NotInitialized) {
std::lock_guard<OrtMutex> lock(init_mutex_);
if (!initialized_) { // Double-check locking pattern
initialized_ = true;
if (initialization_status_ == InitializationStatus::NotInitialized) { // Double-check locking pattern
initialization_status_ = InitializationStatus::Initializing;
etw_status_ = ::TraceLoggingRegisterEx(etw_provider_handle, ORT_TL_EtwEnableCallback, nullptr);
if (FAILED(etw_status_)) {
ORT_THROW("ETW registration failed. Logging will be broken: " + std::to_string(etw_status_));
}
initialization_status_ = InitializationStatus::Initialized;
}
}
} catch (...) {
initialization_status_ = InitializationStatus::Failed;
throw;
}
void EtwRegistrationManager::InvokeCallbacks(LPCGUID SourceId, ULONG IsEnabled, UCHAR Level, ULONGLONG MatchAnyKeyword,
ULONGLONG MatchAllKeyword, PEVENT_FILTER_DESCRIPTOR FilterData,
PVOID CallbackContext) {
if (initialization_status_ != InitializationStatus::Initialized) {
// Drop messages until manager is fully initialized.
return;
}
std::lock_guard<OrtMutex> lock(callbacks_mutex_);
for (const auto& callback : callbacks_) {
(*callback)(SourceId, IsEnabled, Level, MatchAnyKeyword, MatchAllKeyword, FilterData, CallbackContext);

View file

@ -47,6 +47,11 @@ class EtwSink : public ISink {
};
class EtwRegistrationManager {
enum class InitializationStatus { NotInitialized,
Initializing,
Initialized,
Failed };
public:
using EtwInternalCallback = std::function<void(LPCGUID SourceId, ULONG IsEnabled, UCHAR Level,
ULONGLONG MatchAnyKeyword, ULONGLONG MatchAllKeyword,
@ -96,7 +101,7 @@ class EtwRegistrationManager {
OrtMutex callbacks_mutex_;
mutable OrtMutex provider_change_mutex_;
OrtMutex init_mutex_;
bool initialized_ = false;
InitializationStatus initialization_status_ = InitializationStatus::NotInitialized;
bool is_enabled_;
UCHAR level_;
ULONGLONG keyword_;