onnxruntime/onnxruntime/core/framework/execution_providers.h
ivberg 148f54c6ea
Add capturestate / rundown ETW support logging for session and provider options (#19397)
### Description
Add capturestate / rundown ETW support logging for session and provider
options.

### Motivation and Context
Follow-up to #16259 and #18882

This is very useful when you have longer running ONNX sessions which
will be the case for a lot of AI workloads. That means ETW tracing may
start minutes or hours after a process & session has been established.
When a trace is captured, you would want to know the state of ONNX at
that time. For ONNX, that state is the session and config options, which
are now logged on capture state so that they show up in the trace.

Tested with xperf and ORT 
xperf -start ort -on 3a26b1ff-7484-7484-7484-15261f42614d
xperf -capturestate ort 3a26b1ff-7484-7484-7484-15261f42614d <--- Run
this after session has been up for some time
xperf -stop ort -d .\ort.etl  <- Trace will now also have rundown events

These events will also show up if you use WPR
[CaptureStateOnSave](https://learn.microsoft.com/en-us/windows-hardware/test/wpt/capturestateonsave)
2024-02-08 11:28:05 -08:00

160 lines
5.6 KiB
C++

// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
#pragma once
#include <memory>
#include <string>
#include <unordered_map>
#include <vector>
#include "core/framework/execution_provider.h"
#include "core/graph/graph_viewer.h"
#include "core/common/logging/logging.h"
#ifdef _WIN32
#include <winmeta.h>
#include <evntrace.h>
#include "core/platform/tracing.h"
#include "core/platform/windows/telemetry.h"
#endif
namespace onnxruntime {
/**
Class for managing lookup of the execution providers in a session.

Providers are kept in the order in which they were added. Each provider is
addressable by its id through an id -> index map, and the options reported by
each provider at registration time are retained so they can be queried (and,
on Windows, re-emitted to ETW when a capture-state request arrives).
*/
class ExecutionProviders {
 public:
  ExecutionProviders() = default;

  /**
  Register an execution provider under the given id.

  @param provider_id Id to register the provider under. Must not already be registered.
  @param p_exec_provider Provider to register. It is dereferenced unconditionally,
         so it must not be null.
  @return Status::OK() on success, or a FAIL status (also written to the default
          logger) if provider_id has already been registered. On failure no
          internal state is modified.
  */
  common::Status Add(const std::string& provider_id, const std::shared_ptr<IExecutionProvider>& p_exec_provider) {
    // make sure there are no issues before we change any internal data structures
    if (provider_idx_map_.find(provider_id) != provider_idx_map_.end()) {
      auto status = ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "Provider ", provider_id, " has already been registered.");
      LOGS_DEFAULT(ERROR) << status.ErrorMessage();
      return status;
    }

    // index that provider will have after insertion
    auto new_provider_idx = exec_providers_.size();
    ORT_IGNORE_RETURN_VALUE(provider_idx_map_.insert({provider_id, new_provider_idx}));

    // update execution provider options
    auto providerOptions = p_exec_provider->GetProviderOptions();
    exec_provider_options_[provider_id] = providerOptions;

#ifdef _WIN32
    LogProviderOptions(provider_id, providerOptions, false);

    // The capture-state callback walks every registered provider, so a single
    // registration per instance is sufficient. Registering it on every Add()
    // would run the same rundown once per provider and duplicate each event.
    RegisterCaptureStateCallbackOnce();
#endif

    exec_provider_ids_.push_back(provider_id);
    exec_providers_.push_back(p_exec_provider);

    return Status::OK();
  }

#ifdef _WIN32
  /**
  Emit one "ProviderOptions" TraceLogging event per key/value pair in providerOptions.

  @param provider_id Id of the provider the options belong to; written as "ProviderId".
  @param providerOptions Options to log; each entry is written as "Key" / "Value".
  @param captureState Written as "isCaptureState". Add() passes false; the
         capture-state callback passes true.
  */
  void LogProviderOptions(const std::string& provider_id, const ProviderOptions& providerOptions, bool captureState) {
    for (const auto& config_pair : providerOptions) {
      TraceLoggingWrite(
          telemetry_provider_handle,
          "ProviderOptions",
          TraceLoggingKeyword(static_cast<uint64_t>(onnxruntime::logging::ORTTraceLoggingKeyword::Session)),
          TraceLoggingLevel(WINEVENT_LEVEL_INFO),
          TraceLoggingString(provider_id.c_str(), "ProviderId"),
          TraceLoggingString(config_pair.first.c_str(), "Key"),
          TraceLoggingString(config_pair.second.c_str(), "Value"),
          TraceLoggingBool(captureState, "isCaptureState"));
    }
  }
#endif

  /** Get the provider assigned to the node, or nullptr if that provider type is not registered. */
  const IExecutionProvider* Get(const onnxruntime::Node& node) const {
    return Get(node.GetExecutionProviderType());
  }

  /** Get the provider registered under provider_id, or nullptr if there is none. */
  const IExecutionProvider* Get(onnxruntime::ProviderType provider_id) const {
    auto it = provider_idx_map_.find(provider_id);
    if (it == provider_idx_map_.end()) {
      return nullptr;
    }

    return exec_providers_[it->second].get();
  }

  /** Non-const overload: get the provider registered under provider_id, or nullptr if there is none. */
  IExecutionProvider* Get(onnxruntime::ProviderType provider_id) {
    auto it = provider_idx_map_.find(provider_id);
    if (it == provider_idx_map_.end()) {
      return nullptr;
    }

    return exec_providers_[it->second].get();
  }

  /** True if no provider has been added. */
  bool Empty() const { return exec_providers_.empty(); }

  /** Number of providers that have been added. */
  size_t NumProviders() const { return exec_providers_.size(); }

  // Read-only iteration over the providers in the order they were added.
  using const_iterator = typename std::vector<std::shared_ptr<IExecutionProvider>>::const_iterator;
  const_iterator begin() const noexcept { return exec_providers_.cbegin(); }
  const_iterator end() const noexcept { return exec_providers_.cend(); }

  /** Ids of the registered providers, in the order they were added. */
  const std::vector<std::string>& GetIds() const { return exec_provider_ids_; }

  /** Options captured from each provider when it was added, keyed by provider id. */
  const ProviderOptionsMap& GetAllProviderOptions() const { return exec_provider_options_; }

  bool GetCpuProviderWasImplicitlyAdded() const { return cpu_execution_provider_was_implicitly_added_; }

  void SetCpuProviderWasImplicitlyAdded(bool cpu_execution_provider_was_implicitly_added) {
    cpu_execution_provider_was_implicitly_added_ = cpu_execution_provider_was_implicitly_added;
  }

 private:
  // Some compilers emit incomprehensible output if this is allowed
  // with a container that has unique_ptr or something move-only.
  ORT_DISALLOW_COPY_AND_ASSIGNMENT(ExecutionProviders);

#ifdef _WIN32
  // Register the ETW capture state (rundown) callback for this instance the
  // first time it is called; later calls are no-ops.
  //
  // NOTE(review): the callback captures `this` and nothing in this class
  // unregisters it. If the telemetry callback registry can outlive this object,
  // the callback would reference a destroyed instance - confirm against
  // WindowsTelemetry and unregister in a destructor if an API for that exists.
  // NOTE(review): the callback reads the provider containers without
  // synchronization; presumably Add() is not expected to race with a
  // capture-state request - confirm.
  void RegisterCaptureStateCallbackOnce() {
    if (etw_capture_state_callback_registered_) {
      return;
    }

    WindowsTelemetry::RegisterInternalCallback(
        [this](
            LPCGUID SourceId,
            ULONG IsEnabled,
            UCHAR Level,
            ULONGLONG MatchAnyKeyword,
            ULONGLONG MatchAllKeyword,
            PEVENT_FILTER_DESCRIPTOR FilterData,
            PVOID CallbackContext) {
          (void)SourceId;
          (void)Level;
          (void)MatchAllKeyword;
          (void)FilterData;
          (void)CallbackContext;

          // Only react to capture-state requests that include the Session keyword.
          const bool is_capture_state = (IsEnabled == EVENT_CONTROL_CODE_CAPTURE_STATE);
          const bool session_keyword_set =
              (MatchAnyKeyword & static_cast<ULONGLONG>(onnxruntime::logging::ORTTraceLoggingKeyword::Session)) != 0;
          if (!is_capture_state || !session_keyword_set) {
            return;
          }

          // Re-emit the stored options of every registered provider, in registration order.
          for (const auto& registered_id : exec_provider_ids_) {
            auto it = exec_provider_options_.find(registered_id);
            if (it != exec_provider_options_.end()) {
              LogProviderOptions(registered_id, it->second, true);
            }
          }
        });

    etw_capture_state_callback_registered_ = true;
  }

  // Whether this instance has already registered its ETW capture-state callback.
  bool etw_capture_state_callback_registered_ = false;
#endif

  // Registered providers, in the order they were added.
  std::vector<std::shared_ptr<IExecutionProvider>> exec_providers_;
  // Provider ids; entry i is the id of exec_providers_[i].
  std::vector<std::string> exec_provider_ids_;
  // Options reported by each provider at the time it was added, keyed by provider id.
  ProviderOptionsMap exec_provider_options_;

  // maps for fast lookup of an index into exec_providers_
  std::unordered_map<std::string, size_t> provider_idx_map_;

  // Whether the CPU provider was implicitly added to a session for fallback (true),
  // or whether it was explicitly added by the caller.
  bool cpu_execution_provider_was_implicitly_added_ = false;
};
} // namespace onnxruntime