diff --git a/torch/_C/_profiler.pyi b/torch/_C/_profiler.pyi index 3c1b74c681a..7f4ba7ec97a 100644 --- a/torch/_C/_profiler.pyi +++ b/torch/_C/_profiler.pyi @@ -1,5 +1,5 @@ from enum import Enum -from typing import Any, Literal +from typing import Any, Literal, Optional from typing_extensions import TypeAlias from torch._C import device, dtype, layout @@ -72,6 +72,7 @@ class ProfilerConfig: with_flops: bool, with_modules: bool, experimental_config: _ExperimentalConfig, + trace_id: Optional[str] = None, ) -> None: ... class _ProfilerEvent: diff --git a/torch/autograd/profiler.py b/torch/autograd/profiler.py index 207ad6d272f..6ffb9d3160b 100644 --- a/torch/autograd/profiler.py +++ b/torch/autograd/profiler.py @@ -1,4 +1,5 @@ # mypy: allow-untyped-defs +import uuid from collections import defaultdict from dataclasses import dataclass from time import perf_counter_ns @@ -209,6 +210,7 @@ class profile: use_cpu=True, experimental_config=None, acc_events=False, + custom_trace_id_callback=None, ): self.enabled: bool = enabled if not self.enabled: @@ -245,7 +247,8 @@ class profile: self.profiling_start_time_ns = 0 self.profiling_end_time_ns = 0 self._stats = _ProfilerStats() - + self.custom_trace_id_callback = custom_trace_id_callback + self.trace_id = "" if not self.use_cpu: assert ( use_kineto @@ -305,7 +308,22 @@ class profile: len(self.kineto_activities) > 0 ), "No activities specified for the profiler" - def config(self): + def default_trace_id(self): + # Generate a UUID + uuid_raw = uuid.uuid4() + + return f"{uuid_raw.int:032X}" + + def create_trace_id(self): + if self.custom_trace_id_callback: + return self.custom_trace_id_callback() + return self.default_trace_id() + + def config(self, create_trace_id=False): + # only need to generate new trace id upon prepare trace not start trace + if create_trace_id: + trace_id = self.create_trace_id() + self.trace_id = trace_id return ProfilerConfig( self.profiler_kind, self.record_shapes, @@ -314,6 +332,7 @@ class profile: self.with_flops, self.with_modules, self.experimental_config, + self.trace_id, ) def __enter__(self): @@ -328,7 +347,7 @@ class profile: def _prepare_trace(self): self.entered = True t0 = perf_counter_ns() - _prepare_profiler(self.config(), self.kineto_activities) + _prepare_profiler(self.config(create_trace_id=True), self.kineto_activities) t1 = perf_counter_ns() self._stats.profiler_prepare_call_duration_us = int((t1 - t0) / 1000) @@ -336,7 +355,7 @@ class profile: self.entered = True _run_on_profiler_start() t0 = perf_counter_ns() - _enable_profiler(self.config(), self.kineto_activities) + _enable_profiler(self.config(create_trace_id=False), self.kineto_activities) t1 = perf_counter_ns() self._stats.profiler_enable_call_duration_us = int((t1 - t0) / 1000) self.profiling_start_time_ns = t1 diff --git a/torch/csrc/autograd/profiler_kineto.cpp b/torch/csrc/autograd/profiler_kineto.cpp index 10d1c2e7ef7..a428ebbc569 100644 --- a/torch/csrc/autograd/profiler_kineto.cpp +++ b/torch/csrc/autograd/profiler_kineto.cpp @@ -603,7 +603,8 @@ void prepareProfiler( at::hasCUDA() || at::hasXPU() || at::hasMTIA() || c10::get_privateuse1_backend() != "privateuseone"), activities, - config.experimental_config); + config.experimental_config, + config.trace_id); if (!config.experimental_config.performance_events.empty()) { /* For now only CPU activity is supported */ diff --git a/torch/csrc/profiler/kineto_shim.cpp b/torch/csrc/profiler/kineto_shim.cpp index ef70242eafb..2b850ac0010 100644 --- a/torch/csrc/profiler/kineto_shim.cpp +++ b/torch/csrc/profiler/kineto_shim.cpp @@ -220,10 +220,21 @@ bool collectivesProfilerExists() { #endif } +static const std::string setTraceID(const std::string& trace_id) { + if (trace_id == "") { + return ""; + } + std::stringstream configss; + configss << "REQUEST_TRACE_ID=" << trace_id << "\n"; + configss << "REQUEST_GROUP_TRACE_ID=" << trace_id << "\n"; + return configss.str(); +} + void prepareTrace( const bool cpuOnly, const ActivitySet& activities, - const torch::profiler::impl::ExperimentalConfig& config) { + const torch::profiler::impl::ExperimentalConfig& config, + const std::string& trace_id) { #ifdef USE_KINETO libkineto::api().resetKinetoTLS(); if (!libkineto::api().isProfilerRegistered()) { @@ -270,7 +281,9 @@ void prepareTrace( return; } - libkineto::api().activityProfiler().prepareTrace(k_activities); + const std::string configStr = setTraceID(trace_id); + + libkineto::api().activityProfiler().prepareTrace(k_activities, configStr); #endif // USE_KINETO } diff --git a/torch/csrc/profiler/kineto_shim.h b/torch/csrc/profiler/kineto_shim.h index 085e9dd2fcb..c4efd7785b7 100644 --- a/torch/csrc/profiler/kineto_shim.h +++ b/torch/csrc/profiler/kineto_shim.h @@ -111,7 +111,8 @@ using ActivitySet = std::set; void prepareTrace( const bool cpuOnly, const ActivitySet& activities, - const torch::profiler::impl::ExperimentalConfig& config); + const torch::profiler::impl::ExperimentalConfig& config, + const std::string& trace_id = ""); void toggleCollectionDynamic(const bool enable); void startTrace(); diff --git a/torch/csrc/profiler/orchestration/observer.cpp b/torch/csrc/profiler/orchestration/observer.cpp index 39a8845cb84..c4d25f5f078 100644 --- a/torch/csrc/profiler/orchestration/observer.cpp +++ b/torch/csrc/profiler/orchestration/observer.cpp @@ -38,14 +38,16 @@ ProfilerConfig::ProfilerConfig( bool with_stack, bool with_flops, bool with_modules, - ExperimentalConfig experimental_config) + ExperimentalConfig experimental_config, + const std::string& trace_id) : state{state}, experimental_config{std::move(experimental_config)}, report_input_shapes{report_input_shapes}, profile_memory{profile_memory}, with_stack{with_stack}, with_flops{with_flops}, - with_modules{with_modules} {} + with_modules{with_modules}, + trace_id{trace_id} {} bool ProfilerConfig::disabled() const { return state == torch::profiler::impl::ProfilerState::Disabled; diff --git a/torch/csrc/profiler/orchestration/observer.h b/torch/csrc/profiler/orchestration/observer.h index 4475101efac..ef7b4d4566e 100644 --- a/torch/csrc/profiler/orchestration/observer.h +++ b/torch/csrc/profiler/orchestration/observer.h @@ -103,7 +103,8 @@ struct TORCH_API ProfilerConfig { bool with_stack = false, bool with_flops = false, bool with_modules = false, - ExperimentalConfig experimental_config = ExperimentalConfig()); + ExperimentalConfig experimental_config = ExperimentalConfig(), + const std::string& trace_id = ""); bool disabled() const; bool global() const; @@ -115,6 +116,7 @@ struct TORCH_API ProfilerConfig { bool with_stack; bool with_flops; bool with_modules; + std::string trace_id; // For serialization at::IValue toIValue() const; diff --git a/torch/csrc/profiler/python/init.cpp b/torch/csrc/profiler/python/init.cpp index 1a859c58980..cf5041558d8 100644 --- a/torch/csrc/profiler/python/init.cpp +++ b/torch/csrc/profiler/python/init.cpp @@ -410,15 +410,26 @@ void initPythonBindings(PyObject* module) { })); py::class_(m, "ProfilerConfig") - .def(py::init< - ProfilerState, - bool, /* report_input_shapes */ - bool, /* profile_memory */ - bool, /* with_stack */ - bool, /* with_flops */ - bool, /* with_modules */ - ExperimentalConfig /* experimental_config */ - >()); + .def( + py::init< + ProfilerState, + bool, /* report_input_shapes */ + bool, /* profile_memory */ + bool, /* with_stack */ + bool, /* with_flops */ + bool, /* with_modules */ + ExperimentalConfig /* experimental_config */, + std::string /* trace_id */ + >(), + py::arg("state"), + py::arg("report_input_shapes"), + py::arg("profile_memory"), + py::arg("with_stack"), + py::arg("with_flops"), + py::arg("with_modules"), + py::arg("experimental_config"), + py::arg("trace_id") = "" // Make trace_id the only optional param + ); py::enum_(m, "_EventType") .value("TorchOp", EventType::TorchOp) diff --git a/torch/profiler/profiler.py b/torch/profiler/profiler.py index 355c199ad2c..8985bd99c70 100644 --- a/torch/profiler/profiler.py +++ b/torch/profiler/profiler.py @@ -141,6 +141,7 @@ class _KinetoProfile: experimental_config: Optional[_ExperimentalConfig] = None, execution_trace_observer: Optional[_ITraceObserver] = None, acc_events: bool = False, + custom_trace_id_callback: Optional[Callable[[], str]] = None, ): self.activities = set(activities) if activities else supported_activities() self.record_shapes = record_shapes @@ -151,6 +152,7 @@ class _KinetoProfile: self.experimental_config = experimental_config self.execution_trace_observer = execution_trace_observer self.acc_events = acc_events + self.custom_trace_id_callback = custom_trace_id_callback self.profiler: Optional[prof.profile] = None self.mem_tl: Optional[MemoryProfileTimeline] = None self.use_device = None @@ -186,6 +188,7 @@ class _KinetoProfile: use_kineto=True, experimental_config=self.experimental_config, acc_events=self.acc_events, + custom_trace_id_callback=self.custom_trace_id_callback, ) self.profiler._prepare_trace() @@ -661,6 +664,7 @@ class profile(_KinetoProfile): acc_events: bool = False, # deprecated: use_cuda: Optional[bool] = None, + custom_trace_id_callback: Optional[Callable[[], str]] = None, ): activities_set = set(activities) if activities else supported_activities() if use_cuda is not None: @@ -685,6 +689,7 @@ class profile(_KinetoProfile): experimental_config=experimental_config, execution_trace_observer=execution_trace_observer, acc_events=acc_events, + custom_trace_id_callback=custom_trace_id_callback, ) if schedule: @@ -806,6 +811,20 @@ class profile(_KinetoProfile): ) self.step_rec_fn.__enter__() + def set_custom_trace_id_callback(self, callback): + """ + Sets a callback to be called when a new trace ID is generated. + """ + self.custom_trace_id_callback = callback + + def get_trace_id(self): + """ + Returns the current trace ID. + """ + if self.profiler is None: + return None + return self.profiler.trace_id + def _trace_ready(self): if self.on_trace_ready: self.on_trace_ready(self)