_inductor: Add dynamo_timed for async_compile.precompile and turn on waitcounters (#141920)

This fixes some review comments from https://github.com/pytorch/pytorch/pull/141379
and gives us another dynamo_timed event for local compilation.

Pull Request resolved: https://github.com/pytorch/pytorch/pull/141920
Approved by: https://github.com/masnesral
This commit is contained in:
Colin L. Rice 2024-12-03 16:51:30 -07:00 committed by PyTorch MergeBot
parent 30d907c6fb
commit 86f306b15e

View file

@ -38,7 +38,6 @@ from torch._inductor.runtime.compile_tasks import (
_worker_compile_triton,
)
from torch.hub import _Faketqdm, tqdm
from torch.monitor import _WaitCounter
from torch.utils._triton import has_triton_package
@ -239,7 +238,11 @@ class AsyncCompile:
set_feature_use(
"pytorch/inductor:enable_parallel_compile_version (post_warmup)", False
)
with _WaitCounter("pytorch.async_compile.precompile").guard():
with dynamo_timed(
"async_compile.precompile",
log_pt2_compile_event=True,
log_waitcounter=True,
):
kernel.precompile()
return kernel
@ -308,8 +311,8 @@ class AsyncCompile:
def wait(self, scope: Dict[str, Any]) -> None:
with dynamo_timed(
"async_compile.wait", log_pt2_compile_event=True
), _WaitCounter("pytorch.async_compile.wait").guard():
"async_compile.wait", log_pt2_compile_event=True, log_waitcounter=True
):
num_kernels = len(
[
value