mirror of
https://github.com/saymrwulf/pytorch.git
synced 2026-05-14 20:57:59 +00:00
Summary: Fixed a bunch of fbcode imports that happened to work but confused autodeps. After this autodeps still suggests "improvements" to TARGETS (which breaks our builds) but at least it can find all the imports. Test Plan: ``` fbpython fbcode/tools/build/buck/linters/lint_autoformat.py --linter=autodeps --default-exec-timeout=1800 -- fbcode/caffe2/TARGETS fbcode/caffe2/test/TARGETS ``` Before: ``` ERROR while processing caffe2/test/TARGETS: Cannot find an owner for "test_export" (from caffe2/test/export/testing.py:229) when processing rule "test_export". Please make sure it's listed in the srcs parameter of another rule. See https://fbur$ ERROR while processing caffe2/test/TARGETS: Cannot find an owner for "testing" (from caffe2/test/export/test_export.py:87) when processing rule "test_export". Please make sure it's listed in the srcs parameter of another rule. See https://fburl$ ERROR while processing caffe2/test/TARGETS: Cannot find an owner for "test_export" (from caffe2/test/export/test_serdes.py:9) when processing rule "test_export". Please make sure it's listed in the srcs parameter of another rule. See https://fb$ ERROR while processing caffe2/test/TARGETS: Cannot find an owner for "testing" (from caffe2/test/export/test_serdes.py:10) when processing rule "test_export". Please make sure it's listed in the srcs parameter of another rule. See https://fburl$ ERROR while processing caffe2/test/TARGETS: Cannot find an owner for "testing" (from caffe2/test/export/test_retraceability.py:7) when processing rule "test_export". Please make sure it's listed in the srcs parameter of another rule. See https:$ ERROR while processing caffe2/test/TARGETS: Cannot find an owner for "test_export" (from caffe2/test/export/test_retraceability.py:6) when processing rule "test_export". Please make sure it's listed in the srcs parameter of another rule. 
See ht$ ERROR while processing caffe2/test/TARGETS: Cannot find an owner for "testing" (from caffe2/test/export/test_export_nonstrict.py:7) when processing rule "test_export". Please make sure it's listed in the srcs parameter of another rule. See http$ ERROR while processing caffe2/test/TARGETS: Cannot find an owner for "test_export" (from caffe2/test/export/test_export_nonstrict.py:6) when processing rule "test_export". Please make sure it's listed in the srcs parameter of another rule. See $ ERROR while processing caffe2/test/TARGETS: Cannot find an owner for "test_export" (from caffe2/test/export/test_export_training_ir_to_run_decomp.py:8) when processing rule "test_export". Please make sure it's listed in the srcs parameter of an$ ERROR while processing caffe2/test/TARGETS: Cannot find an owner for "testing" (from caffe2/test/export/test_export_training_ir_to_run_decomp.py:10) when processing rule "test_export". Please make sure it's listed in the srcs parameter of anoth$ ERROR while processing caffe2/test/TARGETS: Found "//python/typeshed_internal:typeshed_internal_library" owner for "cv2" but it is protected by visibility rules: [] (from caffe2/test/test_bundled_images.py:7) when processing rule "test_bundled_$ ERROR while processing caffe2/test/TARGETS: Cannot find an owner for "caffe2.test.profiler_test_cpp_thread_lib" (from caffe2/test/profiler/test_cpp_thread.py:29) when processing rule "profiler_test_cpp_thread". Please make sure it's listed in t$ ERROR while processing caffe2/test/TARGETS: Cannot find an owner for "torch._utils_internal.get_file_path_2" (from caffe2/test/test_custom_ops.py:23) when processing rule "custom_ops". Please make sure it's listed in the srcs parameter of anoth$ ERROR while processing caffe2/test/TARGETS: Cannot find an owner for "torch._utils_internal.get_file_path_2" (from caffe2/test/test_public_bindings.py:13) when processing rule "public_bindings". 
Please make sure it's listed in the srcs paramete$ ERROR while processing caffe2/test/TARGETS: Cannot find an owner for "torch._C._profiler.symbolize_tracebacks" (from caffe2/test/test_cuda.py:3348) when processing rule "test_cuda". Please make sure it's listed in the srcs parameter of another $ ERROR while processing caffe2/test/TARGETS: Cannot find an owner for "torch._C._profiler.gather_traceback" (from caffe2/test/test_cuda.py:3348) when processing rule "test_cuda". Please make sure it's listed in the srcs parameter of another rule$ ERROR while processing caffe2/test/TARGETS: Cannot find an owner for include <torch/csrc/autograd/profiler_kineto.h> (from caffe2/test/profiler/test_cpp_thread.cpp:2) when processing profiler_test_cpp_thread_lib. Some things to try: ``` Differential Revision: D62049222 Pull Request resolved: https://github.com/pytorch/pytorch/pull/135614 Approved by: https://github.com/oulgen, https://github.com/laithsakka
206 lines
7.2 KiB
Python
206 lines
7.2 KiB
Python
# Owner(s): ["module: inductor"]
|
|
import json
|
|
import unittest
|
|
from typing import Callable, Optional
|
|
|
|
import torch
|
|
import torch._inductor.test_case
|
|
import torch._inductor.utils
|
|
from torch._inductor import config
|
|
from torch.profiler import ProfilerActivity
|
|
from torch.testing._internal.common_utils import TemporaryFileName
|
|
from torch.testing._internal.inductor_utils import HAS_CUDA
|
|
from torch.utils._triton import has_triton
|
|
|
|
|
|
# Evaluated once at import time. The tests in this module are decorated with
# `unittest.skipIf(not HAS_TRITON, ...)`, so they are skipped when this is falsy.
HAS_TRITON = has_triton()
|
|
|
|
|
|
class DynamoProfilerTests(torch._inductor.test_case.TestCase):
    """Checks that kernels produced by `torch.compile` are visible to the profiler.

    Every test allocates its inputs on the "cuda" device and is skipped when
    `HAS_TRITON` is falsy.
    """

    @unittest.skipIf(not HAS_TRITON, "requires cuda & triton")
    def test_inductor_profiling_triton_launch(self):
        """The exported chrome trace contains a CPU-side kernel launch event."""

        # Verify that we get some sort of CPU-side indication of triton kernel launches
        # in the profile traces. Currently, those appear as `cuLaunchKernel` (or
        # `hipModuleLaunchKernel` when `torch.version.hip` is set). If this
        # detail changes, the test can be updated or removed.
        @torch.compile
        def fn(x, y):
            return (x + y).sin().cos()

        x, y = (torch.rand((4, 4), device="cuda") for _ in range(2))

        with torch.profiler.profile() as prof:
            fn(x, y)

        # Round-trip the trace through a file so that the exported JSON itself
        # is what gets inspected.
        with TemporaryFileName(mode="w+") as fname:
            prof.export_chrome_trace(fname)
            with open(fname) as f:
                trace_json = json.load(f)

        self.assertIn("traceEvents", trace_json)
        events = trace_json["traceEvents"]

        kernel_name = "hipModuleLaunchKernel" if torch.version.hip else "cuLaunchKernel"

        def name_matches_launch_kernel(event_name):
            # Substring match on purpose: variants of the launch API name
            # (e.g. `cuLaunchKernelEx`) must be accepted as well.
            return kernel_name in event_name

        self.assertTrue(
            any(
                "name" in event and name_matches_launch_kernel(event["name"])
                for event in events
            ),
            f"no event containing {kernel_name!r} found in the exported trace",
        )

    def _test_profiling_kernel_names(
        self, fn, args, kernel_name_str: str, check_fn: Optional[Callable] = None
    ):
        """Compile `fn`, profile one call and assert a triton kernel event exists.

        We expect a record_function event to be added on the CPU side, surrounding
        the launch of each triton kernel.

        Args:
            fn: callable to wrap with `torch.compile`.
            args: positional arguments, unpacked into every call of the compiled `fn`.
            kernel_name_str: substring that must appear in the name of at least
                one profiler event whose name also contains "triton".
            check_fn: optional zero-argument callable invoked after the two
                un-profiled calls and before profiling starts; it may raise
                (e.g. `unittest.SkipTest`) to abort the test.

        Returns:
            The events recorded by the profiler for the profiled call.
        """
        fn_opt = torch.compile(fn)

        # Two un-profiled calls before the measured one.
        for _ in range(2):
            fn_opt(*args)

        if check_fn is not None:
            check_fn()

        with torch.profiler.profile(
            activities=[ProfilerActivity.CPU], record_shapes=True
        ) as prof:
            fn_opt(*args)

        events = prof.events()

        # The name of the kernel is expected to match the name of the kernel in debug
        # files etc. The name could change in the future, but it seems reasonable that
        # the name should always contain "triton" and "kernel_name_str" - e.g. if the
        # kernel contains a sin op, it should probably contain "sin" in the name.
        # If this changes in the future, feel free to change the assertion here.
        # Debugging tips: you can add prof.export_chrome_trace("test.json") inline in
        # this test, and then view test.json in chrome://tracing to see the trace.
        self.assertTrue(
            any(
                (
                    hasattr(event, "name")
                    and kernel_name_str in event.name
                    and "triton" in event.name
                )
                for event in events
            )
        )
        return events

    def _assert_kernel_event_shapes(self, events, event_name, expected_shapes):
        """Assert that `event_name` was recorded and carries `expected_shapes`.

        Fails when no event in `events` is named exactly `event_name`, or when
        any such event has `input_shapes` different from `expected_shapes`.
        """
        matching = [event for event in events if event.name == event_name]
        self.assertTrue(matching, f"no profiler event named {event_name!r}")
        for event in matching:
            self.assertEqual(event.input_shapes, expected_shapes)

    @unittest.skipIf(not HAS_TRITON, "requires cuda & triton")
    def test_inductor_profiling_kernel_names_pointwise(self):
        """A fused add/sin/cos kernel is recorded with its input shapes."""

        def fn(x, y):
            return (x + y).sin().cos()

        args = [torch.rand((4, 4), device="cuda") for _ in range(2)]

        events = self._test_profiling_kernel_names(fn, args, "sin")
        self._assert_kernel_event_shapes(
            events, "triton_poi_fused_add_cos_sin_0", [[4, 4], [4, 4], [4, 4], []]
        )

    @unittest.skipIf(not HAS_TRITON, "requires cuda & triton")
    def test_inductor_profiling_kernel_names_template(self):
        """A matmul compiled with max-autotune is recorded as a triton template kernel."""
        with config.patch(
            {"max_autotune": True, "max_autotune_gemm_backends": "TRITON"}
        ):

            def fn(x, y):
                return x @ y

            args = [torch.rand((4, 4), device="cuda") for _ in range(2)]

            def check_fn():
                # test_profiling_kernel_names will check this before asserting mm is in the trace.
                # reason: sometimes testing runs on machines with not enough SMs, and autotuning is skipped.
                if (
                    torch._dynamo.utils.counters["inductor"][
                        "select_algorithm_autotune"
                    ]
                    == 0
                ):
                    raise unittest.SkipTest(
                        "select_algorithm didn't run, we probably won't get profiling data. GPU might not have enough SMs."
                    )

            events = self._test_profiling_kernel_names(fn, args, "mm", check_fn)
            self._assert_kernel_event_shapes(
                events, "triton_tem_fused_mm_0", [[4, 4], [4, 4], [4, 4]]
            )

    @unittest.skipIf(not HAS_TRITON, "requires cuda & triton")
    def test_inductor_profiling_kernel_names_foreach(self):
        """A `_foreach_add` over two lists of three tensors is recorded as one foreach kernel."""
        with config.patch(
            {"max_autotune": True, "max_autotune_gemm_backends": "TRITON"}
        ):

            def fn(x, y):
                return torch._foreach_add(x, y)

            x = [torch.rand((4, 4), device="cuda") for _ in range(3)]
            y = [torch.rand((4, 4), device="cuda") for _ in range(3)]

            args = (x, y)

            events = self._test_profiling_kernel_names(fn, args, "_for_")
            # Nine [4, 4] entries are expected on the recorded event.
            self._assert_kernel_event_shapes(
                events, "triton_for_fused_0", [[4, 4] for _ in range(9)]
            )

    @unittest.skipIf(not HAS_TRITON, "requires cuda & triton")
    def test_inductor_profiling_triton_hooks(self):
        """Launch enter/exit hooks set on `CompiledKernel` fire for compiled code."""
        from triton.compiler import CompiledKernel  # @manual

        hooks_called = {"enter": False, "exit": False}

        def launch_enter_hook(lazy_dict):
            hooks_called["enter"] = True

        def launch_exit_hook(lazy_dict):
            hooks_called["exit"] = True

        # The hooks are class attributes, i.e. process-wide state: remember the
        # previous values and put them back so that later tests are unaffected.
        previous_enter_hook = getattr(CompiledKernel, "launch_enter_hook", None)
        previous_exit_hook = getattr(CompiledKernel, "launch_exit_hook", None)
        CompiledKernel.launch_enter_hook = launch_enter_hook
        CompiledKernel.launch_exit_hook = launch_exit_hook
        try:

            def fn(x, y):
                return torch._foreach_add(x, y)

            x = [torch.rand((4, 4), device="cuda") for _ in range(3)]
            y = [torch.rand((4, 4), device="cuda") for _ in range(3)]

            args = (x, y)
            fn_opt = torch.compile(fn)
            fn_opt(*args)
        finally:
            CompiledKernel.launch_enter_hook = previous_enter_hook
            CompiledKernel.launch_exit_hook = previous_exit_hook

        self.assertTrue(hooks_called["enter"])
        self.assertTrue(hooks_called["exit"])
|
|
|
|
|
|
if __name__ == "__main__":
    # Script entry point: the runner is only imported when this file is executed directly.
    from torch._inductor import test_case as _inductor_test_case

    # Nothing is run when HAS_CUDA is falsy; every test here places its inputs
    # on the "cuda" device.
    if HAS_CUDA:
        _inductor_test_case.run_tests()
|