mirror of
https://github.com/saymrwulf/pytorch.git
synced 2026-05-14 20:57:59 +00:00
[inductor] Use torch.cuda.clock_rate instead of triton.testing.nvsmi (#118662)
`triton.testing.nvsmi` invokes `nvidia-smi` as a subprocess, and Meta prod usually doesn't make nvidia-smi available. Might as well just use something that's native to torch. Differential Revision: [D53235814](https://our.internmc.facebook.com/intern/diff/D53235814/) Pull Request resolved: https://github.com/pytorch/pytorch/pull/118662 Approved by: https://github.com/jansel
This commit is contained in:
parent
80379ef0aa
commit
563f1b9fef
2 changed files with 10 additions and 2 deletions
|
|
@@ -1162,9 +1162,9 @@ def get_device_tflops(dtype):
|
|||
|
||||
if inspect.signature(get_max_simd_tflops).parameters.get("clock_rate"):
|
||||
# Triton API change in https://github.com/openai/triton/pull/2293
|
||||
from triton.testing import nvsmi
|
||||
from torch._utils_internal import max_clock_rate
|
||||
|
||||
sm_clock = nvsmi(["clocks.max.sm"])[0]
|
||||
sm_clock = max_clock_rate()
|
||||
if dtype in (torch.float16, torch.bfloat16):
|
||||
return get_max_tensorcore_tflops(dtype, sm_clock)
|
||||
|
||||
|
|
|
|||
|
|
@@ -1,3 +1,4 @@
|
|||
import functools
|
||||
import logging
|
||||
import os
|
||||
import sys
|
||||
|
|
@@ -94,6 +95,13 @@ def log_export_usage(**kwargs):
|
|||
pass
|
||||
|
||||
|
||||
@functools.lru_cache(None)
|
||||
def max_clock_rate():
|
||||
from triton.testing import nvsmi
|
||||
|
||||
return nvsmi(["clocks.max.sm"])[0]
|
||||
|
||||
|
||||
TEST_MASTER_ADDR = "127.0.0.1"
|
||||
TEST_MASTER_PORT = 29500
|
||||
# USE_GLOBAL_DEPS controls whether __init__.py tries to load
|
||||
|
|
|
|||
Loading…
Reference in a new issue