mirror of
https://github.com/saymrwulf/pytorch.git
synced 2026-05-14 20:57:59 +00:00
[ROCm] fix test_cublas_workspace_explicit_allocation for gfx12 (#145227)
gfx12 passes the condition `torch.cuda.get_device_capability() >= (9, 4)` and uses `default_workspace_size=128MB`, but that size is required only for MI300
Fix the condition to check `("gfx94" in gcn_arch)` (via the device's `gcnArchName`) instead of relying on `torch.cuda.get_device_capability()` alone to detect MI300.
Now `default_workspace_size=32MB` is used for gfx12 and the test passes.
Pull Request resolved: https://github.com/pytorch/pytorch/pull/145227
Approved by: https://github.com/jeffdaily, https://github.com/eqy
This commit is contained in:
parent
80a0412b76
commit
6967ef1b07
2 changed files with 6 additions and 2 deletions
|
|
@ -125,7 +125,8 @@ size_t parseChosenWorkspaceSize() {
|
|||
}
|
||||
/* 32MiB default, 128MiB for MI300 */
|
||||
cudaDeviceProp* properties = at::cuda::getCurrentDeviceProperties();
|
||||
const bool gfx94 = properties != nullptr && properties->major == 9 && properties->minor == 4;
|
||||
std::string device_arch = properties->gcnArchName;
|
||||
const bool gfx94 = device_arch.find("gfx94") != std::string::npos;
|
||||
const size_t default_size = gfx94 ? 1024 * 128 * 1024 : 1024 * 32 * 1024;
|
||||
#else
|
||||
/* :4096:2:16:8 default, 32MiB for Hopper */
|
||||
|
|
|
|||
|
|
@ -454,7 +454,10 @@ class TestCuda(TestCase):
|
|||
if torch.version.hip:
|
||||
default_workspace_size = 1024 * 32 * 1024 # :1024:32 32MiB
|
||||
# different size (128 MiB) expected on MI300 GPU
|
||||
if torch.cuda.get_device_capability() >= (9, 4):
|
||||
gcn_arch = str(
|
||||
torch.cuda.get_device_properties(0).gcnArchName.split(":", 1)[0]
|
||||
)
|
||||
if "gfx94" in gcn_arch:
|
||||
default_workspace_size = 1024 * 128 * 1024 # :1024:128
|
||||
else:
|
||||
default_workspace_size = (
|
||||
|
|
|
|||
Loading…
Reference in a new issue