[ROCm] fix test_cublas_workspace_explicit_allocation for gfx12 (#145227)

gfx12 passes the condition `torch.cuda.get_device_capability() >= (9, 4)` and uses `default_workspace_size=128MB`, but that size is required only for MI300
Fix the condition to use `("gfx94" in gcn_arch)` (from `torch.cuda.get_device_properties().gcnArchName`) instead of `torch.cuda.get_device_capability() >= (9, 4)` to detect MI300.
Now `default_workspace_size=32MB` is used for gfx12 and the test passes

Pull Request resolved: https://github.com/pytorch/pytorch/pull/145227
Approved by: https://github.com/jeffdaily, https://github.com/eqy
This commit is contained in:
Dmitry Nikolaev 2025-01-28 16:19:27 +00:00 committed by PyTorch MergeBot
parent 80a0412b76
commit 6967ef1b07
2 changed files with 6 additions and 2 deletions

View file

@@ -125,7 +125,8 @@ size_t parseChosenWorkspaceSize() {
}
/* 32MiB default, 128MiB for MI300 */
cudaDeviceProp* properties = at::cuda::getCurrentDeviceProperties();
const bool gfx94 = properties != nullptr && properties->major == 9 && properties->minor == 4;
std::string device_arch = properties->gcnArchName;
const bool gfx94 = device_arch.find("gfx94") != std::string::npos;
const size_t default_size = gfx94 ? 1024 * 128 * 1024 : 1024 * 32 * 1024;
#else
/* :4096:2:16:8 default, 32MiB for Hopper */

View file

@@ -454,7 +454,10 @@ class TestCuda(TestCase):
if torch.version.hip:
default_workspace_size = 1024 * 32 * 1024 # :1024:32 32MiB
# different size (128 MiB) expected on MI300 GPU
if torch.cuda.get_device_capability() >= (9, 4):
gcn_arch = str(
torch.cuda.get_device_properties(0).gcnArchName.split(":", 1)[0]
)
if "gfx94" in gcn_arch:
default_workspace_size = 1024 * 128 * 1024 # :1024:128
else:
default_workspace_size = (