Gracefully catch exceptions when XPU device initialization fails (#141658)

Pull Request resolved: https://github.com/pytorch/pytorch/pull/141658
Approved by: https://github.com/EikanWang
This commit is contained in:
Yu, Guangye 2024-11-27 16:56:56 +00:00 committed by PyTorch MergeBot
parent 60fe50aa42
commit d905f1350a

View file

@ -44,8 +44,18 @@ void enumDevices(std::vector<std::unique_ptr<sycl::device>>& devices) {
}
inline void initGlobalDevicePoolState() {
// Enumerate all GPU devices and record them.
enumDevices(gDevicePool.devices);
// Attempt to initialize XPU devices. If no device is found or the driver is
// not installed correctly, issue a warning message instead of raising an
// exception to avoid disrupting the user experience.
try {
// Enumerate all GPU devices and record them.
enumDevices(gDevicePool.devices);
} catch (const sycl::exception& e) {
TORCH_WARN(
"Failed to initialize XPU devices. The driver may not be installed, installed incorrectly, or incompatible with the current setup. ",
"Please refer to the guideline (https://github.com/pytorch/pytorch?tab=readme-ov-file#intel-gpu-support) for proper installation and configuration.");
return;
}
if (gDevicePool.devices.empty()) {
TORCH_WARN("XPU device count is zero!");
return;