Revert "Introduce XPU implementation for PyTorch ATen operators (#120891)"
This reverts commit 148a8de639.
Reverted https://github.com/pytorch/pytorch/pull/120891 on behalf of https://github.com/huydhn due to Sorry for reverting your change but I need to revert it to resolve a conflict in trunk https://github.com/pytorch/pytorch/pull/121794#issuecomment-2013434523. Please help reland the change after ([comment](https://github.com/pytorch/pytorch/pull/120891#issuecomment-2013668563))
parent 628dcde136
commit 182bb0f2ca
4 changed files with 2 additions and 184 deletions
caffe2/CMakeLists.txt

@@ -1096,56 +1096,6 @@ if(USE_XPU)
   torch_compile_options(torch_xpu) # see cmake/public/utils.cmake
   target_compile_options_if_supported(torch_xpu "-Wno-deprecated-copy") # see cmake/public/utils.cmake
   target_compile_definitions(torch_xpu PRIVATE USE_XPU)
-
-  # ATen XPU implementation
-  set(TORCH_XPU_OPS_DIR ${TORCH_ROOT}/third_party/torch-xpu-ops)
-  set(TORCH_XPU_OPS_REPO_URL https://github.com/intel/torch-xpu-ops.git)
-  file(READ "${TORCH_ROOT}/third_party/xpu.txt" TORCH_XPU_OPS_COMMIT)
-  string(REGEX REPLACE "\n$" "" TORCH_XPU_OPS_COMMIT "${TORCH_XPU_OPS_COMMIT}")
-  if(NOT EXISTS "${TORCH_XPU_OPS_DIR}/.git")
-    execute_process(
-      COMMAND git clone --quiet ${TORCH_XPU_OPS_REPO_URL} ${TORCH_XPU_OPS_DIR}
-      RESULT_VARIABLE _exitcode)
-    if(NOT _exitcode EQUAL 0)
-      message(FATAL_ERROR "Fail to clone ${TORCH_XPU_OPS_REPO_URL}")
-    endif()
-  endif()
-  execute_process(
-    COMMAND git fetch --quiet
-    WORKING_DIRECTORY ${TORCH_XPU_OPS_DIR}
-    RESULT_VARIABLE _exitcode)
-  if(NOT _exitcode EQUAL 0)
-    message(FATAL_ERROR "Fail to fetch ${TORCH_XPU_OPS_REPO_URL}")
-  endif()
-  execute_process(
-    COMMAND git checkout --quiet ${TORCH_XPU_OPS_COMMIT}
-    WORKING_DIRECTORY ${TORCH_XPU_OPS_DIR}
-    RESULT_VARIABLE _exitcode)
-  if(NOT _exitcode EQUAL 0)
-    message(FATAL_ERROR "Fail to checkout ${TORCH_XPU_OPS_REPO_URL} to ${TORCH_XPU_OPS_COMMIT}")
-  endif()
-
-  set(TORCH_XPU_OPS_INCLUDE_DIRS
-    ${TORCH_SRC_DIR}/csrc/api
-    ${TORCH_SRC_DIR}/csrc/api/include
-    ${Caffe2_CPU_INCLUDE}
-    ${Caffe2_XPU_INCLUDE})
-  # Pass the target as a dependency so that ATen headers generation
-  # could be followed by torch-xpu-ops build.
-  # 1. Sources in torch-xpu-ops depend on generated ATen headers.
-  # 2. Using add_custom_command in torch-xpu-ops to define sycl device sources
-  #    compilation. add_custom_command requires an explicit dependency.
-  set(TORCH_XPU_OPS_PYTORCH_DEPS ATEN_CPU_FILES_GEN_TARGET)
-
-  add_subdirectory(${TORCH_ROOT}/third_party/torch-xpu-ops
-    ${CMAKE_BINARY_DIR}/caffe2/aten_xpu)
-  if(NOT TARGET torch_xpu_ops)
-    message(WARNING "Failed to include ATen XPU implementation target")
-  else()
-    target_link_libraries(torch_xpu PRIVATE torch_xpu_ops)
-    target_link_libraries(torch_xpu PRIVATE
-      "-Wl,--whole-archive,\"$<TARGET_FILE:torch_xpu_ops>\" -Wl,--no-whole-archive")
-  endif()
 endif()

 if(NOT MSVC AND USE_XNNPACK)
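For context, the removed CMake block vendors intel/torch-xpu-ops at the commit pinned in third_party/xpu.txt: clone if absent, fetch, then check out the pin, failing hard on any git error. Below is a minimal Python sketch of the same pin-and-checkout pattern; the function name and example paths are illustrative only, and the CMake above remains the authoritative version.

```python
import subprocess
from pathlib import Path

def checkout_pinned_repo(repo_url: str, dest: Path, pin_file: Path) -> None:
    """Clone repo_url into dest (if needed) and check out the commit recorded in pin_file."""
    commit = pin_file.read_text().strip()  # pin file holds a single commit SHA
    if not (dest / ".git").exists():
        subprocess.run(["git", "clone", "--quiet", repo_url, str(dest)], check=True)
    # Fetch first so a previously cloned checkout can reach a newly pinned commit.
    subprocess.run(["git", "fetch", "--quiet"], cwd=dest, check=True)
    # check=True raises on failure, mirroring the FATAL_ERROR branches above.
    subprocess.run(["git", "checkout", "--quiet", commit], cwd=dest, check=True)

# Illustrative usage mirroring the removed CMake logic:
# checkout_pinned_repo(
#     "https://github.com/intel/torch-xpu-ops.git",
#     Path("third_party/torch-xpu-ops"),
#     Path("third_party/xpu.txt"),
# )
```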
test/test_xpu.py (106 changed lines)
@@ -5,21 +5,7 @@ import unittest

 import torch
 import torch.xpu._gpu_trace as gpu_trace
-from torch.testing._internal.common_device_type import (
-    instantiate_device_type_tests,
-    onlyXPU,
-    OpDTypes,
-    ops,
-)
-from torch.testing._internal.common_methods_invocations import ops_and_refs
-from torch.testing._internal.common_utils import (
-    NoTest,
-    run_tests,
-    suppress_warnings,
-    TEST_WITH_UBSAN,
-    TEST_XPU,
-    TestCase,
-)
+from torch.testing._internal.common_utils import NoTest, run_tests, TEST_XPU, TestCase

 if not TEST_XPU:
     print("XPU not available, skipping tests", file=sys.stderr)
@@ -27,42 +13,6 @@ if not TEST_XPU:
 TEST_MULTIXPU = torch.xpu.device_count() > 1

 cpu_device = torch.device("cpu")
 xpu_device = torch.device("xpu")
-
-any_common_cpu_xpu_one = OpDTypes.any_common_cpu_cuda_one
-_xpu_computation_op_list = [
-    "fill",
-    "zeros",
-    "zeros_like",
-    "clone",
-    "view_as_real",
-    "view_as_complex",
-    "view",
-    "resize_",
-    "resize_as_",
-    "add",
-    "sub",
-    "mul",
-    "div",
-    "abs",
-]
-_xpu_tensor_factory_op_list = [
-    "as_strided",
-    "empty",
-    "empty_strided",
-]
-_xpu_not_test_dtype_op_list = [
-    "resize_",  # Skipped by CPU
-    "resize_as_",  # Skipped by CPU
-    "abs",  # Not aligned dtype
-]
-_xpu_all_op_list = _xpu_computation_op_list + _xpu_tensor_factory_op_list
-_xpu_all_ops = [op for op in ops_and_refs if op.name in _xpu_all_op_list]
-_xpu_computation_ops = [
-    op for op in ops_and_refs if op.name in _xpu_computation_op_list
-]


 class TestXpu(TestCase):
     def test_device_behavior(self):
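For context, the deleted module-level lists carve a small allowlist out of PyTorch's OpInfo database: ops_and_refs holds OpInfo entries, and only entries whose name appears in the allowlist survive. A minimal sketch of that filtering pattern, with a made-up allowlist standing in for the removed _xpu_computation_op_list:

```python
from torch.testing._internal.common_methods_invocations import ops_and_refs

# Hypothetical allowlist; the reverted code used _xpu_computation_op_list.
_computation_op_list = ["add", "sub", "mul", "div", "abs"]

# Each OpInfo entry carries the callable plus its sample-input generators,
# so the filtered list can drive the @ops decorator directly.
_computation_ops = [op for op in ops_and_refs if op.name in _computation_op_list]
print(sorted(op.name for op in _computation_ops))
```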
@@ -177,60 +127,6 @@ if __name__ == "__main__":
         torch.xpu.set_rng_state(g_state0)
         self.assertEqual(2024, torch.xpu.initial_seed())

-    @onlyXPU
-    @suppress_warnings
-    @ops(_xpu_computation_ops, dtypes=any_common_cpu_xpu_one)
-    def test_compare_cpu(self, device, dtype, op):
-        def to_cpu(arg):
-            if isinstance(arg, torch.Tensor):
-                return arg.to(device="cpu")
-            return arg
-
-        samples = op.reference_inputs(device, dtype)
-
-        for sample in samples:
-            cpu_sample = sample.transform(to_cpu)
-            xpu_results = op(sample.input, *sample.args, **sample.kwargs)
-            cpu_results = op(cpu_sample.input, *cpu_sample.args, **cpu_sample.kwargs)
-
-            xpu_results = sample.output_process_fn_grad(xpu_results)
-            cpu_results = cpu_sample.output_process_fn_grad(cpu_results)
-
-            # Lower tolerance because we are running this as a `@slowTest`
-            # Don't want the periodic tests to fail frequently
-            self.assertEqual(xpu_results, cpu_results, atol=1e-4, rtol=1e-4)
-
-    @onlyXPU
-    @ops(_xpu_computation_ops, allowed_dtypes=(torch.bool,))
-    @unittest.skipIf(TEST_WITH_UBSAN, "Test uses undefined behavior")
-    def test_non_standard_bool_values(self, device, dtype, op):
-        # Test boolean values other than 0x00 and 0x01 (gh-54789)
-        def convert_boolean_tensors(x):
-            if not isinstance(x, torch.Tensor) or x.dtype != torch.bool:
-                return x
-
-            # Map False -> 0 and True -> Random value in [2, 255]
-            true_vals = torch.randint(
-                2, 255, x.shape, dtype=torch.uint8, device=x.device
-            )
-            false_vals = torch.zeros((), dtype=torch.uint8, device=x.device)
-            x_int = torch.where(x, true_vals, false_vals)
-
-            ret = x_int.view(torch.bool)
-            self.assertEqual(ret, x)
-            return ret
-
-        for sample in op.sample_inputs(device, dtype):
-            expect = op(sample.input, *sample.args, **sample.kwargs)
-
-            transformed = sample.transform(convert_boolean_tensors)
-            actual = op(transformed.input, *transformed.args, **transformed.kwargs)
-
-            self.assertEqual(expect, actual)
-
-
-instantiate_device_type_tests(TestXpu, globals(), only_for="xpu")
-

 class TestXpuTrace(TestCase):
     def setUp(self):
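Of the two deleted tests, test_non_standard_bool_values relies on a bitcast trick worth spelling out: Tensor.view(torch.bool) reinterprets a uint8 buffer in place, so any nonzero payload byte, not just the canonical 0x01, reads back as True (see gh-54789). A minimal CPU-only sketch of that round trip, independent of the test harness:

```python
import torch

mask = torch.tensor([True, False, True, False])

# Back the True elements with arbitrary bytes in [2, 255] instead of 0x01.
true_vals = torch.randint(2, 255, mask.shape, dtype=torch.uint8)
false_vals = torch.zeros((), dtype=torch.uint8)
raw = torch.where(mask, true_vals, false_vals)

# Bitcast the uint8 buffer to bool; the truth values survive the round trip.
reinterpreted = raw.view(torch.bool)
assert torch.equal(reinterpreted, mask)
```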
third_party/xpu.txt (vendored, 1 changed line)

@@ -1 +0,0 @@
-84db213ab7125fce94aa2f00d2c61811b9384f40
torch/testing/_internal/common_device_type.py

@@ -15,7 +15,7 @@ import os
 import torch
 from torch.testing._internal.common_utils import TestCase, TEST_WITH_ROCM, TEST_MKL, \
     skipCUDANonDefaultStreamIf, TEST_WITH_ASAN, TEST_WITH_UBSAN, TEST_WITH_TSAN, \
-    IS_SANDCASTLE, IS_FBCODE, IS_REMOTE_GPU, IS_WINDOWS, TEST_MPS, TEST_XPU, \
+    IS_SANDCASTLE, IS_FBCODE, IS_REMOTE_GPU, IS_WINDOWS, TEST_MPS, \
     _TestParametrizer, compose_parametrize_fns, dtype_name, \
     TEST_WITH_MIOPEN_SUGGEST_NHWC, NATIVE_DEVICES, skipIfTorchDynamo, \
     get_tracked_input, clear_tracked_input, PRINT_REPRO_ON_FAILURE, \
@@ -569,27 +569,6 @@ class MPSTestBase(DeviceTypeTestBase):
     def _should_stop_test_suite(self):
         return False

-class XPUTestBase(DeviceTypeTestBase):
-    device_type = 'xpu'
-    primary_device: ClassVar[str]
-
-    @classmethod
-    def get_primary_device(cls):
-        return cls.primary_device
-
-    @classmethod
-    def get_all_devices(cls):
-        # currently only one device is supported on MPS backend
-        prim_device = cls.get_primary_device()
-        return [prim_device]
-
-    @classmethod
-    def setUpClass(cls):
-        cls.primary_device = 'xpu:0'
-
-    def _should_stop_test_suite(self):
-        return False
-
 class PrivateUse1TestBase(DeviceTypeTestBase):
     primary_device: ClassVar[str]
     device_mod = None
@@ -697,8 +676,6 @@ def get_desired_device_type_test_bases(except_for=None, only_for=None, include_l
     test_bases = device_type_test_bases.copy()
     if allow_mps and TEST_MPS and MPSTestBase not in test_bases:
         test_bases.append(MPSTestBase)
-    if only_for == 'xpu' and TEST_XPU and XPUTestBase not in test_bases:
-        test_bases.append(XPUTestBase)
     # Filter out the device types based on user inputs
     desired_device_type_test_bases = filter_desired_device_types(test_bases, except_for, only_for)
     if include_lazy:
@@ -1324,10 +1301,6 @@ def onlyCUDA(fn):
 def onlyMPS(fn):
     return onlyOn('mps')(fn)


-def onlyXPU(fn):
-    return onlyOn('xpu')(fn)
-
-
 def onlyPRIVATEUSE1(fn):
     device_type = torch._C._get_privateuse1_backend_name()
     device_mod = getattr(torch, device_type, None)
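The reverted pieces in this file are the three hooks the device-type test framework needs per backend: a DeviceTypeTestBase subclass (XPUTestBase), an onlyOn wrapper (onlyXPU), and the only_for wiring in get_desired_device_type_test_bases. For reference, a minimal sketch of how a test file drives that framework; the class and test names are illustrative, and it targets the CPU backend so it runs anywhere:

```python
import torch
from torch.testing._internal.common_device_type import instantiate_device_type_tests
from torch.testing._internal.common_utils import TestCase, run_tests

class TestExample(TestCase):
    # The framework clones this class per backend (TestExampleCPU, ...) and
    # passes each test a concrete device string such as "cpu" or "xpu:0".
    def test_add(self, device):
        x = torch.ones(3, device=device)
        self.assertEqual((x + x).sum().item(), 6.0)

# only_for restricts generation to one backend, as the reverted code did for "xpu".
instantiate_device_type_tests(TestExample, globals(), only_for="cpu")

if __name__ == "__main__":
    run_tests()
```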