Revert "Introduce XPU implementation for PyTorch ATen operators (#120891)"
This reverts commit 148a8de639.
Reverted https://github.com/pytorch/pytorch/pull/120891 on behalf of https://github.com/huydhn due to Sorry for reverting your change but I need to revert it to resolve a conflict in trunk https://github.com/pytorch/pytorch/pull/121794#issuecomment-2013434523. Please help reland the change after ([comment](https://github.com/pytorch/pytorch/pull/120891#issuecomment-2013668563))
parent 628dcde136
commit 182bb0f2ca
4 changed files with 2 additions and 184 deletions
caffe2/CMakeLists.txt

@@ -1096,56 +1096,6 @@ if(USE_XPU)
   torch_compile_options(torch_xpu) # see cmake/public/utils.cmake
   target_compile_options_if_supported(torch_xpu "-Wno-deprecated-copy") # see cmake/public/utils.cmake
   target_compile_definitions(torch_xpu PRIVATE USE_XPU)
-
-  # ATen XPU implementation
-  set(TORCH_XPU_OPS_DIR ${TORCH_ROOT}/third_party/torch-xpu-ops)
-  set(TORCH_XPU_OPS_REPO_URL https://github.com/intel/torch-xpu-ops.git)
-  file(READ "${TORCH_ROOT}/third_party/xpu.txt" TORCH_XPU_OPS_COMMIT)
-  string(REGEX REPLACE "\n$" "" TORCH_XPU_OPS_COMMIT "${TORCH_XPU_OPS_COMMIT}")
-  if(NOT EXISTS "${TORCH_XPU_OPS_DIR}/.git")
-    execute_process(
-      COMMAND git clone --quiet ${TORCH_XPU_OPS_REPO_URL} ${TORCH_XPU_OPS_DIR}
-      RESULT_VARIABLE _exitcode)
-    if(NOT _exitcode EQUAL 0)
-      message(FATAL_ERROR "Fail to clone ${TORCH_XPU_OPS_REPO_URL}")
-    endif()
-  endif()
-  execute_process(
-    COMMAND git fetch --quiet
-    WORKING_DIRECTORY ${TORCH_XPU_OPS_DIR}
-    RESULT_VARIABLE _exitcode)
-  if(NOT _exitcode EQUAL 0)
-    message(FATAL_ERROR "Fail to fetch ${TORCH_XPU_OPS_REPO_URL}")
-  endif()
-  execute_process(
-    COMMAND git checkout --quiet ${TORCH_XPU_OPS_COMMIT}
-    WORKING_DIRECTORY ${TORCH_XPU_OPS_DIR}
-    RESULT_VARIABLE _exitcode)
-  if(NOT _exitcode EQUAL 0)
-    message(FATAL_ERROR "Fail to checkout ${TORCH_XPU_OPS_REPO_URL} to ${TORCH_XPU_OPS_COMMIT}")
-  endif()
-
-  set(TORCH_XPU_OPS_INCLUDE_DIRS
-    ${TORCH_SRC_DIR}/csrc/api
-    ${TORCH_SRC_DIR}/csrc/api/include
-    ${Caffe2_CPU_INCLUDE}
-    ${Caffe2_XPU_INCLUDE})
-  # Pass the target as a dependency so that ATen headers generation
-  # could be followed by torch-xpu-ops build.
-  # 1. Sources in torch-xpu-ops depend on generated ATen headers.
-  # 2. Using add_custom_command in torch-xpu-ops to define sycl device sources
-  #    compilation. add_custom_command requires an explicit dependency.
-  set(TORCH_XPU_OPS_PYTORCH_DEPS ATEN_CPU_FILES_GEN_TARGET)
-
-  add_subdirectory(${TORCH_ROOT}/third_party/torch-xpu-ops
-    ${CMAKE_BINARY_DIR}/caffe2/aten_xpu)
-  if(NOT TARGET torch_xpu_ops)
-    message(WARNING "Failed to include ATen XPU implementation target")
-  else()
-    target_link_libraries(torch_xpu PRIVATE torch_xpu_ops)
-    target_link_libraries(torch_xpu PRIVATE
-      "-Wl,--whole-archive,\"$<TARGET_FILE:torch_xpu_ops>\" -Wl,--no-whole-archive")
-  endif()
 endif()

 if(NOT MSVC AND USE_XNNPACK)
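For context, the removed CMake block vendors intel/torch-xpu-ops at the commit pinned in third_party/xpu.txt: clone if absent, fetch, then check out the pin, failing hard on any git error. Below is a minimal Python sketch of the same pin-and-checkout pattern; the function name and example paths are illustrative only, and the CMake above remains the authoritative version.

```python
import subprocess
from pathlib import Path

def checkout_pinned_repo(repo_url: str, dest: Path, pin_file: Path) -> None:
    """Clone repo_url into dest (if needed) and check out the commit recorded in pin_file."""
    commit = pin_file.read_text().strip()  # pin file holds a single commit SHA
    if not (dest / ".git").exists():
        subprocess.run(["git", "clone", "--quiet", repo_url, str(dest)], check=True)
    # Fetch first so a previously cloned checkout can reach a newly pinned commit.
    subprocess.run(["git", "fetch", "--quiet"], cwd=dest, check=True)
    # check=True raises on failure, mirroring the FATAL_ERROR branches above.
    subprocess.run(["git", "checkout", "--quiet", commit], cwd=dest, check=True)

# Illustrative usage mirroring the removed CMake logic:
# checkout_pinned_repo(
#     "https://github.com/intel/torch-xpu-ops.git",
#     Path("third_party/torch-xpu-ops"),
#     Path("third_party/xpu.txt"),
# )
```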
test/test_xpu.py (106 changed lines)
@@ -5,21 +5,7 @@ import unittest

 import torch
 import torch.xpu._gpu_trace as gpu_trace
-from torch.testing._internal.common_device_type import (
-    instantiate_device_type_tests,
-    onlyXPU,
-    OpDTypes,
-    ops,
-)
-from torch.testing._internal.common_methods_invocations import ops_and_refs
-from torch.testing._internal.common_utils import (
-    NoTest,
-    run_tests,
-    suppress_warnings,
-    TEST_WITH_UBSAN,
-    TEST_XPU,
-    TestCase,
-)
+from torch.testing._internal.common_utils import NoTest, run_tests, TEST_XPU, TestCase

 if not TEST_XPU:
     print("XPU not available, skipping tests", file=sys.stderr)
@@ -27,42 +13,6 @@ if not TEST_XPU:
 TEST_MULTIXPU = torch.xpu.device_count() > 1

 cpu_device = torch.device("cpu")
 xpu_device = torch.device("xpu")
-
-any_common_cpu_xpu_one = OpDTypes.any_common_cpu_cuda_one
-_xpu_computation_op_list = [
-    "fill",
-    "zeros",
-    "zeros_like",
-    "clone",
-    "view_as_real",
-    "view_as_complex",
-    "view",
-    "resize_",
-    "resize_as_",
-    "add",
-    "sub",
-    "mul",
-    "div",
-    "abs",
-]
-_xpu_tensor_factory_op_list = [
-    "as_strided",
-    "empty",
-    "empty_strided",
-]
-_xpu_not_test_dtype_op_list = [
-    "resize_",  # Skipped by CPU
-    "resize_as_",  # Skipped by CPU
-    "abs",  # Not aligned dtype
-]
-_xpu_all_op_list = _xpu_computation_op_list + _xpu_tensor_factory_op_list
-_xpu_all_ops = [op for op in ops_and_refs if op.name in _xpu_all_op_list]
-_xpu_computation_ops = [
-    op for op in ops_and_refs if op.name in _xpu_computation_op_list
-]


 class TestXpu(TestCase):
     def test_device_behavior(self):
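For context, the deleted module-level lists carve a small allowlist out of PyTorch's OpInfo database: ops_and_refs holds OpInfo entries, and only entries whose name appears in the allowlist survive. A minimal sketch of that filtering pattern, with a made-up allowlist standing in for the removed _xpu_computation_op_list:

```python
from torch.testing._internal.common_methods_invocations import ops_and_refs

# Hypothetical allowlist; the reverted code used _xpu_computation_op_list.
_computation_op_list = ["add", "sub", "mul", "div", "abs"]

# Each OpInfo entry carries the callable plus its sample-input generators,
# so the filtered list can drive the @ops decorator directly.
_computation_ops = [op for op in ops_and_refs if op.name in _computation_op_list]
print(sorted(op.name for op in _computation_ops))
```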
@@ -177,60 +127,6 @@ if __name__ == "__main__":
         torch.xpu.set_rng_state(g_state0)
         self.assertEqual(2024, torch.xpu.initial_seed())

-    @onlyXPU
-    @suppress_warnings
-    @ops(_xpu_computation_ops, dtypes=any_common_cpu_xpu_one)
-    def test_compare_cpu(self, device, dtype, op):
-        def to_cpu(arg):
-            if isinstance(arg, torch.Tensor):
-                return arg.to(device="cpu")
-            return arg
-
-        samples = op.reference_inputs(device, dtype)
-
-        for sample in samples:
-            cpu_sample = sample.transform(to_cpu)
-            xpu_results = op(sample.input, *sample.args, **sample.kwargs)
-            cpu_results = op(cpu_sample.input, *cpu_sample.args, **cpu_sample.kwargs)
-
-            xpu_results = sample.output_process_fn_grad(xpu_results)
-            cpu_results = cpu_sample.output_process_fn_grad(cpu_results)
-
-            # Lower tolerance because we are running this as a `@slowTest`
-            # Don't want the periodic tests to fail frequently
-            self.assertEqual(xpu_results, cpu_results, atol=1e-4, rtol=1e-4)
-
-    @onlyXPU
-    @ops(_xpu_computation_ops, allowed_dtypes=(torch.bool,))
-    @unittest.skipIf(TEST_WITH_UBSAN, "Test uses undefined behavior")
-    def test_non_standard_bool_values(self, device, dtype, op):
-        # Test boolean values other than 0x00 and 0x01 (gh-54789)
-        def convert_boolean_tensors(x):
-            if not isinstance(x, torch.Tensor) or x.dtype != torch.bool:
-                return x
-
-            # Map False -> 0 and True -> Random value in [2, 255]
-            true_vals = torch.randint(
-                2, 255, x.shape, dtype=torch.uint8, device=x.device
-            )
-            false_vals = torch.zeros((), dtype=torch.uint8, device=x.device)
-            x_int = torch.where(x, true_vals, false_vals)
-
-            ret = x_int.view(torch.bool)
-            self.assertEqual(ret, x)
-            return ret
-
-        for sample in op.sample_inputs(device, dtype):
-            expect = op(sample.input, *sample.args, **sample.kwargs)
-
-            transformed = sample.transform(convert_boolean_tensors)
-            actual = op(transformed.input, *transformed.args, **transformed.kwargs)
-
-            self.assertEqual(expect, actual)
-
-
-instantiate_device_type_tests(TestXpu, globals(), only_for="xpu")
-

 class TestXpuTrace(TestCase):
     def setUp(self):
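Of the two deleted tests, test_non_standard_bool_values relies on a bitcast trick worth spelling out: Tensor.view(torch.bool) reinterprets a uint8 buffer in place, so any nonzero payload byte, not just the canonical 0x01, reads back as True (see gh-54789). A minimal CPU-only sketch of that round trip, independent of the test harness:

```python
import torch

mask = torch.tensor([True, False, True, False])

# Back the True elements with arbitrary bytes in [2, 255] instead of 0x01.
true_vals = torch.randint(2, 255, mask.shape, dtype=torch.uint8)
false_vals = torch.zeros((), dtype=torch.uint8)
raw = torch.where(mask, true_vals, false_vals)

# Bitcast the uint8 buffer to bool; the truth values survive the round trip.
reinterpreted = raw.view(torch.bool)
assert torch.equal(reinterpreted, mask)
```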
third_party/xpu.txt (vendored, 1 changed line)

@@ -1 +0,0 @@
-84db213ab7125fce94aa2f00d2c61811b9384f40
torch/testing/_internal/common_device_type.py

@@ -15,7 +15,7 @@ import os
 import torch
 from torch.testing._internal.common_utils import TestCase, TEST_WITH_ROCM, TEST_MKL, \
     skipCUDANonDefaultStreamIf, TEST_WITH_ASAN, TEST_WITH_UBSAN, TEST_WITH_TSAN, \
-    IS_SANDCASTLE, IS_FBCODE, IS_REMOTE_GPU, IS_WINDOWS, TEST_MPS, TEST_XPU, \
+    IS_SANDCASTLE, IS_FBCODE, IS_REMOTE_GPU, IS_WINDOWS, TEST_MPS, \
     _TestParametrizer, compose_parametrize_fns, dtype_name, \
     TEST_WITH_MIOPEN_SUGGEST_NHWC, NATIVE_DEVICES, skipIfTorchDynamo, \
     get_tracked_input, clear_tracked_input, PRINT_REPRO_ON_FAILURE, \
@@ -569,27 +569,6 @@ class MPSTestBase(DeviceTypeTestBase):
     def _should_stop_test_suite(self):
         return False

-class XPUTestBase(DeviceTypeTestBase):
-    device_type = 'xpu'
-    primary_device: ClassVar[str]
-
-    @classmethod
-    def get_primary_device(cls):
-        return cls.primary_device
-
-    @classmethod
-    def get_all_devices(cls):
-        # currently only one device is supported on MPS backend
-        prim_device = cls.get_primary_device()
-        return [prim_device]
-
-    @classmethod
-    def setUpClass(cls):
-        cls.primary_device = 'xpu:0'
-
-    def _should_stop_test_suite(self):
-        return False
-
 class PrivateUse1TestBase(DeviceTypeTestBase):
     primary_device: ClassVar[str]
     device_mod = None
@@ -697,8 +676,6 @@ def get_desired_device_type_test_bases(except_for=None, only_for=None, include_l
     test_bases = device_type_test_bases.copy()
     if allow_mps and TEST_MPS and MPSTestBase not in test_bases:
         test_bases.append(MPSTestBase)
-    if only_for == 'xpu' and TEST_XPU and XPUTestBase not in test_bases:
-        test_bases.append(XPUTestBase)
     # Filter out the device types based on user inputs
     desired_device_type_test_bases = filter_desired_device_types(test_bases, except_for, only_for)
     if include_lazy:
@@ -1324,10 +1301,6 @@ def onlyCUDA(fn):
 def onlyMPS(fn):
     return onlyOn('mps')(fn)


-def onlyXPU(fn):
-    return onlyOn('xpu')(fn)
-
-
 def onlyPRIVATEUSE1(fn):
     device_type = torch._C._get_privateuse1_backend_name()
     device_mod = getattr(torch, device_type, None)
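The reverted pieces in this file are the three hooks the device-type test framework needs per backend: a DeviceTypeTestBase subclass (XPUTestBase), an onlyOn wrapper (onlyXPU), and the only_for wiring in get_desired_device_type_test_bases. For reference, a minimal sketch of how a test file drives that framework; the class and test names are illustrative, and it targets the CPU backend so it runs anywhere:

```python
import torch
from torch.testing._internal.common_device_type import instantiate_device_type_tests
from torch.testing._internal.common_utils import TestCase, run_tests

class TestExample(TestCase):
    # The framework clones this class per backend (TestExampleCPU, ...) and
    # passes each test a concrete device string such as "cpu" or "xpu:0".
    def test_add(self, device):
        x = torch.ones(3, device=device)
        self.assertEqual((x + x).sum().item(), 6.0)

# only_for restricts generation to one backend, as the reverted code did for "xpu".
instantiate_device_type_tests(TestExample, globals(), only_for="cpu")

if __name__ == "__main__":
    run_tests()
```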