mirror of
https://github.com/saymrwulf/pytorch.git
synced 2026-05-15 21:00:47 +00:00
new_qtensor support privateuseone allocator. (#111464)
I want to create a quant tensor through `PerTensorAffineQuantizer`. But I found that it will throw an error because of the lack of a check for PrivateUse1. Pull Request resolved: https://github.com/pytorch/pytorch/pull/111464 Approved by: https://github.com/ezyang
This commit is contained in:
parent
748c1a1d81
commit
c73da67d46
3 changed files with 21 additions and 0 deletions
|
|
@ -121,6 +121,8 @@ inline Tensor new_qtensor(
|
|||
allocator = at::getCPUAllocator();
|
||||
} else if (device.is_meta()) {
|
||||
allocator = GetAllocator(kMeta);
|
||||
} else if (device.is_privateuseone()) {
|
||||
allocator = GetAllocator(kPrivateUse1);
|
||||
} else {
|
||||
TORCH_INTERNAL_ASSERT(0, "unrecognized device for new_qtensor: ", device);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -11,6 +11,7 @@
|
|||
#include <torch/extension.h>
|
||||
|
||||
#include <ATen/native/cpu/Loops.h>
|
||||
#include <ATen/native/quantized/AffineQuantizer.h>
|
||||
#include <ATen/native/DispatchStub.h>
|
||||
#include <ATen/native/Resize.h>
|
||||
#include <ATen/native/UnaryOps.h>
|
||||
|
|
@ -49,9 +50,18 @@ void abs_kernel(::at::TensorIteratorBase& iter) {
|
|||
|
||||
} // namespace
|
||||
|
||||
// Deliberate no-op stub for the per-tensor-affine quantization kernel on the
// PrivateUse1 (custom) backend.  It is registered with
// quantize_tensor_per_tensor_affine_stub (see the REGISTER_PRIVATEUSE1_DISPATCH
// call in this file) so that at::quantize_per_tensor can dispatch on a
// PrivateUse1 device without hitting a missing-kernel error.
//
// @param rtensor     source float tensor (unused by this stub)
// @param qtensor     destination quantized tensor; intentionally left untouched
// @param scale       quantization scale (unused by this stub)
// @param zero_point  quantization zero point (unused by this stub)
void quantize_tensor_per_tensor_affine_privateuse1(
    const at::Tensor& rtensor,
    at::Tensor& qtensor,
    double scale,
    int64_t zero_point) {
  // do nothing
}
|
||||
|
||||
// Register this file's kernels into ATen's PrivateUse1 dispatch slots so that
// the corresponding ops (abs, quantize_per_tensor) route to them when invoked
// on tensors living on the custom backend.
namespace at::native {

REGISTER_PRIVATEUSE1_DISPATCH(abs_stub, &abs_kernel);
REGISTER_PRIVATEUSE1_DISPATCH(quantize_tensor_per_tensor_affine_stub, &quantize_tensor_per_tensor_affine_privateuse1);

} // namespace at::native
|
||||
struct CustomBackendMetadata : public c10::BackendMeta {
|
||||
|
|
@ -342,6 +352,7 @@ TORCH_LIBRARY_IMPL(aten, PrivateUse1, m) {
|
|||
m.impl("_pin_memory", &custom__pin_memory);
|
||||
m.impl("is_pinned", &custom_is_pinned);
|
||||
m.impl("resize_", &custom_resize_);
|
||||
m.impl("quantize_per_tensor", at::native::quantize_per_tensor);
|
||||
}
|
||||
|
||||
void custom_cpu_fallback(const c10::OperatorHandle& op, torch::jit::Stack* stack) {
|
||||
|
|
|
|||
|
|
@ -184,6 +184,13 @@ class TestCppExtensionOpenRgistration(common.TestCase):
|
|||
torch.abs(foo_input_data)
|
||||
self.assertTrue(self.module.custom_abs_called())
|
||||
|
||||
def test_open_device_quantized():
    # Exercises per-tensor-affine quantization on the renamed PrivateUse1
    # ("foo") backend: move a CPU float tensor to the custom device,
    # quantize it, and verify the result's device and dtype.
    # NOTE(review): `self` is taken from the enclosing test method's
    # closure -- this is a nested function, not a TestCase method.
    torch.utils.rename_privateuse1_backend('foo')
    input_data = torch.randn(3, 4, 5, dtype=torch.float32, device="cpu").to("foo")
    quantized_tensor = torch.quantize_per_tensor(input_data, 0.1, 10, torch.qint8)
    self.assertEqual(quantized_tensor.device, torch.device('foo:0'))
    self.assertEqual(quantized_tensor.dtype, torch.qint8)
|
||||
|
||||
def test_open_device_random():
    # Smoke test: fork_rng must accept the custom backend's device type
    # without raising.  No RNG-state assertions are made; entering and
    # exiting the context cleanly is the whole check.
    with torch.random.fork_rng(device_type="foo"):
        pass
|
||||
|
|
@ -491,6 +498,7 @@ class TestCppExtensionOpenRgistration(common.TestCase):
|
|||
test_open_device_storage_type()
|
||||
test_open_device_faketensor()
|
||||
test_open_device_named_tensor()
|
||||
test_open_device_quantized()
|
||||
|
||||
test_compile_autograd_function_returns_self()
|
||||
test_compile_autograd_function_aliasing()
|
||||
|
|
|
|||
Loading…
Reference in a new issue