[quant][graphmode] Add quantize_per_tensor.tensors (#35916)

Summary:
Pull Request resolved: https://github.com/pytorch/pytorch/pull/35916

quantize_per_tensor can now accept a list of tensors.
This is needed for operators like LSTM and cat.
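
A minimal sketch of the intended Python-level usage, assuming the new overload is reachable through the existing torch.quantize_per_tensor binding (the scale/zero_point values here are illustrative):

import torch

# Inputs that an op like cat consumes as a group.
xs = [torch.randn(2, 3), torch.randn(2, 3)]
scales = torch.tensor([0.1, 0.05], dtype=torch.double)
zero_points = torch.tensor([0, 128], dtype=torch.long)

# One call quantizes the whole list; element i uses scales[i] / zero_points[i].
qxs = torch.quantize_per_tensor(xs, scales, zero_points, torch.quint8)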

Test Plan: Imported from OSS

Differential Revision: D20830388

fbshipit-source-id: 73f81cf6b7c7614ef19a73b721bc57cf33211345
Supriya Rao 2020-04-03 10:33:28 -07:00 committed by Facebook GitHub Bot
parent f0c747243c
commit 7468ef04c2
3 changed files with 54 additions and 22 deletions

aten/src/ATen/native/native_functions.yaml

@@ -3633,6 +3633,11 @@
   dispatch:
     CPU: quantize_per_tensor_cpu
+- func: quantize_per_tensor.tensors(Tensor[] tensors, Tensor scales, Tensor zero_points, ScalarType dtype) -> Tensor[]
+  variants: function
+  dispatch:
+    CPU: quantize_per_tensor_list_cpu
 - func: quantize_per_channel(Tensor self, Tensor scales, Tensor zero_points, int axis, ScalarType dtype) -> Tensor
   variants: function
   dispatch:

aten/src/ATen/native/quantized/QTensor.cpp

@@ -2,9 +2,9 @@
 #include <ATen/NativeFunctions.h>
 #include <ATen/native/TensorIterator.h>
 #include <ATen/native/cpu/Loops.h>
+#include <ATen/native/quantized/cpu/quant_utils.h>
 #include <ATen/quantized/QTensorImpl.h>
 #include <ATen/quantized/Quantizer.h>
-#include <ATen/native/quantized/cpu/quant_utils.h>
 namespace at {
 namespace native {
@@ -18,6 +18,22 @@ Tensor quantize_per_tensor_cpu(
   return quantizer->quantize(self);
 }
+std::vector<Tensor> quantize_per_tensor_list_cpu(
+    TensorList tensors,
+    const Tensor& scales,
+    const Tensor& zero_points,
+    ScalarType dtype) {
+  std::vector<Tensor> quantized_tensors;
+  for (auto i = 0; i < tensors.size(); ++i) {
+    quantized_tensors.push_back(at::quantize_per_tensor(
+        tensors[i],
+        scales[i].item<double>(),
+        zero_points[i].item<int64_t>(),
+        dtype));
+  }
+  return quantized_tensors;
+}
 Tensor quantize_per_channel_cpu(
     const Tensor& self,
     const Tensor& scales,
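
The new kernel is a plain loop over the inputs, so each output should match a scalar-qparams quantize of the corresponding tensor. A self-contained equivalence check, again assuming the overload is reachable from torch.quantize_per_tensor:

import torch

xs = [torch.randn(2, 3), torch.randn(4)]
scales = torch.tensor([0.1, 0.05], dtype=torch.double)
zero_points = torch.tensor([0, 128], dtype=torch.long)

qxs = torch.quantize_per_tensor(xs, scales, zero_points, torch.quint8)
for i, x in enumerate(xs):
    # Element i must equal a plain per-tensor quantize with scales[i] / zero_points[i].
    expected = torch.quantize_per_tensor(
        x, scales[i].item(), zero_points[i].item(), torch.quint8)
    assert torch.equal(qxs[i].int_repr(), expected.int_repr())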
@@ -56,13 +72,17 @@ int64_t q_zero_point_quant(const Tensor& self) {
 Tensor q_per_channel_scales_quant(const Tensor& self) {
   auto quantizer = get_qtensorimpl(self)->quantizer();
   TORCH_CHECK(quantizer->qscheme() == kPerChannelAffine);
-  return static_cast<PerChannelAffineQuantizer*>(quantizer.get())->scales().to(kDouble);
+  return static_cast<PerChannelAffineQuantizer*>(quantizer.get())
+      ->scales()
+      .to(kDouble);
 }
 Tensor q_per_channel_zero_points_quant(const Tensor& self) {
   auto quantizer = get_qtensorimpl(self)->quantizer();
   TORCH_CHECK(quantizer->qscheme() == kPerChannelAffine);
-  return static_cast<PerChannelAffineQuantizer*>(quantizer.get())->zero_points().to(kLong);
+  return static_cast<PerChannelAffineQuantizer*>(quantizer.get())
+      ->zero_points()
+      .to(kLong);
 }
 int64_t q_per_channel_axis_quant(const Tensor& self) {
@@ -102,14 +122,15 @@ Tensor make_per_tensor_quantized_tensor_cpu(
       scale,
       zero_point);
   Tensor self_contig = self.contiguous();
-  AT_DISPATCH_QINT_TYPES(dst.scalar_type(), "make_per_tensor_quantized_tensor", [&]() {
-    underlying_t* self_data = self_contig.data_ptr<underlying_t>();
-    underlying_t* dst_data =
-        reinterpret_cast<underlying_t*>(dst.data_ptr<scalar_t>());
-    if (self.numel() > 0) {
-      memcpy(dst_data, self_data, self.nbytes());
-    }
-  });
+  AT_DISPATCH_QINT_TYPES(
+      dst.scalar_type(), "make_per_tensor_quantized_tensor", [&]() {
+        underlying_t* self_data = self_contig.data_ptr<underlying_t>();
+        underlying_t* dst_data =
+            reinterpret_cast<underlying_t*>(dst.data_ptr<scalar_t>());
+        if (self.numel() > 0) {
+          memcpy(dst_data, self_data, self.nbytes());
+        }
+      });
   return dst;
 }
@@ -160,7 +181,9 @@ Tensor& set_quantizer_(Tensor& self, ConstQuantizerPtr quantizer) {
   return self;
 }
-Tensor quantized_clone(const Tensor& self, c10::optional<c10::MemoryFormat> optional_memory_format) {
+Tensor quantized_clone(
+    const Tensor& self,
+    c10::optional<c10::MemoryFormat> optional_memory_format) {
   // TODO: add per channel support
   TORCH_INTERNAL_ASSERT(
       self.qscheme() == at::kPerTensorAffine,
@@ -171,8 +194,9 @@ Tensor quantized_clone(const Tensor& self, c10::optional<c10::MemoryFormat> opti
   // TODO: To support all features of MemoryFormat::Preserve we need to add
   // _empty_affine_quantized_strided function and use it similarly to
-  // Tensor clone(const Tensor& src, c10::optional<c10::MemoryFormat> optional_memory_format)
-  // if (self.is_non_overlapping_and_dense()) -> _empty_affine_quantized_strided
+  // Tensor clone(const Tensor& src, c10::optional<c10::MemoryFormat>
+  // optional_memory_format) if (self.is_non_overlapping_and_dense()) ->
+  // _empty_affine_quantized_strided
   if (memory_format == MemoryFormat::Preserve) {
     memory_format = self.suggest_memory_format();
   }
@@ -220,20 +244,22 @@ bool quantized_equal(const Tensor& self, const Tensor& other) {
 }
 /* Calculate the quantization params for the activation tensor */
-std::tuple<double, int64_t> _choose_qparams_per_tensor(const Tensor& self, bool reduce_range) {
+std::tuple<double, int64_t> _choose_qparams_per_tensor(
+    const Tensor& self,
+    bool reduce_range) {
   at::Tensor a;
   auto input_contig = self.contiguous();
   float x_min = input_contig.min().item<float>();
   float x_max = input_contig.max().item<float>();
   auto q_params = quant_utils::ChooseQuantizationParams(
-    /*min=*/x_min,
-    /*max=*/x_max,
-    /*qmin=*/0,
-    /*qmax=*/255,
-    /*preserve_sparsity=*/false,
-    /*force_scale_power_of_two=*/false,
-    /*reduce_range=*/reduce_range);
+      /*min=*/x_min,
+      /*max=*/x_max,
+      /*qmin=*/0,
+      /*qmax=*/255,
+      /*preserve_sparsity=*/false,
+      /*force_scale_power_of_two=*/false,
+      /*reduce_range=*/reduce_range);
   return std::make_tuple(q_params.scale, q_params.zero_point);
 }
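
For reference, a simplified sketch of the affine qparams math that quant_utils::ChooseQuantizationParams performs for the [0, 255] range used above; it omits reduce_range, sparsity preservation, and the power-of-two option, and the 0.1 fallback for degenerate inputs is an assumption:

import torch

def choose_qparams_sketch(x, qmin=0, qmax=255):
    # The representable range must include zero so zero quantizes exactly.
    x_min = min(float(x.min()), 0.0)
    x_max = max(float(x.max()), 0.0)
    scale = (x_max - x_min) / (qmax - qmin)
    if scale == 0.0:
        scale = 0.1  # assumed fallback for an all-zero input
    zero_point = int(round(qmin - x_min / scale))
    zero_point = max(qmin, min(qmax, zero_point))  # clamp into [qmin, qmax]
    return scale, zero_point

scale, zp = choose_qparams_sketch(torch.randn(8))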

test/backward_compatibility/check_backward_compatibility.py

@@ -24,6 +24,7 @@ white_list = [
     ('aten::append*', datetime.date(2020, 4, 15)),
     ('aten::real*', datetime.date(2020, 4, 15)),
     ('aten::imag*', datetime.date(2020, 4, 15)),
+    ('aten::quantize_per_tensor', datetime.date(2020, 4, 15)),
 ]