[quant][graphmode] Add quantize_per_tensor.tensors (#35916)

Summary:
Pull Request resolved: https://github.com/pytorch/pytorch/pull/35916

quantize_per_tensor can now accept a list of tensors.
This is needed for operators like LSTM and cat.
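
A minimal sketch of the intended Python-level usage, assuming the new overload is reachable through the existing torch.quantize_per_tensor binding (the scale/zero_point values here are illustrative):

import torch

# Inputs that an op like cat consumes as a group.
xs = [torch.randn(2, 3), torch.randn(2, 3)]
scales = torch.tensor([0.1, 0.05], dtype=torch.double)
zero_points = torch.tensor([0, 128], dtype=torch.long)

# One call quantizes the whole list; element i uses scales[i] / zero_points[i].
qxs = torch.quantize_per_tensor(xs, scales, zero_points, torch.quint8)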

Test Plan: Imported from OSS

Differential Revision: D20830388

fbshipit-source-id: 73f81cf6b7c7614ef19a73b721bc57cf33211345
Supriya Rao 2020-04-03 10:33:28 -07:00 committed by Facebook GitHub Bot
parent f0c747243c
commit 7468ef04c2
3 changed files with 54 additions and 22 deletions

aten/src/ATen/native/native_functions.yaml

@@ -3633,6 +3633,11 @@
   dispatch:
     CPU: quantize_per_tensor_cpu
+- func: quantize_per_tensor.tensors(Tensor[] tensors, Tensor scales, Tensor zero_points, ScalarType dtype) -> Tensor[]
+  variants: function
+  dispatch:
+    CPU: quantize_per_tensor_list_cpu
 - func: quantize_per_channel(Tensor self, Tensor scales, Tensor zero_points, int axis, ScalarType dtype) -> Tensor
   variants: function
   dispatch:

aten/src/ATen/native/quantized/QTensor.cpp

@@ -2,9 +2,9 @@
 #include <ATen/NativeFunctions.h>
 #include <ATen/native/TensorIterator.h>
 #include <ATen/native/cpu/Loops.h>
+#include <ATen/native/quantized/cpu/quant_utils.h>
 #include <ATen/quantized/QTensorImpl.h>
 #include <ATen/quantized/Quantizer.h>
-#include <ATen/native/quantized/cpu/quant_utils.h>
 namespace at {
 namespace native {
@@ -18,6 +18,22 @@ Tensor quantize_per_tensor_cpu(
   return quantizer->quantize(self);
 }
+std::vector<Tensor> quantize_per_tensor_list_cpu(
+    TensorList tensors,
+    const Tensor& scales,
+    const Tensor& zero_points,
+    ScalarType dtype) {
+  std::vector<Tensor> quantized_tensors;
+  for (auto i = 0; i < tensors.size(); ++i) {
+    quantized_tensors.push_back(at::quantize_per_tensor(
+        tensors[i],
+        scales[i].item<double>(),
+        zero_points[i].item<int64_t>(),
+        dtype));
+  }
+  return quantized_tensors;
+}
 Tensor quantize_per_channel_cpu(
     const Tensor& self,
     const Tensor& scales,
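
The new kernel is a plain loop over the inputs, so each output should match a scalar-qparams quantize of the corresponding tensor. A self-contained equivalence check, again assuming the overload is reachable from torch.quantize_per_tensor:

import torch

xs = [torch.randn(2, 3), torch.randn(4)]
scales = torch.tensor([0.1, 0.05], dtype=torch.double)
zero_points = torch.tensor([0, 128], dtype=torch.long)

qxs = torch.quantize_per_tensor(xs, scales, zero_points, torch.quint8)
for i, x in enumerate(xs):
    # Element i must equal a plain per-tensor quantize with scales[i] / zero_points[i].
    expected = torch.quantize_per_tensor(
        x, scales[i].item(), zero_points[i].item(), torch.quint8)
    assert torch.equal(qxs[i].int_repr(), expected.int_repr())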
@@ -56,13 +72,17 @@ int64_t q_zero_point_quant(const Tensor& self) {
 Tensor q_per_channel_scales_quant(const Tensor& self) {
   auto quantizer = get_qtensorimpl(self)->quantizer();
   TORCH_CHECK(quantizer->qscheme() == kPerChannelAffine);
-  return static_cast<PerChannelAffineQuantizer*>(quantizer.get())->scales().to(kDouble);
+  return static_cast<PerChannelAffineQuantizer*>(quantizer.get())
+      ->scales()
+      .to(kDouble);
 }
 Tensor q_per_channel_zero_points_quant(const Tensor& self) {
   auto quantizer = get_qtensorimpl(self)->quantizer();
   TORCH_CHECK(quantizer->qscheme() == kPerChannelAffine);
-  return static_cast<PerChannelAffineQuantizer*>(quantizer.get())->zero_points().to(kLong);
+  return static_cast<PerChannelAffineQuantizer*>(quantizer.get())
+      ->zero_points()
+      .to(kLong);
 }
 int64_t q_per_channel_axis_quant(const Tensor& self) {
@@ -102,14 +122,15 @@ Tensor make_per_tensor_quantized_tensor_cpu(
       scale,
       zero_point);
   Tensor self_contig = self.contiguous();
-  AT_DISPATCH_QINT_TYPES(dst.scalar_type(), "make_per_tensor_quantized_tensor", [&]() {
-    underlying_t* self_data = self_contig.data_ptr<underlying_t>();
-    underlying_t* dst_data =
-        reinterpret_cast<underlying_t*>(dst.data_ptr<scalar_t>());
-    if (self.numel() > 0) {
-      memcpy(dst_data, self_data, self.nbytes());
-    }
-  });
+  AT_DISPATCH_QINT_TYPES(
+      dst.scalar_type(), "make_per_tensor_quantized_tensor", [&]() {
+        underlying_t* self_data = self_contig.data_ptr<underlying_t>();
+        underlying_t* dst_data =
+            reinterpret_cast<underlying_t*>(dst.data_ptr<scalar_t>());
+        if (self.numel() > 0) {
+          memcpy(dst_data, self_data, self.nbytes());
+        }
+      });
   return dst;
 }
@@ -160,7 +181,9 @@ Tensor& set_quantizer_(Tensor& self, ConstQuantizerPtr quantizer) {
   return self;
 }
-Tensor quantized_clone(const Tensor& self, c10::optional<c10::MemoryFormat> optional_memory_format) {
+Tensor quantized_clone(
+    const Tensor& self,
+    c10::optional<c10::MemoryFormat> optional_memory_format) {
   // TODO: add per channel support
   TORCH_INTERNAL_ASSERT(
       self.qscheme() == at::kPerTensorAffine,
@@ -171,8 +194,9 @@ Tensor quantized_clone(const Tensor& self, c10::optional<c10::MemoryFormat> opti
   // TODO: To support all features of MemoryFormat::Preserve we need to add
   // _empty_affine_quantized_strided function and use it similarly to
-  // Tensor clone(const Tensor& src, c10::optional<c10::MemoryFormat> optional_memory_format)
-  // if (self.is_non_overlapping_and_dense()) -> _empty_affine_quantized_strided
+  // Tensor clone(const Tensor& src, c10::optional<c10::MemoryFormat>
+  // optional_memory_format) if (self.is_non_overlapping_and_dense()) ->
+  // _empty_affine_quantized_strided
   if (memory_format == MemoryFormat::Preserve) {
     memory_format = self.suggest_memory_format();
   }
@@ -220,20 +244,22 @@ bool quantized_equal(const Tensor& self, const Tensor& other) {
 }
 /* Calculate the quantization params for the activation tensor */
-std::tuple<double, int64_t> _choose_qparams_per_tensor(const Tensor& self, bool reduce_range) {
+std::tuple<double, int64_t> _choose_qparams_per_tensor(
+    const Tensor& self,
+    bool reduce_range) {
   at::Tensor a;
   auto input_contig = self.contiguous();
   float x_min = input_contig.min().item<float>();
   float x_max = input_contig.max().item<float>();
   auto q_params = quant_utils::ChooseQuantizationParams(
-    /*min=*/x_min,
-    /*max=*/x_max,
-    /*qmin=*/0,
-    /*qmax=*/255,
-    /*preserve_sparsity=*/false,
-    /*force_scale_power_of_two=*/false,
-    /*reduce_range=*/reduce_range);
+      /*min=*/x_min,
+      /*max=*/x_max,
+      /*qmin=*/0,
+      /*qmax=*/255,
+      /*preserve_sparsity=*/false,
+      /*force_scale_power_of_two=*/false,
+      /*reduce_range=*/reduce_range);
   return std::make_tuple(q_params.scale, q_params.zero_point);
 }
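
For reference, a simplified sketch of the affine qparams math that quant_utils::ChooseQuantizationParams performs for the [0, 255] range used above; it omits reduce_range, sparsity preservation, and the power-of-two option, and the 0.1 fallback for degenerate inputs is an assumption:

import torch

def choose_qparams_sketch(x, qmin=0, qmax=255):
    # The representable range must include zero so zero quantizes exactly.
    x_min = min(float(x.min()), 0.0)
    x_max = max(float(x.max()), 0.0)
    scale = (x_max - x_min) / (qmax - qmin)
    if scale == 0.0:
        scale = 0.1  # assumed fallback for an all-zero input
    zero_point = int(round(qmin - x_min / scale))
    zero_point = max(qmin, min(qmax, zero_point))  # clamp into [qmin, qmax]
    return scale, zero_point

scale, zp = choose_qparams_sketch(torch.randn(8))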

test/backward_compatibility/check_backward_compatibility.py

@@ -24,6 +24,7 @@ white_list = [
     ('aten::append*', datetime.date(2020, 4, 15)),
     ('aten::real*', datetime.date(2020, 4, 15)),
     ('aten::imag*', datetime.date(2020, 4, 15)),
+    ('aten::quantize_per_tensor', datetime.date(2020, 4, 15)),
 ]