pytorch docs: add fake_quantize functions documentation (#51748)

Summary: Pull Request resolved: https://github.com/pytorch/pytorch/pull/51748 Adding docs for `fake_quantize_per_tensor_affine` and `fake_quantize_per_channel_affine` functions. Note: not documenting `fake_quantize_per_tensor_affine_cachemask` and `fake_quantize_per_channel_affine_cachemask` since they are implementation details of `fake_quantize_per_tensor_affine` and `fake_quantize_per_channel_affine`, and do not need to be exposed to the user at the moment. Test Plan: Build the docs locally on Mac OS, it looks good Reviewed By: supriyar Differential Revision: D26270514 Pulled By: vkuzo fbshipit-source-id: 8e3c9815a12a3427572cb4d34a779e9f5e4facdd
2026-05-14 20:57:59 +00:00 · 2021-02-05 17:50:40 -08:00 · 2021-02-05 17:50:40 -08:00 · 8c48af822e
commit 8c48af822e
parent ececbcfff2
2 changed files with 87 additions and 0 deletions
--- a/docs/source/torch.rst
+++ b/docs/source/torch.rst
@@ -298,6 +298,8 @@ Pointwise Ops
    exp
    exp2
    expm1
+    fake_quantize_per_channel_affine
+    fake_quantize_per_tensor_affine
    fix
    float_power
    floor
--- a/torch/_torch_docs.py
+++ b/torch/_torch_docs.py
@@ -9232,6 +9232,91 @@ Example::
    tensor([ 3.,  0., -0., -0.])
 """.format(**common_args))

+add_docstr(torch.fake_quantize_per_tensor_affine,
+           r"""
+fake_quantize_per_tensor_affine(input, scale, zero_point, quant_min, quant_max) -> Tensor
+
+Returns a new tensor with the data in :attr:`input` fake quantized using :attr:`scale`,
+:attr:`zero_point`, :attr:`quant_min` and :attr:`quant_max`.
+
+.. math::
+    \text{output} = (min(
+        \text{quant\_max},
+        max(
+            \text{quant\_min},
+            \text{std::nearby\_int}(\text{input} / \text{scale}) + \text{zero\_point}
+        )
+    ) - \text{zero\_point}) \times \text{scale}
+
+Args:
+    input (Tensor): the input value(s), in ``torch.float32``.
+    scale (double): quantization scale
+    zero_point (int64): quantization zero_point
+    quant_min (int64): lower bound of the quantized domain
+    quant_max (int64): upper bound of the quantized domain
+
+Returns:
+    Tensor: A newly fake_quantized tensor
+
+Example::
+
+    >>> x = torch.randn(4)
+    >>> x
+    tensor([ 0.0552,  0.9730,  0.3973, -1.0780])
+    >>> torch.fake_quantize_per_tensor_affine(x, 0.1, 0, 0, 255)
+    tensor([0.1000, 1.0000, 0.4000, 0.0000])
+""")
+
+add_docstr(torch.fake_quantize_per_channel_affine,
+           r"""
+fake_quantize_per_channel_affine(input, scale, zero_point, axis, quant_min, quant_max) -> Tensor
+
+Returns a new tensor with the data in :attr:`input` fake quantized per channel using :attr:`scale`,
+:attr:`zero_point`, :attr:`quant_min` and :attr:`quant_max`, across the channel specified by :attr:`axis`.
+
+.. math::
+    \text{output} = (min(
+        \text{quant\_max},
+        max(
+            \text{quant\_min},
+            \text{std::nearby\_int}(\text{input} / \text{scale}) + \text{zero\_point}
+        )
+    ) - \text{zero\_point}) \times \text{scale}
+
+Args:
+    input (Tensor): the input value(s), in ``torch.float32``.
+    scale (Tensor): quantization scale, per channel
+    zero_point (Tensor): quantization zero_point, per channel
+    axis (int32): channel axis
+    quant_min (int64): lower bound of the quantized domain
+    quant_max (int64): upper bound of the quantized domain
+
+Returns:
+    Tensor: A newly fake_quantized per channel tensor
+
+Example::
+
+    >>> x = torch.randn(2, 2, 2)
+    >>> x
+    tensor([[[-0.2525, -0.0466],
+             [ 0.3491, -0.2168]],
+
+            [[-0.5906,  1.6258],
+             [ 0.6444, -0.0542]]])
+    >>> scales = (torch.randn(2) + 1) * 0.05
+    >>> scales
+    tensor([0.0475, 0.0486])
+    >>> zero_points = torch.zeros(2).to(torch.long)
+    >>> zero_points
+    tensor([0, 0])
+    >>> torch.fake_quantize_per_channel_affine(x, scales, zero_points, 1, 0, 255)
+    tensor([[[0.0000, 0.0000],
+             [0.3405, 0.0000]],
+
+            [[0.0000, 1.6134],
+             [0.6323, 0.0000]]])
+""")
+
 add_docstr(torch.fix,
           r"""
 fix(input, *, out=None) -> Tensor