mirror of
https://github.com/saymrwulf/pytorch.git
synced 2026-05-15 21:00:47 +00:00
Summary: This test tests an operator that quantizes and serializes a float array. Among the data serialized, one element is the bias, i.e. the minimum value in the array. The test may fail when the array contains both +0.0 and -0.0, while all other elements are positive. (this happens quite frequently with a hypothesis version >= 6.17.4, due to [this issue](https://github.com/HypothesisWorks/hypothesis/issues/3606)) Depending on the exact settings of SIMD (single instruction, multiple data), the elements of the array may be visited in different orders while running the operator and while calculating the reference. Because +0.0 and -0.0 compare equal, the minimum value may be either +0.0 or -0.0. Nevertheless, the serialized forms of these two values differ in the sign bit, and can make the test fail because it's conducting an exact match on the serialized result. To avoid this failure, I'm adding a line to replace all -0.0 with +0.0 in the input array. Test Plan: Run this with both hypothesis < 6.17.4 and >= 6.17.4: ``` buck2 test mode/opt caffe2/caffe2/python:fused_8bit_rowwise_conversion_ops_test - test_quantize_op ``` Differential Revision: D44617022 Pull Request resolved: https://github.com/pytorch/pytorch/pull/98183 Approved by: https://github.com/malfet
106 lines
3.9 KiB
Python
106 lines
3.9 KiB
Python
|
|
|
|
|
|
|
|
|
|
from caffe2.python import core, workspace
|
|
import caffe2.python.hypothesis_test_util as hu
|
|
|
|
import numpy as np
|
|
import struct
|
|
from hypothesis import given
|
|
|
|
# Eigen/Python round 0.5 away from 0, Numpy rounds to even
# NOTE(review): Python 3's built-in round() also rounds halves to even
# (banker's rounding), so the comment above may only hold for Python 2 /
# Eigen — confirm against the operator's actual rounding behavior.
round_to_nearest = np.vectorize(round)
|
|
|
|
|
|
def bytes_to_floats(byte_matrix):
    """Decode each 4-byte row of `byte_matrix` into a single float32.

    Returns an (n_rows, 1) float32 array whose row i is the value
    unpacked (native-endian IEEE 754 single) from row i of the input.
    """
    n_rows = np.shape(byte_matrix)[0]
    decoded = np.empty([n_rows, 1], dtype=np.float32)
    row_index = 0
    for raw_row in byte_matrix:
        (decoded[row_index],) = struct.unpack('f', bytearray(raw_row))
        row_index += 1
    return decoded
|
|
|
|
|
|
def floats_to_bytes(floats):
    """Encode each float32 in `floats` as its 4 raw bytes.

    Args:
        floats: 1-D sequence of np.float32 scalars (length n).

    Returns:
        (n, 4) uint8 array; row i holds the native-endian IEEE 754
        single-precision encoding of floats[i].

    Raises:
        AssertionError: if any element is not an np.float32 scalar.
    """
    byte_matrix = np.empty([np.shape(floats)[0], 4], dtype=np.uint8)
    for i, value in enumerate(floats):
        assert isinstance(value, np.float32), (value, floats)
        # In Python 3 indexing a bytes object always yields ints, so the
        # packed bytes assign directly to the uint8 row.  (A dead Python 2
        # ord() fallback branch was removed here; it also shadowed the
        # loop variable `i`.)
        byte_matrix[i] = list(struct.pack('f', value))
    return byte_matrix
|
|
|
|
|
|
def fused_rowwise_8bit_quantize_reference(data):
    """Reference implementation of FloatToFused8BitRowwiseQuantized.

    For each row (last axis) of `data`, computes scale = (max - min) / 255
    and bias = min, quantizes the row against them, and appends the 4-byte
    float32 encodings of scale and bias, producing rows of length
    row_len + 8.

    Args:
        data: float array; quantization is performed along the last axis.

    Returns:
        Array with shape data.shape[:-1] + (data.shape[-1] + 8,):
        quantized codes followed by the scale bytes, then the bias bytes.
    """
    minimum = np.min(data, axis=-1, keepdims=True)
    maximum = np.max(data, axis=-1, keepdims=True)
    span = maximum - minimum
    bias = minimum
    scale = span / 255.0
    # Epsilon keeps the division finite for constant rows (span == 0).
    inverse_scale = 255.0 / (span + 1e-8)
    quantized_data = round_to_nearest((data - bias) * inverse_scale)
    scale_bytes = floats_to_bytes(scale.reshape(-1))
    scale_bytes = scale_bytes.reshape(data.shape[:-1] + (scale_bytes.shape[-1],))
    bias_bytes = floats_to_bytes(bias.reshape(-1))
    bias_bytes = bias_bytes.reshape(data.shape[:-1] + (bias_bytes.shape[-1],))
    # NOTE(review): removed a leftover debug print of intermediate shapes
    # that polluted test output on every call.
    return np.concatenate([quantized_data, scale_bytes, bias_bytes], axis=-1)
|
|
|
|
|
|
def fused_rowwise_8bit_quantize_dequantize_reference(data):
    """Quantize `data` row-wise to fused 8-bit form, then dequantize it.

    The trailing 8 bytes of every fused row hold the float32 scale followed
    by the float32 bias; the leading bytes are the quantized codes.
    """
    fused = fused_rowwise_8bit_quantize_reference(data)
    outer_shape = fused.shape[:-1]
    # Scale occupies bytes [-8:-4] of each fused row.
    raw_scale = fused[..., -8:-4].astype(np.uint8).reshape(-1, 4)
    scale = bytes_to_floats(raw_scale)
    scale = scale.reshape(outer_shape + (scale.shape[-1],))
    # Bias occupies the final 4 bytes of each fused row.
    raw_bias = fused[..., -4:].astype(np.uint8).reshape(-1, 4)
    bias = bytes_to_floats(raw_bias)
    bias = bias.reshape(outer_shape + (bias.shape[-1],))
    codes = fused[..., :-8]
    return codes * scale + bias
|
|
|
|
|
|
class TestFused8BitRowwiseQuantizationConversion(hu.HypothesisTestCase):
    """Checks the fused 8-bit rowwise quantize/dequantize operators
    against the pure-Python reference implementations above."""

    @given(input_data=hu.tensor(min_dim=1, max_dim=3, max_value=33))
    def test_quantize_op(self, input_data):
        # Normalize -0.0 to +0.0 before quantizing: the operator and the
        # reference may each pick a different zero as the row minimum
        # (they compare equal), and the serialized bias bytes would then
        # differ only in the sign bit, failing the exact comparison.
        input_data[input_data == -0.0] = 0.0

        quantize_op = core.CreateOperator(
            'FloatToFused8BitRowwiseQuantized',
            ['input_data'],
            ['quantized_data'],
        )
        workspace.FeedBlob('input_data', input_data)
        workspace.RunOperatorOnce(quantize_op)
        quantized_data = workspace.FetchBlob('quantized_data')

        reference = fused_rowwise_8bit_quantize_reference(
            input_data.astype(np.float32)
        )
        np.testing.assert_array_almost_equal(quantized_data, reference)

    @given(input_data=hu.tensor(min_dim=1, max_dim=3, max_value=33))
    def test_quantize_and_dequantize_op(self, input_data):
        # Quantize the input on the workspace...
        quantize_op = core.CreateOperator(
            'FloatToFused8BitRowwiseQuantized',
            ['input_data'],
            ['quantized_data'],
        )
        workspace.FeedBlob('input_data', input_data)
        workspace.RunOperatorOnce(quantize_op)
        quantized_data = workspace.FetchBlob('quantized_data')

        # ...then dequantize the result and compare with the reference
        # round trip (approximate match, so signed zeros are harmless here).
        dequantize_op = core.CreateOperator(
            'Fused8BitRowwiseQuantizedToFloat',
            ['quantized_data'],
            ['dequantized_data'],
        )
        workspace.FeedBlob('quantized_data', quantized_data)
        workspace.RunOperatorOnce(dequantize_op)
        dequantized_data = workspace.FetchBlob('dequantized_data')

        reference = fused_rowwise_8bit_quantize_dequantize_reference(input_data)
        np.testing.assert_array_almost_equal(dequantized_data, reference)
|