pytorch/caffe2/python/fused_8bit_rowwise_conversion_ops_test.py
Yun Wang (Speech) ba6bc5080f Fix fused_8bit_rowwise_conversion_ops_test (#98183)
Summary:
This test tests an operator that quantizes and serializes a float array.
Among the data serialized, one element is the bias, i.e. the minimum value in the array.

The test may fail when the array contains both +0.0 and -0.0, while all other elements are positive.
(this happens quite frequently with a hypothesis version >= 6.17.4, due to [this issue](https://github.com/HypothesisWorks/hypothesis/issues/3606))
Depending on the exact settings of SIMD (single instruction, multiple data), the elements of the array may be visited in different orders while running the operator and while calculating the reference.
Because +0.0 and -0.0 compare equal, the minimum value may be either +0.0 or -0.0.
Nevertheless, the serialized forms of these two values differ in the sign bit, and can make the test fail because it's conducting an exact match on the serialized result.

To avoid this failure, I'm adding a line to replace all -0.0 with +0.0 in the input array.

Test Plan:
Run this with both hypothesis < 6.17.4 and >= 6.17.4:
```
buck2 test mode/opt caffe2/caffe2/python:fused_8bit_rowwise_conversion_ops_test - test_quantize_op
```

Differential Revision: D44617022

Pull Request resolved: https://github.com/pytorch/pytorch/pull/98183
Approved by: https://github.com/malfet
2023-04-04 16:06:13 +00:00

106 lines
3.9 KiB
Python

from caffe2.python import core, workspace
import caffe2.python.hypothesis_test_util as hu
import numpy as np
import struct
from hypothesis import given
# Eigen/Python round 0.5 away from 0, Numpy rounds to even
round_to_nearest = np.vectorize(round)
def bytes_to_floats(byte_matrix):
floats = np.empty([np.shape(byte_matrix)[0], 1], dtype=np.float32)
for i, byte_values in enumerate(byte_matrix):
floats[i], = struct.unpack('f', bytearray(byte_values))
return floats
def floats_to_bytes(floats):
byte_matrix = np.empty([np.shape(floats)[0], 4], dtype=np.uint8)
for i, value in enumerate(floats):
assert isinstance(value, np.float32), (value, floats)
as_bytes = struct.pack('f', value)
# In Python3 bytes will be a list of int, in Python2 a list of string
if isinstance(as_bytes[0], int):
byte_matrix[i] = list(as_bytes)
else:
byte_matrix[i] = [ord(i) for i in as_bytes]
return byte_matrix
def fused_rowwise_8bit_quantize_reference(data):
minimum = np.min(data, axis=-1, keepdims=True)
maximum = np.max(data, axis=-1, keepdims=True)
span = maximum - minimum
bias = minimum
scale = span / 255.0
inverse_scale = 255.0 / (span + 1e-8)
quantized_data = round_to_nearest((data - bias) * inverse_scale)
scale_bytes = floats_to_bytes(scale.reshape(-1))
scale_bytes = scale_bytes.reshape(data.shape[:-1] + (scale_bytes.shape[-1],))
bias_bytes = floats_to_bytes(bias.reshape(-1))
bias_bytes = bias_bytes.reshape(data.shape[:-1] + (bias_bytes.shape[-1],))
print(quantized_data.shape, scale.shape, scale_bytes.shape, bias.shape, bias_bytes.shape)
return np.concatenate([quantized_data, scale_bytes, bias_bytes], axis=-1)
def fused_rowwise_8bit_quantize_dequantize_reference(data):
fused_quantized = fused_rowwise_8bit_quantize_reference(data)
scale = bytes_to_floats(fused_quantized[..., -8:-4].astype(np.uint8).reshape(-1, 4))
scale = scale.reshape(fused_quantized.shape[:-1] + (scale.shape[-1],))
bias = bytes_to_floats(fused_quantized[..., -4:].astype(np.uint8).reshape(-1, 4))
bias = bias.reshape(fused_quantized.shape[:-1] + (bias.shape[-1],))
quantized_data = fused_quantized[..., :-8]
return quantized_data * scale + bias
class TestFused8BitRowwiseQuantizationConversion(hu.HypothesisTestCase):
@given(input_data=hu.tensor(min_dim=1, max_dim=3, max_value=33))
def test_quantize_op(self, input_data):
input_data[input_data == -0.0] = 0.0
quantize = core.CreateOperator(
'FloatToFused8BitRowwiseQuantized',
['input_data'],
['quantized_data'],
)
workspace.FeedBlob('input_data', input_data)
workspace.RunOperatorOnce(quantize)
quantized_data = workspace.FetchBlob('quantized_data')
reference = fused_rowwise_8bit_quantize_reference(
input_data.astype(np.float32)
)
np.testing.assert_array_almost_equal(quantized_data, reference)
@given(input_data=hu.tensor(min_dim=1, max_dim=3, max_value=33))
def test_quantize_and_dequantize_op(self, input_data):
quantize = core.CreateOperator(
'FloatToFused8BitRowwiseQuantized',
['input_data'],
['quantized_data'],
)
workspace.FeedBlob('input_data', input_data)
workspace.RunOperatorOnce(quantize)
quantized_data = workspace.FetchBlob('quantized_data')
dequantize = core.CreateOperator(
'Fused8BitRowwiseQuantizedToFloat',
['quantized_data'],
['dequantized_data'],
)
workspace.FeedBlob('quantized_data', quantized_data)
workspace.RunOperatorOnce(dequantize)
dequantized_data = workspace.FetchBlob('dequantized_data')
reference = fused_rowwise_8bit_quantize_dequantize_reference(input_data)
np.testing.assert_array_almost_equal(dequantized_data, reference)