pytorch/caffe2/python/fused_8bit_rowwise_conversion_ops_test.py
Peter Goldsborough 8dc0702af5 Add float32 <-> fused_rowwise_8bit conversion Caffe2 operators
Summary: This first diff adds the conversion operators that go from float to our fused 8bit rowwise quantized storage and back again. For now I've put the scale and bias in front of each row because it makes the pointer arithmetic nicer here and in the EmebddingLookup perfkernel. If benchmarks or other reasons point out that this is a bad idea we can change it easily.

Reviewed By: kennyhorror

Differential Revision: D6710785

fbshipit-source-id: 086ab91c12d3b472564a06eff6329be6cb9e680e
2018-01-19 15:44:33 -08:00

114 lines
4.2 KiB
Python

# Copyright (c) 2016-present, Facebook, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
##############################################################################
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
from caffe2.python import core, workspace
import caffe2.python.hypothesis_test_util as hu
import numpy as np
import struct
from hypothesis import given
# Eigen/Python round 0.5 away from 0, Numpy rounds to even
# NOTE(review): Python 2's round() rounds halves away from zero, matching
# Eigen, but Python 3's round() rounds halves to even like numpy — confirm
# tie-breaking behavior if this test runs under Python 3.
round_to_nearest = np.vectorize(round)
def bytes_to_floats(byte_matrix):
    """Reinterpret each 4-byte row of ``byte_matrix`` as one float32.

    Returns an (N, 1) float32 column vector, where row i is the value
    decoded from the 4 bytes of ``byte_matrix[i]`` in native byte order.
    """
    num_rows = np.shape(byte_matrix)[0]
    floats = np.empty([num_rows, 1], dtype=np.float32)
    for row_index in range(num_rows):
        raw = bytearray(byte_matrix[row_index])
        (floats[row_index],) = struct.unpack('f', raw)
    return floats
def floats_to_bytes(floats):
    """Pack each float32 in ``floats`` into its 4 raw bytes (native order).

    Returns an (N, 4) uint8 matrix where row i holds the bytes of
    struct.pack('f', floats[i]). Every element of ``floats`` must be a
    np.float32 scalar.
    """
    byte_matrix = np.empty([np.shape(floats)[0], 4], dtype=np.uint8)
    for i, value in enumerate(floats):
        assert isinstance(value, np.float32), (value, floats)
        # bytearray yields a sequence of ints on both Python 2 and
        # Python 3, so no version-dependent branch on the element type
        # of struct.pack's result is needed.
        byte_matrix[i] = list(bytearray(struct.pack('f', value)))
    return byte_matrix
def fused_rowwise_8bit_quantize_reference(data):
    """Reference implementation of fused 8-bit rowwise quantization.

    Each row of ``data`` is mapped to 8-bit values, and the row's float32
    scale and float32 bias (the row minimum) are appended as 4 raw bytes
    each, giving an output with 8 extra columns.
    """
    row_min = np.min(data, axis=1, keepdims=True)
    row_max = np.max(data, axis=1, keepdims=True)
    row_span = row_max - row_min
    scale = row_span / 255.0
    # The small epsilon keeps the division finite for constant rows
    # (span == 0) without measurably changing other rows.
    quantized_data = round_to_nearest(
        (data - row_min) * (255.0 / (row_span + 1e-8))
    )
    scale_bytes = floats_to_bytes(scale.reshape(-1))
    bias_bytes = floats_to_bytes(row_min.reshape(-1))
    return np.concatenate([quantized_data, scale_bytes, bias_bytes], axis=1)
def fused_rowwise_8bit_quantize_dequantize_reference(data):
    """Quantize then dequantize ``data``; returns the reconstruction.

    Decodes the scale and bias from the last 8 byte-columns produced by
    fused_rowwise_8bit_quantize_reference and applies them to the
    quantized values.
    """
    fused = fused_rowwise_8bit_quantize_reference(data)
    # Row layout: [quantized values..., scale (4 bytes), bias (4 bytes)].
    quantized_part = fused[:, :-8]
    scale = bytes_to_floats(fused[:, -8:-4].astype(np.uint8))
    bias = bytes_to_floats(fused[:, -4:].astype(np.uint8))
    return quantized_part * scale + bias
class TestFused8BitRowwiseQuantizationConversion(hu.HypothesisTestCase):
    """Checks the fused 8-bit rowwise quantization C++ operators against
    the numpy reference implementations defined above."""

    @given(input_data=hu.tensor(min_dim=2, max_dim=2))
    def test_quantize_op(self, input_data):
        # Run the quantization operator on a hypothesis-generated matrix.
        op = core.CreateOperator(
            'FloatToFused8BitRowwiseQuantized',
            ['input_data'],
            ['quantized_data'],
        )
        workspace.FeedBlob('input_data', input_data)
        workspace.RunOperatorOnce(op)
        actual = workspace.FetchBlob('quantized_data')

        expected = fused_rowwise_8bit_quantize_reference(
            input_data.astype(np.float32)
        )
        np.testing.assert_array_almost_equal(actual, expected)

    @given(input_data=hu.tensor(min_dim=2, max_dim=2))
    def test_quantize_and_dequantize_op(self, input_data):
        # Quantize with the operator first...
        quantize_op = core.CreateOperator(
            'FloatToFused8BitRowwiseQuantized',
            ['input_data'],
            ['quantized_data'],
        )
        workspace.FeedBlob('input_data', input_data)
        workspace.RunOperatorOnce(quantize_op)
        quantized = workspace.FetchBlob('quantized_data')

        # ...then dequantize and compare against the numpy round trip.
        dequantize_op = core.CreateOperator(
            'Fused8BitRowwiseQuantizedToFloat',
            ['quantized_data'],
            ['dequantized_data'],
        )
        workspace.FeedBlob('quantized_data', quantized)
        workspace.RunOperatorOnce(dequantize_op)
        actual = workspace.FetchBlob('dequantized_data')

        expected = fused_rowwise_8bit_quantize_dequantize_reference(input_data)
        np.testing.assert_array_almost_equal(actual, expected)