[NNAPI EP] Add per-tensor u8s8 support for Qlinear[Conv/MatMul] (#6818)

* NNAPI Add per-tensor u8s8 support

* Update some comments

* Address CR comments

* Address CR comments
This commit is contained in:
Guoyu Wang 2021-03-03 15:44:49 -08:00 committed by GitHub
parent 3c5d811e77
commit fedb68429c
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 273 additions and 127 deletions

View file

@ -137,7 +137,9 @@ bool HasValidBinaryOpQuantizedInputs(const Node& node) {
bool HasValidQuantizationScales(const InitializedTensorSet& initializers, const Node& node,
const std::vector<size_t>& indices, const OpSupportCheckParams& params) {
const auto& op_type = node.OpType();
bool is_qlinear_conv = (op_type == "QLinearConv");
auto qlinear_op_type = GetQLinearOpType(node);
bool is_qlinear_conv = (qlinear_op_type == QLinearOpType::QLinearConv);
bool is_qlinear_matmul = (qlinear_op_type == QLinearOpType::QLinearMatMul);
const auto input_defs(node.InputDefs());
for (const auto idx : indices) {
if (idx >= input_defs.size()) {
@ -145,46 +147,53 @@ bool HasValidQuantizationScales(const InitializedTensorSet& initializers, const
<< " >= input number, " << input_defs.size();
return false;
}
const auto scale_name = input_defs[idx]->Name();
if (Contains(initializers, scale_name)) {
const auto& scale_tensor = *initializers.at(scale_name);
int64_t scales_dim = scale_tensor.dims().empty() ? 1 : scale_tensor.dims()[0];
bool is_conv_weight = is_qlinear_conv && idx == 4;
bool is_conv_u8s8_weight = false;
if (is_conv_weight) {
const auto& weight_tensor = *initializers.at(node.InputDefs()[3]->Name());
is_conv_u8s8_weight = weight_tensor.data_type() == ONNX_NAMESPACE::TensorProto_DataType_INT8;
}
// We need to check the per-channel quantization scales dimensions for u8s8 QlinearConv
// We only support per-channel quantization for u8s8
// For all other cases, the scales should be a scalar
if (is_conv_u8s8_weight) {
if (params.android_sdk_ver < 29) {
LOGS_DEFAULT(VERBOSE) << op_type << " only supports per-channel quantization on Android API 29+, "
<< "system API level: " << params.android_sdk_ver;
return false;
}
const auto& weight_tensor = *initializers.at(node.InputDefs()[3]->Name());
if (weight_tensor.dims()[0] != scales_dim) {
LOGS_DEFAULT(VERBOSE) << op_type << " mismatch int8 per-channel quantization weight,"
<< " weight dimension[0] " << weight_tensor.dims()[0]
<< " scale dimension " << scales_dim;
return false;
}
} else {
if (scales_dim != 1) {
LOGS_DEFAULT(VERBOSE) << op_type << " does not support per-channel quantization, "
<< " for now, only u8s8 QlinearConv supports per-channel quantization on API 29+";
return false;
}
}
} else {
LOGS_DEFAULT(VERBOSE) << "The scale of " << op_type << " must be known";
if (!Contains(initializers, scale_name)) {
LOGS_DEFAULT(VERBOSE) << "The scale of " << op_type << " must be an initializer tensor";
return false;
}
// If this op is Qlinear[Conv/MatMul], we want to check u8s8 support for weight tensor (or B tensor for QlinearMatMul)
bool is_conv_matmul_weight = (is_qlinear_conv || is_qlinear_matmul) && idx == 4;
bool is_conv_matmul_u8s8_weight = false;
if (is_conv_matmul_weight) {
const auto& weight_tensor = *initializers.at(node.InputDefs()[3]->Name());
is_conv_matmul_u8s8_weight = weight_tensor.data_type() == ONNX_NAMESPACE::TensorProto_DataType_INT8;
}
const auto& scale_tensor = *initializers.at(scale_name);
int64_t scales_dim = scale_tensor.dims().empty() ? 1 : scale_tensor.dims()[0];
if (!is_conv_matmul_u8s8_weight) {
if (scales_dim != 1) {
LOGS_DEFAULT(VERBOSE) << op_type << " does not support per-channel quantization, "
<< " for now, only u8s8 QlinearConv supports per-channel quantization on API 29+";
return false;
}
} else if (scales_dim != 1) {
// For u8s8 Qlinear[Conv/MatMul], we support
// 1. Per-tensor, the weight will be transformed to uint8 later
// 2. Per-channel, only from Android API level 29
if (is_qlinear_matmul) {
LOGS_DEFAULT(VERBOSE) << "QLinearMatMul does not support per-channel quantization";
return false;
}
if (params.android_sdk_ver < 29) {
LOGS_DEFAULT(VERBOSE) << op_type << " only supports per-channel quantization on Android API 29+, "
<< "system API level: " << params.android_sdk_ver;
return false;
}
const auto& weight_tensor = *initializers.at(node.InputDefs()[3]->Name());
if (weight_tensor.dims()[0] != scales_dim) {
LOGS_DEFAULT(VERBOSE) << op_type << " mismatch int8 per-channel quantization weight,"
<< " weight dimension[0] " << weight_tensor.dims()[0]
<< " scale dimension " << scales_dim;
return false;
}
}
}
return true;
@ -193,7 +202,9 @@ bool HasValidQuantizationScales(const InitializedTensorSet& initializers, const
bool HasValidQuantizationZeroPoints(const InitializedTensorSet& initializers, const Node& node,
const std::vector<size_t>& indices) {
const auto& op_type = node.OpType();
bool is_qlinear_conv = (op_type == "QLinearConv");
auto qlinear_op_type = GetQLinearOpType(node);
bool is_qlinear_conv = (qlinear_op_type == QLinearOpType::QLinearConv);
bool is_qlinear_matmul = (qlinear_op_type == QLinearOpType::QLinearMatMul);
const auto input_defs(node.InputDefs());
for (const auto idx : indices) {
if (idx >= input_defs.size()) {
@ -203,65 +214,77 @@ bool HasValidQuantizationZeroPoints(const InitializedTensorSet& initializers, co
}
const auto zero_point_name = input_defs[idx]->Name();
if (Contains(initializers, zero_point_name)) {
bool is_conv_weight = is_qlinear_conv && idx == 5;
bool is_conv_u8s8_weight = false;
if (is_conv_weight) {
const auto& weight_tensor = *initializers.at(node.InputDefs()[3]->Name());
is_conv_u8s8_weight = weight_tensor.data_type() == ONNX_NAMESPACE::TensorProto_DataType_INT8;
}
if (!Contains(initializers, zero_point_name)) {
LOGS_DEFAULT(VERBOSE) << "The zero point of " << op_type << " must be an initializer tensor";
return false;
}
const auto& zero_tensor = *initializers.at(zero_point_name);
int64_t zero_dim = zero_tensor.dims().empty() ? 1 : zero_tensor.dims()[0];
if (is_conv_u8s8_weight) {
if (zero_tensor.data_type() != ONNX_NAMESPACE::TensorProto_DataType_INT8) {
LOGS_DEFAULT(VERBOSE) << "u8s8 QlinearConv only supports int8 zero point for weight, "
<< "actual zero point type: [" << zero_tensor.data_type() << "]";
return false;
}
bool is_conv_matmul_weight = is_qlinear_conv && idx == 5;
bool is_conv_matmul_u8s8_weight = false;
if (is_conv_matmul_weight) {
const auto& weight_tensor = *initializers.at(node.InputDefs()[3]->Name());
is_conv_matmul_u8s8_weight = weight_tensor.data_type() == ONNX_NAMESPACE::TensorProto_DataType_INT8;
}
// For onnx, u8s8 QlinearConv, the weight zero point can be a scalar,
// or a tensor with same channel as weight, for NNAPI we only support it be
// 0 (scalar) or all 0 (tensor), NNAPI will assume the zero point for per-channel
// quantization is 0 there is no input for it
const auto& weight_tensor = *initializers.at(node.InputDefs()[3]->Name());
if (weight_tensor.dims()[0] != zero_dim && zero_dim != 1) {
LOGS_DEFAULT(VERBOSE) << op_type << " mismatch int8 per-channel quantization weight,"
<< " weight dimension[0] " << weight_tensor.dims()[0]
<< " zero point dimension " << zero_dim;
return false;
}
const auto& zero_tensor = *initializers.at(zero_point_name);
int64_t zero_dim = zero_tensor.dims().empty() ? 1 : zero_tensor.dims()[0];
std::unique_ptr<uint8_t[]> unpacked_tensor;
size_t tensor_byte_size;
auto status = onnxruntime::utils::UnpackInitializerData(
zero_tensor,
node.ModelPath(),
unpacked_tensor, tensor_byte_size);
if (!status.IsOK()) {
LOGS_DEFAULT(ERROR) << "QLinearConv erro when unpack zero tensor:" << status.ErrorMessage();
return false;
}
// Verify all onnx weight zero point(s) are 0(s)
const int8_t* zero_points = reinterpret_cast<const int8_t*>(unpacked_tensor.get());
for (size_t i = 0; i < tensor_byte_size; i++) {
if (zero_points[i] != 0) {
LOGS_DEFAULT(VERBOSE) << "QLinearConv only support 0 as zero point, "
<< "zero_points[" << i << "] has value: " << zero_points[i];
return false;
}
}
} else {
if (zero_dim != 1) {
LOGS_DEFAULT(VERBOSE) << op_type << " does not support per-channel quantization, "
<< " for now, only u8s8 QlinearConv supports per-channel quantization on API 29+";
return false;
}
if (!is_conv_matmul_u8s8_weight) {
if (zero_dim != 1) {
LOGS_DEFAULT(VERBOSE) << op_type << " does not support per-channel quantization, "
<< " for now, only u8s8 QlinearConv supports per-channel quantization on API 29+";
return false;
}
} else {
LOGS_DEFAULT(VERBOSE) << "The zero point of " << op_type << " must be known";
return false;
// For u8s8 Qlinear[Conv/MatMul], we support
// 1. Per-tensor, the weight will be transformed to uint8 later
// 2. Per-channel, only from Android API level 29
if (zero_tensor.data_type() != ONNX_NAMESPACE::TensorProto_DataType_INT8) {
LOGS_DEFAULT(VERBOSE) << "u8s8 Qlinear[Conv/MatMul] only supports int8 zero point for weight, "
<< "actual zero point type: [" << zero_tensor.data_type() << "]";
return false;
}
if (zero_dim != 1) {
if (is_qlinear_matmul) {
LOGS_DEFAULT(VERBOSE) << "QLinearMatMul does not support per-channel quantization";
return false;
}
}
// For ONNX u8s8 QlinearConv, the weight zero point can be a scalar
// or a tensor with the same number of channels as the weight. For NNAPI we only support it being
// 0 (scalar) or all 0s (tensor): NNAPI assumes the zero point for per-channel
// quantization is 0, and there is no input for it
const auto& weight_tensor = *initializers.at(node.InputDefs()[3]->Name());
if (weight_tensor.dims()[0] != zero_dim && zero_dim != 1) {
LOGS_DEFAULT(VERBOSE) << op_type << " mismatch int8 per-channel quantization weight,"
<< " weight dimension[0] " << weight_tensor.dims()[0]
<< " zero point dimension " << zero_dim;
return false;
}
std::unique_ptr<uint8_t[]> unpacked_tensor;
size_t tensor_byte_size;
auto status = onnxruntime::utils::UnpackInitializerData(
zero_tensor,
node.ModelPath(),
unpacked_tensor, tensor_byte_size);
if (!status.IsOK()) {
LOGS_DEFAULT(ERROR) << "Qlinear[Conv/MatMul] error when unpack zero tensor: " << zero_point_name
<< ", error msg: " << status.ErrorMessage();
return false;
}
// Verify all onnx weight zero point(s) are 0(s)
const int8_t* zero_points = reinterpret_cast<const int8_t*>(unpacked_tensor.get());
for (size_t i = 0; i < tensor_byte_size; i++) {
if (zero_points[i] != 0) {
LOGS_DEFAULT(VERBOSE) << "u8s8 Qlinear[Conv/MatMul] only support 0 as zero point, "
<< "zero_points[" << i << "] has value: " << zero_points[i];
return false;
}
}
}
}

View file

@ -272,15 +272,23 @@ enum DataLayout {
L_1230 = 1,
};
// TODO, replace this with more efficient code in optimizers
// This is primarily used for adding the weight (an initializer) of Conv/QlinearConv
// And perform layout change from ONNX -> NNAPI
// If is_per_tensor_u8s8 is true, the QlinearConv is per-tensor u8s8 (input X is unsigned int8,
// weight W is signed int8, and W is per-tensor (NOT per-channel) quantized). In this case,
// since NNAPI requires X and W to be the same type for per-tensor quantization,
// the initializer tensor W will be converted from int8 to uint8 by flipping the sign bit of
// each byte with XOR 0x80 (byte ^ 0x80 == byte + 128)
static Status AddInitializerInNewLayout(ModelBuilder& model_builder,
const std::string& name,
const OperandType& source_operand_type,
DataLayout new_layout) ORT_MUST_USE_RESULT;
DataLayout new_layout,
bool is_per_tensor_u8s8) ORT_MUST_USE_RESULT;
static Status AddInitializerInNewLayout(ModelBuilder& model_builder,
const std::string& name,
const OperandType& source_operand_type,
DataLayout new_layout) {
DataLayout new_layout,
bool is_per_tensor_u8s8) {
const auto& tensor = *model_builder.GetInitializerTensors().at(name);
const Shape& shape = source_operand_type.dimensions;
ORT_RETURN_IF_NOT(shape.size() == 4,
@ -322,6 +330,8 @@ static Status AddInitializerInNewLayout(ModelBuilder& model_builder,
std::unique_ptr<uint8_t[]> buffer_holder(new uint8_t[operand_type.GetOperandBlobByteSize()]);
uint8_t* buffer = buffer_holder.get();
size_t element_size = operand_type.GetElementByteSize();
uint8_t bit_flip_val = is_per_tensor_u8s8 ? 0x80 : 0;
for (uint32_t out = 0; out < out_t; out++) {
for (uint32_t in = 0; in < in_t; in++) {
for (uint32_t h = 0; h < h_t; h++) {
@ -345,7 +355,7 @@ static Status AddInitializerInNewLayout(ModelBuilder& model_builder,
}
for (size_t i = 0; i < element_size; i++) {
buffer[element_size * nnapi_idx + i] = src[element_size * onnx_idx + i];
buffer[element_size * nnapi_idx + i] = src[element_size * onnx_idx + i] ^ bit_flip_val;
}
}
}
@ -355,13 +365,21 @@ static Status AddInitializerInNewLayout(ModelBuilder& model_builder,
return model_builder.AddOperandFromPersistMemoryBuffer(name, &buffer[0], operand_type);
}
// TODO, replace this with more efficient code in optimizers
// This is primarily used for adding the input B (an initializer) of MatMul/QlinearMatMul/Gemm (not transposed)
// and transposing it, since NNAPI only supports A*B'
//
// If is_per_tensor_u8s8 is true, the QlinearMatMul is per-tensor u8s8 (input A is unsigned int8
// and input B is signed int8). In this case, since NNAPI requires A and B to be the same type,
// the initializer tensor B will be converted from int8 to uint8 by flipping the sign bit of
// each byte with XOR 0x80 (byte ^ 0x80 == byte + 128)
static Status AddInitializerTransposed(ModelBuilder& model_builder,
const OperandType& source_operand_type,
const std::string& name) ORT_MUST_USE_RESULT;
const std::string& name,
bool is_per_tensor_u8s8) ORT_MUST_USE_RESULT;
static Status AddInitializerTransposed(ModelBuilder& model_builder,
const OperandType& source_operand_type,
const std::string& name) {
const std::string& name,
bool is_per_tensor_u8s8) {
const auto& tensor = *model_builder.GetInitializerTensors().at(name);
const Shape& shape = source_operand_type.dimensions;
@ -397,10 +415,11 @@ static Status AddInitializerTransposed(ModelBuilder& model_builder,
std::unique_ptr<uint8_t[]> buffer_holder(new uint8_t[operand_type.GetOperandBlobByteSize()]);
uint8_t* buffer = buffer_holder.get();
size_t element_size = operand_type.GetElementByteSize();
uint8_t bit_flip_val = is_per_tensor_u8s8 ? 0x80 : 0;
for (uint32_t x = 0; x < x_t; x++) {
for (uint32_t y = 0; y < y_t; y++) {
for (size_t i = 0; i < element_size; i++) {
buffer[element_size * (y * x_t + x) + i] = src[element_size * (x * y_t + y) + i];
buffer[element_size * (y * x_t + x) + i] = src[element_size * (x * y_t + y) + i] ^ bit_flip_val;
}
}
}
@ -518,16 +537,26 @@ static Status GetBinaryOpQuantizationScaleAndZeroPoint(
return Status::OK();
}
static Status GetConvOpQuantizationScaleAndZeroPoint(
// Get scale and zero point for
// [QlinearConv] input, weight, output
// [QlinearMatMul] A, B, Y
//
// In case of u8s8 (input/A is uint8 and weight/B is int8):
// If the QlinearConv is using per-channel u8s8, return the scales vector.
// If the Qlinear[Conv/MatMul] is using per-tensor u8s8, the weight/B tensor
// will be converted to uint8 later, so return the same scale and 128 as zero point,
// and set is_per_tensor_u8s8 to true to be used later
static Status GetConvMatMulOpQuantizationScaleAndZeroPoint(
const ModelBuilder& model_builder, const Node& node,
float& a_scale, float& w_scale, float& y_scale,
int32_t& a_zero_point, int32_t& w_zero_point, int32_t& y_zero_point,
optional<vector<float>>& w_scales) ORT_MUST_USE_RESULT;
static Status GetConvOpQuantizationScaleAndZeroPoint(
optional<vector<float>>& w_scales, bool& is_per_tensor_u8s8) ORT_MUST_USE_RESULT;
static Status GetConvMatMulOpQuantizationScaleAndZeroPoint(
const ModelBuilder& model_builder, const Node& node,
float& a_scale, float& w_scale, float& y_scale,
int32_t& a_zero_point, int32_t& w_zero_point, int32_t& y_zero_point,
optional<vector<float>>& w_scales) {
optional<vector<float>>& w_scales, bool& is_per_tensor_u8s8) {
is_per_tensor_u8s8 = false;
// Get scale and zero points
// We will handle per-channel weight scale and zero point later
ORT_RETURN_IF_ERROR(
@ -543,14 +572,26 @@ static Status GetConvOpQuantizationScaleAndZeroPoint(
if (weight_tensor.data_type() == ONNX_NAMESPACE::TensorProto_DataType_UINT8)
return Status::OK();
// Now we have u8s8 QlinearConv
// This is per-tensor u8s8
// NNAPI does not support per-tensor u8s8
// For this case we will need to convert the int8 weight tensor to uint8
// And have same scale and 128 as zero point
// The conversion of the weight tensor itself will be done in the OpBuilder
const auto& scale_tensor = *initializers.at(input_defs[4]->Name());
int64_t scale_dim = scale_tensor.dims().empty() ? 1 : scale_tensor.dims()[0];
if (scale_dim == 1) {
w_zero_point = 128;
is_per_tensor_u8s8 = true;
return Status::OK();
}
// Now we have u8s8 per-channel QlinearConv
// u8s8 QlinearConv always have 0 as zero point so we are not getting it here
// and we do not use w_scale here, so we reset them back to 0
w_scale = 0.0f;
w_zero_point = 0;
// We need to copy the 1d scales array for per-channel quantization
const auto& scale_tensor = *initializers.at(input_defs[4]->Name());
const auto* scales = GetTensorFloatData(scale_tensor);
size_t scales_size = scale_tensor.dims().empty() ? 1 : scale_tensor.dims()[0];
vector<float> scales_vec(scales_size, 0.0f);
@ -1345,12 +1386,12 @@ Status ConvOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const N
// this is for per-channel quantization weights
optional<vector<float>> w_scales;
bool is_per_tensor_u8s8 = false;
if (is_qlinear_conv) {
ORT_RETURN_IF_ERROR(GetConvOpQuantizationScaleAndZeroPoint(model_builder, node,
x_scale, w_scale, y_scale,
x_zero_point, w_zero_point, y_zero_point,
w_scales));
ORT_RETURN_IF_ERROR(GetConvMatMulOpQuantizationScaleAndZeroPoint(model_builder, node,
x_scale, w_scale, y_scale,
x_zero_point, w_zero_point, y_zero_point,
w_scales, is_per_tensor_u8s8));
}
Shape onnx_weight_shape;
@ -1366,7 +1407,15 @@ Status ConvOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const N
onnx_weight_type = Type::TENSOR_QUANT8_ASYMM;
break;
case ONNX_NAMESPACE::TensorProto_DataType_INT8:
onnx_weight_type = Type::TENSOR_QUANT8_SYMM_PER_CHANNEL;
// We support both per-tensor and per-channel u8s8
// For per-tensor u8s8 we will convert the int8 weight to uint8
if (is_per_tensor_u8s8) {
// Per-Tensor u8s8
onnx_weight_type = Type::TENSOR_QUANT8_ASYMM;
} else {
// Per-Channel u8s8
onnx_weight_type = Type::TENSOR_QUANT8_SYMM_PER_CHANNEL;
}
break;
default:
return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
@ -1384,9 +1433,9 @@ Status ConvOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const N
// Pre-process weights
if (conv_2d || grouped_conv_2d) {
ORT_RETURN_IF_ERROR(AddInitializerInNewLayout(model_builder, weight, onnx_weight_operand_type, L_0231));
ORT_RETURN_IF_ERROR(AddInitializerInNewLayout(model_builder, weight, onnx_weight_operand_type, L_0231, is_per_tensor_u8s8));
} else { // depthwise_conv_2d
ORT_RETURN_IF_ERROR(AddInitializerInNewLayout(model_builder, weight, onnx_weight_operand_type, L_1230));
ORT_RETURN_IF_ERROR(AddInitializerInNewLayout(model_builder, weight, onnx_weight_operand_type, L_1230, is_per_tensor_u8s8));
}
if (is_qlinear_conv) {
@ -1697,10 +1746,14 @@ Status GemmOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const N
b_zero_point = 0,
y_zero_point = 0;
bool is_per_tensor_u8s8 = false;
if (is_qlinear_matmul) {
ORT_RETURN_IF_ERROR(GetBinaryOpQuantizationScaleAndZeroPoint(model_builder, node,
a_scale, b_scale, y_scale,
a_zero_point, b_zero_point, y_zero_point));
optional<vector<float>> w_scales;
ORT_RETURN_IF_ERROR(
GetConvMatMulOpQuantizationScaleAndZeroPoint(model_builder, node,
a_scale, b_scale, y_scale,
a_zero_point, b_zero_point, y_zero_point,
w_scales, is_per_tensor_u8s8));
}
uint32_t input_2_idx;
@ -1717,7 +1770,7 @@ Status GemmOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const N
onnx_mat_b_shape.push_back(SafeInt<uint32_t>(dim));
const OperandType onnx_mat_b_operand_type(onnx_mat_b_type, onnx_mat_b_shape, b_scale, b_zero_point);
ORT_RETURN_IF_ERROR(AddInitializerTransposed(model_builder, onnx_mat_b_operand_type, input2));
ORT_RETURN_IF_ERROR(AddInitializerTransposed(model_builder, onnx_mat_b_operand_type, input2, is_per_tensor_u8s8));
}
input_2_idx = operand_indices.at(input2);

View file

@ -58,15 +58,15 @@ TEST(QuantizeLinearMatmulOpTest, QLinearMatMul3D_U8S8) {
test.AddInput<uint8_t>("a_zero_point", {}, {113});
test.AddInput<int8_t>("T2", {2, 4, 3},
{-43, 51, -34,
60, 26, -17,
0, 63, -55,
47, -29, -31,
{-43, 51, -34,
60, 26, -17,
0, 63, -55,
47, -29, -31,
-62, 51, -42,
60, 26, -22,
0, -8, -19,
37, -2, -47});
-62, 51, -42,
60, 26, -22,
0, -8, -19,
37, -2, -47});
test.AddInput<float>("b_scale", {}, {0.00802f});
test.AddInput<int8_t>("b_zero_point", {}, {-2});
@ -83,6 +83,76 @@ TEST(QuantizeLinearMatmulOpTest, QLinearMatMul3D_U8S8) {
test.Run();
}
// 2-D QLinearMatMul where both A (T1) and B (T2) are uint8.
// The same case is executed twice: first with the flag below set to false, then to true.
// The flag is forwarded as the trailing AddInput argument of every input except T1.
TEST(QuantizeLinearMatmulOpTest, QLinearMatMul2D_U8U8) {
  // NNAPI will require all inputs except T1 to be initializers,
  // which is what the second pass (flag == true) exercises
  for (const bool non_t1_as_initializer : {false, true}) {
    OpTester test("QLinearMatMul", 10);

    // A: 2x4 uint8 matrix, always added without the flag
    test.AddInput<uint8_t>("T1", {2, 4},
                           {208, 236, 0, 238,
                            3, 214, 255, 29});
    test.AddInput<float>("a_scale", {}, {0.0066f}, non_t1_as_initializer);
    test.AddInput<uint8_t>("a_zero_point", {}, {113}, non_t1_as_initializer);

    // B: 4x3 uint8 matrix
    test.AddInput<uint8_t>("T2", {4, 3},
                           {152, 51, 244,
                            60, 26, 255,
                            0, 127, 246,
                            127, 254, 247},
                           non_t1_as_initializer);
    test.AddInput<float>("b_scale", {}, {0.00705f}, non_t1_as_initializer);
    test.AddInput<uint8_t>("b_zero_point", {}, {114}, non_t1_as_initializer);

    // Output quantization parameters
    test.AddInput<float>("y_scale", {}, {0.0107f}, non_t1_as_initializer);
    test.AddInput<uint8_t>("y_zero_point", {}, {118}, non_t1_as_initializer);

    // Expected 2x3 uint8 result
    test.AddOutput<uint8_t>("T3", {2, 3},
                            {168, 115, 255,
                             1, 66, 151});
    test.Run();
  }
}
// 2-D QLinearMatMul where A (T1) is uint8 and B (T2) is int8 with a zero point of 0.
// The same case is executed twice: first with the flag below set to false, then to true.
// The flag is forwarded as the trailing AddInput argument of every input except T1.
TEST(QuantizeLinearMatmulOpTest, QLinearMatMul2D_U8S8) {
  // NNAPI will require all inputs except T1 to be initializers,
  // which is what the second pass (flag == true) exercises
  for (const bool non_t1_as_initializer : {false, true}) {
    OpTester test("QLinearMatMul", 10);

    // A: 2x4 uint8 matrix, always added without the flag
    test.AddInput<uint8_t>("T1", {2, 4},
                           {208, 126, 0, 238,
                            3, 214, 255, 29});
    test.AddInput<float>("a_scale", {}, {0.0066f}, non_t1_as_initializer);
    test.AddInput<uint8_t>("a_zero_point", {}, {113}, non_t1_as_initializer);

    // B: 4x3 int8 matrix
    test.AddInput<int8_t>("T2", {4, 3},
                          {-43, 51, -34,
                           60, 26, -17,
                           0, 63, -55,
                           47, -29, -31},
                          non_t1_as_initializer);
    test.AddInput<float>("b_scale", {}, {0.00802f}, non_t1_as_initializer);
    test.AddInput<int8_t>("b_zero_point", {}, {0}, non_t1_as_initializer);

    // Output quantization parameters
    test.AddInput<float>("y_scale", {}, {0.0123f}, non_t1_as_initializer);
    test.AddInput<uint8_t>("y_zero_point", {}, {118}, non_t1_as_initializer);

    // Expected 2x3 uint8 result
    test.AddOutput<uint8_t>("T3", {2, 3},
                            {129, 94, 113,
                             147, 154, 104});
    test.Run();
  }
}
static void QLinearMatMul2DTest(bool only_t1_not_initializer) {
// Test non-empty inputs
OpTester test_non_empty("QLinearMatMul", 10);