[Android NNAPI EP] Add QLinearAdd op Support, move some throw with return status (#4607)

* remove dependency of external jd-dnnlibrary

* add qlinearadd support

* combine some qlinear ops logics, move some throw into return status

* merge master

* minor bug fixes

* addressed comments
This commit is contained in:
gwang-msft 2020-07-30 11:45:11 -07:00 committed by GitHub
parent 51332e3c81
commit 282975aefb
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 391 additions and 193 deletions

View file

@ -9,6 +9,9 @@
#include "helper.h"
namespace onnxruntime {
namespace nnapi {
using std::string;
using std::vector;
@ -40,6 +43,28 @@ std::string GetErrorCause(int error_code) {
}
}
// Translates the op type string of `node` into the matching QLinearOpType
// enumerator. Op types that are not one of the recognized quantized
// operators map to QLinearOpType::Unknown.
QLinearOpType GetQLinearOpType(const onnxruntime::Node& node) {
  const auto& name = node.OpType();

  if (name == "DequantizeLinear") {
    return QLinearOpType::DequantizeLinear;
  }
  if (name == "QuantizeLinear") {
    return QLinearOpType::QuantizeLinear;
  }
  if (name == "QLinearConv") {
    return QLinearOpType::QLinearConv;
  }
  if (name == "QLinearMatMul") {
    return QLinearOpType::QLinearMatMul;
  }
  if (name == "QLinearAdd") {
    return QLinearOpType::QLinearAdd;
  }

  // Anything else is not a (supported) linear quantized op.
  return QLinearOpType::Unknown;
}
// Returns true for the qlinear operators that take two quantized inputs and
// produce one output (QLinearConv, QLinearMatMul, QLinearAdd); false for
// every other QLinearOpType value.
bool IsQLinearBinaryOp(QLinearOpType qlinear_op_type) {
  switch (qlinear_op_type) {
    case QLinearOpType::QLinearConv:
    case QLinearOpType::QLinearMatMul:
    case QLinearOpType::QLinearAdd:
      return true;
    default:
      return false;
  }
}
// Builds a helper around the attribute collection returned by
// node.GetAttributes(); the accessors of this class look attributes up in it.
// NOTE(review): node_attributes_ looks like a reference member, in which case
// `node` must outlive this helper — confirm against the class declaration.
NodeAttrHelper::NodeAttrHelper(const onnxruntime::Node& node)
: node_attributes_(node.GetAttributes()) {}
@ -97,3 +122,6 @@ vector<float> NodeAttrHelper::Get(const std::string& key, const vector<float>& d
// Reports whether the wrapped attribute collection has an entry named `key`.
bool NodeAttrHelper::HasAttr(const std::string& key) const {
  const bool found = Contains(node_attributes_, key);
  return found;
}
} // namespace nnapi
} // namespace onnxruntime

View file

@ -8,6 +8,9 @@
#include "core/providers/nnapi/nnapi_builtin/nnapi_lib/NeuralNetworksTypes.h"
namespace onnxruntime {
namespace nnapi {
#define THROW_ON_ERROR(val) \
{ \
const auto ret = (val); \
@ -36,12 +39,31 @@ inline bool Contains(const Map& map, const Key& key) {
std::string GetErrorCause(int error_code);
// Classifies a node as one of the linear-quantized operators handled here.
// GetQLinearOpType() derives the value from the node's op type string, and
// IsQLinearBinaryOp() groups the two-input/one-output members.
enum class QLinearOpType : uint8_t {
Unknown, // Unknown or not a linear quantized op
// Op type "DequantizeLinear"
DequantizeLinear,
// Op type "QuantizeLinear"
QuantizeLinear,
// Op type "QLinearConv" (treated as a qlinear binary op)
QLinearConv,
// Op type "QLinearMatMul" (treated as a qlinear binary op)
QLinearMatMul,
// Op type "QLinearAdd" (treated as a qlinear binary op)
QLinearAdd,
// Not yet supported
// QLinearAveragePool,
// QLinearMul,
// QLinearReduceMean,
};
QLinearOpType GetQLinearOpType(const onnxruntime::Node& node);
// Whether this qlinear op is an operator that takes 2 inputs and produces 1 output,
// such as QLinearConv, QLinearMatMul, QLinearAdd, ...
bool IsQLinearBinaryOp(QLinearOpType qlinear_op_type);
/**
* Wrapping onnxruntime::Node for retrieving attribute values
*/
class NodeAttrHelper {
public:
NodeAttrHelper(const onnxruntime::Node& proto);
NodeAttrHelper(const onnxruntime::Node& node);
float Get(const std::string& key, float def_val) const;
int32_t Get(const std::string& key, int32_t def_val) const;
@ -54,3 +76,6 @@ class NodeAttrHelper {
private:
const onnxruntime::NodeAttributes& node_attributes_;
};
} // namespace nnapi
} // namespace onnxruntime

View file

@ -192,8 +192,8 @@ std::unordered_map<std::string, vector<const Node*>> GetAllQuantizedOpInputs(con
const auto& node_indices = graph_view.GetNodesInTopologicalOrder();
for (const auto& node_idx : node_indices) {
const auto* node(graph_view.GetNode(node_idx));
const auto& op_type = node->OpType();
if (op_type == "DequantizeLinear" || op_type == "QLinearMatMul" || op_type == "QLinearConv") {
auto qlinear_op_type = GetQLinearOpType(*node);
if (qlinear_op_type == QLinearOpType::DequantizeLinear || IsQLinearBinaryOp(qlinear_op_type)) {
const auto& input_name = node->InputDefs()[0]->Name();
if (Contains(all_quantized_op_inputs, input_name))
all_quantized_op_inputs.at(input_name).push_back(node);
@ -201,7 +201,7 @@ std::unordered_map<std::string, vector<const Node*>> GetAllQuantizedOpInputs(con
all_quantized_op_inputs.emplace(input_name, vector<const Node*>{node});
}
if (op_type == "QLinearMatMul" || op_type == "QLinearConv") {
if (IsQLinearBinaryOp(qlinear_op_type)) {
const auto& input_name = node->InputDefs()[3]->Name();
if (Contains(all_quantized_op_inputs, input_name))
all_quantized_op_inputs.at(input_name).push_back(node);
@ -328,8 +328,8 @@ void ModelBuilder::RegisterModelInputs() {
}
// TODO, verify the scale and zero point match if there are multiple op using same input
std::tie(scale, zero_point) =
GetQuantizedInputScaleAndZeroPoint(*this, *all_quantized_op_inputs.at(input_name)[0], input_name);
ORT_THROW_IF_ERROR(GetQuantizedInputScaleAndZeroPoint(
*this, *all_quantized_op_inputs.at(input_name)[0], input_name, scale, zero_point));
break;
}
default:

View file

@ -155,7 +155,9 @@ static void AddBinaryOperator(int32_t op_type,
const std::string& input2,
int32_t fuse_code,
const std::string& output,
bool output_is_nhwc) {
bool output_is_nhwc,
float output_scale = 0.0f,
int32_t output_zero_point = 0) {
auto& shaper(model_builder.GetShaper());
const auto& operand_indices(model_builder.GetOperandIndices());
const auto& operand_types(model_builder.GetOperandTypes());
@ -165,7 +167,7 @@ static void AddBinaryOperator(int32_t op_type,
input_indices.push_back(operand_indices.at(input2)); // input 2
input_indices.push_back(model_builder.AddOperandFromScalar(fuse_code));
shaper.Eltwise(input1, input2, output);
const OperandType output_operand_type(operand_types.at(input1).type, shaper[output]);
const OperandType output_operand_type(operand_types.at(input1).type, shaper[output], output_scale, output_zero_point);
model_builder.AddOperation(op_type, input_indices, {output}, {output_operand_type}, {output_is_nhwc});
}
@ -441,39 +443,101 @@ static float GetQuantizationScale(const ModelBuilder& model_builder, const Node&
return GetTensorFloatData(scale_tensor)[0];
}
static int32_t GetQuantizationZeroPoint(const ModelBuilder& model_builder, const Node& node, size_t idx) {
static Status GetQuantizationZeroPoint(const ModelBuilder& model_builder, const Node& node, size_t idx, int32_t& zero_point) {
std::unique_ptr<uint8_t[]> unpacked_tensor;
size_t tensor_byte_size;
const auto& zero_point_tensor = model_builder.GetInitializerTensors().at(node.InputDefs()[idx]->Name());
ORT_THROW_IF_ERROR(
ORT_RETURN_IF_ERROR(
UnpackInitializerTensor(zero_point_tensor, unpacked_tensor, tensor_byte_size));
return static_cast<int32_t>(unpacked_tensor.get()[0]);
zero_point = static_cast<int32_t>(unpacked_tensor.get()[0]);
return Status::OK();
}
static void VerifyValidInputQuantizedType(const std::string& input_name,
const OperandType& input_operand_type,
float scale, int32_t zero_point) {
ORT_ENFORCE(input_operand_type.operandType.scale == scale,
"Input [" + input_name + "] NNAPI input: " + " scale: " +
std::to_string(input_operand_type.operandType.scale) +
", ONNX input scale: " + std::to_string(scale));
// Get the scales and zero points for the qlinear binary ops (which have 2 inputs and 1 output):
// QLinearConv, QLinearMatMul, QLinearAdd.
// a and b are the inputs, and y is the output.
static Status GetBinaryOpQuantizationScaleAndZeroPoint(const ModelBuilder& model_builder, const Node& node,
float& a_scale, float& b_scale, float& y_scale,
int32_t& a_zero_point, int32_t& b_zero_point, int32_t& y_zero_point) {
a_scale = GetQuantizationScale(model_builder, node, 1);
b_scale = GetQuantizationScale(model_builder, node, 4);
y_scale = GetQuantizationScale(model_builder, node, 6);
ORT_ENFORCE(input_operand_type.operandType.zeroPoint == zero_point,
"Input [" + input_name + "] NNNAPI input zero point: " +
std::to_string(input_operand_type.operandType.zeroPoint) +
", ONNX input zero point: " + std::to_string(zero_point));
ORT_RETURN_IF_ERROR(GetQuantizationZeroPoint(model_builder, node, 2, a_zero_point));
ORT_RETURN_IF_ERROR(GetQuantizationZeroPoint(model_builder, node, 5, b_zero_point));
ORT_RETURN_IF_ERROR(GetQuantizationZeroPoint(model_builder, node, 7, y_zero_point));
return Status::OK();
}
std::pair<float, int32_t> GetQuantizedInputScaleAndZeroPoint(const ModelBuilder& model_builder,
const Node& node,
const std::string& input_name) {
// NNAPI has the quantization scale and zero point embedded in the ANeuralNetworksOperandType,
// while ONNX passes the quantization scale and zero point as inputs of the qlinear operators.
// This verifies that the scale and zero point read from the ONNX inputs match the values
// embedded in the NNAPI operand type registered under `input_name`.
//
// model_builder: provides the registered operand types (GetOperandTypes()).
// input_name:    name of the operand to check.
// scale:         scale taken from the ONNX operator inputs.
// zero_point:    zero point taken from the ONNX operator inputs.
//
// Returns Status::OK() when both values match. Returns an INVALID_ARGUMENT status when
// either value differs, or when no operand type is registered for `input_name`
// (reported as a status instead of letting the map lookup throw).
static Status IsValidInputQuantizedType(const ModelBuilder& model_builder,
                                        const std::string& input_name,
                                        float scale,
                                        int32_t zero_point) {
  const auto& operand_types = model_builder.GetOperandTypes();
  const auto operand_it = operand_types.find(input_name);
  if (operand_it == operand_types.end()) {
    return Status(common::ONNXRUNTIME, common::INVALID_ARGUMENT,
                  "Input [" + input_name + "] has no NNAPI operand type registered");
  }

  const OperandType& input_operand_type = operand_it->second;

  // NOTE(review): exact float equality is kept from the original check; presumably both
  // values originate from the same initializer data — confirm before relaxing to a tolerance.
  if (input_operand_type.operandType.scale != scale) {
    return Status(common::ONNXRUNTIME, common::INVALID_ARGUMENT,
                  "Input [" + input_name + "] NNAPI input scale: " +
                      std::to_string(input_operand_type.operandType.scale) +
                      ", ONNX input scale: " + std::to_string(scale));
  }

  if (input_operand_type.operandType.zeroPoint != zero_point) {
    return Status(common::ONNXRUNTIME, common::INVALID_ARGUMENT,
                  "Input [" + input_name + "] NNAPI input zero point: " +
                      std::to_string(input_operand_type.operandType.zeroPoint) +
                      ", ONNX input zero point: " + std::to_string(zero_point));
  }

  return Status::OK();
}
// Registers the six quantization-parameter inputs of a qlinear binary op with
// model_builder.AddInitializerToSkip(), in input order:
//   1 = a_scale, 2 = a_zero_point, 4 = b_scale, 5 = b_zero_point,
//   6 = y_scale, 7 = y_zero_point.
// Inputs 0 and 3 (the a and b tensors themselves) are left untouched.
static void AddBinaryOpQuantizationScaleAndZeroPointToSkip(ModelBuilder& model_builder, const Node& node) {
  const auto input_defs(node.InputDefs());
  const size_t quant_param_indices[] = {1, 2, 4, 5, 6, 7};
  for (const size_t input_idx : quant_param_indices) {
    model_builder.AddInitializerToSkip(input_defs[input_idx]->Name());
  }
}
// Checks the element types of the two data inputs (input 0 = A, input 3 = B)
// of a qlinear binary op. Returns true only when both types can be read via
// GetType() and both are uint8; otherwise logs (for a type mismatch) and
// returns false.
static bool IsBinaryOpQuantizedInputsSupported(const Node& node) {
  int32_t a_input_type, b_input_type;
  // A is queried before B; B is not queried when A's type is unavailable.
  if (!GetType(*node.InputDefs()[0], a_input_type) ||
      !GetType(*node.InputDefs()[3], b_input_type)) {
    return false;
  }

  const bool both_uint8 = a_input_type == ONNX_NAMESPACE::TensorProto_DataType_UINT8 &&
                          a_input_type == b_input_type;
  if (both_uint8) {
    return true;
  }

  LOGS_DEFAULT(VERBOSE) << "[" << node.OpType()
                        << "] A Input type: [" << a_input_type
                        << "] B Input type: [" << b_input_type
                        << "] is not supported for now";
  return false;
}
Status GetQuantizedInputScaleAndZeroPoint(const ModelBuilder& model_builder,
const Node& node,
const std::string& input_name,
float& scale,
int32_t& zero_point) {
const auto& op_type = node.OpType();
assert(op_type == "QLinearMatMul" || op_type == "QLinearConv" || op_type == "DequantizeLinear");
auto qlinear_op_type = GetQLinearOpType(node);
assert(qlinear_op_type != QLinearOpType::Unknown &&
qlinear_op_type != QLinearOpType::QuantizeLinear);
size_t scale_idx, zero_point_idx;
if (op_type == "DequantizeLinear") {
if (qlinear_op_type == QLinearOpType::DequantizeLinear) {
scale_idx = 1;
zero_point_idx = 2;
} else if (op_type == "QLinearMatMul" || op_type == "QLinearConv") {
} else if (IsQLinearBinaryOp(qlinear_op_type)) {
const auto input_defs(node.InputDefs());
if (input_name == input_defs[0]->Name()) {
scale_idx = 1;
@ -482,19 +546,20 @@ std::pair<float, int32_t> GetQuantizedInputScaleAndZeroPoint(const ModelBuilder&
scale_idx = 4;
zero_point_idx = 5;
} else {
ORT_THROW("Unknown input: " + input_name + ", for op: " + op_type);
return Status(common::ONNXRUNTIME, common::INVALID_ARGUMENT,
"Unknown input: " + input_name + ", for op: " + op_type);
}
} else {
ORT_THROW("Unsupported op: " + op_type);
return Status(common::ONNXRUNTIME, common::INVALID_ARGUMENT, "Unsupported op: " + op_type);
}
float scale = GetQuantizationScale(model_builder, node, scale_idx);
int32_t zero_point = 0;
scale = GetQuantizationScale(model_builder, node, scale_idx);
zero_point = 0;
if (node.InputDefs().size() > 2) {
zero_point = GetQuantizationZeroPoint(model_builder, node, zero_point_idx);
ORT_RETURN_IF_ERROR(GetQuantizationZeroPoint(model_builder, node, zero_point_idx, zero_point));
}
return std::make_pair(scale, zero_point);
return Status::OK();
}
#pragma endregion helpers
@ -599,14 +664,23 @@ bool BaseOpBuilder::HasExternalInitializer(ModelBuilder& model_builder, const No
#pragma region op_binary
class BinaryOpBuilder : public BaseOpBuilder {
private:
int32_t GetMinSupportedSdkVer(ModelBuilder& model_builder, const Node& node) const override;
public:
void AddInitializersToSkip(ModelBuilder& model_builder, const Node& node) override;
private:
bool IsOpSupportedImpl(ModelBuilder& /* model_builder */, const Node& node) override;
int32_t GetMinSupportedSdkVer(ModelBuilder& model_builder, const Node& node) const override;
bool IsOpSupportedImpl(ModelBuilder& model_builder, const Node& node) override;
bool HasSupportedInputs(const Node& node) override;
void AddToModelBuilderImpl(ModelBuilder& model_builder, const Node& node) override;
};
// For QLinearAdd nodes, marks the scale/zero-point inputs as initializers to
// skip; every other op type handled by this builder adds nothing.
void BinaryOpBuilder::AddInitializersToSkip(ModelBuilder& model_builder, const Node& node) {
  if (node.OpType() != "QLinearAdd") {
    return;
  }

  AddBinaryOpQuantizationScaleAndZeroPointToSkip(model_builder, node);
}
int32_t BinaryOpBuilder::GetMinSupportedSdkVer(ModelBuilder& /* model_builder */, const Node& node) const {
const auto& op(node.OpType());
if (op == "Sub" || op == "Div") {
@ -616,10 +690,28 @@ int32_t BinaryOpBuilder::GetMinSupportedSdkVer(ModelBuilder& /* model_builder */
return 27;
}
bool BinaryOpBuilder::IsOpSupportedImpl(ModelBuilder& /* model_builder */, const Node& node) {
// Input-type check for the binary op builder. QLinearAdd uses the quantized
// binary-op input check; all other op types defer to the base implementation.
bool BinaryOpBuilder::HasSupportedInputs(const Node& node) {
  if (node.OpType() == "QLinearAdd") {
    return IsBinaryOpQuantizedInputsSupported(node);
  }

  return BaseOpBuilder::HasSupportedInputs(node);
}
bool BinaryOpBuilder::IsOpSupportedImpl(ModelBuilder& model_builder, const Node& node) {
const auto& op_type(node.OpType());
const auto input_defs(node.InputDefs());
bool op_is_qlinear = op_type == "QLinearAdd";
size_t a_idx = 0, b_idx = 1;
if (op_is_qlinear) {
b_idx = 3;
}
Shape input1_shape, input2_shape;
if (!GetShape(*node.InputDefs()[0], input1_shape) ||
!GetShape(*node.InputDefs()[1], input2_shape))
if (!GetShape(*input_defs[a_idx], input1_shape) ||
!GetShape(*input_defs[b_idx], input2_shape))
return false;
const auto input1_size = input1_shape.size();
@ -631,25 +723,57 @@ bool BinaryOpBuilder::IsOpSupportedImpl(ModelBuilder& /* model_builder */, const
return false;
}
if (op_is_qlinear) {
// For QLinearAdd, we only support uint8 output now
int32_t output_type;
if (!GetType(*node.OutputDefs()[0], output_type))
return false;
if (output_type != ONNX_NAMESPACE::TensorProto_DataType_UINT8) {
LOGS_DEFAULT(VERBOSE) << "[" << op_type
<< "] output type: [" << output_type
<< "] is not supported for now";
return false;
}
// All scale/zero points are initializer scalars
// a/b/y_scale
if (!IsQuantizationScaleSupported(model_builder, node, {1, 4, 6}))
return false;
// a/b/y_zero_point
if (!IsQuantizationZeroPointSupported(model_builder, node, {2, 5, 7}))
return false;
}
return true;
}
void BinaryOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const Node& node) {
const auto& op(node.OpType());
const auto& op_type(node.OpType());
const auto input_defs(node.InputDefs());
int32_t op_code;
if (op == "Add")
bool op_is_qlinear = op_type == "QLinearAdd";
if (op_type == "Add" || op_is_qlinear)
op_code = ANEURALNETWORKS_ADD;
else if (op == "Sub")
else if (op_type == "Sub")
op_code = ANEURALNETWORKS_SUB;
else if (op == "Mul")
else if (op_type == "Mul")
op_code = ANEURALNETWORKS_MUL;
else if (op == "Div")
else if (op_type == "Div")
op_code = ANEURALNETWORKS_DIV;
else {
ORT_THROW("UnaryOpBuilder, unknown op: " + op);
ORT_THROW("UnaryOpBuilder, unknown op: " + op_type);
}
std::string input1 = node.InputDefs()[0]->Name();
std::string input2 = node.InputDefs()[1]->Name();
size_t a_idx = 0, b_idx = 1;
if (op_is_qlinear) {
b_idx = 3;
}
std::string input1 = input_defs[a_idx]->Name();
std::string input2 = input_defs[b_idx]->Name();
const auto& output = node.OutputDefs()[0]->Name();
bool input1_is_nhwc = model_builder.IsOperandNHWC(input1);
@ -660,22 +784,42 @@ void BinaryOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const N
output_is_nhwc = input1_is_nhwc;
} else if (input1_is_nhwc) {
// need transpsoe input1 back to nchw
const auto& nhwc_input = node.InputDefs()[0]->Name();
const auto& nhwc_input = input_defs[a_idx]->Name();
if (!model_builder.GetNCHWOperand(nhwc_input, input1)) {
input1 = model_builder.GetUniqueName(nhwc_input + "_nhwc_to_nchw");
TransposeNHWCToNCHW(model_builder, nhwc_input, input1);
}
} else { // input2_is_nhwc
// need transpsoe input2 back to nchw
const auto& nhwc_input = node.InputDefs()[1]->Name();
const auto& nhwc_input = input_defs[b_idx]->Name();
if (!model_builder.GetNCHWOperand(nhwc_input, input2)) {
input2 = model_builder.GetUniqueName(nhwc_input + "_nhwc_to_nchw");
TransposeNHWCToNCHW(model_builder, nhwc_input, input2);
}
}
float a_scale = 0.0f,
b_scale = 0.0f,
y_scale = 0.0f;
int32_t a_zero_point = 0,
b_zero_point = 0,
y_zero_point = 0;
if (op_is_qlinear) {
ORT_THROW_IF_ERROR(
GetBinaryOpQuantizationScaleAndZeroPoint(model_builder, node,
a_scale, b_scale, y_scale,
a_zero_point, b_zero_point, y_zero_point));
}
// Verify if the scale and zero point matchs from onnx input and nnapi input
if (op_is_qlinear) {
ORT_THROW_IF_ERROR(IsValidInputQuantizedType(model_builder, input1, a_scale, a_zero_point));
ORT_THROW_IF_ERROR(IsValidInputQuantizedType(model_builder, input2, b_scale, b_zero_point));
}
int32_t fuse_code = model_builder.FindActivation(node, *node.OutputDefs()[0]);
AddBinaryOperator(op_code, model_builder, input1, input2, fuse_code, output, output_is_nhwc);
AddBinaryOperator(op_code, model_builder, input1, input2, fuse_code, output, output_is_nhwc, y_scale, y_zero_point);
}
#pragma endregion
@ -1181,21 +1325,9 @@ bool ConvOpBuilder::HasSupportedInputs(const Node& node) {
return BaseOpBuilder::HasSupportedInputs(node);
// QLinearConv only supports input of uint8 for now
int32_t x_input_type, w_input_type;
if (!GetType(*node.InputDefs()[0], x_input_type))
if (!IsBinaryOpQuantizedInputsSupported(node))
return false;
if (!GetType(*node.InputDefs()[3], w_input_type))
return false;
if (x_input_type != ONNX_NAMESPACE::TensorProto_DataType_UINT8 || x_input_type != w_input_type) {
LOGS_DEFAULT(VERBOSE) << "[" << node.OpType()
<< "] x Input type: [" << x_input_type
<< "] w Input type: [" << w_input_type
<< "] is not supported for now";
return false;
}
return true;
}
@ -1205,13 +1337,8 @@ void ConvOpBuilder::AddInitializersToSkip(ModelBuilder& model_builder, const Nod
// skip the weight for conv as we need to transpose
if (op == "QLinearConv") {
model_builder.AddInitializerToSkip(input_defs[1]->Name()); // a_scale
model_builder.AddInitializerToSkip(input_defs[2]->Name()); // x_zero_point
AddBinaryOpQuantizationScaleAndZeroPointToSkip(model_builder, node);
model_builder.AddInitializerToSkip(input_defs[3]->Name()); // w
model_builder.AddInitializerToSkip(input_defs[4]->Name()); // w_scale
model_builder.AddInitializerToSkip(input_defs[5]->Name()); // w_zero_point
model_builder.AddInitializerToSkip(input_defs[6]->Name()); // y_scale
model_builder.AddInitializerToSkip(input_defs[7]->Name()); // y_zero_point
if (input_defs.size() > 8)
model_builder.AddInitializerToSkip(input_defs[8]->Name()); // B
} else {
@ -1337,17 +1464,13 @@ void ConvOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const Nod
y_zero_point = 0;
if (is_qlinear_conv) {
x_scale = GetQuantizationScale(model_builder, node, 1);
w_scale = GetQuantizationScale(model_builder, node, 4);
y_scale = GetQuantizationScale(model_builder, node, 6);
x_zero_point = GetQuantizationZeroPoint(model_builder, node, 2);
w_zero_point = GetQuantizationZeroPoint(model_builder, node, 5);
y_zero_point = GetQuantizationZeroPoint(model_builder, node, 7);
ORT_THROW_IF_ERROR(
GetBinaryOpQuantizationScaleAndZeroPoint(model_builder, node,
x_scale, w_scale, y_scale,
x_zero_point, w_zero_point, y_zero_point));
}
const auto& weight = input_defs[w_idx]->Name();
const auto& weight_tensor = initializers.at(weight);
bool conv_2d = false,
depthwise_conv_2d = false,
@ -1394,15 +1517,8 @@ void ConvOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const Nod
if (is_qlinear_conv) {
// Verify if the scale and zero point matchs from onnx input/weight and nnapi input/weight
const OperandType& x_operand_type = operand_types.at(input);
ORT_ENFORCE(x_operand_type.type == Type::TENSOR_QUANT8_ASYMM,
"input type is " + TypeToStr(x_operand_type.type));
VerifyValidInputQuantizedType(input, x_operand_type, x_scale, x_zero_point);
const OperandType& w_operand_type = operand_types.at(weight);
ORT_ENFORCE(w_operand_type.type == Type::TENSOR_QUANT8_ASYMM,
"input type is " + TypeToStr(w_operand_type.type));
VerifyValidInputQuantizedType(weight, w_operand_type, w_scale, w_zero_point);
ORT_THROW_IF_ERROR(IsValidInputQuantizedType(model_builder, input, x_scale, x_zero_point));
ORT_THROW_IF_ERROR(IsValidInputQuantizedType(model_builder, weight, w_scale, w_zero_point));
}
bool hasBias = (input_defs.size() > b_idx);
@ -1708,19 +1824,8 @@ bool GemmOpBuilder::HasSupportedInputs(const Node& node) {
return BaseOpBuilder::HasSupportedInputs(node);
// QLinearMatMul
int32_t a_input_type, b_input_type;
if (!GetType(*node.InputDefs()[0], a_input_type))
if (!IsBinaryOpQuantizedInputsSupported(node))
return false;
if (!GetType(*node.InputDefs()[3], b_input_type))
return false;
if (a_input_type != ONNX_NAMESPACE::TensorProto_DataType_UINT8 || a_input_type != b_input_type) {
LOGS_DEFAULT(VERBOSE) << "[" << node.OpType()
<< "] A Input type: [" << a_input_type
<< "] B Input type: [" << b_input_type
<< "] is not supported for now";
return false;
}
return true;
}
@ -1840,13 +1945,8 @@ void GemmOpBuilder::AddInitializersToSkip(ModelBuilder& model_builder, const Nod
if (transB == 0)
model_builder.AddInitializerToSkip(input_defs[1]->Name());
} else if (op == "QLinearMatMul") {
model_builder.AddInitializerToSkip(input_defs[1]->Name()); // a_scale
model_builder.AddInitializerToSkip(input_defs[2]->Name()); // a_zero_point
AddBinaryOpQuantizationScaleAndZeroPointToSkip(model_builder, node);
model_builder.AddInitializerToSkip(input_defs[3]->Name()); // b
model_builder.AddInitializerToSkip(input_defs[4]->Name()); // b_scale
model_builder.AddInitializerToSkip(input_defs[5]->Name()); // b_zero_point
model_builder.AddInitializerToSkip(input_defs[6]->Name()); // y_scale
model_builder.AddInitializerToSkip(input_defs[7]->Name()); // y_zero_point
}
}
@ -1878,13 +1978,10 @@ void GemmOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const Nod
y_zero_point = 0;
if (is_qlinear_matmul) {
a_scale = GetQuantizationScale(model_builder, node, 1);
b_scale = GetQuantizationScale(model_builder, node, 4);
y_scale = GetQuantizationScale(model_builder, node, 6);
a_zero_point = GetQuantizationZeroPoint(model_builder, node, 2);
b_zero_point = GetQuantizationZeroPoint(model_builder, node, 5);
y_zero_point = GetQuantizationZeroPoint(model_builder, node, 7);
ORT_THROW_IF_ERROR(
GetBinaryOpQuantizationScaleAndZeroPoint(model_builder, node,
a_scale, b_scale, y_scale,
a_zero_point, b_zero_point, y_zero_point));
}
uint32_t input_2_idx;
@ -1908,15 +2005,8 @@ void GemmOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const Nod
// Verify if the scale and zero point matchs from onnx input and nnapi input
if (is_qlinear_matmul) {
const OperandType& a_operand_type = operand_types.at(input1);
ORT_ENFORCE(a_operand_type.type == Type::TENSOR_QUANT8_ASYMM,
"input type is " + TypeToStr(a_operand_type.type));
VerifyValidInputQuantizedType(input1, a_operand_type, a_scale, a_zero_point);
const OperandType& b_operand_type = operand_types.at(input2);
ORT_ENFORCE(b_operand_type.type == Type::TENSOR_QUANT8_ASYMM,
"input type is " + TypeToStr(b_operand_type.type));
VerifyValidInputQuantizedType(input2, b_operand_type, b_scale, b_zero_point);
ORT_THROW_IF_ERROR(IsValidInputQuantizedType(model_builder, input1, a_scale, a_zero_point));
ORT_THROW_IF_ERROR(IsValidInputQuantizedType(model_builder, input2, b_scale, b_zero_point));
}
uint32_t bias_idx;
@ -2260,7 +2350,7 @@ void QuantizeLinearOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder,
Type output_type = Type::TENSOR_QUANT8_ASYMM;
if (input_defs.size() == 3) { // Get zero point
zero_point = GetQuantizationZeroPoint(model_builder, node, 2);
ORT_THROW_IF_ERROR(GetQuantizationZeroPoint(model_builder, node, 2, zero_point));
}
LOGS_DEFAULT(VERBOSE) << "scale: " << scale << " zp: " << zero_point;
@ -2332,7 +2422,6 @@ bool DequantizeLinearOpBuilder::IsOpSupportedImpl(ModelBuilder& model_builder, c
void DequantizeLinearOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const Node& node) {
auto& shaper(model_builder.GetShaper());
const auto& operand_indices(model_builder.GetOperandIndices());
const auto& operand_types(model_builder.GetOperandTypes());
const auto input_defs(node.InputDefs());
const auto& input = input_defs[0]->Name();
@ -2342,14 +2431,10 @@ void DequantizeLinearOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builde
float scale = GetQuantizationScale(model_builder, node, 1);
int32_t zero_point = 0;
if (input_defs.size() == 3) { // Get zero point
zero_point = GetQuantizationZeroPoint(model_builder, node, 2);
ORT_THROW_IF_ERROR(GetQuantizationZeroPoint(model_builder, node, 2, zero_point));
}
const OperandType& input_operand_type = operand_types.at(input);
ORT_ENFORCE(input_operand_type.type == Type::TENSOR_QUANT8_ASYMM,
"input type is " + TypeToStr(input_operand_type.type));
VerifyValidInputQuantizedType(input, input_operand_type, scale, zero_point);
ORT_THROW_IF_ERROR(IsValidInputQuantizedType(model_builder, input, scale, zero_point));
shaper.Identity(input, output);
const OperandType output_operand_type(Type::TENSOR_FLOAT32, shaper[output]);
@ -2455,6 +2540,7 @@ CreateOpBuilders() {
op_map.emplace("Sub", binary_op_builder);
op_map.emplace("Mul", binary_op_builder);
op_map.emplace("Div", binary_op_builder);
op_map.emplace("QLinearAdd", binary_op_builder);
}
op_map.emplace("Relu", std::make_shared<ReluOpBuilder>());

View file

@ -31,8 +31,9 @@ std::unordered_map<std::string, std::shared_ptr<IOpBuilder>> CreateOpBuilders();
void TransposeNHWCToNCHW(ModelBuilder& model_builder, const std::string& input, const std::string& output);
// Get the quantized input's scale and zero point for the given input
std::pair<float, int32_t> GetQuantizedInputScaleAndZeroPoint(const ModelBuilder& model_builder,
const Node& node, const std::string& input_name);
Status GetQuantizedInputScaleAndZeroPoint(const ModelBuilder& model_builder,
const Node& node, const std::string& input_name,
float& scale, int32_t& zero_point);
} // namespace nnapi
} // namespace onnxruntime

View file

@ -8,16 +8,14 @@
namespace onnxruntime {
namespace test {
static std::vector<int64_t> PrefixingDims(const std::vector<int64_t>& dims, size_t number_dims)
{
// Returns a copy of `dims` left-padded with 1s so that it has at least
// `number_dims` entries. When `dims` already has `number_dims` or more
// entries it is returned unchanged.
static std::vector<int64_t> PrefixingDims(const std::vector<int64_t>& dims, size_t number_dims) {
  const size_t pad_count = number_dims > dims.size() ? number_dims - dims.size() : 0;
  std::vector<int64_t> padded(pad_count, 1);
  padded.insert(padded.end(), dims.begin(), dims.end());
  return padded;
}
static int64_t CalcStrides(const std::vector<int64_t>& dims, std::vector<int64_t>& strides, bool clear1 = false)
{
static int64_t CalcStrides(const std::vector<int64_t>& dims, std::vector<int64_t>& strides, bool clear1 = false) {
strides.clear();
strides.resize(dims.size(), 1);
for (int i = (int)dims.size() - 2; i >= 0; --i) {
@ -38,13 +36,12 @@ static T clampi(int a, int min_value, int max_value) {
}
template <typename T>
void
RunQLinearMathTestFromFloat(
void RunQLinearMathTestFromFloat(
const char* op_name, std::function<float(float, float)> calc,
const std::vector<float>& a, const std::vector<int64_t>& a_shape_origin, float A_scale, T A_zero_point,
const std::vector<float>& b, const std::vector<int64_t>& b_shape_origin, float B_scale, T B_zero_point,
float C_scale, T C_zero_point)
{
float C_scale, T C_zero_point,
bool all_initializer_scale_zero_point = false) {
size_t number_dims = std::max(a_shape_origin.size(), b_shape_origin.size());
std::vector<int64_t> a_shape = PrefixingDims(a_shape_origin, number_dims);
std::vector<int64_t> b_shape = PrefixingDims(b_shape_origin, number_dims);
@ -61,7 +58,7 @@ RunQLinearMathTestFromFloat(
auto c_size = CalcStrides(c_shape, c_strides, false);
auto a_size = CalcStrides(a_shape, a_strides, true);
auto b_size = CalcStrides(b_shape, b_strides, true);
if (a_size != static_cast<int64_t>(a.size()) || b_size != static_cast<int64_t>(b.size())){
if (a_size != static_cast<int64_t>(a.size()) || b_size != static_cast<int64_t>(b.size())) {
throw std::runtime_error("Input size not match input shape!");
}
constexpr int qmax = std::numeric_limits<T>::max();
@ -73,19 +70,19 @@ RunQLinearMathTestFromFloat(
a_quantized[i] = clampi<T>(static_cast<int>(std::nearbyintf(a[i] / A_scale)) + A_zero_point, qmin, qmax);
}
test.template AddInput<T>("A", a_shape_origin, a_quantized);
test.AddInput<float>("A_scale", {}, {A_scale});
test.template AddInput<T>("A_zero_point", {}, {A_zero_point});
test.AddInput<float>("A_scale", {}, {A_scale}, all_initializer_scale_zero_point);
test.template AddInput<T>("A_zero_point", {}, {A_zero_point}, all_initializer_scale_zero_point);
std::vector<T> b_quantized(b.size());
for (size_t i = 0, sz = b.size(); i < sz; ++i) {
b_quantized[i] = clampi<T>(static_cast<int>(std::nearbyintf(b[i] / B_scale)) + B_zero_point, qmin, qmax);
}
test.template AddInput<T>("B", b_shape_origin, b_quantized);
test.AddInput<float>("B_scale", {}, {B_scale});
test.template AddInput<T>("B_zero_point", {}, {B_zero_point});
test.AddInput<float>("B_scale", {}, {B_scale}, all_initializer_scale_zero_point);
test.template AddInput<T>("B_zero_point", {}, {B_zero_point}, all_initializer_scale_zero_point);
test.AddInput<float>("C_scale", {}, {C_scale});
test.template AddInput<T>("C_zero_point", {}, {C_zero_point});
test.AddInput<float>("C_scale", {}, {C_scale}, all_initializer_scale_zero_point);
test.template AddInput<T>("C_zero_point", {}, {C_zero_point}, all_initializer_scale_zero_point);
std::vector<T> c(c_size);
for (int64_t offset = 0; offset < c_size; ++offset) {
int64_t remain = offset, a_offset = 0, b_offset = 0;
@ -107,27 +104,25 @@ RunQLinearMathTestFromFloat(
// total 32 + 31 elements to cover all path
// for add() usage tensor A
static std::vector<float> A4Add = {
0.00f, 0.25f, 0.50f, 0.75f, 1.00f, 1.25f, 1.50f, 1.75f,
2.00f, 2.25f, 2.50f, 2.75f, 3.00f, 3.50f, 3.75f, 4.00f,
-0.00f, -0.25f, -0.50f, -0.75f, -1.00f, -1.25f, -1.50f, -1.75f,
-2.00f, -2.25f, -2.50f, -2.75f, -3.00f, -4.00f, -3.75f, -3.50f,
0.00f, 0.25f, 0.50f, 0.75f, 1.00f, 1.25f, 1.50f, 1.75f,
2.00f, 2.25f, 2.50f, 2.75f, 3.00f, 3.75f, 4.25f, 4.50f,
-0.00f, -0.25f, -0.50f, -0.75f, -1.00f, -1.25f, -1.50f, -1.75f,
-2.00f, -2.25f, -2.50f, -2.75f, -3.00f, 3.75f, 3.00f
};
0.00f, 0.25f, 0.50f, 0.75f, 1.00f, 1.25f, 1.50f, 1.75f,
2.00f, 2.25f, 2.50f, 2.75f, 3.00f, 3.50f, 3.75f, 4.00f,
-0.00f, -0.25f, -0.50f, -0.75f, -1.00f, -1.25f, -1.50f, -1.75f,
-2.00f, -2.25f, -2.50f, -2.75f, -3.00f, -4.00f, -3.75f, -3.50f,
0.00f, 0.25f, 0.50f, 0.75f, 1.00f, 1.25f, 1.50f, 1.75f,
2.00f, 2.25f, 2.50f, 2.75f, 3.00f, 3.75f, 4.25f, 4.50f,
-0.00f, -0.25f, -0.50f, -0.75f, -1.00f, -1.25f, -1.50f, -1.75f,
-2.00f, -2.25f, -2.50f, -2.75f, -3.00f, 3.75f, 3.00f};
// for add() usage tensor B
static std::vector<float> B4Add = {
4.00f, 0.25f, 0.00f, -0.25f, 0.50f, -0.25f, -0.00f, 0.25f,
-1.50f, -2.25f, 2.50f, 3.75f, -3.75f, -4.00f, 5.00f, 5.50f,
4.00f, 0.25f, 0.00f, -0.25f, 0.50f, -0.25f, -0.00f, 0.25f,
-1.50f, -2.25f, 2.50f, 3.75f, -3.75f, -4.00f, 5.00f, 5.50f,
4.00f, 0.25f, 0.00f, -0.25f, 0.50f, -0.25f, -0.00f, 0.25f,
-1.50f, -2.25f, 2.50f, 3.75f, -3.75f, -4.00f, 5.00f, 5.50f,
4.00f, 0.25f, 0.00f, -0.25f, 0.50f, -0.25f, -0.00f, 0.25f,
-1.50f, -2.25f, 2.50f, 3.75f, -3.75f, -3.75f, -4.00f
};
4.00f, 0.25f, 0.00f, -0.25f, 0.50f, -0.25f, -0.00f, 0.25f,
-1.50f, -2.25f, 2.50f, 3.75f, -3.75f, -4.00f, 5.00f, 5.50f,
4.00f, 0.25f, 0.00f, -0.25f, 0.50f, -0.25f, -0.00f, 0.25f,
-1.50f, -2.25f, 2.50f, 3.75f, -3.75f, -4.00f, 5.00f, 5.50f,
4.00f, 0.25f, 0.00f, -0.25f, 0.50f, -0.25f, -0.00f, 0.25f,
-1.50f, -2.25f, 2.50f, 3.75f, -3.75f, -4.00f, 5.00f, 5.50f,
4.00f, 0.25f, 0.00f, -0.25f, 0.50f, -0.25f, -0.00f, 0.25f,
-1.50f, -2.25f, 2.50f, 3.75f, -3.75f, -3.75f, -4.00f};
static auto add_function = [](float a_dequantized, float b_dequantized) {
return a_dequantized + b_dequantized;
@ -144,7 +139,16 @@ TEST(QLinearBinaryOpTest, AddU8VectorVectorFull) {
uint8_t C_zero_point = 128;
RunQLinearMathTestFromFloat("QLinearAdd", add_function,
A, {63}, A_scale, A_zero_point, B, {63}, B_scale, B_zero_point, C_scale, C_zero_point);
A, {63}, A_scale, A_zero_point,
B, {63}, B_scale, B_zero_point,
C_scale, C_zero_point);
// NNAPI will require all the scales and zero points be initializers
RunQLinearMathTestFromFloat("QLinearAdd", add_function,
A, {63}, A_scale, A_zero_point,
B, {63}, B_scale, B_zero_point,
C_scale, C_zero_point,
true);
}
TEST(QLinearBinaryOpTest, AddU8VectorVectorBroadcast) {
@ -152,73 +156,117 @@ TEST(QLinearBinaryOpTest, AddU8VectorVectorBroadcast) {
float A_scale = 8.0f / 256.0f;
uint8_t A_zero_point = 128;
std::vector<float> B = {
4.00f, 0.25f, 0.00f, -0.25f, 0.50f, -0.25f, -0.00f, 0.25f,
-1.50f, -2.25f, 2.50f, 3.75f, -3.75f, -4.00f, 5.00f, 5.50f,
-0.50f, -1.25f, 0.75f, 1.25f, 2.25f
};
4.00f, 0.25f, 0.00f, -0.25f, 0.50f, -0.25f, -0.00f, 0.25f,
-1.50f, -2.25f, 2.50f, 3.75f, -3.75f, -4.00f, 5.00f, 5.50f,
-0.50f, -1.25f, 0.75f, 1.25f, 2.25f};
float B_scale = 8.0f / 256.0f;
uint8_t B_zero_point = 128;
float C_scale = 16.0f / 256.0f;
uint8_t C_zero_point = 128;
RunQLinearMathTestFromFloat("QLinearAdd", add_function,
A, {3, 3, 7}, A_scale, A_zero_point, B, {3, 1, 7}, B_scale, B_zero_point, C_scale, C_zero_point);
A, {3, 3, 7}, A_scale, A_zero_point,
B, {3, 1, 7}, B_scale, B_zero_point,
C_scale, C_zero_point);
// NNAPI will require all the scales and zero points be initializers
RunQLinearMathTestFromFloat("QLinearAdd", add_function,
A, {3, 3, 7}, A_scale, A_zero_point,
B, {3, 1, 7}, B_scale, B_zero_point,
C_scale, C_zero_point,
true);
}
// QLinearAdd on uint8 data: the single-element tensor B (shape {1}) is the
// first operand and the 63-element A4Add tensor (shape {63}) is the second.
TEST(QLinearBinaryOpTest, AddU8ScalarVectorFull) {
  const std::vector<float>& A(A4Add);
  float A_scale = 8.0f / 256.0f;
  uint8_t A_zero_point = 128;
  std::vector<float> B = {0.25f};
  float B_scale = 8.0f / 256.0f;
  uint8_t B_zero_point = 96;
  float C_scale = 8.0f / 256.0f;
  uint8_t C_zero_point = 100;

  RunQLinearMathTestFromFloat("QLinearAdd", add_function,
                              B, {1}, B_scale, B_zero_point,
                              A, {63}, A_scale, A_zero_point,
                              C_scale, C_zero_point);

  // NNAPI will require all the scales and zero points be initializers
  // NOTE(review): presumably the trailing `true` asks the helper to supply them
  // as initializers -- confirm against RunQLinearMathTestFromFloat.
  RunQLinearMathTestFromFloat("QLinearAdd", add_function,
                              B, {1}, B_scale, B_zero_point,
                              A, {63}, A_scale, A_zero_point,
                              C_scale, C_zero_point,
                              true);
}
// QLinearAdd on uint8 data with broadcasting: the three-element tensor B
// (shape {3, 1, 1}) is the first operand and A4Add (shape {3, 7, 3}) is the
// second.
TEST(QLinearBinaryOpTest, AddU8ScalarVectorBroadcast) {
  const std::vector<float>& A(A4Add);
  float A_scale = 8.0f / 256.0f;
  uint8_t A_zero_point = 128;
  std::vector<float> B = {0.25f, -0.25f, -0.00f};
  float B_scale = 8.0f / 256.0f;
  uint8_t B_zero_point = 96;
  float C_scale = 8.0f / 256.0f;
  uint8_t C_zero_point = 100;

  RunQLinearMathTestFromFloat("QLinearAdd", add_function,
                              B, {3, 1, 1}, B_scale, B_zero_point,
                              A, {3, 7, 3}, A_scale, A_zero_point,
                              C_scale, C_zero_point);

  // NNAPI will require all the scales and zero points be initializers
  // NOTE(review): presumably the trailing `true` asks the helper to supply them
  // as initializers -- confirm against RunQLinearMathTestFromFloat.
  RunQLinearMathTestFromFloat("QLinearAdd", add_function,
                              B, {3, 1, 1}, B_scale, B_zero_point,
                              A, {3, 7, 3}, A_scale, A_zero_point,
                              C_scale, C_zero_point,
                              true);
}
// QLinearAdd on uint8 data: the 63-element A4Add tensor (shape {63}) is the
// first operand and the single-element tensor B (shape {1}) is the second.
TEST(QLinearBinaryOpTest, AddU8VectorScalarFull) {
  const std::vector<float>& A(A4Add);
  float A_scale = 8.0f / 256.0f;
  uint8_t A_zero_point = 128;
  std::vector<float> B = {0.25f};
  float B_scale = 8.0f / 256.0f;
  uint8_t B_zero_point = 96;
  float C_scale = 16.0f / 256.0f;
  uint8_t C_zero_point = 128;

  RunQLinearMathTestFromFloat("QLinearAdd", add_function,
                              A, {63}, A_scale, A_zero_point,
                              B, {1}, B_scale, B_zero_point,
                              C_scale, C_zero_point);

  // NNAPI will require all the scales and zero points be initializers
  // NOTE(review): presumably the trailing `true` asks the helper to supply them
  // as initializers -- confirm against RunQLinearMathTestFromFloat.
  RunQLinearMathTestFromFloat("QLinearAdd", add_function,
                              A, {63}, A_scale, A_zero_point,
                              B, {1}, B_scale, B_zero_point,
                              C_scale, C_zero_point,
                              true);
}
// QLinearAdd on uint8 data with broadcasting: A4Add (shape {3, 7, 3}) is the
// first operand and the three-element tensor B (shape {1, 1, 3}) is the
// second.
TEST(QLinearBinaryOpTest, AddU8VectorScalarBroadcast) {
  const std::vector<float>& A(A4Add);
  float A_scale = 8.0f / 256.0f;
  uint8_t A_zero_point = 128;
  std::vector<float> B = {0.25f, -0.25f, -0.00f};
  float B_scale = 8.0f / 256.0f;
  uint8_t B_zero_point = 96;
  float C_scale = 16.0f / 256.0f;
  uint8_t C_zero_point = 128;

  RunQLinearMathTestFromFloat("QLinearAdd", add_function,
                              A, {3, 7, 3}, A_scale, A_zero_point,
                              B, {1, 1, 3}, B_scale, B_zero_point,
                              C_scale, C_zero_point);

  // NNAPI will require all the scales and zero points be initializers
  // NOTE(review): presumably the trailing `true` asks the helper to supply them
  // as initializers -- confirm against RunQLinearMathTestFromFloat.
  RunQLinearMathTestFromFloat("QLinearAdd", add_function,
                              A, {3, 7, 3}, A_scale, A_zero_point,
                              B, {1, 1, 3}, B_scale, B_zero_point,
                              C_scale, C_zero_point,
                              true);
}
TEST(QLinearBinaryOpTest, AddS8VectorVectorFull) {
@ -232,7 +280,9 @@ TEST(QLinearBinaryOpTest, AddS8VectorVectorFull) {
int8_t C_zero_point = -16;
RunQLinearMathTestFromFloat("QLinearAdd", add_function,
A, {63}, A_scale, A_zero_point, B, {63}, B_scale, B_zero_point, C_scale, C_zero_point);
A, {63}, A_scale, A_zero_point,
B, {63}, B_scale, B_zero_point,
C_scale, C_zero_point);
}
TEST(QLinearBinaryOpTest, AddS8VectorVectorBroadcast) {
@ -240,75 +290,83 @@ TEST(QLinearBinaryOpTest, AddS8VectorVectorBroadcast) {
float A_scale = 8.0f / 256.0f;
int8_t A_zero_point = 0;
std::vector<float> B = {
4.00f, 0.25f, 0.00f, -0.25f, 0.50f, -0.25f, -0.00f, 0.25f,
-1.50f, -2.25f, 2.50f, 3.75f, -3.75f, -4.00f, 5.00f, 5.50f,
-0.50f, -1.25f, 0.75f, 1.25f, 2.25f
};
4.00f, 0.25f, 0.00f, -0.25f, 0.50f, -0.25f, -0.00f, 0.25f,
-1.50f, -2.25f, 2.50f, 3.75f, -3.75f, -4.00f, 5.00f, 5.50f,
-0.50f, -1.25f, 0.75f, 1.25f, 2.25f};
float B_scale = 8.0f / 256.0f;
int8_t B_zero_point = 0;
float C_scale = 16.0f / 256.0f;
int8_t C_zero_point = -16;
RunQLinearMathTestFromFloat("QLinearAdd", add_function,
A, {3, 3, 7}, A_scale, A_zero_point, B, {3, 1, 7}, B_scale, B_zero_point, C_scale, C_zero_point);
A, {3, 3, 7}, A_scale, A_zero_point,
B, {3, 1, 7}, B_scale, B_zero_point,
C_scale, C_zero_point);
}
// QLinearAdd on int8 data: the single-element tensor B (shape {1}) is the
// first operand and the 63-element A4Add tensor (shape {63}) is the second.
// B uses a different scale (2/256) from A and C (8/256).
TEST(QLinearBinaryOpTest, AddS8ScalarVectorFull) {
  const std::vector<float>& A(A4Add);
  float A_scale = 8.0f / 256.0f;
  int8_t A_zero_point = 0;
  std::vector<float> B = {0.25f};
  float B_scale = 2.0f / 256.0f;
  int8_t B_zero_point = 16;
  float C_scale = 8.0f / 256.0f;
  int8_t C_zero_point = 10;

  RunQLinearMathTestFromFloat("QLinearAdd", add_function,
                              B, {1}, B_scale, B_zero_point,
                              A, {63}, A_scale, A_zero_point,
                              C_scale, C_zero_point);
}
// QLinearAdd on int8 data with broadcasting: the three-element tensor B
// (shape {3, 1, 1}) is the first operand and A4Add (shape {3, 7, 3}) is the
// second. B uses a different scale (2/256) from A and C (8/256).
TEST(QLinearBinaryOpTest, AddS8ScalarVectorBroadcast) {
  const std::vector<float>& A(A4Add);
  float A_scale = 8.0f / 256.0f;
  int8_t A_zero_point = 0;
  std::vector<float> B = {0.25f, -0.25f, -0.00f};
  float B_scale = 2.0f / 256.0f;
  int8_t B_zero_point = 16;
  float C_scale = 8.0f / 256.0f;
  int8_t C_zero_point = 10;

  RunQLinearMathTestFromFloat("QLinearAdd", add_function,
                              B, {3, 1, 1}, B_scale, B_zero_point,
                              A, {3, 7, 3}, A_scale, A_zero_point,
                              C_scale, C_zero_point);
}
// QLinearAdd on int8 data: the 63-element A4Add tensor (shape {63}) is the
// first operand and the single-element tensor B (shape {1}) is the second.
// B uses a different scale (2/256) from A and C (8/256).
TEST(QLinearBinaryOpTest, AddS8VectorScalarFull) {
  const std::vector<float>& A(A4Add);
  float A_scale = 8.0f / 256.0f;
  int8_t A_zero_point = 0;
  std::vector<float> B = {0.25f};
  float B_scale = 2.0f / 256.0f;
  int8_t B_zero_point = 16;
  float C_scale = 8.0f / 256.0f;
  int8_t C_zero_point = 10;

  RunQLinearMathTestFromFloat("QLinearAdd", add_function,
                              A, {63}, A_scale, A_zero_point,
                              B, {1}, B_scale, B_zero_point,
                              C_scale, C_zero_point);
}
// QLinearAdd on int8 data with broadcasting: A4Add (shape {3, 7, 3}) is the
// first operand and the three-element tensor B (shape {1, 1, 3}) is the
// second. B uses a different scale (2/256) from A and C (8/256).
TEST(QLinearBinaryOpTest, AddS8VectorScalarBroadcast) {
  const std::vector<float>& A(A4Add);
  float A_scale = 8.0f / 256.0f;
  int8_t A_zero_point = 0;
  std::vector<float> B = {0.25f, -0.25f, -0.00f};
  float B_scale = 2.0f / 256.0f;
  int8_t B_zero_point = 16;
  float C_scale = 8.0f / 256.0f;
  int8_t C_zero_point = 10;

  RunQLinearMathTestFromFloat("QLinearAdd", add_function,
                              A, {3, 7, 3}, A_scale, A_zero_point,
                              B, {1, 1, 3}, B_scale, B_zero_point,
                              C_scale, C_zero_point);
}
} // namespace test
} // namespace onnxruntime