mirror of
https://github.com/saymrwulf/onnxruntime.git
synced 2026-05-23 22:13:38 +00:00
[NNAPI QDQ] Add QDQAdd/Mul, update to NNAPI QDQ handling, update some test settings (#10483)
* Squashed commit of the following:
  commit 12380491a9 Author: Guoyu Wang <wanggy@outlook.com> Date: Mon Feb 7 12:59:04 2022 -0800 Add qdq mul support
  commit 9cadda7f2c Merge: 7a32847761 0f5d0a091a Author: Guoyu Wang <wanggy@outlook.com> Date: Mon Feb 7 11:24:47 2022 -0800 Merge remote-tracking branch 'origin/master' into gwang-msft/qdq_mul
  commit 7a32847761 Author: Guoyu Wang <wanggy@outlook.com> Date: Mon Feb 7 00:41:30 2022 -0800 move test case to util
  commit c1a8f0d81e Author: Guoyu Wang <wanggy@outlook.com> Date: Fri Feb 4 13:04:26 2022 -0800 update input/output check
  commit a6f0a0d504 Author: Guoyu Wang <wanggy@outlook.com> Date: Thu Feb 3 18:37:21 2022 -0800 update quantized io check functions
  commit 87f4d1dcfe Merge: 7849f07109 97b8f6f394 Author: Guoyu Wang <wanggy@outlook.com> Date: Wed Feb 2 17:22:58 2022 -0800 Merge remote-tracking branch 'origin/master' into gwang-msft/qdq_mul
  commit 7849f07109 Author: Guoyu Wang <wanggy@outlook.com> Date: Wed Feb 2 17:22:55 2022 -0800 minor update
  commit 7196cdf419 Author: Guoyu Wang <wanggy@outlook.com> Date: Wed Feb 2 10:50:10 2022 -0800 init change
  commit 84c00772a1 Merge: a8c7dce22f 7318361645 Author: Guoyu Wang <wanggy@outlook.com> Date: Tue Feb 1 18:21:17 2022 -0800 Merge remote-tracking branch 'origin/master' into gwang-msft/qdq_mul
  commit a8c7dce22f Merge: 55e536c182 ef7b4dc05c Author: Guoyu Wang <wanggy@outlook.com> Date: Tue Feb 1 13:51:04 2022 -0800 Merge remote-tracking branch 'origin/master' into gwang-msft/qdq_mul
  commit 55e536c182 Author: Guoyu Wang <wanggy@outlook.com> Date: Tue Feb 1 11:44:34 2022 -0800 address cr comments
  commit d460f5b776 Author: Guoyu Wang <wanggy@outlook.com> Date: Tue Feb 1 00:33:54 2022 -0800 fix android UT failure
  commit 52146cf06f Author: Guoyu Wang <wanggy@outlook.com> Date: Mon Jan 31 16:01:13 2022 -0800 fix build break
  commit ec6d07df8b Author: Guoyu Wang <wanggy@outlook.com> Date: Mon Jan 31 15:41:52 2022 -0800 minor update to UT
  commit 8ec8490b4f Author: Guoyu Wang <wanggy@outlook.com> Date: Mon Jan 31 15:01:30 2022 -0800 Add NNAPI support of QDQ Resize
* Update qdq add/mul test case, fix build break
* Address CR comments
* Add QLinearMul support
* remove unused params
* Address CR comments
This commit is contained in:
parent
655f490c95
commit
e4dc4e4d3c
11 changed files with 712 additions and 747 deletions
|
|
@ -61,6 +61,8 @@ QuantizedOpType GetQuantizedOpType(const NodeUnit& node_unit) {
|
|||
return QuantizedOpType::QLinearMatMul;
|
||||
else if (op_type == "QLinearAdd")
|
||||
return QuantizedOpType::QLinearAdd;
|
||||
else if (op_type == "QLinearMul")
|
||||
return QuantizedOpType::QLinearMul;
|
||||
else if (op_type == "QLinearSigmoid")
|
||||
return QuantizedOpType::QLinearSigmoid;
|
||||
else if (op_type == "QLinearAveragePool")
|
||||
|
|
@ -72,6 +74,10 @@ QuantizedOpType GetQuantizedOpType(const NodeUnit& node_unit) {
|
|||
return QuantizedOpType::QDQResize;
|
||||
else if (op_type == "AveragePool")
|
||||
return QuantizedOpType::QDQAveragePool;
|
||||
else if (op_type == "Add")
|
||||
return QuantizedOpType::QDQAdd;
|
||||
else if (op_type == "Mul")
|
||||
return QuantizedOpType::QDQMul;
|
||||
} else {
|
||||
// throw?
|
||||
// Do we want to throw here? seems got neglected last time
|
||||
|
|
@ -114,25 +120,13 @@ bool IsQuantizedPool(QuantizedOpType quant_op_type) {
|
|||
// Returns true when the quantized op takes two quantized inputs:
// QLinearMatMul, QLinearAdd, QLinearMul, QDQAdd, QDQMul, or anything
// IsQuantizedConv() accepts.
bool IsQuantizedBinaryOp(QuantizedOpType quant_op_type) {
  switch (quant_op_type) {
    case QuantizedOpType::QLinearMatMul:
    case QuantizedOpType::QLinearAdd:
    case QuantizedOpType::QLinearMul:
    case QuantizedOpType::QDQAdd:
    case QuantizedOpType::QDQMul:
      return true;
    default:
      // Every remaining op type is binary only if it is a quantized conv.
      return IsQuantizedConv(quant_op_type);
  }
}
|
||||
|
||||
bool HasValidUnaryOpQuantizedInputs(const NodeUnit& node_unit) {
|
||||
int32_t input_type;
|
||||
if (!GetType(node_unit.Inputs()[0].node_arg, input_type))
|
||||
return false;
|
||||
|
||||
if (input_type != ONNX_NAMESPACE::TensorProto_DataType_UINT8) {
|
||||
LOGS_DEFAULT(VERBOSE) << "[" << node_unit.OpType()
|
||||
<< "] Input type: [" << input_type
|
||||
<< "] is not supported for now";
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool HasValidBinaryOpQuantizedInputs(const NodeUnit& node_unit) {
|
||||
bool HasValidBinaryOpQuantizedInputTypes(const NodeUnit& node_unit) {
|
||||
auto quant_op_type = GetQuantizedOpType(node_unit);
|
||||
int32_t a_input_type, b_input_type;
|
||||
if (!IsQuantizedBinaryOp(quant_op_type)) {
|
||||
|
|
@ -146,16 +140,17 @@ bool HasValidBinaryOpQuantizedInputs(const NodeUnit& node_unit) {
|
|||
if (!GetType(inputs[1].node_arg, b_input_type))
|
||||
return false;
|
||||
|
||||
// QlinearConv supports u8u8 or u8s8
|
||||
// QLinearMatMul/Add only support u8u8
|
||||
bool is_quant_conv = IsQuantizedConv(quant_op_type);
|
||||
// QlinearConv/MatMul supports u8u8 or u8s8
|
||||
// QLinearAdd/QLinearMul only support u8u8
|
||||
bool is_quant_conv_or_matmul = IsQuantizedConv(quant_op_type) || (quant_op_type == QuantizedOpType::QLinearMatMul);
|
||||
|
||||
bool has_valid_qlinear_conv_weight =
|
||||
(b_input_type == ONNX_NAMESPACE::TensorProto_DataType_UINT8 ||
|
||||
b_input_type == ONNX_NAMESPACE::TensorProto_DataType_INT8);
|
||||
|
||||
if (a_input_type != ONNX_NAMESPACE::TensorProto_DataType_UINT8 ||
|
||||
(!is_quant_conv && a_input_type != b_input_type) ||
|
||||
(is_quant_conv && !has_valid_qlinear_conv_weight)) {
|
||||
(!is_quant_conv_or_matmul && a_input_type != b_input_type) ||
|
||||
(is_quant_conv_or_matmul && !has_valid_qlinear_conv_weight)) {
|
||||
LOGS_DEFAULT(VERBOSE) << "[" << node_unit.OpType()
|
||||
<< "] A Input type: [" << a_input_type
|
||||
<< "] B Input type: [" << b_input_type
|
||||
|
|
@ -166,182 +161,6 @@ bool HasValidBinaryOpQuantizedInputs(const NodeUnit& node_unit) {
|
|||
return true;
|
||||
}
|
||||
|
||||
bool HasValidQuantizationScales(const InitializedTensorSet& initializers, const NodeUnit& node_unit,
|
||||
const std::vector<size_t>& indices, const OpSupportCheckParams& params, bool is_input) {
|
||||
const auto& op_type = node_unit.OpType();
|
||||
auto quant_op_type = GetQuantizedOpType(node_unit);
|
||||
bool is_quant_conv = IsQuantizedConv(quant_op_type);
|
||||
bool is_quant_matmul = (quant_op_type == QuantizedOpType::QLinearMatMul);
|
||||
const auto& io_defs = is_input ? node_unit.Inputs() : node_unit.Outputs();
|
||||
for (const auto idx : indices) {
|
||||
if (idx >= io_defs.size()) {
|
||||
LOGS_DEFAULT(VERBOSE) << (is_input ? "Input" : "Output") << " index, " << idx
|
||||
<< " >= size, " << io_defs.size()
|
||||
<< " of NodeUnit: " << node_unit.Name();
|
||||
return false;
|
||||
}
|
||||
|
||||
const auto& io_def = io_defs[idx];
|
||||
if (!io_def.quant_param.has_value()) {
|
||||
LOGS_DEFAULT(VERBOSE) << "HasValidQuantizationZeroPoints, Input index, " << idx
|
||||
<< " has no quant_param";
|
||||
return false;
|
||||
}
|
||||
|
||||
const auto scale_name = io_def.quant_param->scale.Name();
|
||||
|
||||
if (!Contains(initializers, scale_name)) {
|
||||
LOGS_DEFAULT(VERBOSE) << "The scale of " << op_type << " must be an initializer tensor";
|
||||
return false;
|
||||
}
|
||||
|
||||
// If this op is Qlinear[Conv/MatMul], we want to check u8s8 support for weight tensor (or B tensor for QlinearMatMul)
|
||||
bool is_conv_matmul_weight = is_input && (is_quant_conv || is_quant_matmul) && idx == 1;
|
||||
bool is_conv_matmul_u8s8_weight = false;
|
||||
|
||||
if (is_conv_matmul_weight) {
|
||||
const auto& weight_tensor = *initializers.at(io_def.node_arg.Name());
|
||||
is_conv_matmul_u8s8_weight = weight_tensor.data_type() == ONNX_NAMESPACE::TensorProto_DataType_INT8;
|
||||
}
|
||||
|
||||
const auto& scale_tensor = *initializers.at(scale_name);
|
||||
int64_t scales_dim = scale_tensor.dims().empty() ? 1 : scale_tensor.dims()[0];
|
||||
if (!is_conv_matmul_u8s8_weight) {
|
||||
if (scales_dim != 1) {
|
||||
LOGS_DEFAULT(VERBOSE) << op_type << " does not support per-channel quantization, "
|
||||
<< " for now, only u8s8 QlinearConv supports per-channel quantization on API 29+";
|
||||
return false;
|
||||
}
|
||||
} else if (scales_dim != 1) {
|
||||
// For u8s8 Qlinear[Conv/MatMul], we support
|
||||
// 1. Per-tensor, the weight will be transformed to uint8 later
|
||||
// 2. Per-channel, only from Android API level 29
|
||||
if (is_quant_matmul) {
|
||||
LOGS_DEFAULT(VERBOSE) << "QLinearMatMul does not support per-channel quantization";
|
||||
return false;
|
||||
}
|
||||
|
||||
if (params.android_feature_level < ANEURALNETWORKS_FEATURE_LEVEL_3) {
|
||||
LOGS_DEFAULT(VERBOSE) << op_type << " only supports per-channel quantization on Android API 29+, "
|
||||
<< "system NNAPI feature level: " << params.android_feature_level;
|
||||
return false;
|
||||
}
|
||||
|
||||
const auto& weight_tensor = *initializers.at(io_def.node_arg.Name());
|
||||
if (weight_tensor.dims()[0] != scales_dim) {
|
||||
LOGS_DEFAULT(VERBOSE) << op_type << " mismatch int8 per-channel quantization weight,"
|
||||
<< " weight dimension[0] " << weight_tensor.dims()[0]
|
||||
<< " scale dimension " << scales_dim;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool HasValidQuantizationZeroPoints(const InitializedTensorSet& initializers, const NodeUnit& node_unit,
|
||||
const std::vector<size_t>& indices, bool is_input) {
|
||||
const auto& op_type = node_unit.OpType();
|
||||
auto quant_op_type = GetQuantizedOpType(node_unit);
|
||||
bool is_quant_conv = IsQuantizedConv(quant_op_type);
|
||||
bool is_quant_matmul = (quant_op_type == QuantizedOpType::QLinearMatMul);
|
||||
|
||||
const auto& io_defs = is_input ? node_unit.Inputs() : node_unit.Outputs();
|
||||
for (const auto idx : indices) {
|
||||
if (idx >= io_defs.size()) {
|
||||
LOGS_DEFAULT(VERBOSE) << "HasValidQuantizationZeroPoints, "
|
||||
<< (is_input ? "Input" : "Output") << " index, " << idx
|
||||
<< " >= size, " << io_defs.size();
|
||||
return false;
|
||||
}
|
||||
|
||||
const auto& io_def = io_defs[idx];
|
||||
if (!io_def.quant_param.has_value()) {
|
||||
LOGS_DEFAULT(VERBOSE) << "HasValidQuantizationZeroPoints, Input index, " << idx
|
||||
<< " has no quant_param";
|
||||
return false;
|
||||
}
|
||||
|
||||
// zero point is optional here
|
||||
if (!io_def.quant_param->zero_point)
|
||||
return true;
|
||||
|
||||
const auto& zero_point_name = io_def.quant_param->zero_point->Name();
|
||||
if (!Contains(initializers, zero_point_name)) {
|
||||
LOGS_DEFAULT(VERBOSE) << "The zero point of " << op_type << " must be an initializer tensor";
|
||||
return false;
|
||||
}
|
||||
|
||||
bool is_conv_matmul_weight = is_input && (is_quant_conv || is_quant_matmul) && idx == 1;
|
||||
bool is_conv_matmul_u8s8_weight = false;
|
||||
|
||||
if (is_conv_matmul_weight) {
|
||||
const auto& weight_tensor = *initializers.at(io_def.node_arg.Name());
|
||||
is_conv_matmul_u8s8_weight = weight_tensor.data_type() == ONNX_NAMESPACE::TensorProto_DataType_INT8;
|
||||
}
|
||||
|
||||
const auto& zero_tensor = *initializers.at(zero_point_name);
|
||||
int64_t zero_dim = zero_tensor.dims().empty() ? 1 : zero_tensor.dims()[0];
|
||||
|
||||
if (!is_conv_matmul_u8s8_weight) {
|
||||
if (zero_dim != 1) {
|
||||
LOGS_DEFAULT(VERBOSE) << op_type << " does not support per-channel quantization, "
|
||||
<< " for now, only u8s8 QlinearConv supports per-channel quantization on API 29+";
|
||||
return false;
|
||||
}
|
||||
} else {
|
||||
// For u8s8 Qlinear[Conv/MatMul], we support
|
||||
// 1. Per-tensor, the weight will be transformed to uint8 later
|
||||
// 2. Per-channel, only from Android API level 29
|
||||
if (zero_tensor.data_type() != ONNX_NAMESPACE::TensorProto_DataType_INT8) {
|
||||
LOGS_DEFAULT(VERBOSE) << "u8s8 Qlinear[Conv/MatMul] only supports int8 zero point for weight, "
|
||||
<< "actual zero point type: [" << zero_tensor.data_type() << "]";
|
||||
return false;
|
||||
}
|
||||
|
||||
if (zero_dim != 1) {
|
||||
if (is_quant_matmul) {
|
||||
LOGS_DEFAULT(VERBOSE) << "QLinearMatMul does not support per-channel quantization";
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
// For onnx, u8s8 QlinearConv, the weight zero point can be a scalar,
|
||||
// or a tensor with same channel as weight, for NNAPI we only support it be
|
||||
// 0 (scalar) or all 0 (tensor), NNAPI will assume the zero point for per-channel
|
||||
// quantization is 0 there is no input for it
|
||||
const auto& weight_tensor = *initializers.at(io_def.node_arg.Name());
|
||||
if (weight_tensor.dims()[0] != zero_dim && zero_dim != 1) {
|
||||
LOGS_DEFAULT(VERBOSE) << op_type << " mismatch int8 per-channel quantization weight,"
|
||||
<< " weight dimension[0] " << weight_tensor.dims()[0]
|
||||
<< " zero point dimension " << zero_dim;
|
||||
return false;
|
||||
}
|
||||
|
||||
std::vector<uint8_t> unpacked_tensor;
|
||||
auto status = onnxruntime::utils::UnpackInitializerData(zero_tensor, node_unit.ModelPath(), unpacked_tensor);
|
||||
if (!status.IsOK()) {
|
||||
LOGS_DEFAULT(ERROR) << "Qlinear[Conv/MatMul] error when unpack zero tensor: " << zero_point_name
|
||||
<< ", error msg: " << status.ErrorMessage();
|
||||
return false;
|
||||
}
|
||||
|
||||
// Verify all onnx weight zero point(s) are 0(s)
|
||||
const int8_t* zero_points = reinterpret_cast<const int8_t*>(unpacked_tensor.data());
|
||||
for (size_t i = 0; i < unpacked_tensor.size(); i++) {
|
||||
if (zero_points[i] != 0) {
|
||||
LOGS_DEFAULT(VERBOSE) << "u8s8 Qlinear[Conv/MatMul] only support 0 as zero point, "
|
||||
<< "zero_points[" << i << "] has value: " << zero_points[i];
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
common::Status GetQuantizationScaleAndZeroPoint(
|
||||
const InitializedTensorSet& initializers, const NodeUnitIODef& io_def, const Path& model_path,
|
||||
float& scale, int32_t& zero_point) {
|
||||
|
|
@ -387,8 +206,8 @@ common::Status GetQuantizationScaleAndZeroPoint(
|
|||
|
||||
common::Status GetQuantizationScaleAndZeroPoint(
|
||||
const InitializedTensorSet& initializers, const NodeUnit& node_unit, const std::string& name,
|
||||
float& scale, int32_t& zero_point, bool is_input) {
|
||||
const auto& io_defs = is_input ? node_unit.Inputs() : node_unit.Outputs();
|
||||
float& scale, int32_t& zero_point, IOKind io_kind) {
|
||||
const auto& io_defs = io_kind == IOKind::Input ? node_unit.Inputs() : node_unit.Outputs();
|
||||
for (const auto& io_def : io_defs) {
|
||||
if (io_def.node_arg.Name() == name)
|
||||
return GetQuantizationScaleAndZeroPoint(initializers, io_def, node_unit.ModelPath(),
|
||||
|
|
|
|||
|
|
@ -82,12 +82,14 @@ enum class QuantizedOpType : uint8_t {
|
|||
QLinearAdd,
|
||||
QLinearSigmoid,
|
||||
QLinearAveragePool,
|
||||
QLinearMul,
|
||||
// Not yet supported
|
||||
// QLinearMul,
|
||||
// QLinearReduceMean,
|
||||
QDQConv,
|
||||
QDQResize,
|
||||
QDQAveragePool,
|
||||
QDQAdd,
|
||||
QDQMul,
|
||||
// TODO, add other QDQ NodeUnit types
|
||||
};
|
||||
|
||||
|
|
@ -97,6 +99,11 @@ enum class ConvType : uint8_t {
|
|||
Grouped,
|
||||
};
|
||||
|
||||
// Selects whether an operation refers to the inputs or the outputs of a NodeUnit.
enum class IOKind : uint8_t {
  Input = 0,   // use the NodeUnit's inputs
  Output = 1,  // use the NodeUnit's outputs
};
|
||||
|
||||
QuantizedOpType GetQuantizedOpType(const NodeUnit& node_unit);
|
||||
|
||||
// Return the type of the conv ops,
|
||||
|
|
@ -113,18 +120,8 @@ bool IsQuantizedPool(QuantizedOpType quant_op_type);
|
|||
// Such as QLinearConv, QLinearMatMul, QLinearAdd, QDQConv,...
|
||||
bool IsQuantizedBinaryOp(QuantizedOpType quant_op_type);
|
||||
|
||||
// Check if a qlinear unary op has valid inputs, Qlinear[Sigmoid/AveragePool]
|
||||
bool HasValidUnaryOpQuantizedInputs(const NodeUnit& node_unit);
|
||||
// Check if a qlinear binary op has valid inputs, Qlinear[Conv/MatMul/Add]
|
||||
bool HasValidBinaryOpQuantizedInputs(const NodeUnit& node_unit);
|
||||
|
||||
// Check if a qlinear op has valid scales for given indices
|
||||
bool HasValidQuantizationScales(const InitializedTensorSet& initializers, const NodeUnit& node_unit,
|
||||
const std::vector<size_t>& indices, const OpSupportCheckParams& params, bool is_input);
|
||||
|
||||
// Check if a qlinear op has valid zero points for given indices
|
||||
bool HasValidQuantizationZeroPoints(const InitializedTensorSet& initializers, const NodeUnit& node_unit,
|
||||
const std::vector<size_t>& indices, bool is_input);
|
||||
bool HasValidBinaryOpQuantizedInputTypes(const NodeUnit& node_unit);
|
||||
|
||||
common::Status GetQuantizationScaleAndZeroPoint(
|
||||
const InitializedTensorSet& initializers, const NodeUnitIODef& io_def, const Path& model_path,
|
||||
|
|
@ -132,7 +129,7 @@ common::Status GetQuantizationScaleAndZeroPoint(
|
|||
|
||||
common::Status GetQuantizationScaleAndZeroPoint(
|
||||
const InitializedTensorSet& initializers, const NodeUnit& node_unit, const std::string& name,
|
||||
float& scale, int32_t& zero_point, bool is_input = true);
|
||||
float& scale, int32_t& zero_point, IOKind io_kind = IOKind::Input);
|
||||
|
||||
// Get Shape/Type of a NodeArg
|
||||
// TODO, move to shared_utils
|
||||
|
|
|
|||
|
|
@ -210,7 +210,7 @@ static Status GetInputDataType(
|
|||
// TODO, verify the scale and zero point match if there are multiple op using same input
|
||||
const auto* node_unit = all_quantized_op_inputs.at(name)[0];
|
||||
ORT_RETURN_IF_ERROR(GetQuantizationScaleAndZeroPoint(
|
||||
initializers, *node_unit, name, scale, zero_point, true /* is_input */));
|
||||
initializers, *node_unit, name, scale, zero_point, IOKind::Input));
|
||||
break;
|
||||
}
|
||||
// case ONNX_NAMESPACE::TensorProto_DataType_INT8:
|
||||
|
|
|
|||
|
|
@ -452,7 +452,7 @@ static Status HandleAutoPad(const Shape& input_shape,
|
|||
}
|
||||
|
||||
// Get scales and zero points for the qlinear binary ops (which has 2 input and 1 output)
|
||||
// QLinearConv, QLinearMatmul, QLinearAdd
|
||||
// QLinearConv, QLinearMatmul, QLinearAdd, QLinearMul
|
||||
// a, b are inputs, and y is output
|
||||
static Status GetBinaryOpQuantizationScaleAndZeroPoint(
|
||||
const InitializedTensorSet& initializers, const NodeUnit& node_unit,
|
||||
|
|
@ -656,8 +656,11 @@ class BinaryOpBuilder : public BaseOpBuilder {
|
|||
};
|
||||
|
||||
/* static */ bool BinaryOpBuilder::IsQuantizedOp(const NodeUnit& node_unit) {
|
||||
// TODO, add support for QDQ NodeUnit
|
||||
return node_unit.OpType() == "QLinearAdd";
|
||||
const auto quant_type = GetQuantizedOpType(node_unit);
|
||||
return quant_type == QuantizedOpType::QLinearAdd ||
|
||||
quant_type == QuantizedOpType::QLinearMul ||
|
||||
quant_type == QuantizedOpType::QDQAdd ||
|
||||
quant_type == QuantizedOpType::QDQMul;
|
||||
}
|
||||
|
||||
void BinaryOpBuilder::AddInitializersToSkip(ModelBuilder& model_builder, const NodeUnit& node_unit) const {
|
||||
|
|
@ -680,6 +683,7 @@ void BinaryOpBuilder::AddInitializersToSkip(ModelBuilder& model_builder, const N
|
|||
"Mul",
|
||||
"Div",
|
||||
"QLinearAdd",
|
||||
"QLinearMul",
|
||||
"Pow",
|
||||
});
|
||||
}
|
||||
|
|
@ -690,12 +694,12 @@ Status BinaryOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const
|
|||
|
||||
int32_t op_code;
|
||||
bool add_activation = true;
|
||||
bool op_is_qlinear = op_type == "QLinearAdd";
|
||||
if (op_type == "Add" || op_is_qlinear) {
|
||||
bool is_quant_op = IsQuantizedOp(node_unit);
|
||||
if (op_type == "Add" || op_type == "QLinearAdd") { // Add/QLinearAdd/QDQAdd
|
||||
op_code = ANEURALNETWORKS_ADD;
|
||||
} else if (op_type == "Sub") {
|
||||
op_code = ANEURALNETWORKS_SUB;
|
||||
} else if (op_type == "Mul") {
|
||||
} else if (op_type == "Mul" || op_type == "QLinearMul") { // Mul/QLinearMul/QDQMul
|
||||
op_code = ANEURALNETWORKS_MUL;
|
||||
} else if (op_type == "Div") {
|
||||
op_code = ANEURALNETWORKS_DIV;
|
||||
|
|
@ -721,7 +725,7 @@ Status BinaryOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const
|
|||
b_zero_point = 0,
|
||||
y_zero_point = 0;
|
||||
|
||||
if (op_is_qlinear) {
|
||||
if (is_quant_op) {
|
||||
ORT_RETURN_IF_ERROR(GetBinaryOpQuantizationScaleAndZeroPoint(
|
||||
model_builder.GetInitializerTensors(), node_unit,
|
||||
a_scale, b_scale, y_scale,
|
||||
|
|
@ -729,7 +733,7 @@ Status BinaryOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const
|
|||
}
|
||||
|
||||
// Verify that the scale and zero point of the onnx input match those of the nnapi input
|
||||
if (op_is_qlinear) {
|
||||
if (is_quant_op) {
|
||||
ORT_RETURN_IF_ERROR(IsValidInputQuantizedType(model_builder, input1, a_scale, a_zero_point));
|
||||
ORT_RETURN_IF_ERROR(IsValidInputQuantizedType(model_builder, input2, b_scale, b_zero_point));
|
||||
}
|
||||
|
|
@ -2717,6 +2721,7 @@ static OpBuilderRegistrations CreateOpBuilderRegistrations() {
|
|||
NNAPI_EP_ADD_SHARED_OP_BUILDER("Mul", BinaryOpBuilder);
|
||||
NNAPI_EP_ADD_SHARED_OP_BUILDER("Pow", BinaryOpBuilder);
|
||||
NNAPI_EP_ADD_SHARED_OP_BUILDER("QLinearAdd", BinaryOpBuilder);
|
||||
NNAPI_EP_ADD_SHARED_OP_BUILDER("QLinearMul", BinaryOpBuilder);
|
||||
NNAPI_EP_ADD_SHARED_OP_BUILDER("Sub", BinaryOpBuilder);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -22,7 +22,21 @@ struct OpSupportCheckerRegistrations {
|
|||
std::unordered_map<std::string, const IOpSupportChecker*> op_support_checker_map;
|
||||
};
|
||||
|
||||
bool HasExternalInitializer(const InitializedTensorSet& initializers, const NodeUnit& node_unit) {
|
||||
template <class T>
|
||||
void CreateSharedOpSupportCheckerImpl(const std::string& op_type,
|
||||
OpSupportCheckerRegistrations& op_registrations,
|
||||
const std::vector<std::string>& op_types) {
|
||||
// The shared OpSupportChecker is already in the OpSupportCheckerRegistrations
|
||||
if (op_registrations.op_support_checker_map.find(op_type) != op_registrations.op_support_checker_map.cend())
|
||||
return;
|
||||
|
||||
op_registrations.support_checkers.push_back(std::make_unique<T>());
|
||||
for (const auto& op : op_types) {
|
||||
op_registrations.op_support_checker_map.emplace(op, op_registrations.support_checkers.back().get());
|
||||
}
|
||||
}
|
||||
|
||||
static bool HasExternalInitializer(const InitializedTensorSet& initializers, const NodeUnit& node_unit) {
|
||||
const auto is_ext_initializer =
|
||||
[&](const NodeArg& node_arg) {
|
||||
const auto& input_name(node_arg.Name());
|
||||
|
|
@ -58,18 +72,200 @@ bool HasExternalInitializer(const InitializedTensorSet& initializers, const Node
|
|||
return false;
|
||||
}
|
||||
|
||||
template <class T>
|
||||
void CreateSharedOpSupportCheckerImpl(const std::string& op_type,
|
||||
OpSupportCheckerRegistrations& op_registrations,
|
||||
const std::vector<std::string>& op_types) {
|
||||
// The shared OpSupportChecker is already in the OpSupportCheckerRegistrations
|
||||
if (op_registrations.op_support_checker_map.find(op_type) != op_registrations.op_support_checker_map.cend())
|
||||
return;
|
||||
|
||||
op_registrations.support_checkers.push_back(std::make_unique<T>());
|
||||
for (const auto& op : op_types) {
|
||||
op_registrations.op_support_checker_map.emplace(op, op_registrations.support_checkers.back().get());
|
||||
static bool IsQuantizationScaleSupported(const InitializedTensorSet& initializers,
|
||||
const NodeUnitIODef& io_def,
|
||||
const OpSupportCheckParams& params,
|
||||
const std::string& op_type,
|
||||
bool is_quant_matmul,
|
||||
bool is_conv_matmul_u8s8_weight) {
|
||||
const auto scale_name = io_def.quant_param->scale.Name();
|
||||
auto it = initializers.find(scale_name);
|
||||
if (it == initializers.cend()) {
|
||||
LOGS_DEFAULT(VERBOSE) << "The scale of " << op_type << " must be an initializer tensor";
|
||||
return false;
|
||||
}
|
||||
|
||||
const auto& scale_tensor = *it->second;
|
||||
int64_t scales_dim = scale_tensor.dims().empty() ? 1 : scale_tensor.dims()[0];
|
||||
if (!is_conv_matmul_u8s8_weight) {
|
||||
if (scales_dim != 1) {
|
||||
LOGS_DEFAULT(VERBOSE) << op_type << " does not support per-channel quantization, "
|
||||
<< " for now, only u8s8 QlinearConv supports per-channel quantization on API 29+";
|
||||
return false;
|
||||
}
|
||||
} else if (scales_dim != 1) {
|
||||
// For u8s8 Qlinear[Conv/MatMul], we support
|
||||
// 1. Per-tensor, the weight will be transformed to uint8 later
|
||||
// 2. Per-channel, only from Android API level 29
|
||||
if (is_quant_matmul) {
|
||||
LOGS_DEFAULT(VERBOSE) << "QLinearMatMul does not support per-channel quantization";
|
||||
return false;
|
||||
}
|
||||
|
||||
if (params.android_feature_level < ANEURALNETWORKS_FEATURE_LEVEL_3) {
|
||||
LOGS_DEFAULT(VERBOSE) << op_type << " only supports per-channel quantization on Android API 29+, "
|
||||
<< "system NNAPI feature level: " << params.android_feature_level;
|
||||
return false;
|
||||
}
|
||||
|
||||
Shape weight_shape;
|
||||
if (!GetShape(io_def.node_arg, weight_shape))
|
||||
return false;
|
||||
|
||||
if (weight_shape[0] != scales_dim) {
|
||||
LOGS_DEFAULT(VERBOSE) << op_type << " mismatch int8 per-channel quantization weight,"
|
||||
<< " weight dimension[0] " << weight_shape[0]
|
||||
<< " scale dimension " << scales_dim;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool IsQuantizationZeroPointSupported(const InitializedTensorSet& initializers,
|
||||
const NodeUnitIODef& io_def,
|
||||
const std::string& op_type,
|
||||
const Path& model_path,
|
||||
bool is_quant_matmul,
|
||||
bool is_conv_matmul_u8s8_weight) {
|
||||
// zero point is optional here
|
||||
if (!io_def.quant_param->zero_point)
|
||||
return true;
|
||||
|
||||
const auto& zero_point_name = io_def.quant_param->zero_point->Name();
|
||||
if (!Contains(initializers, zero_point_name)) {
|
||||
LOGS_DEFAULT(VERBOSE) << "The zero point of " << op_type << " must be an initializer tensor";
|
||||
return false;
|
||||
}
|
||||
|
||||
const auto& zero_tensor = *initializers.at(zero_point_name);
|
||||
int64_t zero_dim = zero_tensor.dims().empty() ? 1 : zero_tensor.dims()[0];
|
||||
|
||||
if (!is_conv_matmul_u8s8_weight) {
|
||||
if (zero_dim != 1) {
|
||||
LOGS_DEFAULT(VERBOSE) << op_type << " does not support per-channel quantization, "
|
||||
<< " for now, only u8s8 QlinearConv supports per-channel quantization on API 29+";
|
||||
return false;
|
||||
}
|
||||
} else {
|
||||
// For u8s8 Qlinear[Conv/MatMul], we support
|
||||
// 1. Per-tensor, the weight will be transformed to uint8 later
|
||||
// 2. Per-channel, only from Android API level 29
|
||||
if (zero_tensor.data_type() != ONNX_NAMESPACE::TensorProto_DataType_INT8) {
|
||||
LOGS_DEFAULT(VERBOSE) << "u8s8 Qlinear[Conv/MatMul] only supports int8 zero point for weight, "
|
||||
<< "actual zero point type: [" << zero_tensor.data_type() << "]";
|
||||
return false;
|
||||
}
|
||||
|
||||
if (zero_dim != 1) {
|
||||
if (is_quant_matmul) {
|
||||
LOGS_DEFAULT(VERBOSE) << "QLinearMatMul does not support per-channel quantization";
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
// For onnx, u8s8 QlinearConv, the weight zero point can be a scalar,
|
||||
// or a tensor with same channel as weight, for NNAPI we only support it be
|
||||
// 0 (scalar) or all 0 (tensor), NNAPI will assume the zero point for per-channel
|
||||
// quantization is 0 there is no input for it
|
||||
Shape weight_shape;
|
||||
if (!GetShape(io_def.node_arg, weight_shape))
|
||||
return false;
|
||||
|
||||
if (weight_shape[0] != zero_dim && zero_dim != 1) {
|
||||
LOGS_DEFAULT(VERBOSE) << op_type << " mismatch int8 per-channel quantization weight,"
|
||||
<< " weight dimension[0] " << weight_shape[0]
|
||||
<< " zero point dimension " << zero_dim;
|
||||
return false;
|
||||
}
|
||||
|
||||
std::vector<uint8_t> unpacked_tensor;
|
||||
auto status = onnxruntime::utils::UnpackInitializerData(zero_tensor, model_path, unpacked_tensor);
|
||||
if (!status.IsOK()) {
|
||||
LOGS_DEFAULT(ERROR) << "Qlinear[Conv/MatMul] error when unpack zero tensor: " << zero_point_name
|
||||
<< ", error msg: " << status.ErrorMessage();
|
||||
return false;
|
||||
}
|
||||
|
||||
// Verify all onnx weight zero point(s) are 0(s)
|
||||
const int8_t* zero_points = reinterpret_cast<const int8_t*>(unpacked_tensor.data());
|
||||
for (size_t i = 0; i < unpacked_tensor.size(); i++) {
|
||||
if (zero_points[i] != 0) {
|
||||
LOGS_DEFAULT(VERBOSE) << "u8s8 Qlinear[Conv/MatMul] only support 0 as zero point, "
|
||||
<< "zero_points[" << i << "] has value: " << zero_points[i];
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// Check if the given quantized input(s) or output(s) is supported
|
||||
static bool IsQuantizedIOSupported(const InitializedTensorSet& initializers, const NodeUnit& node_unit,
|
||||
const std::vector<size_t>& indices, const OpSupportCheckParams& params, IOKind io_kind) {
|
||||
const auto& op_type = node_unit.OpType();
|
||||
auto quant_op_type = GetQuantizedOpType(node_unit);
|
||||
|
||||
ORT_ENFORCE(quant_op_type != QuantizedOpType::Unknown, "[", op_type, "] is not a quantized op");
|
||||
|
||||
bool is_input = io_kind == IOKind::Input;
|
||||
bool is_quant_conv = IsQuantizedConv(quant_op_type);
|
||||
bool is_quant_matmul = (quant_op_type == QuantizedOpType::QLinearMatMul);
|
||||
const auto& io_defs = is_input ? node_unit.Inputs() : node_unit.Outputs();
|
||||
|
||||
for (const auto idx : indices) {
|
||||
if (idx >= io_defs.size()) {
|
||||
LOGS_DEFAULT(VERBOSE) << (is_input ? "Input" : "Output") << " index, " << idx
|
||||
<< " >= size, " << io_defs.size()
|
||||
<< " of NodeUnit: " << node_unit.Name();
|
||||
return false;
|
||||
}
|
||||
|
||||
const auto& io_def = io_defs[idx];
|
||||
ORT_ENFORCE(io_def.quant_param.has_value(), "Input index, ", idx, " has no quant_param");
|
||||
|
||||
// If this op is Qlinear[Conv/MatMul], we want to check u8s8 support for weight tensor (or B tensor for QlinearMatMul)
|
||||
bool is_conv_matmul_weight = is_input && (is_quant_conv || is_quant_matmul) && idx == 1;
|
||||
bool is_conv_matmul_u8s8_weight = false;
|
||||
|
||||
if (is_conv_matmul_weight) {
|
||||
int32_t weight_type;
|
||||
if (!GetType(io_def.node_arg, weight_type))
|
||||
return false;
|
||||
is_conv_matmul_u8s8_weight = weight_type == ONNX_NAMESPACE::TensorProto_DataType_INT8;
|
||||
}
|
||||
|
||||
int32_t input_type;
|
||||
if (!GetType(io_def.node_arg, input_type))
|
||||
return false;
|
||||
|
||||
// We only support u8 for most of the inputs and all outputs, with the exception for Quantized MatMul and Conv,
|
||||
// which allows s8 weight (u8s8)
|
||||
// TODO, add support of s8s8
|
||||
if (input_type != ONNX_NAMESPACE::TensorProto_DataType_UINT8 &&
|
||||
!(input_type == ONNX_NAMESPACE::TensorProto_DataType_INT8 && is_conv_matmul_u8s8_weight)) {
|
||||
LOGS_DEFAULT(VERBOSE) << op_type << "NodeUnit [" << node_unit.Name()
|
||||
<< "], type [" << op_type << "]'s "
|
||||
<< (is_input ? "Input" : "Output") << " index [" << idx
|
||||
<< "] has unsupported type [" << input_type << "]";
|
||||
return false;
|
||||
}
|
||||
|
||||
// Check scale and zero point
|
||||
if (!IsQuantizationScaleSupported(initializers, io_def, params, op_type,
|
||||
is_quant_matmul, is_conv_matmul_u8s8_weight)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!IsQuantizationZeroPointSupported(initializers, io_def, op_type, node_unit.ModelPath(),
|
||||
is_quant_matmul, is_conv_matmul_u8s8_weight)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
#pragma endregion helpers
|
||||
|
|
@ -100,7 +296,9 @@ class BaseOpSupportChecker : public IOpSupportChecker {
|
|||
return ANEURALNETWORKS_FEATURE_LEVEL_1;
|
||||
}
|
||||
|
||||
virtual bool HasSupportedInputsImpl(const NodeUnit& node_unit) const;
|
||||
virtual bool HasSupportedInputOutputsImpl(
|
||||
const InitializedTensorSet& initializers, const NodeUnit& node_unit,
|
||||
const OpSupportCheckParams& params) const;
|
||||
|
||||
virtual int GetMinSupportedOpSet(const NodeUnit& /* node_unit */) const { return 1; }
|
||||
virtual int GetMaxSupportedOpSet(const NodeUnit& /* node_unit */) const { return 15; }
|
||||
|
|
@ -112,7 +310,8 @@ class BaseOpSupportChecker : public IOpSupportChecker {
|
|||
|
||||
private:
|
||||
bool HasSupportedOpSet(const NodeUnit& node_unit) const;
|
||||
bool HasSupportedInputs(const NodeUnit& node_unit) const;
|
||||
bool HasSupportedInputOutputs(const InitializedTensorSet& initializers, const NodeUnit& node_unit,
|
||||
const OpSupportCheckParams& params) const;
|
||||
};
|
||||
|
||||
/* static */ void BaseOpSupportChecker::CreateSharedOpSupportChecker(
|
||||
|
|
@ -138,7 +337,7 @@ bool BaseOpSupportChecker::IsOpSupported(const InitializedTensorSet& initializer
|
|||
if (!IsNodeUnitTypeSupported(node_unit))
|
||||
return false;
|
||||
|
||||
if (!HasSupportedInputs(node_unit))
|
||||
if (!HasSupportedInputOutputs(initializers, node_unit, params))
|
||||
return false;
|
||||
|
||||
// We do not support external initializers for now
|
||||
|
|
@ -151,7 +350,8 @@ bool BaseOpSupportChecker::IsOpSupported(const InitializedTensorSet& initializer
|
|||
return IsOpSupportedImpl(initializers, node_unit, params);
|
||||
}
|
||||
|
||||
bool BaseOpSupportChecker::HasSupportedInputs(const NodeUnit& node_unit) const {
|
||||
bool BaseOpSupportChecker::HasSupportedInputOutputs(const InitializedTensorSet& initializers, const NodeUnit& node_unit,
|
||||
const OpSupportCheckParams& params) const {
|
||||
// We do not support unknown(null) input shape
|
||||
auto has_supported_shape = [](const NodeArg& node_arg, const std::string& name, const std::string op_type) {
|
||||
const auto* shape_proto = node_arg.Shape();
|
||||
|
|
@ -185,10 +385,12 @@ bool BaseOpSupportChecker::HasSupportedInputs(const NodeUnit& node_unit) const {
|
|||
return false;
|
||||
}
|
||||
}
|
||||
return HasSupportedInputsImpl(node_unit);
|
||||
return HasSupportedInputOutputsImpl(initializers, node_unit, params);
|
||||
}
|
||||
|
||||
bool BaseOpSupportChecker::HasSupportedInputsImpl(const NodeUnit& node_unit) const {
|
||||
bool BaseOpSupportChecker::HasSupportedInputOutputsImpl(
|
||||
const InitializedTensorSet& /* initializers */, const NodeUnit& node_unit,
|
||||
const OpSupportCheckParams& /* params */) const {
|
||||
// We only check the type of input 0 by default
|
||||
// specific op builder can override this
|
||||
const auto& input = node_unit.Inputs()[0].node_arg;
|
||||
|
|
@ -245,8 +447,13 @@ class BinaryOpSupportChecker : public BaseOpSupportChecker {
|
|||
const OpSupportCheckParams& params) const override;
|
||||
bool IsOpSupportedImpl(const InitializedTensorSet& initializers, const NodeUnit& node_unit,
|
||||
const OpSupportCheckParams& params) const override;
|
||||
bool HasSupportedInputsImpl(const NodeUnit& node_unit) const override;
|
||||
bool HasSupportedInputOutputsImpl(
|
||||
const InitializedTensorSet& initializers, const NodeUnit& node_unit,
|
||||
const OpSupportCheckParams& params) const override;
|
||||
int GetMinSupportedOpSet(const NodeUnit& node_unit) const override;
|
||||
|
||||
bool IsNodeUnitTypeSupported(const NodeUnit& node_unit) const override;
|
||||
static bool IsQuantizedOp(const NodeUnit& node_unit);
|
||||
};
|
||||
|
||||
/* static */ void BinaryOpSupportChecker::CreateSharedOpSupportChecker(
|
||||
|
|
@ -259,10 +466,29 @@ class BinaryOpSupportChecker : public BaseOpSupportChecker {
|
|||
"Mul",
|
||||
"Div",
|
||||
"QLinearAdd",
|
||||
"QLinearMul",
|
||||
"Pow",
|
||||
});
|
||||
}
|
||||
|
||||
bool BinaryOpSupportChecker::IsNodeUnitTypeSupported(const NodeUnit& node_unit) const {
|
||||
if (node_unit.UnitType() == NodeUnit::Type::QDQGroup) {
|
||||
const auto quant_type = GetQuantizedOpType(node_unit);
|
||||
return quant_type == QuantizedOpType::QDQAdd ||
|
||||
quant_type == QuantizedOpType::QDQMul;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/* static */ bool BinaryOpSupportChecker::IsQuantizedOp(const NodeUnit& node_unit) {
|
||||
const auto quant_type = GetQuantizedOpType(node_unit);
|
||||
return quant_type == QuantizedOpType::QLinearAdd ||
|
||||
quant_type == QuantizedOpType::QLinearMul ||
|
||||
quant_type == QuantizedOpType::QDQAdd ||
|
||||
quant_type == QuantizedOpType::QDQMul;
|
||||
}
|
||||
|
||||
int32_t BinaryOpSupportChecker::GetMinSupportedNNAPIFeatureLevel(
|
||||
const NodeUnit& node_unit, const OpSupportCheckParams& /* params */) const {
|
||||
const auto& op(node_unit.OpType());
|
||||
|
|
@ -281,21 +507,29 @@ int BinaryOpSupportChecker::GetMinSupportedOpSet(const NodeUnit& node_unit) cons
|
|||
const auto& op(node_unit.OpType());
|
||||
|
||||
// Add/Sub/Mul/Div/Pow opset 6- has broadcast attributes we do not support now
|
||||
if (op != "QLinearAdd")
|
||||
if (op != "QLinearAdd" && op != "QLinearMul")
|
||||
return 7;
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
bool BinaryOpSupportChecker::HasSupportedInputsImpl(const NodeUnit& node_unit) const {
|
||||
bool is_qlinear_add = node_unit.OpType() == "QLinearAdd";
|
||||
bool BinaryOpSupportChecker::HasSupportedInputOutputsImpl(
|
||||
const InitializedTensorSet& initializers, const NodeUnit& node_unit,
|
||||
const OpSupportCheckParams& params) const {
|
||||
bool is_quantized_op = IsQuantizedOp(node_unit);
|
||||
bool is_pow = node_unit.OpType() == "Pow";
|
||||
if (!is_qlinear_add && !is_pow)
|
||||
return BaseOpSupportChecker::HasSupportedInputsImpl(node_unit);
|
||||
if (!is_quantized_op && !is_pow)
|
||||
return BaseOpSupportChecker::HasSupportedInputOutputsImpl(initializers, node_unit, params);
|
||||
|
||||
if (is_qlinear_add) {
|
||||
// QLinearAdd
|
||||
if (!HasValidBinaryOpQuantizedInputs(node_unit))
|
||||
if (is_quantized_op) {
|
||||
// QLinearAdd/QDQAdd/QLinearMul/QDQMul
|
||||
if (!HasValidBinaryOpQuantizedInputTypes(node_unit))
|
||||
return false;
|
||||
|
||||
if (!IsQuantizedIOSupported(initializers, node_unit, {0, 1}, params, IOKind::Input))
|
||||
return false;
|
||||
|
||||
if (!IsQuantizedIOSupported(initializers, node_unit, {0}, params, IOKind::Output))
|
||||
return false;
|
||||
}
|
||||
|
||||
|
|
@ -320,11 +554,10 @@ bool BinaryOpSupportChecker::HasSupportedInputsImpl(const NodeUnit& node_unit) c
|
|||
return true;
|
||||
}
|
||||
|
||||
bool BinaryOpSupportChecker::IsOpSupportedImpl(const InitializedTensorSet& initializers, const NodeUnit& node_unit,
|
||||
const OpSupportCheckParams& params) const {
|
||||
bool BinaryOpSupportChecker::IsOpSupportedImpl(const InitializedTensorSet& /* initializers */, const NodeUnit& node_unit,
|
||||
const OpSupportCheckParams& /* params */) const {
|
||||
const auto& op_type(node_unit.OpType());
|
||||
const auto& inputs = node_unit.Inputs();
|
||||
bool op_is_qlinear = op_type == "QLinearAdd";
|
||||
Shape input1_shape, input2_shape;
|
||||
if (!GetShape(inputs[0].node_arg, input1_shape) ||
|
||||
!GetShape(inputs[1].node_arg, input2_shape))
|
||||
|
|
@ -339,32 +572,6 @@ bool BinaryOpSupportChecker::IsOpSupportedImpl(const InitializedTensorSet& initi
|
|||
return false;
|
||||
}
|
||||
|
||||
if (op_is_qlinear) {
|
||||
// For QLinearAdd, we only support uint8 output now
|
||||
int32_t output_type;
|
||||
if (!GetType(node_unit.Outputs()[0].node_arg, output_type))
|
||||
return false;
|
||||
|
||||
if (output_type != ONNX_NAMESPACE::TensorProto_DataType_UINT8) {
|
||||
LOGS_DEFAULT(VERBOSE) << "[" << op_type
|
||||
<< "] output type: [" << output_type
|
||||
<< "] is not supported for now";
|
||||
return false;
|
||||
}
|
||||
|
||||
// Check input scales and ZPs
|
||||
if (!HasValidQuantizationScales(initializers, node_unit, {0, 1}, params, true /* is_input */))
|
||||
return false;
|
||||
if (!HasValidQuantizationZeroPoints(initializers, node_unit, {0, 1}, true /* is_input */))
|
||||
return false;
|
||||
|
||||
// Check output scale and ZP
|
||||
if (!HasValidQuantizationScales(initializers, node_unit, {0}, params, false /* is_input */))
|
||||
return false;
|
||||
if (!HasValidQuantizationZeroPoints(initializers, node_unit, {0}, false /* is_input */))
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
|
@ -382,7 +589,9 @@ class TransposeOpSupportChecker : public BaseOpSupportChecker {
|
|||
return ANEURALNETWORKS_FEATURE_LEVEL_2;
|
||||
}
|
||||
|
||||
bool HasSupportedInputsImpl(const NodeUnit& node_unit) const override;
|
||||
bool HasSupportedInputOutputsImpl(
|
||||
const InitializedTensorSet& initializers, const NodeUnit& node_unit,
|
||||
const OpSupportCheckParams& params) const override;
|
||||
};
|
||||
|
||||
bool TransposeOpSupportChecker::IsOpSupportedImpl(const InitializedTensorSet& /* initializers */, const NodeUnit& node_unit,
|
||||
|
|
@ -401,7 +610,9 @@ bool TransposeOpSupportChecker::IsOpSupportedImpl(const InitializedTensorSet& /*
|
|||
return true;
|
||||
}
|
||||
|
||||
bool TransposeOpSupportChecker::HasSupportedInputsImpl(const NodeUnit& node_unit) const {
|
||||
bool TransposeOpSupportChecker::HasSupportedInputOutputsImpl(
|
||||
const InitializedTensorSet& /* initializers */, const NodeUnit& node_unit,
|
||||
const OpSupportCheckParams& /* params */) const {
|
||||
int32_t input_type;
|
||||
if (!GetType(node_unit.Inputs()[0].node_arg, input_type))
|
||||
return false;
|
||||
|
|
@ -561,8 +772,10 @@ class PoolOpSupportChecker : public BaseOpSupportChecker {
|
|||
return params.use_nchw ? ANEURALNETWORKS_FEATURE_LEVEL_3 : ANEURALNETWORKS_FEATURE_LEVEL_2;
|
||||
}
|
||||
|
||||
bool HasSupportedInputsImpl(const NodeUnit& node_unit) const override;
|
||||
bool IsNodeUnitTypeSupported(const NodeUnit& /* node_unit */) const override { return true; }
|
||||
bool HasSupportedInputOutputsImpl(
|
||||
const InitializedTensorSet& initializers, const NodeUnit& node_unit,
|
||||
const OpSupportCheckParams& params) const override;
|
||||
bool IsNodeUnitTypeSupported(const NodeUnit& /* node_unit */) const override;
|
||||
static bool IsQuantizedOp(const NodeUnit& node_unit);
|
||||
};
|
||||
|
||||
|
|
@ -579,12 +792,21 @@ class PoolOpSupportChecker : public BaseOpSupportChecker {
|
|||
});
|
||||
}
|
||||
|
||||
bool PoolOpSupportChecker::IsNodeUnitTypeSupported(const NodeUnit& node_unit) const {
|
||||
if (node_unit.UnitType() == NodeUnit::Type::QDQGroup) {
|
||||
const auto quant_type = GetQuantizedOpType(node_unit);
|
||||
return quant_type == QuantizedOpType::QDQAveragePool;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/* static */ bool PoolOpSupportChecker::IsQuantizedOp(const NodeUnit& node_unit) {
|
||||
return IsQuantizedPool(GetQuantizedOpType(node_unit));
|
||||
}
|
||||
|
||||
bool PoolOpSupportChecker::IsOpSupportedImpl(const InitializedTensorSet& initializers, const NodeUnit& node_unit,
|
||||
const OpSupportCheckParams& params) const {
|
||||
const OpSupportCheckParams& /* params */) const {
|
||||
const auto& op_name = node_unit.Name();
|
||||
const auto& op_type = node_unit.OpType();
|
||||
const auto& inputs = node_unit.Inputs();
|
||||
|
|
@ -601,7 +823,8 @@ bool PoolOpSupportChecker::IsOpSupportedImpl(const InitializedTensorSet& initial
|
|||
}
|
||||
|
||||
bool is_quant_pool = IsQuantizedOp(node_unit);
|
||||
if (op_type == "AveragePool" || op_type == "MaxPool" || op_type == "QLinearAveragePool") {
|
||||
bool is_average_pool = op_type == "AveragePool" || op_type == "QLinearAveragePool";
|
||||
if (is_average_pool || op_type == "MaxPool") {
|
||||
NodeAttrHelper helper(node_unit);
|
||||
|
||||
const auto count_include_pad = helper.Get("count_include_pad", 0);
|
||||
|
|
@ -642,20 +865,7 @@ bool PoolOpSupportChecker::IsOpSupportedImpl(const InitializedTensorSet& initial
|
|||
}
|
||||
|
||||
// We need to check if we have valid scales and zero points for QLinearAveragePool
|
||||
if (is_quant_pool) {
|
||||
// Check input scales and ZPs
|
||||
if (!HasValidQuantizationScales(initializers, node_unit, {0}, params, true /* is_input */))
|
||||
return false;
|
||||
if (!HasValidQuantizationZeroPoints(initializers, node_unit, {0}, true /* is_input */))
|
||||
return false;
|
||||
|
||||
// Check output scale and ZP
|
||||
|
||||
if (!HasValidQuantizationScales(initializers, node_unit, {0}, params, false /* is_input */))
|
||||
return false;
|
||||
if (!HasValidQuantizationZeroPoints(initializers, node_unit, {0}, false /* is_input */))
|
||||
return false;
|
||||
|
||||
if (is_average_pool && is_quant_pool) {
|
||||
// NNAPI requires Quantized Average Pool has same scale and zero point for both input and output
|
||||
float input_scale = 0.0f;
|
||||
int32_t input_zp = 0;
|
||||
|
|
@ -697,14 +907,23 @@ bool PoolOpSupportChecker::IsOpSupportedImpl(const InitializedTensorSet& initial
|
|||
return true;
|
||||
}
|
||||
|
||||
bool PoolOpSupportChecker::HasSupportedInputsImpl(const NodeUnit& node_unit) const {
|
||||
bool is_max_pool = node_unit.OpType() == "MaxPool";
|
||||
bool PoolOpSupportChecker::HasSupportedInputOutputsImpl(
|
||||
const InitializedTensorSet& initializers, const NodeUnit& node_unit,
|
||||
const OpSupportCheckParams& params) const {
|
||||
const auto& op_type = node_unit.OpType();
|
||||
bool is_quant_pool = IsQuantizedOp(node_unit);
|
||||
if (!is_max_pool && !is_quant_pool)
|
||||
return BaseOpSupportChecker::HasSupportedInputsImpl(node_unit);
|
||||
bool is_max_pool = op_type == "MaxPool";
|
||||
bool is_average_pool = op_type == "AveragePool" || op_type == "QLinearAveragePool";
|
||||
bool is_quant_average_pool = is_quant_pool && is_average_pool;
|
||||
if (!is_max_pool && !is_quant_average_pool)
|
||||
return BaseOpSupportChecker::HasSupportedInputOutputsImpl(initializers, node_unit, params);
|
||||
|
||||
if (is_quant_pool) {
|
||||
return HasValidUnaryOpQuantizedInputs(node_unit);
|
||||
if (is_quant_average_pool) {
|
||||
if (!IsQuantizedIOSupported(initializers, node_unit, {0}, params, IOKind::Input))
|
||||
return false;
|
||||
|
||||
if (!IsQuantizedIOSupported(initializers, node_unit, {0}, params, IOKind::Output))
|
||||
return false;
|
||||
}
|
||||
|
||||
// is_max_pool
|
||||
|
|
@ -742,7 +961,9 @@ class ConvOpSupportChecker : public BaseOpSupportChecker {
|
|||
return params.use_nchw ? ANEURALNETWORKS_FEATURE_LEVEL_3 : ANEURALNETWORKS_FEATURE_LEVEL_2;
|
||||
}
|
||||
|
||||
bool HasSupportedInputsImpl(const NodeUnit& node_unit) const override;
|
||||
bool HasSupportedInputOutputsImpl(
|
||||
const InitializedTensorSet& /* initializers */, const NodeUnit& node_unit,
|
||||
const OpSupportCheckParams& /* params */) const override;
|
||||
bool IsNodeUnitTypeSupported(const NodeUnit& /* node_unit */) const override { return true; }
|
||||
static bool IsQuantizedOp(const NodeUnit& node_unit);
|
||||
};
|
||||
|
|
@ -761,12 +982,20 @@ class ConvOpSupportChecker : public BaseOpSupportChecker {
|
|||
return IsQuantizedConv(GetQuantizedOpType(node_unit));
|
||||
}
|
||||
|
||||
bool ConvOpSupportChecker::HasSupportedInputsImpl(const NodeUnit& node_unit) const {
|
||||
bool ConvOpSupportChecker::HasSupportedInputOutputsImpl(
|
||||
const InitializedTensorSet& initializers, const NodeUnit& node_unit,
|
||||
const OpSupportCheckParams& params) const {
|
||||
if (!IsQuantizedOp(node_unit))
|
||||
return BaseOpSupportChecker::HasSupportedInputsImpl(node_unit);
|
||||
return BaseOpSupportChecker::HasSupportedInputOutputsImpl(initializers, node_unit, params);
|
||||
|
||||
// QLinearConv only supports input of uint8 for now
|
||||
if (!HasValidBinaryOpQuantizedInputs(node_unit))
|
||||
if (!HasValidBinaryOpQuantizedInputTypes(node_unit))
|
||||
return false;
|
||||
|
||||
if (!IsQuantizedIOSupported(initializers, node_unit, {0, 1}, params, IOKind::Input))
|
||||
return false;
|
||||
|
||||
if (!IsQuantizedIOSupported(initializers, node_unit, {0}, params, IOKind::Output))
|
||||
return false;
|
||||
|
||||
return true;
|
||||
|
|
@ -813,34 +1042,10 @@ bool ConvOpSupportChecker::IsOpSupportedImpl(const InitializedTensorSet& initial
|
|||
}
|
||||
|
||||
if (is_quant_conv) {
|
||||
// For QLinearConv, we only support uint8 output now
|
||||
int32_t output_type;
|
||||
if (!GetType(node_unit.Outputs()[0].node_arg, output_type))
|
||||
return false;
|
||||
|
||||
if (output_type != ONNX_NAMESPACE::TensorProto_DataType_UINT8) {
|
||||
LOGS_DEFAULT(VERBOSE) << "[" << op_type
|
||||
<< "] output type: [" << output_type
|
||||
<< "] is not supported for now";
|
||||
return false;
|
||||
}
|
||||
|
||||
if (inputs.size() > 2 && !Contains(initializers, inputs[2].node_arg.Name())) {
|
||||
LOGS_DEFAULT(VERBOSE) << "Bias of QLinearConv must be known";
|
||||
return false;
|
||||
}
|
||||
|
||||
// Check input scales and ZPs
|
||||
if (!HasValidQuantizationScales(initializers, node_unit, {0, 1}, params, true /* is_input */))
|
||||
return false;
|
||||
if (!HasValidQuantizationZeroPoints(initializers, node_unit, {0, 1}, true /* is_input */))
|
||||
return false;
|
||||
|
||||
// Check output scale and ZP
|
||||
if (!HasValidQuantizationScales(initializers, node_unit, {0}, params, false /* is_input */))
|
||||
return false;
|
||||
if (!HasValidQuantizationZeroPoints(initializers, node_unit, {0}, false /* is_input */))
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
|
|
@ -931,16 +1136,26 @@ class GemmOpSupportChecker : public BaseOpSupportChecker {
|
|||
private:
|
||||
bool IsOpSupportedImpl(const InitializedTensorSet& initializers, const NodeUnit& node_unit,
|
||||
const OpSupportCheckParams& params) const override;
|
||||
bool HasSupportedInputsImpl(const NodeUnit& node_unit) const override;
|
||||
bool HasSupportedInputOutputsImpl(
|
||||
const InitializedTensorSet& /* initializers */, const NodeUnit& node_unit,
|
||||
const OpSupportCheckParams& /* params */) const override;
|
||||
int GetMinSupportedOpSet(const NodeUnit& node_unit) const override;
|
||||
};
|
||||
|
||||
bool GemmOpSupportChecker::HasSupportedInputsImpl(const NodeUnit& node_unit) const {
|
||||
bool GemmOpSupportChecker::HasSupportedInputOutputsImpl(
|
||||
const InitializedTensorSet& initializers, const NodeUnit& node_unit,
|
||||
const OpSupportCheckParams& params) const {
|
||||
if (node_unit.OpType() != "QLinearMatMul")
|
||||
return BaseOpSupportChecker::HasSupportedInputsImpl(node_unit);
|
||||
return BaseOpSupportChecker::HasSupportedInputOutputsImpl(initializers, node_unit, params);
|
||||
|
||||
// QLinearMatMul
|
||||
if (!HasValidBinaryOpQuantizedInputs(node_unit))
|
||||
if (!HasValidBinaryOpQuantizedInputTypes(node_unit))
|
||||
return false;
|
||||
|
||||
if (!IsQuantizedIOSupported(initializers, node_unit, {0, 1}, params, IOKind::Input))
|
||||
return false;
|
||||
|
||||
if (!IsQuantizedIOSupported(initializers, node_unit, {0}, params, IOKind::Output))
|
||||
return false;
|
||||
|
||||
return true;
|
||||
|
|
@ -1077,33 +1292,6 @@ bool GemmOpSupportChecker::IsOpSupportedImpl(const InitializedTensorSet& initial
|
|||
LOGS_DEFAULT(VERBOSE) << "B of MatMul must be known";
|
||||
return false;
|
||||
}
|
||||
|
||||
if (is_qlinear_matmul) {
|
||||
// For QLinearMatMul, we only support uint8 output now
|
||||
int32_t output_type;
|
||||
if (!GetType(node_unit.Outputs()[0].node_arg, output_type))
|
||||
return false;
|
||||
|
||||
if (output_type != ONNX_NAMESPACE::TensorProto_DataType_UINT8) {
|
||||
LOGS_DEFAULT(VERBOSE) << "[" << op_type
|
||||
<< "] output type: [" << output_type
|
||||
<< "] is not supported for now";
|
||||
return false;
|
||||
}
|
||||
|
||||
// All scale/zero points are initializer scalars
|
||||
// Check input scales and ZPs
|
||||
if (!HasValidQuantizationScales(initializers, node_unit, {0, 1}, params, true /* is_input */))
|
||||
return false;
|
||||
if (!HasValidQuantizationZeroPoints(initializers, node_unit, {0, 1}, true /* is_input */))
|
||||
return false;
|
||||
|
||||
// Check output scale and ZP
|
||||
if (!HasValidQuantizationScales(initializers, node_unit, {0}, params, false /* is_input */))
|
||||
return false;
|
||||
if (!HasValidQuantizationZeroPoints(initializers, node_unit, {0}, false /* is_input */))
|
||||
return false;
|
||||
}
|
||||
} else {
|
||||
LOGS_DEFAULT(VERBOSE) << "GemmOpSupportChecker, unknown op: " << op_type;
|
||||
}
|
||||
|
|
@ -1127,7 +1315,9 @@ class UnaryOpSupportChecker : public BaseOpSupportChecker {
|
|||
int32_t GetMinSupportedNNAPIFeatureLevel(const NodeUnit& /* node_unit */,
|
||||
const OpSupportCheckParams& params) const override;
|
||||
|
||||
bool HasSupportedInputsImpl(const NodeUnit& node_unit) const override;
|
||||
bool HasSupportedInputOutputsImpl(
|
||||
const InitializedTensorSet& /* initializers */, const NodeUnit& node_unit,
|
||||
const OpSupportCheckParams& /* params */) const override;
|
||||
|
||||
int GetMinSupportedOpSet(const NodeUnit& node_unit) const override;
|
||||
|
||||
|
|
@ -1176,12 +1366,20 @@ int32_t UnaryOpSupportChecker::GetMinSupportedNNAPIFeatureLevel(const NodeUnit&
|
|||
return ANEURALNETWORKS_FEATURE_LEVEL_1;
|
||||
}
|
||||
|
||||
bool UnaryOpSupportChecker::HasSupportedInputsImpl(const NodeUnit& node_unit) const {
|
||||
bool UnaryOpSupportChecker::HasSupportedInputOutputsImpl(
|
||||
const InitializedTensorSet& initializers, const NodeUnit& node_unit,
|
||||
const OpSupportCheckParams& params) const {
|
||||
// We only need to override input check for QLinearSigmoid
|
||||
if (node_unit.OpType() != "QLinearSigmoid")
|
||||
return BaseOpSupportChecker::HasSupportedInputsImpl(node_unit);
|
||||
return BaseOpSupportChecker::HasSupportedInputOutputsImpl(initializers, node_unit, params);
|
||||
|
||||
return HasValidUnaryOpQuantizedInputs(node_unit);
|
||||
if (!IsQuantizedIOSupported(initializers, node_unit, {0}, params, IOKind::Input))
|
||||
return false;
|
||||
|
||||
if (!IsQuantizedIOSupported(initializers, node_unit, {0}, params, IOKind::Output))
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// All ops except "Sin" opset 5- uses consumed_inputs attribute which is not supported for now
|
||||
|
|
@ -1195,24 +1393,11 @@ int UnaryOpSupportChecker::GetMinSupportedOpSet(const NodeUnit& node_unit) const
|
|||
}
|
||||
|
||||
/* static */ bool UnaryOpSupportChecker::IsQuantizedOpSupported(
|
||||
const InitializedTensorSet& initializers, const NodeUnit& node_unit, const OpSupportCheckParams& params) {
|
||||
const InitializedTensorSet& initializers, const NodeUnit& node_unit, const OpSupportCheckParams& /* params */) {
|
||||
const auto& op_type = node_unit.OpType();
|
||||
ORT_ENFORCE(op_type == "QLinearSigmoid");
|
||||
|
||||
const auto& op_name = node_unit.Name();
|
||||
|
||||
// Check input scales and ZPs
|
||||
if (!HasValidQuantizationScales(initializers, node_unit, {0}, params, true /* is_input */))
|
||||
return false;
|
||||
if (!HasValidQuantizationZeroPoints(initializers, node_unit, {0}, true /* is_input */))
|
||||
return false;
|
||||
|
||||
// Check output scale and ZP
|
||||
if (!HasValidQuantizationScales(initializers, node_unit, {0}, params, false /* is_input */))
|
||||
return false;
|
||||
if (!HasValidQuantizationZeroPoints(initializers, node_unit, {0}, false /* is_input */))
|
||||
return false;
|
||||
|
||||
// NNAPI requires the scale be 1.f/256 and zero point to be 0
|
||||
// See https://android.googlesource.com/platform/frameworks/ml/+/refs/heads/android10-c2f2-release/nn/common/operations/Activation.cpp#180
|
||||
float output_scale = 0.0f;
|
||||
|
|
@ -1249,7 +1434,9 @@ class ConcatOpSupportChecker : public BaseOpSupportChecker {
|
|||
bool IsOpSupportedImpl(const InitializedTensorSet& initializers, const NodeUnit& node_unit,
|
||||
const OpSupportCheckParams& params) const override;
|
||||
|
||||
bool HasSupportedInputsImpl(const NodeUnit& node_unit) const override;
|
||||
bool HasSupportedInputOutputsImpl(
|
||||
const InitializedTensorSet& /* initializers */, const NodeUnit& node_unit,
|
||||
const OpSupportCheckParams& /* params */) const override;
|
||||
};
|
||||
|
||||
bool ConcatOpSupportChecker::IsOpSupportedImpl(const InitializedTensorSet& /* initializers */, const NodeUnit& node_unit,
|
||||
|
|
@ -1268,7 +1455,9 @@ bool ConcatOpSupportChecker::IsOpSupportedImpl(const InitializedTensorSet& /* in
|
|||
return true;
|
||||
}
|
||||
|
||||
bool ConcatOpSupportChecker::HasSupportedInputsImpl(const NodeUnit& node_unit) const {
|
||||
bool ConcatOpSupportChecker::HasSupportedInputOutputsImpl(
|
||||
const InitializedTensorSet& /* initializers */, const NodeUnit& node_unit,
|
||||
const OpSupportCheckParams& /* params */) const {
|
||||
int32_t input_type;
|
||||
if (!GetType(node_unit.Inputs()[0].node_arg, input_type))
|
||||
return false;
|
||||
|
|
@ -1331,37 +1520,17 @@ bool SqueezeOpSupportChecker::IsOpSupportedImpl(const InitializedTensorSet& init
|
|||
|
||||
class QuantizeLinearOpSupportChecker : public BaseOpSupportChecker {
|
||||
private:
|
||||
bool IsOpSupportedImpl(const InitializedTensorSet& initializers, const NodeUnit& node_unit,
|
||||
const OpSupportCheckParams& params) const override;
|
||||
|
||||
int32_t GetMinSupportedNNAPIFeatureLevel(const NodeUnit& /* node_unit */,
|
||||
const OpSupportCheckParams& /* params */) const override {
|
||||
return ANEURALNETWORKS_FEATURE_LEVEL_3;
|
||||
}
|
||||
};
|
||||
|
||||
bool QuantizeLinearOpSupportChecker::IsOpSupportedImpl(const InitializedTensorSet& initializers, const NodeUnit& node_unit,
|
||||
const OpSupportCheckParams& params) const {
|
||||
int32_t output_type;
|
||||
if (!GetType(node_unit.Outputs()[0].node_arg, output_type))
|
||||
return false;
|
||||
|
||||
if (output_type != ONNX_NAMESPACE::TensorProto_DataType_UINT8) {
|
||||
LOGS_DEFAULT(VERBOSE) << "[" << node_unit.OpType()
|
||||
<< "] output type: [" << output_type
|
||||
<< "] is not supported for now";
|
||||
return false;
|
||||
bool HasSupportedInputOutputsImpl(
|
||||
const InitializedTensorSet& initializers, const NodeUnit& node_unit,
|
||||
const OpSupportCheckParams& params) const override {
|
||||
return IsQuantizedIOSupported(initializers, node_unit, {0}, params, IOKind::Output);
|
||||
}
|
||||
|
||||
// For QuantizeLinear only output is quantized
|
||||
// Check output scale and ZP
|
||||
if (!HasValidQuantizationScales(initializers, node_unit, {0}, params, false /* is_input */))
|
||||
return false;
|
||||
if (!HasValidQuantizationZeroPoints(initializers, node_unit, {0}, false /* is_input */))
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
#pragma endregion
|
||||
|
||||
|
|
@ -1369,42 +1538,17 @@ bool QuantizeLinearOpSupportChecker::IsOpSupportedImpl(const InitializedTensorSe
|
|||
|
||||
class DequantizeLinearOpSupportChecker : public BaseOpSupportChecker {
|
||||
private:
|
||||
bool IsOpSupportedImpl(const InitializedTensorSet& initializers, const NodeUnit& node_unit,
|
||||
const OpSupportCheckParams& params) const override;
|
||||
|
||||
int32_t GetMinSupportedNNAPIFeatureLevel(const NodeUnit& /* node_unit */,
|
||||
const OpSupportCheckParams& /* params */) const override {
|
||||
return ANEURALNETWORKS_FEATURE_LEVEL_1;
|
||||
}
|
||||
bool HasSupportedInputsImpl(const NodeUnit& node_unit) const override;
|
||||
};
|
||||
|
||||
bool DequantizeLinearOpSupportChecker::IsOpSupportedImpl(const InitializedTensorSet& initializers, const NodeUnit& node_unit,
|
||||
const OpSupportCheckParams& params) const {
|
||||
// For DequantizeLinear only input is quantized
|
||||
// Check input scale and ZP
|
||||
if (!HasValidQuantizationScales(initializers, node_unit, {0}, params, true /* is_input */))
|
||||
return false;
|
||||
if (!HasValidQuantizationZeroPoints(initializers, node_unit, {0}, true /* is_input */))
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool DequantizeLinearOpSupportChecker::HasSupportedInputsImpl(const NodeUnit& node_unit) const {
|
||||
int32_t input_type;
|
||||
if (!GetType(node_unit.Inputs()[0].node_arg, input_type))
|
||||
return false;
|
||||
|
||||
if (input_type != ONNX_NAMESPACE::TensorProto_DataType_UINT8) {
|
||||
LOGS_DEFAULT(VERBOSE) << "[" << node_unit.OpType()
|
||||
<< "] Input type: [" << input_type
|
||||
<< "] is not supported for now";
|
||||
return false;
|
||||
bool HasSupportedInputOutputsImpl(
|
||||
const InitializedTensorSet& initializers, const NodeUnit& node_unit,
|
||||
const OpSupportCheckParams& params) const override {
|
||||
return IsQuantizedIOSupported(initializers, node_unit, {0}, params, IOKind::Input);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
#pragma endregion
|
||||
|
||||
|
|
@ -1480,7 +1624,9 @@ class ResizeOpSupportChecker : public BaseOpSupportChecker {
|
|||
// We only support Resize opset 11+ here
|
||||
int GetMinSupportedOpSet(const NodeUnit& /* node_unit */) const override { return 11; }
|
||||
|
||||
bool HasSupportedInputsImpl(const NodeUnit& node_unit) const override;
|
||||
bool HasSupportedInputOutputsImpl(
|
||||
const InitializedTensorSet& /* initializers */, const NodeUnit& node_unit,
|
||||
const OpSupportCheckParams& /* params */) const override;
|
||||
bool IsNodeUnitTypeSupported(const NodeUnit& /* node_unit */) const override { return true; }
|
||||
static bool IsQuantizedOp(const NodeUnit& node_unit) ORT_MUST_USE_RESULT; // TODO, see if we want to move this to BaseOpBuilder
|
||||
};
|
||||
|
|
@ -1609,33 +1755,6 @@ bool ResizeOpSupportChecker::IsOpSupportedImpl(const InitializedTensorSet& initi
|
|||
}
|
||||
}
|
||||
|
||||
if (IsQuantizedOp(node_unit)) {
|
||||
// For QDQResize, we only support uint8 output now
|
||||
// TODO, add int8 support to NNAPI, and maybe move all the output type check into a virtual function
|
||||
// similar to HasSupportedInputsImpl
|
||||
int32_t output_type;
|
||||
if (!GetType(node_unit.Outputs()[0].node_arg, output_type))
|
||||
return false;
|
||||
|
||||
if (output_type != ONNX_NAMESPACE::TensorProto_DataType_UINT8) {
|
||||
LOGS_DEFAULT(VERBOSE) << "[Resize] output type: [" << output_type
|
||||
<< "] is not supported for now";
|
||||
return false;
|
||||
}
|
||||
|
||||
// Check input scales and ZPs
|
||||
if (!HasValidQuantizationScales(initializers, node_unit, {0}, params, true /* is_input */))
|
||||
return false;
|
||||
if (!HasValidQuantizationZeroPoints(initializers, node_unit, {0}, true /* is_input */))
|
||||
return false;
|
||||
|
||||
// Check output scale and ZP
|
||||
if (!HasValidQuantizationScales(initializers, node_unit, {0}, params, false /* is_input */))
|
||||
return false;
|
||||
if (!HasValidQuantizationZeroPoints(initializers, node_unit, {0}, false /* is_input */))
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
|
@ -1653,7 +1772,9 @@ int32_t ResizeOpSupportChecker::GetMinSupportedNNAPIFeatureLevel(const NodeUnit&
|
|||
return ANEURALNETWORKS_FEATURE_LEVEL_2;
|
||||
}
|
||||
|
||||
bool ResizeOpSupportChecker::HasSupportedInputsImpl(const NodeUnit& node_unit) const {
|
||||
bool ResizeOpSupportChecker::HasSupportedInputOutputsImpl(
|
||||
const InitializedTensorSet& initializers, const NodeUnit& node_unit,
|
||||
const OpSupportCheckParams& params) const {
|
||||
int32_t input_type;
|
||||
if (!GetType(node_unit.Inputs()[0].node_arg, input_type))
|
||||
return false;
|
||||
|
|
@ -1666,6 +1787,14 @@ bool ResizeOpSupportChecker::HasSupportedInputsImpl(const NodeUnit& node_unit) c
|
|||
return false;
|
||||
}
|
||||
|
||||
if (IsQuantizedOp(node_unit)) {
|
||||
if (!IsQuantizedIOSupported(initializers, node_unit, {0}, params, IOKind::Input))
|
||||
return false;
|
||||
|
||||
if (!IsQuantizedIOSupported(initializers, node_unit, {0}, params, IOKind::Output))
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
|
@ -1870,6 +1999,7 @@ static OpSupportCheckerRegistrations CreateOpSupportCheckerRegistrations() {
|
|||
NNAPI_EP_ADD_SHARED_OP_SUPPORT_CHECKER("Mul", BinaryOpSupportChecker);
|
||||
NNAPI_EP_ADD_SHARED_OP_SUPPORT_CHECKER("Pow", BinaryOpSupportChecker);
|
||||
NNAPI_EP_ADD_SHARED_OP_SUPPORT_CHECKER("QLinearAdd", BinaryOpSupportChecker);
|
||||
NNAPI_EP_ADD_SHARED_OP_SUPPORT_CHECKER("QLinearMul", BinaryOpSupportChecker);
|
||||
NNAPI_EP_ADD_SHARED_OP_SUPPORT_CHECKER("Sub", BinaryOpSupportChecker);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -43,61 +43,87 @@ void RunQLinearMathTestFromFloat(
|
|||
const quantization::Params<T>& a_params,
|
||||
const std::vector<float>& b, const std::vector<int64_t>& b_shape_origin,
|
||||
const quantization::Params<T>& b_params,
|
||||
const quantization::Params<T>& c_params,
|
||||
bool input_b_is_initializer = false,
|
||||
bool all_initializer_scale_zero_point = false) {
|
||||
size_t number_dims = std::max(a_shape_origin.size(), b_shape_origin.size());
|
||||
std::vector<int64_t> a_shape = PrefixingDims(a_shape_origin, number_dims);
|
||||
std::vector<int64_t> b_shape = PrefixingDims(b_shape_origin, number_dims);
|
||||
// calc broadcasting shaped
|
||||
std::vector<int64_t> c_shape(number_dims, 1);
|
||||
for (size_t axis = 0; axis < number_dims; ++axis) {
|
||||
if (a_shape[axis] != b_shape[axis] && (a_shape[axis] != 1 && b_shape[axis] != 1)) {
|
||||
ORT_THROW("Shapes can not be broadcasted");
|
||||
}
|
||||
c_shape[axis] = std::max(a_shape[axis], b_shape[axis]);
|
||||
}
|
||||
|
||||
std::vector<int64_t> a_strides, b_strides, c_strides;
|
||||
auto c_size = CalcStrides(c_shape, c_strides, false);
|
||||
auto a_size = CalcStrides(a_shape, a_strides, true);
|
||||
auto b_size = CalcStrides(b_shape, b_strides, true);
|
||||
if (a_size != static_cast<int64_t>(a.size()) || b_size != static_cast<int64_t>(b.size())) {
|
||||
ORT_THROW("Input size not match input shape!");
|
||||
}
|
||||
constexpr int qmax = std::numeric_limits<T>::max();
|
||||
constexpr int qmin = std::numeric_limits<T>::min();
|
||||
|
||||
OpTester test(op_name, 1, onnxruntime::kMSDomain);
|
||||
std::vector<T> a_quantized = QuantizeTestVector<T>(a, a_params);
|
||||
test.template AddInput<T>("A", a_shape_origin, a_quantized);
|
||||
test.AddInput<float>("A_scale", {}, {a_params.scale}, all_initializer_scale_zero_point);
|
||||
test.template AddInput<T>("A_zero_point", {}, {a_params.zero_point}, all_initializer_scale_zero_point);
|
||||
|
||||
std::vector<T> b_quantized = QuantizeTestVector<T>(b, b_params);
|
||||
test.template AddInput<T>("B", b_shape_origin, b_quantized, input_b_is_initializer);
|
||||
test.AddInput<float>("B_scale", {}, {b_params.scale}, all_initializer_scale_zero_point);
|
||||
test.template AddInput<T>("B_zero_point", {}, {b_params.zero_point}, all_initializer_scale_zero_point);
|
||||
|
||||
test.AddInput<float>("C_scale", {}, {c_params.scale}, all_initializer_scale_zero_point);
|
||||
test.template AddInput<T>("C_zero_point", {}, {c_params.zero_point}, all_initializer_scale_zero_point);
|
||||
std::vector<T> c(c_size);
|
||||
for (int64_t offset = 0; offset < c_size; ++offset) {
|
||||
int64_t remain = offset, a_offset = 0, b_offset = 0;
|
||||
const quantization::Params<T>& c_params) {
|
||||
const auto run_test = [&](bool input_b_is_initializer,
|
||||
bool all_initializer_scale_zero_point) {
|
||||
size_t number_dims = std::max(a_shape_origin.size(), b_shape_origin.size());
|
||||
std::vector<int64_t> a_shape = PrefixingDims(a_shape_origin, number_dims);
|
||||
std::vector<int64_t> b_shape = PrefixingDims(b_shape_origin, number_dims);
|
||||
// calculate the broadcast output shape
|
||||
std::vector<int64_t> c_shape(number_dims, 1);
|
||||
for (size_t axis = 0; axis < number_dims; ++axis) {
|
||||
int64_t index = remain / c_strides[axis];
|
||||
remain = remain % c_strides[axis];
|
||||
a_offset += index * a_strides[axis];
|
||||
b_offset += index * b_strides[axis];
|
||||
if (a_shape[axis] != b_shape[axis] && (a_shape[axis] != 1 && b_shape[axis] != 1)) {
|
||||
ORT_THROW("Shapes can not be broadcasted");
|
||||
}
|
||||
c_shape[axis] = std::max(a_shape[axis], b_shape[axis]);
|
||||
}
|
||||
|
||||
float a_dequantized = quantization::Dequantize(a_quantized[a_offset], a_params);
|
||||
float b_dequantized = quantization::Dequantize(b_quantized[b_offset], b_params);
|
||||
c[offset] = clampi<T>(static_cast<int>(std::nearbyintf(calc(a_dequantized, b_dequantized) / c_params.scale)) + c_params.zero_point, qmin, qmax);
|
||||
}
|
||||
test.template AddOutput<T>("C", c_shape, c);
|
||||
std::vector<int64_t> a_strides, b_strides, c_strides;
|
||||
auto c_size = CalcStrides(c_shape, c_strides, false);
|
||||
auto a_size = CalcStrides(a_shape, a_strides, true);
|
||||
auto b_size = CalcStrides(b_shape, b_strides, true);
|
||||
if (a_size != static_cast<int64_t>(a.size()) || b_size != static_cast<int64_t>(b.size())) {
|
||||
ORT_THROW("Input size not match input shape!");
|
||||
}
|
||||
constexpr int qmax = std::numeric_limits<T>::max();
|
||||
constexpr int qmin = std::numeric_limits<T>::min();
|
||||
|
||||
test.Run();
|
||||
OpTester test(op_name, 1, onnxruntime::kMSDomain);
|
||||
std::vector<T> a_quantized = QuantizeTestVector<T>(a, a_params);
|
||||
test.template AddInput<T>("A", a_shape_origin, a_quantized);
|
||||
test.AddInput<float>("A_scale", {}, {a_params.scale}, all_initializer_scale_zero_point);
|
||||
test.template AddInput<T>("A_zero_point", {}, {a_params.zero_point}, all_initializer_scale_zero_point);
|
||||
|
||||
std::vector<T> b_quantized = QuantizeTestVector<T>(b, b_params);
|
||||
test.template AddInput<T>("B", b_shape_origin, b_quantized, input_b_is_initializer);
|
||||
test.AddInput<float>("B_scale", {}, {b_params.scale}, all_initializer_scale_zero_point);
|
||||
test.template AddInput<T>("B_zero_point", {}, {b_params.zero_point}, all_initializer_scale_zero_point);
|
||||
|
||||
test.AddInput<float>("C_scale", {}, {c_params.scale}, all_initializer_scale_zero_point);
|
||||
test.template AddInput<T>("C_zero_point", {}, {c_params.zero_point}, all_initializer_scale_zero_point);
|
||||
std::vector<T> c(c_size);
|
||||
for (int64_t offset = 0; offset < c_size; ++offset) {
|
||||
int64_t remain = offset, a_offset = 0, b_offset = 0;
|
||||
for (size_t axis = 0; axis < number_dims; ++axis) {
|
||||
int64_t index = remain / c_strides[axis];
|
||||
remain = remain % c_strides[axis];
|
||||
a_offset += index * a_strides[axis];
|
||||
b_offset += index * b_strides[axis];
|
||||
}
|
||||
|
||||
float a_dequantized = quantization::Dequantize(a_quantized[a_offset], a_params);
|
||||
float b_dequantized = quantization::Dequantize(b_quantized[b_offset], b_params);
|
||||
c[offset] = clampi<T>(static_cast<int>(std::nearbyintf(calc(a_dequantized, b_dequantized) / c_params.scale)) + c_params.zero_point, qmin, qmax);
|
||||
}
|
||||
|
||||
float abs_error = 0.0f;
|
||||
|
||||
// For quantized models, NNAPI's rounding is different than CPU provider
|
||||
// Sometimes the result is within +/-1 of result of CPU provider
|
||||
// For ONNX, we use rounding to nearest ties to even.
|
||||
// For NNAPI, it is using std::round which is HALF_AWAY_FROM_ZERO, see
|
||||
// https://android.googlesource.com/platform/frameworks/ml/+/refs/heads/master/nn/common/operations/Quantize.cpp
|
||||
// Use 1 as abs_error which is the smallest possible for uint8_t
|
||||
//
|
||||
// NOTE, for now the tolerance will only apply if the NNAPI is actually used,
|
||||
// if for any reason the execution falls back to CPU, we still expect an exact match
|
||||
// See, 'void Check<uint8_t>(...' in onnxruntime/test/providers/provider_test_utils.cc
|
||||
#ifdef USE_NNAPI
|
||||
abs_error = 1.0f;
|
||||
#endif
|
||||
|
||||
test.template AddOutput<T>("C", c_shape, c, false /* sort_output */, 0.0f /* rel_error */, abs_error);
|
||||
|
||||
test.Run();
|
||||
};
|
||||
|
||||
run_test(false /* input_b_is_initializer */, false /* all_initializer_scale_zero_point */);
|
||||
|
||||
// NNAPI requires all the scales and zero points to be initializers
|
||||
run_test(false /* input_b_is_initializer */, true /* all_initializer_scale_zero_point */);
|
||||
|
||||
// We also want to test the case input B is an initializer
|
||||
run_test(true /* input_b_is_initializer */, true /* all_initializer_scale_zero_point */);
|
||||
}
|
||||
|
||||
// total 32 + 31 elements to cover all paths
|
||||
|
|
@ -145,22 +171,6 @@ TEST(QLinearBinaryOpTest, AddU8VectorVectorFull) {
|
|||
A, {63}, A_params,
|
||||
B, {63}, B_params,
|
||||
C_params);
|
||||
|
||||
// NNAPI will require all the scales and zero points be initializers
|
||||
// We also want to test the case input B is an initializer
|
||||
RunQLinearMathTestFromFloat("QLinearAdd", add_function,
|
||||
A, {63}, A_params,
|
||||
B, {63}, B_params,
|
||||
C_params,
|
||||
false /* input_b_is_initializer */,
|
||||
true /* all_initializer_scale_zero_point */);
|
||||
|
||||
RunQLinearMathTestFromFloat("QLinearAdd", add_function,
|
||||
A, {63}, A_params,
|
||||
B, {63}, B_params,
|
||||
C_params,
|
||||
true /* input_b_is_initializer */,
|
||||
true /* all_initializer_scale_zero_point */);
|
||||
}
|
||||
|
||||
TEST(QLinearBinaryOpTest, AddU8VectorVectorBroadcast) {
|
||||
|
|
@ -180,22 +190,6 @@ TEST(QLinearBinaryOpTest, AddU8VectorVectorBroadcast) {
|
|||
A, {3, 3, 7}, A_params,
|
||||
B, {3, 1, 7}, B_params,
|
||||
C_params);
|
||||
|
||||
// NNAPI will require all the scales and zero points be initializers
|
||||
// We also want to test the case input B is an initializer
|
||||
RunQLinearMathTestFromFloat("QLinearAdd", add_function,
|
||||
A, {3, 3, 7}, A_params,
|
||||
B, {3, 1, 7}, B_params,
|
||||
C_params,
|
||||
false /* input_b_is_initializer */,
|
||||
true /* all_initializer_scale_zero_point */);
|
||||
|
||||
RunQLinearMathTestFromFloat("QLinearAdd", add_function,
|
||||
A, {3, 3, 7}, A_params,
|
||||
B, {3, 1, 7}, B_params,
|
||||
C_params,
|
||||
true /* input_b_is_initializer */,
|
||||
true /* all_initializer_scale_zero_point */);
|
||||
}
|
||||
|
||||
TEST(QLinearBinaryOpTest, AddU8ScalarVectorFull) {
|
||||
|
|
@ -212,22 +206,6 @@ TEST(QLinearBinaryOpTest, AddU8ScalarVectorFull) {
|
|||
B, {1}, B_params,
|
||||
A, {63}, A_params,
|
||||
C_params);
|
||||
|
||||
// NNAPI will require all the scales and zero points be initializers
|
||||
// We also want to test the case input B is an initializer
|
||||
RunQLinearMathTestFromFloat("QLinearAdd", add_function,
|
||||
B, {1}, B_params,
|
||||
A, {63}, A_params,
|
||||
C_params,
|
||||
false /* input_b_is_initializer */,
|
||||
true /* all_initializer_scale_zero_point */);
|
||||
|
||||
RunQLinearMathTestFromFloat("QLinearAdd", add_function,
|
||||
B, {1}, B_params,
|
||||
A, {63}, A_params,
|
||||
C_params,
|
||||
true /* input_b_is_initializer */,
|
||||
true /* all_initializer_scale_zero_point */);
|
||||
}
|
||||
|
||||
TEST(QLinearBinaryOpTest, AddU8ScalarVectorBroadcast) {
|
||||
|
|
@ -244,22 +222,6 @@ TEST(QLinearBinaryOpTest, AddU8ScalarVectorBroadcast) {
|
|||
B, {3, 1, 1}, B_params,
|
||||
A, {3, 7, 3}, A_params,
|
||||
C_params);
|
||||
|
||||
// NNAPI will require all the scales and zero points be initializers
|
||||
// We also want to test the case input B is an initializer
|
||||
RunQLinearMathTestFromFloat("QLinearAdd", add_function,
|
||||
B, {3, 1, 1}, B_params,
|
||||
A, {3, 7, 3}, A_params,
|
||||
C_params,
|
||||
false /* input_b_is_initializer */,
|
||||
true /* all_initializer_scale_zero_point */);
|
||||
|
||||
RunQLinearMathTestFromFloat("QLinearAdd", add_function,
|
||||
B, {3, 1, 1}, B_params,
|
||||
A, {3, 7, 3}, A_params,
|
||||
C_params,
|
||||
true /* input_b_is_initializer */,
|
||||
true /* all_initializer_scale_zero_point */);
|
||||
}
|
||||
|
||||
TEST(QLinearBinaryOpTest, AddU8VectorScalarFull) {
|
||||
|
|
@ -276,22 +238,6 @@ TEST(QLinearBinaryOpTest, AddU8VectorScalarFull) {
|
|||
A, {63}, A_params,
|
||||
B, {1}, B_params,
|
||||
C_params);
|
||||
|
||||
// NNAPI will require all the scales and zero points be initializers
|
||||
// We also want to test the case input B is an initializer
|
||||
RunQLinearMathTestFromFloat("QLinearAdd", add_function,
|
||||
A, {63}, A_params,
|
||||
B, {1}, B_params,
|
||||
C_params,
|
||||
false /* input_b_is_initializer */,
|
||||
true /* all_initializer_scale_zero_point */);
|
||||
|
||||
RunQLinearMathTestFromFloat("QLinearAdd", add_function,
|
||||
A, {63}, A_params,
|
||||
B, {1}, B_params,
|
||||
C_params,
|
||||
true /* input_b_is_initializer */,
|
||||
true /* all_initializer_scale_zero_point */);
|
||||
}
|
||||
|
||||
TEST(QLinearBinaryOpTest, AddU8VectorScalarBroadcast) {
|
||||
|
|
@ -308,22 +254,6 @@ TEST(QLinearBinaryOpTest, AddU8VectorScalarBroadcast) {
|
|||
A, {3, 7, 3}, A_params,
|
||||
B, {1, 1, 3}, B_params,
|
||||
C_params);
|
||||
|
||||
// NNAPI will require all the scales and zero points be initializers
|
||||
// We also want to test the case input B is an initializer
|
||||
RunQLinearMathTestFromFloat("QLinearAdd", add_function,
|
||||
A, {3, 7, 3}, A_params,
|
||||
B, {1, 1, 3}, B_params,
|
||||
C_params,
|
||||
false /* input_b_is_initializer */,
|
||||
true /* all_initializer_scale_zero_point */);
|
||||
|
||||
RunQLinearMathTestFromFloat("QLinearAdd", add_function,
|
||||
A, {3, 7, 3}, A_params,
|
||||
B, {1, 1, 3}, B_params,
|
||||
C_params,
|
||||
true /* input_b_is_initializer */,
|
||||
true /* all_initializer_scale_zero_point */);
|
||||
}
|
||||
|
||||
TEST(QLinearBinaryOpTest, AddS8VectorVectorFull) {
|
||||
|
|
|
|||
|
|
@ -81,10 +81,27 @@ GetQDQTestCaseFn BuildQDQConvTestCase(const std::vector<int64_t>& input_shape, c
|
|||
template <typename InputType, typename OutputType>
|
||||
GetQDQTestCaseFn BuildQDQAveragePoolTestCase(const std::vector<int64_t>& input_shape) {
|
||||
return [input_shape](ModelTestBuilder& builder) {
|
||||
|
||||
#ifdef USE_NNAPI // NNAPI require consistent scales/ZPs for DQ -> Pool -> Q
|
||||
float dq_scale = 0.0038f;
|
||||
float pool_output_scale = 0.0038f;
|
||||
float q_scale = 0.0038f;
|
||||
InputType dq_zp = std::numeric_limits<OutputType>::max() / 2;
|
||||
InputType pool_output_zp = std::numeric_limits<OutputType>::max() / 2;
|
||||
InputType q_zp = std::numeric_limits<OutputType>::max() / 2;
|
||||
#else
|
||||
float dq_scale = 0.0035f;
|
||||
float pool_output_scale = 0.0038f;
|
||||
float q_scale = 0.0039f;
|
||||
InputType dq_zp = 7;
|
||||
InputType pool_output_zp = std::numeric_limits<OutputType>::max() / 2;
|
||||
InputType q_zp = std::numeric_limits<OutputType>::max() / 2;
|
||||
#endif
|
||||
|
||||
auto* input_arg = builder.MakeInput<float>(input_shape, -1.f, 1.f);
|
||||
auto* output_arg = builder.MakeOutput();
|
||||
// add QDQ + AveragePool
|
||||
auto* dq_output = AddQDQNodePair<InputType>(builder, input_arg, .0035f, 7);
|
||||
auto* dq_output = AddQDQNodePair<InputType>(builder, input_arg, dq_scale, dq_zp);
|
||||
auto* averagepool_output = builder.MakeIntermediate();
|
||||
Node& pool_node = builder.AddNode("AveragePool", {dq_output}, {averagepool_output});
|
||||
std::vector<int64_t> pads((input_shape.size() - 2) * 2, 1);
|
||||
|
|
@ -95,12 +112,12 @@ GetQDQTestCaseFn BuildQDQAveragePoolTestCase(const std::vector<int64_t>& input_s
|
|||
// add QDQ output
|
||||
auto* q_output = builder.MakeIntermediate();
|
||||
builder.AddQuantizeLinearNode<OutputType>(averagepool_output,
|
||||
.0038f,
|
||||
std::numeric_limits<OutputType>::max() / 2,
|
||||
pool_output_scale,
|
||||
pool_output_zp,
|
||||
q_output);
|
||||
builder.AddDequantizeLinearNode<OutputType>(q_output,
|
||||
.0039f,
|
||||
std::numeric_limits<OutputType>::max() / 2,
|
||||
q_scale,
|
||||
q_zp,
|
||||
output_arg);
|
||||
};
|
||||
}
|
||||
|
|
@ -110,5 +127,65 @@ GetQDQTestCaseFn BuildQDQResizeTestCase(const std::vector<int64_t>& input_shape,
|
|||
const std::string& mode = "nearest",
|
||||
const std::string& coordinate_transformation_mode = "half_pixel");
|
||||
|
||||
// Builds a QDQ test-case callback for a binary operator.
// The returned callable populates a ModelTestBuilder with this graph:
//   input1 -> Q -> DQ --\
//                        op_type -> Q -> DQ -> output
//   input2 -> Q -> DQ --/
// Input1Type / Input2Type / OutputType are the element types passed to the
// QuantizeLinear/DequantizeLinear nodes on input 1, input 2 and the output.
// `input_shape` is used for both inputs; `op_type` is the node type handed to AddNode.
// Both arguments are captured by value in the returned lambda.
template <typename Input1Type, typename Input2Type, typename OutputType>
|
||||
GetQDQTestCaseFn BuildBinaryOpTestCase(const std::vector<int64_t>& input_shape,
|
||||
const std::string& op_type) {
|
||||
return [input_shape, op_type](ModelTestBuilder& builder) {
|
||||
// Two float inputs of identical shape; the -1.f / 1.f arguments are presumably
// the value range of the generated data (confirm against ModelTestBuilder::MakeInput).
auto* input1_arg = builder.MakeInput<float>(input_shape, -1.f, 1.f);
|
||||
auto* input2_arg = builder.MakeInput<float>(input_shape, -1.f, 1.f);
|
||||
auto* output_arg = builder.MakeOutput();
|
||||
|
||||
// With USE_NNAPI, q_scale == op_input_scale and op_output_scale == dq_scale;
// otherwise all four scales below are different values.
#ifdef USE_NNAPI  // NNAPI requires consistent scales for DQ -> bin_op_input and bin_op_output -> Q
|
||||
float q_scale = 0.008f;
|
||||
float op_input_scale = 0.008f;
|
||||
float op_output_scale = 0.0076f;
|
||||
float dq_scale = 0.0076f;
|
||||
#else
|
||||
float q_scale = 0.008f;
|
||||
float op_input_scale = 0.0079f;
|
||||
float op_output_scale = 0.0076f;
|
||||
float dq_scale = 0.0078f;
|
||||
#endif
|
||||
|
||||
// add QDQ 1
// (Q uses q_scale, DQ uses op_input_scale; zero point is half of Input1Type's max)
|
||||
auto* q1_output = builder.MakeIntermediate();
|
||||
auto* dq1_output = builder.MakeIntermediate();
|
||||
builder.AddQuantizeLinearNode<Input1Type>(input1_arg,
|
||||
q_scale,
|
||||
std::numeric_limits<Input1Type>::max() / 2,
|
||||
q1_output);
|
||||
builder.AddDequantizeLinearNode<Input1Type>(q1_output,
|
||||
op_input_scale,
|
||||
std::numeric_limits<Input1Type>::max() / 2,
|
||||
dq1_output);
|
||||
|
||||
// add QDQ 2
// (same scales as QDQ 1; zero point is half of Input2Type's max)
|
||||
auto* q2_output = builder.MakeIntermediate();
|
||||
auto* dq2_output = builder.MakeIntermediate();
|
||||
builder.AddQuantizeLinearNode<Input2Type>(input2_arg,
|
||||
q_scale,
|
||||
std::numeric_limits<Input2Type>::max() / 2,
|
||||
q2_output);
|
||||
builder.AddDequantizeLinearNode<Input2Type>(q2_output,
|
||||
op_input_scale,
|
||||
std::numeric_limits<Input2Type>::max() / 2,
|
||||
dq2_output);
|
||||
|
||||
// add binary operator
// (consumes the two dequantized inputs)
|
||||
auto* binary_op_output = builder.MakeIntermediate();
|
||||
builder.AddNode(op_type, {dq1_output, dq2_output}, {binary_op_output});
|
||||
|
||||
// add QDQ output
// (Q uses op_output_scale, DQ uses dq_scale; the DQ writes the graph output)
|
||||
auto* q3_output = builder.MakeIntermediate();
|
||||
builder.AddQuantizeLinearNode<OutputType>(binary_op_output,
|
||||
op_output_scale,
|
||||
std::numeric_limits<OutputType>::max() / 2,
|
||||
q3_output);
|
||||
builder.AddDequantizeLinearNode<OutputType>(q3_output,
|
||||
dq_scale,
|
||||
std::numeric_limits<OutputType>::max() / 2,
|
||||
output_arg);
|
||||
};
|
||||
}
|
||||
} // namespace test
|
||||
} // namespace onnxruntime
|
||||
|
|
@ -39,7 +39,7 @@ namespace test {
|
|||
template <typename InputType, typename WeightType, typename BiasType, typename OutputType>
|
||||
void QDQTransformerConvTests() {
|
||||
auto test_case = [&](const std::vector<int64_t>& input_shape, const std::vector<int64_t>& weights_shape) {
|
||||
auto check_conv_graph = [&](InferenceSessionWrapper& session) {
|
||||
auto check_graph = [&](InferenceSessionWrapper& session) {
|
||||
auto op_to_count = CountOpsInGraph(session.GetGraph());
|
||||
if constexpr (std::is_same<InputType, OutputType>::value &&
|
||||
std::is_same<BiasType, int32_t>::value &&
|
||||
|
|
@ -57,7 +57,7 @@ void QDQTransformerConvTests() {
|
|||
};
|
||||
|
||||
TransformerTester(BuildQDQConvTestCase<InputType, WeightType, BiasType, OutputType>(input_shape, weights_shape),
|
||||
check_conv_graph,
|
||||
check_graph,
|
||||
TransformerLevel::Level1,
|
||||
TransformerLevel::Level2,
|
||||
12 /*opset_version*/,
|
||||
|
|
@ -136,7 +136,7 @@ TEST(QDQTransformerTests, ConvMaxPoolReshape_UInt8) {
|
|||
builder.AddQuantizeLinearNode<uint8_t>(reshape_output, .0039f, 135, output_arg);
|
||||
};
|
||||
|
||||
auto check_mp_reshape_graph = [&](InferenceSessionWrapper& session) {
|
||||
auto check_graph = [&](InferenceSessionWrapper& session) {
|
||||
auto op_to_count = CountOpsInGraph(session.GetGraph());
|
||||
EXPECT_EQ(op_to_count["QLinearConv"], 1);
|
||||
EXPECT_EQ(op_to_count["MaxPool"], 1);
|
||||
|
|
@ -146,7 +146,7 @@ TEST(QDQTransformerTests, ConvMaxPoolReshape_UInt8) {
|
|||
};
|
||||
|
||||
TransformerTester(build_test_case,
|
||||
check_mp_reshape_graph,
|
||||
check_graph,
|
||||
TransformerLevel::Level1,
|
||||
TransformerLevel::Level2,
|
||||
opset_version);
|
||||
|
|
@ -197,7 +197,7 @@ TEST(QDQTransformerTests, ConvMaxPoolReshape_Int8) {
|
|||
}
|
||||
};
|
||||
|
||||
auto check_mp_reshape_graph = [&](InferenceSessionWrapper& session) {
|
||||
auto check_graph = [&](InferenceSessionWrapper& session) {
|
||||
auto op_to_count = CountOpsInGraph(session.GetGraph());
|
||||
EXPECT_EQ(op_to_count["QLinearConv"], 1);
|
||||
EXPECT_EQ(op_to_count["MaxPool"], 1);
|
||||
|
|
@ -206,7 +206,7 @@ TEST(QDQTransformerTests, ConvMaxPoolReshape_Int8) {
|
|||
EXPECT_EQ(op_to_count["DequantizeLinear"], 0);
|
||||
};
|
||||
|
||||
TransformerTester(build_test_case, check_mp_reshape_graph, TransformerLevel::Level1, TransformerLevel::Level2);
|
||||
TransformerTester(build_test_case, check_graph, TransformerLevel::Level1, TransformerLevel::Level2);
|
||||
};
|
||||
|
||||
test_case({1, 12, 37}, {32, 12, 5});
|
||||
|
|
@ -217,7 +217,7 @@ TEST(QDQTransformerTests, ConvMaxPoolReshape_Int8) {
|
|||
template <typename InputType, typename OutputType>
|
||||
void QDQTransformerAveragePoolTests() {
|
||||
auto test_case = [&](const std::vector<int64_t>& input_shape) {
|
||||
auto check_averagepool_op_graph = [&](InferenceSessionWrapper& session) {
|
||||
auto check_graph = [&](InferenceSessionWrapper& session) {
|
||||
auto op_to_count = CountOpsInGraph(session.GetGraph());
|
||||
if constexpr (std::is_same<InputType, OutputType>::value) {
|
||||
EXPECT_EQ(op_to_count["com.microsoft.QLinearAveragePool"], 1);
|
||||
|
|
@ -233,7 +233,7 @@ void QDQTransformerAveragePoolTests() {
|
|||
};
|
||||
|
||||
TransformerTester(BuildQDQAveragePoolTestCase<InputType, OutputType>(input_shape),
|
||||
check_averagepool_op_graph,
|
||||
check_graph,
|
||||
TransformerLevel::Level1,
|
||||
TransformerLevel::Level2,
|
||||
12 /*opset_version*/,
|
||||
|
|
@ -266,52 +266,7 @@ TEST(QDQTransformerTests, AveragePool_U8S8) {
|
|||
template <typename Input1Type, typename Input2Type, typename OutputType>
|
||||
void QDQTransformerBinaryOpTests(const std::string& op_type) {
|
||||
auto test_case = [&](const std::vector<int64_t>& input_shape) {
|
||||
auto build_test_case = [&](ModelTestBuilder& builder) {
|
||||
auto* input1_arg = builder.MakeInput<float>(input_shape, -1.f, 1.f);
|
||||
auto* input2_arg = builder.MakeInput<float>(input_shape, -1.f, 1.f);
|
||||
auto* output_arg = builder.MakeOutput();
|
||||
|
||||
// add QDQ 1
|
||||
auto* q1_output = builder.MakeIntermediate();
|
||||
auto* dq1_output = builder.MakeIntermediate();
|
||||
builder.AddQuantizeLinearNode<Input1Type>(input1_arg,
|
||||
.004f,
|
||||
std::numeric_limits<Input1Type>::max() / 2,
|
||||
q1_output);
|
||||
builder.AddDequantizeLinearNode<Input1Type>(q1_output,
|
||||
.0039f,
|
||||
std::numeric_limits<Input1Type>::max() / 2,
|
||||
dq1_output);
|
||||
|
||||
// add QDQ 2
|
||||
auto* q2_output = builder.MakeIntermediate();
|
||||
auto* dq2_output = builder.MakeIntermediate();
|
||||
builder.AddQuantizeLinearNode<Input2Type>(input2_arg,
|
||||
.004f,
|
||||
std::numeric_limits<Input2Type>::max() / 2,
|
||||
q2_output);
|
||||
builder.AddDequantizeLinearNode<Input2Type>(q2_output,
|
||||
.0039f,
|
||||
std::numeric_limits<Input2Type>::max() / 2,
|
||||
dq2_output);
|
||||
|
||||
// add binary operator
|
||||
auto* binary_op_output = builder.MakeIntermediate();
|
||||
builder.AddNode(op_type, {dq1_output, dq2_output}, {binary_op_output});
|
||||
|
||||
// add QDQ output
|
||||
auto* q3_output = builder.MakeIntermediate();
|
||||
builder.AddQuantizeLinearNode<OutputType>(binary_op_output,
|
||||
.0038f,
|
||||
std::numeric_limits<OutputType>::max() / 2,
|
||||
q3_output);
|
||||
builder.AddDequantizeLinearNode<OutputType>(q3_output,
|
||||
.0039f,
|
||||
std::numeric_limits<OutputType>::max() / 2,
|
||||
output_arg);
|
||||
};
|
||||
|
||||
auto check_binary_op_graph = [&](InferenceSessionWrapper& session) {
|
||||
auto check_graph = [&](InferenceSessionWrapper& session) {
|
||||
auto op_to_count = CountOpsInGraph(session.GetGraph());
|
||||
if (std::is_same<Input1Type, Input2Type>::value &&
|
||||
std::is_same<Input1Type, OutputType>::value) {
|
||||
|
|
@ -327,8 +282,8 @@ void QDQTransformerBinaryOpTests(const std::string& op_type) {
|
|||
}
|
||||
};
|
||||
|
||||
TransformerTester(build_test_case,
|
||||
check_binary_op_graph,
|
||||
TransformerTester(BuildBinaryOpTestCase<Input1Type, Input2Type, OutputType>(input_shape, op_type),
|
||||
check_graph,
|
||||
TransformerLevel::Level1,
|
||||
TransformerLevel::Level2,
|
||||
12 /*opset_version*/,
|
||||
|
|
@ -426,7 +381,7 @@ void QDQTransformerMatMulTests(bool has_output_q) {
|
|||
}
|
||||
};
|
||||
|
||||
auto check_binary_op_graph = [&](InferenceSessionWrapper& session) {
|
||||
auto check_graph = [&](InferenceSessionWrapper& session) {
|
||||
auto op_to_count = CountOpsInGraph(session.GetGraph());
|
||||
if (has_output_q) {
|
||||
if constexpr (std::is_same<Input1Type, OutputType>::value &&
|
||||
|
|
@ -459,7 +414,7 @@ void QDQTransformerMatMulTests(bool has_output_q) {
|
|||
};
|
||||
|
||||
TransformerTester(build_test_case,
|
||||
check_binary_op_graph,
|
||||
check_graph,
|
||||
TransformerLevel::Level1,
|
||||
TransformerLevel::Level2,
|
||||
12 /*opset_version*/,
|
||||
|
|
@ -696,14 +651,14 @@ TEST(QDQTransformerTests, Gather) {
|
|||
builder.AddQuantizeLinearNode<int8_t>(gather_output, .003f, 1, output_arg);
|
||||
};
|
||||
|
||||
auto check_matmul_graph = [&](InferenceSessionWrapper& session) {
|
||||
auto check_graph = [&](InferenceSessionWrapper& session) {
|
||||
auto op_to_count = CountOpsInGraph(session.GetGraph());
|
||||
EXPECT_EQ(op_to_count["Gather"], 1);
|
||||
EXPECT_EQ(op_to_count["QuantizeLinear"], 0);
|
||||
EXPECT_EQ(op_to_count["DequantizeLinear"], 0);
|
||||
};
|
||||
|
||||
TransformerTester(build_test_case, check_matmul_graph, TransformerLevel::Level1, TransformerLevel::Level2);
|
||||
TransformerTester(build_test_case, check_graph, TransformerLevel::Level1, TransformerLevel::Level2);
|
||||
};
|
||||
|
||||
test_case({12, 37}, {24, 12});
|
||||
|
|
@ -728,14 +683,14 @@ TEST(QDQTransformerTests, Transpose) {
|
|||
builder.AddQuantizeLinearNode<int8_t>(transpose_output, .003f, 1, output_arg);
|
||||
};
|
||||
|
||||
auto check_matmul_graph = [&](InferenceSessionWrapper& session) {
|
||||
auto check_graph = [&](InferenceSessionWrapper& session) {
|
||||
auto op_to_count = CountOpsInGraph(session.GetGraph());
|
||||
EXPECT_EQ(op_to_count["Transpose"], 1);
|
||||
EXPECT_EQ(op_to_count["QuantizeLinear"], 0);
|
||||
EXPECT_EQ(op_to_count["DequantizeLinear"], 0);
|
||||
};
|
||||
|
||||
TransformerTester(build_test_case, check_matmul_graph, TransformerLevel::Level1, TransformerLevel::Level2);
|
||||
TransformerTester(build_test_case, check_graph, TransformerLevel::Level1, TransformerLevel::Level2);
|
||||
};
|
||||
|
||||
test_case({2, 13, 12, 37}, {0, 3, 1, 2});
|
||||
|
|
@ -760,13 +715,13 @@ TEST(QDQTransformerTests, Transpose_No_Fusion) {
|
|||
builder.AddQuantizeLinearNode<int8_t>(transpose_output, .003f, 1, output_arg);
|
||||
};
|
||||
|
||||
auto check_matmul_graph = [&](InferenceSessionWrapper& session) {
|
||||
auto check_graph = [&](InferenceSessionWrapper& session) {
|
||||
auto op_to_count = CountOpsInGraph(session.GetGraph());
|
||||
EXPECT_EQ(op_to_count["QuantizeLinear"], 1);
|
||||
EXPECT_EQ(op_to_count["DequantizeLinear"], 1);
|
||||
};
|
||||
|
||||
TransformerTester(build_test_case, check_matmul_graph, TransformerLevel::Level1, TransformerLevel::Level2);
|
||||
TransformerTester(build_test_case, check_graph, TransformerLevel::Level1, TransformerLevel::Level2);
|
||||
};
|
||||
|
||||
test_case({2, 13, 12, 37}, {0, 3, 1, 2});
|
||||
|
|
@ -775,7 +730,7 @@ TEST(QDQTransformerTests, Transpose_No_Fusion) {
|
|||
TEST(QDQTransformerTests, Resize) {
|
||||
auto test_case = [&](const std::vector<int64_t>& input1_shape,
|
||||
const std::vector<int64_t>& sizes_shape) {
|
||||
auto check_resize_graph = [&](InferenceSessionWrapper& session) {
|
||||
auto check_graph = [&](InferenceSessionWrapper& session) {
|
||||
auto op_to_count = CountOpsInGraph(session.GetGraph());
|
||||
EXPECT_EQ(op_to_count["Resize"], 1);
|
||||
EXPECT_EQ(op_to_count["QuantizeLinear"], 0);
|
||||
|
|
@ -783,7 +738,7 @@ TEST(QDQTransformerTests, Resize) {
|
|||
};
|
||||
|
||||
TransformerTester(BuildQDQResizeTestCase(input1_shape, sizes_shape),
|
||||
check_resize_graph,
|
||||
check_graph,
|
||||
TransformerLevel::Level1,
|
||||
TransformerLevel::Level2);
|
||||
};
|
||||
|
|
@ -828,7 +783,7 @@ TEST(QDQTransformerTests, Resize_No_Fusion) {
|
|||
builder.AddQuantizeLinearNode<uint8_t>(resize_output, .003f, 1, output_arg);
|
||||
};
|
||||
|
||||
auto check_qdq_graph = [&](InferenceSessionWrapper& session) {
|
||||
auto check_graph = [&](InferenceSessionWrapper& session) {
|
||||
auto op_to_count = CountOpsInGraph(session.GetGraph());
|
||||
EXPECT_EQ(op_to_count["Resize"], 1);
|
||||
EXPECT_EQ(op_to_count["Concat"], 1);
|
||||
|
|
@ -836,7 +791,7 @@ TEST(QDQTransformerTests, Resize_No_Fusion) {
|
|||
EXPECT_EQ(op_to_count["DequantizeLinear"], 1);
|
||||
};
|
||||
|
||||
TransformerTester(build_test_case, check_qdq_graph,
|
||||
TransformerTester(build_test_case, check_graph,
|
||||
TransformerLevel::Level1,
|
||||
TransformerLevel::Level2);
|
||||
};
|
||||
|
|
@ -867,7 +822,7 @@ TEST(QDQTransformerTests, ResizeReshape) {
|
|||
builder.AddNode("Reshape", {qdq_resize_output, reshape_shape}, {output_arg});
|
||||
};
|
||||
|
||||
auto check_qdq_graph = [&](InferenceSessionWrapper& session) {
|
||||
auto check_graph = [&](InferenceSessionWrapper& session) {
|
||||
auto op_to_count = CountOpsInGraph(session.GetGraph());
|
||||
EXPECT_EQ(op_to_count["Resize"], 1);
|
||||
EXPECT_EQ(op_to_count["Reshape"], 1);
|
||||
|
|
@ -875,7 +830,7 @@ TEST(QDQTransformerTests, ResizeReshape) {
|
|||
EXPECT_EQ(op_to_count["DequantizeLinear"], 1);
|
||||
};
|
||||
|
||||
TransformerTester(build_test_case, check_qdq_graph,
|
||||
TransformerTester(build_test_case, check_graph,
|
||||
TransformerLevel::Level1,
|
||||
TransformerLevel::Level2);
|
||||
};
|
||||
|
|
@ -905,13 +860,13 @@ TEST(QDQTransformerTests, ArgMax) {
|
|||
argmax_node.AddAttribute("select_last_index", static_cast<int64_t>(select_last_index));
|
||||
};
|
||||
|
||||
auto check_argmax_graph = [&](InferenceSessionWrapper& session) {
|
||||
auto check_graph = [&](InferenceSessionWrapper& session) {
|
||||
auto op_to_count = CountOpsInGraph(session.GetGraph());
|
||||
EXPECT_EQ(op_to_count["ArgMax"], 1);
|
||||
EXPECT_EQ(op_to_count["DequantizeLinear"], 0);
|
||||
};
|
||||
|
||||
TransformerTester(build_test_case, check_argmax_graph,
|
||||
TransformerTester(build_test_case, check_graph,
|
||||
TransformerLevel::Level1,
|
||||
TransformerLevel::Level2,
|
||||
/* opset_version */ 13);
|
||||
|
|
@ -939,14 +894,14 @@ TEST(QDQTransformerTests, QLinearMatMul) {
|
|||
builder.AddQuantizeLinearNode<uint8_t>(matmul_output, .0039f, 135, output_arg);
|
||||
};
|
||||
|
||||
auto check_matmul_graph = [&](InferenceSessionWrapper& session) {
|
||||
auto check_graph = [&](InferenceSessionWrapper& session) {
|
||||
auto op_to_count = CountOpsInGraph(session.GetGraph());
|
||||
EXPECT_EQ(op_to_count["QLinearMatMul"], 1);
|
||||
EXPECT_EQ(op_to_count["QuantizeLinear"], 2);
|
||||
EXPECT_EQ(op_to_count["DequantizeLinear"], 0);
|
||||
};
|
||||
|
||||
TransformerTester(build_test_case, check_matmul_graph, TransformerLevel::Level1, TransformerLevel::Level2);
|
||||
TransformerTester(build_test_case, check_graph, TransformerLevel::Level1, TransformerLevel::Level2);
|
||||
};
|
||||
|
||||
test_case({12, 37}, {37, 12});
|
||||
|
|
@ -970,7 +925,7 @@ TEST(QDQTransformerTests, MatMul_No_Fusion) {
|
|||
builder.AddQuantizeLinearNode<uint8_t>(matmul_output, .0039f, 135, output_arg);
|
||||
};
|
||||
|
||||
auto check_matmul_graph = [&](InferenceSessionWrapper& session) {
|
||||
auto check_graph = [&](InferenceSessionWrapper& session) {
|
||||
auto op_to_count = CountOpsInGraph(session.GetGraph());
|
||||
EXPECT_EQ(op_to_count["MatMul"], 1);
|
||||
EXPECT_EQ(op_to_count["QLinearMatMul"], 0);
|
||||
|
|
@ -978,7 +933,7 @@ TEST(QDQTransformerTests, MatMul_No_Fusion) {
|
|||
EXPECT_EQ(op_to_count["DequantizeLinear"], 1);
|
||||
};
|
||||
|
||||
TransformerTester(build_test_case, check_matmul_graph, TransformerLevel::Level1, TransformerLevel::Level2);
|
||||
TransformerTester(build_test_case, check_graph, TransformerLevel::Level1, TransformerLevel::Level2);
|
||||
};
|
||||
|
||||
test_case({12, 37}, {37, 12});
|
||||
|
|
@ -1006,7 +961,7 @@ TEST(QDQTransformerTests, MatMul_1st_Input_Int8) {
|
|||
builder.AddQuantizeLinearNode<uint8_t>(matmul_output, .0039f, 135, output_arg);
|
||||
};
|
||||
|
||||
auto check_matmul_graph = [&](InferenceSessionWrapper& session) {
|
||||
auto check_graph = [&](InferenceSessionWrapper& session) {
|
||||
auto op_to_count = CountOpsInGraph(session.GetGraph());
|
||||
EXPECT_EQ(op_to_count["MatMul"], 1);
|
||||
EXPECT_EQ(op_to_count["QLinearMatMul"], 0);
|
||||
|
|
@ -1014,7 +969,7 @@ TEST(QDQTransformerTests, MatMul_1st_Input_Int8) {
|
|||
EXPECT_EQ(op_to_count["DequantizeLinear"], 2);
|
||||
};
|
||||
|
||||
TransformerTester(build_test_case, check_matmul_graph, TransformerLevel::Level1, TransformerLevel::Level2);
|
||||
TransformerTester(build_test_case, check_graph, TransformerLevel::Level1, TransformerLevel::Level2);
|
||||
};
|
||||
|
||||
test_case({12, 37}, {37, 12});
|
||||
|
|
@ -1043,7 +998,7 @@ TEST(QDQTransformerTests, MatMulIntegerToFloat) {
|
|||
builder.AddNode("MatMul", {dq_output_1, dq_output_2}, {output_arg});
|
||||
};
|
||||
|
||||
auto check_matmul_graph = [&](InferenceSessionWrapper& session) {
|
||||
auto check_graph = [&](InferenceSessionWrapper& session) {
|
||||
auto op_to_count = CountOpsInGraph(session.GetGraph());
|
||||
EXPECT_EQ(op_to_count["com.microsoft.MatMulIntegerToFloat"], 1);
|
||||
EXPECT_EQ(op_to_count["QuantizeLinear"], 0);
|
||||
|
|
@ -1051,7 +1006,7 @@ TEST(QDQTransformerTests, MatMulIntegerToFloat) {
|
|||
};
|
||||
|
||||
TransformerTester(build_test_case,
|
||||
check_matmul_graph,
|
||||
check_graph,
|
||||
TransformerLevel::Level1,
|
||||
TransformerLevel::Level2,
|
||||
12 /*opset_version*/,
|
||||
|
|
@ -1086,7 +1041,7 @@ TEST(QDQTransformerTests, ConvRelu) {
|
|||
builder.AddQuantizeLinearNode<uint8_t>(relu_output, .0039f, is_zp_zero ? 0 : 1, output_arg);
|
||||
};
|
||||
|
||||
auto check_mp_reshape_graph = [&](InferenceSessionWrapper& session) {
|
||||
auto check_graph = [&](InferenceSessionWrapper& session) {
|
||||
auto op_to_count = CountOpsInGraph(session.GetGraph());
|
||||
if (is_zp_zero) {
|
||||
EXPECT_EQ(op_to_count["QLinearConv"], 1);
|
||||
|
|
@ -1104,7 +1059,7 @@ TEST(QDQTransformerTests, ConvRelu) {
|
|||
}
|
||||
};
|
||||
|
||||
TransformerTester(build_test_case, check_mp_reshape_graph, TransformerLevel::Level1, TransformerLevel::Level2);
|
||||
TransformerTester(build_test_case, check_graph, TransformerLevel::Level1, TransformerLevel::Level2);
|
||||
};
|
||||
|
||||
test_case({1, 12, 37}, {32, 12, 5}, true);
|
||||
|
|
@ -1150,7 +1105,7 @@ TEST(QDQTransformerTests, ConvAveragePoolReshape_UInt8) {
|
|||
builder.AddDequantizeLinearNode<uint8_t>(q_output, .0035f, 135, output_arg);
|
||||
};
|
||||
|
||||
auto check_mp_reshape_graph = [&](InferenceSessionWrapper& session) {
|
||||
auto check_graph = [&](InferenceSessionWrapper& session) {
|
||||
auto op_to_count = CountOpsInGraph(session.GetGraph());
|
||||
EXPECT_EQ(op_to_count["QLinearConv"], 1);
|
||||
EXPECT_EQ(op_to_count["com.microsoft.QLinearAveragePool"], 1);
|
||||
|
|
@ -1160,7 +1115,7 @@ TEST(QDQTransformerTests, ConvAveragePoolReshape_UInt8) {
|
|||
};
|
||||
|
||||
TransformerTester(build_test_case,
|
||||
check_mp_reshape_graph,
|
||||
check_graph,
|
||||
TransformerLevel::Level1,
|
||||
TransformerLevel::Level2,
|
||||
12 /*opset_version*/,
|
||||
|
|
@ -1213,7 +1168,7 @@ TEST(QDQTransformerTests, ConvAveragePoolReshape_Int8) {
|
|||
}
|
||||
};
|
||||
|
||||
auto check_mp_reshape_graph = [&](InferenceSessionWrapper& session) {
|
||||
auto check_graph = [&](InferenceSessionWrapper& session) {
|
||||
auto op_to_count = CountOpsInGraph(session.GetGraph());
|
||||
EXPECT_EQ(op_to_count["QLinearConv"], 1);
|
||||
EXPECT_EQ(op_to_count["com.microsoft.QLinearAveragePool"], 1);
|
||||
|
|
@ -1223,7 +1178,7 @@ TEST(QDQTransformerTests, ConvAveragePoolReshape_Int8) {
|
|||
};
|
||||
|
||||
TransformerTester(build_test_case,
|
||||
check_mp_reshape_graph,
|
||||
check_graph,
|
||||
TransformerLevel::Level1,
|
||||
TransformerLevel::Level2,
|
||||
12 /*opset_version*/,
|
||||
|
|
@ -1277,7 +1232,7 @@ TEST(QDQTransformerTests, ConvAveragePoolReshape_Int8_Fail) {
|
|||
}
|
||||
};
|
||||
|
||||
auto check_mp_reshape_graph = [&](InferenceSessionWrapper& session) {
|
||||
auto check_graph = [&](InferenceSessionWrapper& session) {
|
||||
auto op_to_count = CountOpsInGraph(session.GetGraph());
|
||||
EXPECT_EQ(op_to_count["Conv"], 1);
|
||||
EXPECT_EQ(op_to_count["QLinearConv"], 0);
|
||||
|
|
@ -1288,7 +1243,7 @@ TEST(QDQTransformerTests, ConvAveragePoolReshape_Int8_Fail) {
|
|||
};
|
||||
|
||||
TransformerTester(build_test_case,
|
||||
check_mp_reshape_graph,
|
||||
check_graph,
|
||||
TransformerLevel::Level1,
|
||||
TransformerLevel::Level2,
|
||||
12 /*opset_version*/,
|
||||
|
|
@ -1325,7 +1280,7 @@ void QDQTransformerLeakyReluTests() {
|
|||
output_arg);
|
||||
};
|
||||
|
||||
auto check_binary_op_graph = [&](InferenceSessionWrapper& session) {
|
||||
auto check_graph = [&](InferenceSessionWrapper& session) {
|
||||
auto op_to_count = CountOpsInGraph(session.GetGraph());
|
||||
if constexpr (std::is_same<InputType, OutputType>::value) {
|
||||
EXPECT_EQ(op_to_count["com.microsoft.QLinearLeakyRelu"], 1);
|
||||
|
|
@ -1341,7 +1296,7 @@ void QDQTransformerLeakyReluTests() {
|
|||
};
|
||||
|
||||
TransformerTester(build_test_case,
|
||||
check_binary_op_graph,
|
||||
check_graph,
|
||||
TransformerLevel::Level1,
|
||||
TransformerLevel::Level2,
|
||||
12 /*opset_version*/,
|
||||
|
|
@ -1401,7 +1356,7 @@ TEST(QDQTransformerTests, ConvTranspose_QBackward) {
|
|||
}
|
||||
};
|
||||
|
||||
auto check_mp_reshape_graph = [&](InferenceSessionWrapper& session) {
|
||||
auto check_graph = [&](InferenceSessionWrapper& session) {
|
||||
auto op_to_count = CountOpsInGraph(session.GetGraph());
|
||||
EXPECT_EQ(op_to_count["QLinearConv"], 1);
|
||||
EXPECT_EQ(op_to_count["Transpose"], 1);
|
||||
|
|
@ -1410,7 +1365,7 @@ TEST(QDQTransformerTests, ConvTranspose_QBackward) {
|
|||
};
|
||||
|
||||
TransformerTester(build_test_case,
|
||||
check_mp_reshape_graph,
|
||||
check_graph,
|
||||
TransformerLevel::Level1,
|
||||
TransformerLevel::Level2);
|
||||
};
|
||||
|
|
@ -1461,7 +1416,7 @@ TEST(QDQTransformerTests, QBackward_MutilpleSteps) {
|
|||
}
|
||||
};
|
||||
|
||||
auto check_mp_reshape_graph = [&](InferenceSessionWrapper& session) {
|
||||
auto check_graph = [&](InferenceSessionWrapper& session) {
|
||||
auto op_to_count = CountOpsInGraph(session.GetGraph());
|
||||
EXPECT_EQ(op_to_count["QLinearConv"], 1);
|
||||
EXPECT_EQ(op_to_count["MaxPool"], 1);
|
||||
|
|
@ -1472,7 +1427,7 @@ TEST(QDQTransformerTests, QBackward_MutilpleSteps) {
|
|||
};
|
||||
|
||||
TransformerTester(build_test_case,
|
||||
check_mp_reshape_graph,
|
||||
check_graph,
|
||||
TransformerLevel::Level1,
|
||||
TransformerLevel::Level2);
|
||||
};
|
||||
|
|
@ -1512,7 +1467,7 @@ TEST(QDQTransformerTests, ConvTranspose_DQForward) {
|
|||
}
|
||||
};
|
||||
|
||||
auto check_mp_reshape_graph = [&](InferenceSessionWrapper& session) {
|
||||
auto check_graph = [&](InferenceSessionWrapper& session) {
|
||||
auto op_to_count = CountOpsInGraph(session.GetGraph());
|
||||
EXPECT_EQ(op_to_count["QLinearConv"], 1);
|
||||
EXPECT_EQ(op_to_count["Transpose"], 1);
|
||||
|
|
@ -1521,7 +1476,7 @@ TEST(QDQTransformerTests, ConvTranspose_DQForward) {
|
|||
};
|
||||
|
||||
TransformerTester(build_test_case,
|
||||
check_mp_reshape_graph,
|
||||
check_graph,
|
||||
TransformerLevel::Level1,
|
||||
TransformerLevel::Level2);
|
||||
};
|
||||
|
|
@ -1572,7 +1527,7 @@ TEST(QDQTransformerTests, DQForward_MutilpleSteps) {
|
|||
}
|
||||
};
|
||||
|
||||
auto check_mp_reshape_graph = [&](InferenceSessionWrapper& session) {
|
||||
auto check_graph = [&](InferenceSessionWrapper& session) {
|
||||
auto op_to_count = CountOpsInGraph(session.GetGraph());
|
||||
EXPECT_EQ(op_to_count["QLinearConv"], 1);
|
||||
EXPECT_EQ(op_to_count["MaxPool"], 1);
|
||||
|
|
@ -1583,7 +1538,7 @@ TEST(QDQTransformerTests, DQForward_MutilpleSteps) {
|
|||
};
|
||||
|
||||
TransformerTester(build_test_case,
|
||||
check_mp_reshape_graph,
|
||||
check_graph,
|
||||
TransformerLevel::Level1,
|
||||
TransformerLevel::Level2);
|
||||
};
|
||||
|
|
@ -1704,7 +1659,7 @@ TEST(QDQTransformerTests, Concat) {
|
|||
}
|
||||
};
|
||||
|
||||
auto check_mp_reshape_graph = [&input_shapes, &has_input_float, &has_input_int8, &has_output_int8](InferenceSessionWrapper& session) {
|
||||
auto check_graph = [&input_shapes, &has_input_float, &has_input_int8, &has_output_int8](InferenceSessionWrapper& session) {
|
||||
auto op_to_count = CountOpsInGraph(session.GetGraph());
|
||||
if (has_input_float || has_input_int8 || has_output_int8) {
|
||||
EXPECT_EQ(op_to_count["com.microsoft.QLinearConcat"], 0);
|
||||
|
|
@ -1716,7 +1671,7 @@ TEST(QDQTransformerTests, Concat) {
|
|||
};
|
||||
|
||||
TransformerTester(build_test_case,
|
||||
check_mp_reshape_graph,
|
||||
check_graph,
|
||||
TransformerLevel::Level1,
|
||||
TransformerLevel::Level2,
|
||||
12 /*opset_version*/,
|
||||
|
|
@ -1763,7 +1718,7 @@ TEST(QDQTransformerTests, QDQPropagation_QDQCancelOut) {
|
|||
builder.AddNode("Reshape", {maxpool_output, reshape_shape}, {output_arg});
|
||||
};
|
||||
|
||||
auto check_mp_reshape_graph = [&](InferenceSessionWrapper& session) {
|
||||
auto check_graph = [&](InferenceSessionWrapper& session) {
|
||||
auto op_to_count = CountOpsInGraph(session.GetGraph());
|
||||
EXPECT_EQ(op_to_count["MaxPool"], 1);
|
||||
EXPECT_EQ(op_to_count["Reshape"], 1);
|
||||
|
|
@ -1773,7 +1728,7 @@ TEST(QDQTransformerTests, QDQPropagation_QDQCancelOut) {
|
|||
};
|
||||
|
||||
TransformerTester(build_test_case,
|
||||
check_mp_reshape_graph,
|
||||
check_graph,
|
||||
TransformerLevel::Level1,
|
||||
TransformerLevel::Level2);
|
||||
};
|
||||
|
|
@ -1799,7 +1754,7 @@ TEST(QDQTransformerTests, QDQPropagation_QDQ_CancelOut_More) {
|
|||
builder.AddQuantizeLinearNode<uint8_t>(reshape_output, same_scale ? .004f : .0039f, same_zp ? 129 : 128, output_arg);
|
||||
};
|
||||
|
||||
auto check_mp_reshape_graph = [&](InferenceSessionWrapper& session) {
|
||||
auto check_graph = [&](InferenceSessionWrapper& session) {
|
||||
auto op_to_count = CountOpsInGraph(session.GetGraph());
|
||||
EXPECT_EQ(op_to_count["Reshape"], 1);
|
||||
EXPECT_EQ(op_to_count["QuantizeLinear"], same_scale && same_zp ? 1 : 2);
|
||||
|
|
@ -1807,7 +1762,7 @@ TEST(QDQTransformerTests, QDQPropagation_QDQ_CancelOut_More) {
|
|||
};
|
||||
|
||||
TransformerTester(build_test_case,
|
||||
check_mp_reshape_graph,
|
||||
check_graph,
|
||||
TransformerLevel::Level1,
|
||||
TransformerLevel::Level2);
|
||||
};
|
||||
|
|
@ -1833,7 +1788,7 @@ TEST(QDQTransformerTests, QDQPropagation_Q_No_Parent) {
|
|||
builder.AddQuantizeLinearNode<uint8_t>(transpose_output, .0035f, 135, output_arg);
|
||||
};
|
||||
|
||||
auto check_mp_reshape_graph = [&](InferenceSessionWrapper& session) {
|
||||
auto check_graph = [&](InferenceSessionWrapper& session) {
|
||||
GraphViewer graph_viewer(session.GetGraph());
|
||||
const auto& node_topology_list = graph_viewer.GetNodesInTopologicalOrder();
|
||||
EXPECT_EQ(graph_viewer.GetNode(node_topology_list[0])->OpType(), "QuantizeLinear");
|
||||
|
|
@ -1841,7 +1796,7 @@ TEST(QDQTransformerTests, QDQPropagation_Q_No_Parent) {
|
|||
};
|
||||
|
||||
TransformerTester(build_test_case,
|
||||
check_mp_reshape_graph,
|
||||
check_graph,
|
||||
TransformerLevel::Level1,
|
||||
TransformerLevel::Level2);
|
||||
};
|
||||
|
|
@ -1866,7 +1821,7 @@ TEST(QDQTransformerTests, QDQPropagation_DQ_No_Children) {
|
|||
transpose_node.AddAttribute("perm", perms);
|
||||
};
|
||||
|
||||
auto check_mp_reshape_graph = [&](InferenceSessionWrapper& session) {
|
||||
auto check_graph = [&](InferenceSessionWrapper& session) {
|
||||
auto op_to_count = CountOpsInGraph(session.GetGraph());
|
||||
GraphViewer graph_viewer(session.GetGraph());
|
||||
const auto& node_topology_list = graph_viewer.GetNodesInTopologicalOrder();
|
||||
|
|
@ -1875,7 +1830,7 @@ TEST(QDQTransformerTests, QDQPropagation_DQ_No_Children) {
|
|||
};
|
||||
|
||||
TransformerTester(build_test_case,
|
||||
check_mp_reshape_graph,
|
||||
check_graph,
|
||||
TransformerLevel::Level1,
|
||||
TransformerLevel::Level2);
|
||||
};
|
||||
|
|
@ -1902,7 +1857,7 @@ TEST(QDQTransformerTests, QDQPropagation_Per_Layer_No_Propagation) {
|
|||
transpose_node.AddAttribute("perm", perms);
|
||||
};
|
||||
|
||||
auto check_mp_reshape_graph = [&](InferenceSessionWrapper& session) {
|
||||
auto check_graph = [&](InferenceSessionWrapper& session) {
|
||||
auto op_to_count = CountOpsInGraph(session.GetGraph());
|
||||
GraphViewer graph_viewer(session.GetGraph());
|
||||
const auto& node_topology_list = graph_viewer.GetNodesInTopologicalOrder();
|
||||
|
|
@ -1911,7 +1866,7 @@ TEST(QDQTransformerTests, QDQPropagation_Per_Layer_No_Propagation) {
|
|||
};
|
||||
|
||||
TransformerTester(build_test_case,
|
||||
check_mp_reshape_graph,
|
||||
check_graph,
|
||||
TransformerLevel::Level1,
|
||||
TransformerLevel::Level2);
|
||||
};
|
||||
|
|
@ -1935,14 +1890,14 @@ TEST(QDQTransformerTests, QDQPropagation_DQ_Q) {
|
|||
builder.AddQuantizeLinearNode<uint8_t>(dq_output, .0035f, 135, output_arg);
|
||||
};
|
||||
|
||||
auto check_mp_reshape_graph = [&](InferenceSessionWrapper& session) {
|
||||
auto check_graph = [&](InferenceSessionWrapper& session) {
|
||||
auto op_to_count = CountOpsInGraph(session.GetGraph());
|
||||
EXPECT_EQ(op_to_count["QuantizeLinear"], 1);
|
||||
EXPECT_EQ(op_to_count["DequantizeLinear"], 1);
|
||||
};
|
||||
|
||||
TransformerTester(build_test_case,
|
||||
check_mp_reshape_graph,
|
||||
check_graph,
|
||||
TransformerLevel::Level1,
|
||||
TransformerLevel::Level2);
|
||||
};
|
||||
|
|
|
|||
|
|
@ -271,7 +271,9 @@ TEST(NnapiExecutionProviderTest, TestNoShapeInputModel) {
|
|||
<< "No node should be taken by the NNAPI EP";
|
||||
}
|
||||
|
||||
static void RunQDQModelTest(const GetQDQTestCaseFn& build_test_case, const char* test_description) {
|
||||
static void RunQDQModelTest(const GetQDQTestCaseFn& build_test_case,
|
||||
const char* test_description,
|
||||
const EPVerificationParams& params = EPVerificationParams()) {
|
||||
onnxruntime::Model model(test_description, false, DefaultLoggingManager().DefaultLogger());
|
||||
Graph& graph = model.MainGraph();
|
||||
ModelTestBuilder helper(graph);
|
||||
|
|
@ -286,7 +288,7 @@ static void RunQDQModelTest(const GetQDQTestCaseFn& build_test_case, const char*
|
|||
#if defined(__ANDROID__)
|
||||
RunAndVerifyOutputsWithEP(model_data, "NnapiExecutionProviderTest.TestQDQModel",
|
||||
std::make_unique<NnapiExecutionProvider>(0),
|
||||
helper.feeds_);
|
||||
helper.feeds_, params);
|
||||
#else
|
||||
// test load only
|
||||
SessionOptions so;
|
||||
|
|
@ -306,7 +308,8 @@ TEST(NnapiExecutionProviderTest, TestQDQConv) {
|
|||
uint8_t /* OutputType */>(
|
||||
{1, 1, 5, 5} /*input_shape*/,
|
||||
{1, 1, 3, 3} /*weights_shape*/),
|
||||
"nnapi_qdq_test_graph_conv");
|
||||
"nnapi_qdq_test_graph_conv",
|
||||
{true /* verify_entire_graph_use_ep */});
|
||||
}
|
||||
|
||||
TEST(NnapiExecutionProviderTest, TestQDQResize) {
|
||||
|
|
@ -316,14 +319,44 @@ TEST(NnapiExecutionProviderTest, TestQDQResize) {
|
|||
{1, 3, 32, 32} /* sizes_data */,
|
||||
"linear" /* mode */,
|
||||
"asymmetric" /* coordinate_transformation_mode */),
|
||||
"nnapi_qdq_test_graph_resize");
|
||||
"nnapi_qdq_test_graph_resize",
|
||||
{true /* verify_entire_graph_use_ep */});
|
||||
}
|
||||
|
||||
TEST(NnapiExecutionProviderTest, TestQDQAveragePool) {
|
||||
// NNAPI use different rounding, which may cause ~1% difference in the result
|
||||
RunQDQModelTest(BuildQDQAveragePoolTestCase<uint8_t /* InputType */,
|
||||
uint8_t /* OutputType */>(
|
||||
{1, 3, 32, 32} /* input_shape */),
|
||||
"nnapi_qdq_test_graph_averagepool");
|
||||
"nnapi_qdq_test_graph_averagepool",
|
||||
{
|
||||
true /* verify_entire_graph_use_ep */,
|
||||
1e-2f /* fp32_abs_err */,
|
||||
});
|
||||
}
|
||||
|
||||
// Runs a QDQ "Add" model (uint8 inputs and output, input shape {1, 23, 13, 13}) through RunQDQModelTest.
// verify_entire_graph_use_ep is set, so the EP verification requires every node to be assigned to the EP;
// fp32_abs_err is left at its default.
TEST(NnapiExecutionProviderTest, TestQDQAdd) {
|
||||
RunQDQModelTest(BuildBinaryOpTestCase<uint8_t /* Input1Type */,
|
||||
uint8_t /* Input2Type */,
|
||||
uint8_t /* OutputType */>(
|
||||
{1, 23, 13, 13} /* input_shape */,
|
||||
"Add" /* op_type */),
|
||||
"nnapi_qdq_test_graph_add",
|
||||
{true /* verify_entire_graph_use_ep */});
|
||||
}
|
||||
|
||||
// Runs a QDQ "Mul" model (uint8 inputs and output, input shape {1, 23, 13, 13}) through RunQDQModelTest.
// verify_entire_graph_use_ep is set, so the EP verification requires every node to be assigned to the EP.
TEST(NnapiExecutionProviderTest, TestQDQMul) {
|
||||
// NNAPI uses different rounding, which may cause a ~1% difference in the result,
// so the fp32 absolute error tolerance is relaxed to 1e-2f below.
|
||||
RunQDQModelTest(BuildBinaryOpTestCase<uint8_t /* Input1Type */,
|
||||
uint8_t /* Input2Type */,
|
||||
uint8_t /* OutputType */>(
|
||||
{1, 23, 13, 13} /* input_shape */,
|
||||
"Mul" /* op_type */),
|
||||
"nnapi_qdq_test_graph_mul",
|
||||
{
|
||||
true /* verify_entire_graph_use_ep */,
|
||||
1e-2f /* fp32_abs_err */,
|
||||
});
|
||||
}
|
||||
|
||||
#endif // !(ORT_MINIMAL_BUILD)
|
||||
|
|
|
|||
|
|
@ -15,6 +15,18 @@ class Graph;
|
|||
|
||||
namespace test {
|
||||
|
||||
// Struct holding verification parameters for RunAndVerifyOutputsWithEP.
// Both fields have defaults, so a default-constructed instance keeps the default checks.
|
||||
struct EPVerificationParams {
|
||||
// If true, verify that the entire graph is taken by the EP
// (the number of nodes assigned to the EP must equal the number of nodes in the graph).
|
||||
// If false, only verify that at least one node is assigned to 'execution_provider'.
|
||||
bool verify_entire_graph_use_ep{false};
|
||||
|
||||
// Some EPs may use different rounding than the ORT CPU EP, which may cause a bigger abs error than
|
||||
// the default of 1e-5f, especially for scenarios such as [Q -> Quantized op -> DQ].
|
||||
// Set this only if necessary. Used as the FloatNear tolerance when comparing float outputs.
|
||||
float fp32_abs_err = 1e-5f;
|
||||
};
|
||||
|
||||
// return number of nodes in the Graph and any subgraphs that are assigned to the specified execution provider
|
||||
int CountAssignedNodes(const Graph& current_graph, const std::string& ep_type);
|
||||
|
||||
|
|
@ -23,13 +35,14 @@ int CountAssignedNodes(const Graph& current_graph, const std::string& ep_type);
|
|||
void RunAndVerifyOutputsWithEP(const ORTCHAR_T* model_path,
|
||||
const char* log_id,
|
||||
std::unique_ptr<IExecutionProvider> execution_provider,
|
||||
const NameMLValMap& feeds);
|
||||
const NameMLValMap& feeds,
|
||||
const EPVerificationParams& params = EPVerificationParams());
|
||||
|
||||
// helper function that takes in model_data
|
||||
// used in nnapi qdq model tests
|
||||
void RunAndVerifyOutputsWithEP(const std::string& model_data,
|
||||
const char* log_id,
|
||||
std::unique_ptr<IExecutionProvider> execution_provider,
|
||||
const NameMLValMap& feeds);
|
||||
const NameMLValMap& feeds,
|
||||
const EPVerificationParams& params = EPVerificationParams());
|
||||
} // namespace test
|
||||
} // namespace onnxruntime
|
||||
|
|
|
|||
|
|
@ -18,7 +18,8 @@ namespace onnxruntime {
|
|||
namespace test {
|
||||
static void VerifyOutputs(const std::vector<std::string>& output_names,
|
||||
const std::vector<OrtValue>& expected_fetches,
|
||||
const std::vector<OrtValue>& fetches) {
|
||||
const std::vector<OrtValue>& fetches,
|
||||
const EPVerificationParams& params) {
|
||||
ASSERT_EQ(expected_fetches.size(), fetches.size());
|
||||
|
||||
for (size_t i = 0, end = expected_fetches.size(); i < end; ++i) {
|
||||
|
|
@ -40,10 +41,8 @@ static void VerifyOutputs(const std::vector<std::string>& output_names,
|
|||
<< " mismatch for " << output_names[i];
|
||||
break;
|
||||
case ONNX_NAMESPACE::TensorProto_DataType_FLOAT: {
|
||||
constexpr float abs_err = 1e-5f;
|
||||
|
||||
EXPECT_THAT(ltensor.DataAsSpan<float>(),
|
||||
::testing::Pointwise(::testing::FloatNear(abs_err), rtensor.DataAsSpan<float>()));
|
||||
::testing::Pointwise(::testing::FloatNear(params.fp32_abs_err), rtensor.DataAsSpan<float>()));
|
||||
break;
|
||||
}
|
||||
default:
|
||||
|
|
@ -72,16 +71,18 @@ int CountAssignedNodes(const Graph& current_graph, const std::string& ep_type) {
|
|||
|
||||
void RunAndVerifyOutputsWithEP(const ORTCHAR_T* model_path, const char* log_id,
|
||||
std::unique_ptr<IExecutionProvider> execution_provider,
|
||||
const NameMLValMap& feeds) {
|
||||
const NameMLValMap& feeds,
|
||||
const EPVerificationParams& params) {
|
||||
// read raw data from model provided by the model_path
|
||||
std::ifstream stream(model_path, std::ios::in | std::ios::binary);
|
||||
std::string model_data((std::istreambuf_iterator<char>(stream)), std::istreambuf_iterator<char>());
|
||||
RunAndVerifyOutputsWithEP(model_data, log_id, std::move(execution_provider), feeds);
|
||||
RunAndVerifyOutputsWithEP(model_data, log_id, std::move(execution_provider), feeds, params);
|
||||
}
|
||||
|
||||
void RunAndVerifyOutputsWithEP(const std::string& model_data, const char* log_id,
|
||||
std::unique_ptr<IExecutionProvider> execution_provider,
|
||||
const NameMLValMap& feeds) {
|
||||
const NameMLValMap& feeds,
|
||||
const EPVerificationParams& params) {
|
||||
SessionOptions so;
|
||||
so.session_logid = log_id;
|
||||
RunOptions run_options;
|
||||
|
|
@ -122,12 +123,17 @@ void RunAndVerifyOutputsWithEP(const std::string& model_data, const char* log_id
|
|||
// make sure that some nodes are assigned to the EP, otherwise this test is pointless...
|
||||
const auto& graph2 = session_object2.GetGraph();
|
||||
auto ep_nodes = CountAssignedNodes(graph2, provider_type);
|
||||
ASSERT_GT(ep_nodes, 0) << "No nodes were assigned to " << provider_type;
|
||||
if (params.verify_entire_graph_use_ep) {
|
||||
// Verify the entire graph is assigned to the EP
|
||||
ASSERT_EQ(ep_nodes, graph2.NumberOfNodes()) << "Not all nodes were assigned to " << provider_type;
|
||||
} else {
|
||||
ASSERT_GT(ep_nodes, 0) << "No nodes were assigned to " << provider_type;
|
||||
}
|
||||
|
||||
// Run with EP and verify the result
|
||||
std::vector<OrtValue> fetches;
|
||||
ASSERT_STATUS_OK(session_object2.Run(run_options, feeds, output_names, &fetches));
|
||||
VerifyOutputs(output_names, expected_fetches, fetches);
|
||||
VerifyOutputs(output_names, expected_fetches, fetches, params);
|
||||
}
|
||||
|
||||
#if !defined(DISABLE_SPARSE_TENSORS)
|
||||
|
|
|
|||
Loading…
Reference in a new issue