[QNN EP] Update QNN SDK to 2.12 (#16750)

### Description
- Updates the default QNN SDK to 2.12 for CI pipelines
- Adds a disabled InstanceNormalization test for regression on QNN SDK
2.12
- Cleans up logs for unsupported ops.

### Motivation and Context
Test with the latest QNN SDK.
This commit is contained in:
Adrian Lizarraga 2023-07-20 16:22:14 -07:00 committed by GitHub
parent eaea34f8e2
commit a8c263f92c
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
9 changed files with 137 additions and 74 deletions

View file

@ -173,29 +173,33 @@ bool QNNExecutionProvider::IsNodeSupported(qnn::QnnModelWrapper& qnn_model_wrapp
// For Cast, need to call IsOpSupported (below) to validate input and output types.
// For other single non-qdq nodes, immediately return not supported.
if (node_unit.OpType() != "Cast") {
LOGS(logger, VERBOSE) << "Non-QDQ single node is not supported for NPU backend. Node name: " << node_unit.Name()
<< " Op type: " << node_unit.OpType();
// Explain why this non-QDQ node is rejected. The node name is wrapped in a matching pair of
// backticks so it is clearly delimited in the log output (same format as the other messages here).
LOGS(logger, WARNING) << "Non-QDQ " << node_unit.OpType()
                      << " operators are not supported on HTP or DSP backends. " << node_unit.OpType()
                      << " node `" << node_unit.Name() << "` will not be assigned to QNN EP.";
return false;
}
}
// Non-NPU backend, quantized model not supported, but a QDQ node encountered
if (!is_npu_backend && IsQdqNode(node_unit)) {
LOGS(logger, ERROR) << "There's no reason to run a QDQ model on non HTP/DSP backend!";
LOGS(logger, ERROR) << "QDQ models are only supported on HTP or DSP backends. "
<< node_unit.OpType() << " node `" << node_unit.Name() << "` will not be assigned to QNN EP.";
return false;
}
bool supported = false;
const auto* op_builder = qnn::GetOpBuilder(node_unit.OpType());
if (op_builder == nullptr) {
LOGS(logger, VERBOSE) << "Op not implemented in QNN EP. Op type: " << node_unit.OpType();
// No op builder is registered for this op type. The first sentence ends with a trailing space so
// the op type that starts the second sentence is not glued to the preceding period.
LOGS(logger, WARNING) << "Operators of type `" << node_unit.OpType() << "` are not supported by QNN EP. "
                      << node_unit.OpType() << " node `" << node_unit.Name()
                      << "` will not be assigned to QNN EP.";
} else {
auto status = op_builder->IsOpSupported(qnn_model_wrapper,
node_unit, logger,
is_npu_backend);
if (Status::OK() != status) {
LOGS(logger, VERBOSE) << "Op type: " << node_unit.OpType()
<< ", not supported: " << status.ErrorMessage();
LOGS(logger, WARNING) << node_unit.OpType() << " node `" << node_unit.Name()
<< "` is not supported: " << status.ErrorMessage();
}
supported = (Status::OK() == status);
}

View file

@ -91,49 +91,6 @@ GetQDQTestCaseFn BuildQDQConvTransposeTestCase(const std::vector<int64_t>& input
};
}
// Creates the graph:
// _______________________
// input_u8 -> DQ -> | | -> Q -> output_u8
// scale_u8 (initializer) -> DQ -> | InstanceNormalization |
// bias_u8 (initializer) -> DQ -> |_______________________|
//
// Currently used to test QNN EP.
// Returns a callback that populates a ModelTestBuilder with a QDQ InstanceNormalization graph:
// three DequantizeLinear nodes (input, scale, bias) feed one InstanceNormalization node whose
// output goes through a QuantizeLinear node to the graph output.
//
// input_shape: shape of the graph input; element [1] is used as the length of the scale and bias initializers.
// epsilon: value set as the InstanceNormalization node's "epsilon" attribute.
template <typename InputQType, typename ScaleQType, typename BiasQType>
GetQDQTestCaseFn BuildQDQInstanceNormTestCase(const std::vector<int64_t>& input_shape, float epsilon) {
  return [input_shape, epsilon](ModelTestBuilder& builder) {
    const int64_t channel_count = input_shape[1];
    // The same scale/zero-point pair is used by every Q/DQ node except the bias DQ.
    const InputQType zero_point = 0;
    const float scale_factor = 1.0f;

    // scale (initializer) -> DQ ->
    auto* scale_dq_out = builder.MakeIntermediate();
    auto* scale_init = builder.MakeInitializer<ScaleQType>({channel_count}, static_cast<ScaleQType>(0),
                                                           static_cast<ScaleQType>(127));
    builder.AddDequantizeLinearNode<ScaleQType>(scale_init, scale_factor, zero_point, scale_dq_out);

    // bias (initializer) -> DQ ->
    auto* bias_dq_out = builder.MakeIntermediate();
    auto* bias_init = builder.MakeInitializer<BiasQType>({channel_count}, static_cast<BiasQType>(0),
                                                         static_cast<BiasQType>(4));
    builder.AddDequantizeLinearNode<BiasQType>(bias_init, 1.0f, 0, bias_dq_out);

    // input -> DQ ->
    auto* quantized_input = builder.MakeInput<InputQType>(input_shape, static_cast<InputQType>(0),
                                                          static_cast<InputQType>(10));
    auto* input_dq_out = builder.MakeIntermediate();
    builder.AddDequantizeLinearNode<InputQType>(quantized_input, scale_factor, zero_point, input_dq_out);

    // DQ outputs -> InstanceNormalization (with epsilon attribute) ->
    auto* norm_out = builder.MakeIntermediate();
    builder.AddNode("InstanceNormalization", {input_dq_out, scale_dq_out, bias_dq_out}, {norm_out})
        .AddAttribute("epsilon", epsilon);

    // -> Q -> graph output
    auto* quantized_output = builder.MakeOutput();
    builder.AddQuantizeLinearNode<InputQType>(norm_out, scale_factor, zero_point, quantized_output);
  };
}
// Creates the following graph:
// _______________________
// input (f32) -> Q -> DQ -> | | -> Q -> DQ -> output (f32)

View file

@ -5,6 +5,7 @@
#include <string>
#include "core/graph/graph.h"
#include "core/graph/node_attr_utils.h"
#include "test/optimizer/qdq_test_utils.h"
#include "test/providers/qnn/qnn_test_utils.h"
@ -15,15 +16,65 @@ namespace onnxruntime {
namespace test {
#if defined(__aarch64__) || defined(_M_ARM64) || defined(__linux__)
// Creates the graph:
// _______________________
// input_u8 -> DQ -> | | -> Q -> output_u8
// scale_u8 (initializer) -> DQ -> | InstanceNormalization |
// bias_u8 (initializer) -> DQ -> |_______________________|
//
// Currently used to test QNN EP.
// Returns a callback that populates a ModelTestBuilder with a QDQ InstanceNormalization graph.
// The input, scale and bias are each created from their TestInputDef and passed through a
// DequantizeLinear node; the InstanceNormalization output is passed through a QuantizeLinear
// node to the graph output.
//
// input_def: definition of the quantized data input.
// scale_def: definition of the quantized scale input.
// bias_def: definition of the int32 bias input.
// attrs: attribute protos added to the InstanceNormalization node, in order.
template <typename QuantType>
GetQDQTestCaseFn BuildQDQInstanceNormTestCase(const TestInputDef<QuantType>& input_def,
                                              const TestInputDef<QuantType>& scale_def,
                                              const TestInputDef<int32_t>& bias_def,
                                              const std::vector<ONNX_NAMESPACE::AttributeProto>& attrs) {
  return [input_def, scale_def, bias_def, attrs](ModelTestBuilder& builder) {
    // The same scale/zero-point pair is used by every Q/DQ node except the bias DQ.
    const QuantType zero_point = 0;
    const float scale_factor = 1.0f;

    // scale -> DQ ->
    auto* scale_dq_out = builder.MakeIntermediate();
    auto* scale_arg = MakeTestInput<QuantType>(builder, scale_def);
    builder.AddDequantizeLinearNode<QuantType>(scale_arg, scale_factor, zero_point, scale_dq_out);

    // bias -> DQ ->
    auto* bias_dq_out = builder.MakeIntermediate();
    auto* bias_arg = MakeTestInput<int32_t>(builder, bias_def);
    builder.AddDequantizeLinearNode<int32_t>(bias_arg, 1.0f, 0, bias_dq_out);

    // input -> DQ ->
    auto* quantized_input = MakeTestInput<QuantType>(builder, input_def);
    auto* input_dq_out = builder.MakeIntermediate();
    builder.AddDequantizeLinearNode<QuantType>(quantized_input, scale_factor, zero_point, input_dq_out);

    // DQ outputs -> InstanceNormalization ->
    auto* norm_out = builder.MakeIntermediate();
    Node& norm_node = builder.AddNode("InstanceNormalization", {input_dq_out, scale_dq_out, bias_dq_out},
                                      {norm_out});

    // Attach the caller-supplied attributes to the node.
    for (const auto& attr_proto : attrs) {
      norm_node.AddAttributeProto(attr_proto);
    }

    // -> Q -> graph output
    auto* quantized_output = builder.MakeOutput();
    builder.AddQuantizeLinearNode<QuantType>(norm_out, scale_factor, zero_point, quantized_output);
  };
}
/**
* Runs an InstanceNormalization model on the QNN HTP backend. Checks the graph node assignment, and that inference
* outputs for QNN and CPU match.
*
* \param input_shape The input's shape.
* \param epsilon The epsilon attribute.
* \param input_def The test input's definition (shape, is_initializer, data).
* \param scale_def The scale input's definition. Correct shapes must be 1D [num_input_channels].
* \param bias_def The bias input's definition. Correct shapes must be 1D [num_input_channels].
* \param attrs The node's attributes. The only valid attribute for InstanceNormalization is 'epsilon'.
* \param expected_ep_assignment How many nodes are expected to be assigned to QNN (All, Some, or None).
*/
static void RunInstanceNormQDQTest(const std::vector<int64_t>& input_shape, float epsilon,
template <typename QuantType = uint8_t>
static void RunInstanceNormQDQTest(const TestInputDef<QuantType>& input_def,
const TestInputDef<QuantType>& scale_def,
const TestInputDef<int32_t>& bias_def,
const std::vector<ONNX_NAMESPACE::AttributeProto>& attrs,
ExpectedEPNodeAssignment expected_ep_assignment) {
ProviderOptions provider_options;
#if defined(_WIN32)
@ -33,7 +84,7 @@ static void RunInstanceNormQDQTest(const std::vector<int64_t>& input_shape, floa
#endif
// Runs model with DQ-> InstanceNorm -> Q and compares the outputs of the CPU and QNN EPs.
RunQnnModelTest(BuildQDQInstanceNormTestCase<uint8_t, uint8_t, int32_t>(input_shape, epsilon),
RunQnnModelTest(BuildQDQInstanceNormTestCase<QuantType>(input_def, scale_def, bias_def, attrs),
provider_options,
18,
expected_ep_assignment);
@ -42,19 +93,43 @@ static void RunInstanceNormQDQTest(const std::vector<int64_t>& input_shape, floa
// Check that QNN compiles DQ -> InstanceNormalization -> Q as a single unit.
// Use an input of rank 4.
TEST_F(QnnHTPBackendTests, TestQDQInstanceNormU8) {
RunInstanceNormQDQTest({1, 2, 3, 3}, 1e-05f, ExpectedEPNodeAssignment::All);
RunInstanceNormQDQTest(TestInputDef<uint8_t>({1, 2, 3, 3}, false, 0, 255),
TestInputDef<uint8_t>({2}, true, 0, 127),
TestInputDef<int32_t>({2}, true, 0, 10),
{},
ExpectedEPNodeAssignment::All);
}
// Check that QNN compiles DQ -> InstanceNormalization -> Q as a single unit.
// Use an input of rank 3.
TEST_F(QnnHTPBackendTests, TestQDQInstanceNormU8Rank3) {
RunInstanceNormQDQTest({1, 2, 3}, 1e-05f, ExpectedEPNodeAssignment::All);
RunInstanceNormQDQTest(TestInputDef<uint8_t>({1, 2, 3}, false, {6, 4, 2, 6, 8, 2}),
TestInputDef<uint8_t>({2}, true, {1, 2}),
TestInputDef<int32_t>({2}, true, {1, 3}),
{},
ExpectedEPNodeAssignment::All);
}
// TODO: This test now fails in QNN SDK version 2.12.0 (windows arm64 and linux x86_64).
// This worked in QNN SDK version 2.10.0. Need to determine the severity of this inaccuracy.
//
// Expected output: 2 6 2 42 42 0
// Actual output: 2 6 2 43 43 0
TEST_F(QnnHTPBackendTests, DISABLED_TestQDQInstanceNormU8Rank3_QnnSdk_2_12_Regression) {
RunInstanceNormQDQTest(TestInputDef<uint8_t>({1, 2, 3}, false, {3, 4, 3, 9, 9, 8}),
TestInputDef<uint8_t>({2}, true, {2, 57}),
TestInputDef<int32_t>({2}, true, {3, 2}),
{},
ExpectedEPNodeAssignment::All);
}
// Check that QNN InstanceNorm operator does not handle inputs with rank > 4.
TEST_F(QnnHTPBackendTests, TestQDQInstanceNormU8Rank5) {
// No nodes should be assigned to QNN EP, and graph should have 5 (non-fused) nodes.
RunInstanceNormQDQTest({1, 2, 3, 3, 3}, 1e-05f, ExpectedEPNodeAssignment::None);
RunInstanceNormQDQTest(TestInputDef<uint8_t>({1, 2, 3, 3, 3}, false, 0, 255),
TestInputDef<uint8_t>({2}, true, 0, 127),
TestInputDef<int32_t>({2}, true, 0, 10),
{},
ExpectedEPNodeAssignment::None);
}
#endif // defined(__aarch64__) || defined(_M_ARM64) || defined(__linux__)

View file

@ -63,7 +63,34 @@ static BackendSupport GetHTPSupport(const onnxruntime::logging::Logger& logger)
ModelTestBuilder helper(graph);
// Build simple QDQ graph: DQ -> InstanceNormalization -> Q
GetQDQTestCaseFn build_test_case = BuildQDQInstanceNormTestCase<uint8_t, uint8_t, int32_t>({1, 2, 3, 3}, 1e-05f);
GetQDQTestCaseFn build_test_case = [](ModelTestBuilder& builder) {
const uint8_t quant_zero_point = 0;
const float quant_scale = 1.0f;
auto* dq_scale_output = builder.MakeIntermediate();
auto* scale = builder.MakeInitializer<uint8_t>({2}, std::vector<uint8_t>{1, 2});
builder.AddDequantizeLinearNode<uint8_t>(scale, quant_scale, quant_zero_point, dq_scale_output);
// Add bias (initializer) -> DQ ->
auto* dq_bias_output = builder.MakeIntermediate();
auto* bias = builder.MakeInitializer<int32_t>({2}, std::vector<int32_t>{1, 1});
builder.AddDequantizeLinearNode<int32_t>(bias, 1.0f, 0, dq_bias_output);
// Add input_u8 -> DQ ->
auto* input_u8 = builder.MakeInput<uint8_t>({1, 2, 3}, std::vector<uint8_t>{1, 2, 3, 4, 5, 6});
auto* dq_input_output = builder.MakeIntermediate();
builder.AddDequantizeLinearNode<uint8_t>(input_u8, quant_scale, quant_zero_point, dq_input_output);
// Add dq_input_output -> InstanceNormalization ->
auto* instance_norm_output = builder.MakeIntermediate();
builder.AddNode("InstanceNormalization", {dq_input_output, dq_scale_output, dq_bias_output},
{instance_norm_output});
// Add instance_norm_output -> Q -> output_u8
auto* output_u8 = builder.MakeOutput();
builder.AddQuantizeLinearNode<uint8_t>(instance_norm_output, quant_scale, quant_zero_point, output_u8);
};
build_test_case(helper);
helper.SetGraphOutputs();
auto status = model.MainGraph().Resolve();

View file

@ -3,7 +3,7 @@ parameters:
- name: QnnSdk
displayName: QNN SDK version
type: string
default: qnn-v2.10.0.230425122932_54038
default: qnn-v2.12.0.230626
jobs:
- job: Build_QNN_EP

View file

@ -3,7 +3,7 @@ parameters:
- name: QnnSdk
displayName: QNN SDK version
type: string
default: qnn-v2.10.0.230425122932_54038
default: qnn-v2.12.0.230626
jobs:
- job: Build_QNN_EP
@ -57,7 +57,7 @@ jobs:
inputs:
script: |
./build/Release/onnx_test_runner -e qnn \
-v -j 1 -c 1 -i "backend_path|$(QNN_SDK_ROOT)/target/x86_64-linux-clang/lib/libQnnCpu.so" \
-v -j 1 -c 1 -i "backend_path|$(QNN_SDK_ROOT)/lib/x86_64-linux-clang/libQnnCpu.so" \
cmake/external/onnx/onnx/backend/test/data/node
- task: CmdLine@2
@ -65,7 +65,7 @@ jobs:
inputs:
script: |
./build/Release/onnx_test_runner -e qnn \
-v -j 1 -c 1 -i "backend_path|$(QNN_SDK_ROOT)/target/x86_64-linux-clang/lib/libQnnCpu.so" \
-v -j 1 -c 1 -i "backend_path|$(QNN_SDK_ROOT)/lib/x86_64-linux-clang/libQnnCpu.so" \
/data/float32_models
- task: CmdLine@2
@ -73,7 +73,7 @@ jobs:
inputs:
script: |
./build/Release/onnx_test_runner -e qnn \
-v -j 1 -c 1 -i "backend_path|$(QNN_SDK_ROOT)/target/x86_64-linux-clang/lib/libQnnHtp.so" \
-v -j 1 -c 1 -i "backend_path|$(QNN_SDK_ROOT)/lib/x86_64-linux-clang/libQnnHtp.so" \
/data/qdq_models
- task: CmdLine@2
@ -81,7 +81,7 @@ jobs:
inputs:
script: |
./build/Release/onnx_test_runner -e qnn \
-v -j 1 -c 1 -i "backend_path|$(QNN_SDK_ROOT)/target/x86_64-linux-clang/lib/libQnnHtp.so qnn_context_cache_enable|1 qnn_context_cache_path|./build/Release/mobilenet_qdq.bin" \
-v -j 1 -c 1 -i "backend_path|$(QNN_SDK_ROOT)/lib/x86_64-linux-clang/libQnnHtp.so qnn_context_cache_enable|1 qnn_context_cache_path|./build/Release/mobilenet_qdq.bin" \
/data/qdq_models/mobilenetv2-1.0_add_transpose_quant
- task: CmdLine@2
@ -89,5 +89,5 @@ jobs:
inputs:
script: |
./build/Release/onnx_test_runner -e qnn \
-v -j 1 -c 1 -i "backend_path|$(QNN_SDK_ROOT)/target/x86_64-linux-clang/lib/libQnnHtp.so qnn_context_cache_enable|1 qnn_context_cache_path|./build/Release/mobilenet_qdq.bin" \
-v -j 1 -c 1 -i "backend_path|$(QNN_SDK_ROOT)/lib/x86_64-linux-clang/libQnnHtp.so qnn_context_cache_enable|1 qnn_context_cache_path|./build/Release/mobilenet_qdq.bin" \
/data/qdq_models/mobilenetv2-1.0_add_transpose_quant

View file

@ -2,12 +2,12 @@ parameters:
- name: qnn_sdk_path_win
displayName: QNN Windows SDK path
type: string
default: C:\data\qnnsdk\qnn-v2.10.0.230425122932_54038_win
default: C:\data\qnnsdk\qnn-v2.12.1.230626_win
- name: qnn_sdk_info
displayName: QNN SDK Version Information
type: string
default: qnn-v2.10.0.230425122932_54038
default: qnn-v2.12.1.230626_win
- name: ort_package_version
displayName: OnnxRuntime Nuget package version

View file

@ -3,7 +3,7 @@ parameters:
- name: QnnSdk
displayName: QNN SDK version
type: string
default: qnn-v2.10.0.230425122932_54038_win
default: qnn-v2.12.1.230626_win
jobs:
- job: 'build'
@ -55,17 +55,17 @@ jobs:
displayName: 'Run unit tests'
- script: |
.\$(BuildConfig)\onnx_test_runner -j 1 -c 1 -v -e qnn -i "backend_path|$(QNN_SDK_ROOT)\target\aarch64-windows-msvc\lib\QnnCpu.dll" $(Build.SourcesDirectory)\cmake\external\onnx\onnx\backend\test\data\node
.\$(BuildConfig)\onnx_test_runner -j 1 -c 1 -v -e qnn -i "backend_path|$(QNN_SDK_ROOT)\lib\aarch64-windows-msvc\QnnCpu.dll" $(Build.SourcesDirectory)\cmake\external\onnx\onnx\backend\test\data\node
workingDirectory: '$(Build.BinariesDirectory)\$(BuildConfig)'
displayName: 'Run ONNX Tests'
- script: |
.\$(BuildConfig)\onnx_test_runner -j 1 -c 1 -v -e qnn -i "backend_path|$(QNN_SDK_ROOT)\target\aarch64-windows-msvc\lib\QnnCpu.dll" C:\data\float32_models
.\$(BuildConfig)\onnx_test_runner -j 1 -c 1 -v -e qnn -i "backend_path|$(QNN_SDK_ROOT)\lib\aarch64-windows-msvc\QnnCpu.dll" C:\data\float32_models
workingDirectory: '$(Build.BinariesDirectory)\$(BuildConfig)'
displayName: 'Run float32 model tests'
- script: |
.\$(BuildConfig)\onnx_test_runner -j 1 -c 1 -v -e qnn -i "backend_path|$(QNN_SDK_ROOT)\target\aarch64-windows-msvc\lib\QnnHtp.dll" C:\data\qdq_models
.\$(BuildConfig)\onnx_test_runner -j 1 -c 1 -v -e qnn -i "backend_path|$(QNN_SDK_ROOT)\lib\aarch64-windows-msvc\QnnHtp.dll" C:\data\qdq_models
workingDirectory: '$(Build.BinariesDirectory)\$(BuildConfig)'
displayName: 'Run QDQ model tests'
enabled: false

View file

@ -3,7 +3,7 @@ parameters:
- name: QnnSdk
displayName: QNN SDK version
type: string
default: qnn-v2.10.0.230425122932_54038_win
default: qnn-v2.12.1.230626_win
jobs:
- job: 'build'
@ -68,12 +68,12 @@ jobs:
displayName: 'Run unit tests'
- script: |
.\$(BuildConfig)\onnx_test_runner -j 1 -c 1 -v -e qnn -i "backend_path|$(QNN_SDK_ROOT)\target\x86_64-windows-msvc\lib\QnnCpu.dll" $(Build.SourcesDirectory)\cmake\external\onnx\onnx\backend\test\data\node
.\$(BuildConfig)\onnx_test_runner -j 1 -c 1 -v -e qnn -i "backend_path|$(QNN_SDK_ROOT)\lib\x86_64-windows-msvc\QnnCpu.dll" $(Build.SourcesDirectory)\cmake\external\onnx\onnx\backend\test\data\node
workingDirectory: '$(Build.BinariesDirectory)\$(BuildConfig)'
displayName: 'Run ONNX Tests'
- script: |
.\$(BuildConfig)\onnx_test_runner -j 1 -c 1 -v -e qnn -i "backend_path|$(QNN_SDK_ROOT)\target\x86_64-windows-msvc\lib\QnnCpu.dll" C:\data\float32_models
.\$(BuildConfig)\onnx_test_runner -j 1 -c 1 -v -e qnn -i "backend_path|$(QNN_SDK_ROOT)\lib\x86_64-windows-msvc\QnnCpu.dll" C:\data\float32_models
workingDirectory: '$(Build.BinariesDirectory)\$(BuildConfig)'
displayName: 'Run float32 model tests'