mirror of
https://github.com/saymrwulf/onnxruntime.git
synced 2026-05-16 21:00:14 +00:00
[QNN EP] Update to QNN SDK 2.22 (#20628)
### Description - Updates pipelines to use QNN SDK 2.22 by default. - Linux QNN pipeline now uses an Ubuntu 22.04 image (required by QNN SDK) - Android QNN pipeline still uses the current Ubuntu 20.04 image. Will update in a separate PR. - Disables QDQ LayerNorm test that triggers QNN's graph finalization error on QNN 2.22 - Increases accuracy tolerance for various HTP tests so that they pass on Windows arm64. ### Motivation and Context Test QNN EP with latest QNN SDK version by default. --------- Signed-off-by: adrianlizarraga <adlizarraga@microsoft.com>
This commit is contained in:
parent
df28c7d73b
commit
b5eb9e8a8a
21 changed files with 71 additions and 55 deletions
|
|
@ -1381,6 +1381,11 @@ std::unique_ptr<std::set<BrokenTest>> GetBrokenTests(const std::string& provider
|
|||
// expected 13.5 (41580000), got 0 (0), diff: 13.5, tol=0.0145 idx=3. 3 of 4 differ
|
||||
broken_tests->insert({"averagepool_2d_ceil", "result differs"});
|
||||
#endif
|
||||
// These next 3 Resize tests fail on CPU backend with QNN SDK 2.22.0 due to inaccuracy.
|
||||
// output=Y:expected 1 (3f800000), got 3 (40400000), diff: 2, tol=0.002 idx=24. 8 of 56 differ
|
||||
broken_tests->insert({"resize_upsample_sizes_nearest", "result differs"});
|
||||
broken_tests->insert({"resize_upsample_sizes_nearest_axes_2_3", "result differs"});
|
||||
broken_tests->insert({"resize_upsample_sizes_nearest_axes_3_2", "result differs"});
|
||||
}
|
||||
|
||||
#ifdef DISABLE_CONTRIB_OPS
|
||||
|
|
|
|||
|
|
@ -163,22 +163,15 @@ void RunMatMulTest(int32_t opset_version, bool is_a_constant, bool is_b_constant
|
|||
|
||||
// OpenVINO EP: Disabled temporarily matmul broadcasting not fully supported
|
||||
// Disable TensorRT because of unsupported data type
|
||||
std::unordered_set<std::string> excluded_providers{kTensorrtExecutionProvider, kOpenVINOExecutionProvider};
|
||||
// QNN EP: Crash during graph execution for QNN's CPU backend on QNN SDK 2.22. Not a problem for QNN's HTP backend.
|
||||
std::unordered_set<std::string> excluded_providers{kTensorrtExecutionProvider,
|
||||
kOpenVINOExecutionProvider,
|
||||
kQnnExecutionProvider};
|
||||
if (t.name == "test 2D empty input") {
|
||||
// NNAPI: currently fails for the "test 2D empty input" case
|
||||
excluded_providers.insert(kNnapiExecutionProvider);
|
||||
}
|
||||
|
||||
if ("test padding and broadcast A > B" == t.name || "test 2D empty input" == t.name) {
|
||||
// QNN can't handle 0 shap
|
||||
excluded_providers.insert(kQnnExecutionProvider);
|
||||
}
|
||||
#if defined(__linux__)
|
||||
if (t.name == "test padding and broadcast B > A") {
|
||||
// Accuracy error with QNN SDK 2.17.0 on CPU backend.
|
||||
excluded_providers.insert(kQnnExecutionProvider);
|
||||
}
|
||||
#endif
|
||||
test.ConfigExcludeEps(excluded_providers)
|
||||
.Config(run_with_tunable_op)
|
||||
.RunWithConfig();
|
||||
|
|
|
|||
|
|
@ -158,7 +158,8 @@ GetTestQDQModelFn<InputQType> BuildQDQBatchNormTestCase(const TestInputDef<float
|
|||
static void RunBatchNormQDQTest(const TestInputDef<float>& input_def,
|
||||
const TestInputDef<float>& scale_def,
|
||||
const TestInputDef<float>& bias_def,
|
||||
ExpectedEPNodeAssignment expected_ep_assignment) {
|
||||
ExpectedEPNodeAssignment expected_ep_assignment,
|
||||
QDQTolerance tolerance = QDQTolerance()) {
|
||||
ProviderOptions provider_options;
|
||||
#if defined(_WIN32)
|
||||
provider_options["backend_path"] = "QnnHtp.dll";
|
||||
|
|
@ -171,7 +172,8 @@ static void RunBatchNormQDQTest(const TestInputDef<float>& input_def,
|
|||
BuildQDQBatchNormTestCase<uint8_t, uint8_t, uint8_t>(input_def, scale_def, bias_def),
|
||||
provider_options,
|
||||
11,
|
||||
expected_ep_assignment);
|
||||
expected_ep_assignment,
|
||||
tolerance);
|
||||
}
|
||||
|
||||
static void RunBatchNormFP16Test(const TestInputDef<float>& input_def,
|
||||
|
|
@ -219,7 +221,9 @@ TEST_F(QnnHTPBackendTests, BatchNorm2D) {
|
|||
RunBatchNormQDQTest(TestInputDef<float>({2, num_channels, 2, 2}, false, input_data), // Input data
|
||||
TestInputDef<float>({num_channels}, true, {1.0f, 2.0f}), // Scale initializer
|
||||
TestInputDef<float>({num_channels}, true, {1.1f, 2.1f}), // Bias initializer
|
||||
ExpectedEPNodeAssignment::All);
|
||||
ExpectedEPNodeAssignment::All,
|
||||
// Require a slightly increased tolerance on Windows ARM64 (from 0.4% to 0.6%).
|
||||
QDQTolerance(0.006f));
|
||||
}
|
||||
|
||||
// Test FP16 BatchNormalization on the HTP backend.
|
||||
|
|
|
|||
|
|
@ -1626,8 +1626,8 @@ TEST_F(QnnHTPBackendTests, ConvU8U8S32_large_input1_padding_bias_initializer) {
|
|||
ExpectedEPNodeAssignment::All,
|
||||
false, // use_qdq_contrib_ops
|
||||
13, // opset
|
||||
// Need tolerance of 0.73% of output range after QNN SDK 2.17
|
||||
QDQTolerance(0.00730f));
|
||||
// Need tolerance of 0.76% of output range after QNN SDK 2.19.2
|
||||
QDQTolerance(0.0076f));
|
||||
}
|
||||
|
||||
TEST_F(QnnHTPBackendTests, ConvU8U8S32_large_input2_bias_initializer) {
|
||||
|
|
|
|||
|
|
@ -285,7 +285,8 @@ TEST_F(QnnHTPBackendTests, Gemm_Broadcast_Bias_DynamicInputs) {
|
|||
ExpectedEPNodeAssignment::All,
|
||||
13,
|
||||
false,
|
||||
QDQTolerance(0.00410f));
|
||||
// Require tolerance of 0.74% on Windows ARM64.
|
||||
QDQTolerance(0.0074f));
|
||||
}
|
||||
|
||||
TEST_F(QnnHTPBackendTests, Gemm_Broadcast_Bias_DynamicA_StaticB_DynamicC) {
|
||||
|
|
@ -304,7 +305,8 @@ TEST_F(QnnHTPBackendTests, Gemm_Broadcast_Bias_DynamicA_StaticB_DynamicC) {
|
|||
ExpectedEPNodeAssignment::All,
|
||||
13,
|
||||
false,
|
||||
QDQTolerance(0.00410f));
|
||||
// Require tolerance of 0.74% on Windows ARM64.
|
||||
QDQTolerance(0.0074f));
|
||||
}
|
||||
|
||||
TEST_F(QnnHTPBackendTests, Gemm_Broadcast_Bias_DynamicA_StaticB_StaticC) {
|
||||
|
|
@ -323,7 +325,8 @@ TEST_F(QnnHTPBackendTests, Gemm_Broadcast_Bias_DynamicA_StaticB_StaticC) {
|
|||
ExpectedEPNodeAssignment::All,
|
||||
13,
|
||||
false,
|
||||
QDQTolerance(0.00410f));
|
||||
// Require tolerance of 0.74% on Windows ARM64.
|
||||
QDQTolerance(0.0074f));
|
||||
}
|
||||
|
||||
// Test 16-bit QDQ Gemm with dynamic inputs A and Bias. The B input is an initializer.
|
||||
|
|
|
|||
|
|
@ -158,7 +158,20 @@ TEST_F(QnnHTPBackendTests, LayerNorm1D_LastAxis_StaticScale_AU16_WU8) {
|
|||
}
|
||||
|
||||
// Test accuracy of 8-bit QDQ LayerNorm with a dynamic scale input.
|
||||
TEST_F(QnnHTPBackendTests, LayerNorm1D_LastAxis_DynamicScale) {
|
||||
//
|
||||
// TODO(adrianlizarraga): Fails to finalize with QNN SDK 2.22.
|
||||
// Verbose logs:
|
||||
// Starting stage: Graph Transformations and Optimizations
|
||||
// C:\...\QNN\HTP\HTP\src\hexagon\prepare\graph_prepare.cc:203:ERROR:could not create op: q::flat_to_vtcm
|
||||
// C:\...\QNN\HTP\HTP\src\hexagon\prepare\graph_prepare.cc:1187:ERROR:Op 0x102800000013 preparation failed with err:-1
|
||||
// Completed stage: Graph Transformations and Optimizations (6247 us)
|
||||
// QnnDsp <E> "node_token_15" generated: could not create op
|
||||
// QnnDsp <E> RouterWindows graph prepare failed 12
|
||||
// QnnDsp <E> Failed to finalize graph (id: 1) with err 1002
|
||||
// QnnDsp <V> Wake up free backend 1 thread(s)
|
||||
// QnnDsp <I> QnnGraph_finalize done. status 0x3ea
|
||||
// Failed to finalize QNN graph.
|
||||
TEST_F(QnnHTPBackendTests, DISABLED_LayerNorm1D_LastAxis_DynamicScale) {
|
||||
RunLayerNormQDQTest<uint8_t, uint8_t>(TestInputDef<float>({1, 2, 3}, false, GetFloatDataInRange(0.0f, 10.0f, 6)),
|
||||
TestInputDef<float>({3}, false, GetFloatDataInRange(0.0f, 1.0f, 3)), // Dynamic
|
||||
{utils::MakeAttribute("axis", static_cast<int64_t>(-1))}, // Last axis
|
||||
|
|
|
|||
|
|
@ -135,8 +135,8 @@ TEST_F(QnnHTPBackendTests, LRNSize3) {
|
|||
0.75f, // beta
|
||||
1.0f, // bias
|
||||
13, // opset
|
||||
// Need to use tolerance of 0.405% of output range after QNN SDK 2.17
|
||||
QDQTolerance(0.00405f));
|
||||
// Need to use tolerance of 0.8% of output range after QNN SDK 2.22
|
||||
QDQTolerance(0.008f));
|
||||
}
|
||||
|
||||
TEST_F(QnnHTPBackendTests, LRNSize5) {
|
||||
|
|
@ -147,8 +147,8 @@ TEST_F(QnnHTPBackendTests, LRNSize5) {
|
|||
0.75f, // beta
|
||||
1.0f, // bias
|
||||
13, // opset
|
||||
// Need to use tolerance of 0.407% of output range after QNN SDK 2.17
|
||||
QDQTolerance(0.00407f));
|
||||
// Need to use tolerance of 0.8% of output range after QNN SDK 2.22
|
||||
QDQTolerance(0.008f));
|
||||
}
|
||||
|
||||
TEST_F(QnnHTPBackendTests, LRN_size_larger_than_channel) {
|
||||
|
|
|
|||
|
|
@ -103,7 +103,8 @@ static void RunQDQMatMulOpOpTest(const TestInputDef<float>& input1_def,
|
|||
// CPU tests:
|
||||
//
|
||||
|
||||
TEST_F(QnnCPUBackendTests, MatMulOp) {
|
||||
// TODO: Crashes during QNN CPU execution (QNN SDK 2.22)
|
||||
TEST_F(QnnCPUBackendTests, DISABLED_MatMulOp) {
|
||||
RunMatMulOpOpTest(TestInputDef<float>({2, 3}, false, {-10.0f, -4.0f, -2.0f, 0.0f, 5.0f, 10.0f}),
|
||||
TestInputDef<float>({3, 2}, false, {-10.0f, -6.0f, -1.0f, 0.0f, 3.0f, 10.0f}),
|
||||
ExpectedEPNodeAssignment::All, 18);
|
||||
|
|
@ -126,13 +127,8 @@ TEST_F(QnnCPUBackendTests, DISABLED_MatMulOp_Broadcast) {
|
|||
ExpectedEPNodeAssignment::All, 18, 0.0004f);
|
||||
}
|
||||
|
||||
#if defined(__linux__)
|
||||
// TODO: Crashes during QNN CPU execution (QNN SDK 2.22)
|
||||
TEST_F(QnnCPUBackendTests, DISABLED_MatMulOp_PaddingAndBroadcast_BLargerThanA) {
|
||||
#else
|
||||
// TODO: When fixed, enable MathOpTest.MatMulFloatType from cpu/mat/matmul_test.cc
|
||||
// QNN SDK 2.17: Accuracy errors
|
||||
TEST_F(QnnCPUBackendTests, MatMulOp_PaddingAndBroadcast_BLargerThanA) {
|
||||
#endif
|
||||
std::vector<int64_t> input0_shape = {2, 3, 2};
|
||||
std::vector<int64_t> input1_shape = {3, 2, 2, 1};
|
||||
RunMatMulOpOpTest(TestInputDef<float>(input0_shape, false, GetSequentialFloatData(input0_shape)),
|
||||
|
|
|
|||
|
|
@ -31,7 +31,7 @@ parameters:
|
|||
- name: QnnSdk
|
||||
displayName: QNN SDK version
|
||||
type: string
|
||||
default: 2.21.0.240401
|
||||
default: 2.22.0.240425
|
||||
|
||||
jobs:
|
||||
- job: Build_QNN_EP
|
||||
|
|
|
|||
|
|
@ -71,7 +71,7 @@ parameters:
|
|||
- name: QnnSdk
|
||||
displayName: QNN SDK Version
|
||||
type: string
|
||||
default: 2.21.0.240401
|
||||
default: 2.22.0.240425
|
||||
|
||||
resources:
|
||||
repositories:
|
||||
|
|
@ -743,4 +743,4 @@ stages:
|
|||
displayName: 'Publish Pipeline NuGet Artifact'
|
||||
inputs:
|
||||
artifactName: 'drop-signed-nuget-qnn'
|
||||
targetPath: '$(Build.ArtifactStagingDirectory)/nuget-artifact-merged'
|
||||
targetPath: '$(Build.ArtifactStagingDirectory)/nuget-artifact-merged'
|
||||
|
|
|
|||
|
|
@ -32,11 +32,11 @@ parameters:
|
|||
- name: QnnSdk
|
||||
displayName: QNN SDK version
|
||||
type: string
|
||||
default: 2.21.0.240401
|
||||
default: 2.22.0.240425
|
||||
|
||||
jobs:
|
||||
- job: Build_QNN_EP
|
||||
pool: onnxruntime-qnn-ubuntu-2004-cpu
|
||||
pool: onnxruntime-qnn-ubuntu-2204-cpu
|
||||
timeoutInMinutes: 60
|
||||
workspace:
|
||||
clean: all
|
||||
|
|
|
|||
|
|
@ -59,7 +59,7 @@ parameters:
|
|||
- name: qnn_sdk_version
|
||||
type: string
|
||||
displayName: 'QNN SDK version. Only for QNN packages.'
|
||||
default: 2.21.0.240401
|
||||
default: 2.22.0.240425
|
||||
|
||||
trigger: none
|
||||
|
||||
|
|
|
|||
|
|
@ -2,7 +2,7 @@ parameters:
|
|||
- name: QnnSdk
|
||||
displayName: QNN SDK Version
|
||||
type: string
|
||||
default: 2.21.0.240401
|
||||
default: 2.22.0.240425
|
||||
|
||||
- name: build_config
|
||||
displayName: Build Configuration
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
parameters:
|
||||
- name: QnnSDKVersion
|
||||
type: string
|
||||
default: '2.21.0.240401'
|
||||
default: '2.22.0.240425'
|
||||
|
||||
steps:
|
||||
- script: |
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
parameters:
|
||||
- name: QnnSDKVersion
|
||||
type: string
|
||||
default: '2.21.0.240401'
|
||||
default: '2.22.0.240425'
|
||||
|
||||
steps:
|
||||
- powershell: |
|
||||
|
|
|
|||
|
|
@ -60,7 +60,7 @@ parameters:
|
|||
- name: qnn_sdk_version
|
||||
type: string
|
||||
displayName: 'QNN SDK version. Only for QNN packages.'
|
||||
default: 2.21.0.240401
|
||||
default: 2.22.0.240425
|
||||
|
||||
stages:
|
||||
- ${{ if eq(parameters.enable_windows_cpu, true) }}:
|
||||
|
|
|
|||
|
|
@ -7,7 +7,7 @@ parameters:
|
|||
- name: QNN_SDK
|
||||
displayName: QNN SDK Version
|
||||
type: string
|
||||
default: 2.21.0.240401
|
||||
default: 2.22.0.240425
|
||||
|
||||
- name: PYTHON_VERSION
|
||||
type: string
|
||||
|
|
|
|||
|
|
@ -7,7 +7,7 @@ parameters:
|
|||
- name: QNN_SDK
|
||||
displayName: QNN SDK Version
|
||||
type: string
|
||||
default: 2.21.0.240401
|
||||
default: 2.22.0.240425
|
||||
|
||||
- name: ENV_SETUP_SCRIPT
|
||||
type: string
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
parameters:
|
||||
QnnSdk: '2.21.0.240401'
|
||||
QnnSdk: '2.22.0.240425'
|
||||
build_config: 'RelWithDebInfo'
|
||||
IsReleaseBuild: false
|
||||
DoEsrp: false
|
||||
|
|
|
|||
|
|
@ -32,7 +32,7 @@ parameters:
|
|||
- name: QnnSdk
|
||||
displayName: QNN SDK version
|
||||
type: string
|
||||
default: 2.21.0.240401
|
||||
default: 2.22.0.240425
|
||||
|
||||
jobs:
|
||||
- job: 'build'
|
||||
|
|
|
|||
|
|
@ -32,7 +32,7 @@ parameters:
|
|||
- name: QnnSdk
|
||||
displayName: QNN SDK version
|
||||
type: string
|
||||
default: 2.21.0.240401
|
||||
default: 2.22.0.240425
|
||||
|
||||
jobs:
|
||||
- job: 'build'
|
||||
|
|
@ -90,12 +90,14 @@ jobs:
|
|||
workingDirectory: '$(Build.BinariesDirectory)\$(BuildConfig)\$(BuildConfig)'
|
||||
displayName: 'Run unit tests'
|
||||
|
||||
- script: |
|
||||
.\$(BuildConfig)\onnx_test_runner -j 1 -v -e qnn -i "backend_path|$(QnnSDKRootDir)\lib\x86_64-windows-msvc\QnnCpu.dll" $(Build.SourcesDirectory)\cmake\external\onnx\onnx\backend\test\data\node
|
||||
workingDirectory: '$(Build.BinariesDirectory)\$(BuildConfig)'
|
||||
displayName: 'Run ONNX Tests'
|
||||
|
||||
- script: |
|
||||
.\$(BuildConfig)\onnx_test_runner -j 1 -v -e qnn -i "backend_path|$(QnnSDKRootDir)\lib\x86_64-windows-msvc\QnnCpu.dll" C:\data\float32_models
|
||||
workingDirectory: '$(Build.BinariesDirectory)\$(BuildConfig)'
|
||||
displayName: 'Run float32 model tests'
|
||||
# Comment out QnnCpu tests because QNN SDK 2.22 CPU backend crashes when executing MatMuls.
|
||||
# Does not happen with HTP backend.
|
||||
# - script: |
|
||||
# .\$(BuildConfig)\onnx_test_runner -j 1 -v -e qnn -i "backend_path|$(QnnSDKRootDir)\lib\x86_64-windows-msvc\QnnCpu.dll" $(Build.SourcesDirectory)\cmake\external\onnx\onnx\backend\test\data\node
|
||||
# workingDirectory: '$(Build.BinariesDirectory)\$(BuildConfig)'
|
||||
# displayName: 'Run ONNX Tests'
|
||||
#
|
||||
# - script: |
|
||||
# .\$(BuildConfig)\onnx_test_runner -j 1 -v -e qnn -i "backend_path|$(QnnSDKRootDir)\lib\x86_64-windows-msvc\QnnCpu.dll" C:\data\float32_models
|
||||
# workingDirectory: '$(Build.BinariesDirectory)\$(BuildConfig)'
|
||||
# displayName: 'Run float32 model tests'
|
||||
|
|
|
|||
Loading…
Reference in a new issue