[QNN EP] Update to QNN SDK 2.22 (#20628)

### Description
- Updates pipelines to use QNN SDK 2.22 by default.
- Linux QNN pipeline now uses an Ubuntu 22.04 image (required by QNN
SDK)
- Android QNN pipeline still uses the current Ubuntu 20.04 image. Will
update in a separate PR.
- Disables QDQ LayerNorm test that triggers QNN's graph finalization
error on QNN 2.22
- Increases accuracy tolerance for various HTP tests so that they pass
on Windows arm64.



### Motivation and Context
Test QNN EP with latest QNN SDK version by default.

---------

Signed-off-by: adrianlizarraga <adlizarraga@microsoft.com>
This commit is contained in:
Adrian Lizarraga 2024-06-05 18:25:23 -07:00 committed by GitHub
parent df28c7d73b
commit b5eb9e8a8a
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
21 changed files with 71 additions and 55 deletions

View file

@ -1381,6 +1381,11 @@ std::unique_ptr<std::set<BrokenTest>> GetBrokenTests(const std::string& provider
// expected 13.5 (41580000), got 0 (0), diff: 13.5, tol=0.0145 idx=3. 3 of 4 differ
broken_tests->insert({"averagepool_2d_ceil", "result differs"});
#endif
// These next 3 Resize tests fail on CPU backend with QNN SDK 2.22.0 due to inaccuracy.
// output=Y:expected 1 (3f800000), got 3 (40400000), diff: 2, tol=0.002 idx=24. 8 of 56 differ
broken_tests->insert({"resize_upsample_sizes_nearest", "result differs"});
broken_tests->insert({"resize_upsample_sizes_nearest_axes_2_3", "result differs"});
broken_tests->insert({"resize_upsample_sizes_nearest_axes_3_2", "result differs"});
}
#ifdef DISABLE_CONTRIB_OPS

View file

@ -163,22 +163,15 @@ void RunMatMulTest(int32_t opset_version, bool is_a_constant, bool is_b_constant
// OpenVINO EP: Disabled temporarily matmul broadcasting not fully supported
// Disable TensorRT because of unsupported data type
std::unordered_set<std::string> excluded_providers{kTensorrtExecutionProvider, kOpenVINOExecutionProvider};
// QNN EP: Crash during graph execution for QNN's CPU backend on QNN SDK 2.22. Not a problem for QNN's HTP backend.
std::unordered_set<std::string> excluded_providers{kTensorrtExecutionProvider,
kOpenVINOExecutionProvider,
kQnnExecutionProvider};
if (t.name == "test 2D empty input") {
// NNAPI: currently fails for the "test 2D empty input" case
excluded_providers.insert(kNnapiExecutionProvider);
}
if ("test padding and broadcast A > B" == t.name || "test 2D empty input" == t.name) {
// QNN can't handle 0 shape
excluded_providers.insert(kQnnExecutionProvider);
}
#if defined(__linux__)
if (t.name == "test padding and broadcast B > A") {
// Accuracy error with QNN SDK 2.17.0 on CPU backend.
excluded_providers.insert(kQnnExecutionProvider);
}
#endif
test.ConfigExcludeEps(excluded_providers)
.Config(run_with_tunable_op)
.RunWithConfig();

View file

@ -158,7 +158,8 @@ GetTestQDQModelFn<InputQType> BuildQDQBatchNormTestCase(const TestInputDef<float
static void RunBatchNormQDQTest(const TestInputDef<float>& input_def,
const TestInputDef<float>& scale_def,
const TestInputDef<float>& bias_def,
ExpectedEPNodeAssignment expected_ep_assignment) {
ExpectedEPNodeAssignment expected_ep_assignment,
QDQTolerance tolerance = QDQTolerance()) {
ProviderOptions provider_options;
#if defined(_WIN32)
provider_options["backend_path"] = "QnnHtp.dll";
@ -171,7 +172,8 @@ static void RunBatchNormQDQTest(const TestInputDef<float>& input_def,
BuildQDQBatchNormTestCase<uint8_t, uint8_t, uint8_t>(input_def, scale_def, bias_def),
provider_options,
11,
expected_ep_assignment);
expected_ep_assignment,
tolerance);
}
static void RunBatchNormFP16Test(const TestInputDef<float>& input_def,
@ -219,7 +221,9 @@ TEST_F(QnnHTPBackendTests, BatchNorm2D) {
RunBatchNormQDQTest(TestInputDef<float>({2, num_channels, 2, 2}, false, input_data), // Input data
TestInputDef<float>({num_channels}, true, {1.0f, 2.0f}), // Scale initializer
TestInputDef<float>({num_channels}, true, {1.1f, 2.1f}), // Bias initializer
ExpectedEPNodeAssignment::All);
ExpectedEPNodeAssignment::All,
// Require a slightly increased tolerance on Windows ARM64 (from 0.4% to 0.6%).
QDQTolerance(0.006f));
}
// Test FP16 BatchNormalization on the HTP backend.

View file

@ -1626,8 +1626,8 @@ TEST_F(QnnHTPBackendTests, ConvU8U8S32_large_input1_padding_bias_initializer) {
ExpectedEPNodeAssignment::All,
false, // use_qdq_contrib_ops
13, // opset
// Need tolerance of 0.73% of output range after QNN SDK 2.17
QDQTolerance(0.00730f));
// Need tolerance of 0.76% of output range after QNN SDK 2.19.2
QDQTolerance(0.0076f));
}
TEST_F(QnnHTPBackendTests, ConvU8U8S32_large_input2_bias_initializer) {

View file

@ -285,7 +285,8 @@ TEST_F(QnnHTPBackendTests, Gemm_Broadcast_Bias_DynamicInputs) {
ExpectedEPNodeAssignment::All,
13,
false,
QDQTolerance(0.00410f));
// Require tolerance of 0.74% on Windows ARM64.
QDQTolerance(0.0074f));
}
TEST_F(QnnHTPBackendTests, Gemm_Broadcast_Bias_DynamicA_StaticB_DynamicC) {
@ -304,7 +305,8 @@ TEST_F(QnnHTPBackendTests, Gemm_Broadcast_Bias_DynamicA_StaticB_DynamicC) {
ExpectedEPNodeAssignment::All,
13,
false,
QDQTolerance(0.00410f));
// Require tolerance of 0.74% on Windows ARM64.
QDQTolerance(0.0074f));
}
TEST_F(QnnHTPBackendTests, Gemm_Broadcast_Bias_DynamicA_StaticB_StaticC) {
@ -323,7 +325,8 @@ TEST_F(QnnHTPBackendTests, Gemm_Broadcast_Bias_DynamicA_StaticB_StaticC) {
ExpectedEPNodeAssignment::All,
13,
false,
QDQTolerance(0.00410f));
// Require tolerance of 0.74% on Windows ARM64.
QDQTolerance(0.0074f));
}
// Test 16-bit QDQ Gemm with dynamic inputs A and Bias. The B input is an initializer.

View file

@ -158,7 +158,20 @@ TEST_F(QnnHTPBackendTests, LayerNorm1D_LastAxis_StaticScale_AU16_WU8) {
}
// Test accuracy of 8-bit QDQ LayerNorm with a dynamic scale input.
TEST_F(QnnHTPBackendTests, LayerNorm1D_LastAxis_DynamicScale) {
//
// TODO(adrianlizarraga): Fails to finalize with QNN SDK 2.22.
// Verbose logs:
// Starting stage: Graph Transformations and Optimizations
// C:\...\QNN\HTP\HTP\src\hexagon\prepare\graph_prepare.cc:203:ERROR:could not create op: q::flat_to_vtcm
// C:\...\QNN\HTP\HTP\src\hexagon\prepare\graph_prepare.cc:1187:ERROR:Op 0x102800000013 preparation failed with err:-1
// Completed stage: Graph Transformations and Optimizations (6247 us)
// QnnDsp <E> "node_token_15" generated: could not create op
// QnnDsp <E> RouterWindows graph prepare failed 12
// QnnDsp <E> Failed to finalize graph (id: 1) with err 1002
// QnnDsp <V> Wake up free backend 1 thread(s)
// QnnDsp <I> QnnGraph_finalize done. status 0x3ea
// Failed to finalize QNN graph.
TEST_F(QnnHTPBackendTests, DISABLED_LayerNorm1D_LastAxis_DynamicScale) {
RunLayerNormQDQTest<uint8_t, uint8_t>(TestInputDef<float>({1, 2, 3}, false, GetFloatDataInRange(0.0f, 10.0f, 6)),
TestInputDef<float>({3}, false, GetFloatDataInRange(0.0f, 1.0f, 3)), // Dynamic
{utils::MakeAttribute("axis", static_cast<int64_t>(-1))}, // Last axis

View file

@ -135,8 +135,8 @@ TEST_F(QnnHTPBackendTests, LRNSize3) {
0.75f, // beta
1.0f, // bias
13, // opset
// Need to use tolerance of 0.405% of output range after QNN SDK 2.17
QDQTolerance(0.00405f));
// Need to use tolerance of 0.8% of output range after QNN SDK 2.22
QDQTolerance(0.008f));
}
TEST_F(QnnHTPBackendTests, LRNSize5) {
@ -147,8 +147,8 @@ TEST_F(QnnHTPBackendTests, LRNSize5) {
0.75f, // beta
1.0f, // bias
13, // opset
// Need to use tolerance of 0.407% of output range after QNN SDK 2.17
QDQTolerance(0.00407f));
// Need to use tolerance of 0.8% of output range after QNN SDK 2.22
QDQTolerance(0.008f));
}
TEST_F(QnnHTPBackendTests, LRN_size_larger_than_channel) {

View file

@ -103,7 +103,8 @@ static void RunQDQMatMulOpOpTest(const TestInputDef<float>& input1_def,
// CPU tests:
//
TEST_F(QnnCPUBackendTests, MatMulOp) {
// TODO: Crashes during QNN CPU execution (QNN SDK 2.22)
TEST_F(QnnCPUBackendTests, DISABLED_MatMulOp) {
RunMatMulOpOpTest(TestInputDef<float>({2, 3}, false, {-10.0f, -4.0f, -2.0f, 0.0f, 5.0f, 10.0f}),
TestInputDef<float>({3, 2}, false, {-10.0f, -6.0f, -1.0f, 0.0f, 3.0f, 10.0f}),
ExpectedEPNodeAssignment::All, 18);
@ -126,13 +127,8 @@ TEST_F(QnnCPUBackendTests, DISABLED_MatMulOp_Broadcast) {
ExpectedEPNodeAssignment::All, 18, 0.0004f);
}
#if defined(__linux__)
// TODO: Crashes during QNN CPU execution (QNN SDK 2.22)
TEST_F(QnnCPUBackendTests, DISABLED_MatMulOp_PaddingAndBroadcast_BLargerThanA) {
#else
// TODO: When fixed, enable MathOpTest.MatMulFloatType from cpu/math/matmul_test.cc
// QNN SDK 2.17: Accuracy errors
TEST_F(QnnCPUBackendTests, MatMulOp_PaddingAndBroadcast_BLargerThanA) {
#endif
std::vector<int64_t> input0_shape = {2, 3, 2};
std::vector<int64_t> input1_shape = {3, 2, 2, 1};
RunMatMulOpOpTest(TestInputDef<float>(input0_shape, false, GetSequentialFloatData(input0_shape)),

View file

@ -31,7 +31,7 @@ parameters:
- name: QnnSdk
displayName: QNN SDK version
type: string
default: 2.21.0.240401
default: 2.22.0.240425
jobs:
- job: Build_QNN_EP

View file

@ -71,7 +71,7 @@ parameters:
- name: QnnSdk
displayName: QNN SDK Version
type: string
default: 2.21.0.240401
default: 2.22.0.240425
resources:
repositories:
@ -743,4 +743,4 @@ stages:
displayName: 'Publish Pipeline NuGet Artifact'
inputs:
artifactName: 'drop-signed-nuget-qnn'
targetPath: '$(Build.ArtifactStagingDirectory)/nuget-artifact-merged'
targetPath: '$(Build.ArtifactStagingDirectory)/nuget-artifact-merged'

View file

@ -32,11 +32,11 @@ parameters:
- name: QnnSdk
displayName: QNN SDK version
type: string
default: 2.21.0.240401
default: 2.22.0.240425
jobs:
- job: Build_QNN_EP
pool: onnxruntime-qnn-ubuntu-2004-cpu
pool: onnxruntime-qnn-ubuntu-2204-cpu
timeoutInMinutes: 60
workspace:
clean: all

View file

@ -59,7 +59,7 @@ parameters:
- name: qnn_sdk_version
type: string
displayName: 'QNN SDK version. Only for QNN packages.'
default: 2.21.0.240401
default: 2.22.0.240425
trigger: none

View file

@ -2,7 +2,7 @@ parameters:
- name: QnnSdk
displayName: QNN SDK Version
type: string
default: 2.21.0.240401
default: 2.22.0.240425
- name: build_config
displayName: Build Configuration

View file

@ -1,7 +1,7 @@
parameters:
- name: QnnSDKVersion
type: string
default: '2.21.0.240401'
default: '2.22.0.240425'
steps:
- script: |

View file

@ -1,7 +1,7 @@
parameters:
- name: QnnSDKVersion
type: string
default: '2.21.0.240401'
default: '2.22.0.240425'
steps:
- powershell: |

View file

@ -60,7 +60,7 @@ parameters:
- name: qnn_sdk_version
type: string
displayName: 'QNN SDK version. Only for QNN packages.'
default: 2.21.0.240401
default: 2.22.0.240425
stages:
- ${{ if eq(parameters.enable_windows_cpu, true) }}:

View file

@ -7,7 +7,7 @@ parameters:
- name: QNN_SDK
displayName: QNN SDK Version
type: string
default: 2.21.0.240401
default: 2.22.0.240425
- name: PYTHON_VERSION
type: string

View file

@ -7,7 +7,7 @@ parameters:
- name: QNN_SDK
displayName: QNN SDK Version
type: string
default: 2.21.0.240401
default: 2.22.0.240425
- name: ENV_SETUP_SCRIPT
type: string

View file

@ -1,5 +1,5 @@
parameters:
QnnSdk: '2.21.0.240401'
QnnSdk: '2.22.0.240425'
build_config: 'RelWithDebInfo'
IsReleaseBuild: false
DoEsrp: false

View file

@ -32,7 +32,7 @@ parameters:
- name: QnnSdk
displayName: QNN SDK version
type: string
default: 2.21.0.240401
default: 2.22.0.240425
jobs:
- job: 'build'

View file

@ -32,7 +32,7 @@ parameters:
- name: QnnSdk
displayName: QNN SDK version
type: string
default: 2.21.0.240401
default: 2.22.0.240425
jobs:
- job: 'build'
@ -90,12 +90,14 @@ jobs:
workingDirectory: '$(Build.BinariesDirectory)\$(BuildConfig)\$(BuildConfig)'
displayName: 'Run unit tests'
- script: |
.\$(BuildConfig)\onnx_test_runner -j 1 -v -e qnn -i "backend_path|$(QnnSDKRootDir)\lib\x86_64-windows-msvc\QnnCpu.dll" $(Build.SourcesDirectory)\cmake\external\onnx\onnx\backend\test\data\node
workingDirectory: '$(Build.BinariesDirectory)\$(BuildConfig)'
displayName: 'Run ONNX Tests'
- script: |
.\$(BuildConfig)\onnx_test_runner -j 1 -v -e qnn -i "backend_path|$(QnnSDKRootDir)\lib\x86_64-windows-msvc\QnnCpu.dll" C:\data\float32_models
workingDirectory: '$(Build.BinariesDirectory)\$(BuildConfig)'
displayName: 'Run float32 model tests'
# Comment out QnnCpu tests because QNN SDK 2.22 CPU backend crashes when executing MatMuls.
# Does not happen with HTP backend.
# - script: |
# .\$(BuildConfig)\onnx_test_runner -j 1 -v -e qnn -i "backend_path|$(QnnSDKRootDir)\lib\x86_64-windows-msvc\QnnCpu.dll" $(Build.SourcesDirectory)\cmake\external\onnx\onnx\backend\test\data\node
# workingDirectory: '$(Build.BinariesDirectory)\$(BuildConfig)'
# displayName: 'Run ONNX Tests'
#
# - script: |
# .\$(BuildConfig)\onnx_test_runner -j 1 -v -e qnn -i "backend_path|$(QnnSDKRootDir)\lib\x86_64-windows-msvc\QnnCpu.dll" C:\data\float32_models
# workingDirectory: '$(Build.BinariesDirectory)\$(BuildConfig)'
# displayName: 'Run float32 model tests'