From 60bb07307bccaa9145d1a9ba97575a85c5c38b9d Mon Sep 17 00:00:00 2001
From: Changming Sun <chasun@microsoft.com>
Date: Thu, 25 May 2023 00:09:08 -0700
Subject: [PATCH] Fix the TRT GPU build job in python packaging pipeline
 (#16073)

1. Cherry-pick #16054 back to the main branch
2. Replace onnxruntime-gpu-winbuild-t4 with onnxruntime-Win2022-GPU-T4.
The latter one has VS2022.

---------

Co-authored-by: Patrice Vignola <vignola.patrice@gmail.com>
---
 .../contrib_ops/attention_op_test_helper.cc   |  3 ++-
 .../contrib_ops/attention_op_test_helper.h    |  2 +-
 .../multihead_attention_op_test.cc            |  3 ++-
 .../templates/py-packaging-stage.yml          | 23 ++++++++++---------
 .../azure-pipelines/templates/py-win-gpu.yml  | 12 +++++++---
 5 files changed, 26 insertions(+), 17 deletions(-)
diff --git a/onnxruntime/test/contrib_ops/attention_op_test_helper.cc b/onnxruntime/test/contrib_ops/attention_op_test_helper.cc
index bac91aa544..44a9808119 100644
--- a/onnxruntime/test/contrib_ops/attention_op_test_helper.cc
+++ b/onnxruntime/test/contrib_ops/attention_op_test_helper.cc
@@ -7,7 +7,8 @@
 namespace onnxruntime {
 namespace test {
 
-#if !defined(_MSC_VER) || defined(USE_DML)
+// Disable some tests in Windows since prefast build might crash with large test data.
+#if !defined(_MSC_VER)
 void GetWeight_64_3_64(std::vector<float>& weight_data) {
   weight_data = {
       -0.004707f, -0.006775f, 0.0009236f, 0.003067f, -0.00806f, 0.00779f, 0.0004425f, 0.00846f, 0.00048f,
diff --git a/onnxruntime/test/contrib_ops/attention_op_test_helper.h b/onnxruntime/test/contrib_ops/attention_op_test_helper.h
index 0e2241e23e..aa2a371cc9 100644
--- a/onnxruntime/test/contrib_ops/attention_op_test_helper.h
+++ b/onnxruntime/test/contrib_ops/attention_op_test_helper.h
@@ -43,7 +43,7 @@ struct AttentionTestData {
 };
 
 // Disable some tests in Windows since prefast build might crash with large test data.
-#if !defined(_MSC_VER) || defined(USE_DML)
+#if !defined(_MSC_VER)
 // Return packed weights and bias for input projection.
 void GetAttentionWeight(std::vector<float>& weight_data, int elements = 64 * 3 * 64, int offset = 0, int step = 1);
 void GetAttentionBias(std::vector<float>& bias_data, int elements = 3 * 64, int offset = 0, int step = 1);
diff --git a/onnxruntime/test/contrib_ops/multihead_attention_op_test.cc b/onnxruntime/test/contrib_ops/multihead_attention_op_test.cc
index 5cd42e815f..fc7eafd714 100644
--- a/onnxruntime/test/contrib_ops/multihead_attention_op_test.cc
+++ b/onnxruntime/test/contrib_ops/multihead_attention_op_test.cc
@@ -452,7 +452,8 @@ static void RunMultiHeadAttentionTests(AttentionTestData& data, bool disable_cpu
   }
 }
 
-#if !defined(_MSC_VER) || defined(USE_DML)
+// Disable some tests in Windows since prefast build might crash with large test data.
+#if !defined(_MSC_VER)
 // Test fused cross attention kernel
 // It requires head_size > 32 and head_size <= 64 for T4 GPU; hidden_size == v_hidden_size.
 TEST(MultiHeadAttentionTest, CrossAttention_Batch2_HeadSize40) {
diff --git a/tools/ci_build/github/azure-pipelines/templates/py-packaging-stage.yml b/tools/ci_build/github/azure-pipelines/templates/py-packaging-stage.yml
index 54c820bea2..63e05ef8d9 100644
--- a/tools/ci_build/github/azure-pipelines/templates/py-packaging-stage.yml
+++ b/tools/ci_build/github/azure-pipelines/templates/py-packaging-stage.yml
@@ -113,10 +113,11 @@ stages:
           addToPath: true
           architecture: $(buildArch)
 
-      - task: PythonScript@0
-        displayName: 'Run a Python script'
+      - task: onebranch.pipeline.tsaoptions@1
+        displayName: 'OneBranch TSAOptions'
         inputs:
-          scriptPath: 'tools\ci_build\update_tsaoptions.py'
+          tsaConfigFilePath: '$(Build.SourcesDirectory)\.config\tsaoptions.json'
+          appendSourceBranchName: false
 
       - template: set-nightly-build-option-variable-step.yml
 
@@ -270,33 +271,33 @@ stages:
   - ${{ if eq(parameters.enable_windows_gpu, true) }}:
       - template: py-win-gpu.yml
         parameters:
-          MACHINE_POOL: 'onnxruntime-gpu-winbuild-t4'
+          MACHINE_POOL: 'onnxruntime-Win2022-GPU-T4'
           PYTHON_VERSION: '3.8'
-          EP_BUILD_FLAGS: --use_tensorrt --tensorrt_home="C:\local\TensorRT-8.6.0.12.Windows10.x86_64.cuda-11.8" --cuda_version=11.8 --cuda_home="C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.8"  --cmake_extra_defines "CMAKE_CUDA_ARCHITECTURES=52;60;61;70;75;80"
+          EP_BUILD_FLAGS: --use_tensorrt --tensorrt_home="C:\local\TensorRT-8.6.0.12.Windows10.x86_64.cuda-11.8" --cuda_home="$(Agent.TempDirectory)\v11.8"  --cmake_extra_defines "CMAKE_CUDA_ARCHITECTURES=52;60;61;70;75;80"
           ENV_SETUP_SCRIPT: setup_env_gpu.bat
           EP_NAME: gpu
 
       - template: py-win-gpu.yml
         parameters:
-          MACHINE_POOL: 'onnxruntime-gpu-winbuild-t4'
+          MACHINE_POOL: 'onnxruntime-Win2022-GPU-T4'
           PYTHON_VERSION: '3.9'
-          EP_BUILD_FLAGS: --use_tensorrt --tensorrt_home="C:\local\TensorRT-8.6.0.12.Windows10.x86_64.cuda-11.8" --cuda_version=11.8 --cuda_home="C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.8"  --cmake_extra_defines "CMAKE_CUDA_ARCHITECTURES=52;60;61;70;75;80"
+          EP_BUILD_FLAGS: --use_tensorrt --tensorrt_home="C:\local\TensorRT-8.6.0.12.Windows10.x86_64.cuda-11.8" --cuda_home="$(Agent.TempDirectory)\v11.8"  --cmake_extra_defines "CMAKE_CUDA_ARCHITECTURES=52;60;61;70;75;80"
           ENV_SETUP_SCRIPT: setup_env_gpu.bat
           EP_NAME: gpu
 
       - template: py-win-gpu.yml
         parameters:
-          MACHINE_POOL: 'onnxruntime-gpu-winbuild-t4'
+          MACHINE_POOL: 'onnxruntime-Win2022-GPU-T4'
           PYTHON_VERSION: '3.10'
-          EP_BUILD_FLAGS: --use_tensorrt --tensorrt_home="C:\local\TensorRT-8.6.0.12.Windows10.x86_64.cuda-11.8" --cuda_version=11.8 --cuda_home="C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.8"  --cmake_extra_defines "CMAKE_CUDA_ARCHITECTURES=52;60;61;70;75;80"
+          EP_BUILD_FLAGS: --use_tensorrt --tensorrt_home="C:\local\TensorRT-8.6.0.12.Windows10.x86_64.cuda-11.8" --cuda_home="$(Agent.TempDirectory)\v11.8"  --cmake_extra_defines "CMAKE_CUDA_ARCHITECTURES=52;60;61;70;75;80"
           ENV_SETUP_SCRIPT: setup_env_gpu.bat
           EP_NAME: gpu
 
       - template: py-win-gpu.yml
         parameters:
-          MACHINE_POOL: 'onnxruntime-gpu-winbuild-t4'
+          MACHINE_POOL: 'onnxruntime-Win2022-GPU-T4'
           PYTHON_VERSION: '3.11'
-          EP_BUILD_FLAGS: --use_tensorrt --tensorrt_home="C:\local\TensorRT-8.6.0.12.Windows10.x86_64.cuda-11.8" --cuda_version=11.8 --cuda_home="C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.8"  --cmake_extra_defines "CMAKE_CUDA_ARCHITECTURES=52;60;61;70;75;80"
+          EP_BUILD_FLAGS: --use_tensorrt --tensorrt_home="C:\local\TensorRT-8.6.0.12.Windows10.x86_64.cuda-11.8" --cuda_home="$(Agent.TempDirectory)\v11.8"  --cmake_extra_defines "CMAKE_CUDA_ARCHITECTURES=52;60;61;70;75;80"
           ENV_SETUP_SCRIPT: setup_env_gpu.bat
           EP_NAME: gpu
 
diff --git a/tools/ci_build/github/azure-pipelines/templates/py-win-gpu.yml b/tools/ci_build/github/azure-pipelines/templates/py-win-gpu.yml
index e8c57bf6ed..fa0b159f54 100644
--- a/tools/ci_build/github/azure-pipelines/templates/py-win-gpu.yml
+++ b/tools/ci_build/github/azure-pipelines/templates/py-win-gpu.yml
@@ -45,6 +45,12 @@ jobs:
           addToPath: true
           architecture: 'x64'
 
+      - task: onebranch.pipeline.tsaoptions@1
+        displayName: 'OneBranch TSAOptions'
+        inputs:
+          tsaConfigFilePath: '$(Build.SourcesDirectory)\.config\tsaoptions.json'
+          appendSourceBranchName: false
+
       - task: BatchScript@1
         displayName: 'setup env'
         inputs:
@@ -65,9 +71,9 @@ jobs:
 
       - template: download-deps.yml
 
-      - ${{ if contains(parameters.EP_BUILD_FLAGS, 'use_cuda') }}:
-        - powershell: |
-           azcopy.exe cp --recursive "https://lotusscus.blob.core.windows.net/models/cuda_sdk/v11.8" $(Agent.TempDirectory)
+      - powershell: |
+         azcopy.exe cp --recursive "https://lotusscus.blob.core.windows.net/models/cuda_sdk/v11.8" $(Agent.TempDirectory)
+
       - task: PythonScript@0
         displayName: 'Update deps.txt'
         inputs: