From 60bb07307bccaa9145d1a9ba97575a85c5c38b9d Mon Sep 17 00:00:00 2001 From: Changming Sun Date: Thu, 25 May 2023 00:09:08 -0700 Subject: [PATCH] Fix the TRT GPU build job in python packaging pipeline (#16073) 1. Cherry-pick #16054 back to the main branch 2. Replace onnxruntime-gpu-winbuild-t4 with onnxruntime-Win2022-GPU-T4. The latter one has VS2022. --------- Co-authored-by: Patrice Vignola --- .../contrib_ops/attention_op_test_helper.cc | 3 ++- .../contrib_ops/attention_op_test_helper.h | 2 +- .../multihead_attention_op_test.cc | 3 ++- .../templates/py-packaging-stage.yml | 23 ++++++++++--------- .../azure-pipelines/templates/py-win-gpu.yml | 12 +++++++--- 5 files changed, 26 insertions(+), 17 deletions(-) diff --git a/onnxruntime/test/contrib_ops/attention_op_test_helper.cc b/onnxruntime/test/contrib_ops/attention_op_test_helper.cc index bac91aa544..44a9808119 100644 --- a/onnxruntime/test/contrib_ops/attention_op_test_helper.cc +++ b/onnxruntime/test/contrib_ops/attention_op_test_helper.cc @@ -7,7 +7,8 @@ namespace onnxruntime { namespace test { -#if !defined(_MSC_VER) || defined(USE_DML) +// Disable some tests in Windows since prefast build might crash with large test data. +#if !defined(_MSC_VER) void GetWeight_64_3_64(std::vector& weight_data) { weight_data = { -0.004707f, -0.006775f, 0.0009236f, 0.003067f, -0.00806f, 0.00779f, 0.0004425f, 0.00846f, 0.00048f, diff --git a/onnxruntime/test/contrib_ops/attention_op_test_helper.h b/onnxruntime/test/contrib_ops/attention_op_test_helper.h index 0e2241e23e..aa2a371cc9 100644 --- a/onnxruntime/test/contrib_ops/attention_op_test_helper.h +++ b/onnxruntime/test/contrib_ops/attention_op_test_helper.h @@ -43,7 +43,7 @@ struct AttentionTestData { }; // Disable some tests in Windows since prefast build might crash with large test data. -#if !defined(_MSC_VER) || defined(USE_DML) +#if !defined(_MSC_VER) // Return packed weights and bias for input projection. 
void GetAttentionWeight(std::vector& weight_data, int elements = 64 * 3 * 64, int offset = 0, int step = 1); void GetAttentionBias(std::vector& bias_data, int elements = 3 * 64, int offset = 0, int step = 1); diff --git a/onnxruntime/test/contrib_ops/multihead_attention_op_test.cc b/onnxruntime/test/contrib_ops/multihead_attention_op_test.cc index 5cd42e815f..fc7eafd714 100644 --- a/onnxruntime/test/contrib_ops/multihead_attention_op_test.cc +++ b/onnxruntime/test/contrib_ops/multihead_attention_op_test.cc @@ -452,7 +452,8 @@ static void RunMultiHeadAttentionTests(AttentionTestData& data, bool disable_cpu } } -#if !defined(_MSC_VER) || defined(USE_DML) +// Disable some tests in Windows since prefast build might crash with large test data. +#if !defined(_MSC_VER) // Test fused cross attention kernel // It requires head_size > 32 and head_size <= 64 for T4 GPU; hidden_size == v_hidden_size. TEST(MultiHeadAttentionTest, CrossAttention_Batch2_HeadSize40) { diff --git a/tools/ci_build/github/azure-pipelines/templates/py-packaging-stage.yml b/tools/ci_build/github/azure-pipelines/templates/py-packaging-stage.yml index 54c820bea2..63e05ef8d9 100644 --- a/tools/ci_build/github/azure-pipelines/templates/py-packaging-stage.yml +++ b/tools/ci_build/github/azure-pipelines/templates/py-packaging-stage.yml @@ -113,10 +113,11 @@ stages: addToPath: true architecture: $(buildArch) - - task: PythonScript@0 - displayName: 'Run a Python script' + - task: onebranch.pipeline.tsaoptions@1 + displayName: 'OneBranch TSAOptions' inputs: - scriptPath: 'tools\ci_build\update_tsaoptions.py' + tsaConfigFilePath: '$(Build.SourcesDirectory)\.config\tsaoptions.json' + appendSourceBranchName: false - template: set-nightly-build-option-variable-step.yml @@ -270,33 +271,33 @@ stages: - ${{ if eq(parameters.enable_windows_gpu, true) }}: - template: py-win-gpu.yml parameters: - MACHINE_POOL: 'onnxruntime-gpu-winbuild-t4' + MACHINE_POOL: 'onnxruntime-Win2022-GPU-T4' PYTHON_VERSION: '3.8' - 
EP_BUILD_FLAGS: --use_tensorrt --tensorrt_home="C:\local\TensorRT-8.6.0.12.Windows10.x86_64.cuda-11.8" --cuda_version=11.8 --cuda_home="C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.8" --cmake_extra_defines "CMAKE_CUDA_ARCHITECTURES=52;60;61;70;75;80" + EP_BUILD_FLAGS: --use_tensorrt --tensorrt_home="C:\local\TensorRT-8.6.0.12.Windows10.x86_64.cuda-11.8" --cuda_home="$(Agent.TempDirectory)\v11.8" --cmake_extra_defines "CMAKE_CUDA_ARCHITECTURES=52;60;61;70;75;80" ENV_SETUP_SCRIPT: setup_env_gpu.bat EP_NAME: gpu - template: py-win-gpu.yml parameters: - MACHINE_POOL: 'onnxruntime-gpu-winbuild-t4' + MACHINE_POOL: 'onnxruntime-Win2022-GPU-T4' PYTHON_VERSION: '3.9' - EP_BUILD_FLAGS: --use_tensorrt --tensorrt_home="C:\local\TensorRT-8.6.0.12.Windows10.x86_64.cuda-11.8" --cuda_version=11.8 --cuda_home="C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.8" --cmake_extra_defines "CMAKE_CUDA_ARCHITECTURES=52;60;61;70;75;80" + EP_BUILD_FLAGS: --use_tensorrt --tensorrt_home="C:\local\TensorRT-8.6.0.12.Windows10.x86_64.cuda-11.8" --cuda_home="$(Agent.TempDirectory)\v11.8" --cmake_extra_defines "CMAKE_CUDA_ARCHITECTURES=52;60;61;70;75;80" ENV_SETUP_SCRIPT: setup_env_gpu.bat EP_NAME: gpu - template: py-win-gpu.yml parameters: - MACHINE_POOL: 'onnxruntime-gpu-winbuild-t4' + MACHINE_POOL: 'onnxruntime-Win2022-GPU-T4' PYTHON_VERSION: '3.10' - EP_BUILD_FLAGS: --use_tensorrt --tensorrt_home="C:\local\TensorRT-8.6.0.12.Windows10.x86_64.cuda-11.8" --cuda_version=11.8 --cuda_home="C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.8" --cmake_extra_defines "CMAKE_CUDA_ARCHITECTURES=52;60;61;70;75;80" + EP_BUILD_FLAGS: --use_tensorrt --tensorrt_home="C:\local\TensorRT-8.6.0.12.Windows10.x86_64.cuda-11.8" --cuda_home="$(Agent.TempDirectory)\v11.8" --cmake_extra_defines "CMAKE_CUDA_ARCHITECTURES=52;60;61;70;75;80" ENV_SETUP_SCRIPT: setup_env_gpu.bat EP_NAME: gpu - template: py-win-gpu.yml parameters: - MACHINE_POOL: 'onnxruntime-gpu-winbuild-t4' + MACHINE_POOL: 
'onnxruntime-Win2022-GPU-T4' PYTHON_VERSION: '3.11' - EP_BUILD_FLAGS: --use_tensorrt --tensorrt_home="C:\local\TensorRT-8.6.0.12.Windows10.x86_64.cuda-11.8" --cuda_version=11.8 --cuda_home="C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.8" --cmake_extra_defines "CMAKE_CUDA_ARCHITECTURES=52;60;61;70;75;80" + EP_BUILD_FLAGS: --use_tensorrt --tensorrt_home="C:\local\TensorRT-8.6.0.12.Windows10.x86_64.cuda-11.8" --cuda_home="$(Agent.TempDirectory)\v11.8" --cmake_extra_defines "CMAKE_CUDA_ARCHITECTURES=52;60;61;70;75;80" ENV_SETUP_SCRIPT: setup_env_gpu.bat EP_NAME: gpu diff --git a/tools/ci_build/github/azure-pipelines/templates/py-win-gpu.yml b/tools/ci_build/github/azure-pipelines/templates/py-win-gpu.yml index e8c57bf6ed..fa0b159f54 100644 --- a/tools/ci_build/github/azure-pipelines/templates/py-win-gpu.yml +++ b/tools/ci_build/github/azure-pipelines/templates/py-win-gpu.yml @@ -45,6 +45,12 @@ jobs: addToPath: true architecture: 'x64' + - task: onebranch.pipeline.tsaoptions@1 + displayName: 'OneBranch TSAOptions' + inputs: + tsaConfigFilePath: '$(Build.SourcesDirectory)\.config\tsaoptions.json' + appendSourceBranchName: false + - task: BatchScript@1 displayName: 'setup env' inputs: @@ -65,9 +71,9 @@ jobs: - template: download-deps.yml - - ${{ if contains(parameters.EP_BUILD_FLAGS, 'use_cuda') }}: - - powershell: | - azcopy.exe cp --recursive "https://lotusscus.blob.core.windows.net/models/cuda_sdk/v11.8" $(Agent.TempDirectory) + - powershell: | + azcopy.exe cp --recursive "https://lotusscus.blob.core.windows.net/models/cuda_sdk/v11.8" $(Agent.TempDirectory) + - task: PythonScript@0 displayName: 'Update deps.txt' inputs: