From f2e19a8ccf90e089b47b01ac59a83e6991d53776 Mon Sep 17 00:00:00 2001 From: Ashwini Khade Date: Thu, 26 Oct 2023 14:58:57 -0700 Subject: [PATCH] Updates to training pipelines to reduce CI time (#18116) ### Description The motivation for this PR is to reduce CI test time by removing unnecessary tests from the pipelines. The following changes reduce test time in the pipelines: - Skip CPU model tests in GPU builds. Training CIs run these tests as a sanity check. There is no direct training code being tested in these pipelines; furthermore, the CPU tests are already run in the CPU pipelines, so there is no need to run them again in GPU builds and block the GPU VM. This change reduces testing time by 20-25 minutes in all training GPU pipelines. - Delete the debug package building pipeline for Linux training packages. This was required by the compiler team at some point, but there have been 0 downloads of these packages. ### Motivation and Context --- onnxruntime/test/providers/cpu/model_tests.cc | 7 +++++++ .../orttraining-py-packaging-pipeline-cpu.yml | 2 +- .../orttraining-py-packaging-pipeline-cuda.yml | 13 ------------- 3 files changed, 8 insertions(+), 14 deletions(-) diff --git a/onnxruntime/test/providers/cpu/model_tests.cc b/onnxruntime/test/providers/cpu/model_tests.cc index 13dcded6f3..c2e7577a7c 100644 --- a/onnxruntime/test/providers/cpu/model_tests.cc +++ b/onnxruntime/test/providers/cpu/model_tests.cc @@ -443,6 +443,13 @@ static ORT_STRING_VIEW provider_name_dml = ORT_TSTR("dml"); #ifdef USE_DML provider_names[provider_name_dml] = {opset7, opset8, opset9, opset10, opset11, opset12, opset13, opset14, opset15, opset16, opset17, opset18}; #endif + +#if defined(ENABLE_TRAINING_CORE) && defined(USE_CUDA) + // Removing the CPU EP tests from CUDA build for training as these tests are already run in the CPU pipelines. + // Note: These are inference tests, we run these in training builds as an extra check. Therefore reducing + // the number of times these are run to reduce the CI time. 
+ provider_names.erase(provider_name_cpu); +#endif std::vector<std::basic_string<ORTCHAR_T>> v; // Permanently exclude following tests because ORT support only opset starting from 7, // Please make no more changes to the list diff --git a/tools/ci_build/github/azure-pipelines/orttraining-py-packaging-pipeline-cpu.yml b/tools/ci_build/github/azure-pipelines/orttraining-py-packaging-pipeline-cpu.yml index 983143df3f..9755e1f077 100644 --- a/tools/ci_build/github/azure-pipelines/orttraining-py-packaging-pipeline-cpu.yml +++ b/tools/ci_build/github/azure-pipelines/orttraining-py-packaging-pipeline-cpu.yml @@ -9,7 +9,7 @@ resources: ref: 5eda9aded5462201e6310105728d33016e637ea7 stages: -- stage: Python_Packaging_Linux_Trainin_CPU +- stage: Python_Packaging_Linux_Training_CPU jobs: - job: Linux_Training_CPU_Wheels diff --git a/tools/ci_build/github/azure-pipelines/orttraining-py-packaging-pipeline-cuda.yml b/tools/ci_build/github/azure-pipelines/orttraining-py-packaging-pipeline-cuda.yml index b8dfb7f3c9..f244851f8c 100644 --- a/tools/ci_build/github/azure-pipelines/orttraining-py-packaging-pipeline-cuda.yml +++ b/tools/ci_build/github/azure-pipelines/orttraining-py-packaging-pipeline-cuda.yml @@ -20,16 +20,3 @@ stages: agent_pool: Onnxruntime-Linux-GPU upload_wheel: 'yes' debug_build: false - -# Added for triton compiler team. Can be potentially removed. -- template: templates/py-packaging-training-cuda-stage.yml - parameters: - build_py_parameters: --enable_training --update --build - torch_version: '2.0.0' - opset_version: '15' - cuda_version: '11.8' - cmake_cuda_architectures: 70;75;80;86 - docker_file: Dockerfile.manylinux2_28_training_cuda11_8 - agent_pool: Onnxruntime-Linux-GPU - upload_wheel: 'no' - debug_build: true