From f2e19a8ccf90e089b47b01ac59a83e6991d53776 Mon Sep 17 00:00:00 2001
From: Ashwini Khade <askhade@microsoft.com>
Date: Thu, 26 Oct 2023 14:58:57 -0700
Subject: [PATCH] Updates to training pipelines to reduce CI time (#18116)

### Description
The motivation for this PR is to reduce CI test time by removing
unnecessary tests from the pipelines.

The following changes reduce test time in the pipelines:

- Skip CPU model tests in GPU builds. Training CIs run these tests only as
a sanity check; they are inference tests and do not directly exercise any
training code. The CPU tests already run in the CPU pipelines, so there is
no need to run them again in GPU builds and block the GPU VM. This change
reduces testing time by 20-25 minutes in all training GPU pipelines.

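- Delete the debug package build pipeline for Linux training packages.
It was required by the compiler team at some point, but there have been 0
downloads of these packages.

- Fix a typo in the stage name of the CPU training packaging pipeline
(`Python_Packaging_Linux_Trainin_CPU` -> `Python_Packaging_Linux_Training_CPU`).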


### Motivation and Context
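Training GPU pipelines spend 20-25 minutes re-running CPU model tests that
the CPU pipelines already cover, and the debug packaging stage builds
packages that have never been downloaded. Removing both shortens CI time
and frees up GPU VMs.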
---
 onnxruntime/test/providers/cpu/model_tests.cc       |  7 +++++++
 .../orttraining-py-packaging-pipeline-cpu.yml       |  2 +-
 .../orttraining-py-packaging-pipeline-cuda.yml      | 13 -------------
 3 files changed, 8 insertions(+), 14 deletions(-)

diff --git a/onnxruntime/test/providers/cpu/model_tests.cc b/onnxruntime/test/providers/cpu/model_tests.cc
index 13dcded6f3..c2e7577a7c 100644
--- a/onnxruntime/test/providers/cpu/model_tests.cc
+++ b/onnxruntime/test/providers/cpu/model_tests.cc
@@ -443,6 +443,13 @@ static ORT_STRING_VIEW provider_name_dml = ORT_TSTR("dml");
 #ifdef USE_DML
   provider_names[provider_name_dml] = {opset7, opset8, opset9, opset10, opset11, opset12, opset13, opset14, opset15, opset16, opset17, opset18};
 #endif
+
+#if defined(ENABLE_TRAINING_CORE) && defined(USE_CUDA)
+  // Remove the CPU EP tests from CUDA training builds, since these tests already run in the CPU pipelines.
+  // Note: These are inference tests that training builds run only as an extra check, so running them
+  // fewer times reduces CI time without losing coverage of training code.
+  provider_names.erase(provider_name_cpu);
+#endif
   std::vector<std::basic_string<ORTCHAR_T>> v;
   // Permanently exclude following tests because ORT support only opset starting from 7,
   // Please make no more changes to the list
diff --git a/tools/ci_build/github/azure-pipelines/orttraining-py-packaging-pipeline-cpu.yml b/tools/ci_build/github/azure-pipelines/orttraining-py-packaging-pipeline-cpu.yml
index 983143df3f..9755e1f077 100644
--- a/tools/ci_build/github/azure-pipelines/orttraining-py-packaging-pipeline-cpu.yml
+++ b/tools/ci_build/github/azure-pipelines/orttraining-py-packaging-pipeline-cpu.yml
@@ -9,7 +9,7 @@ resources:
     ref: 5eda9aded5462201e6310105728d33016e637ea7
 
 stages:
-- stage: Python_Packaging_Linux_Trainin_CPU
+- stage: Python_Packaging_Linux_Training_CPU
 
   jobs:
     - job: Linux_Training_CPU_Wheels
diff --git a/tools/ci_build/github/azure-pipelines/orttraining-py-packaging-pipeline-cuda.yml b/tools/ci_build/github/azure-pipelines/orttraining-py-packaging-pipeline-cuda.yml
index b8dfb7f3c9..f244851f8c 100644
--- a/tools/ci_build/github/azure-pipelines/orttraining-py-packaging-pipeline-cuda.yml
+++ b/tools/ci_build/github/azure-pipelines/orttraining-py-packaging-pipeline-cuda.yml
@@ -20,16 +20,3 @@ stages:
     agent_pool: Onnxruntime-Linux-GPU
     upload_wheel: 'yes'
     debug_build: false
-
-# Added for triton compiler team. Can be potentially removed.
-- template: templates/py-packaging-training-cuda-stage.yml
-  parameters:
-    build_py_parameters: --enable_training --update --build
-    torch_version: '2.0.0'
-    opset_version: '15'
-    cuda_version: '11.8'
-    cmake_cuda_architectures: 70;75;80;86
-    docker_file: Dockerfile.manylinux2_28_training_cuda11_8
-    agent_pool: Onnxruntime-Linux-GPU
-    upload_wheel: 'no'
-    debug_build: true