Move ORT Training pipeline to GitHub Actions (#22543)

Move the ORT Training pipeline to GitHub Actions and enable CodeQL scanning for the code (including inference code). We will move all pull request pipelines to GitHub Actions.
2026-05-14 20:48:00 +00:00 · 2024-10-23 11:57:15 -07:00 · 2024-10-23 11:57:15 -07:00 · a25c9315ea
commit a25c9315ea
parent fd8ee4894d
6 changed files with 62 additions and 214 deletions
--- a/.github/codeql/codeql-config.yml
+++ b/.github/codeql/codeql-config.yml
@ -0,0 +1,7 @@
+name: "CodeQL config"  # display name of this CodeQL configuration
+queries:  # query suites requested for the scan
+  - uses: security-extended
+  - uses: security-and-quality
+paths-ignore:  # repo-relative paths excluded; NOTE(review): confirm effect for cpp
+  - tests
+  - build
--- a/.github/workflows/linux_training.yml
+++ b/.github/workflows/linux_training.yml
@ -0,0 +1,55 @@
+name: orttraining-linux-ci-pipeline
+on:
+  push:  # runs on pushes to the main and release branches only
+    branches:
+      - main
+      - rel-*
+  pull_request:  # no filters: runs for every pull request
+
+concurrency:  # at most one active run per workflow + ref
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: true  # NOTE(review): also cancels in-flight main/rel-* push runs
+
+jobs:
+  orttraining-linux-ci-pipeline:
+    runs-on: ubuntu-24.04
+    permissions:
+      actions: read
+      contents: read
+      security-events: write  # needed to upload SARIF results to code scanning
+    steps:
+      - uses: actions/checkout@v4
+      - run: |
+         python3 -m pip install -r tools/ci_build/github/linux/python/requirements.txt
+      - name: Initialize CodeQL  # placed before the build so the compilation is observed
+        uses: github/codeql-action/init@v3
+        with:
+          config-file: ./.github/codeql/codeql-config.yml
+          languages: 'cpp'
+      - run: |
+         set -e -x
+         rm -rf build
+         python3 tools/ci_build/build.py --build_dir build --config Release --enable_training --skip_submodule_sync --parallel --update --build
+
+      - name: Perform CodeQL Analysis
+        uses: github/codeql-action/analyze@v3
+        with:
+          category: "/language:cpp"
+          output: sarif-results  # directory that receives cpp.sarif
+          upload: failure-only  # results are uploaded by the last step, after filtering
+
+      - name: filter-sarif
+        uses: advanced-security/filter-sarif@v1
+        with:
+          patterns: |
+            +**/*.cc
+            +**/*.h
+            -tests/**/*.*
+            -build/**/*.*
+          input: sarif-results/cpp.sarif
+          output: sarif-results/cpp.sarif  # same path as input: filtered in place
+
+      - name: Upload SARIF
+        uses: github/codeql-action/upload-sarif@v3
+        with:
+          sarif_file: sarif-results/cpp.sarif
--- a/tools/ci_build/github/azure-pipelines/orttraining-linux-ci-pipeline.yml
+++ b/tools/ci_build/github/azure-pipelines/orttraining-linux-ci-pipeline.yml
@ -1,95 +0,0 @@
-##### start trigger Don't edit it manually, Please do edit set-trigger-rules.py ####
-### please do rerun set-trigger-rules.py ###
-trigger:
-  branches:
-    include:
-    - main
-    - rel-*
-  paths:
-    exclude:
-    - docs/**
-    - README.md
-    - CONTRIBUTING.md
-    - BUILD.md
-    - 'js/web'
-    - 'onnxruntime/core/providers/js'
-pr:
-  branches:
-    include:
-    - main
-    - rel-*
-  paths:
-    exclude:
-    - docs/**
-    - README.md
-    - CONTRIBUTING.md
-    - BUILD.md
-    - 'js/web'
-    - 'onnxruntime/core/providers/js'
-#### end trigger ####
-
-jobs:
- job: Linux_Build
-  timeoutInMinutes: 180
-  workspace:
-    clean: all
-  variables:
-    skipComponentGovernanceDetection: true
-    CCACHE_DIR: $(Pipeline.Workspace)/ccache
-    TODAY: $[format('{0:dd}{0:MM}{0:yyyy}', pipeline.startTime)]
-  pool: onnxruntime-Ubuntu-2204-Training-CPU
-  steps:
-  - task: mspremier.PostBuildCleanup.PostBuildCleanup-task.PostBuildCleanup@3
-    displayName: 'Clean Agent Directories'
-    condition: always()
-
-  - checkout: self
-    clean: true
-    submodules: none
-
-  - template: templates/get-docker-image-steps.yml
-    parameters:
-      Dockerfile: tools/ci_build/github/linux/docker/inference/x86_64/default/cpu/Dockerfile
-      Context: tools/ci_build/github/linux/docker/inference/x86_64/default/cpu
-      DockerBuildArgs: "--build-arg BUILD_UID=$( id -u ) --build-arg BASEIMAGE=registry.access.redhat.com/ubi8/ubi"
-      Repository: onnxruntimecpubuildcentos8x64_packaging
-
-  - task: Cache@2
-    inputs:
-      key: '"$(TODAY)" | "$(Build.SourceBranch)" | "$(Build.SourceVersion)"'
-      path: $(CCACHE_DIR)
-      cacheHitVar: CACHE_RESTORED
-      restoreKeys: |
-        "$(TODAY)" | "$(Build.SourceBranch)"
-        "$(TODAY)" |
-    displayName: Cach Task
-
-  - task: CmdLine@2
-    displayName: 'build'
-    inputs:
-      script: |
-        set -e -x
-        mkdir -p $HOME/.onnx
-        mkdir -p $(Pipeline.Workspace)/ccache
-        docker run --rm \
-          --volume /data/onnx:/data/onnx:ro \
-          --volume /data/models:/build/models:ro \
-          --volume $(Build.SourcesDirectory):/onnxruntime_src \
-          --volume $(Build.BinariesDirectory):/build \
-          --volume $HOME/.onnx:/home/onnxruntimedev/.onnx \
-          --volume $(Pipeline.Workspace)/ccache:/cache \
-          -e ALLOW_RELEASED_ONNX_OPSET_ONLY=0 \
-          -e NIGHTLY_BUILD \
-          -e BUILD_BUILDNUMBER \
-          -e CCACHE_DIR=/cache \
-          onnxruntimecpubuildcentos8x64_packaging \
-          /onnxruntime_src/tools/ci_build/github/linux/build_training_ci.sh
-      workingDirectory: $(Build.SourcesDirectory)
-
-  - task: PublishTestResults@2
-    displayName: 'Publish unit test results'
-    inputs:
-      testResultsFiles: '**/*.results.xml'
-      searchFolder: '$(Build.BinariesDirectory)'
-      testRunTitle: 'Unit Test Run'
-    condition: succeededOrFailed()
--- a/tools/ci_build/github/azure-pipelines/orttraining-linux-gpu-ci-pipeline.yml
+++ b/tools/ci_build/github/azure-pipelines/orttraining-linux-gpu-ci-pipeline.yml
@ -1,55 +0,0 @@
-##### start trigger Don't edit it manually, Please do edit set-trigger-rules.py ####
-### please do rerun set-trigger-rules.py ###
-trigger:
-  branches:
-    include:
-    - main
-    - rel-*
-  paths:
-    exclude:
-    - docs/**
-    - README.md
-    - CONTRIBUTING.md
-    - BUILD.md
-    - 'js/web'
-    - 'onnxruntime/core/providers/js'
-pr:
-  branches:
-    include:
-    - main
-    - rel-*
-  paths:
-    exclude:
-    - docs/**
-    - README.md
-    - CONTRIBUTING.md
-    - BUILD.md
-    - 'js/web'
-    - 'onnxruntime/core/providers/js'
-#### end trigger ####
-
-jobs:
- template: templates/linux-ci.yml
-  parameters:
-    AgentPool : 'Onnxruntime-Linux-GPU-NC6sv3'
-    JobName: 'Onnxruntime_Linux_GPU_Training'
-    RunDockerBuildArgs: >
-      -o ubuntu20.04 -d gpu
-      -t onnxruntime_orttraining_ortmodule_tests_image
-      -u
-      -e
-      -x "
-      --enable_training
-      --config Release
-      --use_cuda --cuda_version=11.8 --cuda_home=/usr/local/cuda-11.8 --cudnn_home=/usr/local/cuda-11.8
-      --build_wheel
-      --enable_nvtx_profile
-      --cmake_extra_defines CMAKE_CUDA_ARCHITECTURES=70
-      "
-    RunInjectedPipeline: 'true'
-    InjectedPipeline: 'orttraining-linux-gpu-test-ci-pipeline.yml'
-    DockerImageTag: 'onnxruntime_orttraining_ortmodule_tests_image'
-    TimeoutInMinutes: 190
-    # Enable unreleased onnx opsets in CI builds
-    # This facilitates testing the implementation for the new opsets
-    AllowReleasedOpsetOnly: '0'
--- a/tools/ci_build/github/linux/build_training_ci.sh
+++ b/tools/ci_build/github/linux/build_training_ci.sh
@ -1,4 +0,0 @@
-#!/bin/bash
-set -e -x
-python3.12 -m pip install -r /onnxruntime_src/tools/ci_build/github/linux/python/requirements.txt
-python3.12 /onnxruntime_src/tools/ci_build/build.py --build_dir /build --config Release --enable_training --skip_submodule_sync --parallel
--- a/tools/ci_build/github/linux/docker/Dockerfile.ubuntu_gpu_training
+++ b/tools/ci_build/github/linux/docker/Dockerfile.ubuntu_gpu_training
@ -1,60 +0,0 @@
-ARG BASEIMAGE=nvcr.io/nvidia/cuda:11.8.0-cudnn8-devel-ubuntu18.04
-
-FROM $BASEIMAGE
-
-ARG PYTHON_VERSION=3.9
-ARG INSTALL_DEPS_EXTRA_ARGS
-ARG USE_CONDA=false
-
-ADD scripts /tmp/scripts
-RUN /tmp/scripts/install_ubuntu.sh -p $PYTHON_VERSION && \
-    /tmp/scripts/install_os_deps.sh -d gpu $INSTALL_DEPS_EXTRA_ARGS
-
-# If USE_CONDA is false, use root to install python dependencies.
-RUN if [ "$USE_CONDA" = false ] ; \
-    then /tmp/scripts/install_python_deps.sh -p $PYTHON_VERSION -d gpu $INSTALL_DEPS_EXTRA_ARGS ; \
-    fi
-
-WORKDIR /root
-
-# Allow configure to pick up GDK and CuDNN where it expects it.
-# (Note: $CUDNN_VERSION is defined by NVidia's base image)
-RUN _CUDNN_VERSION=$(echo $CUDNN_VERSION | cut -d. -f1-2) && \
-    mkdir -p /usr/local/cudnn-$_CUDNN_VERSION/cuda/include && \
-    ln -s /usr/include/cudnn.h /usr/local/cudnn-$_CUDNN_VERSION/cuda/include/cudnn.h && \
-    mkdir -p /usr/local/cudnn-$_CUDNN_VERSION/cuda/lib64 && \
-    ln -s /etc/alternatives/libcudnn_so /usr/local/cudnn-$_CUDNN_VERSION/cuda/lib64/libcudnn.so && \
-    ln -s /usr/local/cudnn{-$_CUDNN_VERSION,}
-
-ENV LD_LIBRARY_PATH /usr/local/openblas/lib:$LD_LIBRARY_PATH
-
-ARG BUILD_USER=onnxruntimedev
-ARG BUILD_UID=1000
-RUN adduser --gecos 'onnxruntime Build User' --disabled-password $BUILD_USER --uid $BUILD_UID
-WORKDIR /home/$BUILD_USER
-USER $BUILD_USER
-
-ARG MINICONDA_PREFIX=/home/$BUILD_USER/miniconda3
-RUN if [ "$USE_CONDA" = true ] ; \
-    then MINICONDA=miniconda.sh && \
-    wget --no-verbose https://repo.anaconda.com/miniconda/Miniconda3-py37_4.9.2-Linux-x86_64.sh -O $MINICONDA && \
-    chmod a+x $MINICONDA && \
-    ./$MINICONDA -b -p $MINICONDA_PREFIX && \
-    rm ./$MINICONDA && \
-    $MINICONDA_PREFIX/bin/conda clean --yes --all && \
-    $MINICONDA_PREFIX/bin/conda install -y python=$PYTHON_VERSION ; \
-    fi
-
-ENV PATH /home/$BUILD_USER/miniconda3/bin:$PATH
-
-# If USE_CONDA is true, use onnxruntimedev user to install python dependencies
-RUN if [ "$USE_CONDA" = true ] ; \
-    then /tmp/scripts/install_python_deps.sh -p $PYTHON_VERSION -d gpu $INSTALL_DEPS_EXTRA_ARGS -c ; \
-    fi
-
-WORKDIR /root
-USER root
-RUN rm -rf /tmp/scripts
-
-WORKDIR /home/$BUILD_USER
-USER $BUILD_USER