Move ORT Training pipeline to GitHub Actions (#22543)

Move the ORT Training pipeline to GitHub Actions and enable CodeQL scanning for the code (including inference code).
We will move all pull request pipelines to GitHub Actions.
This commit is contained in:
Changming Sun 2024-10-23 11:57:15 -07:00 committed by GitHub
parent fd8ee4894d
commit a25c9315ea
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 62 additions and 214 deletions

7
.github/codeql/codeql-config.yml vendored Normal file
View file

@ -0,0 +1,7 @@
# CodeQL configuration referenced by a workflow's `config-file` input.
name: 'CodeQL config'

# Query suites to run.
queries:
  - uses: security-extended
  - uses: security-and-quality

# Directories to leave out of the scan.
paths-ignore:
  - tests
  - build

55
.github/workflows/linux_training.yml vendored Normal file
View file

@ -0,0 +1,55 @@
# Linux training CI workflow: name, triggers and concurrency policy.
name: orttraining-linux-ci-pipeline
on:
  # Branch pushes are limited to main and release branches.
  push:
    branches:
      - main
      - 'rel-*'
  # Every pull request runs the workflow.
  pull_request:
concurrency:
  # Pull requests share one group per PR ref, so a new push to the PR cancels
  # the stale run. Branch pushes are keyed by commit SHA instead, so that
  # consecutive merges to main/rel-* do not cancel each other's build and scan.
  group: ${{ github.workflow }}-${{ github.event_name == 'pull_request' && github.ref || github.sha }}
  cancel-in-progress: true
jobs:
  # Builds ONNX Runtime with training enabled under CodeQL observation, then
  # filters the resulting SARIF file and uploads it.
  orttraining-linux-ci-pipeline:
    runs-on: ubuntu-24.04
    permissions:
      actions: read
      contents: read
      # Required by the SARIF upload step.
      security-events: write
    steps:
      - uses: actions/checkout@v4

      - name: Install Python build requirements
        run: |
          python3 -m pip install -r tools/ci_build/github/linux/python/requirements.txt

      # CodeQL is initialized before the build step so that the C/C++
      # compilation below is captured for analysis.
      - name: Initialize CodeQL
        uses: github/codeql-action/init@v3
        with:
          config-file: ./.github/codeql/codeql-config.yml
          languages: 'cpp'

      - name: Build ONNX Runtime with training enabled
        run: |
          set -e -x
          rm -rf build
          python3 tools/ci_build/build.py --build_dir build --config Release --enable_training --skip_submodule_sync --parallel --update --build

      - name: Perform CodeQL Analysis
        uses: github/codeql-action/analyze@v3
        with:
          category: "/language:cpp"
          # Write the results to disk so the next step can filter them.
          output: sarif-results
          # Do not upload results from this step on success; the filtered
          # file is uploaded by the final step instead.
          upload: failure-only

      # Keep findings in .cc/.h files only, and drop anything under tests/ or
      # build/. The file is rewritten in place.
      - name: filter-sarif
        uses: advanced-security/filter-sarif@v1
        with:
          patterns: |
            +**/*.cc
            +**/*.h
            -tests/**/*.*
            -build/**/*.*
          input: sarif-results/cpp.sarif
          output: sarif-results/cpp.sarif

      - name: Upload SARIF
        uses: github/codeql-action/upload-sarif@v3
        with:
          sarif_file: sarif-results/cpp.sarif

View file

@ -1,95 +0,0 @@
##### start trigger Don't edit it manually, Please do edit set-trigger-rules.py ####
### please do rerun set-trigger-rules.py ###
# CI trigger: main and rel-* branches, with the listed paths excluded.
trigger:
  branches:
    include:
      - main
      - rel-*
  paths:
    exclude:
      - docs/**
      - README.md
      - CONTRIBUTING.md
      - BUILD.md
      - 'js/web'
      - 'onnxruntime/core/providers/js'
# Pull request trigger: same branch and path filters as the CI trigger.
pr:
  branches:
    include:
      - main
      - rel-*
  paths:
    exclude:
      - docs/**
      - README.md
      - CONTRIBUTING.md
      - BUILD.md
      - 'js/web'
      - 'onnxruntime/core/providers/js'
#### end trigger ####
jobs:
  # Builds the training configuration inside a Docker image, with a ccache
  # directory restored and saved through the pipeline cache.
  - job: Linux_Build
    timeoutInMinutes: 180
    workspace:
      clean: all
    variables:
      skipComponentGovernanceDetection: true
      CCACHE_DIR: $(Pipeline.Workspace)/ccache
      # Day/month/year of the pipeline start time; used as the leading
      # segment of the cache key below.
      TODAY: $[format('{0:dd}{0:MM}{0:yyyy}', pipeline.startTime)]
    pool: onnxruntime-Ubuntu-2204-Training-CPU
    steps:
      - task: mspremier.PostBuildCleanup.PostBuildCleanup-task.PostBuildCleanup@3
        displayName: 'Clean Agent Directories'
        condition: always()

      - checkout: self
        clean: true
        submodules: none

      # Produces the image that the build step below runs under the name
      # given in Repository.
      - template: templates/get-docker-image-steps.yml
        parameters:
          Dockerfile: tools/ci_build/github/linux/docker/inference/x86_64/default/cpu/Dockerfile
          Context: tools/ci_build/github/linux/docker/inference/x86_64/default/cpu
          DockerBuildArgs: "--build-arg BUILD_UID=$( id -u ) --build-arg BASEIMAGE=registry.access.redhat.com/ubi8/ubi"
          Repository: onnxruntimecpubuildcentos8x64_packaging

      # Cache key is day | branch | commit; the restore keys fall back to the
      # same day and branch, then to the same day only.
      - task: Cache@2
        inputs:
          key: '"$(TODAY)" | "$(Build.SourceBranch)" | "$(Build.SourceVersion)"'
          path: $(CCACHE_DIR)
          cacheHitVar: CACHE_RESTORED
          restoreKeys: |
            "$(TODAY)" | "$(Build.SourceBranch)"
            "$(TODAY)" |
        displayName: Cach Task

      # Runs the training build script in the container. Sources are mounted
      # at /onnxruntime_src, the binaries directory at /build and the ccache
      # directory at /cache.
      - task: CmdLine@2
        displayName: 'build'
        inputs:
          script: |
            set -e -x
            mkdir -p $HOME/.onnx
            mkdir -p $(Pipeline.Workspace)/ccache
            docker run --rm \
              --volume /data/onnx:/data/onnx:ro \
              --volume /data/models:/build/models:ro \
              --volume $(Build.SourcesDirectory):/onnxruntime_src \
              --volume $(Build.BinariesDirectory):/build \
              --volume $HOME/.onnx:/home/onnxruntimedev/.onnx \
              --volume $(Pipeline.Workspace)/ccache:/cache \
              -e ALLOW_RELEASED_ONNX_OPSET_ONLY=0 \
              -e NIGHTLY_BUILD \
              -e BUILD_BUILDNUMBER \
              -e CCACHE_DIR=/cache \
              onnxruntimecpubuildcentos8x64_packaging \
              /onnxruntime_src/tools/ci_build/github/linux/build_training_ci.sh
          workingDirectory: $(Build.SourcesDirectory)

      # Publish test results whether the build step succeeded or failed.
      - task: PublishTestResults@2
        displayName: 'Publish unit test results'
        inputs:
          testResultsFiles: '**/*.results.xml'
          searchFolder: '$(Build.BinariesDirectory)'
          testRunTitle: 'Unit Test Run'
        condition: succeededOrFailed()

View file

@ -1,55 +0,0 @@
##### start trigger Don't edit it manually, Please do edit set-trigger-rules.py ####
### please do rerun set-trigger-rules.py ###
# CI trigger: main and rel-* branches, with the listed paths excluded.
trigger:
  branches:
    include:
      - main
      - rel-*
  paths:
    exclude:
      - docs/**
      - README.md
      - CONTRIBUTING.md
      - BUILD.md
      - 'js/web'
      - 'onnxruntime/core/providers/js'
# Pull request trigger: same branch and path filters as the CI trigger.
pr:
  branches:
    include:
      - main
      - rel-*
  paths:
    exclude:
      - docs/**
      - README.md
      - CONTRIBUTING.md
      - BUILD.md
      - 'js/web'
      - 'onnxruntime/core/providers/js'
#### end trigger ####
jobs:
  # GPU training job, delegated to the shared Linux CI template.
  - template: templates/linux-ci.yml
    parameters:
      AgentPool: 'Onnxruntime-Linux-GPU-NC6sv3'
      JobName: 'Onnxruntime_Linux_GPU_Training'
      # Folded scalar: the lines below are joined with spaces into a single
      # argument string. The quoted section after -x holds the build options.
      # NOTE(review): the original indentation of these lines is not visible
      # here; confirm they were all at the same level.
      RunDockerBuildArgs: >
        -o ubuntu20.04 -d gpu
        -t onnxruntime_orttraining_ortmodule_tests_image
        -u
        -e
        -x "
        --enable_training
        --config Release
        --use_cuda --cuda_version=11.8 --cuda_home=/usr/local/cuda-11.8 --cudnn_home=/usr/local/cuda-11.8
        --build_wheel
        --enable_nvtx_profile
        --cmake_extra_defines CMAKE_CUDA_ARCHITECTURES=70
        "
      RunInjectedPipeline: 'true'
      InjectedPipeline: 'orttraining-linux-gpu-test-ci-pipeline.yml'
      DockerImageTag: 'onnxruntime_orttraining_ortmodule_tests_image'
      TimeoutInMinutes: 190
      # Enable unreleased onnx opsets in CI builds
      # This facilitates testing the implementation for the new opsets
      AllowReleasedOpsetOnly: '0'

View file

@ -1,4 +0,0 @@
#!/bin/bash
# Training CI build: install the Python build requirements, then run build.py
# with training enabled. The source tree is read from /onnxruntime_src and the
# build output goes to /build.
py=python3.12
src_root=/onnxruntime_src

# Stop on the first failing command and echo each command as it runs.
set -ex

"$py" -m pip install -r "$src_root/tools/ci_build/github/linux/python/requirements.txt"
"$py" "$src_root/tools/ci_build/build.py" --build_dir /build --config Release --enable_training --skip_submodule_sync --parallel

View file

@ -1,60 +0,0 @@
# GPU build image: CUDA/cuDNN base image plus OS and Python build dependencies,
# ending as a non-root build user.
#
# Build arguments:
#   BASEIMAGE                base image to start from
#   PYTHON_VERSION           Python version passed to the install scripts
#   INSTALL_DEPS_EXTRA_ARGS  extra arguments forwarded to the install scripts
#   USE_CONDA                "true" installs Python dependencies via Miniconda
#                            as the build user; "false" installs them as root
#   BUILD_USER / BUILD_UID   name and uid of the non-root build user
ARG BASEIMAGE=nvcr.io/nvidia/cuda:11.8.0-cudnn8-devel-ubuntu18.04
FROM $BASEIMAGE

ARG PYTHON_VERSION=3.9
ARG INSTALL_DEPS_EXTRA_ARGS
ARG USE_CONDA=false

# COPY is sufficient for a plain local directory.
COPY scripts /tmp/scripts
RUN /tmp/scripts/install_ubuntu.sh -p $PYTHON_VERSION && \
    /tmp/scripts/install_os_deps.sh -d gpu $INSTALL_DEPS_EXTRA_ARGS

# If USE_CONDA is false, use root to install python dependencies.
RUN if [ "$USE_CONDA" = false ] ; \
    then /tmp/scripts/install_python_deps.sh -p $PYTHON_VERSION -d gpu $INSTALL_DEPS_EXTRA_ARGS ; \
    fi

WORKDIR /root

# Allow configure to pick up GDK and CuDNN where it expects it.
# (Note: $CUDNN_VERSION is defined by NVidia's base image)
# The final link names both source and target explicitly: shell-form RUN
# executes under /bin/sh, which does not perform bash brace expansion, so
# `cudnn{-$_CUDNN_VERSION,}` would not produce the /usr/local/cudnn link.
RUN _CUDNN_VERSION=$(echo $CUDNN_VERSION | cut -d. -f1-2) && \
    mkdir -p /usr/local/cudnn-$_CUDNN_VERSION/cuda/include && \
    ln -s /usr/include/cudnn.h /usr/local/cudnn-$_CUDNN_VERSION/cuda/include/cudnn.h && \
    mkdir -p /usr/local/cudnn-$_CUDNN_VERSION/cuda/lib64 && \
    ln -s /etc/alternatives/libcudnn_so /usr/local/cudnn-$_CUDNN_VERSION/cuda/lib64/libcudnn.so && \
    ln -s /usr/local/cudnn-$_CUDNN_VERSION /usr/local/cudnn

ENV LD_LIBRARY_PATH=/usr/local/openblas/lib:$LD_LIBRARY_PATH

# Create the non-root build user and switch to it.
ARG BUILD_USER=onnxruntimedev
ARG BUILD_UID=1000
RUN adduser --gecos 'onnxruntime Build User' --disabled-password $BUILD_USER --uid $BUILD_UID

WORKDIR /home/$BUILD_USER
USER $BUILD_USER

# If USE_CONDA is true, install Miniconda into the build user's home directory
# and pin the requested Python version.
ARG MINICONDA_PREFIX=/home/$BUILD_USER/miniconda3
RUN if [ "$USE_CONDA" = true ] ; \
    then MINICONDA=miniconda.sh && \
        wget --no-verbose https://repo.anaconda.com/miniconda/Miniconda3-py37_4.9.2-Linux-x86_64.sh -O $MINICONDA && \
        chmod a+x $MINICONDA && \
        ./$MINICONDA -b -p $MINICONDA_PREFIX && \
        rm ./$MINICONDA && \
        $MINICONDA_PREFIX/bin/conda clean --yes --all && \
        $MINICONDA_PREFIX/bin/conda install -y python=$PYTHON_VERSION ; \
    fi

# The Miniconda bin directory is put on PATH regardless of USE_CONDA.
ENV PATH=/home/$BUILD_USER/miniconda3/bin:$PATH

# If USE_CONDA is true, use onnxruntimedev user to install python dependencies
RUN if [ "$USE_CONDA" = true ] ; \
    then /tmp/scripts/install_python_deps.sh -p $PYTHON_VERSION -d gpu $INSTALL_DEPS_EXTRA_ARGS -c ; \
    fi

# Remove the install scripts as root, then return to the build user.
WORKDIR /root
USER root
RUN rm -rf /tmp/scripts

WORKDIR /home/$BUILD_USER
USER $BUILD_USER