onnxruntime/tools/ci_build/github/azure-pipelines/linux-cpu-eager-pipeline.yml
Changming Sun eafd67b8fd
Update CUDA version to 11.6 and refactor python packaging pipeline (#13002)
1. Update CUDA version from 11.4 to 11.6.
2. Update Manylinux version
3. Upgrade GCC version from 10 to 11 for most x86_64 pipelines. CentOS 7 ARM64 doesn't have GCC 11 yet.
4. Refactor python packaging pipeline: 
    a. Split Linux GPU build job to two parts, build and test, so that the
build part doesn't need to use a GPU machine
    b. Make the Linux GPU build job and Linux CPU build job more similar: share the same bash script and yaml file.
5. Temporarily disable Attention_Mask1D_Fp16_B2_FusedNoPadding because it is causing one of our packaging pipelines to fail. I have created an ADO task for this.
2022-09-23 00:29:27 -07:00

144 lines
5.4 KiB
YAML

# External repositories referenced by this pipeline.
resources:
  repositories:
  # pypa/manylinux, pinned to a specific commit through `ref`.
  - repository: manylinux
    type: Github
    endpoint: Microsoft
    name: pypa/manylinux
    ref: 1a61614cabfd6b91c6afd6d9e172cc5b838c65fe
jobs:
# Builds ORT with eager mode enabled inside the eager-mode manylinux2014
# container, then installs the ortmodule torch C++ extensions and runs the
# tests against the Release build.
- job: BuildAndTestEagerMode
  timeoutInMinutes: 120
  workspace:
    clean: all
  pool: Linux-CPU-2019
  steps:
  - checkout: self
    clean: true
    submodules: recursive

  # Obtains the docker image `onnxruntimecpubuildeagermode` used by the
  # steps below.
  - template: templates/get-docker-image-steps.yml
    parameters:
      Dockerfile: tools/ci_build/github/linux/docker/Dockerfile.manylinux2014_eager_cpu
      Context: tools/ci_build/github/linux/docker
      DockerBuildArgs: "--build-arg BUILD_UID=$( id -u )"
      Repository: onnxruntimecpubuildeagermode

  # Build only (--skip_test): Debug and Release, with the wheel.
  # The sources are mounted at /onnxruntime_src and the build output at /build.
  - task: CmdLine@2
    displayName: 'build'
    inputs:
      script: |
        mkdir -p $HOME/.onnx
        docker run --rm \
          --volume /data/onnx:/data/onnx:ro \
          --volume $(Build.SourcesDirectory):/onnxruntime_src \
          --volume $(Build.BinariesDirectory):/build \
          --volume $HOME/.onnx:/home/onnxruntimedev/.onnx \
          -e ALLOW_RELEASED_ONNX_OPSET_ONLY=0 \
          -e NIGHTLY_BUILD \
          -e BUILD_BUILDNUMBER \
          onnxruntimecpubuildeagermode \
            /opt/python/cp38-cp38/bin/python3 /onnxruntime_src/tools/ci_build/build.py \
              --build_dir /build --cmake_generator Ninja \
              --config Debug Release \
              --skip_submodule_sync \
              --build_shared_lib \
              --parallel \
              --build_eager_mode --enable_training --build_wheel --skip_test
      workingDirectory: $(Build.SourcesDirectory)

  # Installs the ortmodule torch C++ extensions from the Release build
  # (PYTHONPATH=/build/Release), then re-invokes build.py with --test for
  # the Release configuration.
  - task: CmdLine@2
    displayName: 'install ortmodule extension and test'
    inputs:
      script: |
        mkdir -p $HOME/.onnx
        docker run --rm \
          --volume /data/onnx:/data/onnx:ro \
          --volume $(Build.SourcesDirectory):/onnxruntime_src \
          --volume $(Build.BinariesDirectory):/build \
          --volume $HOME/.onnx:/home/onnxruntimedev/.onnx \
          -e ALLOW_RELEASED_ONNX_OPSET_ONLY=0 \
          -e NIGHTLY_BUILD \
          -e BUILD_BUILDNUMBER \
          onnxruntimecpubuildeagermode \
            bash -c "export PYTHONPATH=/build/Release && /opt/python/cp38-cp38/bin/python3 -m onnxruntime.training.ortmodule.torch_cpp_extensions.install &&\
              /opt/python/cp38-cp38/bin/python3 /onnxruntime_src/tools/ci_build/build.py \
                --build_dir /build --cmake_generator Ninja \
                --config Release \
                --skip_submodule_sync \
                --build_shared_lib \
                --parallel \
                --build_eager_mode --enable_training --build_wheel --test"
      workingDirectory: $(Build.SourcesDirectory)

  - template: templates/clean-agent-build-directory-step.yml
# This pipeline builds the latest PyTorch commit from source
# and uses it in ORT tests. See Dockerfile.manylinux2014_lort_cpu
# for the installation steps. Ideally, we should only use one pipeline
# for eager mode and LazyTensor, but we split them due to recent
# breaking changes in PyTorch.
#
# TODO: once ORT eager mode can run with latest PyTorch commit, we
# should
# 1. Set --build_eager_mode when running build.py in the
# first "task" below.
# 2. Copy the second "task" above as the third task below.
# Builds ORT with LazyTensor support (--enable_lazy_tensor) for Python 3.9
# inside the LORT manylinux2014 container, then installs the resulting wheel
# and runs orttraining_test_lort.py.
- job: BuildAndTestLazyTensor
  timeoutInMinutes: 120
  workspace:
    clean: all
  pool: Linux-CPU-2019
  steps:
  - checkout: self
    clean: true
    submodules: recursive

  # Obtains the docker image `onnxruntimecpubuildlort` used by the steps below.
  - template: templates/get-docker-image-steps.yml
    parameters:
      Dockerfile: tools/ci_build/github/linux/docker/Dockerfile.manylinux2014_lort_cpu
      Context: tools/ci_build/github/linux/docker
      DockerBuildArgs: "--build-arg BUILD_UID=$( id -u )"
      Repository: onnxruntimecpubuildlort

  # Build only (--skip_test): Release configuration, with the wheel.
  # The final option line carries no trailing backslash: a continuation
  # with nothing after it would dangle at the end of the script.
  - task: CmdLine@2
    displayName: 'Build ORT for Python 3.9'
    inputs:
      script: |
        docker run --rm \
          --volume $(Build.SourcesDirectory):/onnxruntime_src \
          --volume $(Build.BinariesDirectory):/build \
          -e ALLOW_RELEASED_ONNX_OPSET_ONLY=0 \
          -e NIGHTLY_BUILD \
          -e BUILD_BUILDNUMBER \
          onnxruntimecpubuildlort \
            /opt/python/cp39-cp39/bin/python3.9 /onnxruntime_src/tools/ci_build/build.py \
              --build_dir /build --cmake_generator Ninja \
              --config Release \
              --skip_submodule_sync \
              --build_shared_lib \
              --parallel \
              --enable_lazy_tensor --enable_training --build_wheel --skip_test
      workingDirectory: $(Build.SourcesDirectory)

  # Exports the LORT_CHECK_BASELINE, LORT_DUMP_GRAPH and
  # LORT_DUMP_ATEN_OP_HISTORY flags, installs the wheel from
  # /build/Release/dist and runs the LORT test script.
  - task: CmdLine@2
    displayName: 'Test LORT with Python 3.9'
    inputs:
      script: |
        docker run --rm \
          --volume $(Build.SourcesDirectory):/onnxruntime_src \
          --volume $(Build.BinariesDirectory):/build \
          -e ALLOW_RELEASED_ONNX_OPSET_ONLY=0 \
          -e NIGHTLY_BUILD \
          -e BUILD_BUILDNUMBER \
          onnxruntimecpubuildlort \
            bash -c "
              export LORT_CHECK_BASELINE=1 && \
              export LORT_DUMP_GRAPH=1 && \
              export LORT_DUMP_ATEN_OP_HISTORY=1 && \
              export PYTHONPATH=/build/Release && \
              /opt/python/cp39-cp39/bin/python3.9 -m pip install /build/Release/dist/*.whl && \
              /opt/python/cp39-cp39/bin/python3.9 /onnxruntime_src/orttraining/orttraining/test/python/orttraining_test_lort.py"
      workingDirectory: $(Build.SourcesDirectory)
    # NOTE(review): the original nesting of `condition` was not recoverable.
    # It is placed at step level because `condition` is a step property, not
    # a CmdLine input. Confirm that this step is meant to run even when an
    # earlier step has failed.
    condition: succeededOrFailed()

  - template: templates/clean-agent-build-directory-step.yml