onnxruntime/tools/ci_build/github/azure-pipelines/linux-cpu-eager-pipeline.yml
Wei-Sheng Chin dc486d146b
Make ORT callable from various Pytorch compilers (LazyTensor, TorchDynamo, etc) (#10460)
* Make ORT a PyTorch JIT backend

LORT likely doesn't work with aten fallback so we only test LORT in its own CI.

* Revert changes to enable external CUDA allocator. Will add it later.

Revert "Revert changes to enable external CUDA allocator. Will add it later."

This reverts commit d5487f2e193014c805505afae8fb577c53667658.

Fix external allocator

* Relax tolerance and remove commented code

* Print more information in CI

* Fix pointer

* Address comments.
1. Reuse ORT-eager mode's environment.
2. Remove unused ctor.

* Use Pytorch master branch as all PRs are merged

Fix

* Refine based on cpplint feedback

* Revert changes to allow custom CUDA allocator in public APIs

* Use torch.testing.assert_close

* Use unittest framework

* Switch docker repo

* Rename *.cpp to *.cc

* Address comments

* Add comment

* Use same pipeline file for eager and lort pipelines

* Address comments

* Add yaml comment

* Fix cmake files

* Address comments

* Rename flags, remove printing code, remove dead comment
2022-08-22 09:40:40 -07:00

144 lines
5.4 KiB
YAML

# External repositories this pipeline declares as resources.
resources:
  repositories:
  # pypa/manylinux, fetched through the "Microsoft" service connection.
  # `ref` is a full 40-character hash, so the checkout does not follow a
  # moving branch; it is quoted so it is always read as a string.
  - repository: manylinux
    type: Github
    endpoint: Microsoft
    name: pypa/manylinux
    ref: 'a8099af1b3e25f0489717ad9c4f9a2e25a8c5b36'
jobs:
# Builds ORT with eager mode + training enabled inside the eager-mode CPU
# docker image, then installs the ortmodule torch C++ extensions and runs
# the tests against the Release build.
- job: BuildAndTestEagerMode
  timeoutInMinutes: 120
  workspace:
    clean: all
  pool: Linux-CPU-2019
  steps:
  - checkout: self
    clean: true
    submodules: recursive

  # Provides the docker image named `onnxruntimecpubuildeagermode`
  # that the two docker-run steps below use.
  - template: templates/get-docker-image-steps.yml
    parameters:
      Dockerfile: tools/ci_build/github/linux/docker/Dockerfile.manylinux2014_eager_cpu
      Context: tools/ci_build/github/linux/docker
      DockerBuildArgs: '--build-arg BUILD_UID=$( id -u )'
      Repository: onnxruntimecpubuildeagermode

  # Step 1: compile Debug and Release with Python 3.8 (cp38); tests are
  # skipped here (--skip_test) and run in the next step.
  # The source tree is mounted at /onnxruntime_src, the binaries
  # directory at /build, and /data/onnx is mounted read-only.
  - task: CmdLine@2
    displayName: 'build'
    inputs:
      script: |
        mkdir -p $HOME/.onnx
        docker run --rm \
          --volume /data/onnx:/data/onnx:ro \
          --volume $(Build.SourcesDirectory):/onnxruntime_src \
          --volume $(Build.BinariesDirectory):/build \
          --volume $HOME/.onnx:/home/onnxruntimedev/.onnx \
          -e ALLOW_RELEASED_ONNX_OPSET_ONLY=0 \
          -e NIGHTLY_BUILD \
          -e BUILD_BUILDNUMBER \
          onnxruntimecpubuildeagermode \
            /opt/python/cp38-cp38/bin/python3 /onnxruntime_src/tools/ci_build/build.py \
              --build_dir /build --cmake_generator Ninja \
              --config Debug Release \
              --skip_submodule_sync \
              --build_shared_lib \
              --parallel \
              --build_eager_mode --enable_training --build_wheel --skip_test
      workingDirectory: $(Build.SourcesDirectory)

  # Step 2: with PYTHONPATH pointing at /build/Release, install the
  # ortmodule torch_cpp_extensions, then re-run build.py for the Release
  # config only, this time with --test.
  - task: CmdLine@2
    displayName: 'install ortmodule extension and test'
    inputs:
      script: |
        mkdir -p $HOME/.onnx
        docker run --rm \
          --volume /data/onnx:/data/onnx:ro \
          --volume $(Build.SourcesDirectory):/onnxruntime_src \
          --volume $(Build.BinariesDirectory):/build \
          --volume $HOME/.onnx:/home/onnxruntimedev/.onnx \
          -e ALLOW_RELEASED_ONNX_OPSET_ONLY=0 \
          -e NIGHTLY_BUILD \
          -e BUILD_BUILDNUMBER \
          onnxruntimecpubuildeagermode \
            bash -c "export PYTHONPATH=/build/Release && /opt/python/cp38-cp38/bin/python3 -m onnxruntime.training.ortmodule.torch_cpp_extensions.install &&\
              /opt/python/cp38-cp38/bin/python3 /onnxruntime_src/tools/ci_build/build.py \
                --build_dir /build --cmake_generator Ninja \
                --config Release \
                --skip_submodule_sync \
                --build_shared_lib \
                --parallel \
                --build_eager_mode --enable_training --build_wheel --test"
      workingDirectory: $(Build.SourcesDirectory)

  - template: templates/clean-agent-build-directory-step.yml
# This pipeline builds the latest PyTorch commit from source
# and uses it in ORT tests. See Dockerfile.manylinux2014_lort_cpu
# for the installation steps. Ideally, we should use only one pipeline
# for eager mode and LazyTensor, but we split them due to recent
# breaking changes in PyTorch.
#
# TODO: once ORT eager mode can run with the latest PyTorch commit, we
# should
#  1. Set --build_eager_mode when running build.py in the
#     first "task" below.
#  2. Copy the second "task" above as the third task below.
# Builds ORT with LazyTensor support (--enable_lazy_tensor) inside the
# LORT CPU docker image using Python 3.9, then installs the resulting
# wheel and runs orttraining_test_lort.py.
- job: BuildAndTestLazyTensor
  timeoutInMinutes: 120
  workspace:
    clean: all
  pool: Linux-CPU-2019
  steps:
  - checkout: self
    clean: true
    submodules: recursive

  # Provides the docker image named `onnxruntimecpubuildlort`
  # that the two docker-run steps below use.
  - template: templates/get-docker-image-steps.yml
    parameters:
      Dockerfile: tools/ci_build/github/linux/docker/Dockerfile.manylinux2014_lort_cpu
      Context: tools/ci_build/github/linux/docker
      DockerBuildArgs: '--build-arg BUILD_UID=$( id -u )'
      Repository: onnxruntimecpubuildlort

  # Step 1: Release-only build that produces a wheel; tests are skipped
  # here (--skip_test) and run in the next step.
  # The last argument line deliberately has NO trailing backslash: a
  # dangling line continuation at the end of the script would fold any
  # line appended later into the build.py arguments.
  - task: CmdLine@2
    displayName: 'Build ORT for Python 3.9'
    inputs:
      script: |
        docker run --rm \
          --volume $(Build.SourcesDirectory):/onnxruntime_src \
          --volume $(Build.BinariesDirectory):/build \
          -e ALLOW_RELEASED_ONNX_OPSET_ONLY=0 \
          -e NIGHTLY_BUILD \
          -e BUILD_BUILDNUMBER \
          onnxruntimecpubuildlort \
            /opt/python/cp39-cp39/bin/python3.9 /onnxruntime_src/tools/ci_build/build.py \
              --build_dir /build --cmake_generator Ninja \
              --config Release \
              --skip_submodule_sync \
              --build_shared_lib \
              --parallel \
              --enable_lazy_tensor --enable_training --build_wheel --skip_test
      workingDirectory: $(Build.SourcesDirectory)

  # Step 2: export the LORT_* environment variables and PYTHONPATH,
  # pip-install the wheel found in /build/Release/dist, and run the
  # LORT test script.
  # NOTE(review): `succeededOrFailed()` makes this step run even when an
  # earlier step (including the build) has failed -- confirm that is
  # intended rather than the default `succeeded()`.
  - task: CmdLine@2
    displayName: 'Test LORT with Python 3.9'
    inputs:
      script: |
        docker run --rm \
          --volume $(Build.SourcesDirectory):/onnxruntime_src \
          --volume $(Build.BinariesDirectory):/build \
          -e ALLOW_RELEASED_ONNX_OPSET_ONLY=0 \
          -e NIGHTLY_BUILD \
          -e BUILD_BUILDNUMBER \
          onnxruntimecpubuildlort \
            bash -c "
              export LORT_CHECK_BASELINE=1 && \
              export LORT_DUMP_GRAPH=1 && \
              export LORT_DUMP_ATEN_OP_HISTORY=1 && \
              export PYTHONPATH=/build/Release && \
              /opt/python/cp39-cp39/bin/python3.9 -m pip install /build/Release/dist/*.whl && \
              /opt/python/cp39-cp39/bin/python3.9 /onnxruntime_src/orttraining/orttraining/test/python/orttraining_test_lort.py"
      workingDirectory: $(Build.SourcesDirectory)
    condition: succeededOrFailed()

  - template: templates/clean-agent-build-directory-step.yml