onnxruntime/tools/ci_build/github/azure-pipelines/orttraining-linux-gpu-e2e-test-ci-pipeline.yml
Edward Chen 71e7c2b423
Cache build docker images in container registry. (#5811)
This PR adds infrastructure to automatically cache docker images used in CI builds in a container registry.

Currently, build images are pulled from a container registry for some builds and built every time for others. The container registry requires maintenance to keep the images up to date and building images every time wastes build agent resources.

With this change, a given build image is looked up in a cache container registry: if it is present, it is pulled; otherwise, it is built and pushed. The uniqueness of a build image is determined by a hash digest of the dockerfile, the docker build context directory, and certain "docker build" options. This digest is part of the image tag in the cache container repository.

The cache container registry will need to be cleaned up periodically. This is not automated yet.
2020-11-17 17:02:24 -08:00

62 lines
2.3 KiB
YAML

# ORT training Linux GPU end-to-end test pipeline.
#
# Builds ONNX Runtime with training enabled inside a docker image, then runs
# the batch size and convergence tests in that image with GPU access.

# No CI trigger: this pipeline does not start automatically on pushes.
trigger: none

jobs:
- job: Onnxruntime_Linux_GPU_Training_E2E_Test

  timeoutInMinutes: 60

  steps:
  - checkout: self
    clean: true
    submodules: recursive

  # update these if the E2E test data changes
  # ("these" being the blob snapshot URL and the archive SHA-256 digest below)
  - script: |
      orttraining/tools/ci_test/download_azure_blob_archive.py \
        --azure_blob_url https://onnxruntimetestdata.blob.core.windows.net/training/onnxruntime_training_data.zip?snapshot=2020-06-15T23:17:35.8314853Z \
        --target_dir $(Build.BinariesDirectory)/training_e2e_test_data \
        --archive_sha256_digest B01C169B6550D1A0A6F1B4E2F34AE2A8714B52DBB70AC04DA85D371F691BDFF9
    displayName: 'Download onnxruntime_training_data.zip data'

  # Build step, delegated to a shared template (not visible from this file).
  # The image tag passed via -t must match the image name used by the
  # "docker run" test steps below.
  - template: templates/run-docker-build-steps.yml
    parameters:
      RunDockerBuildArgs: |
        -o ubuntu16.04 -d gpu -r $(Build.BinariesDirectory) \
        -t onnxruntime_e2e_test_image \
        -x " \
          --config RelWithDebInfo \
          --enable_training \
          --update --build \
          "
      DisplayName: 'Build'

  # Each test step mounts the sources, the build output, and the downloaded
  # test data (read-only) into the container.
  - script: |
      docker run \
        --gpus all \
        --rm \
        --volume $(Build.SourcesDirectory):/onnxruntime_src \
        --volume $(Build.BinariesDirectory):/build \
        --volume $(Build.BinariesDirectory)/training_e2e_test_data:/training_e2e_test_data:ro \
        onnxruntime_e2e_test_image \
          /onnxruntime_src/orttraining/tools/ci_test/run_batch_size_test.py \
            --binary_dir /build/RelWithDebInfo \
            --model_root /training_e2e_test_data/models
    displayName: 'Run batch size test'
    condition: succeededOrFailed() # ensure all tests are run

  - script: |
      docker run \
        --gpus all \
        --rm \
        --volume $(Build.SourcesDirectory):/onnxruntime_src \
        --volume $(Build.BinariesDirectory):/build \
        --volume $(Build.BinariesDirectory)/training_e2e_test_data:/training_e2e_test_data:ro \
        onnxruntime_e2e_test_image \
          /onnxruntime_src/orttraining/tools/ci_test/run_convergence_test.py \
            --binary_dir /build/RelWithDebInfo \
            --model_root /training_e2e_test_data/models \
            --training_data_root /training_e2e_test_data/data
    displayName: 'Run convergence test'
    condition: succeededOrFailed() # ensure all tests are run

  - template: templates/clean-agent-build-directory-step.yml