onnxruntime/tools/ci_build/github/azure-pipelines/orttraining-linux-gpu-ortmodule-test-ci-pipeline.yml

63 lines
2.4 KiB
YAML

trigger: none
jobs:
- job: Onnxruntime_Linux_GPU_ORTModule_Test
timeoutInMinutes: 120
pool: 'Linux-Single-GPU-V100'
steps:
- checkout: self
clean: true
submodules: recursive
- template: templates/run-docker-build-steps.yml
parameters:
RunDockerBuildArgs: |
-o ubuntu16.04 -d gpu -r $(Build.BinariesDirectory) \
-t onnxruntime_ortmodule_tests_image \
-x " \
--config RelWithDebInfo \
--enable_training \
--update --build \
--build_wheel \
" \
-u
DisplayName: 'Build'
- bash: tools/ci_build/github/linux/docker/scripts/training/azure_scale_set_vm_mount_test_data.sh -p $(orttrainingtestdata-storage-key) -s "//orttrainingtestdata.file.core.windows.net/mnist" -d "/mnist"
displayName: 'Mount MNIST'
condition: succeededOrFailed()
- bash: tools/ci_build/github/linux/docker/scripts/training/azure_scale_set_vm_mount_test_data.sh -p $(orttrainingtestdata-storage-key) -s "//orttrainingtestdata.file.core.windows.net/bert-data" -d "/bert_data"
displayName: 'Mount bert-data'
condition: succeededOrFailed()
- bash: tools/ci_build/github/linux/docker/scripts/training/azure_scale_set_vm_mount_test_data.sh -p $(orttrainingtestdata-storage-key) -s "//orttrainingtestdata.file.core.windows.net/hf-models-cache" -d "/hf_models_cache"
displayName: 'Mount hf-models-cache'
condition: succeededOrFailed()
# Entry point for all ORTModule tests
- script: |
docker run \
--gpus all \
--shm-size=1024m \
--rm \
--volume $(Build.SourcesDirectory):/onnxruntime_src \
--volume $(Build.BinariesDirectory):/build \
--volume /mnist:/mnist \
--volume /bert_data:/bert_data \
--volume /hf_models_cache:/hf_models_cache \
onnxruntime_ortmodule_tests_image \
/build/RelWithDebInfo/launch_test.py \
--cmd_line_with_args "python orttraining_ortmodule_tests.py --mnist /mnist --bert_data /bert_data/hf_data/glue_data/CoLA/original/raw --transformers_cache /hf_models_cache/huggingface/transformers" \
--cwd /build/RelWithDebInfo \
displayName: 'Run orttraining_ortmodule_tests.py'
condition: succeededOrFailed()
timeoutInMinutes: 60
- template: templates/component-governance-component-detection-steps.yml
parameters:
condition: 'succeeded'
- template: templates/clean-agent-build-directory-step.yml