onnxruntime/tools/ci_build/github/azure-pipelines/orttraining-linux-gpu-ortmodule-distributed-test-ci-pipeline.yml
baijumeswani cab84d902e
Install and use conda on ortmodule CI pipelines (#7530)
* Install and use conda on ortmodule CI pipelines

* Update build script to install onnxruntime wheel before running unit tests

* Remove python 3.5 from install_python_deps

* Pinning deepspeed version to 0.3.15
2021-05-03 15:52:22 -07:00

54 lines
2 KiB
YAML

# Builds ONNX Runtime with training enabled inside a Docker image, then runs
# the ORTModule distributed tests in that image on the 'Linux-Multi-GPU-V100' pool.

# Disable CI (push) triggers for this pipeline.
trigger: none

jobs:
- job: Onnxruntime_Linux_GPU_ORTModule_Distributed_Test
  # Upper bound for the whole job; the test step below has its own, tighter limit.
  timeoutInMinutes: 120
  pool: 'Linux-Multi-GPU-V100'

  steps:
  - checkout: self
    clean: true
    submodules: recursive

  # Build the Docker image and the onnxruntime wheel.
  # RunDockerBuildArgs is handed to the template as one multi-line string; the
  # quoted value after -x carries the build flags (presumably forwarded to the
  # build script -- confirm in templates/run-docker-build-steps.yml).
  - template: templates/run-docker-build-steps.yml
    parameters:
      RunDockerBuildArgs: |
        -o ubuntu18.04 -d gpu -r $(Build.BinariesDirectory) \
        -t onnxruntime_ortmodule_distributed_tests_image \
        -x " \
          --config RelWithDebInfo \
          --enable_training \
          --update --build \
          --build_wheel \
          " \
        -m \
        -u \
        -e
      DisplayName: 'Build'

  # Mount the MNIST test data share on the agent at /mnist.
  # NOTE(review): the storage key is passed as a command-line argument; consider
  # mapping it through `env:` if the mount script can read it from the environment.
  # NOTE(review): succeededOrFailed() makes this step run even when the build
  # step failed -- confirm that is intended.
  - bash: >-
      tools/ci_build/github/linux/docker/scripts/training/azure_scale_set_vm_mount_test_data.sh
      -p $(orttrainingtestdata-storage-key)
      -s "//orttrainingtestdata.file.core.windows.net/mnist"
      -d "/mnist"
    displayName: 'Mount MNIST'
    condition: succeededOrFailed()

  # Entry point for all ORTModule distributed tests.
  # Refer to orttraining/orttraining/test/python/how_to_add_ortmodule_distributed_ci_pipeline_tests.md
  # for guidelines on how to add new tests to this pipeline.
  #
  # Inside the container the commands are chained with && so that a failed wheel
  # install or cleanup fails the step immediately instead of running the tests
  # anyway. The build-tree onnxruntime/ directory is removed before the tests
  # start, presumably so they import the installed wheel rather than the
  # in-tree copy (the tests run with --cwd /build/RelWithDebInfo).
  - script: |
      docker run \
        --gpus all \
        --shm-size=1024m \
        --rm \
        --volume "$(Build.SourcesDirectory):/onnxruntime_src" \
        --volume "$(Build.BinariesDirectory):/build" \
        --volume /mnist:/mnist \
        onnxruntime_ortmodule_distributed_tests_image \
        bash -c "python3 -m pip install /build/RelWithDebInfo/dist/onnxruntime*.whl && \
                 rm -rf /build/RelWithDebInfo/onnxruntime/ && \
                 /build/RelWithDebInfo/launch_test.py \
                   --cmd_line_with_args 'python orttraining_ortmodule_distributed_tests.py --mnist /mnist' \
                   --cwd /build/RelWithDebInfo"
    displayName: 'Run orttraining_ortmodule_distributed_tests.py'
    condition: succeededOrFailed()
    timeoutInMinutes: 30

  - template: templates/component-governance-component-detection-steps.yml
    parameters:
      condition: 'succeeded'

  - template: templates/clean-agent-build-directory-step.yml