move training CI agent pools to 1ES hosted (#8775)

This commit is contained in:
liqun Fu 2021-08-18 18:36:19 -07:00 committed by GitHub
parent 39059f2539
commit 2beb873c6b
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
8 changed files with 23 additions and 23 deletions

View file

@ -3,7 +3,7 @@ trigger: none
jobs:
- template: templates/linux-ci.yml
parameters:
AgentPool : 'Linux-Single-GPU-V100'
AgentPool : 'Onnxruntime-Linux-GPU-NC6sv3'
JobName: 'Onnxruntime_Linux_GPU_Training'
SubmoduleCheckoutMode: 'recursive'
RunDockerBuildArgs: >

View file

@ -4,7 +4,7 @@ jobs:
- job: Orttraining_Linux_GPU_Distributed_E2E_Test
timeoutInMinutes: 180
pool: 'Linux-Multi-GPU-V100-E2E3'
pool: 'Onnxruntime-Linux-GPU-NC24sv3'
steps:
- checkout: self
@ -33,7 +33,7 @@ jobs:
--archive_sha256_digest B01C169B6550D1A0A6F1B4E2F34AE2A8714B52DBB70AC04DA85D371F691BDFF9
displayName: 'Download onnxruntime_training_data.zip data'
- bash: tools/ci_build/github/linux/docker/scripts/training/azure_scale_set_vm_mount_test_data.sh -p $(orttrainingtestdata-storage-key) -s "//orttrainingtestdata.file.core.windows.net/bert-data" -d "/bert_data"
- bash: tools/ci_build/github/linux/docker/scripts/training/azure_scale_set_vm_mount_test_data.sh -p $(orttrainingtestdatascus-storage-key) -s "//orttrainingtestdatascus.file.core.windows.net/bert-data" -d "/bert_data"
displayName: 'Mount bert-data'
condition: succeededOrFailed()

View file

@ -4,7 +4,7 @@ jobs:
- job: Onnxruntime_Linux_GPU_ORTModule_Distributed_Test
timeoutInMinutes: 120
pool: 'Linux-Multi-GPU-V100'
pool: 'Onnxruntime-Linux-GPU-NC24sv3'
steps:
- checkout: self
@ -29,7 +29,7 @@ jobs:
-e
DisplayName: 'Build'
- bash: tools/ci_build/github/linux/docker/scripts/training/azure_scale_set_vm_mount_test_data.sh -p $(orttrainingtestdata-storage-key) -s "//orttrainingtestdata.file.core.windows.net/mnist" -d "/mnist"
- bash: tools/ci_build/github/linux/docker/scripts/training/azure_scale_set_vm_mount_test_data.sh -p $(orttrainingtestdatascus-storage-key) -s "//orttrainingtestdatascus.file.core.windows.net/mnist" -d "/mnist"
displayName: 'Mount MNIST'
condition: succeededOrFailed()

View file

@ -11,7 +11,7 @@ jobs:
clean: true
submodules: recursive
- bash: tools/ci_build/github/linux/docker/scripts/training/azure_scale_set_vm_mount_test_data.sh -p $(orttrainingtestdata-storage-key) -s "//orttrainingtestdata.file.core.windows.net/hf-models-cache" -d "/hf_models_cache"
- bash: tools/ci_build/github/linux/docker/scripts/training/azure_scale_set_vm_mount_test_data.sh -p $(orttrainingtestdatascus-storage-key) -s "//orttrainingtestdatascus.file.core.windows.net/hf-models-cache" -d "/hf_models_cache"
displayName: 'Mount hf-models-cache'
condition: succeededOrFailed()

View file

@ -25,11 +25,11 @@ jobs:
"
DisplayName: 'Build performance tests'
- bash: tools/ci_build/github/linux/docker/scripts/training/azure_scale_set_vm_mount_test_data.sh -p $(orttrainingtestdata-storage-key) -s "//orttrainingtestdata.file.core.windows.net/bert-data" -d "/bert_data"
- bash: tools/ci_build/github/linux/docker/scripts/training/azure_scale_set_vm_mount_test_data.sh -p $(orttrainingtestdatascus-storage-key) -s "//orttrainingtestdatascus.file.core.windows.net/bert-data" -d "/bert_data"
displayName: 'Mount bert-data'
condition: succeededOrFailed() # ensure all tests are run
- bash: tools/ci_build/github/linux/docker/scripts/training/azure_scale_set_vm_mount_test_data.sh -p $(orttrainingtestdata-storage-key) -s "//orttrainingtestdata.file.core.windows.net/gpt2-data" -d "/gpt2_data"
- bash: tools/ci_build/github/linux/docker/scripts/training/azure_scale_set_vm_mount_test_data.sh -p $(orttrainingtestdatascus-storage-key) -s "//orttrainingtestdatascus.file.core.windows.net/gpt2-data" -d "/gpt2_data"
displayName: 'Mount gpt2 test data'
condition: succeededOrFailed() # ensure all tests are run

View file

@ -6,15 +6,15 @@ parameters:
steps:
- bash: tools/ci_build/github/linux/docker/scripts/training/azure_scale_set_vm_mount_test_data.sh -p $(orttrainingtestdata-storage-key) -s "//orttrainingtestdata.file.core.windows.net/mnist" -d "/mnist"
- bash: tools/ci_build/github/linux/docker/scripts/training/azure_scale_set_vm_mount_test_data.sh -p $(orttrainingtestdatascus-storage-key) -s "//orttrainingtestdatascus.file.core.windows.net/mnist" -d "/mnist"
displayName: 'Mount MNIST'
condition: succeededOrFailed()
- bash: tools/ci_build/github/linux/docker/scripts/training/azure_scale_set_vm_mount_test_data.sh -p $(orttrainingtestdata-storage-key) -s "//orttrainingtestdata.file.core.windows.net/bert-data" -d "/bert_data"
- bash: tools/ci_build/github/linux/docker/scripts/training/azure_scale_set_vm_mount_test_data.sh -p $(orttrainingtestdatascus-storage-key) -s "//orttrainingtestdatascus.file.core.windows.net/bert-data" -d "/bert_data"
displayName: 'Mount bert-data'
condition: succeededOrFailed()
- bash: tools/ci_build/github/linux/docker/scripts/training/azure_scale_set_vm_mount_test_data.sh -p $(orttrainingtestdata-storage-key) -s "//orttrainingtestdata.file.core.windows.net/hf-models-cache" -d "/hf_models_cache"
- bash: tools/ci_build/github/linux/docker/scripts/training/azure_scale_set_vm_mount_test_data.sh -p $(orttrainingtestdatascus-storage-key) -s "//orttrainingtestdatascus.file.core.windows.net/hf-models-cache" -d "/hf_models_cache"
displayName: 'Mount hf-models-cache'
condition: succeededOrFailed()

View file

@ -92,15 +92,15 @@ stages:
--build-arg LD_LIBRARY_PATH_ARG=/opt/rh/devtoolset-$(GccVersion)/root/usr/lib64:/opt/rh/devtoolset-$(GccVersion)/root/usr/lib:/opt/rh/devtoolset-$(GccVersion)/root/usr/lib64/dyninst:/opt/rh/devtoolset-$(GccVersion)/root/usr/lib/dyninst:/usr/local/lib64
Repository: onnxruntimetraininggpubuild
- bash: tools/ci_build/github/linux/docker/scripts/training/azure_scale_set_vm_mount_test_data.sh -p $(orttrainingtestdata-storage-key) -s "//orttrainingtestdata.file.core.windows.net/mnist" -d "/mnist"
- bash: tools/ci_build/github/linux/docker/scripts/training/azure_scale_set_vm_mount_test_data.sh -p $(orttrainingtestdatascus-storage-key) -s "//orttrainingtestdatascus.file.core.windows.net/mnist" -d "/mnist"
displayName: 'Mount MNIST'
condition: succeededOrFailed()
- bash: tools/ci_build/github/linux/docker/scripts/training/azure_scale_set_vm_mount_test_data.sh -p $(orttrainingtestdata-storage-key) -s "//orttrainingtestdata.file.core.windows.net/bert-data" -d "/bert_data"
- bash: tools/ci_build/github/linux/docker/scripts/training/azure_scale_set_vm_mount_test_data.sh -p $(orttrainingtestdatascus-storage-key) -s "//orttrainingtestdatascus.file.core.windows.net/bert-data" -d "/bert_data"
displayName: 'Mount bert-data'
condition: succeededOrFailed()
- bash: tools/ci_build/github/linux/docker/scripts/training/azure_scale_set_vm_mount_test_data.sh -p $(orttrainingtestdata-storage-key) -s "//orttrainingtestdata.file.core.windows.net/hf-models-cache" -d "/hf_models_cache"
- bash: tools/ci_build/github/linux/docker/scripts/training/azure_scale_set_vm_mount_test_data.sh -p $(orttrainingtestdatascus-storage-key) -s "//orttrainingtestdatascus.file.core.windows.net/hf-models-cache" -d "/hf_models_cache"
displayName: 'Mount hf-models-cache'
condition: succeededOrFailed()

View file

@ -5,19 +5,19 @@ function credentialize () {
sudo mkdir /etc/smbcredentials
fi
if [ -f "/etc/smbcredentials/orttrainingtestdata.cred" ]; then
sudo rm /etc/smbcredentials/orttrainingtestdata.cred
if [ -f "/etc/smbcredentials/orttrainingtestdatascus.cred" ]; then
sudo rm /etc/smbcredentials/orttrainingtestdatascus.cred
fi
# to create orttrainingtestdata.cred, I have to do: 'sudo bash -c ...'
sudo bash -c 'echo "username=orttrainingtestdata" >> /etc/smbcredentials/orttrainingtestdata.cred'
# to create orttrainingtestdatascus.cred, I have to do: 'sudo bash -c ...'
sudo bash -c 'echo "username=orttrainingtestdatascus" >> /etc/smbcredentials/orttrainingtestdatascus.cred'
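# $1 gets removed (to defend against injection attacks?) if I do 'sudo bash -c ...':
# the single-quoted command runs in a new shell whose $1 is unset unless arguments are passed after the command string.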
# to enable 'sudo echo...' I need to 'sudo chmod 777...' first.
sudo chmod 777 /etc/smbcredentials/orttrainingtestdata.cred
sudo echo "password=$1" >> /etc/smbcredentials/orttrainingtestdata.cred
sudo chmod 777 /etc/smbcredentials/orttrainingtestdatascus.cred
sudo echo "password=$1" >> /etc/smbcredentials/orttrainingtestdatascus.cred
sudo chmod 600 /etc/smbcredentials/orttrainingtestdata.cred
sudo chmod 600 /etc/smbcredentials/orttrainingtestdatascus.cred
}
function mount_data () {
@ -36,8 +36,8 @@ function mount_data () {
sudo mkdir -p $2
sudo bash -c 'echo "$1 $2 cifs nofail,vers=3.0,credentials=/etc/smbcredentials/orttrainingtestdata.cred,dir_mode=0777,file_mode=0777,serverino" >> /etc/fstab' -- $1 $2
sudo mount -t cifs $1 $2 -o vers=3.0,credentials=/etc/smbcredentials/orttrainingtestdata.cred,dir_mode=0777,file_mode=0777,serverino
sudo bash -c 'echo "$1 $2 cifs nofail,vers=3.0,credentials=/etc/smbcredentials/orttrainingtestdatascus.cred,dir_mode=0777,file_mode=0777,serverino" >> /etc/fstab' -- $1 $2
sudo mount -t cifs $1 $2 -o vers=3.0,credentials=/etc/smbcredentials/orttrainingtestdatascus.cred,dir_mode=0777,file_mode=0777,serverino
}
while getopts "p:s:d:" opt; do