mirror of
https://github.com/saymrwulf/onnxruntime.git
synced 2026-05-18 21:21:17 +00:00
move training CI agent pools to 1ES hosted (#8775)
This commit is contained in:
parent
39059f2539
commit
2beb873c6b
8 changed files with 23 additions and 23 deletions
|
|
@ -3,7 +3,7 @@ trigger: none
|
|||
jobs:
|
||||
- template: templates/linux-ci.yml
|
||||
parameters:
|
||||
AgentPool : 'Linux-Single-GPU-V100'
|
||||
AgentPool : 'Onnxruntime-Linux-GPU-NC6sv3'
|
||||
JobName: 'Onnxruntime_Linux_GPU_Training'
|
||||
SubmoduleCheckoutMode: 'recursive'
|
||||
RunDockerBuildArgs: >
|
||||
|
|
|
|||
|
|
@ -4,7 +4,7 @@ jobs:
|
|||
- job: Orttraining_Linux_GPU_Distributed_E2E_Test
|
||||
|
||||
timeoutInMinutes: 180
|
||||
pool: 'Linux-Multi-GPU-V100-E2E3'
|
||||
pool: 'Onnxruntime-Linux-GPU-NC24sv3'
|
||||
|
||||
steps:
|
||||
- checkout: self
|
||||
|
|
@ -33,7 +33,7 @@ jobs:
|
|||
--archive_sha256_digest B01C169B6550D1A0A6F1B4E2F34AE2A8714B52DBB70AC04DA85D371F691BDFF9
|
||||
displayName: 'Download onnxruntime_training_data.zip data'
|
||||
|
||||
- bash: tools/ci_build/github/linux/docker/scripts/training/azure_scale_set_vm_mount_test_data.sh -p $(orttrainingtestdata-storage-key) -s "//orttrainingtestdata.file.core.windows.net/bert-data" -d "/bert_data"
|
||||
- bash: tools/ci_build/github/linux/docker/scripts/training/azure_scale_set_vm_mount_test_data.sh -p $(orttrainingtestdatascus-storage-key) -s "//orttrainingtestdatascus.file.core.windows.net/bert-data" -d "/bert_data"
|
||||
displayName: 'Mount bert-data'
|
||||
condition: succeededOrFailed()
|
||||
|
||||
|
|
|
|||
|
|
@ -4,7 +4,7 @@ jobs:
|
|||
- job: Onnxruntime_Linux_GPU_ORTModule_Distributed_Test
|
||||
|
||||
timeoutInMinutes: 120
|
||||
pool: 'Linux-Multi-GPU-V100'
|
||||
pool: 'Onnxruntime-Linux-GPU-NC24sv3'
|
||||
|
||||
steps:
|
||||
- checkout: self
|
||||
|
|
@ -29,7 +29,7 @@ jobs:
|
|||
-e
|
||||
DisplayName: 'Build'
|
||||
|
||||
- bash: tools/ci_build/github/linux/docker/scripts/training/azure_scale_set_vm_mount_test_data.sh -p $(orttrainingtestdata-storage-key) -s "//orttrainingtestdata.file.core.windows.net/mnist" -d "/mnist"
|
||||
- bash: tools/ci_build/github/linux/docker/scripts/training/azure_scale_set_vm_mount_test_data.sh -p $(orttrainingtestdatascus-storage-key) -s "//orttrainingtestdatascus.file.core.windows.net/mnist" -d "/mnist"
|
||||
displayName: 'Mount MNIST'
|
||||
condition: succeededOrFailed()
|
||||
|
||||
|
|
|
|||
|
|
@ -11,7 +11,7 @@ jobs:
|
|||
clean: true
|
||||
submodules: recursive
|
||||
|
||||
- bash: tools/ci_build/github/linux/docker/scripts/training/azure_scale_set_vm_mount_test_data.sh -p $(orttrainingtestdata-storage-key) -s "//orttrainingtestdata.file.core.windows.net/hf-models-cache" -d "/hf_models_cache"
|
||||
- bash: tools/ci_build/github/linux/docker/scripts/training/azure_scale_set_vm_mount_test_data.sh -p $(orttrainingtestdatascus-storage-key) -s "//orttrainingtestdatascus.file.core.windows.net/hf-models-cache" -d "/hf_models_cache"
|
||||
displayName: 'Mount hf-models-cache'
|
||||
condition: succeededOrFailed()
|
||||
|
||||
|
|
|
|||
|
|
@ -25,11 +25,11 @@ jobs:
|
|||
"
|
||||
DisplayName: 'Build performance tests'
|
||||
|
||||
- bash: tools/ci_build/github/linux/docker/scripts/training/azure_scale_set_vm_mount_test_data.sh -p $(orttrainingtestdata-storage-key) -s "//orttrainingtestdata.file.core.windows.net/bert-data" -d "/bert_data"
|
||||
- bash: tools/ci_build/github/linux/docker/scripts/training/azure_scale_set_vm_mount_test_data.sh -p $(orttrainingtestdatascus-storage-key) -s "//orttrainingtestdatascus.file.core.windows.net/bert-data" -d "/bert_data"
|
||||
displayName: 'Mount bert-data'
|
||||
condition: succeededOrFailed() # ensure all tests are run
|
||||
|
||||
- bash: tools/ci_build/github/linux/docker/scripts/training/azure_scale_set_vm_mount_test_data.sh -p $(orttrainingtestdata-storage-key) -s "//orttrainingtestdata.file.core.windows.net/gpt2-data" -d "/gpt2_data"
|
||||
- bash: tools/ci_build/github/linux/docker/scripts/training/azure_scale_set_vm_mount_test_data.sh -p $(orttrainingtestdatascus-storage-key) -s "//orttrainingtestdatascus.file.core.windows.net/gpt2-data" -d "/gpt2_data"
|
||||
displayName: 'Mount gpt2 test data'
|
||||
condition: succeededOrFailed() # ensure all tests are run
|
||||
|
||||
|
|
|
|||
|
|
@ -6,15 +6,15 @@ parameters:
|
|||
|
||||
steps:
|
||||
|
||||
- bash: tools/ci_build/github/linux/docker/scripts/training/azure_scale_set_vm_mount_test_data.sh -p $(orttrainingtestdata-storage-key) -s "//orttrainingtestdata.file.core.windows.net/mnist" -d "/mnist"
|
||||
- bash: tools/ci_build/github/linux/docker/scripts/training/azure_scale_set_vm_mount_test_data.sh -p $(orttrainingtestdatascus-storage-key) -s "//orttrainingtestdatascus.file.core.windows.net/mnist" -d "/mnist"
|
||||
displayName: 'Mount MNIST'
|
||||
condition: succeededOrFailed()
|
||||
|
||||
- bash: tools/ci_build/github/linux/docker/scripts/training/azure_scale_set_vm_mount_test_data.sh -p $(orttrainingtestdata-storage-key) -s "//orttrainingtestdata.file.core.windows.net/bert-data" -d "/bert_data"
|
||||
- bash: tools/ci_build/github/linux/docker/scripts/training/azure_scale_set_vm_mount_test_data.sh -p $(orttrainingtestdatascus-storage-key) -s "//orttrainingtestdatascus.file.core.windows.net/bert-data" -d "/bert_data"
|
||||
displayName: 'Mount bert-data'
|
||||
condition: succeededOrFailed()
|
||||
|
||||
- bash: tools/ci_build/github/linux/docker/scripts/training/azure_scale_set_vm_mount_test_data.sh -p $(orttrainingtestdata-storage-key) -s "//orttrainingtestdata.file.core.windows.net/hf-models-cache" -d "/hf_models_cache"
|
||||
- bash: tools/ci_build/github/linux/docker/scripts/training/azure_scale_set_vm_mount_test_data.sh -p $(orttrainingtestdatascus-storage-key) -s "//orttrainingtestdatascus.file.core.windows.net/hf-models-cache" -d "/hf_models_cache"
|
||||
displayName: 'Mount hf-models-cache'
|
||||
condition: succeededOrFailed()
|
||||
|
||||
|
|
|
|||
|
|
@ -92,15 +92,15 @@ stages:
|
|||
--build-arg LD_LIBRARY_PATH_ARG=/opt/rh/devtoolset-$(GccVersion)/root/usr/lib64:/opt/rh/devtoolset-$(GccVersion)/root/usr/lib:/opt/rh/devtoolset-$(GccVersion)/root/usr/lib64/dyninst:/opt/rh/devtoolset-$(GccVersion)/root/usr/lib/dyninst:/usr/local/lib64
|
||||
Repository: onnxruntimetraininggpubuild
|
||||
|
||||
- bash: tools/ci_build/github/linux/docker/scripts/training/azure_scale_set_vm_mount_test_data.sh -p $(orttrainingtestdata-storage-key) -s "//orttrainingtestdata.file.core.windows.net/mnist" -d "/mnist"
|
||||
- bash: tools/ci_build/github/linux/docker/scripts/training/azure_scale_set_vm_mount_test_data.sh -p $(orttrainingtestdatascus-storage-key) -s "//orttrainingtestdatascus.file.core.windows.net/mnist" -d "/mnist"
|
||||
displayName: 'Mount MNIST'
|
||||
condition: succeededOrFailed()
|
||||
|
||||
- bash: tools/ci_build/github/linux/docker/scripts/training/azure_scale_set_vm_mount_test_data.sh -p $(orttrainingtestdata-storage-key) -s "//orttrainingtestdata.file.core.windows.net/bert-data" -d "/bert_data"
|
||||
- bash: tools/ci_build/github/linux/docker/scripts/training/azure_scale_set_vm_mount_test_data.sh -p $(orttrainingtestdatascus-storage-key) -s "//orttrainingtestdatascus.file.core.windows.net/bert-data" -d "/bert_data"
|
||||
displayName: 'Mount bert-data'
|
||||
condition: succeededOrFailed()
|
||||
|
||||
- bash: tools/ci_build/github/linux/docker/scripts/training/azure_scale_set_vm_mount_test_data.sh -p $(orttrainingtestdata-storage-key) -s "//orttrainingtestdata.file.core.windows.net/hf-models-cache" -d "/hf_models_cache"
|
||||
- bash: tools/ci_build/github/linux/docker/scripts/training/azure_scale_set_vm_mount_test_data.sh -p $(orttrainingtestdatascus-storage-key) -s "//orttrainingtestdatascus.file.core.windows.net/hf-models-cache" -d "/hf_models_cache"
|
||||
displayName: 'Mount hf-models-cache'
|
||||
condition: succeededOrFailed()
|
||||
|
||||
|
|
|
|||
|
|
@ -5,19 +5,19 @@ function credentialize () {
|
|||
sudo mkdir /etc/smbcredentials
|
||||
fi
|
||||
|
||||
if [ -f "/etc/smbcredentials/orttrainingtestdata.cred" ]; then
|
||||
sudo rm /etc/smbcredentials/orttrainingtestdata.cred
|
||||
if [ -f "/etc/smbcredentials/orttrainingtestdatascus.cred" ]; then
|
||||
sudo rm /etc/smbcredentials/orttrainingtestdatascus.cred
|
||||
fi
|
||||
|
||||
# to create orttrainingtestdata.cred, I have to do: 'sudo bash -c ...'
|
||||
sudo bash -c 'echo "username=orttrainingtestdata" >> /etc/smbcredentials/orttrainingtestdata.cred'
|
||||
# to create orttrainingtestdatascus.cred, I have to do: 'sudo bash -c ...'
|
||||
sudo bash -c 'echo "username=orttrainingtestdatascus" >> /etc/smbcredentials/orttrainingtestdatascus.cred'
|
||||
|
||||
# $1 gets removed (to defend against injection attacks?) if I do 'sudo bash -c...'
|
||||
# to enable 'sudo echo...' I need to 'sudo chmod 777...' first.
|
||||
sudo chmod 777 /etc/smbcredentials/orttrainingtestdata.cred
|
||||
sudo echo "password=$1" >> /etc/smbcredentials/orttrainingtestdata.cred
|
||||
sudo chmod 777 /etc/smbcredentials/orttrainingtestdatascus.cred
|
||||
sudo echo "password=$1" >> /etc/smbcredentials/orttrainingtestdatascus.cred
|
||||
|
||||
sudo chmod 600 /etc/smbcredentials/orttrainingtestdata.cred
|
||||
sudo chmod 600 /etc/smbcredentials/orttrainingtestdatascus.cred
|
||||
}
|
||||
|
||||
function mount_data () {
|
||||
|
|
@ -36,8 +36,8 @@ function mount_data () {
|
|||
|
||||
sudo mkdir -p $2
|
||||
|
||||
sudo bash -c 'echo "$1 $2 cifs nofail,vers=3.0,credentials=/etc/smbcredentials/orttrainingtestdata.cred,dir_mode=0777,file_mode=0777,serverino" >> /etc/fstab' -- $1 $2
|
||||
sudo mount -t cifs $1 $2 -o vers=3.0,credentials=/etc/smbcredentials/orttrainingtestdata.cred,dir_mode=0777,file_mode=0777,serverino
|
||||
sudo bash -c 'echo "$1 $2 cifs nofail,vers=3.0,credentials=/etc/smbcredentials/orttrainingtestdatascus.cred,dir_mode=0777,file_mode=0777,serverino" >> /etc/fstab' -- $1 $2
|
||||
sudo mount -t cifs $1 $2 -o vers=3.0,credentials=/etc/smbcredentials/orttrainingtestdatascus.cred,dir_mode=0777,file_mode=0777,serverino
|
||||
}
|
||||
|
||||
while getopts "p:s:d:" opt; do
|
||||
|
|
|
|||
Loading…
Reference in a new issue