mirror of
https://github.com/saymrwulf/onnxruntime.git
synced 2026-06-03 23:49:44 +00:00
### Description <!-- Describe your changes. --> Use a SAS token to fix the error `failed to perform copy command due to error: no SAS token or OAuth token is present and the resource is not public`. Generate a SAS token for the target data, add it to Key Vault, and use it as a pipeline variable. ### Motivation and Context <!-- - Why is this change required? What problem does it solve? - If it fixes an open issue, please link to the issue here. --> Co-authored-by: peixuanzuo <peixuanzuo@linmif39a000004.zvflicr54joexhdgnhvmxrxygg.phxx.internal.cloudapp.net>
78 lines
2.8 KiB
YAML
78 lines
2.8 KiB
YAML
# Azure Pipelines definition: builds ONNX Runtime with training and ROCm
# enabled on the 'AMD-GPU' agent pool, runs the unit tests, then runs the
# BERT-L batch size, performance and convergence end-to-end tests.

# CI triggers are disabled for this pipeline.
# NOTE(review): the run name says "nightly", so runs are presumably started
# by a schedule configured outside this file - confirm.
trigger: none

name: 'orttraining_amd_nightly_$(Date:yyyyMMdd)_$(Rev:r)'

pool: 'AMD-GPU'

jobs:
- job: Onnxruntime_Linux_GPU_AMD_Training_E2E_Test

  timeoutInMinutes: 60

  steps:
  - checkout: self
    clean: true
    submodules: recursive

  # Prepend the agent's conda and OpenMPI bin directories to PATH and set
  # LD_LIBRARY_PATH to the OpenMPI lib directory via logging commands, then
  # load the conda bash hook and print which GPU(s) HIP_VISIBLE_DEVICES selects.
  - script: |-
      echo "##vso[task.prependpath]/home/ciagent/conda/bin/"
      echo "##vso[task.prependpath]/home/ciagent/pkg/openmpi-4.0.5/bin/"
      echo '##vso[task.setvariable variable=LD_LIBRARY_PATH]/home/ciagent/pkg/openmpi-4.0.5/lib/'
      eval "$('/home/ciagent/conda/bin/conda' 'shell.bash' 'hook' 2> /dev/null)"
      echo "Selecting GPU based on HIP_VISIBLE_DEVICES=$HIP_VISIBLE_DEVICES"
    displayName: 'Initialize environment'

  # update these if the E2E test data changes
  - script: |-
      python orttraining/tools/ci_test/download_azure_blob_archive.py \
        --azure_blob_url https://onnxruntimetestdata.blob.core.windows.net/training/onnxruntime_training_data.zip \
        --target_dir training_e2e_test_data \
        --archive_sha256_digest B01C169B6550D1A0A6F1B4E2F34AE2A8714B52DBB70AC04DA85D371F691BDFF9
    displayName: 'Download onnxruntime_training_data.zip data'
    env:
      # Secret pipeline variables are not exported to scripts automatically;
      # map the SAS token explicitly instead of expanding it into the script
      # text, so the token never becomes part of the generated shell script.
      AZURE_BLOB_SAS_TOKEN: $(onnxruntimetestdata-storage-training-container-sas-token)

  # Configure and build (tests are skipped here and run in the steps below).
  - script: |-
      python tools/ci_build/build.py \
        --config RelWithDebInfo \
        --enable_training \
        --mpi_home /home/ciagent/pkg/openmpi-4.0.5 \
        --use_rocm \
        --rocm_home /opt/rocm \
        --nccl_home /opt/rocm \
        --update \
        --build_dir ./build \
        --build \
        --parallel 8 \
        --build_wheel \
        --skip_tests
    displayName: 'Build onnxruntime'

  # The launcher script is invoked from inside the build output directory.
  - script: |-
      cd ./build/RelWithDebInfo &&\
      ../../tools/ci_build/github/pai/pai_test_launcher.sh
    displayName: 'Run onnxruntime unit tests'

  - script: |-
      python orttraining/tools/ci_test/run_batch_size_test.py \
        --binary_dir build/RelWithDebInfo \
        --model_root training_e2e_test_data/models \
        --gpu_sku MI100_32G
    displayName: 'Run C++ BERT-L batch size test'
    condition: succeededOrFailed()  # ensure all tests are run

  - script: |-
      python orttraining/tools/ci_test/run_bert_perf_test.py \
        --binary_dir build/RelWithDebInfo \
        --model_root training_e2e_test_data/models \
        --training_data_root training_e2e_test_data/data \
        --gpu_sku MI100_32G
    displayName: 'Run C++ BERT-L performance test'
    condition: succeededOrFailed()  # ensure all tests are run

  - script: |-
      python orttraining/tools/ci_test/run_convergence_test.py \
        --binary_dir build/RelWithDebInfo \
        --model_root training_e2e_test_data/models \
        --training_data_root training_e2e_test_data/data \
        --gpu_sku MI100_32G
    displayName: 'Run C++ BERT-L convergence test'
    condition: succeededOrFailed()  # ensure all tests are run