##### start trigger. Don't edit this block manually; please edit set-trigger-rules.py ####
### and rerun set-trigger-rules.py to regenerate it ###
trigger:
  branches:
    include:
    - main
    - rel-*
  paths:
    exclude:
    - docs/**
    - README.md
    - CONTRIBUTING.md
    - BUILD.md
    - 'js/web'
    - 'onnxruntime/core/providers/js'
pr:
  branches:
    include:
    - main
    - rel-*
  paths:
    exclude:
    - docs/**
    - README.md
    - CONTRIBUTING.md
    - BUILD.md
    - 'js/web'
    - 'onnxruntime/core/providers/js'
#### end trigger ####
# reference: https://github.com/microsoft/onnxruntime/blob/main/onnxruntime/python/tools/transformers/models/stable_diffusion/README.md
parameters:
- name: specificArtifact
  displayName: Use Specific Artifact
  type: boolean
  default: false
- name: BuildId
  displayName: Specific Artifact's RunId
  type: number
  default: 0
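# The two parameters above let downstream stages reuse build artifacts from an
# earlier run: when specificArtifact is true, the flex-downloadPipelineArtifact
# template downloads from the run identified by BuildId instead of the current
# run. An illustrative queue-time invocation (assuming the Azure DevOps CLI
# with the azure-devops extension; the pipeline name is an example):
#   az pipelines run --name bigmodels-ci-pipeline --parameters specificArtifact=true BuildId=12345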
variables:
- name: docker_base_image
  value: onnxruntimebuildcache.azurecr.io/internal/azureml/onnxruntime/build/cuda11_x64_almalinux8_gcc11:20250124.1
- name: linux_trt_version
  value: 10.3.0.26-1.cuda11.8
- name: Repository
  value: 'onnxruntimecuda11manylinuxbuild'
stages:
- stage: Build_Onnxruntime_Cuda
  jobs:
  - job: Linux_Build
    timeoutInMinutes: 120
    variables:
      skipComponentGovernanceDetection: true
      CCACHE_DIR: $(Pipeline.Workspace)/ccache
    workspace:
      clean: all
    pool: onnxruntime-Ubuntu2204-AMD-CPU
    steps:
    - checkout: self
      clean: true
      submodules: none
    # same as linux-gpu-ci-pipeline.yml
    - template: templates/get-docker-image-steps.yml
      parameters:
        Dockerfile: tools/ci_build/github/linux/docker/Dockerfile.manylinux2_28_cuda
        Context: tools/ci_build/github/linux/docker
        DockerBuildArgs: "
          --network=host
          --build-arg BASEIMAGE=$(docker_base_image)
          --build-arg TRT_VERSION=$(linux_trt_version)
          --build-arg BUILD_UID=$( id -u )
          "
        Repository: $(Repository)
    - task: CmdLine@2
      inputs:
        script: |
          mkdir -p $HOME/.onnx
          docker run --rm \
            --volume /data/onnx:/data/onnx:ro \
            --volume $(Build.SourcesDirectory):/onnxruntime_src \
            --volume $(Build.BinariesDirectory):/build \
            --volume /data/models:/build/models:ro \
            --volume $HOME/.onnx:/home/onnxruntimedev/.onnx \
            -e ALLOW_RELEASED_ONNX_OPSET_ONLY=0 \
            -e NIGHTLY_BUILD \
            -e BUILD_BUILDNUMBER \
            $(Repository) \
            /bin/bash -c '
              set -ex; \
              PATH=/opt/python/cp310-cp310/bin:$PATH /opt/python/cp310-cp310/bin/python3 /onnxruntime_src/tools/ci_build/build.py \
                --build_dir /build --cmake_generator Ninja \
                --config Release --update --build \
                --skip_submodule_sync \
                --build_shared_lib \
                --parallel --use_vcpkg \
                --build_wheel \
                --enable_onnx_tests --use_cuda --cuda_version=11.8 --cuda_home=/usr/local/cuda-11.8 --cudnn_home=/usr/local/cuda-11.8 \
                --enable_cuda_profiling \
                --enable_pybind --build_java \
                --cmake_extra_defines "CMAKE_CUDA_ARCHITECTURES=75;86" '
        workingDirectory: $(Build.SourcesDirectory)
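    # --use_vcpkg makes build.py resolve C/C++ dependencies through vcpkg,
    # matching the other pipelines that already build with VCPKG enabled.
    # A minimal sketch for reproducing the build locally (illustrative only:
    # assumes Docker and the image built above; paths and flags trimmed):
    #   docker run --rm -v $PWD:/onnxruntime_src -v /tmp/ort-build:/build \
    #     onnxruntimecuda11manylinuxbuild \
    #     /opt/python/cp310-cp310/bin/python3 /onnxruntime_src/tools/ci_build/build.py \
    #       --build_dir /build --config Release --update --build --use_vcpkg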
    - task: CmdLine@2
      inputs:
        script: |
          # Trim the build tree before publishing: drop the onnxruntime and pybind11
          # checkouts, the models symlink, and everything under _deps except onnx-src.
          rm -rf $(Build.BinariesDirectory)/Release/onnxruntime $(Build.BinariesDirectory)/Release/pybind11
          rm -f $(Build.BinariesDirectory)/Release/models
          find $(Build.BinariesDirectory)/Release/_deps -mindepth 1 ! -regex '^$(Build.BinariesDirectory)/Release/_deps/onnx-src\(/.*\)?' -delete
          cd $(Build.BinariesDirectory)/Release
          # Record which files are executable, so consumers can restore permissions
          # (artifact packaging does not preserve the executable bit).
          find -executable -type f > $(Build.BinariesDirectory)/Release/perms.txt
    - script: |
        set -ex
        mkdir -p $(Agent.TempDirectory)/ort
        cp $(Build.BinariesDirectory)/Release/dist/*.whl $(Agent.TempDirectory)/ort/
      displayName: 'Copy Wheels'
    - task: PublishPipelineArtifact@0
      displayName: 'Publish Pipeline Artifact'
      inputs:
        artifactName: 'drop-ort-linux-gpu'
        targetPath: '$(Agent.TempDirectory)/ort'
    - template: templates/explicitly-defined-final-tasks.yml
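# The drop-ort-linux-gpu wheel artifact published above is the single build
# output consumed by every downstream stage (Stable_Diffusion, Llama2_7B_ONNX,
# and Whisper_ONNX).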
- stage: Stable_Diffusion
  dependsOn:
  - Build_Onnxruntime_Cuda
  jobs:
  - job: Stable_Diffusion
    variables:
      skipComponentGovernanceDetection: true
      CLIP_MODEL_CACHE: $(Agent.TempDirectory)/clip_cache
      STABLE_DIFFUSION_MODEL_CACHE: $(Agent.TempDirectory)/stablediffusion_cache
      GenerateImage_DIR: $(Agent.TempDirectory)/images
      hitAnother: 'False'
    workspace:
      clean: all
    pool: onnxruntime-Linux-GPU-A10-12G
    steps:
    - checkout: self
      clean: true
      submodules: none
    - template: templates/flex-downloadPipelineArtifact.yml
      parameters:
        StepName: 'Download Onnxruntime Artifact'
        ArtifactName: 'drop-ort-linux-gpu'
        TargetPath: '$(Build.BinariesDirectory)/Release'
        SpecificArtifact: ${{ parameters.specificArtifact }}
        BuildId: ${{ parameters.BuildId }}
    - template: templates/get-docker-image-steps.yml
      parameters:
        Dockerfile: tools/ci_build/github/linux/docker/Dockerfile.package_ubuntu_2204_gpu_opencv
        Context: tools/ci_build/github/linux/docker/
        ScriptName: tools/ci_build/get_docker_image.py
        DockerBuildArgs: "
          --build-arg BUILD_UID=$( id -u )
          "
        Repository: onnxruntimeubuntupackagestest_cuda11
        UseImageCacheContainerRegistry: false
    - task: Cache@2
      inputs:
        key: stable_diffusion | $(Build.SourcesDirectory)/onnxruntime/python/tools/transformers/models/stable_diffusion/pipeline_stable_diffusion.py
        restoreKeys: |
          stable_diffusion | $(Build.SourcesDirectory)/onnxruntime/python/tools/transformers/models/stable_diffusion/pipeline_stable_diffusion.py
          stable_diffusion
        path: $(STABLE_DIFFUSION_MODEL_CACHE)
      displayName: Cache stable diffusion model
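    # Cache@2 hashes the file segment of the key, so the cached model directory
    # is invalidated whenever pipeline_stable_diffusion.py changes; the bare
    # "stable_diffusion" restore key falls back to the most recent prior cache.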
    - script: |
        mkdir -p $(GenerateImage_DIR)
        docker run --rm --gpus all -v $PWD:/workspace \
          -v $(Build.BinariesDirectory)/Release:/Release \
          -v $(STABLE_DIFFUSION_MODEL_CACHE):/model_cache:rw \
          -v $(GenerateImage_DIR):/images:rw \
          onnxruntimeubuntupackagestest_cuda11 \
          bash -c ' \
            set -ex; \
            python3 --version; \
            python3 -m pip install --upgrade pip; \
            python3 -m pip install /Release/*.whl; \
            pushd /workspace/onnxruntime/python/tools/transformers/models/stable_diffusion; \
            python3 -m pip install -r requirements/cuda11/requirements.txt; \
            python3 -m pip install numpy==1.22.2; \
            python3 -m pip install --upgrade polygraphy onnx-graphsurgeon ; \
            echo Generate an image guided by a text prompt; \
            python3 demo_txt2img.py --framework-model-dir /model_cache --seed 1 --deterministic "astronaut riding a horse on mars" ; \
            find $(pwd)/ORT_CUDA -name "*.png" -exec cp {} /images/ \; ; \
            popd ; \
          '
      displayName: 'Run stable diffusion demo'
      workingDirectory: $(Build.SourcesDirectory)
    # For verification, we check that the generated image looks as expected.
    # Because the artifact isn't used by other jobs, we include the job attempt
    # in the artifact name, so the job can be rerun without hitting the
    # "artifact has already been published" error.
    - task: PublishPipelineArtifact@0
      displayName: 'Publish Generated Image Artifact'
      inputs:
        artifactName: Generated-Image-$(System.JobAttempt)
        targetPath: '$(GenerateImage_DIR)'
    - task: Cache@2
      inputs:
        key: clip_model | $(Build.SourcesDirectory)/onnxruntime/python/tools/transformers/models/stable_diffusion/test/check_image.py
        restoreKeys: |
          clip_model | $(Build.SourcesDirectory)/onnxruntime/python/tools/transformers/models/stable_diffusion/test/check_image.py
          clip_model
        path: $(CLIP_MODEL_CACHE)
      displayName: Cache clip model
    - script: |
        docker run --rm --gpus all -v $PWD:/workspace \
          -v $(CLIP_MODEL_CACHE):/model_cache:rw \
          onnxruntimeubuntupackagestest_cuda11 \
          bash -c '
            set -x; \
            python3 --version; \
            python3 -m pip install --upgrade pip; \
            pushd /workspace/onnxruntime/python/tools/transformers/models/stable_diffusion/; \
            image2=$(find $(pwd) -name "astronaut_riding_a_h*.png") ; \
            pushd test; \
            python3 -m pip install -r requirements.txt; \
            echo "check that the image generated by demo_txt2img.py is not the known-bad image"; \
            python3 -u check_image.py --image1 astronaut_riding_error.png --image2 $image2 --cache_dir /model_cache --negative; \
            if [ $? -ne 0 ]; then echo "Hit an unexpected image"; exit 1; fi; \
            popd ; \
            popd ; \
          ' || ( echo "##vso[task.setvariable variable=hitAnother;]True" && exit 1 )
      displayName: 'Check that the generated image is not a known-bad image'
      workingDirectory: $(Build.SourcesDirectory)
      # If the generated image matches another test image, mark the job as a warning.
      continueOnError: true
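    # "##vso[task.setvariable variable=hitAnother;]True" above is an Azure DevOps
    # logging command: it sets the hitAnother job variable for subsequent steps,
    # which the final "Check the generated image" step reads in its condition.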
    - bash: |
        echo "You can use variables: $(hitAnother)"
    # The next step runs only if the generated image did not match another test image.
    - script: |
        docker run --rm --gpus all -v $PWD:/workspace \
          -v $(CLIP_MODEL_CACHE):/model_cache:rw \
          onnxruntimeubuntupackagestest_cuda11 \
          bash -c '
            set -ex; \
            python3 --version; \
            python3 -m pip install --upgrade pip; \
            pushd /workspace/onnxruntime/python/tools/transformers/models/stable_diffusion/; \
            image2=$(find $(pwd) -name "astronaut_riding_a_h*.png") ; \
            pushd test; \
            python3 -m pip install numpy==1.22.2; \
            python3 -m pip install -r requirements.txt; \
            echo "check that the image generated by demo_txt2img.py matches the reference image"; \
            python3 -u check_image.py --image1 astronaut_riding_txt2image-DDIM-50.png --image2 $image2 --cache_dir /model_cache ; \
            popd ; \
            popd ; \
          '
      displayName: 'Check the generated image'
      workingDirectory: $(Build.SourcesDirectory)
      condition: ne(variables.hitAnother, 'True')
- stage: Llama2_7B_ONNX
  dependsOn:
  - Build_Onnxruntime_Cuda
  condition: or(eq(variables['Build.SourceBranch'], 'refs/heads/main'), startsWith(variables['Build.SourceBranch'], 'refs/heads/rel-'), eq(variables['UseA100'], '1'))
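  # Runs only on main, on release branches, or when a run is queued with the
  # UseA100 pipeline variable set to '1', since this stage targets the A100 pool.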
  jobs:
  - job: Llama2_7B_ONNX
    timeoutInMinutes: 120
    variables:
      skipComponentGovernanceDetection: true
    workspace:
      clean: all
    pool:
      name: Onnxruntime-Linux-GPU-A100-WUS3
      demands:
      - WorkFolder -equals /mnt/storage/
    steps:
    - checkout: self
      clean: true
      submodules: none
    - template: templates/flex-downloadPipelineArtifact.yml
      parameters:
        StepName: 'Download Onnxruntime Artifact'
        ArtifactName: 'drop-ort-linux-gpu'
        TargetPath: '$(Build.BinariesDirectory)/ort-artifact/'
        SpecificArtifact: ${{ parameters.specificArtifact }}
        BuildId: ${{ parameters.BuildId }}
    - template: templates/get-docker-image-steps.yml
      parameters:
        Dockerfile: tools/ci_build/github/linux/docker/Dockerfile.package_ubi8_cuda_tensorrt10_0_torch
        Context: tools/ci_build/github/linux/docker/
        ScriptName: tools/ci_build/get_docker_image.py
        DockerBuildArgs: "
          --build-arg BUILD_UID=$( id -u )
          --build-arg BASEIMAGE=${{ variables.docker_base_image }}
          --build-arg TRT_VERSION=${{ variables.linux_trt_version }}
          "
        Repository: onnxruntimeubi8packagestest_torch
        UseImageCacheContainerRegistry: false
    - task: DownloadPackage@1
      displayName: 'Download Meta Llama2 model'
      inputs:
        packageType: upack
        feed: '/7424c8e4-5c62-490e-95c4-79446f31017c'
        version: 1.0.0
        definition: '6fe0c4ed-9d0e-4d66-94cc-fb6a111d02a5'
        downloadPath: $(Agent.TempDirectory)/meta_llama2_7b_hf
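    # packageType: upack downloads a Universal Package from an Azure Artifacts
    # feed; the feed/definition GUIDs identify the pre-staged Llama-2-7b-hf
    # weights, pinned at version 1.0.0.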
    - script: |
        docker run --rm --gpus all -v $(Build.SourcesDirectory):/workspace \
          -v $(Build.BinariesDirectory)/ort-artifact/:/ort-artifact \
          -v $(Agent.TempDirectory)/meta_llama2_7b_hf:/meta-llama2 \
          onnxruntimeubi8packagestest_torch \
          bash -c "
            set -ex; \
            pushd /workspace/onnxruntime/python/tools/transformers/ ; \
            python3 -m pip install --upgrade pip ; \
            pushd models/llama ; \
            python3 -m pip install -r requirements.txt ; \
            popd ; \
            python3 -m pip install /ort-artifact/*.whl ; \
            python3 -m models.llama.convert_to_onnx -m meta-llama/Llama-2-7b-hf --input /meta-llama2 --output llama2-7b-fp16 --precision fp16 --execution_provider cuda --small_gp;\
            ls -l llama2-7b-fp16; \
            du -sh llama2-7b-fp16; \
            popd ; \
          "
      displayName: 'Convert Llama2 to ONNX fp16 and run parity test'
      workingDirectory: $(Build.SourcesDirectory)
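    # The next two steps repeat this conversion for fp32 and int4; only the
    # --precision/--output values and the extra flags (--small_gp above,
    # --use_gqa for int4) differ.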
    - script: |
        docker run --rm --gpus all -v $(Build.SourcesDirectory):/workspace \
          -v $(Build.BinariesDirectory)/ort-artifact/:/ort-artifact \
          -v $(Agent.TempDirectory)/meta_llama2_7b_hf:/meta-llama2 \
          onnxruntimeubi8packagestest_torch \
          bash -c "
            set -ex; \
            pushd /workspace/onnxruntime/python/tools/transformers/ ; \
            python3 -m pip install --upgrade pip ; \
            pushd models/llama ; \
            python3 -m pip install -r requirements.txt ; \
            popd ; \
            python3 -m pip install /ort-artifact/*.whl ; \
            python3 -m models.llama.convert_to_onnx -m meta-llama/Llama-2-7b-hf --input /meta-llama2 --output llama2-7b-fp32-gpu --precision fp32 --execution_provider cuda;\
            ls -l llama2-7b-fp32-gpu; \
            du -sh llama2-7b-fp32-gpu; \
            popd ; \
          "
      displayName: 'Convert Llama2 to ONNX fp32 and run parity test'
      workingDirectory: $(Build.SourcesDirectory)
    - script: |
        docker run --rm --gpus all -v $(Build.SourcesDirectory):/workspace \
          -v $(Build.BinariesDirectory)/ort-artifact/:/ort-artifact \
          -v $(Agent.TempDirectory)/meta_llama2_7b_hf:/meta-llama2 \
          onnxruntimeubi8packagestest_torch \
          bash -c "
            set -ex; \
            pushd /workspace/onnxruntime/python/tools/transformers/ ; \
            python3 -m pip install --upgrade pip ; \
            pushd models/llama ; \
            python3 -m pip install -r requirements.txt ; \
            popd ; \
            python3 -m pip install /ort-artifact/*.whl ; \
            python3 -m models.llama.convert_to_onnx -m meta-llama/Llama-2-7b-hf --input /meta-llama2 --output llama2-7b-int4-gpu --precision int4 --execution_provider cuda --use_gqa;\
            ls -l llama2-7b-int4-gpu; \
            du -sh llama2-7b-int4-gpu; \
            popd ; \
          "
      displayName: 'Convert Llama2 to ONNX int4 and run parity test'
      workingDirectory: $(Build.SourcesDirectory)
- stage: Whisper_ONNX
  dependsOn:
  - Build_Onnxruntime_Cuda
  jobs:
  - job: Whisper_ONNX
    variables:
      skipComponentGovernanceDetection: true
    workspace:
      clean: all
    pool: Onnxruntime-Linux-A10-24G
    steps:
    - checkout: self
      clean: true
      submodules: none
    - template: templates/flex-downloadPipelineArtifact.yml
      parameters:
        StepName: 'Download Onnxruntime Artifact'
        ArtifactName: 'drop-ort-linux-gpu'
        TargetPath: '$(Build.BinariesDirectory)/ort-artifact/'
        SpecificArtifact: ${{ parameters.specificArtifact }}
        BuildId: ${{ parameters.BuildId }}
    - script: |
        mkdir -p $(Build.SourcesDirectory)/tools/ci_build/github/linux/docker/ompffmpeg/
        azcopy cp --recursive "https://lotusscus.blob.core.windows.net/models/ffmpeg/runtimes/linux-x64/native" $(Agent.TempDirectory)/ompffmpeg
        # Copy the ffmpeg binaries into the Docker build context so the image build can pick them up.
        cp $(Agent.TempDirectory)/ompffmpeg/native/* $(Build.SourcesDirectory)/tools/ci_build/github/linux/docker/ompffmpeg/
        ls $(Build.SourcesDirectory)/tools/ci_build/github/linux/docker/ompffmpeg/
      displayName: 'Download OMP FFmpeg'
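    # Inside the container the test step expects these libraries at
    # /tmp/ompffmpeg (see the LD_LIBRARY_PATH export in the final step below).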
    - template: templates/get-docker-image-steps.yml
      parameters:
        Dockerfile: tools/ci_build/github/linux/docker/Dockerfile.package_ubuntu_2204_gpu_ffmpeg
        Context: tools/ci_build/github/linux/docker/
        ScriptName: tools/ci_build/get_docker_image.py
        DockerBuildArgs: '--build-arg BUILD_UID=$( id -u )'
        Repository: onnxruntimepackagestest_ompffmpeg
    - task: DownloadPackage@1
      # The model data in this artifact was downloaded from openai/whisper-large-v3
      # on the Hugging Face model hub. To save space, the .git directory and pickled
      # files were removed, keeping only the safetensors model files.
      displayName: 'Download Whisper Model'
      inputs:
        packageType: upack
        feed: '/7424c8e4-5c62-490e-95c4-79446f31017c'
        version: 1.0.0
        definition: 'b583ce7c-1a8f-4099-ae28-5d5f56c478b1'
        downloadPath: $(Agent.TempDirectory)/whisper_large_v3
    - script: |
        docker run --rm --gpus all -v $(Build.SourcesDirectory):/workspace \
          -v $(Build.BinariesDirectory)/ort-artifact/:/ort-artifact \
          -v $(Agent.TempDirectory)/whisper_large_v3:/whisper_large_v3 \
          onnxruntimepackagestest_ompffmpeg \
          bash -c '
            set -ex; \
            pushd /workspace/onnxruntime/python/tools/transformers/ ; \
            python3 -m pip install --upgrade pip ; \
            pushd models/whisper ; \
            python3 -m pip install -r requirements.txt ; \
            popd ; \
            python3 -m pip install /ort-artifact/*.whl ; \
            python3 -m pip uninstall -y torch ; \
            python3 -m pip install torch --index-url https://download.pytorch.org/whl/cu118 ; \
            python3 -m models.whisper.convert_to_onnx -m /whisper_large_v3 --output whisperlargev3 --use_external_data_format ; \
            popd ; \
          '
      displayName: 'Convert Whisper Model'
      workingDirectory: $(Build.SourcesDirectory)
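    # The conversion writes whisper_large_v3_beamsearch.onnx under whisperlargev3/;
    # the benchmark step below points --ort-model-path at it and diffs the output
    # against a checked-in reference transcription.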
    - script: |
        docker run --rm --gpus all -v $(Build.SourcesDirectory):/workspace \
          -v $(Build.BinariesDirectory)/ort-artifact/:/ort-artifact \
          -v $(Agent.TempDirectory)/whisper_large_v3:/whisper_large_v3 \
          onnxruntimepackagestest_ompffmpeg \
          bash -c '
            set -ex; \
            pushd /workspace/onnxruntime/python/tools/transformers/ ; \
            python3 -m pip install --upgrade pip ; \
            pushd models/whisper ; \
            python3 -m pip install -r requirements.txt ; \
            popd ; \
            python3 -m pip install /ort-artifact/*.whl ; \
            python3 -m pip uninstall -y torch ; \
            python3 -m pip install torch --index-url https://download.pytorch.org/whl/cu118 ; \
            ls whisperlargev3; \
            export LD_LIBRARY_PATH=/tmp/ompffmpeg:${LD_LIBRARY_PATH}; \
            ffmpeg -version; \
            python3 -m models.whisper.benchmark \
              --benchmark-type ort \
              --audio-path models/whisper/test/1272-141231-0002.mp3 \
              --model-name openai/whisper-large-v3 \
              --ort-model-path /workspace/onnxruntime/python/tools/transformers/whisperlargev3/whisper_large_v3_beamsearch.onnx \
              --precision fp32 \
              --device cuda > ort_output.txt ; \
            cat ort_output.txt ; \
            diff ort_output.txt /workspace/onnxruntime/python/tools/transformers/models/whisper/test/whisper_ort_output.txt && exit 0 || exit 1 ; \
            popd ; \
          '
      displayName: 'Test Whisper ONNX Model'
      workingDirectory: $(Build.SourcesDirectory)