Add Big models pipeline (#19222)

### Description
2 models are added in CI.
Stabe diffusion Model stage is based on
https://github.com/microsoft/onnxruntime/blob/main/onnxruntime/python/tools/transformers/models/stable_diffusion/README.md

LLama2 FP16 is based on https://github.com/microsoft/Llama-2-Onnx.
12G GPU memory is not enough, so I choose T4 to run it.

### Motivation and Context
Add regular E2E test for big models. 
It will be triggered in main build, that is, it'll run after one PR is
merged.

More models will be added later.

### Test Runs ###

https://dev.azure.com/onnxruntime/onnxruntime/_build/results?buildId=1275191&view=results
This commit is contained in:
Yi Zhang 2024-01-23 06:02:56 +08:00 committed by GitHub
parent 8d9d751179
commit 780acda7b4
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@ -0,0 +1,259 @@
# reference: https://github.com/microsoft/onnxruntime/blob/main/onnxruntime/python/tools/transformers/models/stable_diffusion/README.md
parameters:
- name: specificArtifact
displayName: Use Specific Artifact
type: boolean
default: false
- name: BuildId
displayName: Specific Artifact's RunId
type: number
default: 0
resources:
repositories:
- repository: manylinux
type: Github
endpoint: Microsoft
name: pypa/manylinux
ref: 5eda9aded5462201e6310105728d33016e637ea7
- repository: LLaMa2Onnx
type: Github
endpoint: Microsoft
name: Microsoft/Llama-2-Onnx
ref: main
variables:
- template: templates/common-variables.yml
- name: docker_base_image
value: nvidia/cuda:11.8.0-cudnn8-devel-ubi8
- name: linux_trt_version
value: 8.6.1.6-1.cuda11.8
stages:
- stage: Build_Onnxruntime_Cuda
jobs:
- job: Linux_Build
timeoutInMinutes: 120
variables:
skipComponentGovernanceDetection: true
CCACHE_DIR: $(Pipeline.Workspace)/ccache
workspace:
clean: all
pool: onnxruntime-Ubuntu2204-AMD-CPU
steps:
- task: mspremier.PostBuildCleanup.PostBuildCleanup-task.PostBuildCleanup@3
displayName: 'Clean Agent Directories'
condition: always()
- checkout: self
clean: true
submodules: none
- template: templates/get-docker-image-steps.yml
parameters:
Dockerfile: tools/ci_build/github/linux/docker/Dockerfile.manylinux2_28_cuda
Context: tools/ci_build/github/linux/docker
DockerBuildArgs: "
--network=host
--build-arg BASEIMAGE=$(docker_base_image)
--build-arg TRT_VERSION=$(linux_trt_version)
--build-arg BUILD_UID=$( id -u )
"
Repository: onnxruntimecuda11build
- task: Cache@2
inputs:
key: '"ccache" | "$(Build.SourceBranch)" | "$(Build.SourceVersion)"'
path: $(CCACHE_DIR)
restoreKeys: |
"ccache" | "$(Build.SourceBranch)"
"ccache"
cacheHitVar: CACHE_RESTORED
displayName: Cach Task
- script: |
sudo mkdir -p $(Pipeline.Workspace)/ccache
condition: ne(variables.CACHE_RESTORED, 'true')
displayName: Create Cache Dir
- task: CmdLine@2
inputs:
script: |
mkdir -p $HOME/.onnx
docker run -e CFLAGS="-Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS -fstack-protector-strong -fstack-clash-protection -fcf-protection -O3 -Wl,--strip-all" -e CXXFLAGS="-Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS -fstack-protector-strong -fstack-clash-protection -fcf-protection -O3 -Wl,--strip-all" --rm \
--volume /data/onnx:/data/onnx:ro \
--volume $(Build.SourcesDirectory):/onnxruntime_src \
--volume $(Build.BinariesDirectory):/build \
--volume /data/models:/build/models:ro \
--volume $HOME/.onnx:/home/onnxruntimedev/.onnx \
--volume $(Pipeline.Workspace)/ccache:/cache \
-e ALLOW_RELEASED_ONNX_OPSET_ONLY=0 \
-e NIGHTLY_BUILD \
-e BUILD_BUILDNUMBER \
-e CCACHE_DIR=/cache \
onnxruntimecuda11build \
/bin/bash -c "
set -ex; \
env; \
ccache -s; \
/opt/python/cp38-cp38/bin/python3 /onnxruntime_src/tools/ci_build/build.py \
--build_dir /build --cmake_generator Ninja \
--config Release --update --build \
--skip_submodule_sync \
--build_shared_lib \
--parallel \
--build_wheel \
--enable_onnx_tests --use_cuda --cuda_version=${{variables.common_cuda_version}} --cuda_home=/usr/local/cuda-${{variables.common_cuda_version}} --cudnn_home=/usr/local/cuda-${{variables.common_cuda_version}} \
--enable_cuda_profiling --enable_cuda_nhwc_ops \
--enable_pybind --build_java \
--use_cache \
--cmake_extra_defines 'CMAKE_CUDA_ARCHITECTURES=75;86' ; \
ccache -sv; \
ccache -z"
workingDirectory: $(Build.SourcesDirectory)
- task: CmdLine@2
inputs:
script: |
rm -rf $(Build.BinariesDirectory)/Release/onnxruntime $(Build.BinariesDirectory)/Release/pybind11
rm -f $(Build.BinariesDirectory)/Release/models
find $(Build.BinariesDirectory)/Release/_deps -mindepth 1 ! -regex '^$(Build.BinariesDirectory)/Release/_deps/onnx-src\(/.*\)?' -delete
cd $(Build.BinariesDirectory)/Release
find -executable -type f > $(Build.BinariesDirectory)/Release/perms.txt
- script: |
set -ex
mkdir -p $(Agent.TempDirectory)/ort
cp $(Build.BinariesDirectory)/Release/dist/*.whl $(Agent.TempDirectory)/ort/
displayName: 'Copy Wheels'
- task: PublishPipelineArtifact@0
displayName: 'Publish Pipeline Artifact'
inputs:
artifactName: 'drop-ort-linux-gpu'
targetPath: '$(Agent.TempDirectory)/ort'
- template: templates/explicitly-defined-final-tasks.yml
- stage: Stale_Diffusion
dependsOn:
- Build_Onnxruntime_Cuda
jobs:
- job: Stale_Diffusion
variables:
skipComponentGovernanceDetection: true
CCACHE_DIR: $(Pipeline.Workspace)/ccache
workspace:
clean: all
pool: onnxruntime-Linux-GPU-A10-12G
steps:
- checkout: self
clean: true
submodules: none
- template: templates/flex-downloadPipelineArtifact.yml
parameters:
StepName: 'Download Onnxruntime Artifact'
ArtifactName: 'drop-ort-linux-gpu'
TargetPath: '$(Build.BinariesDirectory)/Release'
SpecificArtifact: ${{ parameters.specificArtifact }}
BuildId: ${{ parameters.BuildId }}
- script: |
docker run --rm --gpus all -v $PWD:/workspace -v $(Build.BinariesDirectory)/Release:/Release nvcr.io/nvidia/pytorch:22.11-py3 \
bash -c "
set -ex; \
python3 --version; \
python3 -m pip install --upgrade pip; \
python3 -m pip install /Release/*.whl; \
pushd /workspace/onnxruntime/python/tools/transformers/models/stable_diffusion; \
python3 -m pip install -r requirements-cuda11.txt; \
python3 -m pip install --upgrade polygraphy onnx-graphsurgeon --extra-index-url https://pypi.ngc.nvidia.com; \
echo Generate an image guided by a text prompt; \
python3 demo_txt2img.py "astronaut riding a horse on mars"; \
echo Generate an image with Stable Diffusion XL guided by a text prompt; \
python3 demo_txt2img_xl.py 'starry night over Golden Gate Bridge by van gogh'; \
python3 demo_txt2img_xl.py --enable-refiner 'starry night over Golden Gate Bridge by van gogh'; \
echo Generate an image guided by a text prompt using LCM LoRA; \
python3 demo_txt2img_xl.py --scheduler LCM --lora-weights latent-consistency/lcm-lora-sdxl --denoising-steps 4 "Self-portrait oil painting, a beautiful cyborg with golden hair, 8k"; \
popd; \
"
displayName: 'Run stable diffusion demo'
workingDirectory: $(Build.SourcesDirectory)
- stage: Llama2_ONNX_FP16
dependsOn:
- Build_Onnxruntime_Cuda
jobs:
- job: Llama2_ONNX_FP16
variables:
skipComponentGovernanceDetection: true
workspace:
clean: all
pool: onnxruntime-Linux-GPU-T4
steps:
- task: mspremier.PostBuildCleanup.PostBuildCleanup-task.PostBuildCleanup@3
displayName: 'Clean Agent Directories'
condition: always()
- checkout: self
clean: true
submodules: none
- checkout: LLaMa2Onnx
clean: true
submodules: none
- template: templates/flex-downloadPipelineArtifact.yml
parameters:
StepName: 'Download Onnxruntime Artifact'
ArtifactName: 'drop-ort-linux-gpu'
TargetPath: '$(Build.BinariesDirectory)/ort-artifact/'
SpecificArtifact: ${{ parameters.specificArtifact }}
BuildId: ${{ parameters.BuildId }}
- task: DownloadPackage@1
displayName: 'Download Llama2 model'
inputs:
packageType: upack
feed: '/7424c8e4-5c62-490e-95c4-79446f31017c'
version: 1.0.0
definition: '772ebce3-7e06-46d5-b3cc-82040ec4b2ce'
downloadPath: $(Agent.TempDirectory)/llama2_onnx_ft16
- template: templates/get-docker-image-steps.yml
parameters:
Dockerfile: onnxruntime/tools/ci_build/github/linux/docker/Dockerfile.package_ubi8_cuda11_8_tensorrt8_6
Context: onnxruntime/tools/ci_build/github/linux/docker/
ScriptName: onnxruntime/tools/ci_build/get_docker_image.py
DockerBuildArgs: "--build-arg BUILD_UID=$( id -u )"
Repository: onnxruntimeubi8packagestest
UpdateDepsTxt: false
- script: |
docker run --rm --gpus all -v $(Build.SourcesDirectory)/Llama-2-Onnx:/workspace \
-v $(Build.BinariesDirectory)/ort-artifact/:/ort-artifact \
-v $(Agent.TempDirectory)/llama2_onnx_ft16:/models \
onnxruntimeubi8packagestest \
bash -c "
set -ex; \
python3 -m pip install --upgrade pip ; \
python3 -m pip install /ort-artifact/*.whl ; \
python3 -m pip install torch --index-url https://download.pytorch.org/whl/cu118 ; \
python3 -m pip install sentencepiece ; \
pushd /workspace ; \
python3 MinimumExample/Example_ONNX_LlamaV2.py --onnx_file /models/ONNX/LlamaV2_7B_FT_float16.onnx \
--embedding_file /models/embeddings.pth --tokenizer_path tokenizer.model --prompt 'What is the lightest element?' > /workspace/answer.txt ; \
popd ; \
"
displayName: 'Run Llama2 demo'
workingDirectory: $(Build.SourcesDirectory)
- script: |
set -ex
real=$(cat $(Build.SourcesDirectory)/Llama-2-Onnx/answer.txt)
trim_actual=$(tr -dc '[[:print:]]' <<< "$real")
expected="The lightest element is hydrogen. Hydrogen is the lightest element on the periodic table, with an atomic mass of 1.00794 u (unified atomic mass units)."
[ "$expected" == "$trim_actual" ] && exit 0 || exit 1
displayName: 'Check result'