Add Big models pipeline (#19222)

### Description Two models are added in CI. The Stable Diffusion model stage is based on https://github.com/microsoft/onnxruntime/blob/main/onnxruntime/python/tools/transformers/models/stable_diffusion/README.md The Llama2 FP16 stage is based on https://github.com/microsoft/Llama-2-Onnx. 12 GB of GPU memory is not enough for it, so I chose a T4 to run it. ### Motivation and Context Add regular E2E tests for big models. The pipeline is triggered by the main build, that is, it runs after a PR is merged. More models will be added later. ### Test Runs ### https://dev.azure.com/onnxruntime/onnxruntime/_build/results?buildId=1275191&view=results
2026-07-20 19:12:24 +00:00 · 2024-01-23 06:02:56 +08:00 · 2024-01-23 06:02:56 +08:00 · 780acda7b4
commit 780acda7b4
parent 8d9d751179
1 changed files with 259 additions and 0 deletions
--- a/tools/ci_build/github/azure-pipelines/bigmodels-ci-pipeline.yml
+++ b/tools/ci_build/github/azure-pipelines/bigmodels-ci-pipeline.yml
@ -0,0 +1,259 @@
+# reference: https://github.com/microsoft/onnxruntime/blob/main/onnxruntime/python/tools/transformers/models/stable_diffusion/README.md
+# Run-time parameters. Both values are forwarded unchanged to the
+# flex-downloadPipelineArtifact template (as SpecificArtifact / BuildId) by the
+# jobs that download the built wheel.
+parameters:
+- name: specificArtifact
+  type: boolean
+  default: false
+  displayName: 'Use Specific Artifact'
+- name: BuildId
+  type: number
+  default: 0
+  displayName: "Specific Artifact's RunId"
+
+# External GitHub repositories made available to this pipeline.
+resources:
+  repositories:
+  # pypa/manylinux, pinned to a fixed commit.
+  - repository: manylinux
+    name: pypa/manylinux
+    ref: '5eda9aded5462201e6310105728d33016e637ea7'
+    type: Github
+    endpoint: Microsoft
+
+  # Llama-2-Onnx sources, checked out by the Llama2_ONNX_FP16 job.
+  # NOTE(review): this tracks the moving `main` branch rather than a pinned
+  # commit, so upstream changes can alter the test without a change here.
+  - repository: LLaMa2Onnx
+    name: Microsoft/Llama-2-Onnx
+    ref: main
+    type: Github
+    endpoint: Microsoft
+
+# Shared variables from the common template, plus the CUDA base image and
+# TensorRT version that the build stage passes to docker as build args.
+variables:
+  - template: templates/common-variables.yml
+  - name: docker_base_image
+    value: 'nvidia/cuda:11.8.0-cudnn8-devel-ubi8'
+  - name: linux_trt_version
+    value: '8.6.1.6-1.cuda11.8'
+
+stages:
+# Builds the ONNX Runtime CUDA Python wheel inside the manylinux CUDA build
+# container and publishes it as the 'drop-ort-linux-gpu' pipeline artifact,
+# which the model stages below download.
+- stage: Build_Onnxruntime_Cuda
+  jobs:
+  - job: Linux_Build
+    timeoutInMinutes: 120
+    variables:
+      skipComponentGovernanceDetection: true
+      CCACHE_DIR: $(Pipeline.Workspace)/ccache
+    workspace:
+      clean: all
+    pool: onnxruntime-Ubuntu2204-AMD-CPU
+    steps:
+    - task: mspremier.PostBuildCleanup.PostBuildCleanup-task.PostBuildCleanup@3
+      displayName: 'Clean Agent Directories'
+      condition: always()
+
+    - checkout: self
+      clean: true
+      submodules: none
+
+    # Provides the 'onnxruntimecuda11build' image that the build step runs in.
+    - template: templates/get-docker-image-steps.yml
+      parameters:
+        Dockerfile: tools/ci_build/github/linux/docker/Dockerfile.manylinux2_28_cuda
+        Context: tools/ci_build/github/linux/docker
+        DockerBuildArgs: "
+        --network=host
+        --build-arg BASEIMAGE=$(docker_base_image)
+        --build-arg TRT_VERSION=$(linux_trt_version)
+        --build-arg BUILD_UID=$( id -u )
+        "
+        Repository: onnxruntimecuda11build
+
+    # ccache contents are keyed by branch and commit; the restore keys fall
+    # back to the same branch first and then to any ccache entry.
+    - task: Cache@2
+      inputs:
+        key: '"ccache" | "$(Build.SourceBranch)" | "$(Build.SourceVersion)"'
+        path: $(CCACHE_DIR)
+        restoreKeys: |
+          "ccache" | "$(Build.SourceBranch)"
+          "ccache"
+        cacheHitVar: CACHE_RESTORED
+      displayName: Cache Task
+
+    # Only needed when nothing was restored: the directory is mounted into the
+    # build container as /cache.
+    - script: |
+        sudo mkdir -p $(Pipeline.Workspace)/ccache
+      condition: ne(variables.CACHE_RESTORED, 'true')
+      displayName: Create Cache Dir
+
+    # Runs build.py inside the container (Release, CUDA, wheel and Java
+    # bindings) with ccache enabled; only CUDA architectures 75 and 86 are
+    # compiled. ccache statistics are printed before and after the build.
+    - task: CmdLine@2
+      displayName: 'Build ONNX Runtime with CUDA'
+      inputs:
+        script: |
+          mkdir -p $HOME/.onnx
+          docker run -e CFLAGS="-Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS -fstack-protector-strong -fstack-clash-protection -fcf-protection -O3 -Wl,--strip-all" -e CXXFLAGS="-Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS -fstack-protector-strong -fstack-clash-protection -fcf-protection -O3 -Wl,--strip-all" --rm \
+            --volume /data/onnx:/data/onnx:ro \
+            --volume $(Build.SourcesDirectory):/onnxruntime_src \
+            --volume $(Build.BinariesDirectory):/build \
+            --volume /data/models:/build/models:ro \
+            --volume $HOME/.onnx:/home/onnxruntimedev/.onnx \
+            --volume $(Pipeline.Workspace)/ccache:/cache \
+            -e ALLOW_RELEASED_ONNX_OPSET_ONLY=0 \
+            -e NIGHTLY_BUILD \
+            -e BUILD_BUILDNUMBER \
+            -e CCACHE_DIR=/cache \
+            onnxruntimecuda11build \
+            /bin/bash -c "
+              set -ex; \
+              env; \
+              ccache -s; \
+              /opt/python/cp38-cp38/bin/python3 /onnxruntime_src/tools/ci_build/build.py \
+                --build_dir /build --cmake_generator Ninja \
+                --config Release --update --build \
+                --skip_submodule_sync \
+                --build_shared_lib \
+                --parallel \
+                --build_wheel \
+                --enable_onnx_tests --use_cuda --cuda_version=${{variables.common_cuda_version}} --cuda_home=/usr/local/cuda-${{variables.common_cuda_version}} --cudnn_home=/usr/local/cuda-${{variables.common_cuda_version}} \
+                --enable_cuda_profiling --enable_cuda_nhwc_ops \
+                --enable_pybind --build_java \
+                --use_cache \
+                --cmake_extra_defines  'CMAKE_CUDA_ARCHITECTURES=75;86' ; \
+                ccache -sv; \
+                ccache -z"
+        workingDirectory: $(Build.SourcesDirectory)
+
+    # Deletes the onnxruntime and pybind11 build trees, the models entry and
+    # everything under _deps except onnx-src, then records the remaining
+    # executable files in perms.txt.
+    - task: CmdLine@2
+      displayName: 'Trim build directory'
+      inputs:
+        script: |
+          rm -rf $(Build.BinariesDirectory)/Release/onnxruntime $(Build.BinariesDirectory)/Release/pybind11
+          rm -f $(Build.BinariesDirectory)/Release/models
+          find $(Build.BinariesDirectory)/Release/_deps -mindepth 1 ! -regex '^$(Build.BinariesDirectory)/Release/_deps/onnx-src\(/.*\)?' -delete
+          cd $(Build.BinariesDirectory)/Release
+          find -executable -type f > $(Build.BinariesDirectory)/Release/perms.txt
+
+    # Only the wheels are staged for publishing.
+    - script: |
+        set -ex
+        mkdir -p $(Agent.TempDirectory)/ort
+        cp $(Build.BinariesDirectory)/Release/dist/*.whl $(Agent.TempDirectory)/ort/
+      displayName: 'Copy Wheels'
+
+    - task: PublishPipelineArtifact@0
+      displayName: 'Publish Pipeline Artifact'
+      inputs:
+        artifactName: 'drop-ort-linux-gpu'
+        targetPath: '$(Agent.TempDirectory)/ort'
+
+    - template: templates/explicitly-defined-final-tasks.yml
+
+# Runs the Stable Diffusion demo scripts from the onnxruntime source tree
+# against the wheel produced by the build stage.
+# NOTE(review): "Stale" in the stage/job identifiers looks like a typo for
+# "Stable"; the names are left unchanged in case they are referenced elsewhere.
+- stage: Stale_Diffusion
+  dependsOn:
+  - Build_Onnxruntime_Cuda
+  jobs:
+  - job: Stale_Diffusion
+    variables:
+      skipComponentGovernanceDetection: true
+      CCACHE_DIR: $(Pipeline.Workspace)/ccache
+    workspace:
+      clean: all
+    pool: onnxruntime-Linux-GPU-A10-12G
+    steps:
+    - checkout: self
+      clean: true
+      submodules: none
+
+    # Downloads the 'drop-ort-linux-gpu' wheel artifact; the pipeline
+    # parameters specificArtifact / BuildId are forwarded to the template.
+    - template: templates/flex-downloadPipelineArtifact.yml
+      parameters:
+        StepName: 'Download Onnxruntime Artifact'
+        ArtifactName: 'drop-ort-linux-gpu'
+        TargetPath: '$(Build.BinariesDirectory)/Release'
+        SpecificArtifact: ${{ parameters.specificArtifact }}
+        BuildId: ${{ parameters.BuildId }}
+
+    # The whole container command is one double-quoted `bash -c` argument, so
+    # every prompt inside it must be single-quoted. A nested double quote would
+    # terminate the script early: bash would run only the commands up to that
+    # point and treat the rest as positional parameters, silently skipping the
+    # remaining demos.
+    - script: |
+        docker run --rm --gpus all -v $PWD:/workspace -v $(Build.BinariesDirectory)/Release:/Release nvcr.io/nvidia/pytorch:22.11-py3 \
+          bash -c "
+            set -ex; \
+            python3 --version; \
+            python3 -m pip install --upgrade pip; \
+            python3 -m pip install /Release/*.whl; \
+            pushd /workspace/onnxruntime/python/tools/transformers/models/stable_diffusion; \
+            python3 -m pip install -r requirements-cuda11.txt; \
+            python3 -m pip install --upgrade polygraphy onnx-graphsurgeon --extra-index-url https://pypi.ngc.nvidia.com; \
+            echo Generate an image guided by a text prompt; \
+            python3 demo_txt2img.py 'astronaut riding a horse on mars'; \
+            echo Generate an image with Stable Diffusion XL guided by a text prompt; \
+            python3 demo_txt2img_xl.py 'starry night over Golden Gate Bridge by van gogh'; \
+            python3 demo_txt2img_xl.py --enable-refiner 'starry night over Golden Gate Bridge by van gogh'; \
+            echo Generate an image guided by a text prompt using LCM LoRA; \
+            python3 demo_txt2img_xl.py --scheduler LCM --lora-weights latent-consistency/lcm-lora-sdxl --denoising-steps 4 'Self-portrait oil painting, a beautiful cyborg with golden hair, 8k'; \
+            popd; \
+          "
+      displayName: 'Run stable diffusion demo'
+      workingDirectory: $(Build.SourcesDirectory)
+
+# Runs the Llama-2-Onnx minimum example with the FP16 model against the wheel
+# from the build stage and compares the generated answer with a fixed string.
+- stage: Llama2_ONNX_FP16
+  dependsOn:
+  - Build_Onnxruntime_Cuda
+  jobs:
+  - job: Llama2_ONNX_FP16
+    pool: onnxruntime-Linux-GPU-T4
+    workspace:
+      clean: all
+    variables:
+      skipComponentGovernanceDetection: true
+    steps:
+    - task: mspremier.PostBuildCleanup.PostBuildCleanup-task.PostBuildCleanup@3
+      condition: always()
+      displayName: 'Clean Agent Directories'
+
+    # Two repositories are checked out; later steps refer to them through the
+    # onnxruntime/ and Llama-2-Onnx/ path prefixes.
+    - checkout: self
+      submodules: none
+      clean: true
+
+    - checkout: LLaMa2Onnx
+      submodules: none
+      clean: true
+
+    # Downloads the 'drop-ort-linux-gpu' wheel artifact; the pipeline
+    # parameters specificArtifact / BuildId are forwarded to the template.
+    - template: templates/flex-downloadPipelineArtifact.yml
+      parameters:
+        ArtifactName: 'drop-ort-linux-gpu'
+        StepName: 'Download Onnxruntime Artifact'
+        TargetPath: '$(Build.BinariesDirectory)/ort-artifact/'
+        BuildId: ${{ parameters.BuildId }}
+        SpecificArtifact: ${{ parameters.specificArtifact }}
+
+    # The model files come from a universal package and are mounted into the
+    # demo container as /models.
+    - task: DownloadPackage@1
+      displayName: 'Download Llama2 model'
+      inputs:
+        packageType: upack
+        feed: '/7424c8e4-5c62-490e-95c4-79446f31017c'
+        definition: '772ebce3-7e06-46d5-b3cc-82040ec4b2ce'
+        version: '1.0.0'
+        downloadPath: $(Agent.TempDirectory)/llama2_onnx_ft16
+
+    # Provides the 'onnxruntimeubi8packagestest' image that the demo runs in.
+    - template: templates/get-docker-image-steps.yml
+      parameters:
+        Repository: onnxruntimeubi8packagestest
+        Dockerfile: onnxruntime/tools/ci_build/github/linux/docker/Dockerfile.package_ubi8_cuda11_8_tensorrt8_6
+        Context: onnxruntime/tools/ci_build/github/linux/docker/
+        ScriptName: onnxruntime/tools/ci_build/get_docker_image.py
+        DockerBuildArgs: '--build-arg BUILD_UID=$( id -u )'
+        UpdateDepsTxt: false
+
+    # The example's stdout is redirected to /workspace/answer.txt, which is
+    # the Llama-2-Onnx checkout on the host; the next step reads it from there.
+    - script: |
+        docker run --rm --gpus all -v $(Build.SourcesDirectory)/Llama-2-Onnx:/workspace \
+           -v $(Build.BinariesDirectory)/ort-artifact/:/ort-artifact \
+           -v $(Agent.TempDirectory)/llama2_onnx_ft16:/models \
+           onnxruntimeubi8packagestest \
+            bash -c "
+              set -ex; \
+              python3 -m pip install --upgrade pip ; \
+              python3 -m pip install /ort-artifact/*.whl ; \
+              python3 -m pip install torch --index-url https://download.pytorch.org/whl/cu118 ; \
+              python3 -m pip install sentencepiece ; \
+              pushd /workspace ; \
+              python3 MinimumExample/Example_ONNX_LlamaV2.py --onnx_file /models/ONNX/LlamaV2_7B_FT_float16.onnx \
+                --embedding_file /models/embeddings.pth --tokenizer_path tokenizer.model --prompt 'What is the lightest element?' > /workspace/answer.txt ; \
+              popd ; \
+            "
+      workingDirectory: $(Build.SourcesDirectory)
+      displayName: 'Run Llama2 demo'
+
+    # Non-printable characters (including newlines) are stripped from the
+    # answer before it is compared, byte for byte, with the expected sentence.
+    - script: |
+        set -ex
+        expected="The lightest element is hydrogen. Hydrogen is the lightest element on the periodic table, with an atomic mass of 1.00794 u (unified atomic mass units)."
+        raw_answer=$(cat $(Build.SourcesDirectory)/Llama-2-Onnx/answer.txt)
+        printable_answer=$(tr -dc '[[:print:]]' <<< "$raw_answer")
+        if [ "$expected" == "$printable_answer" ]; then
+          exit 0
+        else
+          exit 1
+        fi
+      displayName: 'Check result'