mirror of
https://github.com/saymrwulf/onnxruntime.git
synced 2026-07-03 03:58:54 +00:00
Fix Linux Multi GPU build pipeline (#16368)
### Description The build pipeline runs on Azure NV12 machines, which will be deprecated soon because the SKU is too old. This PR therefore moves the pipeline to a Windows machine with two A10 GPUs.
This commit is contained in:
parent
5754cd7d1d
commit
188d5f5398
4 changed files with 33 additions and 70 deletions
|
|
@ -684,6 +684,12 @@ TEST_P(ModelTest, Run) {
|
|||
ASSERT_ORT_STATUS_OK(OrtApis::CreateCUDAProviderOptions(&cuda_options));
|
||||
std::unique_ptr<OrtCUDAProviderOptionsV2, decltype(&OrtApis::ReleaseCUDAProviderOptions)> rel_cuda_options(
|
||||
cuda_options, &OrtApis::ReleaseCUDAProviderOptions);
|
||||
std::vector<const char*> keys{"device_id"};
|
||||
|
||||
std::vector<const char*> values;
|
||||
std::string device_id = Env::Default().GetEnvironmentVar("ONNXRUNTIME_TEST_GPU_DEVICE_ID");
|
||||
values.push_back(device_id.empty() ? "0" : device_id.c_str());
|
||||
ASSERT_ORT_STATUS_OK(OrtApis::UpdateCUDAProviderOptions(cuda_options, keys.data(), values.data(), 1));
|
||||
ortso.AppendExecutionProvider_CUDA_V2(*cuda_options);
|
||||
} else if (provider_name == "rocm") {
|
||||
OrtROCMProviderOptions ep_options;
|
||||
|
|
|
|||
|
|
@ -1,70 +0,0 @@
|
|||
resources:
|
||||
repositories:
|
||||
- repository: manylinux # The name used to reference this repository in the checkout step
|
||||
type: Github
|
||||
endpoint: Microsoft
|
||||
name: pypa/manylinux
|
||||
ref: 5eda9aded5462201e6310105728d33016e637ea7
|
||||
|
||||
variables:
|
||||
- template: templates/common-variables.yml
|
||||
|
||||
jobs:
|
||||
- job: Linux_Build
|
||||
timeoutInMinutes: 180
|
||||
workspace:
|
||||
clean: all
|
||||
pool: Linux-Multi-GPU
|
||||
steps:
|
||||
- checkout: self
|
||||
clean: true
|
||||
submodules: none
|
||||
|
||||
- template: templates/get-docker-image-steps.yml
|
||||
parameters:
|
||||
Dockerfile: tools/ci_build/github/linux/docker/Dockerfile.manylinux2014_cuda11
|
||||
Context: tools/ci_build/github/linux/docker
|
||||
DockerBuildArgs: "--network=host --build-arg POLICY=manylinux2014 --build-arg PLATFORM=x86_64 --build-arg BASEIMAGE=${{variables.common_cuda_baseimg}} --build-arg DEVTOOLSET_ROOTPATH=/opt/rh/devtoolset-11/root --build-arg PREPEND_PATH=/opt/rh/devtoolset-11/root/usr/bin: --build-arg LD_LIBRARY_PATH_ARG=/opt/rh/devtoolset-11/root/usr/lib64:/opt/rh/devtoolset-11/root/usr/lib:/opt/rh/devtoolset-11/root/usr/lib64/dyninst:/opt/rh/devtoolset-11/root/usr/lib/dyninst:/usr/local/lib64 --build-arg BUILD_UID=$( id -u )"
|
||||
Repository: onnxruntimecuda11build
|
||||
|
||||
- task: CmdLine@2
|
||||
inputs:
|
||||
script: |
|
||||
mkdir -p $HOME/.onnx
|
||||
docker run --gpus all -e CC=/opt/rh/devtoolset-11/root/usr/bin/cc -e CXX=/opt/rh/devtoolset-11/root/usr/bin/c++ -e CFLAGS="-Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS -fstack-protector-strong -fstack-clash-protection -fcf-protection -O3 -Wl,--strip-all" -e CXXFLAGS="-Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS -fstack-protector-strong -fstack-clash-protection -fcf-protection -O3 -Wl,--strip-all" --rm \
|
||||
--volume /data/onnx:/data/onnx:ro \
|
||||
--volume $(Build.SourcesDirectory):/onnxruntime_src \
|
||||
--volume $(Build.BinariesDirectory):/build \
|
||||
--volume /data/models:/build/models:ro \
|
||||
--volume $HOME/.onnx:/home/onnxruntimedev/.onnx \
|
||||
-e ALLOW_RELEASED_ONNX_OPSET_ONLY=0 \
|
||||
-e NIGHTLY_BUILD \
|
||||
-e BUILD_BUILDNUMBER \
|
||||
onnxruntimecuda11build \
|
||||
/opt/python/cp38-cp38/bin/python3.8 /onnxruntime_src/tools/ci_build/build.py \
|
||||
--build_dir /build --cmake_generator Ninja \
|
||||
--config Release \
|
||||
--skip_submodule_sync \
|
||||
--build_shared_lib \
|
||||
--parallel \
|
||||
--build_wheel \
|
||||
--enable_onnx_tests --use_cuda --cuda_version=${{variables.common_cuda_version}} --cuda_home=/usr/local/cuda-${{variables.common_cuda_version}} --cudnn_home=/usr/local/cuda-${{variables.common_cuda_version}} \
|
||||
--enable_pybind --build_java --build_nodejs --enable_multi_device_test \
|
||||
--cmake_extra_defines CMAKE_CUDA_HOST_COMPILER=/opt/rh/devtoolset-11/root/usr/bin/cc CMAKE_CUDA_ARCHITECTURES=52
|
||||
workingDirectory: $(Build.SourcesDirectory)
|
||||
|
||||
- task: PublishTestResults@2
|
||||
displayName: 'Publish unit test results'
|
||||
inputs:
|
||||
testResultsFiles: '**/*.results.xml'
|
||||
searchFolder: '$(Build.BinariesDirectory)'
|
||||
testRunTitle: 'Unit Test Run'
|
||||
condition: succeededOrFailed()
|
||||
|
||||
- template: templates/component-governance-component-detection-steps.yml
|
||||
parameters:
|
||||
condition: 'succeeded'
|
||||
|
||||
- task: mspremier.PostBuildCleanup.PostBuildCleanup-task.PostBuildCleanup@3
|
||||
displayName: 'Clean Agent Directories'
|
||||
condition: always()
|
||||
|
|
@ -59,6 +59,27 @@ stages:
|
|||
buildNodejs: true
|
||||
ort_build_pool_name: 'onnxruntime-Win-CPU-2022'
|
||||
|
||||
- ${{ if or(startsWith(variables['System.CollectionUri'], 'https://dev.azure.com/aiinfra/'),startsWith(variables['System.CollectionUri'], 'https://aiinfra.visualstudio.com/')) }}:
|
||||
# The settings below are the same as the Windows GPU CI pipeline's CUDA job, except that here we set OnnxruntimeTestGpuDeviceId to 1
|
||||
- stage: cuda_multi_gpu
|
||||
dependsOn: []
|
||||
jobs:
|
||||
- template: templates/jobs/win-ci-vs-2022-job.yml
|
||||
parameters:
|
||||
BuildConfig: 'RelWithDebInfo'
|
||||
EnvSetupScript: setup_env_cuda_11.bat
|
||||
buildArch: x64
|
||||
additionalBuildFlags: --enable_pybind --build_java --build_nodejs --use_cuda --cuda_home="$(Agent.TempDirectory)\v11.8" --enable_cuda_profiling --cmake_extra_defines CMAKE_CUDA_ARCHITECTURES=86
|
||||
msbuildPlatform: x64
|
||||
isX86: false
|
||||
job_name_suffix: x64_RelWithDebInfo
|
||||
RunOnnxRuntimeTests: true
|
||||
RunStaticCodeAnalysis: false
|
||||
ORT_EP_NAME: CUDA
|
||||
WITH_CACHE: true
|
||||
MachinePool: onnxruntime-Win2022-GPU-MultiA10
|
||||
OnnxruntimeTestGpuDeviceId: 1
|
||||
|
||||
- stage: Mimalloc
|
||||
dependsOn: [ ]
|
||||
jobs:
|
||||
|
|
|
|||
|
|
@ -55,6 +55,11 @@ parameters:
|
|||
type: boolean
|
||||
default: false
|
||||
|
||||
- name: OnnxruntimeTestGpuDeviceId
|
||||
type: number
|
||||
default: 0
|
||||
|
||||
|
||||
jobs:
|
||||
- job: build_${{ parameters.job_name_suffix }}
|
||||
variables:
|
||||
|
|
@ -69,6 +74,7 @@ jobs:
|
|||
DEPS_CACHE_DIR: $(Agent.TempDirectory)/deps_ccache
|
||||
ORT_CACHE_DIR: $(Agent.TempDirectory)/ort_ccache
|
||||
TODAY: $[format('{0:dd}{0:MM}{0:yyyy}', pipeline.startTime)]
|
||||
ONNXRUNTIME_TEST_GPU_DEVICE_ID: ${{ parameters.OnnxruntimeTestGpuDeviceId }}
|
||||
${{ if eq(parameters.WITH_CACHE, true) }}:
|
||||
PS_CACHE_ARG: '-use_cache'
|
||||
PY_CACHE_ARG: '--use_cache'
|
||||
|
|
|
|||
Loading…
Reference in a new issue