mirror of
https://github.com/saymrwulf/onnxruntime.git
synced 2026-07-04 04:07:22 +00:00
orttraining packaging and ci pipelines to use cuda 11.3 (#10252)
This commit is contained in:
parent
4b205eb2b3
commit
2affd6e71e
7 changed files with 9 additions and 8 deletions
|
|
@@ -14,7 +14,7 @@ jobs:
|
|||
--enable_training
|
||||
--enable_training_torch_interop
|
||||
--config $(buildConfig)
|
||||
--use_cuda --cuda_version=11.1 --cuda_home=/usr/local/cuda-11.1 --cudnn_home=/usr/local/cuda-11.1
|
||||
--use_cuda --cuda_version=11.3 --cuda_home=/usr/local/cuda-11.3 --cudnn_home=/usr/local/cuda-11.3
|
||||
--build_wheel
|
||||
--enable_nvtx_profile
|
||||
--cmake_extra_defines CMAKE_CUDA_ARCHITECTURES=70
|
||||
|
|
|
|||
|
|
@@ -17,7 +17,7 @@ jobs:
|
|||
-o ubuntu20.04 -d gpu \
|
||||
-t onnxruntime_distributed_tests_image \
|
||||
-x " \
|
||||
--use_cuda --cuda_version=11.1 --cuda_home=/usr/local/cuda-11.1 --cudnn_home=/usr/local/cuda-11.1 \
|
||||
--use_cuda --cuda_version=11.3 --cuda_home=/usr/local/cuda-11.3 --cudnn_home=/usr/local/cuda-11.3 \
|
||||
--config RelWithDebInfo \
|
||||
--enable_training \
|
||||
--update --build \
|
||||
|
|
|
|||
|
|
@@ -18,7 +18,7 @@ jobs:
|
|||
-t onnxruntime_ortmodule_distributed_tests_image \
|
||||
-x " \
|
||||
--config RelWithDebInfo \
|
||||
--use_cuda --cuda_version=11.1 --cuda_home=/usr/local/cuda-11.1 --cudnn_home=/usr/local/cuda-11.1 \
|
||||
--use_cuda --cuda_version=11.3 --cuda_home=/usr/local/cuda-11.3 --cudnn_home=/usr/local/cuda-11.3 \
|
||||
--enable_training \
|
||||
--enable_training_torch_interop \
|
||||
--update --build \
|
||||
|
|
|
|||
|
|
@@ -33,7 +33,7 @@ steps:
|
|||
--volume /bert_data:/bert_data \
|
||||
--volume /hf_models_cache:/hf_models_cache \
|
||||
${{ parameters.DockerImageTag }} \
|
||||
bash -c "python3 -m pip uninstall -y -r /onnxruntime_src/tools/ci_build/github/linux/docker/scripts/training/requirements.txt && python3 -m pip install -r /onnxruntime_src/tools/ci_build/github/linux/docker/scripts/training/ortmodule/stage1/requirements_torch1.10.0_cu11.1.txt && python3 -m pip install -r /onnxruntime_src/tools/ci_build/github/linux/docker/scripts/training/ortmodule/stage2/requirements.txt && rm -rf /build/onnxruntime/ && python3 -m pip install /build/dist/onnxruntime*.whl && python3 -m onnxruntime.training.ortmodule.torch_cpp_extensions.install && /build/launch_test.py --cmd_line_with_args 'python orttraining_ortmodule_tests.py --mnist /mnist --bert_data /bert_data/hf_data/glue_data/CoLA/original/raw --transformers_cache /hf_models_cache/huggingface/transformers' --cwd /build" \
|
||||
bash -c "python3 -m pip uninstall -y -r /onnxruntime_src/tools/ci_build/github/linux/docker/scripts/training/requirements.txt && python3 -m pip install -r /onnxruntime_src/tools/ci_build/github/linux/docker/scripts/training/ortmodule/stage1/requirements_torch1.10.0_cu11.3.txt && python3 -m pip install -r /onnxruntime_src/tools/ci_build/github/linux/docker/scripts/training/ortmodule/stage2/requirements.txt && rm -rf /build/onnxruntime/ && python3 -m pip install /build/dist/onnxruntime*.whl && python3 -m onnxruntime.training.ortmodule.torch_cpp_extensions.install && /build/launch_test.py --cmd_line_with_args 'python orttraining_ortmodule_tests.py --mnist /mnist --bert_data /bert_data/hf_data/glue_data/CoLA/original/raw --transformers_cache /hf_models_cache/huggingface/transformers' --cwd /build" \
|
||||
displayName: 'Run orttraining_ortmodule_tests.py'
|
||||
condition: succeededOrFailed()
|
||||
timeoutInMinutes: 60
|
||||
|
|
|
|||
|
|
@@ -1,4 +1,4 @@
|
|||
ARG BASEIMAGE=nvcr.io/nvidia/cuda:11.3-cudnn8-devel-centos7
|
||||
ARG BASEIMAGE=nvcr.io/nvidia/cuda:11.3.1-cudnn8-devel-centos7
|
||||
ARG POLICY=manylinux2014
|
||||
ARG PLATFORM=x86_64
|
||||
ARG DEVTOOLSET_ROOTPATH=
|
||||
|
|
@@ -192,7 +192,7 @@ RUN cd /tmp/scripts && \
|
|||
/tmp/scripts/manylinux/install_centos.sh && \
|
||||
/tmp/scripts/install_os_deps.sh -d gpu $INSTALL_DEPS_EXTRA_ARGS && \
|
||||
/tmp/scripts/install_ninja.sh && \
|
||||
/tmp/scripts/install_python_deps.sh -d gpu -v 11.1 -p $PYTHON_VERSION -h $TORCH_VERSION $INSTALL_DEPS_EXTRA_ARGS && \
|
||||
/tmp/scripts/install_python_deps.sh -d gpu -v 11.3 -p $PYTHON_VERSION -h $TORCH_VERSION $INSTALL_DEPS_EXTRA_ARGS && \
|
||||
rm -rf /tmp/scripts
|
||||
|
||||
ARG BUILD_UID=1001
|
||||
|
|
|
|||
|
|
@@ -1,4 +1,4 @@
|
|||
ARG BASEIMAGE=nvcr.io/nvidia/cuda:11.1.1-cudnn8-devel-ubuntu18.04
|
||||
ARG BASEIMAGE=nvcr.io/nvidia/cuda:11.3.1-cudnn8-devel-ubuntu18.04
|
||||
|
||||
FROM $BASEIMAGE
|
||||
|
||||
|
|
|
|||
|
|
@@ -91,8 +91,9 @@ elif [ $BUILD_DEVICE = "gpu" ]; then
|
|||
if [[ $ORTMODULE_BUILD = true ]]; then
|
||||
INSTALL_DEPS_EXTRA_ARGS="${INSTALL_DEPS_EXTRA_ARGS} -u"
|
||||
fi
|
||||
INSTALL_DEPS_EXTRA_ARGS="${INSTALL_DEPS_EXTRA_ARGS} -v 11.3"
|
||||
$GET_DOCKER_IMAGE_CMD --repository "onnxruntime-$IMAGE" \
|
||||
--docker-build-args="--build-arg BASEIMAGE=nvcr.io/nvidia/cuda:11.1.1-cudnn8-devel-${BUILD_OS} --build-arg BUILD_USER=onnxruntimedev --build-arg BUILD_UID=$(id -u) --build-arg PYTHON_VERSION=${PYTHON_VER} --build-arg INSTALL_DEPS_EXTRA_ARGS=\"${INSTALL_DEPS_EXTRA_ARGS}\" --build-arg USE_CONDA=${USE_CONDA} --network=host" \
|
||||
--docker-build-args="--build-arg BASEIMAGE=nvcr.io/nvidia/cuda:11.3.1-cudnn8-devel-${BUILD_OS} --build-arg BUILD_USER=onnxruntimedev --build-arg BUILD_UID=$(id -u) --build-arg PYTHON_VERSION=${PYTHON_VER} --build-arg INSTALL_DEPS_EXTRA_ARGS=\"${INSTALL_DEPS_EXTRA_ARGS}\" --build-arg USE_CONDA=${USE_CONDA} --network=host" \
|
||||
--dockerfile Dockerfile.ubuntu_gpu_training --context .
|
||||
elif [[ $BUILD_DEVICE = "tensorrt"* ]]; then
|
||||
if [ $BUILD_DEVICE = "tensorrt-v7.1" ]; then
|
||||
|
|
|
|||
Loading…
Reference in a new issue