mirror of
https://github.com/saymrwulf/onnxruntime.git
synced 2026-07-02 03:55:34 +00:00
Install and use conda on ortmodule CI pipelines (#7530)
* Install and use conda on ortmodule CI pipelines
* Update build script to install onnxruntime wheel before running unit tests
* Remove python 3.5 from install_python_deps
* Pinning deepspeed version to 0.3.15
This commit is contained in:
parent
ad15811ade
commit
cab84d902e
16 changed files with 140 additions and 79 deletions
|
|
@ -17,7 +17,8 @@ RUN apt-get update && \
|
|||
ENV PATH="/opt/cmake/bin:${PATH}"
|
||||
RUN git clone --single-branch --branch ${ONNXRUNTIME_SERVER_BRANCH} --recursive ${ONNXRUNTIME_REPO} onnxruntime
|
||||
RUN /onnxruntime/tools/ci_build/github/linux/docker/scripts/install_ubuntu.sh -p ${PYTHON_VERSION} && \
|
||||
/onnxruntime/tools/ci_build/github/linux/docker/scripts/install_deps.sh -p ${PYTHON_VERSION}
|
||||
/onnxruntime/tools/ci_build/github/linux/docker/scripts/install_os_deps.sh && \
|
||||
/onnxruntime/tools/ci_build/github/linux/docker/scripts/install_python_deps.sh -p ${PYTHON_VERSION}
|
||||
|
||||
WORKDIR /
|
||||
|
||||
|
|
|
|||
|
|
@ -23,7 +23,8 @@ jobs:
|
|||
--build_wheel \
|
||||
" \
|
||||
-m \
|
||||
-u
|
||||
-u \
|
||||
-e
|
||||
DisplayName: 'Build'
|
||||
|
||||
- bash: tools/ci_build/github/linux/docker/scripts/training/azure_scale_set_vm_mount_test_data.sh -p $(orttrainingtestdata-storage-key) -s "//orttrainingtestdata.file.core.windows.net/mnist" -d "/mnist"
|
||||
|
|
@ -41,9 +42,7 @@ jobs:
|
|||
--volume $(Build.BinariesDirectory):/build \
|
||||
--volume /mnist:/mnist \
|
||||
onnxruntime_ortmodule_distributed_tests_image \
|
||||
/build/RelWithDebInfo/launch_test.py \
|
||||
--cmd_line_with_args "python orttraining_ortmodule_distributed_tests.py --mnist /mnist" \
|
||||
--cwd /build/RelWithDebInfo \
|
||||
bash -c "python3 -m pip install /build/RelWithDebInfo/dist/onnxruntime*.whl ; rm -rf /build/RelWithDebInfo/onnxruntime/ ; /build/RelWithDebInfo/launch_test.py --cmd_line_with_args 'python orttraining_ortmodule_distributed_tests.py --mnist /mnist' --cwd /build/RelWithDebInfo" \
|
||||
displayName: 'Run orttraining_ortmodule_distributed_tests.py'
|
||||
condition: succeededOrFailed()
|
||||
timeoutInMinutes: 30
|
||||
|
|
|
|||
|
|
@ -22,7 +22,8 @@ jobs:
|
|||
--update --build \
|
||||
--build_wheel \
|
||||
" \
|
||||
-u
|
||||
-u \
|
||||
-e
|
||||
DisplayName: 'Build'
|
||||
|
||||
- bash: tools/ci_build/github/linux/docker/scripts/training/azure_scale_set_vm_mount_test_data.sh -p $(orttrainingtestdata-storage-key) -s "//orttrainingtestdata.file.core.windows.net/mnist" -d "/mnist"
|
||||
|
|
@ -38,6 +39,8 @@ jobs:
|
|||
condition: succeededOrFailed()
|
||||
|
||||
# Entry point for all ORTModule tests
|
||||
# The onnxruntime folder is deleted in the build directory
|
||||
# to enforce use of the onnxruntime wheel
|
||||
- script: |
|
||||
docker run \
|
||||
--gpus all \
|
||||
|
|
@ -49,9 +52,7 @@ jobs:
|
|||
--volume /bert_data:/bert_data \
|
||||
--volume /hf_models_cache:/hf_models_cache \
|
||||
onnxruntime_ortmodule_tests_image \
|
||||
/build/RelWithDebInfo/launch_test.py \
|
||||
--cmd_line_with_args "python orttraining_ortmodule_tests.py --mnist /mnist --bert_data /bert_data/hf_data/glue_data/CoLA/original/raw --transformers_cache /hf_models_cache/huggingface/transformers" \
|
||||
--cwd /build/RelWithDebInfo \
|
||||
bash -c "python3 -m pip install /build/RelWithDebInfo/dist/onnxruntime*.whl ; rm -rf /build/RelWithDebInfo/onnxruntime/ ; /build/RelWithDebInfo/launch_test.py --cmd_line_with_args 'python orttraining_ortmodule_tests.py --mnist /mnist --bert_data /bert_data/hf_data/glue_data/CoLA/original/raw --transformers_cache /hf_models_cache/huggingface/transformers' --cwd /build/RelWithDebInfo" \
|
||||
displayName: 'Run orttraining_ortmodule_tests.py'
|
||||
condition: succeededOrFailed()
|
||||
timeoutInMinutes: 60
|
||||
|
|
|
|||
|
|
@ -25,7 +25,8 @@ ARG INSTALL_DEPS_EXTRA_ARGS
|
|||
ADD scripts /tmp/scripts
|
||||
RUN cd /tmp/scripts && \
|
||||
/tmp/scripts/install_centos.sh && \
|
||||
/tmp/scripts/install_deps.sh -d gpu -p $PYTHON_VERSION $INSTALL_DEPS_EXTRA_ARGS && \
|
||||
/tmp/scripts/install_os_deps.sh -d gpu $INSTALL_DEPS_EXTRA_ARGS && \
|
||||
/tmp/scripts/install_python_deps.sh -d gpu -p $PYTHON_VERSION $INSTALL_DEPS_EXTRA_ARGS && \
|
||||
rm -rf /tmp/scripts
|
||||
|
||||
ARG BUILD_UID=1001
|
||||
|
|
|
|||
|
|
@ -32,7 +32,8 @@ ARG INSTALL_DEPS_EXTRA_ARGS
|
|||
ADD scripts /tmp/scripts
|
||||
RUN cd /tmp/scripts && \
|
||||
/tmp/scripts/install_centos.sh && \
|
||||
/tmp/scripts/install_deps.sh -d gpu -v 10.2 -p $PYTHON_VERSION $INSTALL_DEPS_EXTRA_ARGS && \
|
||||
/tmp/scripts/install_os_deps.sh -d gpu $INSTALL_DEPS_EXTRA_ARGS && \
|
||||
/tmp/scripts/install_python_deps.sh -d gpu -v 10.2 -p $PYTHON_VERSION $INSTALL_DEPS_EXTRA_ARGS && \
|
||||
rm -rf /tmp/scripts
|
||||
|
||||
ARG BUILD_UID=1001
|
||||
|
|
|
|||
|
|
@ -32,7 +32,8 @@ ARG INSTALL_DEPS_EXTRA_ARGS
|
|||
ADD scripts /tmp/scripts
|
||||
RUN cd /tmp/scripts && \
|
||||
/tmp/scripts/install_centos.sh && \
|
||||
/tmp/scripts/install_deps.sh -d gpu -v 11.1 -p $PYTHON_VERSION $INSTALL_DEPS_EXTRA_ARGS && \
|
||||
/tmp/scripts/install_os_deps.sh -d gpu $INSTALL_DEPS_EXTRA_ARGS && \
|
||||
/tmp/scripts/install_python_deps.sh -d gpu -v 11.1 -p $PYTHON_VERSION $INSTALL_DEPS_EXTRA_ARGS && \
|
||||
rm -rf /tmp/scripts
|
||||
|
||||
ARG BUILD_UID=1001
|
||||
|
|
|
|||
|
|
@ -4,7 +4,7 @@ FROM ubuntu:${OS_VERSION}
|
|||
ARG PYTHON_VERSION=3.6
|
||||
|
||||
ADD scripts /tmp/scripts
|
||||
RUN /tmp/scripts/install_ubuntu.sh -p $PYTHON_VERSION && /tmp/scripts/install_deps.sh -p $PYTHON_VERSION && rm -rf /tmp/scripts
|
||||
RUN /tmp/scripts/install_ubuntu.sh -p $PYTHON_VERSION && /tmp/scripts/install_os_deps.sh && /tmp/scripts/install_python_deps.sh -p $PYTHON_VERSION && rm -rf /tmp/scripts
|
||||
|
||||
WORKDIR /root
|
||||
|
||||
|
|
|
|||
|
|
@ -4,7 +4,8 @@ FROM ubuntu:${OS_VERSION}
|
|||
ARG PYTHON_VERSION=3.5
|
||||
ADD scripts /tmp/scripts
|
||||
RUN /tmp/scripts/install_ubuntu.sh -p $PYTHON_VERSION -d EdgeDevice && \
|
||||
/tmp/scripts/install_deps.sh -p $PYTHON_VERSION -d EdgeDevice && \
|
||||
/tmp/scripts/install_os_deps.sh -d EdgeDevice && \
|
||||
/tmp/scripts/install_python_deps.sh -p $PYTHON_VERSION -d EdgeDevice && \
|
||||
/tmp/scripts/install_protobuf.sh
|
||||
|
||||
ARG TOOL_CHAIN="fsl-imx-xwayland-glibc-x86_64-fsl-image-qt5-aarch64-toolchain-4.19-warrior.sh"
|
||||
|
|
|
|||
|
|
@ -5,7 +5,8 @@ ARG INSTALL_DEPS_EXTRA_ARGS
|
|||
|
||||
ADD scripts /tmp/scripts
|
||||
RUN /tmp/scripts/install_ubuntu.sh -p $PYTHON_VERSION && \
|
||||
/tmp/scripts/install_deps.sh -p $PYTHON_VERSION -d gpu $INSTALL_DEPS_EXTRA_ARGS && \
|
||||
/tmp/scripts/install_os_deps.sh -d gpu $INSTALL_DEPS_EXTRA_ARGS && \
|
||||
/tmp/scripts/install_python_deps.sh -p $PYTHON_VERSION -d gpu $INSTALL_DEPS_EXTRA_ARGS && \
|
||||
rm -rf /tmp/scripts
|
||||
|
||||
WORKDIR /root
|
||||
|
|
|
|||
|
|
@ -2,11 +2,16 @@ FROM nvidia/cuda:11.1.1-cudnn8-devel-ubuntu18.04
|
|||
|
||||
ARG PYTHON_VERSION=3.6
|
||||
ARG INSTALL_DEPS_EXTRA_ARGS
|
||||
ARG USE_CONDA=false
|
||||
|
||||
ADD scripts /tmp/scripts
|
||||
RUN /tmp/scripts/install_ubuntu.sh -p $PYTHON_VERSION && \
|
||||
/tmp/scripts/install_deps.sh -p $PYTHON_VERSION -d gpu $INSTALL_DEPS_EXTRA_ARGS && \
|
||||
rm -rf /tmp/scripts
|
||||
/tmp/scripts/install_os_deps.sh -p $PYTHON_VERSION -d gpu $INSTALL_DEPS_EXTRA_ARGS
|
||||
|
||||
# If USE_CONDA is false, use root to install python dependencies.
|
||||
RUN if [ "$USE_CONDA" = false ] ; \
|
||||
then /tmp/scripts/install_python_deps.sh -p $PYTHON_VERSION -d gpu $INSTALL_DEPS_EXTRA_ARGS ; \
|
||||
fi
|
||||
|
||||
WORKDIR /root
|
||||
|
||||
|
|
@ -26,3 +31,28 @@ ARG BUILD_UID=1000
|
|||
RUN adduser --gecos 'onnxruntime Build User' --disabled-password $BUILD_USER --uid $BUILD_UID
|
||||
WORKDIR /home/$BUILD_USER
|
||||
USER $BUILD_USER
|
||||
|
||||
ARG MINICONDA_PREFIX=/home/$BUILD_USER/miniconda3
|
||||
RUN if [ "$USE_CONDA" = true ] ; \
|
||||
then MINICONDA=miniconda.sh && \
|
||||
wget --no-verbose https://repo.anaconda.com/miniconda/Miniconda3-py37_4.9.2-Linux-x86_64.sh -O $MINICONDA && \
|
||||
chmod a+x $MINICONDA && \
|
||||
./$MINICONDA -b -p $MINICONDA_PREFIX && \
|
||||
rm ./$MINICONDA && \
|
||||
$MINICONDA_PREFIX/bin/conda clean --yes --all && \
|
||||
$MINICONDA_PREFIX/bin/conda install -y python=$PYTHON_VERSION ; \
|
||||
fi
|
||||
|
||||
ENV PATH /home/$BUILD_USER/miniconda3/bin:$PATH
|
||||
|
||||
# If USE_CONDA is true, use onnxruntimedev user to install python dependencies
|
||||
RUN if [ "$USE_CONDA" = true ] ; \
|
||||
then /tmp/scripts/install_python_deps.sh -p $PYTHON_VERSION -d gpu $INSTALL_DEPS_EXTRA_ARGS -c ; \
|
||||
fi
|
||||
|
||||
WORKDIR /root
|
||||
USER root
|
||||
RUN rm -rf /tmp/scripts
|
||||
|
||||
WORKDIR /home/$BUILD_USER
|
||||
USER $BUILD_USER
|
||||
|
|
|
|||
|
|
@ -6,7 +6,8 @@ ARG OPENVINO_VERSION=2021.3
|
|||
|
||||
ADD scripts /tmp/scripts
|
||||
RUN /tmp/scripts/install_ubuntu.sh -p $PYTHON_VERSION -d EdgeDevice && \
|
||||
/tmp/scripts/install_deps.sh -p $PYTHON_VERSION -d EdgeDevice
|
||||
/tmp/scripts/install_os_deps.sh -d EdgeDevice && \
|
||||
/tmp/scripts/install_python_deps.sh -p $PYTHON_VERSION -d EdgeDevice
|
||||
|
||||
RUN apt update && apt install -y libnuma1 ocl-icd-libopencl1 && \
|
||||
rm -rf /var/lib/apt/lists/* /tmp/scripts
|
||||
|
|
|
|||
|
|
@ -8,7 +8,7 @@ ARG PYTHON_VERSION=3.8
|
|||
ARG DEBIAN_FRONTEND=noninteractive
|
||||
|
||||
ADD scripts /tmp/scripts
|
||||
RUN /tmp/scripts/install_ubuntu.sh -p $PYTHON_VERSION && /tmp/scripts/install_deps.sh -p $PYTHON_VERSION && rm -rf /tmp/scripts \
|
||||
RUN /tmp/scripts/install_ubuntu.sh -p $PYTHON_VERSION && /tmp/scripts/install_os_deps.sh && /tmp/scripts/install_python_deps.sh -p $PYTHON_VERSION && rm -rf /tmp/scripts \
|
||||
&& rm /usr/local/bin/cmake && rm /usr/local/bin/ctest && rm -r /usr/local/share/cmake-3.14
|
||||
|
||||
WORKDIR /root
|
||||
|
|
|
|||
|
|
@ -2,27 +2,16 @@
|
|||
set -e -x
|
||||
|
||||
SCRIPT_DIR="$( dirname "${BASH_SOURCE[0]}" )"
|
||||
INSTALL_DEPS_TRAINING=false
|
||||
INSTALL_DEPS_DISTRIBUTED_SETUP=false
|
||||
ORTMODULE_BUILD=false
|
||||
TARGET_ROCM=false
|
||||
CU_VER="11.1"
|
||||
|
||||
while getopts p:d:v:tmur parameter_Option
|
||||
while getopts d:m parameter_Option
|
||||
do case "${parameter_Option}"
|
||||
in
|
||||
p) PYTHON_VER=${OPTARG};;
|
||||
d) DEVICE_TYPE=${OPTARG};;
|
||||
v) CU_VER=${OPTARG};;
|
||||
t) INSTALL_DEPS_TRAINING=true;;
|
||||
m) INSTALL_DEPS_DISTRIBUTED_SETUP=true;;
|
||||
u) ORTMODULE_BUILD=true;;
|
||||
r) TARGET_ROCM=true;;
|
||||
esac
|
||||
done
|
||||
|
||||
echo "Python version=$PYTHON_VER"
|
||||
|
||||
DEVICE_TYPE=${DEVICE_TYPE:=Normal}
|
||||
|
||||
#Download a file from internet
|
||||
|
|
@ -59,20 +48,6 @@ function GetFile {
|
|||
return $?
|
||||
}
|
||||
|
||||
if [[ "$PYTHON_VER" = "3.5" && -d "/opt/python/cp35-cp35m" ]]; then
|
||||
PYTHON_EXE="/opt/python/cp35-cp35m/bin/python3.5"
|
||||
elif [[ "$PYTHON_VER" = "3.6" && -d "/opt/python/cp36-cp36m" ]]; then
|
||||
PYTHON_EXE="/opt/python/cp36-cp36m/bin/python3.6"
|
||||
elif [[ "$PYTHON_VER" = "3.7" && -d "/opt/python/cp37-cp37m" ]]; then
|
||||
PYTHON_EXE="/opt/python/cp37-cp37m/bin/python3.7"
|
||||
elif [[ "$PYTHON_VER" = "3.8" && -d "/opt/python/cp38-cp38" ]]; then
|
||||
PYTHON_EXE="/opt/python/cp38-cp38/bin/python3.8"
|
||||
elif [[ "$PYTHON_VER" = "3.9" && -d "/opt/python/cp39-cp39" ]]; then
|
||||
PYTHON_EXE="/opt/python/cp39-cp39/bin/python3.9"
|
||||
else
|
||||
PYTHON_EXE="/usr/bin/python${PYTHON_VER}"
|
||||
fi
|
||||
|
||||
SYS_LONG_BIT=$(getconf LONG_BIT)
|
||||
mkdir -p /tmp/src
|
||||
GLIBC_VERSION=$(getconf GNU_LIBC_VERSION | cut -f 2 -d \.)
|
||||
|
|
@ -114,43 +89,14 @@ unzip gradle-6.3-bin.zip
|
|||
mv /tmp/src/gradle-6.3 /usr/local/gradle
|
||||
|
||||
if ! [ -x "$(command -v protoc)" ]; then
|
||||
source ${0/%install_deps\.sh/install_protobuf\.sh}
|
||||
source ${0/%install_os_deps\.sh/install_protobuf\.sh}
|
||||
fi
|
||||
|
||||
export ONNX_ML=1
|
||||
export CMAKE_ARGS="-DONNX_GEN_PB_TYPE_STUBS=OFF -DONNX_WERROR=OFF"
|
||||
${PYTHON_EXE} -m pip install -r ${0/%install_deps\.sh/requirements\.txt}
|
||||
if [ $DEVICE_TYPE = "gpu" ]; then
|
||||
if [[ $INSTALL_DEPS_TRAINING = true ]]; then
|
||||
if [[ $ORTMODULE_BUILD = false ]]; then
|
||||
${PYTHON_EXE} -m pip install -r ${0/%install_deps.sh/training\/requirements.txt}
|
||||
else
|
||||
if [[ $TARGET_ROCM = false ]]; then
|
||||
${PYTHON_EXE} -m pip install -r ${0/%install_deps.sh/training\/ortmodule\/stage1\/requirements_torch_cu${CU_VER}.txt}
|
||||
# Due to a [bug on DeepSpeed](https://github.com/microsoft/DeepSpeed/issues/663), we install it separately through ortmodule/stage2/requirements.txt
|
||||
${PYTHON_EXE} -m pip install -r ${0/%install_deps.sh/training\/ortmodule\/stage2\/requirements.txt}
|
||||
else
|
||||
${PYTHON_EXE} -m pip install \
|
||||
--pre -f https://download.pytorch.org/whl/nightly/rocm4.1/torch_nightly.html \
|
||||
torch torchvision torchtext
|
||||
${PYTHON_EXE} -m pip install -r ${0/%install_deps.sh/training\/ortmodule\/stage1\/requirements-rocm.txt}
|
||||
${PYTHON_EXE} -m pip install fairscale
|
||||
# remove triton requirement from getting triggered in requirements-sparse_attn.txt
|
||||
git clone https://github.com/ROCmSoftwarePlatform/DeepSpeed
|
||||
cd DeepSpeed &&\
|
||||
rm requirements/requirements-sparse_attn.txt &&\
|
||||
${PYTHON_EXE} setup.py bdist_wheel &&\
|
||||
${PYTHON_EXE} -m pip install dist/deepspeed*.whl &&\
|
||||
cd ..
|
||||
fi
|
||||
fi
|
||||
fi
|
||||
if [[ $INSTALL_DEPS_DISTRIBUTED_SETUP = true ]]; then
|
||||
source ${0/%install_deps.sh/install_openmpi.sh}
|
||||
source ${0/%install_os_deps.sh/install_openmpi.sh}
|
||||
fi
|
||||
fi
|
||||
|
||||
cd /
|
||||
rm -rf /tmp/src
|
||||
rm -rf /usr/include/google
|
||||
rm -rf /usr/$LIBDIR/libproto*
|
||||
75
tools/ci_build/github/linux/docker/scripts/install_python_deps.sh
Executable file
75
tools/ci_build/github/linux/docker/scripts/install_python_deps.sh
Executable file
|
|
@ -0,0 +1,75 @@
|
|||
#!/bin/bash
|
||||
set -e -x
|
||||
|
||||
INSTALL_DEPS_TRAINING=false
|
||||
INSTALL_DEPS_DISTRIBUTED_SETUP=false
|
||||
ORTMODULE_BUILD=false
|
||||
TARGET_ROCM=false
|
||||
CU_VER="11.1"
|
||||
USE_CONDA=false
|
||||
|
||||
while getopts p:d:v:tmurc parameter_Option
|
||||
do case "${parameter_Option}"
|
||||
in
|
||||
p) PYTHON_VER=${OPTARG};;
|
||||
d) DEVICE_TYPE=${OPTARG};;
|
||||
v) CU_VER=${OPTARG};;
|
||||
t) INSTALL_DEPS_TRAINING=true;;
|
||||
m) INSTALL_DEPS_DISTRIBUTED_SETUP=true;;
|
||||
u) ORTMODULE_BUILD=true;;
|
||||
r) TARGET_ROCM=true;;
|
||||
c) USE_CONDA=true;;
|
||||
esac
|
||||
done
|
||||
|
||||
echo "Python version=$PYTHON_VER"
|
||||
|
||||
DEVICE_TYPE=${DEVICE_TYPE:=Normal}
|
||||
|
||||
if [[ $USE_CONDA = true ]]; then
|
||||
# conda python version has already been installed by
|
||||
# tools/ci_build/github/linux/docker/Dockerfile.ubuntu_gpu_training.
|
||||
# so, /home/onnxruntimedev/miniconda3/bin/python should point
|
||||
# to the correct Python version
|
||||
PYTHON_EXE="/home/onnxruntimedev/miniconda3/bin/python"
|
||||
elif [[ "$PYTHON_VER" = "3.6" && -d "/opt/python/cp36-cp36m" ]]; then
|
||||
PYTHON_EXE="/opt/python/cp36-cp36m/bin/python3.6"
|
||||
elif [[ "$PYTHON_VER" = "3.7" && -d "/opt/python/cp37-cp37m" ]]; then
|
||||
PYTHON_EXE="/opt/python/cp37-cp37m/bin/python3.7"
|
||||
elif [[ "$PYTHON_VER" = "3.8" && -d "/opt/python/cp38-cp38" ]]; then
|
||||
PYTHON_EXE="/opt/python/cp38-cp38/bin/python3.8"
|
||||
elif [[ "$PYTHON_VER" = "3.9" && -d "/opt/python/cp39-cp39" ]]; then
|
||||
PYTHON_EXE="/opt/python/cp39-cp39/bin/python3.9"
|
||||
else
|
||||
PYTHON_EXE="/usr/bin/python${PYTHON_VER}"
|
||||
fi
|
||||
|
||||
export ONNX_ML=1
|
||||
export CMAKE_ARGS="-DONNX_GEN_PB_TYPE_STUBS=OFF -DONNX_WERROR=OFF"
|
||||
${PYTHON_EXE} -m pip install -r ${0/%install_python_deps\.sh/requirements\.txt}
|
||||
if [ $DEVICE_TYPE = "gpu" ]; then
|
||||
if [[ $INSTALL_DEPS_TRAINING = true ]]; then
|
||||
if [[ $ORTMODULE_BUILD = false ]]; then
|
||||
${PYTHON_EXE} -m pip install -r ${0/%install_python_deps.sh/training\/requirements.txt}
|
||||
else
|
||||
if [[ $TARGET_ROCM = false ]]; then
|
||||
${PYTHON_EXE} -m pip install -r ${0/%install_python_deps.sh/training\/ortmodule\/stage1\/requirements_torch_cu${CU_VER}.txt}
|
||||
# Due to a [bug on DeepSpeed](https://github.com/microsoft/DeepSpeed/issues/663), we install it separately through ortmodule/stage2/requirements.txt
|
||||
${PYTHON_EXE} -m pip install -r ${0/%install_python_deps.sh/training\/ortmodule\/stage2\/requirements.txt}
|
||||
else
|
||||
${PYTHON_EXE} -m pip install \
|
||||
--pre -f https://download.pytorch.org/whl/nightly/rocm4.1/torch_nightly.html \
|
||||
torch torchvision torchtext
|
||||
${PYTHON_EXE} -m pip install -r ${0/%install_python_deps.sh/training\/ortmodule\/stage1\/requirements-rocm.txt}
|
||||
${PYTHON_EXE} -m pip install fairscale
|
||||
# remove triton requirement from getting triggered in requirements-sparse_attn.txt
|
||||
git clone https://github.com/ROCmSoftwarePlatform/DeepSpeed
|
||||
cd DeepSpeed &&\
|
||||
rm requirements/requirements-sparse_attn.txt &&\
|
||||
${PYTHON_EXE} setup.py bdist_wheel &&\
|
||||
${PYTHON_EXE} -m pip install dist/deepspeed*.whl &&\
|
||||
cd ..
|
||||
fi
|
||||
fi
|
||||
fi
|
||||
fi
|
||||
|
|
@ -6,5 +6,5 @@ tensorboard
|
|||
h5py
|
||||
wget
|
||||
pytorch-lightning==1.2.5
|
||||
deepspeed
|
||||
deepspeed==0.3.15
|
||||
fairscale
|
||||
|
|
|
|||
|
|
@ -7,10 +7,11 @@ CUDA_VER=cuda10.1-cudnn7.6
|
|||
YOCTO_VERSION="4.19"
|
||||
INSTALL_DEPS_DISTRIBUTED_SETUP=false
|
||||
ORTMODULE_BUILD=false
|
||||
USE_CONDA=false
|
||||
ALLOW_RELEASED_ONNX_OPSET_ONLY_ENV="ALLOW_RELEASED_ONNX_OPSET_ONLY="$ALLOW_RELEASED_ONNX_OPSET_ONLY
|
||||
echo "ALLOW_RELEASED_ONNX_OPSET_ONLY environment variable is set as "$ALLOW_RELEASED_ONNX_OPSET_ONLY_ENV
|
||||
|
||||
while getopts c:o:d:r:p:x:a:v:y:t:i:mu parameter_Option
|
||||
while getopts c:o:d:r:p:x:a:v:y:t:i:mue parameter_Option
|
||||
do case "${parameter_Option}"
|
||||
in
|
||||
#android, ubuntu16.04, ubuntu18.04, CentOS7
|
||||
|
|
@ -39,6 +40,8 @@ i) IMAGE_CACHE_CONTAINER_REGISTRY_NAME=${OPTARG};;
|
|||
m) INSTALL_DEPS_DISTRIBUTED_SETUP=true;;
|
||||
# install ortmodule specific dependencies
|
||||
u) ORTMODULE_BUILD=true;;
|
||||
# install and use conda
|
||||
e) USE_CONDA=true;;
|
||||
esac
|
||||
done
|
||||
|
||||
|
|
@ -91,7 +94,7 @@ else
|
|||
INSTALL_DEPS_EXTRA_ARGS="${INSTALL_DEPS_EXTRA_ARGS} -u"
|
||||
fi
|
||||
$GET_DOCKER_IMAGE_CMD --repository "onnxruntime-$IMAGE" \
|
||||
--docker-build-args="--build-arg BUILD_USER=onnxruntimedev --build-arg BUILD_UID=$(id -u) --build-arg PYTHON_VERSION=${PYTHON_VER} --build-arg INSTALL_DEPS_EXTRA_ARGS=\"${INSTALL_DEPS_EXTRA_ARGS}\"" \
|
||||
--docker-build-args="--build-arg BUILD_USER=onnxruntimedev --build-arg BUILD_UID=$(id -u) --build-arg PYTHON_VERSION=${PYTHON_VER} --build-arg INSTALL_DEPS_EXTRA_ARGS=\"${INSTALL_DEPS_EXTRA_ARGS}\" --build-arg USE_CONDA=${USE_CONDA}" \
|
||||
--dockerfile $DOCKER_FILE --context .
|
||||
elif [ $BUILD_DEVICE = "tensorrt" ]; then
|
||||
# TensorRT container release 20.12
|
||||
|
|
|
|||
Loading…
Reference in a new issue