mirror of
https://github.com/saymrwulf/pytorch.git
synced 2026-05-14 20:57:59 +00:00
[CI] Build docker on larger runners (#118167)
Otherwise it takes 1+h to build CUDA12.1 docker - Limit UCC builds to just sm_52(M60) and sm_86(A10G), which I think has the biggest impact - Replace hardcoded `-j6` build parallelism with more dynamic `-j$[$(nproc) - 2]` - Remove redundant check about Ubuntu-14.04 - Added `DOCKER_BUILDKIT` to parallelize the builds As result, docker build time drops from 1+h to 35 min Pull Request resolved: https://github.com/pytorch/pytorch/pull/118167 Approved by: https://github.com/huydhn
This commit is contained in:
parent
385d8b32fc
commit
66c3152e36
6 changed files with 23 additions and 55 deletions
|
|
@ -350,7 +350,7 @@ if [[ "$image" == *cuda* && ${OS} == "ubuntu" ]]; then
|
|||
fi
|
||||
|
||||
# Build image
|
||||
docker build \
|
||||
DOCKER_BUILDKIT=1 docker build \
|
||||
--no-cache \
|
||||
--progress=plain \
|
||||
--build-arg "BUILD_ENVIRONMENT=${image}" \
|
||||
|
|
|
|||
|
|
@ -153,7 +153,7 @@ wget https://ossci-linux.s3.amazonaws.com/valgrind-${VALGRIND_VERSION}.tar.bz2
|
|||
tar -xjf valgrind-${VALGRIND_VERSION}.tar.bz2
|
||||
cd valgrind-${VALGRIND_VERSION}
|
||||
./configure --prefix=/usr/local
|
||||
make -j6
|
||||
make -j$[$(nproc) - 2]
|
||||
sudo make install
|
||||
cd ../../
|
||||
rm -rf valgrind_build
|
||||
|
|
|
|||
|
|
@ -9,7 +9,8 @@ tar xf "${OPENSSL}.tar.gz"
|
|||
cd "${OPENSSL}"
|
||||
./config --prefix=/opt/openssl -d '-Wl,--enable-new-dtags,-rpath,$(LIBRPATH)'
|
||||
# NOTE: openssl install errors out when built with the -j option
|
||||
make -j6; make install_sw
|
||||
NPROC=$[$(nproc) - 2]
|
||||
make -j${NPROC}; make install_sw
|
||||
# Link the ssl libraries to the /usr/lib folder.
|
||||
sudo ln -s /opt/openssl/lib/lib* /usr/lib
|
||||
cd ..
|
||||
|
|
|
|||
|
|
@ -2,55 +2,17 @@
|
|||
|
||||
set -ex
|
||||
|
||||
# This function installs protobuf 3.17
|
||||
install_protobuf_317() {
|
||||
pb_dir="/usr/temp_pb_install_dir"
|
||||
mkdir -p $pb_dir
|
||||
pb_dir="/usr/temp_pb_install_dir"
|
||||
mkdir -p $pb_dir
|
||||
|
||||
# On the nvidia/cuda:9-cudnn7-devel-centos7 image we need this symlink or
|
||||
# else it will fail with
|
||||
# g++: error: ./../lib64/crti.o: No such file or directory
|
||||
ln -s /usr/lib64 "$pb_dir/lib64"
|
||||
# On the nvidia/cuda:9-cudnn7-devel-centos7 image we need this symlink or
|
||||
# else it will fail with
|
||||
# g++: error: ./../lib64/crti.o: No such file or directory
|
||||
ln -s /usr/lib64 "$pb_dir/lib64"
|
||||
|
||||
curl -LO "https://github.com/protocolbuffers/protobuf/releases/download/v3.17.3/protobuf-all-3.17.3.tar.gz" --retry 3
|
||||
tar -xvz -C "$pb_dir" --strip-components 1 -f protobuf-all-3.17.3.tar.gz
|
||||
# -j6 to balance memory usage and speed.
|
||||
# naked `-j` seems to use too much memory.
|
||||
pushd "$pb_dir" && ./configure && make -j6 && make -j6 check && sudo make -j6 install && sudo ldconfig
|
||||
popd
|
||||
rm -rf $pb_dir
|
||||
}
|
||||
|
||||
install_ubuntu() {
|
||||
# Ubuntu 14.04 has cmake 2.8.12 as the default option, so we will
|
||||
# install cmake3 here and use cmake3.
|
||||
apt-get update
|
||||
if [[ "$UBUNTU_VERSION" == 14.04 ]]; then
|
||||
apt-get install -y --no-install-recommends cmake3
|
||||
fi
|
||||
|
||||
# Cleanup
|
||||
apt-get autoclean && apt-get clean
|
||||
rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
|
||||
|
||||
install_protobuf_317
|
||||
}
|
||||
|
||||
install_centos() {
|
||||
install_protobuf_317
|
||||
}
|
||||
|
||||
# Install base packages depending on the base OS
|
||||
ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"')
|
||||
case "$ID" in
|
||||
ubuntu)
|
||||
install_ubuntu
|
||||
;;
|
||||
centos)
|
||||
install_centos
|
||||
;;
|
||||
*)
|
||||
echo "Unable to determine OS..."
|
||||
exit 1
|
||||
;;
|
||||
esac
|
||||
curl -LO "https://github.com/protocolbuffers/protobuf/releases/download/v3.17.3/protobuf-all-3.17.3.tar.gz" --retry 3
|
||||
tar -xvz -C "$pb_dir" --strip-components 1 -f protobuf-all-3.17.3.tar.gz
|
||||
NPROC=$[$(nproc) - 2]
|
||||
pushd "$pb_dir" && ./configure && make -j${NPROC} && make -j${NPROC} check && sudo make -j${NRPOC} install && sudo ldconfig
|
||||
popd
|
||||
rm -rf $pb_dir
|
||||
|
|
|
|||
|
|
@ -36,7 +36,12 @@ function install_ucc() {
|
|||
git submodule update --init --recursive
|
||||
|
||||
./autogen.sh
|
||||
./configure --prefix=$UCC_HOME --with-ucx=$UCX_HOME --with-cuda=$with_cuda
|
||||
# We only run distributed tests on Tesla M60 and A10G
|
||||
NVCC_GENCODE="-gencode=arch=compute_52,code=sm_52 -gencode=arch=compute_86,code=compute_86"
|
||||
./configure --prefix=$UCC_HOME \
|
||||
--with-ucx=$UCX_HOME \
|
||||
--with-cuda=$with_cuda \
|
||||
--with-nvcc-gencode="${NVCC_GENCODE}"
|
||||
time make -j
|
||||
sudo make install
|
||||
|
||||
|
|
|
|||
2
.github/workflows/docker-builds.yml
vendored
2
.github/workflows/docker-builds.yml
vendored
|
|
@ -31,7 +31,7 @@ permissions: read-all
|
|||
|
||||
jobs:
|
||||
docker-build:
|
||||
runs-on: [self-hosted, linux.2xlarge]
|
||||
runs-on: [self-hosted, linux.12xlarge]
|
||||
environment: ${{ (github.ref == 'refs/heads/main' || startsWith(github.event.ref, 'refs/tags/v')) && 'docker-build' || '' }}
|
||||
timeout-minutes: 240
|
||||
strategy:
|
||||
|
|
|
|||
Loading…
Reference in a new issue