diff --git a/.ci/docker/build.sh b/.ci/docker/build.sh
index 4e0a6c816b6..5464285feee 100755
--- a/.ci/docker/build.sh
+++ b/.ci/docker/build.sh
@@ -350,7 +350,7 @@ if [[ "$image" == *cuda* && ${OS} == "ubuntu" ]]; then
 fi
 
 # Build image
-docker build \
+DOCKER_BUILDKIT=1 docker build \
        --no-cache \
        --progress=plain \
        --build-arg "BUILD_ENVIRONMENT=${image}" \
diff --git a/.ci/docker/common/install_base.sh b/.ci/docker/common/install_base.sh
index e3568b20006..c2c300379d9 100755
--- a/.ci/docker/common/install_base.sh
+++ b/.ci/docker/common/install_base.sh
@@ -153,7 +153,10 @@ wget https://ossci-linux.s3.amazonaws.com/valgrind-${VALGRIND_VERSION}.tar.bz2
 tar -xjf valgrind-${VALGRIND_VERSION}.tar.bz2
 cd valgrind-${VALGRIND_VERSION}
 ./configure --prefix=/usr/local
-make -j6
+# Use all but two cores, but never fewer than one job: make rejects a
+# zero or negative -j value, which `nproc - 2` yields on small hosts.
+NPROC=$(( $(nproc) > 2 ? $(nproc) - 2 : 1 ))
+make -j"${NPROC}"
 sudo make install
 cd ../../
 rm -rf valgrind_build
diff --git a/.ci/docker/common/install_openssl.sh b/.ci/docker/common/install_openssl.sh
index 2f645f0bcb5..c73c9c333c0 100644
--- a/.ci/docker/common/install_openssl.sh
+++ b/.ci/docker/common/install_openssl.sh
@@ -9,7 +9,10 @@ tar xf "${OPENSSL}.tar.gz"
 cd "${OPENSSL}"
 ./config --prefix=/opt/openssl -d '-Wl,--enable-new-dtags,-rpath,$(LIBRPATH)'
 # NOTE: openssl install errors out when built with the -j option
-make -j6; make install_sw
+# Parallelize only the compile step (all but two cores, minimum one job);
+# install_sw is deliberately run without -j, per the NOTE above.
+NPROC=$(( $(nproc) > 2 ? $(nproc) - 2 : 1 ))
+make -j"${NPROC}"; make install_sw
 # Link the ssl libraries to the /usr/lib folder.
 sudo ln -s /opt/openssl/lib/lib* /usr/lib
 cd ..
diff --git a/.ci/docker/common/install_protobuf.sh b/.ci/docker/common/install_protobuf.sh
index 4b7a7a6ac23..b5d4eceee0c 100755
--- a/.ci/docker/common/install_protobuf.sh
+++ b/.ci/docker/common/install_protobuf.sh
@@ -2,55 +2,19 @@
 
 set -ex
 
-# This function installs protobuf 3.17
-install_protobuf_317() {
-  pb_dir="/usr/temp_pb_install_dir"
-  mkdir -p $pb_dir
+pb_dir="/usr/temp_pb_install_dir"
+mkdir -p $pb_dir
 
-  # On the nvidia/cuda:9-cudnn7-devel-centos7 image we need this symlink or
-  # else it will fail with
-  #   g++: error: ./../lib64/crti.o: No such file or directory
-  ln -s /usr/lib64 "$pb_dir/lib64"
+# On the nvidia/cuda:9-cudnn7-devel-centos7 image we need this symlink or
+# else it will fail with
+#   g++: error: ./../lib64/crti.o: No such file or directory
+ln -s /usr/lib64 "$pb_dir/lib64"
 
-  curl -LO "https://github.com/protocolbuffers/protobuf/releases/download/v3.17.3/protobuf-all-3.17.3.tar.gz" --retry 3
-  tar -xvz -C "$pb_dir" --strip-components 1 -f protobuf-all-3.17.3.tar.gz
-  # -j6 to balance memory usage and speed.
-  # naked `-j` seems to use too much memory.
-  pushd "$pb_dir" && ./configure && make -j6 && make -j6 check && sudo make -j6 install && sudo ldconfig
-  popd
-  rm -rf $pb_dir
-}
-
-install_ubuntu() {
-  # Ubuntu 14.04 has cmake 2.8.12 as the default option, so we will
-  # install cmake3 here and use cmake3.
-  apt-get update
-  if [[ "$UBUNTU_VERSION" == 14.04 ]]; then
-    apt-get install -y --no-install-recommends cmake3
-  fi
-
-  # Cleanup
-  apt-get autoclean && apt-get clean
-  rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
-
-  install_protobuf_317
-}
-
-install_centos() {
-  install_protobuf_317
-}
-
-# Install base packages depending on the base OS
-ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"')
-case "$ID" in
-  ubuntu)
-    install_ubuntu
-    ;;
-  centos)
-    install_centos
-    ;;
-  *)
-    echo "Unable to determine OS..."
-    exit 1
-    ;;
-esac
+curl -LO "https://github.com/protocolbuffers/protobuf/releases/download/v3.17.3/protobuf-all-3.17.3.tar.gz" --retry 3
+tar -xvz -C "$pb_dir" --strip-components 1 -f protobuf-all-3.17.3.tar.gz
+# Use all but two cores, never fewer than one job. Every make call must get
+# an explicit count: a naked `-j` seems to use too much memory.
+NPROC=$(( $(nproc) > 2 ? $(nproc) - 2 : 1 ))
+pushd "$pb_dir" && ./configure && make -j"${NPROC}" && make -j"${NPROC}" check && sudo make -j"${NPROC}" install && sudo ldconfig
+popd
+rm -rf $pb_dir
diff --git a/.ci/docker/common/install_ucc.sh b/.ci/docker/common/install_ucc.sh
index 333e44e6f77..2224811bd98 100755
--- a/.ci/docker/common/install_ucc.sh
+++ b/.ci/docker/common/install_ucc.sh
@@ -36,7 +36,12 @@ function install_ucc() {
   git submodule update --init --recursive
 
   ./autogen.sh
-  ./configure --prefix=$UCC_HOME --with-ucx=$UCX_HOME --with-cuda=$with_cuda
+  # We only run distributed tests on Tesla M60 and A10G
+  NVCC_GENCODE="-gencode=arch=compute_52,code=sm_52 -gencode=arch=compute_86,code=compute_86"
+  ./configure --prefix=$UCC_HOME \
+    --with-ucx=$UCX_HOME \
+    --with-cuda=$with_cuda \
+    --with-nvcc-gencode="${NVCC_GENCODE}"
   time make -j
   sudo make install
 
diff --git a/.github/workflows/docker-builds.yml b/.github/workflows/docker-builds.yml
index 6d4e3b27d46..803ac9ba930 100644
--- a/.github/workflows/docker-builds.yml
+++ b/.github/workflows/docker-builds.yml
@@ -31,7 +31,7 @@ permissions: read-all
 
 jobs:
   docker-build:
-    runs-on: [self-hosted, linux.2xlarge]
+    runs-on: [self-hosted, linux.12xlarge]
     environment: ${{ (github.ref == 'refs/heads/main' || startsWith(github.event.ref, 'refs/tags/v')) && 'docker-build' || '' }}
     timeout-minutes: 240
     strategy: