From 2000eba4547f885dc937c4335bee4ba1a71b4df5 Mon Sep 17 00:00:00 2001
From: Peter Bell
Date: Thu, 25 Aug 2022 00:57:57 +0100
Subject: [PATCH] NCCL: Re-enable parallel builds (#83696)

Since #83173 was merged I have noticed some CI being slowed down by
the nccl building step. e.g. if there are no C++ changes then sccache
compiles everything else very quickly and nccl becomes the limiting
factor.

This re-enables parallel builds with some safeguards to protect
against oversubscription. When `make` is the parent build system, we
can use `$(MAKE)` and the `make` jobserver will coordinate job
allocation with the sub-process. For other build systems, this calls
`make` with the `-l` flag which should prevent it launching jobs when
the system load average is already too high.

Pull Request resolved: https://github.com/pytorch/pytorch/pull/83696
Approved by: https://github.com/malfet
---
Reviewer note (text here is not part of the commit message): this copy
differs from the original commit 2000eba by clamping the auto-detected
MAX_JOBS to at least 1, because `make -j0` is rejected. As a result the
post-image blob hash on the `index` line below no longer matches.

 cmake/External/nccl.cmake | 27 ++++++++++++++++++++++++---
 1 file changed, 24 insertions(+), 3 deletions(-)

diff --git a/cmake/External/nccl.cmake b/cmake/External/nccl.cmake
index cd2b0147154..2d3821840c1 100644
--- a/cmake/External/nccl.cmake
+++ b/cmake/External/nccl.cmake
@@ -15,21 +15,42 @@ if(NOT __NCCL_INCLUDED)
     # this second replacement is needed when there are multiple archs
     string(REPLACE ";-gencode" " -gencode" NVCC_GENCODE "${NVCC_GENCODE}")
 
+    if("${CMAKE_GENERATOR}" MATCHES "Make")
+      # Recursive make with jobserver for parallelism
+      set(MAKE_COMMAND "$(MAKE)")
+    else()
+      if(DEFINED ENV{MAX_JOBS})
+        set(MAX_JOBS "$ENV{MAX_JOBS}")
+      else()
+        include(ProcessorCount)
+        ProcessorCount(NUM_HARDWARE_THREADS)
+        # Assume 2 hardware threads per cpu core
+        math(EXPR MAX_JOBS "${NUM_HARDWARE_THREADS} / 2")
+        # ProcessorCount yields 0 when detection fails, and 1 / 2 rounds down
+        # to 0; `make -j0` is rejected, so never go below one job.
+        if(MAX_JOBS LESS 1)
+          set(MAX_JOBS 1)
+        endif()
+      endif()
+
+      # Parallel build with CPU load limit to avoid oversubscription
+      set(MAKE_COMMAND "make" "-j${MAX_JOBS}" "-l${MAX_JOBS}")
+    endif()
+
     set(__NCCL_BUILD_DIR "${CMAKE_CURRENT_BINARY_DIR}/nccl")
     ExternalProject_Add(nccl_external
       SOURCE_DIR ${PROJECT_SOURCE_DIR}/third_party/nccl/nccl
       BUILD_IN_SOURCE 1
       CONFIGURE_COMMAND ""
       BUILD_COMMAND
-        env
-        make
+        ${MAKE_COMMAND}
         "CXX=${CMAKE_CXX_COMPILER}"
         "CUDA_HOME=${CUDA_TOOLKIT_ROOT_DIR}"
         "NVCC=${CUDA_NVCC_EXECUTABLE}"
         "NVCC_GENCODE=${NVCC_GENCODE}"
         "BUILDDIR=${__NCCL_BUILD_DIR}"
         "VERBOSE=0"
-        BUILD_BYPRODUCTS "${__NCCL_BUILD_DIR}/lib/libnccl_static.a"
+      BUILD_BYPRODUCTS "${__NCCL_BUILD_DIR}/lib/libnccl_static.a"
       INSTALL_COMMAND ""
       )
 