mirror of
https://github.com/saymrwulf/onnxruntime.git
synced 2026-06-06 00:03:22 +00:00
link mpi when either use_mpi or use_nccl enabled (#14467)
### Only link MPI when either use_mpi or use_nccl is enabled. This fixes the issue https://github.com/microsoft/onnxruntime/issues/14278. After talking with @askhade, we think that if users want to enable NCCL/MPI but MPI is not found, it should be a failure instead of a warning, so this PR makes that change. As a result, to make the CIs pass, we would need to disable NCCL/MPI explicitly in the build command. This PR takes an alternative approach instead, i.e. since NCCL and MPI are not used for customers, disable NCCL by default if "--disable_nccl" is not specified, and disable MPI by default if "--use_mpi" is not specified. ### Motivation and Context <!-- - Why is this change required? What problem does it solve? - If it fixes an open issue, please link to the issue here. -->
This commit is contained in:
parent
c6c11039d7
commit
7eca42484c
2 changed files with 22 additions and 15 deletions
|
|
@ -1347,19 +1347,22 @@ if (onnxruntime_ENABLE_TRAINING)
|
|||
|
||||
find_package(MPI)
|
||||
|
||||
if (MPI_CXX_FOUND)
|
||||
message( STATUS "MPI Version: ${MPI_CXX_VERSION}")
|
||||
message( STATUS "MPI (include: ${MPI_CXX_INCLUDE_DIRS}, library: ${MPI_CXX_LIBRARIES})" )
|
||||
mark_as_advanced(MPI_CXX_INCLUDE_DIRS MPI_CXX_LIBRARIES)
|
||||
list(APPEND onnxruntime_EXTERNAL_LIBRARIES ${MPI_CXX_LIBRARIES} ${MPI_CXX_LINK_FLAGS})
|
||||
else ()
|
||||
set(onnxruntime_USE_NCCL OFF)
|
||||
set(onnxruntime_USE_MPI OFF)
|
||||
message( WARNING "MPI is not found. Please define onnxruntime_MPI_HOME to specify the path of MPI. Otherwise, NCCL will be disabled." )
|
||||
if (onnxruntime_USE_MPI OR onnxruntime_USE_NCCL)
|
||||
if (MPI_CXX_FOUND)
|
||||
message( STATUS "MPI Version: ${MPI_CXX_VERSION}")
|
||||
message( STATUS "MPI (include: ${MPI_CXX_INCLUDE_DIRS}, library: ${MPI_CXX_LIBRARIES})" )
|
||||
mark_as_advanced(MPI_CXX_INCLUDE_DIRS MPI_CXX_LIBRARIES)
|
||||
list(APPEND onnxruntime_EXTERNAL_LIBRARIES ${MPI_CXX_LIBRARIES} ${MPI_CXX_LINK_FLAGS})
|
||||
else ()
|
||||
message(
|
||||
FATAL_ERROR
|
||||
"MPI is not found. Please define onnxruntime_MPI_HOME to specify the path of MPI. Otherwise, NCCL will be disabled."
|
||||
)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
# Find NCCL and MPI
|
||||
if (onnxruntime_USE_NCCL AND MPI_CXX_FOUND)
|
||||
if (onnxruntime_USE_NCCL)
|
||||
if (onnxruntime_USE_CUDA)
|
||||
set(NCCL_LIBNAME "nccl")
|
||||
elseif (onnxruntime_USE_ROCM)
|
||||
|
|
@ -1417,13 +1420,15 @@ if (onnxruntime_ENABLE_TRAINING)
|
|||
add_definitions(-DORT_USE_NCCL=1)
|
||||
message( STATUS "NCCL is enabled in Linux GPU Build." )
|
||||
else ()
|
||||
set(onnxruntime_USE_NCCL OFF)
|
||||
message( WARNING "NCCL is not found. Please use --nccl_home to specify the path of NCCL. Otherwise, NCCL is disabled." )
|
||||
message(
|
||||
FATAL_ERROR
|
||||
"NCCL is not found. Please use --nccl_home to specify the path of NCCL. Otherwise, NCCL is disabled."
|
||||
)
|
||||
endif()
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if (onnxruntime_USE_MPI AND MPI_CXX_FOUND)
|
||||
if (onnxruntime_USE_MPI)
|
||||
add_definitions(-DUSE_MPI=1)
|
||||
endif()
|
||||
|
||||
|
|
|
|||
|
|
@ -192,10 +192,12 @@ def parse_arguments():
|
|||
parser.add_argument("--enable_training_apis", action="store_true", help="Enable ort training apis.")
|
||||
parser.add_argument("--enable_training_ops", action="store_true", help="Enable training ops in inference graph.")
|
||||
|
||||
parser.add_argument("--disable_nccl", action="store_true", help="Disable Nccl.")
|
||||
parser.add_argument("--disable_nccl", action="store_false", help="Disable NCCL, by default NCCL is disabled.")
|
||||
parser.add_argument("--mpi_home", help="Path to MPI installation dir")
|
||||
parser.add_argument("--nccl_home", help="Path to NCCL installation dir")
|
||||
parser.add_argument("--use_mpi", nargs="?", default=True, const=True, type=_str_to_bool)
|
||||
parser.add_argument(
|
||||
"--use_mpi", nargs="?", default=False, const=True, type=_str_to_bool, help="Disabled by default."
|
||||
)
|
||||
|
||||
# enable ONNX tests
|
||||
parser.add_argument(
|
||||
|
|
|
|||
Loading…
Reference in a new issue