onnxruntime/cmake/onnxruntime_training.cmake
Weixing Zhang aec4cb489e
ROCm EP for AMD GPU (#5480)
The ROCm EP is designed and implemented based on AMD GPU software stack named ROCm. Here is the link for the details about ROCm: https://rocmdocs.amd.com/en/latest/

ROCm EP was created based on the following things:
1. AMD GPU programming language: HIP
2. AMD GPU HIP language runtime: amdhip64
3. BLAS: rocBLAS, hipBLAS
4. DNN: miOpen
5. Collective Communication library: RCCL
6. cub: hipCub
7. …

Current status:
BERT-L and GPT2 training can be ran on AMD GPU with data parallel.

Next:
1. Make more GPU code be sharable between ROCm EP and CUDA EP since HIP language and HIP runtime API are very close to CUDA.
2. Continue improving the implementation.
3. Continue GPU kernel optimization.
4. Support model parallelism on ROCm EP.
……

The rocm kernels have been removed from this commit and will be in a separate PR. Since the original PR was too big(~180 files), it was suggested to split the PR into two parts, one is rocm-kernels, the other is non rocm kernels.  

Co-authored-by: Weixing Zhang <wezhan@microsoft.com>
Co-authored-by: sabreshao <sabre.shao@amd.com>
Co-authored-by: anghostcici <11013544+anghostcici@users.noreply.github.com>
Co-authored-by: Suffian Khan <sukha@microsoft.com>
Co-authored-by: Edward Chen <18449977+edgchen1@users.noreply.github.com>
2020-10-29 17:13:04 -07:00

208 lines
10 KiB
CMake

# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.
set (CXXOPTS ${PROJECT_SOURCE_DIR}/external/cxxopts/include)
# training lib
file(GLOB_RECURSE onnxruntime_training_srcs
"${ORTTRAINING_SOURCE_DIR}/core/framework/*.h"
"${ORTTRAINING_SOURCE_DIR}/core/framework/*.cc"
"${ORTTRAINING_SOURCE_DIR}/core/framework/tensorboard/*.h"
"${ORTTRAINING_SOURCE_DIR}/core/framework/tensorboard/*.cc"
"${ORTTRAINING_SOURCE_DIR}/core/session/*.h"
"${ORTTRAINING_SOURCE_DIR}/core/session/*.cc"
)
add_library(onnxruntime_training ${onnxruntime_training_srcs})
add_dependencies(onnxruntime_training onnx tensorboard ${onnxruntime_EXTERNAL_DEPENDENCIES})
onnxruntime_add_include_to_target(onnxruntime_training onnxruntime_common onnx onnx_proto tensorboard protobuf::libprotobuf flatbuffers)
# fix event_writer.cc 4100 warning
if(WIN32)
target_compile_options(onnxruntime_training PRIVATE /wd4100)
endif()
target_include_directories(onnxruntime_training PRIVATE ${CMAKE_CURRENT_BINARY_DIR} ${ONNXRUNTIME_ROOT} ${ORTTRAINING_ROOT} ${eigen_INCLUDE_DIRS} ${RE2_INCLUDE_DIR} PUBLIC ${onnxruntime_graph_header} ${MPI_INCLUDE_DIRS})
if (onnxruntime_USE_CUDA)
target_include_directories(onnxruntime_training PRIVATE ${onnxruntime_CUDNN_HOME}/include ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES})
endif()
if (onnxruntime_USE_HOROVOD)
message(${HOROVOD_INCLUDE_DIRS})
target_include_directories(onnxruntime_training PUBLIC ${HOROVOD_INCLUDE_DIRS})
endif()
set_target_properties(onnxruntime_training PROPERTIES FOLDER "ONNXRuntime")
source_group(TREE ${ORTTRAINING_ROOT} FILES ${onnxruntime_training_srcs})
# training runner lib
file(GLOB_RECURSE onnxruntime_training_runner_srcs
"${ORTTRAINING_SOURCE_DIR}/models/runner/*.h"
"${ORTTRAINING_SOURCE_DIR}/models/runner/*.cc"
)
# perf test utils
set(onnxruntime_perf_test_src_dir ${TEST_SRC_DIR}/perftest)
set(onnxruntime_perf_test_src
"${onnxruntime_perf_test_src_dir}/utils.h")
if(WIN32)
list(APPEND onnxruntime_perf_test_src
"${onnxruntime_perf_test_src_dir}/windows/utils.cc")
else ()
list(APPEND onnxruntime_perf_test_src
"${onnxruntime_perf_test_src_dir}/posix/utils.cc")
endif()
add_library(onnxruntime_training_runner ${onnxruntime_training_runner_srcs} ${onnxruntime_perf_test_src})
add_dependencies(onnxruntime_training_runner ${onnxruntime_EXTERNAL_DEPENDENCIES} onnx onnxruntime_providers)
onnxruntime_add_include_to_target(onnxruntime_training_runner onnxruntime_training onnxruntime_framework onnxruntime_common onnx onnx_proto protobuf::libprotobuf onnxruntime_training flatbuffers)
target_include_directories(onnxruntime_training_runner PRIVATE ${CMAKE_CURRENT_BINARY_DIR} ${ONNXRUNTIME_ROOT} ${ORTTRAINING_ROOT} ${eigen_INCLUDE_DIRS} ${PROJECT_SOURCE_DIR}/external/json PUBLIC ${onnxruntime_graph_header})
if (onnxruntime_USE_CUDA)
target_include_directories(onnxruntime_training_runner PUBLIC ${onnxruntime_CUDNN_HOME}/include ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES})
endif()
if (onnxruntime_USE_ROCM)
add_definitions(-DUSE_ROCM=1)
target_include_directories(onnxruntime_training_runner PUBLIC ${onnxruntime_ROCM_HOME}/include)
endif()
check_cxx_compiler_flag(-Wno-maybe-uninitialized HAS_NO_MAYBE_UNINITIALIZED)
if(UNIX AND NOT APPLE)
if (HAS_NO_MAYBE_UNINITIALIZED)
target_compile_options(onnxruntime_training_runner PUBLIC "-Wno-maybe-uninitialized")
endif()
endif()
if (onnxruntime_USE_ROCM)
target_compile_options(onnxruntime_training_runner PUBLIC -D__HIP_PLATFORM_HCC__=1)
endif()
set_target_properties(onnxruntime_training_runner PROPERTIES FOLDER "ONNXRuntimeTest")
source_group(TREE ${REPO_ROOT} FILES ${onnxruntime_training_runner_srcs} ${onnxruntime_perf_test_src})
# MNIST
file(GLOB_RECURSE training_mnist_src
"${ORTTRAINING_SOURCE_DIR}/models/mnist/*.h"
"${ORTTRAINING_SOURCE_DIR}/models/mnist/mnist_data_provider.cc"
"${ORTTRAINING_SOURCE_DIR}/models/mnist/main.cc"
)
add_executable(onnxruntime_training_mnist ${training_mnist_src})
onnxruntime_add_include_to_target(onnxruntime_training_mnist onnxruntime_common onnx onnx_proto protobuf::libprotobuf onnxruntime_training flatbuffers)
target_include_directories(onnxruntime_training_mnist PUBLIC ${CMAKE_CURRENT_BINARY_DIR} ${ONNXRUNTIME_ROOT} ${ORTTRAINING_ROOT} ${eigen_INCLUDE_DIRS} ${CXXOPTS} ${extra_includes} ${onnxruntime_graph_header} ${onnxruntime_exec_src_dir} ${CMAKE_CURRENT_BINARY_DIR} ${CMAKE_CURRENT_BINARY_DIR}/onnx onnxruntime_training_runner)
set(ONNXRUNTIME_LIBS
onnxruntime_session
${onnxruntime_libs}
${PROVIDERS_CUDA}
${PROVIDERS_ROCM}
${PROVIDERS_MKLDNN}
onnxruntime_optimizer
onnxruntime_providers
onnxruntime_util
onnxruntime_framework
onnxruntime_graph
onnxruntime_common
onnxruntime_mlas
onnxruntime_flatbuffers
)
if (onnxruntime_ENABLE_LANGUAGE_INTEROP_OPS)
list(APPEND ONNXRUNTIME_LIBS onnxruntime_language_interop onnxruntime_pyop)
endif()
if(UNIX AND NOT APPLE)
if (HAS_NO_MAYBE_UNINITIALIZED)
target_compile_options(onnxruntime_training_mnist PUBLIC "-Wno-maybe-uninitialized")
endif()
endif()
target_link_libraries(onnxruntime_training_mnist PRIVATE onnxruntime_training_runner onnxruntime_training ${ONNXRUNTIME_LIBS} ${onnxruntime_EXTERNAL_LIBRARIES})
set_target_properties(onnxruntime_training_mnist PROPERTIES FOLDER "ONNXRuntimeTest")
# squeezenet
# Disabling build for squeezenet, as no one is using this
#[[
file(GLOB_RECURSE training_squeezene_src
"${ORTTRAINING_SOURCE_DIR}/models/squeezenet/*.h"
"${ORTTRAINING_SOURCE_DIR}/models/squeezenet/*.cc"
)
add_executable(onnxruntime_training_squeezenet ${training_squeezene_src})
onnxruntime_add_include_to_target(onnxruntime_training_squeezenet onnxruntime_common onnx onnx_proto protobuf::libprotobuf onnxruntime_training flatbuffers)
target_include_directories(onnxruntime_training_squeezenet PUBLIC ${ONNXRUNTIME_ROOT} ${ORTTRAINING_ROOT} ${eigen_INCLUDE_DIRS} ${extra_includes} ${onnxruntime_graph_header} ${onnxruntime_exec_src_dir} ${CMAKE_CURRENT_BINARY_DIR} ${CMAKE_CURRENT_BINARY_DIR}/onnx onnxruntime_training_runner)
if(UNIX AND NOT APPLE)
target_compile_options(onnxruntime_training_squeezenet PUBLIC "-Wno-maybe-uninitialized")
endif()
target_link_libraries(onnxruntime_training_squeezenet PRIVATE onnxruntime_training_runner onnxruntime_training ${ONNXRUNTIME_LIBS} ${onnxruntime_EXTERNAL_LIBRARIES})
set_target_properties(onnxruntime_training_squeezenet PROPERTIES FOLDER "ONNXRuntimeTest")
]]
# BERT
file(GLOB_RECURSE training_bert_src
"${ORTTRAINING_SOURCE_DIR}/models/bert/*.h"
"${ORTTRAINING_SOURCE_DIR}/models/bert/*.cc"
)
add_executable(onnxruntime_training_bert ${training_bert_src})
if(UNIX AND NOT APPLE)
if (HAS_NO_MAYBE_UNINITIALIZED)
target_compile_options(onnxruntime_training_bert PUBLIC "-Wno-maybe-uninitialized")
endif()
endif()
onnxruntime_add_include_to_target(onnxruntime_training_bert onnxruntime_common onnx onnx_proto protobuf::libprotobuf onnxruntime_training flatbuffers)
target_include_directories(onnxruntime_training_bert PUBLIC ${CMAKE_CURRENT_BINARY_DIR} ${ONNXRUNTIME_ROOT} ${ORTTRAINING_ROOT} ${MPI_INCLUDE_DIRS} ${eigen_INCLUDE_DIRS} ${CXXOPTS} ${extra_includes} ${onnxruntime_graph_header} ${onnxruntime_exec_src_dir} ${CMAKE_CURRENT_BINARY_DIR} ${CMAKE_CURRENT_BINARY_DIR}/onnx onnxruntime_training_runner)
if (onnxruntime_USE_HOROVOD)
target_include_directories(onnxruntime_training_bert PUBLIC ${HOROVOD_INCLUDE_DIRS})
endif()
target_link_libraries(onnxruntime_training_bert PRIVATE onnxruntime_training_runner onnxruntime_training ${ONNXRUNTIME_LIBS} ${onnxruntime_EXTERNAL_LIBRARIES})
set_target_properties(onnxruntime_training_bert PROPERTIES FOLDER "ONNXRuntimeTest")
# Pipeline
file(GLOB_RECURSE training_pipeline_poc_src
"${ORTTRAINING_SOURCE_DIR}/models/pipeline_poc/*.h"
"${ORTTRAINING_SOURCE_DIR}/models/pipeline_poc/*.cc"
)
add_executable(onnxruntime_training_pipeline_poc ${training_pipeline_poc_src})
if(UNIX AND NOT APPLE)
if (HAS_NO_MAYBE_UNINITIALIZED)
target_compile_options(onnxruntime_training_pipeline_poc PUBLIC "-Wno-maybe-uninitialized")
endif()
endif()
onnxruntime_add_include_to_target(onnxruntime_training_pipeline_poc onnxruntime_common onnx onnx_proto protobuf::libprotobuf onnxruntime_training flatbuffers)
target_include_directories(onnxruntime_training_pipeline_poc PUBLIC ${CMAKE_CURRENT_BINARY_DIR} ${ONNXRUNTIME_ROOT} ${ORTTRAINING_ROOT} ${MPI_INCLUDE_DIRS} ${eigen_INCLUDE_DIRS} ${CXXOPTS} ${extra_includes} ${onnxruntime_graph_header} ${onnxruntime_exec_src_dir} ${CMAKE_CURRENT_BINARY_DIR} ${CMAKE_CURRENT_BINARY_DIR}/onnx onnxruntime_training_runner)
if (onnxruntime_USE_HOROVOD)
target_include_directories(onnxruntime_training_pipeline_poc PUBLIC ${HOROVOD_INCLUDE_DIRS})
endif()
target_link_libraries(onnxruntime_training_pipeline_poc PRIVATE onnxruntime_training_runner onnxruntime_training ${ONNXRUNTIME_LIBS} ${onnxruntime_EXTERNAL_LIBRARIES})
set_target_properties(onnxruntime_training_pipeline_poc PROPERTIES FOLDER "ONNXRuntimeTest")
# GPT-2
file(GLOB_RECURSE training_gpt2_src
"${ORTTRAINING_SOURCE_DIR}/models/gpt2/*.h"
"${ORTTRAINING_SOURCE_DIR}/models/gpt2/*.cc"
)
add_executable(onnxruntime_training_gpt2 ${training_gpt2_src})
if(UNIX AND NOT APPLE)
if (HAS_NO_MAYBE_UNINITIALIZED)
target_compile_options(onnxruntime_training_gpt2 PUBLIC "-Wno-maybe-uninitialized")
endif()
endif()
onnxruntime_add_include_to_target(onnxruntime_training_gpt2 onnxruntime_common onnx onnx_proto protobuf::libprotobuf onnxruntime_training flatbuffers)
target_include_directories(onnxruntime_training_gpt2 PUBLIC ${CMAKE_CURRENT_BINARY_DIR} ${ONNXRUNTIME_ROOT} ${ORTTRAINING_ROOT} ${MPI_INCLUDE_DIRS} ${eigen_INCLUDE_DIRS} ${CXXOPTS} ${extra_includes} ${onnxruntime_graph_header} ${onnxruntime_exec_src_dir} ${CMAKE_CURRENT_BINARY_DIR} ${CMAKE_CURRENT_BINARY_DIR}/onnx onnxruntime_training_runner)
if (onnxruntime_USE_HOROVOD)
target_include_directories(onnxruntime_training_gpt2 PUBLIC ${HOROVOD_INCLUDE_DIRS})
endif()
target_link_libraries(onnxruntime_training_gpt2 PRIVATE onnxruntime_training_runner onnxruntime_training ${ONNXRUNTIME_LIBS} ${onnxruntime_EXTERNAL_LIBRARIES})
set_target_properties(onnxruntime_training_gpt2 PROPERTIES FOLDER "ONNXRuntimeTest")