diff --git a/.gitmodules b/.gitmodules
index 6eb38ef853..4b16a02897 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -41,3 +41,6 @@
 	path = cmake/external/onnx-tensorrt
 	url = https://github.com/onnx/onnx-tensorrt.git
 	branch = 5.1
+[submodule "cmake/external/mimalloc"]
+	path = cmake/external/mimalloc
+	url = https://github.com/microsoft/mimalloc.git
diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt
index bb7a0096fc..ad964996f1 100644
--- a/cmake/CMakeLists.txt
+++ b/cmake/CMakeLists.txt
@@ -459,6 +459,10 @@ else()
 endif()
 
 if (onnxruntime_USE_JEMALLOC)
+  if (onnxruntime_USE_MIMALLOC)
+    message( FATAL_ERROR "You cannot specify both jemalloc and mimalloc." )
+  endif()
+
-  if (Win32)
+  if (WIN32)
     message( FATAL_ERROR "Jemalloc is not supported on Windows." )
   endif()
diff --git a/cmake/external/mimalloc b/cmake/external/mimalloc
new file mode 160000
index 0000000000..e2202f6bbe
--- /dev/null
+++ b/cmake/external/mimalloc
@@ -0,0 +1 @@
+Subproject commit e2202f6bbe4e2051014ac20c38b3cf88492e9d2f
diff --git a/cmake/onnxruntime_providers.cmake b/cmake/onnxruntime_providers.cmake
index 4c0abeb970..7486591375 100644
--- a/cmake/onnxruntime_providers.cmake
+++ b/cmake/onnxruntime_providers.cmake
@@ -112,6 +112,92 @@ install(DIRECTORY ${PROJECT_SOURCE_DIR}/../include/onnxruntime/core/providers/cp
 
 set_target_properties(onnxruntime_providers PROPERTIES LINKER_LANGUAGE CXX)
 set_target_properties(onnxruntime_providers PROPERTIES FOLDER "ONNXRuntime")
+
+# Optional mimalloc support: on Windows the vendored Visual Studio solution is
+# built with msbuild; elsewhere the submodule's own CMake project is added.
+if (onnxruntime_USE_MIMALLOC)
+  set(mimalloc_root_dir ${PROJECT_SOURCE_DIR}/external/mimalloc)
+  set(mimalloc_output_dir ${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_BUILD_TYPE}/)
+  set(mimalloc_wheel_dir ${mimalloc_output_dir}onnxruntime/capi/)
+
+  add_definitions(
+    -DUSE_MIMALLOC=1 # used in ONNXRuntime
+    -DMI_OVERRIDE=ON) # used in building MiMalloc
+  include_directories(${mimalloc_root_dir}/include)
+
+  if(NOT IS_DIRECTORY ${mimalloc_wheel_dir})
+    file(MAKE_DIRECTORY ${mimalloc_wheel_dir})
+  endif()
+
+  if (WIN32)
+    # The generic MiMalloc CMakeLists.txt project lacks
+    # the needed hooks to override malloc at runtime on Windows
+    # so we fall back to the specially provided VS solutions (which
+    # do have those hooks)
+    set(mimalloc_output mimalloc-override)
+
+    # Expansions are quoted so an unset value fails the check cleanly instead of producing a malformed if().
+    if(NOT "${CMAKE_GENERATOR_PLATFORM}" MATCHES "x64|Win32")
+      message(FATAL_ERROR "MiMalloc doesn't support ARM/ARM64 targets")
+    endif()
+
+    set(vs_version "vs2019")
+    if ("${CMAKE_GENERATOR}" MATCHES "Visual Studio [1-5]+ [0-9]+")
+      set(vs_version "vs2017")
+    endif()
+
+    set(mimalloc_config "Release")
+    if("${CMAKE_BUILD_TYPE}" MATCHES "Debug")
+      set(mimalloc_config "Debug")
+    endif()
+
+    set(mimalloc_target_winsdk ${CMAKE_VS_WINDOWS_TARGET_PLATFORM_VERSION})
+    if(DEFINED ENV{WindowsSDKVersion})
+      set(mimalloc_target_winsdk $ENV{WindowsSDKVersion})
+    endif()
+
+    set(mimalloc_deps ${mimalloc_output_dir}mimalloc-redirect.dll)
+    set(mimalloc_platform ${CMAKE_GENERATOR_PLATFORM})
+    if("${CMAKE_GENERATOR_PLATFORM}" MATCHES "Win32")
+      set(mimalloc_deps ${mimalloc_output_dir}mimalloc-redirect32.dll)
+      set(mimalloc_platform x86)
+    endif()
+
+    # msbuild throws a fit during a postbuild step when copying files if the source uses backslashes and the destination uses forward slashes
+    string(REGEX REPLACE "/" "\\\\" msbuild_converted_output_dir ${mimalloc_output_dir})
+    add_custom_command(OUTPUT ${mimalloc_output} COMMAND msbuild ${mimalloc_root_dir}/ide/${vs_version}/mimalloc.sln
+                       /p:OutDir=${msbuild_converted_output_dir} /p:Platform=${mimalloc_platform} /p:Configuration=${mimalloc_config}
+                       /p:WindowsTargetPlatformVersion=${mimalloc_target_winsdk})
+    add_custom_target(mimalloc_override ALL DEPENDS ${mimalloc_output})
+
+    # Imported target for the .lib expected in the msbuild OutDir; uses the documented "<type> IMPORTED" form.
+    add_library(mimalloc STATIC IMPORTED)
+    add_dependencies(mimalloc mimalloc_override)
+    set_target_properties(mimalloc PROPERTIES IMPORTED_LOCATION "${mimalloc_output_dir}${mimalloc_output}.lib")
+
+    # copy the dlls into the directory where setup.py will look for them
+    add_custom_command(TARGET mimalloc_override POST_BUILD
+      COMMAND ${CMAKE_COMMAND} -E copy_if_different
+        ${mimalloc_output_dir}mimalloc-override.dll ${mimalloc_deps}
+        ${mimalloc_wheel_dir}
+    )
+
+  else()
+    set(MI_BUILD_TESTS OFF CACHE BOOL "Build mimalloc tests" FORCE)
+    add_subdirectory(${mimalloc_root_dir} EXCLUDE_FROM_ALL)
+    set_target_properties(mimalloc PROPERTIES RUNTIME_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
+
+    target_compile_definitions(mimalloc PUBLIC MI_USE_CXX=ON)
+
+    # copy the dll into the directory where setup.py will look for it
+    get_target_property(mimalloc_output_name mimalloc OUTPUT_NAME)
+    install(TARGETS mimalloc DESTINATION ${mimalloc_wheel_dir})
+  endif()
+
+  # TODO: enable linking once mimalloc has been integrated with an allocator class
+  # target_link_libraries(onnxruntime_providers mimalloc)
+endif()
+
 if (onnxruntime_USE_CUDA)
   file(GLOB_RECURSE onnxruntime_providers_cuda_cc_srcs CONFIGURE_DEPENDS
     "${ONNXRUNTIME_ROOT}/core/providers/cuda/*.h"
diff --git a/setup.py b/setup.py
index b026797eb9..91806e9797 100644
--- a/setup.py
+++ b/setup.py
@@ -115,16 +115,16 @@ except ImportError:
 
 # Additional binaries
 if platform.system() == 'Linux':
-  libs = ['onnxruntime_pybind11_state.so', 'libmkldnn.so.0', 'libmklml_intel.so', 'libiomp5.so']
+  libs = ['onnxruntime_pybind11_state.so', 'libmkldnn.so.0', 'libmklml_intel.so', 'libiomp5.so', 'mimalloc.so']
   # nGraph Libs
   libs.extend(['libngraph.so', 'libcodegen.so', 'libcpu_backend.so', 'libmkldnn.so', 'libtbb_debug.so', 'libtbb_debug.so.2', 'libtbb.so', 'libtbb.so.2'])
   # Nuphar Libs
   libs.extend(['libtvm.so'])
 elif platform.system() == "Darwin":
-  libs = ['onnxruntime_pybind11_state.so', 'libmkldnn.0.dylib'] # TODO add libmklml and libiomp5 later.
+  libs = ['onnxruntime_pybind11_state.so', 'libmkldnn.0.dylib', 'mimalloc.so'] # TODO add libmklml and libiomp5 later.
 else:
   libs = ['onnxruntime_pybind11_state.pyd', 'mkldnn.dll', 'mklml.dll', 'libiomp5md.dll']
-  libs.extend(['ngraph.dll', 'cpu_backend.dll', 'tbb.dll'])
+  libs.extend(['ngraph.dll', 'cpu_backend.dll', 'tbb.dll', 'mimalloc-override.dll', 'mimalloc-redirect.dll', 'mimalloc-redirect32.dll'])
   # Nuphar Libs
   libs.extend(['tvm.dll'])
 
diff --git a/tools/ci_build/build.py b/tools/ci_build/build.py
index 04bb3b9274..a1e5f492f5 100755
--- a/tools/ci_build/build.py
+++ b/tools/ci_build/build.py
@@ -124,6 +124,7 @@ Use the individual flags to only run the specified stages.
     parser.add_argument("--skip_submodule_sync", action='store_true', help="Don't do a 'git submodule update'. Makes the Update phase faster.")
 
     parser.add_argument("--use_jemalloc", action='store_true', help="Use jemalloc.")
+    parser.add_argument("--use_mimalloc", action='store_true', help="Use mimalloc.")
     parser.add_argument("--use_openblas", action='store_true', help="Build with OpenBLAS.")
     parser.add_argument("--use_mkldnn", action='store_true', help="Build with MKLDNN.")
     parser.add_argument("--use_mklml", action='store_true', help="Build with MKLML.")
@@ -331,6 +332,7 @@ def generate_build_tree(cmake_path, source_dir, build_dir, cuda_home, cudnn_home
                  "-Donnxruntime_USE_AUTOML=" + ("ON" if args.use_automl else "OFF"),
                  "-Donnxruntime_CUDA_HOME=" + (cuda_home if args.use_cuda else ""),
                  "-Donnxruntime_USE_JEMALLOC=" + ("ON" if args.use_jemalloc else "OFF"),
+                 "-Donnxruntime_USE_MIMALLOC=" + ("ON" if args.use_mimalloc else "OFF"),
                  "-Donnxruntime_ENABLE_PYTHON=" + ("ON" if args.enable_pybind else "OFF"),
                  "-Donnxruntime_BUILD_CSHARP=" + ("ON" if args.build_csharp else "OFF"),
                  "-Donnxruntime_BUILD_SHARED_LIB=" + ("ON" if args.build_shared_lib or args.build_server else "OFF"),