From e30e94cb715cfb8a1e3d92d8e0e9ef474dc26ddf Mon Sep 17 00:00:00 2001 From: Yangqing Jia Date: Tue, 28 Feb 2017 10:15:08 -0800 Subject: [PATCH] Made CNMEM optional and added a few cmake components Summary: (1) Since cub seems to be a better memory pool I made cnmem optional. (2) Added MKL testing since Intel now provides an apt source, but that doesn't seem to work right now. (3) Added cmake file for nervana gpu. Closes https://github.com/caffe2/caffe2/pull/175 Differential Revision: D4627056 Pulled By: Yangqing fbshipit-source-id: 9676fa32fce2a29574c0bf7e9d31660b5535cb51 --- .travis.yml | 5 +++++ .travis/build.sh | 6 +++++- .travis/install.sh | 14 ++++++++++++++ CMakeLists.txt | 1 + caffe2/contrib/nervana/CMakeLists.txt | 12 ++++++++++++ caffe2/contrib/nervana/nervana_init_gpu.cc | 2 +- caffe2/core/context_gpu.cu | 21 +++++++++++++++++++++ cmake/Dependencies.cmake | 16 ++++++++++------ cmake/Summary.cmake | 1 + 9 files changed, 70 insertions(+), 8 deletions(-) create mode 100644 caffe2/contrib/nervana/CMakeLists.txt diff --git a/.travis.yml b/.travis.yml index 06e5e12c6c6..4d99959c175 100644 --- a/.travis.yml +++ b/.travis.yml @@ -20,6 +20,10 @@ matrix: - os: osx compiler: clang env: COMPILER=clang++ + - os: linux + compiler: gcc + env: COMPILER=g++ + env: BLAS=MKL - os: linux compiler: gcc addons: @@ -53,6 +57,7 @@ matrix: - env: COMPILER=g++-4.8 - env: BUILD_TARGET=android - env: BUILD_TARGET=ios + - env: BLAS=MKL cache: apt: true diff --git a/.travis/build.sh b/.travis/build.sh index 277792f2465..ebe49a72323 100755 --- a/.travis/build.sh +++ b/.travis/build.sh @@ -24,5 +24,9 @@ else #*************# # Linux build # #*************# - cmake .. -DCMAKE_VERBOSE_MAKEFILE=ON && make + if [[ $BLAS == 'MKL' ]]; then + cmake .. -DCMAKE_VERBOSE_MAKEFILE=ON -DBLAS=MKL && make + else + cmake .. -DCMAKE_VERBOSE_MAKEFILE=ON && make + fi fi diff --git a/.travis/install.sh b/.travis/install.sh index 516fa5e196f..8bb17b21126 100755 --- a/.travis/install.sh +++ b/.travis/install.sh @@ -47,6 +47,20 @@ else sudo apt-get install libprotobuf-dev protobuf-compiler libatlas-base-dev libgoogle-glog-dev liblmdb-dev libleveldb-dev libsnappy-dev python-dev python-pip libiomp-dev libopencv-dev libpthread-stubs0-dev pip install numpy + + ######################### + # Install MKL if needed # + ######################### + + + if [[ $BLAS == 'MKL' ]]; then + wget http://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS-2019.PUB + sudo apt-key add GPG-PUB-KEY-INTEL-SW-PRODUCTS-2019.PUB + sudo sh -c 'echo deb http://apt.repos.intel.com/mkl stable main > /etc/apt/sources.list.d/intel-mkl.list' + sudo apt-get update + sudo apt-get install intel-mkl + fi + ################ # Install CUDA # ################ diff --git a/CMakeLists.txt b/CMakeLists.txt index 6c4c783c967..d83f7ed2f8f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -45,6 +45,7 @@ option(USE_LEVELDB "Use LMDB" ON) option(USE_NCCL "Use NCCL" ON) option(USE_OPENCV "Use openCV" ON) option(USE_CUDA "Use Cuda" ON) +option(USE_CNMEM "Use CNMEM" OFF) option(USE_ZMQ "Use ZMQ" OFF) option(USE_ROCKSDB "Use RocksDB" ON) option(USE_REDIS "Use Redis" OFF) diff --git a/caffe2/contrib/nervana/CMakeLists.txt b/caffe2/contrib/nervana/CMakeLists.txt new file mode 100644 index 00000000000..074ffb6675b --- /dev/null +++ b/caffe2/contrib/nervana/CMakeLists.txt @@ -0,0 +1,12 @@ +if(USE_NERVANA_GPU) + message(STATUS "Include Nervana operators") + set(Caffe2_CONTRIB_NCCL_GPU_SRC + "${CMAKE_CURRENT_SOURCE_DIR}/nervana_c_api.cu" + "${CMAKE_CURRENT_SOURCE_DIR}/nervana_fc_op_gpu.cc" + "${CMAKE_CURRENT_SOURCE_DIR}/nervana_init_gpu.cc" + "${CMAKE_CURRENT_SOURCE_DIR}/nervana_math_gpu.cc" + ) + + set(Caffe2_GPU_SRCS ${Caffe2_GPU_SRCS} ${Caffe2_CONTRIB_NCCL_GPU_SRC}) + set(Caffe2_GPU_SRCS ${Caffe2_GPU_SRCS} PARENT_SCOPE) +endif() diff --git a/caffe2/contrib/nervana/nervana_init_gpu.cc b/caffe2/contrib/nervana/nervana_init_gpu.cc index 994fc97064a..5b7a1ce22b9 100644 --- a/caffe2/contrib/nervana/nervana_init_gpu.cc +++ b/caffe2/contrib/nervana/nervana_init_gpu.cc @@ -27,7 +27,7 @@ bool Caffe2InitializeNervanaKernels(int*, char***) { nervana_loadKernels(FLAGS_nervana_cubin_path.c_str()); if (g_nervana_kernel_loaded) { VLOG(1) << "Loaded nervana kernels from path " - << FLAGS_nervana_cubin_path; + << FLAGS_nervana_cubin_path; } else { // Since this is not a critical error we will just vlog it. VLOG(1) << "Cannot load nervana gpu kernels from path " diff --git a/caffe2/core/context_gpu.cu b/caffe2/core/context_gpu.cu index 132d230aab8..3cae5c4757a 100644 --- a/caffe2/core/context_gpu.cu +++ b/caffe2/core/context_gpu.cu @@ -5,7 +5,9 @@ #include #include "cub/util_allocator.cuh" +#ifdef CAFFE2_USE_CNMEM #include "cnmem.h" +#endif // CAFFE2_USE_CNMEM #include "caffe2/core/asan.h" #include "caffe2/core/context_gpu.h" @@ -55,8 +57,10 @@ thread_local ThreadLocalCUDAObjects CUDAContext::cuda_objects_; // Static global variables for setting up the memory pool. CudaMemoryPoolType g_cuda_memory_pool_type; +#ifdef CAFFE2_USE_CNMEM // For cnmem allocator vector g_cnmem_available_for_device; +#endif // CAFFE2_USE_CNMEM // For cub allocator unique_ptr g_cub_allocator; // an unordered map that holds the map from the cuda memory pointer to the @@ -137,6 +141,7 @@ static void Caffe2InitializeCuda() { ); } +#ifdef CAFFE2_USE_CNMEM static void SetUpCNMEM() { g_cnmem_available_for_device.assign(NumCudaDevices(), false); VLOG(1) << "Setting up cnmem memory pool."; @@ -191,6 +196,7 @@ static void SetUpCNMEM() { cnmemInit(cnmem_devs.size(), cnmem_devs.data(), CNMEM_FLAGS_DEFAULT)); VLOG(1) << "Done setting up cnmem memory pool."; } +#endif // CAFFE2_USE_CNMEM static void SetUpCub() { VLOG(1) << "Setting up cub memory pool."; @@ -220,9 +226,14 @@ static void Caffe2SetCUDAMemoryPool() { FLAGS_caffe2_cuda_memory_pool == "none") { g_cuda_memory_pool_type = CudaMemoryPoolType::NONE; } else if (FLAGS_caffe2_cuda_memory_pool == "cnmem") { +#ifdef CAFFE2_USE_CNMEM // sets up cnmem. g_cuda_memory_pool_type = CudaMemoryPoolType::CNMEM; SetUpCNMEM(); +#else + CAFFE_THROW("This caffe2 is not built with cnmem support, so you should " + "not use the cnmem memory pool type."); +#endif // CAFFE2_USE_CNMEM } else if (FLAGS_caffe2_cuda_memory_pool == "cub") { // Sets up cub. g_cuda_memory_pool_type = CudaMemoryPoolType::CUB; @@ -305,6 +316,7 @@ void* CUDAContext::New(size_t nbytes) { CUDA_CHECK(cudaMalloc(&ptr, nbytes)); return ptr; case CudaMemoryPoolType::CNMEM: { +#ifdef CAFFE2_USE_CNMEM auto gpuId = GetCurrentGPUID(); CAFFE_ENFORCE( gpuId < g_cnmem_available_for_device.size() && @@ -317,6 +329,10 @@ void* CUDAContext::New(size_t nbytes) { VLOG(2) << "CNMEM allocating pointer " << ptr << " on device " << GetCurrentGPUID(); return ptr; +#else + CAFFE_THROW("This caffe2 is not built with cnmem support, so you should " + "not use the cnmem memory pool type."); +#endif // CAFFE2_USE_CNMEM } case CudaMemoryPoolType::CUB: CUDA_CHECK(g_cub_allocator->DeviceAllocate(&ptr, nbytes)); @@ -348,6 +364,7 @@ void CUDAContext::Delete(void* ptr) { } break; } case CudaMemoryPoolType::CNMEM: { +#ifdef CAFFE2_USE_CNMEM auto it = g_cuda_device_affiliation.find(ptr); DCHECK(it != g_cuda_device_affiliation.end()); DeviceGuard guard(it->second); @@ -355,6 +372,10 @@ void CUDAContext::Delete(void* ptr) { CNMEM_CHECK(cnmemFree(ptr, nullptr)); g_cuda_device_affiliation.erase(it); break; +#else + CAFFE_THROW("This caffe2 is not built with cnmem support, so you should " + "not use the cnmem memory pool type."); +#endif // CAFFE2_USE_CNMEM } case CudaMemoryPoolType::CUB: { auto it = g_cuda_device_affiliation.find(ptr); diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake index ee87c790f4e..579934d08a9 100644 --- a/cmake/Dependencies.cmake +++ b/cmake/Dependencies.cmake @@ -269,10 +269,14 @@ if(USE_CUDA) endif() # ---[ CNMEM -if(USE_CUDA) - add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/cnmem) - include_directories(SYSTEM ${PROJECT_SOURCE_DIR}/third_party/cnmem/include) - # message(STATUS "cnmem: ${PROJECT_SOURCE_DIR}/third_party/cnmem/libcnmem.so") - # message(STATUS "${CMAKE_CURRENT_BINARY_DIR}") - list(APPEND Caffe2_DEPENDENCY_LIBS "${CMAKE_CURRENT_BINARY_DIR}/third_party/cnmem/libcnmem.so") +if(USE_CNMEM) + if (NOT USE_CUDA) + message(WARNING "If not using cuda, one should not use CNMEM either.") + set(USE_CNMEM OFF) + else() + add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/cnmem) + include_directories(SYSTEM ${PROJECT_SOURCE_DIR}/third_party/cnmem/include) + list(APPEND Caffe2_DEPENDENCY_LIBS "${CMAKE_CURRENT_BINARY_DIR}/third_party/cnmem/libcnmem.so") + add_definitions(-DCAFFE2_USE_CNMEM) + endif() endif() diff --git a/cmake/Summary.cmake b/cmake/Summary.cmake index 70cd1a2bfb3..344dcd14015 100644 --- a/cmake/Summary.cmake +++ b/cmake/Summary.cmake @@ -35,6 +35,7 @@ function (Caffe2_print_configuration_summary) message(STATUS " USE_CUDA : ${USE_CUDA}") if(${USE_CUDA}) message(STATUS " CUDA version : ${CUDA_VERSION}") + message(STATUS " USE_CNMEM : ${USE_CNMEM}") endif() message(STATUS " USE_NERVANA_GPU : ${USE_NERVANA_GPU}")