From e30e94cb715cfb8a1e3d92d8e0e9ef474dc26ddf Mon Sep 17 00:00:00 2001
From: Yangqing Jia <jiayq84@gmail.com>
Date: Tue, 28 Feb 2017 10:15:08 -0800
Subject: [PATCH] Made CNMEM optional and added a few cmake components

Summary:
(1) Since cub seems to be a better memory pool, I made cnmem optional.
(2) Added MKL testing on Travis since Intel now provides an apt source, although the MKL build does not seem to work yet.
(3) Added cmake file for nervana gpu.
Closes https://github.com/caffe2/caffe2/pull/175

Differential Revision: D4627056

Pulled By: Yangqing

fbshipit-source-id: 9676fa32fce2a29574c0bf7e9d31660b5535cb51
---
 .travis.yml                                |  5 +++++
 .travis/build.sh                           |  6 +++++-
 .travis/install.sh                         | 14 ++++++++++++++
 CMakeLists.txt                             |  1 +
 caffe2/contrib/nervana/CMakeLists.txt      | 12 ++++++++++++
 caffe2/contrib/nervana/nervana_init_gpu.cc |  2 +-
 caffe2/core/context_gpu.cu                 | 21 +++++++++++++++++++++
 cmake/Dependencies.cmake                   | 16 ++++++++++------
 cmake/Summary.cmake                        |  1 +
 9 files changed, 70 insertions(+), 8 deletions(-)
 create mode 100644 caffe2/contrib/nervana/CMakeLists.txt

diff --git a/.travis.yml b/.travis.yml
index 06e5e12c6c6..4d99959c175 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -20,6 +20,10 @@ matrix:
     - os: osx
       compiler: clang
       env: COMPILER=clang++
+    # MKL job; both variables share one env line because YAML keys must be unique.
+    - os: linux
+      compiler: gcc
+      env: COMPILER=g++ BLAS=MKL
     - os: linux
       compiler: gcc
       addons:
@@ -53,6 +57,7 @@ matrix:
     - env: COMPILER=g++-4.8
     - env: BUILD_TARGET=android
     - env: BUILD_TARGET=ios
+    - env: COMPILER=g++ BLAS=MKL
 
 cache:
   apt: true
diff --git a/.travis/build.sh b/.travis/build.sh
index 277792f2465..ebe49a72323 100755
--- a/.travis/build.sh
+++ b/.travis/build.sh
@@ -24,5 +24,9 @@ else
 #*************#
 # Linux build #
 #*************#
-  cmake .. -DCMAKE_VERBOSE_MAKEFILE=ON && make
+  cmake_flags=(-DCMAKE_VERBOSE_MAKEFILE=ON)
+  if [[ $BLAS == 'MKL' ]]; then
+    cmake_flags+=(-DBLAS=MKL)  # build against MKL when the job requests it
+  fi
+  cmake .. "${cmake_flags[@]}" && make
 fi
diff --git a/.travis/install.sh b/.travis/install.sh
index 516fa5e196f..8bb17b21126 100755
--- a/.travis/install.sh
+++ b/.travis/install.sh
@@ -47,6 +47,20 @@ else
   sudo apt-get install libprotobuf-dev protobuf-compiler libatlas-base-dev libgoogle-glog-dev liblmdb-dev libleveldb-dev libsnappy-dev python-dev python-pip libiomp-dev libopencv-dev libpthread-stubs0-dev
   pip install numpy
 
+
+  #########################
+  # Install MKL if needed #
+  #########################
+
+  # The signing key is the trust anchor for the apt source, so fetch it over HTTPS.
+  if [[ $BLAS == 'MKL' ]]; then
+    wget https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS-2019.PUB
+    sudo apt-key add GPG-PUB-KEY-INTEL-SW-PRODUCTS-2019.PUB
+    sudo sh -c 'echo deb http://apt.repos.intel.com/mkl stable main > /etc/apt/sources.list.d/intel-mkl.list'
+    sudo apt-get update
+    sudo apt-get install intel-mkl
+  fi
+
   ################
   # Install CUDA #
   ################
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 6c4c783c967..d83f7ed2f8f 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -45,6 +45,7 @@ option(USE_LEVELDB "Use LMDB" ON)
 option(USE_NCCL "Use NCCL" ON)
 option(USE_OPENCV "Use openCV" ON)
 option(USE_CUDA "Use Cuda" ON)
+option(USE_CNMEM "Use CNMEM" OFF)  # off by default: cub is the preferred memory pool
 option(USE_ZMQ "Use ZMQ" OFF)
 option(USE_ROCKSDB "Use RocksDB" ON)
 option(USE_REDIS "Use Redis" OFF)
diff --git a/caffe2/contrib/nervana/CMakeLists.txt b/caffe2/contrib/nervana/CMakeLists.txt
new file mode 100644
index 00000000000..074ffb6675b
--- /dev/null
+++ b/caffe2/contrib/nervana/CMakeLists.txt
@@ -0,0 +1,12 @@
+if(USE_NERVANA_GPU)
+  message(STATUS "Include Nervana operators")
+  set(Caffe2_CONTRIB_NERVANA_GPU_SRC
+    "${CMAKE_CURRENT_SOURCE_DIR}/nervana_c_api.cu"
+    "${CMAKE_CURRENT_SOURCE_DIR}/nervana_fc_op_gpu.cc"
+    "${CMAKE_CURRENT_SOURCE_DIR}/nervana_init_gpu.cc"
+    "${CMAKE_CURRENT_SOURCE_DIR}/nervana_math_gpu.cc"
+  )
+  # Add the Nervana sources to the GPU source list and export it to the parent scope.
+  set(Caffe2_GPU_SRCS ${Caffe2_GPU_SRCS} ${Caffe2_CONTRIB_NERVANA_GPU_SRC})
+  set(Caffe2_GPU_SRCS ${Caffe2_GPU_SRCS} PARENT_SCOPE)
+endif()
diff --git a/caffe2/contrib/nervana/nervana_init_gpu.cc b/caffe2/contrib/nervana/nervana_init_gpu.cc
index 994fc97064a..5b7a1ce22b9 100644
--- a/caffe2/contrib/nervana/nervana_init_gpu.cc
+++ b/caffe2/contrib/nervana/nervana_init_gpu.cc
@@ -27,7 +27,7 @@ bool Caffe2InitializeNervanaKernels(int*, char***) {
       nervana_loadKernels(FLAGS_nervana_cubin_path.c_str());
   if (g_nervana_kernel_loaded) {
     VLOG(1) << "Loaded nervana kernels from path "
-                  << FLAGS_nervana_cubin_path;
+            << FLAGS_nervana_cubin_path;
   } else {
     // Since this is not a critical error we will just vlog it.
     VLOG(1) << "Cannot load nervana gpu kernels from path "
diff --git a/caffe2/core/context_gpu.cu b/caffe2/core/context_gpu.cu
index 132d230aab8..3cae5c4757a 100644
--- a/caffe2/core/context_gpu.cu
+++ b/caffe2/core/context_gpu.cu
@@ -5,7 +5,9 @@
 #include <unordered_map>
 
 #include "cub/util_allocator.cuh"
+#ifdef CAFFE2_USE_CNMEM
 #include "cnmem.h"
+#endif // CAFFE2_USE_CNMEM
 
 #include "caffe2/core/asan.h"
 #include "caffe2/core/context_gpu.h"
@@ -55,8 +57,10 @@ thread_local ThreadLocalCUDAObjects CUDAContext::cuda_objects_;
 
 // Static global variables for setting up the memory pool.
 CudaMemoryPoolType g_cuda_memory_pool_type;
+#ifdef CAFFE2_USE_CNMEM
 // For cnmem allocator
 vector<bool> g_cnmem_available_for_device;
+#endif // CAFFE2_USE_CNMEM
 // For cub allocator
 unique_ptr<cub::CachingDeviceAllocator> g_cub_allocator;
 // an unordered map that holds the map from the cuda memory pointer to the
@@ -137,6 +141,7 @@ static void Caffe2InitializeCuda() {
   );
 }
 
+#ifdef CAFFE2_USE_CNMEM
 static void SetUpCNMEM() {
   g_cnmem_available_for_device.assign(NumCudaDevices(), false);
   VLOG(1) << "Setting up cnmem memory pool.";
@@ -191,6 +196,7 @@ static void SetUpCNMEM() {
       cnmemInit(cnmem_devs.size(), cnmem_devs.data(), CNMEM_FLAGS_DEFAULT));
   VLOG(1) << "Done setting up cnmem memory pool.";
 }
+#endif // CAFFE2_USE_CNMEM
 
 static void SetUpCub() {
   VLOG(1) << "Setting up cub memory pool.";
@@ -220,9 +226,14 @@ static void Caffe2SetCUDAMemoryPool() {
       FLAGS_caffe2_cuda_memory_pool == "none") {
     g_cuda_memory_pool_type = CudaMemoryPoolType::NONE;
   } else if (FLAGS_caffe2_cuda_memory_pool == "cnmem") {
+#ifdef CAFFE2_USE_CNMEM
     // sets up cnmem.
     g_cuda_memory_pool_type = CudaMemoryPoolType::CNMEM;
     SetUpCNMEM();
+#else
+    CAFFE_THROW("This caffe2 is not built with cnmem support, so you should "
+                "not use the cnmem memory pool type.");
+#endif // CAFFE2_USE_CNMEM
   } else if (FLAGS_caffe2_cuda_memory_pool == "cub") {
     // Sets up cub.
     g_cuda_memory_pool_type = CudaMemoryPoolType::CUB;
@@ -305,6 +316,7 @@ void* CUDAContext::New(size_t nbytes) {
     CUDA_CHECK(cudaMalloc(&ptr, nbytes));
     return ptr;
   case CudaMemoryPoolType::CNMEM: {
+#ifdef CAFFE2_USE_CNMEM
     auto gpuId = GetCurrentGPUID();
     CAFFE_ENFORCE(
         gpuId < g_cnmem_available_for_device.size() &&
@@ -317,6 +329,10 @@ void* CUDAContext::New(size_t nbytes) {
     VLOG(2) << "CNMEM allocating pointer " << ptr << " on device "
             << GetCurrentGPUID();
     return ptr;
+#else
+    CAFFE_THROW("This caffe2 is not built with cnmem support, so you should "
+                "not use the cnmem memory pool type.");
+#endif // CAFFE2_USE_CNMEM
   }
   case CudaMemoryPoolType::CUB:
     CUDA_CHECK(g_cub_allocator->DeviceAllocate(&ptr, nbytes));
@@ -348,6 +364,7 @@ void CUDAContext::Delete(void* ptr) {
     }
     break; }
   case CudaMemoryPoolType::CNMEM: {
+#ifdef CAFFE2_USE_CNMEM
     auto it = g_cuda_device_affiliation.find(ptr);
     DCHECK(it != g_cuda_device_affiliation.end());
     DeviceGuard guard(it->second);
@@ -355,6 +372,10 @@ void CUDAContext::Delete(void* ptr) {
     CNMEM_CHECK(cnmemFree(ptr, nullptr));
     g_cuda_device_affiliation.erase(it);
     break;
+#else
+    CAFFE_THROW("This caffe2 is not built with cnmem support, so you should "
+                "not use the cnmem memory pool type.");
+#endif // CAFFE2_USE_CNMEM
   }
   case CudaMemoryPoolType::CUB: {
     auto it = g_cuda_device_affiliation.find(ptr);
diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake
index ee87c790f4e..579934d08a9 100644
--- a/cmake/Dependencies.cmake
+++ b/cmake/Dependencies.cmake
@@ -269,10 +269,14 @@ if(USE_CUDA)
 endif()
 
 # ---[ CNMEM
-if(USE_CUDA)
-  add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/cnmem)
-  include_directories(SYSTEM ${PROJECT_SOURCE_DIR}/third_party/cnmem/include)
-  # message(STATUS "cnmem: ${PROJECT_SOURCE_DIR}/third_party/cnmem/libcnmem.so")
-  # message(STATUS "${CMAKE_CURRENT_BINARY_DIR}")
-  list(APPEND Caffe2_DEPENDENCY_LIBS "${CMAKE_CURRENT_BINARY_DIR}/third_party/cnmem/libcnmem.so")
+# CNMEM is only set up for CUDA builds; drop the request when CUDA is disabled.
+if(USE_CNMEM AND NOT USE_CUDA)
+  message(WARNING "If not using cuda, one should not use CNMEM either.")
+  set(USE_CNMEM OFF)
+endif()
+if(USE_CNMEM)
+  add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/cnmem)
+  include_directories(SYSTEM ${PROJECT_SOURCE_DIR}/third_party/cnmem/include)
+  list(APPEND Caffe2_DEPENDENCY_LIBS "${CMAKE_CURRENT_BINARY_DIR}/third_party/cnmem/libcnmem.so")
+  add_definitions(-DCAFFE2_USE_CNMEM)
 endif()
diff --git a/cmake/Summary.cmake b/cmake/Summary.cmake
index 70cd1a2bfb3..344dcd14015 100644
--- a/cmake/Summary.cmake
+++ b/cmake/Summary.cmake
@@ -35,6 +35,7 @@ function (Caffe2_print_configuration_summary)
   message(STATUS "  USE_CUDA              : ${USE_CUDA}")
   if(${USE_CUDA})
   message(STATUS "    CUDA version        : ${CUDA_VERSION}")
+  message(STATUS "  USE_CNMEM             : ${USE_CNMEM}")
   endif()
 
   message(STATUS "  USE_NERVANA_GPU       : ${USE_NERVANA_GPU}")