Made CNMEM optional and added a few cmake components

Summary:
(1) Since cub seems to be a better memory pool, I made cnmem optional.
(2) Added MKL testing since Intel now provides an apt source, but that doesn't seem to work right now.
(3) Added cmake file for nervana gpu.
Closes https://github.com/caffe2/caffe2/pull/175

Differential Revision: D4627056

Pulled By: Yangqing

fbshipit-source-id: 9676fa32fce2a29574c0bf7e9d31660b5535cb51
This commit is contained in:
Yangqing Jia 2017-02-28 10:15:08 -08:00 committed by Facebook Github Bot
parent b732f347ba
commit e30e94cb71
9 changed files with 70 additions and 8 deletions

View file

@ -20,6 +20,10 @@ matrix:
- os: osx
compiler: clang
env: COMPILER=clang++
# Linux + gcc build against MKL BLAS.
# NOTE: the original entry had two `env:` keys in one mapping; YAML duplicate
# keys mean the second (`BLAS=MKL`) silently replaced the first, dropping
# COMPILER=g++. Both variables belong on a single `env:` line.
- os: linux
  compiler: gcc
  env: COMPILER=g++ BLAS=MKL
- os: linux
compiler: gcc
addons:
@ -53,6 +57,7 @@ matrix:
- env: COMPILER=g++-4.8
- env: BUILD_TARGET=android
- env: BUILD_TARGET=ios
- env: BLAS=MKL
cache:
apt: true

View file

@ -24,5 +24,9 @@ else
#*************#
# Linux build #
#*************#
cmake .. -DCMAKE_VERBOSE_MAKEFILE=ON && make
if [[ $BLAS == 'MKL' ]]; then
cmake .. -DCMAKE_VERBOSE_MAKEFILE=ON -DBLAS=MKL && make
else
cmake .. -DCMAKE_VERBOSE_MAKEFILE=ON && make
fi
fi

View file

@ -47,6 +47,20 @@ else
sudo apt-get install libprotobuf-dev protobuf-compiler libatlas-base-dev libgoogle-glog-dev liblmdb-dev libleveldb-dev libsnappy-dev python-dev python-pip libiomp-dev libopencv-dev libpthread-stubs0-dev
pip install numpy
#########################
# Install MKL if needed #
#########################
if [[ $BLAS == 'MKL' ]]; then
wget http://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS-2019.PUB
sudo apt-key add GPG-PUB-KEY-INTEL-SW-PRODUCTS-2019.PUB
sudo sh -c 'echo deb http://apt.repos.intel.com/mkl stable main > /etc/apt/sources.list.d/intel-mkl.list'
sudo apt-get update
sudo apt-get install intel-mkl
fi
################
# Install CUDA #
################

View file

# Build-time feature toggles; each option(<var> "<help>" <default>) may be
# overridden on the configure command line with -D<var>=ON|OFF.
# Fixed: the USE_LEVELDB help string previously read "Use LMDB" (copy-paste
# error from the LMDB option); it now matches the library it controls.
option(USE_LEVELDB "Use LevelDB" ON)
option(USE_NCCL "Use NCCL" ON)
option(USE_OPENCV "Use OpenCV" ON)
option(USE_CUDA "Use Cuda" ON)
option(USE_CNMEM "Use CNMEM" OFF)
option(USE_ZMQ "Use ZMQ" OFF)
option(USE_ROCKSDB "Use RocksDB" ON)
option(USE_REDIS "Use Redis" OFF)

View file

@ -0,0 +1,12 @@
# Adds the Nervana GPU operator sources to the aggregate CUDA source list
# when configured with -DUSE_NERVANA_GPU=ON.
if(USE_NERVANA_GPU)
  message(STATUS "Include Nervana operators")
  # Fixed: the list was previously named Caffe2_CONTRIB_NCCL_GPU_SRC — a
  # copy-paste from the NCCL contrib CMake file. It holds only Nervana
  # sources, so name it accordingly (the variable is purely local to this
  # file; nothing outside references the old name).
  set(Caffe2_CONTRIB_NERVANA_GPU_SRC
      "${CMAKE_CURRENT_SOURCE_DIR}/nervana_c_api.cu"
      "${CMAKE_CURRENT_SOURCE_DIR}/nervana_fc_op_gpu.cc"
      "${CMAKE_CURRENT_SOURCE_DIR}/nervana_init_gpu.cc"
      "${CMAKE_CURRENT_SOURCE_DIR}/nervana_math_gpu.cc"
  )
  # Append locally, then export the updated list to the parent scope
  # (PARENT_SCOPE alone would not update this directory's own copy).
  set(Caffe2_GPU_SRCS ${Caffe2_GPU_SRCS} ${Caffe2_CONTRIB_NERVANA_GPU_SRC})
  set(Caffe2_GPU_SRCS ${Caffe2_GPU_SRCS} PARENT_SCOPE)
endif()

View file

@ -27,7 +27,7 @@ bool Caffe2InitializeNervanaKernels(int*, char***) {
nervana_loadKernels(FLAGS_nervana_cubin_path.c_str());
if (g_nervana_kernel_loaded) {
VLOG(1) << "Loaded nervana kernels from path "
<< FLAGS_nervana_cubin_path;
<< FLAGS_nervana_cubin_path;
} else {
// Since this is not a critical error we will just vlog it.
VLOG(1) << "Cannot load nervana gpu kernels from path "

View file

@ -5,7 +5,9 @@
#include <unordered_map>
#include "cub/util_allocator.cuh"
#ifdef CAFFE2_USE_CNMEM
#include "cnmem.h"
#endif // CAFFE2_USE_CNMEM
#include "caffe2/core/asan.h"
#include "caffe2/core/context_gpu.h"
@ -55,8 +57,10 @@ thread_local ThreadLocalCUDAObjects CUDAContext::cuda_objects_;
// Static global variables for setting up the memory pool.
CudaMemoryPoolType g_cuda_memory_pool_type;
#ifdef CAFFE2_USE_CNMEM
// For cnmem allocator
vector<bool> g_cnmem_available_for_device;
#endif // CAFFE2_USE_CNMEM
// For cub allocator
unique_ptr<cub::CachingDeviceAllocator> g_cub_allocator;
// an unordered map that holds the map from the cuda memory pointer to the
@ -137,6 +141,7 @@ static void Caffe2InitializeCuda() {
);
}
#ifdef CAFFE2_USE_CNMEM
static void SetUpCNMEM() {
g_cnmem_available_for_device.assign(NumCudaDevices(), false);
VLOG(1) << "Setting up cnmem memory pool.";
@ -191,6 +196,7 @@ static void SetUpCNMEM() {
cnmemInit(cnmem_devs.size(), cnmem_devs.data(), CNMEM_FLAGS_DEFAULT));
VLOG(1) << "Done setting up cnmem memory pool.";
}
#endif // CAFFE2_USE_CNMEM
static void SetUpCub() {
VLOG(1) << "Setting up cub memory pool.";
@ -220,9 +226,14 @@ static void Caffe2SetCUDAMemoryPool() {
FLAGS_caffe2_cuda_memory_pool == "none") {
g_cuda_memory_pool_type = CudaMemoryPoolType::NONE;
} else if (FLAGS_caffe2_cuda_memory_pool == "cnmem") {
#ifdef CAFFE2_USE_CNMEM
// sets up cnmem.
g_cuda_memory_pool_type = CudaMemoryPoolType::CNMEM;
SetUpCNMEM();
#else
CAFFE_THROW("This caffe2 is not built with cnmem support, so you should "
"not use the cnmem memory pool type.");
#endif // CAFFE2_USE_CNMEM
} else if (FLAGS_caffe2_cuda_memory_pool == "cub") {
// Sets up cub.
g_cuda_memory_pool_type = CudaMemoryPoolType::CUB;
@ -305,6 +316,7 @@ void* CUDAContext::New(size_t nbytes) {
CUDA_CHECK(cudaMalloc(&ptr, nbytes));
return ptr;
case CudaMemoryPoolType::CNMEM: {
#ifdef CAFFE2_USE_CNMEM
auto gpuId = GetCurrentGPUID();
CAFFE_ENFORCE(
gpuId < g_cnmem_available_for_device.size() &&
@ -317,6 +329,10 @@ void* CUDAContext::New(size_t nbytes) {
VLOG(2) << "CNMEM allocating pointer " << ptr << " on device "
<< GetCurrentGPUID();
return ptr;
#else
CAFFE_THROW("This caffe2 is not built with cnmem support, so you should "
"not use the cnmem memory pool type.");
#endif // CAFFE2_USE_CNMEM
}
case CudaMemoryPoolType::CUB:
CUDA_CHECK(g_cub_allocator->DeviceAllocate(&ptr, nbytes));
@ -348,6 +364,7 @@ void CUDAContext::Delete(void* ptr) {
}
break; }
case CudaMemoryPoolType::CNMEM: {
#ifdef CAFFE2_USE_CNMEM
auto it = g_cuda_device_affiliation.find(ptr);
DCHECK(it != g_cuda_device_affiliation.end());
DeviceGuard guard(it->second);
@ -355,6 +372,10 @@ void CUDAContext::Delete(void* ptr) {
CNMEM_CHECK(cnmemFree(ptr, nullptr));
g_cuda_device_affiliation.erase(it);
break;
#else
CAFFE_THROW("This caffe2 is not built with cnmem support, so you should "
"not use the cnmem memory pool type.");
#endif // CAFFE2_USE_CNMEM
}
case CudaMemoryPoolType::CUB: {
auto it = g_cuda_device_affiliation.find(ptr);

View file

@ -269,10 +269,14 @@ if(USE_CUDA)
endif()
# ---[ CNMEM
if(USE_CUDA)
add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/cnmem)
include_directories(SYSTEM ${PROJECT_SOURCE_DIR}/third_party/cnmem/include)
# message(STATUS "cnmem: ${PROJECT_SOURCE_DIR}/third_party/cnmem/libcnmem.so")
# message(STATUS "${CMAKE_CURRENT_BINARY_DIR}")
list(APPEND Caffe2_DEPENDENCY_LIBS "${CMAKE_CURRENT_BINARY_DIR}/third_party/cnmem/libcnmem.so")
if(USE_CNMEM)
if (NOT USE_CUDA)
message(WARNING "If not using cuda, one should not use CNMEM either.")
set(USE_CNMEM OFF)
else()
add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/cnmem)
include_directories(SYSTEM ${PROJECT_SOURCE_DIR}/third_party/cnmem/include)
list(APPEND Caffe2_DEPENDENCY_LIBS "${CMAKE_CURRENT_BINARY_DIR}/third_party/cnmem/libcnmem.so")
add_definitions(-DCAFFE2_USE_CNMEM)
endif()
endif()

View file

@ -35,6 +35,7 @@ function (Caffe2_print_configuration_summary)
message(STATUS " USE_CUDA : ${USE_CUDA}")
if(${USE_CUDA})
message(STATUS " CUDA version : ${CUDA_VERSION}")
message(STATUS " USE_CNMEM : ${USE_CNMEM}")
endif()
message(STATUS " USE_NERVANA_GPU : ${USE_NERVANA_GPU}")