mirror of
https://github.com/saymrwulf/pytorch.git
synced 2026-05-14 20:57:59 +00:00
Made CNMEM optional and added a few cmake components
Summary: (1) Since cub seems to be a better memory pool, I made cnmem optional. (2) Added MKL testing, since Intel now provides an apt source, although that does not seem to work right now. (3) Added a cmake file for nervana gpu. Closes https://github.com/caffe2/caffe2/pull/175 Differential Revision: D4627056 Pulled By: Yangqing fbshipit-source-id: 9676fa32fce2a29574c0bf7e9d31660b5535cb51
This commit is contained in:
parent
b732f347ba
commit
e30e94cb71
9 changed files with 70 additions and 8 deletions
|
|
@ -20,6 +20,10 @@ matrix:
|
|||
- os: osx
|
||||
compiler: clang
|
||||
env: COMPILER=clang++
|
||||
- os: linux
|
||||
compiler: gcc
|
||||
env: COMPILER=g++
|
||||
env: BLAS=MKL
|
||||
- os: linux
|
||||
compiler: gcc
|
||||
addons:
|
||||
|
|
@ -53,6 +57,7 @@ matrix:
|
|||
- env: COMPILER=g++-4.8
|
||||
- env: BUILD_TARGET=android
|
||||
- env: BUILD_TARGET=ios
|
||||
- env: BLAS=MKL
|
||||
|
||||
cache:
|
||||
apt: true
|
||||
|
|
|
|||
|
|
@ -24,5 +24,9 @@ else
|
|||
#*************#
|
||||
# Linux build #
|
||||
#*************#
|
||||
cmake .. -DCMAKE_VERBOSE_MAKEFILE=ON && make
|
||||
# Choose the cmake configuration from the BLAS environment variable:
# pass -DBLAS=MKL when it equals 'MKL', otherwise configure with defaults.
# In both branches `make` runs only if the cmake step succeeds (&&).
if [[ $BLAS == 'MKL' ]]; then
|
||||
cmake .. -DCMAKE_VERBOSE_MAKEFILE=ON -DBLAS=MKL && make
|
||||
else
|
||||
cmake .. -DCMAKE_VERBOSE_MAKEFILE=ON && make
|
||||
fi
|
||||
fi
|
||||
|
|
|
|||
|
|
@ -47,6 +47,20 @@ else
|
|||
sudo apt-get install libprotobuf-dev protobuf-compiler libatlas-base-dev libgoogle-glog-dev liblmdb-dev libleveldb-dev libsnappy-dev python-dev python-pip libiomp-dev libopencv-dev libpthread-stubs0-dev
|
||||
pip install numpy
|
||||
|
||||
|
||||
#########################
|
||||
# Install MKL if needed #
|
||||
#########################
|
||||
|
||||
|
||||
# Runs only when the BLAS environment variable equals 'MKL': registers
# Intel's apt repository and installs the intel-mkl package from it.
if [[ $BLAS == 'MKL' ]]; then
|
||||
# Download Intel's GPG public key into the current directory.
# NOTE(review): fetched over plain http with no checksum — confirm this is acceptable.
wget http://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS-2019.PUB
|
||||
# Add the downloaded key to apt's keyring.
sudo apt-key add GPG-PUB-KEY-INTEL-SW-PRODUCTS-2019.PUB
|
||||
# The redirect sits inside the quoted command so that the root shell started
# by `sudo sh -c` is the one writing the sources.list.d entry.
sudo sh -c 'echo deb http://apt.repos.intel.com/mkl stable main > /etc/apt/sources.list.d/intel-mkl.list'
|
||||
# Refresh package lists so the newly added repository is visible.
sudo apt-get update
|
||||
# NOTE(review): no -y flag here; presumably this can stop at a confirmation
# prompt in a non-interactive run — TODO confirm.
sudo apt-get install intel-mkl
|
||||
fi
|
||||
|
||||
################
|
||||
# Install CUDA #
|
||||
################
|
||||
|
|
|
|||
|
|
@ -45,6 +45,7 @@ option(USE_LEVELDB "Use LMDB" ON)
|
|||
option(USE_NCCL "Use NCCL" ON)
|
||||
option(USE_OPENCV "Use openCV" ON)
|
||||
option(USE_CUDA "Use Cuda" ON)
|
||||
option(USE_CNMEM "Use CNMEM" OFF)
|
||||
option(USE_ZMQ "Use ZMQ" OFF)
|
||||
option(USE_ROCKSDB "Use RocksDB" ON)
|
||||
option(USE_REDIS "Use Redis" OFF)
|
||||
|
|
|
|||
12
caffe2/contrib/nervana/CMakeLists.txt
Normal file
12
caffe2/contrib/nervana/CMakeLists.txt
Normal file
|
|
@ -0,0 +1,12 @@
|
|||
# When USE_NERVANA_GPU is set, add the Nervana GPU sources in this directory
# to Caffe2_GPU_SRCS and publish the updated list to the parent scope.
if(USE_NERVANA_GPU)
|
||||
message(STATUS "Include Nervana operators")
|
||||
# NOTE(review): the variable name says NCCL, but every file listed below is a
# Nervana source; it is only referenced within this file.
set(Caffe2_CONTRIB_NCCL_GPU_SRC
|
||||
"${CMAKE_CURRENT_SOURCE_DIR}/nervana_c_api.cu"
|
||||
"${CMAKE_CURRENT_SOURCE_DIR}/nervana_fc_op_gpu.cc"
|
||||
"${CMAKE_CURRENT_SOURCE_DIR}/nervana_init_gpu.cc"
|
||||
"${CMAKE_CURRENT_SOURCE_DIR}/nervana_math_gpu.cc"
|
||||
)
|
||||
|
||||
# Append the files above to the GPU source list in the current scope.
set(Caffe2_GPU_SRCS ${Caffe2_GPU_SRCS} ${Caffe2_CONTRIB_NCCL_GPU_SRC})
|
||||
# Re-set the variable with PARENT_SCOPE so the including directory sees the
# extended list.
set(Caffe2_GPU_SRCS ${Caffe2_GPU_SRCS} PARENT_SCOPE)
|
||||
endif()
|
||||
|
|
@ -27,7 +27,7 @@ bool Caffe2InitializeNervanaKernels(int*, char***) {
|
|||
nervana_loadKernels(FLAGS_nervana_cubin_path.c_str());
|
||||
if (g_nervana_kernel_loaded) {
|
||||
VLOG(1) << "Loaded nervana kernels from path "
|
||||
<< FLAGS_nervana_cubin_path;
|
||||
<< FLAGS_nervana_cubin_path;
|
||||
} else {
|
||||
// Since this is not a critical error we will just vlog it.
|
||||
VLOG(1) << "Cannot load nervana gpu kernels from path "
|
||||
|
|
|
|||
|
|
@ -5,7 +5,9 @@
|
|||
#include <unordered_map>
|
||||
|
||||
#include "cub/util_allocator.cuh"
|
||||
#ifdef CAFFE2_USE_CNMEM
|
||||
#include "cnmem.h"
|
||||
#endif // CAFFE2_USE_CNMEM
|
||||
|
||||
#include "caffe2/core/asan.h"
|
||||
#include "caffe2/core/context_gpu.h"
|
||||
|
|
@ -55,8 +57,10 @@ thread_local ThreadLocalCUDAObjects CUDAContext::cuda_objects_;
|
|||
|
||||
// Static global variables for setting up the memory pool.
|
||||
CudaMemoryPoolType g_cuda_memory_pool_type;
|
||||
#ifdef CAFFE2_USE_CNMEM
|
||||
// For cnmem allocator
|
||||
vector<bool> g_cnmem_available_for_device;
|
||||
#endif // CAFFE2_USE_CNMEM
|
||||
// For cub allocator
|
||||
unique_ptr<cub::CachingDeviceAllocator> g_cub_allocator;
|
||||
// an unordered map that holds the map from the cuda memory pointer to the
|
||||
|
|
@ -137,6 +141,7 @@ static void Caffe2InitializeCuda() {
|
|||
);
|
||||
}
|
||||
|
||||
#ifdef CAFFE2_USE_CNMEM
|
||||
static void SetUpCNMEM() {
|
||||
g_cnmem_available_for_device.assign(NumCudaDevices(), false);
|
||||
VLOG(1) << "Setting up cnmem memory pool.";
|
||||
|
|
@ -191,6 +196,7 @@ static void SetUpCNMEM() {
|
|||
cnmemInit(cnmem_devs.size(), cnmem_devs.data(), CNMEM_FLAGS_DEFAULT));
|
||||
VLOG(1) << "Done setting up cnmem memory pool.";
|
||||
}
|
||||
#endif // CAFFE2_USE_CNMEM
|
||||
|
||||
static void SetUpCub() {
|
||||
VLOG(1) << "Setting up cub memory pool.";
|
||||
|
|
@ -220,9 +226,14 @@ static void Caffe2SetCUDAMemoryPool() {
|
|||
FLAGS_caffe2_cuda_memory_pool == "none") {
|
||||
g_cuda_memory_pool_type = CudaMemoryPoolType::NONE;
|
||||
} else if (FLAGS_caffe2_cuda_memory_pool == "cnmem") {
|
||||
#ifdef CAFFE2_USE_CNMEM
|
||||
// sets up cnmem.
|
||||
g_cuda_memory_pool_type = CudaMemoryPoolType::CNMEM;
|
||||
SetUpCNMEM();
|
||||
#else
|
||||
CAFFE_THROW("This caffe2 is not built with cnmem support, so you should "
|
||||
"not use the cnmem memory pool type.");
|
||||
#endif // CAFFE2_USE_CNMEM
|
||||
} else if (FLAGS_caffe2_cuda_memory_pool == "cub") {
|
||||
// Sets up cub.
|
||||
g_cuda_memory_pool_type = CudaMemoryPoolType::CUB;
|
||||
|
|
@ -305,6 +316,7 @@ void* CUDAContext::New(size_t nbytes) {
|
|||
CUDA_CHECK(cudaMalloc(&ptr, nbytes));
|
||||
return ptr;
|
||||
case CudaMemoryPoolType::CNMEM: {
|
||||
#ifdef CAFFE2_USE_CNMEM
|
||||
auto gpuId = GetCurrentGPUID();
|
||||
CAFFE_ENFORCE(
|
||||
gpuId < g_cnmem_available_for_device.size() &&
|
||||
|
|
@ -317,6 +329,10 @@ void* CUDAContext::New(size_t nbytes) {
|
|||
VLOG(2) << "CNMEM allocating pointer " << ptr << " on device "
|
||||
<< GetCurrentGPUID();
|
||||
return ptr;
|
||||
#else
|
||||
CAFFE_THROW("This caffe2 is not built with cnmem support, so you should "
|
||||
"not use the cnmem memory pool type.");
|
||||
#endif // CAFFE2_USE_CNMEM
|
||||
}
|
||||
case CudaMemoryPoolType::CUB:
|
||||
CUDA_CHECK(g_cub_allocator->DeviceAllocate(&ptr, nbytes));
|
||||
|
|
@ -348,6 +364,7 @@ void CUDAContext::Delete(void* ptr) {
|
|||
}
|
||||
break; }
|
||||
case CudaMemoryPoolType::CNMEM: {
|
||||
#ifdef CAFFE2_USE_CNMEM
|
||||
auto it = g_cuda_device_affiliation.find(ptr);
|
||||
DCHECK(it != g_cuda_device_affiliation.end());
|
||||
DeviceGuard guard(it->second);
|
||||
|
|
@ -355,6 +372,10 @@ void CUDAContext::Delete(void* ptr) {
|
|||
CNMEM_CHECK(cnmemFree(ptr, nullptr));
|
||||
g_cuda_device_affiliation.erase(it);
|
||||
break;
|
||||
#else
|
||||
CAFFE_THROW("This caffe2 is not built with cnmem support, so you should "
|
||||
"not use the cnmem memory pool type.");
|
||||
#endif // CAFFE2_USE_CNMEM
|
||||
}
|
||||
case CudaMemoryPoolType::CUB: {
|
||||
auto it = g_cuda_device_affiliation.find(ptr);
|
||||
|
|
|
|||
|
|
@ -269,10 +269,14 @@ if(USE_CUDA)
|
|||
endif()
|
||||
|
||||
# ---[ CNMEM
|
||||
if(USE_CUDA)
|
||||
add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/cnmem)
|
||||
include_directories(SYSTEM ${PROJECT_SOURCE_DIR}/third_party/cnmem/include)
|
||||
# message(STATUS "cnmem: ${PROJECT_SOURCE_DIR}/third_party/cnmem/libcnmem.so")
|
||||
# message(STATUS "${CMAKE_CURRENT_BINARY_DIR}")
|
||||
list(APPEND Caffe2_DEPENDENCY_LIBS "${CMAKE_CURRENT_BINARY_DIR}/third_party/cnmem/libcnmem.so")
|
||||
if(USE_CNMEM)
|
||||
if (NOT USE_CUDA)
|
||||
message(WARNING "If not using cuda, one should not use CNMEM either.")
|
||||
set(USE_CNMEM OFF)
|
||||
else()
|
||||
add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/cnmem)
|
||||
include_directories(SYSTEM ${PROJECT_SOURCE_DIR}/third_party/cnmem/include)
|
||||
list(APPEND Caffe2_DEPENDENCY_LIBS "${CMAKE_CURRENT_BINARY_DIR}/third_party/cnmem/libcnmem.so")
|
||||
add_definitions(-DCAFFE2_USE_CNMEM)
|
||||
endif()
|
||||
endif()
|
||||
|
|
|
|||
|
|
@ -35,6 +35,7 @@ function (Caffe2_print_configuration_summary)
|
|||
message(STATUS " USE_CUDA : ${USE_CUDA}")
|
||||
if(${USE_CUDA})
|
||||
message(STATUS " CUDA version : ${CUDA_VERSION}")
|
||||
message(STATUS " USE_CNMEM : ${USE_CNMEM}")
|
||||
endif()
|
||||
|
||||
message(STATUS " USE_NERVANA_GPU : ${USE_NERVANA_GPU}")
|
||||
|
|
|
|||
Loading…
Reference in a new issue