Remove cupti library dependency from core library

This commit is contained in:
Ryan Hill 2021-05-19 13:00:37 -07:00
parent c569dee80a
commit 6a889ee8bf
5 changed files with 61 additions and 2 deletions

View file

@ -92,8 +92,6 @@ onnxruntime_add_static_library(onnxruntime_common ${onnxruntime_common_src})
# CUDA builds only: expose the CUDA toolkit and CUPTI headers to onnxruntime_common
# (PUBLIC, so they also reach targets that link it), add the CUPTI lib64 directory
# to the link search path, and link the cupti library.
# NOTE(review): target_link_libraries below uses the keyword-less signature; CMake
# does not allow mixing it with the PUBLIC/PRIVATE form on the same target, so it is
# kept as-is here.
if (onnxruntime_USE_CUDA)
  target_include_directories(onnxruntime_common
    PUBLIC
      ${onnxruntime_CUDA_HOME}/include
      ${onnxruntime_CUDA_HOME}/extras/CUPTI/include
  )
  target_link_directories(onnxruntime_common
    PUBLIC
      ${onnxruntime_CUDA_HOME}/extras/CUPTI/lib64
  )
  target_link_libraries(onnxruntime_common cupti)
endif()
if (onnxruntime_USE_TELEMETRY)

View file

@ -316,6 +316,11 @@ if (onnxruntime_USE_CUDA)
# Link the provider with the CUDA linker language and group it under the
# "ONNXRuntime" folder.
set_target_properties(onnxruntime_providers_cuda
  PROPERTIES
    LINKER_LANGUAGE CUDA
    FOLDER "ONNXRuntime"
)

# CUPTI profiling dependency: headers and library search path are PUBLIC,
# the cupti library itself is linked PRIVATE to the CUDA provider.
target_include_directories(onnxruntime_providers_cuda
  PUBLIC
    ${onnxruntime_CUDA_HOME}/include
    ${onnxruntime_CUDA_HOME}/extras/CUPTI/include
)
target_link_directories(onnxruntime_providers_cuda
  PUBLIC
    ${onnxruntime_CUDA_HOME}/extras/CUPTI/lib64
)
target_link_libraries(onnxruntime_providers_cuda
  PRIVATE
    cupti
)

# CUDA compilers older than version 11 get the in-tree copy of cub on the include path.
if (CMAKE_CUDA_COMPILER_VERSION VERSION_LESS 11)
  target_include_directories(onnxruntime_providers_cuda
    PRIVATE
      ${PROJECT_SOURCE_DIR}/external/cub
  )
endif()

View file

@ -40,6 +40,11 @@ struct ProviderInfo_CUDA {
virtual int cudaGetDeviceCount() = 0;
virtual void CUDAExecutionProviderInfo__FromProviderOptions(const onnxruntime::ProviderOptions& options, onnxruntime::CUDAExecutionProviderInfo& info) = 0;
// CUPTI activity API entry points exposed through this interface.
// The signatures use plain int / void* / fixed-width integer types rather than
// CUPTI's own enum, record and callback types.
// NOTE(review): presumably this keeps <cupti.h> out of this header, with the int
// result carrying a CUptiResult value and the void* arguments carrying
// CUpti_Activity** / CUPTI callback pointers -- confirm against the implementation.
virtual int cuptiActivityEnable(int kind) = 0;
virtual int cuptiActivityGetNextRecord(uint8_t* buffer, size_t validBufferSizeBytes, void* record) = 0;
virtual int cuptiActivityRegisterCallbacks(void* funcBufferRequested, void* funcBufferCompleted) = 0;
virtual int cuptiActivityFlushAll(uint32_t flag) = 0;
// Only declared when USE_CUDA, ORT_USE_NCCL and USE_NCCL_P2P are all defined.
#if defined(USE_CUDA) && defined(ORT_USE_NCCL) && defined(USE_NCCL_P2P)
virtual onnxruntime::cuda::INcclService& GetINcclService() = 0;
#endif

View file

@ -54,6 +54,10 @@ Status LongformerAttentionBase__CheckInputs(const LongformerAttentionBase* p, co
#include "contrib_ops/cpu/bert/attention_base.h"
#endif
#ifdef USE_CUDA
#include <cupti.h>
#endif
#ifdef ENABLE_TRAINING
#include "orttraining/training_ops/cpu/aten_ops/aten_op.h"
#include "orttraining/training_ops/cpu/controlflow/group.h"
@ -1081,6 +1085,36 @@ INcclService& INcclService::GetInstance() {
} // namespace onnxruntime
#if defined(USE_CUDA)
// Local definition of cuptiActivityEnable that forwards the call to the CUDA
// provider info object.
// Returns CUPTI_ERROR_NOT_SUPPORTED when GetProviderInfo_CUDA() yields null;
// otherwise returns the provider's int result converted to CUptiResult.
CUptiResult CUPTIAPI cuptiActivityEnable(CUpti_ActivityKind kind) {
  auto* provider_info = onnxruntime::GetProviderInfo_CUDA();
  if (provider_info == nullptr) {
    return CUPTI_ERROR_NOT_SUPPORTED;
  }
  // The provider interface reports the status as a plain int.
  const int status = provider_info->cuptiActivityEnable(kind);
  return static_cast<CUptiResult>(status);
}
// Local definition of cuptiActivityGetNextRecord that forwards the call to the
// CUDA provider info object.
// Returns CUPTI_ERROR_NOT_SUPPORTED when GetProviderInfo_CUDA() yields null;
// otherwise returns the provider's int result converted to CUptiResult.
CUptiResult CUPTIAPI cuptiActivityGetNextRecord(uint8_t* buffer, size_t validBufferSizeBytes, CUpti_Activity** record) {
  auto* provider_info = onnxruntime::GetProviderInfo_CUDA();
  if (provider_info == nullptr) {
    return CUPTI_ERROR_NOT_SUPPORTED;
  }
  // `record` is handed over as an untyped pointer; the provider interface takes void*.
  const int status = provider_info->cuptiActivityGetNextRecord(buffer, validBufferSizeBytes, record);
  return static_cast<CUptiResult>(status);
}
// Local definition of cuptiActivityRegisterCallbacks that forwards the call to
// the CUDA provider info object.
//
// funcBufferRequested / funcBufferCompleted: CUPTI buffer callbacks to register.
// Returns CUPTI_ERROR_NOT_SUPPORTED when GetProviderInfo_CUDA() yields null;
// otherwise returns the provider's int result converted to CUptiResult.
CUptiResult CUPTIAPI cuptiActivityRegisterCallbacks(CUpti_BuffersCallbackRequestFunc funcBufferRequested, CUpti_BuffersCallbackCompleteFunc funcBufferCompleted) {
  auto* info = onnxruntime::GetProviderInfo_CUDA();
  if (info == nullptr)
    return CUPTI_ERROR_NOT_SUPPORTED;
  // The provider interface takes both callbacks as void*. Standard C++ has no
  // implicit conversion from a function pointer to void*, so cast explicitly
  // instead of relying on a compiler extension.
  return CUptiResult(info->cuptiActivityRegisterCallbacks(reinterpret_cast<void*>(funcBufferRequested),
                                                          reinterpret_cast<void*>(funcBufferCompleted)));
}
// Local definition of cuptiActivityFlushAll that forwards the call to the CUDA
// provider info object.
// Returns CUPTI_ERROR_NOT_SUPPORTED when GetProviderInfo_CUDA() yields null;
// otherwise returns the provider's int result converted to CUptiResult.
CUptiResult CUPTIAPI cuptiActivityFlushAll(uint32_t flag) {
  auto* provider_info = onnxruntime::GetProviderInfo_CUDA();
  if (provider_info == nullptr) {
    return CUPTI_ERROR_NOT_SUPPORTED;
  }
  const int status = provider_info->cuptiActivityFlushAll(flag);
  return static_cast<CUptiResult>(status);
}
#endif
ORT_API_STATUS_IMPL(OrtSessionOptionsAppendExecutionProvider_Dnnl, _In_ OrtSessionOptions* options, int use_arena) {
auto factory = onnxruntime::CreateExecutionProviderFactory_Dnnl(use_arena);
if (!factory) {

View file

@ -6,6 +6,7 @@
#include "core/providers/cuda/cuda_provider_factory.h"
#include <memory>
#include <cupti.h>
#include "gsl/gsl"
@ -132,6 +133,22 @@ struct ProviderInfo_CUDA_Impl : ProviderInfo_CUDA {
info = CUDAExecutionProviderInfo::FromProviderOptions(options);
}
int cuptiActivityEnable(int kind) override {
return ::cuptiActivityEnable(CUpti_ActivityKind(kind));
}
int cuptiActivityGetNextRecord(uint8_t* buffer, size_t validBufferSizeBytes, void* record) override {
return ::cuptiActivityGetNextRecord(buffer, validBufferSizeBytes, reinterpret_cast<CUpti_Activity**>(record));
}
// Calls the global ::cuptiActivityRegisterCallbacks, converting the two untyped
// pointers back to the CUPTI callback function-pointer types, and returns its
// result as an int.
int cuptiActivityRegisterCallbacks(void* funcBufferRequested, void* funcBufferCompleted) override {
  auto on_buffer_requested = reinterpret_cast<CUpti_BuffersCallbackRequestFunc>(funcBufferRequested);
  auto on_buffer_completed = reinterpret_cast<CUpti_BuffersCallbackCompleteFunc>(funcBufferCompleted);
  return ::cuptiActivityRegisterCallbacks(on_buffer_requested, on_buffer_completed);
}
// Calls the global ::cuptiActivityFlushAll with `flag` unchanged and returns
// its result as an int.
int cuptiActivityFlushAll(uint32_t flag) override {
  const auto flush_status = ::cuptiActivityFlushAll(flag);
  return flush_status;
}
#if defined(USE_CUDA) && defined(ORT_USE_NCCL) && defined(USE_NCCL_P2P)
cuda::INcclService& GetINcclService() override {
return cuda::GetINcclService();