mirror of
https://github.com/saymrwulf/onnxruntime.git
synced 2026-05-14 20:48:00 +00:00
Remove cupti library dependency from core library
This commit is contained in:
parent
c569dee80a
commit
6a889ee8bf
5 changed files with 61 additions and 2 deletions
|
|
@ -92,8 +92,6 @@ onnxruntime_add_static_library(onnxruntime_common ${onnxruntime_common_src})
|
|||
|
||||
# CUDA builds: onnxruntime_common only needs the CUDA toolkit and CUPTI headers.
# It deliberately does NOT link the cupti library, so the core library carries
# no link-time CUPTI dependency; cupti is linked by onnxruntime_providers_cuda.
if (onnxruntime_USE_CUDA)
  target_include_directories(onnxruntime_common PUBLIC ${onnxruntime_CUDA_HOME}/include ${onnxruntime_CUDA_HOME}/extras/CUPTI/include)
endif()
|
||||
|
||||
if (onnxruntime_USE_TELEMETRY)
|
||||
|
|
|
|||
|
|
@ -316,6 +316,11 @@ if (onnxruntime_USE_CUDA)
|
|||
# Link the provider with the CUDA linker language and group it under the
# "ONNXRuntime" folder in IDE project views.
set_target_properties(onnxruntime_providers_cuda
  PROPERTIES
    LINKER_LANGUAGE CUDA
    FOLDER "ONNXRuntime"
)
|
||||
|
||||
# CUPTI profiling dependency of the CUDA provider.
# The headers stay a PUBLIC usage requirement. cupti itself is linked PRIVATE,
# so its library search path is PRIVATE as well rather than being propagated to
# every target that links onnxruntime_providers_cuda.
# NOTE(review): extras/CUPTI/lib64 looks like the Linux toolkit layout - confirm
# the directory name for other platforms.
target_include_directories(onnxruntime_providers_cuda PUBLIC ${onnxruntime_CUDA_HOME}/include ${onnxruntime_CUDA_HOME}/extras/CUPTI/include)
target_link_directories(onnxruntime_providers_cuda PRIVATE ${onnxruntime_CUDA_HOME}/extras/CUPTI/lib64)
target_link_libraries(onnxruntime_providers_cuda PRIVATE cupti)
|
||||
|
||||
# CUDA compilers older than version 11 build against the CUB copy under
# external/cub (presumably because newer toolkits ship their own - confirm).
if(CMAKE_CUDA_COMPILER_VERSION VERSION_LESS 11)
  target_include_directories(
    onnxruntime_providers_cuda
    PRIVATE
      ${PROJECT_SOURCE_DIR}/external/cub
  )
endif()
|
||||
|
|
|
|||
|
|
@ -40,6 +40,11 @@ struct ProviderInfo_CUDA {
|
|||
virtual int cudaGetDeviceCount() = 0;
|
||||
virtual void CUDAExecutionProviderInfo__FromProviderOptions(const onnxruntime::ProviderOptions& options, onnxruntime::CUDAExecutionProviderInfo& info) = 0;
|
||||
|
||||
// Type-erased counterpart of CUPTI's cuptiActivityEnable. `kind` carries a CUpti_ActivityKind value as a plain int and the returned int carries a CUptiResult code (presumably so this interface does not need the CUPTI headers - confirm).
virtual int cuptiActivityEnable(int kind) = 0;
|
||||
// Type-erased counterpart of CUPTI's cuptiActivityGetNextRecord. `record` is a CUpti_Activity** passed as void*; the returned int carries a CUptiResult code.
virtual int cuptiActivityGetNextRecord(uint8_t* buffer, size_t validBufferSizeBytes, void* record) = 0;
|
||||
// Type-erased counterpart of CUPTI's cuptiActivityRegisterCallbacks. Both arguments are CUPTI buffer callback function pointers (CUpti_BuffersCallbackRequestFunc / CUpti_BuffersCallbackCompleteFunc) passed as void*; the returned int carries a CUptiResult code.
virtual int cuptiActivityRegisterCallbacks(void* funcBufferRequested, void* funcBufferCompleted) = 0;
|
||||
// Type-erased counterpart of CUPTI's cuptiActivityFlushAll. `flag` is passed through unchanged; the returned int carries a CUptiResult code.
virtual int cuptiActivityFlushAll(uint32_t flag) = 0;
|
||||
|
||||
#if defined(USE_CUDA) && defined(ORT_USE_NCCL) && defined(USE_NCCL_P2P)
|
||||
virtual onnxruntime::cuda::INcclService& GetINcclService() = 0;
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -54,6 +54,10 @@ Status LongformerAttentionBase__CheckInputs(const LongformerAttentionBase* p, co
|
|||
#include "contrib_ops/cpu/bert/attention_base.h"
|
||||
#endif
|
||||
|
||||
#ifdef USE_CUDA
|
||||
#include <cupti.h>
|
||||
#endif
|
||||
|
||||
#ifdef ENABLE_TRAINING
|
||||
#include "orttraining/training_ops/cpu/aten_ops/aten_op.h"
|
||||
#include "orttraining/training_ops/cpu/controlflow/group.h"
|
||||
|
|
@ -1081,6 +1085,36 @@ INcclService& INcclService::GetInstance() {
|
|||
|
||||
} // namespace onnxruntime
|
||||
|
||||
#if defined(USE_CUDA)
|
||||
// Core-side definition of cuptiActivityEnable with the CUPTI signature.
// Delegates to the CUDA provider info object when one is returned, and reports
// CUPTI_ERROR_NOT_SUPPORTED otherwise.
CUptiResult CUPTIAPI cuptiActivityEnable(CUpti_ActivityKind kind) {
  if (auto* cuda_info = onnxruntime::GetProviderInfo_CUDA()) {
    // The provider interface exchanges plain ints; convert the result back to the CUPTI enum.
    return static_cast<CUptiResult>(cuda_info->cuptiActivityEnable(kind));
  }
  return CUPTI_ERROR_NOT_SUPPORTED;
}
|
||||
|
||||
// Core-side definition of cuptiActivityGetNextRecord with the CUPTI signature.
// Delegates to the CUDA provider info object when one is returned, and reports
// CUPTI_ERROR_NOT_SUPPORTED otherwise.
CUptiResult CUPTIAPI cuptiActivityGetNextRecord(uint8_t* buffer, size_t validBufferSizeBytes, CUpti_Activity** record) {
  if (auto* cuda_info = onnxruntime::GetProviderInfo_CUDA()) {
    // `record` is handed over as void*; the provider interface is type-erased.
    const int status = cuda_info->cuptiActivityGetNextRecord(buffer, validBufferSizeBytes, record);
    return static_cast<CUptiResult>(status);
  }
  return CUPTI_ERROR_NOT_SUPPORTED;
}
|
||||
|
||||
// Core-side definition of cuptiActivityRegisterCallbacks with the CUPTI signature.
// Delegates to the CUDA provider info object when one is returned, and reports
// CUPTI_ERROR_NOT_SUPPORTED otherwise.
CUptiResult CUPTIAPI cuptiActivityRegisterCallbacks(CUpti_BuffersCallbackRequestFunc funcBufferRequested, CUpti_BuffersCallbackCompleteFunc funcBufferCompleted) {
  auto* info = onnxruntime::GetProviderInfo_CUDA();
  if (!info)
    return CUPTI_ERROR_NOT_SUPPORTED;

  // The provider interface takes the callbacks as void*. A function pointer does
  // not convert to void* implicitly in standard C++ (GCC and Clang reject it), so
  // cast explicitly; the CUDA provider implementation casts them back to the
  // CUPTI callback types before calling CUPTI.
  return CUptiResult(info->cuptiActivityRegisterCallbacks(reinterpret_cast<void*>(funcBufferRequested),
                                                          reinterpret_cast<void*>(funcBufferCompleted)));
}
|
||||
|
||||
// Core-side definition of cuptiActivityFlushAll with the CUPTI signature.
// Delegates to the CUDA provider info object when one is returned, and reports
// CUPTI_ERROR_NOT_SUPPORTED otherwise.
CUptiResult CUPTIAPI cuptiActivityFlushAll(uint32_t flag) {
  if (auto* cuda_info = onnxruntime::GetProviderInfo_CUDA()) {
    return static_cast<CUptiResult>(cuda_info->cuptiActivityFlushAll(flag));
  }
  return CUPTI_ERROR_NOT_SUPPORTED;
}
|
||||
#endif
|
||||
|
||||
ORT_API_STATUS_IMPL(OrtSessionOptionsAppendExecutionProvider_Dnnl, _In_ OrtSessionOptions* options, int use_arena) {
|
||||
auto factory = onnxruntime::CreateExecutionProviderFactory_Dnnl(use_arena);
|
||||
if (!factory) {
|
||||
|
|
|
|||
|
|
@ -6,6 +6,7 @@
|
|||
#include "core/providers/cuda/cuda_provider_factory.h"
|
||||
|
||||
#include <memory>
|
||||
#include <cupti.h>
|
||||
|
||||
#include "gsl/gsl"
|
||||
|
||||
|
|
@ -132,6 +133,22 @@ struct ProviderInfo_CUDA_Impl : ProviderInfo_CUDA {
|
|||
info = CUDAExecutionProviderInfo::FromProviderOptions(options);
|
||||
}
|
||||
|
||||
int cuptiActivityEnable(int kind) override {
|
||||
return ::cuptiActivityEnable(CUpti_ActivityKind(kind));
|
||||
}
|
||||
|
||||
int cuptiActivityGetNextRecord(uint8_t* buffer, size_t validBufferSizeBytes, void* record) override {
|
||||
return ::cuptiActivityGetNextRecord(buffer, validBufferSizeBytes, reinterpret_cast<CUpti_Activity**>(record));
|
||||
}
|
||||
|
||||
int cuptiActivityRegisterCallbacks(void* funcBufferRequested, void* funcBufferCompleted) override {
|
||||
return ::cuptiActivityRegisterCallbacks(reinterpret_cast<CUpti_BuffersCallbackRequestFunc>(funcBufferRequested), reinterpret_cast<CUpti_BuffersCallbackCompleteFunc>(funcBufferCompleted));
|
||||
}
|
||||
|
||||
int cuptiActivityFlushAll(uint32_t flag) override {
|
||||
return ::cuptiActivityFlushAll(flag);
|
||||
}
|
||||
|
||||
#if defined(USE_CUDA) && defined(ORT_USE_NCCL) && defined(USE_NCCL_P2P)
|
||||
cuda::INcclService& GetINcclService() override {
|
||||
return cuda::GetINcclService();
|
||||
|
|
|
|||
Loading…
Reference in a new issue