mirror of
https://github.com/saymrwulf/onnxruntime.git
synced 2026-06-01 23:30:35 +00:00
Fix python
This commit is contained in:
parent
683354424a
commit
4cf4cf3032
4 changed files with 61 additions and 30 deletions
|
|
@ -4,9 +4,15 @@
|
|||
#include "onnxruntime_c_api.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
#include "core/framework/provider_options.h"
|
||||
|
||||
namespace onnxruntime {
|
||||
class IAllocator;
|
||||
class IDataTransfer;
|
||||
struct IExecutionProviderFactory;
|
||||
struct CUDAExecutionProviderInfo;
|
||||
enum class ArenaExtendStrategy : int32_t;
|
||||
struct CUDAExecutionProviderExternalAllocatorInfo;
|
||||
} // namespace onnxruntime
|
||||
|
||||
struct ProviderInfo_CUDA {
|
||||
|
|
@ -25,6 +31,12 @@ struct ProviderInfo_CUDA {
|
|||
|
||||
virtual void CopyGpuToCpu(void* dst_ptr, const void* src_ptr, const size_t size, const OrtMemoryInfo& dst_location, const OrtMemoryInfo& src_location) = 0;
|
||||
virtual void cudaMemcpy_HostToDevice(void* dst, const void* src, size_t count) = 0;
|
||||
virtual void cudaMemcpy_DeviceToHost(void* dst, const void* src, size_t count) = 0;
|
||||
virtual int cudaGetDeviceCount() = 0;
|
||||
virtual void CUDAExecutionProviderInfo__FromProviderOptions(const onnxruntime::ProviderOptions& options, onnxruntime::CUDAExecutionProviderInfo& info) = 0;
|
||||
|
||||
virtual std::shared_ptr<onnxruntime::IExecutionProviderFactory> CreateExecutionProviderFactory(const onnxruntime::CUDAExecutionProviderInfo& info) = 0;
|
||||
virtual std::shared_ptr<onnxruntime::IAllocator> CreateCudaAllocator(int16_t device_id, size_t gpu_mem_limit, onnxruntime::ArenaExtendStrategy arena_extend_strategy, onnxruntime::CUDAExecutionProviderExternalAllocatorInfo& external_allocator_info) = 0;
|
||||
};
|
||||
|
||||
extern "C" {
|
||||
|
|
|
|||
|
|
@ -44,7 +44,7 @@ namespace onnxruntime {
|
|||
struct ProviderInfo_CUDA_Impl : ProviderInfo_CUDA {
|
||||
OrtStatus* SetCurrentGpuDeviceId(_In_ int device_id) override {
|
||||
int num_devices;
|
||||
auto cuda_err = cudaGetDeviceCount(&num_devices);
|
||||
auto cuda_err = ::cudaGetDeviceCount(&num_devices);
|
||||
if (cuda_err != cudaSuccess) {
|
||||
return CreateStatus(ORT_FAIL, "Failed to set device id since cudaGetDeviceCount failed.");
|
||||
}
|
||||
|
|
@ -113,8 +113,28 @@ struct ProviderInfo_CUDA_Impl : ProviderInfo_CUDA {
|
|||
}
|
||||
}
|
||||
|
||||
// Used only by slice_concatenate_test.cc
|
||||
void cudaMemcpy_HostToDevice(void* dst, const void* src, size_t count) override { cudaMemcpy(dst, src, count, cudaMemcpyHostToDevice); }
|
||||
// Used by slice_concatenate_test.cc and onnxruntime_pybind_state.cc
|
||||
void cudaMemcpy_HostToDevice(void* dst, const void* src, size_t count) override { CUDA_CALL_THROW(cudaMemcpy(dst, src, count, cudaMemcpyHostToDevice)); }
|
||||
// Used by onnxruntime_pybind_state.cc
|
||||
void cudaMemcpy_DeviceToHost(void* dst, const void* src, size_t count) override { CUDA_CALL_THROW(cudaMemcpy(dst, src, count, cudaMemcpyDeviceToHost)); }
|
||||
|
||||
int cudaGetDeviceCount() override {
|
||||
int num_devices = 0;
|
||||
CUDA_CALL_THROW(::cudaGetDeviceCount(&num_devices));
|
||||
return num_devices;
|
||||
}
|
||||
|
||||
void CUDAExecutionProviderInfo__FromProviderOptions(const ProviderOptions& options, CUDAExecutionProviderInfo& info) {
|
||||
info = CUDAExecutionProviderInfo::FromProviderOptions(options);
|
||||
}
|
||||
|
||||
std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory(const CUDAExecutionProviderInfo& info) override {
|
||||
return std::make_shared<CUDAProviderFactory>(info);
|
||||
}
|
||||
|
||||
std::shared_ptr<IAllocator> CreateCudaAllocator(int16_t device_id, size_t gpu_mem_limit, onnxruntime::ArenaExtendStrategy arena_extend_strategy, onnxruntime::CUDAExecutionProviderExternalAllocatorInfo& external_allocator_info) override {
|
||||
return CUDAExecutionProvider::CreateCudaAllocator(device_id, gpu_mem_limit, arena_extend_strategy, external_allocator_info);
|
||||
}
|
||||
|
||||
} g_info;
|
||||
|
||||
|
|
|
|||
|
|
@ -454,8 +454,6 @@ Tensor* AttentionBase::GetPresent(OpKernelContext* context, const Tensor* past,
|
|||
return g_host->AttentionBase__GetPresent(this, context, past, batch_size, head_size, sequence_length, past_sequence_length);
|
||||
}
|
||||
|
||||
Status YieldOp::Compute(OpKernelContext* context) const { return g_host->YieldOp__Compute(this, context); }
|
||||
|
||||
} // namespace contrib
|
||||
#endif
|
||||
|
||||
|
|
@ -467,6 +465,7 @@ std::unique_ptr<OpKernel> Loop::Create(const OpKernelInfo& info, const Loop::Con
|
|||
namespace contrib {
|
||||
Status Group::Compute(OpKernelContext* context) const { return g_host->contrib__Group__Compute(this, context); }
|
||||
Status PassThrough::Compute(OpKernelContext* context) const { return g_host->contrib__PassThrough__Compute(this, context); }
|
||||
Status YieldOp::Compute(OpKernelContext* context) const { return g_host->YieldOp__Compute(this, context); }
|
||||
} // namespace contrib
|
||||
#endif
|
||||
|
||||
|
|
|
|||
|
|
@ -32,9 +32,6 @@
|
|||
|
||||
// execution provider factory creator headers
|
||||
#include "core/providers/cpu/cpu_provider_factory_creator.h"
|
||||
#ifdef USE_CUDA
|
||||
#include "core/providers/cuda/cuda_provider_factory_creator.h"
|
||||
#endif
|
||||
#ifdef USE_ROCM
|
||||
#include "core/providers/rocm/rocm_provider_factory_creator.h"
|
||||
#endif
|
||||
|
|
@ -165,6 +162,9 @@ size_t gpu_mem_limit = std::numeric_limits<size_t>::max();
|
|||
onnxruntime::ArenaExtendStrategy arena_extend_strategy = onnxruntime::ArenaExtendStrategy::kNextPowerOfTwo;
|
||||
#endif
|
||||
|
||||
#ifdef USE_CUDA
|
||||
#include "core/providers/cuda/cuda_provider_factory.h"
|
||||
#endif
|
||||
#ifdef USE_TENSORRT
|
||||
#include "core/providers/tensorrt/tensorrt_provider_factory.h"
|
||||
#endif
|
||||
|
|
@ -203,10 +203,14 @@ const OrtDevice::DeviceType OrtDevice::GPU;
|
|||
namespace onnxruntime {
|
||||
std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_Tensorrt(const OrtTensorRTProviderOptions* params);
|
||||
std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_MIGraphX(int device_id);
|
||||
std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_Cuda(const OrtCUDAProviderOptions* params);
|
||||
std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_Dnnl(int use_arena);
|
||||
std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_OpenVINO(const OrtOpenVINOProviderOptions* params);
|
||||
#ifdef USE_CUDA
|
||||
ProviderInfo_CUDA* GetProviderInfo_CUDA();
|
||||
#endif
|
||||
#ifdef USE_OPENVINO
|
||||
const ProviderInfo_OpenVINO* GetProviderInfo_OpenVINO();
|
||||
ProviderInfo_OpenVINO* GetProviderInfo_OpenVINO();
|
||||
#endif
|
||||
std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_Nuphar(bool, const char*);
|
||||
std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_VITISAI(const char* backend_type, int device_id);
|
||||
|
|
@ -443,8 +447,7 @@ static inline void RegisterExecutionProvider(InferenceSession* sess, onnxruntime
|
|||
#ifdef USE_CUDA
|
||||
|
||||
static bool IsCudaDeviceIdValid(const onnxruntime::logging::Logger& logger, int id) {
|
||||
int num_devices = 0;
|
||||
CUDA_CALL_THROW(cudaGetDeviceCount(&num_devices));
|
||||
int num_devices = GetProviderInfo_CUDA()->cudaGetDeviceCount();
|
||||
|
||||
if (0 == num_devices) {
|
||||
LOGS(logger, WARNING) << "your system does not have a CUDA capable device.";
|
||||
|
|
@ -465,18 +468,18 @@ static AllocatorPtr GetCudaAllocator(OrtDevice::DeviceId id) {
|
|||
static std::unordered_map<OrtDevice::DeviceId, AllocatorPtr> id_to_allocator_map;
|
||||
|
||||
if (id_to_allocator_map.find(id) == id_to_allocator_map.end()) {
|
||||
id_to_allocator_map.insert({id, CUDAExecutionProvider::CreateCudaAllocator(id, gpu_mem_limit, arena_extend_strategy, external_allocator_info)});
|
||||
id_to_allocator_map.insert({id, GetProviderInfo_CUDA()->CreateCudaAllocator(id, gpu_mem_limit, arena_extend_strategy, external_allocator_info)});
|
||||
}
|
||||
|
||||
return id_to_allocator_map[id];
|
||||
}
|
||||
|
||||
static void CpuToCudaMemCpy(void* dst, const void* src, size_t num_bytes) {
|
||||
CUDA_CALL_THROW(cudaMemcpy(dst, src, num_bytes, cudaMemcpyHostToDevice));
|
||||
GetProviderInfo_CUDA()->cudaMemcpy_HostToDevice(dst, src, num_bytes);
|
||||
}
|
||||
|
||||
static void CudaToCpuMemCpy(void* dst, const void* src, size_t num_bytes) {
|
||||
CUDA_CALL_THROW(cudaMemcpy(dst, src, num_bytes, cudaMemcpyDeviceToHost));
|
||||
GetProviderInfo_CUDA()->cudaMemcpy_DeviceToHost(dst, src, num_bytes);
|
||||
}
|
||||
|
||||
static const std::unordered_map<OrtDevice::DeviceType, MemCpyFunc>* GetCudaToHostMemCpyFunction() {
|
||||
|
|
@ -613,26 +616,23 @@ static void RegisterExecutionProviders(InferenceSession* sess, const std::vector
|
|||
} else if (type == kCudaExecutionProvider) {
|
||||
#ifdef USE_CUDA
|
||||
const auto it = provider_options_map.find(type);
|
||||
const CUDAExecutionProviderInfo info =
|
||||
it != provider_options_map.end()
|
||||
? CUDAExecutionProviderInfo::FromProviderOptions(it->second)
|
||||
: [&]() {
|
||||
CUDAExecutionProviderInfo info{};
|
||||
info.device_id = cuda_device_id;
|
||||
info.gpu_mem_limit = gpu_mem_limit;
|
||||
info.arena_extend_strategy = arena_extend_strategy;
|
||||
info.cudnn_conv_algo_search = cudnn_conv_algo_search;
|
||||
info.do_copy_in_default_stream = do_copy_in_default_stream;
|
||||
info.external_allocator_info = external_allocator_info;
|
||||
return info;
|
||||
}();
|
||||
CUDAExecutionProviderInfo info{};
|
||||
if (it != provider_options_map.end())
|
||||
GetProviderInfo_CUDA()->CUDAExecutionProviderInfo__FromProviderOptions(it->second, info);
|
||||
else {
|
||||
info.device_id = cuda_device_id;
|
||||
info.gpu_mem_limit = gpu_mem_limit;
|
||||
info.arena_extend_strategy = arena_extend_strategy;
|
||||
info.cudnn_conv_algo_search = cudnn_conv_algo_search;
|
||||
info.do_copy_in_default_stream = do_copy_in_default_stream;
|
||||
info.external_allocator_info = external_allocator_info;
|
||||
}
|
||||
|
||||
// This variable is never initialized because the APIs by which it should be initialized are deprecated; however, they still
|
||||
// exist and are in use. Nevertheless, it is used to return CUDAAllocator, hence we must try to initialize it here if we can
|
||||
// since FromProviderOptions might contain external CUDA allocator.
|
||||
external_allocator_info = info.external_allocator_info;
|
||||
RegisterExecutionProvider(
|
||||
sess, *onnxruntime::CreateExecutionProviderFactory_CUDA(info));
|
||||
RegisterExecutionProvider(sess, *GetProviderInfo_CUDA()->CreateExecutionProviderFactory(info));
|
||||
#endif
|
||||
} else if (type == kRocmExecutionProvider) {
|
||||
#ifdef USE_ROCM
|
||||
|
|
@ -649,7 +649,7 @@ static void RegisterExecutionProviders(InferenceSession* sess, const std::vector
|
|||
return info;
|
||||
}();
|
||||
|
||||
// This variable is never initialized because the APIs by which it should be initialized are deprecated; however, they still
|
||||
// This variable is never initialized because the APIs by which it should be initialized are deprecated; however, they still
|
||||
// exist and are in use. Nevertheless, it is used to return CUDAAllocator, hence we must try to initialize it here if we can
|
||||
// since FromProviderOptions might contain external CUDA allocator.
|
||||
external_allocator_info = info.external_allocator_info;
|
||||
|
|
|
|||
Loading…
Reference in a new issue