Fix python

2026-07-18 18:52:16 +00:00 · 2021-04-13 20:28:03 -07:00 · 2021-04-13 20:28:03 -07:00 · 4cf4cf3032
commit 4cf4cf3032
parent 683354424a
4 changed files with 61 additions and 30 deletions
--- a/include/onnxruntime/core/providers/cuda/cuda_provider_factory.h
+++ b/include/onnxruntime/core/providers/cuda/cuda_provider_factory.h
@ -4,9 +4,15 @@
 #include "onnxruntime_c_api.h"

 #ifdef __cplusplus
+#include "core/framework/provider_options.h"
+
 namespace onnxruntime {
 class IAllocator;
 class IDataTransfer;
+struct IExecutionProviderFactory;
+struct CUDAExecutionProviderInfo;
+enum class ArenaExtendStrategy : int32_t;
+struct CUDAExecutionProviderExternalAllocatorInfo;
 }  // namespace onnxruntime

 struct ProviderInfo_CUDA {
@ -25,6 +31,12 @@ struct ProviderInfo_CUDA {

  virtual void CopyGpuToCpu(void* dst_ptr, const void* src_ptr, const size_t size, const OrtMemoryInfo& dst_location, const OrtMemoryInfo& src_location) = 0;
  virtual void cudaMemcpy_HostToDevice(void* dst, const void* src, size_t count) = 0;
+  virtual void cudaMemcpy_DeviceToHost(void* dst, const void* src, size_t count) = 0;
+  virtual int cudaGetDeviceCount() = 0;
+  virtual void CUDAExecutionProviderInfo__FromProviderOptions(const onnxruntime::ProviderOptions& options, onnxruntime::CUDAExecutionProviderInfo& info) = 0;
+
+  virtual std::shared_ptr<onnxruntime::IExecutionProviderFactory> CreateExecutionProviderFactory(const onnxruntime::CUDAExecutionProviderInfo& info) = 0;
+  virtual std::shared_ptr<onnxruntime::IAllocator> CreateCudaAllocator(int16_t device_id, size_t gpu_mem_limit, onnxruntime::ArenaExtendStrategy arena_extend_strategy, onnxruntime::CUDAExecutionProviderExternalAllocatorInfo& external_allocator_info) = 0;
 };

 extern "C" {
--- a/onnxruntime/core/providers/cuda/cuda_provider_factory.cc
+++ b/onnxruntime/core/providers/cuda/cuda_provider_factory.cc
@ -44,7 +44,7 @@ namespace onnxruntime {
 struct ProviderInfo_CUDA_Impl : ProviderInfo_CUDA {
  OrtStatus* SetCurrentGpuDeviceId(_In_ int device_id) override {
    int num_devices;
-    auto cuda_err = cudaGetDeviceCount(&num_devices);
+    auto cuda_err = ::cudaGetDeviceCount(&num_devices);
    if (cuda_err != cudaSuccess) {
      return CreateStatus(ORT_FAIL, "Failed to set device id since cudaGetDeviceCount failed.");
    }
@ -113,8 +113,28 @@ struct ProviderInfo_CUDA_Impl : ProviderInfo_CUDA {
    }
  }

-  // Used only by slice_concatenate_test.cc
-  void cudaMemcpy_HostToDevice(void* dst, const void* src, size_t count) override { cudaMemcpy(dst, src, count, cudaMemcpyHostToDevice); }
+  // Used by slice_concatenate_test.cc and onnxruntime_pybind_state.cc
+  void cudaMemcpy_HostToDevice(void* dst, const void* src, size_t count) override { CUDA_CALL_THROW(cudaMemcpy(dst, src, count, cudaMemcpyHostToDevice)); }
+  // Used by onnxruntime_pybind_state.cc
+  void cudaMemcpy_DeviceToHost(void* dst, const void* src, size_t count) override { CUDA_CALL_THROW(cudaMemcpy(dst, src, count, cudaMemcpyDeviceToHost)); }
+
+  int cudaGetDeviceCount() override {
+    int num_devices = 0;
+    CUDA_CALL_THROW(::cudaGetDeviceCount(&num_devices));
+    return num_devices;
+  }
+
+  // Used by onnxruntime_pybind_state.cc: replaces the contents of `info` with the result of
+  // CUDAExecutionProviderInfo::FromProviderOptions(options).
+  void CUDAExecutionProviderInfo__FromProviderOptions(const ProviderOptions& options, CUDAExecutionProviderInfo& info) override {
+    info = CUDAExecutionProviderInfo::FromProviderOptions(options);
+  }
+
+  std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory(const CUDAExecutionProviderInfo& info) override {
+    return std::make_shared<CUDAProviderFactory>(info);
+  }
+
+  std::shared_ptr<IAllocator> CreateCudaAllocator(int16_t device_id, size_t gpu_mem_limit, onnxruntime::ArenaExtendStrategy arena_extend_strategy, onnxruntime::CUDAExecutionProviderExternalAllocatorInfo& external_allocator_info) override {
+    return CUDAExecutionProvider::CreateCudaAllocator(device_id, gpu_mem_limit, arena_extend_strategy, external_allocator_info);
+  }

 } g_info;

--- a/onnxruntime/core/providers/shared_library/provider_bridge_provider.cc
+++ b/onnxruntime/core/providers/shared_library/provider_bridge_provider.cc
@ -454,8 +454,6 @@ Tensor* AttentionBase::GetPresent(OpKernelContext* context, const Tensor* past,
  return g_host->AttentionBase__GetPresent(this, context, past, batch_size, head_size, sequence_length, past_sequence_length);
 }

-Status YieldOp::Compute(OpKernelContext* context) const { return g_host->YieldOp__Compute(this, context); }
-
 }  // namespace contrib
 #endif

@ -467,6 +465,7 @@ std::unique_ptr<OpKernel> Loop::Create(const OpKernelInfo& info, const Loop::Con
 namespace contrib {
 Status Group::Compute(OpKernelContext* context) const { return g_host->contrib__Group__Compute(this, context); }
 Status PassThrough::Compute(OpKernelContext* context) const { return g_host->contrib__PassThrough__Compute(this, context); }
+Status YieldOp::Compute(OpKernelContext* context) const { return g_host->YieldOp__Compute(this, context); }
 }  // namespace contrib
 #endif

--- a/onnxruntime/python/onnxruntime_pybind_state.cc
+++ b/onnxruntime/python/onnxruntime_pybind_state.cc
@ -32,9 +32,6 @@

 // execution provider factory creator headers
 #include "core/providers/cpu/cpu_provider_factory_creator.h"
-#ifdef USE_CUDA
-#include "core/providers/cuda/cuda_provider_factory_creator.h"
-#endif
 #ifdef USE_ROCM
 #include "core/providers/rocm/rocm_provider_factory_creator.h"
 #endif
@ -165,6 +162,9 @@ size_t gpu_mem_limit = std::numeric_limits<size_t>::max();
 onnxruntime::ArenaExtendStrategy arena_extend_strategy = onnxruntime::ArenaExtendStrategy::kNextPowerOfTwo;
 #endif

+#ifdef USE_CUDA
+#include "core/providers/cuda/cuda_provider_factory.h"
+#endif
 #ifdef USE_TENSORRT
 #include "core/providers/tensorrt/tensorrt_provider_factory.h"
 #endif
@ -203,10 +203,14 @@ const OrtDevice::DeviceType OrtDevice::GPU;
 namespace onnxruntime {
 std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_Tensorrt(const OrtTensorRTProviderOptions* params);
 std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_MIGraphX(int device_id);
+std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_Cuda(const OrtCUDAProviderOptions* params);
 std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_Dnnl(int use_arena);
 std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_OpenVINO(const OrtOpenVINOProviderOptions* params);
+#ifdef USE_CUDA
+ProviderInfo_CUDA* GetProviderInfo_CUDA();
+#endif
 #ifdef USE_OPENVINO
-const ProviderInfo_OpenVINO* GetProviderInfo_OpenVINO();
+ProviderInfo_OpenVINO* GetProviderInfo_OpenVINO();
 #endif
 std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_Nuphar(bool, const char*);
 std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_VITISAI(const char* backend_type, int device_id);
@ -443,8 +447,7 @@ static inline void RegisterExecutionProvider(InferenceSession* sess, onnxruntime
 #ifdef USE_CUDA

 static bool IsCudaDeviceIdValid(const onnxruntime::logging::Logger& logger, int id) {
-  int num_devices = 0;
-  CUDA_CALL_THROW(cudaGetDeviceCount(&num_devices));
+  int num_devices = GetProviderInfo_CUDA()->cudaGetDeviceCount();

  if (0 == num_devices) {
    LOGS(logger, WARNING) << "your system does not have a CUDA capable device.";
@ -465,18 +468,18 @@ static AllocatorPtr GetCudaAllocator(OrtDevice::DeviceId id) {
  static std::unordered_map<OrtDevice::DeviceId, AllocatorPtr> id_to_allocator_map;

  if (id_to_allocator_map.find(id) == id_to_allocator_map.end()) {
-    id_to_allocator_map.insert({id, CUDAExecutionProvider::CreateCudaAllocator(id, gpu_mem_limit, arena_extend_strategy, external_allocator_info)});
+    id_to_allocator_map.insert({id, GetProviderInfo_CUDA()->CreateCudaAllocator(id, gpu_mem_limit, arena_extend_strategy, external_allocator_info)});
  }

  return id_to_allocator_map[id];
 }

 static void CpuToCudaMemCpy(void* dst, const void* src, size_t num_bytes) {
-  CUDA_CALL_THROW(cudaMemcpy(dst, src, num_bytes, cudaMemcpyHostToDevice));
+  GetProviderInfo_CUDA()->cudaMemcpy_HostToDevice(dst, src, num_bytes);
 }

 static void CudaToCpuMemCpy(void* dst, const void* src, size_t num_bytes) {
-  CUDA_CALL_THROW(cudaMemcpy(dst, src, num_bytes, cudaMemcpyDeviceToHost));
+  GetProviderInfo_CUDA()->cudaMemcpy_DeviceToHost(dst, src, num_bytes);
 }

 static const std::unordered_map<OrtDevice::DeviceType, MemCpyFunc>* GetCudaToHostMemCpyFunction() {
@ -613,26 +616,23 @@ static void RegisterExecutionProviders(InferenceSession* sess, const std::vector
    } else if (type == kCudaExecutionProvider) {
 #ifdef USE_CUDA
      const auto it = provider_options_map.find(type);
-      const CUDAExecutionProviderInfo info =
-          it != provider_options_map.end()
-              ? CUDAExecutionProviderInfo::FromProviderOptions(it->second)
-              : [&]() {
-                  CUDAExecutionProviderInfo info{};
-                  info.device_id = cuda_device_id;
-                  info.gpu_mem_limit = gpu_mem_limit;
-                  info.arena_extend_strategy = arena_extend_strategy;
-                  info.cudnn_conv_algo_search = cudnn_conv_algo_search;
-                  info.do_copy_in_default_stream = do_copy_in_default_stream;
-                  info.external_allocator_info = external_allocator_info;
-                  return info;
-                }();
+      CUDAExecutionProviderInfo info{};
+      if (it != provider_options_map.end())
+        GetProviderInfo_CUDA()->CUDAExecutionProviderInfo__FromProviderOptions(it->second, info);
+      else {
+        info.device_id = cuda_device_id;
+        info.gpu_mem_limit = gpu_mem_limit;
+        info.arena_extend_strategy = arena_extend_strategy;
+        info.cudnn_conv_algo_search = cudnn_conv_algo_search;
+        info.do_copy_in_default_stream = do_copy_in_default_stream;
+        info.external_allocator_info = external_allocator_info;
+      }

      // This variable is never initialized because the APIs by which it should be initialized are deprecated; however, they still
      // exist and are in use. Nevertheless, it is used to return CUDAAllocator, hence we must try to initialize it here if we can,
      // since FromProviderOptions might contain an external CUDA allocator.
      external_allocator_info = info.external_allocator_info;
-      RegisterExecutionProvider(
-          sess, *onnxruntime::CreateExecutionProviderFactory_CUDA(info));
+      RegisterExecutionProvider(sess, *GetProviderInfo_CUDA()->CreateExecutionProviderFactory(info));
 #endif
    } else if (type == kRocmExecutionProvider) {
 #ifdef USE_ROCM
@ -649,7 +649,7 @@ static void RegisterExecutionProviders(InferenceSession* sess, const std::vector
                  return info;
                }();

-      // This variable is never initialized because the APIs by which is it should be initialized are deprecated, however they still 
+      // This variable is never initialized because the APIs by which it should be initialized are deprecated; however, they still
      // exist and are in use. Nevertheless, it is used to return CUDAAllocator, hence we must try to initialize it here if we can,
      // since FromProviderOptions might contain an external CUDA allocator.
      external_allocator_info = info.external_allocator_info;