Do not create compute stream when external CUDA allocator is used. (#6833)

2026-05-16 21:00:14 +00:00 · 2021-02-26 20:13:02 -08:00 · 2021-02-26 20:13:02 -08:00 · cb8d8464bc
commit cb8d8464bc
parent b4b87ac7a0
4 changed files with 26 additions and 10 deletions
--- a/onnxruntime/core/providers/cuda/cuda_allocator.h
+++ b/onnxruntime/core/providers/cuda/cuda_allocator.h
@ -28,7 +28,7 @@ class CUDAExternalAllocator : public CUDAAllocator {
  typedef void (*ExternalFree)(void* p);

 public:
-  CUDAExternalAllocator(OrtDevice::DeviceId device_id, const char* name, void* alloc, void* free)
+  CUDAExternalAllocator(OrtDevice::DeviceId device_id, const char* name, const void* alloc, const void* free)
      : CUDAAllocator(device_id, name) {
    alloc_ = reinterpret_cast<ExternalAlloc>(alloc);
    free_ = reinterpret_cast<ExternalFree>(free);
--- a/onnxruntime/core/providers/cuda/cuda_execution_provider.cc
+++ b/onnxruntime/core/providers/cuda/cuda_execution_provider.cc
@ -127,11 +127,19 @@ CUDAExecutionProvider::CUDAExecutionProvider(const CUDAExecutionProviderInfo& in
  // must wait GPU idle, otherwise cudaGetDeviceProperties might fail
  CUDA_CALL_THROW(cudaDeviceSynchronize());
  CUDA_CALL_THROW(cudaGetDeviceProperties(&device_prop_, info_.device_id));
+
+  // Supplying both a user compute stream and an external allocator is not supported.
+  ORT_ENFORCE(!(info.has_user_compute_stream && info.external_allocator_info.UseExternalAllocator()));
+
  if (info.has_user_compute_stream) {
    external_stream_ = true;
    stream_ = static_cast<cudaStream_t>(info.user_compute_stream);
  } else {
-    CUDA_CALL_THROW(cudaStreamCreateWithFlags(&stream_, cudaStreamNonBlocking));
+    if (info.external_allocator_info.UseExternalAllocator()) {
+      stream_ = nullptr;
+    } else {
+      CUDA_CALL_THROW(cudaStreamCreateWithFlags(&stream_, cudaStreamNonBlocking));
+    }
  }

  size_t free = 0;
--- a/onnxruntime/core/providers/cuda/cuda_execution_provider_info.cc
+++ b/onnxruntime/core/providers/cuda/cuda_execution_provider_info.cc
@ -36,7 +36,8 @@ const EnumNameMapping<ArenaExtendStrategy> arena_extend_strategy_mapping{

 CUDAExecutionProviderInfo CUDAExecutionProviderInfo::FromProviderOptions(const ProviderOptions& options) {
  CUDAExecutionProviderInfo info{};
-
+  void* alloc = nullptr;
+  void* free = nullptr;
  ORT_THROW_IF_ERROR(
      ProviderOptionsParser{}
          .AddValueParser(
@ -55,18 +56,18 @@ CUDAExecutionProviderInfo CUDAExecutionProviderInfo::FromProviderOptions(const P
              })
          .AddValueParser(
              cuda::provider_option_names::kcudaExternalAlloc,
-              [&info](const std::string& value_str) -> Status {
+              [&alloc](const std::string& value_str) -> Status {
                size_t address;
                ORT_RETURN_IF_ERROR(ParseStringWithClassicLocale(value_str, address));
-                info.external_allocator_info.alloc  = reinterpret_cast<void*>(address);
+                alloc  = reinterpret_cast<void*>(address);
                return Status::OK();
              })
          .AddValueParser(
              cuda::provider_option_names::kcudaExternalFree,
-              [&info](const std::string& value_str) -> Status {
+              [&free](const std::string& value_str) -> Status {
                size_t address;
                ORT_RETURN_IF_ERROR(ParseStringWithClassicLocale(value_str, address));
-                info.external_allocator_info.free  = reinterpret_cast<void*>(address);
+                free  = reinterpret_cast<void*>(address);
                return Status::OK();
              })
          .AddAssignmentToReference(cuda::provider_option_names::kMemLimit, info.cuda_mem_limit)
@ -79,6 +80,8 @@ CUDAExecutionProviderInfo CUDAExecutionProviderInfo::FromProviderOptions(const P
          .AddAssignmentToReference(cuda::provider_option_names::kDoCopyInDefaultStream, info.do_copy_in_default_stream)
          .Parse(options));

+  CUDAExecutionProviderExternalAllocatorInfo alloc_info{alloc, free};
+  info.external_allocator_info = alloc_info;
  return info;
 }

--- a/onnxruntime/core/providers/cuda/cuda_execution_provider_info.h
+++ b/onnxruntime/core/providers/cuda/cuda_execution_provider_info.h
@ -13,15 +13,20 @@
 namespace onnxruntime {
 // Information needed to construct CUDA execution providers.
 struct CUDAExecutionProviderExternalAllocatorInfo {
-  void* alloc{nullptr};
-  void* free{nullptr};
+  const void* alloc{nullptr};
+  const void* free{nullptr};

  CUDAExecutionProviderExternalAllocatorInfo() {
    alloc = nullptr;
    free = nullptr;
  }

-  bool UseExternalAllocator() {
+  CUDAExecutionProviderExternalAllocatorInfo(void* a, void* f) {
+    alloc = a;
+    free = f;
+  }
+
+  bool UseExternalAllocator() const {
    return (alloc != nullptr) && (free != nullptr);
  }
 };