From cb8d8464bc6ee39a894ecc76e05574095f5eb489 Mon Sep 17 00:00:00 2001 From: "M. Zeeshan Siddiqui" Date: Fri, 26 Feb 2021 20:13:02 -0800 Subject: [PATCH] Do not create compute stream when external CUDA allocator is used. (#6833) --- onnxruntime/core/providers/cuda/cuda_allocator.h | 2 +- .../core/providers/cuda/cuda_execution_provider.cc | 10 +++++++++- .../providers/cuda/cuda_execution_provider_info.cc | 13 ++++++++----- .../providers/cuda/cuda_execution_provider_info.h | 11 ++++++++--- 4 files changed, 26 insertions(+), 10 deletions(-) diff --git a/onnxruntime/core/providers/cuda/cuda_allocator.h b/onnxruntime/core/providers/cuda/cuda_allocator.h index 0dd6d1e300..097662e9d5 100644 --- a/onnxruntime/core/providers/cuda/cuda_allocator.h +++ b/onnxruntime/core/providers/cuda/cuda_allocator.h @@ -28,7 +28,7 @@ class CUDAExternalAllocator : public CUDAAllocator { typedef void (*ExternalFree)(void* p); public: - CUDAExternalAllocator(OrtDevice::DeviceId device_id, const char* name, void* alloc, void* free) + CUDAExternalAllocator(OrtDevice::DeviceId device_id, const char* name, const void* alloc, const void* free) : CUDAAllocator(device_id, name) { alloc_ = reinterpret_cast(alloc); free_ = reinterpret_cast(free); diff --git a/onnxruntime/core/providers/cuda/cuda_execution_provider.cc b/onnxruntime/core/providers/cuda/cuda_execution_provider.cc index 0e2e542a48..60e9d9d315 100644 --- a/onnxruntime/core/providers/cuda/cuda_execution_provider.cc +++ b/onnxruntime/core/providers/cuda/cuda_execution_provider.cc @@ -127,11 +127,19 @@ CUDAExecutionProvider::CUDAExecutionProvider(const CUDAExecutionProviderInfo& in // must wait GPU idle, otherwise cudaGetDeviceProperties might fail CUDA_CALL_THROW(cudaDeviceSynchronize()); CUDA_CALL_THROW(cudaGetDeviceProperties(&device_prop_, info_.device_id)); + + // This scenario is not supported. + ORT_ENFORCE(!(info.has_user_compute_stream && info.external_allocator_info.UseExternalAllocator())); + if (info.has_user_compute_stream) { external_stream_ = true; stream_ = static_cast(info.user_compute_stream); } else { - CUDA_CALL_THROW(cudaStreamCreateWithFlags(&stream_, cudaStreamNonBlocking)); + if (info.external_allocator_info.UseExternalAllocator()) { + stream_ = nullptr; + } else { + CUDA_CALL_THROW(cudaStreamCreateWithFlags(&stream_, cudaStreamNonBlocking)); + } } size_t free = 0; diff --git a/onnxruntime/core/providers/cuda/cuda_execution_provider_info.cc b/onnxruntime/core/providers/cuda/cuda_execution_provider_info.cc index 1b94ace424..927e6687fd 100644 --- a/onnxruntime/core/providers/cuda/cuda_execution_provider_info.cc +++ b/onnxruntime/core/providers/cuda/cuda_execution_provider_info.cc @@ -36,7 +36,8 @@ const EnumNameMapping arena_extend_strategy_mapping{ CUDAExecutionProviderInfo CUDAExecutionProviderInfo::FromProviderOptions(const ProviderOptions& options) { CUDAExecutionProviderInfo info{}; - + void* alloc = nullptr; + void* free = nullptr; ORT_THROW_IF_ERROR( ProviderOptionsParser{} .AddValueParser( @@ -55,18 +56,18 @@ CUDAExecutionProviderInfo CUDAExecutionProviderInfo::FromProviderOptions(const P }) .AddValueParser( cuda::provider_option_names::kcudaExternalAlloc, - [&info](const std::string& value_str) -> Status { + [&alloc](const std::string& value_str) -> Status { size_t address; ORT_RETURN_IF_ERROR(ParseStringWithClassicLocale(value_str, address)); - info.external_allocator_info.alloc = reinterpret_cast(address); + alloc = reinterpret_cast(address); return Status::OK(); }) .AddValueParser( cuda::provider_option_names::kcudaExternalFree, - [&info](const std::string& value_str) -> Status { + [&free](const std::string& value_str) -> Status { size_t address; ORT_RETURN_IF_ERROR(ParseStringWithClassicLocale(value_str, address)); - info.external_allocator_info.free = reinterpret_cast(address); + free = reinterpret_cast(address); return Status::OK(); }) .AddAssignmentToReference(cuda::provider_option_names::kMemLimit, info.cuda_mem_limit) @@ -79,6 +80,8 @@ CUDAExecutionProviderInfo CUDAExecutionProviderInfo::FromProviderOptions(const P .AddAssignmentToReference(cuda::provider_option_names::kDoCopyInDefaultStream, info.do_copy_in_default_stream) .Parse(options)); + CUDAExecutionProviderExternalAllocatorInfo alloc_info{alloc, free}; + info.external_allocator_info = alloc_info; return info; } diff --git a/onnxruntime/core/providers/cuda/cuda_execution_provider_info.h b/onnxruntime/core/providers/cuda/cuda_execution_provider_info.h index d398871e5d..d08fee602b 100644 --- a/onnxruntime/core/providers/cuda/cuda_execution_provider_info.h +++ b/onnxruntime/core/providers/cuda/cuda_execution_provider_info.h @@ -13,15 +13,20 @@ namespace onnxruntime { // Information needed to construct CUDA execution providers. struct CUDAExecutionProviderExternalAllocatorInfo { - void* alloc{nullptr}; - void* free{nullptr}; + const void* alloc{nullptr}; + const void* free{nullptr}; CUDAExecutionProviderExternalAllocatorInfo() { alloc = nullptr; free = nullptr; } - bool UseExternalAllocator() { + CUDAExecutionProviderExternalAllocatorInfo(void* a, void* f) { + alloc = a; + free = f; + } + + bool UseExternalAllocator() const { return (alloc != nullptr) && (free != nullptr); } };