mirror of
https://github.com/saymrwulf/onnxruntime.git
synced 2026-05-16 21:00:14 +00:00
Do not create compute stream when external CUDA allocator is used. (#6833)
This commit is contained in:
parent
b4b87ac7a0
commit
cb8d8464bc
4 changed files with 26 additions and 10 deletions
|
|
@@ -28,7 +28,7 @@ class CUDAExternalAllocator : public CUDAAllocator {
|
|||
typedef void (*ExternalFree)(void* p);
|
||||
|
||||
public:
|
||||
CUDAExternalAllocator(OrtDevice::DeviceId device_id, const char* name, void* alloc, void* free)
|
||||
CUDAExternalAllocator(OrtDevice::DeviceId device_id, const char* name, const void* alloc, const void* free)
|
||||
: CUDAAllocator(device_id, name) {
|
||||
alloc_ = reinterpret_cast<ExternalAlloc>(alloc);
|
||||
free_ = reinterpret_cast<ExternalFree>(free);
|
||||
|
|
|
|||
|
|
@@ -127,11 +127,19 @@ CUDAExecutionProvider::CUDAExecutionProvider(const CUDAExecutionProviderInfo& in
|
|||
// must wait GPU idle, otherwise cudaGetDeviceProperties might fail
|
||||
CUDA_CALL_THROW(cudaDeviceSynchronize());
|
||||
CUDA_CALL_THROW(cudaGetDeviceProperties(&device_prop_, info_.device_id));
|
||||
|
||||
// This scenario is not supported.
|
||||
ORT_ENFORCE(!(info.has_user_compute_stream && info.external_allocator_info.UseExternalAllocator()));
|
||||
|
||||
if (info.has_user_compute_stream) {
|
||||
external_stream_ = true;
|
||||
stream_ = static_cast<cudaStream_t>(info.user_compute_stream);
|
||||
} else {
|
||||
CUDA_CALL_THROW(cudaStreamCreateWithFlags(&stream_, cudaStreamNonBlocking));
|
||||
if (info.external_allocator_info.UseExternalAllocator()) {
|
||||
stream_ = nullptr;
|
||||
} else {
|
||||
CUDA_CALL_THROW(cudaStreamCreateWithFlags(&stream_, cudaStreamNonBlocking));
|
||||
}
|
||||
}
|
||||
|
||||
size_t free = 0;
|
||||
|
|
|
|||
|
|
@@ -36,7 +36,8 @@ const EnumNameMapping<ArenaExtendStrategy> arena_extend_strategy_mapping{
|
|||
|
||||
CUDAExecutionProviderInfo CUDAExecutionProviderInfo::FromProviderOptions(const ProviderOptions& options) {
|
||||
CUDAExecutionProviderInfo info{};
|
||||
|
||||
void* alloc = nullptr;
|
||||
void* free = nullptr;
|
||||
ORT_THROW_IF_ERROR(
|
||||
ProviderOptionsParser{}
|
||||
.AddValueParser(
|
||||
|
|
@@ -55,18 +56,18 @@ CUDAExecutionProviderInfo CUDAExecutionProviderInfo::FromProviderOptions(const P
|
|||
})
|
||||
.AddValueParser(
|
||||
cuda::provider_option_names::kcudaExternalAlloc,
|
||||
[&info](const std::string& value_str) -> Status {
|
||||
[&alloc](const std::string& value_str) -> Status {
|
||||
size_t address;
|
||||
ORT_RETURN_IF_ERROR(ParseStringWithClassicLocale(value_str, address));
|
||||
info.external_allocator_info.alloc = reinterpret_cast<void*>(address);
|
||||
alloc = reinterpret_cast<void*>(address);
|
||||
return Status::OK();
|
||||
})
|
||||
.AddValueParser(
|
||||
cuda::provider_option_names::kcudaExternalFree,
|
||||
[&info](const std::string& value_str) -> Status {
|
||||
[&free](const std::string& value_str) -> Status {
|
||||
size_t address;
|
||||
ORT_RETURN_IF_ERROR(ParseStringWithClassicLocale(value_str, address));
|
||||
info.external_allocator_info.free = reinterpret_cast<void*>(address);
|
||||
free = reinterpret_cast<void*>(address);
|
||||
return Status::OK();
|
||||
})
|
||||
.AddAssignmentToReference(cuda::provider_option_names::kMemLimit, info.cuda_mem_limit)
|
||||
|
|
@@ -79,6 +80,8 @@ CUDAExecutionProviderInfo CUDAExecutionProviderInfo::FromProviderOptions(const P
|
|||
.AddAssignmentToReference(cuda::provider_option_names::kDoCopyInDefaultStream, info.do_copy_in_default_stream)
|
||||
.Parse(options));
|
||||
|
||||
CUDAExecutionProviderExternalAllocatorInfo alloc_info{alloc, free};
|
||||
info.external_allocator_info = alloc_info;
|
||||
return info;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@@ -13,15 +13,20 @@
|
|||
namespace onnxruntime {
|
||||
// Information needed to construct CUDA execution providers.
|
||||
struct CUDAExecutionProviderExternalAllocatorInfo {
|
||||
void* alloc{nullptr};
|
||||
void* free{nullptr};
|
||||
const void* alloc{nullptr};
|
||||
const void* free{nullptr};
|
||||
|
||||
CUDAExecutionProviderExternalAllocatorInfo() {
|
||||
alloc = nullptr;
|
||||
free = nullptr;
|
||||
}
|
||||
|
||||
bool UseExternalAllocator() {
|
||||
CUDAExecutionProviderExternalAllocatorInfo(void* a, void* f) {
|
||||
alloc = a;
|
||||
free = f;
|
||||
}
|
||||
|
||||
bool UseExternalAllocator() const {
|
||||
return (alloc != nullptr) && (free != nullptr);
|
||||
}
|
||||
};
|
||||
|
|
|
|||
Loading…
Reference in a new issue