Do not create compute stream when external CUDA allocator is used. (#6833)

This commit is contained in:
M. Zeeshan Siddiqui 2021-02-26 20:13:02 -08:00 committed by GitHub
parent b4b87ac7a0
commit cb8d8464bc
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 26 additions and 10 deletions

View file

@ -28,7 +28,7 @@ class CUDAExternalAllocator : public CUDAAllocator {
typedef void (*ExternalFree)(void* p);
public:
CUDAExternalAllocator(OrtDevice::DeviceId device_id, const char* name, void* alloc, void* free)
CUDAExternalAllocator(OrtDevice::DeviceId device_id, const char* name, const void* alloc, const void* free)
: CUDAAllocator(device_id, name) {
alloc_ = reinterpret_cast<ExternalAlloc>(alloc);
free_ = reinterpret_cast<ExternalFree>(free);

View file

@ -127,11 +127,19 @@ CUDAExecutionProvider::CUDAExecutionProvider(const CUDAExecutionProviderInfo& in
// must wait GPU idle, otherwise cudaGetDeviceProperties might fail
CUDA_CALL_THROW(cudaDeviceSynchronize());
CUDA_CALL_THROW(cudaGetDeviceProperties(&device_prop_, info_.device_id));
// This scenario is not supported.
ORT_ENFORCE(!(info.has_user_compute_stream && info.external_allocator_info.UseExternalAllocator()));
if (info.has_user_compute_stream) {
external_stream_ = true;
stream_ = static_cast<cudaStream_t>(info.user_compute_stream);
} else {
CUDA_CALL_THROW(cudaStreamCreateWithFlags(&stream_, cudaStreamNonBlocking));
if (info.external_allocator_info.UseExternalAllocator()) {
stream_ = nullptr;
} else {
CUDA_CALL_THROW(cudaStreamCreateWithFlags(&stream_, cudaStreamNonBlocking));
}
}
size_t free = 0;

View file

@ -36,7 +36,8 @@ const EnumNameMapping<ArenaExtendStrategy> arena_extend_strategy_mapping{
CUDAExecutionProviderInfo CUDAExecutionProviderInfo::FromProviderOptions(const ProviderOptions& options) {
CUDAExecutionProviderInfo info{};
void* alloc = nullptr;
void* free = nullptr;
ORT_THROW_IF_ERROR(
ProviderOptionsParser{}
.AddValueParser(
@ -55,18 +56,18 @@ CUDAExecutionProviderInfo CUDAExecutionProviderInfo::FromProviderOptions(const P
})
.AddValueParser(
cuda::provider_option_names::kcudaExternalAlloc,
[&info](const std::string& value_str) -> Status {
[&alloc](const std::string& value_str) -> Status {
size_t address;
ORT_RETURN_IF_ERROR(ParseStringWithClassicLocale(value_str, address));
info.external_allocator_info.alloc = reinterpret_cast<void*>(address);
alloc = reinterpret_cast<void*>(address);
return Status::OK();
})
.AddValueParser(
cuda::provider_option_names::kcudaExternalFree,
[&info](const std::string& value_str) -> Status {
[&free](const std::string& value_str) -> Status {
size_t address;
ORT_RETURN_IF_ERROR(ParseStringWithClassicLocale(value_str, address));
info.external_allocator_info.free = reinterpret_cast<void*>(address);
free = reinterpret_cast<void*>(address);
return Status::OK();
})
.AddAssignmentToReference(cuda::provider_option_names::kMemLimit, info.cuda_mem_limit)
@ -79,6 +80,8 @@ CUDAExecutionProviderInfo CUDAExecutionProviderInfo::FromProviderOptions(const P
.AddAssignmentToReference(cuda::provider_option_names::kDoCopyInDefaultStream, info.do_copy_in_default_stream)
.Parse(options));
CUDAExecutionProviderExternalAllocatorInfo alloc_info{alloc, free};
info.external_allocator_info = alloc_info;
return info;
}

View file

@ -13,15 +13,20 @@
namespace onnxruntime {
// Information needed to construct CUDA execution providers.
struct CUDAExecutionProviderExternalAllocatorInfo {
void* alloc{nullptr};
void* free{nullptr};
const void* alloc{nullptr};
const void* free{nullptr};
CUDAExecutionProviderExternalAllocatorInfo() {
alloc = nullptr;
free = nullptr;
}
bool UseExternalAllocator() {
CUDAExecutionProviderExternalAllocatorInfo(void* a, void* f) {
alloc = a;
free = f;
}
bool UseExternalAllocator() const {
return (alloc != nullptr) && (free != nullptr);
}
};