From b43254282fb0699992bcfca63ceed7f74168a663 Mon Sep 17 00:00:00 2001
From: Scott McKay
Date: Thu, 19 Sep 2019 17:56:39 +1000
Subject: [PATCH] Handle bad alloc exception in bfc arena (#1846)

* Handle std::bad_alloc when growing arena.
Allow more than one attempt at reducing the buffer if allocation fails. More memory may have become available so never trying to backpedal more than once means we potentially fail when a large enough buffer could have been allocated.
---
 onnxruntime/core/framework/bfc_arena.cc | 34 +++++++++++++++++--------
 onnxruntime/core/framework/bfc_arena.h  |  4 ---
 2 files changed, 23 insertions(+), 15 deletions(-)

diff --git a/onnxruntime/core/framework/bfc_arena.cc b/onnxruntime/core/framework/bfc_arena.cc
index a795d28b4c..4b6fdb647c 100644
--- a/onnxruntime/core/framework/bfc_arena.cc
+++ b/onnxruntime/core/framework/bfc_arena.cc
@@ -75,18 +75,30 @@ bool BFCArena::Extend(size_t rounded_bytes) {
 
   // Try allocating.
   size_t bytes = std::min(curr_region_allocation_bytes_, available_bytes);
-  void* mem_addr = device_allocator_->Alloc(bytes);
-  if (mem_addr == nullptr && !started_backpedal_) {
-    // Only backpedal once.
-    started_backpedal_ = true;
+  auto safe_alloc = [this](size_t alloc_bytes) {
+    void* new_mem = nullptr;
+    try {
+      new_mem = device_allocator_->Alloc(alloc_bytes);
+    } catch (const std::bad_alloc&) {
+      // attempted allocation can throw std::bad_alloc. we want to treat this the same as if it returned nullptr
+      // so swallow the exception
+    }
+    return new_mem;
+  };
+
+  void* mem_addr = safe_alloc(bytes);
+
+  if (mem_addr == nullptr) {
 
     static constexpr float kBackpedalFactor = 0.9f;
 
     // Try allocating less memory.
     while (mem_addr == nullptr) {
       bytes = RoundedBytes(static_cast<size_t>(bytes * kBackpedalFactor));
-      if (bytes < rounded_bytes) break;
-      mem_addr = device_allocator_->Alloc(bytes);
+      if (bytes < rounded_bytes)
+        break;
+
+      mem_addr = safe_alloc(bytes);
     }
   }
 
@@ -94,12 +106,12 @@ bool BFCArena::Extend(size_t rounded_bytes) {
     return false;
   }
 
+  // we allocated the same number of bytes as the current region, so we have 2x that now
   if (!increased_allocation) {
-    // Increase the region size of the next required allocation.
     curr_region_allocation_bytes_ *= 2;
   }
 
-  LOGS_DEFAULT(INFO) << "Extending allocation by " << bytes
+  LOGS_DEFAULT(INFO) << "Extended allocation by " << bytes
                      << " bytes.";
 
   stats_.total_allocated_bytes += bytes;
@@ -227,9 +239,9 @@ void* BFCArena::AllocateRawInternal(size_t num_bytes,
   // couldn't find one. This means we must have run out of memory,
   // Dump the memory log for analysis.
   if (dump_log_on_failure) {
-    LOGS_DEFAULT(WARNING) << "BFC Arena ran out of memory trying "
-                          << "to allocate " << num_bytes
-                          << ". Current allocation summary follows.";
+    LOGS_DEFAULT(ERROR) << "BFC Arena ran out of memory trying "
+                        << "to allocate " << num_bytes
+                        << ". Current allocation summary follows.";
     DumpMemoryLog(rounded_bytes);
   }
   return nullptr;
diff --git a/onnxruntime/core/framework/bfc_arena.h b/onnxruntime/core/framework/bfc_arena.h
index 5136c1c6cc..1097f696e2 100644
--- a/onnxruntime/core/framework/bfc_arena.h
+++ b/onnxruntime/core/framework/bfc_arena.h
@@ -457,10 +457,6 @@ class BFCArena : public IArenaAllocator {
   // The size of the current region allocation.
   size_t curr_region_allocation_bytes_;
 
-  // An indicator that expansion of a region has hit the limits
-  // of the available memory.
-  bool started_backpedal_ = false;
-
   std::unique_ptr<IDeviceAllocator> device_allocator_;
 
   mutable OrtMutex lock_;