diff --git a/onnxruntime/core/framework/allocation_planner.cc b/onnxruntime/core/framework/allocation_planner.cc index e7568df886..0f3e856db8 100644 --- a/onnxruntime/core/framework/allocation_planner.cc +++ b/onnxruntime/core/framework/allocation_planner.cc @@ -787,7 +787,7 @@ class PlannerImpl { for (size_t index = 0; index < current_plan.program_counter_start.size(); index += 1) { ORT_ENFORCE((current_plan.program_counter_start[index] > start) || (start == 0)); ORT_ENFORCE(current_plan.program_counter_start[index] <= current_plan.program_counter_end[index]); - ORT_ENFORCE((current_plan.program_counter_start[index] < SIZE_MAX) || (index == 0)); + ORT_ENFORCE(current_plan.program_counter_start[index] < SIZE_MAX); ORT_ENFORCE((current_plan.program_counter_end[index] > 0) || (index == 0)); start = current_plan.program_counter_start[index]; diff --git a/onnxruntime/core/framework/execution_frame.cc b/onnxruntime/core/framework/execution_frame.cc index 6aaa3a756d..765b7f110e 100644 --- a/onnxruntime/core/framework/execution_frame.cc +++ b/onnxruntime/core/framework/execution_frame.cc @@ -292,7 +292,7 @@ ExecutionFrame::ExecutionFrame(const std::vector& feed_mlvalue_idxs, const } // log size of activation. Keep it commented out for now to avoid log flooding. 
- VLOGS(session_state_.Logger(), 1) << "**** Allocated memory for activations, size: " <<mem_patterns_->patterns[i].PeakSize(); + // VLOGS(session_state_.Logger(), 1) << "**** Allocated memory for activations, size: " <<mem_patterns_->patterns[i].PeakSize(); } } } diff --git a/onnxruntime/core/framework/mem_pattern_planner.h b/onnxruntime/core/framework/mem_pattern_planner.h index a78b56d95f..3f45ac5d71 100644 --- a/onnxruntime/core/framework/mem_pattern_planner.h +++ b/onnxruntime/core/framework/mem_pattern_planner.h @@ -70,11 +70,7 @@ class MemPatternPlanner { size_t current = 0; size_t waste_bytes = std::numeric_limits<size_t>::max(); size_t best_offset = 0; - if (!blocks_.empty()) { - auto last_block = allocs_[*blocks_.rbegin()]; - best_offset = last_block.block_.offset_ + last_block.block_.size_; - } - + bool best_offset_found = false; for (auto it = blocks_.begin(); it != blocks_.end(); it++) { // Memory block can be re-used as long as there is no overlap between their time schedules. if (allocs_[*it].reuse_ && !OverlappingTimeSchedules(program_counter_start, program_counter_end, @@ -87,16 +83,25 @@ class MemPatternPlanner { if (gap >= size && (gap - size) < waste_bytes) { waste_bytes = gap - size; best_offset = current; + best_offset_found = true; } } current = std::max(current, allocs_[*it].block_.offset_ + allocs_[*it].block_.size_); } + ORT_ENFORCE(current <= buffer_size_); + if (current < buffer_size_) { auto gap = buffer_size_ - current; - if ((gap >= size) && ((gap - size) < waste_bytes)) + if ((gap >= size) && ((gap - size) < waste_bytes)) { best_offset = current; + best_offset_found = true; + } + } + + if (!best_offset_found) { + best_offset = current; } // we only need to bounds check the addition of size to best_offset as that is the only time we extend @@ -128,10 +133,7 @@ class MemPatternPlanner { size_t current = 0; size_t waste_bytes = std::numeric_limits<size_t>::max(); size_t best_offset = 0; - if (!blocks_.empty()) { - auto last_block = allocs_[*blocks_.rbegin()]; - best_offset = 
last_block.block_.offset_ + last_block.block_.size_; - } + bool best_offset_found = false; for (auto it = blocks_.begin(); it != blocks_.end(); it++) { if (allocs_[*it].block_.offset_ >= current) { @@ -139,15 +141,24 @@ class MemPatternPlanner { if (gap >= size && (gap - size) < waste_bytes) { waste_bytes = gap - size; best_offset = current; + best_offset_found = true; } } current = std::max(current, allocs_[*it].block_.offset_ + allocs_[*it].block_.size_); } + ORT_ENFORCE(current <= buffer_size_); + if (current < buffer_size_) { auto gap = buffer_size_ - current; - if ((gap >= size) && ((gap - size) < waste_bytes)) + if ((gap >= size) && ((gap - size) < waste_bytes)) { best_offset = current; + best_offset_found = true; + } + } + + if (!best_offset_found) { + best_offset = current; } // we only need to bounds check the addition of size to best_offset as that is the only time we extend