mirror of
https://github.com/saymrwulf/onnxruntime.git
synced 2026-07-02 03:55:34 +00:00
Lazily get allocator when allocating an MLValue (#4276)
According to profiling in #4267, getting the allocator can account for a large fraction of overhead when accessing a kernel output, due to STL container operations. The allocator isn't used when (i) we're not creating a fence, and (ii) we have a memory pattern and a pre-allocated buffer, so we can avoid this overhead.
This commit is contained in:
parent
a490beedf1
commit
a541d28fb4
1 changed file with 5 additions and 2 deletions
|
|
@ -324,11 +324,13 @@ Status ExecutionFrame::AllocateMLValueTensorSelfOwnBufferHelper(OrtValue& ort_va
|
|||
return Status(ONNXRUNTIME, FAIL, "size overflow");
|
||||
}
|
||||
|
||||
auto alloc = GetAllocator(location);
|
||||
|
||||
// Lazily get the allocator only if needed.
|
||||
AllocatorPtr alloc = nullptr;
|
||||
|
||||
// create fence if needed
|
||||
if (create_fence) {
|
||||
ORT_ENFORCE(ort_value.Fence() == nullptr);
|
||||
alloc = GetAllocator(location);
|
||||
FencePtr f = alloc->CreateFence(&session_state_);
|
||||
// it is OK for the fence to be nullptr if the execution provider has no async execution,
|
||||
// and allocator::CreateFence returns nullptr
|
||||
|
|
@ -370,6 +372,7 @@ Status ExecutionFrame::AllocateMLValueTensorSelfOwnBufferHelper(OrtValue& ort_va
|
|||
}
|
||||
|
||||
//no memory pattern, or the pattern is not correct.
|
||||
if (!alloc) alloc = GetAllocator(location);
|
||||
std::unique_ptr<Tensor> p_tensor = onnxruntime::make_unique<Tensor>(element_type, shape, alloc);
|
||||
|
||||
{
|
||||
|
|
|
|||
Loading…
Reference in a new issue