[ROCm] clear last status if hipErrorNotReady (#8358)

* [ROCm] clear last status if hipErrorNotReady

* use hipEventDisableTiming in rocm_fence.cc

* fix syntax errors

* destroy event before handle becomes invalid
This commit is contained in:
Jeff Daily 2021-07-13 15:58:40 -07:00 committed by GitHub
parent 178c139718
commit 8d8db7c9f0
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 39 additions and 12 deletions

View file

@ -279,14 +279,22 @@ Status ROCMExecutionProvider::OnRunStart() {
while (it != deferred_release_cpu_ptr_.end()) {
auto& e = it->first;
auto& v = it->second;
// note that hipEventQuery returns hipSucess before first hipEventRecord
if (v.recorded && hipSuccess == hipEventQuery(e)) {
for (auto p : v.cpu_ptrs) {
cpu_alloc->Free(p);
// note that hipEventQuery returns hipSuccess before first hipEventRecord
if (v.recorded) {
auto event_query_status = hipEventQuery(e);
if (event_query_status == hipSuccess) {
for (auto p : v.cpu_ptrs) {
cpu_alloc->Free(p);
}
HIP_RETURN_IF_ERROR(hipEventDestroy(e));
it = deferred_release_cpu_ptr_.erase(it);
} else if (event_query_status == hipErrorNotReady) {
// ignore and clear the error if not ready
hipGetLastError();
it++;
} else {
HIP_RETURN_IF_ERROR(event_query_status);
}
hipEvent_t expired_event = it->first;
it = deferred_release_cpu_ptr_.erase(it);
HIP_RETURN_IF_ERROR(hipEventDestroy(expired_event));
} else {
++it;
}

View file

@ -4,14 +4,17 @@
#include "core/providers/rocm/rocm_fence.h"
#include "core/graph/constants.h"
#include "core/providers/rocm/gpu_data_transfer.h"
#include "core/providers/rocm/rocm_common.h"
#include "core/providers/rocm/gpu_data_transfer.h"
namespace onnxruntime {
ROCMFence::ROCMFence(const GPUDataTransfer* data_transfer) : data_transfer_(data_transfer) {
HIP_CALL_THROW(hipEventCreate(&read_event_));
HIP_CALL_THROW(hipEventCreate(&write_event_));
// NOTE: hipEventBlockingSync may lead to longer wait times because of thread yielding/switching in the kernel.
// If lower CPU usage is more important than latency, we should use this flag to avoid the spin-loop in WaitOnCPU.
int event_flags = /*hipEventBlockingSync |*/ hipEventDisableTiming;
HIP_CALL_THROW(hipEventCreateWithFlags(&read_event_, event_flags));
HIP_CALL_THROW(hipEventCreateWithFlags(&write_event_, event_flags));
}
ROCMFence::~ROCMFence() {
@ -43,8 +46,24 @@ void ROCMFence::BeforeUsingAsOutput(onnxruntime::ProviderType provider_type, int
}
bool ROCMFence::CanRelease() {
return hipEventQuery(read_event_) == hipSuccess &&
hipEventQuery(write_event_) == hipSuccess;
hipError_t status;
status = hipEventQuery(read_event_);
if (status == hipErrorNotReady) {
// ignore and clear the error if not ready
hipGetLastError();
return false;
} else if (status != hipSuccess) {
RocmCall<hipError_t, true>(status, "hipEventQuery(read_event_)", "HIP", hipSuccess);
}
status = hipEventQuery(write_event_);
if (status == hipErrorNotReady) {
// ignore and clear the error if not ready
hipGetLastError();
return false;
} else if (status != hipSuccess) {
RocmCall<hipError_t, true>(status, "hipEventQuery(write_event_)", "HIP", hipSuccess);
}
return true;
}
void ROCMFence::AfterUsedAsInput(int queue_id) {