mirror of
https://github.com/saymrwulf/onnxruntime.git
synced 2026-05-31 23:27:43 +00:00
Patching cuda profiler with enhancements (#9214)
This commit is contained in:
parent
4a1b386f7c
commit
ffca0b777b
3 changed files with 39 additions and 21 deletions
|
|
@ -16,6 +16,8 @@ std::atomic_flag CudaProfiler::enabled{0};
|
|||
std::vector<CudaProfiler::KernelStat> CudaProfiler::stats;
|
||||
std::unordered_map<uint32_t, uint64_t> CudaProfiler::id_map;
|
||||
|
||||
#if defined(CUDA_VERSION) && CUDA_VERSION >= 11000
|
||||
|
||||
#define BUF_SIZE (32 * 1024)
|
||||
#define ALIGN_SIZE (8)
|
||||
#define ALIGN_BUFFER(buffer, align) \
|
||||
|
|
@ -62,7 +64,7 @@ void CUPTIAPI CudaProfiler::BufferCompleted(CUcontext, uint32_t, uint8_t* buffer
|
|||
do {
|
||||
status = cuptiActivityGetNextRecord(buffer, validSize, &record);
|
||||
if (status == CUPTI_SUCCESS) {
|
||||
if (CUPTI_ACTIVITY_KIND_KERNEL == record->kind) {
|
||||
if (CUPTI_ACTIVITY_KIND_CONCURRENT_KERNEL == record->kind) {
|
||||
CUpti_ActivityKernel3* kernel = (CUpti_ActivityKernel3*)record;
|
||||
stats.push_back({kernel->name, kernel->streamId,
|
||||
kernel->gridX, kernel->gridY, kernel->gridZ,
|
||||
|
|
@ -93,7 +95,7 @@ bool CudaProfiler::StartProfiling() {
|
|||
if (!enabled.test_and_set()) {
|
||||
if (cuptiActivityEnable(CUPTI_ACTIVITY_KIND_RUNTIME) == CUPTI_SUCCESS &&
|
||||
cuptiActivityEnable(CUPTI_ACTIVITY_KIND_DRIVER) == CUPTI_SUCCESS &&
|
||||
cuptiActivityEnable(CUPTI_ACTIVITY_KIND_KERNEL) == CUPTI_SUCCESS &&
|
||||
cuptiActivityEnable(CUPTI_ACTIVITY_KIND_CONCURRENT_KERNEL) == CUPTI_SUCCESS &&
|
||||
cuptiActivityEnable(CUPTI_ACTIVITY_KIND_MEMCPY) == CUPTI_SUCCESS &&
|
||||
cuptiActivityEnable(CUPTI_ACTIVITY_KIND_EXTERNAL_CORRELATION) == CUPTI_SUCCESS &&
|
||||
cuptiActivityRegisterCallbacks(BufferRequested, BufferCompleted) == CUPTI_SUCCESS) {
|
||||
|
|
@ -179,7 +181,7 @@ void CudaProfiler::Stop(uint64_t) {
|
|||
|
||||
void CudaProfiler::DisableEvents() {
|
||||
cuptiActivityDisable(CUPTI_ACTIVITY_KIND_EXTERNAL_CORRELATION);
|
||||
cuptiActivityDisable(CUPTI_ACTIVITY_KIND_KERNEL);
|
||||
cuptiActivityDisable(CUPTI_ACTIVITY_KIND_CONCURRENT_KERNEL);
|
||||
cuptiActivityDisable(CUPTI_ACTIVITY_KIND_MEMCPY);
|
||||
cuptiActivityDisable(CUPTI_ACTIVITY_KIND_DRIVER);
|
||||
cuptiActivityDisable(CUPTI_ACTIVITY_KIND_RUNTIME);
|
||||
|
|
@ -194,6 +196,20 @@ void CudaProfiler::Clear() {
|
|||
}
|
||||
}
|
||||
|
||||
#else // for cuda 10.x, no profiling
|
||||
|
||||
void CUPTIAPI CudaProfiler::BufferRequested(uint8_t**, size_t*, size_t*) {}
|
||||
void CUPTIAPI CudaProfiler::BufferCompleted(CUcontext, uint32_t, uint8_t*, size_t, size_t) {}
|
||||
bool CudaProfiler::StartProfiling() { return false; }
|
||||
void CudaProfiler::EndProfiling(TimePoint, Events&) {}
|
||||
CudaProfiler::~CudaProfiler() {}
|
||||
void CudaProfiler::Start(uint64_t) {}
|
||||
void CudaProfiler::Stop(uint64_t) {}
|
||||
void CudaProfiler::DisableEvents() {}
|
||||
void CudaProfiler::Clear() {}
|
||||
|
||||
#endif
|
||||
|
||||
} // namespace profiling
|
||||
} // namespace onnxruntime
|
||||
#endif
|
||||
|
|
@ -2,23 +2,7 @@
|
|||
// Licensed under the MIT License.
|
||||
#include "core/common/profiler_common.h"
|
||||
|
||||
#if defined(USE_ROCM) || defined(ENABLE_TRAINING)
|
||||
namespace onnxruntime {
|
||||
|
||||
namespace profiling {
|
||||
|
||||
class CudaProfiler final : public EpProfiler {
|
||||
public:
|
||||
bool StartProfiling() override { return true; }
|
||||
void EndProfiling(TimePoint, Events&) override{};
|
||||
void Start(uint64_t) override{};
|
||||
void Stop(uint64_t) override{};
|
||||
};
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
#else
|
||||
#if !(defined(USE_ROCM) || defined(ENABLE_TRAINING))
|
||||
|
||||
#include "core/platform/ort_mutex.h"
|
||||
#include <cupti.h>
|
||||
|
|
@ -78,4 +62,22 @@ class CudaProfiler final : public EpProfiler {
|
|||
|
||||
} // namespace profiling
|
||||
} // namespace onnxruntime
|
||||
|
||||
#else
|
||||
|
||||
namespace onnxruntime {
|
||||
|
||||
namespace profiling {
|
||||
|
||||
class CudaProfiler final : public EpProfiler {
|
||||
public:
|
||||
bool StartProfiling() override { return true; }
|
||||
void EndProfiling(TimePoint, Events&) override{};
|
||||
void Start(uint64_t) override{};
|
||||
void Stop(uint64_t) override{};
|
||||
};
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
|
|
@ -653,7 +653,7 @@ TEST(InferenceSessionTests, CheckRunProfilerWithSessionOptions) {
|
|||
}
|
||||
}
|
||||
|
||||
#if defined(USE_CUDA) && !defined(ENABLE_TRAINING)
|
||||
#if defined(USE_CUDA) && !defined(ENABLE_TRAINING) && defined(CUDA_VERSION) && CUDA_VERSION >= 11000
|
||||
ASSERT_TRUE(has_kernel_info);
|
||||
#endif
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in a new issue