Patching cuda profiler with enhancements (#9214)

This commit is contained in:
RandySheriffH 2021-09-29 21:02:09 -07:00 committed by GitHub
parent 4a1b386f7c
commit ffca0b777b
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 39 additions and 21 deletions

View file

@ -16,6 +16,8 @@ std::atomic_flag CudaProfiler::enabled{0};
// Storage for the static member that accumulates kernel statistics;
// BufferCompleted appends one KernelStat per kernel activity record it reads.
std::vector<CudaProfiler::KernelStat> CudaProfiler::stats;
// Storage for the static 32-bit -> 64-bit id lookup table.
// NOTE(review): presumably maps a CUPTI correlation id to a profiler-side
// event id; confirm against the code that fills it (not visible here).
std::unordered_map<uint32_t, uint64_t> CudaProfiler::id_map;
#if defined(CUDA_VERSION) && CUDA_VERSION >= 11000
// 32 * 1024 = 32768.
// NOTE(review): presumably the byte size of each activity buffer handed out
// in BufferRequested; confirm at the use site.
#define BUF_SIZE (32 * 1024)
// NOTE(review): presumably the byte alignment passed to ALIGN_BUFFER for the
// activity buffers; confirm at the use site.
#define ALIGN_SIZE (8)
#define ALIGN_BUFFER(buffer, align) \
@ -62,7 +64,7 @@ void CUPTIAPI CudaProfiler::BufferCompleted(CUcontext, uint32_t, uint8_t* buffer
do {
status = cuptiActivityGetNextRecord(buffer, validSize, &record);
if (status == CUPTI_SUCCESS) {
if (CUPTI_ACTIVITY_KIND_KERNEL == record->kind) {
if (CUPTI_ACTIVITY_KIND_CONCURRENT_KERNEL == record->kind) {
CUpti_ActivityKernel3* kernel = (CUpti_ActivityKernel3*)record;
stats.push_back({kernel->name, kernel->streamId,
kernel->gridX, kernel->gridY, kernel->gridZ,
@ -93,7 +95,7 @@ bool CudaProfiler::StartProfiling() {
if (!enabled.test_and_set()) {
if (cuptiActivityEnable(CUPTI_ACTIVITY_KIND_RUNTIME) == CUPTI_SUCCESS &&
cuptiActivityEnable(CUPTI_ACTIVITY_KIND_DRIVER) == CUPTI_SUCCESS &&
cuptiActivityEnable(CUPTI_ACTIVITY_KIND_KERNEL) == CUPTI_SUCCESS &&
cuptiActivityEnable(CUPTI_ACTIVITY_KIND_CONCURRENT_KERNEL) == CUPTI_SUCCESS &&
cuptiActivityEnable(CUPTI_ACTIVITY_KIND_MEMCPY) == CUPTI_SUCCESS &&
cuptiActivityEnable(CUPTI_ACTIVITY_KIND_EXTERNAL_CORRELATION) == CUPTI_SUCCESS &&
cuptiActivityRegisterCallbacks(BufferRequested, BufferCompleted) == CUPTI_SUCCESS) {
@ -179,7 +181,7 @@ void CudaProfiler::Stop(uint64_t) {
void CudaProfiler::DisableEvents() {
cuptiActivityDisable(CUPTI_ACTIVITY_KIND_EXTERNAL_CORRELATION);
cuptiActivityDisable(CUPTI_ACTIVITY_KIND_KERNEL);
cuptiActivityDisable(CUPTI_ACTIVITY_KIND_CONCURRENT_KERNEL);
cuptiActivityDisable(CUPTI_ACTIVITY_KIND_MEMCPY);
cuptiActivityDisable(CUPTI_ACTIVITY_KIND_DRIVER);
cuptiActivityDisable(CUPTI_ACTIVITY_KIND_RUNTIME);
@ -194,6 +196,20 @@ void CudaProfiler::Clear() {
}
}
#else // for cuda 10.x, no profiling
// Fallback member definitions for builds where CUDA_VERSION is undefined or
// below 11000. Every member is a no-op, and StartProfiling() reports false.

// Buffer-request callback stub: ignores all three arguments.
void CUPTIAPI CudaProfiler::BufferRequested(uint8_t**, size_t*, size_t*) {
}

// Buffer-completion callback stub: ignores its arguments and records nothing.
void CUPTIAPI CudaProfiler::BufferCompleted(CUcontext, uint32_t, uint8_t*, size_t, size_t) {
}

// Always returns false in this configuration.
bool CudaProfiler::StartProfiling() {
  return false;
}

// Leaves the supplied event container untouched.
void CudaProfiler::EndProfiling(TimePoint, Events&) {
}

// Nothing to release in this configuration.
CudaProfiler::~CudaProfiler() {
}

// Start/Stop markers are ignored.
void CudaProfiler::Start(uint64_t) {
}

void CudaProfiler::Stop(uint64_t) {
}

// No activity kinds to disable and no state to clear.
void CudaProfiler::DisableEvents() {
}

void CudaProfiler::Clear() {
}
#endif
} // namespace profiling
} // namespace onnxruntime
#endif

View file

@ -2,23 +2,7 @@
// Licensed under the MIT License.
#include "core/common/profiler_common.h"
#if defined(USE_ROCM) || defined(ENABLE_TRAINING)
namespace onnxruntime {
namespace profiling {

// Do-nothing CudaProfiler selected by the surrounding preprocessor guard on
// USE_ROCM / ENABLE_TRAINING. It implements the EpProfiler overrides without
// collecting any data.
class CudaProfiler final : public EpProfiler {
 public:
  // Always returns true; this variant performs no setup.
  bool StartProfiling() override {
    return true;
  }

  // Leaves the supplied event container untouched.
  void EndProfiling(TimePoint, Events&) override {
  }

  // The id argument is ignored.
  void Start(uint64_t) override {
  }

  // The id argument is ignored.
  void Stop(uint64_t) override {
  }
};

}  // namespace profiling
}  // namespace onnxruntime
#else
#if !(defined(USE_ROCM) || defined(ENABLE_TRAINING))
#include "core/platform/ort_mutex.h"
#include <cupti.h>
@ -78,4 +62,22 @@ class CudaProfiler final : public EpProfiler {
} // namespace profiling
} // namespace onnxruntime
#else
namespace onnxruntime {
namespace profiling {

// Do-nothing CudaProfiler selected by the surrounding preprocessor guard on
// USE_ROCM / ENABLE_TRAINING. It implements the EpProfiler overrides without
// collecting any data.
class CudaProfiler final : public EpProfiler {
 public:
  // Always returns true; this variant performs no setup.
  bool StartProfiling() override {
    return true;
  }

  // Leaves the supplied event container untouched.
  void EndProfiling(TimePoint, Events&) override {
  }

  // The id argument is ignored.
  void Start(uint64_t) override {
  }

  // The id argument is ignored.
  void Stop(uint64_t) override {
  }
};

}  // namespace profiling
}  // namespace onnxruntime
#endif

View file

@ -653,7 +653,7 @@ TEST(InferenceSessionTests, CheckRunProfilerWithSessionOptions) {
}
}
#if defined(USE_CUDA) && !defined(ENABLE_TRAINING)
#if defined(USE_CUDA) && !defined(ENABLE_TRAINING) && defined(CUDA_VERSION) && CUDA_VERSION >= 11000
ASSERT_TRUE(has_kernel_info);
#endif
}