// Copyright (c) Microsoft Corporation. // Licensed under the MIT License. #pragma once #include #include #ifndef DISABLE_GPU_COUNTERS #include #include #endif #include #include #include #include "core/common/common.h" #define TIMER_SLOT_SIZE (128) #define CONVERT_100NS_TO_SECOND(x) ((x)*0.0000001) #define BYTE_TO_MB(x) ((x) / (1024.0 * 1024.0)) // A stopwatch to measure the time passed (in seconds) between current Stop call and the closest Start call that has been called before. class Timer { public: void Start() { LARGE_INTEGER t; QueryPerformanceCounter(&t); start_time_ = static_cast(t.QuadPart); } double Stop() { LARGE_INTEGER stop_time; QueryPerformanceCounter(&stop_time); double t = static_cast(stop_time.QuadPart) - start_time_; LARGE_INTEGER tps; QueryPerformanceFrequency(&tps); return t / static_cast(tps.QuadPart); } private: double start_time_; }; typedef enum CounterType { TIMER = 0, CPU_USAGE, PAGE_FAULT_COUNT, PAGE_FILE_USAGE, PEAK_PAGE_FILE_USAGE, WORKING_SET_USAGE, PEAK_WORKING_SET_USAGE, // GPU specific counter starts here GPU_USAGE, GPU_DEDICATED_MEM_USAGE, GPU_SHARED_MEM_USAGE, TYPE_COUNT } CounterType; typedef enum ProfilerType { CPU, GPU, PROFILER_TYPE_COUNT } ProfilerType; class IPerfCounter { public: virtual void Reset() = 0; virtual void Stop() = 0; virtual void Start() = 0; virtual void GetValues(double (&values)[CounterType::TYPE_COUNT], double time) = 0; }; class CpuPerfCounter : public IPerfCounter { public: CpuPerfCounter() {} ~CpuPerfCounter() {} void Reset() override { SYSTEM_INFO sysInfo = {0}; GetSystemInfo(&sysInfo); m_startKernelTime = {0}; m_startUserTime = {0}; m_numProcessors = sysInfo.dwNumberOfProcessors; m_procHandle = GetCurrentProcess(); ; m_pid = GetCurrentProcessId(); ; m_previousStartCallFailed = true; m_processTime = 0; m_startPageFaultCount = 0; m_startPagefileUsage = 0; m_startPeakPagefileUsage = 0; m_startWorkingSetSize = 0; m_startPeakWorkingSetSize = 0; m_deltaPageFaultCount = 0; m_deltaPagefileUsage = 0; m_deltaPeakPagefileUsage = 0; m_deltaWorkingSetSize = 0; m_deltaPeakWorkingSetSize = 0; } void Start() override { FILETIME ftIgnore, ftKernel, ftUser; if (!GetProcessTimes(m_procHandle, &ftIgnore, &ftIgnore, &ftKernel, &ftUser) || !GetProcessMemoryCounters(m_pid, m_startPageFaultCount, m_startPagefileUsage, m_startPeakPagefileUsage, m_startWorkingSetSize, m_startPeakWorkingSetSize)) { m_previousStartCallFailed = true; } else { memcpy(&m_startKernelTime, &ftKernel, sizeof(FILETIME)); memcpy(&m_startUserTime, &ftUser, sizeof(FILETIME)); m_previousStartCallFailed = false; } } void Stop() override { FILETIME ftIgnore, ftKernel, ftUser; ULARGE_INTEGER stopKernelTime, stopUserTime; ULONG stopPageFaultCount = 0; SIZE_T stopPagefileUsage = 0; SIZE_T stopPeakPagefileUsage = 0; SIZE_T stopWorkingSetSize = 0; SIZE_T stopPeakWorkingSetSize = 0; if (m_previousStartCallFailed || m_numProcessors == 0 || !GetProcessTimes(m_procHandle, &ftIgnore, &ftIgnore, &ftKernel, &ftUser) || !GetProcessMemoryCounters(m_pid, stopPageFaultCount, stopPagefileUsage, stopPeakPagefileUsage, stopWorkingSetSize, stopPeakWorkingSetSize)) { return; } memcpy(&stopKernelTime, &ftKernel, sizeof(FILETIME)); memcpy(&stopUserTime, &ftUser, sizeof(FILETIME)); m_processTime = CONVERT_100NS_TO_SECOND((stopKernelTime.QuadPart - m_startKernelTime.QuadPart) + (stopUserTime.QuadPart - m_startUserTime.QuadPart)) / m_numProcessors; m_deltaPageFaultCount = stopPageFaultCount - m_startPageFaultCount; m_deltaPagefileUsage = (double)BYTE_TO_MB((double)stopPagefileUsage - (double)m_startPagefileUsage); m_deltaPeakPagefileUsage = (double)BYTE_TO_MB((double)stopPeakPagefileUsage - (double)m_startPeakPagefileUsage); m_deltaWorkingSetSize = (double)BYTE_TO_MB((double)stopWorkingSetSize - (double)m_startWorkingSetSize); m_deltaPeakWorkingSetSize = (double)BYTE_TO_MB((double)stopPeakWorkingSetSize - (double)m_startPeakWorkingSetSize); } void GetValues(double (&values)[CounterType::TYPE_COUNT], double time) override { values[CounterType::CPU_USAGE] = 100.0 * GetProcessTime() / time; values[CounterType::PAGE_FAULT_COUNT] = GetDeltaPageFaultCount(); values[CounterType::PAGE_FILE_USAGE] = GetDeltaPageFileUsage(); values[CounterType::PEAK_PAGE_FILE_USAGE] = GetDeltaPeakPageFileUsage(); values[CounterType::WORKING_SET_USAGE] = GetDeltaWorkingSetUsage(); values[CounterType::PEAK_WORKING_SET_USAGE] = GetDeltaPeakWorkingSetUsage(); } double GetProcessTime() { return m_processTime; } ULONG GetDeltaPageFaultCount() { return m_deltaPageFaultCount; } double GetDeltaPageFileUsage() { return m_deltaPagefileUsage; } double GetDeltaPeakPageFileUsage() { return m_deltaPeakPagefileUsage; } double GetDeltaWorkingSetUsage() { return m_deltaWorkingSetSize; } double GetDeltaPeakWorkingSetUsage() { return m_deltaPeakWorkingSetSize; } private: bool GetProcessMemoryCounters(DWORD pid, ULONG& pageFaultCount, SIZE_T& pageFileUsage, SIZE_T& peakPageFileUsage, SIZE_T& workingSetSize, SIZE_T& peakWorkingSetSize) { HANDLE hProcess = NULL; hProcess = OpenProcess(PROCESS_QUERY_INFORMATION | PROCESS_VM_READ, FALSE, pid); if (NULL == hProcess) return false; PROCESS_MEMORY_COUNTERS pmc = {0}; bool result = GetProcessMemoryInfo(hProcess, &pmc, sizeof(pmc)); if (result) { pageFaultCount = pmc.PageFaultCount; pageFileUsage = pmc.PagefileUsage; peakPageFileUsage = pmc.PeakPagefileUsage; workingSetSize = pmc.WorkingSetSize; peakWorkingSetSize = pmc.PeakWorkingSetSize; } CloseHandle(hProcess); return result; } ULARGE_INTEGER m_startKernelTime = {}; ULARGE_INTEGER m_startUserTime = {}; UINT m_numProcessors = 0; HANDLE m_procHandle; DWORD m_pid = 0; bool m_previousStartCallFailed; double m_processTime = 0; // in second ULONG m_startPageFaultCount = 0; SIZE_T m_startPagefileUsage = 0; // in byte SIZE_T m_startPeakPagefileUsage = 0; // in byte SIZE_T m_startWorkingSetSize = 0; // in byte SIZE_T m_startPeakWorkingSetSize = 0; // in byte ULONG m_deltaPageFaultCount = 0; double m_deltaPagefileUsage = 0; // in MByte double m_deltaPeakPagefileUsage = 0; // in MByte double m_deltaWorkingSetSize = 0; // in MByte double m_deltaPeakWorkingSetSize = 0; // in MByte }; #ifndef DISABLE_GPU_COUNTERS class GpuPerfCounter : public IPerfCounter { public: GpuPerfCounter() : m_hPDH(NULL), m_pfnPdhOpenQuery(NULL), m_pfnPdhAddCounter(NULL), m_pfnPdhCollectQueryData(NULL), m_pfnPdhGetFormattedCounterArray(NULL), m_pfnPdhGetFormattedCounterValue(NULL), m_pfnPdhCloseQuery(NULL), m_query(NULL) { //#ifdef DISABLE_LOADLIBRARY m_hPDH = LoadLibraryExW(L"pdh.dll", NULL, 0); //#endif if (m_hPDH != NULL) { m_pfnPdhOpenQuery = (PFNPdhOpenQuery)GetProcAddress(m_hPDH, "PdhOpenQueryW"); m_pfnPdhAddCounter = (PFNPdhAddCounter)GetProcAddress(m_hPDH, "PdhAddCounterW"); m_pfnPdhCollectQueryData = (PFNPdhCollectQueryData)GetProcAddress(m_hPDH, "PdhCollectQueryData"); m_pfnPdhGetFormattedCounterArray = (PFNPdhGetFormattedCounterArray)GetProcAddress(m_hPDH, "PdhGetFormattedCounterArrayW"); m_pfnPdhGetFormattedCounterValue = (PFNPdhGetFormattedCounterValue)GetProcAddress(m_hPDH, "PdhGetFormattedCounterValue"); m_pfnPdhCloseQuery = (PFNPdhCloseQuery)GetProcAddress(m_hPDH, "PdhCloseQuery"); } } ~GpuPerfCounter() { if (m_query) { CloseQuery(m_query); m_query = NULL; } if (m_hPDH) { FreeLibrary(m_hPDH); m_hPDH = NULL; } } // This function consumes a lot of memory // Avoid calling this function unless it's necessary void Reset() override { m_gpuUsage = 0; m_deltaGpuDedicatedMemory = 0; m_deltaGpuSharedMemory = 0; // Setup PDH performance query std::wstring pidStr = std::to_wstring(GetCurrentProcessId()); std::wstring gpuUsageQueryStr = L"\\GPU Engine(pid_*_*)\\Utilization Percentage"; std::wstring gpuDedicatedMemQueryStr = L"\\GPU Process Memory(pid_*_*)\\Dedicated Usage"; std::wstring gpuSharedMemQueryStr = L"\\GPU Process Memory(pid_*_*)\\Shared Usage"; gpuUsageQueryStr.replace(gpuUsageQueryStr.find('*'), 1, pidStr); gpuDedicatedMemQueryStr.replace(gpuDedicatedMemQueryStr.find('*'), 1, pidStr); gpuSharedMemQueryStr.replace(gpuSharedMemQueryStr.find('*'), 1, pidStr); // Open query if (m_query) CloseQuery(m_query); OpenQuery(NULL, NULL, &m_query); AddCounter(m_query, gpuUsageQueryStr.c_str(), NULL, &m_gpuUsageCounter); AddCounter(m_query, gpuDedicatedMemQueryStr.c_str(), NULL, &m_gpuDedicatedMemUsageCounter); AddCounter(m_query, gpuSharedMemQueryStr.c_str(), NULL, &m_gpuSharedMemUsageCounter); } void Start() override { PDH_FMT_COUNTERVALUE gpuDedicatedMemUsageCounterValue = {}; PDH_FMT_COUNTERVALUE gpuSharedMemUsageCounterValue = {}; PDH_STATUS status = S_OK; // Usage rate counter require two queries. Put first one at Start() and second on at Stop() CollectQueryData(m_query); // Gpu dedicated ememory status = GetFormattedCounterValue(m_gpuDedicatedMemUsageCounter, PDH_FMT_LARGE, NULL, &gpuDedicatedMemUsageCounterValue); m_startGpuDedicatedMemory = (ERROR_SUCCESS == status) ? (double)BYTE_TO_MB(gpuDedicatedMemUsageCounterValue.largeValue) : m_startGpuDedicatedMemory; // Gpu shared ememory status = GetFormattedCounterValue(m_gpuSharedMemUsageCounter, PDH_FMT_LARGE, NULL, &gpuSharedMemUsageCounterValue); m_startGpuSharedMemory = (ERROR_SUCCESS == status) ? (double)BYTE_TO_MB(gpuSharedMemUsageCounterValue.largeValue) : m_startGpuSharedMemory; } void Stop() override { PPDH_FMT_COUNTERVALUE_ITEM_W gpuUsageCounterValue = nullptr; PDH_FMT_COUNTERVALUE gpuDedicatedMemUsageCounterValue = {}; PDH_FMT_COUNTERVALUE gpuSharedMemUsageCounterValue = {}; DWORD bufferSize = 0; DWORD itemCount = 0; PDH_STATUS status = S_OK; // Query the gpu usage. // For different IHVs, compute shader usage could be counted as either 3D or compute engine usage. // Here we simply pick the max usage from all types of engines to see if bottleneck is from GPU. // The same concept has been used in task manager to display GPU usage. status = CollectQueryData(m_query); if (S_OK != status && PDH_NO_DATA != status) return; status = GetFormattedCounterArray(m_gpuUsageCounter, PDH_FMT_DOUBLE, &bufferSize, &itemCount, gpuUsageCounterValue); if (PDH_MORE_DATA != status) return; gpuUsageCounterValue = (PPDH_FMT_COUNTERVALUE_ITEM_W)malloc(bufferSize); if (gpuUsageCounterValue != nullptr) { status = GetFormattedCounterArray(m_gpuUsageCounter, PDH_FMT_DOUBLE, &bufferSize, &itemCount, gpuUsageCounterValue); if (ERROR_SUCCESS == status) { double maxValue = 0; for (size_t i = 0; i < itemCount; ++i) { maxValue = (gpuUsageCounterValue[i].FmtValue.doubleValue > maxValue) ? gpuUsageCounterValue[i].FmtValue.doubleValue : maxValue; } m_gpuUsage = maxValue; } } free(gpuUsageCounterValue); gpuUsageCounterValue = NULL; bufferSize = 0; itemCount = 0; double stopGpuDedicatedMemory; // in MB double stopGpuSharedMemory; // in MB // Gpu dedicated ememory delta. Don't update the value if counter doesn't get values correctly. status = GetFormattedCounterValue(m_gpuDedicatedMemUsageCounter, PDH_FMT_LARGE, NULL, &gpuDedicatedMemUsageCounterValue); if (ERROR_SUCCESS == status) { stopGpuDedicatedMemory = (double)BYTE_TO_MB(gpuDedicatedMemUsageCounterValue.largeValue); m_deltaGpuDedicatedMemory = stopGpuDedicatedMemory - m_startGpuDedicatedMemory; } // Gpu shared ememory. Don't update the value if counter doesn't get values correctly. status = GetFormattedCounterValue(m_gpuSharedMemUsageCounter, PDH_FMT_LARGE, NULL, &gpuSharedMemUsageCounterValue); if (ERROR_SUCCESS == status) { stopGpuSharedMemory = (double)BYTE_TO_MB(gpuSharedMemUsageCounterValue.largeValue); m_deltaGpuSharedMemory = stopGpuSharedMemory - m_startGpuSharedMemory; } } void GetValues(double (&values)[CounterType::TYPE_COUNT], double time) override { ORT_UNUSED_PARAMETER(time); values[CounterType::GPU_USAGE] = GetGpuUsage(); values[CounterType::GPU_DEDICATED_MEM_USAGE] = GetDedicatedMemory(); values[CounterType::GPU_SHARED_MEM_USAGE] = GetSharedMemory(); } double GetGpuUsage() const { return m_gpuUsage; } double GetDedicatedMemory() const { return m_deltaGpuDedicatedMemory; } double GetSharedMemory() const { return m_deltaGpuSharedMemory; } private: // Pdh function prototypes typedef PDH_STATUS(WINAPI* PFNPdhOpenQuery)(_In_opt_ LPCWSTR szDataSource, _In_ DWORD_PTR dwUserData, _Out_ PDH_HQUERY* phQuery); typedef PDH_STATUS(WINAPI* PFNPdhAddCounter)(_In_ PDH_HQUERY hQuery, _In_ LPCWSTR szFullCounterPath, _In_ DWORD_PTR dwUserData, _Out_ PDH_HCOUNTER* phCounter); typedef PDH_STATUS(WINAPI* PFNPdhCollectQueryData)(_Inout_ PDH_HQUERY hQuery); typedef PDH_STATUS(WINAPI* PFNPdhGetFormattedCounterArray)(_In_ PDH_HCOUNTER hCounter, _In_ DWORD dwFormat, _Inout_ LPDWORD lpdwBufferSize, _Out_ LPDWORD lpdwItemCount, _Out_writes_bytes_opt_(*lpdwBufferSize) PPDH_FMT_COUNTERVALUE_ITEM_W ItemBuffer); typedef PDH_STATUS(WINAPI* PFNPdhGetFormattedCounterValue)(_In_ PDH_HCOUNTER hCounter, _In_ DWORD dwFormat, _Out_opt_ LPDWORD lpdwType, _Out_ PPDH_FMT_COUNTERVALUE pValue); typedef PDH_STATUS(WINAPI* PFNPdhCloseQuery)(_Inout_ PDH_HQUERY hQuery); PDH_STATUS OpenQuery(LPCWSTR szDataSource, DWORD_PTR dwUserData, PDH_HQUERY* phQuery) { return (m_pfnPdhOpenQuery) ? m_pfnPdhOpenQuery(szDataSource, dwUserData, phQuery) : ERROR_MOD_NOT_FOUND; } PDH_STATUS AddCounter(PDH_HQUERY hQuery, LPCWSTR szFullCounterPath, DWORD_PTR dwUserData, PDH_HCOUNTER* phCounter) { return (m_pfnPdhAddCounter) ? m_pfnPdhAddCounter(hQuery, szFullCounterPath, dwUserData, phCounter) : ERROR_MOD_NOT_FOUND; } PDH_STATUS CollectQueryData(PDH_HQUERY hQuery) { return (m_pfnPdhCollectQueryData) ? m_pfnPdhCollectQueryData(hQuery) : ERROR_MOD_NOT_FOUND; } PDH_STATUS GetFormattedCounterArray(PDH_HCOUNTER hCounter, DWORD dwFormat, LPDWORD lpdwBufferSize, LPDWORD lpdwItemCount, PPDH_FMT_COUNTERVALUE_ITEM_W ItemBuffer) { return (m_pfnPdhGetFormattedCounterArray) ? m_pfnPdhGetFormattedCounterArray(hCounter, dwFormat, lpdwBufferSize, lpdwItemCount, ItemBuffer) : ERROR_MOD_NOT_FOUND; } PDH_STATUS GetFormattedCounterValue(PDH_HCOUNTER hCounter, DWORD dwFormat, LPDWORD lpdwType, PPDH_FMT_COUNTERVALUE pValue) { return (m_pfnPdhGetFormattedCounterValue) ? m_pfnPdhGetFormattedCounterValue(hCounter, dwFormat, lpdwType, pValue) : ERROR_MOD_NOT_FOUND; } PDH_STATUS CloseQuery(PDH_HQUERY hQuery) { return (m_pfnPdhCloseQuery) ? m_pfnPdhCloseQuery(hQuery) : ERROR_MOD_NOT_FOUND; } // PDH Performance Query HMODULE m_hPDH; PFNPdhOpenQuery m_pfnPdhOpenQuery; PFNPdhAddCounter m_pfnPdhAddCounter; PFNPdhCollectQueryData m_pfnPdhCollectQueryData; PFNPdhGetFormattedCounterArray m_pfnPdhGetFormattedCounterArray; PFNPdhGetFormattedCounterValue m_pfnPdhGetFormattedCounterValue; PFNPdhCloseQuery m_pfnPdhCloseQuery; HQUERY m_query; HCOUNTER m_gpuUsageCounter = NULL; HCOUNTER m_gpuDedicatedMemUsageCounter = NULL; HCOUNTER m_gpuSharedMemUsageCounter = NULL; // Process info DWORD m_pid = 0; // Data double m_gpuUsage = 0; double m_startGpuDedicatedMemory = 0; // in MB double m_startGpuSharedMemory = 0; // in MB double m_deltaGpuDedicatedMemory = 0; // in MB double m_deltaGpuSharedMemory = 0; // in MB }; #endif ; // A statistics helper for Timer/CpuPerfCounter/GpuPerfCounter class. // It keeps the latest "TIMER_SLOT_SIZE" measured data in a ring buffer. // The statistic functions (e.g. GetVariance) assume data always starts from index 0 of the buffer. class PerfCounterStatistics { public: void Enable(ProfilerType type) { if (type == ProfilerType::CPU) { m_perfCounter[type] = std::make_unique(); } else if (type == ProfilerType::GPU) { m_perfCounter[type] = std::make_unique(); } } void Disable(ProfilerType type) { m_perfCounter[type].reset(); } void Reset(ProfilerType type) { m_pos = 0; m_bufferFull = false; for (int i = 0; i < CounterType::TYPE_COUNT; ++i) { if (!IsCounterTypeDisabled(static_cast(i))) { m_data[i].Reset(); } } if (m_perfCounter[type]) { m_perfCounter[type]->Reset(); } } void Start() { m_timer.Start(); for (unsigned int i = 0; i < m_perfCounter.size(); ++i) { if (m_perfCounter[i]) { m_perfCounter[i]->Start(); } } } void Stop() { double counterValue[CounterType::TYPE_COUNT] = {0.0f}; // Query counters double time = m_timer.Stop(); counterValue[CounterType::TIMER] = time; for (unsigned int i = 0; i < m_perfCounter.size(); ++i) { if (m_perfCounter[i]) { m_perfCounter[i]->Stop(); m_perfCounter[i]->GetValues(counterValue, time); } } // Update data blocks for (int i = 0; i < CounterType::TYPE_COUNT; ++i) { m_data[i].total = m_data[i].total - m_data[i].measured[m_pos] + counterValue[i]; m_data[i].measured[m_pos] = counterValue[i]; m_data[i].max = (counterValue[i] > m_data[i].max) ? counterValue[i] : m_data[i].max; m_data[i].min = (counterValue[i] < m_data[i].min) ? counterValue[i] : m_data[i].min; } // Update buffer index if (m_pos + 1 >= TIMER_SLOT_SIZE) { m_pos = 0; m_bufferFull = true; } else { ++m_pos; } } int GetCount() const { return (m_bufferFull) ? TIMER_SLOT_SIZE : m_pos; } double GetAverage(CounterType t) const { return IsCounterTypeDisabled(t) ? 0 : m_data[t].total / GetCount(); } double GetMin(CounterType t) const { return IsCounterTypeDisabled(t) ? 0 : m_data[t].min; } double GetMax(CounterType t) const { return IsCounterTypeDisabled(t) ? 0 : m_data[t].max; } double GetValues(CounterType t, int index) const { return IsCounterTypeDisabled(t) ? 0 : m_data[t].measured[index]; } double GetStdev(CounterType t) const { return IsCounterTypeDisabled(t) ? 0 : sqrt(GetVariance(t)); } double GetVariance(CounterType t) const { if (IsCounterTypeDisabled(t)) return 0; int count = GetCount(); double average = m_data[t].total / count; double var = 0; for (int i = 0; i < count; ++i) { var += (m_data[t].measured[i] - average) * (m_data[t].measured[i] - average); } return var / count; } private: bool IsCounterTypeDisabled(CounterType t) const { switch (t) { case CPU_USAGE: case PAGE_FAULT_COUNT: case PAGE_FILE_USAGE: case PEAK_PAGE_FILE_USAGE: case WORKING_SET_USAGE: case PEAK_WORKING_SET_USAGE: return m_perfCounter[ProfilerType::CPU] == nullptr; case GPU_USAGE: case GPU_DEDICATED_MEM_USAGE: case GPU_SHARED_MEM_USAGE: return m_perfCounter[ProfilerType::GPU] == nullptr; case TIMER: return false; default: return true; } } struct DataBlock { void Reset() { max = 0; min = DBL_MAX; total = 0; memset(measured, 0, sizeof(double) * TIMER_SLOT_SIZE); } double max; double min; double total; double measured[TIMER_SLOT_SIZE]; }; int m_pos; bool m_bufferFull; std::array, ProfilerType::PROFILER_TYPE_COUNT> m_perfCounter; Timer m_timer; DataBlock m_data[CounterType::TYPE_COUNT]; }; // A class to wrap up multiple PerfCounterStatistics objects. // To create a profiler, define intervals in an enum and use it to create the profiler object. // See an example in engine/test/Model/ModelTest.cpp template class Profiler { public: void Reset(ProfilerType type) { for (int i = 0; i < T::kCount; ++i) { m_perfCounterStat[i].Reset(type); } } PerfCounterStatistics& GetCounter(int t) { return m_perfCounterStat[t]; } PerfCounterStatistics& operator[](int t) { return m_perfCounterStat[t]; } void Enable(ProfilerType type) { for (int i = 0; i < T::kCount; ++i) { m_perfCounterStat[i].Enable(type); } } void Disable(ProfilerType type) { for (int i = 0; i < T::kCount; ++i) { m_perfCounterStat[i].Disable(type); } } //Checks if Profiler is still reseted bool IsStillReset() { for (int i = 0; i < T::kCount; ++i) { if (m_perfCounterStat[i].GetCount() > 0) { return false; } } return true; } private: PerfCounterStatistics m_perfCounterStat[T::kCount]; }; #define WINML_PROFILING #ifdef WINML_PROFILING #define WINML_PROFILING_START(profiler, interval) profiler[interval].Start() #define WINML_PROFILING_STOP(profiler, interval) profiler[interval].Stop() #else #define WINML_PROFILING_START(profiler, interval) \ do { \ } while (0) #define WINML_PROFILING_STOP(profiler, interval) \ do { \ } while (0) #endif