mirror of
https://github.com/saymrwulf/onnxruntime.git
synced 2026-05-14 20:48:00 +00:00
optimize unordered_map (#166)
This commit is contained in:
parent
0aaaf4663d
commit
2ffaa8a185
4 changed files with 12 additions and 11 deletions
|
|
@ -33,14 +33,15 @@ void Profiler::StartProfiling(const std::string& file_name) {
|
|||
void Profiler::EndTimeAndRecordEvent(EventCategory category,
|
||||
const std::string& event_name,
|
||||
TimePoint& start_time,
|
||||
std::unordered_map<std::string, std::string>&& event_args,
|
||||
const std::initializer_list<std::pair<std::string, std::string>>& event_args,
|
||||
bool /*sync_gpu*/) {
|
||||
if (!enabled_ && !profile_with_logger_)
|
||||
return;
|
||||
long long dur = TimeDiffMicroSeconds(start_time);
|
||||
long long ts = TimeDiffMicroSeconds(profiling_start_time_, start_time);
|
||||
|
||||
EventRecord event(category, logging::GetProcessId(),
|
||||
logging::GetThreadId(), event_name, ts, dur, std::move(event_args));
|
||||
logging::GetThreadId(), event_name, ts, dur, { event_args.begin(), event_args.end() });
|
||||
if (profile_with_logger_) {
|
||||
custom_logger_->SendProfileEvent(event);
|
||||
} else {
|
||||
|
|
|
|||
|
|
@ -4,6 +4,8 @@
|
|||
#pragma once
|
||||
#include <iostream>
|
||||
#include <fstream>
|
||||
#include <tuple>
|
||||
#include <initializer_list>
|
||||
#include "core/common/logging/logging.h"
|
||||
|
||||
namespace onnxruntime {
|
||||
|
|
@ -45,7 +47,7 @@ class Profiler {
|
|||
void EndTimeAndRecordEvent(EventCategory category,
|
||||
const std::string& event_name,
|
||||
TimePoint& start_time,
|
||||
std::unordered_map<std::string, std::string>&& event_args = std::unordered_map<std::string, std::string>(),
|
||||
const std::initializer_list<std::pair<std::string, std::string>>& event_args = {},
|
||||
bool sync_gpu = false);
|
||||
|
||||
/*
|
||||
|
|
|
|||
|
|
@ -147,8 +147,7 @@ void ParallelExecutor::RunNodeAsyncInternal(size_t p_node_index,
|
|||
session_state.Profiler().EndTimeAndRecordEvent(profiling::NODE_EVENT,
|
||||
node_name + "_fence_before",
|
||||
sync_time_begin,
|
||||
std::unordered_map<std::string,
|
||||
std::string>{{"op_name", op_name}});
|
||||
{{"op_name", op_name}});
|
||||
|
||||
// call compute on the kernel
|
||||
VLOGS(logger, 1) << "Computing kernel: " << p_op_kernel->Node().Name();
|
||||
|
|
@ -164,7 +163,7 @@ void ParallelExecutor::RunNodeAsyncInternal(size_t p_node_index,
|
|||
session_state.Profiler().EndTimeAndRecordEvent(profiling::NODE_EVENT,
|
||||
node_name + "_kernel_time",
|
||||
kernel_begin_time,
|
||||
std::unordered_map<std::string, std::string>{{"op_name", op_name}});
|
||||
{{"op_name", op_name}});
|
||||
|
||||
sync_time_begin = session_state.Profiler().StartTime();
|
||||
// sync after compute for outputs
|
||||
|
|
@ -191,7 +190,7 @@ void ParallelExecutor::RunNodeAsyncInternal(size_t p_node_index,
|
|||
session_state.Profiler().EndTimeAndRecordEvent(profiling::NODE_EVENT,
|
||||
node_name + "_fence_after",
|
||||
sync_time_begin,
|
||||
std::unordered_map<std::string, std::string>{{"op_name", op_name}});
|
||||
{{"op_name", op_name}});
|
||||
|
||||
//std::cout << "Run async node finish: " << p_node_index << std::endl;
|
||||
|
||||
|
|
|
|||
|
|
@ -92,8 +92,7 @@ Status SequentialExecutor::Execute(const SessionState& session_state,
|
|||
session_state.Profiler().EndTimeAndRecordEvent(profiling::NODE_EVENT,
|
||||
node_name + "_fence_before",
|
||||
sync_time_begin,
|
||||
std::unordered_map<std::string,
|
||||
std::string>{{"op_name", op_name}});
|
||||
{{"op_name", op_name}});
|
||||
|
||||
// call compute on the kernel
|
||||
VLOGS(logger, 1) << "Computing kernel: " << p_op_kernel->Node().Name();
|
||||
|
|
@ -103,7 +102,7 @@ Status SequentialExecutor::Execute(const SessionState& session_state,
|
|||
session_state.Profiler().EndTimeAndRecordEvent(profiling::NODE_EVENT,
|
||||
node_name + "_kernel_time",
|
||||
kernel_begin_time,
|
||||
std::unordered_map<std::string, std::string>{{"op_name", op_name}});
|
||||
{{"op_name", op_name}});
|
||||
|
||||
sync_time_begin = session_state.Profiler().StartTime();
|
||||
// sync after compute for outputs
|
||||
|
|
@ -130,7 +129,7 @@ Status SequentialExecutor::Execute(const SessionState& session_state,
|
|||
session_state.Profiler().EndTimeAndRecordEvent(profiling::NODE_EVENT,
|
||||
node_name + "_fence_after",
|
||||
sync_time_begin,
|
||||
std::unordered_map<std::string, std::string>{{"op_name", op_name}});
|
||||
{{"op_name", op_name}});
|
||||
|
||||
// free ml-values corresponding to this node
|
||||
VLOGS(logger, 1) << "Releasing node ML values after computing kernel: " << p_op_kernel->Node().Name();
|
||||
|
|
|
|||
Loading…
Reference in a new issue