optimize unordered_map (#166)

This commit is contained in:
Weixian 2018-12-13 16:02:50 -08:00 committed by Yufeng Li
parent 0aaaf4663d
commit 2ffaa8a185
4 changed files with 12 additions and 11 deletions

View file

@@ -33,14 +33,15 @@ void Profiler::StartProfiling(const std::string& file_name) {
void Profiler::EndTimeAndRecordEvent(EventCategory category,
const std::string& event_name,
TimePoint& start_time,
std::unordered_map<std::string, std::string>&& event_args,
const std::initializer_list<std::pair<std::string, std::string>>& event_args,
bool /*sync_gpu*/) {
if (!enabled_ && !profile_with_logger_)
return;
long long dur = TimeDiffMicroSeconds(start_time);
long long ts = TimeDiffMicroSeconds(profiling_start_time_, start_time);
EventRecord event(category, logging::GetProcessId(),
logging::GetThreadId(), event_name, ts, dur, std::move(event_args));
logging::GetThreadId(), event_name, ts, dur, { event_args.begin(), event_args.end() });
if (profile_with_logger_) {
custom_logger_->SendProfileEvent(event);
} else {

View file

@@ -4,6 +4,8 @@
#pragma once
#include <iostream>
#include <fstream>
#include <tuple>
#include <initializer_list>
#include "core/common/logging/logging.h"
namespace onnxruntime {
@@ -45,7 +47,7 @@ class Profiler {
void EndTimeAndRecordEvent(EventCategory category,
const std::string& event_name,
TimePoint& start_time,
std::unordered_map<std::string, std::string>&& event_args = std::unordered_map<std::string, std::string>(),
const std::initializer_list<std::pair<std::string, std::string>>& event_args = {},
bool sync_gpu = false);
/*

View file

@@ -147,8 +147,7 @@ void ParallelExecutor::RunNodeAsyncInternal(size_t p_node_index,
session_state.Profiler().EndTimeAndRecordEvent(profiling::NODE_EVENT,
node_name + "_fence_before",
sync_time_begin,
std::unordered_map<std::string,
std::string>{{"op_name", op_name}});
{{"op_name", op_name}});
// call compute on the kernel
VLOGS(logger, 1) << "Computing kernel: " << p_op_kernel->Node().Name();
@@ -164,7 +163,7 @@ void ParallelExecutor::RunNodeAsyncInternal(size_t p_node_index,
session_state.Profiler().EndTimeAndRecordEvent(profiling::NODE_EVENT,
node_name + "_kernel_time",
kernel_begin_time,
std::unordered_map<std::string, std::string>{{"op_name", op_name}});
{{"op_name", op_name}});
sync_time_begin = session_state.Profiler().StartTime();
// sync after compute for outputs
@@ -191,7 +190,7 @@ void ParallelExecutor::RunNodeAsyncInternal(size_t p_node_index,
session_state.Profiler().EndTimeAndRecordEvent(profiling::NODE_EVENT,
node_name + "_fence_after",
sync_time_begin,
std::unordered_map<std::string, std::string>{{"op_name", op_name}});
{{"op_name", op_name}});
//std::cout << "Run async node finish: " << p_node_index << std::endl;

View file

@@ -92,8 +92,7 @@ Status SequentialExecutor::Execute(const SessionState& session_state,
session_state.Profiler().EndTimeAndRecordEvent(profiling::NODE_EVENT,
node_name + "_fence_before",
sync_time_begin,
std::unordered_map<std::string,
std::string>{{"op_name", op_name}});
{{"op_name", op_name}});
// call compute on the kernel
VLOGS(logger, 1) << "Computing kernel: " << p_op_kernel->Node().Name();
@@ -103,7 +102,7 @@ Status SequentialExecutor::Execute(const SessionState& session_state,
session_state.Profiler().EndTimeAndRecordEvent(profiling::NODE_EVENT,
node_name + "_kernel_time",
kernel_begin_time,
std::unordered_map<std::string, std::string>{{"op_name", op_name}});
{{"op_name", op_name}});
sync_time_begin = session_state.Profiler().StartTime();
// sync after compute for outputs
@@ -130,7 +129,7 @@ Status SequentialExecutor::Execute(const SessionState& session_state,
session_state.Profiler().EndTimeAndRecordEvent(profiling::NODE_EVENT,
node_name + "_fence_after",
sync_time_begin,
std::unordered_map<std::string, std::string>{{"op_name", op_name}});
{{"op_name", op_name}});
// free ml-values corresponding to this node
VLOGS(logger, 1) << "Releasing node ML values after computing kernel: " << p_op_kernel->Node().Name();