mirror of
https://github.com/saymrwulf/onnxruntime.git
synced 2026-05-21 21:52:11 +00:00
Add type and shape information to profiled numbers (#10773)
* add func to collect type shape * reformat * refactor perf view * remove obsolete
This commit is contained in:
parent
d8bf9a479b
commit
a4b5fa334a
3 changed files with 33 additions and 7 deletions
|
|
@ -135,7 +135,7 @@ std::string Profiler::EndProfiling() {
|
|||
bool is_first_arg = true;
|
||||
for (std::pair<std::string, std::string> event_arg : rec.args) {
|
||||
if (!is_first_arg) profile_stream_ << ",";
|
||||
if (!event_arg.second.empty() && event_arg.second[0] == '{') {
|
||||
if (!event_arg.second.empty() && (event_arg.second[0] == '{' || event_arg.second[0] == '[')) {
|
||||
profile_stream_ << "\"" << event_arg.first << "\" : " << event_arg.second << "";
|
||||
} else {
|
||||
profile_stream_ << "\"" << event_arg.first << "\" : \"" << event_arg.second << "\"";
|
||||
|
|
|
|||
|
|
@ -56,11 +56,14 @@ LARGE_INTEGER perf_freq = OrtGetPerformanceFrequency();
|
|||
namespace onnxruntime {
|
||||
|
||||
static void CalculateTotalOutputSizes(OpKernelContextInternal* op_kernel_context,
|
||||
size_t& total_output_sizes, const std::string& node_name) {
|
||||
size_t& total_output_sizes, const std::string& node_name, std::string& output_type_shape) {
|
||||
// Calculate total output sizes for this operation.
|
||||
std::stringstream ss;
|
||||
ss << "[";
|
||||
total_output_sizes = 0;
|
||||
ORT_UNUSED_PARAMETER(node_name);
|
||||
for (auto i = 0; i < op_kernel_context->OutputCount(); i++) {
|
||||
int output_count = op_kernel_context->OutputCount();
|
||||
for (auto i = 0; i < output_count; i++) {
|
||||
const OrtValue* p_output = op_kernel_context->GetOutputMLValue(i);
|
||||
if (p_output != nullptr && p_output->IsTensor()) {
|
||||
const auto& tensor = p_output->Get<Tensor>();
|
||||
|
|
@ -74,15 +77,22 @@ static void CalculateTotalOutputSizes(OpKernelContextInternal* op_kernel_context
|
|||
<< "\n";
|
||||
#endif
|
||||
total_output_sizes += tensor_size;
|
||||
auto shape_str = tensor.Shape().ToString();
|
||||
ss << "{\"" << DataTypeImpl::ToString(tensor.DataType()) << "\":["
|
||||
<< shape_str.substr(1, shape_str.size() - 2) << "]" << (i == output_count - 1 ? "}" : "},");
|
||||
}
|
||||
}
|
||||
ss << "]";
|
||||
output_type_shape = ss.str();
|
||||
}
|
||||
|
||||
static void CalculateTotalInputSizes(const OpKernelContextInternal* op_kernel_context,
|
||||
const onnxruntime::OpKernel* p_op_kernel,
|
||||
size_t& input_activation_sizes, size_t& input_parameter_sizes,
|
||||
const std::string& node_name) {
|
||||
const std::string& node_name, std::string& input_type_shape) {
|
||||
// Calculate total input sizes for this operation.
|
||||
std::stringstream ss;
|
||||
ss << "[";
|
||||
input_activation_sizes = 0;
|
||||
input_parameter_sizes = 0;
|
||||
ORT_UNUSED_PARAMETER(node_name);
|
||||
|
|
@ -113,8 +123,13 @@ static void CalculateTotalInputSizes(const OpKernelContextInternal* op_kernel_co
|
|||
} else {
|
||||
input_activation_sizes += tensor_size;
|
||||
}
|
||||
auto shape_str = p_tensor->Shape().ToString();
|
||||
ss << "{\"" << DataTypeImpl::ToString(p_tensor->DataType()) << "\":["
|
||||
<< shape_str.substr(1, shape_str.size() - 2) << "]" << (i == input_count - 1 ? "}" : "},");
|
||||
}
|
||||
}
|
||||
ss << "]";
|
||||
input_type_shape = ss.str();
|
||||
}
|
||||
|
||||
static Status ReleaseNodeMLValues(ExecutionFrame& frame,
|
||||
|
|
@ -134,6 +149,8 @@ Status SequentialExecutor::Execute(const SessionState& session_state, const std:
|
|||
size_t input_activation_sizes = 0;
|
||||
size_t input_parameter_sizes = 0;
|
||||
size_t total_output_sizes = 0;
|
||||
std::string input_type_shape{};
|
||||
std::string output_type_shape{};
|
||||
|
||||
if (is_profiler_enabled) {
|
||||
tp = session_state.Profiler().Start();
|
||||
|
|
@ -300,7 +317,8 @@ Status SequentialExecutor::Execute(const SessionState& session_state, const std:
|
|||
|
||||
// Calculate total input sizes for this operation.
|
||||
CalculateTotalInputSizes(&op_kernel_context, p_op_kernel,
|
||||
input_activation_sizes, input_parameter_sizes, node_name_for_profiling);
|
||||
input_activation_sizes, input_parameter_sizes,
|
||||
node_name_for_profiling, input_type_shape);
|
||||
}
|
||||
|
||||
Status compute_status;
|
||||
|
|
@ -349,7 +367,7 @@ Status SequentialExecutor::Execute(const SessionState& session_state, const std:
|
|||
|
||||
if (is_profiler_enabled) {
|
||||
// Calculate total output sizes for this operation.
|
||||
CalculateTotalOutputSizes(&op_kernel_context, total_output_sizes, node_name_for_profiling);
|
||||
CalculateTotalOutputSizes(&op_kernel_context, total_output_sizes, node_name_for_profiling, output_type_shape);
|
||||
|
||||
#if defined(TRACE_EXECUTION)
|
||||
// Trace execution step.
|
||||
|
|
@ -376,6 +394,8 @@ Status SequentialExecutor::Execute(const SessionState& session_state, const std:
|
|||
{"activation_size", std::to_string(input_activation_sizes)},
|
||||
{"parameter_size", std::to_string(input_parameter_sizes)},
|
||||
{"output_size", std::to_string(total_output_sizes)},
|
||||
{"input_type_shape", input_type_shape},
|
||||
{"output_type_shape", output_type_shape},
|
||||
{"thread_scheduling_stats", concurrency::ThreadPool::StopProfiling(session_state.GetThreadPool())},
|
||||
});
|
||||
sync_time_begin = session_state.Profiler().Start();
|
||||
|
|
|
|||
|
|
@ -75,7 +75,13 @@
|
|||
summarized_cpu[node.args.op_name] = {all:0,children:[]};
|
||||
}
|
||||
summarized_cpu[node.args.op_name].all += node.dur;
|
||||
summarized_cpu[node.args.op_name].children.push({name: node.name, value:node.dur})
|
||||
var metadata = JSON.parse('{}');
|
||||
metadata.name = node.name
|
||||
metadata.input = node.args.input_type_shape
|
||||
metadata.output = node.args.output_type_shape
|
||||
metadata.duration_in_microseconds = node.dur
|
||||
var metadata_str = JSON.stringify(metadata).replaceAll('","','",\n"').replaceAll('},{','},\n{').replaceAll('],"','],\n"')
|
||||
summarized_cpu[node.args.op_name].children.push({name: metadata_str, value: node.dur})
|
||||
} else if (category == "Kernel") {
|
||||
var op_name = node.args.op_name == "" ? "Session" : node.args.op_name;
|
||||
if (summarized_gpu[op_name] == null) {
|
||||
|
|
|
|||
Loading…
Reference in a new issue