mirror of
https://github.com/saymrwulf/onnxruntime.git
synced 2026-05-17 21:10:43 +00:00
77 lines
2.4 KiB
Python
77 lines
2.4 KiB
Python
import argparse
|
|
import csv
|
|
import os # noqa: F401
|
|
|
|
import numpy as np # noqa: F401
|
|
|
|
# Command-line interface. --input is the path handed to get_gpu_lines(), which
# reads it as a comma-separated file. It is required: without it args.input
# would be None and the later open() call would fail with a TypeError instead
# of a usage message.
parser = argparse.ArgumentParser()
parser.add_argument(
    "--input",
    type=str,
    required=True,
    help="path to the CSV file of per-kernel GPU statistics to summarize",
)
args = parser.parse_args()
|
|
|
|
|
|
def get_gpu_lines(path):
    """Read the CSV file at *path* and return its data rows.

    The header row is recognised by its third column containing the text
    ``TotalDurationNs`` and is dropped. Blank lines, which ``csv.reader``
    yields as empty lists, are skipped rather than raising ``IndexError``.

    Args:
        path: Path of the comma-separated file to read.

    Returns:
        A list of rows in file order, each row being a list of column strings.
    """
    with open(path, newline="") as f:
        return [
            row
            for row in csv.reader(f, delimiter=",")
            # An empty list is a blank line; it has no third column to inspect.
            if row and "TotalDurationNs" not in row[2]
        ]
|
|
|
|
|
|
# Ordered classification table: (category name, predicate on a kernel-name
# string). Order matters because the first predicate that returns True decides
# the category; the final "misc" entry accepts everything.
activities = [
    ("nccl", lambda x: "nccl" in x),
    ("gemm", lambda x: "Cijk_" in x),
    ("memcpy", lambda x: "CUDA mem" in x),
    ("adam", lambda x: "adam" in x.lower()),
    ("lamb", lambda x: "lamb" in x.lower() or "multi_tensor_apply" in x.lower()),
    ("dropout", lambda x: "dropout" in x.lower() or "curand" in x),
    ("layernorm", lambda x: "LayerNorm" in x or "cuCompute" in x),
    ("reduce", lambda x: "reduce" in x),
    ("softmax", lambda x: "softmax" in x.lower()),
    ("transpose", lambda x: "transpose" in x.lower()),
    ("element-wise", lambda x: "elementwise" in x.lower() or "DivGrad" in x),
    ("jit", lambda x: x.startswith("kernel_")),
    ("misc", lambda x: True),
]
|
|
|
|
|
|
def group_gpu_activity(lines):
    """Bucket rows by the first matching entry of the ``activities`` table.

    Each row's first column is tested against the predicates in
    ``activities`` in order; the row is appended to the bucket of the first
    predicate that returns true. Rows matching no predicate are dropped.

    Args:
        lines: Iterable of rows (indexable sequences of strings).

    Returns:
        A dict mapping every category name in ``activities`` to the list of
        rows assigned to it (possibly empty), in table order.
    """
    buckets = {}
    for label, _ in activities:
        buckets[label] = []

    for row in lines:
        winner = next(
            (label for label, predicate in activities if predicate(row[0])),
            None,
        )
        if winner is not None:
            buckets[winner].append(row)

    return buckets
|
|
|
|
|
|
def get_seconds(time):
    """Convert a duration string to seconds.

    Every occurrence of ``"us"`` is removed from *time*, the remainder is
    parsed as a float, and the result is divided by 1e9.

    NOTE(review): the divisor treats the number as nanoseconds even though a
    "us" suffix is what gets stripped — confirm the unit of the input column.

    Args:
        time: Duration as text, e.g. ``"1500"``.

    Returns:
        The duration as a float number of seconds.
    """
    numeric_text = time.replace("us", "")
    return float(numeric_text) / 1e9
|
|
|
|
|
|
def gpu_percent_time(activities):
    """Return the sum of the percentage column over the given rows.

    Column 4 of each row is parsed as a float after removing any ``"%"``
    characters.

    Args:
        activities: Iterable of rows (indexable sequences of strings).

    Returns:
        The summed percentage; ``0`` when there are no rows.
    """
    percentages = (float(row[4].replace("%", "")) for row in activities)
    return sum(percentages)
|
|
|
|
|
|
def gpu_absolute_time(activities):
    """Return the total duration, in seconds, over the given rows.

    Column 2 of each row is converted with ``get_seconds`` and the results
    are summed.

    Args:
        activities: Iterable of rows (indexable sequences of strings).

    Returns:
        The summed duration; ``0`` when there are no rows.
    """
    durations = (get_seconds(row[2]) for row in activities)
    return sum(durations)
|
|
|
|
|
|
def gpu_kernel_calls(activities):
    """Return the sum of the call-count column over the given rows.

    Column 1 of each row is parsed with ``int``.

    Args:
        activities: Iterable of rows (indexable sequences of strings).

    Returns:
        The summed call count as an int; ``0`` when there are no rows.
    """
    call_counts = (int(row[1]) for row in activities)
    return sum(call_counts)
|
|
|
|
|
|
# Load the rows of the file named by --input and bucket them by category.
lines = get_gpu_lines(args.input)
groups = group_gpu_activity(lines)

# One summary line per category. The per-category list is bound to `rows`
# rather than `activities`, so the module-level classifier table that
# group_gpu_activity() reads is not overwritten by this loop.
for name, rows in groups.items():
    print(
        f"{name}: N={len(rows)}, calls={gpu_kernel_calls(rows)}, absolute={gpu_absolute_time(rows):.3f}s, percent={gpu_percent_time(rows):.2f}%"
    )

# Grand total over every row of every category.
total = [item for rows in groups.values() for item in rows]
print(
    f"Total: N={len(total)}, calls={gpu_kernel_calls(total)}, absolute={gpu_absolute_time(total):.3f}s, percent={gpu_percent_time(total):.2f}%"
)
|