mirror of
https://github.com/saymrwulf/onnxruntime.git
synced 2026-06-01 23:30:35 +00:00
Enable shape-sensitive analysis in ProfileExplorer for GPU kernels (#13647)
### Description
Improve the profile explorer by enabling shape sensitivity for GPU kernels.

### Motivation and Context
Due to problems with the ROCM profiler, it was previously challenging to retrieve the shapes corresponding to a GPU kernel event. [PR 13549](https://github.com/microsoft/onnxruntime/pull/13549) addresses these problems, so it's now possible to retrieve shapes from the ORT ROCM/CUDA profilers. This PR leverages [PR 13549](https://github.com/microsoft/onnxruntime/pull/13549) to enable shape-sensitive GPU kernel ranking.

Co-authored-by: Abhishek Udupa <abhishek.udupa@microsoft.com>
This commit is contained in:
parent
4cd8b4269a
commit
9c6c219949
1 changed file with 31 additions and 4 deletions
|
|
@@ -73,6 +73,10 @@ def _json_to_df(profile_path, filter_matcher):
|
|||
if isinstance(data, dict):
|
||||
data = data["traceEvents"]
|
||||
|
||||
most_recent_kernel_launch_event = None
|
||||
num_missing_kernel_launch_events = 0
|
||||
total_kernel_events = 0
|
||||
|
||||
for item in data:
|
||||
cat = item.get("cat")
|
||||
if cat is None:
|
||||
|
|
@@ -92,6 +96,8 @@ def _json_to_df(profile_path, filter_matcher):
|
|||
|
||||
if cat != "Kernel" and not name.endswith("kernel_time"):
|
||||
continue
|
||||
elif name.endswith("kernel_time"):
|
||||
most_recent_kernel_launch_event = item
|
||||
|
||||
block_x = arg.get("block_x", -1)
|
||||
block_y = arg.get("block_y", -1)
|
||||
|
|
@@ -107,18 +113,31 @@ def _json_to_df(profile_path, filter_matcher):
|
|||
"duration": dur,
|
||||
"dimensions": f"{block_x}_{block_y}_{block_z}_{grid_x}_{grid_y}_{grid_z}",
|
||||
"op_name": op_name,
|
||||
"input_type_shape": (
|
||||
_shape_to_string(most_recent_kernel_launch_event["args"]["input_type_shape"])
|
||||
if most_recent_kernel_launch_event is not None
|
||||
else "unknown"
|
||||
),
|
||||
}
|
||||
)
|
||||
total_kernel_events += 1
|
||||
if gpu_entries[-1]["input_type_shape"] == "unknown" and "hipMem" not in gpu_entries[-1]["name"]:
|
||||
num_missing_kernel_launch_events += 1
|
||||
else:
|
||||
cpu_entries.append(
|
||||
{
|
||||
"name": item["args"]["op_name"],
|
||||
"duration": dur,
|
||||
"input_shape": _shape_to_string(item["args"]["input_type_shape"]),
|
||||
"output_shape": _shape_to_string(item["args"]["output_type_shape"]),
|
||||
"input_type_shape": _shape_to_string(item["args"]["input_type_shape"]),
|
||||
"output_type_shape": _shape_to_string(item["args"]["output_type_shape"]),
|
||||
}
|
||||
)
|
||||
|
||||
if num_missing_kernel_launch_events > 0:
|
||||
print(
|
||||
f"WARNNG: Could not resolve shapes for {num_missing_kernel_launch_events} of {total_kernel_events} kernels."
|
||||
)
|
||||
|
||||
cpu_df = pd.DataFrame(cpu_entries)
|
||||
gpu_df = pd.DataFrame(gpu_entries)
|
||||
cpu_df["count"] = 1
|
||||
|
|
@@ -131,7 +150,10 @@ def _print_cpu_top_hitters(frame, args):
|
|||
print("No CPU entries found!")
|
||||
return
|
||||
top = args.count
|
||||
group_key = ["name", "input_shape"] if args.shape_sensitive else ["name"]
|
||||
group_key = ["name"]
|
||||
if args.shape_sensitive:
|
||||
group_key.append("input_type_shape")
|
||||
|
||||
frame2 = frame[["duration", "count"]].sum()
|
||||
frame["pct"] = 100 * (frame["duration"] / frame2["duration"])
|
||||
fields = group_key + ["duration", "pct", "count"]
|
||||
|
|
@@ -150,7 +172,12 @@ def _print_gpu_top_hitters(frame, args):
|
|||
print("No GPU entries found!")
|
||||
return
|
||||
top = args.count
|
||||
group_key = ["name", "dimensions"] if args.dimension_sensitive else ["name"]
|
||||
group_key = ["name"]
|
||||
if args.dimension_sensitive:
|
||||
group_key.append("dimensions")
|
||||
if args.shape_sensitive:
|
||||
group_key.append("input_type_shape")
|
||||
|
||||
frame2 = frame[["duration", "count"]].sum()
|
||||
frame["pct"] = 100 * (frame["duration"] / frame2["duration"])
|
||||
fields = group_key + ["duration", "pct", "count"]
|
||||
|
|
|
|||
Loading…
Reference in a new issue