mirror of
https://github.com/saymrwulf/pytorch.git
synced 2026-05-14 20:57:59 +00:00
Torchbench Dynamo Runner: Enable DDP for perf test and traces (#113332)
- Removes an outdated assert that prevented perf tests from running DDP; we now have single-node `--multiprocess`, and perf tests already wrap the model using `deepcopy_and_maybe_ddp`.
- Appends the rank name to traces to avoid all ranks trying to create the same file.
- Renames `deepcopy_and_maybe_ddp` to `deepcopy_and_maybe_parallelize` to include FSDP.

Pull Request resolved: https://github.com/pytorch/pytorch/pull/113332 Approved by: https://github.com/H-Huang, https://github.com/wconstab
This commit is contained in:
parent
c329eddcb9
commit
4b25948ee6
1 changed file with 10 additions and 13 deletions
|
|
@ -683,7 +683,10 @@ def speedup_experiment(args, model_iter_fn, model, example_inputs, **kwargs):
|
|||
)
|
||||
|
||||
if args.export_profiler_trace:
|
||||
name = args.profiler_trace_name + "_" + model.name + ".json"
|
||||
name = args.profiler_trace_name + "_" + model.name
|
||||
if hasattr(args, "rank"):
|
||||
name += f"_rank_{args.rank}"
|
||||
name += ".json"
|
||||
name = os.path.join(torch._dynamo.config.base_dir, name)
|
||||
p.export_chrome_trace(name)
|
||||
median = np.median(timings, axis=0)
|
||||
|
|
@ -2233,7 +2236,7 @@ class BenchmarkRunner:
|
|||
|
||||
return ModuleWrapPolicy(MODEL_FSDP_WRAP[model_name])
|
||||
|
||||
def deepcopy_and_maybe_ddp(self, model):
|
||||
def deepcopy_and_maybe_parallelize(self, model):
|
||||
model = self.deepcopy_model(model)
|
||||
if self.args.ddp:
|
||||
assert (
|
||||
|
|
@ -2329,7 +2332,7 @@ class BenchmarkRunner:
|
|||
inputs_fp64 = None
|
||||
try:
|
||||
model_fp64, inputs_fp64 = cast_to_fp64(
|
||||
self.deepcopy_and_maybe_ddp(model),
|
||||
self.deepcopy_and_maybe_parallelize(model),
|
||||
clone_inputs(example_inputs),
|
||||
)
|
||||
self.init_optimizer(name, current_device, model_fp64.parameters())
|
||||
|
|
@ -2363,7 +2366,7 @@ class BenchmarkRunner:
|
|||
reset_rng_state()
|
||||
model_copy = None
|
||||
try:
|
||||
model_copy = self.deepcopy_and_maybe_ddp(model)
|
||||
model_copy = self.deepcopy_and_maybe_parallelize(model)
|
||||
self.init_optimizer(name, current_device, model_copy.parameters())
|
||||
correct_result = self.run_n_iterations(
|
||||
model_copy, clone_inputs(example_inputs)
|
||||
|
|
@ -2384,7 +2387,7 @@ class BenchmarkRunner:
|
|||
reset_rng_state()
|
||||
model_copy = None
|
||||
try:
|
||||
model_copy = self.deepcopy_and_maybe_ddp(model)
|
||||
model_copy = self.deepcopy_and_maybe_parallelize(model)
|
||||
self.init_optimizer(name, current_device, model_copy.parameters())
|
||||
correct_rerun_result = self.run_n_iterations(
|
||||
model_copy, clone_inputs(example_inputs)
|
||||
|
|
@ -2431,7 +2434,7 @@ class BenchmarkRunner:
|
|||
torch._dynamo.reset()
|
||||
model_copy = None
|
||||
try:
|
||||
model_copy = self.deepcopy_and_maybe_ddp(model)
|
||||
model_copy = self.deepcopy_and_maybe_parallelize(model)
|
||||
self.init_optimizer(name, current_device, model_copy.parameters())
|
||||
if self.args.export or self.args.export_aot_inductor:
|
||||
# apply export on module directly
|
||||
|
|
@ -2615,7 +2618,7 @@ class BenchmarkRunner:
|
|||
model, example_inputs = self.maybe_cast(model, example_inputs)
|
||||
|
||||
# Use distributed wrapping as necessary
|
||||
model = self.deepcopy_and_maybe_ddp(model)
|
||||
model = self.deepcopy_and_maybe_parallelize(model)
|
||||
|
||||
self.init_optimizer(name, current_device, model.parameters())
|
||||
with self.pick_grad(name, self.args.training):
|
||||
|
|
@ -3409,12 +3412,6 @@ def run(runner, args, original_dir=None):
|
|||
CI, args.backend, training=args.training, dynamic=args.dynamic_shapes
|
||||
)
|
||||
if args.ddp:
|
||||
# TODO: we could also hook DDP bench up to --speedup bench, _not_ for mgpu e2e perf,
|
||||
# but just to measure impact on singlenode of performing graph-breaks.
|
||||
# Left it as a follow up to keep this PR isolated.
|
||||
assert (
|
||||
args.accuracy
|
||||
), "DDP benchmark is currently only hooked up to --accuracy bench"
|
||||
assert args.training, "DDP benchmark requires --training mode"
|
||||
if args.no_optimize_ddp:
|
||||
torch._dynamo.config.optimize_ddp = False
|
||||
|
|
|
|||
Loading…
Reference in a new issue