Mirror of https://github.com/saymrwulf/pytorch.git — synced 2026-05-14 20:57:59 +00:00.
Enable FSDP to deal with channels_last memory formatted tensors. Preserving the channels_last memory format makes FSDP compatible with the best kernels cuDNN offers. Summary of changes: (1) store stride information along with shapes; (2) replace calls to flatten() with as_strided(size=(param.numel(),), stride=(1,)) for flattening; (3) replace calls to view() with as_strided, using the stored sizes and strides, for unflattening. Pull Request resolved: https://github.com/pytorch/pytorch/pull/137382. Approved by: https://github.com/awgu
| Name | | |
|---|---|---|
| .. | ||
| test_checkpoint_wrapper.py | ||
| test_distributed_checkpoint.py | ||
| test_fsdp_apply.py | ||
| test_fsdp_backward_prefetch.py | ||
| test_fsdp_checkpoint.py | ||
| test_fsdp_clip_grad_norm.py | ||
| test_fsdp_comm.py | ||
| test_fsdp_comm_hooks.py | ||
| test_fsdp_core.py | ||
| test_fsdp_dtensor_state_dict.py | ||
| test_fsdp_exec_order.py | ||
| test_fsdp_fine_tune.py | ||
| test_fsdp_flatten_params.py | ||
| test_fsdp_freezing_weights.py | ||
| test_fsdp_fx.py | ||
| test_fsdp_grad_acc.py | ||
| test_fsdp_hybrid_shard.py | ||
| test_fsdp_ignored_modules.py | ||
| test_fsdp_input.py | ||
| test_fsdp_memory.py | ||
| test_fsdp_meta.py | ||
| test_fsdp_misc.py | ||
| test_fsdp_mixed_precision.py | ||
| test_fsdp_multiple_forward.py | ||
| test_fsdp_multiple_wrapping.py | ||
| test_fsdp_optim_state.py | ||
| test_fsdp_overlap.py | ||
| test_fsdp_pure_fp16.py | ||
| test_fsdp_sharded_grad_scaler.py | ||
| test_fsdp_state_dict.py | ||
| test_fsdp_tp_integration.py | ||
| test_fsdp_traversal.py | ||
| test_fsdp_uneven.py | ||
| test_fsdp_unshard_params.py | ||
| test_fsdp_use_orig_params.py | ||
| test_hsdp_dtensor_state_dict.py | ||
| test_shard_utils.py | ||
| test_utils.py | ||
| test_wrap.py | ||