From 27e161546621731677f6959c34419861f17b3a69 Mon Sep 17 00:00:00 2001 From: Cyril Vallez Date: Wed, 5 Feb 2025 22:12:55 +0100 Subject: [PATCH] style --- src/transformers/modeling_utils.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/src/transformers/modeling_utils.py b/src/transformers/modeling_utils.py index 07df629ae..848d811db 100755 --- a/src/transformers/modeling_utils.py +++ b/src/transformers/modeling_utils.py @@ -894,7 +894,6 @@ def _load_state_dict_into_meta_model( setattr(module, tensor_name, value) # TODO: consider removing used param_parts from state_dict before return - # In this case, let's parallelize the modules! if tp_key_registry is not None: plan = None @@ -906,7 +905,7 @@ def _load_state_dict_into_meta_model( plan = tp_key_registry[module_prefix]["plan"] prefix = module_prefix break - + if plan is not None: del tp_key_registry[prefix] parent_module = model @@ -4775,7 +4774,12 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin, GenerationMixin, PushToHubMix # correctly initialize the missing keys if it was skipped before if _fast_init: model = _initialize_missing_keys( - model, renamed_loaded_keys, ignore_mismatched_sizes, has_prefix_module, expects_prefix_module, is_quantized + model, + renamed_loaded_keys, + ignore_mismatched_sizes, + has_prefix_module, + expects_prefix_module, + is_quantized, ) # Set some modules to fp32 if needed @@ -4971,7 +4975,7 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin, GenerationMixin, PushToHubMix save_offload_index(disk_offload_index, disk_offload_folder) disk_offload_index = None - # 1-by-1 param loading for the cpu params + # one-at-a-time param loading for the cpu offloaded params if offload_state_dict: # Load back temporarily offloaded state dict load_offloaded_weights(model_to_load, cpu_offload_index, cpu_offload_folder)