[GGUF] Refactor and decouple gguf checkpoint loading logic (#34385)

* draft load_gguf refactor

* update

Signed-off-by: Isotr0py <2037008807@qq.com>

* remove llama mapping

Signed-off-by: Isotr0py <2037008807@qq.com>

* remove qwen2 mapping

Signed-off-by: Isotr0py <2037008807@qq.com>

* remove unused function

Signed-off-by: Isotr0py <2037008807@qq.com>

* deprecate stablelm mapping

Signed-off-by: Isotr0py <2037008807@qq.com>

* deprecate phi3 mapping

Signed-off-by: Isotr0py <2037008807@qq.com>

* deprecate t5 mapping

Signed-off-by: Isotr0py <2037008807@qq.com>

* deprecate bloom mapping

Signed-off-by: Isotr0py <2037008807@qq.com>

* fix bloom

Signed-off-by: Isotr0py <2037008807@qq.com>

* deprecate starcoder2 mapping

Signed-off-by: Isotr0py <2037008807@qq.com>

* deprecate gpt2 mapping

Signed-off-by: Isotr0py <2037008807@qq.com>

* deprecate mistral mapping

Signed-off-by: Isotr0py <2037008807@qq.com>

* deprecate nemotron mapping

Signed-off-by: Isotr0py <2037008807@qq.com>

* deprecate mamba mapping

Signed-off-by: Isotr0py <2037008807@qq.com>

* deprecate mamba mapping

Signed-off-by: Isotr0py <2037008807@qq.com>

* code format

Signed-off-by: Isotr0py <2037008807@qq.com>

* code format

Signed-off-by: Isotr0py <2037008807@qq.com>

* fix mamba

Signed-off-by: Isotr0py <2037008807@qq.com>

* fix qwen2moe

Signed-off-by: Isotr0py <2037008807@qq.com>

* remove qwen2moe mapping

Signed-off-by: Isotr0py <2037008807@qq.com>

* clean up

Signed-off-by: Isotr0py <2037008807@qq.com>

* remove falcon 7b map

Signed-off-by: Isotr0py <2037008807@qq.com>

* remove all ggml tensors mapping

Signed-off-by: Isotr0py <2037008807@qq.com>

* add comments

Signed-off-by: Isotr0py <2037008807@qq.com>

* update messages

Signed-off-by: Isotr0py <2037008807@qq.com>

* fix tensors in parsed parameters

Signed-off-by: Isotr0py <2037008807@qq.com>

* add gguf check

Signed-off-by: Isotr0py <2037008807@qq.com>

---------

Signed-off-by: Isotr0py <2037008807@qq.com>
Author: Isotr0py, committed by GitHub on 2025-01-07 01:02:38 +08:00
parent 86fa3cedad
commit 3951da1a6b
4 changed files with 92 additions and 293 deletions
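
At its core, this PR deletes the hand-maintained per-architecture GGUF_TENSOR_MAPPING dicts and instead derives the GGUF -> HF tensor name mapping from gguf-py's own tensor-name tables. A minimal sketch of that idea, assuming gguf>=0.10.0 is installed (the model and tensor names below are illustrative, not taken from the PR):

from gguf import MODEL_ARCH_NAMES, get_tensor_name_map

# MODEL_ARCH_NAMES maps gguf-py's architecture enum to model_type strings;
# invert it to find the enum member for a given model_type
arch = next(k for k, v in MODEL_ARCH_NAMES.items() if v == "llama")
name_map = get_tensor_name_map(arch, 32)  # 32 = number of hidden layers

# TensorNameMap resolves HF-style names (without the ".weight"/".bias"
# suffix) to standardized GGUF names, e.g. "blk.0.attn_q"
print(name_map.get_name("model.layers.0.self_attn.q_proj"))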

src/transformers/integrations/__init__.py

@@ -57,7 +57,6 @@ _import_structure = {
     "fsdp": ["is_fsdp_managed_module"],
     "ggml": [
         "GGUF_CONFIG_MAPPING",
-        "GGUF_TENSOR_MAPPING",
         "GGUF_TOKENIZER_MAPPING",
         "_gguf_parse_value",
         "load_dequant_gguf_tensor",
@@ -161,7 +160,6 @@ if TYPE_CHECKING:
     from .fsdp import is_fsdp_managed_module
     from .ggml import (
         GGUF_CONFIG_MAPPING,
-        GGUF_TENSOR_MAPPING,
         GGUF_TOKENIZER_MAPPING,
         _gguf_parse_value,
         load_dequant_gguf_tensor,

src/transformers/integrations/ggml.py

@@ -33,254 +33,6 @@ from ..utils.logging import tqdm

 logger = logging.get_logger(__name__)

-GGUF_TENSOR_MAPPING = {
-    "llama": {
-        "token_embd": "model.embed_tokens",
-        "blk": "model.layers",
-        "ffn_up": "mlp.up_proj",
-        "ffn_down": "mlp.down_proj",
-        "ffn_gate": "mlp.gate_proj",
-        "ffn_norm": "post_attention_layernorm",
-        "attn_norm": "input_layernorm",
-        "attn_q": "self_attn.q_proj",
-        "attn_v": "self_attn.v_proj",
-        "attn_k": "self_attn.k_proj",
-        "attn_output": "self_attn.o_proj",
-        "output.weight": "lm_head.weight",
-        "output_norm": "model.norm",
-    },
-    "mistral": {
-        "token_embd": "model.embed_tokens",
-        "blk": "model.layers",
-        "ffn_up": "mlp.up_proj",
-        "ffn_down": "mlp.down_proj",
-        "ffn_gate": "mlp.gate_proj",
-        "ffn_norm": "post_attention_layernorm",
-        "attn_norm": "input_layernorm",
-        "attn_q": "self_attn.q_proj",
-        "attn_v": "self_attn.v_proj",
-        "attn_k": "self_attn.k_proj",
-        "attn_output": "self_attn.o_proj",
-        "output.weight": "lm_head.weight",
-        "output_norm": "model.norm",
-    },
-    "qwen2": {
-        "token_embd": "model.embed_tokens",
-        "blk": "model.layers",
-        "ffn_up": "mlp.up_proj",
-        "ffn_down": "mlp.down_proj",
-        "ffn_gate": "mlp.gate_proj",
-        "ffn_norm": "post_attention_layernorm",
-        "attn_norm": "input_layernorm",
-        "attn_q": "self_attn.q_proj",
-        "attn_v": "self_attn.v_proj",
-        "attn_k": "self_attn.k_proj",
-        "attn_output": "self_attn.o_proj",
-        "output.weight": "lm_head.weight",
-        "output_norm": "model.norm",
-    },
-    "qwen2moe": {
-        "token_embd": "model.embed_tokens",
-        "blk": "model.layers",
-        "ffn_up_exps": "mlp.experts",
-        "ffn_up_shexp": "mlp.shared_expert.up_proj",
-        "ffn_down_exps": "mlp.experts",
-        "ffn_down_shexp": "mlp.shared_expert.down_proj",
-        "ffn_norm": "post_attention_layernorm",
-        "ffn_gate_inp.weight": "mlp.gate.weight",
-        "ffn_gate_exps": "mlp.experts",
-        "ffn_gate_shexp": "mlp.shared_expert.gate_proj",
-        "ffn_gate_inp_shexp": "mlp.shared_expert_gate",
-        "attn_norm": "input_layernorm",
-        "attn_q": "self_attn.q_proj",
-        "attn_v": "self_attn.v_proj",
-        "attn_k": "self_attn.k_proj",
-        "attn_output": "self_attn.o_proj",
-        "output.weight": "lm_head.weight",
-        "output_norm": "model.norm",
-    },
-    "phi3": {
-        "token_embd": "model.embed_tokens",
-        "blk": "model.layers",
-        "ffn_up": "mlp.gate_up_proj",
-        "ffn_down": "mlp.down_proj",
-        "ffn_gate": "mlp.gate_up_proj",
-        "ffn_norm": "post_attention_layernorm",
-        "attn_norm": "input_layernorm",
-        "attn_qkv": "self_attn.qkv_proj",
-        "attn_output": "self_attn.o_proj",
-        "output.weight": "lm_head.weight",
-        "output_norm": "model.norm",
-    },
-    "bloom": {
-        "token_embd.weight": "transformer.word_embeddings.weight",
-        "token_embd_norm": "transformer.word_embeddings_layernorm",
-        "blk": "transformer.h",
-        "ffn_up": "mlp.dense_h_to_4h",
-        "ffn_down": "mlp.dense_4h_to_h",
-        "ffn_norm": "post_attention_layernorm",
-        "attn_norm": "input_layernorm",
-        "attn_qkv": "self_attention.query_key_value",
-        "attn_output": "self_attention.dense",
-        "output.weight": "lm_head.weight",
-        "output_norm": "transformer.ln_f",
-    },
-    "falcon7b": {
-        "token_embd": "word_embeddings",
-        "blk": "h",
-        "ffn_up": "mlp.dense_h_to_4h",
-        "ffn_down": "mlp.dense_4h_to_h",
-        "attn_norm": "input_layernorm",
-        "attn_qkv": "self_attention.query_key_value",
-        "attn_output": "self_attention.dense",
-        ".output.": ".lm_head.",
-        "output_norm": "ln_f",
-    },
-    "falcon40b": {
-        "token_embd": "word_embeddings",
-        "blk": "h",
-        "ffn_up": "mlp.dense_h_to_4h",
-        "ffn_down": "mlp.dense_4h_to_h",
-        ".attn_norm.": ".ln_mlp.",
-        "attn_norm_2": "ln_attn",
-        "attn_qkv": "self_attention.query_key_value",
-        "attn_output": "self_attention.dense",
-        ".output.": ".lm_head.",
-        "output_norm": "ln_f",
-    },
-    "t5": {
-        "token_embd": "shared",
-        "dec.blk.{bid}.attn_q": "decoder.block.{bid}.layer.0.SelfAttention.q",
-        "dec.blk.{bid}.attn_k": "decoder.block.{bid}.layer.0.SelfAttention.k",
-        "dec.blk.{bid}.attn_v": "decoder.block.{bid}.layer.0.SelfAttention.v",
-        "dec.blk.{bid}.attn_o": "decoder.block.{bid}.layer.0.SelfAttention.o",
-        "dec.blk.{bid}.attn_rel_b": "decoder.block.{bid}.layer.0.SelfAttention.relative_attention_bias",
-        "dec.blk.{bid}.attn_norm": "decoder.block.{bid}.layer.0.layer_norm",
-        "dec.blk.{bid}.cross_attn_q": "decoder.block.{bid}.layer.1.EncDecAttention.q",
-        "dec.blk.{bid}.cross_attn_k": "decoder.block.{bid}.layer.1.EncDecAttention.k",
-        "dec.blk.{bid}.cross_attn_v": "decoder.block.{bid}.layer.1.EncDecAttention.v",
-        "dec.blk.{bid}.cross_attn_o": "decoder.block.{bid}.layer.1.EncDecAttention.o",
-        "dec.blk.{bid}.cross_attn_norm": "decoder.block.{bid}.layer.1.layer_norm",
-        "dec.blk.{bid}.ffn_gate": "decoder.block.{bid}.layer.2.DenseReluDense.wi_0",
-        "dec.blk.{bid}.ffn_up": "decoder.block.{bid}.layer.2.DenseReluDense.wi_1",
-        "dec.blk.{bid}.ffn_down": "decoder.block.{bid}.layer.2.DenseReluDense.wo",
-        "dec.blk.{bid}.ffn_norm": "decoder.block.{bid}.layer.2.layer_norm",
-        "dec.output_norm": "decoder.final_layer_norm",
-        "enc.blk.{bid}.attn_q": "encoder.block.{bid}.layer.0.SelfAttention.q",
-        "enc.blk.{bid}.attn_k": "encoder.block.{bid}.layer.0.SelfAttention.k",
-        "enc.blk.{bid}.attn_v": "encoder.block.{bid}.layer.0.SelfAttention.v",
-        "enc.blk.{bid}.attn_o": "encoder.block.{bid}.layer.0.SelfAttention.o",
-        "enc.blk.{bid}.attn_rel_b": "encoder.block.{bid}.layer.0.SelfAttention.relative_attention_bias",
-        "enc.blk.{bid}.attn_norm": "encoder.block.{bid}.layer.0.layer_norm",
-        "enc.blk.{bid}.ffn_gate": "encoder.block.{bid}.layer.1.DenseReluDense.wi_0",
-        "enc.blk.{bid}.ffn_up": "encoder.block.{bid}.layer.1.DenseReluDense.wi_1",
-        "enc.blk.{bid}.ffn_down": "encoder.block.{bid}.layer.1.DenseReluDense.wo",
-        "enc.blk.{bid}.ffn_norm": "encoder.block.{bid}.layer.1.layer_norm",
-        "enc.output_norm": "encoder.final_layer_norm",
-        "output.weight": "lm_head.weight",
-    },
-    "t5encoder": {
-        "token_embd": "shared",
-        "enc.blk.{bid}.attn_q": "encoder.block.{bid}.layer.0.SelfAttention.q",
-        "enc.blk.{bid}.attn_k": "encoder.block.{bid}.layer.0.SelfAttention.k",
-        "enc.blk.{bid}.attn_v": "encoder.block.{bid}.layer.0.SelfAttention.v",
-        "enc.blk.{bid}.attn_o": "encoder.block.{bid}.layer.0.SelfAttention.o",
-        "enc.blk.{bid}.attn_rel_b": "encoder.block.{bid}.layer.0.SelfAttention.relative_attention_bias",
-        "enc.blk.{bid}.attn_norm": "encoder.block.{bid}.layer.0.layer_norm",
-        "enc.blk.{bid}.ffn_gate": "encoder.block.{bid}.layer.1.DenseReluDense.wi_0",
-        "enc.blk.{bid}.ffn_up": "encoder.block.{bid}.layer.1.DenseReluDense.wi_1",
-        "enc.blk.{bid}.ffn_down": "encoder.block.{bid}.layer.1.DenseReluDense.wo",
-        "enc.blk.{bid}.ffn_norm": "encoder.block.{bid}.layer.1.layer_norm",
-        "enc.output_norm": "encoder.final_layer_norm",
-    },
-    "stablelm": {
-        "token_embd": "model.embed_tokens",
-        "blk": "model.layers",
-        "ffn_up": "mlp.up_proj",
-        "ffn_down": "mlp.down_proj",
-        "ffn_gate": "mlp.gate_proj",
-        "ffn_norm": "post_attention_layernorm",
-        "attn_norm": "input_layernorm",
-        "attn_q": "self_attn.q_proj",
-        "attn_v": "self_attn.v_proj",
-        "attn_k": "self_attn.k_proj",
-        "attn_output": "self_attn.o_proj",
-        "output.weight": "lm_head.weight",
-        "output_norm": "model.norm",
-    },
-    "gpt2": {
-        "token_embd": "transformer.wte",
-        "blk": "transformer.h",
-        "position_embd": "transformer.wpe",
-        "output_norm": "transformer.ln_f",
-        "attn_norm": "ln_1",
-        "attn_qkv": "attn.c_attn",
-        "attn_output.weight": "attn.c_proj.weight",
-        "attn_output.bias": "attn.c_proj.bias",
-        "ffn_norm": "ln_2",
-        "ffn_up": "mlp.c_fc",
-        "ffn_down": "mlp.c_proj",
-    },
-    "starcoder2": {
-        "token_embd": "model.embed_tokens",
-        "blk": "model.layers",
-        "ffn_up": "mlp.c_fc",
-        "ffn_down": "mlp.c_proj",
-        "ffn_norm": "post_attention_layernorm",
-        "attn_norm": "input_layernorm",
-        "attn_q": "self_attn.q_proj",
-        "attn_v": "self_attn.v_proj",
-        "attn_k": "self_attn.k_proj",
-        "attn_output": "self_attn.o_proj",
-        "output.weight": "lm_head.weight",
-        "output_norm": "model.norm",
-    },
-    "mamba": {
-        "token_embd": "backbone.embeddings",
-        "blk": "backbone.layers",
-        "ssm_a": "mixer.A_log",
-        "ssm_conv1d": "mixer.conv1d",
-        "ssm_in": "mixer.in_proj",
-        "ssm_out": "mixer.out_proj",
-        "ssm_x": "mixer.x_proj",
-        "ssm_dt": "mixer.dt_proj",
-        "attn_norm": "norm",
-        "output_norm": "backbone.norm_f",
-        "output.weight": "lm_head.weight",
-    },
-    "nemotron": {
-        "token_embd": "model.embed_tokens",
-        "blk": "model.layers",
-        "ffn_up": "mlp.up_proj",
-        "ffn_down": "mlp.down_proj",
-        "ffn_norm": "post_attention_layernorm",
-        "attn_norm": "input_layernorm",
-        "attn_q": "self_attn.q_proj",
-        "attn_v": "self_attn.v_proj",
-        "attn_k": "self_attn.k_proj",
-        "attn_output": "self_attn.o_proj",
-        "output.weight": "lm_head.weight",
-        "output_norm": "model.norm",
-    },
-    "gemma2": {
-        "token_embd": "model.embed_tokens",
-        "blk": "model.layers",
-        "ffn_up": "mlp.up_proj",
-        "ffn_down": "mlp.down_proj",
-        "ffn_gate": "mlp.gate_proj",
-        "ffn_norm": "pre_feedforward_layernorm",
-        "post_attention_norm": "post_attention_layernorm",
-        "post_ffw_norm": "post_feedforward_layernorm",
-        "attn_norm": "input_layernorm",
-        "attn_q": "self_attn.q_proj",
-        "attn_v": "self_attn.v_proj",
-        "attn_k": "self_attn.k_proj",
-        "attn_output": "self_attn.o_proj",
-        "output_norm": "model.norm",
-    },
-}
-

 GGUF_CONFIG_MAPPING = {
     "general": {
         "architecture": "model_type",

src/transformers/modeling_gguf_pytorch_utils.py

@@ -22,7 +22,6 @@ from tqdm import tqdm

 from .integrations import (
     GGUF_CONFIG_MAPPING,
-    GGUF_TENSOR_MAPPING,
     GGUF_TOKENIZER_MAPPING,
     _gguf_parse_value,
 )
@@ -47,12 +46,11 @@ GGUF_TO_TRANSFORMERS_MAPPING = {
         "general": {"file_type": "file_type", "quantization_version": "quantization_version"},
     },
     "config": GGUF_CONFIG_MAPPING,
-    "tensors": GGUF_TENSOR_MAPPING,
     "tokenizer": {"tokenizer": GGUF_TOKENIZER_MAPPING["tokenizer"]},
     "tokenizer_config": {"tokenizer": GGUF_TOKENIZER_MAPPING["tokenizer_config"]},
 }

-GGUF_SUPPORTED_ARCHITECTURES = list(GGUF_TO_TRANSFORMERS_MAPPING["tensors"].keys())
+GGUF_SUPPORTED_ARCHITECTURES = list(GGUF_TO_TRANSFORMERS_MAPPING["config"].keys())


 class GGUFTensor(NamedTuple):
@@ -121,21 +119,10 @@ class Qwen2MoeTensorProcessor(TensorProcessor):
     ):
         # Original merge implementation
         # https://github.com/ggerganov/llama.cpp/blob/master/convert_hf_to_gguf.py#L1994-L2022
-        exp_name = ""
-        if "ffn_gate_exps" in name:
-            exp_name = "gate_proj"
-        elif "ffn_down_exps" in name:
-            exp_name = "down_proj"
-        elif "ffn_up_exps" in name:
-            exp_name = "up_proj"
-        else:
-            raise ValueError(f"Cannot map expert tensor {name} in Qwen2Moe architecture.")
-        for tensor_name in tensor_key_mapping:
-            if tensor_name in name:
-                name = name.replace(tensor_name, tensor_key_mapping[tensor_name])
+        name = tensor_key_mapping[name]

         w_counter = self.config.get("num_experts", 60)
         for i in range(0, w_counter):
-            temp_name = name.replace(".weight", f".{i}.{exp_name}.weight")
+            temp_name = name.replace("mlp.experts.", f"mlp.experts.{i}.")
             exp_weight = weights[i]
             parsed_parameters["tensors"][temp_name] = torch.from_numpy(np.copy(exp_weight))
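
For reference, a hedged sketch of what the new expert-splitting path above does with a stacked expert tensor; the shapes and names are illustrative, not taken from a real checkpoint:

import numpy as np
import torch

# A GGUF qwen2moe checkpoint stores all experts of one projection stacked
# into a single tensor of shape (num_experts, out_features, in_features).
num_experts = 60
stacked = np.zeros((num_experts, 5632, 2048), dtype=np.float32)

# After renaming via tensor_key_mapping, the packed HF name looks like this:
name = "model.layers.0.mlp.experts.gate_proj.weight"
tensors = {}
for i in range(num_experts):
    # insert the expert index to get per-expert parameter names
    temp_name = name.replace("mlp.experts.", f"mlp.experts.{i}.")
    tensors[temp_name] = torch.from_numpy(np.copy(stacked[i]))
# -> "model.layers.0.mlp.experts.0.gate_proj.weight" ... ".59.gate_proj.weight"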
@@ -223,10 +210,6 @@ class MambaTensorProcessor(TensorProcessor):
         super().__init__(config=config)

     def process(self, weights, name, **kwargs):
-        if "ssm_d" in name and "bias" not in name and "weight" not in name:
-            # ssm_d has conflicts with ssm_dt in name checking
-            # we have to explicitly check that name is exactly ssm_d
-            name = name.replace("ssm_d", "mixer.D")
         if "ssm_conv1d.weight" in name:
             # for compatibility tensor ssm_conv1d must be (5120, 1, 4) dim,
             # quantized one is (5120, 4)
@@ -267,7 +250,84 @@ def read_field(reader, field):
     return [_gguf_parse_value(value.parts[_data_index], value.types) for _data_index in value.data]


-def load_gguf_checkpoint(gguf_checkpoint_path, return_tensors=False):
+# modified from https://github.com/vllm-project/vllm/blob/v0.6.4.post1/vllm/model_executor/model_loader/loader.py#L1115-L1147
+def get_gguf_hf_weights_map(
+    hf_model,
+    model_type: Optional[str] = None,
+    num_layers: Optional[int] = None,
+    qual_name: str = "",
+):
+    """
+    GGUF uses the following naming convention for tensors converted from an HF checkpoint:
+    `blk.N.BB.weight` and `blk.N.BB.bias`,
+    where N signifies the block number of a layer, and BB signifies the
+    attention/mlp layer components.
+    See "Standardized tensor names" in
+    https://github.com/ggerganov/ggml/blob/master/docs/gguf.md for details.
+    """
+    if is_gguf_available() and is_torch_available():
+        from gguf import MODEL_ARCH_NAMES, get_tensor_name_map
+    else:
+        logger.error(
+            "Loading a GGUF checkpoint in PyTorch requires both PyTorch and GGUF>=0.10.0 to be installed. Please see "
+            "https://pytorch.org/ and https://github.com/ggerganov/llama.cpp/tree/master/gguf-py for installation instructions."
+        )
+        raise ImportError("Please install torch and gguf>=0.10.0 to load a GGUF checkpoint in PyTorch.")
+
+    model_type = hf_model.config.model_type if model_type is None else model_type
+    num_layers = hf_model.config.num_hidden_layers if num_layers is None else num_layers
+    # hack: ggufs have a different name for cohere
+    if model_type == "cohere":
+        model_type = "command-r"
+    if model_type == "qwen2_moe":
+        model_type = "qwen2moe"
+    arch = None
+    for key, value in MODEL_ARCH_NAMES.items():
+        if value == model_type:
+            arch = key
+            break
+    if arch is None:
+        raise NotImplementedError(
+            f"Unknown gguf model_type: {model_type} in gguf-py. "
+            "This might be because you're using an outdated version of the gguf-py package; "
+            "you can install the `gguf` package from source, see "
+            "https://github.com/ggerganov/llama.cpp/tree/master/gguf-py#development"
+        )
+    name_map = get_tensor_name_map(arch, num_layers)
+
+    # Run a dummy conversion to obtain the mapping, because gguf-py provides
+    # the hf => gguf direction and we need the reverse gguf => hf mapping
+    gguf_to_hf_name_map = {}
+    state_dict = hf_model.state_dict()
+    for hf_name in state_dict.keys():
+        # An exception for the qwen2moe model, where the expert layers are packed
+        if model_type == "qwen2moe" and "mlp.experts." in hf_name:
+            hf_name = re.sub(r"mlp.experts.\d+.", "mlp.experts.", hf_name)
+        name, suffix = hf_name, ""
+        if hf_name.endswith(".weight") or hf_name.endswith(".bias"):
+            name, suffix = hf_name.rsplit(".", 1)
+            suffix = "." + suffix
+        gguf_name = name_map.get_name(name)
+        if gguf_name is None:
+            continue
+        gguf_to_hf_name_map[gguf_name + suffix] = qual_name + hf_name
+
+    # Some models like Bloom were converted from BloomModel instead of BloomForCausalLM.
+    # Therefore, we need to check submodules as well to get a correct mapping.
+    if named_children := hf_model.named_children():
+        for name, child in named_children:
+            sub_map = get_gguf_hf_weights_map(child, model_type, num_layers, qual_name=f"{qual_name}{name}.")
+            # Ignore the keys that are already in the main map to avoid overwriting
+            sub_map = {k: v for k, v in sub_map.items() if k not in gguf_to_hf_name_map}
+            gguf_to_hf_name_map.update(sub_map)
+
+    return gguf_to_hf_name_map
+
+
+def load_gguf_checkpoint(gguf_checkpoint_path, return_tensors=False, model_to_load=None):
     """
     Load a GGUF file and return a dictionary of parsed parameters containing tensors, the parsed
     tokenizer and config attributes.
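The suffix handling in get_gguf_hf_weights_map can be traced with a single name; a hedged sketch, where the GGUF name shown is what gguf-py returns for the llama arch:

# How get_gguf_hf_weights_map builds one entry (illustrative values):
hf_name = "model.layers.0.self_attn.q_proj.weight"
name, suffix = hf_name.rsplit(".", 1)  # "model.layers.0.self_attn.q_proj", "weight"
suffix = "." + suffix                  # ".weight"
# name_map.get_name(name) returns "blk.0.attn_q" for the llama arch, so the
# resulting entry is:
# gguf_to_hf_name_map["blk.0.attn_q.weight"] = "model.layers.0.self_attn.q_proj.weight"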
@@ -323,20 +383,8 @@ def load_gguf_checkpoint(gguf_checkpoint_path, return_tensors=False):
         parsed_parameters["config"]["use_qkv_bias"] = qkv_bias
         parsed_parameters["config"]["use_parallel_residual"] = not use_parallel_residual

-    model_size = ""
-    # extract the number of params from file name as architectures can differ ;
-    # eg. for falcon : `...falcon-7b-...`
-    if "falcon" in architecture:
-        gguf_file_name = gguf_checkpoint_path.split("/")[-1].lower()
-        m = re.search(r"-\d+b-", gguf_file_name)  # regex to catch `-7b-`
-        if m is None:
-            raise ValueError(
-                f"From file name, cannot determine the number of parameters for {architecture} architecture"
-            )
-        model_size = m.group().strip("-")  # only keeps `7b`
-
-    if architecture + model_size not in GGUF_SUPPORTED_ARCHITECTURES:
-        raise ValueError(f"Architecture {architecture + model_size} not supported")
+    if architecture not in GGUF_SUPPORTED_ARCHITECTURES:
+        raise ValueError(f"GGUF model with architecture {architecture} is not supported yet.")

     # Handle tie_word_embeddings, if lm_head.weight is not present in tensors,
     # tie_word_embeddings is true otherwise false
@@ -388,7 +436,9 @@ def load_gguf_checkpoint(gguf_checkpoint_path, return_tensors=False):
         )

     if return_tensors:
-        tensor_key_mapping = GGUF_TO_TRANSFORMERS_MAPPING["tensors"][architecture + model_size]
+        parsed_parameters["tensors"] = {}
+
+        tensor_key_mapping = get_gguf_hf_weights_map(model_to_load)
         config = parsed_parameters.get("config", {})

         ProcessorClass = TENSOR_PROCESSORS.get(architecture, TensorProcessor)
@@ -407,16 +457,12 @@ def load_gguf_checkpoint(gguf_checkpoint_path, return_tensors=False):
             weights = result.weights
             name = result.name
             bid = result.metadata.get("bid")

-            if name is None:
+            if name not in tensor_key_mapping:
                 continue

-            for tensor_name in tensor_key_mapping:
-                if tensor_name.format(bid=bid) in name:
-                    name = name.replace(tensor_name.format(bid=bid), tensor_key_mapping[tensor_name].format(bid=bid))
+            name = tensor_key_mapping[name]

             # Use copy to avoid errors with numpy and pytorch
             parsed_parameters["tensors"][name] = torch.from_numpy(np.copy(weights))

     if len(reader_keys) > 0:
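With this change the renaming loop becomes an exact dictionary lookup instead of substring replacement, and GGUF keys without an HF counterpart are simply skipped. A small sketch with assumed names (rope_freqs.weight stands in for a llama.cpp-only tensor):

tensor_key_mapping = {"blk.0.attn_q.weight": "model.layers.0.self_attn.q_proj.weight"}

for gguf_name in ["blk.0.attn_q.weight", "rope_freqs.weight"]:
    if gguf_name not in tensor_key_mapping:
        continue  # "rope_freqs.weight" has no HF counterpart and is skipped
    hf_name = tensor_key_mapping[gguf_name]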

src/transformers/modeling_utils.py

@@ -3917,7 +3917,10 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin, GenerationMixin, PushToHubMix
                 gguf_path = cached_file(pretrained_model_name_or_path, gguf_file, **cached_file_kwargs)

-                state_dict = load_gguf_checkpoint(gguf_path, return_tensors=True)["tensors"]
+                # we need a dummy model to help rename state_dict
+                with torch.device("meta"):
+                    dummy_model = cls(config)
+                state_dict = load_gguf_checkpoint(gguf_path, return_tensors=True, model_to_load=dummy_model)["tensors"]

                 resolved_archive_file = None
                 is_sharded = False
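
The dummy model above is created on PyTorch's meta device, so constructing it allocates no real parameter storage. A hedged sketch of the pattern, using LlamaForCausalLM purely as an example:

import torch
from transformers import LlamaConfig, LlamaForCausalLM

# Parameters on the "meta" device have shapes and names but no backing
# storage, which makes the dummy model cheap even for large configs.
with torch.device("meta"):
    dummy_model = LlamaForCausalLM(LlamaConfig())
print(next(dummy_model.parameters()).device)  # device(type='meta')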