From 87089176d9a205d69e7d4e152fc25950d77580d2 Mon Sep 17 00:00:00 2001
From: Arthur <48595927+ArthurZucker@users.noreply.github.com>
Date: Mon, 13 Jan 2025 13:15:07 +0100
Subject: [PATCH] [`Phi`] bias should be True (#35650)

bias should be True
---
 src/transformers/models/phi/modeling_phi.py | 2 +-
 src/transformers/models/phi/modular_phi.py  | 4 +++-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/src/transformers/models/phi/modeling_phi.py b/src/transformers/models/phi/modeling_phi.py
index 08d9eddd9..c81fbbd01 100644
--- a/src/transformers/models/phi/modeling_phi.py
+++ b/src/transformers/models/phi/modeling_phi.py
@@ -727,7 +727,7 @@ class PhiForCausalLM(PhiPreTrainedModel, GenerationMixin):
         super().__init__(config)
         self.model = PhiModel(config)
         self.vocab_size = config.vocab_size
-        self.lm_head = nn.Linear(config.hidden_size, config.vocab_size, bias=False)
+        self.lm_head = nn.Linear(config.hidden_size, config.vocab_size, bias=True)
 
         # Initialize weights and apply final processing
         self.post_init()
diff --git a/src/transformers/models/phi/modular_phi.py b/src/transformers/models/phi/modular_phi.py
index 0faa4629f..d8480a7ad 100644
--- a/src/transformers/models/phi/modular_phi.py
+++ b/src/transformers/models/phi/modular_phi.py
@@ -284,7 +284,9 @@ class PhiModel(LlamaModel):
 
 
 class PhiForCausalLM(LlamaForCausalLM):
-    pass
+    def __init__(self, config):
+        super().__init__(config)
+        self.lm_head = nn.Linear(config.hidden_size, config.vocab_size, bias=True)
 
 
 class PhiForSequenceClassification(LlamaForSequenceClassification):
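
Note: both files change because modeling_phi.py is auto-generated from modular_phi.py in the modular-transformers setup, so the override has to live in the modular file for the two to stay in sync. The fix matters because pretrained Phi checkpoints ship an lm_head.bias tensor; with bias=False that tensor is discarded at load time (surfacing only as a load-time warning) rather than being loaded into a parameter.

A quick sanity check of the change (a minimal sketch, not part of the patch; the tiny config sizes below are arbitrary values chosen only to make instantiation fast) is to build a small PhiForCausalLM and confirm the head now carries a bias parameter:

    # Sketch: verify that PhiForCausalLM builds its lm_head with bias=True.
    # Config sizes are arbitrary/hypothetical, just small enough for a fast check.
    from transformers import PhiConfig, PhiForCausalLM

    config = PhiConfig(
        vocab_size=1000,
        hidden_size=64,
        intermediate_size=128,
        num_hidden_layers=2,
        num_attention_heads=4,
    )
    model = PhiForCausalLM(config)

    assert model.lm_head.bias is not None  # was None before this patch (bias=False)
    print(model.lm_head.bias.shape)        # torch.Size([1000]), i.e. one bias per vocab entry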