From 7996ef74ddd913256eaf84ba740bc9da6f5e2ef5 Mon Sep 17 00:00:00 2001
From: Younes Belkada <49240599+younesbelkada@users.noreply.github.com>
Date: Wed, 27 Jul 2022 13:06:01 +0200
Subject: [PATCH] fix module order (#18312)

- put gelu before 4h to h
---
 src/transformers/models/bloom/modeling_bloom.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/transformers/models/bloom/modeling_bloom.py b/src/transformers/models/bloom/modeling_bloom.py
index 357d959c1..7d95d7322 100644
--- a/src/transformers/models/bloom/modeling_bloom.py
+++ b/src/transformers/models/bloom/modeling_bloom.py
@@ -352,9 +352,9 @@ class BloomMLP(nn.Module):
         self.pretraining_tp = config.pretraining_tp
         self.slow_but_exact = config.slow_but_exact
         self.dense_h_to_4h = nn.Linear(hidden_size, 4 * hidden_size)
+        self.gelu_impl = BloomGelu()
         self.dense_4h_to_h = nn.Linear(4 * hidden_size, hidden_size)
         self.hidden_dropout = config.hidden_dropout
-        self.gelu_impl = BloomGelu()
 
     def forward(self, hidden_states, residual):
         hidden_states = self.gelu_impl(self.dense_h_to_4h(hidden_states))