Fix LED documentation (#17181)
* Fix markdown code block
* Use consistent spelling for self-attention
* Fix typos and phrasing
* Fix code style
parent edcc66d27c
commit c76afa511c
3 changed files with 16 additions and 19 deletions
src/transformers/models/led/configuration_led.py

@@ -86,18 +86,17 @@ class LEDConfig(PretrainedConfig):
     Example:
 
     ```python
-
-    >>> from transformers import LEDModel, LEDConfig
-
-    ```
-
-    >>> # Initializing a LED allenai/led-base-16384 style configuration >>> configuration = LEDConfig()
+    >>> from transformers import LEDModel, LEDConfig
+
+    >>> # Initializing a LED allenai/led-base-16384 style configuration
+    >>> configuration = LEDConfig()
 
-    >>> # Initializing a model from the allenai/led-base-16384 style configuration >>> model =
-    LEDModel(configuration)
+    >>> # Initializing a model from the allenai/led-base-16384 style configuration
+    >>> model = LEDModel(configuration)
 
-    >>> # Accessing the model configuration >>> configuration = model.config
-    """
+    >>> # Accessing the model configuration
+    >>> configuration = model.config
+    ```"""
 
     model_type = "led"
     attribute_map = {
         "num_attention_heads": "encoder_attention_heads",
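The original example closed its markdown fence right after the import and collapsed comment and statement onto single `>>>` lines, so the docstring neither rendered as one code block nor ran cleanly as a doctest. As a quick check of the repaired example, a minimal sketch, assuming `transformers` (with this fix) and `torch` are installed:

```python
# Minimal sketch: execute the doctest examples embedded in LEDConfig's
# repaired docstring. The model is randomly initialized, so nothing is
# downloaded, but building the default-sized LED model is fairly heavy.
import doctest

from transformers import LEDConfig

# With a single, correctly closed code block, doctest finds each `>>>`
# example; verbose=True prints them as they run.
doctest.run_docstring_examples(LEDConfig, globs={}, verbose=True)
```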
src/transformers/models/led/modeling_led.py
@@ -1007,7 +1007,7 @@ class LEDDecoderLayer(nn.Module):
         """
         residual = hidden_states
 
-        # Self Attention
+        # Self-Attention
         # decoder uni-directional self-attention cached key/values tuple is at positions 1,2
         self_attn_past_key_value = past_key_value[:2] if past_key_value is not None else None
         # add present self-attn cache to positions 1,2 of present_key_value tuple
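The restored comment describes a real invariant: the decoder layer's `past_key_value` tuple packs the self-attention cache in its first two slots and the cross-attention cache in the last two. An illustrative sketch of that layout, with assumed tensor shapes:

```python
# Sketch of the cache layout the comment describes; the shapes here are
# assumptions for illustration, not values taken from the library.
import torch

batch, heads, past_len, head_dim = 2, 8, 5, 64

def kv():
    return torch.randn(batch, heads, past_len, head_dim)

# (self_attn_key, self_attn_value, cross_attn_key, cross_attn_value)
past_key_value = (kv(), kv(), kv(), kv())

self_attn_past_key_value = past_key_value[:2]    # uni-directional self-attention cache
cross_attn_past_key_value = past_key_value[-2:]  # cross-attention cache
```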
@@ -1437,13 +1437,11 @@ class LEDSeq2SeqQuestionAnsweringModelOutput(ModelOutput):
 
 
 LED_START_DOCSTRING = r"""
-    This model inherits from [`PreTrainedModel`]. Check the superclass documentation for the generic methods the
-    library implements for all its model (such as downloading or saving, resizing the input embeddings, pruning heads
-    etc.)
+    This model inherits from [`PreTrainedModel`]. See the superclass documentation for the generic methods the library
+    implements for all its models (such as downloading or saving, resizing the input embeddings, pruning heads etc.)
 
     This model is also a PyTorch [torch.nn.Module](https://pytorch.org/docs/stable/nn.html#torch.nn.Module) subclass.
-    Use it as a regular PyTorch Module and refer to the PyTorch documentation for all matter related to general usage
-    and behavior.
+    Use it as a regular PyTorch Module and refer to the PyTorch documentation for general usage and behavior.
 
     Parameters:
         config ([`LEDConfig`]):
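Since the tightened sentence stresses that the model is an ordinary `torch.nn.Module`, a short usage sketch may help; the tiny configuration values below are assumptions chosen only to keep construction cheap:

```python
# Hedged sketch: LEDModel is used like any torch.nn.Module (.eval(), .to(),
# .parameters(), ...). The small sizes below are illustrative assumptions.
import torch

from transformers import LEDConfig, LEDModel

config = LEDConfig(
    d_model=64,
    encoder_layers=2,
    decoder_layers=2,
    encoder_attention_heads=2,
    decoder_attention_heads=2,
    encoder_ffn_dim=128,
    decoder_ffn_dim=128,
    attention_window=32,  # small local-attention window keeps padding cheap
)
model = LEDModel(config).eval()  # randomly initialized; no download

input_ids = torch.tensor([[0, 10, 20, 2]])
with torch.no_grad():
    outputs = model(input_ids=input_ids, decoder_input_ids=input_ids)
print(outputs.last_hidden_state.shape)  # torch.Size([1, 4, 64])
```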
@@ -1595,7 +1593,7 @@ LED_INPUTS_DOCSTRING = r"""
 
 class LEDEncoder(LEDPreTrainedModel):
     """
-    Transformer encoder consisting of *config.encoder_layers* self attention layers. Each layer is a
+    Transformer encoder consisting of *config.encoder_layers* self-attention layers. Each layer is a
     [`LEDEncoderLayer`].
 
     Args:
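The fixed line also documents real structure: the encoder stacks exactly `config.encoder_layers` instances of [`LEDEncoderLayer`]. A quick sketch, again with deliberately tiny, assumed sizes:

```python
# Sketch: config.encoder_layers drives how many LEDEncoderLayer modules the
# encoder stacks; the small sizes are assumptions to make construction fast.
from transformers import LEDConfig, LEDModel

config = LEDConfig(
    encoder_layers=2,
    decoder_layers=2,
    d_model=64,
    encoder_attention_heads=2,
    decoder_attention_heads=2,
)
model = LEDModel(config)

assert len(model.encoder.layers) == config.encoder_layers  # one module per layer
```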
@@ -1643,7 +1641,7 @@ class LEDEncoder(LEDPreTrainedModel):
         self.post_init()
 
     def _merge_to_attention_mask(self, attention_mask: torch.Tensor, global_attention_mask: torch.Tensor):
-        # longformer self attention expects attention mask to have 0 (no attn), 1 (local attn), 2 (global attn)
+        # longformer self-attention expects attention mask to have 0 (no attn), 1 (local attn), 2 (global attn)
         # (global_attention_mask + 1) => 1 for local attention, 2 for global attention
         # => final attention_mask => 0 for no attention, 1 for local attention 2 for global attention
         if attention_mask is not None:
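The arithmetic spelled out in these comments is easy to verify standalone; here is a minimal sketch of the same merge, reimplemented directly rather than calling the private method:

```python
# Sketch of the merge the comments describe: 0 = no attention (padding),
# 1 = local attention, 2 = global attention.
import torch

attention_mask = torch.tensor([[1, 1, 1, 0]])         # last position is padding
global_attention_mask = torch.tensor([[1, 0, 0, 0]])  # first token attends globally

# (global_attention_mask + 1) maps local tokens to 1 and global tokens to 2;
# multiplying by attention_mask zeroes out the padded positions.
merged = attention_mask * (global_attention_mask + 1)
print(merged)  # tensor([[2, 1, 1, 0]])
```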
src/transformers/models/led/modeling_tf_led.py
@@ -1238,7 +1238,7 @@ class TFLEDDecoderLayer(tf.keras.layers.Layer):
         """
         residual = hidden_states
 
-        # Self Attention
+        # Self-Attention
         # decoder uni-directional self-attention cached key/values tuple is at positions 1,2
         self_attn_past_key_value = past_key_value[:2] if past_key_value is not None else None
         # add present self-attn cache to positions 1,2 of present_key_value tuple
@@ -1612,7 +1612,7 @@ LED_INPUTS_DOCSTRING = r"""
 class TFLEDEncoder(tf.keras.layers.Layer):
     config_class = LEDConfig
     """
-    Transformer encoder consisting of *config.encoder_layers* self attention layers. Each layer is a
+    Transformer encoder consisting of *config.encoder_layers* self-attention layers. Each layer is a
     [`TFLEDEncoderLayer`].
 
     Args: