diff --git a/src/transformers/models/beit/modeling_beit.py b/src/transformers/models/beit/modeling_beit.py index 4fdd9191f..ce12de6e8 100755 --- a/src/transformers/models/beit/modeling_beit.py +++ b/src/transformers/models/beit/modeling_beit.py @@ -754,6 +754,7 @@ class BeitForMaskedImageModeling(BeitPreTrainedModel): ```python >>> from transformers import BeitFeatureExtractor, BeitForMaskedImageModeling + >>> import torch >>> from PIL import Image >>> import requests @@ -763,9 +764,15 @@ class BeitForMaskedImageModeling(BeitPreTrainedModel): >>> feature_extractor = BeitFeatureExtractor.from_pretrained("microsoft/beit-base-patch16-224-pt22k") >>> model = BeitForMaskedImageModeling.from_pretrained("microsoft/beit-base-patch16-224-pt22k") - >>> inputs = feature_extractor(images=image, return_tensors="pt") - >>> outputs = model(**inputs) - >>> logits = outputs.logits + >>> num_patches = (model.config.image_size // model.config.patch_size) ** 2 + >>> pixel_values = feature_extractor(images=image, return_tensors="pt").pixel_values + >>> # create random boolean mask of shape (batch_size, num_patches) + >>> bool_masked_pos = torch.randint(low=0, high=2, size=(1, num_patches)).bool() + + >>> outputs = model(pixel_values, bool_masked_pos=bool_masked_pos) + >>> loss, logits = outputs.loss, outputs.logits + >>> list(logits.shape) + [1, 196, 8192] ```""" return_dict = return_dict if return_dict is not None else self.config.use_return_dict diff --git a/src/transformers/models/deit/modeling_deit.py b/src/transformers/models/deit/modeling_deit.py index e86f6de23..9b3b3a153 100644 --- a/src/transformers/models/deit/modeling_deit.py +++ b/src/transformers/models/deit/modeling_deit.py @@ -587,19 +587,25 @@ class DeiTForMaskedImageModeling(DeiTPreTrainedModel): Examples: ```python >>> from transformers import DeiTFeatureExtractor, DeiTForMaskedImageModeling + >>> import torch >>> from PIL import Image >>> import requests >>> url = "http://images.cocodataset.org/val2017/000000039769.jpg" >>> image = Image.open(requests.get(url, stream=True).raw) - >>> feature_extractor = DeiTFeatureExtractor.from_pretrained("google/vit-base-patch16-224-in21k") - >>> model = DeiTForMaskedImageModeling.from_pretrained("google/vit-base-patch16-224-in21k") + >>> feature_extractor = DeiTFeatureExtractor.from_pretrained("facebook/deit-base-distilled-patch16-224") + >>> model = DeiTForMaskedImageModeling.from_pretrained("facebook/deit-base-distilled-patch16-224") - >>> inputs = feature_extractor(images=image, return_tensors="pt") + >>> num_patches = (model.config.image_size // model.config.patch_size) ** 2 + >>> pixel_values = feature_extractor(images=image, return_tensors="pt").pixel_values + >>> # create random boolean mask of shape (batch_size, num_patches) + >>> bool_masked_pos = torch.randint(low=0, high=2, size=(1, num_patches)).bool() - >>> outputs = model(**inputs) - >>> last_hidden_states = outputs.last_hidden_state + >>> outputs = model(pixel_values, bool_masked_pos=bool_masked_pos) + >>> loss, reconstructed_pixel_values = outputs.loss, outputs.logits + >>> list(reconstructed_pixel_values.shape) + [1, 3, 224, 224] ```""" return_dict = return_dict if return_dict is not None else self.config.use_return_dict diff --git a/src/transformers/models/swin/modeling_swin.py b/src/transformers/models/swin/modeling_swin.py index 5b31b433f..ea255a6d6 100644 --- a/src/transformers/models/swin/modeling_swin.py +++ b/src/transformers/models/swin/modeling_swin.py @@ -810,6 +810,7 @@ class SwinForMaskedImageModeling(SwinPreTrainedModel): Examples: ```python >>> from transformers import AutoFeatureExtractor, SwinForMaskedImageModeling + >>> import torch >>> from PIL import Image >>> import requests @@ -819,10 +820,15 @@ class SwinForMaskedImageModeling(SwinPreTrainedModel): >>> feature_extractor = AutoFeatureExtractor.from_pretrained("microsoft/swin-tiny-patch4-window7-224") >>> model = SwinForMaskedImageModeling.from_pretrained("microsoft/swin-tiny-patch4-window7-224") - >>> inputs = feature_extractor(images=image, return_tensors="pt") + >>> num_patches = (model.config.image_size // model.config.patch_size) ** 2 + >>> pixel_values = feature_extractor(images=image, return_tensors="pt").pixel_values + >>> # create random boolean mask of shape (batch_size, num_patches) + >>> bool_masked_pos = torch.randint(low=0, high=2, size=(1, num_patches)).bool() - >>> outputs = model(**inputs) - >>> last_hidden_states = outputs.last_hidden_state + >>> outputs = model(pixel_values, bool_masked_pos=bool_masked_pos) + >>> loss, reconstructed_pixel_values = outputs.loss, outputs.logits + >>> list(reconstructed_pixel_values.shape) + [1, 3, 224, 224] ```""" return_dict = return_dict if return_dict is not None else self.config.use_return_dict diff --git a/src/transformers/models/vit/modeling_vit.py b/src/transformers/models/vit/modeling_vit.py index 2c6d660ab..bee1cd92a 100644 --- a/src/transformers/models/vit/modeling_vit.py +++ b/src/transformers/models/vit/modeling_vit.py @@ -624,6 +624,7 @@ class ViTForMaskedImageModeling(ViTPreTrainedModel): Examples: ```python >>> from transformers import ViTFeatureExtractor, ViTForMaskedImageModeling + >>> import torch >>> from PIL import Image >>> import requests @@ -633,10 +634,15 @@ class ViTForMaskedImageModeling(ViTPreTrainedModel): >>> feature_extractor = ViTFeatureExtractor.from_pretrained("google/vit-base-patch16-224-in21k") >>> model = ViTForMaskedImageModeling.from_pretrained("google/vit-base-patch16-224-in21k") - >>> inputs = feature_extractor(images=image, return_tensors="pt") + >>> num_patches = (model.config.image_size // model.config.patch_size) ** 2 + >>> pixel_values = feature_extractor(images=image, return_tensors="pt").pixel_values + >>> # create random boolean mask of shape (batch_size, num_patches) + >>> bool_masked_pos = torch.randint(low=0, high=2, size=(1, num_patches)).bool() - >>> outputs = model(**inputs) - >>> last_hidden_states = outputs.last_hidden_state + >>> outputs = model(pixel_values, bool_masked_pos=bool_masked_pos) + >>> loss, reconstructed_pixel_values = outputs.loss, outputs.logits + >>> list(reconstructed_pixel_values.shape) + [1, 3, 224, 224] ```""" return_dict = return_dict if return_dict is not None else self.config.use_return_dict diff --git a/utils/documentation_tests.txt b/utils/documentation_tests.txt index 3a1985857..43135e225 100644 --- a/utils/documentation_tests.txt +++ b/utils/documentation_tests.txt @@ -9,7 +9,16 @@ src/transformers/models/sew/modeling_sew.py src/transformers/models/sew_d/modeling_sew_d.py src/transformers/models/speech_to_text_2/modeling_speech_to_text_2.py src/transformers/models/speech_to_text/modeling_speech_to_text.py -src/transformers/models/speech_encoder_decoder/modeling_speech_enocder_decoder.py +src/transformers/models/speech_encoder_decoder/modeling_speech_encoder_decoder.py src/transformers/models/data2vec/modeling_data2vec_audio.py +src/transformers/models/vit/modeling_vit.py +src/transformers/models/beit/modeling_beit.py +src/transformers/models/deit/modeling_deit.py +src/transformers/models/swin/modeling_swin.py +src/transformers/models/convnext/modeling_convnext.py +src/transformers/models/poolformer/modeling_poolformer.py +src/transformers/models/vit_mae/modeling_vit_mae.py +src/transformers/models/segformer/modeling_segformer.py +src/transformers/models/vision_encoder_decoder/modeling_vision_encoder_decoder.py docs/source/quicktour.mdx -docs/source/task_summary.mdx +docs/source/task_summary.mdx \ No newline at end of file