# coding=utf-8
# Copyright 2024 BigCode and The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
""" Testing suite for the PyTorch Starcoder2 model. """
import unittest

import pytest

from transformers import Starcoder2Config, is_torch_available
from transformers.testing_utils import (
    require_bitsandbytes,
    require_flash_attn,
    require_torch,
    require_torch_accelerator,
    require_torch_gpu,
    slow,
    torch_device,
)

from ...generation.test_utils import GenerationTesterMixin
from ...test_configuration_common import ConfigTester
from ...test_modeling_common import ModelTesterMixin, ids_tensor
from ...test_pipeline_mixin import PipelineTesterMixin


if is_torch_available():
    import torch

    from transformers import (
        AutoTokenizer,
        Starcoder2ForCausalLM,
        Starcoder2ForSequenceClassification,
        Starcoder2ForTokenClassification,
        Starcoder2Model,
    )


# Copied from transformers.tests.models.mistral.test_modeling_mistral.MistralModelTester with Mistral->Starcoder2
class Starcoder2ModelTester:
    def __init__(
        self,
        parent,
        batch_size=13,
        seq_length=7,
        is_training=True,
        use_input_mask=True,
        use_token_type_ids=False,
        use_labels=True,
        vocab_size=99,
        hidden_size=32,
        num_hidden_layers=2,
        num_attention_heads=4,
        num_key_value_heads=2,
        intermediate_size=37,
        hidden_act="gelu",
        hidden_dropout_prob=0.1,
        attention_probs_dropout_prob=0.1,
        max_position_embeddings=512,
        type_vocab_size=16,
        type_sequence_label_size=2,
        initializer_range=0.02,
        num_labels=3,
        num_choices=4,
        pad_token_id=0,
        scope=None,
    ):
        self.parent = parent
        self.batch_size = batch_size
        self.seq_length = seq_length
        self.is_training = is_training
        self.use_input_mask = use_input_mask
        self.use_token_type_ids = use_token_type_ids
        self.use_labels = use_labels
        self.vocab_size = vocab_size
        self.hidden_size = hidden_size
        self.num_hidden_layers = num_hidden_layers
        self.num_attention_heads = num_attention_heads
        self.num_key_value_heads = num_key_value_heads
        self.intermediate_size = intermediate_size
        self.hidden_act = hidden_act
        self.hidden_dropout_prob = hidden_dropout_prob
        self.attention_probs_dropout_prob = attention_probs_dropout_prob
        self.max_position_embeddings = max_position_embeddings
        self.type_vocab_size = type_vocab_size
        self.type_sequence_label_size = type_sequence_label_size
        self.initializer_range = initializer_range
        self.num_labels = num_labels
        self.num_choices = num_choices
        self.pad_token_id = pad_token_id
        self.scope = scope

    # Copied from tests.models.llama.test_modeling_llama.LlamaModelTester.prepare_config_and_inputs
    def prepare_config_and_inputs(self):
        input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)

        input_mask = None
        if self.use_input_mask:
            input_mask = torch.tril(torch.ones_like(input_ids).to(torch_device))

        token_type_ids = None
        if self.use_token_type_ids:
            token_type_ids = ids_tensor([self.batch_size, self.seq_length], self.type_vocab_size)

        sequence_labels = None
        token_labels = None
        choice_labels = None
        if self.use_labels:
            sequence_labels = ids_tensor([self.batch_size], self.type_sequence_label_size)
            token_labels = ids_tensor([self.batch_size, self.seq_length], self.num_labels)
            choice_labels = ids_tensor([self.batch_size], self.num_choices)

        config = self.get_config()

        return config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels

    # Ignore copy
    def get_config(self):
        return Starcoder2Config(
            vocab_size=self.vocab_size,
            hidden_size=self.hidden_size,
            num_hidden_layers=self.num_hidden_layers,
            num_attention_heads=self.num_attention_heads,
            num_key_value_heads=self.num_key_value_heads,
            intermediate_size=self.intermediate_size,
            hidden_act=self.hidden_act,
            hidden_dropout_prob=self.hidden_dropout_prob,
            attention_probs_dropout_prob=self.attention_probs_dropout_prob,
            max_position_embeddings=self.max_position_embeddings,
            type_vocab_size=self.type_vocab_size,
            is_decoder=False,
            initializer_range=self.initializer_range,
            pad_token_id=self.pad_token_id,
            eos_token_id=self.pad_token_id,
            bos_token_id=self.pad_token_id,
        )

    # Copied from tests.models.llama.test_modeling_llama.LlamaModelTester.create_and_check_model with Llama->Starcoder2
    def create_and_check_model(
        self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
    ):
        model = Starcoder2Model(config=config)
        model.to(torch_device)
        model.eval()
        result = model(input_ids, attention_mask=input_mask)
        result = model(input_ids)
        self.parent.assertEqual(result.last_hidden_state.shape, (self.batch_size, self.seq_length, self.hidden_size))

    # Copied from tests.models.llama.test_modeling_llama.LlamaModelTester.create_and_check_model_as_decoder with Llama->Starcoder2
    def create_and_check_model_as_decoder(
        self,
        config,
        input_ids,
        token_type_ids,
        input_mask,
        sequence_labels,
        token_labels,
        choice_labels,
        encoder_hidden_states,
        encoder_attention_mask,
    ):
        config.add_cross_attention = True
        model = Starcoder2Model(config)
        model.to(torch_device)
        model.eval()
        result = model(
            input_ids,
            attention_mask=input_mask,
            encoder_hidden_states=encoder_hidden_states,
            encoder_attention_mask=encoder_attention_mask,
        )
        result = model(
            input_ids,
            attention_mask=input_mask,
            encoder_hidden_states=encoder_hidden_states,
        )
        result = model(input_ids, attention_mask=input_mask)
        self.parent.assertEqual(result.last_hidden_state.shape, (self.batch_size, self.seq_length, self.hidden_size))

    # Copied from tests.models.llama.test_modeling_llama.LlamaModelTester.create_and_check_for_causal_lm with Llama->Starcoder2
    def create_and_check_for_causal_lm(
        self,
        config,
        input_ids,
        token_type_ids,
        input_mask,
        sequence_labels,
        token_labels,
        choice_labels,
        encoder_hidden_states,
        encoder_attention_mask,
    ):
        model = Starcoder2ForCausalLM(config=config)
        model.to(torch_device)
        model.eval()
        result = model(input_ids, attention_mask=input_mask, labels=token_labels)
        self.parent.assertEqual(result.logits.shape, (self.batch_size, self.seq_length, self.vocab_size))
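
    # The following check verifies that incremental decoding with `past_key_values` matches a full
    # forward pass over the concatenated sequence, compared on a random slice of the hidden states.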
    # Copied from tests.models.llama.test_modeling_llama.LlamaModelTester.create_and_check_decoder_model_past_large_inputs with Llama->Starcoder2
    def create_and_check_decoder_model_past_large_inputs(
        self,
        config,
        input_ids,
        token_type_ids,
        input_mask,
        sequence_labels,
        token_labels,
        choice_labels,
        encoder_hidden_states,
        encoder_attention_mask,
    ):
        config.is_decoder = True
        config.add_cross_attention = True
        model = Starcoder2ForCausalLM(config=config)
        model.to(torch_device)
        model.eval()

        # first forward pass
        outputs = model(
            input_ids,
            attention_mask=input_mask,
            encoder_hidden_states=encoder_hidden_states,
            encoder_attention_mask=encoder_attention_mask,
            use_cache=True,
        )
        past_key_values = outputs.past_key_values

        # create hypothetical multiple next tokens and extend to next_input_ids
        next_tokens = ids_tensor((self.batch_size, 3), config.vocab_size)
        next_mask = ids_tensor((self.batch_size, 3), vocab_size=2)

        # append to next input_ids and next attention mask
        next_input_ids = torch.cat([input_ids, next_tokens], dim=-1)
        next_attention_mask = torch.cat([input_mask, next_mask], dim=-1)

        output_from_no_past = model(
            next_input_ids,
            attention_mask=next_attention_mask,
            encoder_hidden_states=encoder_hidden_states,
            encoder_attention_mask=encoder_attention_mask,
            output_hidden_states=True,
        )["hidden_states"][0]
        output_from_past = model(
            next_tokens,
            attention_mask=next_attention_mask,
            encoder_hidden_states=encoder_hidden_states,
            encoder_attention_mask=encoder_attention_mask,
            past_key_values=past_key_values,
            output_hidden_states=True,
        )["hidden_states"][0]

        # select random slice
        random_slice_idx = ids_tensor((1,), output_from_past.shape[-1]).item()
        output_from_no_past_slice = output_from_no_past[:, -3:, random_slice_idx].detach()
        output_from_past_slice = output_from_past[:, :, random_slice_idx].detach()

        self.parent.assertTrue(output_from_past_slice.shape[1] == next_tokens.shape[1])

        # test that outputs are equal for slice
        self.parent.assertTrue(torch.allclose(output_from_past_slice, output_from_no_past_slice, atol=1e-3))

    # Copied from tests.models.llama.test_modeling_llama.LlamaModelTester.prepare_config_and_inputs_for_common
    def prepare_config_and_inputs_for_common(self):
        config_and_inputs = self.prepare_config_and_inputs()
        (
            config,
            input_ids,
            token_type_ids,
            input_mask,
            sequence_labels,
            token_labels,
            choice_labels,
        ) = config_and_inputs
        inputs_dict = {"input_ids": input_ids, "attention_mask": input_mask}
        return config, inputs_dict


@require_torch
# Copied from transformers.tests.models.mistral.test_modeling_mistral.MistralModelTest with Mistral->Starcoder2
class Starcoder2ModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixin, unittest.TestCase):
    all_model_classes = (
        (Starcoder2Model, Starcoder2ForCausalLM, Starcoder2ForSequenceClassification, Starcoder2ForTokenClassification)
        if is_torch_available()
        else ()
    )
    all_generative_model_classes = (Starcoder2ForCausalLM,) if is_torch_available() else ()
    pipeline_model_mapping = (
        {
            "feature-extraction": Starcoder2Model,
            "text-classification": Starcoder2ForSequenceClassification,
            "token-classification": Starcoder2ForTokenClassification,
            "text-generation": Starcoder2ForCausalLM,
            "zero-shot": Starcoder2ForSequenceClassification,
        }
        if is_torch_available()
        else {}
    )
    test_headmasking = False
    test_pruning = False

    # TODO (ydshieh): Check this. See https://app.circleci.com/pipelines/github/huggingface/transformers/79245/workflows/9490ef58-79c2-410d-8f51-e3495156cf9c/jobs/1012146
    def is_pipeline_test_to_skip(
        self,
        pipeline_test_case_name,
        config_class,
        model_architecture,
        tokenizer_name,
        image_processor_name,
        feature_extractor_name,
        processor_name,
    ):
        return True

    def setUp(self):
        self.model_tester = Starcoder2ModelTester(self)
        self.config_tester = ConfigTester(self, config_class=Starcoder2Config, hidden_size=37)

    def test_config(self):
        self.config_tester.run_common_tests()

    def test_model(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_model(*config_and_inputs)

    def test_model_various_embeddings(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        for type in ["absolute", "relative_key", "relative_key_query"]:
            config_and_inputs[0].position_embedding_type = type
            self.model_tester.create_and_check_model(*config_and_inputs)

    def test_Starcoder2_sequence_classification_model(self):
        config, input_dict = self.model_tester.prepare_config_and_inputs_for_common()
        print(config)
        config.num_labels = 3
        input_ids = input_dict["input_ids"]
        attention_mask = input_ids.ne(1).to(torch_device)
        sequence_labels = ids_tensor([self.model_tester.batch_size], self.model_tester.type_sequence_label_size)
        model = Starcoder2ForSequenceClassification(config)
        model.to(torch_device)
        model.eval()
        result = model(input_ids, attention_mask=attention_mask, labels=sequence_labels)
        self.assertEqual(result.logits.shape, (self.model_tester.batch_size, self.model_tester.num_labels))

    def test_Starcoder2_sequence_classification_model_for_single_label(self):
        config, input_dict = self.model_tester.prepare_config_and_inputs_for_common()
        config.num_labels = 3
        config.problem_type = "single_label_classification"
        input_ids = input_dict["input_ids"]
        attention_mask = input_ids.ne(1).to(torch_device)
        sequence_labels = ids_tensor([self.model_tester.batch_size], self.model_tester.type_sequence_label_size)
        model = Starcoder2ForSequenceClassification(config)
        model.to(torch_device)
        model.eval()
        result = model(input_ids, attention_mask=attention_mask, labels=sequence_labels)
        self.assertEqual(result.logits.shape, (self.model_tester.batch_size, self.model_tester.num_labels))

    def test_Starcoder2_sequence_classification_model_for_multi_label(self):
        config, input_dict = self.model_tester.prepare_config_and_inputs_for_common()
        config.num_labels = 3
        config.problem_type = "multi_label_classification"
        input_ids = input_dict["input_ids"]
        attention_mask = input_ids.ne(1).to(torch_device)
        sequence_labels = ids_tensor(
            [self.model_tester.batch_size, config.num_labels], self.model_tester.type_sequence_label_size
        ).to(torch.float)
        model = Starcoder2ForSequenceClassification(config)
        model.to(torch_device)
        model.eval()
        result = model(input_ids, attention_mask=attention_mask, labels=sequence_labels)
        self.assertEqual(result.logits.shape, (self.model_tester.batch_size, self.model_tester.num_labels))

    # Copied from tests.models.llama.test_modeling_llama.LlamaModelTest.test_llama_token_classification_model with Llama->Starcoder2,llama->Starcoder2
    def test_Starcoder2_token_classification_model(self):
        config, input_dict = self.model_tester.prepare_config_and_inputs_for_common()
        config.num_labels = 3
        input_ids = input_dict["input_ids"]
        attention_mask = input_ids.ne(1).to(torch_device)
        token_labels = ids_tensor([self.model_tester.batch_size, self.model_tester.seq_length], config.num_labels)
        model = Starcoder2ForTokenClassification(config=config)
        model.to(torch_device)
        model.eval()
        result = model(input_ids, attention_mask=attention_mask, labels=token_labels)
        self.assertEqual(
            result.logits.shape,
            (self.model_tester.batch_size, self.model_tester.seq_length, self.model_tester.num_labels),
        )

    @unittest.skip(reason="Starcoder2 buffers include complex numbers, which breaks this test")
    def test_save_load_fast_init_from_base(self):
        pass

    @unittest.skip(reason="Starcoder2 uses GQA on all models so the KV cache is a non standard format")
    def test_past_key_values_format(self):
        pass

    @require_flash_attn
    @require_torch_gpu
    @pytest.mark.flash_attn_test
    @slow
    def test_flash_attn_2_inference_equivalence_right_padding(self):
        self.skipTest(reason="Starcoder2 flash attention does not support right padding")


@slow
@require_torch_accelerator
class Starcoder2IntegrationTest(unittest.TestCase):
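    # These tests load the full bigcode/starcoder2-7b checkpoint and compare greedy generation
    # (do_sample=False) against fixed reference completions under different attention implementations.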
    def test_starcoder2_batched_generation_sdpa(self):
        EXPECTED_TEXT = [
            "Hello my name is Younes and I am a student at the University of Liverpool. I am currently studying for my MSc in Computer Science. I am interested in the field of Machine Learning and I am currently working on",
            "def hello_world():\n\treturn 'Hello World!'\n\n@app.route('/hello/<name>')\ndef hello_name(name):\n\treturn 'Hello %s!' % name\n\n@app",
        ]
        model_id = "bigcode/starcoder2-7b"

        model = Starcoder2ForCausalLM.from_pretrained(
            model_id, torch_dtype=torch.float16, device_map="auto", attn_implementation="sdpa"
        )
        tokenizer = AutoTokenizer.from_pretrained(model_id)
        tokenizer.pad_token = tokenizer.eos_token

        text = ["Hello my name is Younes and", "def hello_world():"]
        inputs = tokenizer(text, return_tensors="pt", padding=True).to(torch_device)

        output = model.generate(**inputs, max_new_tokens=40, do_sample=False)
        output_text = tokenizer.batch_decode(output, skip_special_tokens=True)
        self.assertEqual(EXPECTED_TEXT, output_text)

    def test_starcoder2_batched_generation_eager(self):
        EXPECTED_TEXT = [
            "Hello my name is Younes and I am a student at the University of Liverpool. I am currently studying for my MSc in Computer Science. I am interested in the field of Machine Learning and I am currently working on",
            "def hello_world():\n\treturn 'Hello World!'\n\n@app.route('/hello/<name>')\ndef hello_name(name):\n\treturn 'Hello %s!' % name\n\n@app",
        ]
        model_id = "bigcode/starcoder2-7b"

        model = Starcoder2ForCausalLM.from_pretrained(
            model_id, torch_dtype=torch.float16, device_map="auto", attn_implementation="eager"
        )
        tokenizer = AutoTokenizer.from_pretrained(model_id)
        tokenizer.pad_token = tokenizer.eos_token

        text = ["Hello my name is Younes and", "def hello_world():"]
        inputs = tokenizer(text, return_tensors="pt", padding=True).to(torch_device)

        output = model.generate(**inputs, max_new_tokens=40, do_sample=False)
        output_text = tokenizer.batch_decode(output, skip_special_tokens=True)
        self.assertEqual(EXPECTED_TEXT, output_text)

    @require_flash_attn
    @pytest.mark.flash_attn_test
    def test_starcoder2_batched_generation_fa2(self):
        EXPECTED_TEXT = [
            "Hello my name is Younes and I am a student at the University of Liverpool. I am currently studying for my MSc in Computer Science. I am interested in the field of Machine Learning and I am currently working on",
            "def hello_world():\n\treturn 'Hello World!'\n\n@app.route('/hello/<name>')\ndef hello_name(name):\n\treturn 'Hello %s!' % name\n\n@app",
        ]
        model_id = "bigcode/starcoder2-7b"

        model = Starcoder2ForCausalLM.from_pretrained(
            model_id, torch_dtype=torch.float16, device_map="auto", attn_implementation="flash_attention_2"
        )
        tokenizer = AutoTokenizer.from_pretrained(model_id)
        tokenizer.pad_token = tokenizer.eos_token

        text = ["Hello my name is Younes and", "def hello_world():"]
        inputs = tokenizer(text, return_tensors="pt", padding=True).to(torch_device)

        output = model.generate(**inputs, max_new_tokens=40, do_sample=False)
        output_text = tokenizer.batch_decode(output, skip_special_tokens=True)
        self.assertEqual(EXPECTED_TEXT, output_text)

    @require_bitsandbytes
    def test_starcoder2_batched_generation_4bit(self):
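        # Loading with load_in_4bit=True quantizes the weights with bitsandbytes, so the reference
        # completions differ from the fp16 runs above.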
        EXPECTED_TEXT = [
            'Hello my name is Younes and I am a student at the University of Maryland. I am currently working on a project that is related to the topic of "How to make a game". I am currently working on a project',
            'def hello_world():\n\treturn "Hello World"\n\n@app.route(\'/hello/<name>\')\ndef hello_name(name):\n\treturn "Hello " + name\n\n@app.route',
        ]
        model_id = "bigcode/starcoder2-7b"

        model = Starcoder2ForCausalLM.from_pretrained(model_id, load_in_4bit=True)
        tokenizer = AutoTokenizer.from_pretrained(model_id)
        tokenizer.pad_token = tokenizer.eos_token

        text = ["Hello my name is Younes and", "def hello_world():"]
        inputs = tokenizer(text, return_tensors="pt", padding=True).to(torch_device)

        output = model.generate(**inputs, max_new_tokens=40, do_sample=False)
        output_text = tokenizer.batch_decode(output, skip_special_tokens=True)
        self.assertEqual(EXPECTED_TEXT, output_text)