Mirror of https://github.com/saymrwulf/transformers.git (synced 2026-05-14 20:58:08 +00:00)
Commit: 36d404f373 ("fix")
Parent: 8795d1f406
4 changed files with 51 additions and 36 deletions
@@ -345,12 +345,12 @@ doc_test_job = CircleCIJob(
     pytest_num_workers=1,
 )
 
-REGULAR_TESTS = [torch_and_tf_job, torch_and_flax_job, torch_job, tf_job, flax_job, hub_job, onnx_job, tokenization_job, processor_job, generate_job, non_model_job]  # fmt: skip
-EXAMPLES_TESTS = [examples_torch_job, examples_tensorflow_job]
-PIPELINE_TESTS = [pipelines_torch_job, pipelines_tf_job]
-REPO_UTIL_TESTS = [repo_utils_job]
-DOC_TESTS = [doc_test_job]
-ALL_TESTS = REGULAR_TESTS + EXAMPLES_TESTS + PIPELINE_TESTS + REPO_UTIL_TESTS + DOC_TESTS + [custom_tokenizers_job] + [exotic_models_job]  # fmt: skip
+REGULAR_TESTS = [torch_and_tf_job]  # fmt: skip
+EXAMPLES_TESTS = []
+PIPELINE_TESTS = []
+REPO_UTIL_TESTS = []
+DOC_TESTS = []
+ALL_TESTS = REGULAR_TESTS + EXAMPLES_TESTS + PIPELINE_TESTS + REPO_UTIL_TESTS + DOC_TESTS  # fmt: skip
 
 def create_circleci_config(folder=None):
     if folder is None:
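With the job lists above emptied out, the generated CircleCI workflow keeps only the torch+TF cross-test job. A minimal sketch of what the concatenation evaluates to, using a placeholder string where the real config has a CircleCIJob object:

    torch_and_tf_job = "tests_torch_and_tf"  # placeholder, not the real CircleCIJob

    REGULAR_TESTS = [torch_and_tf_job]
    EXAMPLES_TESTS, PIPELINE_TESTS, REPO_UTIL_TESTS, DOC_TESTS = [], [], [], []

    # Concatenating the empty lists leaves a single job to schedule.
    ALL_TESTS = REGULAR_TESTS + EXAMPLES_TESTS + PIPELINE_TESTS + REPO_UTIL_TESTS + DOC_TESTS
    print(ALL_TESTS)  # ['tests_torch_and_tf']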
@@ -1401,28 +1401,38 @@ def set_model_tester_for_less_flaky_test(test_case):
 
 
 def set_config_for_less_flaky_test(config):
-    config.rms_norm_eps = 1.0
-    config.layer_norm_eps = 1.0
-    config.norm_eps = 1.0
-    config.norm_epsilon = 1.0
-    config.layer_norm_epsilon = 1.0
+    target_attrs = ["rms_norm_eps", "layer_norm_eps", "norm_eps", "norm_epsilon", "layer_norm_epsilon", "batch_norm_eps"]
+    for target_attr in target_attrs:
+        setattr(config, target_attr, 1.0)
 
     # norm layers (layer/group norm, etc.) could cause flaky tests when the tensors have very small variance.
    # (We don't need the original epsilon values to check eager/sdpa matches)
-    for attr in ["text_config", "vision_config", "text_encoder", "audio_encoder", "decoder"]:
+    attrs = ["text_config", "vision_config", "text_encoder", "audio_encoder", "decoder"]
+    for attr in attrs:
         if hasattr(config, attr):
-            getattr(config, attr).rms_norm_eps = 1.0
-            getattr(config, attr).layer_norm_eps = 1.0
-            getattr(config, attr).norm_eps = 1.0
-            getattr(config, attr).norm_epsilon = 1.0
-            getattr(config, attr).layer_norm_epsilon = 1.0
+            for target_attr in target_attrs:
+                setattr(getattr(config, attr), target_attr, 1.0)
 
 
 def set_model_for_less_flaky_test(model):
     # Another way to make sure norm layers have desired epsilon. (Some models don't set it from its config.)
-    for module in model.modules():
-        if type(module).__name__ in ["GemmaRMSNorm", "LayerNorm", "GroupNorm"]:
-            module.eps = 1.0
+    target_names = ("LayerNorm", "GroupNorm", "BatchNorm", "RMSNorm", "BatchNorm2d", "BatchNorm1d")
+    target_names += ("LayerNormalization", "GroupNormalization", "BatchNormalization")
+    target_attrs = ["eps", "epsilon", "variance_epsilon"]
+    if is_torch_available() and isinstance(model, torch.nn.Module):
+        for module in model.modules():
+            if type(module).__name__.endswith(target_names):
+                for attr in target_attrs:
+                    if hasattr(module, attr):
+                        setattr(module, attr, 1.0)
+    elif is_tf_available():
+        from transformers import TFPreTrainedModel
+        if isinstance(model, TFPreTrainedModel):
+            for module in model.submodules:
+                if type(module).__name__.endswith(target_names):
+                    for attr in target_attrs:
+                        if hasattr(module, attr):
+                            setattr(module, attr, 1.0)
 
 
 class CaptureStd:
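The rewritten helpers (presumably in src/transformers/testing_utils.py) lean on two standard-library behaviors: setattr creates an attribute when it does not already exist, so every epsilon name in target_attrs ends up at 1.0 whether or not the config defined it, and str.endswith accepts a tuple of suffixes, which lets a single check cover torch and Keras norm-layer class names. A small self-contained sketch, with DummyConfig as a made-up stand-in for a model config:

    class DummyConfig:
        layer_norm_eps = 1e-12  # only one of the epsilon attributes exists up front

    target_attrs = ["rms_norm_eps", "layer_norm_eps", "norm_eps", "norm_epsilon", "layer_norm_epsilon", "batch_norm_eps"]
    config = DummyConfig()
    for target_attr in target_attrs:
        setattr(config, target_attr, 1.0)  # overwrites existing values and creates the missing attributes
    print(config.layer_norm_eps, config.rms_norm_eps)  # 1.0 1.0

    # str.endswith with a tuple matches any of the suffixes.
    target_names = ("LayerNorm", "GroupNorm", "BatchNorm", "RMSNorm", "LayerNormalization")
    print("GemmaRMSNorm".endswith(target_names))              # True
    print("TFBertLayerNormalization".endswith(target_names))  # True (Keras-style name)
    print("Linear".endswith(target_names))                    # False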
@@ -2587,6 +2587,24 @@ class ModelTesterMixin:
         # Check predictions on first output (logits/hidden-states) are close enough given low-level computational differences
         pt_model.eval()
 
-        with torch.no_grad():
-            pt_outputs = pt_model(**pt_inputs_dict)
-        tf_outputs = tf_model(tf_inputs_dict)
+        def foo1(func):
+            def wrap(*args, **kwargs):
+                kwargs["eps"] = 1.0
+                return func(*args, **kwargs)
+            return wrap
+
+        def foo2(func):
+            def wrap(*args, **kwargs):
+                kwargs["epsilon"] = 1.0
+                return func(*args, **kwargs)
+            return wrap
+
+        set_model_for_less_flaky_test(pt_model)
+        set_model_for_less_flaky_test(tf_model)
+
+        import unittest
+        with unittest.mock.patch.object(nn.functional, "normalize", side_effect=foo1(nn.functional.normalize)):
+            with unittest.mock.patch.object(tf.math, "l2_normalize", side_effect=foo2(tf.math.l2_normalize)):
+                with torch.no_grad():
+                    pt_outputs = pt_model(**pt_inputs_dict)
+                tf_outputs = tf_model(tf_inputs_dict)
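The foo1/foo2 wrappers above rely on a documented unittest.mock behavior: when a mock's side_effect is a callable, the mock invokes it and returns its return value, so patching a function with side_effect=wrapper(original) forces eps (or epsilon) to 1.0 on every call while still running the original code. A toy, self-contained sketch of the same pattern; fake_math and force_eps are made up for illustration and stand in for nn.functional / tf.math and foo1 / foo2:

    import types
    import unittest.mock

    fake_math = types.SimpleNamespace(normalize=lambda x, eps=1e-6: x / (x + eps))

    def force_eps(func):
        # Wrap func so every call runs with eps=1.0, regardless of what the caller passes.
        def wrap(*args, **kwargs):
            kwargs["eps"] = 1.0
            return func(*args, **kwargs)
        return wrap

    # side_effect is built from the original function before the patch swaps it out.
    with unittest.mock.patch.object(fake_math, "normalize", side_effect=force_eps(fake_math.normalize)):
        print(round(fake_math.normalize(2.0), 3))  # 0.667, i.e. 2.0 / (2.0 + 1.0) with eps forced to 1.0
    print(round(fake_math.normalize(2.0), 3))      # ~1.0 once the patch is removed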
@@ -2603,9 +2621,12 @@ class ModelTesterMixin:
     @is_pt_tf_cross_test
     def test_pt_tf_model_equivalence(self, allow_missing_keys=False):
         import transformers
+        from transformers.testing_utils import set_model_for_less_flaky_test, set_model_tester_for_less_flaky_test, set_config_for_less_flaky_test
 
+        # set_model_tester_for_less_flaky_test(self)
         for model_class in self.all_model_classes:
             config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
+            set_config_for_less_flaky_test(config)
 
             tf_model_class_name = "TF" + model_class.__name__  # Add the "TF" at the beginning
             if not hasattr(transformers, tf_model_class_name):
@@ -1133,22 +1133,6 @@ def parse_commit_message(commit_message: str) -> Dict[str, bool]:
 
 
-JOB_TO_TEST_FILE = {
-    "tests_torch_and_tf": r"tests/models/.*/test_modeling_(?:tf_|(?!flax)).*",
-    "tests_torch_and_flax": r"tests/models/.*/test_modeling_(?:flax|(?!tf)).*",
-    "tests_tf": r"tests/models/.*/test_modeling_tf_.*",
-    "tests_torch": r"tests/models/.*/test_modeling_(?!(?:flax_|tf_)).*",
-    "tests_generate": r"tests/models/.*/test_modeling_(?!(?:flax_|tf_)).*",
-    "tests_tokenization": r"tests/models/.*/test_tokenization.*",
-    "tests_processors": r"tests/models/.*/test_(?!(?:modeling_|tokenization_)).*",  # takes feature extractors, image processors, processors
-    "examples_torch": r"examples/pytorch/.*test_.*",
-    "examples_tensorflow": r"examples/tensorflow/.*test_.*",
-    "tests_exotic_models": r"tests/models/.*(?=layoutlmv|nat|deta|udop|nougat).*",
-    "tests_custom_tokenizers": r"tests/models/.*/test_tokenization_(?=bert_japanese|openai|clip).*",
-    # "repo_utils": r"tests/[^models].*test.*", TODO later on we might want to do
-    "pipelines_tf": r"tests/models/.*/test_modeling_tf_.*",
-    "pipelines_torch": r"tests/models/.*/test_modeling_(?!(?:flax_|tf_)).*",
-    "tests_hub": r"tests/.*",
-    "tests_onnx": r"tests/models/.*/test_modeling_(?:tf_|(?!flax)).*",
-    "tests_non_model": r"tests/[^/]*?/test_.*\.py",
-}
 
 
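The removed mapping (presumably from utils/tests_fetcher.py) ties CI job names to regexes over test-file paths; the torch-only entries use a negative lookahead so the tf_/flax_ variants of a modeling test are not picked up. A quick standalone sanity check of that pattern on illustrative paths:

    import re

    pattern = r"tests/models/.*/test_modeling_(?!(?:flax_|tf_)).*"

    paths = [
        "tests/models/bert/test_modeling_bert.py",        # plain torch test
        "tests/models/bert/test_modeling_tf_bert.py",     # excluded by the lookahead
        "tests/models/bert/test_modeling_flax_bert.py",   # excluded by the lookahead
    ]
    for path in paths:
        print(path, bool(re.fullmatch(pattern, path)))
    # prints True, False, False respectively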