fix: Fixed CodeGenTokenizationTest::test_truncation failing test (#32850)

* Fixed failing CodeGenTokenizationTest::test_truncation.
* [run_slow] Codegen
* [run_slow] codegen
2026-05-14 20:58:08 +00:00 · 2024-08-27 12:50:59 +05:30 · 2024-08-27 12:50:59 +05:30 · 3bf6dd8aa1
commit 3bf6dd8aa1
parent 9578c2597e
1 changed file with 2 additions and 2 deletions
--- a/tests/models/codegen/test_tokenization_codegen.py
+++ b/tests/models/codegen/test_tokenization_codegen.py
@@ -254,12 +254,12 @@ class CodeGenTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
        tokenizer = CodeGenTokenizer.from_pretrained("Salesforce/codegen-350M-mono")

        text = "\nif len_a > len_b:\n    result = a\nelse:\n    result = b\n\n\n\n#"
-        expected_trucated_text = "\nif len_a > len_b:      result = a\nelse:      result = b"
+        expected_truncated_text = "\nif len_a > len_b:\n      result = a\nelse:\n      result = b"

        input_ids = tokenizer.encode(text)
        truncation_pattern = ["^#", re.escape("<|endoftext|>"), "^'''", '^"""', "\n\n\n"]
        decoded_text = tokenizer.decode(input_ids, truncate_before_pattern=truncation_pattern)
-        self.assertEqual(decoded_text, expected_trucated_text)
+        self.assertEqual(decoded_text, expected_truncated_text)
        # TODO @ArthurZ outputs of the fast tokenizer are different in this case, un-related to the PR

    # tokenizer has no padding token