mirror of
https://github.com/saymrwulf/transformers.git
synced 2026-05-14 20:58:08 +00:00
Repo utils test (#19696)
* Create repo utils test job * Last occurrence * Add tests for tests_fetcher * Better filtering * Let's learn more * Should fix * Should fix * Remove debug * Style * WiP WiP WiP WiP WiP WiP WiP WiP WiP * Quality * address review comments * Fix link
This commit is contained in:
parent
a23819ed6a
commit
a929f81e92
4 changed files with 115 additions and 11 deletions
|
|
@ -30,7 +30,13 @@ jobs:
|
|||
else
|
||||
touch test_preparation/test_list.txt
|
||||
fi
|
||||
- run: python utils/tests_fetcher.py --filter_pipeline_tests
|
||||
- run: |
|
||||
if [ -f test_repo_utils.txt ]; then
|
||||
mv test_repo_utils.txt test_preparation/test_repo_utils.txt
|
||||
else
|
||||
touch test_preparation/test_repo_utils.txt
|
||||
fi
|
||||
- run: python utils/tests_fetcher.py --filter_tests
|
||||
- run: |
|
||||
if [ -f test_list.txt ]; then
|
||||
mv test_list.txt test_preparation/filtered_test_list.txt
|
||||
|
|
@ -75,8 +81,9 @@ jobs:
|
|||
mkdir test_preparation
|
||||
echo "tests" > test_preparation/test_list.txt
|
||||
echo "tests" > test_preparation/examples_test_list.txt
|
||||
- run: python utils/tests_fetcher.py --filter_pipeline_tests
|
||||
- run: python utils/tests_fetcher.py --filter_tests
|
||||
- run: mv test_list.txt test_preparation/filtered_test_list.txt
|
||||
- run: mv test_repo_utils.txt test_preparation/test_repo_utils.txt
|
||||
- run: python .circleci/create_circleci_config.py --fetcher_folder test_preparation
|
||||
- run: cp test_preparation/generated_config.yml test_preparation/generated_config.txt
|
||||
- store_artifacts:
|
||||
|
|
|
|||
|
|
@ -324,6 +324,18 @@ layoutlm_job = CircleCIJob(
|
|||
)
|
||||
|
||||
|
||||
# CircleCI job running the repository-utility tests (tests/repo_utils).
# Runs on a single pytest worker; parallelism and resource_class are
# explicitly set to None rather than inheriting CircleCIJob's defaults.
repo_utils_job = CircleCIJob(
    "repo_utils",
    install_steps=[
        "pip install --upgrade pip",
        # NOTE(review): installs every extra — presumably the repo utils need
        # the full dev environment; confirm a lighter install would not do.
        "pip install .[all,quality,testing]",
    ],
    parallelism=None,
    pytest_num_workers=1,
    resource_class=None,
    tests_to_run="tests/repo_utils",
)
|
||||
|
||||
REGULAR_TESTS = [
|
||||
torch_and_tf_job,
|
||||
torch_and_flax_job,
|
||||
|
|
@ -344,7 +356,7 @@ PIPELINE_TESTS = [
|
|||
pipelines_torch_job,
|
||||
pipelines_tf_job,
|
||||
]
|
||||
|
||||
REPO_UTIL_TESTS = [repo_utils_job]
|
||||
|
||||
def create_circleci_config(folder=None):
|
||||
if folder is None:
|
||||
|
|
@ -371,6 +383,10 @@ def create_circleci_config(folder=None):
|
|||
example_file = os.path.join(folder, "examples_test_list.txt")
|
||||
if os.path.exists(example_file) and os.path.getsize(example_file) > 0:
|
||||
jobs.extend(EXAMPLES_TESTS)
|
||||
|
||||
repo_util_file = os.path.join(folder, "test_repo_utils.txt")
|
||||
if os.path.exists(repo_util_file) and os.path.getsize(repo_util_file) > 0:
|
||||
jobs.extend(REPO_UTIL_TESTS)
|
||||
|
||||
if len(jobs) > 0:
|
||||
config = {"version": "2.1"}
|
||||
|
|
|
|||
64
tests/repo_utils/test_tests_fetcher.py
Normal file
64
tests/repo_utils/test_tests_fetcher.py
Normal file
|
|
@ -0,0 +1,64 @@
|
|||
# Copyright 2022 The HuggingFace Team. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import os
|
||||
import sys
|
||||
import unittest
|
||||
|
||||
from git import Repo
|
||||
|
||||
|
||||
# Repository root: three directory levels up from this test file.
git_repo_path = os.path.abspath(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
# Make the repo's `utils/` folder importable so `tests_fetcher` can be imported below.
sys.path.append(os.path.join(git_repo_path, "utils"))

transformers_path = os.path.join(git_repo_path, "src", "transformers")
# Tests are run against this specific commit for reproducibility
# https://github.com/huggingface/transformers/tree/07f6690206e39ed7a4d9dbc58824314f7089bb38
GIT_TEST_SHA = "07f6690206e39ed7a4d9dbc58824314f7089bb38"

# Deliberately imported after the sys.path tweak above, hence the noqa.
from tests_fetcher import checkout_commit, clean_code, get_module_dependencies  # noqa: E402
|
||||
|
||||
|
||||
class CheckDummiesTester(unittest.TestCase):
    # NOTE(review): the name looks inherited from the check_dummies tests;
    # these cases actually exercise helpers from `tests_fetcher` — consider
    # renaming (kept as-is here to avoid changing the public identifier).

    def test_clean_code(self):
        """`clean_code` drops triple-quoted strings and comments, keeping code lines."""
        cases = [
            # Triple-quoted strings (both quote styles) are removed entirely.
            ('"""\nDocstring\n"""\ncode\n"""Long string"""\ncode\n', "code\ncode"),
            ("'''\nDocstring\n'''\ncode\n'''Long string'''\ncode\n'''", "code\ncode"),
            # Comments — whole-line and trailing — are removed as well.
            ("code\n# Comment\ncode", "code\ncode"),
            ("code # inline comment\ncode", "code \ncode"),
        ]
        for raw, expected in cases:
            self.assertEqual(clean_code(raw), expected)

    def test_checkout_commit(self):
        """`checkout_commit` moves HEAD to the given sha only inside the context."""
        repo = Repo(git_repo_path)

        def head_sha():
            return repo.head.commit.hexsha

        self.assertNotEqual(head_sha(), GIT_TEST_SHA)
        with checkout_commit(repo, GIT_TEST_SHA):
            self.assertEqual(head_sha(), GIT_TEST_SHA)
        # Leaving the context restores the original HEAD.
        self.assertNotEqual(head_sha(), GIT_TEST_SHA)

    def test_get_module_dependencies(self):
        """Dependencies of modeling_bert.py at the pinned commit match the expected set."""
        bert_module = os.path.join(transformers_path, "models", "bert", "modeling_bert.py")
        relative_deps = (
            "activations.py",
            "modeling_outputs.py",
            "modeling_utils.py",
            "pytorch_utils.py",
            "models/bert/configuration_bert.py",
        )
        expected = {os.path.join(transformers_path, rel) for rel in relative_deps}
        with checkout_commit(Repo(git_repo_path), GIT_TEST_SHA):
            found = get_module_dependencies(bert_module)
        self.assertEqual({os.path.expanduser(path) for path in found}, expected)
|
||||
|
|
@ -547,6 +547,7 @@ def infer_tests_to_run(output_file, diff_with_last_commit=False, filters=None, j
|
|||
# Grab the corresponding test files:
|
||||
if "setup.py" in impacted_files:
|
||||
test_files_to_run = ["tests"]
|
||||
repo_utils_launch = True
|
||||
else:
|
||||
# Grab the corresponding test files:
|
||||
test_files_to_run = []
|
||||
|
|
@ -577,6 +578,12 @@ def infer_tests_to_run(output_file, diff_with_last_commit=False, filters=None, j
|
|||
for filter in filters:
|
||||
filtered_files.extend([f for f in test_files_to_run if f.startswith(filter)])
|
||||
test_files_to_run = filtered_files
|
||||
repo_utils_launch = any(f.split(os.path.sep)[1] == "repo_utils" for f in test_files_to_run)
|
||||
|
||||
if repo_utils_launch:
|
||||
repo_util_file = Path(output_file).parent / "test_repo_utils.txt"
|
||||
with open(repo_util_file, "w", encoding="utf-8") as f:
|
||||
f.write("tests/repo_utils")
|
||||
|
||||
print(f"\n### TEST TO RUN ###\n{_print_list(test_files_to_run)}")
|
||||
if len(test_files_to_run) > 0:
|
||||
|
|
@ -620,20 +627,29 @@ def infer_tests_to_run(output_file, diff_with_last_commit=False, filters=None, j
|
|||
json.dump(test_map, fp, ensure_ascii=False)
|
||||
|
||||
|
||||
def filter_tests(output_file, filters):
    """
    Reads the content of the output file and filters out all the tests in a list of given folders.

    Args:
        output_file (`str` or `os.PathLike`): The path to the output file of the tests fetcher.
        filters (`List[str]`): A list of folders to filter.
    """
    if not os.path.isfile(output_file):
        print("No test file found.")
        return
    with open(output_file, "r", encoding="utf-8") as f:
        test_files = f.read().split(" ")

    # An empty file splits to [""]; treat both shapes as "nothing to do".
    if len(test_files) == 0 or test_files == [""]:
        print("No tests to filter.")
        return

    # (removed: stray debug `print(test_files)` left over from development)
    if test_files == ["tests"]:
        # "tests" is the sentinel for "run everything": expand to the top-level
        # test folders, dropping __init__.py and the filtered folders.
        test_files = [os.path.join("tests", f) for f in os.listdir("tests") if f not in ["__init__.py"] + filters]
    else:
        # Drop any test whose first sub-folder under `tests/` is in `filters`.
        test_files = [f for f in test_files if f.split(os.path.sep)[1] not in filters]

    with open(output_file, "w", encoding="utf-8") as f:
        f.write(" ".join(test_files))
|
||||
|
|
@ -666,9 +682,9 @@ if __name__ == "__main__":
|
|||
help="Only keep the test files matching one of those filters.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--filter_pipeline_tests",
|
||||
"--filter_tests",
|
||||
action="store_true",
|
||||
help="Will filter the pipeline tests outside of the generated list of tests.",
|
||||
help="Will filter the pipeline/repo utils tests outside of the generated list of tests.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--print_dependencies_of",
|
||||
|
|
@ -681,8 +697,8 @@ if __name__ == "__main__":
|
|||
print_tree_deps_of(args.print_dependencies_of)
|
||||
elif args.sanity_check:
|
||||
sanity_check()
|
||||
elif args.filter_pipeline_tests:
|
||||
filter_pipeline_tests(args.output_file)
|
||||
elif args.filter_tests:
|
||||
filter_tests(args.output_file, ["pipelines", "repo_utils"])
|
||||
else:
|
||||
repo = Repo(PATH_TO_TRANFORMERS)
|
||||
|
||||
|
|
@ -698,6 +714,7 @@ if __name__ == "__main__":
|
|||
filters=args.filters,
|
||||
json_output_file=args.json_output_file,
|
||||
)
|
||||
filter_tests(args.output_file, ["repo_utils"])
|
||||
except Exception as e:
|
||||
print(f"\nError when trying to grab the relevant tests: {e}\n\nRunning all tests.")
|
||||
with open(args.output_file, "w", encoding="utf-8") as f:
|
||||
|
|
|
|||
Loading…
Reference in a new issue