Aggregate test summary files in CircleCI workflow runs (#34989)

* fix * fix * fix * fix * fix * fix * fix * fix * fix * fix * fix * fix * fix * fix * fix * fix * fix * fix * fix * fix * fix * fix * fix * fix * fix * fix * fix * fix * fix * fix * fix * fix * fix * fix * fix * try 1 * try 1 * try 1 * try 1 * try 1 * try 1 * try 1 * try 1 * try 1 * try 1 * try 1 * try 1 * try 1 * try 1 * try 1 * try 1 * try 1 * try 1 * try 1 * try 1 * try 1 * try 1 * try 1 * try 1 * try 1 * try 1 * try 1 * try 1 * try 1 * try 1 * try 1 * try 1 * try 1 * try 1 * try 1 * try 1 * try 1 * try 1 * try 1 * try 1 * try 1 * fix * fix * fix * update * fix * fix --------- Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
2026-05-14 20:58:08 +00:00 · 2024-12-16 11:06:17 +01:00 · 2024-12-16 11:06:17 +01:00 · 66531a1ec3
commit 66531a1ec3
parent 5615a39369
3 changed files with 110 additions and 5 deletions
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@ -58,14 +58,14 @@ jobs:
                name: "Prepare pipeline parameters"
                command: |
                    python utils/process_test_artifacts.py 
-            
+
            # To avoid too long generated_config.yaml on the continuation orb, we pass the links to the artifacts as parameters.
            # Otherwise the list of tests was just too big. Explicit is good but for that it was a limitation.
            # We used:

            # https://circleci.com/docs/api/v2/index.html#operation/getJobArtifacts : to get the job artifacts
            # We could not pass a nested dict, which is why we create the test_file_... parameters for every single job
-                
+
            - store_artifacts:
                path: test_preparation/transformed_artifacts.json
            - store_artifacts:
--- a/.circleci/create_circleci_config.py
+++ b/.circleci/create_circleci_config.py
@ -40,9 +40,22 @@ class EmptyJob:
    job_name = "empty"

    def to_dict(self):
+        steps = [{"run": 'ls -la'}]
+        if self.job_name == "collection_job":
+            steps.extend(
+                [
+                    "checkout",
+                    {"run": "pip install requests || true"},
+                    {"run": """while [[ $(curl --location --request GET "https://circleci.com/api/v2/workflow/$CIRCLE_WORKFLOW_ID/job" --header "Circle-Token: $CCI_TOKEN"| jq -r '.items[]|select(.name != "collection_job")|.status' | grep -c "running") -gt 0 ]]; do sleep 5; done || true"""},
+                    {"run": 'python utils/process_circleci_workflow_test_reports.py --workflow_id $CIRCLE_WORKFLOW_ID || true'},
+                    {"store_artifacts": {"path": "outputs"}},
+                    {"run": 'echo "All required jobs have now completed"'},
+                ]
+            )
+
        return {
            "docker": copy.deepcopy(DEFAULT_DOCKER_IMAGE),
-            "steps":["checkout"],
+            "steps": steps,
        }


@ -352,6 +365,7 @@ REPO_UTIL_TESTS = [repo_utils_job]
 DOC_TESTS = [doc_test_job]
 ALL_TESTS = REGULAR_TESTS + EXAMPLES_TESTS + PIPELINE_TESTS + REPO_UTIL_TESTS + DOC_TESTS + [custom_tokenizers_job] + [exotic_models_job]  # fmt: skip

+
 def create_circleci_config(folder=None):
    if folder is None:
        folder = os.getcwd()
@ -361,7 +375,13 @@ def create_circleci_config(folder=None):

    if len(jobs) == 0:
        jobs = [EmptyJob()]
-    print("Full list of job name inputs", {j.job_name + "_test_list":{"type":"string", "default":''} for j in jobs})
+    else:
+        print("Full list of job name inputs", {j.job_name + "_test_list":{"type":"string", "default":''} for j in jobs})
+        # Add a job that waits for all the test jobs and aggregates their test summary files at the end
+        collection_job = EmptyJob()
+        collection_job.job_name = "collection_job"
+        jobs = [collection_job] + jobs
+
    config = {
        "version": "2.1",
        "parameters": {
@ -371,7 +391,7 @@ def create_circleci_config(folder=None):
            **{j.job_name + "_test_list":{"type":"string", "default":''} for j in jobs},
            **{j.job_name + "_parallelism":{"type":"integer", "default":1} for j in jobs},
        },
-        "jobs" : {j.job_name: j.to_dict() for j in jobs}
+        "jobs": {j.job_name: j.to_dict() for j in jobs}
    }
    if "CIRCLE_TOKEN" in os.environ:
        # For private forked repo. (e.g. new model addition)
--- a/utils/process_circleci_workflow_test_reports.py
+++ b/utils/process_circleci_workflow_test_reports.py
@ -0,0 +1,85 @@
+# Copyright 2024 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import argparse
+import json
+import os
+
+import requests
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--workflow_id", type=str, required=True)
+    args = parser.parse_args()
+    workflow_id = args.workflow_id
+
+    r = requests.get(
+        f"https://circleci.com/api/v2/workflow/{workflow_id}/job",
+        headers={"Circle-Token": os.environ.get("CIRCLE_TOKEN", "")},
+    )
+    jobs = r.json()["items"]
+
+    os.makedirs("outputs", exist_ok=True)
+
+    workflow_summary = {}
+    # for each job, download artifacts
+    for job in jobs:
+        project_slug = job["project_slug"]
+        if job["name"].startswith(("tests_", "examples_", "pipelines_")):
+            url = f'https://circleci.com/api/v2/project/{project_slug}/{job["job_number"]}/artifacts'
+            r = requests.get(url, headers={"Circle-Token": os.environ.get("CIRCLE_TOKEN", "")})
+            job_artifacts = r.json()["items"]
+
+            os.makedirs(job["name"], exist_ok=True)
+            os.makedirs(f'outputs/{job["name"]}', exist_ok=True)
+
+            job_test_summaries = {}
+            for artifact in job_artifacts:
+                if artifact["path"].startswith("reports/") and artifact["path"].endswith("/summary_short.txt"):
+                    node_index = artifact["node_index"]
+                    url = artifact["url"]
+                    r = requests.get(url, headers={"Circle-Token": os.environ.get("CIRCLE_TOKEN", "")})
+                    test_summary = r.text
+                    job_test_summaries[node_index] = test_summary
+
+            summary = {}
+            for node_index, node_test_summary in job_test_summaries.items():
+                for line in node_test_summary.splitlines():
+                    if line.startswith("PASSED "):
+                        test = line[len("PASSED ") :]
+                        summary[test] = "passed"
+                    elif line.startswith("FAILED "):
+                        test = line[len("FAILED ") :].split()[0]
+                        summary[test] = "failed"
+            # sort by status, then by test name, so that failed tests are listed before passed ones
+            summary = dict(sorted(summary.items(), key=lambda x: (x[1], x[0])))
+            workflow_summary[job["name"]] = summary
+
+            # write this job's summary, merged across all of its parallel nodes
+            with open(f'outputs/{job["name"]}/test_summary.json', "w") as fp:
+                json.dump(summary, fp, indent=4)
+
+    new_workflow_summary = {}
+    for job_name, job_summary in workflow_summary.items():
+        for test, status in job_summary.items():
+            if test not in new_workflow_summary:
+                new_workflow_summary[test] = {}
+            new_workflow_summary[test][job_name] = status
+
+    for test, result in new_workflow_summary.items():
+        new_workflow_summary[test] = dict(sorted(result.items()))
+    new_workflow_summary = dict(sorted(new_workflow_summary.items()))
+
+    with open("outputs/test_summary.json", "w") as fp:
+        json.dump(new_workflow_summary, fp, indent=4)