Aggregate test summary files in CircleCI workflow runs (#34989)

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* try 1

* try 1

* try 1

* try 1

* try 1

* try 1

* try 1

* try 1

* try 1

* try 1

* try 1

* try 1

* try 1

* try 1

* try 1

* try 1

* try 1

* try 1

* try 1

* try 1

* try 1

* try 1

* try 1

* try 1

* try 1

* try 1

* try 1

* try 1

* try 1

* try 1

* try 1

* try 1

* try 1

* try 1

* try 1

* try 1

* try 1

* try 1

* try 1

* try 1

* try 1

* fix

* fix

* fix

* update

* fix

* fix

---------

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
This commit is contained in:
Yih-Dar 2024-12-16 11:06:17 +01:00 committed by GitHub
parent 5615a39369
commit 66531a1ec3
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 110 additions and 5 deletions

View file

@ -58,14 +58,14 @@ jobs:
name: "Prepare pipeline parameters"
command: |
python utils/process_test_artifacts.py
# To keep generated_config.yaml from becoming too long for the continuation orb, we pass the links to the artifacts as parameters.
# Otherwise the list of tests was simply too big. Being explicit is good, but here it was a limitation.
# We used:
# https://circleci.com/docs/api/v2/index.html#operation/getJobArtifacts : to get the job artifacts
# We could not pass a nested dict, which is why we create the test_file_... parameters for every single job
- store_artifacts:
path: test_preparation/transformed_artifacts.json
- store_artifacts:

View file

@ -40,9 +40,22 @@ class EmptyJob:
job_name = "empty"
def to_dict(self):
steps = [{"run": 'ls -la'}]
if self.job_name == "collection_job":
steps.extend(
[
"checkout",
{"run": "pip install requests || true"},
{"run": """while [[ $(curl --location --request GET "https://circleci.com/api/v2/workflow/$CIRCLE_WORKFLOW_ID/job" --header "Circle-Token: $CCI_TOKEN"| jq -r '.items[]|select(.name != "collection_job")|.status' | grep -c "running") -gt 0 ]]; do sleep 5; done || true"""},
{"run": 'python utils/process_circleci_workflow_test_reports.py --workflow_id $CIRCLE_WORKFLOW_ID || true'},
{"store_artifacts": {"path": "outputs"}},
{"run": 'echo "All required jobs have now completed"'},
]
)
return {
"docker": copy.deepcopy(DEFAULT_DOCKER_IMAGE),
"steps":["checkout"],
"steps": steps,
}
@ -352,6 +365,7 @@ REPO_UTIL_TESTS = [repo_utils_job]
DOC_TESTS = [doc_test_job]
ALL_TESTS = REGULAR_TESTS + EXAMPLES_TESTS + PIPELINE_TESTS + REPO_UTIL_TESTS + DOC_TESTS + [custom_tokenizers_job] + [exotic_models_job] # fmt: skip
def create_circleci_config(folder=None):
if folder is None:
folder = os.getcwd()
@ -361,7 +375,13 @@ def create_circleci_config(folder=None):
if len(jobs) == 0:
jobs = [EmptyJob()]
print("Full list of job name inputs", {j.job_name + "_test_list":{"type":"string", "default":''} for j in jobs})
else:
print("Full list of job name inputs", {j.job_name + "_test_list":{"type":"string", "default":''} for j in jobs})
# Add a job that waits for all the test jobs and aggregates their test summary files at the end
collection_job = EmptyJob()
collection_job.job_name = "collection_job"
jobs = [collection_job] + jobs
config = {
"version": "2.1",
"parameters": {
@ -371,7 +391,7 @@ def create_circleci_config(folder=None):
**{j.job_name + "_test_list":{"type":"string", "default":''} for j in jobs},
**{j.job_name + "_parallelism":{"type":"integer", "default":1} for j in jobs},
},
"jobs" : {j.job_name: j.to_dict() for j in jobs}
"jobs": {j.job_name: j.to_dict() for j in jobs}
}
if "CIRCLE_TOKEN" in os.environ:
# For private forked repo. (e.g. new model addition)

View file

@ -0,0 +1,85 @@
# Copyright 2024 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import json
import os
import requests
# Seconds to wait on any single HTTP request before giving up, so a stalled
# connection cannot hang the CI job indefinitely.
REQUEST_TIMEOUT = 60


def fetch_all_items(url, headers, timeout=REQUEST_TIMEOUT):
    """Return the concatenated ``items`` from every page of a CircleCI API v2 list endpoint.

    The list endpoints used here are paginated: a response may carry a
    ``next_page_token`` that has to be sent back as the ``page-token`` query
    parameter to obtain the following page. Reading only the first page would
    silently drop jobs or artifacts.

    Args:
        url: Full URL of the list endpoint.
        headers: HTTP headers to send (carries the ``Circle-Token``).
        timeout: Per-request timeout in seconds.

    Returns:
        A list with the ``items`` of all pages, in the order they were returned.

    Raises:
        requests.HTTPError: If any page request returns an error status.
        KeyError: If a successful response has no ``items`` field.
    """
    items = []
    params = {}
    while True:
        response = requests.get(url, headers=headers, params=params, timeout=timeout)
        # Fail with the real HTTP error rather than a confusing KeyError on "items".
        response.raise_for_status()
        payload = response.json()
        items.extend(payload["items"])
        next_page_token = payload.get("next_page_token")
        if not next_page_token:
            return items
        params = {"page-token": next_page_token}


def parse_test_summary(text):
    """Map each test named in a ``summary_short.txt`` report to ``"passed"`` or ``"failed"``.

    Only lines starting with ``PASSED `` or ``FAILED `` are considered. For a
    ``PASSED`` line the whole remainder of the line is the test name; for a
    ``FAILED`` line only the first whitespace-separated field is kept, which
    drops any trailing failure message. A ``FAILED`` line with no test name is
    ignored. If a test appears more than once, the last occurrence wins.
    """
    statuses = {}
    for line in text.splitlines():
        if line.startswith("PASSED "):
            statuses[line[len("PASSED ") :]] = "passed"
        elif line.startswith("FAILED "):
            fields = line[len("FAILED ") :].split()
            if fields:
                statuses[fields[0]] = "failed"
    return statuses


def sort_job_summary(summary):
    """Return ``summary`` ordered by status then test name, so failed tests come before passed ones."""
    return dict(sorted(summary.items(), key=lambda item: (item[1], item[0])))


def merge_job_summaries(workflow_summary):
    """Pivot ``{job_name: {test: status}}`` into ``{test: {job_name: status}}``.

    Both the outer (test) and inner (job name) keys of the result are sorted.
    """
    per_test = {}
    for job_name, job_summary in workflow_summary.items():
        for test, status in job_summary.items():
            per_test.setdefault(test, {})[job_name] = status
    return {test: dict(sorted(per_test[test].items())) for test in sorted(per_test)}


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--workflow_id", type=str, required=True)
    args = parser.parse_args()
    workflow_id = args.workflow_id

    # Build the auth header once; it is sent with every request below.
    headers = {"Circle-Token": os.environ.get("CIRCLE_TOKEN", "")}

    jobs = fetch_all_items(f"https://circleci.com/api/v2/workflow/{workflow_id}/job", headers)

    os.makedirs("outputs", exist_ok=True)

    workflow_summary = {}
    # for each test job, download its summary artifacts
    for job in jobs:
        job_name = job["name"]
        if not job_name.startswith(("tests_", "examples_", "pipelines_")):
            continue

        # NOTE(review): a job that never started may have no "job_number" in the
        # API response - confirm against the CircleCI docs. Such a job has no
        # artifacts to collect, so skip it instead of failing the whole run.
        if "job_number" not in job:
            print(f"Skipping job {job_name}: no job number available.")
            continue

        project_slug = job["project_slug"]
        url = f'https://circleci.com/api/v2/project/{project_slug}/{job["job_number"]}/artifacts'
        job_artifacts = fetch_all_items(url, headers)

        # Kept from the original script; nothing in this script writes into this directory.
        os.makedirs(job_name, exist_ok=True)
        os.makedirs(f"outputs/{job_name}", exist_ok=True)

        # One summary text per parallel node of the job.
        job_test_summaries = {}
        for artifact in job_artifacts:
            path = artifact["path"]
            if not (path.startswith("reports/") and path.endswith("/summary_short.txt")):
                continue
            response = requests.get(artifact["url"], headers=headers, timeout=REQUEST_TIMEOUT)
            if not response.ok:
                # Do not parse an HTTP error body as if it were a test summary.
                print(f"Could not download {artifact['url']} (HTTP status {response.status_code}).")
                continue
            job_test_summaries[artifact["node_index"]] = response.text

        summary = {}
        for node_test_summary in job_test_summaries.values():
            summary.update(parse_test_summary(node_test_summary))

        # failed before passed
        summary = sort_job_summary(summary)
        workflow_summary[job_name] = summary

        # collected version
        with open(f"outputs/{job_name}/test_summary.json", "w") as fp:
            json.dump(summary, fp, indent=4)

    new_workflow_summary = merge_job_summaries(workflow_summary)

    with open("outputs/test_summary.json", "w") as fp:
        json.dump(new_workflow_summary, fp, indent=4)