From 573eaf12258df8e87434ffa19a42b04fb873c6dc Mon Sep 17 00:00:00 2001 From: Huy Do Date: Thu, 17 Nov 2022 03:36:56 +0000 Subject: [PATCH] Analyze and upload disabled tests rerun to S3 (#89083) Analyze and upload disabled tests rerun to S3. Note that this only picks up `test-reports` from `rerun_disable_tests` workflows. ### Testing Running the script manually `python -m tools.stats.check_disabled_tests --workflow-run-id 3473068035 --workflow-run-attempt 1 --repo pytorch/pytorch` and see the files successfully uploaded to s3://ossci-raw-job-status/rerun_disabled_tests/3473068035/1 Rockset collection created https://console.rockset.com/collections/details/commons.rerun_disabled_tests Pull Request resolved: https://github.com/pytorch/pytorch/pull/89083 Approved by: https://github.com/clee2000 --- .github/workflows/upload-test-stats.yml | 11 + tools/stats/check_disabled_tests.py | 290 ++++++++++++++++++++++++ 2 files changed, 301 insertions(+) create mode 100644 tools/stats/check_disabled_tests.py diff --git a/.github/workflows/upload-test-stats.yml b/.github/workflows/upload-test-stats.yml index 27289983e27..3f3db80670d 100644 --- a/.github/workflows/upload-test-stats.yml +++ b/.github/workflows/upload-test-stats.yml @@ -72,6 +72,17 @@ jobs: # anything on GitHub to upload. The command should return right away python3 -m tools.stats.upload_artifacts --workflow-run-id "${WORKFLOW_RUN_ID}" --workflow-run-attempt "${WORKFLOW_RUN_ATTEMPT}" --repo "${REPO_FULLNAME}" + - name: Analyze disabled tests rerun + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + WORKFLOW_ARTIFACTS_URL: ${{ github.event.workflow_run.artifacts_url }} + WORKFLOW_RUN_ID: ${{ github.event.workflow_run.id }} + WORKFLOW_RUN_ATTEMPT: ${{ github.event.workflow_run.run_attempt }} + REPO_FULLNAME: ${{ github.event.workflow_run.repository.full_name }} + run: | + # Analyze the results from disable tests rerun and upload them to S3 + python3 -m tools.stats.check_disabled_tests --workflow-run-id "${WORKFLOW_RUN_ID}" --workflow-run-attempt "${WORKFLOW_RUN_ATTEMPT}" --repo "${REPO_FULLNAME}" + check-api-rate: if: ${{ always() }} runs-on: [self-hosted, linux.2xlarge] diff --git a/tools/stats/check_disabled_tests.py b/tools/stats/check_disabled_tests.py new file mode 100644 index 00000000000..75c4f236ef2 --- /dev/null +++ b/tools/stats/check_disabled_tests.py @@ -0,0 +1,290 @@ +import argparse +import json +import os +import xml.etree.ElementTree as ET +from pathlib import Path +from tempfile import TemporaryDirectory +from typing import Any, Dict, Generator, Tuple + +from tools.stats.upload_stats_lib import ( + download_gha_artifacts, + download_s3_artifacts, + unzip, + upload_to_s3, +) +from tools.stats.upload_test_stats import process_xml_element + +TESTCASE_TAG = "testcase" +TARGET_WORKFLOW = "--rerun-disabled-tests" +SEPARATOR = ";" + + +def is_rerun_disabled_tests(root: ET.ElementTree) -> bool: + """ + Check if the test report is coming from rerun_disabled_tests workflow + """ + skipped = root.find(".//*skipped") + # Need to check against None here, if not skipped doesn't work as expected + if skipped is None: + return False + + message = skipped.attrib.get("message", "") + return TARGET_WORKFLOW in message or "num_red" in message + + +def process_report( + report: Path, +) -> Dict[str, Dict[str, int]]: + """ + Return a list of disabled tests that should be re-enabled and those that are still + flaky (failed or skipped) + """ + root = ET.parse(report) + + # All rerun tests from a report are grouped here: + # + # * Success test should be re-enable if it's green after rerunning in all platforms + # where it is currently disabled + # * Failures from pytest because pytest-flakefinder is used to run the same test + # multiple times, some could fails + # * Skipped tests from unittest + # + # We want to keep track of how many times the test fails (num_red) or passes (num_green) + all_tests: Dict[str, Dict[str, int]] = {} + + if not is_rerun_disabled_tests(root): + return all_tests + + for test_case in root.iter(TESTCASE_TAG): + parsed_test_case = process_xml_element(test_case) + + # Under --rerun-disabled-tests mode, a test is skipped when: + # * it's skipped explicitly inside PyToch code + # * it's skipped because it's a normal enabled test + # * or it's falky (num_red > 0 and num_green > 0) + # * or it's failing (num_red > 0 and num_green == 0) + # + # We care only about the latter two here + skipped = parsed_test_case.get("skipped", None) + if skipped and "num_red" not in skipped.get("message", ""): + continue + + name = parsed_test_case.get("name", "") + classname = parsed_test_case.get("classname", "") + filename = parsed_test_case.get("file", "") + + if not name or not classname or not filename: + continue + + # Check if the test is a failure + failure = parsed_test_case.get("failure", None) + + disabled_test_id = SEPARATOR.join([name, classname, filename]) + if disabled_test_id not in all_tests: + all_tests[disabled_test_id] = { + "num_green": 0, + "num_red": 0, + } + + # Under --rerun-disabled-tests mode, if a test is not skipped or failed, it's + # counted as a success. Otherwise, it's still flaky or failing + if skipped: + try: + stats = json.loads(skipped.get("message", "")) + except json.JSONDecodeError: + stats = {} + + all_tests[disabled_test_id]["num_green"] += stats.get("num_green", 0) + all_tests[disabled_test_id]["num_red"] += stats.get("num_red", 0) + elif failure: + # As a failure, increase the failure count + all_tests[disabled_test_id]["num_red"] += 1 + else: + all_tests[disabled_test_id]["num_green"] += 1 + + return all_tests + + +def get_test_reports( + repo: str, workflow_run_id: int, workflow_run_attempt: int +) -> Generator[Path, None, None]: + """ + Gather all the test reports from S3 and GHA. It is currently not possible to guess which + test reports are from rerun_disabled_tests workflow because the name doesn't include the + test config. So, all reports will need to be downloaded and examined + """ + with TemporaryDirectory() as temp_dir: + print("Using temporary directory:", temp_dir) + os.chdir(temp_dir) + + artifact_paths = download_s3_artifacts( + "test-reports", workflow_run_id, workflow_run_attempt + ) + for path in artifact_paths: + unzip(path) + + artifact_paths = download_gha_artifacts( + "test-report", workflow_run_id, workflow_run_attempt + ) + for path in artifact_paths: + unzip(path) + + for report in Path(".").glob("**/*.xml"): + yield report + + +def get_disabled_test_name(test_id: str) -> Tuple[str, str, str, str]: + """ + Follow flaky bot convention here, if that changes, this will also need to be updated + """ + name, classname, filename = test_id.split(SEPARATOR) + return f"{name} (__main__.{classname})", name, classname, filename + + +def prepare_record( + workflow_id: int, + workflow_run_attempt: int, + name: str, + classname: str, + filename: str, + flaky: bool, + num_red: int = 0, + num_green: int = 0, +) -> Tuple[Any, Dict[str, Any]]: + """ + Prepare the record to save onto S3 + """ + key = ( + workflow_id, + workflow_run_attempt, + name, + classname, + filename, + ) + + record = { + "workflow_id": workflow_id, + "workflow_run_attempt": workflow_run_attempt, + "name": name, + "classname": classname, + "filename": filename, + "flaky": flaky, + "num_green": num_green, + "num_red": num_red, + } + + return key, record + + +def save_results( + workflow_id: int, + workflow_run_attempt: int, + all_tests: Dict[str, Dict[str, int]], +) -> None: + """ + Save the result to S3, so it can go to Rockset + """ + should_be_enabled_tests = { + name: stats + for name, stats in all_tests.items() + if "num_green" in stats + and stats["num_green"] + and "num_red" in stats + and stats["num_red"] == 0 + } + still_flaky_tests = { + name: stats + for name, stats in all_tests.items() + if name not in should_be_enabled_tests + } + + records = {} + for test_id, stats in all_tests.items(): + num_green = stats.get("num_green", 0) + num_red = stats.get("num_red", 0) + disabled_test_name, name, classname, filename = get_disabled_test_name(test_id) + + key, record = prepare_record( + workflow_id=workflow_id, + workflow_run_attempt=workflow_run_attempt, + name=name, + classname=classname, + filename=filename, + flaky=test_id in still_flaky_tests, + num_green=num_green, + num_red=num_red, + ) + records[key] = record + + # Log the results + print(f"The following {len(should_be_enabled_tests)} tests should be re-enabled:") + for test_id, stats in should_be_enabled_tests.items(): + disabled_test_name, name, classname, filename = get_disabled_test_name(test_id) + print(f" {disabled_test_name} from {filename}") + + print(f"The following {len(still_flaky_tests)} are still flaky:") + for test_id, stats in still_flaky_tests.items(): + num_green = stats.get("num_green", 0) + num_red = stats.get("num_red", 0) + + disabled_test_name, name, classname, filename = get_disabled_test_name(test_id) + print( + f" {disabled_test_name} from {filename}, failing {num_red}/{num_red + num_green}" + ) + + upload_to_s3( + workflow_id, + workflow_run_attempt, + "rerun_disabled_tests", + list(records.values()), + ) + + +def main(repo: str, workflow_run_id: int, workflow_run_attempt: int) -> None: + """ + Find the list of all disabled tests that should be re-enabled + """ + # Aggregated across all jobs + all_tests: Dict[str, Dict[str, int]] = {} + + for report in get_test_reports( + args.repo, args.workflow_run_id, args.workflow_run_attempt + ): + tests = process_report(report) + for name, stats in tests.items(): + if name not in all_tests: + all_tests[name] = stats.copy() + else: + all_tests[name]["num_green"] += stats.get("num_green", 0) + all_tests[name]["num_red"] += stats.get("num_red", 0) + + save_results( + workflow_run_id, + workflow_run_attempt, + all_tests, + ) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="Upload test artifacts from GHA to S3") + parser.add_argument( + "--workflow-run-id", + type=int, + required=True, + help="id of the workflow to get artifacts from", + ) + parser.add_argument( + "--workflow-run-attempt", + type=int, + required=True, + help="which retry of the workflow this is", + ) + parser.add_argument( + "--repo", + type=str, + required=True, + help="which GitHub repo this workflow run belongs to", + ) + + args = parser.parse_args() + main(args.repo, args.workflow_run_id, args.workflow_run_attempt)