diff --git a/.github/actions/filter-test-configs/action.yml b/.github/actions/filter-test-configs/action.yml index 3fb107a17b9..8439a27eb9f 100644 --- a/.github/actions/filter-test-configs/action.yml +++ b/.github/actions/filter-test-configs/action.yml @@ -26,11 +26,17 @@ outputs: description: True if the filtered test configs matrix is empty. False otherwise. value: ${{ steps.filter.outputs.is-test-matrix-empty }} keep-going: - description: True if keep-going label was on PR. + description: True if keep-going label was on PR or [keep-going] in PR body. value: ${{ steps.filter.outputs.keep-going }} reenabled-issues: description: Comma separated list of issue numbers that should correspond to disable test issues that the PR fixes value: ${{ steps.filter.outputs.reenabled-issues }} + ci-verbose-test-logs: + description: True if ci-verbose-test-logs label was on PR or [ci-verbose-test-logs] in PR body. + value: ${{ steps.filter.outputs.ci-verbose-test-logs }} + ci-no-test-timeout: + description: True if ci-no-test-timeout label was on PR or [ci-no-test-timeout] in PR body. + value: ${{ steps.filter.outputs.ci-no-test-timeout }} runs: using: composite diff --git a/.github/scripts/filter_test_configs.py b/.github/scripts/filter_test_configs.py index c25b0f6fe84..e63e3d88677 100755 --- a/.github/scripts/filter_test_configs.py +++ b/.github/scripts/filter_test_configs.py @@ -474,6 +474,10 @@ def get_reenabled_issues(pr_body: str = "") -> List[str]: return parse_reenabled_issues(pr_body) + parse_reenabled_issues(commit_messages) +def check_for_setting(labels: Set[str], body: str, setting: str) -> bool: + return setting in labels or f"[{setting}]" in body + + def perform_misc_tasks( labels: Set[str], test_matrix: Dict[str, List[Any]], job_name: str, pr_body: str ) -> None: @@ -481,7 +485,14 @@ def perform_misc_tasks( In addition to apply the filter logic, the script also does the following misc tasks to set keep-going and is-unstable variables """ - set_output("keep-going", "keep-going" in labels) + set_output("keep-going", check_for_setting(labels, pr_body, "keep-going")) + set_output( + "ci-verbose-test-logs", + check_for_setting(labels, pr_body, "ci-verbose-test-logs"), + ) + set_output( + "ci-no-test-timeout", check_for_setting(labels, pr_body, "ci-no-test-timeout") + ) # Obviously, if the job name includes unstable, then this is an unstable job is_unstable = job_name and IssueType.UNSTABLE.value in job_name diff --git a/.github/scripts/test_filter_test_configs.py b/.github/scripts/test_filter_test_configs.py index 40be20a0995..5c5dfa79d50 100755 --- a/.github/scripts/test_filter_test_configs.py +++ b/.github/scripts/test_filter_test_configs.py @@ -636,55 +636,98 @@ class TestConfigFilter(TestCase): @mock.patch("subprocess.check_output") def test_perform_misc_tasks(self, mocked_subprocess: Any) -> None: + def _gen_expected_string( + keep_going: bool = False, + ci_verbose_test_logs: bool = False, + ci_no_test_timeout: bool = False, + is_unstable: bool = False, + reenabled_issues: str = "", + ) -> str: + return ( + f"keep-going={keep_going}\n" + f"ci-verbose-test-logs={ci_verbose_test_logs}\n" + f"ci-no-test-timeout={ci_no_test_timeout}\n" + f"is-unstable={is_unstable}\n" + f"reenabled-issues={reenabled_issues}\n" + ) + mocked_subprocess.return_value = b"" testcases: List[Dict[str, Any]] = [ { "labels": {}, "test_matrix": '{include: [{config: "default"}]}', "job_name": "A job name", - "expected": "keep-going=False\nis-unstable=False\nreenabled-issues=\n", + "expected": _gen_expected_string(), "description": "No keep-going, no is-unstable", }, { "labels": {"keep-going"}, "test_matrix": '{include: [{config: "default"}]}', "job_name": "A job name", - "expected": "keep-going=True\nis-unstable=False\nreenabled-issues=\n", + "expected": _gen_expected_string(keep_going=True), "description": "Has keep-going, no is-unstable", }, + { + "labels": {}, + "test_matrix": '{include: [{config: "default"}]}', + "job_name": "A job name", + "pr_body": "[keep-going]", + "expected": _gen_expected_string(keep_going=True), + "description": "Keep-going in PR body", + }, + { + "labels": {"ci-verbose-test-logs"}, + "test_matrix": '{include: [{config: "default"}]}', + "job_name": "A job name", + "pr_body": "[ci-no-test-timeout]", + "expected": _gen_expected_string( + ci_verbose_test_logs=True, ci_no_test_timeout=True + ), + "description": "No pipe logs label and no test timeout in PR body", + }, + { + "labels": {"ci-no-test-timeout"}, + "test_matrix": '{include: [{config: "default"}]}', + "job_name": "A job name", + "pr_body": "[ci-verbose-test-logs]", + "expected": _gen_expected_string( + ci_verbose_test_logs=True, ci_no_test_timeout=True + ), + "description": "No pipe logs in PR body and no test timeout in label (same as the above but swapped)", + }, { "labels": {}, "test_matrix": '{include: [{config: "default"}]}', "job_name": None, - "expected": "keep-going=False\nis-unstable=False\nreenabled-issues=\n", + "expected": _gen_expected_string(), "description": "No job name", }, { "labels": {}, "test_matrix": '{include: [{config: "default"}]}', "job_name": "macos-12-py3-arm64 / test (default, 1, 3, macos-m1-stable, unstable)", - "expected": "keep-going=False\nis-unstable=True\nreenabled-issues=\n", + "expected": _gen_expected_string(is_unstable=True), "description": "Unstable job", }, { "labels": {}, "test_matrix": '{include: [{config: "default"}]}', "job_name": "macos-12-py3-arm64 / test (default, 1, 3, macos-m1-stable, unstable)", - "expected": "keep-going=False\nis-unstable=True\nreenabled-issues=\n", + "expected": _gen_expected_string(is_unstable=True), "description": "Unstable job", }, { "labels": {}, "test_matrix": '{include: [{config: "1", unstable: "unstable"}, {config: "2", unstable: "unstable"}]}', "job_name": "macos-12-py3-arm64 / build", - "expected": "keep-going=False\nis-unstable=True\nreenabled-issues=\n", + "expected": _gen_expected_string(is_unstable=True), "description": "All configs are unstable", }, { "labels": {}, "test_matrix": '{include: [{config: "1", unstable: "unstable"}, {config: "2"}]}', "job_name": "macos-12-py3-arm64 / build", - "expected": "keep-going=False\nis-unstable=False\nreenabled-issues=\n", + "expected": _gen_expected_string(is_unstable=False), "description": "Only mark some configs as unstable", }, { @@ -692,7 +735,7 @@ class TestConfigFilter(TestCase): "test_matrix": '{include: [{config: "default"}]}', "job_name": "A job name", "pr_body": "resolves #123 fixes #234", - "expected": "keep-going=False\nis-unstable=False\nreenabled-issues=123,234\n", + "expected": _gen_expected_string(reenabled_issues="123,234"), "description": "Reenable some issues", }, ] diff --git a/.github/workflows/_linux-test.yml b/.github/workflows/_linux-test.yml index 1d14950549a..3e0962f29c5 100644 --- a/.github/workflows/_linux-test.yml +++ b/.github/workflows/_linux-test.yml @@ -169,6 +169,8 @@ jobs: NUM_TEST_SHARDS: ${{ matrix.num_shards }} REENABLED_ISSUES: ${{ steps.keep-going.outputs.reenabled-issues }} CONTINUE_THROUGH_ERROR: ${{ steps.keep-going.outputs.keep-going }} + VERBOSE_TEST_LOGS: ${{ steps.keep-going.outputs.ci-verbose-test-logs }} + NO_TEST_TIMEOUT: ${{ steps.keep-going.outputs.ci-no-test-timeout }} SCCACHE_BUCKET: ossci-compiler-cache-circleci-v2 SCCACHE_S3_KEY_PREFIX: ${{ github.workflow }} SHM_SIZE: ${{ contains(inputs.build-environment, 'cuda') && '2g' || '1g' }} @@ -218,6 +220,8 @@ jobs: -e NUM_TEST_SHARDS \ -e REENABLED_ISSUES \ -e CONTINUE_THROUGH_ERROR \ + -e VERBOSE_TEST_LOGS \ + -e NO_TEST_TIMEOUT \ -e PR_LABELS \ -e MAX_JOBS="$(nproc --ignore=2)" \ -e SCCACHE_BUCKET \ diff --git a/.github/workflows/_mac-test-mps.yml b/.github/workflows/_mac-test-mps.yml index b10c1f84bd7..33302a16ecc 100644 --- a/.github/workflows/_mac-test-mps.yml +++ b/.github/workflows/_mac-test-mps.yml @@ -34,6 +34,8 @@ jobs: test-matrix: ${{ steps.filter.outputs.test-matrix }} is-test-matrix-empty: ${{ steps.filter.outputs.is-test-matrix-empty }} keep-going: ${{ steps.filter.outputs.keep-going }} + ci-verbose-test-logs: ${{ steps.filter.outputs.ci-verbose-test-logs }} + ci-no-test-timeout: ${{ steps.filter.outputs.ci-no-test-timeout }} reenabled-issues: ${{ steps.filter.outputs.reenabled-issues }} steps: - name: Checkout PyTorch @@ -95,6 +97,8 @@ jobs: PY_VERS: 3.9 PR_BODY: ${{ github.event.pull_request.body }} CONTINUE_THROUGH_ERROR: ${{ needs.filter.outputs.keep-going }} + VERBOSE_TEST_LOGS: ${{ needs.filter.outputs.ci-verbose-test-logs }} + NO_TEST_TIMEOUT: ${{ needs.filter.outputs.ci-no-test-timeout }} PIP_REQUIREMENTS_FILE: .github/requirements/pip-requirements-${{ runner.os }}.txt REENABLED_ISSUES: ${{ needs.filter.outputs.reenabled-issues }} run: | diff --git a/.github/workflows/_mac-test.yml b/.github/workflows/_mac-test.yml index 4848a566f15..92728df26d4 100644 --- a/.github/workflows/_mac-test.yml +++ b/.github/workflows/_mac-test.yml @@ -148,6 +148,8 @@ jobs: PYTORCH_TEST_CUDA_MEM_LEAK_CHECK: ${{ matrix.mem_leak_check && '1' || '0' }} PYTORCH_TEST_RERUN_DISABLED_TESTS: ${{ matrix.rerun_disabled_tests && '1' || '0' }} CONTINUE_THROUGH_ERROR: ${{ steps.keep-going.outputs.keep-going }} + VERBOSE_TEST_LOGS: ${{ steps.keep-going.outputs.ci-verbose-test-logs }} + NO_TEST_TIMEOUT: ${{ steps.keep-going.outputs.ci-no-test-timeout }} PIP_REQUIREMENTS_FILE: .github/requirements/pip-requirements-${{ runner.os }}.txt GITHUB_REPOSITORY: ${{ github.repository }} GITHUB_WORKFLOW: ${{ github.workflow }} diff --git a/.github/workflows/_rocm-test.yml b/.github/workflows/_rocm-test.yml index 5f4f8aafe6a..f62f2a38782 100644 --- a/.github/workflows/_rocm-test.yml +++ b/.github/workflows/_rocm-test.yml @@ -148,6 +148,8 @@ jobs: BRANCH: ${{ steps.parse-ref.outputs.branch }} SHA1: ${{ github.event.pull_request.head.sha || github.sha }} CONTINUE_THROUGH_ERROR: ${{ steps.keep-going.outputs.keep-going }} + VERBOSE_TEST_LOGS: ${{ steps.keep-going.outputs.ci-verbose-test-logs }} + NO_TEST_TIMEOUT: ${{ steps.keep-going.outputs.ci-no-test-timeout }} TEST_CONFIG: ${{ matrix.config }} SHARD_NUMBER: ${{ matrix.shard }} NUM_TEST_SHARDS: ${{ matrix.num_shards }} @@ -196,6 +198,8 @@ jobs: -e NUM_TEST_SHARDS \ -e REENABLED_ISSUES \ -e CONTINUE_THROUGH_ERROR \ + -e VERBOSE_TEST_LOGS \ + -e NO_TEST_TIMEOUT \ -e MAX_JOBS="$(nproc --ignore=2)" \ -e SCCACHE_BUCKET \ -e XLA_CLANG_CACHE_S3_BUCKET_NAME \ diff --git a/.github/workflows/_win-test.yml b/.github/workflows/_win-test.yml index ebfe4211b34..5fc8da283d2 100644 --- a/.github/workflows/_win-test.yml +++ b/.github/workflows/_win-test.yml @@ -140,6 +140,8 @@ jobs: INSTALL_WINDOWS_SDK: 1 PYTHON_VERSION: 3.8 CONTINUE_THROUGH_ERROR: ${{ steps.keep-going.outputs.keep-going }} + VERBOSE_TEST_LOGS: ${{ steps.keep-going.outputs.ci-verbose-test-logs }} + NO_TEST_TIMEOUT: ${{ steps.keep-going.outputs.ci-no-test-timeout }} VC_PRODUCT: "BuildTools" VC_VERSION: "" VS_VERSION: "16.8.6" diff --git a/.github/workflows/_xpu-test.yml b/.github/workflows/_xpu-test.yml index a3f05b09522..915e8f6dabb 100644 --- a/.github/workflows/_xpu-test.yml +++ b/.github/workflows/_xpu-test.yml @@ -143,6 +143,8 @@ jobs: PYTORCH_RETRY_TEST_CASES: 1 PYTORCH_OVERRIDE_FLAKY_SIGNAL: 1 CONTINUE_THROUGH_ERROR: ${{ steps.keep-going.outputs.keep-going }} + VERBOSE_TEST_LOGS: ${{ steps.keep-going.outputs.ci-verbose-test-logs }} + NO_TEST_TIMEOUT: ${{ steps.keep-going.outputs.ci-no-test-timeout }} TEST_CONFIG: ${{ matrix.config }} SHARD_NUMBER: ${{ matrix.shard }} NUM_TEST_SHARDS: ${{ matrix.num_shards }} @@ -185,6 +187,8 @@ jobs: -e PYTORCH_RETRY_TEST_CASES \ -e PYTORCH_OVERRIDE_FLAKY_SIGNAL \ -e CONTINUE_THROUGH_ERROR \ + -e VERBOSE_TEST_LOGS \ + -e NO_TEST_TIMEOUT \ -e MAX_JOBS="$(nproc --ignore=2)" \ -e SCCACHE_BUCKET \ -e XLA_CLANG_CACHE_S3_BUCKET_NAME \ diff --git a/test/conftest.py b/test/conftest.py index c7a1570f0bf..6253ca7c0ba 100644 --- a/test/conftest.py +++ b/test/conftest.py @@ -129,6 +129,24 @@ class _NodeReporterReruns(_NodeReporter): tag.text = bin_xml_escape(content) self.append(tag) + def append_skipped(self, report: TestReport) -> None: + # Referenced from the below + # https://github.com/pytest-dev/pytest/blob/2178ee86d7c1ee93748cfb46540a6e40b4761f2d/src/_pytest/junitxml.py#L236C6-L236C6 + # Modified to escape characters not supported by xml in the skip reason. Everything else should be the same. + if hasattr(report, "wasxfail"): + # Super here instead of the actual code so we can reduce possible divergence + super().append_skipped(report) + else: + assert isinstance(report.longrepr, tuple) + filename, lineno, skipreason = report.longrepr + if skipreason.startswith("Skipped: "): + skipreason = skipreason[9:] + details = f"{filename}:{lineno}: {skipreason}" + + skipped = ET.Element("skipped", type="pytest.skip", message=bin_xml_escape(skipreason)) + skipped.text = bin_xml_escape(details) + self.append(skipped) + self.write_captured_output(report) class LogXMLReruns(LogXML): def __init__(self, *args, **kwargs): diff --git a/test/run_test.py b/test/run_test.py index b091413415c..eba5363e5bf 100755 --- a/test/run_test.py +++ b/test/run_test.py @@ -605,7 +605,7 @@ def run_test( argv = [test_file + ".py"] + unittest_args os.makedirs(REPO_ROOT / "test" / "test-reports", exist_ok=True) - if IS_CI: + if options.pipe_logs: log_fd, log_path = tempfile.mkstemp( dir=REPO_ROOT / "test" / "test-reports", prefix=f"{sanitize_file_name(str(test_module))}_", @@ -619,7 +619,9 @@ def run_test( "BUILD_ENVRIONMENT", "" ) timeout = ( - THRESHOLD * 6 + None + if not options.enable_timeout + else THRESHOLD * 6 if is_slow else THRESHOLD * 3 if should_retry @@ -631,7 +633,7 @@ def run_test( with ExitStack() as stack: output = None - if IS_CI: + if options.pipe_logs: output = stack.enter_context(open(log_path, "w")) if should_retry: @@ -664,7 +666,7 @@ def run_test( # comes up in the future. ret_code = 0 if ret_code == 5 or ret_code == 4 else ret_code - if IS_CI: + if options.pipe_logs: handle_log_file( test_module, log_path, failed=(ret_code != 0), was_rerun=was_rerun ) @@ -1249,6 +1251,18 @@ def parse_args(): help="Runs the full test suite despite one of the tests failing", default=strtobool(os.environ.get("CONTINUE_THROUGH_ERROR", "False")), ) + parser.add_argument( + "--pipe-logs", + action="store_true", + help="Print logs to output file while running tests. True if in CI and env var is not set", + default=IS_CI and not strtobool(os.environ.get("VERBOSE_TEST_LOGS", "False")), + ) + parser.add_argument( + "--enable-timeout", + action="store_true", + help="Set a timeout based on the test times json file. Only works if there are test times available", + default=IS_CI and not strtobool(os.environ.get("NO_TEST_TIMEOUT", "False")), + ) parser.add_argument( "additional_unittest_args", nargs="*",