mirror of
https://github.com/saymrwulf/pytorch.git
synced 2026-05-14 20:57:59 +00:00
Various CI settings (#117668)
Test [ci-verbose-test-logs] (this worked: the test logs were printed while the tests were running, were interleaved, and were really long). Adds settings for no timeout (the step timeout still applies; this only gets rid of the ~30 min timeout for a shard of a test file) and for not piping logs, i.e. extra verbose test logs (good for debugging deadlocks, but results in very long and possibly interleaved logs). Also allows these to be set via the PR body if the label name is in brackets, e.g. [label name], as in the test above. Pull Request resolved: https://github.com/pytorch/pytorch/pull/117668 Approved by: https://github.com/huydhn
This commit is contained in:
parent
8c167f9fc3
commit
de9ddd19a5
11 changed files with 126 additions and 14 deletions
|
|
@ -26,11 +26,17 @@ outputs:
|
|||
description: True if the filtered test configs matrix is empty. False otherwise.
|
||||
value: ${{ steps.filter.outputs.is-test-matrix-empty }}
|
||||
keep-going:
|
||||
description: True if keep-going label was on PR.
|
||||
description: True if keep-going label was on PR or [keep-going] in PR body.
|
||||
value: ${{ steps.filter.outputs.keep-going }}
|
||||
reenabled-issues:
|
||||
description: Comma separated list of issue numbers that should correspond to disable test issues that the PR fixes
|
||||
value: ${{ steps.filter.outputs.reenabled-issues }}
|
||||
ci-verbose-test-logs:
|
||||
description: True if ci-verbose-test-logs label was on PR or [ci-verbose-test-logs] in PR body.
|
||||
value: ${{ steps.filter.outputs.ci-verbose-test-logs }}
|
||||
ci-no-test-timeout:
|
||||
description: True if ci-no-test-timeout label was on PR or [ci-no-test-timeout] in PR body.
|
||||
value: ${{ steps.filter.outputs.ci-no-test-timeout }}
|
||||
|
||||
runs:
|
||||
using: composite
|
||||
|
|
|
|||
13
.github/scripts/filter_test_configs.py
vendored
13
.github/scripts/filter_test_configs.py
vendored
|
|
@ -474,6 +474,10 @@ def get_reenabled_issues(pr_body: str = "") -> List[str]:
|
|||
return parse_reenabled_issues(pr_body) + parse_reenabled_issues(commit_messages)
|
||||
|
||||
|
||||
def check_for_setting(labels: Set[str], body: str, setting: str) -> bool:
    """
    Report whether a CI setting is switched on, either because a label with
    exactly that name is present in labels, or because the name wrapped in
    square brackets (for example "[keep-going]") occurs anywhere in body
    """
    if setting in labels:
        return True

    bracketed_name = f"[{setting}]"
    return bracketed_name in body
|
||||
|
||||
|
||||
def perform_misc_tasks(
|
||||
labels: Set[str], test_matrix: Dict[str, List[Any]], job_name: str, pr_body: str
|
||||
) -> None:
|
||||
|
|
@ -481,7 +485,14 @@ def perform_misc_tasks(
|
|||
In addition to apply the filter logic, the script also does the following
|
||||
misc tasks to set keep-going and is-unstable variables
|
||||
"""
|
||||
set_output("keep-going", "keep-going" in labels)
|
||||
set_output("keep-going", check_for_setting(labels, pr_body, "keep-going"))
|
||||
set_output(
|
||||
"ci-verbose-test-logs",
|
||||
check_for_setting(labels, pr_body, "ci-verbose-test-logs"),
|
||||
)
|
||||
set_output(
|
||||
"ci-no-test-timeout", check_for_setting(labels, pr_body, "ci-no-test-timeout")
|
||||
)
|
||||
|
||||
# Obviously, if the job name includes unstable, then this is an unstable job
|
||||
is_unstable = job_name and IssueType.UNSTABLE.value in job_name
|
||||
|
|
|
|||
59
.github/scripts/test_filter_test_configs.py
vendored
59
.github/scripts/test_filter_test_configs.py
vendored
|
|
@ -636,55 +636,98 @@ class TestConfigFilter(TestCase):
|
|||
|
||||
@mock.patch("subprocess.check_output")
|
||||
def test_perform_misc_tasks(self, mocked_subprocess: Any) -> None:
|
||||
def _gen_expected_string(
|
||||
keep_going: bool = False,
|
||||
ci_verbose_test_logs: bool = False,
|
||||
ci_no_test_timeout: bool = False,
|
||||
is_unstable: bool = False,
|
||||
reenabled_issues: str = "",
|
||||
) -> str:
|
||||
return (
|
||||
f"keep-going={keep_going}\n"
|
||||
f"ci-verbose-test-logs={ci_verbose_test_logs}\n"
|
||||
f"ci-no-test-timeout={ci_no_test_timeout}\n"
|
||||
f"is-unstable={is_unstable}\n"
|
||||
f"reenabled-issues={reenabled_issues}\n"
|
||||
)
|
||||
|
||||
mocked_subprocess.return_value = b""
|
||||
testcases: List[Dict[str, Any]] = [
|
||||
{
|
||||
"labels": {},
|
||||
"test_matrix": '{include: [{config: "default"}]}',
|
||||
"job_name": "A job name",
|
||||
"expected": "keep-going=False\nis-unstable=False\nreenabled-issues=\n",
|
||||
"expected": _gen_expected_string(),
|
||||
"description": "No keep-going, no is-unstable",
|
||||
},
|
||||
{
|
||||
"labels": {"keep-going"},
|
||||
"test_matrix": '{include: [{config: "default"}]}',
|
||||
"job_name": "A job name",
|
||||
"expected": "keep-going=True\nis-unstable=False\nreenabled-issues=\n",
|
||||
"expected": _gen_expected_string(keep_going=True),
|
||||
"description": "Has keep-going, no is-unstable",
|
||||
},
|
||||
{
|
||||
"labels": {},
|
||||
"test_matrix": '{include: [{config: "default"}]}',
|
||||
"job_name": "A job name",
|
||||
"pr_body": "[keep-going]",
|
||||
"expected": _gen_expected_string(keep_going=True),
|
||||
"description": "Keep-going in PR body",
|
||||
},
|
||||
{
|
||||
"labels": {"ci-verbose-test-logs"},
|
||||
"test_matrix": '{include: [{config: "default"}]}',
|
||||
"job_name": "A job name",
|
||||
"pr_body": "[ci-no-test-timeout]",
|
||||
"expected": _gen_expected_string(
|
||||
ci_verbose_test_logs=True, ci_no_test_timeout=True
|
||||
),
|
||||
"description": "No pipe logs label and no test timeout in PR body",
|
||||
},
|
||||
{
|
||||
"labels": {"ci-no-test-timeout"},
|
||||
"test_matrix": '{include: [{config: "default"}]}',
|
||||
"job_name": "A job name",
|
||||
"pr_body": "[ci-verbose-test-logs]",
|
||||
"expected": _gen_expected_string(
|
||||
ci_verbose_test_logs=True, ci_no_test_timeout=True
|
||||
),
|
||||
"description": "No pipe logs in PR body and no test timeout in label (same as the above but swapped)",
|
||||
},
|
||||
{
|
||||
"labels": {},
|
||||
"test_matrix": '{include: [{config: "default"}]}',
|
||||
"job_name": None,
|
||||
"expected": "keep-going=False\nis-unstable=False\nreenabled-issues=\n",
|
||||
"expected": _gen_expected_string(),
|
||||
"description": "No job name",
|
||||
},
|
||||
{
|
||||
"labels": {},
|
||||
"test_matrix": '{include: [{config: "default"}]}',
|
||||
"job_name": "macos-12-py3-arm64 / test (default, 1, 3, macos-m1-stable, unstable)",
|
||||
"expected": "keep-going=False\nis-unstable=True\nreenabled-issues=\n",
|
||||
"expected": _gen_expected_string(is_unstable=True),
|
||||
"description": "Unstable job",
|
||||
},
|
||||
{
|
||||
"labels": {},
|
||||
"test_matrix": '{include: [{config: "default"}]}',
|
||||
"job_name": "macos-12-py3-arm64 / test (default, 1, 3, macos-m1-stable, unstable)",
|
||||
"expected": "keep-going=False\nis-unstable=True\nreenabled-issues=\n",
|
||||
"expected": _gen_expected_string(is_unstable=True),
|
||||
"description": "Unstable job",
|
||||
},
|
||||
{
|
||||
"labels": {},
|
||||
"test_matrix": '{include: [{config: "1", unstable: "unstable"}, {config: "2", unstable: "unstable"}]}',
|
||||
"job_name": "macos-12-py3-arm64 / build",
|
||||
"expected": "keep-going=False\nis-unstable=True\nreenabled-issues=\n",
|
||||
"expected": _gen_expected_string(is_unstable=True),
|
||||
"description": "All configs are unstable",
|
||||
},
|
||||
{
|
||||
"labels": {},
|
||||
"test_matrix": '{include: [{config: "1", unstable: "unstable"}, {config: "2"}]}',
|
||||
"job_name": "macos-12-py3-arm64 / build",
|
||||
"expected": "keep-going=False\nis-unstable=False\nreenabled-issues=\n",
|
||||
"expected": _gen_expected_string(is_unstable=False),
|
||||
"description": "Only mark some configs as unstable",
|
||||
},
|
||||
{
|
||||
|
|
@ -692,7 +735,7 @@ class TestConfigFilter(TestCase):
|
|||
"test_matrix": '{include: [{config: "default"}]}',
|
||||
"job_name": "A job name",
|
||||
"pr_body": "resolves #123 fixes #234",
|
||||
"expected": "keep-going=False\nis-unstable=False\nreenabled-issues=123,234\n",
|
||||
"expected": _gen_expected_string(reenabled_issues="123,234"),
|
||||
"description": "Reenable some issues",
|
||||
},
|
||||
]
|
||||
|
|
|
|||
4
.github/workflows/_linux-test.yml
vendored
4
.github/workflows/_linux-test.yml
vendored
|
|
@ -169,6 +169,8 @@ jobs:
|
|||
NUM_TEST_SHARDS: ${{ matrix.num_shards }}
|
||||
REENABLED_ISSUES: ${{ steps.keep-going.outputs.reenabled-issues }}
|
||||
CONTINUE_THROUGH_ERROR: ${{ steps.keep-going.outputs.keep-going }}
|
||||
VERBOSE_TEST_LOGS: ${{ steps.keep-going.outputs.ci-verbose-test-logs }}
|
||||
NO_TEST_TIMEOUT: ${{ steps.keep-going.outputs.ci-no-test-timeout }}
|
||||
SCCACHE_BUCKET: ossci-compiler-cache-circleci-v2
|
||||
SCCACHE_S3_KEY_PREFIX: ${{ github.workflow }}
|
||||
SHM_SIZE: ${{ contains(inputs.build-environment, 'cuda') && '2g' || '1g' }}
|
||||
|
|
@ -218,6 +220,8 @@ jobs:
|
|||
-e NUM_TEST_SHARDS \
|
||||
-e REENABLED_ISSUES \
|
||||
-e CONTINUE_THROUGH_ERROR \
|
||||
-e VERBOSE_TEST_LOGS \
|
||||
-e NO_TEST_TIMEOUT \
|
||||
-e PR_LABELS \
|
||||
-e MAX_JOBS="$(nproc --ignore=2)" \
|
||||
-e SCCACHE_BUCKET \
|
||||
|
|
|
|||
4
.github/workflows/_mac-test-mps.yml
vendored
4
.github/workflows/_mac-test-mps.yml
vendored
|
|
@ -34,6 +34,8 @@ jobs:
|
|||
test-matrix: ${{ steps.filter.outputs.test-matrix }}
|
||||
is-test-matrix-empty: ${{ steps.filter.outputs.is-test-matrix-empty }}
|
||||
keep-going: ${{ steps.filter.outputs.keep-going }}
|
||||
ci-verbose-test-logs: ${{ steps.filter.outputs.ci-verbose-test-logs }}
|
||||
ci-no-test-timeout: ${{ steps.filter.outputs.ci-no-test-timeout }}
|
||||
reenabled-issues: ${{ steps.filter.outputs.reenabled-issues }}
|
||||
steps:
|
||||
- name: Checkout PyTorch
|
||||
|
|
@ -95,6 +97,8 @@ jobs:
|
|||
PY_VERS: 3.9
|
||||
PR_BODY: ${{ github.event.pull_request.body }}
|
||||
CONTINUE_THROUGH_ERROR: ${{ needs.filter.outputs.keep-going }}
|
||||
VERBOSE_TEST_LOGS: ${{ needs.filter.outputs.ci-verbose-test-logs }}
|
||||
NO_TEST_TIMEOUT: ${{ needs.filter.outputs.ci-no-test-timeout }}
|
||||
PIP_REQUIREMENTS_FILE: .github/requirements/pip-requirements-${{ runner.os }}.txt
|
||||
REENABLED_ISSUES: ${{ needs.filter.outputs.reenabled-issues }}
|
||||
run: |
|
||||
|
|
|
|||
2
.github/workflows/_mac-test.yml
vendored
2
.github/workflows/_mac-test.yml
vendored
|
|
@ -148,6 +148,8 @@ jobs:
|
|||
PYTORCH_TEST_CUDA_MEM_LEAK_CHECK: ${{ matrix.mem_leak_check && '1' || '0' }}
|
||||
PYTORCH_TEST_RERUN_DISABLED_TESTS: ${{ matrix.rerun_disabled_tests && '1' || '0' }}
|
||||
CONTINUE_THROUGH_ERROR: ${{ steps.keep-going.outputs.keep-going }}
|
||||
VERBOSE_TEST_LOGS: ${{ steps.keep-going.outputs.ci-verbose-test-logs }}
|
||||
NO_TEST_TIMEOUT: ${{ steps.keep-going.outputs.ci-no-test-timeout }}
|
||||
PIP_REQUIREMENTS_FILE: .github/requirements/pip-requirements-${{ runner.os }}.txt
|
||||
GITHUB_REPOSITORY: ${{ github.repository }}
|
||||
GITHUB_WORKFLOW: ${{ github.workflow }}
|
||||
|
|
|
|||
4
.github/workflows/_rocm-test.yml
vendored
4
.github/workflows/_rocm-test.yml
vendored
|
|
@ -148,6 +148,8 @@ jobs:
|
|||
BRANCH: ${{ steps.parse-ref.outputs.branch }}
|
||||
SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
|
||||
CONTINUE_THROUGH_ERROR: ${{ steps.keep-going.outputs.keep-going }}
|
||||
VERBOSE_TEST_LOGS: ${{ steps.keep-going.outputs.ci-verbose-test-logs }}
|
||||
NO_TEST_TIMEOUT: ${{ steps.keep-going.outputs.ci-no-test-timeout }}
|
||||
TEST_CONFIG: ${{ matrix.config }}
|
||||
SHARD_NUMBER: ${{ matrix.shard }}
|
||||
NUM_TEST_SHARDS: ${{ matrix.num_shards }}
|
||||
|
|
@ -196,6 +198,8 @@ jobs:
|
|||
-e NUM_TEST_SHARDS \
|
||||
-e REENABLED_ISSUES \
|
||||
-e CONTINUE_THROUGH_ERROR \
|
||||
-e VERBOSE_TEST_LOGS \
|
||||
-e NO_TEST_TIMEOUT \
|
||||
-e MAX_JOBS="$(nproc --ignore=2)" \
|
||||
-e SCCACHE_BUCKET \
|
||||
-e XLA_CLANG_CACHE_S3_BUCKET_NAME \
|
||||
|
|
|
|||
2
.github/workflows/_win-test.yml
vendored
2
.github/workflows/_win-test.yml
vendored
|
|
@ -140,6 +140,8 @@ jobs:
|
|||
INSTALL_WINDOWS_SDK: 1
|
||||
PYTHON_VERSION: 3.8
|
||||
CONTINUE_THROUGH_ERROR: ${{ steps.keep-going.outputs.keep-going }}
|
||||
VERBOSE_TEST_LOGS: ${{ steps.keep-going.outputs.ci-verbose-test-logs }}
|
||||
NO_TEST_TIMEOUT: ${{ steps.keep-going.outputs.ci-no-test-timeout }}
|
||||
VC_PRODUCT: "BuildTools"
|
||||
VC_VERSION: ""
|
||||
VS_VERSION: "16.8.6"
|
||||
|
|
|
|||
4
.github/workflows/_xpu-test.yml
vendored
4
.github/workflows/_xpu-test.yml
vendored
|
|
@ -143,6 +143,8 @@ jobs:
|
|||
PYTORCH_RETRY_TEST_CASES: 1
|
||||
PYTORCH_OVERRIDE_FLAKY_SIGNAL: 1
|
||||
CONTINUE_THROUGH_ERROR: ${{ steps.keep-going.outputs.keep-going }}
|
||||
VERBOSE_TEST_LOGS: ${{ steps.keep-going.outputs.ci-verbose-test-logs }}
|
||||
NO_TEST_TIMEOUT: ${{ steps.keep-going.outputs.ci-no-test-timeout }}
|
||||
TEST_CONFIG: ${{ matrix.config }}
|
||||
SHARD_NUMBER: ${{ matrix.shard }}
|
||||
NUM_TEST_SHARDS: ${{ matrix.num_shards }}
|
||||
|
|
@ -185,6 +187,8 @@ jobs:
|
|||
-e PYTORCH_RETRY_TEST_CASES \
|
||||
-e PYTORCH_OVERRIDE_FLAKY_SIGNAL \
|
||||
-e CONTINUE_THROUGH_ERROR \
|
||||
-e VERBOSE_TEST_LOGS \
|
||||
-e NO_TEST_TIMEOUT \
|
||||
-e MAX_JOBS="$(nproc --ignore=2)" \
|
||||
-e SCCACHE_BUCKET \
|
||||
-e XLA_CLANG_CACHE_S3_BUCKET_NAME \
|
||||
|
|
|
|||
|
|
@ -129,6 +129,24 @@ class _NodeReporterReruns(_NodeReporter):
|
|||
tag.text = bin_xml_escape(content)
|
||||
self.append(tag)
|
||||
|
||||
def append_skipped(self, report: TestReport) -> None:
    """Add a <skipped> element for a skipped test to this node's XML.

    Reports that carry a ``wasxfail`` attribute are handed to the parent
    class unchanged.  For every other report the skip reason taken from
    ``report.longrepr`` is passed through ``bin_xml_escape`` before it is
    used as the ``message`` attribute of the element.
    """
    # Adapted from pytest's own implementation:
    # https://github.com/pytest-dev/pytest/blob/2178ee86d7c1ee93748cfb46540a6e40b4761f2d/src/_pytest/junitxml.py#L236C6-L236C6
    # The only intended difference is that characters not supported by xml
    # are escaped in the skip reason. Everything else should be the same.
    if hasattr(report, "wasxfail"):
        # Defer to the parent class here instead of copying its code so
        # there is less that can diverge from upstream
        super().append_skipped(report)
    else:
        # longrepr is unpacked below as (filename, lineno, skipreason)
        assert isinstance(report.longrepr, tuple)
        filename, lineno, skipreason = report.longrepr
        if skipreason.startswith("Skipped: "):
            # 9 == len("Skipped: "); drop that prefix from the reason
            skipreason = skipreason[9:]
        details = f"{filename}:{lineno}: {skipreason}"

        # The reason is escaped for the attribute, the details for the text
        skipped = ET.Element("skipped", type="pytest.skip", message=bin_xml_escape(skipreason))
        skipped.text = bin_xml_escape(details)
        self.append(skipped)
        self.write_captured_output(report)
|
||||
|
||||
class LogXMLReruns(LogXML):
|
||||
def __init__(self, *args, **kwargs):
|
||||
|
|
|
|||
|
|
@ -605,7 +605,7 @@ def run_test(
|
|||
argv = [test_file + ".py"] + unittest_args
|
||||
|
||||
os.makedirs(REPO_ROOT / "test" / "test-reports", exist_ok=True)
|
||||
if IS_CI:
|
||||
if options.pipe_logs:
|
||||
log_fd, log_path = tempfile.mkstemp(
|
||||
dir=REPO_ROOT / "test" / "test-reports",
|
||||
prefix=f"{sanitize_file_name(str(test_module))}_",
|
||||
|
|
@ -619,7 +619,9 @@ def run_test(
|
|||
"BUILD_ENVRIONMENT", ""
|
||||
)
|
||||
timeout = (
|
||||
THRESHOLD * 6
|
||||
None
|
||||
if not options.enable_timeout
|
||||
else THRESHOLD * 6
|
||||
if is_slow
|
||||
else THRESHOLD * 3
|
||||
if should_retry
|
||||
|
|
@ -631,7 +633,7 @@ def run_test(
|
|||
|
||||
with ExitStack() as stack:
|
||||
output = None
|
||||
if IS_CI:
|
||||
if options.pipe_logs:
|
||||
output = stack.enter_context(open(log_path, "w"))
|
||||
|
||||
if should_retry:
|
||||
|
|
@ -664,7 +666,7 @@ def run_test(
|
|||
# comes up in the future.
|
||||
ret_code = 0 if ret_code == 5 or ret_code == 4 else ret_code
|
||||
|
||||
if IS_CI:
|
||||
if options.pipe_logs:
|
||||
handle_log_file(
|
||||
test_module, log_path, failed=(ret_code != 0), was_rerun=was_rerun
|
||||
)
|
||||
|
|
@ -1249,6 +1251,18 @@ def parse_args():
|
|||
help="Runs the full test suite despite one of the tests failing",
|
||||
default=strtobool(os.environ.get("CONTINUE_THROUGH_ERROR", "False")),
|
||||
)
|
||||
parser.add_argument(
|
||||
"--pipe-logs",
|
||||
action="store_true",
|
||||
help="Print logs to output file while running tests. True if in CI and env var is not set",
|
||||
default=IS_CI and not strtobool(os.environ.get("VERBOSE_TEST_LOGS", "False")),
|
||||
)
|
||||
parser.add_argument(
|
||||
"--enable-timeout",
|
||||
action="store_true",
|
||||
help="Set a timeout based on the test times json file. Only works if there are test times available",
|
||||
default=IS_CI and not strtobool(os.environ.get("NO_TEST_TIMEOUT", "False")),
|
||||
)
|
||||
parser.add_argument(
|
||||
"additional_unittest_args",
|
||||
nargs="*",
|
||||
|
|
|
|||
Loading…
Reference in a new issue