CI sanity check test for env vars (#120519)
Add a test that fails on purpose in order to trigger retries. It checks the opposite of success: it asserts that the CI env vars do NOT exist, which fails in CI because they do. It's a bit hacky, because the test has to fail on the normal flow in order to trigger reruns, but the failures shouldn't be exposed to users, since that would be confusing.

Pull Request resolved: https://github.com/pytorch/pytorch/pull/120519
Approved by: https://github.com/huydhn
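For context: GitHub Actions exports CI=true into every job, so a test that asserts the variable is absent is guaranteed to fail there while passing on a plain local machine. A minimal standalone sketch of the inverted-assertion pattern (plain unittest, outside the pytorch harness; the class name here is illustrative):

    import os
    import unittest

    class InvertedSanityCheck(unittest.TestCase):
        def test_ci_env_absent(self):
            # Deliberately inverted: GHA sets CI=true, so this fails in CI
            # (exercising the rerun machinery) and passes locally.
            self.assertIsNone(os.environ.get("CI"))

    if __name__ == "__main__":
        unittest.main()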
This commit is contained in:
parent
6c11d3ce0c
commit
fac06a12c8
2 changed files with 42 additions and 1 deletion
test/run_test.py (21 additions, 1 deletion)

@@ -2,6 +2,7 @@

 import argparse
 import copy
+import glob
 import json
 import os
 import pathlib
@@ -381,6 +382,7 @@ def run_test(
     launcher_cmd=None,
     extra_unittest_args=None,
     env=None,
+    print_log=True,
 ) -> int:
     env = env or os.environ.copy()
     maybe_set_hip_visible_devies()
@@ -542,7 +544,7 @@ def run_test(
     # comes up in the future.
     ret_code = 0 if ret_code == 5 or ret_code == 4 else ret_code

-    if options.pipe_logs:
+    if options.pipe_logs and print_log:
         handle_log_file(
             test_module, log_path, failed=(ret_code != 0), was_rerun=was_rerun
         )
@@ -1002,6 +1004,23 @@ def get_pytest_args(options, is_cpp_test=False, is_distributed_test=False):
     return pytest_args


+def run_ci_sanity_check(test: ShardedTest, test_directory, options):
+    assert (
+        test.name == "test_ci_sanity_check_fail"
+    ), f"This handler only works for test_ci_sanity_check_fail, got {test.name}"
+    ret_code = run_test(test, test_directory, options, print_log=False)
+    # This test should fail
+    if ret_code != 1:
+        return 1
+    test_reports_dir = str(REPO_ROOT / "test/test-reports")
+    # Delete the log files and xmls generated by the test
+    for file in glob.glob(f"{test_reports_dir}/{test.name}*.log"):
+        os.remove(file)
+    for dirname in glob.glob(f"{test_reports_dir}/**/{test.name}"):
+        shutil.rmtree(dirname)
+    return 0
+
+
 CUSTOM_HANDLERS = {
     "test_cuda_primary_ctx": run_test_with_subprocess,
     "test_cuda_nvml_based_avail": run_test_with_subprocess,
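The handler above inverts the result: the module is expected to fail, so exit code 1 becomes success, anything else is reported as a failure, and the logs and XML reports from the intentional failure are deleted so they never surface to users. For reference, the exit codes this logic keys on follow pytest's documented ExitCode values (this table is background, not part of the diff):

    # pytest exit codes (pytest.ExitCode), for reading the handler above:
    PYTEST_EXIT_CODES = {
        0: "all tests passed",
        1: "some tests failed",        # the only outcome the sanity check accepts
        2: "interrupted by the user",
        3: "internal error",
        4: "usage error",              # mapped to success by run_test above
        5: "no tests were collected",  # mapped to success by run_test above
    }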
@@ -1024,6 +1043,7 @@ CUSTOM_HANDLERS = {
     "distributed/rpc/test_share_memory": run_test_with_subprocess,
     "distributed/rpc/cuda/test_tensorpipe_agent": run_test_with_subprocess,
     "doctests": run_doctests,
+    "test_ci_sanity_check_fail": run_ci_sanity_check,
 }
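Registering the module in CUSTOM_HANDLERS routes it away from the generic runner. A minimal sketch of how such a registry is typically consumed (an assumption; the actual call site in run_test.py may differ):

    def run_test_module(test, test_directory, options) -> int:
        # Fall back to the generic run_test unless a custom handler is registered.
        handler = CUSTOM_HANDLERS.get(test.name, run_test)
        return handler(test, test_directory, options)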
test/test_ci_sanity_check_fail.py (new file, 21 additions)

@@ -0,0 +1,21 @@
+# Owner(s): ["module: ci"]
+# Sanity check for CI setup in GHA. This file is expected to fail so it can trigger reruns
+
+import os
+
+from torch.testing._internal.common_utils import run_tests, slowTest, TestCase
+
+
+class TestCISanityCheck(TestCase):
+    def test_env_vars_exist(self):
+        # This check should fail and trigger reruns. If it passes, something is wrong
+        self.assertTrue(os.environ.get("CI") is None)
+
+    @slowTest
+    def test_env_vars_exist_slow(self):
+        # Same as the above, but for the slow suite
+        self.assertTrue(os.environ.get("CI") is None)
+
+
+if __name__ == "__main__":
+    run_tests()
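To observe the inversion end to end outside CI, the file can be run directly: with CI unset both tests pass, and with CI=true they fail as intended. A small illustrative driver (assumes a local pytorch checkout; this invocation is hypothetical and not part of the PR):

    import os
    import subprocess
    import sys

    # Run the sanity test with and without CI set and compare exit codes.
    for ci_value in (None, "true"):
        env = os.environ.copy()
        env.pop("CI", None)
        if ci_value is not None:
            env["CI"] = ci_value
        result = subprocess.run(
            [sys.executable, "test/test_ci_sanity_check_fail.py"], env=env
        )
        # Expect exit 0 when CI is unset and exit 1 when CI=true.
        print(f"CI={ci_value!r} -> exit code {result.returncode}")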