From 3dce26635f1bbab4bc96801e3cfd7ce728ba78b9 Mon Sep 17 00:00:00 2001
From: PyTorch MergeBot
Date: Wed, 21 Sep 2022 20:21:25 +0000
Subject: [PATCH] Revert "test in parallel at file granularity (#84961)"

This reverts commit 8107666c6a1c25e96762a31296cace9ed343aaf6.

Reverted https://github.com/pytorch/pytorch/pull/84961 on behalf of https://github.com/clee2000 due to makes test_forward_ad_nn_functional_max_unpool2d_cuda_float32 flakily unexpectedly pass
---
 .circleci/config.yml                          |   2 +-
 .circleci/docker/requirements-ci.txt          |   5 -
 .../job-specs/job-specs-custom.yml            |   2 +-
 .jenkins/pytorch/macos-test.sh                |   1 -
 .../win-test-helpers/setup_pytorch_env.bat    |   2 +-
 .../native/cuda/linalg/BatchLinearAlgebra.cpp |   3 -
 .../cuda/linalg/BatchLinearAlgebraLib.cpp     |   3 -
 test/run_test.py                              | 142 ++----------------
 tools/testing/test_selections.py              |  39 ++---
 torch/testing/_internal/common_utils.py       |  95 ++++++------
 10 files changed, 67 insertions(+), 227 deletions(-)

diff --git a/.circleci/config.yml b/.circleci/config.yml
index 0fb7288a533..f85010df0ba 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -693,7 +693,7 @@ jobs:
       - run_brew_for_macos_build
       - run:
           name: Test
-          no_output_timeout: "2h"
+          no_output_timeout: "1h"
          command: |
            set -x
 
diff --git a/.circleci/docker/requirements-ci.txt b/.circleci/docker/requirements-ci.txt
index ad5fd52f977..5662eadc4f6 100644
--- a/.circleci/docker/requirements-ci.txt
+++ b/.circleci/docker/requirements-ci.txt
@@ -149,11 +149,6 @@ pytest-xdist
 #Pinned versions:
 #test that import:
 
-pytest-shard
-#Description: plugin spliting up tests in pytest
-#Pinned versions:
-#test that import:
-
 pytest-rerunfailures
 #Description: plugin for rerunning tests in pytest
 #Pinned versions:
diff --git a/.circleci/verbatim-sources/job-specs/job-specs-custom.yml b/.circleci/verbatim-sources/job-specs/job-specs-custom.yml
index c0e9fea21f2..7af659bfba1 100644
--- a/.circleci/verbatim-sources/job-specs/job-specs-custom.yml
+++ b/.circleci/verbatim-sources/job-specs/job-specs-custom.yml
@@ -218,7 +218,7 @@
       - run_brew_for_macos_build
       - run:
           name: Test
-          no_output_timeout: "2h"
+          no_output_timeout: "1h"
          command: |
            set -x
 
diff --git a/.jenkins/pytorch/macos-test.sh b/.jenkins/pytorch/macos-test.sh
index de958b45b59..244c9dda7fc 100755
--- a/.jenkins/pytorch/macos-test.sh
+++ b/.jenkins/pytorch/macos-test.sh
@@ -16,7 +16,6 @@ fi
 pip install "unittest-xml-reporting<=3.2.0,>=2.0.0" \
   pytest \
   pytest-xdist \
-  pytest-shard \
   pytest-rerunfailures \
   "xdoctest==1.0.2" \
   "pygments==2.12.0"
diff --git a/.jenkins/pytorch/win-test-helpers/setup_pytorch_env.bat b/.jenkins/pytorch/win-test-helpers/setup_pytorch_env.bat
index 6ebe8bda8ed..79e8aedfab7 100644
--- a/.jenkins/pytorch/win-test-helpers/setup_pytorch_env.bat
+++ b/.jenkins/pytorch/win-test-helpers/setup_pytorch_env.bat
@@ -36,7 +36,7 @@ popd
 =======
 :: Pin unittest-xml-reporting to freeze printing test summary logic, related: https://github.com/pytorch/pytorch/issues/69014
-pip install "ninja==1.10.0.post1" future "hypothesis==5.35.1" "expecttest==0.1.3" "librosa>=0.6.2" "scipy==1.6.3" psutil pillow "unittest-xml-reporting<=3.2.0,>=2.0.0" pytest pytest-xdist pytest-shard pytest-rerunfailures "xdoctest==1.0.2" "pygments==2.12.0"
+pip install "ninja==1.10.0.post1" future "hypothesis==5.35.1" "expecttest==0.1.3" "librosa>=0.6.2" "scipy==1.6.3" psutil pillow "unittest-xml-reporting<=3.2.0,>=2.0.0" pytest pytest-xdist pytest-rerunfailures "xdoctest==1.0.2" "pygments==2.12.0"
 
 if errorlevel 1 exit /b
 if not errorlevel 0 exit /b
diff --git a/aten/src/ATen/native/cuda/linalg/BatchLinearAlgebra.cpp b/aten/src/ATen/native/cuda/linalg/BatchLinearAlgebra.cpp
index 7497f595acb..a7d379ec462 100644
--- a/aten/src/ATen/native/cuda/linalg/BatchLinearAlgebra.cpp
+++ b/aten/src/ATen/native/cuda/linalg/BatchLinearAlgebra.cpp
@@ -1561,9 +1561,6 @@ static void apply_lu_factor_batched_magma(const Tensor& input, const Tensor& piv
     input_array[i] = &input_data[i * input_matrix_stride];
   }
 
-  // needed to run lu tests in parallel, see https://github.com/pytorch/pytorch/issues/82894 for examples
-  // of failures
-  c10::cuda::device_synchronize();
   MAGMAQueue magma_queue(input.get_device());
 
   if (compute_pivots) {
diff --git a/aten/src/ATen/native/cuda/linalg/BatchLinearAlgebraLib.cpp b/aten/src/ATen/native/cuda/linalg/BatchLinearAlgebraLib.cpp
index 01788e0bdff..d80b93b3da0 100644
--- a/aten/src/ATen/native/cuda/linalg/BatchLinearAlgebraLib.cpp
+++ b/aten/src/ATen/native/cuda/linalg/BatchLinearAlgebraLib.cpp
@@ -237,9 +237,6 @@ void apply_ldl_solve_cusolver(
   auto pivots_ = pivots.to(kLong);
   auto pivots_data = pivots_.data_ptr();
 
-  // needed to run ldl_solve tests in parallel
-  // see https://github.com/pytorch/pytorch/issues/82894 for examples of failures
-  c10::cuda::device_synchronize();
   auto handle = at::cuda::getCurrentCUDASolverDnHandle();
   auto datatype = at::cuda::solver::get_cusolver_datatype();
   size_t worksize_device = 0;
diff --git a/test/run_test.py b/test/run_test.py
index b8edfa72b0c..9651f7f7a74 100755
--- a/test/run_test.py
+++ b/test/run_test.py
@@ -27,7 +27,7 @@ from torch.testing._internal.common_utils import (
     parser as common_parser,
 )
 import torch.distributed as dist
-from torch.multiprocessing import Pool, get_context
+from torch.multiprocessing import Pool
 
 REPO_ROOT = pathlib.Path(__file__).resolve().parent.parent
 
@@ -39,7 +39,6 @@ try:
         get_reordered_tests,
         get_test_case_configs,
         calculate_shards,
-        NUM_PROCS
     )
     HAVE_TEST_SELECTION_TOOLS = True
 except ImportError:
@@ -126,6 +125,7 @@ TESTS = discover_tests(
         "distributed/elastic/utils/util_test",
         "distributed/elastic/utils/distributed_test",
         "distributed/elastic/multiprocessing/api_test",
+        "test_deploy",
     ]
 )
 
@@ -264,29 +264,6 @@ RUN_PARALLEL_BLOCKLIST = [
     "test_cuda_trace",
 ] + FSDP_TEST
 
-CI_SERIAL_LIST = [
-    'test_nn',
-    'test_fake_tensor',
-    'test_cpp_api_parity',
-    'test_reductions',
-    'test_cuda',
-    'test_jit_cuda_fuser',  # OOM on test_issue_1785, also profiling?
-    'test_indexing',
-    'test_fx_backends',
-    'test_linalg',
-    'test_cpp_extensions_jit',
-    'test_torch',
-    'test_tensor_creation_ops',
-    'test_sparse_csr',
-    'test_dispatch',
-    'nn/test_pooling',
-    'distributions/test_distributions',
-    'test_autograd',  # slow gradcheck runs a test that checks the cuda memory allocator
-    'test_prims',  # slow gradcheck runs a test that checks the cuda memory allocator
-    'test_modules',  # failed test due to mismatched elements
-]
-
-
 # A subset of our TEST list that validates PyTorch's ops, modules, and autograd function as expected
 CORE_TEST_LIST = [
     "test_autograd",
@@ -363,7 +340,6 @@ def discover_functorch_tests():
     assert len(result) >= 8
     return result
 
-
 FUNCTORCH_TESTS = discover_functorch_tests()
 
 TESTS_REQUIRING_LAPACK = [
@@ -398,7 +374,7 @@ def run_test(
     launcher_cmd=None,
     extra_unittest_args=None,
     env=None,
-) -> int:
+):
     unittest_args = options.additional_unittest_args.copy()
     if options.verbose:
         unittest_args.append(f'-{"v"*options.verbose}')  # in case of pytest
@@ -426,16 +402,9 @@
     # in `if __name__ == '__main__': `. So call `python test_*.py` instead.
     argv = [test_module + ".py"] + unittest_args
 
-    log_fd, log_path = tempfile.mkstemp(dir=REPO_ROOT / "test" / "test-reports",
-                                        prefix=test_module.replace("\\", "-").replace("/", "-"))
-    os.close(log_fd)
     command = (launcher_cmd or []) + executable + argv
     print_to_stderr("Executing {} ... [{}]".format(command, datetime.now()))
-    with open(log_path, "w") as f:
-        ret_code = shell(command, test_directory, stdout=f, stderr=f, env=env)
-    print_log_file(test_module, log_path)
-    os.remove(log_path)
-    return ret_code
+    return shell(command, test_directory, env=env)
 
 
 def test_cuda_primary_ctx(test_module, test_directory, options):
@@ -707,49 +676,6 @@ def run_doctests(test_module, test_directory, options):
     return result
 
-
-def print_log_file(test: str, file_path: str) -> None:
-    with open(file_path, "r") as f:
-        print_to_stderr("")
-        print_to_stderr(f"PRINT LOG FILE of {test} ({file_path})")
-        print_to_stderr(f"##[group]PRINT LOG FILE of {test} ({file_path})")
-        print_to_stderr(f.read())
-        print_to_stderr("##[endgroup]")
-        print_to_stderr(f"FINISHED PRINT LOG FILE of {test} ({file_path})")
-        print_to_stderr("")
-
-
-def run_test_ops(test_module, test_directory, options):
-    if 'slow-gradcheck' in os.getenv("BUILD_ENVIRONMENT", ""):
-        # there are a lot of tests that take up a lot of space in slowgrad check, so don't bother parallelizing
-        # it's also on periodic so we don't care about TTS as much
-        return run_test(test_module, test_directory, copy.deepcopy(options),
-                        extra_unittest_args=["--use-pytest", '-vv', '-x', '--reruns=2', '-rfEX'],
-                        )
-
-    return_codes = []
-    os.environ["PARALLEL_TESTING"] = "1"
-    pool = Pool(NUM_PROCS)
-    for i in range(NUM_PROCS):
-        return_code = pool.apply_async(run_test, args=(test_module, test_directory, copy.deepcopy(options)),
-                                       kwds={"extra_unittest_args": ["--use-pytest", '-vv', '-x', '--reruns=2', '-rfEX',
-                                                                     f'--shard-id={i}', f'--num-shards={NUM_PROCS}',
-                                                                     "-k=not _linalg_cholesky_"],
-                                             })
-        return_codes.append(return_code)
-    pool.close()
-    pool.join()
-    del os.environ['PARALLEL_TESTING']
-
-    for return_code in return_codes:
-        if return_code.get() != 0:
-            return return_code.get()
-    return_code = run_test(test_module, test_directory, copy.deepcopy(options),
-                           extra_unittest_args=["--use-pytest", '-vv', '-x', '--reruns=2', '-rfEX',
-                                                "-k=_linalg_cholesky_"],
-                           )
-    return return_code
-
-
 CUSTOM_HANDLERS = {
     "test_cuda_primary_ctx": test_cuda_primary_ctx,
     "test_cuda_trace": get_run_test_with_subprocess_fn(),
@@ -769,9 +695,6 @@
     "distributed/rpc/test_share_memory": get_run_test_with_subprocess_fn(),
     "distributed/rpc/cuda/test_tensorpipe_agent": get_run_test_with_subprocess_fn(),
     "doctests": run_doctests,
-    "test_ops": run_test_ops,
-    "test_ops_gradients": run_test_ops,
-    "test_ops_jit": run_test_ops,
 }
 
 
@@ -988,18 +911,6 @@ def exclude_tests(exclude_list, selected_tests, exclude_message=None):
     return selected_tests
 
-
-def must_serial(file: str) -> bool:
-    return (
-        "distributed" in os.getenv("TEST_CONFIG", "") or
-        "functorch" in os.getenv("TEST_CONFIG", "") or
-        "dynamo" in os.getenv("TEST_CONFIG", "") or
-        "distributed" in file or
-        file in CUSTOM_HANDLERS or
-        file in RUN_PARALLEL_BLOCKLIST or
-        file in CI_SERIAL_LIST
-    )
-
-
 def get_selected_tests(options):
     selected_tests = options.include
 
@@ -1099,12 +1010,11 @@
             print(
                 "::warning:: Gathered no stats from artifacts. Proceeding with default sharding plan."
             )
-            selected_tests = selected_tests[which_shard - 1:: num_shards]
+            selected_tests = selected_tests[which_shard - 1 :: num_shards]
         else:
             print("Found test time stats from artifacts")
             test_file_times_config = test_file_times[test_config]
-            shards = calculate_shards(num_shards, selected_tests, test_file_times_config,
-                                      must_serial=must_serial)
+            shards = calculate_shards(num_shards, selected_tests, test_file_times_config)
             _, tests_from_shard = shards[which_shard - 1]
             selected_tests = tests_from_shard
 
@@ -1130,7 +1040,7 @@ def run_test_module(test: str, test_directory: str, options) -> Optional[str]:
         return_code = handler(test_module, test_directory, options)
     assert isinstance(return_code, int) and not isinstance(
         return_code, bool
-    ), f"While running {test} got non integer return code {return_code}"
+    ), "Return code should be an integer"
     if return_code == 0:
         return None
 
@@ -1163,52 +1073,22 @@ def main():
         # downloading test cases configuration to local environment
         get_test_case_configs(dirpath=test_directory)
 
+    has_failed = False
     failure_messages = []
-
-    selected_tests_parallel = [x for x in selected_tests if not must_serial(x)]
-    selected_tests_serial = [x for x in selected_tests if x not in selected_tests_parallel]
-    print_to_stderr("parallel tests:\n {}".format("\n ".join(selected_tests_parallel)))
-    print_to_stderr("serial tests:\n {}".format("\n ".join(selected_tests_serial)))
-
-    pool = get_context("spawn").Pool(NUM_PROCS, maxtasksperchild=1)
-    os.makedirs(REPO_ROOT / "test" / "test-reports", exist_ok=True)
-
-    def success_callback(err_message):
-        if err_message is None:
-            return True
-        failure_messages.append(err_message)
-        print_to_stderr(err_message)
-        if not options.continue_through_error:
-            pool.terminate()
-        return False
-
     try:
-        os.environ['PARALLEL_TESTING'] = '1'
-        for test in selected_tests_parallel:
-            pool.apply_async(run_test_module, args=(test, test_directory,
-                             copy.deepcopy(options)), callback=success_callback)
-        pool.close()
-        pool.join()
-        del os.environ['PARALLEL_TESTING']
-
-        if not options.continue_through_error and len(failure_messages) != 0:
-            raise RuntimeError("\n".join(failure_messages))
-
-        for test in selected_tests_serial:
+        for test in selected_tests:
             options_clone = copy.deepcopy(options)
             if test in USE_PYTEST_LIST:
                 options_clone.pytest = True
             err_message = run_test_module(test, test_directory, options_clone)
             if err_message is None:
                 continue
+            has_failed = True
            failure_messages.append(err_message)
            if not options_clone.continue_through_error:
                raise RuntimeError(err_message)
            print_to_stderr(err_message)
     finally:
-        pool.terminate()
-        pool.join()
-
         if options.coverage:
             from coverage import Coverage
 
@@ -1221,7 +1101,7 @@ def main():
             if not PYTORCH_COLLECT_COVERAGE:
                 cov.html_report()
 
-    if len(failure_messages) != 0:
+    if options.continue_through_error and has_failed:
         for err in failure_messages:
             print_to_stderr(err)
         sys.exit(1)
diff --git a/tools/testing/test_selections.py b/tools/testing/test_selections.py
index 3fc2c8082ba..1fa13ee99b0 100644
--- a/tools/testing/test_selections.py
+++ b/tools/testing/test_selections.py
@@ -1,24 +1,15 @@
 import os
 import subprocess
 
-from typing import Callable, Dict, List, Optional, Tuple
+from typing import Dict, List, Tuple
 
 from tools.stats.import_test_stats import get_disabled_tests, get_slow_tests
 
-# mac has 3 CPUs and also received the best speedup with 3 processes. Setting this any larger
-# will also force use further restrict the amount of memory per process for cuda
-NUM_PROCS = 3
-
 
 def calculate_shards(
-    num_shards: int,
-    tests: List[str],
-    job_times: Dict[str, float],
-    must_serial: Optional[Callable[[str], bool]] = None,
+    num_shards: int, tests: List[str], job_times: Dict[str, float]
 ) -> List[Tuple[float, List[str]]]:
-    must_serial = must_serial if callable(must_serial) else lambda x: True
-
-    filtered_job_times: Dict[str, float] = dict()
+    filtered_job_times: Dict[str, float] = {}
     unknown_jobs: List[str] = []
     for test in tests:
         if test in job_times:
@@ -26,30 +17,18 @@ def calculate_shards(
         else:
             unknown_jobs.append(test)
 
+    # The following attempts to implement a partition approximation greedy algorithm
+    # See more at https://en.wikipedia.org/wiki/Greedy_number_partitioning
     sorted_jobs = sorted(
         filtered_job_times, key=lambda j: filtered_job_times[j], reverse=True
     )
     sharded_jobs: List[Tuple[float, List[str]]] = [(0.0, []) for _ in range(num_shards)]
-
-    serial = [x for x in sorted_jobs if must_serial(x)]
-    parallel = [x for x in sorted_jobs if x not in serial]
-
-    for i in range(0, len(serial)):
-        min_shard_index = sorted(range(num_shards), key=lambda j: sharded_jobs[j][0])[0]
+    for job in sorted_jobs:
+        min_shard_index = sorted(range(num_shards), key=lambda i: sharded_jobs[i][0])[0]
         curr_shard_time, curr_shard_jobs = sharded_jobs[min_shard_index]
-        curr_shard_jobs.append(serial[i])
+        curr_shard_jobs.append(job)
         sharded_jobs[min_shard_index] = (
-            curr_shard_time + filtered_job_times[serial[i]],
-            curr_shard_jobs,
-        )
-
-    # Not the best idea, but attempt to mask the long jobs with other long jobs
-    for i in range(0, len(parallel), NUM_PROCS):
-        min_shard_index = sorted(range(num_shards), key=lambda j: sharded_jobs[j][0])[0]
-        curr_shard_time, curr_shard_jobs = sharded_jobs[min_shard_index]
-        curr_shard_jobs.extend(parallel[i : i + NUM_PROCS])
-        sharded_jobs[min_shard_index] = (
-            curr_shard_time + filtered_job_times[parallel[i]],
+            curr_shard_time + filtered_job_times[job],
             curr_shard_jobs,
         )
 
diff --git a/torch/testing/_internal/common_utils.py b/torch/testing/_internal/common_utils.py
index ec5220162a2..e5f6c53ba28 100644
--- a/torch/testing/_internal/common_utils.py
+++ b/torch/testing/_internal/common_utils.py
@@ -95,6 +95,8 @@ from .composite_compliance import no_dispatch
 
 torch.backends.disable_global_flags()
 
+PYTEST_FILES = ["test_ops", "test_ops_gradients", "test_ops_jit"]
+
 FILE_SCHEMA = "file://"
 if sys.platform == 'win32':
     FILE_SCHEMA = "file:///"
@@ -496,7 +498,6 @@ parser.add_argument('--accept', action='store_true')
 parser.add_argument('--jit_executor', type=str)
 parser.add_argument('--repeat', type=int, default=1)
 parser.add_argument('--test_bailouts', action='store_true')
-parser.add_argument('--use-pytest', action='store_true')
 parser.add_argument('--save-xml', nargs='?', type=str,
                     const=_get_test_report_path(),
                     default=_get_test_report_path() if IS_CI else None)
@@ -532,7 +533,6 @@ DISABLED_TESTS_FILE = args.import_disabled_tests
 LOG_SUFFIX = args.log_suffix
 RUN_PARALLEL = args.run_parallel
 TEST_BAILOUTS = args.test_bailouts
-USE_PYTEST = args.use_pytest
 TEST_DISCOVER = args.discover_tests
 TEST_IN_SUBPROCESS = args.subprocess
 TEST_SAVE_XML = args.save_xml
@@ -567,7 +567,7 @@ def wait_for_process(p):
         # Always call p.wait() to ensure exit
         p.wait()
 
-def shell(command, cwd=None, env=None, stdout=None, stderr=None):
+def shell(command, cwd=None, env=None):
     sys.stdout.flush()
     sys.stderr.flush()
     # The following cool snippet is copied from Py3 core library subprocess.call
@@ -578,7 +578,7 @@ def shell(command, cwd=None, env=None):
     #
     # https://github.com/python/cpython/blob/71b6c1af727fbe13525fb734568057d78cea33f3/Lib/subprocess.py#L309-L323
     assert not isinstance(command, torch._six.string_classes), "Command to shell should be a list or tuple of tokens"
-    p = subprocess.Popen(command, universal_newlines=True, cwd=cwd, env=env, stdout=stdout, stderr=stderr)
+    p = subprocess.Popen(command, universal_newlines=True, cwd=cwd, env=env)
     return wait_for_process(p)
 
@@ -638,22 +638,6 @@ def lint_test_case_extension(suite):
                 succeed = False
     return succeed
 
-
-def get_report_path(pytest=False):
-    test_filename = inspect.getfile(sys._getframe(2))
-    test_filename = sanitize_if_functorch_test_filename(test_filename)
-    test_filename = sanitize_test_filename(test_filename)
-    test_report_path = TEST_SAVE_XML + LOG_SUFFIX
-    test_report_path = os.path.join(test_report_path, test_filename)
-    if pytest:
-        test_report_path = test_report_path.replace('python-unittest', 'python-pytest')
-        os.makedirs(test_report_path, exist_ok=True)
-        test_report_path = os.path.join(test_report_path, f"{test_filename}-{os.urandom(8).hex()}.xml")
-        return test_report_path
-    os.makedirs(test_report_path, exist_ok=True)
-    return test_report_path
-
-
 def sanitize_pytest_xml(xml_file: str):
     # pytext xml is different from unittext xml, this function makes pytest xml more similar to unittest xml
     # consider somehow modifying the XML logger in conftest to do this instead
@@ -734,22 +718,6 @@ def run_tests(argv=UNITTEST_ARGS):
         for p in processes:
             failed |= wait_for_process(p) != 0
         assert not failed, "Some test shards have failed"
-    elif USE_PYTEST:
-        if TEST_SAVE_XML:
-            test_report_path = get_report_path(pytest=True)
-            print(f'Test results will be stored in {test_report_path}')
-
-        import pytest
-        os.environ["NO_COLOR"] = "1"
-        os.environ["USING_PYTEST"] = "1"
-        exit_code = pytest.main(args=argv + [f'--junit-xml-reruns={test_report_path}'] if TEST_SAVE_XML else [])
-        del os.environ["USING_PYTEST"]
-        if TEST_SAVE_XML:
-            sanitize_pytest_xml(test_report_path)
-        print("If in CI, skip info is located in the xml test reports, please either go to s3 or the hud to download them")
-        # exitcode of 5 means no tests were found, which happens since some test configs don't
-        # run tests from certain files
-        exit(0 if exit_code == 5 else exit_code)
     elif TEST_SAVE_XML is not None:
         # import here so that non-CI doesn't need xmlrunner installed
         import xmlrunner  # type: ignore[import]
@@ -776,14 +744,46 @@ def run_tests(argv=UNITTEST_ARGS):
                     # it stands for `verbose_str` captured in the closure
                     c.cell_contents = f"skip: {reason}"
 
-        test_report_path = get_report_path()
-        verbose = '--verbose' in argv or '-v' in argv
-        if verbose:
-            print(f'Test results will be stored in {test_report_path}')
-        unittest.main(argv=argv, testRunner=xmlrunner.XMLTestRunner(
-            output=test_report_path,
-            verbosity=2 if verbose else 1,
-            resultclass=XMLTestResultVerbose))
+        test_filename = inspect.getfile(sys._getframe(1))
+        test_filename = sanitize_if_functorch_test_filename(test_filename)
+        test_filename = sanitize_test_filename(test_filename)
+        test_report_path = TEST_SAVE_XML + LOG_SUFFIX
+        test_report_path = os.path.join(test_report_path, test_filename)
+        build_environment = os.environ.get("BUILD_ENVIRONMENT", "")
+        if test_filename in PYTEST_FILES and not IS_SANDCASTLE and not (
+            "cuda" in build_environment and "linux" in build_environment
+        ):
+            # exclude linux cuda tests because we run into memory issues when running in parallel
+            import pytest
+            os.environ["NO_COLOR"] = "1"
+            os.environ["USING_PYTEST"] = "1"
+            pytest_report_path = test_report_path.replace('python-unittest', 'python-pytest')
+            os.makedirs(pytest_report_path, exist_ok=True)
+            # part of our xml parsing looks for grandparent folder names
+            pytest_report_path = os.path.join(pytest_report_path, f"{test_filename}.xml")
+            print(f'Test results will be stored in {pytest_report_path}')
+            # mac slower on 4 proc than 3
+            num_procs = 3 if "macos" in build_environment else 4
+            # f = failed
+            # E = error
+            # X = unexpected success
+            exit_code = pytest.main(args=[inspect.getfile(sys._getframe(1)), f'-n={num_procs}', '-vv', '-x',
+                                          '--reruns=2', '-rfEX', f'--junit-xml-reruns={pytest_report_path}'])
+            del os.environ["USING_PYTEST"]
+            sanitize_pytest_xml(f'{pytest_report_path}')
+            print("Skip info is located in the xml test reports, please either go to s3 or the hud to download them")
+            # exitcode of 5 means no tests were found, which happens since some test configs don't
+            # run tests from certain files
+            exit(0 if exit_code == 5 else exit_code)
+        else:
+            os.makedirs(test_report_path, exist_ok=True)
+            verbose = '--verbose' in argv or '-v' in argv
+            if verbose:
+                print(f'Test results will be stored in {test_report_path}')
+            unittest.main(argv=argv, testRunner=xmlrunner.XMLTestRunner(
+                output=test_report_path,
+                verbosity=2 if verbose else 1,
+                resultclass=XMLTestResultVerbose))
     elif REPEAT_COUNT > 1:
         for _ in range(REPEAT_COUNT):
             if not unittest.main(exit=False, argv=argv).result.wasSuccessful():
@@ -904,13 +904,6 @@ TEST_SKIP_FAST = os.getenv('PYTORCH_TEST_SKIP_FAST', '0') == '1'
 # as we had before. By default, we don't run these tests.
 TEST_WITH_CROSSREF = os.getenv('PYTORCH_TEST_WITH_CROSSREF', '0') == '1'
 
-
-if TEST_CUDA and 'NUM_PARALLEL_PROCS' in os.environ:
-    from tools.testing.test_selections import NUM_PROCS
-    # other libraries take up about 11% of space per process
-    torch.cuda.set_per_process_memory_fraction(round(1 / NUM_PROCS - .11, 2))
-
-
 def skipIfCrossRef(fn):
     @wraps(fn)
     def wrapper(*args, **kwargs):
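Note on the sharding strategy this revert restores: the calculate_shards hunk above
reinstates plain greedy number partitioning (see the Wikipedia link in the restored
comment). Jobs are sorted by recorded duration, longest first, and each job goes to
whichever shard is currently lightest. A minimal standalone sketch of that strategy
follows; the function name greedy_shards and the sample timings are illustrative
assumptions, not part of the patch:

    from typing import Dict, List, Tuple

    def greedy_shards(num_shards: int, job_times: Dict[str, float]) -> List[Tuple[float, List[str]]]:
        # One (total_time, jobs) bucket per shard.
        shards: List[Tuple[float, List[str]]] = [(0.0, []) for _ in range(num_shards)]
        # Longest job first, always assigned to the currently lightest shard.
        for job in sorted(job_times, key=job_times.__getitem__, reverse=True):
            lightest = min(range(num_shards), key=lambda i: shards[i][0])
            total, jobs = shards[lightest]
            jobs.append(job)
            shards[lightest] = (total + job_times[job], jobs)
        return shards

    print(greedy_shards(2, {"test_torch": 30.0, "test_ops": 25.0, "test_nn": 20.0, "test_jit": 5.0}))
    # [(35.0, ['test_torch', 'test_jit']), (45.0, ['test_ops', 'test_nn'])]

This is the standard approximation for minimizing the makespan of the slowest shard;
the reverted code had split the sorted jobs into serial and parallel groups first.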
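A second worked detail: the common_utils.py hunk also deletes the per-process CUDA
memory cap that parallel mode had set. With NUM_PROCS = 3, the deleted expression
evaluates to round(1 / 3 - .11, 2) == 0.22, i.e. each of the three workers was limited
to roughly 22% of GPU memory, with about 11% per process budgeted for other libraries
per the deleted comment. A small sketch of just that arithmetic (the
set_per_process_memory_fraction call is quoted from the removed code, not executed
here):

    NUM_PROCS = 3
    # An equal 1/3 share of the GPU per process, minus the ~11% assumed to be
    # consumed by other libraries in each process.
    fraction = round(1 / NUM_PROCS - .11, 2)
    print(fraction)  # 0.22
    # The removed code then applied it with:
    #   torch.cuda.set_per_process_memory_fraction(fraction)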