From 63f66d19ea8973563edbb48c8887d2e6b4931408 Mon Sep 17 00:00:00 2001 From: Nikita Shulga Date: Sat, 24 Jun 2023 03:10:46 +0000 Subject: [PATCH] [Tests] Make `run_test.py` usable without boto3 (#104111) There is a `HAVE_TEST_SELECTION_TOOLS` conditional, but turns out it does not really work, so fix it by defining all missing prototypes and make it work as single-shard instance Add lint rule to test that it would succeed for running only test_cuda with released version of PyTorch Pull Request resolved: https://github.com/pytorch/pytorch/pull/104111 Approved by: https://github.com/clee2000, https://github.com/ZainRizvi --- .github/workflows/lint.yml | 25 ++++++++++++++++++++++ test/run_test.py | 44 +++++++++++++++++++++++++------------- 2 files changed, 54 insertions(+), 15 deletions(-) diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index a06ec0bb02e..23b54e375e8 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -236,6 +236,31 @@ jobs: python3 -m unittest discover -vs tools/test -p 'test_*.py' python3 -m unittest discover -vs .github/scripts -p 'test_*.py' + test_run_test: + name: Test `run_test.py` is usable without boto3/rockset + if: ${{ github.repository == 'pytorch/pytorch' }} + runs-on: linux.20_04.4x + steps: + - name: Checkout PyTorch + uses: pytorch/pytorch/.github/actions/checkout-pytorch@main + with: + submodules: false + fetch-depth: 1 + - name: Setup Python 3.8 + uses: actions/setup-python@v4 + with: + python-version: '3.8' + architecture: x64 + cache: pip + - name: Install dependencies + run: | + pip install pytest-rerunfailures==11.1.* pytest-shard==0.1.* pytest-flakefinder==1.1.* pytest-xdist==3.3.* expecttest==0.1.* numpy==1.24.* + pip install torch --pre --index-url https://download.pytorch.org/whl/nightly/cpu/ + - name: Run run_test.py (nonretryable) + run: | + # Run test_weak, which is very fast + python3 test/run_test.py --include test_weak --verbose + test_collect_env: if: ${{ github.repository
== 'pytorch/pytorch' }} name: Test collect_env diff --git a/test/run_test.py b/test/run_test.py index 11e0d1aa6b0..9cea930c461 100755 --- a/test/run_test.py +++ b/test/run_test.py @@ -13,7 +13,7 @@ import sys import tempfile from datetime import datetime from distutils.version import LooseVersion -from typing import Any, cast, Dict, List, Optional +from typing import Any, cast, Dict, List, Optional, Union import pkg_resources @@ -52,6 +52,11 @@ try: HAVE_TEST_SELECTION_TOOLS = True except ImportError as e: + + class ShardedTest: + pass + + NUM_PROCS = 2 HAVE_TEST_SELECTION_TOOLS = False print( f"Unable to import test_selections from tools/testing. Running without test selection stats.... Reason: {e}" @@ -608,7 +613,7 @@ def run_test( and test_module.time is not None else None ) - print_to_stderr("Executing {} ... [{}]".format(command, datetime.now())) + print_to_stderr(f"Executing {command} ... [{datetime.now()}]") with open(log_path, "w") as f: ret_code = retry_shell( @@ -978,7 +983,7 @@ def get_pytest_args( if not is_cpp_test: # C++ tests need to be run with pytest directly, not via python pytest_args.extend(["-p", "no:xdist", "--use-pytest"]) - if not options.continue_through_error: + if not options.continue_through_error and HAVE_TEST_SELECTION_TOOLS: pytest_args.append(f"--sc={stepcurrent_key}") else: # Use pytext-dist to run C++ tests in parallel as running them sequentially using run_test @@ -1424,23 +1429,28 @@ def get_selected_tests(options) -> List[ShardedTest]: else: print("Found test time stats from artifacts") - # Do sharding - test_file_times_config = test_file_times.get(test_config, {}) - shards = calculate_shards( - num_shards, selected_tests, test_file_times_config, must_serial=must_serial - ) - _, tests_from_shard = shards[which_shard - 1] - selected_tests = tests_from_shard + if HAVE_TEST_SELECTION_TOOLS: + # Do sharding + test_file_times_config = test_file_times.get(test_config, {}) + shards = calculate_shards( + num_shards, selected_tests, 
test_file_times_config, must_serial=must_serial + ) + _, tests_from_shard = shards[which_shard - 1] + selected_tests = tests_from_shard return selected_tests -def run_test_module(test: ShardedTest, test_directory: str, options) -> Optional[str]: +def run_test_module( + test: Union[ShardedTest, str], test_directory: str, options +) -> Optional[str]: maybe_set_hip_visible_devies() # Printing the date here can help diagnose which tests are slow - print_to_stderr("Running {} ... [{}]".format(str(test), datetime.now())) - handler = CUSTOM_HANDLERS.get(test.name, run_test) + print_to_stderr(f"Running {str(test)} ... [{datetime.now()}]") + handler = CUSTOM_HANDLERS.get( + test.name if isinstance(test, ShardedTest) else test, run_test + ) return_code = handler(test, test_directory, options) assert isinstance(return_code, int) and not isinstance( return_code, bool @@ -1468,7 +1478,11 @@ def run_tests( # parallel = in parallel with other files # serial = this file on it's own. The file might still be run in parallel with itself (ex test_ops) - selected_tests_parallel = [x for x in selected_tests if not must_serial(x.name)] + selected_tests_parallel = [ + x + for x in selected_tests + if not must_serial(x.name if isinstance(x, ShardedTest) else x) + ] selected_tests_serial = [ x for x in selected_tests if x not in selected_tests_parallel ] @@ -1602,7 +1616,7 @@ def main(): prioritized_tests = [] remaining_tests = selected_tests - if IS_CI: + if IS_CI and HAVE_TEST_SELECTION_TOOLS: (prioritized_tests, remaining_tests) = get_reordered_tests(selected_tests) log_time_savings( selected_tests,