From 5e4dfd014050b3ad57f7af643a42e03685ed1a7f Mon Sep 17 00:00:00 2001
From: "davidriazati@fb.com" <davidriazati@fb.com>
Date: Wed, 21 Apr 2021 13:43:52 -0700
Subject: [PATCH] Add quicklint make target (#56559)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Summary:
This queries the local git repo for changed files (any changed files, not just committed ones) and sends them to mypy/flake8 instead of the default (which is the whole repo, defined the .flake8 and mypy.ini files). This brings a good speedup (from 15 seconds with no cache to < 1 second from my local testing on this PR).

```bash
make quicklint -j 6
```

It should be noted that the results of this aren’t exactly what’s in the CI, since mypy and flake8 ignore the `include` and `exclude` parts of their config when an explicit list of files is passed in.
](https://our.intern.facebook.com/intern/diff/27901577/)
Pull Request resolved: https://github.com/pytorch/pytorch/pull/56559

Pulled By: driazati

Reviewed By: malfet

Differential Revision: D27901577

fbshipit-source-id: 99f351cdfe5aba007948aea2b8a78f683c5d8583
---
 .github/workflows/lint.yml              |  24 ++++--
 CONTRIBUTING.md                         |   4 +
 Makefile                                |  13 +++
 mypy-strict.ini                         |   2 +
 tools/actions_local_runner.py           | 102 +++++++++++++++++++-----
 tools/test/test_actions_local_runner.py |  60 ++++++++++++++
 6 files changed, 181 insertions(+), 24 deletions(-)
 create mode 100644 tools/test/test_actions_local_runner.py

diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml
index 137193c9d8f..eed01ea8917 100644
--- a/.github/workflows/lint.yml
+++ b/.github/workflows/lint.yml
@@ -6,6 +6,12 @@ on:
     - master
   pull_request:
 
+# note [local lint]
+# LOCAL_FILES in the jobs below is intended to be filled by local (i.e. on a
+# developer's machine) job in tools/actions_local_runner.py to pass only the
+# changed files to whatever tool is consuming the files. This helps speed up
+# local lints while keeping the linting code unified between local / CI.
+
 jobs:
   quick-checks:
     runs-on: ubuntu-18.04
@@ -128,15 +134,15 @@ jobs:
         uses: actions/checkout@v2
       - name: Install markdown-toc
         run: npm install -g markdown-toc
-      - name: Regenerate ToCs
+      - name: Regenerate ToCs and check that they didn't change
         run: |
           set -eux
           export PATH=~/.npm-global/bin:"$PATH"
           for FILE in $(git grep -Il '<!-- toc -->' -- '**.md'); do
             markdown-toc --bullets='-' -i "$FILE"
           done
-      - name: Assert that regenerating the ToCs didn't change them
-        run: .github/scripts/report_git_status.sh
+
+          .github/scripts/report_git_status.sh
 
   flake8-py3:
     runs-on: ubuntu-18.04
@@ -164,9 +170,13 @@ jobs:
           pip install -r requirements-flake8.txt
           flake8 --version
       - name: Run flake8
+        env:
+          # See note [local lint]
+          LOCAL_FILES: ""
         run: |
           set -eux
-          flake8 | tee "${GITHUB_WORKSPACE}"/flake8-output.txt
+          # shellcheck disable=SC2086
+          flake8 $LOCAL_FILES | tee "${GITHUB_WORKSPACE}"/flake8-output.txt
       - name: Translate annotations
         if: github.event_name == 'pull_request'
         env:
@@ -342,6 +352,10 @@ jobs:
           time python -mtools.codegen.gen -s aten/src/ATen -d build/aten/src/ATen
           time python -mtools.pyi.gen_pyi --native-functions-path aten/src/ATen/native/native_functions.yaml --deprecated-functions-path "tools/autograd/deprecated.yaml"
       - name: Run mypy
+        env:
+          # See note [local lint]
+          LOCAL_FILES: ""
         run: |
           set -eux
-          for CONFIG in mypy*.ini; do mypy --config="$CONFIG"; done
+          # shellcheck disable=SC2086
+          for CONFIG in mypy*.ini; do mypy --config="$CONFIG" $LOCAL_FILES; done
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 0ba2d40447c..ff1f8fe95b9 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -364,7 +364,11 @@ command runs tests such as `TestNN.test_BCELoss` and
 You can run the same linting steps that are used in CI locally via `make`:
 
 ```bash
+# Lint all files
 make lint -j 6  # run lint (using 6 parallel jobs)
+
+# Lint only the files you have changed
+make quicklint -j 6
 ```
 
 These jobs may require extra dependencies that aren't dependencies of PyTorch
diff --git a/Makefile b/Makefile
index 26a47cd7f07..9f1bf4e103a 100644
--- a/Makefile
+++ b/Makefile
@@ -55,12 +55,16 @@ quick_checks:
 flake8:
 	@python tools/actions_local_runner.py \
 		--file .github/workflows/lint.yml \
+		--file-filter '.py' \
+		$(CHANGED_ONLY) \
 		--job 'flake8-py3' \
 		--step 'Run flake8'
 
 mypy:
 	@python tools/actions_local_runner.py \
 		--file .github/workflows/lint.yml \
+		--file-filter '.py' \
+		$(CHANGED_ONLY) \
 		--job 'mypy' \
 		--step 'Run mypy'
 
@@ -74,4 +78,13 @@ clang_tidy:
 	echo "clang-tidy local lint is not yet implemented"
 	exit 1
 
+toc:
+	@python tools/actions_local_runner.py \
+		--file .github/workflows/lint.yml \
+		--job 'toc' \
+		--step "Regenerate ToCs and check that they didn't change"
+
 lint: flake8 mypy quick_checks cmakelint generate-gha-workflows
+
+quicklint: CHANGED_ONLY=--changed-only
+quicklint: mypy flake8 mypy quick_checks cmakelint generate-gha-workflows
diff --git a/mypy-strict.ini b/mypy-strict.ini
index e76bccc9e3a..02478cd418e 100644
--- a/mypy-strict.ini
+++ b/mypy-strict.ini
@@ -51,9 +51,11 @@ files =
     tools/test/test_mypy_wrapper.py,
     tools/test/test_test_history.py,
     tools/test/test_trailing_newlines.py,
+    tools/test/test_actions_local_runner.py,
     tools/test/test_translate_annotations.py,
     tools/trailing_newlines.py,
     tools/translate_annotations.py,
+    tools/actions_local_runner.py,
     torch/testing/_internal/framework_utils.py,
     torch/utils/benchmark/utils/common.py,
     torch/utils/benchmark/utils/timer.py,
diff --git a/tools/actions_local_runner.py b/tools/actions_local_runner.py
index ac5b203f128..6b916d2ca3e 100755
--- a/tools/actions_local_runner.py
+++ b/tools/actions_local_runner.py
@@ -1,10 +1,12 @@
-#!/bin/python3
+#!/usr/bin/env python3
 
 import subprocess
+import sys
 import os
 import argparse
 import yaml
 import asyncio
+from typing import List, Dict, Any, Optional
 
 
 REPO_ROOT = os.path.dirname(os.path.dirname(__file__))
@@ -21,23 +23,71 @@ class col:
     UNDERLINE = "\033[4m"
 
 
-def color(the_color, text):
+def color(the_color: str, text: str) -> str:
     return col.BOLD + the_color + str(text) + col.RESET
 
 
-def cprint(the_color, text):
-    print(color(the_color, text))
+def cprint(the_color: str, text: str) -> None:
+    if hasattr(sys.stdout, "isatty") and sys.stdout.isatty():
+        print(color(the_color, text))
+    else:
+        print(text)
 
 
-async def run_step(step, job_name):
+def git(args: List[str]) -> List[str]:
+    p = subprocess.run(
+        ["git"] + args,
+        cwd=REPO_ROOT,
+        stdout=subprocess.PIPE,
+        stderr=subprocess.PIPE,
+        check=True,
+    )
+    lines = p.stdout.decode().strip().split("\n")
+    return [line.strip() for line in lines]
+
+
+def find_changed_files(merge_base: str = "origin/master") -> List[str]:
+    untracked = []
+
+    for line in git(["status", "--porcelain"]):
+        # Untracked files start with ??, so grab all of those
+        if line.startswith("?? "):
+            untracked.append(line.replace("?? ", ""))
+
+    # Modified, unstaged
+    modified = git(["diff", "--name-only"])
+
+    # Modified, staged
+    cached = git(["diff", "--cached", "--name-only"])
+
+    # Committed
+    diff_with_origin = git(["diff", "--name-only", "--merge-base", merge_base, "HEAD"])
+
+    # De-duplicate
+    all_files = set(untracked + cached + modified + diff_with_origin)
+    return [x.strip() for x in all_files if x.strip() != ""]
+
+
+async def run_step(step: Dict[str, Any], job_name: str, files: Optional[List[str]]) -> bool:
     env = os.environ.copy()
     env["GITHUB_WORKSPACE"] = "/tmp"
+    if files is None:
+        env["LOCAL_FILES"] = ""
+    else:
+        env["LOCAL_FILES"] = " ".join(files)
     script = step["run"]
 
+    PASS = "\U00002705"
+    FAIL = "\U0000274C"
     # We don't need to print the commands for local running
     # TODO: Either lint that GHA scripts only use 'set -eux' or make this more
     # resilient
     script = script.replace("set -eux", "set -eu")
+    name = f'{job_name}: {step["name"]}'
+
+    def header(passed: bool) -> None:
+        icon = PASS if passed else FAIL
+        cprint(col.BLUE, f"{icon} {name}")
 
     try:
         proc = await asyncio.create_subprocess_shell(
@@ -49,27 +99,31 @@ async def run_step(step, job_name):
             stderr=subprocess.PIPE,
         )
 
-        stdout, stderr = await proc.communicate()
-        cprint(col.BLUE, f'{job_name}: {step["name"]}')
-    except Exception as e:
-        cprint(col.BLUE, f'{job_name}: {step["name"]}')
-        print(e)
+        stdout_bytes, stderr_bytes = await proc.communicate()
 
-    stdout = stdout.decode().strip()
-    stderr = stderr.decode().strip()
+        header(passed=proc.returncode == 0)
+    except Exception as e:
+        header(passed=False)
+        print(e)
+        return False
+
+    stdout = stdout_bytes.decode().strip()
+    stderr = stderr_bytes.decode().strip()
 
     if stderr != "":
         print(stderr)
     if stdout != "":
         print(stdout)
 
+    return proc.returncode == 0
 
-async def run_steps(steps, job_name):
-    coros = [run_step(step, job_name) for step in steps]
+
+async def run_steps(steps: List[Dict[str, Any]], job_name: str, files: Optional[List[str]]) -> None:
+    coros = [run_step(step, job_name, files) for step in steps]
     await asyncio.gather(*coros)
 
 
-def grab_specific_steps(steps_to_grab, job):
+def grab_specific_steps(steps_to_grab: List[str], job: Dict[str, Any]) -> List[Dict[str, Any]]:
     relevant_steps = []
     for step in steps_to_grab:
         for actual_step in job["steps"]:
@@ -83,7 +137,7 @@ def grab_specific_steps(steps_to_grab, job):
     return relevant_steps
 
 
-def grab_all_steps_after(last_step, job):
+def grab_all_steps_after(last_step: str, job: Dict[str, Any]) -> List[Dict[str, Any]]:
     relevant_steps = []
 
     found = False
@@ -96,11 +150,13 @@ def grab_all_steps_after(last_step, job):
     return relevant_steps
 
 
-def main():
+def main() -> None:
     parser = argparse.ArgumentParser(
         description="Pull shell scripts out of GitHub actions and run them"
     )
     parser.add_argument("--file", help="YAML file with actions", required=True)
+    parser.add_argument("--file-filter", help="only pass through files with this extension", default='')
+    parser.add_argument("--changed-only", help="only run on changed files", action='store_true', default=False)
     parser.add_argument("--job", help="job name", required=True)
     parser.add_argument("--step", action="append", help="steps to run (in order)")
     parser.add_argument(
@@ -108,6 +164,15 @@ def main():
     )
     args = parser.parse_args()
 
+    changed_files = None
+    if args.changed_only:
+        changed_files = []
+        for f in find_changed_files():
+            for file_filter in args.file_filter:
+                if f.endswith(file_filter):
+                    changed_files.append(f)
+                    break
+
     if args.step is None and args.all_steps_after is None:
         raise RuntimeError("1+ --steps or --all-steps-after must be provided")
 
@@ -129,8 +194,7 @@ def main():
     else:
         relevant_steps = grab_all_steps_after(args.all_steps_after, job)
 
-    # pprint.pprint(relevant_steps)
-    asyncio.run(run_steps(relevant_steps, args.job))
+    asyncio.run(run_steps(relevant_steps, args.job, changed_files))  # type: ignore[attr-defined]
 
 
 if __name__ == "__main__":
diff --git a/tools/test/test_actions_local_runner.py b/tools/test/test_actions_local_runner.py
new file mode 100644
index 00000000000..2de1410d663
--- /dev/null
+++ b/tools/test/test_actions_local_runner.py
@@ -0,0 +1,60 @@
+import unittest
+import sys
+import contextlib
+import io
+from typing import List, Dict, Any
+
+from tools import actions_local_runner
+
+
+if __name__ == '__main__':
+    if sys.version_info >= (3, 8):
+        # actions_local_runner uses asyncio features not available in 3.6, and
+        # IsolatedAsyncioTestCase was added in 3.8, so skip testing on
+        # unsupported systems
+        class TestRunner(unittest.IsolatedAsyncioTestCase):
+            def run(self, *args: List[Any], **kwargs: List[Dict[str, Any]]) -> Any:
+                return super().run(*args, **kwargs)
+
+            def test_step_extraction(self) -> None:
+                fake_job = {
+                    "steps": [
+                        {
+                            "name": "test1",
+                            "run": "echo hi"
+                        },
+                        {
+                            "name": "test2",
+                            "run": "echo hi"
+                        },
+                        {
+                            "name": "test3",
+                            "run": "echo hi"
+                        },
+                    ]
+                }
+
+                actual = actions_local_runner.grab_specific_steps(["test2"], fake_job)
+                expected = [
+                    {
+                        "name": "test2",
+                        "run": "echo hi"
+                    },
+                ]
+                self.assertEqual(actual, expected)
+
+            async def test_runner(self) -> None:
+                fake_step = {
+                    "name": "say hello",
+                    "run": "echo hi"
+                }
+                f = io.StringIO()
+                with contextlib.redirect_stdout(f):
+                    await actions_local_runner.run_steps([fake_step], "test", None)
+
+                result = f.getvalue()
+                self.assertIn("say hello", result)
+                self.assertIn("hi", result)
+
+
+    unittest.main()