From 3e0e137555d6b5908a89a5803ca7d9e7ab95d2ba Mon Sep 17 00:00:00 2001
From: Michael Suo <suo@fb.com>
Date: Fri, 15 Apr 2022 11:07:31 -0700
Subject: [PATCH] [lint] add test ownership lint to lintrunner

Pull Request resolved: https://github.com/pytorch/pytorch/pull/75898

Approved by: https://github.com/seemethere, https://github.com/janeyx99
---
 .github/scripts/lint_test_ownership.py     |  88 -----------
 .github/workflows/lint.yml                 |   5 -
 .lintrunner.toml                           |  16 ++
 tools/linter/adapters/exec_linter.py       |  10 +-
 tools/linter/adapters/testowners_linter.py | 164 +++++++++++++++++++++
 5 files changed, 187 insertions(+), 96 deletions(-)
 delete mode 100755 .github/scripts/lint_test_ownership.py
 create mode 100755 tools/linter/adapters/testowners_linter.py

diff --git a/.github/scripts/lint_test_ownership.py b/.github/scripts/lint_test_ownership.py
deleted file mode 100755
index 270019c0f56..00000000000
--- a/.github/scripts/lint_test_ownership.py
+++ /dev/null
@@ -1,88 +0,0 @@
-#!/usr/bin/env python3
-'''
-Test ownership was introduced in https://github.com/pytorch/pytorch/issues/66232.
-
-This lint verifies that every Python test file (file that matches test_*.py or *_test.py in the test folder)
-has valid ownership information in a comment header. Valid means:
-  - The format of the header follows the pattern "# Owner(s): ["list", "of owner", "labels"]
-  - Each owner label actually exists in PyTorch
-  - Each owner label starts with "module: " or "oncall: " or is in ACCEPTABLE_OWNER_LABELS
-
-This file is expected to run in the root directory of pytorch/pytorch.
-'''
-import boto3  # type: ignore[import]
-import botocore  # type: ignore[import]
-import fnmatch
-import json
-import sys
-from pathlib import Path
-from typing import List, Any
-
-
-# Team/owner labels usually start with "module: " or "oncall: ", but the following are acceptable exceptions
-ACCEPTABLE_OWNER_LABELS = ["NNC", "high priority"]
-GLOB_EXCEPTIONS = [
-    "**/test/run_test.py"
-]
-
-PYTORCH_ROOT = Path(__file__).resolve().parent.parent.parent
-TEST_DIR = PYTORCH_ROOT / "test"
-CURRENT_FILE_NAME = Path(__file__).resolve().relative_to(PYTORCH_ROOT)
-
-S3_RESOURCE_READ_ONLY = boto3.resource("s3", config=botocore.config.Config(signature_version=botocore.UNSIGNED))
-
-
-def get_all_test_files() -> List[Path]:
-    test_files = list(TEST_DIR.glob("**/test_*.py"))
-    test_files.extend(list(TEST_DIR.glob("**/*_test.py")))
-    return [f for f in test_files if not any([fnmatch.fnmatch(str(f), g) for g in GLOB_EXCEPTIONS])]
-
-
-def get_pytorch_labels() -> Any:
-    bucket = S3_RESOURCE_READ_ONLY.Bucket("ossci-metrics")
-    summaries = bucket.objects.filter(Prefix="pytorch_labels.json")
-    for summary in summaries:
-        labels = summary.get()["Body"].read()
-    return json.loads(labels)
-
-
-# Returns a string denoting the error invalidating the label OR an empty string if nothing is wrong
-def validate_label(label: str, pytorch_labels: List[str]) -> str:
-    if label not in pytorch_labels:
-        return f"{label} is not a PyTorch label (please choose from https://github.com/pytorch/pytorch/labels)"
-    if label.startswith("module:") or label.startswith("oncall:") or label in ACCEPTABLE_OWNER_LABELS:
-        return ""
-    return f"{label} is not an acceptable owner (please update to another label or edit ACCEPTABLE_OWNERS_LABELS " \
-        "in {CURRENT_FILE_NAME}"
-
-
-# Returns a string denoting the error invalidating the file OR an empty string if nothing is wrong
-def validate_file(filename: Path, pytorch_labels: List[str]) -> str:
-    prefix = "# Owner(s): "
-    relative_name = Path(filename).relative_to(PYTORCH_ROOT)
-    with open(filename) as f:
-        for line in f.readlines():
-            if line.startswith(prefix):
-                labels = json.loads(line[len(prefix):])
-                labels_msgs = [validate_label(label, pytorch_labels) for label in labels]
-                file_msg = ", ".join([x for x in labels_msgs if x != ""])
-                return f"{relative_name}: {file_msg}" if file_msg != "" else ""
-    return f"{relative_name}: missing a comment header with ownership information."
-
-
-def main() -> None:
-    test_file_paths = get_all_test_files()
-    pytorch_labels = get_pytorch_labels()
-
-    file_msgs = [validate_file(f, pytorch_labels) for f in test_file_paths]
-    err_msg = "\n".join([x for x in file_msgs if x != ""])
-    if err_msg != "":
-        err_msg = err_msg + "\n\nIf you see files with missing ownership information above, " \
-            "please add the following line\n\n# Owner(s): [\"<owner: label>\"]\n\nto the top of each test file. " \
-            "The owner should be an existing pytorch/pytorch label."
-        print(err_msg)
-        sys.exit(1)
-
-
-if __name__ == '__main__':
-    main()
diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml
index 50f8dcfc652..3b12f678b09 100644
--- a/.github/workflows/lint.yml
+++ b/.github/workflows/lint.yml
@@ -90,11 +90,6 @@ jobs:
         run: |
           set -eux
           python torch/testing/_check_kernel_launches.py |& tee "${GITHUB_WORKSPACE}"/cuda_kernel_launch_checks.txt
-      - name: Ensure all test files have header containing ownership information
-        if: always()
-        run: |
-          python3 -m pip install boto3==1.19.12
-          .github/scripts/lint_test_ownership.py
 
   workflow-checks:
     name: workflow-checks
diff --git a/.lintrunner.toml b/.lintrunner.toml
index 4888122a13c..020295bea31 100644
--- a/.lintrunner.toml
+++ b/.lintrunner.toml
@@ -515,3 +515,19 @@ init_command = [
     '--output-dir=.lintbin',
     '--output-name=actionlint',
 ]
+
+[[linter]]
+code = 'TESTOWNERS'
+include_patterns = [
+    'test/**/test_*.py',
+    'test/**/*_test.py',
+]
+exclude_patterns = [
+    'test/run_test.py',
+]
+command = [
+    'python3',
+    'tools/linter/adapters/testowners_linter.py',
+    '--',
+    '@{{PATHSFILE}}',
+]
diff --git a/tools/linter/adapters/exec_linter.py b/tools/linter/adapters/exec_linter.py
index f263d11d545..f00dc60afbb 100644
--- a/tools/linter/adapters/exec_linter.py
+++ b/tools/linter/adapters/exec_linter.py
@@ -51,13 +51,17 @@ def check_file(filename: str) -> Optional[LintMessage]:
 
 if __name__ == "__main__":
     parser = argparse.ArgumentParser(
-        description="native functions linter", fromfile_prefix_chars="@",
+        description="exec linter",
+        fromfile_prefix_chars="@",
     )
     parser.add_argument(
-        "--verbose", action="store_true", help="location of native_functions.yaml",
+        "--verbose",
+        action="store_true",
     )
     parser.add_argument(
-        "filenames", nargs="+", help="paths to lint",
+        "filenames",
+        nargs="+",
+        help="paths to lint",
     )
 
     args = parser.parse_args()
diff --git a/tools/linter/adapters/testowners_linter.py b/tools/linter/adapters/testowners_linter.py
new file mode 100755
index 00000000000..b65cfde4d79
--- /dev/null
+++ b/tools/linter/adapters/testowners_linter.py
@@ -0,0 +1,164 @@
+#!/usr/bin/env python3
+"""
+Test ownership was introduced in https://github.com/pytorch/pytorch/issues/66232.
+
+This lint verifies that every Python test file (file that matches test_*.py or *_test.py in the test folder)
+has valid ownership information in a comment header. Valid means:
+  - The format of the header follows the pattern "# Owner(s): ["list", "of owner", "labels"]
+  - Each owner label actually exists in PyTorch
+  - Each owner label starts with "module: " or "oncall: " or is in ACCEPTABLE_OWNER_LABELS
+"""
+import json
+import argparse
+from enum import Enum
+from typing import List, Any, Optional, NamedTuple
+from urllib.request import urlopen
+
+
+LINTER_CODE = "TESTOWNERS"
+
+
+class LintSeverity(str, Enum):
+    ERROR = "error"
+    WARNING = "warning"
+    ADVICE = "advice"
+    DISABLED = "disabled"
+
+
+class LintMessage(NamedTuple):
+    path: Optional[str]
+    line: Optional[int]
+    char: Optional[int]
+    code: str
+    severity: LintSeverity
+    name: str
+    original: Optional[str]
+    replacement: Optional[str]
+    description: Optional[str]
+
+
+# Team/owner labels usually start with "module: " or "oncall: ", but the following are acceptable exceptions
+ACCEPTABLE_OWNER_LABELS = ["NNC", "high priority"]
+OWNERS_PREFIX = "# Owner(s): "
+
+
+def get_pytorch_labels() -> Any:
+    labels = (
+        urlopen("https://ossci-metrics.s3.amazonaws.com/pytorch_labels.json")
+        .read()
+        .decode("utf-8")
+    )
+    return json.loads(labels)
+
+
+PYTORCH_LABELS = get_pytorch_labels()
+# Team/owner labels usually start with "module: " or "oncall: ", but the following are acceptable exceptions
+ACCEPTABLE_OWNER_LABELS = ["NNC", "high priority"]
+GLOB_EXCEPTIONS = ["**/test/run_test.py"]
+
+
+def check_labels(
+    labels: List[str], filename: str, line_number: int
+) -> List[LintMessage]:
+    lint_messages = []
+    for label in labels:
+        if label not in PYTORCH_LABELS:
+            lint_messages.append(
+                LintMessage(
+                    path=filename,
+                    line=line_number,
+                    char=None,
+                    code=LINTER_CODE,
+                    severity=LintSeverity.ERROR,
+                    name="[invalid-label]",
+                    original=None,
+                    replacement=None,
+                    description=(
+                        f"{label} is not a PyTorch label "
+                        "(please choose from https://github.com/pytorch/pytorch/labels)"
+                    ),
+                )
+            )
+
+        if (
+            label.startswith("module:")
+            or label.startswith("oncall:")
+            or label in ACCEPTABLE_OWNER_LABELS
+        ):
+            continue
+
+        lint_messages.append(
+            LintMessage(
+                path=filename,
+                line=line_number,
+                char=None,
+                code=LINTER_CODE,
+                severity=LintSeverity.ERROR,
+                name="[invalid-owner]",
+                original=None,
+                replacement=None,
+                description=(
+                    f"{label} is not an acceptable owner "
+                    "(please update to another label or edit ACCEPTABLE_OWNERS_LABELS "
+                    "in tools/linters/adapters/testowners_linter.py"
+                ),
+            )
+        )
+
+    return lint_messages
+
+
+def check_file(filename: str) -> List[LintMessage]:
+    lint_messages = []
+    has_ownership_info = False
+
+    with open(filename) as f:
+        for idx, line in enumerate(f):
+            if not line.startswith(OWNERS_PREFIX):
+                continue
+
+            has_ownership_info = True
+            labels = json.loads(line[len(OWNERS_PREFIX) :])
+            lint_messages.extend(check_labels(labels, filename, idx + 1))
+
+    if has_ownership_info is False:
+        lint_messages.append(
+            LintMessage(
+                path=filename,
+                line=None,
+                char=None,
+                code=LINTER_CODE,
+                severity=LintSeverity.ERROR,
+                name="[no-owner-info]",
+                original=None,
+                replacement=None,
+                description="Missing a comment header with ownership information.",
+            )
+        )
+
+    return lint_messages
+
+
+def main() -> None:
+    parser = argparse.ArgumentParser(
+        description="test ownership linter",
+        fromfile_prefix_chars="@",
+    )
+    parser.add_argument(
+        "filenames",
+        nargs="+",
+        help="paths to lint",
+    )
+
+    args = parser.parse_args()
+    lint_messages = []
+
+    for filename in args.filenames:
+        lint_messages.extend(check_file(filename))
+
+    for lint_message in lint_messages:
+        print(json.dumps(lint_message._asdict()), flush=True)
+
+
+if __name__ == "__main__":
+    main()