pytorch/.github/scripts/process_commit.py
Andrey Talman fc914346d7 Fix alignment, make sure release labels are included (#73739)
Summary:
This issue was reported on this [PR](https://github.com/pytorch/pytorch/pull/73436#issuecomment-1057496607)  the bot commented on the PR even tho the labels where already added to pr correctly.  Issue found to one not idented properly line that collected repo labels in a set.

Tested locally by running the script against the commit, commit_hash = "2ab9702955e461eab03809a71e10c6fc9cb54cb5"

Produced following output
```
PR Labels:
{'ciflow/default', 'cla signed', 'release notes: quantization', 'topic: improvements'}
Repo release Labels:
{'release notes: distributed (fsdp)', 'release notes: python_frontend', 'release notes: mobile', 'release notes: linalg_frontend', 'release notes: performance_as_product', 'release notes: autograd', 'release notes: sparse', 'release notes: build', 'release notes: composability', 'release notes: lazy', 'release notes: visualization', 'release notes: cpp', 'release notes: package/deploy', 'release notes: distributed (sharded)', 'release notes: distributed (ddp)', 'release notes: rocm', 'release notes: memory format', 'release notes: foreach_frontend', 'release notes: vulkan', 'release notes: releng', 'release notes: profiler', 'release notes: distributed (rpc)', 'release notes: jit', 'release notes: onnx', 'release notes: dataloader', 'release notes: complex', 'release notes: benchmark', 'release notes: distributed (c10d)', 'release notes: fx', 'release notes: distributed (pipeline)', 'release notes: Meta API', 'release notes: quantization', 'release notes: cuda', 'release notes: nn'}
Intersection release labels:
{'release notes: quantization'}
Intersection topic labels:
{'topic: improvements'}
```

Pull Request resolved: https://github.com/pytorch/pytorch/pull/73739

Reviewed By: jbschlosser

Differential Revision: D34613506

Pulled By: atalman

fbshipit-source-id: 354a43aee88c6db228eb890e4f1bef3bc6b7675f
(cherry picked from commit a2a99686e0b23afa07a9ee71d2f5f15f5bec2513)
2022-03-03 16:30:55 +00:00

106 lines
4.1 KiB
Python

#!/usr/bin/env python3
"""
This script finds the user/pr creator responsible for labeling a PR by a commit SHA. It is used by the workflow in
'.github/workflows/pr-labels.yml'. If there exists no PR associated with the commit or the PR is properly labeled,
this script is a no-op.
Note: we ping the user only, not the reviewers, as the reviewers can sometimes be external to pytorch
with no labeling responsibility, so we don't want to bother them.
This script is based on: https://github.com/pytorch/vision/blob/main/.github/process_commit.py
"""
import sys
from typing import Any, Set, Tuple, List
import re
import os
import json
import requests
# For a PR to be properly labeled it should have release notes label and one topic label
PULL_REQUEST_EXP = "Pull Request resolved:.*pull/(.*)"
PRIMARY_LABEL_FILTER = "release notes:"
SECONDARY_LABELS = {
"topic: bc_breaking",
"topic: deprecation",
"topic: new feature",
"topic: improvements",
"topic: bug fixes",
"topic: performance",
"topic: documentation",
"topic: developer feature",
"topic: not user facing",
}
# This secondary does not require a primary
ALLOWED_ONLY_SECONDARY = {"topic: not user facing"}
PYTORCH_REPO = "https://api.github.com/repos/pytorch/pytorch"
GITHUB_TOKEN = os.environ.get('GITHUB_TOKEN')
REQUEST_HEADERS = {'Accept': 'application/vnd.github.v3+json', 'Authorization': f'token {GITHUB_TOKEN}'}
def query_pytorch(cmd: str) -> Any:
response = requests.get(f"{PYTORCH_REPO}/{cmd}", headers=REQUEST_HEADERS)
return response.json()
def get_pr_number(commit_hash: str) -> Any:
data = query_pytorch(f"commits/{commit_hash}")
if not data or (not data["commit"]["message"]):
return None
message = data["commit"]["message"]
p = re.compile(PULL_REQUEST_EXP)
result = p.search(message)
if not result:
return None
return result.group(1)
def get_pr_author_and_labels(pr_number: int) -> Tuple[str, Set[str]]:
# See https://docs.github.com/en/rest/reference/pulls#get-a-pull-request
data = query_pytorch(f"pulls/{pr_number}")
user = data["user"]["login"]
labels = {label["name"] for label in data["labels"]}
return user, labels
def get_repo_labels() -> List[str]:
collected_labels: List[str] = list()
for page in range(0, 10):
response = query_pytorch(f"labels?per_page=100&page={page}")
page_labels = list(map(lambda x: str(x["name"]), response))
if not page_labels:
break
collected_labels += page_labels
return collected_labels
def post_pytorch_comment(pr_number: int, merger: str) -> Any:
message = {'body' : f"Hey @{merger}." + """
You've committed this PR, but it does not have both a 'release notes: ...' and 'topics: ...' label. \
Please add one of each to the PR. The 'release notes: ...' label should represent the part of \
PyTorch that this PR changes (fx, autograd, distributed, etc) and the 'topics: ...' label should \
represent the kind of PR it is (not user facing, new feature, bug fix, perf improvement, etc). \
The list of valid labels can be found [here](https://github.com/pytorch/pytorch/labels?q=release+notes) \
for the 'release notes: ...' and [here](https://github.com/pytorch/pytorch/labels?q=topic) for the \
'topics: ...'.
For changes that are 'topic: not user facing' there is no need for a release notes label."""}
response = requests.post(
f"{PYTORCH_REPO}/issues/{pr_number}/comments",
json.dumps(message),
headers=REQUEST_HEADERS)
return response.json()
if __name__ == "__main__":
commit_hash = sys.argv[1]
pr_number = get_pr_number(commit_hash)
if not pr_number:
sys.exit(0)
user, labels = get_pr_author_and_labels(pr_number)
repo_labels = get_repo_labels()
primary_labels = set(filter(lambda x: x.startswith(PRIMARY_LABEL_FILTER), repo_labels))
has_both_labels = bool(primary_labels.intersection(labels) and SECONDARY_LABELS.intersection(labels))
is_properly_labeled = has_both_labels or bool(ALLOWED_ONLY_SECONDARY.intersection(labels))
if not is_properly_labeled:
post_pytorch_comment(pr_number, user)