mirror of
https://github.com/saymrwulf/pytorch.git
synced 2026-05-14 20:57:59 +00:00
Move close_nonexistent_disable_issues.py queries to ClickHouse (#139296)
Example run: https://github.com/pytorch/pytorch/actions/runs/11601996563/job/32305991204?pr=139296 (commented out the part that actually closes issues but the queries run) Pull Request resolved: https://github.com/pytorch/pytorch/pull/139296 Approved by: https://github.com/huydhn
This commit is contained in:
parent
ae6cbd4256
commit
754b262bdb
3 changed files with 85 additions and 32 deletions
|
|
@ -3,26 +3,37 @@ import json
|
|||
import multiprocessing as mp
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
import tempfile
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Tuple
|
||||
|
||||
import requests
|
||||
import rockset # type: ignore[import]
|
||||
from gitutils import retries_decorator
|
||||
|
||||
|
||||
REPO_ROOT = Path(__file__).resolve().parent.parent.parent
|
||||
sys.path.insert(0, str(REPO_ROOT))
|
||||
from tools.testing.clickhouse import query_clickhouse
|
||||
|
||||
|
||||
sys.path.pop(0)
|
||||
|
||||
|
||||
LOGS_QUERY = """
|
||||
with
|
||||
shas as (
|
||||
SELECT
|
||||
push.head_commit.id as sha,
|
||||
distinct
|
||||
push.head_commit.id as sha
|
||||
FROM
|
||||
commons.push
|
||||
-- Not bothering with final here
|
||||
default.push
|
||||
WHERE
|
||||
push.ref = 'refs/heads/viable/strict'
|
||||
AND push.repository.full_name = 'pytorch/pytorch'
|
||||
AND push.repository.'full_name' = 'pytorch/pytorch'
|
||||
ORDER BY
|
||||
push._event_time DESC
|
||||
push.head_commit.'timestamp' desc
|
||||
LIMIT
|
||||
5
|
||||
)
|
||||
|
|
@ -30,27 +41,29 @@ select
|
|||
id,
|
||||
name
|
||||
from
|
||||
workflow_job j
|
||||
default.workflow_job j final
|
||||
join shas on shas.sha = j.head_sha
|
||||
where
|
||||
j.name like '% / test%'
|
||||
j.id in (select id from materialized_views.workflow_job_by_head_sha where head_sha in (select sha from shas))
|
||||
and j.name like '% / test%'
|
||||
and j.name not like '%rerun_disabled_tests%'
|
||||
and j.name not like '%mem_leak_check%'
|
||||
"""
|
||||
|
||||
TEST_EXISTS_QUERY = """
|
||||
select
|
||||
count(*) as c
|
||||
name
|
||||
from
|
||||
test_run_s3
|
||||
default.test_run_s3
|
||||
where
|
||||
cast(name as string) like :name
|
||||
and classname like :classname
|
||||
and _event_time > CURRENT_TIMESTAMP() - DAYS(7)
|
||||
name::String like {name: String}
|
||||
and classname like {classname: String}
|
||||
and time_inserted > CURRENT_TIMESTAMP() - INTERVAL 7 DAY
|
||||
limit 1
|
||||
"""
|
||||
|
||||
CLOSING_COMMENT = (
|
||||
"I cannot find any mention of this test in rockset for the past 7 days "
|
||||
"I cannot find any mention of this test in the database for the past 7 days "
|
||||
"or in the logs for the past 5 commits on viable/strict. Closing this "
|
||||
"issue as it is highly likely that this test has either been renamed or "
|
||||
"removed. If you think this is a false positive, please feel free to "
|
||||
|
|
@ -62,6 +75,11 @@ DISABLED_TESTS_JSON = (
|
|||
)
|
||||
|
||||
|
||||
@retries_decorator()
def query_db(query: str, params: Dict[str, Any]) -> List[Dict[str, Any]]:
    """Run a parameterized query against ClickHouse, retrying on failure.

    Thin wrapper over tools.testing.clickhouse.query_clickhouse so that the
    retries_decorator (from gitutils) handles transient network/DB errors.

    Args:
        query: SQL text; may contain ClickHouse-style bound parameters,
            e.g. ``{name: String}``.
        params: Mapping of parameter names to values; pass ``{}`` when the
            query takes no parameters.

    Returns:
        One dict per result row, keyed by column name.
    """
    return query_clickhouse(query, params)
|
||||
|
||||
|
||||
def parse_args() -> Any:
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument(
|
||||
|
|
@ -72,17 +90,6 @@ def parse_args() -> Any:
|
|||
return parser.parse_args()
|
||||
|
||||
|
||||
@retries_decorator()
def query_rockset(
    query: str, params: Optional[Dict[str, Any]] = None
) -> List[Dict[str, Any]]:
    """Run a SQL query against Rockset, retrying on failure.

    Legacy query path (superseded by the ClickHouse-based ``query_db``).
    Requires the ``ROCKSET_API_KEY`` environment variable to be set.

    Args:
        query: Rockset SQL text; may reference named parameters like ``:name``.
        params: Optional mapping of parameter names to values.

    Returns:
        One dict per result row, keyed by column name.
    """
    res = rockset.RocksetClient(
        host="api.rs2.usw2.rockset.com", api_key=os.environ["ROCKSET_API_KEY"]
    ).sql(query, params)
    # res.results is untyped in the rockset client; pin the expected shape.
    results: List[Dict[str, Any]] = res.results
    return results
|
||||
|
||||
|
||||
def download_log_worker(temp_dir: str, id: int, name: str) -> None:
|
||||
url = f"https://ossci-raw-job-status.s3.amazonaws.com/log/{id}"
|
||||
data = requests.get(url).text
|
||||
|
|
@ -137,13 +144,13 @@ def check_if_exists(
|
|||
if present:
|
||||
return True, "found in logs"
|
||||
|
||||
# Query rockset to see if the test is there
|
||||
count = query_rockset(
|
||||
# Query DB to see if the test is there
|
||||
count = query_db(
|
||||
TEST_EXISTS_QUERY, {"name": f"{name}%", "classname": f"{classname}%"}
|
||||
)
|
||||
if count[0]["c"] == 0:
|
||||
if len(count) == 0:
|
||||
return False, "not found"
|
||||
return True, "found in rockset"
|
||||
return True, "found in DB"
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
|
@ -151,7 +158,7 @@ if __name__ == "__main__":
|
|||
disabled_tests_json = json.loads(requests.get(DISABLED_TESTS_JSON).text)
|
||||
|
||||
all_logs = []
|
||||
jobs = query_rockset(LOGS_QUERY)
|
||||
jobs = query_db(LOGS_QUERY, {})
|
||||
with tempfile.TemporaryDirectory() as temp_dir:
|
||||
pool = mp.Pool(20)
|
||||
for job in jobs:
|
||||
|
|
|
|||
|
|
@ -12,12 +12,17 @@ jobs:
|
|||
steps:
|
||||
- name: Checkout PyTorch
|
||||
uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
|
||||
with:
|
||||
submodules: false
|
||||
fetch-depth: 1
|
||||
|
||||
- name: Run close_nonexistent_disable_issues.py
|
||||
env:
|
||||
ROCKSET_API_KEY: ${{ secrets.ROCKSET_API_KEY }}
|
||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
CLICKHOUSE_ENDPOINT: ${{ secrets.CLICKHOUSE_ENDPOINT }}
|
||||
CLICKHOUSE_USERNAME: ${{ secrets.CLICKHOUSE_READONLY_USERNAME }}
|
||||
CLICKHOUSE_PASSWORD: ${{ secrets.CLICKHOUSE_READONLY_PASSWORD }}
|
||||
run: |
|
||||
pip3 install requests==2.32.2
|
||||
pip3 install rockset==1.0.3
|
||||
pip3 install clickhouse-connect==0.7.16
|
||||
python3 .github/scripts/close_nonexistent_disable_issues.py
|
||||
|
|
|
|||
41
tools/testing/clickhouse.py
Normal file
41
tools/testing/clickhouse.py
Normal file
|
|
@ -0,0 +1,41 @@
|
|||
import json
|
||||
import os
|
||||
from functools import lru_cache
|
||||
from typing import Any, Dict, List
|
||||
|
||||
import clickhouse_connect # type: ignore[import]
|
||||
|
||||
|
||||
@lru_cache(maxsize=1)
def get_clickhouse_client() -> Any:
    """Build and memoize a single ClickHouse client.

    Connection settings come from the environment variables
    ``CLICKHOUSE_ENDPOINT``, ``CLICKHOUSE_USERNAME``, and
    ``CLICKHOUSE_PASSWORD``.  The result is cached so repeated calls share
    one client instance.
    """
    host = os.environ["CLICKHOUSE_ENDPOINT"]
    # Normalize the endpoint down to a bare hostname: the scheme and port
    # are passed explicitly to get_client below.  (For some reason these
    # aren't handled automatically here, though they are in the lambda.)
    scheme = "https://"
    if host.startswith(scheme):
        host = host[len(scheme) :]
    port_suffix = ":8443"
    if host.endswith(port_suffix):
        host = host[: -len(port_suffix)]
    return clickhouse_connect.get_client(
        host=host,
        user=os.environ["CLICKHOUSE_USERNAME"],
        password=os.environ["CLICKHOUSE_PASSWORD"],
        secure=True,
        interface="https",
        port=8443,
    )
|
||||
|
||||
|
||||
def query_clickhouse(query: str, params: Dict[str, Any]) -> List[Dict[str, Any]]:
    """
    Queries ClickHouse. Returns datetime in YYYY-MM-DD HH:MM:SS format.
    """
    # JSONEachRow format emits one JSON object per line of the raw response.
    raw = get_clickhouse_client().raw_query(query, params, fmt="JSONEachRow")
    # Decode and parse each non-empty line into a row dict.
    return [json.loads(line) for line in raw.decode().split("\n") if line]
|
||||
Loading…
Reference in a new issue