From df136df8d5f646d49bcd68cf6e0b9ce3c553a700 Mon Sep 17 00:00:00 2001 From: Catherine Lee Date: Thu, 7 Nov 2024 20:14:12 +0000 Subject: [PATCH] Remove upload_test_stat_aggregates script (#139915) Instead of moving these queries to ClickHouse, we're just going to remove it since it's not really used. We do want something for test aggregates, but we can make a new script instead Pull Request resolved: https://github.com/pytorch/pytorch/pull/139915 Approved by: https://github.com/huydhn --- ...set-uploads.yml => nightly-s3-uploads.yml} | 13 +-- tools/stats/upload_external_contrib_stats.py | 2 +- tools/stats/upload_stats_lib.py | 3 +- tools/stats/upload_test_stat_aggregates.py | 86 ------------------- 4 files changed, 7 insertions(+), 97 deletions(-) rename .github/workflows/{nightly-rockset-uploads.yml => nightly-s3-uploads.yml} (73%) delete mode 100644 tools/stats/upload_test_stat_aggregates.py diff --git a/.github/workflows/nightly-rockset-uploads.yml b/.github/workflows/nightly-s3-uploads.yml similarity index 73% rename from .github/workflows/nightly-rockset-uploads.yml rename to .github/workflows/nightly-s3-uploads.yml index 08942b15a32..39869c94997 100644 --- a/.github/workflows/nightly-rockset-uploads.yml +++ b/.github/workflows/nightly-s3-uploads.yml @@ -1,4 +1,4 @@ -name: Nightly Upload to rockset +name: Nightly Upload to s3 on: schedule: @@ -7,8 +7,7 @@ on: pull_request: paths: - 'tools/stats/upload_external_contrib_stats.py' - - 'tools/stats/upload_test_stat_aggregates.py' - - '.github/workflows/nightly-rockset-uploads.yml' + - '.github/workflows/nightly-s3-uploads.yml' concurrency: group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }} @@ -16,7 +15,7 @@ concurrency: jobs: - upload-stats-to-rockset: + upload-stats-to-s3: if: github.repository_owner == 'pytorch' runs-on: ubuntu-22.04 environment: upload-stats @@ -33,16 +32,14 @@ jobs: cache: pip - run: | - pip3 install requests==2.32.2 rockset==1.0.3 boto3==1.35.42 + pip3 install requests==2.32.2 boto3==1.35.42 - name: Upload external contribution stats uses: nick-fields/retry@v3.0.0 env: - ROCKSET_API_KEY: ${{ secrets.ROCKSET_API_KEY }} GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} - if: ${{ env.ROCKSET_API_KEY != '' }} with: timeout_minutes: 10 max_attempts: 10 @@ -50,5 +47,3 @@ jobs: command: | echo "Uploading external contribution stats for 10 days starting on" "$(date -d '10 days ago' '+%Y-%m-%d')" python3 -m tools.stats.upload_external_contrib_stats --startDate "$(date -d '10 days ago' '+%Y-%m-%d')" --length 10 - echo "Uploading testing aggregate data" "$(date -d yesterday '+%Y-%m-%d')" - python3 -m tools.stats.upload_test_stat_aggregates --date "$(date -d yesterday '+%Y-%m-%d')" diff --git a/tools/stats/upload_external_contrib_stats.py b/tools/stats/upload_external_contrib_stats.py index a90811592e2..62c96cb46e9 100644 --- a/tools/stats/upload_external_contrib_stats.py +++ b/tools/stats/upload_external_contrib_stats.py @@ -112,7 +112,7 @@ def get_external_pr_data( if __name__ == "__main__": parser = argparse.ArgumentParser( - description="Upload external contribution stats to Rockset" + description="Upload external contribution stats to s3" ) parser.add_argument( "--startDate", diff --git a/tools/stats/upload_stats_lib.py b/tools/stats/upload_stats_lib.py index 213cc7abc48..872e7fcae94 100644 --- a/tools/stats/upload_stats_lib.py +++ b/tools/stats/upload_stats_lib.py @@ -13,7 +13,6 @@ from typing import Any, Callable, Dict, List, Optional import boto3 # type: ignore[import] import requests -import rockset # type: ignore[import] PYTORCH_REPO = "https://api.github.com/repos/pytorch/pytorch" @@ -128,6 +127,8 @@ def upload_to_rockset( workspace: str = "commons", client: Any = None, ) -> None: + import rockset # type: ignore[import] + if not client: client = rockset.RocksetClient( host="api.usw2a1.rockset.com", api_key=os.environ["ROCKSET_API_KEY"] diff --git a/tools/stats/upload_test_stat_aggregates.py b/tools/stats/upload_test_stat_aggregates.py deleted file mode 100644 index e128ca4bf14..00000000000 --- a/tools/stats/upload_test_stat_aggregates.py +++ /dev/null @@ -1,86 +0,0 @@ -from __future__ import annotations - -import argparse -import ast -import datetime -import json -import os -import re -from typing import Any - -import rockset # type: ignore[import] - -from tools.stats.upload_stats_lib import upload_to_s3 - - -def get_oncall_from_testfile(testfile: str) -> list[str] | None: - path = f"test/{testfile}" - if not path.endswith(".py"): - path += ".py" - # get oncall on test file - try: - with open(path) as f: - for line in f: - if line.startswith("# Owner(s): "): - possible_lists = re.findall(r"\[.*\]", line) - if len(possible_lists) > 1: - raise Exception("More than one list found") # noqa: TRY002 - elif len(possible_lists) == 0: - raise Exception( # noqa: TRY002 - "No oncalls found or file is badly formatted" - ) # noqa: TRY002 - oncalls = ast.literal_eval(possible_lists[0]) - return list(oncalls) - except Exception as e: - if "." in testfile: - return [f"module: {testfile.split('.')[0]}"] - else: - return ["module: unmarked"] - return None - - -def get_test_stat_aggregates(date: datetime.date) -> Any: - # Initialize the Rockset client with your API key - rockset_api_key = os.environ["ROCKSET_API_KEY"] - rockset_api_server = "api.rs2.usw2.rockset.com" - iso_date = date.isoformat() - rs = rockset.RocksetClient(host="api.usw2a1.rockset.com", api_key=rockset_api_key) - - # Define the name of the Rockset collection and lambda function - collection_name = "commons" - lambda_function_name = "test_insights_per_daily_upload" - query_parameters = [ - rockset.models.QueryParameter(name="startTime", type="string", value=iso_date) - ] - api_response = rs.QueryLambdas.execute_query_lambda( - query_lambda=lambda_function_name, - version="692684fa5b37177f", - parameters=query_parameters, - ) - for i in range(len(api_response["results"])): - oncalls = get_oncall_from_testfile(api_response["results"][i]["test_file"]) - api_response["results"][i]["oncalls"] = oncalls - return json.loads( - json.dumps(api_response["results"], indent=4, sort_keys=True, default=str) - ) - - -if __name__ == "__main__": - parser = argparse.ArgumentParser( - description="Upload test stat aggregates to Rockset." - ) - parser.add_argument( - "--date", - type=datetime.date.fromisoformat, - help="Date to upload test stat aggregates for (YYYY-MM-DD). Must be in the last 30 days", - required=True, - ) - args = parser.parse_args() - if args.date < datetime.datetime.now().date() - datetime.timedelta(days=30): - raise ValueError("date must be in the last 30 days") - data = get_test_stat_aggregates(date=args.date) - upload_to_s3( - bucket_name="torchci-aggregated-stats", - key=f"test_data_aggregates/{str(args.date)}", - docs=data, - )