diff --git a/tools/stats/upload_stats_lib.py b/tools/stats/upload_stats_lib.py
index e7c4e414187..1cba78f68da 100644
--- a/tools/stats/upload_stats_lib.py
+++ b/tools/stats/upload_stats_lib.py
@@ -1,11 +1,14 @@
+import gzip
+import io
+import json
 import os
-import requests
 import zipfile
 from pathlib import Path
-from typing import Dict, List, Any
+from typing import Any, Dict, List
 
-import rockset  # type: ignore[import]
 import boto3  # type: ignore[import]
+import requests
+import rockset  # type: ignore[import]
 
 PYTORCH_REPO = "https://api.github.com/repos/pytorch/pytorch"
 S3_RESOURCE = boto3.resource("s3")
@@ -110,6 +113,29 @@ def upload_to_rockset(collection: str, docs: List[Any]) -> None:
     print("Done!")
 
 
+def upload_to_s3(
+    workflow_run_id: int,
+    workflow_run_attempt: int,
+    collection: str,
+    docs: List[Dict[str, Any]],
+) -> None:
+    print(f"Writing {len(docs)} documents to S3")
+    body = io.StringIO()
+    for doc in docs:
+        json.dump(doc, body)
+        body.write("\n")
+
+    S3_RESOURCE.Object(
+        "ossci-raw-job-status",
+        f"{collection}/{workflow_run_id}/{workflow_run_attempt}",
+    ).put(
+        Body=gzip.compress(body.getvalue().encode()),
+        ContentEncoding="gzip",
+        ContentType="application/json",
+    )
+    print("Done!")
+
+
 def unzip(p: Path) -> None:
     """Unzip the provided zipfile to a similarly-named directory.
 
diff --git a/tools/stats/upload_test_stats.py b/tools/stats/upload_test_stats.py
index a5ce449c0a6..9392cc5fb45 100644
--- a/tools/stats/upload_test_stats.py
+++ b/tools/stats/upload_test_stats.py
@@ -8,7 +8,7 @@ from tempfile import TemporaryDirectory
 from tools.stats.upload_stats_lib import (
     download_gha_artifacts,
     download_s3_artifacts,
-    upload_to_rockset,
+    upload_to_s3,
     unzip,
 )
 
@@ -207,8 +207,17 @@ if __name__ == "__main__":
 
     # For PRs, only upload a summary of test_runs. This helps lower the
     # volume of writes we do to Rockset.
-    upload_to_rockset("test_run_summary", summarize_test_cases(test_cases))
+    upload_to_s3(
+        args.workflow_run_id,
+        args.workflow_run_attempt,
+        "test_run_summary",
+        summarize_test_cases(test_cases),
+    )
+
+    # upload_to_rockset("test_run_summary", summarize_test_cases(test_cases))
 
     if args.head_branch == "master":
         # For master jobs, upload everytihng.
-        upload_to_rockset("test_run", test_cases)
+        upload_to_s3(
+            args.workflow_run_id, args.workflow_run_attempt, "test_run", test_cases
+        )
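
Note (not part of the diff): the new `upload_to_s3` helper writes each document as one JSON object per line, gzips the whole payload, and puts it at `ossci-raw-job-status/{collection}/{workflow_run_id}/{workflow_run_attempt}`. A minimal sketch of how a consumer might read those records back, assuming boto3 credentials are configured; the `read_uploaded_docs` helper and the example IDs are illustrative, not part of this change:

```python
import gzip
import json
from typing import Any, Dict, List

import boto3  # assumes AWS credentials are available in the environment

S3_RESOURCE = boto3.resource("s3")


def read_uploaded_docs(
    collection: str, workflow_run_id: int, workflow_run_attempt: int
) -> List[Dict[str, Any]]:
    # Fetch the gzipped, newline-delimited JSON blob written by upload_to_s3.
    obj = S3_RESOURCE.Object(
        "ossci-raw-job-status",
        f"{collection}/{workflow_run_id}/{workflow_run_attempt}",
    )
    raw = gzip.decompress(obj.get()["Body"].read())
    # One JSON document per line, mirroring the json.dump(...) + "\n" writes above.
    return [json.loads(line) for line in raw.splitlines() if line]


# Example usage (hypothetical workflow run ID and attempt):
# docs = read_uploaded_docs("test_run_summary", 123456789, 1)
```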