From 21dccbca62c88b796ba5730115141e69b8b2e6ab Mon Sep 17 00:00:00 2001 From: Sam Estep Date: Mon, 8 Feb 2021 10:12:37 -0800 Subject: [PATCH] Revert D26232345: [pytorch][PR] Report test time regressions Test Plan: revert-hammer Differential Revision: D26232345 (https://github.com/pytorch/pytorch/commit/7467f90b131ab28340d1b6c1f565fc4291424af1) Original commit changeset: b687b1737519 fbshipit-source-id: 10a031c5500b083f7c82f2ae2743b671c5a07bff --- .circleci/config.yml | 4 +- .../job-specs/pytorch-job-specs.yml | 4 +- mypy.ini | 6 - test/print_test_stats.py | 254 +++++ test/test_testing.py | 452 --------- tools/code_coverage/package/oss/init.py | 1 + torch/testing/_internal/print_test_stats.py | 864 ------------------ 7 files changed, 259 insertions(+), 1326 deletions(-) create mode 100755 test/print_test_stats.py delete mode 100755 torch/testing/_internal/print_test_stats.py diff --git a/.circleci/config.yml b/.circleci/config.yml index 7420b41939d..ca26c432e08 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -642,9 +642,9 @@ jobs: export CIRCLE_JOB="$CIRCLE_JOB" export CIRCLE_WORKFLOW_ID="$CIRCLE_WORKFLOW_ID" cd workspace - python torch/testing/_internal/print_test_stats.py --upload-to-s3 --compare-with-s3 test + python test/print_test_stats.py --upload-to-s3 test EOL - echo "(cat docker_commands.sh | docker exec -u jenkins -e LANG=C.UTF-8 -i "$id" bash) 2>&1" > command.sh + echo "(cat docker_commands.sh | docker exec -u jenkins -i "$id" bash) 2>&1" > command.sh unbuffer bash command.sh | ts echo "Retrieving test reports" diff --git a/.circleci/verbatim-sources/job-specs/pytorch-job-specs.yml b/.circleci/verbatim-sources/job-specs/pytorch-job-specs.yml index bf70df71e98..99b327c275a 100644 --- a/.circleci/verbatim-sources/job-specs/pytorch-job-specs.yml +++ b/.circleci/verbatim-sources/job-specs/pytorch-job-specs.yml @@ -201,9 +201,9 @@ jobs: export CIRCLE_JOB="$CIRCLE_JOB" export CIRCLE_WORKFLOW_ID="$CIRCLE_WORKFLOW_ID" cd workspace - python torch/testing/_internal/print_test_stats.py --upload-to-s3 --compare-with-s3 test + python test/print_test_stats.py --upload-to-s3 test EOL - echo "(cat docker_commands.sh | docker exec -u jenkins -e LANG=C.UTF-8 -i "$id" bash) 2>&1" > command.sh + echo "(cat docker_commands.sh | docker exec -u jenkins -i "$id" bash) 2>&1" > command.sh unbuffer bash command.sh | ts echo "Retrieving test reports" diff --git a/mypy.ini b/mypy.ini index ae1547b6c6e..65187724bfd 100644 --- a/mypy.ini +++ b/mypy.ini @@ -242,9 +242,3 @@ ignore_missing_imports = True [mypy-mypy.*] ignore_missing_imports = True - -[mypy-xml.*] -ignore_missing_imports = True - -[mypy-boto3.*] -ignore_missing_imports = True diff --git a/test/print_test_stats.py b/test/print_test_stats.py new file mode 100755 index 00000000000..95b267028fa --- /dev/null +++ b/test/print_test_stats.py @@ -0,0 +1,254 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# Read and print test results statistics +from xml.dom import minidom +from glob import glob +import bz2 +import json +import os +import statistics +import time + +import boto3 +import datetime +import requests + +class TestCase: + def __init__(self, dom): + self.class_name = str(dom.attributes['classname'].value) + self.name = str(dom.attributes['name'].value) + self.time = float(dom.attributes['time'].value) + self.errored = len(dom.getElementsByTagName('error')) > 0 + self.failed = len(dom.getElementsByTagName('failure')) > 0 + self.skipped = len(dom.getElementsByTagName('skipped')) > 0 + + +class TestSuite: + def __init__(self, 
name): + self.name = name + self.test_cases = [] + self.failed_count = 0 + self.skipped_count = 0 + self.errored_count = 0 + self.total_time = 0.0 + + def __repr__(self): + rc = f'{self.name} run_time: {self.total_time:.2f} tests: {len(self.test_cases)}' + if self.skipped_count > 0: + rc += f' skipped: {self.skipped_count}' + return f'TestSuite({rc})' + + def append(self, test_case): + self.test_cases.append(test_case) + self.total_time += test_case.time + self.failed_count += 1 if test_case.failed else 0 + self.skipped_count += 1 if test_case.skipped else 0 + self.errored_count += 1 if test_case.errored else 0 + + def print_report(self, num_longest=3): + sorted_tests = sorted(self.test_cases, key=lambda x: x.time) + test_count = len(sorted_tests) + print(f"class {self.name}:") + print(f" tests: {test_count} failed: {self.failed_count} skipped: {self.skipped_count} errored: {self.errored_count}") + print(f" run_time: {self.total_time:.2f} seconds") + print(f" avg_time: {self.total_time/test_count:.2f} seconds") + if test_count >= 2: + print(f" median_time: {statistics.median(x.time for x in sorted_tests):.2f} seconds") + sorted_tests = sorted_tests[-num_longest:] + print(f" {len(sorted_tests)} longest tests:") + for test in reversed(sorted_tests): + print(f" {test.name} time: {test.time:.2f} seconds") + print("") + + + +def parse_report(path): + dom = minidom.parse(path) + for test_case in dom.getElementsByTagName('testcase'): + yield TestCase(test_case) + +def parse_reports(folder): + reports = glob(os.path.join(folder, '**', '*.xml'), recursive=True) + tests_by_class = dict() + for report in reports: + for test_case in parse_report(report): + class_name = test_case.class_name + if class_name not in tests_by_class: + tests_by_class[class_name] = TestSuite(class_name) + tests_by_class[class_name].append(test_case) + return tests_by_class + +def build_info(): + return { + "build_pr": os.environ.get("CIRCLE_PR_NUMBER"), + "build_tag": os.environ.get("CIRCLE_TAG"), + "build_sha1": os.environ.get("CIRCLE_SHA1"), + "build_branch": os.environ.get("CIRCLE_BRANCH"), + "build_job": os.environ.get("CIRCLE_JOB"), + "build_workflow_id": os.environ.get("CIRCLE_WORKFLOW_ID"), + } + +def build_message(test_case): + return { + "normal": { + **build_info(), + "test_suite_name": test_case.class_name, + "test_case_name": test_case.name, + }, + "int": { + "time": int(time.time()), + "test_total_count": 1, + "test_total_time": int(test_case.time * 1000), + "test_failed_count": 1 if test_case.failed > 0 else 0, + "test_skipped_count": 1 if test_case.skipped > 0 else 0, + "test_errored_count": 1 if test_case.errored > 0 else 0, + }, + } + +def send_report_to_scribe(reports): + access_token = os.environ.get("SCRIBE_GRAPHQL_ACCESS_TOKEN") + + if not access_token: + print("No scribe access token provided, skip sending report!") + return + print("Scribe access token provided, sending report...") + url = "https://graph.facebook.com/scribe_logs" + r = requests.post( + url, + data={ + "access_token": access_token, + "logs": json.dumps( + [ + { + "category": "perfpipe_pytorch_test_times", + "message": json.dumps(build_message(test_case)), + "line_escape": False, + } + for name in sorted(reports.keys()) + for test_case in reports[name].test_cases + ] + ), + }, + ) + r.raise_for_status() + +def send_report_to_s3(reports, *, total_seconds): + job = os.environ.get('CIRCLE_JOB') + sha1 = os.environ.get('CIRCLE_SHA1') + branch = os.environ.get('CIRCLE_BRANCH', '') + if branch not in ['master', 'nightly'] and not 
branch.startswith("release/"): + print("S3 upload only enabled on master, nightly and release branches.") + print(f"skipping test report on branch: {branch}") + return + now = datetime.datetime.utcnow().isoformat() + key = f'test_time/{sha1}/{job}/{now}Z.json.bz2' # Z meaning UTC + s3 = boto3.resource('s3') + try: + s3.get_bucket_acl(Bucket='ossci-metrics') + except Exception as e: + print(f"AWS ACL failed: {e}") + print("AWS credential found, uploading to S3...") + + obj = s3.Object('ossci-metrics', key) + print("") + # use bz2 because the results are smaller than gzip, and the + # compression time penalty we pay is only about half a second for + # input files of a few megabytes in size like these JSON files, and + # because for some reason zlib doesn't seem to play nice with the + # gunzip command whereas Python's bz2 does work with bzip2 + obj.put(Body=bz2.compress(json.dumps({ + **build_info(), + 'total_seconds': total_seconds, + 'suites': { + name: { + 'total_seconds': suite.total_time, + 'cases': [ + { + 'name': case.name, + 'seconds': case.time, + 'errored': case.errored, + 'failed': case.failed, + 'skipped': case.skipped, + } + for case in suite.test_cases + ], + } + for name, suite in reports.items() + } + }).encode())) + +def positive_integer(value): + parsed = int(value) + if parsed < 1: + raise argparse.ArgumentTypeError(f"{value} is not a natural number") + return parsed + +def positive_float(value): + parsed = float(value) + if parsed <= 0.0: + raise argparse.ArgumentTypeError(f"{value} is not a positive rational number") + return parsed + +if __name__ == '__main__': + import argparse + import sys + parser = argparse.ArgumentParser( + "Print statistics from test XML output.", + formatter_class=argparse.ArgumentDefaultsHelpFormatter, + ) + parser.add_argument( + "--longest-of-class", + type=positive_integer, + default=3, + metavar="N", + help="how many longest tests to show for each class", + ) + parser.add_argument( + "--class-print-threshold", + type=positive_float, + default=1.0, + metavar="N", + help="Minimal total time to warrant class report", + ) + parser.add_argument( + "--longest-of-run", + type=positive_integer, + default=10, + metavar="N", + help="how many longest tests to show from the entire run", + ) + parser.add_argument( + "--upload-to-s3", + action="store_true", + help="upload test time to S3 bucket", + ) + parser.add_argument( + "folder", + help="test report folder", + ) + args = parser.parse_args() + + reports = parse_reports(args.folder) + if len(reports) == 0: + print(f"No test reports found in {args.folder}") + sys.exit(0) + + send_report_to_scribe(reports) + + longest_tests = [] + total_time = 0 + for name in sorted(reports.keys()): + test_suite = reports[name] + if test_suite.total_time >= args.class_print_threshold: + test_suite.print_report(args.longest_of_class) + total_time += test_suite.total_time + longest_tests.extend(test_suite.test_cases) + longest_tests = sorted(longest_tests, key=lambda x: x.time)[-args.longest_of_run:] + + if args.upload_to_s3: + send_report_to_s3(reports, total_seconds=total_time) + + print(f"Total runtime is {datetime.timedelta(seconds=int(total_time))}") + print(f"{len(longest_tests)} longest tests of entire run:") + for test_case in reversed(longest_tests): + print(f" {test_case.class_name}.{test_case.name} time: {test_case.time:.2f} seconds") diff --git a/test/test_testing.py b/test/test_testing.py index 05d1e135c53..280a312914e 100644 --- a/test/test_testing.py +++ b/test/test_testing.py @@ -8,7 +8,6 @@ from 
torch.testing._internal.common_utils import \ from torch.testing._internal.common_device_type import \ (instantiate_device_type_tests, onlyCUDA, onlyOnCPUAndCUDA, dtypes) from torch.testing._internal import mypy_wrapper -from torch.testing._internal import print_test_stats # For testing TestCase methods and torch.testing functions class TestTesting(TestCase): @@ -546,456 +545,5 @@ class TestMypyWrapper(TestCase): )) -def fakehash(char): - return char * 40 - - -def makecase(name, seconds, *, errored=False, failed=False, skipped=False): - return { - 'name': name, - 'seconds': seconds, - 'errored': errored, - 'failed': failed, - 'skipped': skipped, - } - - -def makereport(tests): - suites = { - suite_name: { - 'total_seconds': sum(case['seconds'] for case in cases), - 'cases': cases, - } - for suite_name, cases in tests.items() - } - return { - 'total_seconds': sum(s['total_seconds'] for s in suites.values()), - 'suites': suites, - } - - -class TestPrintTestStats(TestCase): - maxDiff = None - - def test_analysis(self): - head_report = makereport({ - # input ordering of the suites is ignored - 'Grault': [ - # not printed: status same and time similar - makecase('test_grault0', 4.78, failed=True), - # status same, but time increased a lot - makecase('test_grault2', 1.473, errored=True), - ], - # individual tests times changed, not overall suite - 'Qux': [ - # input ordering of the test cases is ignored - makecase('test_qux1', 0.001, skipped=True), - makecase('test_qux6', 0.002, skipped=True), - # time in bounds, but status changed - makecase('test_qux4', 7.158, failed=True), - # not printed because it's the same as before - makecase('test_qux7', 0.003, skipped=True), - makecase('test_qux5', 11.968), - makecase('test_qux3', 23.496), - ], - # new test suite - 'Bar': [ - makecase('test_bar2', 3.742, failed=True), - makecase('test_bar1', 50.447), - ], - # overall suite time changed but no individual tests - 'Norf': [ - makecase('test_norf1', 3), - makecase('test_norf2', 3), - makecase('test_norf3', 3), - makecase('test_norf4', 3), - ], - # suite doesn't show up if it doesn't change enough - 'Foo': [ - makecase('test_foo1', 42), - makecase('test_foo2', 56), - ], - }) - - base_reports = { - # bbbb has no reports, so base is cccc instead - fakehash('b'): [], - fakehash('c'): [ - makereport({ - 'Baz': [ - makecase('test_baz2', 13.605), - # no recent suites have & skip this test - makecase('test_baz1', 0.004, skipped=True), - ], - 'Foo': [ - makecase('test_foo1', 43), - # test added since dddd - makecase('test_foo2', 57), - ], - 'Grault': [ - makecase('test_grault0', 4.88, failed=True), - makecase('test_grault1', 11.967, failed=True), - makecase('test_grault2', 0.395, errored=True), - makecase('test_grault3', 30.460), - ], - 'Norf': [ - makecase('test_norf1', 2), - makecase('test_norf2', 2), - makecase('test_norf3', 2), - makecase('test_norf4', 2), - ], - 'Qux': [ - makecase('test_qux3', 4.978, errored=True), - makecase('test_qux7', 0.002, skipped=True), - makecase('test_qux2', 5.618), - makecase('test_qux4', 7.766, errored=True), - makecase('test_qux6', 23.589, failed=True), - ], - }), - ], - fakehash('d'): [ - makereport({ - 'Foo': [ - makecase('test_foo1', 40), - # removed in cccc - makecase('test_foo3', 17), - ], - 'Baz': [ - # not skipped, so not included in stdev - makecase('test_baz1', 3.14), - ], - 'Qux': [ - makecase('test_qux7', 0.004, skipped=True), - makecase('test_qux2', 6.02), - makecase('test_qux4', 20.932), - ], - 'Norf': [ - makecase('test_norf1', 3), - makecase('test_norf2', 3), - 
makecase('test_norf3', 3), - makecase('test_norf4', 3), - ], - 'Grault': [ - makecase('test_grault0', 5, failed=True), - makecase('test_grault1', 14.325, failed=True), - makecase('test_grault2', 0.31, errored=True), - ], - }), - ], - fakehash('e'): [], - fakehash('f'): [ - makereport({ - 'Foo': [ - makecase('test_foo3', 24), - makecase('test_foo1', 43), - ], - 'Baz': [ - makecase('test_baz2', 16.857), - ], - 'Qux': [ - makecase('test_qux2', 6.422), - makecase('test_qux4', 6.382, errored=True), - ], - 'Norf': [ - makecase('test_norf1', 0.9), - makecase('test_norf3', 0.9), - makecase('test_norf2', 0.9), - makecase('test_norf4', 0.9), - ], - 'Grault': [ - makecase('test_grault0', 4.7, failed=True), - makecase('test_grault1', 13.146, failed=True), - makecase('test_grault2', 0.48, errored=True), - ], - }), - ], - } - - simpler_head = print_test_stats.simplify(head_report) - simpler_base = {} - for commit, reports in base_reports.items(): - simpler_base[commit] = [print_test_stats.simplify(r) for r in reports] - analysis = print_test_stats.analyze( - head_report=simpler_head, - base_reports=simpler_base, - ) - - self.assertEqual( - '''\ - -- class Baz: -- # was 15.23s ± 2.30s -- -- def test_baz1: ... -- # was 0.004s (skipped) -- -- def test_baz2: ... -- # was 15.231s ± 2.300s - - - class Grault: - # was 48.86s ± 1.19s - # now 6.25s - - - def test_grault1: ... - - # was 13.146s ± 1.179s (failed) - - - def test_grault3: ... - - # was 30.460s - - - class Qux: - # was 41.66s ± 1.06s - # now 42.63s - - - def test_qux2: ... - - # was 6.020s ± 0.402s - - ! def test_qux3: ... - ! # was 4.978s (errored) - ! # now 23.496s - - ! def test_qux4: ... - ! # was 7.074s ± 0.979s (errored) - ! # now 7.158s (failed) - - ! def test_qux6: ... - ! # was 23.589s (failed) - ! # now 0.002s (skipped) - - + def test_qux1: ... - + # now 0.001s (skipped) - - + def test_qux5: ... - + # now 11.968s - - -+ class Bar: -+ # now 54.19s -+ -+ def test_bar1: ... -+ # now 50.447s -+ -+ def test_bar2: ... 
-+ # now 3.742s (failed) - -''', - print_test_stats.anomalies(analysis), - ) - - def test_graph(self): - # HEAD is on master - self.assertEqual( - '''\ -Commit graph (base is most recent master ancestor with at least one S3 report): - - : (master) - | - * aaaaaaaaaa (HEAD) total time 502.99s - * bbbbbbbbbb (base) 1 report, total time 47.84s - * cccccccccc 1 report, total time 332.50s - * dddddddddd 0 reports - | - : -''', - print_test_stats.graph( - head_sha=fakehash('a'), - head_seconds=502.99, - base_seconds={ - fakehash('b'): [47.84], - fakehash('c'): [332.50], - fakehash('d'): [], - }, - on_master=True, - ) - ) - - self.assertEqual( - '''\ -Commit graph (base is most recent master ancestor with at least one S3 report): - - : (master) - | - | * aaaaaaaaaa (HEAD) total time 9988.77s - |/ - * bbbbbbbbbb (base) 121 reports, total time 7654.32s ± 55.55s - * cccccccccc 20 reports, total time 5555.55s ± 253.19s - * dddddddddd 1 report, total time 1234.56s - | - : -''', - print_test_stats.graph( - head_sha=fakehash('a'), - head_seconds=9988.77, - base_seconds={ - fakehash('b'): [7598.77] * 60 + [7654.32] + [7709.87] * 60, - fakehash('c'): [5308.77] * 10 + [5802.33] * 10, - fakehash('d'): [1234.56], - }, - on_master=False, - ) - ) - - self.assertEqual( - '''\ -Commit graph (base is most recent master ancestor with at least one S3 report): - - : (master) - | - | * aaaaaaaaaa (HEAD) total time 25.52s - | | - | : (5 commits) - |/ - * bbbbbbbbbb 0 reports - * cccccccccc 0 reports - * dddddddddd (base) 15 reports, total time 58.92s ± 25.82s - | - : -''', - print_test_stats.graph( - head_sha=fakehash('a'), - head_seconds=25.52, - base_seconds={ - fakehash('b'): [], - fakehash('c'): [], - fakehash('d'): [52.25] * 14 + [152.26], - }, - on_master=False, - ancestry_path=5, - ) - ) - - self.assertEqual( - '''\ -Commit graph (base is most recent master ancestor with at least one S3 report): - - : (master) - | - | * aaaaaaaaaa (HEAD) total time 0.08s - |/| - | : (1 commit) - | - * bbbbbbbbbb 0 reports - * cccccccccc (base) 1 report, total time 0.09s - * dddddddddd 3 reports, total time 0.10s ± 0.05s - | - : -''', - print_test_stats.graph( - head_sha=fakehash('a'), - head_seconds=0.08, - base_seconds={ - fakehash('b'): [], - fakehash('c'): [0.09], - fakehash('d'): [0.05, 0.10, 0.15], - }, - on_master=False, - other_ancestors=1, - ) - ) - - self.assertEqual( - '''\ -Commit graph (base is most recent master ancestor with at least one S3 report): - - : (master) - | - | * aaaaaaaaaa (HEAD) total time 5.98s - | | - | : (1 commit) - |/| - | : (7 commits) - | - * bbbbbbbbbb (base) 2 reports, total time 6.02s ± 1.71s - * cccccccccc 0 reports - * dddddddddd 10 reports, total time 5.84s ± 0.92s - | - : -''', - print_test_stats.graph( - head_sha=fakehash('a'), - head_seconds=5.98, - base_seconds={ - fakehash('b'): [4.81, 7.23], - fakehash('c'): [], - fakehash('d'): [4.97] * 5 + [6.71] * 5, - }, - on_master=False, - ancestry_path=1, - other_ancestors=7, - ) - ) - - def test_regression_info(self): - self.assertEqual( - '''\ ------ Historic stats comparison result ------ - - job: foo_job - commit: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa - - - class Foo: - # was 42.50s ± 2.12s - # now 3.02s - - - def test_bar: ... - - # was 1.000s - - ! def test_foo: ... - ! # was 41.500s ± 2.121s - ! # now 0.020s (skipped) - - + def test_baz: ... 
- + # now 3.000s - - -Commit graph (base is most recent master ancestor with at least one S3 report): - - : (master) - | - | * aaaaaaaaaa (HEAD) total time 3.02s - |/ - * bbbbbbbbbb (base) 1 report, total time 41.00s - * cccccccccc 1 report, total time 43.00s - | - : - -Removed (across 1 suite) 1 test, totaling - 1.00s -Modified (across 1 suite) 1 test, totaling - 41.48s ± 2.12s -Added (across 1 suite) 1 test, totaling + 3.00s -''', - print_test_stats.regression_info( - head_sha=fakehash('a'), - head_report=makereport({ - 'Foo': [ - makecase('test_foo', 0.02, skipped=True), - makecase('test_baz', 3), - ]}), - base_reports={ - fakehash('b'): [ - makereport({ - 'Foo': [ - makecase('test_foo', 40), - makecase('test_bar', 1), - ], - }), - ], - fakehash('c'): [ - makereport({ - 'Foo': [ - makecase('test_foo', 43), - ], - }), - ], - }, - job_name='foo_job', - on_master=False, - ancestry_path=0, - other_ancestors=0, - ) - ) - - if __name__ == '__main__': run_tests() diff --git a/tools/code_coverage/package/oss/init.py b/tools/code_coverage/package/oss/init.py index 33eceed125b..53169b1c332 100644 --- a/tools/code_coverage/package/oss/init.py +++ b/tools/code_coverage/package/oss/init.py @@ -31,6 +31,7 @@ from .utils import ( BLOCKED_PYTHON_TESTS = { "run_test.py", + "print_test_stats.py", "test_dataloader.py", "test_multiprocessing.py", "test_multiprocessing_spawn.py", diff --git a/torch/testing/_internal/print_test_stats.py b/torch/testing/_internal/print_test_stats.py deleted file mode 100755 index b33ca5623d5..00000000000 --- a/torch/testing/_internal/print_test_stats.py +++ /dev/null @@ -1,864 +0,0 @@ -#!/usr/bin/env python - -import bz2 -import datetime -import json -import math -import os -import statistics -import subprocess -import time -from collections import defaultdict -from glob import glob -from pathlib import Path -from typing import DefaultDict, Dict, Iterable, List, Optional, Tuple -from xml.dom import minidom - -import requests -from typing_extensions import TypedDict - -try: - import boto3 - HAVE_BOTO3 = True -except ImportError: - HAVE_BOTO3 = False - -Commit = str # 40-digit SHA-1 hex string -Status = Optional[str] # errored, failed, skipped, or None - -# represent suite as dict because indexing is useful -SimplerCase = Tuple[float, Status] -SimplerSuite = Dict[str, SimplerCase] -SimplerReport = Dict[str, SimplerSuite] - - -class Case(TypedDict): - name: str - seconds: float - errored: bool - failed: bool - skipped: bool - - -class Suite(TypedDict): - total_seconds: float - cases: List[Case] - - -class ReportMeta(TypedDict, total=False): - build_pr: str - build_tag: str - build_sha1: Commit - build_branch: str - build_job: str - build_workflow_id: str - - -class Report(ReportMeta): - total_seconds: float - suites: Dict[str, Suite] - - -class Stat(TypedDict): - center: float - spread: Optional[float] - - -class CaseDiff(TypedDict): - margin: str - name: str - was: Optional[Tuple[Stat, Status]] - now: Optional[SimplerCase] - - -class SuiteDiff(TypedDict): - margin: str - name: str - was: Optional[Stat] - now: Optional[float] - cases: List[CaseDiff] - - -def case_status(case: Case) -> Status: - for k in {'errored', 'failed', 'skipped'}: - if case[k]: # type: ignore - return k - return None - - -def simplify(report: Report) -> SimplerReport: - return { - suite_name: { - case['name']: (case['seconds'], case_status(case)) - for case in suite['cases'] - } - for suite_name, suite in report['suites'].items() - } - - -def plural(n: int) -> str: - return '' if n == 1 else 's' - - -def 
display_stat( - x: Stat, - format: Tuple[Tuple[int, int], Tuple[int, int]], -) -> str: - spread_len = format[1][0] + 1 + format[1][1] - spread = x['spread'] - if spread is not None: - spread_str = f' ± {spread:{spread_len}.{format[1][1]}f}s' - else: - spread_str = ' ' * (3 + spread_len + 1) - mean_len = format[0][0] + 1 + format[0][1] - return f'{x["center"]:{mean_len}.{format[0][1]}f}s{spread_str}' - - -def list_stat(l: List[float]) -> Stat: - return { - 'center': statistics.mean(l), - 'spread': statistics.stdev(l) if len(l) > 1 else None - } - - -def zero_stat() -> Stat: - return {'center': 0, 'spread': None} - - -def recenter(was: Stat, now: float) -> Stat: - return {'center': now - was['center'], 'spread': was['spread']} - - -def sum_normals(stats: Iterable[Stat]) -> Stat: - """ - Returns a stat corresponding to the sum of the given stats. - - Assumes that the center and spread for each of the given stats are - mean and stdev, respectively. - """ - l = list(stats) - spread: Optional[float] - if any(stat['spread'] is not None for stat in l): - spread = math.sqrt(sum((stat['spread'] or 0)**2 for stat in l)) - else: - spread = None - return { - 'center': sum(stat['center'] for stat in l), - 'spread': spread, - } - - -def format_seconds(seconds: List[float]) -> str: - if len(seconds) > 0: - x = list_stat(seconds) - return f'total time {display_stat(x, ((5, 2), (4, 2)))}'.strip() - return '' - - -def show_ancestors(num_commits: int) -> str: - return f' | : ({num_commits} commit{plural(num_commits)})' - - -def unlines(lines: List[str]) -> str: - return ''.join(f'{line}\n' for line in lines) - - -def matching_test_times( - base_reports: Dict[Commit, List[SimplerReport]], - suite_name: str, - case_name: str, - status: Status, -) -> List[float]: - times: List[float] = [] - for reports in base_reports.values(): - for report in reports: - suite = report.get(suite_name) - if suite: - case = suite.get(case_name) - if case: - t, s = case - if s == status: - times.append(t) - return times - - -def analyze( - *, - head_report: SimplerReport, - base_reports: Dict[Commit, List[SimplerReport]], -) -> List[SuiteDiff]: - # most recent master ancestor with at least one S3 report - base_sha = next(sha for sha, reports in base_reports.items() if reports) - - # find all relevant suites (those in either base or head or both) - all_reports = [head_report] + base_reports[base_sha] - all_suites = {k for r in all_reports for k in r.keys()} - - removed_suites: List[SuiteDiff] = [] - modified_suites: List[SuiteDiff] = [] - added_suites: List[SuiteDiff] = [] - - for suite_name in sorted(all_suites): - case_diffs: List[CaseDiff] = [] - head_suite = head_report.get(suite_name) - base_cases: Dict[str, Status] = dict(sorted(set.intersection(*[ - {(n, s) for n, (_, s) in report.get(suite_name, {}).items()} - for report in base_reports[base_sha] - ] or [set()]))) - case_stats: Dict[str, Stat] = {} - if head_suite: - now = sum(case[0] for case in head_suite.values()) - if any(suite_name in report for report in base_reports[base_sha]): - removed_cases: List[CaseDiff] = [] - for case_name, case_status in base_cases.items(): - case_stats[case_name] = list_stat(matching_test_times( - base_reports, - suite_name, - case_name, - case_status, - )) - if case_name not in head_suite: - removed_cases.append({ - 'margin': '-', - 'name': case_name, - 'was': (case_stats[case_name], case_status), - 'now': None, - }) - modified_cases: List[CaseDiff] = [] - added_cases: List[CaseDiff] = [] - for head_case_name in sorted(head_suite): - 
head_case = head_suite[head_case_name] - if head_case_name in base_cases: - stat = case_stats[head_case_name] - base_status = base_cases[head_case_name] - if head_case[1] != base_status: - modified_cases.append({ - 'margin': '!', - 'name': head_case_name, - 'was': (stat, base_status), - 'now': head_case, - }) - else: - added_cases.append({ - 'margin': '+', - 'name': head_case_name, - 'was': None, - 'now': head_case, - }) - # there might be a bug calculating this stdev, not sure - was = sum_normals(case_stats.values()) - case_diffs = removed_cases + modified_cases + added_cases - if case_diffs: - modified_suites.append({ - 'margin': ' ', - 'name': suite_name, - 'was': was, - 'now': now, - 'cases': case_diffs, - }) - else: - for head_case_name in sorted(head_suite): - head_case = head_suite[head_case_name] - case_diffs.append({ - 'margin': ' ', - 'name': head_case_name, - 'was': None, - 'now': head_case, - }) - added_suites.append({ - 'margin': '+', - 'name': suite_name, - 'was': None, - 'now': now, - 'cases': case_diffs, - }) - else: - for case_name, case_status in base_cases.items(): - case_stats[case_name] = list_stat(matching_test_times( - base_reports, - suite_name, - case_name, - case_status, - )) - case_diffs.append({ - 'margin': ' ', - 'name': case_name, - 'was': (case_stats[case_name], case_status), - 'now': None, - }) - removed_suites.append({ - 'margin': '-', - 'name': suite_name, - # there might be a bug calculating this stdev, not sure - 'was': sum_normals(case_stats.values()), - 'now': None, - 'cases': case_diffs, - }) - - return removed_suites + modified_suites + added_suites - - -def case_diff_lines(diff: CaseDiff) -> List[str]: - lines = [f'def {diff["name"]}: ...'] - - case_fmt = ((3, 3), (2, 3)) - - was = diff['was'] - if was: - was_line = f' # was {display_stat(was[0], case_fmt)}' - was_status = was[1] - if was_status: - was_line += f' ({was_status})' - lines.append(was_line) - - now = diff['now'] - if now: - now_stat: Stat = {'center': now[0], 'spread': None} - now_line = f' # now {display_stat(now_stat, case_fmt)}' - now_status = now[1] - if now_status: - now_line += f' ({now_status})' - lines.append(now_line) - - return [''] + [f'{diff["margin"]} {l}' for l in lines] - - -def display_suite_diff(diff: SuiteDiff) -> str: - lines = [f'class {diff["name"]}:'] - - suite_fmt = ((4, 2), (3, 2)) - - was = diff['was'] - if was: - lines.append(f' # was {display_stat(was, suite_fmt)}') - - now = diff['now'] - if now is not None: - now_stat: Stat = {'center': now, 'spread': None} - lines.append(f' # now {display_stat(now_stat, suite_fmt)}') - - for case_diff in diff['cases']: - lines.extend([f' {l}' for l in case_diff_lines(case_diff)]) - - return unlines([''] + [f'{diff["margin"]} {l}'.rstrip() for l in lines] + ['']) - - -def anomalies(diffs: List[SuiteDiff]) -> str: - return ''.join(map(display_suite_diff, diffs)) - - -def graph( - *, - head_sha: Commit, - head_seconds: float, - base_seconds: Dict[Commit, List[float]], - on_master: bool, - ancestry_path: int = 0, - other_ancestors: int = 0, -) -> str: - lines = [ - 'Commit graph (base is most recent master ancestor with at least one S3 report):', - '', - ' : (master)', - ' |', - ] - - head_time_str = f' {format_seconds([head_seconds])}' - if on_master: - lines.append(f' * {head_sha[:10]} (HEAD) {head_time_str}') - else: - lines.append(f' | * {head_sha[:10]} (HEAD) {head_time_str}') - - if ancestry_path > 0: - lines += [ - ' | |', - show_ancestors(ancestry_path), - ] - - if other_ancestors > 0: - lines += [ - ' |/|', - 
show_ancestors(other_ancestors), - ' |', - ] - else: - lines.append(' |/') - - is_first = True - for sha, seconds in base_seconds.items(): - num_runs = len(seconds) - prefix = str(num_runs).rjust(3) - base = '(base)' if is_first and num_runs > 0 else ' ' - if num_runs > 0: - is_first = False - t = format_seconds(seconds) - p = plural(num_runs) - if t: - p = f'{p}, '.ljust(3) - lines.append(f' * {sha[:10]} {base} {prefix} report{p}{t}') - - lines.extend([' |', ' :']) - - return unlines(lines) - - -def case_delta(case: CaseDiff) -> Stat: - was = case['was'] - now = case['now'] - return recenter( - was[0] if was else zero_stat(), - now[0] if now else 0, - ) - - -def display_final_stat(stat: Stat) -> str: - center = stat['center'] - spread = stat['spread'] - displayed = display_stat( - {'center': abs(center), 'spread': spread}, - ((4, 2), (3, 2)), - ) - if center < 0: - sign = '-' - elif center > 0: - sign = '+' - else: - sign = ' ' - return f'{sign}{displayed}'.rstrip() - - -def summary_line(message: str, d: DefaultDict[str, List[CaseDiff]]) -> str: - all_cases = [c for cs in d.values() for c in cs] - tests = len(all_cases) - suites = len(d) - sp = f'{plural(suites)})'.ljust(2) - tp = f'{plural(tests)},'.ljust(2) - # there might be a bug calculating this stdev, not sure - stat = sum_normals(case_delta(c) for c in all_cases) - return ''.join([ - f'{message} (across {suites:>4} suite{sp}', - f'{tests:>6} test{tp}', - f' totaling {display_final_stat(stat)}', - ]) - - -def summary(analysis: List[SuiteDiff]) -> str: - removed_tests: DefaultDict[str, List[CaseDiff]] = defaultdict(list) - modified_tests: DefaultDict[str, List[CaseDiff]] = defaultdict(list) - added_tests: DefaultDict[str, List[CaseDiff]] = defaultdict(list) - - for diff in analysis: - # the use of 'margin' here is not the most elegant - name = diff['name'] - margin = diff['margin'] - cases = diff['cases'] - if margin == '-': - removed_tests[name] += cases - elif margin == '+': - added_tests[name] += cases - else: - removed = list(filter(lambda c: c['margin'] == '-', cases)) - added = list(filter(lambda c: c['margin'] == '+', cases)) - modified = list(filter(lambda c: c['margin'] == '!', cases)) - if removed: - removed_tests[name] += removed - if added: - added_tests[name] += added - if modified: - modified_tests[name] += modified - - return unlines([ - summary_line('Removed ', removed_tests), - summary_line('Modified', modified_tests), - summary_line('Added ', added_tests), - ]) - - -def regression_info( - *, - head_sha: Commit, - head_report: Report, - base_reports: Dict[Commit, List[Report]], - job_name: str, - on_master: bool, - ancestry_path: int, - other_ancestors: int, -) -> str: - """ - Return a human-readable report describing any test time regressions. - - The head_sha and head_report args give info about the current commit - and its test times. Since Python dicts maintain insertion order - (guaranteed as part of the language spec since 3.7), the - base_reports argument must list the head's several most recent - master commits, from newest to oldest (so the merge-base is - list(base_reports)[0]). 
- """ - simpler_head = simplify(head_report) - simpler_base: Dict[Commit, List[SimplerReport]] = {} - for commit, reports in base_reports.items(): - simpler_base[commit] = [simplify(r) for r in reports] - analysis = analyze( - head_report=simpler_head, - base_reports=simpler_base, - ) - - return '\n'.join([ - unlines([ - '----- Historic stats comparison result ------', - '', - f' job: {job_name}', - f' commit: {head_sha}', - ]), - anomalies(analysis), - graph( - head_sha=head_sha, - head_seconds=head_report['total_seconds'], - base_seconds={ - c: [r['total_seconds'] for r in rs] - for c, rs in base_reports.items() - }, - on_master=on_master, - ancestry_path=ancestry_path, - other_ancestors=other_ancestors, - ), - summary(analysis), - ]) - - -class TestCase: - def __init__(self, dom) -> None: - self.class_name = str(dom.attributes['classname'].value) - self.name = str(dom.attributes['name'].value) - self.time = float(dom.attributes['time'].value) - self.errored = len(dom.getElementsByTagName('error')) > 0 - self.failed = len(dom.getElementsByTagName('failure')) > 0 - self.skipped = len(dom.getElementsByTagName('skipped')) > 0 - - -class TestSuite: - def __init__(self, name) -> None: - self.name = name - self.test_cases: List[TestCase] = [] - self.failed_count = 0 - self.skipped_count = 0 - self.errored_count = 0 - self.total_time = 0.0 - - def __repr__(self): - rc = f'{self.name} run_time: {self.total_time:.2f} tests: {len(self.test_cases)}' - if self.skipped_count > 0: - rc += f' skipped: {self.skipped_count}' - return f'TestSuite({rc})' - - def append(self, test_case): - self.test_cases.append(test_case) - self.total_time += test_case.time - self.failed_count += 1 if test_case.failed else 0 - self.skipped_count += 1 if test_case.skipped else 0 - self.errored_count += 1 if test_case.errored else 0 - - def print_report(self, num_longest=3): - sorted_tests = sorted(self.test_cases, key=lambda x: x.time) - test_count = len(sorted_tests) - print(f"class {self.name}:") - print(f" tests: {test_count} failed: {self.failed_count} skipped: {self.skipped_count} errored: {self.errored_count}") - print(f" run_time: {self.total_time:.2f} seconds") - print(f" avg_time: {self.total_time/test_count:.2f} seconds") - if test_count >= 2: - print(f" median_time: {statistics.median(x.time for x in sorted_tests):.2f} seconds") - sorted_tests = sorted_tests[-num_longest:] - print(f" {len(sorted_tests)} longest tests:") - for test in reversed(sorted_tests): - print(f" {test.name} time: {test.time:.2f} seconds") - print("") - - -def parse_report(path): - dom = minidom.parse(path) - for test_case in dom.getElementsByTagName('testcase'): - yield TestCase(test_case) - - -def parse_reports(folder): - reports = glob(os.path.join(folder, '**', '*.xml'), recursive=True) - tests_by_class = dict() - for report in reports: - for test_case in parse_report(report): - class_name = test_case.class_name - if class_name not in tests_by_class: - tests_by_class[class_name] = TestSuite(class_name) - tests_by_class[class_name].append(test_case) - return tests_by_class - - -def build_info(): - return { - "build_pr": os.environ.get("CIRCLE_PR_NUMBER"), - "build_tag": os.environ.get("CIRCLE_TAG"), - "build_sha1": os.environ.get("CIRCLE_SHA1"), - "build_branch": os.environ.get("CIRCLE_BRANCH"), - "build_job": os.environ.get("CIRCLE_JOB"), - "build_workflow_id": os.environ.get("CIRCLE_WORKFLOW_ID"), - } - - -def build_message(test_case): - return { - "normal": { - **build_info(), - "test_suite_name": test_case.class_name, - 
"test_case_name": test_case.name, - }, - "int": { - "time": int(time.time()), - "test_total_count": 1, - "test_total_time": int(test_case.time * 1000), - "test_failed_count": 1 if test_case.failed > 0 else 0, - "test_skipped_count": 1 if test_case.skipped > 0 else 0, - "test_errored_count": 1 if test_case.errored > 0 else 0, - }, - } - - -def send_report_to_scribe(reports): - access_token = os.environ.get("SCRIBE_GRAPHQL_ACCESS_TOKEN") - - if not access_token: - print("No scribe access token provided, skip sending report!") - return - print("Scribe access token provided, sending report...") - url = "https://graph.facebook.com/scribe_logs" - r = requests.post( - url, - data={ - "access_token": access_token, - "logs": json.dumps( - [ - { - "category": "perfpipe_pytorch_test_times", - "message": json.dumps(build_message(test_case)), - "line_escape": False, - } - for name in sorted(reports.keys()) - for test_case in reports[name].test_cases - ] - ), - }, - ) - r.raise_for_status() - - -def assemble_s3_object(reports, *, total_seconds): - return { - **build_info(), - 'total_seconds': total_seconds, - 'suites': { - name: { - 'total_seconds': suite.total_time, - 'cases': [ - { - 'name': case.name, - 'seconds': case.time, - 'errored': case.errored, - 'failed': case.failed, - 'skipped': case.skipped, - } - for case in suite.test_cases - ], - } - for name, suite in reports.items() - } - } - - -def send_report_to_s3(obj): - job = os.environ.get('CIRCLE_JOB') - sha1 = os.environ.get('CIRCLE_SHA1') - branch = os.environ.get('CIRCLE_BRANCH', '') - if branch not in ['master', 'nightly'] and not branch.startswith("release/"): - print("S3 upload only enabled on master, nightly and release branches.") - print(f"skipping test report on branch: {branch}") - return - now = datetime.datetime.utcnow().isoformat() - key = f'test_time/{sha1}/{job}/{now}Z.json.bz2' # Z meaning UTC - s3 = boto3.resource('s3') - try: - s3.get_bucket_acl(Bucket='ossci-metrics') - except Exception as e: - print(f"AWS ACL failed: {e}") - print("AWS credential found, uploading to S3...") - - obj = s3.Object('ossci-metrics', key) - print("") - # use bz2 because the results are smaller than gzip, and the - # compression time penalty we pay is only about half a second for - # input files of a few megabytes in size like these JSON files, and - # because for some reason zlib doesn't seem to play nice with the - # gunzip command whereas Python's bz2 does work with bzip2 - obj.put(Body=bz2.compress(json.dumps(obj).encode())) - - -def print_regressions(obj, *, num_prev_commits): - sha1 = os.environ.get("CIRCLE_SHA1", "HEAD") - - base = subprocess.check_output( - ["git", "merge-base", sha1, "origin/master"], - encoding="ascii", - ).strip() - - count_spec = f"{base}..{sha1}" - intermediate_commits = int(subprocess.check_output( - ["git", "rev-list", "--count", count_spec], - encoding="ascii" - )) - ancestry_path = int(subprocess.check_output( - ["git", "rev-list", "--ancestry-path", "--count", count_spec], - encoding="ascii", - )) - - # if current commit is already on master, we need to exclude it from - # this history; otherwise we include the merge-base - commits = subprocess.check_output( - ["git", "rev-list", f"--max-count={num_prev_commits+1}", base], - encoding="ascii", - ).splitlines() - on_master = False - if base == sha1: - on_master = True - commits = commits[1:] - else: - commits = commits[:-1] - - job = os.environ.get("CIRCLE_JOB", "") - s3 = boto3.resource("s3") - bucket = s3.Bucket(name="ossci-metrics") - index = {} - for commit in 
commits: - summaries = bucket.objects.filter(Prefix=f"test_time/{commit}/{job}/") - index[commit] = list(summaries) - - objects: Dict[Commit, List[Report]] = {} - # should we do these in parallel? - for commit, summaries in index.items(): - objects[commit] = [] - for summary in summaries: - binary = summary.get()["Body"].read() - string = bz2.decompress(binary).decode("utf-8") - objects[commit].append(json.loads(string)) - - print() - print(regression_info( - head_sha=sha1, - head_report=obj, - base_reports=objects, - job_name=job, - on_master=on_master, - ancestry_path=ancestry_path - 1, - other_ancestors=intermediate_commits - ancestry_path, - ), end="") - - -def positive_integer(value): - parsed = int(value) - if parsed < 1: - raise argparse.ArgumentTypeError(f"{value} is not a natural number") - return parsed - - -def positive_float(value): - parsed = float(value) - if parsed <= 0.0: - raise argparse.ArgumentTypeError(f"{value} is not a positive rational number") - return parsed - - -if __name__ == '__main__': - import argparse - import sys - parser = argparse.ArgumentParser( - "Print statistics from test XML output.", - formatter_class=argparse.ArgumentDefaultsHelpFormatter, - ) - parser.add_argument( - "--longest-of-class", - type=positive_integer, - default=3, - metavar="N", - help="how many longest tests to show for each class", - ) - parser.add_argument( - "--class-print-threshold", - type=positive_float, - default=1.0, - metavar="N", - help="Minimal total time to warrant class report", - ) - parser.add_argument( - "--longest-of-run", - type=positive_integer, - default=10, - metavar="N", - help="how many longest tests to show from the entire run", - ) - if HAVE_BOTO3: - parser.add_argument( - "--upload-to-s3", - action="store_true", - help="upload test time to S3 bucket", - ) - parser.add_argument( - "--compare-with-s3", - action="store_true", - help="download test times for base commits and compare", - ) - parser.add_argument( - "--num-prev-commits", - type=positive_integer, - default=10, - metavar="N", - help="how many previous commits to compare test times with", - ) - parser.add_argument( - "--use-json", - metavar="FILE.json", - help="compare S3 with JSON file, instead of the test report folder", - ) - parser.add_argument( - "folder", - help="test report folder", - ) - args = parser.parse_args() - - reports = parse_reports(args.folder) - if len(reports) == 0: - print(f"No test reports found in {args.folder}") - sys.exit(0) - - send_report_to_scribe(reports) - - longest_tests = [] - total_time = 0 - for name in sorted(reports.keys()): - test_suite = reports[name] - if test_suite.total_time >= args.class_print_threshold: - test_suite.print_report(args.longest_of_class) - total_time += test_suite.total_time - longest_tests.extend(test_suite.test_cases) - longest_tests = sorted(longest_tests, key=lambda x: x.time)[-args.longest_of_run:] - - obj = assemble_s3_object(reports, total_seconds=total_time) - - if args.upload_to_s3: - send_report_to_s3(obj) - - print(f"Total runtime is {datetime.timedelta(seconds=int(total_time))}") - print(f"{len(longest_tests)} longest tests of entire run:") - for test_case in reversed(longest_tests): - print(f" {test_case.class_name}.{test_case.name} time: {test_case.time:.2f} seconds") - - if args.compare_with_s3: - head_json = obj - if args.use_json: - head_json = json.loads(Path(args.use_json).read_text()) - print_regressions(head_json, num_prev_commits=args.num_prev_commits)
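
Note on the report format: send_report_to_s3 above uploads each run as a
bz2-compressed JSON object under test_time/{sha1}/{job}/{timestamp}Z.json.bz2
in the ossci-metrics bucket, and print_regressions reads those objects back.
The following is a minimal standalone sketch of that read path, assuming AWS
credentials with read access to the bucket; the sha1 and job values are
hypothetical placeholders, while the bucket name, key layout, and decoding
steps are taken from the code above.

    import bz2
    import json

    import boto3

    # Hypothetical placeholders -- substitute a real commit SHA and CI job name.
    sha1 = "0" * 40
    job = "pytorch_linux_xenial_py3_6_gcc5_4_test"

    s3 = boto3.resource("s3")
    bucket = s3.Bucket(name="ossci-metrics")

    # One object is uploaded per run; decode each the same way
    # print_regressions does: bz2-decompress, then parse the JSON report.
    for summary in bucket.objects.filter(Prefix=f"test_time/{sha1}/{job}/"):
        binary = summary.get()["Body"].read()
        report = json.loads(bz2.decompress(binary).decode("utf-8"))
        print(report["build_job"], report["total_seconds"])
        for name, suite in report["suites"].items():
            print(f"  {name}: {suite['total_seconds']:.2f}s")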