From a95abc464888f37b8dc61cf800b41c9b4a24cb2e Mon Sep 17 00:00:00 2001
From: Sam Estep <sestep@fb.com>
Date: Thu, 18 Mar 2021 14:03:56 -0700
Subject: [PATCH] Test tools/test_history.py (#54259)

Summary: Pull Request resolved: https://github.com/pytorch/pytorch/pull/54259

Test Plan:
The main point of this is to be run in our "Test tools" GitHub Actions workflow. To test locally:
```
mypy --config=mypy-strict.ini
python tools/test/test_test_history.py
```

Reviewed By: seemethere

Differential Revision: D27164519

Pulled By: samestep

fbshipit-source-id: 46f90e62e2d4d0c413b202419e509d471bad43de
---
 .github/workflows/test_tools.yml |   7 +-
 mypy-strict.ini                  |   1 +
 tools/test/test_test_history.py  |  74 +++++++++++++++++++
 tools/test_history.py            | 120 ++++++++++++++++++-------------
 4 files changed, 150 insertions(+), 52 deletions(-)
 create mode 100644 tools/test/test_test_history.py

diff --git a/.github/workflows/test_tools.yml b/.github/workflows/test_tools.yml
index 0ae159ad770..fb8a448e16c 100644
--- a/.github/workflows/test_tools.yml
+++ b/.github/workflows/test_tools.yml
@@ -19,7 +19,12 @@ jobs:
         uses: actions/checkout@v2
         with:
           ref: ${{ github.event.pull_request.head.sha }}
+          fetch-depth: 0 # deep clone, to allow us to use git log
       - name: Install dependencies
-        run: pip install -r requirements.txt
+        # boto3 version copied from .circleci/docker/common/install_conda.sh
+        run: |
+          set -eux
+          pip install -r requirements.txt
+          pip install boto3==1.16.34
       - name: Run tests
         run: python -m unittest discover -vs tools/test -p 'test_*.py'
diff --git a/mypy-strict.ini b/mypy-strict.ini
index c0b6a2a0da3..eae58b9b919 100644
--- a/mypy-strict.ini
+++ b/mypy-strict.ini
@@ -41,6 +41,7 @@ files =
     tools/pyi/*.py,
     tools/stats_utils/*.py,
     tools/test_history.py,
+    tools/test/test_test_history.py,
     torch/testing/_internal/framework_utils.py,
     torch/testing/_internal/mypy_wrapper.py,
     torch/utils/benchmark/utils/common.py,
diff --git a/tools/test/test_test_history.py b/tools/test/test_test_history.py
new file mode 100644
index 00000000000..2526855ec65
--- /dev/null
+++ b/tools/test/test_test_history.py
@@ -0,0 +1,74 @@
+import itertools
+import re
+import shlex
+import unittest
+from typing import List, Optional
+
+from tools import test_history
+from typing_extensions import TypedDict
+
+
+class Example(TypedDict):
+    cmd: str
+    args: List[str]
+    lines: List[str]
+
+
+def parse_block(block: List[str]) -> Optional[Example]:
+    if block:
+        match = re.match(r'^\$ ([^ ]+) (.*)$', block[0])
+        if match:
+            cmd, first = match.groups()
+            args = []
+            for i, line in enumerate([first] + block[1:]):
+                if line.endswith('\\'):
+                    args.append(line[:-1])
+                else:
+                    args.append(line)
+                    break
+            return {
+                'cmd': cmd,
+                'args': shlex.split(''.join(args)),
+                'lines': block[i + 1:]
+            }
+    return None
+
+
+def parse_description(description: str) -> List[Example]:
+    examples: List[Example] = []
+    for block in description.split('\n\n'):
+        matches = [
+            re.match(r'^    (.*)$', line)
+            for line in block.splitlines()
+        ]
+        if all(matches):
+            lines = []
+            for match in matches:
+                assert match
+                line, = match.groups()
+                lines.append(line)
+            example = parse_block(lines)
+            if example:
+                examples.append(example)
+    return examples
+
+
+class TestTestHistory(unittest.TestCase):
+    maxDiff = None
+
+    def test_help_examples(self) -> None:
+        examples = parse_description(test_history.description())
+        self.assertEqual(len(examples), 3)
+        for i, example in enumerate(examples):
+            with self.subTest(i=i):
+                self.assertTrue(test_history.__file__.endswith(example['cmd']))
+                expected = example['lines']
+                actual = list(itertools.islice(
+                    test_history.run(example['args']),
+                    len(expected),
+                ))
+                self.assertEqual(actual, expected)
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/tools/test_history.py b/tools/test_history.py
index 6fa0b983038..ab3749e68eb 100755
--- a/tools/test_history.py
+++ b/tools/test_history.py
@@ -4,10 +4,13 @@ import argparse
 import bz2
 import json
 import subprocess
+import sys
 from collections import defaultdict
-from datetime import datetime
-from typing import Any, Dict, List, Optional, Set, Tuple
-from tools.stats_utils.s3_stat_parser import (get_S3_bucket_readonly, get_cases, Report)
+from datetime import datetime, timezone
+from typing import Any, Dict, Iterator, List, Optional, Set, Tuple
+
+from tools.stats_utils.s3_stat_parser import (Report, get_cases,
+                                              get_S3_bucket_readonly)
 
 
 def get_git_commit_history(
@@ -19,7 +22,7 @@ def get_git_commit_history(
         ['git', '-C', path, 'log', '--pretty=format:%H %ct', ref],
     ).decode("latin-1")
     return [
-        (x[0], datetime.fromtimestamp(int(x[1])))
+        (x[0], datetime.fromtimestamp(int(x[1]), tz=timezone.utc))
         for x in [line.split(" ") for line in rc.split("\n")]
     ]
 
@@ -144,7 +147,7 @@ def make_lines(
         return ['(no reports in S3)']
 
 
-def display_history(
+def history_lines(
     *,
     bucket: Any,
     commits: List[Tuple[str, datetime]],
@@ -156,8 +159,8 @@ def display_history(
     sha_length: int,
     mode: str,
     digits: int,
-) -> None:
-    prev_time = datetime.now()
+) -> Iterator[str]:
+    prev_time = datetime.now(tz=timezone.utc)
     for sha, time in commits:
         if (prev_time - time).total_seconds() < delta * 3600:
             continue
@@ -195,7 +198,7 @@ def display_history(
                 test_name=test_name,
             )
         for line in lines:
-            print(f"{time} {sha[:sha_length]} {line}".rstrip())
+            yield f"{time:%Y-%m-%d %H:%M:%S}Z {sha[:sha_length]} {line}".rstrip()
 
 
 class HelpFormatter(
@@ -205,10 +208,8 @@ class HelpFormatter(
     pass
 
 
-def main() -> None:
-    parser = argparse.ArgumentParser(
-        __file__,
-        description='''
+def description() -> str:
+    return r'''
 Display the history of a test.
 
 Each line of (non-error) output starts with the timestamp and SHA1 hash
@@ -220,53 +221,59 @@ In multiline mode, each line next includes the name of a CircleCI job,
 followed by the time of the specified test in that job at that commit.
 Example:
 
-    $ tools/test_history.py multiline --ref=594a66 --sha-length=8 \\
-      test_set_dir pytorch_linux_xenial_py3_6_gcc{5_4,7}_test
-    2021-02-10 03:13:34 594a66d7 pytorch_linux_xenial_py3_6_gcc5_4_test 0.36s
-    2021-02-10 03:13:34 594a66d7 pytorch_linux_xenial_py3_6_gcc7_test 0.573s errored
-    2021-02-10 02:13:25 9c0caf03 pytorch_linux_xenial_py3_6_gcc5_4_test 0.819s
-    2021-02-10 02:13:25 9c0caf03 pytorch_linux_xenial_py3_6_gcc7_test 0.449s
-    2021-02-10 02:09:14 602434bc pytorch_linux_xenial_py3_6_gcc5_4_test 0.361s
-    2021-02-10 02:09:14 602434bc pytorch_linux_xenial_py3_6_gcc7_test 0.454s
-    2021-02-10 02:09:10 2e35fe95 (no reports in S3)
-    2021-02-10 02:09:07 ff73be7e (no reports in S3)
-    2021-02-10 02:05:39 74082f0d (no reports in S3)
-    2021-02-09 23:42:29 0620c96f pytorch_linux_xenial_py3_6_gcc5_4_test 0.414s (1 S3 reports omitted)
-    2021-02-09 23:42:29 0620c96f pytorch_linux_xenial_py3_6_gcc7_test 0.377s (1 S3 reports omitted)
+    $ tools/test_history.py multiline --ref=594a66 --sha-length=8 test_set_dir \
+       pytorch_linux_xenial_py3_6_gcc5_4_test pytorch_linux_xenial_py3_6_gcc7_test
+    2021-02-10 11:13:34Z 594a66d7 pytorch_linux_xenial_py3_6_gcc5_4_test 0.36s
+    2021-02-10 11:13:34Z 594a66d7 pytorch_linux_xenial_py3_6_gcc7_test 0.573s errored
+    2021-02-10 10:13:25Z 9c0caf03 pytorch_linux_xenial_py3_6_gcc5_4_test 0.819s
+    2021-02-10 10:13:25Z 9c0caf03 pytorch_linux_xenial_py3_6_gcc7_test 0.449s
+    2021-02-10 10:09:14Z 602434bc pytorch_linux_xenial_py3_6_gcc5_4_test 0.361s
+    2021-02-10 10:09:14Z 602434bc pytorch_linux_xenial_py3_6_gcc7_test 0.454s
+    2021-02-10 10:09:10Z 2e35fe95 (no reports in S3)
+    2021-02-10 10:09:07Z ff73be7e (no reports in S3)
+    2021-02-10 10:05:39Z 74082f0d (no reports in S3)
+    2021-02-10 07:42:29Z 0620c96f pytorch_linux_xenial_py3_6_gcc5_4_test 0.414s (1 S3 reports omitted)
+    2021-02-10 07:42:29Z 0620c96f pytorch_linux_xenial_py3_6_gcc7_test 0.377s (1 S3 reports omitted)
 
 Another multiline example, this time with the --all flag:
 
-    $ tools/test_history.py multiline --all --ref=321b9 --delta=12 --sha-length=8 \\
+    $ tools/test_history.py multiline --all --ref=321b9 --delta=12 --sha-length=8 \
       test_qr_square_many_batched_complex_cuda
-    2021-01-07 02:04:56 321b9883 pytorch_linux_xenial_cuda10_2_cudnn7_py3_gcc7_test2 424.284s
-    2021-01-07 02:04:56 321b9883 pytorch_linux_xenial_cuda10_2_cudnn7_py3_slow_test 0.006s skipped
-    2021-01-07 02:04:56 321b9883 pytorch_linux_xenial_cuda11_1_cudnn8_py3_gcc7_test 402.572s
-    2021-01-07 02:04:56 321b9883 pytorch_linux_xenial_cuda9_2_cudnn7_py3_gcc7_test 287.164s
-    2021-01-06 12:58:28 fcb69d2e pytorch_linux_xenial_cuda10_2_cudnn7_py3_gcc7_test2 436.732s
-    2021-01-06 12:58:28 fcb69d2e pytorch_linux_xenial_cuda10_2_cudnn7_py3_slow_test 0.006s skipped
-    2021-01-06 12:58:28 fcb69d2e pytorch_linux_xenial_cuda11_1_cudnn8_py3_gcc7_test 407.616s
-    2021-01-06 12:58:28 fcb69d2e pytorch_linux_xenial_cuda9_2_cudnn7_py3_gcc7_test 287.044s
+    2021-01-07 10:04:56Z 321b9883 pytorch_linux_xenial_cuda10_2_cudnn7_py3_gcc7_test2 424.284s
+    2021-01-07 10:04:56Z 321b9883 pytorch_linux_xenial_cuda10_2_cudnn7_py3_slow_test 0.006s skipped
+    2021-01-07 10:04:56Z 321b9883 pytorch_linux_xenial_cuda11_1_cudnn8_py3_gcc7_test 402.572s
+    2021-01-07 10:04:56Z 321b9883 pytorch_linux_xenial_cuda9_2_cudnn7_py3_gcc7_test 287.164s
+    2021-01-06 20:58:28Z fcb69d2e pytorch_linux_xenial_cuda10_2_cudnn7_py3_gcc7_test2 436.732s
+    2021-01-06 20:58:28Z fcb69d2e pytorch_linux_xenial_cuda10_2_cudnn7_py3_slow_test 0.006s skipped
+    2021-01-06 20:58:28Z fcb69d2e pytorch_linux_xenial_cuda11_1_cudnn8_py3_gcc7_test 407.616s
+    2021-01-06 20:58:28Z fcb69d2e pytorch_linux_xenial_cuda9_2_cudnn7_py3_gcc7_test 287.044s
 
 In columns mode, the name of the job isn't printed, but the order of the
 columns is guaranteed to match the order of the jobs passed on the
 command line. Example:
 
-    $ tools/test_history.py columns --ref=3cf783 --sha-length=8 \\
-      test_set_dir pytorch_linux_xenial_py3_6_gcc{5_4,7}_test
-    2021-02-10 04:18:50 3cf78395    0.644s    0.312s
-    2021-02-10 03:13:34 594a66d7    0.360s  errored
-    2021-02-10 02:13:25 9c0caf03    0.819s    0.449s
-    2021-02-10 02:09:14 602434bc    0.361s    0.454s
-    2021-02-10 02:09:10 2e35fe95
-    2021-02-10 02:09:07 ff73be7e
-    2021-02-10 02:05:39 74082f0d
-    2021-02-09 23:42:29 0620c96f    0.414s    0.377s (2 S3 reports omitted)
-    2021-02-09 23:27:53 33afb5f1    0.381s    0.294s
+    $ tools/test_history.py columns --ref=3cf783 --sha-length=8 test_set_dir \
+      pytorch_linux_xenial_py3_6_gcc5_4_test pytorch_linux_xenial_py3_6_gcc7_test
+    2021-02-10 12:18:50Z 3cf78395    0.644s    0.312s
+    2021-02-10 11:13:34Z 594a66d7    0.360s  errored
+    2021-02-10 10:13:25Z 9c0caf03    0.819s    0.449s
+    2021-02-10 10:09:14Z 602434bc    0.361s    0.454s
+    2021-02-10 10:09:10Z 2e35fe95
+    2021-02-10 10:09:07Z ff73be7e
+    2021-02-10 10:05:39Z 74082f0d
+    2021-02-10 07:42:29Z 0620c96f    0.414s    0.377s (2 S3 reports omitted)
+    2021-02-10 07:27:53Z 33afb5f1    0.381s    0.294s
 
 Minor note: in columns mode, a blank cell means that no report was found
 in S3, while the word "absent" means that a report was found but the
 indicated test was not found in that report.
-''',
+'''
+
+
+def parse_args(raw: List[str]) -> argparse.Namespace:
+    parser = argparse.ArgumentParser(
+        __file__,
+        description=description(),
         formatter_class=HelpFormatter,
     )
     parser.add_argument(
@@ -325,19 +332,25 @@ indicated test was not found in that report.
         help='names of jobs to display columns for, in order',
         default=[],
     )
-    args = parser.parse_args()
+    args = parser.parse_args(raw)
 
-    jobs = None if args.all else args.job
-    if jobs == []:  # no jobs, and not None (which would mean all jobs)
+    args.jobs = None if args.all else args.job
+    if args.jobs == []:  # no jobs, and not None (which would mean all jobs)
         parser.error('No jobs specified.')
 
+    return args
+
+
+def run(raw: List[str]) -> Iterator[str]:
+    args = parse_args(raw)
+
     commits = get_git_commit_history(path=args.pytorch, ref=args.ref)
     bucket = get_S3_bucket_readonly('ossci-metrics')
 
-    display_history(
+    return history_lines(
         bucket=bucket,
         commits=commits,
-        jobs=jobs,
+        jobs=args.jobs,
         filename=args.file,
         suite_name=args.suite,
         test_name=args.test,
@@ -348,5 +361,10 @@ indicated test was not found in that report.
     )
 
 
+def main() -> None:
+    for line in run(sys.argv[1:]):
+        print(line)
+
+
 if __name__ == "__main__":
     main()