From bee924d173c9bc48ac640507b8dfd2c3af481a3d Mon Sep 17 00:00:00 2001
From: Huy Do <huydhn@gmail.com>
Date: Wed, 24 Apr 2024 03:13:38 +0000
Subject: [PATCH] Enable test config selection when doing workflow dispatch
 (#124795)

Fixes https://github.com/pytorch/test-infra/issues/4468

This is done by updating the filter config script to accept a list of test configs coming from workflow dispatch.  For example, having `inductor_huggingface_perf,inductor_timm_perf,inductor_torchbench_perf` will benchmark all 3 datasets, while having `inductor_torchbench_perf` will only run TorchBench.  This is exposed via a new string workflow dispatch parameter called `benchmark_configs`.

Note that GH limits the maximum number of workflow dispatch parameters to 10, so I need to consolidate `training` and `inference` into `training_and_inference` to squeeze the new parameter into the list.

### Testing

Run the script manually and confirm that the filtered list of test configs is correct.

Also manually dispatch the job with the new parameter https://github.com/pytorch/pytorch/actions/runs/8808159905 and only the selected `inductor_huggingface_perf` is kept https://github.com/pytorch/pytorch/actions/runs/8808159905/job/24176683708#step:11:128
Pull Request resolved: https://github.com/pytorch/pytorch/pull/124795
Approved by: https://github.com/clee2000
---
 .../actions/filter-test-configs/action.yml    |  8 ++++
 .github/scripts/filter_test_configs.py        | 38 +++++++++++++++
 .github/scripts/test_filter_test_configs.py   | 46 +++++++++++++++++++
 .github/workflows/_linux-build.yml            | 10 +++-
 .../workflows/inductor-perf-test-nightly.yml  | 21 +++++----
 5 files changed, 112 insertions(+), 11 deletions(-)

diff --git a/.github/actions/filter-test-configs/action.yml b/.github/actions/filter-test-configs/action.yml
index 77a0c50c11d..e1f2067d580 100644
--- a/.github/actions/filter-test-configs/action.yml
+++ b/.github/actions/filter-test-configs/action.yml
@@ -13,6 +13,13 @@ inputs:
     required: true
     type: string
     description: JSON description of what test configs to run.
+  selected-test-configs:
+    required: false
+    type: string
+    description: |
+      A comma-separated list of test configurations from the test matrix to keep.
+      An empty list means that every configuration is kept by default
+    default: ""
   job-name:
     type: string
     required: false
@@ -126,6 +133,7 @@ runs:
           --workflow "${GITHUB_WORKFLOW}" \
           --job-name "${JOB_NAME}" \
           --test-matrix "${{ inputs.test-matrix }}" \
+          --selected-test-configs "${{ inputs.selected-test-configs }}" \
           --pr-number "${PR_NUMBER}" \
           --tag "${TAG}" \
           --event-name "${EVENT_NAME}" \
diff --git a/.github/scripts/filter_test_configs.py b/.github/scripts/filter_test_configs.py
index ebeccaeb160..c2e45bac811 100755
--- a/.github/scripts/filter_test_configs.py
+++ b/.github/scripts/filter_test_configs.py
@@ -66,6 +66,12 @@ def parse_args() -> Any:
     parser.add_argument(
         "--test-matrix", type=str, required=True, help="the original test matrix"
     )
+    parser.add_argument(
+        "--selected-test-configs",
+        type=str,
+        default="",
+        help="a comma-separated list of test configurations from the test matrix to keep",
+    )
     parser.add_argument(
         "--workflow", type=str, help="the name of the current workflow, i.e. pull"
     )
@@ -177,6 +183,28 @@ def filter(test_matrix: Dict[str, List[Any]], labels: Set[str]) -> Dict[str, Lis
         return filtered_test_matrix
 
 
+def filter_selected_test_configs(
+    test_matrix: Dict[str, List[Any]], selected_test_configs: Set[str]
+) -> Dict[str, List[Any]]:
+    """
+    Keep only the selected configs if the list is not empty. Otherwise, keep all test configs.
+    This filter is used when the workflow is dispatched manually.
+    """
+    if not selected_test_configs:
+        return test_matrix
+
+    filtered_test_matrix: Dict[str, List[Any]] = {"include": []}
+    for entry in test_matrix.get("include", []):
+        config_name = entry.get("config", "")
+        if not config_name:
+            continue
+
+        if config_name in selected_test_configs:
+            filtered_test_matrix["include"].append(entry)
+
+    return filtered_test_matrix
+
+
 def set_periodic_modes(
     test_matrix: Dict[str, List[Any]], job_name: Optional[str]
 ) -> Dict[str, List[Any]]:
@@ -558,6 +586,16 @@ def main() -> None:
         # No PR number, no tag, we can just return the test matrix as it is
         filtered_test_matrix = test_matrix
 
+    if args.selected_test_configs:
+        selected_test_configs = {
+            v.strip().lower()
+            for v in args.selected_test_configs.split(",")
+            if v.strip()
+        }
+        filtered_test_matrix = filter_selected_test_configs(
+            filtered_test_matrix, selected_test_configs
+        )
+
     if args.event_name == "schedule" and args.schedule == "29 8 * * *":
         # we don't want to run the mem leak check or disabled tests on normal
         # periodically scheduled jobs, only the ones at this time
diff --git a/.github/scripts/test_filter_test_configs.py b/.github/scripts/test_filter_test_configs.py
index 163c84795df..2f73d022c3d 100755
--- a/.github/scripts/test_filter_test_configs.py
+++ b/.github/scripts/test_filter_test_configs.py
@@ -9,6 +9,7 @@ from unittest import main, mock, TestCase
 import yaml
 from filter_test_configs import (
     filter,
+    filter_selected_test_configs,
     get_labels,
     mark_unstable_jobs,
     parse_reenabled_issues,
@@ -315,6 +316,51 @@ class TestConfigFilter(TestCase):
             )
             self.assertEqual(case["expected"], json.dumps(filtered_test_matrix))
 
+    def test_filter_selected_test_configs(self) -> None:
+        testcases = [
+            {
+                "test_matrix": '{include: [{config: "default"}]}',
+                "selected_test_configs": "",
+                "expected": '{"include": [{"config": "default"}]}',
+                "description": "No selected test configs",
+            },
+            {
+                "test_matrix": '{include: [{config: "default"}]}',
+                "selected_test_configs": "foo",
+                "expected": '{"include": []}',
+                "description": "A different test config is selected",
+            },
+            {
+                "test_matrix": '{include: [{config: "default"}]}',
+                "selected_test_configs": "foo, bar",
+                "expected": '{"include": []}',
+                "description": "A different set of test configs is selected",
+            },
+            {
+                "test_matrix": '{include: [{config: "default"}]}',
+                "selected_test_configs": "foo, bar,default",
+                "expected": '{"include": [{"config": "default"}]}',
+                "description": "One of the test config is selected",
+            },
+            {
+                "test_matrix": '{include: [{config: "default"}, {config: "bar"}]}',
+                "selected_test_configs": "foo, bar,Default",
+                "expected": '{"include": [{"config": "default"}, {"config": "bar"}]}',
+                "description": "Several test configs are selected",
+            },
+        ]
+
+        for case in testcases:
+            selected_test_configs = {
+                v.strip().lower()
+                for v in case["selected_test_configs"].split(",")
+                if v.strip()
+            }
+            filtered_test_matrix = filter_selected_test_configs(
+                yaml.safe_load(case["test_matrix"]), selected_test_configs
+            )
+            self.assertEqual(case["expected"], json.dumps(filtered_test_matrix))
+
     def test_set_periodic_modes(self) -> None:
         testcases: List[Dict[str, str]] = [
             {
diff --git a/.github/workflows/_linux-build.yml b/.github/workflows/_linux-build.yml
index 7edda580f53..c3bcb0d888d 100644
--- a/.github/workflows/_linux-build.yml
+++ b/.github/workflows/_linux-build.yml
@@ -47,13 +47,20 @@ on:
           An option JSON description of what test configs to run later on. This
           is moved here from the Linux test workflow so that we can apply filter
           logic using test-config labels earlier and skip unnecessary builds
+      selected-test-configs:
+        description: |
+          A comma-separated list of test configurations from the test matrix to keep.
+          An empty list means that every configuration is kept by default
+        required: false
+        type: string
+        default: ""
       s3-bucket:
         description: S3 bucket to download artifact
         required: false
         type: string
         default: "gha-artifacts"
       aws-role-to-assume:
-        description: role to assume for downloading artifacts
+        description: Role to assume for downloading artifacts
         required: false
         type: string
         default: ""
@@ -143,6 +150,7 @@ jobs:
         with:
           github-token: ${{ secrets.GITHUB_TOKEN }}
           test-matrix: ${{ inputs.test-matrix }}
+          selected-test-configs: ${{ inputs.selected-test-configs }}
           job-name: ${{ steps.get-job-id.outputs.job-name }}
 
       - name: Download pytest cache
diff --git a/.github/workflows/inductor-perf-test-nightly.yml b/.github/workflows/inductor-perf-test-nightly.yml
index 417646d5f77..16dd92f553f 100644
--- a/.github/workflows/inductor-perf-test-nightly.yml
+++ b/.github/workflows/inductor-perf-test-nightly.yml
@@ -8,16 +8,11 @@ on:
   # out, let try to run torchao cudagraphs_low_precision as part of cudagraphs
   workflow_dispatch:
     inputs:
-      training:
-        description: Run training?
+      training_and_inference:
+        description: Run training and inference?
         required: false
-        type: boolean
-        default: true
-      inference:
-        description: Run inference?
-        required: false
-        type: boolean
-        default: false
+        type: string
+        default: training-true-inference-false
       default:
         description: Run inductor_default?
         required: false
@@ -58,6 +53,11 @@ on:
         required: false
         type: boolean
         default: false
+      benchmark_configs:
+        description: The list of configs used by the benchmark
+        required: false
+        type: string
+        default: inductor_huggingface_perf,inductor_timm_perf,inductor_torchbench_perf
 
 concurrency:
   group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref_name }}-${{ github.ref_type == 'branch' && github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }}
@@ -88,6 +88,7 @@ jobs:
           { config: "inductor_torchbench_perf", shard: 3, num_shards: 4, runner: "linux.gcp.a100.large" },
           { config: "inductor_torchbench_perf", shard: 4, num_shards: 4, runner: "linux.gcp.a100.large" },
         ]}
+      selected-test-configs: ${{ inputs.benchmark_configs }}
     secrets:
       HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
 
@@ -128,7 +129,7 @@ jobs:
     if: github.event_name == 'workflow_dispatch'
     with:
       build-environment: linux-focal-cuda12.1-py3.10-gcc9-sm80
-      dashboard-tag: training-${{ inputs.training }}-inference-${{ inputs.inference }}-default-${{ inputs.default }}-dynamic-${{ inputs.dynamic }}-cudagraphs-${{ inputs.cudagraphs }}-cppwrapper-${{ inputs.cppwrapper }}-aotinductor-${{ inputs.aotinductor }}-maxautotune-${{ inputs.maxautotune }}-freezing_cudagraphs-${{ inputs.freezing_cudagraphs }}-cudagraphs_low_precision-${{ inputs.cudagraphs }}
+      dashboard-tag: ${{ inputs.training_and_inference }}-default-${{ inputs.default }}-dynamic-${{ inputs.dynamic }}-cudagraphs-${{ inputs.cudagraphs }}-cppwrapper-${{ inputs.cppwrapper }}-aotinductor-${{ inputs.aotinductor }}-maxautotune-${{ inputs.maxautotune }}-freezing_cudagraphs-${{ inputs.freezing_cudagraphs }}-cudagraphs_low_precision-${{ inputs.cudagraphs }}
       docker-image: ${{ needs.linux-focal-cuda12_1-py3_10-gcc9-inductor-build.outputs.docker-image }}
       test-matrix: ${{ needs.linux-focal-cuda12_1-py3_10-gcc9-inductor-build.outputs.test-matrix }}
       use-gha: anything-non-empty-to-use-gha