Enable test config selection when doing workflow dispatch (#124795)

Fixes https://github.com/pytorch/test-infra/issues/4468. This is done by updating the filter config script to accept a list of test configs coming from workflow dispatch. For example, having `inductor_huggingface_perf,inductor_timm_perf,inductor_torchbench_perf` will benchmark all 3 datasets, while having `inductor_torchbench_perf` will only run TorchBench. This is exposed via a new string workflow dispatch parameter called `benchmark_configs`. Note that GH limits the maximum number of workflow dispatch parameters to 10, so I need to consolidate `training` and `inference` into `training_and_inference` to squeeze the new parameter into the list. ### Testing Run the script manually and confirm that the filtered list of test configs is correct. Also manually dispatch the job with the new parameter https://github.com/pytorch/pytorch/actions/runs/8808159905 and confirm that only the selected `inductor_huggingface_perf` is kept https://github.com/pytorch/pytorch/actions/runs/8808159905/job/24176683708#step:11:128. Pull Request resolved: https://github.com/pytorch/pytorch/pull/124795 Approved by: https://github.com/clee2000
2026-05-14 20:57:59 +00:00 · 2024-04-24 03:13:38 +00:00 · 2024-04-24 03:13:38 +00:00 · bee924d173
commit bee924d173
parent 9dded148d0
5 changed files with 112 additions and 11 deletions
--- a/.github/actions/filter-test-configs/action.yml
+++ b/.github/actions/filter-test-configs/action.yml
@ -13,6 +13,13 @@ inputs:
    required: true
    type: string
    description: JSON description of what test configs to run.
+  selected-test-configs:
+    required: false
+    type: string
+    description: |
+      A comma-separated list of test configurations from the test matrix to keep.
+      An empty list means that every configuration is kept by default.
+    default: ""
  job-name:
    type: string
    required: false
@ -126,6 +133,7 @@ runs:
          --workflow "${GITHUB_WORKFLOW}" \
          --job-name "${JOB_NAME}" \
          --test-matrix "${{ inputs.test-matrix }}" \
+          --selected-test-configs "${{ inputs.selected-test-configs }}" \
          --pr-number "${PR_NUMBER}" \
          --tag "${TAG}" \
          --event-name "${EVENT_NAME}" \
--- a/.github/scripts/filter_test_configs.py
+++ b/.github/scripts/filter_test_configs.py
@ -66,6 +66,12 @@ def parse_args() -> Any:
    parser.add_argument(
        "--test-matrix", type=str, required=True, help="the original test matrix"
    )
+    parser.add_argument(
+        "--selected-test-configs",
+        type=str,
+        default="",
+        help="a comma-separated list of test configurations from the test matrix to keep",
+    )
    parser.add_argument(
        "--workflow", type=str, help="the name of the current workflow, i.e. pull"
    )
@ -177,6 +183,28 @@ def filter(test_matrix: Dict[str, List[Any]], labels: Set[str]) -> Dict[str, Lis
        return filtered_test_matrix


+def filter_selected_test_configs(
+    test_matrix: Dict[str, List[Any]], selected_test_configs: Set[str]
+) -> Dict[str, List[Any]]:
+    """
+    Keep only the selected configs if the list is not empty. Otherwise, keep all test configs.
+    This filter is used when the workflow is dispatched manually.
+    """
+    if not selected_test_configs:
+        return test_matrix
+
+    filtered_test_matrix: Dict[str, List[Any]] = {"include": []}
+    for entry in test_matrix.get("include", []):
+        config_name = entry.get("config", "")
+        if not config_name:
+            continue
+
+        if config_name in selected_test_configs:
+            filtered_test_matrix["include"].append(entry)
+
+    return filtered_test_matrix
+
+
 def set_periodic_modes(
    test_matrix: Dict[str, List[Any]], job_name: Optional[str]
 ) -> Dict[str, List[Any]]:
@ -558,6 +586,16 @@ def main() -> None:
        # No PR number, no tag, we can just return the test matrix as it is
        filtered_test_matrix = test_matrix

+    if args.selected_test_configs:
+        selected_test_configs = {
+            v.strip().lower()
+            for v in args.selected_test_configs.split(",")
+            if v.strip()
+        }
+        filtered_test_matrix = filter_selected_test_configs(
+            filtered_test_matrix, selected_test_configs
+        )
+
    if args.event_name == "schedule" and args.schedule == "29 8 * * *":
        # we don't want to run the mem leak check or disabled tests on normal
        # periodically scheduled jobs, only the ones at this time
--- a/.github/scripts/test_filter_test_configs.py
+++ b/.github/scripts/test_filter_test_configs.py
@ -9,6 +9,7 @@ from unittest import main, mock, TestCase
 import yaml
 from filter_test_configs import (
    filter,
+    filter_selected_test_configs,
    get_labels,
    mark_unstable_jobs,
    parse_reenabled_issues,
@ -315,6 +316,51 @@ class TestConfigFilter(TestCase):
            )
            self.assertEqual(case["expected"], json.dumps(filtered_test_matrix))

+    def test_filter_selected_test_configs(self) -> None:
+        testcases = [
+            {
+                "test_matrix": '{include: [{config: "default"}]}',
+                "selected_test_configs": "",
+                "expected": '{"include": [{"config": "default"}]}',
+                "description": "No selected test configs",
+            },
+            {
+                "test_matrix": '{include: [{config: "default"}]}',
+                "selected_test_configs": "foo",
+                "expected": '{"include": []}',
+                "description": "A different test config is selected",
+            },
+            {
+                "test_matrix": '{include: [{config: "default"}]}',
+                "selected_test_configs": "foo, bar",
+                "expected": '{"include": []}',
+                "description": "A different set of test configs is selected",
+            },
+            {
+                "test_matrix": '{include: [{config: "default"}]}',
+                "selected_test_configs": "foo, bar,default",
+                "expected": '{"include": [{"config": "default"}]}',
+                "description": "One of the test config is selected",
+            },
+            {
+                "test_matrix": '{include: [{config: "default"}, {config: "bar"}]}',
+                "selected_test_configs": "foo, bar,Default",
+                "expected": '{"include": [{"config": "default"}, {"config": "bar"}]}',
+                "description": "Several test configs are selected",
+            },
+        ]
+
+        for case in testcases:
+            selected_test_configs = {
+                v.strip().lower()
+                for v in case["selected_test_configs"].split(",")
+                if v.strip()
+            }
+            filtered_test_matrix = filter_selected_test_configs(
+                yaml.safe_load(case["test_matrix"]), selected_test_configs
+            )
+            self.assertEqual(case["expected"], json.dumps(filtered_test_matrix))
+
    def test_set_periodic_modes(self) -> None:
        testcases: List[Dict[str, str]] = [
            {
--- a/.github/workflows/_linux-build.yml
+++ b/.github/workflows/_linux-build.yml
@ -47,13 +47,20 @@ on:
          An option JSON description of what test configs to run later on. This
          is moved here from the Linux test workflow so that we can apply filter
          logic using test-config labels earlier and skip unnecessary builds
+      selected-test-configs:
+        description: |
+          A comma-separated list of test configurations from the test matrix to keep.
+          An empty list means that every configuration is kept by default.
+        required: false
+        type: string
+        default: ""
      s3-bucket:
        description: S3 bucket to download artifact
        required: false
        type: string
        default: "gha-artifacts"
      aws-role-to-assume:
-        description: role to assume for downloading artifacts
+        description: Role to assume for downloading artifacts
        required: false
        type: string
        default: ""
@ -143,6 +150,7 @@ jobs:
        with:
          github-token: ${{ secrets.GITHUB_TOKEN }}
          test-matrix: ${{ inputs.test-matrix }}
+          selected-test-configs: ${{ inputs.selected-test-configs }}
          job-name: ${{ steps.get-job-id.outputs.job-name }}

      - name: Download pytest cache
--- a/.github/workflows/inductor-perf-test-nightly.yml
+++ b/.github/workflows/inductor-perf-test-nightly.yml
@ -8,16 +8,11 @@ on:
  # out, let try to run torchao cudagraphs_low_precision as part of cudagraphs
  workflow_dispatch:
    inputs:
-      training:
-        description: Run training?
+      training_and_inference:
+        description: Run training and inference?
        required: false
-        type: boolean
-        default: true
-      inference:
-        description: Run inference?
-        required: false
-        type: boolean
-        default: false
+        type: string
+        default: training-true-inference-false
      default:
        description: Run inductor_default?
        required: false
@ -58,6 +53,11 @@ on:
        required: false
        type: boolean
        default: false
+      benchmark_configs:
+        description: The list of configs used by the benchmark
+        required: false
+        type: string
+        default: inductor_huggingface_perf,inductor_timm_perf,inductor_torchbench_perf

 concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref_name }}-${{ github.ref_type == 'branch' && github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }}
@ -88,6 +88,7 @@ jobs:
          { config: "inductor_torchbench_perf", shard: 3, num_shards: 4, runner: "linux.gcp.a100.large" },
          { config: "inductor_torchbench_perf", shard: 4, num_shards: 4, runner: "linux.gcp.a100.large" },
        ]}
+      selected-test-configs: ${{ inputs.benchmark_configs }}
    secrets:
      HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}

@ -128,7 +129,7 @@ jobs:
    if: github.event_name == 'workflow_dispatch'
    with:
      build-environment: linux-focal-cuda12.1-py3.10-gcc9-sm80
-      dashboard-tag: training-${{ inputs.training }}-inference-${{ inputs.inference }}-default-${{ inputs.default }}-dynamic-${{ inputs.dynamic }}-cudagraphs-${{ inputs.cudagraphs }}-cppwrapper-${{ inputs.cppwrapper }}-aotinductor-${{ inputs.aotinductor }}-maxautotune-${{ inputs.maxautotune }}-freezing_cudagraphs-${{ inputs.freezing_cudagraphs }}-cudagraphs_low_precision-${{ inputs.cudagraphs }}
+      dashboard-tag: ${{ inputs.training_and_inference }}-default-${{ inputs.default }}-dynamic-${{ inputs.dynamic }}-cudagraphs-${{ inputs.cudagraphs }}-cppwrapper-${{ inputs.cppwrapper }}-aotinductor-${{ inputs.aotinductor }}-maxautotune-${{ inputs.maxautotune }}-freezing_cudagraphs-${{ inputs.freezing_cudagraphs }}-cudagraphs_low_precision-${{ inputs.cudagraphs }}
      docker-image: ${{ needs.linux-focal-cuda12_1-py3_10-gcc9-inductor-build.outputs.docker-image }}
      test-matrix: ${{ needs.linux-focal-cuda12_1-py3_10-gcc9-inductor-build.outputs.test-matrix }}
      use-gha: anything-non-empty-to-use-gha