diff --git a/benchmarks/dynamo/common.py b/benchmarks/dynamo/common.py
index b88d6ca8f57..87fbdaeb833 100644
--- a/benchmarks/dynamo/common.py
+++ b/benchmarks/dynamo/common.py
@@ -2,7 +2,6 @@
 
 from __future__ import annotations
 
-import abc
 import argparse
 import collections
 import contextlib
@@ -22,13 +21,10 @@ import sys
 import time
 import weakref
 from contextlib import contextmanager
-from pathlib import Path
-from typing import Any, Callable, NamedTuple, Optional, TYPE_CHECKING
-from typing_extensions import Self
+from typing import Any, NamedTuple, TYPE_CHECKING
 from unittest.mock import MagicMock
 
 import numpy as np
-import numpy.typing as npt
 import pandas as pd
 import psutil
 import yaml
@@ -86,9 +82,7 @@ except ImportError:
 
 
 if TYPE_CHECKING:
-    from collections.abc import Generator, Mapping, Sequence
-
-    from torch.onnx._internal.fx import diagnostics
+    from collections.abc import Mapping
 
 
 log = logging.getLogger(__name__)
@@ -106,7 +100,6 @@ current_mode = ""
 current_dtype = ""
 current_quantization = ""
 current_settings = None
-current_onnx_compiler = ""
 current_batch_size = None
 output_filename = None
 disable_output = False
@@ -1277,170 +1270,6 @@ def speedup_experiment_ds(args, model_iter_fn, model, example_inputs):
     return output_str
 
 
-@contextlib.contextmanager
-def override_synchronize_with_onnx_iobinding(iobinding):
-    global synchronize
-    prev_synchrnoize = synchronize
-    try:
-        if iobinding is not None:
-
-            def new_synchronize():
-                iobinding.synchronize_inputs()
-                iobinding.synchronize_outputs()
-
-            synchronize = new_synchronize
-        yield
-    finally:
-        synchronize = prev_synchrnoize
-
-
-def speedup_experiment_onnx(
-    args,
-    model_iter_fn,
-    onnx_model: OnnxModel,
-    model,
-    example_inputs,
-    **kwargs,
-):
-    """
-    Measure speedups over eager.
-
-    This function is responsible for the following:
-        1. Creating iobinding with OnnxModel if device is CUDA, which is essential for perf measurement.
-        2. Running ORT with OnnxModel.
-
-    Writes to ./{output_filename}, which should be
-        `Path(self.output_dir) / f"{self.compiler}_{suite}_{self.dtype}_{self.mode}_{self.device}_{self.testing}.csv".
-
-    TODO(bowbao): Record export time and export peak memory usage.
-    """
-    timings = np.zeros((args.repeat, 2), np.float64)
-    is_correct = True
-    should_randomize_input = args.randomize_input
-    times = args.iterations_per_run
-
-    def create_onnx_input_binded_fn(onnx_model: OnnxModel, pt_inputs, example_outputs):
-        # Goal is to move the iobinding creation outside of the timer function.
-        iobinding, outputs = onnx_model.create_iobinding(pt_inputs, example_outputs)
-
-        def onnxrt_model_iter_fn(model, inputs, collect_outputs=True):
-            onnx_model.run_with_iobinding(iobinding, outputs)
-            if collect_outputs:
-                return outputs
-
-        return onnxrt_model_iter_fn, iobinding
-
-    def create_onnx_fn(onnx_model: OnnxModel, pt_inputs):
-        # NOTE: Making perf comparison fair by moving out the i/o adapting part.
-        # 1. Pre-adapt `pt_inputs` to `onnx_inputs` here.
-        # 2. Drop `onnx_outputs` to `pt_outputs` adapting. Output comparison is not part of perf measurement.
-        onnx_inputs = onnx_model.adapt_pt_inputs_to_onnx(pt_inputs)
-
-        def onnxrt_model_iter_fn(model, inputs, collect_outputs=True):
-            return onnx_model.run_with_onnx_inputs(onnx_inputs)
-
-        return onnxrt_model_iter_fn
-
-    def timed_onnx(model, onnx_model: OnnxModel, inputs):
-        if current_device == "cpu" or onnx_model.is_cpu():
-            onnxrt_model_iter_fn = create_onnx_fn(onnx_model, inputs)
-            iobinding = None
-        else:
-            onnxrt_model_iter_fn, iobinding = create_onnx_input_binded_fn(
-                onnx_model, inputs, expected_output
-            )
-        with override_synchronize_with_onnx_iobinding(iobinding):
-            return timed(
-                model,
-                onnxrt_model_iter_fn,
-                inputs,
-                return_result=True,
-                times=times,
-                collect_outputs=args.collect_outputs,
-            )
-
-    # Insert ONNX warm-up
-    inputs = (
-        randomize_input(copy.deepcopy(example_inputs))
-        if should_randomize_input
-        else example_inputs
-    )
-    _, expected_output = timed(
-        model,
-        model_iter_fn,
-        inputs,
-        return_result=True,
-        times=times,
-        collect_outputs=args.collect_outputs,
-    )
-    for _ in range(2):
-        timed_onnx(model, onnx_model, inputs)
-
-    for rep in range(args.repeat):
-        inputs = (
-            randomize_input(copy.deepcopy(example_inputs))
-            if should_randomize_input
-            else example_inputs
-        )
-        if torch.cuda.device_count() > 1:
-            # Manually set correct torch.cuda.current_device to ensure torch.cuda.synchronize() works as intended.
-            # When there are more than 1 cuda devices, the first one is used for pytorch eager.
-            # The second one is used for onnx ort.
-            torch.cuda.set_device(0)
-        timings[rep, 0], expected_output = timed(
-            model,
-            model_iter_fn,
-            inputs,
-            return_result=True,
-            times=times,
-            collect_outputs=args.collect_outputs,
-        )
-        if torch.cuda.device_count() > 1:
-            # Manually set correct torch.cuda.current_device to ensure torch.cuda.synchronize() works as intended.
-            # When there are more than 1 cuda devices, the first one is used for pytorch eager.
-            # The second one is used for onnx ort.
-            torch.cuda.set_device(1)
-        timings[rep, 1], actual_output = timed_onnx(model, onnx_model, inputs)
-
-    pvalue = ttest_ind(timings[:, 0], timings[:, 1]).pvalue
-    median = np.median(timings, axis=0)
-    speedup = median[0] / median[1]
-    if args.dump_raw_metrics:
-        np.save(
-            f"{output_filename[:-4]}-raw_timings-{current_name}-{current_device}.npy",
-            timings,
-        )
-
-    headers = ["dev", "name", "batch_size", "speedup", "abs_latency"]
-    row = [
-        current_device,
-        current_name,
-        current_batch_size,
-        float(speedup),
-        median[1] * 1000,
-    ]
-    if "compilation_latency" in kwargs:
-        headers = headers + ["compilation_latency", "compression_ratio"]
-        row.append(kwargs["compilation_latency"])
-        row.append(kwargs["compression_ratio"])
-
-    write_outputs(
-        output_filename,
-        headers,
-        row,
-    )
-    headers, data = torch._dynamo.utils.compile_times(repr="csv", aggregate=True)
-    assert (
-        output_filename.find(".csv") > 0
-    ), f"expected output_filename to be a .csv, but got {output_filename}"
-    write_outputs(
-        output_filename[:-4] + "_compilation_metrics.csv",
-        ["dev", "name", "batch_size"] + headers,
-        [current_device, current_name, current_batch_size] + data,
-    )
-    return format_speedup(speedup, pvalue, is_correct=is_correct)
-
-
 def overhead_experiment(*args, model_iter_fn):
     """
     Measure overheads of TorchDynamo by running with no backend (only
@@ -1683,685 +1512,6 @@ def download_retry_decorator(download_fn):
     return wrapper
 
 
-class OnnxModel(abc.ABC):
-    TORCH_TO_NUMPY_DTYPE = {
-        torch.float16: np.float16,
-        torch.float32: np.float32,
-        torch.float64: np.float64,
-        torch.uint8: np.uint8,
-        torch.int8: np.int8,
-        torch.int16: np.int16,
-        torch.int32: np.int32,
-        torch.int64: np.longlong,
-        torch.bool: np.bool_,
-    }
-
-    _COMPILER_NAME: str
-
-    def __init__(
-        self,
-        output_directory,
-        model,
-        example_inputs,
-        dynamic_shapes: bool,
-        copy_before_export: bool = False,
-        use_experimental_patch: bool = False,
-    ):
-        """The abstract class for exporting ONNX model.
-
-        Args:
-            output_directory: output path
-            model: model
-            example_inputs: example inputs for exporting
-            dynamic_shapes (bool): Whether to export the model with dynamic shapes.
-            copy_before_export (bool,): copy before export. Defaults to False.
-            use_experimental_patch (bool): Whether to apply torch_onnx patch which exports
-                with torch.export and onnx ir. Defaults to False.
-        """
-        model_name = current_name
-        self.copy_before_export = copy_before_export
-        self.use_experimental_patch = use_experimental_patch
-        # NOTE: torch_onnx patch is using OnnxModelFromTorchScript to export ONNX model.
-        if self.use_experimental_patch:
-            self._COMPILER_NAME = "torch_onnx_patch"
-        self.model_dir = self._generate_onnx_model_directory(
-            output_directory, self._COMPILER_NAME, model_name
-        )
-        self.model_path = str(
-            self.model_dir / f"{model_name}_{self._COMPILER_NAME}.onnx"
-        )
-
-    def _determine_deepcopy_target_device(self):
-        if current_device == "cpu":
-            target_device = "cpu"
-        else:
-            if torch.cuda.device_count() > 1:
-                # Copy to another cuda device to avoid OOM.
-                target_device = "cuda:1"
-            else:
-                target_device = "cuda"
-        return target_device
-
-    def deepcopy_model_and_inputs_to_device(self, model, example_inputs, target_device):
-        # Deepcopy model before export to avoid modification to baseline model.
-        # To avoid OOM, the model is first moved to CPU. Both models are then moved to device.
-        model_device = next(model.parameters()).device
-        model.to("cpu")
-        model_copy = copy.deepcopy(model).to(target_device)
-        model.to(model_device)
-
-        target_device_example_inputs = tree_map_only(
-            torch.Tensor, lambda x: x.to(device=target_device), example_inputs
-        )
-
-        return model_copy, target_device_example_inputs
-
-    @classmethod
-    def _generate_onnx_model_directory(
-        cls, output_directory: str, compiler_name: str, model_name: str
-    ) -> Path:
-        model_path = Path(
-            output_directory,
-            ".onnx_models",
-            model_name,
-            compiler_name,
-        )
-        if model_path.exists() and model_path.is_dir():
-            shutil.rmtree(model_path)
-        model_path.mkdir(parents=True, exist_ok=True)
-        return model_path
-
-    @abc.abstractmethod
-    def format_pt_inputs(self, pt_inputs: Any) -> Sequence[torch.Tensor]: ...
-
-    @abc.abstractmethod
-    def format_pt_outputs(self, pt_outputs: Any) -> Sequence[torch.Tensor]: ...
-
-    def adapt_pt_inputs_to_onnx(self, pt_inputs) -> Mapping[str, npt.NDArray]:
-        pt_inputs = self.format_pt_inputs(pt_inputs)
-        return {
-            ort_input.name: pt_input.cpu().numpy()
-            for ort_input, pt_input in zip(self.onnx_session.get_inputs(), pt_inputs)
-        }
-
-    def adapt_onnx_outputs_to_pt(self, onnx_outputs: list[npt.NDArray]) -> Any:
-        pt_outputs = [
-            torch.from_numpy(onnx_output).to(current_device)
-            for onnx_output in onnx_outputs
-        ]
-        if len(pt_outputs) == 1:
-            return pt_outputs[0]
-        return pt_outputs
-
-    def _init_ort_session(self, model_path: str):
-        import onnxruntime
-
-        if current_device == "cpu":
-            ort_providers = ["CPUExecutionProvider"]
-        else:
-            # NOTE(bowbao): Reduce OOM by running ORT on another gpu.
-            # TODO(bowbao): This works to avoid OOM, but performance is surprisingly very bad.
-            cuda_provider_options = {
-                "device_id": 1 if torch.cuda.device_count() > 1 else 0,
-            }
-            ort_providers = [("CUDAExecutionProvider", cuda_provider_options)]
-        session_options = onnxruntime.SessionOptions()
-        session_options.log_severity_level = 3  # Error
-
-        ort_session = onnxruntime.InferenceSession(
-            self.model_path,
-            providers=ort_providers,
-            sess_options=session_options,
-        )
-        return ort_session
-
-    def is_cpu(self) -> bool:
-        return self.onnx_session.get_providers()[0] == "CPUExecutionProvider"
-
-    def cpu(self) -> Self:
-        self.onnx_session.set_providers(["CPUExecutionProvider"])
-        return self
-
-    def create_outputs(self, *example_outputs):
-        return tuple(torch.empty_like(x) for x in example_outputs)
-
-    def create_iobinding(self, pt_inputs, example_outputs):
-        pt_inputs = self.format_pt_inputs(pt_inputs)
-        example_outputs = self.format_pt_outputs(example_outputs)
-
-        iobinding = self.onnx_session.io_binding()
-        args = [arg.contiguous() for arg in pt_inputs]
-        for ort_input, arg in zip(self.onnx_session.get_inputs(), args):
-            # NOTE: Run ORT on another cuda device to reduce OOM.
-            if torch.cuda.device_count() > 1:
-                arg = arg.detach().to("cuda:1")
-            device = arg.device
-            iobinding.bind_input(
-                ort_input.name,
-                device.type,
-                device.index or 0,
-                self.TORCH_TO_NUMPY_DTYPE[arg.dtype],
-                arg.size(),
-                arg.data_ptr(),
-            )
-
-        outputs = self.create_outputs(*example_outputs)
-        for ort_output, output in zip(self.onnx_session.get_outputs(), outputs):
-            if torch.cuda.device_count() > 1:
-                output = output.detach().to("cuda:1")
-            device = output.device
-            iobinding.bind_output(
-                ort_output.name,
-                device.type,
-                device.index or 0,
-                self.TORCH_TO_NUMPY_DTYPE[output.dtype],
-                output.size(),
-                output.data_ptr(),
-            )
-        return iobinding, outputs
-
-    def run_with_iobinding(self, iobinding, outputs):
-        # 'outputs' are torch empty tensors binded to 'iobinding'.
-        self.onnx_session.run_with_iobinding(iobinding)
-        return outputs
-
-    def run_with_onnx_inputs(self, onnx_inputs):
-        return self.onnx_session.run(None, onnx_inputs)
-
-    @classmethod
-    def save_tensor_data(cls, numpy_tensor, output_path):
-        from onnx import numpy_helper
-
-        proto_tensor = numpy_helper.from_array(numpy_tensor)
-        with open(output_path, "wb") as f:
-            f.write(proto_tensor.SerializeToString())
-
-    def run_and_serialize_inputs_outputs(self, pt_inputs):
-        test_data_dir = self.model_dir / "test_data_set_0"
-        test_data_dir.mkdir(parents=True, exist_ok=True)
-
-        onnx_inputs = self.adapt_pt_inputs_to_onnx(pt_inputs)
-        for i, onnx_input in enumerate(onnx_inputs.values()):
-            self.save_tensor_data(onnx_input, str(test_data_dir / f"input_{i}.pb"))
-
-        onnx_outputs = self.run_with_onnx_inputs(onnx_inputs)
-
-        for i, onnx_output in enumerate(onnx_outputs):
-            self.save_tensor_data(onnx_output, str(test_data_dir / f"output_{i}.pb"))
-
-        return self.adapt_onnx_outputs_to_pt(onnx_outputs)
-
-    def run(self, pt_inputs):
-        # NOTE: For CUDA performance testing, use `run_with_iobinding` to exclude memory
-        # copying overhead for inputs/outputs between cpu and gpu.
-        # Otherwise perf number is inaccurate.
-        onnx_inputs = self.adapt_pt_inputs_to_onnx(pt_inputs)
-        onnx_outputs = self.run_with_onnx_inputs(onnx_inputs)
-        return self.adapt_onnx_outputs_to_pt(onnx_outputs)
-
-
-class OnnxModelFromTorchScript(OnnxModel):
-    """TorchScript based onnx export. `torch.onnx.export`
-
-    TODO(bowbao):
-    * large model export failed.
-          Onnx Model is larger than 2GB, but exporter makes decision based pt model size, which is
-          smaller than 2GB.
-    * OOM on slightly larger model.
-          Both pt model and ort inference session are on gpu. Attempt has been made to move ORT to
-          cuda:1, however ORT perf drop significantly.
-          For now running everything with batch_size 1 set in launch script.
-    """
-
-    _COMPILER_NAME = "torchscript"
-
-    def __init__(
-        self, output_directory, model, example_inputs, dynamic_shapes: bool, **kwargs
-    ):
-        if dynamic_shapes:
-            raise NotImplementedError("NYI dynamic shapes for OnnxModelFromTorchScript")
-        super().__init__(
-            output_directory, model, example_inputs, dynamic_shapes, **kwargs
-        )
-        self._export(
-            model,
-            example_inputs,
-            self.model_path,
-            opset_version=17,
-            do_constant_folding=False,
-            verbose=False,
-        )
-        self.onnx_session = self._init_ort_session(self.model_path)
-
-    def _export(self, model, example_inputs, output_path: str, /, **kwargs) -> None:
-        if self.copy_before_export:
-            # Deepcopy model before export to avoid modification to baseline model.
-            model, example_inputs = self.deepcopy_model_and_inputs_to_device(
-                model, example_inputs, self._determine_deepcopy_target_device()
-            )
-
-        # Hack for huggingface models (kwargs only).
-        if isinstance(example_inputs, dict):
-
-            class WrapperModel(torch.nn.Module):
-                def __init__(self, model, keys):
-                    super().__init__()
-                    self.model = model
-                    self.keys = keys
-
-                def forward(self, *args):
-                    return self.model(**dict(zip(self.keys, args)))
-
-            model = WrapperModel(model, list(example_inputs.keys()))
-
-        if self.use_experimental_patch:
-            import torch_onnx
-
-            torch_onnx.patch_torch(
-                error_report=True,
-                profile=True,
-                dump_exported_program=True,
-                artifacts_dir=os.path.dirname(output_path),
-            )
-        else:
-            # make sure the patch is not in effect
-            try:
-                import torch_onnx
-
-                torch_onnx.unpatch_torch()
-            except ImportError:
-                pass
-
-        torch.onnx.export(
-            model,
-            self.format_pt_inputs(example_inputs),
-            output_path,
-            **kwargs,
-        )
-
-    def format_pt_inputs(self, pt_inputs):
-        # NOTE(bowbao): For huggingface benchmark, pt_inputs are formatted as dictionary,
-        # and consumed like `model(**pt_inputs)`.
-        # For other benchmarks, pt_inputs are formatted as tuple and consumed
-        # like `model(*pt_inputs)`.
-        if isinstance(pt_inputs, dict):
-            pt_inputs = list(pt_inputs.values())
-        if isinstance(pt_inputs, torch.Tensor):
-            pt_inputs = (pt_inputs,)
-        return tuple(arg.contiguous() for arg in pt_inputs)
-
-    def format_pt_outputs(self, pt_outputs):
-        if isinstance(pt_outputs, torch.Tensor):
-            pt_outputs = (pt_outputs,)
-
-        pt_outputs = pytree.tree_leaves(pt_outputs)
-
-        # Hack for huggingface model outputs
-        try:
-            from transformers import modeling_outputs
-        except ImportError:
-            pass
-        else:
-
-            def _to_tuple(x):
-                if isinstance(x, modeling_outputs.ModelOutput):
-                    return x.to_tuple()
-                return x
-
-            pt_outputs = pytree.tree_map(_to_tuple, pt_outputs)
-            pt_outputs = pytree.tree_leaves(pt_outputs)
-
-        return pt_outputs
-
-
-class OnnxModelFromDynamo(OnnxModel):
-    """Dynamo and Fx based export. `torch.onnx.dynamo_export`."""
-
-    _COMPILER_NAME = "dynamo"
-
-    def __init__(
-        self, output_directory, model, example_inputs, dynamic_shapes: bool, **kwargs
-    ):
-        super().__init__(
-            output_directory, model, example_inputs, dynamic_shapes, **kwargs
-        )
-        self._dynamic_shapes = dynamic_shapes
-        self._onnx_program = self._export(model, example_inputs, self.model_path)
-        # Clear the model proto to save memory.
-        # The model proto is saved to disk and no longer needed from `onnx_program`.
-        # `onnx_program` is kept for i/o adapter usage.
-        self._onnx_program.model_proto.Clear()
-        self.onnx_session = self._init_ort_session(self.model_path)
-
-    def _export(
-        self, model, example_inputs, output_path: str
-    ) -> torch.onnx.ONNXProgram:
-        if self.copy_before_export:
-            # Deepcopy model before export to avoid modification to baseline model.
-            model, example_inputs = self.deepcopy_model_and_inputs_to_device(
-                model, example_inputs, self._determine_deepcopy_target_device()
-            )
-
-        example_args, example_kwargs = _normalize_bench_inputs(example_inputs)
-        options = torch.onnx.ExportOptions(dynamic_shapes=self._dynamic_shapes)
-        onnx_program = torch.onnx.dynamo_export(
-            model, *example_args, **example_kwargs, export_options=options
-        )
-
-        onnx_program.save(output_path)
-        return onnx_program
-
-    def format_pt_inputs(self, pt_inputs):
-        pt_args, pt_kwargs = _normalize_bench_inputs(pt_inputs)
-        return self._onnx_program.adapt_torch_inputs_to_onnx(*pt_args, **pt_kwargs)
-
-    def format_pt_outputs(self, pt_outputs):
-        return self._onnx_program.adapt_torch_outputs_to_onnx(pt_outputs)
-
-
-class OnnxModelFromDynamoAotInline(OnnxModelFromDynamo):
-    """Dynamo and Fx based export, with AOT inline post export. `torch.onnx.dynamo_export`."""
-
-    _COMPILER_NAME = "dynamo_aot_inline"
-
-    def _export(
-        self, model, example_inputs, output_path: str
-    ) -> torch.onnx.ONNXProgram:
-        if self.copy_before_export:
-            # Deepcopy model before export to avoid modification to baseline model.
-            model, example_inputs = self.deepcopy_model_and_inputs_to_device(
-                model, example_inputs, self._determine_deepcopy_target_device()
-            )
-
-        example_args, example_kwargs = _normalize_bench_inputs(example_inputs)
-        options = torch.onnx.ExportOptions(dynamic_shapes=self._dynamic_shapes)
-        onnx_program = torch.onnx.dynamo_export(
-            model, *example_args, **example_kwargs, export_options=options
-        )
-        # Apply AOT inline post export.
-        # Requires onnx >= 1.15
-        import onnx
-        import onnx.inliner
-
-        # Workaround for inliner not supporting with models larger than 2GB.
-        # Save model to disk first separating out external data,
-        # and load back without external data for inliner to work on.
-        model_proto = onnx_program.model_proto
-        onnx.save_model(model_proto, output_path, save_as_external_data=True)
-        model_proto = onnx.load(output_path, load_external_data=False)
-        model_proto = onnx.inliner.inline_local_functions(model_proto)
-        onnx.save_model(model_proto, output_path)
-        return onnx_program
-
-
-class OnnxModelFromDynamoAotOptimize(OnnxModelFromDynamo):
-    """Dynamo and Fx based export, with AOT optimize post export. `torch.onnx.dynamo_export`."""
-
-    _COMPILER_NAME = "dynamo_aot_optimize"
-
-    def _export(
-        self, model, example_inputs, output_path: str
-    ) -> torch.onnx.ONNXProgram:
-        if self.copy_before_export:
-            # Deepcopy model before export to avoid modification to baseline model.
-            model, example_inputs = self.deepcopy_model_and_inputs_to_device(
-                model, example_inputs, self._determine_deepcopy_target_device()
-            )
-
-        example_args, example_kwargs = _normalize_bench_inputs(example_inputs)
-        options = torch.onnx.ExportOptions(dynamic_shapes=self._dynamic_shapes)
-        export_output = torch.onnx.dynamo_export(
-            model, *example_args, **example_kwargs, export_options=options
-        )
-
-        import onnx
-        from onnxscript.rewriter.onnxruntime import rewrite
-
-        model_proto = rewrite(export_output.model_proto)
-        onnx.save_model(
-            model_proto,
-            output_path,
-            save_as_external_data=True,
-            all_tensors_to_one_file=True,
-        )
-
-        return export_output
-
-
-class _OnnxPatch:
-    @classmethod
-    def patch_non_tensor_outputs(cls, correct_result, new_result, fp64_outputs):
-        """Patch non-tensor outputs to make them comparable with the correct result.
-
-        ONNX model always returns a flat tuple of tensors, but the PyTorch model outputs
-        `correct_result` and `fp64_outputs` can be arbitrary types. This function normalizes
-        the outputs to make them comparable with the ONNX model output.
-        """
-        try:
-            from transformers import modeling_outputs
-        except ImportError:
-            has_transformers = False
-        else:
-            has_transformers = True
-
-        if has_transformers and isinstance(
-            correct_result, modeling_outputs.ModelOutput
-        ):
-            correct_result = correct_result.to_tuple()
-            fp64_outputs = fp64_outputs.to_tuple() if fp64_outputs is not None else None
-        elif type(correct_result).__name__ in (
-            "MaskedLMOutput",
-            "Seq2SeqLMOutput",
-            "CausalLMOutputWithCrossAttentions",
-            "LongformerMaskedLMOutput",
-            "Instances",
-            "SquashedNormal",
-            "Boxes",
-            "Normal",
-            "TanhTransform",
-            "Foo",
-            "Variable",
-        ):
-            # Copied from `same` function in `torch._dynamo.utils`
-            correct_result = [
-                value
-                for key in correct_result.__dict__.keys()
-                if (value := getattr(correct_result, key)) is not None
-            ]
-            fp64_outputs = (
-                [
-                    value
-                    for key in fp64_outputs.__dict__.keys()
-                    if (value := getattr(fp64_outputs, key)) is not None
-                ]
-                if fp64_outputs is not None
-                else None
-            )
-
-        # Flatten nested tuple of tensors, i.e. past_key_values
-        correct_result = pytree.tree_leaves(correct_result)
-        # Hack to put results from different runs on same device.
-        # This is needed for ONNX CPU fallback benchmark, where PyTorch eager is run on GPU.
-        # Assuming outputs from a single run are always on same device!
-        devices = [x.device for x in correct_result if isinstance(x, torch.Tensor)]
-        assert devices and all(
-            x == devices[0] for x in devices
-        ), "All tensors must be on same device!"
-        device = devices[0]
-        new_result = pytree.tree_leaves(new_result)
-        new_result = pytree.tree_map(
-            lambda x: x.to(device=device) if isinstance(x, torch.Tensor) else x,
-            new_result,
-        )
-        fp64_outputs = pytree.tree_leaves(fp64_outputs)
-
-        return correct_result, new_result, fp64_outputs
-
-
-@dataclasses.dataclass
-class OnnxExportErrorRow:
-    device: str
-    model_name: str
-    batch_size: int
-    rule_id: Optional[str] = None
-    rule_name: Optional[str] = None
-    diagnostic_level: Optional[str] = None
-    diagnostic_message: Optional[str] = None
-    exception_type_name: Optional[str] = None
-    exception_message: Optional[str] = None
-
-    def __post_init__(self):
-        assert (
-            self.rule_id is not None
-            and self.rule_name is not None
-            and self.diagnostic_level is not None
-            and self.diagnostic_message is not None
-        ) or self.exception_type_name, (
-            "Either rule_id, rule_name, diagnostic_level and diagnostic_message "
-            "must be set or exception_type_name must be set"
-        )
-
-    @property
-    def headers(self) -> list[str]:
-        return [field.name for field in dataclasses.fields(self)]
-
-    @property
-    def row(self) -> list[str]:
-        return [getattr(self, field.name) for field in dataclasses.fields(self)]
-
-
-class OnnxExportErrorParser:
-    def __init__(self, device: str, model_name: str, batch_size: int):
-        self.device = device
-        self.model_name = model_name
-        self.batch_size = batch_size
-
-    def _qualified_exception_class_name(self, exception: Exception) -> str:
-        if exception.__class__.__module__ == "builtins":
-            return exception.__class__.__name__
-        return f"{exception.__class__.__module__}.{exception.__class__.__name__}"
-
-    def parse_diagnostic_context(
-        self,
-        diagnostic_context: diagnostics.DiagnosticContext,
-    ) -> Generator[OnnxExportErrorRow, Any, Any]:
-        from torch.onnx._internal.fx import diagnostics
-
-        for diagnostic in diagnostic_context.diagnostics:
-            if diagnostic.level >= diagnostics.levels.ERROR:
-                yield OnnxExportErrorRow(
-                    device=self.device,
-                    model_name=self.model_name,
-                    batch_size=self.batch_size,
-                    rule_id=diagnostic.rule.id,
-                    rule_name=diagnostic.rule.name,
-                    diagnostic_level=diagnostic.level.name,
-                    diagnostic_message=diagnostic.message,
-                )
-
-    def parse_exception(self, exception: Exception) -> OnnxExportErrorRow:
-        return OnnxExportErrorRow(
-            device=self.device,
-            model_name=self.model_name,
-            batch_size=self.batch_size,
-            exception_type_name=self._qualified_exception_class_name(exception),
-            exception_message=str(exception),
-        )
-
-
-@dataclasses.dataclass
-class OnnxContext:
-    onnx_model: Optional[OnnxModel] = None
-
-
-def optimize_onnx_ctx(
-    output_directory: str,
-    onnx_model_cls: type[OnnxModel],
-    run_n_iterations: Callable,
-    dynamic_shapes: bool = False,
-    copy_before_export: bool = False,
-    use_experimental_patch: bool = False,
-) -> Callable:
-    # NOTE(bowbao): This function creates and returns the onnx version of 'run_n_iterations',
-    # which does the following:
-    #   1. Export and cache model.
-    #   2. Create iobinding for ORT.
-    #   3. Run ORT for n iterations.
-    # The cached model is stored in 'context' under the returned callable.
-    context = OnnxContext()
-    test_data_dumped = False
-
-    def run_n_iterations_onnx(model, inputs, n=2):
-        from torch.onnx._internal import _exporter_legacy
-        from torch.onnx._internal.fx import diagnostics
-
-        # NOTE(bowbao): Capture all export & ort errors and diagnostics.
-        # Serialize to csv, to be parsed and summarized later by '._onnx/reporter.py'.
-        # TODO: Accuracy mismatch is not reported here in csv.
-        assert (
-            output_filename.find(".csv") > 0
-        ), f"expected output_filename to be a .csv, but got {output_filename}"
-        output_error_filename = output_filename[:-4] + "_export_error.csv"
-        parser = OnnxExportErrorParser(current_device, current_name, current_batch_size)
-        try:
-            nonlocal context
-            if context.onnx_model is None:
-                context.onnx_model = onnx_model_cls(
-                    output_directory,
-                    model,
-                    copy.deepcopy(inputs),
-                    dynamic_shapes=dynamic_shapes,
-                    copy_before_export=copy_before_export,
-                    use_experimental_patch=use_experimental_patch,
-                )
-            onnx_model = context.onnx_model
-
-            for _ in range(n):
-                nonlocal test_data_dumped
-                if not test_data_dumped:
-                    # Serializes inputs and outputs to .pb files for further offline analysis.
-                    # Due to this, this function is not and should not be used for perf measurement.
-                    outputs = onnx_model.run_and_serialize_inputs_outputs(inputs)
-                    test_data_dumped = True
-                else:
-                    outputs = onnx_model.run(inputs)
-            return outputs
-        except _exporter_legacy.OnnxExporterError as e:
-            # `torch.onnx.dynamo_export` raises error that encloses diagnostics.
-            diagnostic_context = e.onnx_program.diagnostic_context
-            for parsed_error in parser.parse_diagnostic_context(diagnostic_context):
-                write_outputs(
-                    output_error_filename, parsed_error.headers, parsed_error.row
-                )
-            if context.onnx_model is not None:
-                e.onnx_program.save_diagnostics(
-                    f"{context.onnx_model.model_dir}/"
-                    f"{current_onnx_compiler}_{current_name}_{current_device}.sarif"
-                )
-
-            # Check also the raw exception that caused export failure.
-            # Skip if it is already analyzed by diagnostics.
-            cause_of_exception = e.__cause__
-            if not isinstance(
-                cause_of_exception, diagnostics.RuntimeErrorWithDiagnostic
-            ):
-                parsed_error = parser.parse_exception(cause_of_exception)
-                write_outputs(
-                    output_error_filename, parsed_error.headers, parsed_error.row
-                )
-            raise
-        except Exception as e:
-            # `torch.onnx.export` errors.
-            # ORT errors.
-            parsed_error = parser.parse_exception(e)
-            write_outputs(output_error_filename, parsed_error.headers, parsed_error.row)
-            raise
-
-    run_n_iterations_onnx.context = context
-
-    return run_n_iterations_onnx
-
-
 def read_batch_size_from_file(args, filename, model_name):
     batch_size = None
     if os.path.exists("benchmarks"):
@@ -3095,26 +2245,6 @@ class BenchmarkRunner:
             if name in self.skip_accuracy_check_as_eager_non_deterministic:
                 return record_status("pass_due_to_skip", dynamo_start_stats=start_stats)
 
-            if (
-                current_onnx_compiler == "torchscript"
-                or current_onnx_compiler == "dynamo"
-            ):
-                # Workaround for ONNX for non-tensor outputs
-                (
-                    correct_result,
-                    new_result,
-                    fp64_outputs,
-                ) = _OnnxPatch.patch_non_tensor_outputs(
-                    correct_result, new_result, fp64_outputs
-                )
-                # Relax tolerance for ONNX cuda
-                if current_device == "cuda":
-                    tolerance = 1e-2
-
-                # TODO: store correct_result into the dumped file for offline onnx model validation.
-                # The downside and potential problem, is that the output formats may be different.
-                # E.g., the output order might not match, None might be part of output, etc.
-
             force_max_multiplier = False
             if (
                 self.args.freezing
@@ -3378,10 +2508,6 @@ class BenchmarkRunner:
                         dynamo_cache_lookup_latency
                     )
 
-            if experiment.func is speedup_experiment_onnx:
-                experiment = functools.partial(
-                    experiment, optimized_model_iter_fn.context.onnx_model
-                )
             backend_timings = experiment(
                 model, example_inputs, mark="expected", **experiment_kwargs
             )
@@ -3552,11 +2678,6 @@ class BenchmarkRunner:
                     f"{ok:3}/{total:3} +{frames_third_pass} frames {compilation_time:3.0f}s"
                 )
 
-            if experiment.func is speedup_experiment_onnx:
-                experiment = functools.partial(
-                    experiment, optimized_model_iter_fn.context.onnx_model
-                )
-
             if not hasattr(model, name):
                 model.name = name
             results.append(experiment(model, example_inputs, **experiment_kwargs))
@@ -4209,36 +3330,6 @@ def parse_args(args=None):
     group.add_argument(
         "--xla", action="store_true", help="Compare TorchXLA to eager PyTorch"
     )
-    group.add_argument(
-        "--torchscript-onnx",
-        "--torchscript_onnx",
-        action="store_true",
-        help="Measure speedup with TorchScript ONNX, i.e. `torch.onnx.export`",
-    )
-    group.add_argument(
-        "--torch-onnx-patch",
-        "--torch_onnx_patch",
-        action="store_true",
-        help="Measure speedup with dynamo ONNX patch, i.e. `torch_onnx`",
-    )
-    group.add_argument(
-        "--dynamo-onnx",
-        "--dynamo_onnx",
-        action="store_true",
-        help="Measure speedup with Dynamo ONNX, i.e. `torch.onnx.dynamo_export`",
-    )
-    group.add_argument(
-        "--dynamo-onnx-aot-inline",
-        "--dynamo_onnx_aot_inline",
-        action="store_true",
-        help="Measure speedup with Dynamo ONNX AOT Inline, i.e. `torch.onnx.dynamo_export`",
-    )
-    group.add_argument(
-        "--dynamo-onnx-aot-optimize",
-        "--dynamo_onnx_aot_optimize",
-        action="store_true",
-        help="Measure speedup with Dynamo ONNX w/ ort fusions, i.e. `torch.onnx.dynamo_export`",
-    )
     group.add_argument(
         "--backend",
         choices=torch._dynamo.list_backends(exclude_tags=None),
@@ -4591,8 +3682,7 @@ def run(runner, args, original_dir=None):
         current_settings, \
         output_filename, \
         disable_output, \
-        optimize_ctx, \
-        current_onnx_compiler
+        optimize_ctx
     optimize_ctx = contextlib.nullcontext()
 
     if args.disable_output:
@@ -4625,60 +3715,6 @@ def run(runner, args, original_dir=None):
         torch._dynamo.mark_dynamic = MagicMock()
         experiment = xla
         output_filename = "xla.csv"
-    elif args.torchscript_onnx:
-        optimize_ctx = functools.partial(
-            optimize_onnx_ctx,
-            args.output_directory or ".",
-            OnnxModelFromTorchScript,
-            copy_before_export=args.performance,  # Accuarcy bench already did deepcopy
-        )
-        experiment = speedup_experiment_onnx
-        output_filename = "torchscript_onnx.csv"
-        current_onnx_compiler = "torchscript"
-    elif args.torch_onnx_patch:
-        optimize_ctx = functools.partial(
-            optimize_onnx_ctx,
-            args.output_directory or ".",
-            OnnxModelFromTorchScript,
-            copy_before_export=args.performance,
-            use_experimental_patch=True,
-        )
-        experiment = speedup_experiment_onnx
-        output_filename = "torch_onnx_patch.csv"
-        current_onnx_compiler = "dynamo"
-    elif args.dynamo_onnx:
-        optimize_ctx = functools.partial(
-            optimize_onnx_ctx,
-            args.output_directory or ".",
-            OnnxModelFromDynamo,
-            dynamic_shapes=args.dynamic_shapes,
-            copy_before_export=args.performance,
-        )
-        experiment = speedup_experiment_onnx
-        output_filename = "dynamo_onnx.csv"
-        current_onnx_compiler = "dynamo"
-    elif args.dynamo_onnx_aot_inline:
-        optimize_ctx = functools.partial(
-            optimize_onnx_ctx,
-            args.output_directory or ".",
-            OnnxModelFromDynamoAotInline,
-            dynamic_shapes=args.dynamic_shapes,
-            copy_before_export=args.performance,
-        )
-        experiment = speedup_experiment_onnx
-        output_filename = "dynamo_onnx_aot_inline.csv"
-        current_onnx_compiler = "dynamo"
-    elif args.dynamo_onnx_aot_optimize:
-        optimize_ctx = functools.partial(
-            optimize_onnx_ctx,
-            args.output_directory or ".",
-            OnnxModelFromDynamoAotOptimize,
-            dynamic_shapes=args.dynamic_shapes,
-            copy_before_export=args.performance,
-        )
-        experiment = speedup_experiment_onnx
-        output_filename = "dynamo_onnx_aot_optimize.csv"
-        current_onnx_compiler = "dynamo"
     elif args.speedup_dynamo_ts:
         optimize_ctx = torch._dynamo.optimize("ts", nopython=args.nopython)
         experiment = speedup_experiment
diff --git a/benchmarks/dynamo/runner.py b/benchmarks/dynamo/runner.py
index 4d49b60ffa4..430815139c7 100755
--- a/benchmarks/dynamo/runner.py
+++ b/benchmarks/dynamo/runner.py
@@ -90,8 +90,6 @@ TABLE = {
         "inductor_max_autotune_no_cudagraphs": (
             "--inference -n50 --inductor --inductor-compile-mode max-autotune-no-cudagraphs --disable-cudagraphs "
         ),
-        "torchscript-onnx": "--inference -n5 --torchscript-onnx",
-        "dynamo-onnx": "--inference -n5 --dynamo-onnx",
     },
 }
 
diff --git a/scripts/onnx/test.sh b/scripts/onnx/test.sh
index f397cb63343..c3b9a2ae618 100755
--- a/scripts/onnx/test.sh
+++ b/scripts/onnx/test.sh
@@ -6,7 +6,6 @@ UNKNOWN=()
 
 # defaults
 PARALLEL=1
-export TORCH_ONNX_EXPERIMENTAL_RUNTIME_TYPE_CHECK=ERRORS
 
 while [[ $# -gt 0 ]]
 do
@@ -48,44 +47,6 @@ if [[ "$SHARD_NUMBER" == "2" ]]; then
   xdoctest torch.onnx --style=google --options="+IGNORE_WHITESPACE"
 fi
 
-if [[ "$SHARD_NUMBER" == "2" ]]; then
-  # Sanity check on torchbench w/ onnx
-  pip install pandas
-  log_folder="test/.torchbench_logs"
-  device="cpu"
-  modes=("accuracy" "performance")
-  compilers=("dynamo-onnx" "torchscript-onnx")
-  suites=("huggingface" "timm_models")
-
-  mkdir -p "${log_folder}"
-  for mode in "${modes[@]}"; do
-    for compiler in "${compilers[@]}"; do
-      for suite in "${suites[@]}"; do
-        output_file="${log_folder}/${compiler}_${suite}_float32_inference_${device}_${mode}.csv"
-        bench_file="benchmarks/dynamo/${suite}.py"
-        bench_args=("--${mode}" --float32 "-d${device}" "--output=${output_file}" "--output-directory=${top_dir}" --inference -n5 "--${compiler}" --no-skip --dashboard --batch-size 1)
-        # Run only selected model for each suite to quickly validate the benchmark suite works as expected.
-        case "$suite" in
-            "torchbench")
-                bench_args+=(-k resnet18)
-                ;;
-            "huggingface")
-                bench_args+=(-k ElectraForQuestionAnswering)
-                ;;
-            "timm_models")
-                bench_args+=(-k lcnet_050)
-                ;;
-            *)
-                echo "Unknown suite: ${suite}"
-                exit 1
-                ;;
-        esac
-        python "${top_dir}/${bench_file}" "${bench_args[@]}"
-      done
-    done
-  done
-fi
-
 # Our CI expects both coverage.xml and .coverage to be within test/
 if [ -d .coverage ]; then
   mv .coverage test/.coverage