From c7c8757a1c8f561e63ef32a92e04d6ca01340126 Mon Sep 17 00:00:00 2001
From: Justin Chu
Date: Thu, 16 Jan 2025 11:14:15 -0800
Subject: [PATCH] Use ruff as the formatter to replace black-isort (#23397)

Use ruff as the code formatter in place of black and isort: it is much faster, and projects like PyTorch and ONNX have already adopted ruff format. This PR includes only auto-fixed formatting changes.
---
 .lintrunner.toml | 4 +-
 docs/python/_common/onnx_sphinx.py | 1 +
 docs/python/examples/plot_backend.py | 1 +
 docs/python/examples/plot_common_errors.py | 1 +
 .../plot_convert_pipeline_vectorizer.py | 1 +
 docs/python/examples/plot_profiling.py | 1 +
 onnxruntime/__init__.py | 71 ++++++++++---------
 onnxruntime/python/backend/backend.py | 1 +
 onnxruntime/python/backend/backend_rep.py | 1 +
 onnxruntime/python/datasets/__init__.py | 1 +
 .../onnxruntime_inference_collection.py | 5 +-
 onnxruntime/python/onnxruntime_validation.py | 1 +
 .../profile_explorer/profile_explorer.py | 2 +-
 .../tools/pytorch_export_contrib_ops.py | 1 +
 .../python/tools/qnn/add_trans_cast.py | 6 +-
 .../tools/qnn/gen_qnn_ctx_onnx_model.py | 12 ++--
 .../python/tools/quantization/__init__.py | 16 +++--
 .../tools/quantization/base_quantizer.py | 36 +++++-----
 .../python/tools/quantization/calibrate.py | 18 ++---
 .../quantization/matmul_4bits_quantizer.py | 8 +--
 .../tools/quantization/operators/conv.py | 4 +-
 .../tools/quantization/operators/gemm.py | 12 +++-
 .../tools/quantization/operators/lstm.py | 8 ++-
 .../tools/quantization/qdq_quantizer.py | 6 +-
 .../python/tools/quantization/quant_utils.py | 11 ++-
 .../python/tools/symbolic_shape_infer.py | 12 ++--
 .../python/tools/transformers/benchmark.py | 54 +++++++-------
 .../tools/transformers/benchmark_helper.py | 6 +-
 .../python/tools/transformers/float16.py | 6 +-
 .../transformers/fusion_attention_unet.py | 8 ++-
 .../transformers/large_model_exporter.py | 11 +--
 .../transformers/models/gpt2/gpt2_helper.py | 4 +-
 .../transformers/models/llama/benchmark.py | 6 +-
 .../models/llama/benchmark_e2e.py | 14 ++--
 .../models/llama/convert_to_onnx.py | 3 +-
 .../models/longformer/benchmark_longformer.py | 6 +-
 .../transformers/models/sam2/image_decoder.py | 1 -
 .../transformers/models/sam2/sam2_demo.py | 2 +-
 .../models/sam2/sam2_image_onnx_predictor.py | 6 +-
 .../models/stable_diffusion/benchmark.py | 6 +-
 .../stable_diffusion/diffusion_schedulers.py | 3 +-
 .../pipeline_stable_diffusion.py | 2 +-
 .../python/tools/transformers/onnx_model.py | 2 +-
 .../tools/transformers/quantize_helper.py | 4 +-
 .../cpu/tensor/affine_grid_test_gen.py | 8 +--
 .../cpu/tensor/grid_sample_test_gen.py | 6 +-
 .../test/python/onnxruntime_test_float8.py | 9 +--
 .../python/onnxruntime_test_float8_gemm8.py | 40 +++++------
 .../onnxruntime_test_python_iobinding.py | 1 -
 .../python/quantization/test_conv_dynamic.py | 10 ++-
 .../python/quantization/test_op_pooling.py | 9 ++-
 .../test/python/quantization/test_qdq.py | 12 ++--
 .../test_tensor_quant_overrides_option.py | 4 +-
 .../test/python/transformers/benchmark_gqa.py | 1 +
 .../transformers/conformer_model_generator.py | 4 +-
 .../python/transformers/parity_utilities.py | 6 +-
 .../test/python/transformers/test_mha.py | 13 ++--
 .../python/transformers/test_parity_t5_mha.py | 12 ++--
 .../transformers/test_rotary_mha_fusion.py | 8 +--
 .../transformers/test_sparse_attention.py | 1 +
 .../transformers/whisper_model_generator.py | 4 +-
 .../test/testdata/dummy_t5_model_generator.py | 2 +-
 .../testdata/sparse_initializer_as_output.py | 18 +++-
.../test/testdata/sparse_to_dense_matmul.py | 18 +++-- .../adamw_test/adamw_test_data_generator.py | 2 +- .../lr_scheduler_test_data_generator.py | 4 +- .../sgd_test/sgd_test_data_generator.py | 2 +- .../ort_format/prepare_artifacts.py | 2 +- .../transform/convert_qdq_ops_to_ms_domain.py | 1 + .../recompute_test_graph_generator.py | 8 +-- .../python/training/optim/__init__.py | 12 ++-- .../python/training/optim/config.py | 6 +- .../python/training/optim/lr_scheduler.py | 6 +- .../python/training/ort_triton/__init__.py | 8 ++- .../python/training/ort_triton/_codegen.py | 13 ++-- .../training/ort_triton/kernel/_flash_attn.py | 4 +- .../_custom_autograd_function_exporter.py | 1 - .../python/training/ortmodule/_fallback.py | 2 +- .../ortmodule/_graph_transition_manager.py | 7 +- .../training/ortmodule/_runtime_inspector.py | 1 - .../ortmodule/_zero_stage3_compatibility.py | 8 +-- .../json_config/_load_config_from_json.py | 30 ++++---- .../utils/hooks/_statistics_subscriber.py | 4 +- .../python/training/utils/torch_io_helper.py | 6 +- .../orttraining/test/python/_test_helpers.py | 7 +- .../test/python/orttraining_test_dort.py | 6 +- .../test/python/orttraining_test_gru.py | 4 +- .../test/python/orttraining_test_lstm.py | 27 ++++--- .../orttraining_test_ort_apis_onnxblock.py | 1 - .../python/orttraining_test_ortmodule_api.py | 5 +- .../orttraining_test_ortmodule_autograd.py | 25 +++---- ...training_test_ortmodule_bert_classifier.py | 6 +- ...test_ortmodule_bert_classifier_autocast.py | 15 ++-- ...t_ortmodule_deepspeed_pipeline_parallel.py | 5 +- .../orttraining_test_ortmodule_onnx_ops.py | 4 +- .../test/python/qat_poc_example/train.py | 4 +- .../tools/ci_test/run_batch_size_test.py | 2 +- .../tools/ci_test/run_bert_perf_test.py | 4 +- .../tools/ci_test/run_gpt2_perf_test.py | 2 +- .../tools/scripts/nv_run_pretraining.py | 1 - pyproject.toml | 43 +---------- requirements-lintrunner.txt | 3 - tools/ci_build/build.py | 7 +- tools/ci_build/compile_triton.py | 12 ++-- .../github/android/build_aar_package.py | 5 +- .../apple/build_and_assemble_apple_pods.py | 3 +- .../github/apple/package_release_tasks.py | 3 +- tools/python/dump_ort_model.py | 2 +- tools/python/gen_contrib_doc.py | 10 +-- tools/python/sparsify_initializers.py | 8 +-- .../util/mobile_helpers/usability_checker.py | 15 ++-- tools/python/util/onnx_model_utils.py | 4 +- .../python/util/ort_format_model/__init__.py | 8 ++- tools/python/util/ort_format_model/types.py | 1 + 114 files changed, 476 insertions(+), 464 deletions(-) diff --git a/.lintrunner.toml b/.lintrunner.toml index 5ef9ad9337..74744277fa 100644 --- a/.lintrunner.toml +++ b/.lintrunner.toml @@ -61,7 +61,7 @@ is_formatter = true [[linter]] -code = 'BLACK-ISORT' +code = 'RUFF-FORMAT' include_patterns = [ '**/*.py', ] @@ -76,7 +76,7 @@ command = [ '-m', 'lintrunner_adapters', 'run', - 'black_isort_linter', + 'ruff_format_linter', '--', '@{{PATHSFILE}}' ] diff --git a/docs/python/_common/onnx_sphinx.py b/docs/python/_common/onnx_sphinx.py index 7562d23289..926a2b1d84 100644 --- a/docs/python/_common/onnx_sphinx.py +++ b/docs/python/_common/onnx_sphinx.py @@ -2,6 +2,7 @@ """ Automates the generation of ONNX operators. """ + import importlib import inspect import keyword diff --git a/docs/python/examples/plot_backend.py b/docs/python/examples/plot_backend.py index 58fb4cd84f..65b5fd0cf7 100644 --- a/docs/python/examples/plot_backend.py +++ b/docs/python/examples/plot_backend.py @@ -14,6 +14,7 @@ to run predictions using this runtime. 
Let's use the API to compute the prediction of a simple logistic regression model. """ + import numpy as np from onnx import load diff --git a/docs/python/examples/plot_common_errors.py b/docs/python/examples/plot_common_errors.py index dc7078831a..85cfbf6b97 100644 --- a/docs/python/examples/plot_common_errors.py +++ b/docs/python/examples/plot_common_errors.py @@ -15,6 +15,7 @@ It starts by loading the model trained in example trained on *Iris* datasets. The model takes a vector of dimension 2 and returns a class among three. """ + import numpy import onnxruntime as rt diff --git a/docs/python/examples/plot_convert_pipeline_vectorizer.py b/docs/python/examples/plot_convert_pipeline_vectorizer.py index 06e9e8d29e..2215cb73ee 100644 --- a/docs/python/examples/plot_convert_pipeline_vectorizer.py +++ b/docs/python/examples/plot_convert_pipeline_vectorizer.py @@ -16,6 +16,7 @@ Train a pipeline The first step consists in creating a dummy datasets. """ + import pandas from sklearn.datasets import make_regression from sklearn.model_selection import train_test_split diff --git a/docs/python/examples/plot_profiling.py b/docs/python/examples/plot_profiling.py index d35ef72556..6e575ec9eb 100644 --- a/docs/python/examples/plot_profiling.py +++ b/docs/python/examples/plot_profiling.py @@ -11,6 +11,7 @@ Profile the execution of a simple model *ONNX Runtime* can profile the execution of the model. This example shows how to interpret the results. """ + import numpy import onnx diff --git a/onnxruntime/__init__.py b/onnxruntime/__init__.py index 9d533af616..c874df8153 100644 --- a/onnxruntime/__init__.py +++ b/onnxruntime/__init__.py @@ -7,6 +7,7 @@ ONNX Runtime is a performance-focused scoring engine for Open Neural Network Exc For more information on ONNX Runtime, please see `aka.ms/onnxruntime `_ or the `Github project `_. """ + __version__ = "1.21.0" __author__ = "Microsoft" @@ -20,33 +21,35 @@ __author__ = "Microsoft" # meaningful messages to the user. # the saved exception is raised after device version validation. 
try: - from onnxruntime.capi._pybind_state import ExecutionMode # noqa: F401 - from onnxruntime.capi._pybind_state import ExecutionOrder # noqa: F401 - from onnxruntime.capi._pybind_state import GraphOptimizationLevel # noqa: F401 - from onnxruntime.capi._pybind_state import LoraAdapter # noqa: F401 - from onnxruntime.capi._pybind_state import ModelMetadata # noqa: F401 - from onnxruntime.capi._pybind_state import NodeArg # noqa: F401 - from onnxruntime.capi._pybind_state import OrtAllocatorType # noqa: F401 - from onnxruntime.capi._pybind_state import OrtArenaCfg # noqa: F401 - from onnxruntime.capi._pybind_state import OrtMemoryInfo # noqa: F401 - from onnxruntime.capi._pybind_state import OrtMemType # noqa: F401 - from onnxruntime.capi._pybind_state import OrtSparseFormat # noqa: F401 - from onnxruntime.capi._pybind_state import RunOptions # noqa: F401 - from onnxruntime.capi._pybind_state import SessionIOBinding # noqa: F401 - from onnxruntime.capi._pybind_state import SessionOptions # noqa: F401 - from onnxruntime.capi._pybind_state import create_and_register_allocator # noqa: F401 - from onnxruntime.capi._pybind_state import create_and_register_allocator_v2 # noqa: F401 - from onnxruntime.capi._pybind_state import disable_telemetry_events # noqa: F401 - from onnxruntime.capi._pybind_state import enable_telemetry_events # noqa: F401 - from onnxruntime.capi._pybind_state import get_all_providers # noqa: F401 - from onnxruntime.capi._pybind_state import get_available_providers # noqa: F401 - from onnxruntime.capi._pybind_state import get_build_info # noqa: F401 - from onnxruntime.capi._pybind_state import get_device # noqa: F401 - from onnxruntime.capi._pybind_state import get_version_string # noqa: F401 - from onnxruntime.capi._pybind_state import has_collective_ops # noqa: F401 - from onnxruntime.capi._pybind_state import set_default_logger_severity # noqa: F401 - from onnxruntime.capi._pybind_state import set_default_logger_verbosity # noqa: F401 - from onnxruntime.capi._pybind_state import set_seed # noqa: F401 + from onnxruntime.capi._pybind_state import ( + ExecutionMode, # noqa: F401 + ExecutionOrder, # noqa: F401 + GraphOptimizationLevel, # noqa: F401 + LoraAdapter, # noqa: F401 + ModelMetadata, # noqa: F401 + NodeArg, # noqa: F401 + OrtAllocatorType, # noqa: F401 + OrtArenaCfg, # noqa: F401 + OrtMemoryInfo, # noqa: F401 + OrtMemType, # noqa: F401 + OrtSparseFormat, # noqa: F401 + RunOptions, # noqa: F401 + SessionIOBinding, # noqa: F401 + SessionOptions, # noqa: F401 + create_and_register_allocator, # noqa: F401 + create_and_register_allocator_v2, # noqa: F401 + disable_telemetry_events, # noqa: F401 + enable_telemetry_events, # noqa: F401 + get_all_providers, # noqa: F401 + get_available_providers, # noqa: F401 + get_build_info, # noqa: F401 + get_device, # noqa: F401 + get_version_string, # noqa: F401 + has_collective_ops, # noqa: F401 + set_default_logger_severity, # noqa: F401 + set_default_logger_verbosity, # noqa: F401 + set_seed, # noqa: F401 + ) import_capi_exception = None except Exception as e: @@ -57,12 +60,14 @@ from onnxruntime.capi import onnxruntime_validation if import_capi_exception: raise import_capi_exception -from onnxruntime.capi.onnxruntime_inference_collection import AdapterFormat # noqa: F401 -from onnxruntime.capi.onnxruntime_inference_collection import InferenceSession # noqa: F401 -from onnxruntime.capi.onnxruntime_inference_collection import IOBinding # noqa: F401 -from onnxruntime.capi.onnxruntime_inference_collection import OrtDevice # noqa: F401 
-from onnxruntime.capi.onnxruntime_inference_collection import OrtValue # noqa: F401 -from onnxruntime.capi.onnxruntime_inference_collection import SparseTensor # noqa: F401 +from onnxruntime.capi.onnxruntime_inference_collection import ( + AdapterFormat, # noqa: F401 + InferenceSession, # noqa: F401 + IOBinding, # noqa: F401 + OrtDevice, # noqa: F401 + OrtValue, # noqa: F401 + SparseTensor, # noqa: F401 +) # TODO: thiagofc: Temporary experimental namespace for new PyTorch front-end try: # noqa: SIM105 diff --git a/onnxruntime/python/backend/backend.py b/onnxruntime/python/backend/backend.py index 67423fe9b5..19f46189e2 100644 --- a/onnxruntime/python/backend/backend.py +++ b/onnxruntime/python/backend/backend.py @@ -5,6 +5,7 @@ """ Implements ONNX's backend API. """ + import os import unittest diff --git a/onnxruntime/python/backend/backend_rep.py b/onnxruntime/python/backend/backend_rep.py index c4dddaaba1..af785b71c5 100644 --- a/onnxruntime/python/backend/backend_rep.py +++ b/onnxruntime/python/backend/backend_rep.py @@ -5,6 +5,7 @@ """ Implements ONNX's backend API. """ + from typing import Any, Tuple # noqa: F401 from onnx.backend.base import BackendRep diff --git a/onnxruntime/python/datasets/__init__.py b/onnxruntime/python/datasets/__init__.py index ba64aa8a6e..1a04b37698 100644 --- a/onnxruntime/python/datasets/__init__.py +++ b/onnxruntime/python/datasets/__init__.py @@ -3,6 +3,7 @@ """ Short examples used in the documentation. """ + import os diff --git a/onnxruntime/python/onnxruntime_inference_collection.py b/onnxruntime/python/onnxruntime_inference_collection.py index d05fba1928..c12efc7fdf 100644 --- a/onnxruntime/python/onnxruntime_inference_collection.py +++ b/onnxruntime/python/onnxruntime_inference_collection.py @@ -115,8 +115,9 @@ def check_and_normalize_provider_args( def set_provider_options(name, options): if name not in available_provider_names: warnings.warn( - "Specified provider '{}' is not in available provider names." - "Available providers: '{}'".format(name, ", ".join(available_provider_names)) + "Specified provider '{}' is not in available provider names.Available providers: '{}'".format( + name, ", ".join(available_provider_names) + ) ) if name in provider_name_to_options: diff --git a/onnxruntime/python/onnxruntime_validation.py b/onnxruntime/python/onnxruntime_validation.py index 4f29c7f424..09ce886c8f 100644 --- a/onnxruntime/python/onnxruntime_validation.py +++ b/onnxruntime/python/onnxruntime_validation.py @@ -5,6 +5,7 @@ """ Check OS requirements for ONNX Runtime Python Bindings. 
""" + import linecache import platform import warnings diff --git a/onnxruntime/python/tools/profile_explorer/profile_explorer.py b/onnxruntime/python/tools/profile_explorer/profile_explorer.py index 6e07478839..3c3b8c90f4 100644 --- a/onnxruntime/python/tools/profile_explorer/profile_explorer.py +++ b/onnxruntime/python/tools/profile_explorer/profile_explorer.py @@ -86,7 +86,7 @@ def _shape_to_string(shape): value = next(iter(dict_obj.values())) if len(res) != 0: res += "," - res += f'{key}({"x".join(str(v) for v in value)})' + res += f"{key}({'x'.join(str(v) for v in value)})" return res diff --git a/onnxruntime/python/tools/pytorch_export_contrib_ops.py b/onnxruntime/python/tools/pytorch_export_contrib_ops.py index d8cf3c1304..f3cd4c2c89 100644 --- a/onnxruntime/python/tools/pytorch_export_contrib_ops.py +++ b/onnxruntime/python/tools/pytorch_export_contrib_ops.py @@ -5,6 +5,7 @@ Support for registering ONNX Runtime's built-in contrib ops with PyTorch-ONNX exporter (torch.onnx.export). """ + import typing try: diff --git a/onnxruntime/python/tools/qnn/add_trans_cast.py b/onnxruntime/python/tools/qnn/add_trans_cast.py index ced3e3519a..edeaa6b4e2 100644 --- a/onnxruntime/python/tools/qnn/add_trans_cast.py +++ b/onnxruntime/python/tools/qnn/add_trans_cast.py @@ -126,9 +126,9 @@ def parse_qnn_json_file(qnn_json_file_path, qnn_input_output_tensor_dic): qnn_tensor.dim = qnn_tensor_attribute["dims"] qnn_input_output_tensor_dic[qnn_tensor_name] = qnn_tensor - assert ( - len(qnn_input_output_tensor_dic) > 1 - ), "Converted QNN model not valid. It should have at least 1 input & 1 output." + assert len(qnn_input_output_tensor_dic) > 1, ( + "Converted QNN model not valid. It should have at least 1 input & 1 output." + ) def compare_onnx_shape_with_qnn_shape(onnx_dims, qnn_dims): diff --git a/onnxruntime/python/tools/qnn/gen_qnn_ctx_onnx_model.py b/onnxruntime/python/tools/qnn/gen_qnn_ctx_onnx_model.py index b7d32fd6b2..7a3e364a08 100644 --- a/onnxruntime/python/tools/qnn/gen_qnn_ctx_onnx_model.py +++ b/onnxruntime/python/tools/qnn/gen_qnn_ctx_onnx_model.py @@ -150,9 +150,9 @@ def parse_qnn_converter_json_file(qnn_convert_json, qnn_input_tensor_dic, qnn_ou qnn_tensor.offset = 0 - qnn_tensor_attribute["quant_params"]["scale_offset"]["offset"] qnn_output_tensor_dic[qnn_tensor_name] = qnn_tensor - assert ( - len(qnn_input_tensor_dic) >= 1 and len(qnn_output_tensor_dic) >= 1 - ), "Converted QNN model not valid. It should have at least 1 input & 1 output." + assert len(qnn_input_tensor_dic) >= 1 and len(qnn_output_tensor_dic) >= 1, ( + "Converted QNN model not valid. It should have at least 1 input & 1 output." + ) def generate_wrapper_onnx_file( @@ -286,9 +286,9 @@ def parse_qnn_graph(qnn_graph, qnn_input_tensor_dic, qnn_output_tensor_dic): qnn_tensor.offset = 0 - tensor_info["quantizeParams"]["scaleOffset"]["offset"] qnn_output_tensor_dic[qnn_tensor.name] = qnn_tensor - assert ( - len(qnn_input_tensor_dic) >= 1 and len(qnn_output_tensor_dic) >= 1 - ), "Converted QNN model not valid. It should have at least 1 input & 1 output." + assert len(qnn_input_tensor_dic) >= 1 and len(qnn_output_tensor_dic) >= 1, ( + "Converted QNN model not valid. It should have at least 1 input & 1 output." 
+ ) return graph_name diff --git a/onnxruntime/python/tools/quantization/__init__.py b/onnxruntime/python/tools/quantization/__init__.py index 712e15a6a1..ac99de348f 100644 --- a/onnxruntime/python/tools/quantization/__init__.py +++ b/onnxruntime/python/tools/quantization/__init__.py @@ -7,11 +7,13 @@ from .calibrate import ( # noqa: F401 ) from .qdq_quantizer import QDQQuantizer # noqa: F401 from .quant_utils import QuantFormat, QuantType, write_calibration_table # noqa: F401 -from .quantize import DynamicQuantConfig # noqa: F401 -from .quantize import QuantizationMode # noqa: F401 -from .quantize import StaticQuantConfig # noqa: F401 -from .quantize import get_qdq_config # noqa: F401 -from .quantize import quantize # noqa: F401 -from .quantize import quantize_dynamic # noqa: F401 -from .quantize import quantize_static # noqa: F401 +from .quantize import ( + DynamicQuantConfig, # noqa: F401 + QuantizationMode, # noqa: F401 + StaticQuantConfig, # noqa: F401 + get_qdq_config, # noqa: F401 + quantize, # noqa: F401 + quantize_dynamic, # noqa: F401 + quantize_static, # noqa: F401 +) from .shape_inference import quant_pre_process # noqa: F401 diff --git a/onnxruntime/python/tools/quantization/base_quantizer.py b/onnxruntime/python/tools/quantization/base_quantizer.py index 6235db3234..0cd186bffd 100644 --- a/onnxruntime/python/tools/quantization/base_quantizer.py +++ b/onnxruntime/python/tools/quantization/base_quantizer.py @@ -331,9 +331,9 @@ class BaseQuantizer: scale = np.array(quant_overrides["scale"]) q_weight_data = quantize_nparray(qType, weight_data.flatten(), scale, zero_point) assert isinstance(zero_point, np.ndarray), f"Unexpected type {type(zero_point)}" - assert ( - zero_point.dtype != np.float32 and zero_point.dtype != np.float16 - ), f"Unexpected dtype {zero_point.dtype}" + assert zero_point.dtype != np.float32 and zero_point.dtype != np.float16, ( + f"Unexpected dtype {zero_point.dtype}" + ) assert isinstance(scale, np.ndarray), f"Unexpected type {type(scale)}" else: @@ -349,9 +349,9 @@ class BaseQuantizer: ) assert isinstance(zero_point, np.ndarray), f"Unexpected type {type(zero_point)}" - assert ( - zero_point.dtype != np.float32 and zero_point.dtype != np.float16 - ), f"Unexpected dtype {zero_point.dtype}" + assert zero_point.dtype != np.float32 and zero_point.dtype != np.float16, ( + f"Unexpected dtype {zero_point.dtype}" + ) assert isinstance(scale, np.ndarray), f"Unexpected type {type(scale)}" scale_dtype = weight.data_type @@ -465,13 +465,13 @@ class BaseQuantizer: weight_qType, per_channel_data.flatten(), scale, zero_point ) assert isinstance(zero_point, np.ndarray), f"Unexpected type {type(zero_point)}" - assert ( - zero_point.dtype != np.float32 and zero_point.dtype != np.float16 - ), f"Unexpected dtype {zero_point.dtype}" + assert zero_point.dtype != np.float32 and zero_point.dtype != np.float16, ( + f"Unexpected dtype {zero_point.dtype}" + ) assert isinstance(scale, np.ndarray), f"Unexpected type {type(scale)}" - assert isinstance( - quantized_per_channel_data, np.ndarray - ), f"Unexpected type {type(quantized_per_channel_data)}" + assert isinstance(quantized_per_channel_data, np.ndarray), ( + f"Unexpected type {type(quantized_per_channel_data)}" + ) else: zero_point, scale, quantized_per_channel_data = quantize_data( @@ -485,13 +485,13 @@ class BaseQuantizer: ) assert isinstance(zero_point, np.ndarray), f"Unexpected type {type(zero_point)}" - assert ( - zero_point.dtype != np.float32 and zero_point.dtype != np.float16 - ), f"Unexpected dtype {zero_point.dtype}" + 
assert zero_point.dtype != np.float32 and zero_point.dtype != np.float16, ( + f"Unexpected dtype {zero_point.dtype}" + ) assert isinstance(scale, np.ndarray), f"Unexpected type {type(scale)}" - assert isinstance( - quantized_per_channel_data, np.ndarray - ), f"Unexpected type {type(quantized_per_channel_data)}" + assert isinstance(quantized_per_channel_data, np.ndarray), ( + f"Unexpected type {type(quantized_per_channel_data)}" + ) zero_point_list.append(zero_point) scale_list.append(scale) diff --git a/onnxruntime/python/tools/quantization/calibrate.py b/onnxruntime/python/tools/quantization/calibrate.py index 4bbb63fef3..7855f260a5 100644 --- a/onnxruntime/python/tools/quantization/calibrate.py +++ b/onnxruntime/python/tools/quantization/calibrate.py @@ -820,9 +820,9 @@ class HistogramCollector(CalibrationDataCollector): for arr in data_arr: assert isinstance(arr, np.ndarray), f"Unexpected type {type(arr)} for tensor={tensor!r}" dtypes = set(a.dtype for a in data_arr) - assert ( - len(dtypes) == 1 - ), f"The calibration expects only one element type but got {dtypes} for tensor={tensor!r}" + assert len(dtypes) == 1, ( + f"The calibration expects only one element type but got {dtypes} for tensor={tensor!r}" + ) data_arr_np = np.asarray(data_arr) elif not isinstance(data_arr, np.ndarray): raise ValueError(f"Unexpected type {type(data_arr)} for tensor={tensor!r}") @@ -842,9 +842,9 @@ class HistogramCollector(CalibrationDataCollector): # first time it uses num_bins to compute histogram. hist, hist_edges = np.histogram(data_arr_np, bins=self.num_bins) hist_edges = hist_edges.astype(data_arr_np.dtype) - assert ( - data_arr_np.dtype != np.float64 - ), "only float32 or float16 is supported, every constant must be explicitly typed" + assert data_arr_np.dtype != np.float64, ( + "only float32 or float16 is supported, every constant must be explicitly typed" + ) self.histogram_dict[tensor] = (hist, hist_edges, min_value, max_value) else: old_histogram = self.histogram_dict[tensor] @@ -864,9 +864,9 @@ class HistogramCollector(CalibrationDataCollector): hist, hist_edges = np.histogram(data_arr_np, bins=old_hist_edges) hist_edges = hist_edges.astype(data_arr_np.dtype) hist[: len(old_hist)] += old_hist - assert ( - data_arr_np.dtype != np.float64 - ), "only float32 or float16 is supported, every constant must be explicitly typed" + assert data_arr_np.dtype != np.float64, ( + "only float32 or float16 is supported, every constant must be explicitly typed" + ) self.histogram_dict[tensor] = (hist, hist_edges, min(old_min, min_value), max(old_max, max_value)) def collect_value(self, name_to_arr): diff --git a/onnxruntime/python/tools/quantization/matmul_4bits_quantizer.py b/onnxruntime/python/tools/quantization/matmul_4bits_quantizer.py index 1d91141a11..4cf9adcd32 100644 --- a/onnxruntime/python/tools/quantization/matmul_4bits_quantizer.py +++ b/onnxruntime/python/tools/quantization/matmul_4bits_quantizer.py @@ -1259,7 +1259,6 @@ class MatMul4BitsQuantizer: self._process_subgraph(graph_stack) self.model.clean_initializers() elif self.algo_config.algorithm == "nvidia_awq": - # Handle nvidia_awq quantization logger.info("Processing nvidia_awq quantization...") self.model = self.node_quantizer.quantize_awq( @@ -1280,9 +1279,9 @@ class MatMul4BitsQuantizer: import neural_compressor - assert version.parse(neural_compressor.__version__) >= version.parse( - "2.3.2" - ), "Require neural-compressor >= 2.3.2 to support weight only quantization!" 
+ assert version.parse(neural_compressor.__version__) >= version.parse("2.3.2"), ( + "Require neural-compressor >= 2.3.2 to support weight only quantization!" + ) self.int4_quant_algo() @@ -1446,7 +1445,6 @@ if __name__ == "__main__": elif args.quant_method == "gptq": quant_config = GPTQWeightOnlyQuantConfig(block_size=args.block_size, op_types_to_quantize=op_types_to_quantize) elif args.quant_method == "nvidia_awq": - if quant_format == QuantFormat.QOperator: logger.warning("QOperator is not applicable to nvidia_awq. overriding the value to QDQ") quant_format = QuantFormat.QDQ diff --git a/onnxruntime/python/tools/quantization/operators/conv.py b/onnxruntime/python/tools/quantization/operators/conv.py index 922884a5f6..7c5248f90f 100644 --- a/onnxruntime/python/tools/quantization/operators/conv.py +++ b/onnxruntime/python/tools/quantization/operators/conv.py @@ -158,7 +158,9 @@ class QLinearConv(QuantOperatorBase): nodes, ) = self.quantizer.quantize_activation(node, [0]) quant_weight_tuple = self.quantizer.quantize_weight_per_channel( - node.input[1], onnx_proto.TensorProto.INT8, 0 # self.quantizer.weight_qType? + node.input[1], + onnx_proto.TensorProto.INT8, + 0, # self.quantizer.weight_qType? ) quantized_input_names.append(quant_weight_tuple[0]) zero_point_names.append(quant_weight_tuple[1]) diff --git a/onnxruntime/python/tools/quantization/operators/gemm.py b/onnxruntime/python/tools/quantization/operators/gemm.py index 5d7bf6e2cd..6b8a389824 100644 --- a/onnxruntime/python/tools/quantization/operators/gemm.py +++ b/onnxruntime/python/tools/quantization/operators/gemm.py @@ -3,9 +3,15 @@ import logging import numpy as np # noqa: F401 import onnx -from ..quant_utils import find_by_name # noqa: F401 -from ..quant_utils import get_mul_node # noqa: F401 -from ..quant_utils import TENSOR_NAME_QUANT_SUFFIX, QuantizedValue, QuantizedValueType, attribute_to_kwarg, ms_domain +from ..quant_utils import ( + TENSOR_NAME_QUANT_SUFFIX, + QuantizedValue, + QuantizedValueType, + attribute_to_kwarg, + find_by_name, # noqa: F401 + get_mul_node, # noqa: F401 + ms_domain, +) from .base_operator import QuantOperatorBase # noqa: F401 from .matmul import QOpMatMul from .qdq_base_operator import QDQOperatorBase diff --git a/onnxruntime/python/tools/quantization/operators/lstm.py b/onnxruntime/python/tools/quantization/operators/lstm.py index 3ad3147cb8..3a0c94aca6 100644 --- a/onnxruntime/python/tools/quantization/operators/lstm.py +++ b/onnxruntime/python/tools/quantization/operators/lstm.py @@ -47,10 +47,14 @@ class LSTMQuant(QuantOperatorBase): R.dims[0] = R_num_dir * R_4_hidden_size quant_input_weight_tuple = self.quantizer.quantize_weight_per_channel( - node.input[1], onnx_proto.TensorProto.INT8, 0 # self.quantizer.weight_qType? + node.input[1], + onnx_proto.TensorProto.INT8, + 0, # self.quantizer.weight_qType? ) quant_recurrent_weight_tuple = self.quantizer.quantize_weight_per_channel( - node.input[2], onnx_proto.TensorProto.INT8, 0 # self.quantizer.weight_qType? + node.input[2], + onnx_proto.TensorProto.INT8, + 0, # self.quantizer.weight_qType? 
) W_quant_weight = model.get_initializer(quant_input_weight_tuple[0]) # noqa: N806 diff --git a/onnxruntime/python/tools/quantization/qdq_quantizer.py b/onnxruntime/python/tools/quantization/qdq_quantizer.py index 5552a4451c..1eed87ba53 100644 --- a/onnxruntime/python/tools/quantization/qdq_quantizer.py +++ b/onnxruntime/python/tools/quantization/qdq_quantizer.py @@ -1253,9 +1253,9 @@ class QDQQuantizer(BaseQuantizer): scale = quant_params["scale"] zero_point_type = quant_params["quant_type"] axis: int | None = quant_params.get("axis") - assert (axis is not None and len(scale.shape) == 1) or ( - axis is None and len(scale.shape) == 0 - ), "Wrong scale/zp shapes" + assert (axis is not None and len(scale.shape) == 1) or (axis is None and len(scale.shape) == 0), ( + "Wrong scale/zp shapes" + ) assert len(scale.shape) == len(zero_point.shape), "Scale and zero-point must have the same rank" zero_point_name = param_name + "_zero_point" + init_name_suffix diff --git a/onnxruntime/python/tools/quantization/quant_utils.py b/onnxruntime/python/tools/quantization/quant_utils.py index df53aafeaf..7dd8a7cafc 100644 --- a/onnxruntime/python/tools/quantization/quant_utils.py +++ b/onnxruntime/python/tools/quantization/quant_utils.py @@ -197,9 +197,9 @@ def _check_type(*args, zero_point_index=-1): def quantize_nparray(qType, arr, scale, zero_point, low=None, high=None): - assert ( - qType in ONNX_TYPE_TO_NP_TYPE - ), f"Unexpected data type {qType} requested. Only INT8, UINT8, INT16, and UINT16 are supported." + assert qType in ONNX_TYPE_TO_NP_TYPE, ( + f"Unexpected data type {qType} requested. Only INT8, UINT8, INT16, and UINT16 are supported." + ) if qType in ( onnx_proto.TensorProto.FLOAT8E4M3FN, onnx_proto.TensorProto.FLOAT8E4M3FNUZ, @@ -918,10 +918,7 @@ def smooth_distribution(p, eps=0.0001): def model_has_external_data(model_path: Path): model = onnx.load(model_path.as_posix(), load_external_data=False) - for intializer in model.graph.initializer: - if external_data_helper.uses_external_data(intializer): - return True - return False + return any(external_data_helper.uses_external_data(intializer) for intializer in model.graph.initializer) def optimize_model(model_path: Path, opt_model_path: Path): diff --git a/onnxruntime/python/tools/symbolic_shape_infer.py b/onnxruntime/python/tools/symbolic_shape_infer.py index f88011c7a2..b9ff215902 100755 --- a/onnxruntime/python/tools/symbolic_shape_infer.py +++ b/onnxruntime/python/tools/symbolic_shape_infer.py @@ -1814,12 +1814,12 @@ class SymbolicShapeInference: def replace_min_with_arg(arg_idx): replaced = list(expr.args) - assert isinstance( - replaced[min_pos], sympy.Min - ), f"Expected a sympy.Min() at position {min_pos}, got {replaced[min_pos]}" - assert ( - len(replaced[min_pos].args) == 2 - ), f"Expected a sympy.Min() with exactly 2 arguments, got {replaced[min_pos]}" + assert isinstance(replaced[min_pos], sympy.Min), ( + f"Expected a sympy.Min() at position {min_pos}, got {replaced[min_pos]}" + ) + assert len(replaced[min_pos].args) == 2, ( + f"Expected a sympy.Min() with exactly 2 arguments, got {replaced[min_pos]}" + ) replaced[min_pos] = replaced[min_pos].args[arg_idx] return sympy.Add(*replaced) diff --git a/onnxruntime/python/tools/transformers/benchmark.py b/onnxruntime/python/tools/transformers/benchmark.py index 450474d96d..54027a5a70 100644 --- a/onnxruntime/python/tools/transformers/benchmark.py +++ b/onnxruntime/python/tools/transformers/benchmark.py @@ -13,33 +13,33 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 
implied. # See the License for the specific language governing permissions and # limitations under the License. -""" Benchmarking the inference of pretrained transformer models. - PyTorch/TorchScript benchmark is based on https://github.com/huggingface/transformers/blob/master/examples/benchmarks.py. - One difference is that random input_ids is generated in this benchmark. +"""Benchmarking the inference of pretrained transformer models. +PyTorch/TorchScript benchmark is based on https://github.com/huggingface/transformers/blob/master/examples/benchmarks.py. +One difference is that random input_ids is generated in this benchmark. - For onnxruntime, this script will convert a pretrained model to ONNX, and optimize it when -o parameter is used. +For onnxruntime, this script will convert a pretrained model to ONNX, and optimize it when -o parameter is used. - Example commands: - Export all models to ONNX, optimize and validate them: - python benchmark.py -b 0 -o -v -i 1 2 3 - Run OnnxRuntime on GPU for all models: - python benchmark.py -g - Run OnnxRuntime on GPU for all models with fp32 optimization: - python benchmark.py -g -o - Run OnnxRuntime on GPU with fp16 optimization: - python benchmark.py -g -o -p "fp16" - Run TorchScript on GPU for all models: - python benchmark.py -e torchscript -g - Run TorchScript on GPU for all models with fp16: - python benchmark.py -e torchscript -g -p "fp16" - Run ONNXRuntime and TorchScript on CPU for all models with quantization: - python benchmark.py -e torchscript onnxruntime -p "int8" -o - Run OnnxRuntime with the ROCM provider and graph optimization script: - python benchmark.py -g -m bert-base-cased --provider rocm --optimizer_info by_script --disable_embed_layer_norm - Run OnnxRuntime with bfloat16 fastmath mode kernels on aarch64 platforms with bfloat16 support: - python benchmark.py --enable_arm64_bfloat16_fastmath_mlas_gemm +Example commands: + Export all models to ONNX, optimize and validate them: + python benchmark.py -b 0 -o -v -i 1 2 3 + Run OnnxRuntime on GPU for all models: + python benchmark.py -g + Run OnnxRuntime on GPU for all models with fp32 optimization: + python benchmark.py -g -o + Run OnnxRuntime on GPU with fp16 optimization: + python benchmark.py -g -o -p "fp16" + Run TorchScript on GPU for all models: + python benchmark.py -e torchscript -g + Run TorchScript on GPU for all models with fp16: + python benchmark.py -e torchscript -g -p "fp16" + Run ONNXRuntime and TorchScript on CPU for all models with quantization: + python benchmark.py -e torchscript onnxruntime -p "int8" -o + Run OnnxRuntime with the ROCM provider and graph optimization script: + python benchmark.py -g -m bert-base-cased --provider rocm --optimizer_info by_script --disable_embed_layer_norm + Run OnnxRuntime with bfloat16 fastmath mode kernels on aarch64 platforms with bfloat16 support: + python benchmark.py --enable_arm64_bfloat16_fastmath_mlas_gemm - It is recommended to use run_benchmark.sh to launch benchmark. +It is recommended to use run_benchmark.sh to launch benchmark. """ import argparse @@ -439,9 +439,9 @@ def run_with_tf_optimizations(do_eager_mode: bool, use_xla: bool): return func(*args, **kwargs) if do_eager_mode is True: - assert ( - use_xla is False - ), "Cannot run model in XLA, if `args.eager_mode` is set to `True`. Please set `args.eager_mode=False`." + assert use_xla is False, ( + "Cannot run model in XLA, if `args.eager_mode` is set to `True`. Please set `args.eager_mode=False`." 
+ ) return run_in_eager_mode else: return run_in_graph_mode diff --git a/onnxruntime/python/tools/transformers/benchmark_helper.py b/onnxruntime/python/tools/transformers/benchmark_helper.py index 66f7a63447..d88e689521 100644 --- a/onnxruntime/python/tools/transformers/benchmark_helper.py +++ b/onnxruntime/python/tools/transformers/benchmark_helper.py @@ -167,9 +167,9 @@ def prepare_environment(cache_dir, output_dir, use_gpu, provider=None): if use_gpu: if provider == "dml": - assert ( - "DmlExecutionProvider" in onnxruntime.get_available_providers() - ), "Please install onnxruntime-directml package to test GPU inference." + assert "DmlExecutionProvider" in onnxruntime.get_available_providers(), ( + "Please install onnxruntime-directml package to test GPU inference." + ) else: assert not set(onnxruntime.get_available_providers()).isdisjoint( diff --git a/onnxruntime/python/tools/transformers/float16.py b/onnxruntime/python/tools/transformers/float16.py index 74adc951c4..e9ac4a64f9 100644 --- a/onnxruntime/python/tools/transformers/float16.py +++ b/onnxruntime/python/tools/transformers/float16.py @@ -201,9 +201,9 @@ def convert_float_to_float16( Returns: ModelProto: converted model. """ - assert ( - min_positive_val >= 5.96e-08 - ), "invalid min_positive_val. smallest positive float16 value: subnormal 5.96e-08, and normalized 6.104e-05" + assert min_positive_val >= 5.96e-08, ( + "invalid min_positive_val. smallest positive float16 value: subnormal 5.96e-08, and normalized 6.104e-05" + ) assert max_finite_val <= float(np.finfo(np.float16).max), "invalid max_finite_val. largest float16 value: 65504" force_fp16_inputs_dict = {} if force_fp16_inputs is None else force_fp16_inputs diff --git a/onnxruntime/python/tools/transformers/fusion_attention_unet.py b/onnxruntime/python/tools/transformers/fusion_attention_unet.py index 048c13cdb1..9a353e7e2d 100644 --- a/onnxruntime/python/tools/transformers/fusion_attention_unet.py +++ b/onnxruntime/python/tools/transformers/fusion_attention_unet.py @@ -373,7 +373,9 @@ class FusionAttentionUnet(Fusion): else "MultiHeadAttention ({})".format( "self attention with packed qkv" if self.enable_packed_qkv - else "cross attention with packed kv" if self.enable_packed_kv else "cross attention" + else "cross attention with packed kv" + if self.enable_packed_kv + else "cross attention" ) ) self.increase_counter(counter_name) @@ -841,7 +843,9 @@ class FusionAttentionUnet(Fusion): else "MultiHeadAttention ({})".format( "self attention with packed qkv" if self.enable_packed_qkv - else "cross attention with packed kv" if self.enable_packed_kv else "cross attention" + else "cross attention with packed kv" + if self.enable_packed_kv + else "cross attention" ) ) self.increase_counter(counter_name) diff --git a/onnxruntime/python/tools/transformers/large_model_exporter.py b/onnxruntime/python/tools/transformers/large_model_exporter.py index 0eaccc0faf..f623102802 100644 --- a/onnxruntime/python/tools/transformers/large_model_exporter.py +++ b/onnxruntime/python/tools/transformers/large_model_exporter.py @@ -6,6 +6,7 @@ """ Export LLM to onnx """ + import argparse import inspect import math @@ -173,8 +174,8 @@ def move_to_appropriate_device(model: nn.Module, sample_inputs_tp: tuple) -> nn. 
""" total_mem_per_cpu = torch.cuda.get_device_properties(0).total_memory / 1024 / 1024 - print(f"Model_Size = {get_model_parameter_size(model)/1024} GB") - print(f"total_mem_per_cpu = {total_mem_per_cpu/1024} GB") + print(f"Model_Size = {get_model_parameter_size(model) / 1024} GB") + print(f"total_mem_per_cpu = {total_mem_per_cpu / 1024} GB") if get_model_parameter_size(model) > total_mem_per_cpu * 0.45: device_collection = [torch.device(i) for i in range(torch.cuda.device_count())] if len(device_collection) > 1: @@ -228,9 +229,9 @@ def fetch_onnx_inputs_outputs_name( onnx_inp_names = tuple( [torch_input_names[i] for i in range(len(torch_input_names)) if isinstance(onnx_inputs[i], torch.Tensor)] ) - assert ( - "input_ids" in onnx_inp_names and "attention_mask" in onnx_inp_names - ), "input_ids and attention_mask must be existed in inputs" + assert "input_ids" in onnx_inp_names and "attention_mask" in onnx_inp_names, ( + "input_ids and attention_mask must be existed in inputs" + ) onnx_out_names = ("logits",) onnx_dynamic_axes = { "input_ids": {0: "batch_size", 1: "seq_len"}, diff --git a/onnxruntime/python/tools/transformers/models/gpt2/gpt2_helper.py b/onnxruntime/python/tools/transformers/models/gpt2/gpt2_helper.py index 9153193a49..1b12fe9005 100644 --- a/onnxruntime/python/tools/transformers/models/gpt2/gpt2_helper.py +++ b/onnxruntime/python/tools/transformers/models/gpt2/gpt2_helper.py @@ -889,11 +889,11 @@ class Gpt2Helper: result["nan_rate"] = (total_test_cases - len(max_abs_diff_list)) * 1.0 / total_test_cases logger.info( - f"Parity Test Cases={total_test_cases}; Passed={passed_test_cases}; Nan={total_test_cases-len(max_abs_diff_list)}; Top1_Matched={top1_matched_cases}" + f"Parity Test Cases={total_test_cases}; Passed={passed_test_cases}; Nan={total_test_cases - len(max_abs_diff_list)}; Top1_Matched={top1_matched_cases}" ) if passed_test_cases > 0.95 * total_test_cases: - logger.info(f"Parity is good: passed rate={int(passed_test_cases*100/total_test_cases):.0f}%") + logger.info(f"Parity is good: passed rate={int(passed_test_cases * 100 / total_test_cases):.0f}%") return result diff --git a/onnxruntime/python/tools/transformers/models/llama/benchmark.py b/onnxruntime/python/tools/transformers/models/llama/benchmark.py index d05de369b3..61bfc95073 100644 --- a/onnxruntime/python/tools/transformers/models/llama/benchmark.py +++ b/onnxruntime/python/tools/transformers/models/llama/benchmark.py @@ -642,9 +642,9 @@ def get_args(rank=0): # Check that only one (batch_size, sequence_length) combination is set for profiling if args.profile: - assert ( - len(args.batch_sizes) == 1 and len(args.sequence_lengths) == 1 - ), "Please provide only one (batch_size, sequence_length) combination for profiling" + assert len(args.batch_sizes) == 1 and len(args.sequence_lengths) == 1, ( + "Please provide only one (batch_size, sequence_length) combination for profiling" + ) return args diff --git a/onnxruntime/python/tools/transformers/models/llama/benchmark_e2e.py b/onnxruntime/python/tools/transformers/models/llama/benchmark_e2e.py index 9f6f86fc28..db78d837f8 100644 --- a/onnxruntime/python/tools/transformers/models/llama/benchmark_e2e.py +++ b/onnxruntime/python/tools/transformers/models/llama/benchmark_e2e.py @@ -259,14 +259,16 @@ def get_args(): help="Use when GroupQueryAttention (GQA) is in ONNX model", ) - parser.add_argument( - "--anomaly-filtering", - default=False, - action="store_true", - help="Use this flag to filter anomaly accelerator times for tokens generated. 
\ + ( + parser.add_argument( + "--anomaly-filtering", + default=False, + action="store_true", + help="Use this flag to filter anomaly accelerator times for tokens generated. \ This may give more accurate latency and throughput metrics for tokens generated. \ Wall-clock metrics are still reported with anomaly times though.", - ), + ), + ) parser.add_argument( "-b", diff --git a/onnxruntime/python/tools/transformers/models/llama/convert_to_onnx.py b/onnxruntime/python/tools/transformers/models/llama/convert_to_onnx.py index f5446ed718..7bf8bcb82e 100644 --- a/onnxruntime/python/tools/transformers/models/llama/convert_to_onnx.py +++ b/onnxruntime/python/tools/transformers/models/llama/convert_to_onnx.py @@ -455,9 +455,8 @@ def smooth_quant( decoder_model_int8_path: str, decoder_with_past_model_int8_path: str, ): - from neural_compressor import PostTrainingQuantConfig + from neural_compressor import PostTrainingQuantConfig, set_workspace from neural_compressor import quantization as intel_quantization - from neural_compressor import set_workspace from onnx.external_data_helper import load_external_data_for_model from quant_kv_dataloader import QuantKVDataLoader diff --git a/onnxruntime/python/tools/transformers/models/longformer/benchmark_longformer.py b/onnxruntime/python/tools/transformers/models/longformer/benchmark_longformer.py index ab92a12343..274d56df3f 100644 --- a/onnxruntime/python/tools/transformers/models/longformer/benchmark_longformer.py +++ b/onnxruntime/python/tools/transformers/models/longformer/benchmark_longformer.py @@ -148,9 +148,9 @@ def test_ort_latency( for batch_size in batch_sizes: for sequence_length in sequence_lengths: for global_length in global_lengths: - assert ( - global_length <= model.config.attention_window[0] - ), "Limitation of current implementation: number of global token <= attention_window" + assert global_length <= model.config.attention_window[0], ( + "Limitation of current implementation: number of global token <= attention_window" + ) logger.info( f"Testing batch_size={batch_size} sequence_length={sequence_length} global_length={global_length} " diff --git a/onnxruntime/python/tools/transformers/models/sam2/image_decoder.py b/onnxruntime/python/tools/transformers/models/sam2/image_decoder.py index 5eafb29713..07ed150631 100644 --- a/onnxruntime/python/tools/transformers/models/sam2/image_decoder.py +++ b/onnxruntime/python/tools/transformers/models/sam2/image_decoder.py @@ -212,7 +212,6 @@ def test_decoder_onnx( onnx_model_path: str, multimask_output=False, ): - batch_size = 1 image = random_sam2_input_image(batch_size) sam2_encoder = SAM2ImageEncoder(sam2_model).cpu() diff --git a/onnxruntime/python/tools/transformers/models/sam2/sam2_demo.py b/onnxruntime/python/tools/transformers/models/sam2/sam2_demo.py index 9533e2652f..af6b0e17e7 100644 --- a/onnxruntime/python/tools/transformers/models/sam2/sam2_demo.py +++ b/onnxruntime/python/tools/transformers/models/sam2/sam2_demo.py @@ -76,7 +76,7 @@ def show_masks( show_box(box_coords, plt.gca()) if len(scores) > 1: - plt.title(f"Mask {i+1}, Score: {score:.3f}", fontsize=18) + plt.title(f"Mask {i + 1}, Score: {score:.3f}", fontsize=18) plt.axis("off") if output_image_file_prefix: diff --git a/onnxruntime/python/tools/transformers/models/sam2/sam2_image_onnx_predictor.py b/onnxruntime/python/tools/transformers/models/sam2/sam2_image_onnx_predictor.py index 363b5daf46..3c0c886b87 100644 --- a/onnxruntime/python/tools/transformers/models/sam2/sam2_image_onnx_predictor.py +++ 
b/onnxruntime/python/tools/transformers/models/sam2/sam2_image_onnx_predictor.py @@ -136,9 +136,9 @@ class SAM2ImageOnnxPredictor(SAM2ImagePredictor): input_image = self._transforms(image) input_image = input_image[None, ...].to(self.device) - assert ( - len(input_image.shape) == 4 and input_image.shape[1] == 3 - ), f"input_image must be of size 1x3xHxW, got {input_image.shape}" + assert len(input_image.shape) == 4 and input_image.shape[1] == 3, ( + f"input_image must be of size 1x3xHxW, got {input_image.shape}" + ) # Computing image embeddings for the provided image io_shapes = encoder_shape_dict(batch_size=1, height=input_image.shape[2], width=input_image.shape[3]) diff --git a/onnxruntime/python/tools/transformers/models/stable_diffusion/benchmark.py b/onnxruntime/python/tools/transformers/models/stable_diffusion/benchmark.py index 0452cff235..74652239bc 100755 --- a/onnxruntime/python/tools/transformers/models/stable_diffusion/benchmark.py +++ b/onnxruntime/python/tools/transformers/models/stable_diffusion/benchmark.py @@ -1368,9 +1368,9 @@ def main(): use_io_binding=args.use_io_binding, ) elif args.engine == "onnxruntime": - assert args.pipeline and os.path.isdir( - args.pipeline - ), "--pipeline should be specified for the directory of ONNX models" + assert args.pipeline and os.path.isdir(args.pipeline), ( + "--pipeline should be specified for the directory of ONNX models" + ) print(f"Testing diffusers StableDiffusionPipeline with {provider} provider and tuning={args.tuning}") result = run_ort( model_name=sd_model, diff --git a/onnxruntime/python/tools/transformers/models/stable_diffusion/diffusion_schedulers.py b/onnxruntime/python/tools/transformers/models/stable_diffusion/diffusion_schedulers.py index 57cb51bbea..41d2d267c5 100644 --- a/onnxruntime/python/tools/transformers/models/stable_diffusion/diffusion_schedulers.py +++ b/onnxruntime/python/tools/transformers/models/stable_diffusion/diffusion_schedulers.py @@ -156,8 +156,7 @@ class DDIMScheduler: model_output = (alpha_prod_t**0.5) * model_output + (beta_prod_t**0.5) * sample else: raise ValueError( - f"prediction_type given as {self.prediction_type} must be one of `epsilon`, `sample`, or" - " `v_prediction`" + f"prediction_type given as {self.prediction_type} must be one of `epsilon`, `sample`, or `v_prediction`" ) # 4. 
Clip "predicted x_0" diff --git a/onnxruntime/python/tools/transformers/models/stable_diffusion/pipeline_stable_diffusion.py b/onnxruntime/python/tools/transformers/models/stable_diffusion/pipeline_stable_diffusion.py index 522cc541c1..ac955f5014 100644 --- a/onnxruntime/python/tools/transformers/models/stable_diffusion/pipeline_stable_diffusion.py +++ b/onnxruntime/python/tools/transformers/models/stable_diffusion/pipeline_stable_diffusion.py @@ -568,7 +568,7 @@ class StableDiffusionPipeline: prefix = "".join(x for x in prompt[i] if x.isalnum() or x in ", -").replace(" ", "_")[:20] parts = [prefix, session_id, str(i + 1), str(seed), self.current_scheduler, str(self.actual_steps)] image_path = os.path.join(self.output_dir, "-".join(parts) + ".png") - print(f"Saving image {i+1} / {len(images)} to: {image_path}") + print(f"Saving image {i + 1} / {len(images)} to: {image_path}") from PIL import PngImagePlugin diff --git a/onnxruntime/python/tools/transformers/onnx_model.py b/onnxruntime/python/tools/transformers/onnx_model.py index 2a6f9c3d75..33506d6d00 100644 --- a/onnxruntime/python/tools/transformers/onnx_model.py +++ b/onnxruntime/python/tools/transformers/onnx_model.py @@ -1284,7 +1284,7 @@ class OnnxModel: op_count[op] = 1 if op not in op_count else (op_count[op] + 1) # Sorted by count in the descending order, then by key in alphabetical order. - logger.info(f"Operators:{sorted(op_count.items(), key=lambda kv:(-kv[1], kv[0]))}") + logger.info(f"Operators:{sorted(op_count.items(), key=lambda kv: (-kv[1], kv[0]))}") return op_count diff --git a/onnxruntime/python/tools/transformers/quantize_helper.py b/onnxruntime/python/tools/transformers/quantize_helper.py index 6a25196dbc..9e44921bde 100644 --- a/onnxruntime/python/tools/transformers/quantize_helper.py +++ b/onnxruntime/python/tools/transformers/quantize_helper.py @@ -64,7 +64,7 @@ class QuantizeHelper: from onnxruntime.quantization import quantize_dynamic Path(quantized_model_path).parent.mkdir(parents=True, exist_ok=True) - logger.info(f"Size of full precision ONNX model(MB):{os.path.getsize(onnx_model_path)/(1024*1024)}") + logger.info(f"Size of full precision ONNX model(MB):{os.path.getsize(onnx_model_path) / (1024 * 1024)}") quantize_dynamic( onnx_model_path, quantized_model_path, @@ -73,4 +73,4 @@ class QuantizeHelper: ) logger.info(f"quantized model saved to:{quantized_model_path}") # TODO: inlcude external data in total model size. 
- logger.info(f"Size of quantized ONNX model(MB):{os.path.getsize(quantized_model_path)/(1024*1024)}") + logger.info(f"Size of quantized ONNX model(MB):{os.path.getsize(quantized_model_path) / (1024 * 1024)}") diff --git a/onnxruntime/test/providers/cpu/tensor/affine_grid_test_gen.py b/onnxruntime/test/providers/cpu/tensor/affine_grid_test_gen.py index 7dcd6484a5..796a58f1a9 100644 --- a/onnxruntime/test/providers/cpu/tensor/affine_grid_test_gen.py +++ b/onnxruntime/test/providers/cpu/tensor/affine_grid_test_gen.py @@ -49,13 +49,13 @@ if args.dim is None or args.dim == 2: print(f' OpTester test("AffineGrid", {opset_version});') print(f' test.AddAttribute("align_corners", (int64_t){1 if align_corners else 0});') print( - f" test.AddInput(\"theta\", {{{theta.shape[0]}, {theta.shape[1]}, {theta.shape[2]}}}, {{{', '.join([f'{x:.6f}f' for x in theta.flatten()])}}});" + f' test.AddInput("theta", {{{theta.shape[0]}, {theta.shape[1]}, {theta.shape[2]}}}, {{{", ".join([f"{x:.6f}f" for x in theta.flatten()])}}});' ) print( f' test.AddInput("size", {{{len(size)}}}, {{{size[0]}, {size[1]}, {size[2]}, {size[3]}}});' ) print( - f" test.AddOutput(\"grid\", {{{size[0]}, {size[2]}, {size[3]}, 2}}, {{{', '.join([f'{x:.4f}f' for x in grid.flatten()])}}});" + f' test.AddOutput("grid", {{{size[0]}, {size[2]}, {size[3]}, 2}}, {{{", ".join([f"{x:.4f}f" for x in grid.flatten()])}}});' ) print(" test.Run();") print("}\n") @@ -104,13 +104,13 @@ if args.dim is None or args.dim == 3: print(f' OpTester test("AffineGrid", {opset_version});') print(f' test.AddAttribute("align_corners", (int64_t){1 if align_corners else 0});') print( - f" test.AddInput(\"theta\", {{{theta.shape[0]}, {theta.shape[1]}, {theta.shape[2]}}}, {{{', '.join([f'{x:.6f}f' for x in theta.flatten()])}}});" + f' test.AddInput("theta", {{{theta.shape[0]}, {theta.shape[1]}, {theta.shape[2]}}}, {{{", ".join([f"{x:.6f}f" for x in theta.flatten()])}}});' ) print( f' test.AddInput("size", {{{len(size)}}}, {{{size[0]}, {size[1]}, {size[2]}, {size[3]}, {size[4]}}});' ) print( - f" test.AddOutput(\"grid\", {{{size[0]}, {size[2]}, {size[3]}, {size[4]}, 3}}, {{{', '.join([f'{x:.4f}f' for x in grid.flatten()])}}});" + f' test.AddOutput("grid", {{{size[0]}, {size[2]}, {size[3]}, {size[4]}, 3}}, {{{", ".join([f"{x:.4f}f" for x in grid.flatten()])}}});' ) print(" test.Run();") print("}\n") diff --git a/onnxruntime/test/providers/cpu/tensor/grid_sample_test_gen.py b/onnxruntime/test/providers/cpu/tensor/grid_sample_test_gen.py index bf58a5d3fc..627b681793 100644 --- a/onnxruntime/test/providers/cpu/tensor/grid_sample_test_gen.py +++ b/onnxruntime/test/providers/cpu/tensor/grid_sample_test_gen.py @@ -80,11 +80,11 @@ for opset_version in [16, 20]: print(f'{spaces}std::string padding_mode = "{padding_mode}";') print(f"{spaces}int64_t align_corners = {onnx_align_corners};") print(f"{spaces}std::initializer_list X_shape {{ {', '.join(map(str, input_shape))} }};") - print(f"{spaces}std::initializer_list X_data { X_data_str };") + print(f"{spaces}std::initializer_list X_data {X_data_str};") print(f"{spaces}std::initializer_list Grid_shape {{ {', '.join(map(str, grid_shape))} }};") - print(f"{spaces}std::initializer_list Grid_data { Grid_data_str };") + print(f"{spaces}std::initializer_list Grid_data {Grid_data_str};") print(f"{spaces}std::initializer_list Y_shape {{ {', '.join(map(str, Y_shape))} }};") - print(f"{spaces}std::initializer_list Y_data { Y_data_str };") + print(f"{spaces}std::initializer_list Y_data {Y_data_str};") print(f'{spaces}test.AddInput("X", X_shape, 
X_data);') print(f'{spaces}test.AddInput("Grid", Grid_shape, Grid_data);') diff --git a/onnxruntime/test/python/onnxruntime_test_float8.py b/onnxruntime/test/python/onnxruntime_test_float8.py index bb63ea2344..29aede0784 100644 --- a/onnxruntime/test/python/onnxruntime_test_float8.py +++ b/onnxruntime/test/python/onnxruntime_test_float8.py @@ -354,8 +354,7 @@ class TestInferenceSession(unittest.TestCase): assert_allclose(expect, y) except AssertionError as e: raise AssertionError( - f"Discrepancies with name={name}, float_name={float_name}, " - f"saturate={saturate}\nexpect={expect}\ny={y}" + f"Discrepancies with name={name}, float_name={float_name}, saturate={saturate}\nexpect={expect}\ny={y}" ) from e self.assertEqual(expect.shape, y.shape) self.assertEqual(expect.dtype, y.dtype) @@ -394,8 +393,7 @@ class TestInferenceSession(unittest.TestCase): assert_allclose(expect, y) except AssertionError as e: raise AssertionError( - f"Discrepancies with name={name}, float_name={float_name}, " - f"saturate={saturate}\nexpect={expect}\ny={y}" + f"Discrepancies with name={name}, float_name={float_name}, saturate={saturate}\nexpect={expect}\ny={y}" ) from e self.assertEqual(expect.shape, y.shape) self.assertEqual(expect.dtype, y.dtype) @@ -608,8 +606,7 @@ class TestInferenceSession(unittest.TestCase): if not saturate: return raise AssertionError( - f"Discrepancies with name={name}, float_name={float_name}, " - f"saturate={saturate}\nexpect={expect}\ny={y}" + f"Discrepancies with name={name}, float_name={float_name}, saturate={saturate}\nexpect={expect}\ny={y}" ) from e self.assertEqual(expect.shape, y.shape) self.assertEqual(expect.dtype, y.dtype) diff --git a/onnxruntime/test/python/onnxruntime_test_float8_gemm8.py b/onnxruntime/test/python/onnxruntime_test_float8_gemm8.py index 2dba8ff532..c9876d3d55 100644 --- a/onnxruntime/test/python/onnxruntime_test_float8_gemm8.py +++ b/onnxruntime/test/python/onnxruntime_test_float8_gemm8.py @@ -173,16 +173,16 @@ class TestFloat8Gemm8(unittest.TestCase): raise AssertionError( f"Gemm ERROR len(inputs)={len(feeds)}" - f"\na@b=\n{check(lambda:a@b)}" - f"\na.T@b=\n{check(lambda:a.T@b)}" - f"\na@b.T=\n{check(lambda:a@b.T)}" - f"\na.T@b.T=\n{check(lambda:a.T@b.T)}" - f"\n----\nb@a=\n{check(lambda:b@a)}" - f"\nb.T@a=\n{check(lambda:b.T@a)}" - f"\nb@a.T=\n{check(lambda:b@a.T)}" - f"\nb.T@a.T=\n{check(lambda:b.T@a.T)}" - f"\n----\nexpected=\n{expected[:2,:2]}" - f"\n----\ngot=\n{y[:2,:2]}" + f"\na@b=\n{check(lambda: a @ b)}" + f"\na.T@b=\n{check(lambda: a.T @ b)}" + f"\na@b.T=\n{check(lambda: a @ b.T)}" + f"\na.T@b.T=\n{check(lambda: a.T @ b.T)}" + f"\n----\nb@a=\n{check(lambda: b @ a)}" + f"\nb.T@a=\n{check(lambda: b.T @ a)}" + f"\nb@a.T=\n{check(lambda: b @ a.T)}" + f"\nb.T@a.T=\n{check(lambda: b.T @ a.T)}" + f"\n----\nexpected=\n{expected[:2, :2]}" + f"\n----\ngot=\n{y[:2, :2]}" f"\nkwargs={kwargs}" ) from e @@ -225,16 +225,16 @@ class TestFloat8Gemm8(unittest.TestCase): raise AssertionError( f"Gemm ERROR len(inputs)={len(feeds)}" - f"\na@b=\n{check(lambda:a@b)}" - f"\na.T@b=\n{check(lambda:a.T@b)}" - f"\na@b.T=\n{check(lambda:a@b.T)}" - f"\na.T@b.T=\n{check(lambda:a.T@b.T)}" - f"\n----\nb@a=\n{check(lambda:b@a)}" - f"\nb.T@a=\n{check(lambda:b.T@a)}" - f"\nb@a.T=\n{check(lambda:b@a.T)}" - f"\nb.T@a.T=\n{check(lambda:b.T@a.T)}" - f"\n----\nexpected=\n{expected[:2,:2]}" - f"\n----\ngot=\n{y[:2,:2]}" + f"\na@b=\n{check(lambda: a @ b)}" + f"\na.T@b=\n{check(lambda: a.T @ b)}" + f"\na@b.T=\n{check(lambda: a @ b.T)}" + f"\na.T@b.T=\n{check(lambda: a.T @ b.T)}" + 
f"\n----\nb@a=\n{check(lambda: b @ a)}" + f"\nb.T@a=\n{check(lambda: b.T @ a)}" + f"\nb@a.T=\n{check(lambda: b @ a.T)}" + f"\nb.T@a.T=\n{check(lambda: b.T @ a.T)}" + f"\n----\nexpected=\n{expected[:2, :2]}" + f"\n----\ngot=\n{y[:2, :2]}" f"\nkwargs={kwargs}" ) from e self.assertEqual(expected.shape, y.shape) diff --git a/onnxruntime/test/python/onnxruntime_test_python_iobinding.py b/onnxruntime/test/python/onnxruntime_test_python_iobinding.py index 76fc78e376..77f9e6f5cf 100644 --- a/onnxruntime/test/python/onnxruntime_test_python_iobinding.py +++ b/onnxruntime/test/python/onnxruntime_test_python_iobinding.py @@ -223,7 +223,6 @@ class TestIOBinding(unittest.TestCase): for inner_device, provider in devices: for onnx_dtype, torch_dtype in onnx_to_torch_type_map.items(): with self.subTest(onnx_dtype=onnx_dtype, inner_device=str(inner_device)): - # Create onnx graph with dynamic axes X = helper.make_tensor_value_info("X", onnx_dtype, [None]) # noqa: N806 Y = helper.make_tensor_value_info("Y", onnx_dtype, [None]) # noqa: N806 diff --git a/onnxruntime/test/python/quantization/test_conv_dynamic.py b/onnxruntime/test/python/quantization/test_conv_dynamic.py index f6ee3fe97a..5892e18bae 100644 --- a/onnxruntime/test/python/quantization/test_conv_dynamic.py +++ b/onnxruntime/test/python/quantization/test_conv_dynamic.py @@ -10,9 +10,13 @@ import unittest import numpy as np import onnx from onnx import TensorProto, helper, numpy_helper -from op_test_utils import TestDataFeeds # noqa: F401 -from op_test_utils import check_op_type_order # noqa: F401 -from op_test_utils import check_model_correctness, check_op_type_count, check_qtype_by_node_type +from op_test_utils import ( + TestDataFeeds, # noqa: F401 + check_model_correctness, + check_op_type_count, + check_op_type_order, # noqa: F401 + check_qtype_by_node_type, +) from onnxruntime.quantization import DynamicQuantConfig, QuantType, quantize, quantize_dynamic diff --git a/onnxruntime/test/python/quantization/test_op_pooling.py b/onnxruntime/test/python/quantization/test_op_pooling.py index 539affc314..5364171307 100644 --- a/onnxruntime/test/python/quantization/test_op_pooling.py +++ b/onnxruntime/test/python/quantization/test_op_pooling.py @@ -10,8 +10,13 @@ import unittest import numpy as np import onnx from onnx import TensorProto, helper -from op_test_utils import check_op_nodes # noqa: F401 -from op_test_utils import TestDataFeeds, check_model_correctness, check_op_type_count, check_qtype_by_node_type +from op_test_utils import ( + TestDataFeeds, + check_model_correctness, + check_op_nodes, # noqa: F401 + check_op_type_count, + check_qtype_by_node_type, +) from onnxruntime.quantization import QuantFormat, QuantType, quantize_static diff --git a/onnxruntime/test/python/quantization/test_qdq.py b/onnxruntime/test/python/quantization/test_qdq.py index 23b397ffd8..178cb9d876 100644 --- a/onnxruntime/test/python/quantization/test_qdq.py +++ b/onnxruntime/test/python/quantization/test_qdq.py @@ -759,12 +759,12 @@ class TestQDQFormatConvRelu(TestQDQFormat): QuantType.QInt16: TensorProto.INT16, QuantType.QUInt16: TensorProto.UINT16, } - assert ( - weight_type not in to_tensor_types or to_tensor_types[weight_type] in zero_types - ), f"weight_type={weight_type} not in zero_types={zero_types}" - assert ( - activation_type not in to_tensor_types or to_tensor_types[activation_type] in zero_types - ), f"activation_type={activation_type} not in zero_types={zero_types}" + assert weight_type not in to_tensor_types or to_tensor_types[weight_type] in zero_types, ( 
+ f"weight_type={weight_type} not in zero_types={zero_types}" + ) + assert activation_type not in to_tensor_types or to_tensor_types[activation_type] in zero_types, ( + f"activation_type={activation_type} not in zero_types={zero_types}" + ) check_model_correctness(self, model_fp32_path, model_qdq_path, data_reader.get_next(), rtol=rtol, atol=atol) diff --git a/onnxruntime/test/python/quantization/test_tensor_quant_overrides_option.py b/onnxruntime/test/python/quantization/test_tensor_quant_overrides_option.py index 41dae04f1c..5617a424cf 100644 --- a/onnxruntime/test/python/quantization/test_tensor_quant_overrides_option.py +++ b/onnxruntime/test/python/quantization/test_tensor_quant_overrides_option.py @@ -1195,7 +1195,9 @@ class TestTensorQuantOverridesOption(unittest.TestCase): # get_qnn_qdq_config() should be able to validate the per-channel axis without having to load # the external weight data. qnn_config = get_qnn_qdq_config( - str(model_path), DummyDataReader([]), init_overrides=init_overrides # Dummy data reader does nothing + str(model_path), + DummyDataReader([]), + init_overrides=init_overrides, # Dummy data reader does nothing ) self.assertEqual(set(qnn_config.op_types_to_quantize), {"Conv"}) self.assertTrue(qnn_config.use_external_data_format) diff --git a/onnxruntime/test/python/transformers/benchmark_gqa.py b/onnxruntime/test/python/transformers/benchmark_gqa.py index 53d015a029..5cef4ae863 100644 --- a/onnxruntime/test/python/transformers/benchmark_gqa.py +++ b/onnxruntime/test/python/transformers/benchmark_gqa.py @@ -6,6 +6,7 @@ """ Benchmark performance of GroupQueryAttention. """ + from typing import Optional import torch diff --git a/onnxruntime/test/python/transformers/conformer_model_generator.py b/onnxruntime/test/python/transformers/conformer_model_generator.py index 5b27a46ea0..71e4f2b63c 100644 --- a/onnxruntime/test/python/transformers/conformer_model_generator.py +++ b/onnxruntime/test/python/transformers/conformer_model_generator.py @@ -22,7 +22,9 @@ def get_tensor_and_weight(name: str, shape: List[int], random=False, zeros=False weights = ( [np.random.uniform(low, high) for _ in range(total_elements)] if random - else [0.0] * total_elements if zeros else [1.0] * total_elements + else [0.0] * total_elements + if zeros + else [1.0] * total_elements ) return helper.make_tensor(name, TensorProto.FLOAT, shape, weights), weights diff --git a/onnxruntime/test/python/transformers/parity_utilities.py b/onnxruntime/test/python/transformers/parity_utilities.py index d7f79304d2..376b684c76 100644 --- a/onnxruntime/test/python/transformers/parity_utilities.py +++ b/onnxruntime/test/python/transformers/parity_utilities.py @@ -115,9 +115,9 @@ def optimize_onnx( onnx_model.save_model_to_file(optimized_onnx_path) if expected_op is not None: - assert ( - len(onnx_model.get_nodes_by_op_type(expected_op)) == 1 - ), f"Expected {expected_op} node not found in the optimized model {optimized_onnx_path}" + assert len(onnx_model.get_nodes_by_op_type(expected_op)) == 1, ( + f"Expected {expected_op} node not found in the optimized model {optimized_onnx_path}" + ) def diff_outputs(torch_outputs, ort_outputs, index): diff --git a/onnxruntime/test/python/transformers/test_mha.py b/onnxruntime/test/python/transformers/test_mha.py index 45726ecc7c..6f396f35f7 100644 --- a/onnxruntime/test/python/transformers/test_mha.py +++ b/onnxruntime/test/python/transformers/test_mha.py @@ -183,14 +183,14 @@ def mha_with_past_reference( assert config.kv_sequence_length == config.sequence_length assert 
config.use_kv_cache if past_k is not None: - assert ( - past_k.dim() == 4 and k.dim() == 4 and past_k.size(1) == k.size(1) - ), f"expect BNSH format: {past_k.shape=} {k.shape=}" + assert past_k.dim() == 4 and k.dim() == 4 and past_k.size(1) == k.size(1), ( + f"expect BNSH format: {past_k.shape=} {k.shape=}" + ) if past_v is not None: - assert ( - past_v.dim() == 4 and v.dim() == 4 and past_v.size(1) == v.size(1) - ), f"expect BNSH format: {past_v.shape=} {v.shape=}" + assert past_v.dim() == 4 and v.dim() == 4 and past_v.size(1) == v.size(1), ( + f"expect BNSH format: {past_v.shape=} {v.shape=}" + ) present_k = torch.cat((past_k, k), dim=2) if past_k is not None else k present_v = torch.cat((past_v, v), dim=2) if past_v is not None else v @@ -533,7 +533,6 @@ def causal_mask(seqlen_q, seqlen_k, query_padding_mask=None, key_padding_mask=No def merge_padding_and_causal_masks(config): - q_mask, k_mask, mask = config.right_side_padding_masks() if config.causal: query_padding_mask = q_mask.reshape(config.batch_size, config.sequence_length) diff --git a/onnxruntime/test/python/transformers/test_parity_t5_mha.py b/onnxruntime/test/python/transformers/test_parity_t5_mha.py index 84708ddcf8..7eae2f0a23 100644 --- a/onnxruntime/test/python/transformers/test_parity_t5_mha.py +++ b/onnxruntime/test/python/transformers/test_parity_t5_mha.py @@ -418,9 +418,9 @@ class T5Attention(nn.Module): real_seq_length = seq_length if past_key_value is not None: - assert ( - len(past_key_value) == 2 - ), f"past_key_value should have 2 past states: keys and values. Got { len(past_key_value)} past states" + assert len(past_key_value) == 2, ( + f"past_key_value should have 2 past states: keys and values. Got {len(past_key_value)} past states" + ) real_seq_length += past_key_value[0].shape[2] if query_length is None else query_length key_length = real_seq_length if key_value_states is None else key_value_states.shape[1] @@ -538,9 +538,9 @@ class T5Attention(nn.Module): real_seq_length = seq_length if past_key_value is not None: - assert ( - len(past_key_value) == 2 - ), f"past_key_value should have 2 past states: keys and values. Got { len(past_key_value)} past states" + assert len(past_key_value) == 2, ( + f"past_key_value should have 2 past states: keys and values. 
Got {len(past_key_value)} past states" + ) real_seq_length += past_key_value[0].shape[2] if query_length is None else query_length def project(hidden_states, proj_layer, key_value_states, past_key_value): diff --git a/onnxruntime/test/python/transformers/test_rotary_mha_fusion.py b/onnxruntime/test/python/transformers/test_rotary_mha_fusion.py index 373ad86ced..aba0ccdac2 100644 --- a/onnxruntime/test/python/transformers/test_rotary_mha_fusion.py +++ b/onnxruntime/test/python/transformers/test_rotary_mha_fusion.py @@ -1026,14 +1026,14 @@ class TestRotaryAttentionFusion(unittest.TestCase): unsqueeze_0_node = helper.make_node( "Unsqueeze", inputs=[gather_0_node.output[0] if not use_mul_and_add_nodes_0 else "mul_extra_out", "zero"], - outputs=[f"unsqueeze_extra_{2*i}"], - name=f"Unsqueeze_extra_{2*i}", + outputs=[f"unsqueeze_extra_{2 * i}"], + name=f"Unsqueeze_extra_{2 * i}", ) unsqueeze_1_node = helper.make_node( "Unsqueeze", inputs=[gather_1_node.output[0] if not use_mul_and_add_nodes_1 else "add_extra_out", "zero"], - outputs=[f"unsqueeze_extra_{2*i + 1}"], - name=f"Unsqueeze_extra_{2*i + 1}", + outputs=[f"unsqueeze_extra_{2 * i + 1}"], + name=f"Unsqueeze_extra_{2 * i + 1}", ) reshape_name = reshape_node.name diff --git a/onnxruntime/test/python/transformers/test_sparse_attention.py b/onnxruntime/test/python/transformers/test_sparse_attention.py index 5dbb9a277e..774761afdd 100644 --- a/onnxruntime/test/python/transformers/test_sparse_attention.py +++ b/onnxruntime/test/python/transformers/test_sparse_attention.py @@ -6,6 +6,7 @@ """ Parity test and benchmark performance of SparseAttention. Requires Nvidia GPU of Compute Capability 7.5 or above. """ + import math import unittest from typing import Optional, Union diff --git a/onnxruntime/test/python/transformers/whisper_model_generator.py b/onnxruntime/test/python/transformers/whisper_model_generator.py index a57b45cbc5..71d1a4cbdc 100644 --- a/onnxruntime/test/python/transformers/whisper_model_generator.py +++ b/onnxruntime/test/python/transformers/whisper_model_generator.py @@ -22,7 +22,9 @@ def get_tensor_and_weight(name: str, shape: List[int], random=False, zeros=False weights = ( [np.random.uniform(low, high) for _ in range(total_elements)] if random - else [0.0] * total_elements if zeros else [1.0] * total_elements + else [0.0] * total_elements + if zeros + else [1.0] * total_elements ) return helper.make_tensor(name, TensorProto.FLOAT, shape, weights), weights diff --git a/onnxruntime/test/testdata/dummy_t5_model_generator.py b/onnxruntime/test/testdata/dummy_t5_model_generator.py index 1ecd8b9ee9..00d9231fc8 100644 --- a/onnxruntime/test/testdata/dummy_t5_model_generator.py +++ b/onnxruntime/test/testdata/dummy_t5_model_generator.py @@ -1,4 +1,4 @@ -""" Script to generate a dummy ONNX model emulating T5 model with BeamSearch op. 
""" +"""Script to generate a dummy ONNX model emulating T5 model with BeamSearch op.""" import argparse diff --git a/onnxruntime/test/testdata/sparse_initializer_as_output.py b/onnxruntime/test/testdata/sparse_initializer_as_output.py index 1f85f5690d..b10c84ccc1 100644 --- a/onnxruntime/test/testdata/sparse_initializer_as_output.py +++ b/onnxruntime/test/testdata/sparse_initializer_as_output.py @@ -6,13 +6,17 @@ from typing import Any, Callable, Dict, List, Optional, Sequence, Text, Tuple, T import numpy as np import onnx -from onnx import AttributeProto # noqa: F401 -from onnx import GraphProto # noqa: F401 -from onnx import SparseTensorProto # noqa: F401 -from onnx import mapping # noqa: F401 -from onnx import numpy_helper # noqa: F401 -from onnx import utils # noqa: F401 -from onnx import TensorProto, ValueInfoProto, helper +from onnx import ( + AttributeProto, # noqa: F401 + GraphProto, # noqa: F401 + SparseTensorProto, # noqa: F401 + TensorProto, + ValueInfoProto, + helper, + mapping, # noqa: F401 + numpy_helper, # noqa: F401 + utils, # noqa: F401 +) from onnx.helper import make_opsetid diff --git a/onnxruntime/test/testdata/sparse_to_dense_matmul.py b/onnxruntime/test/testdata/sparse_to_dense_matmul.py index ceabae9c2d..57a15ba723 100644 --- a/onnxruntime/test/testdata/sparse_to_dense_matmul.py +++ b/onnxruntime/test/testdata/sparse_to_dense_matmul.py @@ -6,13 +6,17 @@ from typing import Any, Callable, Dict, List, Optional, Sequence, Text, Tuple, T import numpy as np # noqa: F401 import onnx -from onnx import AttributeProto # noqa: F401 -from onnx import GraphProto # noqa: F401 -from onnx import SparseTensorProto # noqa: F401 -from onnx import mapping # noqa: F401 -from onnx import numpy_helper # noqa: F401 -from onnx import utils # noqa: F401 -from onnx import TensorProto, ValueInfoProto, helper +from onnx import ( + AttributeProto, # noqa: F401 + GraphProto, # noqa: F401 + SparseTensorProto, # noqa: F401 + TensorProto, + ValueInfoProto, + helper, + mapping, # noqa: F401 + numpy_helper, # noqa: F401 + utils, # noqa: F401 +) from onnx.helper import make_opsetid diff --git a/onnxruntime/test/testdata/test_data_generation/adamw_test/adamw_test_data_generator.py b/onnxruntime/test/testdata/test_data_generation/adamw_test/adamw_test_data_generator.py index 443444044b..430a9a345e 100644 --- a/onnxruntime/test/testdata/test_data_generation/adamw_test/adamw_test_data_generator.py +++ b/onnxruntime/test/testdata/test_data_generation/adamw_test/adamw_test_data_generator.py @@ -2,7 +2,7 @@ # Licensed under the MIT License. """This file is used to generate test data for Adam optimizer tests in - orttraining/orttraining/test/training_ops/cuda/optimizer/adamw_test.cc.""" +orttraining/orttraining/test/training_ops/cuda/optimizer/adamw_test.cc.""" import torch diff --git a/onnxruntime/test/testdata/test_data_generation/lr_scheduler/lr_scheduler_test_data_generator.py b/onnxruntime/test/testdata/test_data_generation/lr_scheduler/lr_scheduler_test_data_generator.py index c67faaca5c..e4ecae4b18 100644 --- a/onnxruntime/test/testdata/test_data_generation/lr_scheduler/lr_scheduler_test_data_generator.py +++ b/onnxruntime/test/testdata/test_data_generation/lr_scheduler/lr_scheduler_test_data_generator.py @@ -2,7 +2,7 @@ # Licensed under the MIT License. 
"""This file is used to generate test data for LR scheduler optimizer tests in - orttraining/orttraining/test/training_api/core/training_api_tests.cc.""" +orttraining/orttraining/test/training_api/core/training_api_tests.cc.""" import torch from torch.optim.lr_scheduler import LambdaLR @@ -33,7 +33,7 @@ class WarmupLinearSchedule(LambdaLR): super().__init__(optimizer, self.lr_lambda, last_epoch=last_epoch) def lr_lambda(self, step): - print(f"warmup_step_count_: {self.warmup_steps }, step: {step}, total_step_count_: {self.t_total}") + print(f"warmup_step_count_: {self.warmup_steps}, step: {step}, total_step_count_: {self.t_total}") if step < self.warmup_steps: return float(step) / float(max(1, self.warmup_steps)) return max(0.0, float(self.t_total - step) / float(max(1.0, self.t_total - self.warmup_steps))) diff --git a/onnxruntime/test/testdata/test_data_generation/sgd_test/sgd_test_data_generator.py b/onnxruntime/test/testdata/test_data_generation/sgd_test/sgd_test_data_generator.py index 173225a21a..e601385dc8 100644 --- a/onnxruntime/test/testdata/test_data_generation/sgd_test/sgd_test_data_generator.py +++ b/onnxruntime/test/testdata/test_data_generation/sgd_test/sgd_test_data_generator.py @@ -2,7 +2,7 @@ # Licensed under the MIT License. """This file is used to generate test data for SGD optimizer tests in - orttraining/orttraining/test/training_ops/cuda/optimizer/sgd_test.cc.""" +orttraining/orttraining/test/training_ops/cuda/optimizer/sgd_test.cc.""" import torch diff --git a/onnxruntime/test/testdata/training_api/ort_format/prepare_artifacts.py b/onnxruntime/test/testdata/training_api/ort_format/prepare_artifacts.py index 70e8c4ac01..b2ad2463aa 100644 --- a/onnxruntime/test/testdata/training_api/ort_format/prepare_artifacts.py +++ b/onnxruntime/test/testdata/training_api/ort_format/prepare_artifacts.py @@ -2,7 +2,7 @@ # Licensed under the MIT License. """This file is used to generate test data for ort format model tests in - orttraining/orttraining/test/training_api/core/training_capi_tests.cc.""" +orttraining/orttraining/test/training_api/core/training_capi_tests.cc.""" import onnx import torch diff --git a/onnxruntime/test/testdata/transform/convert_qdq_ops_to_ms_domain.py b/onnxruntime/test/testdata/transform/convert_qdq_ops_to_ms_domain.py index e7fd4ac70f..1dd4ae0aee 100644 --- a/onnxruntime/test/testdata/transform/convert_qdq_ops_to_ms_domain.py +++ b/onnxruntime/test/testdata/transform/convert_qdq_ops_to_ms_domain.py @@ -24,6 +24,7 @@ Models created with this script: - fusion/constant_folding_qdq_node_unit.graph_output.qdq_contrib.onnx - fusion/constant_folding_qdq_node_unit.graph_output.qdq16_contrib.onnx """ + from __future__ import annotations import argparse diff --git a/onnxruntime/test/testdata/transform/recompute/recompute_test_graph_generator.py b/onnxruntime/test/testdata/transform/recompute/recompute_test_graph_generator.py index 2c734feda7..b7552d9a26 100644 --- a/onnxruntime/test/testdata/transform/recompute/recompute_test_graph_generator.py +++ b/onnxruntime/test/testdata/transform/recompute/recompute_test_graph_generator.py @@ -2,11 +2,11 @@ # Licensed under the MIT License. """This file is used to generate test data for MemoryOptimizer tests in - onnxruntime/test/optimizer/memory_optimizer_test.cc. +onnxruntime/test/optimizer/memory_optimizer_test.cc. 
- Be noticed, after run this script, manually rename recompute_XXXX_execution_model_training.onnx to - recompute_XXXX.onnx - """ +Be noticed, after run this script, manually rename recompute_XXXX_execution_model_training.onnx to +recompute_XXXX.onnx +""" import torch diff --git a/orttraining/orttraining/python/training/optim/__init__.py b/orttraining/orttraining/python/training/optim/__init__.py index 3cace4d30c..2ce3a32b0b 100644 --- a/orttraining/orttraining/python/training/optim/__init__.py +++ b/orttraining/orttraining/python/training/optim/__init__.py @@ -1,8 +1,10 @@ from .config import AdamConfig, LambConfig, SGDConfig, _OptimizerConfig # noqa: F401 from .fp16_optimizer import FP16_Optimizer # noqa: F401 from .fused_adam import AdamWMode, FusedAdam # noqa: F401 -from .lr_scheduler import ConstantWarmupLRScheduler # noqa: F401 -from .lr_scheduler import CosineWarmupLRScheduler # noqa: F401 -from .lr_scheduler import LinearWarmupLRScheduler # noqa: F401 -from .lr_scheduler import PolyWarmupLRScheduler # noqa: F401 -from .lr_scheduler import _LRScheduler # noqa: F401 +from .lr_scheduler import ( + ConstantWarmupLRScheduler, # noqa: F401 + CosineWarmupLRScheduler, # noqa: F401 + LinearWarmupLRScheduler, # noqa: F401 + PolyWarmupLRScheduler, # noqa: F401 + _LRScheduler, # noqa: F401 +) diff --git a/orttraining/orttraining/python/training/optim/config.py b/orttraining/orttraining/python/training/optim/config.py index d63c7ab40a..d509c8b06f 100644 --- a/orttraining/orttraining/python/training/optim/config.py +++ b/orttraining/orttraining/python/training/optim/config.py @@ -57,9 +57,9 @@ class _OptimizerConfig: ) for k in group: if k != "params": - assert ( - k in defaults or k.replace("_coef", "") in defaults - ), f"'params' has {k} hyper parameter not present at 'defaults'" + assert k in defaults or k.replace("_coef", "") in defaults, ( + f"'params' has {k} hyper parameter not present at 'defaults'" + ) self.name = name self.lr = float(defaults["lr"]) diff --git a/orttraining/orttraining/python/training/optim/lr_scheduler.py b/orttraining/orttraining/python/training/optim/lr_scheduler.py index 2a9bf438fa..bef6abb4a2 100644 --- a/orttraining/orttraining/python/training/optim/lr_scheduler.py +++ b/orttraining/orttraining/python/training/optim/lr_scheduler.py @@ -273,9 +273,9 @@ class PolyWarmupLRScheduler(_LRScheduler): self._num_warmup_steps = warmup * total_steps def _warmup_poly(self, train_step_info): - assert ( - train_step_info.optimizer_config.lr > self.lr_end - ), f"lr_end ({lr_end}) must be be smaller than initial lr ({train_step_info.optimizer_config.lr})" # noqa: F821 + assert train_step_info.optimizer_config.lr > self.lr_end, ( + f"lr_end ({self.lr_end}) must be be smaller than initial lr ({train_step_info.optimizer_config.lr})" + ) if train_step_info.optimization_step < self._num_warmup_steps: return float(train_step_info.optimization_step) / float(max(1, self._num_warmup_steps)) diff --git a/orttraining/orttraining/python/training/ort_triton/__init__.py b/orttraining/orttraining/python/training/ort_triton/__init__.py index 5f2d0c62ff..f87f8d73e7 100644 --- a/orttraining/orttraining/python/training/ort_triton/__init__.py +++ b/orttraining/orttraining/python/training/ort_triton/__init__.py @@ -9,8 +9,12 @@ from functools import wraps from onnxruntime.capi import _pybind_state as _C from .kernel import * # noqa: F403 -from .triton_op_executor import register_triton_kernel # noqa: F401 -from .triton_op_executor import call_triton_by_name, call_triton_by_onnx, get_config +from 
.triton_op_executor import ( + call_triton_by_name, + call_triton_by_onnx, + get_config, + register_triton_kernel, # noqa: F401 +) def run_once_register_triton_op_executor(f): diff --git a/orttraining/orttraining/python/training/ort_triton/_codegen.py b/orttraining/orttraining/python/training/ort_triton/_codegen.py index 9a447d8019..c6759630b2 100644 --- a/orttraining/orttraining/python/training/ort_triton/_codegen.py +++ b/orttraining/orttraining/python/training/ort_triton/_codegen.py @@ -105,9 +105,9 @@ class TritonCodegen(NodeVisitor): name = node.tensor_arg.name var_name = context.get_variable_name(name) internal_var_name = context.get_internal_variable_name(name) - assert ( - var_name != internal_var_name - ), f"variable name {var_name} and its internal variable name should not be the same." + assert var_name != internal_var_name, ( + f"variable name {var_name} and its internal variable name should not be the same." + ) offset_str, mask_str = self._get_offset_mask(node.offset_calc, node.tensor_arg.name) if offset_str: @@ -359,8 +359,7 @@ class TritonCodegen(NodeVisitor): for reduce_node in node.reduce_nodes: tmp_var_name = "tmp_" + context.get_internal_variable_name(reduce_node.outputs[0].name) code_buffer += ( - f"{space_indent}{tmp_var_name} = " - f"tl.zeros([XBLOCK, RBLOCK], tl.float32) + {reduce_node.default_value}\n" + f"{space_indent}{tmp_var_name} = tl.zeros([XBLOCK, RBLOCK], tl.float32) + {reduce_node.default_value}\n" ) code_buffer += ( f"{space_indent}for roffset in range(0, rnumel, RBLOCK):\n{space_indent} rindex = rbase + roffset\n" @@ -440,9 +439,7 @@ class TritonCodegen(NodeVisitor): def ModuleNode(self, node: ModuleNode, context: CodegenContext, code_buffer: CodeBuffer, indent: int): # noqa: N802 space_indent = " " * indent code_buffer += ( - f"{space_indent}import triton\n" - f"{space_indent}import triton.language as tl\n" - f"{space_indent}import torch\n" + f"{space_indent}import triton\n{space_indent}import triton.language as tl\n{space_indent}import torch\n" ) for kernel_node in node.kernels: diff --git a/orttraining/orttraining/python/training/ort_triton/kernel/_flash_attn.py b/orttraining/orttraining/python/training/ort_triton/kernel/_flash_attn.py index f7b7c1ff08..3850d988ef 100644 --- a/orttraining/orttraining/python/training/ort_triton/kernel/_flash_attn.py +++ b/orttraining/orttraining/python/training/ort_triton/kernel/_flash_attn.py @@ -793,7 +793,7 @@ def flash_attn_forward(q, k, v, bias=None, **kwargs): elif bias.shape[2:] == (seqlen_q, seqlen_k): bias_type = "matrix" else: - raise RuntimeError("Last 2 dimensions of bias must be (1, seqlen_k)" " or (seqlen_q, seqlen_k)") + raise RuntimeError("Last 2 dimensions of bias must be (1, seqlen_k) or (seqlen_q, seqlen_k)") bias = bias.expand(batch, nheads, seqlen_q, seqlen_k) bias_strides = (bias.stride(0), bias.stride(1), bias.stride(2)) if has_bias else (0, 0, 0) @@ -903,7 +903,7 @@ def flash_attn_backward(do, q, k, v, o, lse, bias=None, **kwargs): elif bias.shape[2:] == (seqlen_q, seqlen_k): bias_type = "matrix" else: - raise RuntimeError("Last 2 dimensions of bias must be (1, seqlen_k)" " or (seqlen_q, seqlen_k)") + raise RuntimeError("Last 2 dimensions of bias must be (1, seqlen_k) or (seqlen_q, seqlen_k)") bias = bias.expand(batch, nheads, seqlen_q, seqlen_k) bias_strides = (bias.stride(0), bias.stride(1), bias.stride(2)) if has_bias else (0, 0, 0) diff --git a/orttraining/orttraining/python/training/ortmodule/_custom_autograd_function_exporter.py 
b/orttraining/orttraining/python/training/ortmodule/_custom_autograd_function_exporter.py index 1efc3a23ee..3e679c994f 100644 --- a/orttraining/orttraining/python/training/ortmodule/_custom_autograd_function_exporter.py +++ b/orttraining/orttraining/python/training/ortmodule/_custom_autograd_function_exporter.py @@ -191,7 +191,6 @@ def _export_pt_1_10(g, n, *args, **kwargs): def _default_export( g, func_full_qual_name, func_class, cconv, output_size, output_tensor_types, output_tensor_ranks, *args, **kwargs ): - input_tensor_types = [] input_tensor_ranks = [] diff --git a/orttraining/orttraining/python/training/ortmodule/_fallback.py b/orttraining/orttraining/python/training/ortmodule/_fallback.py index 56bb45d064..6a3793cf0f 100644 --- a/orttraining/orttraining/python/training/ortmodule/_fallback.py +++ b/orttraining/orttraining/python/training/ortmodule/_fallback.py @@ -11,7 +11,6 @@ from typing import Optional import torch from . import _logger, _utils -from ._fallback_exceptions import wrap_exception # noqa: F401 from ._fallback_exceptions import ( ORTModuleDeviceException, ORTModuleFallbackException, @@ -19,6 +18,7 @@ from ._fallback_exceptions import ( ORTModuleIOError, ORTModuleONNXModelException, ORTModuleTorchModelException, + wrap_exception, # noqa: F401 ) diff --git a/orttraining/orttraining/python/training/ortmodule/_graph_transition_manager.py b/orttraining/orttraining/python/training/ortmodule/_graph_transition_manager.py index d9cae8e1f9..bbf271e4e9 100755 --- a/orttraining/orttraining/python/training/ortmodule/_graph_transition_manager.py +++ b/orttraining/orttraining/python/training/ortmodule/_graph_transition_manager.py @@ -580,9 +580,9 @@ class GraphTransitionManager: parameter_names = {k: v for k, v in flatten_module.named_parameters()} for input_name in exported_model_info.onnx_graph_input_names: if input_name in exported_model_info.onnx_graph_input_names_user_defined: - assert ( - input_name in model_info_for_export.onnx_graph_input_data_accessor_user_defined - ), f"{input_name} model_info_for_export.onnx_graph_input_data_accessor_user_defined" + assert input_name in model_info_for_export.onnx_graph_input_data_accessor_user_defined, ( + f"{input_name} model_info_for_export.onnx_graph_input_data_accessor_user_defined" + ) # We assume the data accessor should be the same as the one used for the previous export, because # there is args and kwargs schema check during export check phase. if model_info_for_export.onnx_graph_input_data_accessor_user_defined[input_name]( @@ -736,7 +736,6 @@ class GraphTransitionManager: runtime_inspector: RuntimeInspector, logger: logging.Logger, ) -> tuple[onnx.ModelProto, ORTModelInputOutputSchemaType, list[str], list[str]]: - # Add hooks to check the sparsity of the embedding and label inputs during the export. 
embedding_hook_handles = GraphTransitionManager._add_check_embedding_sparsity_hook( enable_embedding_sparse_optimizer, device, logger, runtime_inspector, flattened_module diff --git a/orttraining/orttraining/python/training/ortmodule/_runtime_inspector.py b/orttraining/orttraining/python/training/ortmodule/_runtime_inspector.py index 86fa4c9c9a..c739283e5c 100644 --- a/orttraining/orttraining/python/training/ortmodule/_runtime_inspector.py +++ b/orttraining/orttraining/python/training/ortmodule/_runtime_inspector.py @@ -201,7 +201,6 @@ class MemoryObserver: _MemoryOptimizationLevel.TRANSFORMER_LAYERWISE_RECOMPUTE, _MemoryOptimizationLevel.TRANSFORMER_LAYERWISE_RECOMPUTE_WITH_COMPROMISE, ]: - apply_config = [] for cluster_id in self.cluster_id_combination_to_saving_symbolics_map: diff --git a/orttraining/orttraining/python/training/ortmodule/_zero_stage3_compatibility.py b/orttraining/orttraining/python/training/ortmodule/_zero_stage3_compatibility.py index 11d978e71d..7da3e18007 100644 --- a/orttraining/orttraining/python/training/ortmodule/_zero_stage3_compatibility.py +++ b/orttraining/orttraining/python/training/ortmodule/_zero_stage3_compatibility.py @@ -102,9 +102,9 @@ def post_processing_enable_zero_stage3_compat( func_name = _get_func_name(c) if func_name == pre_forward_function_name: - assert ( - pre_forward_pythonop_node is None - ), "Multiple ORTZeROOffloadPreForwardFunction nodes found, it should not happen" + assert pre_forward_pythonop_node is None, ( + "Multiple ORTZeROOffloadPreForwardFunction nodes found, it should not happen" + ) pre_forward_pythonop_node = c if pre_forward_pythonop_node is None: @@ -210,7 +210,7 @@ def post_processing_enable_zero_stage3_compat( def _create_weight_retrieval_function( - zero_stage3_named_params: Optional[Dict[str, torch.nn.parameter.Parameter]] + zero_stage3_named_params: Optional[Dict[str, torch.nn.parameter.Parameter]], ) -> str: """This function is used to create a weight retrieving function using zero_stage3_named_params.""" diff --git a/orttraining/orttraining/python/training/ortmodule/experimental/json_config/_load_config_from_json.py b/orttraining/orttraining/python/training/ortmodule/experimental/json_config/_load_config_from_json.py index 76c8ce3bf3..7cda029524 100644 --- a/orttraining/orttraining/python/training/ortmodule/experimental/json_config/_load_config_from_json.py +++ b/orttraining/orttraining/python/training/ortmodule/experimental/json_config/_load_config_from_json.py @@ -59,9 +59,9 @@ def _load_use_external_gpu_allocator(ortmodule_config_accessor, data): assert hasattr(data, _load_use_external_gpu_allocator.loading_key) log.info(f"Found keyword {_load_use_external_gpu_allocator.loading_key} in json. Loading attributes from file.") - assert isinstance( - data.UseExternalGPUAllocator, bool - ), f"{_load_use_external_gpu_allocator.loading_key} must be a boolean" + assert isinstance(data.UseExternalGPUAllocator, bool), ( + f"{_load_use_external_gpu_allocator.loading_key} must be a boolean" + ) ortmodule_config_accessor._runtime_options.use_external_gpu_allocator = data.UseExternalGPUAllocator @@ -73,9 +73,9 @@ def _load_enable_custom_autograd_function(ortmodule_config_accessor, data): f"Found keyword {_load_enable_custom_autograd_function.loading_key} in json. Loading attributes from file." 
) - assert isinstance( - data.EnableCustomAutogradFunction, bool - ), f"{_load_enable_custom_autograd_function.loading_key} must be a boolean" + assert isinstance(data.EnableCustomAutogradFunction, bool), ( + f"{_load_enable_custom_autograd_function.loading_key} must be a boolean" + ) from onnxruntime.training.ortmodule._custom_autograd_function import enable_custom_autograd_support @@ -89,9 +89,9 @@ def _load_enable_grad_acc_optimization(ortmodule_config_accessor, data): assert hasattr(data, _load_enable_grad_acc_optimization.loading_key) log.info(f"Found keyword {_load_enable_grad_acc_optimization.loading_key} in json. Loading attributes from file.") - assert isinstance( - data.EnableGradAccOptimization, bool - ), f"{_load_enable_grad_acc_optimization.loading_key} must be a boolean" + assert isinstance(data.EnableGradAccOptimization, bool), ( + f"{_load_enable_grad_acc_optimization.loading_key} must be a boolean" + ) ortmodule_config_accessor._runtime_options.enable_grad_acc_optimization = data.EnableGradAccOptimization @@ -101,9 +101,9 @@ def _load_run_symbolic_shape_infer(ortmodule_config_accessor, data): assert hasattr(data, _load_run_symbolic_shape_infer.loading_key) log.info(f"Found keyword {_load_run_symbolic_shape_infer.loading_key} in json. Loading attributes from file.") - assert isinstance( - data.RunSymbolicShapeInference, bool - ), f"{_load_run_symbolic_shape_infer.loading_key} must be a boolean" + assert isinstance(data.RunSymbolicShapeInference, bool), ( + f"{_load_run_symbolic_shape_infer.loading_key} must be a boolean" + ) ortmodule_config_accessor._runtime_options.run_symbolic_shape_infer = data.RunSymbolicShapeInference @@ -175,9 +175,9 @@ def _load_use_memory_efficient_gradient(ortmodule_config_accessor, data): assert hasattr(data, _load_use_memory_efficient_gradient.loading_key) log.info(f"Found keyword {_load_use_memory_efficient_gradient.loading_key} in json. 
Loading attributes from file.") - assert isinstance( - data.UseMemoryEfficientGradient, bool - ), f"{_load_use_memory_efficient_gradient.loading_key} must be a boolean" + assert isinstance(data.UseMemoryEfficientGradient, bool), ( + f"{_load_use_memory_efficient_gradient.loading_key} must be a boolean" + ) ortmodule_config_accessor._runtime_options.use_memory_efficient_gradient = data.UseMemoryEfficientGradient diff --git a/orttraining/orttraining/python/training/utils/hooks/_statistics_subscriber.py b/orttraining/orttraining/python/training/utils/hooks/_statistics_subscriber.py index a8e730488d..d7ea3dc419 100644 --- a/orttraining/orttraining/python/training/utils/hooks/_statistics_subscriber.py +++ b/orttraining/orttraining/python/training/utils/hooks/_statistics_subscriber.py @@ -278,11 +278,11 @@ def _summarize_tensor( std_value = torch.sqrt(s.sum() / (element_count - 1)) f.write( - f"{'>'*max(0, depth) + display_name} shape: {tensor_shape} dtype: {tensor_dtype} size: {flatten_array.size()} \n" + f"{'>' * max(0, depth) + display_name} shape: {tensor_shape} dtype: {tensor_dtype} size: {flatten_array.size()} \n" f"min: {min_value} max: {max_value}, mean: {mean_value}, " f"std: {std_value} \n" f"nan: {num_nan}, inf: {num_inf}\n" ) f.write(f"samples(top 128): {flatten_array[:128]}\n") f.write(f"neg: {num_neg}, pos: {num_pos}, zero: {num_zero},\n") - f.write(f"{'='*16}\n") + f.write(f"{'=' * 16}\n") diff --git a/orttraining/orttraining/python/training/utils/torch_io_helper.py b/orttraining/orttraining/python/training/utils/torch_io_helper.py index e98fe48fc4..a6aa390a3e 100644 --- a/orttraining/orttraining/python/training/utils/torch_io_helper.py +++ b/orttraining/orttraining/python/training/utils/torch_io_helper.py @@ -291,9 +291,9 @@ def unflatten_data_using_schema( elif PrimitiveType.is_primitive_type(data_schema): return data_schema elif isinstance(data_schema, _TensorStub): - assert isinstance( - data[data_schema.tensor_idx], torch.Tensor - ), f"Expecting torch.Tensor, got {type(data[data_schema.tensor_idx])}" + assert isinstance(data[data_schema.tensor_idx], torch.Tensor), ( + f"Expecting torch.Tensor, got {type(data[data_schema.tensor_idx])}" + ) return data[data_schema.tensor_idx] elif isinstance(data_schema, abc.Sequence): sequence_type = type(data_schema) diff --git a/orttraining/orttraining/test/python/_test_helpers.py b/orttraining/orttraining/test/python/_test_helpers.py index 65043c10d8..3d75b3f988 100644 --- a/orttraining/orttraining/test/python/_test_helpers.py +++ b/orttraining/orttraining/test/python/_test_helpers.py @@ -84,7 +84,12 @@ def _get_name(name): # Depending on calling backward() from which outputs, it's possible that grad of some weights are not calculated. # none_pt_params is to tell what these weights are, so we will not compare the tensors. 
def assert_gradients_match_and_reset_gradient( - ort_model, pt_model, none_pt_params=[], reset_gradient=True, rtol=1e-04, atol=1e-05 # noqa: B006 + ort_model, + pt_model, + none_pt_params=(), + reset_gradient=True, + rtol=1e-04, + atol=1e-05, ): ort_named_params = list(ort_model.named_parameters()) pt_named_params = list(pt_model.named_parameters()) diff --git a/orttraining/orttraining/test/python/orttraining_test_dort.py b/orttraining/orttraining/test/python/orttraining_test_dort.py index e57b615de0..bd36ebf545 100644 --- a/orttraining/orttraining/test/python/orttraining_test_dort.py +++ b/orttraining/orttraining/test/python/orttraining_test_dort.py @@ -165,9 +165,9 @@ class TestTorchDynamoOrt(unittest.TestCase): for tensor, baseline_tensor in zip(tensors, baseline_tensors): torch.testing.assert_close(tensor, baseline_tensor) - assert ( - len(cached.keys()) == 2 - ), "Should only see two GraphModules so far. One for forward and the other one for backward." + assert len(cached.keys()) == 2, ( + "Should only see two GraphModules so far. One for forward and the other one for backward." + ) for value in cached.values(): assert len(value) == 1, ( "One GraphModule should only be mapped to one ONNX model since " diff --git a/orttraining/orttraining/test/python/orttraining_test_gru.py b/orttraining/orttraining/test/python/orttraining_test_gru.py index c9e22bf738..fcb7e13b16 100644 --- a/orttraining/orttraining/test/python/orttraining_test_gru.py +++ b/orttraining/orttraining/test/python/orttraining_test_gru.py @@ -355,7 +355,9 @@ class GRU: prev_h = ( all_hidden_states[t - 1, 0, idx, :] if t > 0 - else initial_hidden_state[0, idx, :] if initial_hidden_state is not None else 0 + else initial_hidden_state[0, idx, :] + if initial_hidden_state is not None + else 0 ) grad_update_gate = (prev_h - hidden_gate) * grad_h diff --git a/orttraining/orttraining/test/python/orttraining_test_lstm.py b/orttraining/orttraining/test/python/orttraining_test_lstm.py index 4debe73951..1d75f12801 100644 --- a/orttraining/orttraining/test/python/orttraining_test_lstm.py +++ b/orttraining/orttraining/test/python/orttraining_test_lstm.py @@ -480,7 +480,9 @@ class LSTM: grad_forget_gate = grad_c * ( all_cell_states[t - 1, 0, idx, :] if t > 0 - else initial_cell_state[0, idx, :] if initial_cell_state is not None else 0 + else initial_cell_state[0, idx, :] + if initial_cell_state is not None + else 0 ) grad_control_gate = grad_c * input_gate @@ -520,7 +522,9 @@ class LSTM: prev_h = ( all_hidden_states[t - 1, 0, idx, :] if t > 0 - else initial_hidden_state[0, idx, :] if initial_hidden_state is not None else 0 + else initial_hidden_state[0, idx, :] + if initial_hidden_state is not None + else 0 ) grad_recurrence_weights[0, : self._hidden_size, :] += np.dot( np.expand_dims(grad_input_activation, axis=0).T, np.expand_dims(prev_h, axis=0) @@ -549,17 +553,22 @@ class LSTM: grad_peephole_weights[0, : self._hidden_size] += grad_input_activation * ( all_cell_states[t - 1, 0, idx, :] if t > 0 - else initial_cell_state[0, idx, :] if initial_cell_state is not None else 0 + else initial_cell_state[0, idx, :] + if initial_cell_state is not None + else 0 ) grad_peephole_weights[0, self._hidden_size : 2 * self._hidden_size] += ( grad_output_activation * all_cell_states[t, 0, idx, :] ) - grad_peephole_weights[ - 0, 2 * self._hidden_size : 3 * self._hidden_size - ] += grad_forget_activation * ( - all_cell_states[t - 1, 0, idx, :] - if t > 0 - else initial_cell_state[0, idx, :] if initial_cell_state is not None else 0 + 
grad_peephole_weights[0, 2 * self._hidden_size : 3 * self._hidden_size] += ( + grad_forget_activation + * ( + all_cell_states[t - 1, 0, idx, :] + if t > 0 + else initial_cell_state[0, idx, :] + if initial_cell_state is not None + else 0 + ) ) grad_c = grad_prev_c diff --git a/orttraining/orttraining/test/python/orttraining_test_ort_apis_onnxblock.py b/orttraining/orttraining/test/python/orttraining_test_ort_apis_onnxblock.py index 0866d4a411..275d53daec 100644 --- a/orttraining/orttraining/test/python/orttraining_test_ort_apis_onnxblock.py +++ b/orttraining/orttraining/test/python/orttraining_test_ort_apis_onnxblock.py @@ -1102,7 +1102,6 @@ def test_custom_optimizer_block(): def test_generate_artifacts_path(): - with tempfile.TemporaryDirectory() as temp_dir: _, simple_net = _get_models("cpu", 32, 28, 10, 10) diff --git a/orttraining/orttraining/test/python/orttraining_test_ortmodule_api.py b/orttraining/orttraining/test/python/orttraining_test_ortmodule_api.py index 0ab441ac93..912af9bc88 100644 --- a/orttraining/orttraining/test/python/orttraining_test_ortmodule_api.py +++ b/orttraining/orttraining/test/python/orttraining_test_ortmodule_api.py @@ -6562,7 +6562,8 @@ def test_bert_memory_inspection(caplog): os.environ["ORTMODULE_PRINT_MEMORY_STATS"] = "1" pt_model.eval() # Put it in evaluate mode by intention, in case some initialization in ORTModule use the module.is_training for its checks by mistake. ort_model = ORTModule( - copy.deepcopy(pt_model), DebugOptions(log_level=LogLevel.INFO) # The logged memory info is in INFO level. + copy.deepcopy(pt_model), + DebugOptions(log_level=LogLevel.INFO), # The logged memory info is in INFO level. ) def run_step(model, x, y, z): @@ -6776,11 +6777,9 @@ def test_enable_layerwise_recompute(memory_optimization_level, allow_gradient_ch def test_layerwise_recompute_pythonop_deterministic(): - original_val = os.environ.get("ORTMODULE_MEMORY_OPT_LEVEL", None) class DropoutFunction(torch.autograd.Function): - @staticmethod def forward(ctx, x): return torch.nn.functional.dropout(x, p=0.5, training=True) diff --git a/orttraining/orttraining/test/python/orttraining_test_ortmodule_autograd.py b/orttraining/orttraining/test/python/orttraining_test_ortmodule_autograd.py index 95012aa050..5764a6a81e 100644 --- a/orttraining/orttraining/test/python/orttraining_test_ortmodule_autograd.py +++ b/orttraining/orttraining/test/python/orttraining_test_ortmodule_autograd.py @@ -1414,13 +1414,9 @@ def test_pythonop_training_mode(): def check_pythonop_training_mode(model, is_eval_mode): ## make sure the ort's PythonOp's training_mode is correct if is_eval_mode: - onnx_nodes = ( - model._torch_module._execution_manager._inference_manager._graph_transition_manager._exported_model_info.exported_model.graph.node - ) + onnx_nodes = model._torch_module._execution_manager._inference_manager._graph_transition_manager._exported_model_info.exported_model.graph.node else: - onnx_nodes = ( - model._torch_module._execution_manager._training_manager._graph_transition_manager._exported_model_info.exported_model.graph.node - ) + onnx_nodes = model._torch_module._execution_manager._training_manager._graph_transition_manager._exported_model_info.exported_model.graph.node found_pythonop = False for node in onnx_nodes: @@ -1642,14 +1638,14 @@ def test_customized_shape_inference(): _find_shape_and_dtype(graph.value_info) assert all(s is not None for s in input_shapes), "PythonOp input shape should be found in the optimized_model" - assert ( - all(d is not None for d in input_dtypes) is not 
None - ), "PythonOp input dtype should be found in the optimized_model" + assert all(d is not None for d in input_dtypes) is not None, ( + "PythonOp input dtype should be found in the optimized_model" + ) assert all(s is not None for s in output_shapes), "PythonOp output shape should be found in the optimized_model" - assert ( - all(d is not None for d in output_dtypes) is not None - ), "PythonOp output dtype should be found in the optimized_model" + assert all(d is not None for d in output_dtypes) is not None, ( + "PythonOp output dtype should be found in the optimized_model" + ) def _compare_shape(shape1, shape2): if len(shape1.dim) != len(shape2.dim): @@ -1805,7 +1801,6 @@ def test_python_op_return_persistent_param_as_value(): def test_determistic_pythonop_export(): - class TestFunction(torch.autograd.Function): @staticmethod # bias is an optional argument @@ -1839,9 +1834,7 @@ def test_determistic_pythonop_export(): ortmodule = ORTModule(TestModel(output_size)).train() _ = ortmodule(torch.randn(output_size, dtype=torch.float)) - onnx_nodes = ( - ortmodule._torch_module._execution_manager._training_manager._graph_transition_manager._exported_model_info.exported_model.graph.node - ) + onnx_nodes = ortmodule._torch_module._execution_manager._training_manager._graph_transition_manager._exported_model_info.exported_model.graph.node found_pythonop = False for node in onnx_nodes: diff --git a/orttraining/orttraining/test/python/orttraining_test_ortmodule_bert_classifier.py b/orttraining/orttraining/test/python/orttraining_test_ortmodule_bert_classifier.py index 877dcd2baa..0d5825fb31 100644 --- a/orttraining/orttraining/test/python/orttraining_test_ortmodule_bert_classifier.py +++ b/orttraining/orttraining/test/python/orttraining_test_ortmodule_bert_classifier.py @@ -12,10 +12,10 @@ import torch import wget from sklearn.model_selection import train_test_split from torch.utils.data import DataLoader, RandomSampler, SequentialSampler, TensorDataset -from transformers import BertConfig # noqa: F401 from transformers import ( AdamW, AutoConfig, + BertConfig, # noqa: F401 BertForSequenceClassification, BertTokenizer, get_linear_schedule_with_warmup, @@ -429,7 +429,9 @@ def main(): # Create the learning rate scheduler. 
scheduler = get_linear_schedule_with_warmup( - optimizer, num_warmup_steps=0, num_training_steps=total_steps # Default value in run_glue.py + optimizer, + num_warmup_steps=0, + num_training_steps=total_steps, # Default value in run_glue.py ) # Seed random.seed(args.seed) diff --git a/orttraining/orttraining/test/python/orttraining_test_ortmodule_bert_classifier_autocast.py b/orttraining/orttraining/test/python/orttraining_test_ortmodule_bert_classifier_autocast.py index 4930f73edf..50f411c02a 100644 --- a/orttraining/orttraining/test/python/orttraining_test_ortmodule_bert_classifier_autocast.py +++ b/orttraining/orttraining/test/python/orttraining_test_ortmodule_bert_classifier_autocast.py @@ -12,9 +12,14 @@ import torch import wget from sklearn.model_selection import train_test_split from torch.utils.data import DataLoader, RandomSampler, SequentialSampler, TensorDataset -from transformers import AdamW # noqa: F401 -from transformers import BertConfig # noqa: F401 -from transformers import AutoConfig, BertForSequenceClassification, BertTokenizer, get_linear_schedule_with_warmup +from transformers import ( + AdamW, # noqa: F401 + AutoConfig, + BertConfig, # noqa: F401 + BertForSequenceClassification, + BertTokenizer, + get_linear_schedule_with_warmup, +) import onnxruntime from onnxruntime.training.ortmodule import DebugOptions, ORTModule @@ -432,7 +437,9 @@ def main(): # Create the learning rate scheduler. scheduler = get_linear_schedule_with_warmup( - optimizer, num_warmup_steps=0, num_training_steps=total_steps # Default value in run_glue.py + optimizer, + num_warmup_steps=0, + num_training_steps=total_steps, # Default value in run_glue.py ) scaler = torch.cuda.amp.GradScaler() diff --git a/orttraining/orttraining/test/python/orttraining_test_ortmodule_deepspeed_pipeline_parallel.py b/orttraining/orttraining/test/python/orttraining_test_ortmodule_deepspeed_pipeline_parallel.py index 46b172a396..174edf3775 100755 --- a/orttraining/orttraining/test/python/orttraining_test_ortmodule_deepspeed_pipeline_parallel.py +++ b/orttraining/orttraining/test/python/orttraining_test_ortmodule_deepspeed_pipeline_parallel.py @@ -108,7 +108,10 @@ ds = SampleData(x, y) print("Initialize deepspeed") model_engine, optimizer, _, _ = deepspeed.initialize( - args=args, model=model, model_parameters=params, training_data=ds # (x,y)# + args=args, + model=model, + model_parameters=params, + training_data=ds, # (x,y)# ) for step in range(args.steps): diff --git a/orttraining/orttraining/test/python/orttraining_test_ortmodule_onnx_ops.py b/orttraining/orttraining/test/python/orttraining_test_ortmodule_onnx_ops.py index 35e5bae3ea..07d581b576 100644 --- a/orttraining/orttraining/test/python/orttraining_test_ortmodule_onnx_ops.py +++ b/orttraining/orttraining/test/python/orttraining_test_ortmodule_onnx_ops.py @@ -69,9 +69,7 @@ class TestOnnxOpsOrtModule(unittest.TestCase): self.assert_values_are_close(ort_prediction, pt_prediction, **kwargs) self.assert_gradients_match_and_reset_gradient(ort_model, pt_model, **kwargs) - onnx_graph_inf = ( - ort_model._torch_module._execution_manager._training_manager._graph_transition_manager._exported_model_info.exported_model - ) + onnx_graph_inf = ort_model._torch_module._execution_manager._training_manager._graph_transition_manager._exported_model_info.exported_model onnx_graph_train = ort_model._torch_module._execution_manager._training_manager._onnx_models.optimized_model if debug: with open(f"debug_{name}_ortmodule_infer.onnx", "wb") as f: diff --git 
a/orttraining/orttraining/test/python/qat_poc_example/train.py b/orttraining/orttraining/test/python/qat_poc_example/train.py index a25c071c58..45c0aa77ae 100644 --- a/orttraining/orttraining/test/python/qat_poc_example/train.py +++ b/orttraining/orttraining/test/python/qat_poc_example/train.py @@ -68,8 +68,8 @@ def train_model(qat_train_model, qat_eval_model, qat_optimizer_model, qat_checkp # Training loop epochs = 5 for epoch in range(epochs): - logging.info(f"Starting epoch: {epoch+1}") + logging.info(f"Starting epoch: {epoch + 1}") training_loss = _train_epoch(model, optimizer, train_loader) eval_loss = _eval(model, test_loader) - logging.info(f"End of epoch: {epoch+1}, training loss: {training_loss:.4f}, eval loss: {eval_loss:.4f}") + logging.info(f"End of epoch: {epoch + 1}, training loss: {training_loss:.4f}, eval loss: {eval_loss:.4f}") diff --git a/orttraining/tools/ci_test/run_batch_size_test.py b/orttraining/tools/ci_test/run_batch_size_test.py index 348d490678..a1bf3fd71c 100755 --- a/orttraining/tools/ci_test/run_batch_size_test.py +++ b/orttraining/tools/ci_test/run_batch_size_test.py @@ -106,7 +106,7 @@ def main(): ] if config.enable_mixed_precision: - cmds.append("--use_mixed_precision"), + (cmds.append("--use_mixed_precision"),) subprocess.run(cmds, timeout=120).check_returncode() # noqa: PLW1510 diff --git a/orttraining/tools/ci_test/run_bert_perf_test.py b/orttraining/tools/ci_test/run_bert_perf_test.py index 13d5e9f140..c848621c88 100644 --- a/orttraining/tools/ci_test/run_bert_perf_test.py +++ b/orttraining/tools/ci_test/run_bert_perf_test.py @@ -94,8 +94,8 @@ def main(): ] if c.use_mixed_precision: - cmds.append("--use_mixed_precision"), - cmds.append("--allreduce_in_fp16"), + (cmds.append("--use_mixed_precision"),) + (cmds.append("--allreduce_in_fp16"),) subprocess.run(cmds).check_returncode() # noqa: PLW1510 if c.expected_perf > 0.0: diff --git a/orttraining/tools/ci_test/run_gpt2_perf_test.py b/orttraining/tools/ci_test/run_gpt2_perf_test.py index 18e59d275b..1df71f02b7 100644 --- a/orttraining/tools/ci_test/run_gpt2_perf_test.py +++ b/orttraining/tools/ci_test/run_gpt2_perf_test.py @@ -60,7 +60,7 @@ def main(): ] if c.use_mixed_precision: - cmds.append("--use_mixed_precision"), + (cmds.append("--use_mixed_precision"),) subprocess.run(cmds).check_returncode() # noqa: PLW1510 diff --git a/orttraining/tools/scripts/nv_run_pretraining.py b/orttraining/tools/scripts/nv_run_pretraining.py index 8c57101f72..8f399263e1 100644 --- a/orttraining/tools/scripts/nv_run_pretraining.py +++ b/orttraining/tools/scripts/nv_run_pretraining.py @@ -14,7 +14,6 @@ # limitations under the License. """BERT finetuning runner.""" - import argparse # ================== diff --git a/pyproject.toml b/pyproject.toml index 40e6eb96df..60fe630b13 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,43 +1,6 @@ -[tool.black] -line-length = 120 -# NOTE: Do not extend the exclude list. Edit .lintrunner.toml instead -extend-exclude = "cmake|onnxruntime/core/flatbuffers/" -# NOTE: use the minimum supported python version as target-version -target-version = ["py310"] - -[tool.isort] -# NOTE: Do not extend the exclude list. 
Edit .lintrunner.toml instead -profile = "black" -line_length = 120 -extend_skip_glob = [ - "cmake/*", - "orttraining/*", - "onnxruntime/core/flatbuffers/*", -] - [tool.pydocstyle] convention = "google" -[tool.pylint.BASIC] -good-names = [ - "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", - "p", "q", "r", "s", "t", "u", "v", "w", "ex", "Run", "_", "x", "y", "z" -] - -[tool.pylint.messages_control] -disable = [ - "format", - "line-too-long", - "import-error", - "no-name-in-module", - "no-member", - "too-many-arguments", - "too-many-locals", - "too-few-public-methods", - "missing-docstring", - "fixme", -] - [tool.pyright] exclude = ["onnxruntime/core/flatbuffers/*"] reportMissingImports = false @@ -45,6 +8,7 @@ reportMissingImports = false [tool.ruff] # NOTE: Do not create an exclude list. Edit .lintrunner.toml instead target-version = "py38" +line-length = 120 [tool.ruff.lint] select = [ @@ -53,6 +17,7 @@ select = [ "F", # Pyflakes "FURB", # refurb "G", # flake8-logging-format + "I", # isort "ISC", # flake8-implicit-str-concat "N", # pep8-naming "NPY", # numpy @@ -92,10 +57,6 @@ ignore = [ "SIM116", # Don't use dict lookup to replace if-else ] ignore-init-module-imports = true -unfixable = [ - "F401", # Unused imports - "SIM112", # Use upper case for env vars -] [tool.ruff.lint.per-file-ignores] # NOTE: Refrain from growing the ignore list unless for exceptional cases. diff --git a/requirements-lintrunner.txt b/requirements-lintrunner.txt index 2257e259a4..f51a828ff5 100644 --- a/requirements-lintrunner.txt +++ b/requirements-lintrunner.txt @@ -4,8 +4,5 @@ lintrunner==0.12.5 lintrunner-adapters==0.12.4 # RUFF ruff==0.9.1 -# BLACK-ISORT -black==24.10.0 -isort==5.13.2 # CLANGFORMAT clang-format==19.1.6 diff --git a/tools/ci_build/build.py b/tools/ci_build/build.py index 9e567e1ceb..87180a242e 100644 --- a/tools/ci_build/build.py +++ b/tools/ci_build/build.py @@ -451,9 +451,7 @@ def parse_arguments(): parser.add_argument( "--apple_deploy_target", type=str, - help="Specify the minimum version of the target platform " - "(e.g. macOS or iOS)" - "This is only supported on MacOS", + help="Specify the minimum version of the target platform (e.g. macOS or iOS)This is only supported on MacOS", ) # A 32-bit progress doesn't have enough memory to run all the tests in onnxruntime_test_all. # Mimalloc is incompatible with address sanitizer. @@ -1248,8 +1246,7 @@ def generate_build_tree( cmake_args += ["-Donnxruntime_MPI_HOME=" + mpi_home] else: log.warning( - "mpi_home is supplied but use_mpi is set to false." - " Build will continue without linking MPI libraries." + "mpi_home is supplied but use_mpi is set to false. Build will continue without linking MPI libraries." 
             )
 
     if nccl_home and os.path.exists(nccl_home):
diff --git a/tools/ci_build/compile_triton.py b/tools/ci_build/compile_triton.py
index c1119aad49..abe95b31e8 100644
--- a/tools/ci_build/compile_triton.py
+++ b/tools/ci_build/compile_triton.py
@@ -93,9 +93,9 @@ def convert_and_save(metadata, header_file, out_dir, out_obj_file):
         lib_name = m["lib_file"].replace(".", "_")
         meta_ele.append(f'"_binary_{lib_name}_start"')
-        meta_ele.append(f"\"{m['func_name']}\"")
-        meta_ele.append(f"\"{m['group']}\"")
-        meta_ele.append(f"\"{m['name']}\"")
+        meta_ele.append(f'"{m["func_name"]}"')
+        meta_ele.append(f'"{m["group"]}"')
+        meta_ele.append(f'"{m["name"]}"')
         meta_ele.append(str(m["num_warps"]))
         meta_ele.append(str(m["shared"]))
@@ -103,9 +103,9 @@ def convert_and_save(metadata, header_file, out_dir, out_obj_file):
         constants = []
         for k, v in m["constants"].items():
             constants.append(f'{{ "{k}", {v!s}}}')
-        meta_ele.append(f"{{ { ', '.join(constants) } }}")
+        meta_ele.append(f"{{ {', '.join(constants)} }}")
 
-        c_metadata.append(f"{{ { ', '.join(meta_ele) } }}")
+        c_metadata.append(f"{{ {', '.join(meta_ele)} }}")
 
     archive_obj_files(binary_files, out_dir, out_obj_file)
@@ -123,7 +123,7 @@ struct _TritonKernelInfo {{
 }};
 
 const _TritonKernelInfo kernel_infos[] = {{
-    { ', '.join(c_metadata) },
+    {", ".join(c_metadata)},
 }};
 """
diff --git a/tools/ci_build/github/android/build_aar_package.py b/tools/ci_build/github/android/build_aar_package.py
index 1b34b3d302..e9f8fea951 100644
--- a/tools/ci_build/github/android/build_aar_package.py
+++ b/tools/ci_build/github/android/build_aar_package.py
@@ -41,10 +41,7 @@ def _parse_build_settings(args):
 
     build_settings = {}
 
-    if "build_abis" in build_settings_data:
-        build_settings["build_abis"] = build_settings_data["build_abis"]
-    else:
-        build_settings["build_abis"] = DEFAULT_BUILD_ABIS
+    build_settings["build_abis"] = build_settings_data.get("build_abis", DEFAULT_BUILD_ABIS)
 
     build_params = []
     if "build_params" in build_settings_data:
diff --git a/tools/ci_build/github/apple/build_and_assemble_apple_pods.py b/tools/ci_build/github/apple/build_and_assemble_apple_pods.py
index dd037c17ae..c18cb1d070 100755
--- a/tools/ci_build/github/apple/build_and_assemble_apple_pods.py
+++ b/tools/ci_build/github/apple/build_and_assemble_apple_pods.py
@@ -11,9 +11,10 @@ import sys
 import tempfile
 
 from c.assemble_c_pod_package import assemble_c_pod_package
-from objectivec.assemble_objc_pod_package import assemble_objc_pod_package
 from package_assembly_utils import PackageVariant, get_ort_version
 
+from objectivec.assemble_objc_pod_package import assemble_objc_pod_package
+
 SCRIPT_PATH = pathlib.Path(__file__).resolve()
 SCRIPT_DIR = SCRIPT_PATH.parent
 REPO_DIR = SCRIPT_PATH.parents[4]
diff --git a/tools/ci_build/github/apple/package_release_tasks.py b/tools/ci_build/github/apple/package_release_tasks.py
index 592a326d86..c8d78400c6 100755
--- a/tools/ci_build/github/apple/package_release_tasks.py
+++ b/tools/ci_build/github/apple/package_release_tasks.py
@@ -52,8 +52,7 @@ def _resolve_single_path_from_pattern(path_pattern: str) -> Path:
 
 def _parse_args():
     parser = argparse.ArgumentParser(
-        description="Helper script to perform release tasks. "
-        "Mostly useful for the CocoaPods package release pipeline.",
+        description="Helper script to perform release tasks. Mostly useful for the CocoaPods package release pipeline.",
     )
 
     parser.add_argument(
diff --git a/tools/python/dump_ort_model.py b/tools/python/dump_ort_model.py
index b9e3bfa0d3..9d7e23bf3a 100644
--- a/tools/python/dump_ort_model.py
+++ b/tools/python/dump_ort_model.py
@@ -80,7 +80,7 @@ class OrtFormatModelDumper:
         outputs = [node.Outputs(i).decode() for i in range(node.OutputsLength())]
         print(
             f"{node.Index()}:{node.Name().decode()}({domain}:{optype}:{since_version}) "
-            f'inputs=[{",".join(inputs)}] outputs=[{",".join(outputs)}]'
+            f"inputs=[{','.join(inputs)}] outputs=[{','.join(outputs)}]"
         )
 
     def _dump_graph(self, graph: fbs.Graph):
diff --git a/tools/python/gen_contrib_doc.py b/tools/python/gen_contrib_doc.py
index ab9421b395..ce6f0a1205 100644
--- a/tools/python/gen_contrib_doc.py
+++ b/tools/python/gen_contrib_doc.py
@@ -320,9 +320,7 @@ def main(output_path: str, domain_filter: [str]):
     )
 
     # domain -> support level -> name -> [schema]
-    index = defaultdict(
-        lambda: defaultdict(lambda: defaultdict(list))
-    )  # type: Dict[Text, Dict[int, Dict[Text, List[OpSchema]]]]
+    index = defaultdict(lambda: defaultdict(lambda: defaultdict(list)))  # type: Dict[Text, Dict[int, Dict[Text, List[OpSchema]]]]
     for schema in rtpy.get_all_operator_schema():
         index[schema.domain][int(schema.support_level)][schema.name].append(schema)
@@ -331,9 +329,7 @@ def main(output_path: str, domain_filter: [str]):
 
     # Preprocess the Operator Schemas
     # [(domain, [(support_level, [(schema name, current schema, all versions schemas)])])]
-    operator_schemas = (
-        list()
-    )  # type: List[Tuple[Text, List[Tuple[int, List[Tuple[Text, OpSchema, List[OpSchema]]]]]]]
+    operator_schemas = list()  # type: List[Tuple[Text, List[Tuple[int, List[Tuple[Text, OpSchema, List[OpSchema]]]]]]]
     exsting_ops = set()  # type: Set[Text]
     for domain, _supportmap in sorted(index.items()):
         if not should_render_domain(domain, domain_filter):
@@ -394,7 +390,7 @@ if __name__ == "__main__":
     parser.add_argument(
         "--domains",
         nargs="+",
-        help="Filter to specified domains. " "e.g. `--domains com.microsoft com.microsoft.nchwc`",  # noqa: ISC001
+        help="Filter to specified domains. e.g. `--domains com.microsoft com.microsoft.nchwc`",
     )
     parser.add_argument(
         "--output_path",
diff --git a/tools/python/sparsify_initializers.py b/tools/python/sparsify_initializers.py
index f9cc8db38e..2c80b07cd0 100644
--- a/tools/python/sparsify_initializers.py
+++ b/tools/python/sparsify_initializers.py
@@ -54,9 +54,7 @@ def setup_logging(verbose):  # type: (bool) -> None
     logger.setLevel(logging_level)
 
-def convert_tensor_to_sparse(
-    tensor, sparsity_threshold, tolerance
-):  # type: (TensorProto, float, float) -> Tuple[SparseTensorProto, float]
+def convert_tensor_to_sparse(tensor, sparsity_threshold, tolerance):  # type: (TensorProto, float, float) -> Tuple[SparseTensorProto, float]
     """returns a tuple of sparse_tensor and sparsity level"""
     values = []
     indices = []
@@ -140,9 +138,7 @@ def convert_tensor_to_sparse(
     return (sparse_tensor, sparsity)
 
-def convert_initializers(
-    model, exclude_names, sparsity_threshold, tolerance
-):  # type: (ModelProto, List[str], float, float) -> None
+def convert_initializers(model, exclude_names, sparsity_threshold, tolerance):  # type: (ModelProto, List[str], float, float) -> None
     graph = model.graph
     converted_sparse = []
     remaining_initializers = []
diff --git a/tools/python/util/mobile_helpers/usability_checker.py b/tools/python/util/mobile_helpers/usability_checker.py
index e7948c43ba..81c3c07aa9 100644
--- a/tools/python/util/mobile_helpers/usability_checker.py
+++ b/tools/python/util/mobile_helpers/usability_checker.py
@@ -151,23 +151,23 @@ class PartitioningInfo:
 
         if self.supported_groups:
             logger.info(
-                f'\tPartition sizes: [{", ".join([str(len(partition)) for partition in self.supported_groups])}]'
+                f"\tPartition sizes: [{', '.join([str(len(partition)) for partition in self.supported_groups])}]"
            )
 
             # dump full groups if debug output is enabled
             for group in self.supported_groups:
-                logger.debug(f'Nodes in group: {",".join([f"{node.op_type}:{node.name}" for node in group])}')
+                logger.debug(f"Nodes in group: {','.join([f'{node.op_type}:{node.name}' for node in group])}")
 
         logger.info(f"Unsupported nodes due to operator={self.nodes_unsupported_due_to_op}")
         if self.unsupported_ops:
-            logger.info(f'\tUnsupported ops: {",".join(sorted(self.unsupported_ops))}')
+            logger.info(f"\tUnsupported ops: {','.join(sorted(self.unsupported_ops))}")
 
         caveats = self.supported_ops_checker.get_caveats()
         if caveats:
             indent = " " * 5
             logger.info(
                 "\tCaveats that have not been checked and may result in a node not actually being supported: "
-                f'{"".join([os.linesep + indent + caveat for caveat in caveats])}'
+                f"{''.join([os.linesep + indent + caveat for caveat in caveats])}"
             )
 
         if self.nodes_unsupported_due_to_dynamic_input:
@@ -341,7 +341,7 @@ def _check_partitioning_for_graph(
             continue
 
         if not is_op_supported:
-            unsupported_ops.add(f'{node.domain if node.domain else "ai.onnx"}:{node.op_type}')
+            unsupported_ops.add(f"{node.domain if node.domain else 'ai.onnx'}:{node.op_type}")
             num_unsupported_nodes_due_to_op += 1
 
         if not is_input_shape_supported:
@@ -349,7 +349,7 @@ def _check_partitioning_for_graph(
 
         if not is_rank_supported:
             num_unsupported_nodes_due_to_rank += 1
-            ops_with_unsupported_rank.add(f'{node.domain if node.domain else "ai.onnx"}:{node.op_type}')
+            ops_with_unsupported_rank.add(f"{node.domain if node.domain else 'ai.onnx'}:{node.op_type}")
 
         if is_node_supported:
             num_supported_nodes += 1
@@ -569,8 +569,7 @@ def check_shapes(graph: onnx.GraphProto, logger: logging.Logger | None = None):
     # a model where all inputs are dynamic (results in no value_info)
     if not graph.value_info and not (len(graph.node) == 1 or len(dynamic_inputs) == len(graph.input)):
         logger.warning(
-            "Unable to check shapes within model. "
-            "ONNX shape inferencing should be run on the model prior to checking."
+            "Unable to check shapes within model. ONNX shape inferencing should be run on the model prior to checking."
         )
 
     for vi in graph.value_info:
diff --git a/tools/python/util/onnx_model_utils.py b/tools/python/util/onnx_model_utils.py
index 5c970430a3..1938a2411e 100644
--- a/tools/python/util/onnx_model_utils.py
+++ b/tools/python/util/onnx_model_utils.py
@@ -227,7 +227,7 @@ def make_input_shape_fixed(graph: onnx.GraphProto, input_name: str, fixed_shape:
 
         raise ValueError(
             f"Input {input_name} was not found in graph inputs. "
-            f'Valid input names are: {",".join([i.name for i in graph.input])}'
+            f"Valid input names are: {','.join([i.name for i in graph.input])}"
         )
 
@@ -337,7 +337,7 @@ def get_producer_consumer_maps(graph: onnx.GraphProto):
     # top level graph should have no implicit inputs
     if implicit_inputs:
         raise ValueError(
-            f'This appears to be an invalid model with missing inputs of {",".join(sorted(implicit_inputs))}'
+            f"This appears to be an invalid model with missing inputs of {','.join(sorted(implicit_inputs))}"
         )
 
     return node_to_producers, node_to_consumers
diff --git a/tools/python/util/ort_format_model/__init__.py b/tools/python/util/ort_format_model/__init__.py
index 318851642d..29e8e70ed2 100644
--- a/tools/python/util/ort_format_model/__init__.py
+++ b/tools/python/util/ort_format_model/__init__.py
@@ -18,8 +18,10 @@ else:
     sys.path.append(ort_fbs_py_parent_dir)
 
-from .operator_type_usage_processors import GloballyAllowedTypesOpTypeImplFilter  # noqa: E402, F401
-from .operator_type_usage_processors import OperatorTypeUsageManager  # noqa: E402, F401
-from .operator_type_usage_processors import OpTypeImplFilterInterface  # noqa: E402, F401
+from .operator_type_usage_processors import (  # noqa: E402
+    GloballyAllowedTypesOpTypeImplFilter,  # noqa: F401
+    OperatorTypeUsageManager,  # noqa: F401
+    OpTypeImplFilterInterface,  # noqa: F401
+)
 from .ort_model_processor import OrtFormatModelProcessor  # noqa: E402, F401
 from .utils import create_config_from_models  # noqa: E402, F401
diff --git a/tools/python/util/ort_format_model/types.py b/tools/python/util/ort_format_model/types.py
index ffeda6b2e7..9661eb33c9 100644
--- a/tools/python/util/ort_format_model/types.py
+++ b/tools/python/util/ort_format_model/types.py
@@ -6,6 +6,7 @@ import ort_flatbuffers_py.fbs as fbs
 
 class FbsTypeInfo:
     "Class to provide conversion between ORT flatbuffers schema values and C++ types"
+
     tensordatatype_to_string = {  # noqa: RUF012
         fbs.TensorDataType.TensorDataType.FLOAT: "float",
         fbs.TensorDataType.TensorDataType.UINT8: "uint8_t",