Mirror of https://github.com/saymrwulf/onnxruntime.git, synced 2026-05-14 20:48:00 +00:00
Use ruff as the formatter to replace black-isort (#23397)
Use ruff as the code formatter in place of black and isort: it is much faster, and projects such as PyTorch and ONNX have already adopted ruff format. This PR includes only auto-fixed formatting changes.
This commit is contained in:
parent 080c67e900
commit c7c8757a1c
114 changed files with 476 additions and 464 deletions
@@ -61,7 +61,7 @@ is_formatter = true

[[linter]]
-code = 'BLACK-ISORT'
+code = 'RUFF-FORMAT'
include_patterns = [
    '**/*.py',
]

@@ -76,7 +76,7 @@ command = [
    '-m',
    'lintrunner_adapters',
    'run',
-    'black_isort_linter',
+    'ruff_format_linter',
    '--',
    '@{{PATHSFILE}}'
]
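For context, the RUFF-FORMAT adapter wraps ruff's formatter much as BLACK-ISORT wrapped black and isort. A rough sketch of what the invocation boils down to, assuming `ruff` is installed on PATH (the file list below is illustrative, not taken from this PR):

```python
import subprocess


def format_files(paths: list[str]) -> None:
    # `ruff format` rewrites the given files in place, like `black` did;
    # `ruff format --check` would only report which files need reformatting.
    subprocess.run(["ruff", "format", *paths], check=True)


format_files(["onnxruntime/__init__.py"])
```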
@@ -2,6 +2,7 @@
"""
Automates the generation of ONNX operators.
"""
+
import importlib
import inspect
import keyword

@@ -14,6 +14,7 @@ to run predictions using this runtime.
Let's use the API to compute the prediction
of a simple logistic regression model.
"""
+
import numpy as np
from onnx import load

@@ -15,6 +15,7 @@ It starts by loading the model trained in example
trained on *Iris* datasets. The model takes
a vector of dimension 2 and returns a class among three.
"""
+
import numpy

import onnxruntime as rt

@@ -16,6 +16,7 @@ Train a pipeline

The first step consists in creating a dummy datasets.
"""
+
import pandas
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split

@@ -11,6 +11,7 @@ Profile the execution of a simple model
*ONNX Runtime* can profile the execution of the model.
This example shows how to interpret the results.
"""
+
import numpy
import onnx

@@ -7,6 +7,7 @@ ONNX Runtime is a performance-focused scoring engine for Open Neural Network Exc
For more information on ONNX Runtime, please see `aka.ms/onnxruntime <https://aka.ms/onnxruntime/>`_
or the `Github project <https://github.com/microsoft/onnxruntime/>`_.
"""
+
__version__ = "1.21.0"
__author__ = "Microsoft"

@@ -20,33 +21,35 @@ __author__ = "Microsoft"
# meaningful messages to the user.
# the saved exception is raised after device version validation.
try:
-    from onnxruntime.capi._pybind_state import ExecutionMode  # noqa: F401
-    from onnxruntime.capi._pybind_state import ExecutionOrder  # noqa: F401
-    from onnxruntime.capi._pybind_state import GraphOptimizationLevel  # noqa: F401
-    from onnxruntime.capi._pybind_state import LoraAdapter  # noqa: F401
-    from onnxruntime.capi._pybind_state import ModelMetadata  # noqa: F401
-    from onnxruntime.capi._pybind_state import NodeArg  # noqa: F401
-    from onnxruntime.capi._pybind_state import OrtAllocatorType  # noqa: F401
-    from onnxruntime.capi._pybind_state import OrtArenaCfg  # noqa: F401
-    from onnxruntime.capi._pybind_state import OrtMemoryInfo  # noqa: F401
-    from onnxruntime.capi._pybind_state import OrtMemType  # noqa: F401
-    from onnxruntime.capi._pybind_state import OrtSparseFormat  # noqa: F401
-    from onnxruntime.capi._pybind_state import RunOptions  # noqa: F401
-    from onnxruntime.capi._pybind_state import SessionIOBinding  # noqa: F401
-    from onnxruntime.capi._pybind_state import SessionOptions  # noqa: F401
-    from onnxruntime.capi._pybind_state import create_and_register_allocator  # noqa: F401
-    from onnxruntime.capi._pybind_state import create_and_register_allocator_v2  # noqa: F401
-    from onnxruntime.capi._pybind_state import disable_telemetry_events  # noqa: F401
-    from onnxruntime.capi._pybind_state import enable_telemetry_events  # noqa: F401
-    from onnxruntime.capi._pybind_state import get_all_providers  # noqa: F401
-    from onnxruntime.capi._pybind_state import get_available_providers  # noqa: F401
-    from onnxruntime.capi._pybind_state import get_build_info  # noqa: F401
-    from onnxruntime.capi._pybind_state import get_device  # noqa: F401
-    from onnxruntime.capi._pybind_state import get_version_string  # noqa: F401
-    from onnxruntime.capi._pybind_state import has_collective_ops  # noqa: F401
-    from onnxruntime.capi._pybind_state import set_default_logger_severity  # noqa: F401
-    from onnxruntime.capi._pybind_state import set_default_logger_verbosity  # noqa: F401
-    from onnxruntime.capi._pybind_state import set_seed  # noqa: F401
+    from onnxruntime.capi._pybind_state import (
+        ExecutionMode,  # noqa: F401
+        ExecutionOrder,  # noqa: F401
+        GraphOptimizationLevel,  # noqa: F401
+        LoraAdapter,  # noqa: F401
+        ModelMetadata,  # noqa: F401
+        NodeArg,  # noqa: F401
+        OrtAllocatorType,  # noqa: F401
+        OrtArenaCfg,  # noqa: F401
+        OrtMemoryInfo,  # noqa: F401
+        OrtMemType,  # noqa: F401
+        OrtSparseFormat,  # noqa: F401
+        RunOptions,  # noqa: F401
+        SessionIOBinding,  # noqa: F401
+        SessionOptions,  # noqa: F401
+        create_and_register_allocator,  # noqa: F401
+        create_and_register_allocator_v2,  # noqa: F401
+        disable_telemetry_events,  # noqa: F401
+        enable_telemetry_events,  # noqa: F401
+        get_all_providers,  # noqa: F401
+        get_available_providers,  # noqa: F401
+        get_build_info,  # noqa: F401
+        get_device,  # noqa: F401
+        get_version_string,  # noqa: F401
+        has_collective_ops,  # noqa: F401
+        set_default_logger_severity,  # noqa: F401
+        set_default_logger_verbosity,  # noqa: F401
+        set_seed,  # noqa: F401
+    )

    import_capi_exception = None
except Exception as e:

@@ -57,12 +60,14 @@ from onnxruntime.capi import onnxruntime_validation
if import_capi_exception:
    raise import_capi_exception

-from onnxruntime.capi.onnxruntime_inference_collection import AdapterFormat  # noqa: F401
-from onnxruntime.capi.onnxruntime_inference_collection import InferenceSession  # noqa: F401
-from onnxruntime.capi.onnxruntime_inference_collection import IOBinding  # noqa: F401
-from onnxruntime.capi.onnxruntime_inference_collection import OrtDevice  # noqa: F401
-from onnxruntime.capi.onnxruntime_inference_collection import OrtValue  # noqa: F401
-from onnxruntime.capi.onnxruntime_inference_collection import SparseTensor  # noqa: F401
+from onnxruntime.capi.onnxruntime_inference_collection import (
+    AdapterFormat,  # noqa: F401
+    InferenceSession,  # noqa: F401
+    IOBinding,  # noqa: F401
+    OrtDevice,  # noqa: F401
+    OrtValue,  # noqa: F401
+    SparseTensor,  # noqa: F401
+)

# TODO: thiagofc: Temporary experimental namespace for new PyTorch front-end
try:  # noqa: SIM105
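The pattern above — many single-name imports collapsed into one parenthesized, alphabetized import, with each `# noqa: F401` suppression moved onto its own name — recurs throughout the PR. A minimal sketch of the before/after shape, using a standard-library module purely for illustration:

```python
# Before: one import statement per re-exported name, each suppressed separately.
# from os import path  # noqa: F401
# from os import sep  # noqa: F401

# After: a single parenthesized import block with per-name suppressions.
from os import (
    path,  # noqa: F401
    sep,  # noqa: F401
)
```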
@@ -5,6 +5,7 @@
"""
Implements ONNX's backend API.
"""
+
import os
import unittest

@@ -5,6 +5,7 @@
"""
Implements ONNX's backend API.
"""
+
from typing import Any, Tuple  # noqa: F401

from onnx.backend.base import BackendRep

@@ -3,6 +3,7 @@
"""
Short examples used in the documentation.
"""
+
import os

@@ -115,8 +115,9 @@ def check_and_normalize_provider_args(
    def set_provider_options(name, options):
        if name not in available_provider_names:
            warnings.warn(
-                "Specified provider '{}' is not in available provider names."
-                "Available providers: '{}'".format(name, ", ".join(available_provider_names))
+                "Specified provider '{}' is not in available provider names.Available providers: '{}'".format(
+                    name, ", ".join(available_provider_names)
+                )
            )

        if name in provider_name_to_options:
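The change above joins two implicitly concatenated string literals into one. Adjacent literals are glued together exactly as written, so the missing space at the seam ("...provider names.Available providers...") exists in both versions; the merge merely makes it visible on a single line. A small self-contained illustration (the message text is made up):

```python
# Implicit concatenation: adjacent string literals fuse with no separator,
# so a missing space at the seam is easy to overlook when they sit on two lines.
message = (
    "Specified provider 'FooExecutionProvider' is not in available provider names."
    "Available providers: 'CPUExecutionProvider'"
)
assert "names.Available" in message  # the fragments fused without a space
print(message)
```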
@@ -5,6 +5,7 @@
"""
Check OS requirements for ONNX Runtime Python Bindings.
"""
+
import linecache
import platform
import warnings

@@ -86,7 +86,7 @@ def _shape_to_string(shape):
        value = next(iter(dict_obj.values()))
        if len(res) != 0:
            res += ","
-        res += f'{key}({"x".join(str(v) for v in value)})'
+        res += f"{key}({'x'.join(str(v) for v in value)})"
    return res
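The rewrite above flips the quotes of an f-string: ruff format prefers double quotes for the outer string, and since the replacement field contains its own string literal, that inner literal becomes single-quoted so the result stays valid on Python versions that do not allow reusing the outer quote inside an f-string. A small sketch:

```python
value = [2, 3, 4]

# Pre-existing style: single quotes outside because of the inner double quotes.
old_style = f'dims({"x".join(str(v) for v in value)})'

# Style after reformatting: double quotes outside, single quotes inside.
new_style = f"dims({'x'.join(str(v) for v in value)})"

assert old_style == new_style == "dims(2x3x4)"
```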
@@ -5,6 +5,7 @@
Support for registering ONNX Runtime's built-in contrib ops with
PyTorch-ONNX exporter (torch.onnx.export).
"""
+
import typing

try:

@@ -126,9 +126,9 @@ def parse_qnn_json_file(qnn_json_file_path, qnn_input_output_tensor_dic):
        qnn_tensor.dim = qnn_tensor_attribute["dims"]
        qnn_input_output_tensor_dic[qnn_tensor_name] = qnn_tensor

-    assert (
-        len(qnn_input_output_tensor_dic) > 1
-    ), "Converted QNN model not valid. It should have at least 1 input & 1 output."
+    assert len(qnn_input_output_tensor_dic) > 1, (
+        "Converted QNN model not valid. It should have at least 1 input & 1 output."
+    )


def compare_onnx_shape_with_qnn_shape(onnx_dims, qnn_dims):

@@ -150,9 +150,9 @@ def parse_qnn_converter_json_file(qnn_convert_json, qnn_input_tensor_dic, qnn_ou
        qnn_tensor.offset = 0 - qnn_tensor_attribute["quant_params"]["scale_offset"]["offset"]
        qnn_output_tensor_dic[qnn_tensor_name] = qnn_tensor

-    assert (
-        len(qnn_input_tensor_dic) >= 1 and len(qnn_output_tensor_dic) >= 1
-    ), "Converted QNN model not valid. It should have at least 1 input & 1 output."
+    assert len(qnn_input_tensor_dic) >= 1 and len(qnn_output_tensor_dic) >= 1, (
+        "Converted QNN model not valid. It should have at least 1 input & 1 output."
+    )


def generate_wrapper_onnx_file(

@@ -286,9 +286,9 @@ def parse_qnn_graph(qnn_graph, qnn_input_tensor_dic, qnn_output_tensor_dic):
        qnn_tensor.offset = 0 - tensor_info["quantizeParams"]["scaleOffset"]["offset"]
        qnn_output_tensor_dic[qnn_tensor.name] = qnn_tensor

-    assert (
-        len(qnn_input_tensor_dic) >= 1 and len(qnn_output_tensor_dic) >= 1
-    ), "Converted QNN model not valid. It should have at least 1 input & 1 output."
+    assert len(qnn_input_tensor_dic) >= 1 and len(qnn_output_tensor_dic) >= 1, (
+        "Converted QNN model not valid. It should have at least 1 input & 1 output."
+    )

    return graph_name
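The assert reflow above is the most common auto-fix in this PR: the condition moves back onto the `assert` line and only the message is wrapped in parentheses. This is the layout ruff format produces for long assert messages, and it avoids ever parenthesizing the whole `condition, message` pair, which would silently turn the statement into an always-true tuple assertion. A runnable sketch of both layouts:

```python
items = {"input_0": 1, "output_0": 2}

# Old layout: parenthesized condition, message trailing the closing paren.
assert (
    len(items) > 1
), "Converted model not valid. It should have at least 1 input & 1 output."

# New layout: condition on the assert line, only the message parenthesized.
assert len(items) > 1, (
    "Converted model not valid. It should have at least 1 input & 1 output."
)
```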
@@ -7,11 +7,13 @@ from .calibrate import (  # noqa: F401
)
from .qdq_quantizer import QDQQuantizer  # noqa: F401
from .quant_utils import QuantFormat, QuantType, write_calibration_table  # noqa: F401
-from .quantize import DynamicQuantConfig  # noqa: F401
-from .quantize import QuantizationMode  # noqa: F401
-from .quantize import StaticQuantConfig  # noqa: F401
-from .quantize import get_qdq_config  # noqa: F401
-from .quantize import quantize  # noqa: F401
-from .quantize import quantize_dynamic  # noqa: F401
-from .quantize import quantize_static  # noqa: F401
+from .quantize import (
+    DynamicQuantConfig,  # noqa: F401
+    QuantizationMode,  # noqa: F401
+    StaticQuantConfig,  # noqa: F401
+    get_qdq_config,  # noqa: F401
+    quantize,  # noqa: F401
+    quantize_dynamic,  # noqa: F401
+    quantize_static,  # noqa: F401
+)
from .shape_inference import quant_pre_process  # noqa: F401

@@ -331,9 +331,9 @@ class BaseQuantizer:
    scale = np.array(quant_overrides["scale"])
    q_weight_data = quantize_nparray(qType, weight_data.flatten(), scale, zero_point)
    assert isinstance(zero_point, np.ndarray), f"Unexpected type {type(zero_point)}"
-    assert (
-        zero_point.dtype != np.float32 and zero_point.dtype != np.float16
-    ), f"Unexpected dtype {zero_point.dtype}"
+    assert zero_point.dtype != np.float32 and zero_point.dtype != np.float16, (
+        f"Unexpected dtype {zero_point.dtype}"
+    )
    assert isinstance(scale, np.ndarray), f"Unexpected type {type(scale)}"

else:

@@ -349,9 +349,9 @@ class BaseQuantizer:
    )

    assert isinstance(zero_point, np.ndarray), f"Unexpected type {type(zero_point)}"
-    assert (
-        zero_point.dtype != np.float32 and zero_point.dtype != np.float16
-    ), f"Unexpected dtype {zero_point.dtype}"
+    assert zero_point.dtype != np.float32 and zero_point.dtype != np.float16, (
+        f"Unexpected dtype {zero_point.dtype}"
+    )
    assert isinstance(scale, np.ndarray), f"Unexpected type {type(scale)}"

    scale_dtype = weight.data_type

@@ -465,13 +465,13 @@ class BaseQuantizer:
        weight_qType, per_channel_data.flatten(), scale, zero_point
    )
    assert isinstance(zero_point, np.ndarray), f"Unexpected type {type(zero_point)}"
-    assert (
-        zero_point.dtype != np.float32 and zero_point.dtype != np.float16
-    ), f"Unexpected dtype {zero_point.dtype}"
+    assert zero_point.dtype != np.float32 and zero_point.dtype != np.float16, (
+        f"Unexpected dtype {zero_point.dtype}"
+    )
    assert isinstance(scale, np.ndarray), f"Unexpected type {type(scale)}"
-    assert isinstance(
-        quantized_per_channel_data, np.ndarray
-    ), f"Unexpected type {type(quantized_per_channel_data)}"
+    assert isinstance(quantized_per_channel_data, np.ndarray), (
+        f"Unexpected type {type(quantized_per_channel_data)}"
+    )

else:
    zero_point, scale, quantized_per_channel_data = quantize_data(

@@ -485,13 +485,13 @@ class BaseQuantizer:
    )

    assert isinstance(zero_point, np.ndarray), f"Unexpected type {type(zero_point)}"
-    assert (
-        zero_point.dtype != np.float32 and zero_point.dtype != np.float16
-    ), f"Unexpected dtype {zero_point.dtype}"
+    assert zero_point.dtype != np.float32 and zero_point.dtype != np.float16, (
+        f"Unexpected dtype {zero_point.dtype}"
+    )
    assert isinstance(scale, np.ndarray), f"Unexpected type {type(scale)}"
-    assert isinstance(
-        quantized_per_channel_data, np.ndarray
-    ), f"Unexpected type {type(quantized_per_channel_data)}"
+    assert isinstance(quantized_per_channel_data, np.ndarray), (
+        f"Unexpected type {type(quantized_per_channel_data)}"
+    )

    zero_point_list.append(zero_point)
    scale_list.append(scale)

@@ -820,9 +820,9 @@ class HistogramCollector(CalibrationDataCollector):
    for arr in data_arr:
        assert isinstance(arr, np.ndarray), f"Unexpected type {type(arr)} for tensor={tensor!r}"
    dtypes = set(a.dtype for a in data_arr)
-    assert (
-        len(dtypes) == 1
-    ), f"The calibration expects only one element type but got {dtypes} for tensor={tensor!r}"
+    assert len(dtypes) == 1, (
+        f"The calibration expects only one element type but got {dtypes} for tensor={tensor!r}"
+    )
    data_arr_np = np.asarray(data_arr)
elif not isinstance(data_arr, np.ndarray):
    raise ValueError(f"Unexpected type {type(data_arr)} for tensor={tensor!r}")

@@ -842,9 +842,9 @@ class HistogramCollector(CalibrationDataCollector):
    # first time it uses num_bins to compute histogram.
    hist, hist_edges = np.histogram(data_arr_np, bins=self.num_bins)
    hist_edges = hist_edges.astype(data_arr_np.dtype)
-    assert (
-        data_arr_np.dtype != np.float64
-    ), "only float32 or float16 is supported, every constant must be explicitly typed"
+    assert data_arr_np.dtype != np.float64, (
+        "only float32 or float16 is supported, every constant must be explicitly typed"
+    )
    self.histogram_dict[tensor] = (hist, hist_edges, min_value, max_value)
else:
    old_histogram = self.histogram_dict[tensor]

@@ -864,9 +864,9 @@ class HistogramCollector(CalibrationDataCollector):
    hist, hist_edges = np.histogram(data_arr_np, bins=old_hist_edges)
    hist_edges = hist_edges.astype(data_arr_np.dtype)
    hist[: len(old_hist)] += old_hist
-    assert (
-        data_arr_np.dtype != np.float64
-    ), "only float32 or float16 is supported, every constant must be explicitly typed"
+    assert data_arr_np.dtype != np.float64, (
+        "only float32 or float16 is supported, every constant must be explicitly typed"
+    )
    self.histogram_dict[tensor] = (hist, hist_edges, min(old_min, min_value), max(old_max, max_value))

def collect_value(self, name_to_arr):

@@ -1259,7 +1259,6 @@ class MatMul4BitsQuantizer:
        self._process_subgraph(graph_stack)
        self.model.clean_initializers()
    elif self.algo_config.algorithm == "nvidia_awq":
-
        # Handle nvidia_awq quantization
        logger.info("Processing nvidia_awq quantization...")
        self.model = self.node_quantizer.quantize_awq(

@@ -1280,9 +1279,9 @@ class MatMul4BitsQuantizer:

    import neural_compressor

-    assert version.parse(neural_compressor.__version__) >= version.parse(
-        "2.3.2"
-    ), "Require neural-compressor >= 2.3.2 to support weight only quantization!"
+    assert version.parse(neural_compressor.__version__) >= version.parse("2.3.2"), (
+        "Require neural-compressor >= 2.3.2 to support weight only quantization!"
+    )

    self.int4_quant_algo()

@@ -1446,7 +1445,6 @@ if __name__ == "__main__":
    elif args.quant_method == "gptq":
        quant_config = GPTQWeightOnlyQuantConfig(block_size=args.block_size, op_types_to_quantize=op_types_to_quantize)
    elif args.quant_method == "nvidia_awq":
-
        if quant_format == QuantFormat.QOperator:
            logger.warning("QOperator is not applicable to nvidia_awq. overriding the value to QDQ")
            quant_format = QuantFormat.QDQ

@@ -158,7 +158,9 @@ class QLinearConv(QuantOperatorBase):
    nodes,
) = self.quantizer.quantize_activation(node, [0])
quant_weight_tuple = self.quantizer.quantize_weight_per_channel(
-    node.input[1], onnx_proto.TensorProto.INT8, 0  # self.quantizer.weight_qType?
+    node.input[1],
+    onnx_proto.TensorProto.INT8,
+    0,  # self.quantizer.weight_qType?
)
quantized_input_names.append(quant_weight_tuple[0])
zero_point_names.append(quant_weight_tuple[1])

@@ -3,9 +3,15 @@ import logging
import numpy as np  # noqa: F401
import onnx

-from ..quant_utils import find_by_name  # noqa: F401
-from ..quant_utils import get_mul_node  # noqa: F401
-from ..quant_utils import TENSOR_NAME_QUANT_SUFFIX, QuantizedValue, QuantizedValueType, attribute_to_kwarg, ms_domain
+from ..quant_utils import (
+    TENSOR_NAME_QUANT_SUFFIX,
+    QuantizedValue,
+    QuantizedValueType,
+    attribute_to_kwarg,
+    find_by_name,  # noqa: F401
+    get_mul_node,  # noqa: F401
+    ms_domain,
+)
from .base_operator import QuantOperatorBase  # noqa: F401
from .matmul import QOpMatMul
from .qdq_base_operator import QDQOperatorBase

@@ -47,10 +47,14 @@ class LSTMQuant(QuantOperatorBase):
    R.dims[0] = R_num_dir * R_4_hidden_size

    quant_input_weight_tuple = self.quantizer.quantize_weight_per_channel(
-        node.input[1], onnx_proto.TensorProto.INT8, 0  # self.quantizer.weight_qType?
+        node.input[1],
+        onnx_proto.TensorProto.INT8,
+        0,  # self.quantizer.weight_qType?
    )
    quant_recurrent_weight_tuple = self.quantizer.quantize_weight_per_channel(
-        node.input[2], onnx_proto.TensorProto.INT8, 0  # self.quantizer.weight_qType?
+        node.input[2],
+        onnx_proto.TensorProto.INT8,
+        0,  # self.quantizer.weight_qType?
    )

    W_quant_weight = model.get_initializer(quant_input_weight_tuple[0])  # noqa: N806

@@ -1253,9 +1253,9 @@ class QDQQuantizer(BaseQuantizer):
    scale = quant_params["scale"]
    zero_point_type = quant_params["quant_type"]
    axis: int | None = quant_params.get("axis")
-    assert (axis is not None and len(scale.shape) == 1) or (
-        axis is None and len(scale.shape) == 0
-    ), "Wrong scale/zp shapes"
+    assert (axis is not None and len(scale.shape) == 1) or (axis is None and len(scale.shape) == 0), (
+        "Wrong scale/zp shapes"
+    )
    assert len(scale.shape) == len(zero_point.shape), "Scale and zero-point must have the same rank"

    zero_point_name = param_name + "_zero_point" + init_name_suffix

@@ -197,9 +197,9 @@ def _check_type(*args, zero_point_index=-1):


def quantize_nparray(qType, arr, scale, zero_point, low=None, high=None):
-    assert (
-        qType in ONNX_TYPE_TO_NP_TYPE
-    ), f"Unexpected data type {qType} requested. Only INT8, UINT8, INT16, and UINT16 are supported."
+    assert qType in ONNX_TYPE_TO_NP_TYPE, (
+        f"Unexpected data type {qType} requested. Only INT8, UINT8, INT16, and UINT16 are supported."
+    )
    if qType in (
        onnx_proto.TensorProto.FLOAT8E4M3FN,
        onnx_proto.TensorProto.FLOAT8E4M3FNUZ,

@@ -918,10 +918,7 @@ def smooth_distribution(p, eps=0.0001):

def model_has_external_data(model_path: Path):
    model = onnx.load(model_path.as_posix(), load_external_data=False)
-    for intializer in model.graph.initializer:
-        if external_data_helper.uses_external_data(intializer):
-            return True
-    return False
+    return any(external_data_helper.uses_external_data(intializer) for intializer in model.graph.initializer)


def optimize_model(model_path: Path, opt_model_path: Path):
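The `model_has_external_data` change above goes a bit beyond pure layout: an explicit loop with early returns becomes a single `any()` over a generator expression. That looks like one of ruff's simplification auto-fixes applied in the same run rather than the formatter itself (an assumption); either way the behavior is unchanged. A generic sketch of the equivalence:

```python
def has_negative(values: list[int]) -> bool:
    # Explicit loop with an early return.
    for value in values:
        if value < 0:
            return True
    return False


def has_negative_any(values: list[int]) -> bool:
    # any() with a generator expression performs the same short-circuit scan.
    return any(value < 0 for value in values)


assert has_negative([1, -2, 3]) and has_negative_any([1, -2, 3])
assert not has_negative([1, 2]) and not has_negative_any([1, 2])
```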
@@ -1814,12 +1814,12 @@ class SymbolicShapeInference:

def replace_min_with_arg(arg_idx):
    replaced = list(expr.args)
-    assert isinstance(
-        replaced[min_pos], sympy.Min
-    ), f"Expected a sympy.Min() at position {min_pos}, got {replaced[min_pos]}"
-    assert (
-        len(replaced[min_pos].args) == 2
-    ), f"Expected a sympy.Min() with exactly 2 arguments, got {replaced[min_pos]}"
+    assert isinstance(replaced[min_pos], sympy.Min), (
+        f"Expected a sympy.Min() at position {min_pos}, got {replaced[min_pos]}"
+    )
+    assert len(replaced[min_pos].args) == 2, (
+        f"Expected a sympy.Min() with exactly 2 arguments, got {replaced[min_pos]}"
+    )
    replaced[min_pos] = replaced[min_pos].args[arg_idx]
    return sympy.Add(*replaced)

@@ -13,33 +13,33 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
-""" Benchmarking the inference of pretrained transformer models.
-PyTorch/TorchScript benchmark is based on https://github.com/huggingface/transformers/blob/master/examples/benchmarks.py.
-One difference is that random input_ids is generated in this benchmark.
+"""Benchmarking the inference of pretrained transformer models.
+PyTorch/TorchScript benchmark is based on https://github.com/huggingface/transformers/blob/master/examples/benchmarks.py.
+One difference is that random input_ids is generated in this benchmark.

-For onnxruntime, this script will convert a pretrained model to ONNX, and optimize it when -o parameter is used.
+For onnxruntime, this script will convert a pretrained model to ONNX, and optimize it when -o parameter is used.

-Example commands:
-    Export all models to ONNX, optimize and validate them:
-        python benchmark.py -b 0 -o -v -i 1 2 3
-    Run OnnxRuntime on GPU for all models:
-        python benchmark.py -g
-    Run OnnxRuntime on GPU for all models with fp32 optimization:
-        python benchmark.py -g -o
-    Run OnnxRuntime on GPU with fp16 optimization:
-        python benchmark.py -g -o -p "fp16"
-    Run TorchScript on GPU for all models:
-        python benchmark.py -e torchscript -g
-    Run TorchScript on GPU for all models with fp16:
-        python benchmark.py -e torchscript -g -p "fp16"
-    Run ONNXRuntime and TorchScript on CPU for all models with quantization:
-        python benchmark.py -e torchscript onnxruntime -p "int8" -o
-    Run OnnxRuntime with the ROCM provider and graph optimization script:
-        python benchmark.py -g -m bert-base-cased --provider rocm --optimizer_info by_script --disable_embed_layer_norm
-    Run OnnxRuntime with bfloat16 fastmath mode kernels on aarch64 platforms with bfloat16 support:
-        python benchmark.py --enable_arm64_bfloat16_fastmath_mlas_gemm
+Example commands:
+    Export all models to ONNX, optimize and validate them:
+        python benchmark.py -b 0 -o -v -i 1 2 3
+    Run OnnxRuntime on GPU for all models:
+        python benchmark.py -g
+    Run OnnxRuntime on GPU for all models with fp32 optimization:
+        python benchmark.py -g -o
+    Run OnnxRuntime on GPU with fp16 optimization:
+        python benchmark.py -g -o -p "fp16"
+    Run TorchScript on GPU for all models:
+        python benchmark.py -e torchscript -g
+    Run TorchScript on GPU for all models with fp16:
+        python benchmark.py -e torchscript -g -p "fp16"
+    Run ONNXRuntime and TorchScript on CPU for all models with quantization:
+        python benchmark.py -e torchscript onnxruntime -p "int8" -o
+    Run OnnxRuntime with the ROCM provider and graph optimization script:
+        python benchmark.py -g -m bert-base-cased --provider rocm --optimizer_info by_script --disable_embed_layer_norm
+    Run OnnxRuntime with bfloat16 fastmath mode kernels on aarch64 platforms with bfloat16 support:
+        python benchmark.py --enable_arm64_bfloat16_fastmath_mlas_gemm

-It is recommended to use run_benchmark.sh to launch benchmark.
+It is recommended to use run_benchmark.sh to launch benchmark.
"""

import argparse

@@ -439,9 +439,9 @@ def run_with_tf_optimizations(do_eager_mode: bool, use_xla: bool):
        return func(*args, **kwargs)

    if do_eager_mode is True:
-        assert (
-            use_xla is False
-        ), "Cannot run model in XLA, if `args.eager_mode` is set to `True`. Please set `args.eager_mode=False`."
+        assert use_xla is False, (
+            "Cannot run model in XLA, if `args.eager_mode` is set to `True`. Please set `args.eager_mode=False`."
+        )
        return run_in_eager_mode
    else:
        return run_in_graph_mode

@@ -167,9 +167,9 @@ def prepare_environment(cache_dir, output_dir, use_gpu, provider=None):

    if use_gpu:
        if provider == "dml":
-            assert (
-                "DmlExecutionProvider" in onnxruntime.get_available_providers()
-            ), "Please install onnxruntime-directml package to test GPU inference."
+            assert "DmlExecutionProvider" in onnxruntime.get_available_providers(), (
+                "Please install onnxruntime-directml package to test GPU inference."
+            )

        else:
            assert not set(onnxruntime.get_available_providers()).isdisjoint(

@@ -201,9 +201,9 @@ def convert_float_to_float16(
    Returns:
        ModelProto: converted model.
    """
-    assert (
-        min_positive_val >= 5.96e-08
-    ), "invalid min_positive_val. smallest positive float16 value: subnormal 5.96e-08, and normalized 6.104e-05"
+    assert min_positive_val >= 5.96e-08, (
+        "invalid min_positive_val. smallest positive float16 value: subnormal 5.96e-08, and normalized 6.104e-05"
+    )
    assert max_finite_val <= float(np.finfo(np.float16).max), "invalid max_finite_val. largest float16 value: 65504"

    force_fp16_inputs_dict = {} if force_fp16_inputs is None else force_fp16_inputs

@@ -373,7 +373,9 @@ class FusionAttentionUnet(Fusion):
    else "MultiHeadAttention ({})".format(
        "self attention with packed qkv"
        if self.enable_packed_qkv
-        else "cross attention with packed kv" if self.enable_packed_kv else "cross attention"
+        else "cross attention with packed kv"
+        if self.enable_packed_kv
+        else "cross attention"
    )
)
self.increase_counter(counter_name)

@@ -841,7 +843,9 @@ class FusionAttentionUnet(Fusion):
    else "MultiHeadAttention ({})".format(
        "self attention with packed qkv"
        if self.enable_packed_qkv
-        else "cross attention with packed kv" if self.enable_packed_kv else "cross attention"
+        else "cross attention with packed kv"
+        if self.enable_packed_kv
+        else "cross attention"
    )
)
self.increase_counter(counter_name)
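The two hunks above reflow a nested conditional expression so that every `if`/`else` arm sits on its own line instead of the trailing arms sharing one. A small runnable sketch of the two layouts (variable names are illustrative):

```python
enable_packed_qkv = False
enable_packed_kv = True

# Old layout: the final if/else pair is packed onto a single line.
kind = (
    "self attention with packed qkv"
    if enable_packed_qkv
    else "cross attention with packed kv" if enable_packed_kv else "cross attention"
)

# New layout: each branch of the nested conditional is on its own line.
kind_reflowed = (
    "self attention with packed qkv"
    if enable_packed_qkv
    else "cross attention with packed kv"
    if enable_packed_kv
    else "cross attention"
)

assert kind == kind_reflowed == "cross attention with packed kv"
```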
@@ -6,6 +6,7 @@
"""
Export LLM to onnx
"""
+
import argparse
import inspect
import math

@@ -173,8 +174,8 @@ def move_to_appropriate_device(model: nn.Module, sample_inputs_tp: tuple) -> nn.
    """
    total_mem_per_cpu = torch.cuda.get_device_properties(0).total_memory / 1024 / 1024

-    print(f"Model_Size = {get_model_parameter_size(model)/1024} GB")
-    print(f"total_mem_per_cpu = {total_mem_per_cpu/1024} GB")
+    print(f"Model_Size = {get_model_parameter_size(model) / 1024} GB")
+    print(f"total_mem_per_cpu = {total_mem_per_cpu / 1024} GB")
    if get_model_parameter_size(model) > total_mem_per_cpu * 0.45:
        device_collection = [torch.device(i) for i in range(torch.cuda.device_count())]
        if len(device_collection) > 1:

@@ -228,9 +229,9 @@ def fetch_onnx_inputs_outputs_name(
    onnx_inp_names = tuple(
        [torch_input_names[i] for i in range(len(torch_input_names)) if isinstance(onnx_inputs[i], torch.Tensor)]
    )
-    assert (
-        "input_ids" in onnx_inp_names and "attention_mask" in onnx_inp_names
-    ), "input_ids and attention_mask must be existed in inputs"
+    assert "input_ids" in onnx_inp_names and "attention_mask" in onnx_inp_names, (
+        "input_ids and attention_mask must be existed in inputs"
+    )
    onnx_out_names = ("logits",)
    onnx_dynamic_axes = {
        "input_ids": {0: "batch_size", 1: "seq_len"},
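Among the hunks above, the `print` calls show another recurring fix: expressions inside f-string replacement fields are now spaced like ordinary code (`{x/1024}` becomes `{x / 1024}`). Only the source text changes, not the rendered output. A small sketch:

```python
model_size_mb = 3500.0

old = f"Model_Size = {model_size_mb/1024} GB"
new = f"Model_Size = {model_size_mb / 1024} GB"

# Both render identically; only the source formatting differs.
assert old == new
print(new)
```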
@@ -889,11 +889,11 @@ class Gpt2Helper:
    result["nan_rate"] = (total_test_cases - len(max_abs_diff_list)) * 1.0 / total_test_cases

    logger.info(
-        f"Parity Test Cases={total_test_cases}; Passed={passed_test_cases}; Nan={total_test_cases-len(max_abs_diff_list)}; Top1_Matched={top1_matched_cases}"
+        f"Parity Test Cases={total_test_cases}; Passed={passed_test_cases}; Nan={total_test_cases - len(max_abs_diff_list)}; Top1_Matched={top1_matched_cases}"
    )

    if passed_test_cases > 0.95 * total_test_cases:
-        logger.info(f"Parity is good: passed rate={int(passed_test_cases*100/total_test_cases):.0f}%")
+        logger.info(f"Parity is good: passed rate={int(passed_test_cases * 100 / total_test_cases):.0f}%")

    return result

@@ -642,9 +642,9 @@ def get_args(rank=0):

    # Check that only one (batch_size, sequence_length) combination is set for profiling
    if args.profile:
-        assert (
-            len(args.batch_sizes) == 1 and len(args.sequence_lengths) == 1
-        ), "Please provide only one (batch_size, sequence_length) combination for profiling"
+        assert len(args.batch_sizes) == 1 and len(args.sequence_lengths) == 1, (
+            "Please provide only one (batch_size, sequence_length) combination for profiling"
+        )

    return args

@@ -259,14 +259,16 @@ def get_args():
        help="Use when GroupQueryAttention (GQA) is in ONNX model",
    )

-    parser.add_argument(
-        "--anomaly-filtering",
-        default=False,
-        action="store_true",
-        help="Use this flag to filter anomaly accelerator times for tokens generated. \
+    (
+        parser.add_argument(
+            "--anomaly-filtering",
+            default=False,
+            action="store_true",
+            help="Use this flag to filter anomaly accelerator times for tokens generated. \
            This may give more accurate latency and throughput metrics for tokens generated. \
            Wall-clock metrics are still reported with anomaly times though.",
-    ),
+        ),
+    )

    parser.add_argument(
        "-b",

@@ -455,9 +455,8 @@ def smooth_quant(
    decoder_model_int8_path: str,
    decoder_with_past_model_int8_path: str,
):
-    from neural_compressor import PostTrainingQuantConfig
+    from neural_compressor import PostTrainingQuantConfig, set_workspace
    from neural_compressor import quantization as intel_quantization
-    from neural_compressor import set_workspace
    from onnx.external_data_helper import load_external_data_for_model
    from quant_kv_dataloader import QuantKVDataLoader

@@ -148,9 +148,9 @@ def test_ort_latency(
    for batch_size in batch_sizes:
        for sequence_length in sequence_lengths:
            for global_length in global_lengths:
-                assert (
-                    global_length <= model.config.attention_window[0]
-                ), "Limitation of current implementation: number of global token <= attention_window"
+                assert global_length <= model.config.attention_window[0], (
+                    "Limitation of current implementation: number of global token <= attention_window"
+                )

                logger.info(
                    f"Testing batch_size={batch_size} sequence_length={sequence_length} global_length={global_length} "

@@ -212,7 +212,6 @@ def test_decoder_onnx(
    onnx_model_path: str,
    multimask_output=False,
):
-
    batch_size = 1
    image = random_sam2_input_image(batch_size)
    sam2_encoder = SAM2ImageEncoder(sam2_model).cpu()

@@ -76,7 +76,7 @@ def show_masks(
        show_box(box_coords, plt.gca())

    if len(scores) > 1:
-        plt.title(f"Mask {i+1}, Score: {score:.3f}", fontsize=18)
+        plt.title(f"Mask {i + 1}, Score: {score:.3f}", fontsize=18)

    plt.axis("off")
    if output_image_file_prefix:

@@ -136,9 +136,9 @@ class SAM2ImageOnnxPredictor(SAM2ImagePredictor):
    input_image = self._transforms(image)
    input_image = input_image[None, ...].to(self.device)

-    assert (
-        len(input_image.shape) == 4 and input_image.shape[1] == 3
-    ), f"input_image must be of size 1x3xHxW, got {input_image.shape}"
+    assert len(input_image.shape) == 4 and input_image.shape[1] == 3, (
+        f"input_image must be of size 1x3xHxW, got {input_image.shape}"
+    )

    # Computing image embeddings for the provided image
    io_shapes = encoder_shape_dict(batch_size=1, height=input_image.shape[2], width=input_image.shape[3])

@@ -1368,9 +1368,9 @@ def main():
        use_io_binding=args.use_io_binding,
    )
elif args.engine == "onnxruntime":
-    assert args.pipeline and os.path.isdir(
-        args.pipeline
-    ), "--pipeline should be specified for the directory of ONNX models"
+    assert args.pipeline and os.path.isdir(args.pipeline), (
+        "--pipeline should be specified for the directory of ONNX models"
+    )
    print(f"Testing diffusers StableDiffusionPipeline with {provider} provider and tuning={args.tuning}")
    result = run_ort(
        model_name=sd_model,

@@ -156,8 +156,7 @@ class DDIMScheduler:
    model_output = (alpha_prod_t**0.5) * model_output + (beta_prod_t**0.5) * sample
else:
    raise ValueError(
-        f"prediction_type given as {self.prediction_type} must be one of `epsilon`, `sample`, or"
-        " `v_prediction`"
+        f"prediction_type given as {self.prediction_type} must be one of `epsilon`, `sample`, or `v_prediction`"
    )

# 4. Clip "predicted x_0"

@@ -568,7 +568,7 @@ class StableDiffusionPipeline:
    prefix = "".join(x for x in prompt[i] if x.isalnum() or x in ", -").replace(" ", "_")[:20]
    parts = [prefix, session_id, str(i + 1), str(seed), self.current_scheduler, str(self.actual_steps)]
    image_path = os.path.join(self.output_dir, "-".join(parts) + ".png")
-    print(f"Saving image {i+1} / {len(images)} to: {image_path}")
+    print(f"Saving image {i + 1} / {len(images)} to: {image_path}")

    from PIL import PngImagePlugin

@@ -1284,7 +1284,7 @@ class OnnxModel:
    op_count[op] = 1 if op not in op_count else (op_count[op] + 1)

    # Sorted by count in the descending order, then by key in alphabetical order.
-    logger.info(f"Operators:{sorted(op_count.items(), key=lambda kv:(-kv[1], kv[0]))}")
+    logger.info(f"Operators:{sorted(op_count.items(), key=lambda kv: (-kv[1], kv[0]))}")

    return op_count

@@ -64,7 +64,7 @@ class QuantizeHelper:
    from onnxruntime.quantization import quantize_dynamic

    Path(quantized_model_path).parent.mkdir(parents=True, exist_ok=True)
-    logger.info(f"Size of full precision ONNX model(MB):{os.path.getsize(onnx_model_path)/(1024*1024)}")
+    logger.info(f"Size of full precision ONNX model(MB):{os.path.getsize(onnx_model_path) / (1024 * 1024)}")
    quantize_dynamic(
        onnx_model_path,
        quantized_model_path,

@@ -73,4 +73,4 @@ class QuantizeHelper:
    )
    logger.info(f"quantized model saved to:{quantized_model_path}")
    # TODO: inlcude external data in total model size.
-    logger.info(f"Size of quantized ONNX model(MB):{os.path.getsize(quantized_model_path)/(1024*1024)}")
+    logger.info(f"Size of quantized ONNX model(MB):{os.path.getsize(quantized_model_path) / (1024 * 1024)}")
@@ -49,13 +49,13 @@ if args.dim is None or args.dim == 2:
    print(f'  OpTester test("AffineGrid", {opset_version});')
    print(f'  test.AddAttribute("align_corners", (int64_t){1 if align_corners else 0});')
    print(
-        f"  test.AddInput<float>(\"theta\", {{{theta.shape[0]}, {theta.shape[1]}, {theta.shape[2]}}}, {{{', '.join([f'{x:.6f}f' for x in theta.flatten()])}}});"
+        f'  test.AddInput<float>("theta", {{{theta.shape[0]}, {theta.shape[1]}, {theta.shape[2]}}}, {{{", ".join([f"{x:.6f}f" for x in theta.flatten()])}}});'
    )
    print(
        f'  test.AddInput<int64_t>("size", {{{len(size)}}}, {{{size[0]}, {size[1]}, {size[2]}, {size[3]}}});'
    )
    print(
-        f"  test.AddOutput<float>(\"grid\", {{{size[0]}, {size[2]}, {size[3]}, 2}}, {{{', '.join([f'{x:.4f}f' for x in grid.flatten()])}}});"
+        f'  test.AddOutput<float>("grid", {{{size[0]}, {size[2]}, {size[3]}, 2}}, {{{", ".join([f"{x:.4f}f" for x in grid.flatten()])}}});'
    )
    print("  test.Run();")
    print("}\n")

@@ -104,13 +104,13 @@ if args.dim is None or args.dim == 3:
    print(f'  OpTester test("AffineGrid", {opset_version});')
    print(f'  test.AddAttribute("align_corners", (int64_t){1 if align_corners else 0});')
    print(
-        f"  test.AddInput<float>(\"theta\", {{{theta.shape[0]}, {theta.shape[1]}, {theta.shape[2]}}}, {{{', '.join([f'{x:.6f}f' for x in theta.flatten()])}}});"
+        f'  test.AddInput<float>("theta", {{{theta.shape[0]}, {theta.shape[1]}, {theta.shape[2]}}}, {{{", ".join([f"{x:.6f}f" for x in theta.flatten()])}}});'
    )
    print(
        f'  test.AddInput<int64_t>("size", {{{len(size)}}}, {{{size[0]}, {size[1]}, {size[2]}, {size[3]}, {size[4]}}});'
    )
    print(
-        f"  test.AddOutput<float>(\"grid\", {{{size[0]}, {size[2]}, {size[3]}, {size[4]}, 3}}, {{{', '.join([f'{x:.4f}f' for x in grid.flatten()])}}});"
+        f'  test.AddOutput<float>("grid", {{{size[0]}, {size[2]}, {size[3]}, {size[4]}, 3}}, {{{", ".join([f"{x:.4f}f" for x in grid.flatten()])}}});'
    )
    print("  test.Run();")
    print("}\n")

@@ -80,11 +80,11 @@ for opset_version in [16, 20]:
    print(f'{spaces}std::string padding_mode = "{padding_mode}";')
    print(f"{spaces}int64_t align_corners = {onnx_align_corners};")
    print(f"{spaces}std::initializer_list<int64_t> X_shape {{ {', '.join(map(str, input_shape))} }};")
-    print(f"{spaces}std::initializer_list<TypeParam> X_data { X_data_str };")
+    print(f"{spaces}std::initializer_list<TypeParam> X_data {X_data_str};")
    print(f"{spaces}std::initializer_list<int64_t> Grid_shape {{ {', '.join(map(str, grid_shape))} }};")
-    print(f"{spaces}std::initializer_list<TypeParam> Grid_data { Grid_data_str };")
+    print(f"{spaces}std::initializer_list<TypeParam> Grid_data {Grid_data_str};")
    print(f"{spaces}std::initializer_list<int64_t> Y_shape {{ {', '.join(map(str, Y_shape))} }};")
-    print(f"{spaces}std::initializer_list<TypeParam> Y_data { Y_data_str };")
+    print(f"{spaces}std::initializer_list<TypeParam> Y_data {Y_data_str};")

    print(f'{spaces}test.AddInput<TypeParam>("X", X_shape, X_data);')
    print(f'{spaces}test.AddInput<TypeParam>("Grid", Grid_shape, Grid_data);')

@@ -354,8 +354,7 @@ class TestInferenceSession(unittest.TestCase):
    assert_allclose(expect, y)
except AssertionError as e:
    raise AssertionError(
-        f"Discrepancies with name={name}, float_name={float_name}, "
-        f"saturate={saturate}\nexpect={expect}\ny={y}"
+        f"Discrepancies with name={name}, float_name={float_name}, saturate={saturate}\nexpect={expect}\ny={y}"
    ) from e
self.assertEqual(expect.shape, y.shape)
self.assertEqual(expect.dtype, y.dtype)

@@ -394,8 +393,7 @@ class TestInferenceSession(unittest.TestCase):
    assert_allclose(expect, y)
except AssertionError as e:
    raise AssertionError(
-        f"Discrepancies with name={name}, float_name={float_name}, "
-        f"saturate={saturate}\nexpect={expect}\ny={y}"
+        f"Discrepancies with name={name}, float_name={float_name}, saturate={saturate}\nexpect={expect}\ny={y}"
    ) from e
self.assertEqual(expect.shape, y.shape)
self.assertEqual(expect.dtype, y.dtype)

@@ -608,8 +606,7 @@ class TestInferenceSession(unittest.TestCase):
    if not saturate:
        return
    raise AssertionError(
-        f"Discrepancies with name={name}, float_name={float_name}, "
-        f"saturate={saturate}\nexpect={expect}\ny={y}"
+        f"Discrepancies with name={name}, float_name={float_name}, saturate={saturate}\nexpect={expect}\ny={y}"
    ) from e
    self.assertEqual(expect.shape, y.shape)
    self.assertEqual(expect.dtype, y.dtype)

@@ -173,16 +173,16 @@ class TestFloat8Gemm8(unittest.TestCase):

    raise AssertionError(
        f"Gemm ERROR len(inputs)={len(feeds)}"
-        f"\na@b=\n{check(lambda:a@b)}"
-        f"\na.T@b=\n{check(lambda:a.T@b)}"
-        f"\na@b.T=\n{check(lambda:a@b.T)}"
-        f"\na.T@b.T=\n{check(lambda:a.T@b.T)}"
-        f"\n----\nb@a=\n{check(lambda:b@a)}"
-        f"\nb.T@a=\n{check(lambda:b.T@a)}"
-        f"\nb@a.T=\n{check(lambda:b@a.T)}"
-        f"\nb.T@a.T=\n{check(lambda:b.T@a.T)}"
-        f"\n----\nexpected=\n{expected[:2,:2]}"
-        f"\n----\ngot=\n{y[:2,:2]}"
+        f"\na@b=\n{check(lambda: a @ b)}"
+        f"\na.T@b=\n{check(lambda: a.T @ b)}"
+        f"\na@b.T=\n{check(lambda: a @ b.T)}"
+        f"\na.T@b.T=\n{check(lambda: a.T @ b.T)}"
+        f"\n----\nb@a=\n{check(lambda: b @ a)}"
+        f"\nb.T@a=\n{check(lambda: b.T @ a)}"
+        f"\nb@a.T=\n{check(lambda: b @ a.T)}"
+        f"\nb.T@a.T=\n{check(lambda: b.T @ a.T)}"
+        f"\n----\nexpected=\n{expected[:2, :2]}"
+        f"\n----\ngot=\n{y[:2, :2]}"
        f"\nkwargs={kwargs}"
    ) from e

@@ -225,16 +225,16 @@ class TestFloat8Gemm8(unittest.TestCase):

    raise AssertionError(
        f"Gemm ERROR len(inputs)={len(feeds)}"
-        f"\na@b=\n{check(lambda:a@b)}"
-        f"\na.T@b=\n{check(lambda:a.T@b)}"
-        f"\na@b.T=\n{check(lambda:a@b.T)}"
-        f"\na.T@b.T=\n{check(lambda:a.T@b.T)}"
-        f"\n----\nb@a=\n{check(lambda:b@a)}"
-        f"\nb.T@a=\n{check(lambda:b.T@a)}"
-        f"\nb@a.T=\n{check(lambda:b@a.T)}"
-        f"\nb.T@a.T=\n{check(lambda:b.T@a.T)}"
-        f"\n----\nexpected=\n{expected[:2,:2]}"
-        f"\n----\ngot=\n{y[:2,:2]}"
+        f"\na@b=\n{check(lambda: a @ b)}"
+        f"\na.T@b=\n{check(lambda: a.T @ b)}"
+        f"\na@b.T=\n{check(lambda: a @ b.T)}"
+        f"\na.T@b.T=\n{check(lambda: a.T @ b.T)}"
+        f"\n----\nb@a=\n{check(lambda: b @ a)}"
+        f"\nb.T@a=\n{check(lambda: b.T @ a)}"
+        f"\nb@a.T=\n{check(lambda: b @ a.T)}"
+        f"\nb.T@a.T=\n{check(lambda: b.T @ a.T)}"
+        f"\n----\nexpected=\n{expected[:2, :2]}"
+        f"\n----\ngot=\n{y[:2, :2]}"
        f"\nkwargs={kwargs}"
    ) from e
    self.assertEqual(expected.shape, y.shape)

@@ -223,7 +223,6 @@ class TestIOBinding(unittest.TestCase):
    for inner_device, provider in devices:
        for onnx_dtype, torch_dtype in onnx_to_torch_type_map.items():
            with self.subTest(onnx_dtype=onnx_dtype, inner_device=str(inner_device)):
-
                # Create onnx graph with dynamic axes
                X = helper.make_tensor_value_info("X", onnx_dtype, [None])  # noqa: N806
                Y = helper.make_tensor_value_info("Y", onnx_dtype, [None])  # noqa: N806
@@ -10,9 +10,13 @@ import unittest
import numpy as np
import onnx
from onnx import TensorProto, helper, numpy_helper
-from op_test_utils import TestDataFeeds  # noqa: F401
-from op_test_utils import check_op_type_order  # noqa: F401
-from op_test_utils import check_model_correctness, check_op_type_count, check_qtype_by_node_type
+from op_test_utils import (
+    TestDataFeeds,  # noqa: F401
+    check_model_correctness,
+    check_op_type_count,
+    check_op_type_order,  # noqa: F401
+    check_qtype_by_node_type,
+)

from onnxruntime.quantization import DynamicQuantConfig, QuantType, quantize, quantize_dynamic

@@ -10,8 +10,13 @@ import unittest
import numpy as np
import onnx
from onnx import TensorProto, helper
-from op_test_utils import check_op_nodes  # noqa: F401
-from op_test_utils import TestDataFeeds, check_model_correctness, check_op_type_count, check_qtype_by_node_type
+from op_test_utils import (
+    TestDataFeeds,
+    check_model_correctness,
+    check_op_nodes,  # noqa: F401
+    check_op_type_count,
+    check_qtype_by_node_type,
+)

from onnxruntime.quantization import QuantFormat, QuantType, quantize_static

@@ -759,12 +759,12 @@ class TestQDQFormatConvRelu(TestQDQFormat):
    QuantType.QInt16: TensorProto.INT16,
    QuantType.QUInt16: TensorProto.UINT16,
}
-assert (
-    weight_type not in to_tensor_types or to_tensor_types[weight_type] in zero_types
-), f"weight_type={weight_type} not in zero_types={zero_types}"
-assert (
-    activation_type not in to_tensor_types or to_tensor_types[activation_type] in zero_types
-), f"activation_type={activation_type} not in zero_types={zero_types}"
+assert weight_type not in to_tensor_types or to_tensor_types[weight_type] in zero_types, (
+    f"weight_type={weight_type} not in zero_types={zero_types}"
+)
+assert activation_type not in to_tensor_types or to_tensor_types[activation_type] in zero_types, (
+    f"activation_type={activation_type} not in zero_types={zero_types}"
+)

check_model_correctness(self, model_fp32_path, model_qdq_path, data_reader.get_next(), rtol=rtol, atol=atol)

@@ -1195,7 +1195,9 @@ class TestTensorQuantOverridesOption(unittest.TestCase):
    # get_qnn_qdq_config() should be able to validate the per-channel axis without having to load
    # the external weight data.
    qnn_config = get_qnn_qdq_config(
-        str(model_path), DummyDataReader([]), init_overrides=init_overrides  # Dummy data reader does nothing
+        str(model_path),
+        DummyDataReader([]),
+        init_overrides=init_overrides,  # Dummy data reader does nothing
    )
    self.assertEqual(set(qnn_config.op_types_to_quantize), {"Conv"})
    self.assertTrue(qnn_config.use_external_data_format)

@@ -6,6 +6,7 @@
"""
Benchmark performance of GroupQueryAttention.
"""
+
from typing import Optional

import torch

@@ -22,7 +22,9 @@ def get_tensor_and_weight(name: str, shape: List[int], random=False, zeros=False
    weights = (
        [np.random.uniform(low, high) for _ in range(total_elements)]
        if random
-        else [0.0] * total_elements if zeros else [1.0] * total_elements
+        else [0.0] * total_elements
+        if zeros
+        else [1.0] * total_elements
    )
    return helper.make_tensor(name, TensorProto.FLOAT, shape, weights), weights

@@ -115,9 +115,9 @@ def optimize_onnx(
    onnx_model.save_model_to_file(optimized_onnx_path)

    if expected_op is not None:
-        assert (
-            len(onnx_model.get_nodes_by_op_type(expected_op)) == 1
-        ), f"Expected {expected_op} node not found in the optimized model {optimized_onnx_path}"
+        assert len(onnx_model.get_nodes_by_op_type(expected_op)) == 1, (
+            f"Expected {expected_op} node not found in the optimized model {optimized_onnx_path}"
+        )


def diff_outputs(torch_outputs, ort_outputs, index):

@@ -183,14 +183,14 @@ def mha_with_past_reference(
    assert config.kv_sequence_length == config.sequence_length
    assert config.use_kv_cache
    if past_k is not None:
-        assert (
-            past_k.dim() == 4 and k.dim() == 4 and past_k.size(1) == k.size(1)
-        ), f"expect BNSH format: {past_k.shape=} {k.shape=}"
+        assert past_k.dim() == 4 and k.dim() == 4 and past_k.size(1) == k.size(1), (
+            f"expect BNSH format: {past_k.shape=} {k.shape=}"
+        )

    if past_v is not None:
-        assert (
-            past_v.dim() == 4 and v.dim() == 4 and past_v.size(1) == v.size(1)
-        ), f"expect BNSH format: {past_v.shape=} {v.shape=}"
+        assert past_v.dim() == 4 and v.dim() == 4 and past_v.size(1) == v.size(1), (
+            f"expect BNSH format: {past_v.shape=} {v.shape=}"
+        )

    present_k = torch.cat((past_k, k), dim=2) if past_k is not None else k
    present_v = torch.cat((past_v, v), dim=2) if past_v is not None else v

@@ -533,7 +533,6 @@ def causal_mask(seqlen_q, seqlen_k, query_padding_mask=None, key_padding_mask=No


def merge_padding_and_causal_masks(config):
-
    q_mask, k_mask, mask = config.right_side_padding_masks()
    if config.causal:
        query_padding_mask = q_mask.reshape(config.batch_size, config.sequence_length)

@@ -418,9 +418,9 @@ class T5Attention(nn.Module):
    real_seq_length = seq_length

    if past_key_value is not None:
-        assert (
-            len(past_key_value) == 2
-        ), f"past_key_value should have 2 past states: keys and values. Got { len(past_key_value)} past states"
+        assert len(past_key_value) == 2, (
+            f"past_key_value should have 2 past states: keys and values. Got {len(past_key_value)} past states"
+        )
        real_seq_length += past_key_value[0].shape[2] if query_length is None else query_length

    key_length = real_seq_length if key_value_states is None else key_value_states.shape[1]

@@ -538,9 +538,9 @@ class T5Attention(nn.Module):
    real_seq_length = seq_length

    if past_key_value is not None:
-        assert (
-            len(past_key_value) == 2
-        ), f"past_key_value should have 2 past states: keys and values. Got { len(past_key_value)} past states"
+        assert len(past_key_value) == 2, (
+            f"past_key_value should have 2 past states: keys and values. Got {len(past_key_value)} past states"
+        )
        real_seq_length += past_key_value[0].shape[2] if query_length is None else query_length

    def project(hidden_states, proj_layer, key_value_states, past_key_value):
|||
|
|
@ -1026,14 +1026,14 @@ class TestRotaryAttentionFusion(unittest.TestCase):
|
|||
unsqueeze_0_node = helper.make_node(
|
||||
"Unsqueeze",
|
||||
inputs=[gather_0_node.output[0] if not use_mul_and_add_nodes_0 else "mul_extra_out", "zero"],
|
||||
outputs=[f"unsqueeze_extra_{2*i}"],
|
||||
name=f"Unsqueeze_extra_{2*i}",
|
||||
outputs=[f"unsqueeze_extra_{2 * i}"],
|
||||
name=f"Unsqueeze_extra_{2 * i}",
|
||||
)
|
||||
unsqueeze_1_node = helper.make_node(
|
||||
"Unsqueeze",
|
||||
inputs=[gather_1_node.output[0] if not use_mul_and_add_nodes_1 else "add_extra_out", "zero"],
|
||||
outputs=[f"unsqueeze_extra_{2*i + 1}"],
|
||||
name=f"Unsqueeze_extra_{2*i + 1}",
|
||||
outputs=[f"unsqueeze_extra_{2 * i + 1}"],
|
||||
name=f"Unsqueeze_extra_{2 * i + 1}",
|
||||
)
|
||||
|
||||
reshape_name = reshape_node.name
|
||||
|
|
|
|||
|
|
@ -6,6 +6,7 @@
|
|||
"""
|
||||
Parity test and benchmark performance of SparseAttention. Requires Nvidia GPU of Compute Capability 7.5 or above.
|
||||
"""
|
||||
|
||||
import math
|
||||
import unittest
|
||||
from typing import Optional, Union
|
||||
|
|
|
|||
|
|
@ -22,7 +22,9 @@ def get_tensor_and_weight(name: str, shape: List[int], random=False, zeros=False
|
|||
weights = (
|
||||
[np.random.uniform(low, high) for _ in range(total_elements)]
|
||||
if random
|
||||
else [0.0] * total_elements if zeros else [1.0] * total_elements
|
||||
else [0.0] * total_elements
|
||||
if zeros
|
||||
else [1.0] * total_elements
|
||||
)
|
||||
return helper.make_tensor(name, TensorProto.FLOAT, shape, weights), weights
|
||||
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
""" Script to generate a dummy ONNX model emulating T5 model with BeamSearch op. """
|
||||
"""Script to generate a dummy ONNX model emulating T5 model with BeamSearch op."""
|
||||
|
||||
import argparse
|
||||
|
||||
|
|
|
|||
|
|
@ -6,13 +6,17 @@ from typing import Any, Callable, Dict, List, Optional, Sequence, Text, Tuple, T
|
|||
|
||||
import numpy as np
|
||||
import onnx
|
||||
from onnx import AttributeProto # noqa: F401
|
||||
from onnx import GraphProto # noqa: F401
|
||||
from onnx import SparseTensorProto # noqa: F401
|
||||
from onnx import mapping # noqa: F401
|
||||
from onnx import numpy_helper # noqa: F401
|
||||
from onnx import utils # noqa: F401
|
||||
from onnx import TensorProto, ValueInfoProto, helper
|
||||
from onnx import (
|
||||
AttributeProto, # noqa: F401
|
||||
GraphProto, # noqa: F401
|
||||
SparseTensorProto, # noqa: F401
|
||||
TensorProto,
|
||||
ValueInfoProto,
|
||||
helper,
|
||||
mapping, # noqa: F401
|
||||
numpy_helper, # noqa: F401
|
||||
utils, # noqa: F401
|
||||
)
|
||||
from onnx.helper import make_opsetid
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -6,13 +6,17 @@ from typing import Any, Callable, Dict, List, Optional, Sequence, Text, Tuple, T
|
|||
|
||||
import numpy as np # noqa: F401
|
||||
import onnx
|
||||
from onnx import AttributeProto # noqa: F401
|
||||
from onnx import GraphProto # noqa: F401
|
||||
from onnx import SparseTensorProto # noqa: F401
|
||||
from onnx import mapping # noqa: F401
|
||||
from onnx import numpy_helper # noqa: F401
|
||||
from onnx import utils # noqa: F401
|
||||
from onnx import TensorProto, ValueInfoProto, helper
|
||||
from onnx import (
|
||||
AttributeProto, # noqa: F401
|
||||
GraphProto, # noqa: F401
|
||||
SparseTensorProto, # noqa: F401
|
||||
TensorProto,
|
||||
ValueInfoProto,
|
||||
helper,
|
||||
mapping, # noqa: F401
|
||||
numpy_helper, # noqa: F401
|
||||
utils, # noqa: F401
|
||||
)
|
||||
from onnx.helper import make_opsetid
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -2,7 +2,7 @@
|
|||
# Licensed under the MIT License.
|
||||
|
||||
"""This file is used to generate test data for Adam optimizer tests in
|
||||
orttraining/orttraining/test/training_ops/cuda/optimizer/adamw_test.cc."""
|
||||
orttraining/orttraining/test/training_ops/cuda/optimizer/adamw_test.cc."""
|
||||
|
||||
import torch
|
||||
|
||||
|
|
|
|||
|
|
@@ -2,7 +2,7 @@
 # Licensed under the MIT License.

 """This file is used to generate test data for LR scheduler optimizer tests in
-orttraining/orttraining/test/training_api/core/training_api_tests.cc."""
+orttraining/orttraining/test/training_api/core/training_api_tests.cc."""

 import torch
 from torch.optim.lr_scheduler import LambdaLR

@@ -33,7 +33,7 @@ class WarmupLinearSchedule(LambdaLR):
         super().__init__(optimizer, self.lr_lambda, last_epoch=last_epoch)

     def lr_lambda(self, step):
-        print(f"warmup_step_count_: {self.warmup_steps }, step: {step}, total_step_count_: {self.t_total}")
+        print(f"warmup_step_count_: {self.warmup_steps}, step: {step}, total_step_count_: {self.t_total}")
         if step < self.warmup_steps:
             return float(step) / float(max(1, self.warmup_steps))
         return max(0.0, float(self.t_total - step) / float(max(1.0, self.t_total - self.warmup_steps)))

@@ -2,7 +2,7 @@
 # Licensed under the MIT License.

 """This file is used to generate test data for SGD optimizer tests in
-orttraining/orttraining/test/training_ops/cuda/optimizer/sgd_test.cc."""
+orttraining/orttraining/test/training_ops/cuda/optimizer/sgd_test.cc."""

 import torch

@@ -2,7 +2,7 @@
 # Licensed under the MIT License.

 """This file is used to generate test data for ort format model tests in
-orttraining/orttraining/test/training_api/core/training_capi_tests.cc."""
+orttraining/orttraining/test/training_api/core/training_capi_tests.cc."""

 import onnx
 import torch

@@ -24,6 +24,7 @@ Models created with this script:
 - fusion/constant_folding_qdq_node_unit.graph_output.qdq_contrib.onnx
 - fusion/constant_folding_qdq_node_unit.graph_output.qdq16_contrib.onnx
 """
+
 from __future__ import annotations

 import argparse

@@ -2,11 +2,11 @@
 # Licensed under the MIT License.

 """This file is used to generate test data for MemoryOptimizer tests in
-onnxruntime/test/optimizer/memory_optimizer_test.cc.
+onnxruntime/test/optimizer/memory_optimizer_test.cc.

-Be noticed, after run this script, manually rename recompute_XXXX_execution_model_training.onnx to
-recompute_XXXX.onnx
-"""
+Be noticed, after run this script, manually rename recompute_XXXX_execution_model_training.onnx to
+recompute_XXXX.onnx
+"""

 import torch

@@ -1,8 +1,10 @@
 from .config import AdamConfig, LambConfig, SGDConfig, _OptimizerConfig  # noqa: F401
 from .fp16_optimizer import FP16_Optimizer  # noqa: F401
 from .fused_adam import AdamWMode, FusedAdam  # noqa: F401
-from .lr_scheduler import ConstantWarmupLRScheduler  # noqa: F401
-from .lr_scheduler import CosineWarmupLRScheduler  # noqa: F401
-from .lr_scheduler import LinearWarmupLRScheduler  # noqa: F401
-from .lr_scheduler import PolyWarmupLRScheduler  # noqa: F401
-from .lr_scheduler import _LRScheduler  # noqa: F401
+from .lr_scheduler import (
+    ConstantWarmupLRScheduler,  # noqa: F401
+    CosineWarmupLRScheduler,  # noqa: F401
+    LinearWarmupLRScheduler,  # noqa: F401
+    PolyWarmupLRScheduler,  # noqa: F401
+    _LRScheduler,  # noqa: F401
+)

@@ -57,9 +57,9 @@ class _OptimizerConfig:
             )
             for k in group:
                 if k != "params":
-                    assert (
-                        k in defaults or k.replace("_coef", "") in defaults
-                    ), f"'params' has {k} hyper parameter not present at 'defaults'"
+                    assert k in defaults or k.replace("_coef", "") in defaults, (
+                        f"'params' has {k} hyper parameter not present at 'defaults'"
+                    )

         self.name = name
         self.lr = float(defaults["lr"])

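This is the assert layout that recurs through the rest of the diff: instead of parenthesizing the condition and letting the message trail after the closing parenthesis, the condition stays on the `assert` line and only the message is parenthesized. A small runnable sketch with hypothetical stand-ins for the optimizer-config check above:

# Hypothetical values; the real check iterates over optimizer parameter groups.
defaults = {"lr": 0.001, "alpha": 0.9}
k = "alpha_coef"

assert k in defaults or k.replace("_coef", "") in defaults, (
    f"'params' has {k} hyper parameter not present at 'defaults'"
)
print("check passed")
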
@@ -273,9 +273,9 @@ class PolyWarmupLRScheduler(_LRScheduler):
         self._num_warmup_steps = warmup * total_steps

     def _warmup_poly(self, train_step_info):
-        assert (
-            train_step_info.optimizer_config.lr > self.lr_end
-        ), f"lr_end ({lr_end}) must be be smaller than initial lr ({train_step_info.optimizer_config.lr})"  # noqa: F821
+        assert train_step_info.optimizer_config.lr > self.lr_end, (
+            f"lr_end ({self.lr_end}) must be be smaller than initial lr ({train_step_info.optimizer_config.lr})"
+        )

         if train_step_info.optimization_step < self._num_warmup_steps:
             return float(train_step_info.optimization_step) / float(max(1, self._num_warmup_steps))

@@ -9,8 +9,12 @@ from functools import wraps
 from onnxruntime.capi import _pybind_state as _C

 from .kernel import *  # noqa: F403
-from .triton_op_executor import register_triton_kernel  # noqa: F401
-from .triton_op_executor import call_triton_by_name, call_triton_by_onnx, get_config
+from .triton_op_executor import (
+    call_triton_by_name,
+    call_triton_by_onnx,
+    get_config,
+    register_triton_kernel,  # noqa: F401
+)


 def run_once_register_triton_op_executor(f):

@@ -105,9 +105,9 @@ class TritonCodegen(NodeVisitor):
         name = node.tensor_arg.name
         var_name = context.get_variable_name(name)
         internal_var_name = context.get_internal_variable_name(name)
-        assert (
-            var_name != internal_var_name
-        ), f"variable name {var_name} and its internal variable name should not be the same."
+        assert var_name != internal_var_name, (
+            f"variable name {var_name} and its internal variable name should not be the same."
+        )

         offset_str, mask_str = self._get_offset_mask(node.offset_calc, node.tensor_arg.name)
         if offset_str:

@@ -359,8 +359,7 @@
         for reduce_node in node.reduce_nodes:
             tmp_var_name = "tmp_" + context.get_internal_variable_name(reduce_node.outputs[0].name)
             code_buffer += (
-                f"{space_indent}{tmp_var_name} = "
-                f"tl.zeros([XBLOCK, RBLOCK], tl.float32) + {reduce_node.default_value}\n"
+                f"{space_indent}{tmp_var_name} = tl.zeros([XBLOCK, RBLOCK], tl.float32) + {reduce_node.default_value}\n"
             )
         code_buffer += (
             f"{space_indent}for roffset in range(0, rnumel, RBLOCK):\n{space_indent}    rindex = rbase + roffset\n"

@@ -440,9 +439,7 @@
     def ModuleNode(self, node: ModuleNode, context: CodegenContext, code_buffer: CodeBuffer, indent: int):  # noqa: N802
         space_indent = " " * indent
         code_buffer += (
-            f"{space_indent}import triton\n"
-            f"{space_indent}import triton.language as tl\n"
-            f"{space_indent}import torch\n"
+            f"{space_indent}import triton\n{space_indent}import triton.language as tl\n{space_indent}import torch\n"
         )

         for kernel_node in node.kernels:

@@ -793,7 +793,7 @@ def flash_attn_forward(q, k, v, bias=None, **kwargs):
         elif bias.shape[2:] == (seqlen_q, seqlen_k):
             bias_type = "matrix"
         else:
-            raise RuntimeError("Last 2 dimensions of bias must be (1, seqlen_k)" " or (seqlen_q, seqlen_k)")
+            raise RuntimeError("Last 2 dimensions of bias must be (1, seqlen_k) or (seqlen_q, seqlen_k)")
         bias = bias.expand(batch, nheads, seqlen_q, seqlen_k)
     bias_strides = (bias.stride(0), bias.stride(1), bias.stride(2)) if has_bias else (0, 0, 0)

@@ -903,7 +903,7 @@ def flash_attn_backward(do, q, k, v, o, lse, bias=None, **kwargs):
         elif bias.shape[2:] == (seqlen_q, seqlen_k):
             bias_type = "matrix"
         else:
-            raise RuntimeError("Last 2 dimensions of bias must be (1, seqlen_k)" " or (seqlen_q, seqlen_k)")
+            raise RuntimeError("Last 2 dimensions of bias must be (1, seqlen_k) or (seqlen_q, seqlen_k)")
         bias = bias.expand(batch, nheads, seqlen_q, seqlen_k)
     bias_strides = (bias.stride(0), bias.stride(1), bias.stride(2)) if has_bias else (0, 0, 0)

@@ -191,7 +191,6 @@ def _export_pt_1_10(g, n, *args, **kwargs):
 def _default_export(
     g, func_full_qual_name, func_class, cconv, output_size, output_tensor_types, output_tensor_ranks, *args, **kwargs
 ):
-
     input_tensor_types = []
     input_tensor_ranks = []

@@ -11,7 +11,6 @@ from typing import Optional
 import torch

 from . import _logger, _utils
-from ._fallback_exceptions import wrap_exception  # noqa: F401
 from ._fallback_exceptions import (
     ORTModuleDeviceException,
     ORTModuleFallbackException,

@@ -19,6 +18,7 @@ from ._fallback_exceptions import (
     ORTModuleIOError,
     ORTModuleONNXModelException,
     ORTModuleTorchModelException,
+    wrap_exception,  # noqa: F401
 )

@@ -580,9 +580,9 @@ class GraphTransitionManager:
         parameter_names = {k: v for k, v in flatten_module.named_parameters()}
         for input_name in exported_model_info.onnx_graph_input_names:
             if input_name in exported_model_info.onnx_graph_input_names_user_defined:
-                assert (
-                    input_name in model_info_for_export.onnx_graph_input_data_accessor_user_defined
-                ), f"{input_name} model_info_for_export.onnx_graph_input_data_accessor_user_defined"
+                assert input_name in model_info_for_export.onnx_graph_input_data_accessor_user_defined, (
+                    f"{input_name} model_info_for_export.onnx_graph_input_data_accessor_user_defined"
+                )
                 # We assume the data accessor should be the same as the one used for the previous export, because
                 # there is args and kwargs schema check during export check phase.
                 if model_info_for_export.onnx_graph_input_data_accessor_user_defined[input_name](

@@ -736,7 +736,6 @@ class GraphTransitionManager:
         runtime_inspector: RuntimeInspector,
         logger: logging.Logger,
     ) -> tuple[onnx.ModelProto, ORTModelInputOutputSchemaType, list[str], list[str]]:
-
         # Add hooks to check the sparsity of the embedding and label inputs during the export.
         embedding_hook_handles = GraphTransitionManager._add_check_embedding_sparsity_hook(
             enable_embedding_sparse_optimizer, device, logger, runtime_inspector, flattened_module

@@ -201,7 +201,6 @@ class MemoryObserver:
             _MemoryOptimizationLevel.TRANSFORMER_LAYERWISE_RECOMPUTE,
             _MemoryOptimizationLevel.TRANSFORMER_LAYERWISE_RECOMPUTE_WITH_COMPROMISE,
         ]:
-
             apply_config = []

             for cluster_id in self.cluster_id_combination_to_saving_symbolics_map:

@@ -102,9 +102,9 @@ def post_processing_enable_zero_stage3_compat(

         func_name = _get_func_name(c)
         if func_name == pre_forward_function_name:
-            assert (
-                pre_forward_pythonop_node is None
-            ), "Multiple ORTZeROOffloadPreForwardFunction nodes found, it should not happen"
+            assert pre_forward_pythonop_node is None, (
+                "Multiple ORTZeROOffloadPreForwardFunction nodes found, it should not happen"
+            )
             pre_forward_pythonop_node = c

     if pre_forward_pythonop_node is None:

@@ -210,7 +210,7 @@ def post_processing_enable_zero_stage3_compat(


 def _create_weight_retrieval_function(
-    zero_stage3_named_params: Optional[Dict[str, torch.nn.parameter.Parameter]]
+    zero_stage3_named_params: Optional[Dict[str, torch.nn.parameter.Parameter]],
 ) -> str:
     """This function is used to create a weight retrieving function using zero_stage3_named_params."""

@@ -59,9 +59,9 @@ def _load_use_external_gpu_allocator(ortmodule_config_accessor, data):
     assert hasattr(data, _load_use_external_gpu_allocator.loading_key)
     log.info(f"Found keyword {_load_use_external_gpu_allocator.loading_key} in json. Loading attributes from file.")

-    assert isinstance(
-        data.UseExternalGPUAllocator, bool
-    ), f"{_load_use_external_gpu_allocator.loading_key} must be a boolean"
+    assert isinstance(data.UseExternalGPUAllocator, bool), (
+        f"{_load_use_external_gpu_allocator.loading_key} must be a boolean"
+    )
     ortmodule_config_accessor._runtime_options.use_external_gpu_allocator = data.UseExternalGPUAllocator


@@ -73,9 +73,9 @@ def _load_enable_custom_autograd_function(ortmodule_config_accessor, data):
         f"Found keyword {_load_enable_custom_autograd_function.loading_key} in json. Loading attributes from file."
     )

-    assert isinstance(
-        data.EnableCustomAutogradFunction, bool
-    ), f"{_load_enable_custom_autograd_function.loading_key} must be a boolean"
+    assert isinstance(data.EnableCustomAutogradFunction, bool), (
+        f"{_load_enable_custom_autograd_function.loading_key} must be a boolean"
+    )

     from onnxruntime.training.ortmodule._custom_autograd_function import enable_custom_autograd_support


@@ -89,9 +89,9 @@ def _load_enable_grad_acc_optimization(ortmodule_config_accessor, data):
     assert hasattr(data, _load_enable_grad_acc_optimization.loading_key)
     log.info(f"Found keyword {_load_enable_grad_acc_optimization.loading_key} in json. Loading attributes from file.")

-    assert isinstance(
-        data.EnableGradAccOptimization, bool
-    ), f"{_load_enable_grad_acc_optimization.loading_key} must be a boolean"
+    assert isinstance(data.EnableGradAccOptimization, bool), (
+        f"{_load_enable_grad_acc_optimization.loading_key} must be a boolean"
+    )
     ortmodule_config_accessor._runtime_options.enable_grad_acc_optimization = data.EnableGradAccOptimization


@@ -101,9 +101,9 @@ def _load_run_symbolic_shape_infer(ortmodule_config_accessor, data):
     assert hasattr(data, _load_run_symbolic_shape_infer.loading_key)
     log.info(f"Found keyword {_load_run_symbolic_shape_infer.loading_key} in json. Loading attributes from file.")

-    assert isinstance(
-        data.RunSymbolicShapeInference, bool
-    ), f"{_load_run_symbolic_shape_infer.loading_key} must be a boolean"
+    assert isinstance(data.RunSymbolicShapeInference, bool), (
+        f"{_load_run_symbolic_shape_infer.loading_key} must be a boolean"
+    )
     ortmodule_config_accessor._runtime_options.run_symbolic_shape_infer = data.RunSymbolicShapeInference


@@ -175,9 +175,9 @@ def _load_use_memory_efficient_gradient(ortmodule_config_accessor, data):
     assert hasattr(data, _load_use_memory_efficient_gradient.loading_key)
     log.info(f"Found keyword {_load_use_memory_efficient_gradient.loading_key} in json. Loading attributes from file.")

-    assert isinstance(
-        data.UseMemoryEfficientGradient, bool
-    ), f"{_load_use_memory_efficient_gradient.loading_key} must be a boolean"
+    assert isinstance(data.UseMemoryEfficientGradient, bool), (
+        f"{_load_use_memory_efficient_gradient.loading_key} must be a boolean"
+    )
     ortmodule_config_accessor._runtime_options.use_memory_efficient_gradient = data.UseMemoryEfficientGradient

@@ -278,11 +278,11 @@ def _summarize_tensor(
     std_value = torch.sqrt(s.sum() / (element_count - 1))

     f.write(
-        f"{'>'*max(0, depth) + display_name} shape: {tensor_shape} dtype: {tensor_dtype} size: {flatten_array.size()} \n"
+        f"{'>' * max(0, depth) + display_name} shape: {tensor_shape} dtype: {tensor_dtype} size: {flatten_array.size()} \n"
         f"min: {min_value} max: {max_value}, mean: {mean_value}, "
         f"std: {std_value} \n"
         f"nan: {num_nan}, inf: {num_inf}\n"
     )
     f.write(f"samples(top 128): {flatten_array[:128]}\n")
     f.write(f"neg: {num_neg}, pos: {num_pos}, zero: {num_zero},\n")
-    f.write(f"{'='*16}\n")
+    f.write(f"{'=' * 16}\n")

@@ -291,9 +291,9 @@ def unflatten_data_using_schema(
     elif PrimitiveType.is_primitive_type(data_schema):
         return data_schema
     elif isinstance(data_schema, _TensorStub):
-        assert isinstance(
-            data[data_schema.tensor_idx], torch.Tensor
-        ), f"Expecting torch.Tensor, got {type(data[data_schema.tensor_idx])}"
+        assert isinstance(data[data_schema.tensor_idx], torch.Tensor), (
+            f"Expecting torch.Tensor, got {type(data[data_schema.tensor_idx])}"
+        )
         return data[data_schema.tensor_idx]
     elif isinstance(data_schema, abc.Sequence):
         sequence_type = type(data_schema)

@@ -84,7 +84,12 @@ def _get_name(name):
 # Depending on calling backward() from which outputs, it's possible that grad of some weights are not calculated.
 # none_pt_params is to tell what these weights are, so we will not compare the tensors.
 def assert_gradients_match_and_reset_gradient(
-    ort_model, pt_model, none_pt_params=[], reset_gradient=True, rtol=1e-04, atol=1e-05  # noqa: B006
+    ort_model,
+    pt_model,
+    none_pt_params=(),
+    reset_gradient=True,
+    rtol=1e-04,
+    atol=1e-05,
 ):
     ort_named_params = list(ort_model.named_parameters())
     pt_named_params = list(pt_model.named_parameters())

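The hunk above is one of the few places where the reformat also touches a default value: the mutable list default `none_pt_params=[]` (the pattern flake8-bugbear flags as B006) becomes an immutable empty tuple, so the `# noqa: B006` marker can be dropped. A minimal sketch of the pattern with hypothetical names:

# Hypothetical helper illustrating the tuple-default pattern used above.
def filter_params(names, skip=()):
    # `skip` defaults to an immutable tuple instead of a mutable list, so
    # repeated calls can never share and mutate the same default object.
    return [n for n in names if n not in skip]

print(filter_params(["weight", "bias"], skip=("bias",)))  # ['weight']
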
@@ -165,9 +165,9 @@ class TestTorchDynamoOrt(unittest.TestCase):
             for tensor, baseline_tensor in zip(tensors, baseline_tensors):
                 torch.testing.assert_close(tensor, baseline_tensor)

-            assert (
-                len(cached.keys()) == 2
-            ), "Should only see two GraphModules so far. One for forward and the other one for backward."
+            assert len(cached.keys()) == 2, (
+                "Should only see two GraphModules so far. One for forward and the other one for backward."
+            )
             for value in cached.values():
                 assert len(value) == 1, (
                     "One GraphModule should only be mapped to one ONNX model since "

@@ -355,7 +355,9 @@ class GRU:
         prev_h = (
             all_hidden_states[t - 1, 0, idx, :]
             if t > 0
-            else initial_hidden_state[0, idx, :] if initial_hidden_state is not None else 0
+            else initial_hidden_state[0, idx, :]
+            if initial_hidden_state is not None
+            else 0
         )

         grad_update_gate = (prev_h - hidden_gate) * grad_h

@@ -480,7 +480,9 @@ class LSTM:
         grad_forget_gate = grad_c * (
             all_cell_states[t - 1, 0, idx, :]
             if t > 0
-            else initial_cell_state[0, idx, :] if initial_cell_state is not None else 0
+            else initial_cell_state[0, idx, :]
+            if initial_cell_state is not None
+            else 0
         )
         grad_control_gate = grad_c * input_gate


@@ -520,7 +522,9 @@ class LSTM:
         prev_h = (
             all_hidden_states[t - 1, 0, idx, :]
             if t > 0
-            else initial_hidden_state[0, idx, :] if initial_hidden_state is not None else 0
+            else initial_hidden_state[0, idx, :]
+            if initial_hidden_state is not None
+            else 0
         )
         grad_recurrence_weights[0, : self._hidden_size, :] += np.dot(
             np.expand_dims(grad_input_activation, axis=0).T, np.expand_dims(prev_h, axis=0)

@@ -549,17 +553,22 @@ class LSTM:
         grad_peephole_weights[0, : self._hidden_size] += grad_input_activation * (
             all_cell_states[t - 1, 0, idx, :]
             if t > 0
-            else initial_cell_state[0, idx, :] if initial_cell_state is not None else 0
+            else initial_cell_state[0, idx, :]
+            if initial_cell_state is not None
+            else 0
         )
         grad_peephole_weights[0, self._hidden_size : 2 * self._hidden_size] += (
             grad_output_activation * all_cell_states[t, 0, idx, :]
         )
-        grad_peephole_weights[
-            0, 2 * self._hidden_size : 3 * self._hidden_size
-        ] += grad_forget_activation * (
-            all_cell_states[t - 1, 0, idx, :]
-            if t > 0
-            else initial_cell_state[0, idx, :] if initial_cell_state is not None else 0
+        grad_peephole_weights[0, 2 * self._hidden_size : 3 * self._hidden_size] += (
+            grad_forget_activation
+            * (
+                all_cell_states[t - 1, 0, idx, :]
+                if t > 0
+                else initial_cell_state[0, idx, :]
+                if initial_cell_state is not None
+                else 0
+            )
         )

         grad_c = grad_prev_c

@@ -1102,7 +1102,6 @@ def test_custom_optimizer_block():


 def test_generate_artifacts_path():
-
     with tempfile.TemporaryDirectory() as temp_dir:
         _, simple_net = _get_models("cpu", 32, 28, 10, 10)

@@ -6562,7 +6562,8 @@ def test_bert_memory_inspection(caplog):
     os.environ["ORTMODULE_PRINT_MEMORY_STATS"] = "1"
     pt_model.eval()  # Put it in evaluate mode by intention, in case some initialization in ORTModule use the module.is_training for its checks by mistake.
     ort_model = ORTModule(
-        copy.deepcopy(pt_model), DebugOptions(log_level=LogLevel.INFO)  # The logged memory info is in INFO level.
+        copy.deepcopy(pt_model),
+        DebugOptions(log_level=LogLevel.INFO),  # The logged memory info is in INFO level.
     )

     def run_step(model, x, y, z):

@@ -6776,11 +6777,9 @@ def test_enable_layerwise_recompute(memory_optimization_level, allow_gradient_ch


 def test_layerwise_recompute_pythonop_deterministic():
-
     original_val = os.environ.get("ORTMODULE_MEMORY_OPT_LEVEL", None)

     class DropoutFunction(torch.autograd.Function):
-
         @staticmethod
         def forward(ctx, x):
             return torch.nn.functional.dropout(x, p=0.5, training=True)

@@ -1414,13 +1414,9 @@ def test_pythonop_training_mode():
     def check_pythonop_training_mode(model, is_eval_mode):
         ## make sure the ort's PythonOp's training_mode is correct
         if is_eval_mode:
-            onnx_nodes = (
-                model._torch_module._execution_manager._inference_manager._graph_transition_manager._exported_model_info.exported_model.graph.node
-            )
+            onnx_nodes = model._torch_module._execution_manager._inference_manager._graph_transition_manager._exported_model_info.exported_model.graph.node
         else:
-            onnx_nodes = (
-                model._torch_module._execution_manager._training_manager._graph_transition_manager._exported_model_info.exported_model.graph.node
-            )
+            onnx_nodes = model._torch_module._execution_manager._training_manager._graph_transition_manager._exported_model_info.exported_model.graph.node

         found_pythonop = False
         for node in onnx_nodes:

@@ -1642,14 +1638,14 @@ def test_customized_shape_inference():
     _find_shape_and_dtype(graph.value_info)

     assert all(s is not None for s in input_shapes), "PythonOp input shape should be found in the optimized_model"
-    assert (
-        all(d is not None for d in input_dtypes) is not None
-    ), "PythonOp input dtype should be found in the optimized_model"
+    assert all(d is not None for d in input_dtypes) is not None, (
+        "PythonOp input dtype should be found in the optimized_model"
+    )

     assert all(s is not None for s in output_shapes), "PythonOp output shape should be found in the optimized_model"
-    assert (
-        all(d is not None for d in output_dtypes) is not None
-    ), "PythonOp output dtype should be found in the optimized_model"
+    assert all(d is not None for d in output_dtypes) is not None, (
+        "PythonOp output dtype should be found in the optimized_model"
+    )

     def _compare_shape(shape1, shape2):
         if len(shape1.dim) != len(shape2.dim):

@@ -1805,7 +1801,6 @@ def test_python_op_return_persistent_param_as_value():


 def test_determistic_pythonop_export():
-
     class TestFunction(torch.autograd.Function):
         @staticmethod
         # bias is an optional argument

@@ -1839,9 +1834,7 @@ def test_determistic_pythonop_export():
     ortmodule = ORTModule(TestModel(output_size)).train()
     _ = ortmodule(torch.randn(output_size, dtype=torch.float))

-    onnx_nodes = (
-        ortmodule._torch_module._execution_manager._training_manager._graph_transition_manager._exported_model_info.exported_model.graph.node
-    )
+    onnx_nodes = ortmodule._torch_module._execution_manager._training_manager._graph_transition_manager._exported_model_info.exported_model.graph.node

     found_pythonop = False
     for node in onnx_nodes:

@@ -12,10 +12,10 @@ import torch
 import wget
 from sklearn.model_selection import train_test_split
 from torch.utils.data import DataLoader, RandomSampler, SequentialSampler, TensorDataset
-from transformers import BertConfig  # noqa: F401
 from transformers import (
     AdamW,
     AutoConfig,
+    BertConfig,  # noqa: F401
     BertForSequenceClassification,
     BertTokenizer,
     get_linear_schedule_with_warmup,

@@ -429,7 +429,9 @@ def main():

     # Create the learning rate scheduler.
     scheduler = get_linear_schedule_with_warmup(
-        optimizer, num_warmup_steps=0, num_training_steps=total_steps  # Default value in run_glue.py
+        optimizer,
+        num_warmup_steps=0,
+        num_training_steps=total_steps,  # Default value in run_glue.py
     )
     # Seed
     random.seed(args.seed)

@@ -12,9 +12,14 @@ import torch
 import wget
 from sklearn.model_selection import train_test_split
 from torch.utils.data import DataLoader, RandomSampler, SequentialSampler, TensorDataset
-from transformers import AdamW  # noqa: F401
-from transformers import BertConfig  # noqa: F401
-from transformers import AutoConfig, BertForSequenceClassification, BertTokenizer, get_linear_schedule_with_warmup
+from transformers import (
+    AdamW,  # noqa: F401
+    AutoConfig,
+    BertConfig,  # noqa: F401
+    BertForSequenceClassification,
+    BertTokenizer,
+    get_linear_schedule_with_warmup,
+)

 import onnxruntime
 from onnxruntime.training.ortmodule import DebugOptions, ORTModule

@@ -432,7 +437,9 @@ def main():

     # Create the learning rate scheduler.
     scheduler = get_linear_schedule_with_warmup(
-        optimizer, num_warmup_steps=0, num_training_steps=total_steps  # Default value in run_glue.py
+        optimizer,
+        num_warmup_steps=0,
+        num_training_steps=total_steps,  # Default value in run_glue.py
     )
     scaler = torch.cuda.amp.GradScaler()

@@ -108,7 +108,10 @@ ds = SampleData(x, y)

 print("Initialize deepspeed")
 model_engine, optimizer, _, _ = deepspeed.initialize(
-    args=args, model=model, model_parameters=params, training_data=ds  # (x,y)#
+    args=args,
+    model=model,
+    model_parameters=params,
+    training_data=ds,  # (x,y)#
 )

 for step in range(args.steps):

@@ -69,9 +69,7 @@ class TestOnnxOpsOrtModule(unittest.TestCase):
         self.assert_values_are_close(ort_prediction, pt_prediction, **kwargs)
         self.assert_gradients_match_and_reset_gradient(ort_model, pt_model, **kwargs)

-        onnx_graph_inf = (
-            ort_model._torch_module._execution_manager._training_manager._graph_transition_manager._exported_model_info.exported_model
-        )
+        onnx_graph_inf = ort_model._torch_module._execution_manager._training_manager._graph_transition_manager._exported_model_info.exported_model
         onnx_graph_train = ort_model._torch_module._execution_manager._training_manager._onnx_models.optimized_model
         if debug:
             with open(f"debug_{name}_ortmodule_infer.onnx", "wb") as f:

@@ -68,8 +68,8 @@ def train_model(qat_train_model, qat_eval_model, qat_optimizer_model, qat_checkp
     # Training loop
     epochs = 5
     for epoch in range(epochs):
-        logging.info(f"Starting epoch: {epoch+1}")
+        logging.info(f"Starting epoch: {epoch + 1}")
         training_loss = _train_epoch(model, optimizer, train_loader)
         eval_loss = _eval(model, test_loader)

-        logging.info(f"End of epoch: {epoch+1}, training loss: {training_loss:.4f}, eval loss: {eval_loss:.4f}")
+        logging.info(f"End of epoch: {epoch + 1}, training loss: {training_loss:.4f}, eval loss: {eval_loss:.4f}")

@@ -106,7 +106,7 @@ def main():
     ]

     if config.enable_mixed_precision:
-        cmds.append("--use_mixed_precision"),
+        (cmds.append("--use_mixed_precision"),)

     subprocess.run(cmds, timeout=120).check_returncode()  # noqa: PLW1510

@@ -94,8 +94,8 @@ def main():
     ]

     if c.use_mixed_precision:
-        cmds.append("--use_mixed_precision"),
-        cmds.append("--allreduce_in_fp16"),
+        (cmds.append("--use_mixed_precision"),)
+        (cmds.append("--allreduce_in_fp16"),)

     subprocess.run(cmds).check_returncode()  # noqa: PLW1510
     if c.expected_perf > 0.0:

@@ -60,7 +60,7 @@ def main():
     ]

     if c.use_mixed_precision:
-        cmds.append("--use_mixed_precision"),
+        (cmds.append("--use_mixed_precision"),)

     subprocess.run(cmds).check_returncode()  # noqa: PLW1510

@@ -14,7 +14,6 @@
 # limitations under the License.
 """BERT finetuning runner."""

-
 import argparse

 # ==================

Some files were not shown because too many files have changed in this diff.