mirror of
https://github.com/saymrwulf/onnxruntime.git
synced 2026-05-16 21:00:14 +00:00
### Description This PR is to update the win-ort-main branch to the tip main branch as of 2025-01-16. ### Motivation and Context This update includes the OpenVino fix for debug builds. --------- Signed-off-by: Liqun Fu <liqfu@microsoft.com> Signed-off-by: Liqun Fu <liqun.fu@microsoft.com> Signed-off-by: Junze Wu <junze.wu@intel.com> Signed-off-by: dependabot[bot] <support@github.com> Signed-off-by: Jianhui Dai <jianhui.j.dai@intel.com> Co-authored-by: Yueqing Zhang <yuz75@Pitt.edu> Co-authored-by: amancini-N <63410090+amancini-N@users.noreply.github.com> Co-authored-by: Adrian Lizarraga <adlizarraga@microsoft.com> Co-authored-by: liqun Fu <liqfu@microsoft.com> Co-authored-by: Guenther Schmuelling <guschmue@microsoft.com> Co-authored-by: Yifan Li <109183385+yf711@users.noreply.github.com> Co-authored-by: yf711 <yifanl@microsoft.com> Co-authored-by: Wanming Lin <wanming.lin@intel.com> Co-authored-by: wejoncy <wejoncy@163.com> Co-authored-by: wejoncy <wejoncy@.com> Co-authored-by: Scott McKay <skottmckay@gmail.com> Co-authored-by: Changming Sun <chasun@microsoft.com> Co-authored-by: Jean-Michaël Celerier <jeanmichael.celerier+github@gmail.com> Co-authored-by: Dmitry Deshevoy <mityada@gmail.com> Co-authored-by: xhcao <xinghua.cao@intel.com> Co-authored-by: Yueqing Zhang <yueqingz@amd.com> Co-authored-by: Yulong Wang <7679871+fs-eire@users.noreply.github.com> Co-authored-by: Jiajia Qin <jiajiaqin@microsoft.com> Co-authored-by: Wu, Junze <junze.wu@intel.com> Co-authored-by: Jian Chen <cjian@microsoft.com> Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: Matthieu Darbois <mayeut@users.noreply.github.com> Co-authored-by: Prathik Rao <prathik.rao@gmail.com> Co-authored-by: wonchung-microsoft <wonchung@microsoft.com> Co-authored-by: Vincent Wang <wangwchpku@outlook.com> Co-authored-by: PARK DongHa <luncliff@gmail.com> Co-authored-by: Hector Li <hecli@microsoft.com> Co-authored-by: Sam Webster 
<13457618+samwebster@users.noreply.github.com> Co-authored-by: Adrian Lizarraga <adrianlm2@gmail.com> Co-authored-by: Preetha Veeramalai <preetha.veeramalai@intel.com> Co-authored-by: jatinwadhwa921 <jatin.wadhwa@intel.com> Co-authored-by: Satya Kumar Jandhyala <satya.k.jandhyala@gmail.com> Co-authored-by: Corentin Maravat <101636442+cocotdf@users.noreply.github.com> Co-authored-by: Xiaoyu <85524621+xiaoyu-work@users.noreply.github.com> Co-authored-by: Tianlei Wu <tlwu@microsoft.com> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: Jie Chen <jie.a.chen@intel.com> Co-authored-by: Jianhui Dai <jianhui.j.dai@intel.com> Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> Co-authored-by: Edward Chen <18449977+edgchen1@users.noreply.github.com> Co-authored-by: Baiju Meswani <bmeswani@microsoft.com> Co-authored-by: kunal-vaishnavi <115581922+kunal-vaishnavi@users.noreply.github.com> Co-authored-by: Justin Chu <justinchuby@users.noreply.github.com> Co-authored-by: Copilot Autofix powered by AI <62310815+github-advanced-security[bot]@users.noreply.github.com> Co-authored-by: Ted Themistokleous <107195283+TedThemistokleous@users.noreply.github.com> Co-authored-by: Jeff Daily <jeff.daily@amd.com> Co-authored-by: Artur Wojcik <artur.wojcik@outlook.com> Co-authored-by: Ted Themistokleous <tedthemistokleous@amd.com> Co-authored-by: Xinya Zhang <Xinya.Zhang@amd.com> Co-authored-by: ikalinic <ilija.kalinic@amd.com> Co-authored-by: sstamenk <sstamenk@amd.com> Co-authored-by: Yi-Hong Lyu <yilyu@microsoft.com> Co-authored-by: Ti-Tai Wang <titaiwang@microsoft.com>
184 lines
7.1 KiB
Python
184 lines
7.1 KiB
Python
# -------------------------------------------------------------------------
|
|
# Copyright (c) Microsoft Corporation. All rights reserved.
|
|
# Licensed under the MIT License.
|
|
# --------------------------------------------------------------------------
|
|
# This script opens an existing model in onnx format and attempts to
|
|
# move initializers from model.graph.initializer field to model.graph.sparse_initializer field
|
|
# and convert them into ONNX COO flat index format.
|
|
|
|
import argparse
|
|
import logging
|
|
import sys
|
|
from typing import List, Tuple # noqa: F401
|
|
|
|
import numpy as np
|
|
import onnx
|
|
from onnx import ModelProto, SparseTensorProto, TensorProto, numpy_helper # noqa: F401
|
|
|
|
# Module-wide logger; handlers and level are configured in setup_logging().
logger = logging.getLogger(__name__)

# ONNX element types that require a floating-point tolerance when testing an
# element for "zero"; all other element types are compared exactly against 0.
real_types = {int(TensorProto.FLOAT), int(TensorProto.DOUBLE)}
|
|
|
|
|
|
def parse_arguments():
    """Build the command line interface of the tool and parse sys.argv.

    Returns the argparse namespace with: input, output, exclude, tolerance,
    sparsity_threshold and verbose.
    """
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument("--input", required=True, type=str, help="input model path")
    arg_parser.add_argument("--output", required=True, type=str, help="output model path")
    arg_parser.add_argument(
        "--exclude", required=False, type=str, help="semicolon separated list of initializer names to exclude"
    )
    arg_parser.add_argument("--tolerance", required=False, type=float, default=1e-6, help="FP absolute tolerance.")
    arg_parser.add_argument(
        "--sparsity_threshold",
        required=False,
        type=float,
        default=0.5,
        help="convert to sparse initializers if sparsity is at least this much",
    )
    arg_parser.add_argument("--verbose", required=False, action="store_true")
    arg_parser.set_defaults(verbose=False)
    return arg_parser.parse_args()
|
|
|
|
|
|
def setup_logging(verbose):  # type: (bool) -> None
    """Attach a stdout handler to the module logger.

    Verbose mode selects DEBUG level with a format that shows the source
    location; otherwise INFO level with a terse filename prefix is used.
    """
    if verbose:
        log_format = "[%(filename)s:%(lineno)s - %(funcName)20s()] %(message)s"
        level = logging.DEBUG
    else:
        log_format = "%(filename)20s: %(message)s"
        level = logging.INFO

    handler = logging.StreamHandler(sys.stdout)
    handler.setFormatter(logging.Formatter(log_format))
    handler.setLevel(level)

    logger.addHandler(handler)
    logger.setLevel(level)
|
|
|
|
|
|
def convert_tensor_to_sparse(tensor, sparsity_threshold, tolerance):  # type: (TensorProto, float, float) -> Tuple[SparseTensorProto, float]
    """Returns a tuple of (sparse_tensor, sparsity level).

    Converts the dense initializer `tensor` to ONNX COO flat-index format.
    When the tensor is not sparse enough, or the sparse encoding would not be
    smaller than the dense one, a placeholder object() is returned in place of
    a SparseTensorProto together with a sparsity value below the caller's
    threshold, so the caller keeps the dense initializer.
    """
    tensor_data = numpy_helper.to_array(tensor).flatten()
    data_len = len(tensor_data)
    if data_len == 0:
        # Empty tensor: nothing to convert; report zero sparsity so the
        # caller keeps it dense (also avoids dividing by zero below).
        return (object(), 0.0)

    values = []
    indices = []
    # Gather the non-zero entries. Floating point elements within `tolerance`
    # of zero are treated as zero; all other element types compare exactly.
    # BUGFIX: the membership test must use the ONNX enum tensor.data_type --
    # real_types holds TensorProto enum ints, which a numpy dtype never equals,
    # so the previous `tensor_data.dtype in real_types` was always False.
    if tensor.data_type in real_types:
        for index in range(data_len):
            el = tensor_data[index]
            # BUGFIX: keep elements *greater* than the tolerance; the previous
            # inverted comparison collected the (near-)zero elements instead.
            if abs(el) > tolerance:
                values.append(el)
                indices.append(index)
    else:
        for index in range(data_len):
            el = tensor_data[index]
            if el != 0:
                values.append(el)
                indices.append(index)
    nnz_count = len(values)

    sparsity = 1.0 - float(nnz_count) / data_len

    # Pick the narrowest signed integer type able to hold the largest flat
    # index. Indices were appended in ascending order, so the last is the max.
    ind_data_type = TensorProto.INT8
    ind_dtype = np.int8
    ind_len = len(indices)
    max_indices_value = 0
    if ind_len > 0:
        max_indices_value = indices[-1]
        if max_indices_value <= np.iinfo(np.int8).max:
            ind_data_type = TensorProto.INT8
            ind_dtype = np.int8
        elif max_indices_value <= np.iinfo(np.int16).max:
            ind_data_type = TensorProto.INT16
            ind_dtype = np.int16
        elif max_indices_value <= np.iinfo(np.int32).max:
            ind_data_type = TensorProto.INT32
            ind_dtype = np.int32
        else:
            ind_data_type = TensorProto.INT64
            ind_dtype = np.int64

    logger.debug(
        f"initializer={tensor.name}, dtype={tensor_data.dtype}, \
data_len={data_len}, nnz={nnz_count}, sparsity={sparsity}, \
max_indices_value={max_indices_value}, sparse_indices_type={ind_dtype}"
    )

    if sparsity < sparsity_threshold:
        return (object(), sparsity)

    tensor_data_bytes = tensor_data.nbytes
    # create np array and cast data to the appropriate type
    np_values = np.array(values).astype(tensor_data.dtype)
    # create np array and cast data to the inferred index type
    np_indices = np.array(indices).astype(ind_dtype)
    total_sparse_bytes = np_values.nbytes + np_indices.nbytes

    logger.debug(
        f"initializer={tensor.name}, initializer_bytes={tensor_data_bytes}, \
sparse_initializer_bytes={total_sparse_bytes}"
    )

    # This check is usually useful for sparsity_threshold=0.5 where much
    # depends on the size of the indices entries and the size of the original tensor.
    # Big dense tensors command larger indices data type and for large float32 tensors
    # int32 indices are often selected, thus we really want to guard against loosing
    # rather than winning.
    if tensor_data_bytes <= total_sparse_bytes:
        sparsity = 1.0 - float(tensor_data_bytes) / total_sparse_bytes
        logger.debug(f"initializer={tensor.name}, adjusted_sparsity={sparsity}")
        return (object(), sparsity)

    values_tensor = onnx.helper.make_tensor(tensor.name, tensor.data_type, [len(values)], np_values.tobytes(), raw=True)

    # NOTE: the "_indicies" (sic) suffix is kept as-is; it names the indices
    # tensor inside the emitted model, so changing it would change the output.
    indices_tensor = onnx.helper.make_tensor(
        tensor.name + "_indicies", ind_data_type, [ind_len], np_indices.tobytes(), raw=True
    )

    sparse_tensor = onnx.helper.make_sparse_tensor(values_tensor, indices_tensor, tensor.dims)
    return (sparse_tensor, sparsity)
|
|
|
|
|
|
def convert_initializers(model, exclude_names, sparsity_threshold, tolerance):  # type: (ModelProto, List[str], float, float) -> None
    """Move sufficiently sparse initializers into model.graph.sparse_initializer.

    Initializers listed in `exclude_names` and boolean initializers are left
    dense. The model graph is modified in place.
    """
    graph = model.graph
    sparse_tensors = []
    dense_tensors = []
    for tensor in graph.initializer:
        if tensor.name in exclude_names:
            logger.info(f"initializer={tensor.name} was excluded")
            continue
        # Boolean data has no meaningful tolerance/zero notion here.
        if tensor.data_type == TensorProto.BOOL:
            logger.info(f"initializer={tensor.name} contains bool, not converted")
            dense_tensors.append(tensor)
            continue
        sparse_tensor, sparsity = convert_tensor_to_sparse(tensor, sparsity_threshold, tolerance)
        if sparsity >= sparsity_threshold:
            logger.info(f"initializer={tensor.name} converted. sparsity={sparsity}")
            sparse_tensors.append(sparse_tensor)
        else:
            dense_tensors.append(tensor)
            logger.info(f"initializer={tensor.name} is not converted. sparsity={sparsity}")

    # Rewrite both repeated fields: converted tensors move to
    # sparse_initializer, the rest stay in initializer.
    graph.sparse_initializer.extend(sparse_tensors)
    del graph.initializer[:]
    graph.initializer.extend(dense_tensors)
|
|
|
|
|
|
def main():
    """Entry point: load the model, sparsify eligible initializers, save it."""
    args = parse_arguments()
    setup_logging(args.verbose)

    exclude_names = set() if args.exclude is None else set(args.exclude.split(";"))

    model = ModelProto()
    with open(args.input, "rb") as model_file:
        model.ParseFromString(model_file.read())

    convert_initializers(model, exclude_names, args.sparsity_threshold, args.tolerance)

    with open(args.output, "wb") as out_file:
        out_file.write(model.SerializeToString())
|
|
|
|
|
|
# Run the tool only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|