From f19bae944bac72001ca7212925527e6df8376fa1 Mon Sep 17 00:00:00 2001 From: rachguo Date: Tue, 1 Mar 2022 02:08:32 -0800 Subject: [PATCH] wip --- .../session/onnxruntime_session_options_config_keys.h | 5 +++++ onnxruntime/core/optimizer/graph_transformer_utils.cc | 7 ++++++- .../selectors_actions/qdq_selector_action_transformer.h | 2 +- tools/python/util/convert_onnx_models_to_ort.py | 8 ++++++++ 4 files changed, 20 insertions(+), 2 deletions(-) diff --git a/include/onnxruntime/core/session/onnxruntime_session_options_config_keys.h b/include/onnxruntime/core/session/onnxruntime_session_options_config_keys.h index 55e328b1bb..afc8ae0cd7 100644 --- a/include/onnxruntime/core/session/onnxruntime_session_options_config_keys.h +++ b/include/onnxruntime/core/session/onnxruntime_session_options_config_keys.h @@ -69,6 +69,11 @@ static const char* const kOrtSessionOptionsConfigAllowIntraOpSpinning = "session // has to guarantee that the model bytes are valid until the ORT session using the model bytes is destroyed. static const char* const kOrtSessionOptionsConfigUseORTModelBytesDirectly = "session.use_ort_model_bytes_directly"; +// It controls whether int8 is allowed in a QDQ format model. +// "0": not allowed; "1": allowed. It is used as a forced option when exporting to an ORT format model to disable certain +// usage of unsupported QDQ transformers (in minimal build), such as QDQS8ToU8Transformer. +static const char* const kOrtSessionOptionsQDQIsInt8Allowed = "session.qdqisint8allowed"; + // Save information for replaying graph optimizations later instead of applying them directly. // // When an ONNX model is loaded, ORT can perform various optimizations on the graph. 
diff --git a/onnxruntime/core/optimizer/graph_transformer_utils.cc b/onnxruntime/core/optimizer/graph_transformer_utils.cc index 276c0c629a..27025170f4 100644 --- a/onnxruntime/core/optimizer/graph_transformer_utils.cc +++ b/onnxruntime/core/optimizer/graph_transformer_utils.cc @@ -158,6 +158,8 @@ InlinedVector> GenerateTransformers( InlinedVector> transformers; const bool disable_quant_qdq = session_options.config_options.GetConfigOrDefault(kOrtSessionOptionsDisableQuantQDQ, "0") == "1"; + const bool qdq_is_int8_allowed = + session_options.config_options.GetConfigOrDefault(kOrtSessionOptionsQDQIsInt8Allowed, "1") == "1"; #ifndef DISABLE_CONTRIB_OPS const bool enable_gelu_approximation = session_options.config_options.GetConfigOrDefault(kOrtSessionOptionsEnableGeluApproximation, "0") == "1"; @@ -205,7 +207,10 @@ InlinedVector> GenerateTransformers( onnxruntime::kArmNNExecutionProvider}; if (!disable_quant_qdq) { - if (!QDQIsInt8Allowed()) { + // Currently we don't support QDQS8ToU8Transformer in a minimal build and, if supported, this needs to run in + // Level 1 during export and not Level 2 at runtime as it would result in overlapping optimizations which + // runtime optimization does not support, so a session config value is added here to force qdqisint8allowed to be true. + if (!qdq_is_int8_allowed) { transformers.emplace_back(std::make_unique(cpu_ep)); } transformers.emplace_back(std::make_unique()); diff --git a/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/qdq_selector_action_transformer.h b/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/qdq_selector_action_transformer.h index b324f3962d..b7d7e026bc 100644 --- a/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/qdq_selector_action_transformer.h +++ b/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/qdq_selector_action_transformer.h @@ -18,7 +18,7 @@ inline constexpr bool QDQIsInt8Allowed(){ /** -Transformer that fuses QDQ and fp32 ops into quantized ops. 
+Transformer that fuses QDQ and fp32 ops into quantized ops. */ class QDQSelectorActionTransformer : public SelectorActionTransformer { public: diff --git a/tools/python/util/convert_onnx_models_to_ort.py b/tools/python/util/convert_onnx_models_to_ort.py index e057857054..19589ecaf2 100644 --- a/tools/python/util/convert_onnx_models_to_ort.py +++ b/tools/python/util/convert_onnx_models_to_ort.py @@ -197,6 +197,11 @@ def parse_args(): help='Specify the list of NNAPI EP partitioning stop ops. ' 'In particular, specify the value of the "ep.nnapi.partitioning_stop_ops" session ' 'options config entry.') + + parser.add_argument('--target_platform', type=str, default='arm', choices=['arm', 'amd64'], + help='Specify the target platform where the exported model will be used.' + 'This parameter can be used to choose between platform specifically related options,' + 'such as QDQIsInt8Allowed or not, NCHWc (amd64) and NHWC (arm) format optimizer level options,etc.') parser.add_argument('model_path_or_dir', type=pathlib.Path, help='Provide path to ONNX model or directory containing ONNX model/s to convert. ' @@ -227,6 +232,9 @@ def convert_onnx_models_to_ort(): if args.nnapi_partitioning_stop_ops is not None: session_options_config_entries["ep.nnapi.partitioning_stop_ops"] = args.nnapi_partitioning_stop_ops + + if args.target_platform == 'arm': + session_options_config_entries["session.qdqisint8allowed"] = "1" for optimization_level in args.optimization_level: print(f"Converting models and creating configuration file for optimization level '{optimization_level}'")