From a9dc50ba8b324e4f653eff2b172421d8b2e1b3c3 Mon Sep 17 00:00:00 2001 From: Rachel Guo <35738743+YUNQIUGUO@users.noreply.github.com> Date: Wed, 2 Mar 2022 23:26:14 -0800 Subject: [PATCH] Add option to force QDQIsInt8Allowed to return true when exporting to ORT format (#10719) * wip * save * minor update * fix * fix * Revert "fix" This reverts commit a76f364b2d9b4b0967d8d8c852d5cc05f02e5360. * revert * revert * revert submodule removal * address pr comments * minor fix * address cr comments * fix format Co-authored-by: rachguo --- .../onnxruntime_session_options_config_keys.h | 4 ++++ .../core/optimizer/graph_transformer_utils.cc | 10 ++++++++-- .../qdq_selector_action_transformer.cc | 4 ++-- .../qdq_selector_action_transformer.h | 4 ++-- .../python/util/convert_onnx_models_to_ort.py | 18 +++++++++++++++--- 5 files changed, 31 insertions(+), 9 deletions(-) diff --git a/include/onnxruntime/core/session/onnxruntime_session_options_config_keys.h b/include/onnxruntime/core/session/onnxruntime_session_options_config_keys.h index 82fe74cf76..457d20f9a1 100644 --- a/include/onnxruntime/core/session/onnxruntime_session_options_config_keys.h +++ b/include/onnxruntime/core/session/onnxruntime_session_options_config_keys.h @@ -78,6 +78,10 @@ static const char* const kOrtSessionOptionsConfigAllowIntraOpSpinning = "session // has to guarantee that the model bytes are valid until the ORT session using the model bytes is destroyed. static const char* const kOrtSessionOptionsConfigUseORTModelBytesDirectly = "session.use_ort_model_bytes_directly"; +// This should only be specified when exporting an ORT format model for use on a different platform. +// If the ORT format model will be used on ARM platforms set to "1". For other platforms set to "0" +static const char* const kOrtSessionOptionsQDQIsInt8Allowed = "session.qdqisint8allowed"; + // Save information for replaying graph optimizations later instead of applying them directly. // // When an ONNX model is loaded, ORT can perform various optimizations on the graph. diff --git a/onnxruntime/core/optimizer/graph_transformer_utils.cc b/onnxruntime/core/optimizer/graph_transformer_utils.cc index 93146ad459..c9bfddb56f 100644 --- a/onnxruntime/core/optimizer/graph_transformer_utils.cc +++ b/onnxruntime/core/optimizer/graph_transformer_utils.cc @@ -7,6 +7,7 @@ #include "core/optimizer/conv_activation_fusion.h" #include "core/optimizer/qdq_transformer/selectors_actions/qdq_selector_action_transformer.h" +#include "core/optimizer/selectors_actions/selector_action_transformer_apply_contexts.h" #include "core/session/onnxruntime_session_options_config_keys.h" #if !defined(ORT_MINIMAL_BUILD) @@ -161,6 +162,8 @@ InlinedVector> GenerateTransformers( session_options.config_options.GetConfigOrDefault(kOrtSessionOptionsDisableQuantQDQ, "0") == "1"; const bool enable_quant_qdq_cleanup = session_options.config_options.GetConfigOrDefault(kOrtSessionOptionsEnableQuantQDQCleanup, "0") == "1"; + const bool qdq_is_int8_allowed = + session_options.config_options.GetConfigOrDefault(kOrtSessionOptionsQDQIsInt8Allowed, QDQIsInt8Allowed() ? "1" : "0") == "1"; #ifndef DISABLE_CONTRIB_OPS const bool enable_gelu_approximation = session_options.config_options.GetConfigOrDefault(kOrtSessionOptionsEnableGeluApproximation, "0") == "1"; @@ -208,10 +211,13 @@ InlinedVector> GenerateTransformers( onnxruntime::kArmNNExecutionProvider}; if (!disable_quant_qdq) { - if (!QDQIsInt8Allowed()) { + // currently we don't support QDQS8ToU8Transformer in a minimal build and if supported, this needs to run in + // Level 1 during export and not Level 2 at runtime as it would result in overlapping optimizations which + // runtime optimization does not support, so add session config value here to force qdqisint8allowed to be true. + if (!qdq_is_int8_allowed) { transformers.emplace_back(std::make_unique(cpu_ep)); } - transformers.emplace_back(std::make_unique()); + transformers.emplace_back(std::make_unique(SatApplyContextVariant{}, qdq_is_int8_allowed)); } transformers.emplace_back(std::make_unique(cpu_ep)); diff --git a/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/qdq_selector_action_transformer.cc b/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/qdq_selector_action_transformer.cc index e137ba3f94..694c2cab6b 100644 --- a/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/qdq_selector_action_transformer.cc +++ b/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/qdq_selector_action_transformer.cc @@ -208,10 +208,10 @@ SelectorActionRegistry CreateSelectorActionRegistry(bool is_int8_allowed) { } // namespace QDQSelectorActionTransformer::QDQSelectorActionTransformer( - const SatApplyContextVariant& apply_context) + const SatApplyContextVariant& apply_context, bool is_int8_allowed) : SelectorActionTransformer{ "QDQSelectorActionTransformer", - CreateSelectorActionRegistry(QDQIsInt8Allowed()), + CreateSelectorActionRegistry(is_int8_allowed), apply_context, // this transformer is only compatible with the CPU EP {kCpuExecutionProvider}} { diff --git a/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/qdq_selector_action_transformer.h b/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/qdq_selector_action_transformer.h index d8b265d420..2c48109de1 100644 --- a/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/qdq_selector_action_transformer.h +++ b/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/qdq_selector_action_transformer.h @@ -18,11 +18,11 @@ inline constexpr bool QDQIsInt8Allowed(){ /** -Transformer that fuses QDQ and fp32 ops into quantized ops. +Transformer that fuses QDQ and fp32 ops into quantized ops. */ class QDQSelectorActionTransformer : public SelectorActionTransformer { public: - QDQSelectorActionTransformer(const SatApplyContextVariant& apply_context = {}); + QDQSelectorActionTransformer(const SatApplyContextVariant& apply_context = {}, bool is_int8_allowed = false); }; } // namespace onnxruntime diff --git a/tools/python/util/convert_onnx_models_to_ort.py b/tools/python/util/convert_onnx_models_to_ort.py index e057857054..48c7d6552b 100644 --- a/tools/python/util/convert_onnx_models_to_ort.py +++ b/tools/python/util/convert_onnx_models_to_ort.py @@ -62,7 +62,7 @@ def _create_session_options(optimization_level: ort.GraphOptimizationLevel, def _convert(model_path_or_dir: pathlib.Path, optimization_level_str: str, use_nnapi: bool, use_coreml: bool, custom_op_library: pathlib.Path, create_optimized_onnx_model: bool, allow_conversion_failures: bool, - session_options_config_entries: typing.Dict[str, str]): + target_platform: str, session_options_config_entries: typing.Dict[str, str]): optimization_level = get_optimization_level(optimization_level_str) @@ -91,7 +91,7 @@ def _convert(model_path_or_dir: pathlib.Path, optimization_level_str: str, use_n # If someone really really really wants to run it they could manually create an optimized onnx model first, # or they could comment out this code. optimizer_filter = None - if optimization_level == ort.GraphOptimizationLevel.ORT_ENABLE_ALL: + if optimization_level == ort.GraphOptimizationLevel.ORT_ENABLE_ALL and target_platform != 'amd64': optimizer_filter = ['NchwcTransformer'] num_failures = 0 @@ -198,6 +198,12 @@ def parse_args(): 'In particular, specify the value of the "ep.nnapi.partitioning_stop_ops" session ' 'options config entry.') + parser.add_argument('--target_platform', type=str, default=None, choices=['arm', 'amd64'], + help='Specify the target platform where the exported model will be used. ' + 'This parameter can be used to choose between platform specific options, ' + 'such as QDQIsInt8Allowed(arm), NCHWc (amd64) and NHWC (arm/amd64) format different ' + 'optimizer level options,etc.') + parser.add_argument('model_path_or_dir', type=pathlib.Path, help='Provide path to ONNX model or directory containing ONNX model/s to convert. ' 'All files with a .onnx extension, including in subdirectories, will be processed.') @@ -228,10 +234,16 @@ def convert_onnx_models_to_ort(): if args.nnapi_partitioning_stop_ops is not None: session_options_config_entries["ep.nnapi.partitioning_stop_ops"] = args.nnapi_partitioning_stop_ops + if args.target_platform == 'arm': + session_options_config_entries["session.qdqisint8allowed"] = "1" + else: + session_options_config_entries["session.qdqisint8allowed"] = "0" + for optimization_level in args.optimization_level: print(f"Converting models and creating configuration file for optimization level '{optimization_level}'") _convert(model_path_or_dir, optimization_level, args.use_nnapi, args.use_coreml, custom_op_library, - args.save_optimized_onnx_model, args.allow_conversion_failures, session_options_config_entries) + args.save_optimized_onnx_model, args.allow_conversion_failures, args.target_platform, + session_options_config_entries) _create_config_file_from_ort_models(model_path_or_dir, optimization_level, args.enable_type_reduction)