wip

2026-07-21 19:18:55 +00:00 · 2022-03-01 02:08:32 -08:00 · 2022-03-01 02:08:32 -08:00 · f19bae944b
commit f19bae944b
parent e47434ea12
4 changed files with 20 additions and 2 deletions
--- a/include/onnxruntime/core/session/onnxruntime_session_options_config_keys.h
+++ b/include/onnxruntime/core/session/onnxruntime_session_options_config_keys.h
@ -69,6 +69,11 @@ static const char* const kOrtSessionOptionsConfigAllowIntraOpSpinning = "session
 // has to guarantee that the model bytes are valid until the ORT session using the model bytes is destroyed.
 static const char* const kOrtSessionOptionsConfigUseORTModelBytesDirectly = "session.use_ort_model_bytes_directly";

+// Controls whether int8 is allowed in a QDQ format model.
+// "0": not allowed; "1": allowed. It is used as a forced option when exporting to an ORT format model to disable
+// unsupported QDQ transformers (in a minimal build), such as QDQS8ToU8Transformer.
+static const char* const kOrtSessionOptionsQDQIsInt8Allowed = "session.qdqisint8allowed";
+
 // Save information for replaying graph optimizations later instead of applying them directly.
 //
 // When an ONNX model is loaded, ORT can perform various optimizations on the graph.
--- a/onnxruntime/core/optimizer/graph_transformer_utils.cc
+++ b/onnxruntime/core/optimizer/graph_transformer_utils.cc
@ -158,6 +158,8 @@ InlinedVector<std::unique_ptr<GraphTransformer>> GenerateTransformers(
  InlinedVector<std::unique_ptr<GraphTransformer>> transformers;
  const bool disable_quant_qdq =
      session_options.config_options.GetConfigOrDefault(kOrtSessionOptionsDisableQuantQDQ, "0") == "1";
+  const bool qdq_is_int8_allowed =
+      session_options.config_options.GetConfigOrDefault(kOrtSessionOptionsQDQIsInt8Allowed, "1") == "1";
 #ifndef DISABLE_CONTRIB_OPS
  const bool enable_gelu_approximation =
      session_options.config_options.GetConfigOrDefault(kOrtSessionOptionsEnableGeluApproximation, "0") == "1";
@ -205,7 +207,10 @@ InlinedVector<std::unique_ptr<GraphTransformer>> GenerateTransformers(
                                                                            onnxruntime::kArmNNExecutionProvider};

      if (!disable_quant_qdq) {
-        if (!QDQIsInt8Allowed()) {
+        // QDQS8ToU8Transformer is not currently supported in a minimal build. If it were, it would need to run in
+        // Level 1 during export rather than Level 2 at runtime, as that would produce overlapping optimizations,
+        // which runtime optimization does not support. So use a session config value to force qdqisint8allowed to true.
+        if (!qdq_is_int8_allowed) {
          transformers.emplace_back(std::make_unique<QDQS8ToU8Transformer>(cpu_ep));
        }
        transformers.emplace_back(std::make_unique<QDQSelectorActionTransformer>());
--- a/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/qdq_selector_action_transformer.h
+++ b/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/qdq_selector_action_transformer.h
@ -18,7 +18,7 @@ inline constexpr bool QDQIsInt8Allowed(){


 /**
-Transformer that fuses QDQ and fp32 ops into quantized ops. 
+Transformer that fuses QDQ and fp32 ops into quantized ops.
 */
 class QDQSelectorActionTransformer : public SelectorActionTransformer {
 public:
--- a/tools/python/util/convert_onnx_models_to_ort.py
+++ b/tools/python/util/convert_onnx_models_to_ort.py
@ -197,6 +197,11 @@ def parse_args():
                        help='Specify the list of NNAPI EP partitioning stop ops. '
                             'In particular, specify the value of the "ep.nnapi.partitioning_stop_ops" session '
                             'options config entry.')
+    
+    parser.add_argument('--target_platform', type=str, default='arm', choices=['arm', 'amd64'],
+                        help='Specify the target platform where the exported model will be used.'
+                             'This parameter can be used to choose between platform specifically related options,'
+                             'such as QDQIsInt8Allowed or not, NCHWc (amd64) and NHWC (arm) format optimizer level options,etc.')

    parser.add_argument('model_path_or_dir', type=pathlib.Path,
                        help='Provide path to ONNX model or directory containing ONNX model/s to convert. '
@ -227,6 +232,9 @@ def convert_onnx_models_to_ort():

    if args.nnapi_partitioning_stop_ops is not None:
        session_options_config_entries["ep.nnapi.partitioning_stop_ops"] = args.nnapi_partitioning_stop_ops
+    
+    if args.target_platform == 'arm':
+        session_options_config_entries["session.qdqisint8allowed"] = "1"

    for optimization_level in args.optimization_level:
        print(f"Converting models and creating configuration file for optimization level '{optimization_level}'")