From f19bae944bac72001ca7212925527e6df8376fa1 Mon Sep 17 00:00:00 2001
From: rachguo <rachguo@rachguos-Mini.attlocal.net>
Date: Tue, 1 Mar 2022 02:08:32 -0800
Subject: [PATCH] wip

---
 .../session/onnxruntime_session_options_config_keys.h     | 5 +++++
 onnxruntime/core/optimizer/graph_transformer_utils.cc     | 7 ++++++-
 .../selectors_actions/qdq_selector_action_transformer.h   | 2 +-
 tools/python/util/convert_onnx_models_to_ort.py           | 8 ++++++++
 4 files changed, 20 insertions(+), 2 deletions(-)
diff --git a/include/onnxruntime/core/session/onnxruntime_session_options_config_keys.h b/include/onnxruntime/core/session/onnxruntime_session_options_config_keys.h
index 55e328b1bb..afc8ae0cd7 100644
--- a/include/onnxruntime/core/session/onnxruntime_session_options_config_keys.h
+++ b/include/onnxruntime/core/session/onnxruntime_session_options_config_keys.h
@@ -69,6 +69,11 @@ static const char* const kOrtSessionOptionsConfigAllowIntraOpSpinning = "session
 // has to guarantee that the model bytes are valid until the ORT session using the model bytes is destroyed.
 static const char* const kOrtSessionOptionsConfigUseORTModelBytesDirectly = "session.use_ort_model_bytes_directly";
 
+// Controls whether int8 is allowed in a QDQ format model.
+// "0": not allowed; "1": allowed. It is used as a forced option when exporting to an ORT format model to disable
+// certain QDQ transformers that are unsupported in a minimal build, such as QDQS8ToU8Transformer.
+static const char* const kOrtSessionOptionsQDQIsInt8Allowed = "session.qdqisint8allowed";
+
 // Save information for replaying graph optimizations later instead of applying them directly.
 //
 // When an ONNX model is loaded, ORT can perform various optimizations on the graph.
diff --git a/onnxruntime/core/optimizer/graph_transformer_utils.cc b/onnxruntime/core/optimizer/graph_transformer_utils.cc
index 276c0c629a..27025170f4 100644
--- a/onnxruntime/core/optimizer/graph_transformer_utils.cc
+++ b/onnxruntime/core/optimizer/graph_transformer_utils.cc
@@ -158,6 +158,8 @@ InlinedVector<std::unique_ptr<GraphTransformer>> GenerateTransformers(
   InlinedVector<std::unique_ptr<GraphTransformer>> transformers;
   const bool disable_quant_qdq =
       session_options.config_options.GetConfigOrDefault(kOrtSessionOptionsDisableQuantQDQ, "0") == "1";
+  const bool qdq_is_int8_allowed = session_options.config_options.GetConfigOrDefault(
+      kOrtSessionOptionsQDQIsInt8Allowed, QDQIsInt8Allowed() ? "1" : "0") == "1";  // unset -> build-time default
 #ifndef DISABLE_CONTRIB_OPS
   const bool enable_gelu_approximation =
       session_options.config_options.GetConfigOrDefault(kOrtSessionOptionsEnableGeluApproximation, "0") == "1";
@@ -205,7 +207,10 @@ InlinedVector<std::unique_ptr<GraphTransformer>> GenerateTransformers(
                                                                             onnxruntime::kArmNNExecutionProvider};
 
       if (!disable_quant_qdq) {
-        if (!QDQIsInt8Allowed()) {
+        // QDQS8ToU8Transformer is currently not supported in a minimal build. If it were, it would need to run in
+        // Level 1 during export rather than Level 2 at runtime, as that would result in overlapping optimizations,
+        // which runtime optimization does not support. A session config value lets qdqisint8allowed be forced to true.
+        if (!qdq_is_int8_allowed) {
           transformers.emplace_back(std::make_unique<QDQS8ToU8Transformer>(cpu_ep));
         }
         transformers.emplace_back(std::make_unique<QDQSelectorActionTransformer>());
diff --git a/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/qdq_selector_action_transformer.h b/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/qdq_selector_action_transformer.h
index b324f3962d..b7d7e026bc 100644
--- a/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/qdq_selector_action_transformer.h
+++ b/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/qdq_selector_action_transformer.h
@@ -18,7 +18,7 @@ inline constexpr bool QDQIsInt8Allowed(){
 
 
 /**
-Transformer that fuses QDQ and fp32 ops into quantized ops. 
+Transformer that fuses QDQ and fp32 ops into quantized ops.
 */
 class QDQSelectorActionTransformer : public SelectorActionTransformer {
  public:
diff --git a/tools/python/util/convert_onnx_models_to_ort.py b/tools/python/util/convert_onnx_models_to_ort.py
index e057857054..19589ecaf2 100644
--- a/tools/python/util/convert_onnx_models_to_ort.py
+++ b/tools/python/util/convert_onnx_models_to_ort.py
@@ -197,6 +197,11 @@ def parse_args():
                         help='Specify the list of NNAPI EP partitioning stop ops. '
                              'In particular, specify the value of the "ep.nnapi.partitioning_stop_ops" session '
                              'options config entry.')
+    
+    parser.add_argument('--target_platform', type=str, default='arm', choices=['arm', 'amd64'],
+                        help='Specify the target platform where the exported model will be used. '
+                             'This parameter can be used to choose between platform specifically related options, '
+                             'such as QDQIsInt8Allowed or not, NCHWc (amd64) and NHWC (arm) format optimizer level options, etc.')
 
     parser.add_argument('model_path_or_dir', type=pathlib.Path,
                         help='Provide path to ONNX model or directory containing ONNX model/s to convert. '
@@ -227,6 +232,9 @@ def convert_onnx_models_to_ort():
 
     if args.nnapi_partitioning_stop_ops is not None:
         session_options_config_entries["ep.nnapi.partitioning_stop_ops"] = args.nnapi_partitioning_stop_ops
+    
+    if args.target_platform == 'arm':
+        session_options_config_entries["session.qdqisint8allowed"] = "1"
 
     for optimization_level in args.optimization_level:
         print(f"Converting models and creating configuration file for optimization level '{optimization_level}'")