This commit is contained in:
rachguo 2022-03-01 02:08:32 -08:00
parent e47434ea12
commit f19bae944b
4 changed files with 20 additions and 2 deletions

View file

@ -69,6 +69,11 @@ static const char* const kOrtSessionOptionsConfigAllowIntraOpSpinning = "session
// has to guarantee that the model bytes are valid until the ORT session using the model bytes is destroyed.
static const char* const kOrtSessionOptionsConfigUseORTModelBytesDirectly = "session.use_ort_model_bytes_directly";
// It controls whether int8 QDQ is allowed in a QDQ format model or not.
// "0": not allowed; "1": allowed. It is used as a forced option when exporting to an ORT format model to disable
// certain usage of unsupported QDQ transformers (in a minimal build), such as QDQS8ToU8Transformer.
static const char* const kOrtSessionOptionsQDQIsInt8Allowed = "session.qdqisint8allowed";
// Save information for replaying graph optimizations later instead of applying them directly.
//
// When an ONNX model is loaded, ORT can perform various optimizations on the graph.

View file

@ -158,6 +158,8 @@ InlinedVector<std::unique_ptr<GraphTransformer>> GenerateTransformers(
InlinedVector<std::unique_ptr<GraphTransformer>> transformers;
const bool disable_quant_qdq =
session_options.config_options.GetConfigOrDefault(kOrtSessionOptionsDisableQuantQDQ, "0") == "1";
const bool qdq_is_int8_allowed =
session_options.config_options.GetConfigOrDefault(kOrtSessionOptionsQDQIsInt8Allowed, "1") == "1";
#ifndef DISABLE_CONTRIB_OPS
const bool enable_gelu_approximation =
session_options.config_options.GetConfigOrDefault(kOrtSessionOptionsEnableGeluApproximation, "0") == "1";
@ -205,7 +207,10 @@ InlinedVector<std::unique_ptr<GraphTransformer>> GenerateTransformers(
onnxruntime::kArmNNExecutionProvider};
if (!disable_quant_qdq) {
if (!QDQIsInt8Allowed()) {
// Currently we don't support QDQS8ToU8Transformer in a minimal build. If it were supported, it would need to run in
// Level 1 during export and not in Level 2 at runtime, as that would result in overlapping optimizations, which
// runtime optimization does not support. So a session config value is added here to force qdqisint8allowed to true.
if (!qdq_is_int8_allowed) {
transformers.emplace_back(std::make_unique<QDQS8ToU8Transformer>(cpu_ep));
}
transformers.emplace_back(std::make_unique<QDQSelectorActionTransformer>());

View file

@ -18,7 +18,7 @@ inline constexpr bool QDQIsInt8Allowed(){
/**
Transformer that fuses QDQ and fp32 ops into quantized ops.
Transformer that fuses QDQ and fp32 ops into quantized ops.
*/
class QDQSelectorActionTransformer : public SelectorActionTransformer {
public:

View file

@ -197,6 +197,11 @@ def parse_args():
help='Specify the list of NNAPI EP partitioning stop ops. '
'In particular, specify the value of the "ep.nnapi.partitioning_stop_ops" session '
'options config entry.')
parser.add_argument('--target_platform', type=str, default='arm', choices=['arm', 'amd64'],
help='Specify the target platform where the exported model will be used.'
'This parameter can be used to choose between platform specifically related options,'
'such as QDQIsInt8Allowed or not, NCHWc (amd64) and NHWC (arm) format optimizer level options,etc.')
parser.add_argument('model_path_or_dir', type=pathlib.Path,
help='Provide path to ONNX model or directory containing ONNX model/s to convert. '
@ -227,6 +232,9 @@ def convert_onnx_models_to_ort():
if args.nnapi_partitioning_stop_ops is not None:
session_options_config_entries["ep.nnapi.partitioning_stop_ops"] = args.nnapi_partitioning_stop_ops
if args.target_platform == 'arm':
session_options_config_entries["session.qdqisint8allowed"] = "1"
for optimization_level in args.optimization_level:
print(f"Converting models and creating configuration file for optimization level '{optimization_level}'")