mirror of
https://github.com/saymrwulf/onnxruntime.git
synced 2026-06-03 23:49:44 +00:00
wip
This commit is contained in:
parent
e47434ea12
commit
f19bae944b
4 changed files with 20 additions and 2 deletions
|
|
@ -69,6 +69,11 @@ static const char* const kOrtSessionOptionsConfigAllowIntraOpSpinning = "session
|
|||
// has to guarantee that the model bytes are valid until the ORT session using the model bytes is destroyed.
|
||||
static const char* const kOrtSessionOptionsConfigUseORTModelBytesDirectly = "session.use_ort_model_bytes_directly";
|
||||
|
||||
// Controls whether int8 is allowed in a QDQ format model.
|
||||
// "0": not allowed; "1": allowed. It is used as a forced option when exporting to an ORT format model to disable certain
|
||||
// usage of unsupported QDQ transformers (in a minimal build), such as QDQS8ToU8Transformer.
|
||||
static const char* const kOrtSessionOptionsQDQIsInt8Allowed = "session.qdqisint8allowed";
|
||||
|
||||
// Save information for replaying graph optimizations later instead of applying them directly.
|
||||
//
|
||||
// When an ONNX model is loaded, ORT can perform various optimizations on the graph.
|
||||
|
|
|
|||
|
|
@ -158,6 +158,8 @@ InlinedVector<std::unique_ptr<GraphTransformer>> GenerateTransformers(
|
|||
InlinedVector<std::unique_ptr<GraphTransformer>> transformers;
|
||||
const bool disable_quant_qdq =
|
||||
session_options.config_options.GetConfigOrDefault(kOrtSessionOptionsDisableQuantQDQ, "0") == "1";
|
||||
const bool qdq_is_int8_allowed =
|
||||
session_options.config_options.GetConfigOrDefault(kOrtSessionOptionsQDQIsInt8Allowed, "1") == "1";
|
||||
#ifndef DISABLE_CONTRIB_OPS
|
||||
const bool enable_gelu_approximation =
|
||||
session_options.config_options.GetConfigOrDefault(kOrtSessionOptionsEnableGeluApproximation, "0") == "1";
|
||||
|
|
@ -205,7 +207,10 @@ InlinedVector<std::unique_ptr<GraphTransformer>> GenerateTransformers(
|
|||
onnxruntime::kArmNNExecutionProvider};
|
||||
|
||||
if (!disable_quant_qdq) {
|
||||
if (!QDQIsInt8Allowed()) {
|
||||
// currently we don't support QDQS8ToU8Transformer in a minimal build and if supported, this needs to run in
|
||||
// Level 1 during export and not Level 2 at runtime as it would result in overlapping optimizations which
|
||||
// runtime optimization does not support, so add a session config value here to force qdqisint8allowed to be true.
|
||||
if (!qdq_is_int8_allowed) {
|
||||
transformers.emplace_back(std::make_unique<QDQS8ToU8Transformer>(cpu_ep));
|
||||
}
|
||||
transformers.emplace_back(std::make_unique<QDQSelectorActionTransformer>());
|
||||
|
|
|
|||
|
|
@ -18,7 +18,7 @@ inline constexpr bool QDQIsInt8Allowed(){
|
|||
|
||||
|
||||
/**
|
||||
Transformer that fuses QDQ and fp32 ops into quantized ops.
|
||||
Transformer that fuses QDQ and fp32 ops into quantized ops.
|
||||
*/
|
||||
class QDQSelectorActionTransformer : public SelectorActionTransformer {
|
||||
public:
|
||||
|
|
|
|||
|
|
@ -197,6 +197,11 @@ def parse_args():
|
|||
help='Specify the list of NNAPI EP partitioning stop ops. '
|
||||
'In particular, specify the value of the "ep.nnapi.partitioning_stop_ops" session '
|
||||
'options config entry.')
|
||||
|
||||
parser.add_argument('--target_platform', type=str, default='arm', choices=['arm', 'amd64'],
|
||||
help='Specify the target platform where the exported model will be used.'
|
||||
'This parameter can be used to choose between platform specifically related options,'
|
||||
'such as QDQIsInt8Allowed or not, NCHWc (amd64) and NHWC (arm) format optimizer level options,etc.')
|
||||
|
||||
parser.add_argument('model_path_or_dir', type=pathlib.Path,
|
||||
help='Provide path to ONNX model or directory containing ONNX model/s to convert. '
|
||||
|
|
@ -227,6 +232,9 @@ def convert_onnx_models_to_ort():
|
|||
|
||||
if args.nnapi_partitioning_stop_ops is not None:
|
||||
session_options_config_entries["ep.nnapi.partitioning_stop_ops"] = args.nnapi_partitioning_stop_ops
|
||||
|
||||
if args.target_platform == 'arm':
|
||||
session_options_config_entries["session.qdqisint8allowed"] = "1"
|
||||
|
||||
for optimization_level in args.optimization_level:
|
||||
print(f"Converting models and creating configuration file for optimization level '{optimization_level}'")
|
||||
|
|
|
|||
Loading…
Reference in a new issue