mirror of
https://github.com/saymrwulf/onnxruntime.git
synced 2026-06-03 23:49:44 +00:00
OVEP - PR 1.19 (#21443)
### Description Add OVEP features for 1.19 The PR has, - Added support for EpCtx with ORT Session options for optimized performance. - Added bug fixes - Support for OV 2024.3 --------- Co-authored-by: ubuntu <ubuntu@ubuntu-mtlp-118727.iind.intel.com> Co-authored-by: vthaniel <vishnudas.thaniel.s@intel.com> Co-authored-by: sfatimar <sahar.fatima@intel.com> Co-authored-by: saurabhkale17 <saurabh1.kale@intel.com> Co-authored-by: Maheshkar <ankit.maheshkar@intel.com>
This commit is contained in:
parent
ae3ec2e9ac
commit
ca47f0fdd3
21 changed files with 271 additions and 121 deletions
|
|
@ -17,8 +17,8 @@
|
|||
|
||||
# Header paths
|
||||
find_package(OpenVINO REQUIRED COMPONENTS Runtime ONNX)
|
||||
if(OpenVINO_VERSION VERSION_LESS 2023.0)
|
||||
message(FATAL_ERROR "OpenVINO 2023.0 and newer are supported. Please, latest OpenVINO release")
|
||||
if(OpenVINO_VERSION VERSION_LESS 2024.0)
|
||||
message(FATAL_ERROR "OpenVINO 2024.0 and newer are supported. Please, use latest OpenVINO release")
|
||||
endif()
|
||||
|
||||
if (WIN32)
|
||||
|
|
|
|||
|
|
@ -7,6 +7,7 @@ OpenVINO™ Execution Provider for ONNX Runtime accelerates inference across man
|
|||
- Intel® CPUs
|
||||
- Intel® integrated GPUs
|
||||
- Intel® discrete GPUs
|
||||
- Intel® integrated NPUs (Windows only)
|
||||
|
||||
Installation
|
||||
------------
|
||||
|
|
@ -15,26 +16,27 @@ Requirements
|
|||
^^^^^^^^^^^^
|
||||
|
||||
- Ubuntu 18.04, 20.04, RHEL(CPU only) or Windows 10 - 64 bit
|
||||
- Python 3.8 or 3.9 or 3.10 for Linux and only Python3.10 for Windows
|
||||
- Python 3.9, 3.10, or 3.11 for Linux and Python 3.10 or 3.11 for Windows
|
||||
|
||||
This package supports:
|
||||
- Intel® CPUs
|
||||
- Intel® integrated GPUs
|
||||
- Intel® discrete GPUs
|
||||
- Intel® integrated NPUs (Windows only)
|
||||
|
||||
``pip3 install onnxruntime-openvino``
|
||||
|
||||
Please install the OpenVINO™ PyPI package separately for Windows.
|
||||
For installation instructions on Windows please refer to `OpenVINO™ Execution Provider for ONNX Runtime for Windows <https://github.com/intel/onnxruntime/releases/>`_.
|
||||
|
||||
**OpenVINO™ Execution Provider for ONNX Runtime** Linux Wheels comes with pre-built libraries of OpenVINO™ version 2023.0.0 eliminating the need to install OpenVINO™ separately. The OpenVINO™ libraries are prebuilt with CXX11_ABI flag set to 0.
|
||||
**OpenVINO™ Execution Provider for ONNX Runtime** Linux wheels come with pre-built libraries of OpenVINO™ version 2024.1.0, eliminating the need to install OpenVINO™ separately.
|
||||
|
||||
For more details on build and installation please refer to `Build <https://onnxruntime.ai/docs/build/eps.html#openvino>`_.
|
||||
|
||||
Usage
|
||||
^^^^^
|
||||
|
||||
By default, Intel® CPU is used to run inference. However, you can change the default option to either Intel® integrated or discrete GPU.
|
||||
By default, Intel® CPU is used to run inference. However, you can change the default option to an Intel® integrated GPU, discrete GPU, or integrated NPU (Windows only).
|
||||
Invoke `the provider config device type argument <https://onnxruntime.ai/docs/execution-providers/OpenVINO-ExecutionProvider.html#summary-of-options>`_ to change the hardware on which inferencing is done.
|
||||
|
||||
For more API calls and environment variables, see `Usage <https://onnxruntime.ai/docs/execution-providers/OpenVINO-ExecutionProvider.html#configuration-options>`_.
|
||||
|
|
|
|||
|
|
@ -28,9 +28,8 @@ BackendManager::BackendManager(const GlobalContext& global_context,
|
|||
const onnxruntime::Node& fused_node,
|
||||
const onnxruntime::GraphViewer& subgraph,
|
||||
const logging::Logger& logger,
|
||||
EPCtxHandler& ctx_handle) {
|
||||
EPCtxHandler& ep_ctx_handle_) {
|
||||
global_context_ = global_context;
|
||||
ep_ctx_handle_ = ctx_handle;
|
||||
|
||||
openvino_sdk_version_ = std::to_string(global_context_.OpenVINO_Version.at(0)) + "." +
|
||||
std::to_string(global_context_.OpenVINO_Version.at(1));
|
||||
|
|
@ -147,13 +146,20 @@ Status BackendManager::ExportCompiledBlobAsEPCtxNode(const onnxruntime::GraphVie
|
|||
|
||||
std::string model_blob_str;
|
||||
auto compiled_model = concrete_backend_->GetOVCompiledModel();
|
||||
auto graph_name = global_context_.onnx_model_path_name;
|
||||
// Remove extension so we can append suffix to form the complete name of output graph
|
||||
graph_name = [&]() {
|
||||
size_t dot = graph_name.find_last_of(".");
|
||||
if (dot == std::string::npos) return graph_name;
|
||||
return graph_name.substr(0, dot);
|
||||
}();
|
||||
std::string graph_name = "";
|
||||
// Epctx file path from SO is mapped to cache_dir variable for OVEP for readability
|
||||
if (global_context_.cache_dir != "") {
|
||||
graph_name = global_context_.cache_dir;
|
||||
} else {
|
||||
graph_name = global_context_.onnx_model_path_name;
|
||||
// Remove extension so we can append suffix to form the complete name of output graph
|
||||
graph_name = [&]() {
|
||||
size_t dot = graph_name.find_last_of(".");
|
||||
if (dot == std::string::npos) return graph_name;
|
||||
return graph_name.substr(0, dot);
|
||||
}();
|
||||
graph_name = graph_name + "-ov_" + GetGlobalContext().device_type + "_blob.onnx";
|
||||
}
|
||||
// If embed_mode, then pass on the serialized blob
|
||||
// If not embed_mode, dump the blob here and only pass on the path to the blob
|
||||
if (global_context_.ep_context_embed_mode) {
|
||||
|
|
@ -162,9 +168,19 @@ Status BackendManager::ExportCompiledBlobAsEPCtxNode(const onnxruntime::GraphVie
|
|||
model_blob_str = model_blob_stream.str();
|
||||
ORT_ENFORCE(model_blob_str.size() != 0);
|
||||
} else {
|
||||
std::ofstream f(graph_name + ".blob", std::ios::out | std::ios::trunc | std::ios::binary);
|
||||
compiled_model.export_model(f);
|
||||
model_blob_str = graph_name + ".blob";
|
||||
// Remove extension so we can append suffix to form the complete name of output graph
|
||||
auto blob_name = [&]() {
|
||||
size_t dot = graph_name.find_last_of(".");
|
||||
if (dot == std::string::npos) return graph_name;
|
||||
return graph_name.substr(0, dot);
|
||||
}();
|
||||
std::ofstream blob_file(blob_name + ".blob",
|
||||
std::ios::out | std::ios::trunc | std::ios::binary);
|
||||
if (!blob_file) {
|
||||
ORT_THROW("Unable to open file for epctx model dump.");
|
||||
}
|
||||
compiled_model.export_model(blob_file);
|
||||
model_blob_str = blob_name + ".blob";
|
||||
}
|
||||
|
||||
ORT_RETURN_IF_ERROR(ep_ctx_handle_.ExportEPCtxModel(graph_body_viewer,
|
||||
|
|
@ -172,8 +188,7 @@ Status BackendManager::ExportCompiledBlobAsEPCtxNode(const onnxruntime::GraphVie
|
|||
logger,
|
||||
global_context_.ep_context_embed_mode,
|
||||
model_blob_str,
|
||||
openvino_sdk_version_,
|
||||
GetGlobalContext().device_type));
|
||||
openvino_sdk_version_));
|
||||
|
||||
return Status::OK();
|
||||
}
|
||||
|
|
@ -248,7 +263,7 @@ static void DumpOpenVINOEPModel(std::string onnx_model_path_name,
|
|||
ONNX_NAMESPACE::ModelProto* model_proto,
|
||||
const onnxruntime::Node& fused_node) {
|
||||
if (openvino_ep::backend_utils::IsDebugEnabled()) {
|
||||
auto model_name = onnx_model_path_name.empty() ? "unknown.onnx" : onnx_model_path_name;
|
||||
auto model_name = onnx_model_path_name.empty() ? "unknown.onnx" : std::move(onnx_model_path_name);
|
||||
#ifdef _WIN32
|
||||
size_t slash = model_name.find_last_of("\\");
|
||||
#else
|
||||
|
|
|
|||
|
|
@ -37,7 +37,7 @@ BasicBackend::BasicBackend(const ONNX_NAMESPACE::ModelProto& model_proto,
|
|||
PopulateConfigValue(device_config);
|
||||
|
||||
// Enable caching
|
||||
EnableCaching();
|
||||
EnableCaching(device_config);
|
||||
|
||||
// Setting OpenCL queue throttling for GPU
|
||||
EnableGPUThrottling(device_config);
|
||||
|
|
@ -82,26 +82,28 @@ BasicBackend::BasicBackend(const ONNX_NAMESPACE::ModelProto& model_proto,
|
|||
ie_cnn_network_, hw_target, device_config, subgraph_context_.subgraph_name);
|
||||
}
|
||||
#else // !IO_BUFFER_ENABLED
|
||||
std::string prec_str = (global_context_.precision_str != "ACCURACY") ? global_context_.precision_str : global_context_.model_precision;
|
||||
if (is_ep_ctx_graph_) {
|
||||
// If the blob is held in an EPContext node, then skip FE+Compile
|
||||
// and directly move on to creating a backend with the executable blob
|
||||
exe_network_ = global_context_.ie_core.ImportModel(ep_ctx_handle.GetModelBlobStream(),
|
||||
hw_target,
|
||||
device_config,
|
||||
global_context_.ep_context_embed_mode,
|
||||
subgraph_context_.subgraph_name);
|
||||
ie_cnn_network_ = exe_network_.Get().get_runtime_model();
|
||||
} else if (!subgraph_context_.has_dynamic_input_shape) {
|
||||
} else if ((!subgraph_context_.has_dynamic_input_shape) &&
|
||||
((hw_target.find("AUTO") == std::string::npos) ||
|
||||
(global_context_.OpenVINO_Version.at(0) >= 2024 && global_context_.OpenVINO_Version.at(1) > 2))) {
|
||||
// Optimized OV compile_model API is supported with AUTO from version 2024.3 and above
|
||||
// Inputs with static dimensions
|
||||
std::string prec_str = (global_context_.precision_str != "ACCURACY") ? global_context_.precision_str : global_context_.model_precision;
|
||||
const std::string model = model_proto.SerializeAsString();
|
||||
exe_network_ = global_context_.ie_core.CompileModel(model,
|
||||
hw_target,
|
||||
prec_str,
|
||||
global_context_.cache_dir,
|
||||
device_config,
|
||||
subgraph_context_.subgraph_name);
|
||||
ie_cnn_network_ = exe_network_.Get().get_runtime_model();
|
||||
} else { // Inputs with dynamic dimensions
|
||||
} else { // For all other types use ov::Model Type
|
||||
ie_cnn_network_ = CreateOVModel(model_proto, global_context_, const_outputs_map_);
|
||||
exe_network_ = global_context_.ie_core.CompileModel(
|
||||
ie_cnn_network_, hw_target, device_config, subgraph_context_.subgraph_name);
|
||||
|
|
@ -173,13 +175,19 @@ void BasicBackend::PopulateConfigValue(ov::AnyMap& device_config) {
|
|||
}
|
||||
}
|
||||
|
||||
void BasicBackend::EnableCaching() {
|
||||
void BasicBackend::EnableCaching(ov::AnyMap& device_config) {
|
||||
// cache_dir argument has no effect when working with an embed-mode EPContext Graph
|
||||
if (is_ep_ctx_graph_) return;
|
||||
|
||||
if (!global_context_.cache_dir.empty()) {
|
||||
if (!global_context_.cache_dir.empty() && !global_context_.export_ep_ctx_blob) {
|
||||
LOGS_DEFAULT(INFO) << log_tag << "Enables Caching";
|
||||
global_context_.ie_core.SetCache(global_context_.cache_dir, global_context_.device_type);
|
||||
if (global_context_.device_type.find("AUTO:GPU") != std::string::npos) {
|
||||
std::pair<std::string, ov::Any> device_property;
|
||||
device_property = std::make_pair("CACHE_DIR", global_context_.cache_dir);
|
||||
device_config.emplace(ov::device::properties("GPU", device_property));
|
||||
} else {
|
||||
global_context_.ie_core.SetCache(global_context_.cache_dir);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -274,7 +282,7 @@ void BasicBackend::StartAsyncInference(Ort::KernelContext& context, OVInferReque
|
|||
}
|
||||
|
||||
try {
|
||||
infer_request->SetTensor(input_name, tensor_ptr);
|
||||
infer_request->SetTensor(std::move(input_name), tensor_ptr);
|
||||
} catch (const char* msg) {
|
||||
ORT_THROW(msg);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -37,7 +37,7 @@ class BasicBackend : public IBackend {
|
|||
void PopulateCompiledDirectory(std::string, std::string&, std::string&, bool&);
|
||||
bool ValidateSubgraph(std::map<std::string, std::shared_ptr<ov::Node>>& const_outputs_map);
|
||||
void PopulateConfigValue(ov::AnyMap& device_config);
|
||||
void EnableCaching();
|
||||
void EnableCaching(ov::AnyMap& device_config);
|
||||
void EnableGPUThrottling(ov::AnyMap& device_config);
|
||||
void EnableStreams();
|
||||
void SetNumThreads(ov::AnyMap& device_config);
|
||||
|
|
|
|||
|
|
@ -19,8 +19,7 @@ Status EPCtxHandler::ExportEPCtxModel(const GraphViewer& graph_viewer,
|
|||
const logging::Logger& logger,
|
||||
const bool& ep_context_embed_mode,
|
||||
const std::string& model_blob_str,
|
||||
const std::string& openvino_sdk_version,
|
||||
const std::string& device_type) const {
|
||||
const std::string& openvino_sdk_version) const {
|
||||
auto model_build = graph_viewer.CreateModel(logger);
|
||||
auto& graph_build = model_build->MainGraph();
|
||||
|
||||
|
|
@ -77,9 +76,12 @@ Status EPCtxHandler::ExportEPCtxModel(const GraphViewer& graph_viewer,
|
|||
model_proto->set_ir_version(ONNX_NAMESPACE::Version::IR_VERSION);
|
||||
|
||||
// Finally, dump the model
|
||||
std::ofstream dump(graph_name + "-ov_" + device_type + "_blob.onnx",
|
||||
std::ios::out | std::ios::trunc | std::ios::binary);
|
||||
model_proto->SerializeToOstream(dump);
|
||||
std::ofstream epctx_onnx_model(graph_name,
|
||||
std::ios::out | std::ios::trunc | std::ios::binary);
|
||||
if (!epctx_onnx_model) {
|
||||
ORT_THROW("Unable to create epctx onnx model file ");
|
||||
}
|
||||
model_proto->SerializeToOstream(epctx_onnx_model);
|
||||
|
||||
LOGS_DEFAULT(VERBOSE) << "[OpenVINO EP] Export blob as EPContext Node";
|
||||
|
||||
|
|
@ -90,9 +92,7 @@ Status EPCtxHandler::ImportBlobFromEPCtxModel(const GraphViewer& graph_viewer) {
|
|||
auto node = graph_viewer.GetNode(0);
|
||||
auto& attrs = node->GetAttributes();
|
||||
ORT_ENFORCE(attrs.count(EP_CACHE_CONTEXT) > 0);
|
||||
|
||||
model_stream_ = std::make_shared<std::istringstream>(attrs.at(EP_CACHE_CONTEXT).s());
|
||||
|
||||
LOGS_DEFAULT(VERBOSE) << "[OpenVINO EP] Read blob from EPContext Node";
|
||||
|
||||
is_valid_ep_ctx_graph_ = true;
|
||||
|
|
|
|||
|
|
@ -29,8 +29,7 @@ class EPCtxHandler {
|
|||
const logging::Logger& logger,
|
||||
const bool& ep_context_embed_mode,
|
||||
const std::string& model_blob_str,
|
||||
const std::string& openvino_sdk_version,
|
||||
const std::string& device_type) const;
|
||||
const std::string& openvino_sdk_version) const;
|
||||
Status ImportBlobFromEPCtxModel(const GraphViewer& graph_viewer);
|
||||
bool CheckForOVEPCtxNode(const GraphViewer& graph_viewer, std::string openvino_sdk_version) const;
|
||||
bool IsValidOVEPCtxGraph() const { return is_valid_ep_ctx_graph_; }
|
||||
|
|
|
|||
|
|
@ -34,6 +34,7 @@ OpenVINOExecutionProvider::OpenVINOExecutionProvider(const OpenVINOExecutionProv
|
|||
global_context_->export_ep_ctx_blob = info.export_ep_ctx_blob_;
|
||||
global_context_->enable_qdq_optimizer = info.enable_qdq_optimizer_;
|
||||
global_context_->disable_cpu_fallback = info.disable_cpu_fallback_;
|
||||
global_context_->ep_context_embed_mode = info.so_epctx_embed_mode_;
|
||||
|
||||
// to check if target device is available
|
||||
// using ie_core capability GetAvailableDevices to fetch list of devices plugged in
|
||||
|
|
@ -47,7 +48,7 @@ OpenVINOExecutionProvider::OpenVINOExecutionProvider(const OpenVINOExecutionProv
|
|||
info.device_type_.find("AUTO") != std::string::npos) {
|
||||
device_found = true;
|
||||
} else {
|
||||
for (std::string device : available_devices) {
|
||||
for (const std::string& device : available_devices) {
|
||||
if (device.rfind(info.device_type_, 0) == 0) {
|
||||
if (info.device_type_.find("GPU") != std::string::npos && (info.precision_ == "FP32" ||
|
||||
info.precision_ == "FP16" ||
|
||||
|
|
|
|||
|
|
@ -16,16 +16,23 @@
|
|||
|
||||
namespace onnxruntime {
|
||||
|
||||
// Small helper that owns an OpenVINO core object and reports the device
// names that core makes available.
struct OVDevices {
|
||||
// Core instance constructed together with the struct; within this struct it
// is used only for the device query below.
ov::Core core;
|
||||
// Returns the result of core.get_available_devices() as a vector of strings.
std::vector<std::string> get_ov_devices() const {
|
||||
return core.get_available_devices();
|
||||
}
|
||||
};
|
||||
|
||||
static void print_build_options() {
|
||||
std::cout << "[ERROR] INVALID DEVICE BUILD TYPE SPECIFIED" << std::endl;
|
||||
std::cout << "Specify the keyword HETERO (or) MULTI (or) AUTO followed by the devices in the order of priority "
|
||||
<< "you want to build"
|
||||
<< std::endl;
|
||||
std::cout << "The different hardware devices that can be added with HETERO/MULTI/AUTO build "
|
||||
<< "are ['CPU','GPU','NPU']"
|
||||
<< "are ['CPU','GPU','NPU','GPU.x'] where x = 0,1,2 and so on"
|
||||
<< std::endl;
|
||||
std::cout << "An example of how to specify the HETERO or MULTI or AUTO build type. "
|
||||
<< "Ex: HETERO:GPU,CPU Ex: MULTI:GPU,CPU Ex: AUTO:GPU,CPU"
|
||||
<< "Ex: HETERO:GPU,CPU Ex: MULTI:GPU,CPU Ex: AUTO:GPU,CPU Ex: AUTO:GPU.0,CPU Ex: AUTO:GPU.1,CPU"
|
||||
<< std::endl;
|
||||
}
|
||||
|
||||
|
|
@ -40,7 +47,8 @@ static std::vector<std::string> split(const std::string& s, char delim) {
|
|||
return result;
|
||||
}
|
||||
|
||||
static std::vector<std::string> parseDevices(const std::string& device_string) {
|
||||
static std::vector<std::string> parseDevices(const std::string& device_string,
|
||||
const std::vector<std::string>& available_devices) {
|
||||
std::string comma_separated_devices = device_string;
|
||||
if (comma_separated_devices.find(":") != std::string::npos) {
|
||||
comma_separated_devices = comma_separated_devices.substr(comma_separated_devices.find(":") + 1);
|
||||
|
|
@ -50,8 +58,15 @@ static std::vector<std::string> parseDevices(const std::string& device_string) {
|
|||
print_build_options();
|
||||
ORT_THROW("Invalid device string: " + device_string);
|
||||
}
|
||||
std::vector<std::string> dev_options = {"CPU", "GPU", "NPU"};
|
||||
for (std::string dev : devices) {
|
||||
std::set<std::string> dev_options = {"CPU", "GPU", "NPU"};
|
||||
|
||||
for (auto& device : available_devices) {
|
||||
if (dev_options.find(device) == dev_options.end()) {
|
||||
auto dev_options_update = dev_options.emplace(device);
|
||||
}
|
||||
}
|
||||
|
||||
for (const std::string& dev : devices) {
|
||||
if (!std::count(dev_options.begin(), dev_options.end(), dev)) {
|
||||
print_build_options();
|
||||
ORT_THROW("Invalid device string: " + device_string);
|
||||
|
|
@ -75,28 +90,42 @@ struct OpenVINOExecutionProviderInfo {
|
|||
bool export_ep_ctx_blob_{false};
|
||||
bool enable_qdq_optimizer_{false};
|
||||
bool disable_cpu_fallback_{false};
|
||||
bool so_epctx_embed_mode_{true};
|
||||
|
||||
OpenVINOExecutionProviderInfo() = delete;
|
||||
|
||||
explicit OpenVINOExecutionProviderInfo(std::string dev_type, std::string precision, bool enable_npu_fast_compile,
|
||||
size_t num_of_threads, std::string cache_dir, std::string model_priority,
|
||||
explicit OpenVINOExecutionProviderInfo(const std::string& dev_type, const std::string& precision,
|
||||
bool enable_npu_fast_compile, size_t num_of_threads,
|
||||
const std::string& cache_dir, const std::string& model_priority,
|
||||
int num_streams, void* context, bool enable_opencl_throttling,
|
||||
bool disable_dynamic_shapes, bool export_ep_ctx_blob,
|
||||
bool enable_qdq_optimizer, bool disable_cpu_fallback)
|
||||
: precision_(precision),
|
||||
bool enable_qdq_optimizer, bool disable_cpu_fallback,
|
||||
bool so_epctx_embed_mode)
|
||||
: precision_(std::move(precision)),
|
||||
enable_npu_fast_compile_(enable_npu_fast_compile),
|
||||
num_of_threads_(num_of_threads),
|
||||
cache_dir_(std::move(cache_dir)),
|
||||
model_priority_(model_priority),
|
||||
model_priority_(std::move(model_priority)),
|
||||
num_streams_(num_streams),
|
||||
context_(context),
|
||||
enable_opencl_throttling_(enable_opencl_throttling),
|
||||
disable_dynamic_shapes_(disable_dynamic_shapes),
|
||||
export_ep_ctx_blob_(export_ep_ctx_blob),
|
||||
enable_qdq_optimizer_(enable_qdq_optimizer),
|
||||
disable_cpu_fallback_(disable_cpu_fallback) {
|
||||
disable_cpu_fallback_(disable_cpu_fallback),
|
||||
so_epctx_embed_mode_{so_epctx_embed_mode} {
|
||||
std::set<std::string> ov_supported_device_types = {"CPU", "GPU",
|
||||
"GPU.0", "GPU.1", "NPU"};
|
||||
|
||||
OVDevices devices;
|
||||
std::vector<std::string> available_devices = devices.get_ov_devices();
|
||||
|
||||
for (auto& device : available_devices) {
|
||||
if (ov_supported_device_types.find(device) == ov_supported_device_types.end()) {
|
||||
ov_supported_device_types.emplace(device);
|
||||
}
|
||||
}
|
||||
|
||||
if (dev_type == "") {
|
||||
LOGS_DEFAULT(INFO) << "[OpenVINO-EP]"
|
||||
<< "No runtime device selection option provided.";
|
||||
|
|
@ -116,7 +145,7 @@ struct OpenVINOExecutionProviderInfo {
|
|||
dev_type = DEVICE;
|
||||
|
||||
if (dev_type.find("HETERO") == 0 || dev_type.find("MULTI") == 0 || dev_type.find("AUTO") == 0) {
|
||||
std::vector<std::string> devices = parseDevices(dev_type);
|
||||
std::vector<std::string> devices = parseDevices(dev_type, available_devices);
|
||||
precision_ = "FP16";
|
||||
if (devices[0] == "CPU") {
|
||||
precision_ = "FP32";
|
||||
|
|
@ -127,7 +156,7 @@ struct OpenVINOExecutionProviderInfo {
|
|||
} else if (ov_supported_device_types.find(dev_type) != ov_supported_device_types.end()) {
|
||||
device_type_ = std::move(dev_type);
|
||||
} else if (dev_type.find("HETERO") == 0 || dev_type.find("MULTI") == 0 || dev_type.find("AUTO") == 0) {
|
||||
std::vector<std::string> devices = parseDevices(dev_type);
|
||||
std::vector<std::string> devices = parseDevices(dev_type, available_devices);
|
||||
device_type_ = dev_type;
|
||||
} else {
|
||||
ORT_THROW("Invalid device string: " + dev_type);
|
||||
|
|
|
|||
|
|
@ -14,7 +14,8 @@ struct OpenVINOProviderFactory : IExecutionProviderFactory {
|
|||
int num_streams, void* context,
|
||||
bool enable_opencl_throttling, bool disable_dynamic_shapes,
|
||||
bool export_ep_ctx_blob, bool enable_qdq_optimizer,
|
||||
bool disable_cpu_fallback)
|
||||
bool disable_cpu_fallback,
|
||||
bool so_epctx_embed_mode)
|
||||
: precision_(precision),
|
||||
enable_npu_fast_compile_(enable_npu_fast_compile),
|
||||
num_of_threads_(num_of_threads),
|
||||
|
|
@ -25,10 +26,12 @@ struct OpenVINOProviderFactory : IExecutionProviderFactory {
|
|||
disable_dynamic_shapes_(disable_dynamic_shapes),
|
||||
export_ep_ctx_blob_(export_ep_ctx_blob),
|
||||
enable_qdq_optimizer_(enable_qdq_optimizer),
|
||||
disable_cpu_fallback_(disable_cpu_fallback) {
|
||||
disable_cpu_fallback_(disable_cpu_fallback),
|
||||
so_epctx_embed_mode_(so_epctx_embed_mode) {
|
||||
device_type_ = (device_type == nullptr) ? "" : device_type;
|
||||
cache_dir_ = (cache_dir == nullptr) ? "" : cache_dir;
|
||||
}
|
||||
|
||||
~OpenVINOProviderFactory() override {
|
||||
}
|
||||
|
||||
|
|
@ -48,13 +51,15 @@ struct OpenVINOProviderFactory : IExecutionProviderFactory {
|
|||
bool export_ep_ctx_blob_;
|
||||
bool enable_qdq_optimizer_;
|
||||
bool disable_cpu_fallback_;
|
||||
bool so_epctx_embed_mode_;
|
||||
};
|
||||
|
||||
std::unique_ptr<IExecutionProvider> OpenVINOProviderFactory::CreateProvider() {
|
||||
OpenVINOExecutionProviderInfo info(device_type_, precision_, enable_npu_fast_compile_, num_of_threads_,
|
||||
cache_dir_, model_priority_, num_streams_, context_, enable_opencl_throttling_,
|
||||
disable_dynamic_shapes_, export_ep_ctx_blob_, enable_qdq_optimizer_,
|
||||
disable_cpu_fallback_);
|
||||
disable_cpu_fallback_,
|
||||
so_epctx_embed_mode_);
|
||||
return std::make_unique<OpenVINOExecutionProvider>(info);
|
||||
}
|
||||
|
||||
|
|
@ -105,6 +110,8 @@ struct OpenVINO_Provider : Provider {
|
|||
|
||||
bool disable_cpu_fallback = false;
|
||||
|
||||
bool so_epctx_embed_mode = true;
|
||||
|
||||
if (provider_options_map.find("device_type") != provider_options_map.end()) {
|
||||
device_type = provider_options_map.at("device_type").c_str();
|
||||
|
||||
|
|
@ -113,6 +120,14 @@ struct OpenVINO_Provider : Provider {
|
|||
std::set<std::string> deprecated_device_types = {"CPU_FP32", "GPU_FP32",
|
||||
"GPU.0_FP32", "GPU.1_FP32", "GPU_FP16",
|
||||
"GPU.0_FP16", "GPU.1_FP16"};
|
||||
OVDevices devices;
|
||||
std::vector<std::string> available_devices = devices.get_ov_devices();
|
||||
|
||||
for (auto& device : available_devices) {
|
||||
if (ov_supported_device_types.find(device) == ov_supported_device_types.end()) {
|
||||
ov_supported_device_types.emplace(device);
|
||||
}
|
||||
}
|
||||
if (deprecated_device_types.find(device_type) != deprecated_device_types.end()) {
|
||||
std::string deprecated_device = device_type;
|
||||
int delimit = device_type.find("_");
|
||||
|
|
@ -128,8 +143,8 @@ struct OpenVINO_Provider : Provider {
|
|||
(device_type.find("MULTI:") == 0) ||
|
||||
(device_type.find("AUTO:") == 0))) {
|
||||
ORT_THROW(
|
||||
"[ERROR] [OpenVINO] You have selcted wrong configuration value for the key 'device_type'. "
|
||||
"Select from 'CPU', 'GPU', 'GPU.0', 'GPU.1', 'NPU' or from"
|
||||
"[ERROR] [OpenVINO] You have selected wrong configuration value for the key 'device_type'. "
|
||||
"Select from 'CPU', 'GPU', 'NPU', 'GPU.x' where x = 0,1,2 and so on or from"
|
||||
" HETERO/MULTI/AUTO options available. \n");
|
||||
}
|
||||
}
|
||||
|
|
@ -253,9 +268,8 @@ struct OpenVINO_Provider : Provider {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (provider_options_map.find("export_ep_ctx_blob") != provider_options_map.end()) {
|
||||
bool_flag = provider_options_map.at("export_ep_ctx_blob");
|
||||
if (provider_options_map.find("so_export_ep_ctx_blob") != provider_options_map.end()) {
|
||||
bool_flag = provider_options_map.at("so_export_ep_ctx_blob");
|
||||
if (bool_flag == "true" || bool_flag == "True")
|
||||
export_ep_ctx_blob = true;
|
||||
else if (bool_flag == "false" || bool_flag == "False")
|
||||
|
|
@ -271,6 +285,23 @@ struct OpenVINO_Provider : Provider {
|
|||
disable_cpu_fallback = false;
|
||||
bool_flag = "";
|
||||
}
|
||||
if (provider_options_map.find("so_epctx_embed_mode") != provider_options_map.end()) {
|
||||
bool_flag = provider_options_map.at("so_epctx_embed_mode");
|
||||
if (bool_flag == "true" || bool_flag == "True")
|
||||
so_epctx_embed_mode = true;
|
||||
else if (bool_flag == "false" || bool_flag == "False")
|
||||
so_epctx_embed_mode = false;
|
||||
bool_flag = "";
|
||||
}
|
||||
|
||||
if (provider_options_map.find("so_epctx_path") != provider_options_map.end()) {
|
||||
// The path to dump epctx model is valid only when epctx is enabled.
|
||||
// Overrides the cache_dir option to dump model cache files from OV.
|
||||
if (export_ep_ctx_blob) {
|
||||
cache_dir = provider_options_map.at("so_epctx_path").c_str();
|
||||
}
|
||||
}
|
||||
|
||||
return std::make_shared<OpenVINOProviderFactory>(const_cast<char*>(device_type.c_str()),
|
||||
const_cast<char*>(precision.c_str()),
|
||||
enable_npu_fast_compile,
|
||||
|
|
@ -283,7 +314,8 @@ struct OpenVINO_Provider : Provider {
|
|||
disable_dynamic_shapes,
|
||||
export_ep_ctx_blob,
|
||||
enable_qdq_optimizer,
|
||||
disable_cpu_fallback);
|
||||
disable_cpu_fallback,
|
||||
so_epctx_embed_mode);
|
||||
}
|
||||
|
||||
void Initialize() override {
|
||||
|
|
|
|||
|
|
@ -63,7 +63,6 @@ std::shared_ptr<OVNetwork> OVCore::ReadModel(const std::string& model, const std
|
|||
return FE->convert(inputModel);
|
||||
} else {
|
||||
ORT_THROW(log_tag + "[OpenVINO-EP] Unknown exception while Reading network");
|
||||
return NULL;
|
||||
}
|
||||
} catch (const Exception& e) {
|
||||
ORT_THROW(log_tag + "[OpenVINO-EP] Exception while Reading network: " + std::string(e.what()));
|
||||
|
|
@ -73,9 +72,9 @@ std::shared_ptr<OVNetwork> OVCore::ReadModel(const std::string& model, const std
|
|||
}
|
||||
|
||||
OVExeNetwork OVCore::CompileModel(std::shared_ptr<const OVNetwork>& ie_cnn_network,
|
||||
std::string hw_target,
|
||||
const ov::AnyMap& device_config,
|
||||
std::string name) {
|
||||
std::string& hw_target,
|
||||
ov::AnyMap& device_config,
|
||||
const std::string& name) {
|
||||
ov::CompiledModel obj;
|
||||
try {
|
||||
obj = oe.compile_model(ie_cnn_network, hw_target, device_config);
|
||||
|
|
@ -92,22 +91,12 @@ OVExeNetwork OVCore::CompileModel(std::shared_ptr<const OVNetwork>& ie_cnn_netwo
|
|||
}
|
||||
|
||||
OVExeNetwork OVCore::CompileModel(const std::string& onnx_model,
|
||||
std::string hw_target,
|
||||
std::string precision,
|
||||
std::string cache_dir,
|
||||
const ov::AnyMap& device_config,
|
||||
std::string name) {
|
||||
std::string& hw_target,
|
||||
ov::AnyMap& device_config,
|
||||
const std::string& name) {
|
||||
ov::CompiledModel obj;
|
||||
try {
|
||||
if (hw_target == "AUTO:GPU,CPU") {
|
||||
obj = oe.compile_model(onnx_model, ov::Tensor(),
|
||||
"AUTO",
|
||||
ov::device::priorities("GPU", "CPU"),
|
||||
ov::device::properties("GPU", {ov::cache_dir(cache_dir),
|
||||
ov::hint::inference_precision(precision)}));
|
||||
} else {
|
||||
obj = oe.compile_model(onnx_model, ov::Tensor(), hw_target, device_config);
|
||||
}
|
||||
obj = oe.compile_model(onnx_model, ov::Tensor(), hw_target, device_config);
|
||||
#ifndef NDEBUG
|
||||
printDebugInfo(obj);
|
||||
#endif
|
||||
|
|
@ -123,9 +112,19 @@ OVExeNetwork OVCore::CompileModel(const std::string& onnx_model,
|
|||
OVExeNetwork OVCore::ImportModel(std::shared_ptr<std::istringstream> model_stream,
|
||||
std::string hw_target,
|
||||
const ov::AnyMap& device_config,
|
||||
bool embed_mode,
|
||||
std::string name) {
|
||||
try {
|
||||
auto obj = oe.import_model(*model_stream, hw_target, device_config);
|
||||
ov::CompiledModel obj;
|
||||
if (embed_mode) {
|
||||
obj = oe.import_model(*model_stream, hw_target, device_config);
|
||||
} else {
|
||||
std::string blob_file_path = (*model_stream).str();
|
||||
std::ifstream modelStream(blob_file_path, std::ios_base::binary | std::ios_base::in);
|
||||
obj = oe.import_model(modelStream,
|
||||
hw_target,
|
||||
{});
|
||||
}
|
||||
#ifndef NDEBUG
|
||||
printDebugInfo(obj);
|
||||
#endif
|
||||
|
|
@ -138,10 +137,8 @@ OVExeNetwork OVCore::ImportModel(std::shared_ptr<std::istringstream> model_strea
|
|||
}
|
||||
}
|
||||
|
||||
void OVCore::SetCache(std::string cache_dir_path, std::string device_type) {
|
||||
if (device_type != "AUTO:GPU,CPU") {
|
||||
oe.set_property(ov::cache_dir(cache_dir_path));
|
||||
}
|
||||
void OVCore::SetCache(const std::string& cache_dir_path) {
|
||||
oe.set_property(ov::cache_dir(cache_dir_path));
|
||||
}
|
||||
|
||||
#ifdef IO_BUFFER_ENABLED
|
||||
|
|
|
|||
|
|
@ -40,20 +40,23 @@ class OVCore {
|
|||
ov::Core oe;
|
||||
|
||||
public:
|
||||
// OV Interface For Reading Model
|
||||
std::shared_ptr<OVNetwork> ReadModel(const std::string& model_stream, const std::string& model_path) const;
|
||||
// OV Interface for Compiling OV Model Type
|
||||
OVExeNetwork CompileModel(std::shared_ptr<const OVNetwork>& ie_cnn_network,
|
||||
std::string hw_target,
|
||||
const ov::AnyMap& device_config,
|
||||
std::string name);
|
||||
std::string& hw_target,
|
||||
ov::AnyMap& device_config,
|
||||
const std::string& name);
|
||||
// OV Interface for Fast Compile
|
||||
OVExeNetwork CompileModel(const std::string& onnx_model,
|
||||
std::string hw_target,
|
||||
std::string precision,
|
||||
std::string cache_dir,
|
||||
const ov::AnyMap& device_config,
|
||||
std::string name);
|
||||
std::string& hw_target,
|
||||
ov::AnyMap& device_config,
|
||||
const std::string& name);
|
||||
// OV Interface for Import model Stream
|
||||
OVExeNetwork ImportModel(std::shared_ptr<std::istringstream> model_stream,
|
||||
std::string hw_target,
|
||||
const ov::AnyMap& device_config,
|
||||
bool embed_mode,
|
||||
std::string name);
|
||||
#ifdef IO_BUFFER_ENABLED
|
||||
OVExeNetwork CompileModel(std::shared_ptr<const OVNetwork>& model,
|
||||
|
|
@ -64,7 +67,7 @@ class OVCore {
|
|||
std::string name);
|
||||
#endif
|
||||
std::vector<std::string> GetAvailableDevices();
|
||||
void SetCache(std::string cache_dir_path, std::string device_type);
|
||||
void SetCache(const std::string& cache_dir_path);
|
||||
ov::Core& Get() { return oe; }
|
||||
void SetStreams(const std::string& device_type, int num_streams);
|
||||
};
|
||||
|
|
|
|||
|
|
@ -35,18 +35,16 @@ GetCapability::GetCapability(const GraphViewer& graph_viewer_param,
|
|||
device_type_ = "CPU";
|
||||
if (enable_qdq_optimizer) npu_qdq_optimizer_enabled = true;
|
||||
}
|
||||
#if OPENVINO_VERSION_MAJOR == 2023 && OPENVINO_VERSION_MINOR == 1
|
||||
data_ops_ = new DataOps(graph_viewer_, V_2023_1, device_type_, npu_qdq_optimizer_enabled);
|
||||
#elif OPENVINO_VERSION_MAJOR == 2023 && OPENVINO_VERSION_MINOR == 2
|
||||
data_ops_ = new DataOps(graph_viewer_, V_2023_2, device_type_, npu_qdq_optimizer_enabled);
|
||||
#elif OPENVINO_VERSION_MAJOR == 2023 && OPENVINO_VERSION_MINOR == 3
|
||||
data_ops_ = new DataOps(graph_viewer_, V_2023_3, device_type_, npu_qdq_optimizer_enabled);
|
||||
#elif OPENVINO_VERSION_MAJOR == 2024 && OPENVINO_VERSION_MINOR == 0
|
||||
#if OPENVINO_VERSION_MAJOR == 2024 && OPENVINO_VERSION_MINOR == 0
|
||||
data_ops_ = new DataOps(graph_viewer_, V_2024_0, device_type_, npu_qdq_optimizer_enabled);
|
||||
#elif OPENVINO_VERSION_MAJOR == 2024 && OPENVINO_VERSION_MINOR == 1
|
||||
data_ops_ = new DataOps(graph_viewer_, V_2024_1, device_type_, npu_qdq_optimizer_enabled);
|
||||
#elif OPENVINO_VERSION_MAJOR == 2024 && OPENVINO_VERSION_MINOR == 2
|
||||
data_ops_ = new DataOps(graph_viewer_, V_2024_2, device_type_, npu_qdq_optimizer_enabled);
|
||||
#elif OPENVINO_VERSION_MAJOR == 2024 && OPENVINO_VERSION_MINOR == 3
|
||||
data_ops_ = new DataOps(graph_viewer_, V_2024_3, device_type_, npu_qdq_optimizer_enabled);
|
||||
#else
|
||||
data_ops_ = new DataOps(graph_viewer_, V_2024_1, device_type_, npu_qdq_optimizer_enabled);
|
||||
data_ops_ = new DataOps(graph_viewer_, V_2024_3, device_type_, npu_qdq_optimizer_enabled);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -142,6 +142,7 @@ std::vector<SupportedOp> supported_op_mode = {
|
|||
{"GreaterOrEqual", V_2022_1, {"CPU", "GPU"}},
|
||||
{"GridSample", V_2022_3, {"CPU"}},
|
||||
{"GridSample", V_2023_0, {"GPU"}},
|
||||
{"GRU", V_2024_1, {"CPU", "GPU"}},
|
||||
{"HardMax", V_2023_1, {"CPU", "GPU"}},
|
||||
{"Identity", V_2020_4, {"CPU", "GPU"}},
|
||||
{"If", V_2022_3, {"CPU", "GPU"}},
|
||||
|
|
@ -155,6 +156,7 @@ std::vector<SupportedOp> supported_op_mode = {
|
|||
{"LessOrEqual", V_2022_1, {"CPU", "GPU"}},
|
||||
{"Log", V_2020_4, {"CPU", "GPU"}},
|
||||
{"LogSoftMax", V_2022_1, {"CPU", "GPU"}},
|
||||
{"LogSoftmax", V_2024_1, {"CPU", "GPU"}},
|
||||
{"Loop", V_2021_4, {"CPU", "GPU"}},
|
||||
{"LpNormalization", V_2023_1, {"CPU", "GPU"}},
|
||||
{"LRN", V_2020_4, {"CPU", "GPU"}},
|
||||
|
|
@ -361,7 +363,7 @@ void DataOps::populate_op_mode_supported() {
|
|||
|
||||
// populate unsupportedmode_t
|
||||
{
|
||||
UnsupportedOpMode obj = {{V_2024_1},
|
||||
UnsupportedOpMode obj = {{V_2024_1, V_2024_2, V_2024_3},
|
||||
[this](const Node* node, const InitializedTensorSet&) {
|
||||
// If the Input of ReduceMax op is UINT8, it is rejected (Due to output mismatch)
|
||||
for (size_t i = 0; i < node->InputDefs().size(); i++) {
|
||||
|
|
@ -376,7 +378,7 @@ void DataOps::populate_op_mode_supported() {
|
|||
op_list_.insert({"ReduceMax", obj});
|
||||
}
|
||||
{
|
||||
UnsupportedOpMode obj = {{V_2023_1, V_2023_2, V_2023_3, V_2024_0, V_2024_1},
|
||||
UnsupportedOpMode obj = {{V_2023_1, V_2023_2, V_2023_3, V_2024_0, V_2024_1, V_2024_2, V_2024_3},
|
||||
[this](const Node* node, const InitializedTensorSet&) {
|
||||
const auto& input_arg = node->InputDefs()[1];
|
||||
auto shape = input_arg->Shape();
|
||||
|
|
@ -393,7 +395,7 @@ void DataOps::populate_op_mode_supported() {
|
|||
op_list_.insert({"Reshape", obj});
|
||||
}
|
||||
{
|
||||
UnsupportedOpMode obj = {{V_2023_1, V_2023_2, V_2023_3, V_2024_0, V_2024_1},
|
||||
UnsupportedOpMode obj = {{V_2023_1, V_2023_2, V_2023_3, V_2024_0, V_2024_1, V_2024_2, V_2024_3},
|
||||
[this](const Node* node, const InitializedTensorSet&) {
|
||||
// If the operator is unsqueeze
|
||||
// If axes is an input, then we cannot produce a static graph.
|
||||
|
|
@ -408,7 +410,7 @@ void DataOps::populate_op_mode_supported() {
|
|||
op_list_.insert({"Unsqueeze", obj});
|
||||
}
|
||||
{
|
||||
UnsupportedOpMode obj = {{V_2023_1, V_2023_2, V_2023_3, V_2024_0, V_2024_1},
|
||||
UnsupportedOpMode obj = {{V_2023_1, V_2023_2, V_2023_3, V_2024_0, V_2024_1, V_2024_2, V_2024_3},
|
||||
[this](const Node* node, const InitializedTensorSet&) {
|
||||
// check for attributes
|
||||
auto& upsample_attr = node->GetAttributes();
|
||||
|
|
|
|||
|
|
@ -28,7 +28,9 @@ enum versionNum {
|
|||
V_2023_2,
|
||||
V_2023_3,
|
||||
V_2024_0,
|
||||
V_2024_1
|
||||
V_2024_1,
|
||||
V_2024_2,
|
||||
V_2024_3
|
||||
};
|
||||
|
||||
using VersionNum = enum versionNum;
|
||||
|
|
|
|||
|
|
@ -205,11 +205,11 @@ static bool IsConnectedQAConstantInitializer(const Node* dq_node, const onnxrunt
|
|||
|
||||
// Check required because in some cases, when a NodeUnit cannot be formed with this standalone DQ
|
||||
// we still need to check if it feeds into a supported Op
|
||||
static bool DQFeedsASupportedOp(const Node* dq_node, const onnxruntime::GraphViewer& src_graph) {
|
||||
static bool DQFeedsASupportedOp(const Node* dq_node) {
|
||||
if (!dq_node->GetOutputEdgesCount()) return false; // Only feeds the graph output, and not any node
|
||||
|
||||
const auto& target_node = *dq_node->OutputNodesBegin();
|
||||
const auto op_type = target_node.OpType();
|
||||
const auto& op_type = target_node.OpType();
|
||||
|
||||
if (op_type == "Conv" || op_type == "MatMul") {
|
||||
// Conv and MatMul always keeps int8 DQs except if the DQ is sandwiched between Softmax and Conv/MatMul
|
||||
|
|
@ -219,8 +219,8 @@ static bool DQFeedsASupportedOp(const Node* dq_node, const onnxruntime::GraphVie
|
|||
return true;
|
||||
}
|
||||
} else if (op_type == "Add") {
|
||||
// Add keeps all DQs except if it has const inits
|
||||
return !IsAnyDQAConstantInitializer(&target_node, src_graph);
|
||||
// Add => keeps all DQs
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
|
@ -291,7 +291,7 @@ static bool CheckDQRuleSet(const NodeUnit& node_unit,
|
|||
const onnxruntime::GraphViewer& src_graph,
|
||||
SkipReason& reason) {
|
||||
const auto& target_node = node_unit.GetNode();
|
||||
auto op_type = node_unit.OpType();
|
||||
const auto& op_type = node_unit.OpType();
|
||||
|
||||
// #1 Reverse DQ duplication
|
||||
if (dq_node->Name().find(DuplicateDQ) != std::string::npos) {
|
||||
|
|
@ -337,6 +337,18 @@ static bool CheckDQRuleSet(const NodeUnit& node_unit,
|
|||
}
|
||||
}
|
||||
|
||||
static bool CheckQFeedsIntoQuantizedOutput(const NodeUnit& node_unit,
|
||||
const std::unordered_map<std::string, std::string> graph_op_data_type) {
|
||||
auto op_of_quantized_layer = node_unit.Outputs();
|
||||
for (auto& itr : op_of_quantized_layer) {
|
||||
auto it = graph_op_data_type.find(itr.node_arg.Name());
|
||||
if (it != graph_op_data_type.end() && it->second == "tensor(uint8)") {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
static bool CheckQRuleSet(const NodeUnit& node_unit,
|
||||
const Node* q_node,
|
||||
const onnxruntime::GraphViewer& src_graph,
|
||||
|
|
@ -345,7 +357,13 @@ static bool CheckQRuleSet(const NodeUnit& node_unit,
|
|||
// This Q should also be uint8
|
||||
|
||||
const auto& target_node = node_unit.GetNode();
|
||||
auto op_type = node_unit.OpType();
|
||||
const auto& op_type = node_unit.OpType();
|
||||
|
||||
auto op = src_graph.GetOutputs();
|
||||
std::unordered_map<std::string, std::string> graph_op_data_type;
|
||||
for (auto& ops : op) {
|
||||
graph_op_data_type[src_graph.GetNodeArg(ops->Name())->Name()] = ops->Type()->data();
|
||||
}
|
||||
|
||||
// If UInt16 Q, don't keep it
|
||||
if (GetQDQDataType(q_node) == DT_UINT16 || GetQDQDataType(q_node) == DT_INT16) {
|
||||
|
|
@ -359,6 +377,8 @@ static bool CheckQRuleSet(const NodeUnit& node_unit,
|
|||
} else if (op_type == "Add") {
|
||||
// Add keeps all Qs
|
||||
return true;
|
||||
} else if (CheckQFeedsIntoQuantizedOutput(node_unit, std::move(graph_op_data_type))) {
|
||||
return true;
|
||||
} else {
|
||||
// Keep Q of an unsupported Op only if the target that succeeds it is a supported Op in this list
|
||||
return IsNextTargetNodeOfQValid(q_node, &target_node, src_graph, {"Conv", "Add", "MatMul"}, false);
|
||||
|
|
@ -469,7 +489,7 @@ static void AddStandaloneNodeUnit(onnxruntime::Graph& dst_graph, const onnxrunti
|
|||
add_identity_op(true);
|
||||
else if (IsConnectedQPresent(src_graph, dst_graph.Nodes(), &node_unit.GetNode(), node_unit.GetNode().InputDefs()))
|
||||
AddNode(initializers_to_keep, src_graph, dst_graph, node_unit.GetNode());
|
||||
else if (DQFeedsASupportedOp(&node_unit.GetNode(), src_graph))
|
||||
else if (DQFeedsASupportedOp(&node_unit.GetNode()))
|
||||
AddNode(initializers_to_keep, src_graph, dst_graph, node_unit.GetNode());
|
||||
else
|
||||
add_identity_op(false);
|
||||
|
|
@ -543,7 +563,7 @@ static void AddQDQNodeUnit(onnxruntime::Graph& dst_graph,
|
|||
|
||||
// Add Node args for inputs
|
||||
for (const auto& node_unit_input : node_unit_inputs) {
|
||||
auto node_arg_name = node_unit_input.node_arg.Name();
|
||||
const auto& node_arg_name = node_unit_input.node_arg.Name();
|
||||
if (auto dq_node_arg = dq_node_args_to_keep.find(node_arg_name); dq_node_arg != dq_node_args_to_keep.end()) {
|
||||
// Add supported DQ as an input arg for the target node
|
||||
input_args.push_back(dq_node_arg->second);
|
||||
|
|
|
|||
|
|
@ -1931,12 +1931,31 @@ void ORTSessionOptionsToOrtOpenVINOProviderOptions(ProviderOptions& ov_options,
|
|||
kOrtSessionOptionsDisableCPUEPFallback, "0") == "1";
|
||||
if (disable_cpu_fallback)
|
||||
ov_options["disable_cpu_fallback"] = "true";
|
||||
|
||||
// values from session options will override the providerOptions Value
|
||||
bool so_epctx_enable = session_options->config_options.GetConfigOrDefault(
|
||||
kOrtSessionOptionEpContextEnable, "0") == "1";
|
||||
if (so_epctx_enable)
|
||||
ov_options["so_export_ep_ctx_blob"] = "true";
|
||||
|
||||
std::string so_cache_path = session_options->config_options.GetConfigOrDefault(kOrtSessionOptionEpContextFilePath, "").c_str();
|
||||
ov_options["so_epctx_path"] = so_cache_path;
|
||||
|
||||
// Default embedMode is 1. Saving the compiled model contents as a Epctx node attribute
|
||||
bool so_epctx_embed_mode = session_options->config_options.GetConfigOrDefault(
|
||||
kOrtSessionOptionEpContextEmbedMode, "1") == "0";
|
||||
if (so_epctx_embed_mode) {
|
||||
// defaults to true
|
||||
ov_options["so_epctx_embed_mode"] = "false";
|
||||
}
|
||||
}
|
||||
|
||||
// Builds the OpenVINO execution provider factory from `provider_options_map`.
// When `session_options` is non-null, ORTSessionOptionsToOrtOpenVINOProviderOptions
// is first called to append the session options applicable to this EP to the
// provider options.
std::shared_ptr<IExecutionProviderFactory> OpenVINOProviderFactoryCreator::Create(ProviderOptions* provider_options_map,
                                                                                  const SessionOptions* session_options) {
  const bool has_session_options = (session_options != nullptr);
  if (has_session_options) {
    // Append session options applicable for EP to EP Provider options.
    onnxruntime::ORTSessionOptionsToOrtOpenVINOProviderOptions(*provider_options_map, session_options);
  }

  auto& openvino_library = s_library_openvino.Get();
  return openvino_library.CreateExecutionProviderFactory(provider_options_map);
}
|
||||
|
||||
|
|
|
|||
|
|
@ -253,7 +253,6 @@ static bool ParseSessionConfigs(const std::string& configs_string,
|
|||
test_config.machine_config.provider_type_name = onnxruntime::kDnnlExecutionProvider;
|
||||
} else if (!CompareCString(optarg, ORT_TSTR("openvino"))) {
|
||||
test_config.machine_config.provider_type_name = onnxruntime::kOpenVINOExecutionProvider;
|
||||
test_config.run_config.optimization_level = ORT_DISABLE_ALL;
|
||||
} else if (!CompareCString(optarg, ORT_TSTR("tensorrt"))) {
|
||||
test_config.machine_config.provider_type_name = onnxruntime::kTensorrtExecutionProvider;
|
||||
} else if (!CompareCString(optarg, ORT_TSTR("qnn"))) {
|
||||
|
|
|
|||
|
|
@ -699,6 +699,10 @@ select from 'TF8', 'TF16', 'UINT8', 'FLOAT', 'ITENSOR'. \n)");
|
|||
std::set<std::string> deprecated_device_types = {"CPU_FP32", "GPU_FP32",
|
||||
"GPU.0_FP32", "GPU.1_FP32", "GPU_FP16",
|
||||
"GPU.0_FP16", "GPU.1_FP16"};
|
||||
size_t num_gpus = 10;
|
||||
for (size_t i = 0; i <= num_gpus; i++) {
|
||||
ov_supported_device_types.emplace("GPU." + std::to_string(i));
|
||||
}
|
||||
if (ov_supported_device_types.find(value) != ov_supported_device_types.end()) {
|
||||
ov_options[key] = value;
|
||||
} else if (deprecated_device_types.find(value) != deprecated_device_types.end()) {
|
||||
|
|
|
|||
|
|
@ -25,7 +25,15 @@ struct DefaultTolerance<double> {
|
|||
static constexpr float relative = 1e-5f;
|
||||
|
||||
// Allow to have different default absolute tolerance for different providers.
|
||||
static float get_absolute(const std::string& /*provider_type*/) {
|
||||
static float get_absolute(const std::string& provider_type /*provider_type*/) {
|
||||
if (provider_type == kOpenVINOExecutionProvider) {
|
||||
#ifdef OPENVINO_CONFIG_NPU
|
||||
return 0.005f;
|
||||
#else
|
||||
return absolute;
|
||||
#endif
|
||||
}
|
||||
|
||||
return absolute;
|
||||
}
|
||||
};
|
||||
|
|
@ -40,7 +48,15 @@ struct DefaultTolerance<float> {
|
|||
|
||||
static constexpr float relative = 1e-4f;
|
||||
|
||||
static float get_absolute(const std::string& /*provider_type*/) {
|
||||
static float get_absolute(const std::string& provider_type /*provider_type*/) {
|
||||
if (provider_type == kOpenVINOExecutionProvider) {
|
||||
#ifdef OPENVINO_CONFIG_NPU
|
||||
return 0.005f;
|
||||
#else
|
||||
return absolute;
|
||||
#endif
|
||||
}
|
||||
|
||||
return absolute;
|
||||
}
|
||||
};
|
||||
|
|
|
|||
|
|
@ -98,8 +98,12 @@ static void RunGruTest(const std::vector<float>& X_data,
|
|||
test.AddOptionalOutputEdge<float>();
|
||||
}
|
||||
|
||||
// TensorRT failed on GRU tests
|
||||
// TensorRT, OpenVINO failed on GRU tests
|
||||
#if defined(USE_OPENVINO)
|
||||
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider, kOpenVINOExecutionProvider});
|
||||
#else
|
||||
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider});
|
||||
#endif
|
||||
}
|
||||
|
||||
void DefaultActivationsSimpleWeightsNoBias(std::string direction,
|
||||
|
|
|
|||
Loading…
Reference in a new issue