mirror of
https://github.com/saymrwulf/onnxruntime.git
synced 2026-05-17 21:10:43 +00:00
### Description
These changes include:
- Support for OpenVINO 2024.1.
- Import of precompiled blobs with the EPContext blob.
- Separate Device/Precision as input.
- Deprecate the CPU_FP32, GPU_FP32 terminology; introduce CPU, GPU.
- AUTO GPU, CPU will only create a GPU blob and not a CPU blob.

### Motivation and Context
- OpenVINO 2024.1 will be out soon.
- Importing a precompiled blob can greatly reduce FEIL/FIL time.
- Separating Device/Precision will make the input cleaner.

---------
Co-authored-by: Suryaprakash Shanmugam <suryaprakash.shanmugam@intel.com>
Co-authored-by: Preetha Veeramalai <preetha.veeramalai@intel.com>
446 lines
15 KiB
C++
446 lines
15 KiB
C++
// Copyright (c) Microsoft Corporation. All rights reserved.
|
|
// Licensed under the MIT License.
|
|
|
|
#pragma once
|
|
|
|
#include "core/common/logging/logging.h"
|
|
#include "core/common/logging/sinks/cerr_sink.h"
|
|
#include "core/common/optional.h"
|
|
#include "core/framework/allocator.h"
|
|
#include "core/framework/session_options.h"
|
|
#include "core/session/environment.h"
|
|
#include "core/session/abi_session_options_impl.h"
|
|
#include "core/session/inference_session.h"
|
|
#ifdef ENABLE_TRAINING
|
|
#include "core/dlpack/dlpack_converter.h"
|
|
#endif
|
|
|
|
#include "onnxruntime_pybind.h" // must use this for the include of <pybind11/pybind11.h>
|
|
|
|
// execution provider factory creator headers
|
|
struct OrtStatus {
|
|
OrtErrorCode code;
|
|
char msg[1]; // a null-terminated string
|
|
};
|
|
|
|
// Full backend description string: every BACKEND_* macro below expands to a string literal,
// and adjacent string literals are concatenated by the compiler into a single one.
// The BACKEND_* macros are defined later in this file; that is fine because a macro body is
// only expanded at its point of use.
#define BACKEND_DEVICE BACKEND_PROC BACKEND_DNNL BACKEND_OPENVINO BACKEND_TVM BACKEND_OPENBLAS BACKEND_MIGRAPHX BACKEND_ACL BACKEND_ARMNN BACKEND_DML BACKEND_CANN
|
|
#include "core/session/onnxruntime_cxx_api.h"
|
|
#include "core/providers/providers.h"
|
|
#include "core/providers/provider_factory_creators.h"
|
|
#include "core/providers/tensorrt/tensorrt_provider_options.h"
|
|
|
|
// Processor tag: "GPU" when either USE_CUDA or USE_ROCM is defined, "CPU" otherwise.
#if !defined(USE_CUDA) && !defined(USE_ROCM)
#define BACKEND_PROC "CPU"
#else
#define BACKEND_PROC "GPU"
#endif

// DNNL suffix: empty unless USE_DNNL evaluates to non-zero.
#if !USE_DNNL
#define BACKEND_DNNL ""
#else
#define BACKEND_DNNL "-DNNL"
#endif

// MIGraphX suffix: empty unless USE_MIGRAPHX evaluates to non-zero.
#if !USE_MIGRAPHX
#define BACKEND_MIGRAPHX ""
#else
#define BACKEND_MIGRAPHX "-MIGRAPHX"
#endif
|
|
|
|
// OpenVINO suffix. When USE_OPENVINO is defined, the first OPENVINO_CONFIG_* flag that
// evaluates to non-zero selects the device-specific tag; without USE_OPENVINO the suffix
// is empty.
#ifdef USE_OPENVINO
#if OPENVINO_CONFIG_CPU
#define BACKEND_OPENVINO "-OPENVINO_CPU"

#elif OPENVINO_CONFIG_GPU
#define BACKEND_OPENVINO "-OPENVINO_GPU"

#elif OPENVINO_CONFIG_NPU
#define BACKEND_OPENVINO "-OPENVINO_NPU"

#elif OPENVINO_CONFIG_MULTI
#define BACKEND_OPENVINO "-OPENVINO_MULTI"

#elif OPENVINO_CONFIG_AUTO
#define BACKEND_OPENVINO "-OPENVINO_AUTO"

#elif OPENVINO_CONFIG_HETERO
#define BACKEND_OPENVINO "-OPENVINO_HETERO"

#else
// No device flag is set. Fall back to a generic tag so that the macro is always defined;
// otherwise any use of a string built from it fails with an undefined-identifier error.
#define BACKEND_OPENVINO "-OPENVINO"
#endif
#else
#define BACKEND_OPENVINO ""
#endif
|
|
|
|
// TVM suffix: present whenever USE_TVM is defined (its value is not inspected).
#ifndef USE_TVM
#define BACKEND_TVM ""
#else
#define BACKEND_TVM "-TVM"
#endif

// The remaining suffixes are empty unless the matching USE_* flag evaluates to non-zero.
#if !USE_OPENBLAS
#define BACKEND_OPENBLAS ""
#else
#define BACKEND_OPENBLAS "-OPENBLAS"
#endif

#if !USE_ACL
#define BACKEND_ACL ""
#else
#define BACKEND_ACL "-ACL"
#endif

#if !USE_ARMNN
#define BACKEND_ARMNN ""
#else
#define BACKEND_ARMNN "-ARMNN"
#endif

#if !USE_DML
#define BACKEND_DML ""
#else
#define BACKEND_DML "-DML"
#endif

#if !USE_CANN
#define BACKEND_CANN ""
#else
#define BACKEND_CANN "-CANN"
#endif
|
|
|
|
#ifdef USE_CUDA
|
|
#include "core/providers/cuda/cuda_provider_factory.h"
|
|
#include "core/providers/cuda/cuda_execution_provider_info.h"
|
|
#endif
|
|
#ifdef USE_ROCM
|
|
#include "core/providers/rocm/rocm_provider_factory.h"
|
|
#include "core/providers/rocm/rocm_execution_provider_info.h"
|
|
#endif
|
|
#ifdef USE_TENSORRT
|
|
#include "core/providers/tensorrt/tensorrt_provider_factory.h"
|
|
#endif
|
|
#ifdef USE_MIGRAPHX
|
|
#include "core/providers/migraphx/migraphx_provider_factory.h"
|
|
#endif
|
|
#ifdef USE_OPENVINO
|
|
#include "core/providers/openvino/openvino_provider_factory.h"
|
|
// TODO remove deprecated global config
|
|
namespace onnxruntime {
|
|
ProviderInfo_OpenVINO* GetProviderInfo_OpenVINO();
|
|
namespace python {
|
|
extern std::string openvino_device_type;
|
|
}
|
|
} // namespace onnxruntime
|
|
#endif
|
|
#ifdef USE_TVM
|
|
#include "core/providers/tvm/tvm_ep_options.h"
|
|
#endif
|
|
#ifdef USE_ACL
|
|
#include "core/providers/acl/acl_provider_factory.h"
|
|
#endif
|
|
#ifdef USE_ARMNN
|
|
#include "core/providers/armnn/armnn_provider_factory.h"
|
|
#endif
|
|
#ifdef USE_DML
|
|
#include "core/providers/dml/dml_provider_factory.h"
|
|
#endif
|
|
#ifdef USE_CANN
|
|
#include "core/providers/cann/cann_provider_factory.h"
|
|
#include "core/providers/cann/cann_execution_provider_info.h"
|
|
#endif
|
|
|
|
#ifdef USE_CUDA
|
|
namespace onnxruntime {
|
|
ProviderInfo_CUDA* TryGetProviderInfo_CUDA();
|
|
ProviderInfo_CUDA& GetProviderInfo_CUDA();
|
|
namespace python {
|
|
// TODO remove deprecated global config
|
|
extern OrtCudnnConvAlgoSearch cudnn_conv_algo_search;
|
|
// TODO remove deprecated global config
|
|
extern bool do_copy_in_default_stream;
|
|
// TODO remove deprecated global config
|
|
extern onnxruntime::cuda::TunableOpInfo tunable_op;
|
|
extern onnxruntime::CUDAExecutionProviderExternalAllocatorInfo external_allocator_info;
|
|
extern onnxruntime::ArenaExtendStrategy arena_extend_strategy;
|
|
} // namespace python
|
|
} // namespace onnxruntime
|
|
#endif
|
|
|
|
#ifdef USE_TENSORRT
|
|
namespace onnxruntime {
|
|
ProviderInfo_TensorRT* TryGetProviderInfo_TensorRT();
|
|
ProviderInfo_TensorRT& GetProviderInfo_TensorRT();
|
|
} // namespace onnxruntime
|
|
#endif
|
|
|
|
#ifdef USE_CANN
|
|
namespace onnxruntime {
|
|
ProviderInfo_CANN* TryGetProviderInfo_CANN();
|
|
ProviderInfo_CANN& GetProviderInfo_CANN();
|
|
} // namespace onnxruntime
|
|
#endif
|
|
|
|
#ifdef USE_ROCM
|
|
namespace onnxruntime {
|
|
ProviderInfo_ROCM* TryGetProviderInfo_ROCM();
|
|
ProviderInfo_ROCM& GetProviderInfo_ROCM();
|
|
namespace python {
|
|
// TODO remove deprecated global config
|
|
extern bool miopen_conv_exhaustive_search;
|
|
// TODO remove deprecated global config
|
|
extern bool do_copy_in_default_stream;
|
|
// TODO remove deprecated global config
|
|
extern onnxruntime::rocm::TunableOpInfo tunable_op;
|
|
extern onnxruntime::ROCMExecutionProviderExternalAllocatorInfo external_allocator_info;
|
|
extern onnxruntime::ArenaExtendStrategy arena_extend_strategy;
|
|
} // namespace python
|
|
} // namespace onnxruntime
|
|
#endif
|
|
|
|
#include "core/providers/dnnl/dnnl_provider_factory.h"
|
|
#include "core/providers/shared_library/provider_host_api.h"
|
|
|
|
namespace onnxruntime {
|
|
#if !defined(SHARED_PROVIDER) && !defined(DISABLE_SPARSE_TENSORS)
|
|
class SparseTensor;
|
|
#endif
|
|
namespace python {
|
|
|
|
// Signature of an execution-provider registration callback. It is handed a session pointer,
// a list of strings and a provider-options map (presumably the requested provider names and
// their options -- confirm against the callers) and returns nothing.
using ExecutionProviderRegistrationFn =
    std::function<void(InferenceSession*, const std::vector<std::string>&, const ProviderOptionsMap&)>;
|
|
|
|
// TODO remove deprecated global config
|
|
extern OrtDevice::DeviceId cuda_device_id;
|
|
// TODO remove deprecated global config
|
|
extern size_t gpu_mem_limit;
|
|
|
|
// Name used by the Python bindings for the session options type; a plain alias of OrtSessionOptions.
using PySessionOptions = OrtSessionOptions;
|
|
|
|
// Thin wrapper over internal C++ InferenceSession to accommodate custom op library management for the Python user
|
|
struct PyInferenceSession {
|
|
PyInferenceSession(std::shared_ptr<Environment> env, const PySessionOptions& so)
|
|
: env_(std::move(env)) {
|
|
sess_ = std::make_unique<InferenceSession>(so.value, *env_);
|
|
}
|
|
|
|
#if !defined(ORT_MINIMAL_BUILD)
|
|
PyInferenceSession(std::shared_ptr<Environment> env, const PySessionOptions& so, const std::string& arg, bool is_arg_file_name)
|
|
: env_(std::move(env)) {
|
|
if (is_arg_file_name) {
|
|
// Given arg is the file path. Invoke the corresponding ctor().
|
|
sess_ = std::make_unique<InferenceSession>(so.value, *env_, arg);
|
|
} else {
|
|
// Given arg is the model content as bytes. Invoke the corresponding ctor().
|
|
std::istringstream buffer(arg);
|
|
sess_ = std::make_unique<InferenceSession>(so.value, *env_, buffer);
|
|
}
|
|
}
|
|
#endif
|
|
|
|
InferenceSession* GetSessionHandle() const { return sess_.get(); }
|
|
|
|
virtual ~PyInferenceSession() = default;
|
|
|
|
protected:
|
|
PyInferenceSession(std::shared_ptr<Environment> env, std::unique_ptr<InferenceSession> sess)
|
|
: env_(std::move(env)), sess_(std::move(sess)) {
|
|
}
|
|
|
|
private:
|
|
std::shared_ptr<Environment> env_;
|
|
std::unique_ptr<InferenceSession> sess_;
|
|
};
|
|
|
|
// Returns a read-only reference to one default-constructed PySessionOptions object that is
// created on the first call and handed out to every caller afterwards.
inline const PySessionOptions& GetDefaultCPUSessionOptions() {
  static PySessionOptions default_options;
  return default_options;
}
|
|
|
|
// Returns a mutable reference to a function-local static AllocatorPtr, initialized on the
// first call with a freshly created CPUAllocator.
inline AllocatorPtr& GetAllocator() {
  static AllocatorPtr cpu_allocator = std::make_shared<CPUAllocator>();
  return cpu_allocator;
}
|
|
|
|
#if !defined(DISABLE_SPARSE_TENSORS)
|
|
// This class exposes SparseTensor to Python
|
|
// The class serves two major purposes
|
|
// - to be able to map numpy arrays memory and use it on input, this serves as a reference holder
|
|
// so incoming arrays do not disappear. To this end we create an instance of SparseTensor
|
|
// on top of the user provided numpy arrays and create a duplicate of py::objects for those
|
|
// numpy array for ref-counting purposes and store it here.
|
|
//
|
|
// - to be able to expose SparseTensor returned from run method. We get an OrtValue from run()
|
|
// and store a copy of it in ort_value_. The OrtValue shared_ptr ref-counting will make sure
|
|
// the memory stays around.
|
|
//
|
|
// An object of the class must never have both instance_ and ort_value_ have data at the same time.
|
|
class PySparseTensor {
|
|
public:
|
|
/// <summary>
|
|
/// Use this constructor when you created a SparseTensor instance which is backed
|
|
/// by python array storage and it important that they stay alive while this object is
|
|
/// alive
|
|
/// </summary>
|
|
/// <param name="instance">a fully constructed and populated instance of SparseTensor</param>
|
|
/// <param name="storage">a collection reference guards</param>
|
|
PySparseTensor(std::unique_ptr<SparseTensor>&& instance,
|
|
std::vector<pybind11::object>&& storage)
|
|
: instance_(std::move(instance)), backing_storage_(std::move(storage)), ort_value_() {
|
|
}
|
|
|
|
/// <summary>
|
|
/// Same as above but no backing storage as SparseTensor owns the memory
|
|
/// </summary>
|
|
/// <param name="instance"></param>
|
|
explicit PySparseTensor(std::unique_ptr<SparseTensor>&& instance)
|
|
: instance_(std::move(instance)), backing_storage_(), ort_value_() {
|
|
}
|
|
|
|
/// <summary>
|
|
/// Edge case when we can not copy memory on GPU and therefore
|
|
/// can not own it.
|
|
/// </summary>
|
|
/// <param name="ort_value"></param>
|
|
explicit PySparseTensor(const OrtValue& ort_value)
|
|
: instance_(), backing_storage_(), ort_value_(ort_value) {}
|
|
|
|
PySparseTensor(const PySparseTensor&) = delete;
|
|
PySparseTensor& operator=(const PySparseTensor&) = delete;
|
|
|
|
PySparseTensor(PySparseTensor&& o) noexcept {
|
|
*this = std::move(o);
|
|
}
|
|
|
|
PySparseTensor& operator=(PySparseTensor&& o) noexcept {
|
|
instance_ = std::move(o.instance_);
|
|
backing_storage_ = std::move(o.backing_storage_);
|
|
ort_value_ = std::move(o.ort_value_);
|
|
return *this;
|
|
}
|
|
|
|
~PySparseTensor();
|
|
|
|
const SparseTensor& Instance() const {
|
|
if (instance_) {
|
|
return *instance_;
|
|
}
|
|
return ort_value_.Get<SparseTensor>();
|
|
}
|
|
|
|
std::unique_ptr<OrtValue> AsOrtValue() const;
|
|
|
|
private:
|
|
// instance_ represents data that comes as input. Thus we depend on numpy
|
|
// arrays that own the underlying memory to stay around. We store copies
|
|
// of py::objects for those arrays in backing_storage_ as an extra ref-count.
|
|
|
|
// If we have and are able to copy from the OrtValue returned by run() to CPU, then this owns the data
|
|
// and backing_storage_ is empty.
|
|
std::unique_ptr<SparseTensor> instance_;
|
|
std::vector<pybind11::object> backing_storage_;
|
|
|
|
// We create a copy of OrtValue when we obtain it from a run method.
|
|
OrtValue ort_value_;
|
|
};
|
|
#endif // !defined(DISABLE_SPARSE_TENSORS)
|
|
|
|
#if defined(_MSC_VER) && !defined(__clang__)
|
|
#pragma warning(push)
|
|
// You can attempt to make 'onnxruntime::python::SessionObjectInitializer::Get' constexpr
|
|
#pragma warning(disable : 26497)
|
|
#endif
|
|
class SessionObjectInitializer {
|
|
public:
|
|
typedef const PySessionOptions& Arg1;
|
|
// typedef logging::LoggingManager* Arg2;
|
|
static const std::string default_logger_id;
|
|
operator Arg1() {
|
|
return GetDefaultCPUSessionOptions();
|
|
}
|
|
|
|
// operator Arg2() {
|
|
// static LoggingManager default_logging_manager{std::unique_ptr<ISink>{new CErrSink{}},
|
|
// Severity::kWARNING, false, LoggingManager::InstanceType::Default,
|
|
// &default_logger_id};
|
|
// return &default_logging_manager;
|
|
// }
|
|
|
|
static SessionObjectInitializer Get() {
|
|
return SessionObjectInitializer();
|
|
}
|
|
};
|
|
#if defined(_MSC_VER) && !defined(__clang__)
|
|
#pragma warning(pop)
|
|
#endif
|
|
std::shared_ptr<Environment> GetEnv();
|
|
|
|
// Initialize an InferenceSession.
|
|
// Any provider_options should have entries in matching order to provider_types.
|
|
void InitializeSession(InferenceSession* sess,
|
|
ExecutionProviderRegistrationFn ep_registration_fn,
|
|
const std::vector<std::string>& provider_types = {},
|
|
const ProviderOptionsVector& provider_options = {},
|
|
const std::unordered_set<std::string>& disabled_optimizer_names = {});
|
|
|
|
// Checks if PyErrOccured, fetches status and throws.
|
|
void ThrowIfPyErrOccured();
|
|
|
|
void addOrtValueMethods(pybind11::module& m);
|
|
|
|
void addIoBindingMethods(pybind11::module& m);
|
|
|
|
void addSparseTensorMethods(pybind11::module& m);
|
|
|
|
void addGlobalSchemaFunctions(pybind11::module& m);
|
|
|
|
void addOpKernelSubmodule(pybind11::module& m);
|
|
|
|
void addOpSchemaSubmodule(pybind11::module& m);
|
|
|
|
const char* GetDeviceName(const OrtDevice& device);
|
|
|
|
bool IsCudaDeviceIdValid(const onnxruntime::logging::Logger& logger, int id);
|
|
|
|
AllocatorPtr GetCudaAllocator(OrtDevice::DeviceId id);
|
|
|
|
bool CheckIfTensor(const std::vector<const NodeArg*>& def_list,
|
|
const std::string& name,
|
|
/*out*/ ONNX_NAMESPACE::TypeProto& type_proto);
|
|
|
|
#ifdef ENABLE_TRAINING
|
|
|
|
// Allocate a new Capsule object, which takes the ownership of OrtValue.
|
|
// Caller is responsible for releasing.
|
|
// This function calls OrtValueToDlpack(...).
|
|
PyObject* ToDlpack(OrtValue ort_value);
|
|
|
|
// Consumes a Capsule object and claims ownership of its underlying tensor to
// create an OrtValue. This function calls DlpackToOrtValue(...) to do the conversion.
|
|
OrtValue FromDlpack(PyObject* dlpack_tensor, const bool is_bool_tensor);
|
|
|
|
// Destructor for Capsule object holding a DLPack structure.
|
|
void DlpackCapsuleDestructor(PyObject* data);
|
|
|
|
#endif
|
|
|
|
} // namespace python
|
|
|
|
std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_Tensorrt(const OrtTensorRTProviderOptions* params);
|
|
std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_Tensorrt(const OrtTensorRTProviderOptionsV2* params);
|
|
std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_Tensorrt(int device_id);
|
|
std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_MIGraphX(const OrtMIGraphXProviderOptions* params);
|
|
std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_MIGraphX(int device_id);
|
|
std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_Cuda(const OrtCUDAProviderOptions* params);
|
|
std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_Dnnl(const OrtDnnlProviderOptions* params);
|
|
#ifdef USE_TVM
|
|
std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_Tvm(const tvm::TvmEPOptions& info);
|
|
std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_Tvm(const char* params);
|
|
#endif
|
|
std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_ACL(int use_arena);
|
|
std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_ArmNN(int use_arena);
|
|
std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_DML(int device_id);
|
|
std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_Nnapi(
|
|
uint32_t flags, const optional<std::string>& partitioning_stop_ops_list);
|
|
std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_Rknpu();
|
|
std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_CoreML(uint32_t flags);
|
|
// Default entry name, "GetProvider" (presumably the symbol looked up in an execution provider
// library when no other entry is specified -- TODO confirm against the loading code).
constexpr const char* kDefaultExecutionProviderEntry = "GetProvider";
|
|
} // namespace onnxruntime
|