mirror of
https://github.com/saymrwulf/onnxruntime.git
synced 2026-05-22 22:01:08 +00:00
Merge related issues and code review feedback.
This commit is contained in:
parent
67adb7bfe0
commit
9ddbb046a2
11 changed files with 55 additions and 69 deletions
|
|
@ -193,6 +193,7 @@ if(onnxruntime_USE_VALGRIND AND NOT WIN32)
|
|||
endif()
|
||||
|
||||
if (onnxruntime_ENABLE_NVTX_PROFILE)
|
||||
message(WARNING "NTVX profile temporarily disabled, will be fixed soon")
|
||||
# TODO: This doesn't work with the shared CUDA provider. Temporarily disabled until a clean fix can be made, as the fix wasn't trivial.
|
||||
# add_definitions(-DENABLE_NVTX_PROFILE=1)
|
||||
endif()
|
||||
|
|
|
|||
|
|
@ -34,8 +34,12 @@ namespace onnxruntime {
|
|||
*/
|
||||
class Tensor final {
|
||||
public:
|
||||
static std::unique_ptr<Tensor> Create(MLDataType p_type, const TensorShape& shape, std::shared_ptr<IAllocator> allocator) { return std::make_unique<Tensor>(p_type, shape, allocator); }
|
||||
static std::unique_ptr<Tensor> Create(MLDataType p_type, const TensorShape& shape, void* p_data, const OrtMemoryInfo& alloc, ptrdiff_t offset = 0) { return std::make_unique<Tensor>(p_type, shape, p_data, alloc, offset); }
|
||||
static std::unique_ptr<Tensor> Create(MLDataType p_type, const TensorShape& shape, std::shared_ptr<IAllocator> allocator) {
|
||||
return std::make_unique<Tensor>(p_type, shape, allocator);
|
||||
}
|
||||
static std::unique_ptr<Tensor> Create(MLDataType p_type, const TensorShape& shape, void* p_data, const OrtMemoryInfo& alloc, ptrdiff_t offset = 0) {
|
||||
return std::make_unique<Tensor>(p_type, shape, p_data, alloc, offset);
|
||||
}
|
||||
|
||||
Tensor() = default; // to allow creating vector<Tensor> to support seq(tensor)
|
||||
|
||||
|
|
|
|||
|
|
@ -120,6 +120,7 @@ final class OnnxRuntime {
|
|||
* in time.
|
||||
*
|
||||
* @param file The file to remove.
|
||||
* @param onExitOnly If true, only delete the file on exit rather than trying to delete it immediately.
|
||||
*/
|
||||
private static void cleanUp(File file, boolean onExitOnly) {
|
||||
if (!file.exists()) {
|
||||
|
|
@ -146,6 +147,7 @@ final class OnnxRuntime {
|
|||
*
|
||||
* @param tempDirectory The temp directory to write the library resource to.
|
||||
* @param library The bare name of the library.
|
||||
* @param systemLoad If true, System.load(..) is called on the library; otherwise the library is only prepared.
|
||||
* @throws IOException If the file failed to read or write.
|
||||
*/
|
||||
private static void load(Path tempDirectory, String library, boolean systemLoad)
|
||||
|
|
|
|||
|
|
@ -1,10 +1,10 @@
|
|||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// Licensed under the MIT License.
|
||||
|
||||
#include "core/providers/cuda/cuda_common.h"
|
||||
#include "core/providers/cuda/shared_inc/fpgeneric.h"
|
||||
#include "attention.h"
|
||||
#include "attention_impl.h"
|
||||
#include "core/providers/cuda/cuda_common.h"
|
||||
#include "core/providers/cuda/shared_inc/fpgeneric.h"
|
||||
|
||||
using namespace onnxruntime::cuda;
|
||||
using namespace ::onnxruntime::common;
|
||||
|
|
|
|||
|
|
@ -1007,48 +1007,48 @@ std::unique_ptr<IAllocator> CreateCUDAPinnedAllocator(int16_t device_id, const c
|
|||
}
|
||||
|
||||
std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_Cuda(const OrtCUDAProviderOptions* provider_options) {
|
||||
if (auto provider = s_library_cuda.Get())
|
||||
if (auto* provider = s_library_cuda.Get())
|
||||
return provider->CreateExecutionProviderFactory(provider_options);
|
||||
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_Dnnl(int use_arena) {
|
||||
if (auto provider = s_library_dnnl.Get())
|
||||
if (auto* provider = s_library_dnnl.Get())
|
||||
return provider->CreateExecutionProviderFactory(use_arena);
|
||||
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_Tensorrt(int device_id) {
|
||||
if (auto provider = s_library_tensorrt.Get())
|
||||
if (auto* provider = s_library_tensorrt.Get())
|
||||
return provider->CreateExecutionProviderFactory(device_id);
|
||||
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_Tensorrt(const OrtTensorRTProviderOptions* provider_options) {
|
||||
if (auto provider = s_library_tensorrt.Get())
|
||||
if (auto* provider = s_library_tensorrt.Get())
|
||||
return provider->CreateExecutionProviderFactory(provider_options);
|
||||
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_OpenVINO(const OrtOpenVINOProviderOptions* provider_options) {
|
||||
if (auto provider = s_library_openvino.Get())
|
||||
if (auto* provider = s_library_openvino.Get())
|
||||
return provider->CreateExecutionProviderFactory(provider_options);
|
||||
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
ProviderInfo_OpenVINO* GetProviderInfo_OpenVINO() {
|
||||
if (auto provider = s_library_openvino.Get())
|
||||
if (auto* provider = s_library_openvino.Get())
|
||||
return reinterpret_cast<ProviderInfo_OpenVINO*>(provider->GetInfo());
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
ProviderInfo_CUDA* GetProviderInfo_CUDA() {
|
||||
if (auto provider = s_library_cuda.Get())
|
||||
if (auto* provider = s_library_cuda.Get())
|
||||
return reinterpret_cast<ProviderInfo_CUDA*>(provider->GetInfo());
|
||||
LOGS_DEFAULT(WARNING) << "GetProviderInfo_CUDA called, returning nullptr";
|
||||
ORT_THROW("CUDA Provider not available, can't get interface for it");
|
||||
|
|
|
|||
|
|
@ -17,10 +17,10 @@ namespace cuda {
|
|||
13, \
|
||||
T, \
|
||||
kCudaExecutionProvider, \
|
||||
KernelDefBuilder() \
|
||||
(*KernelDefBuilder::Create()) \
|
||||
.TypeConstraint("T", DataTypeImpl::GetTensorType<T>()) \
|
||||
.TypeConstraint("T1", DataTypeImpl::GetTensorType<int32_t>()) \
|
||||
.InputMemoryType<OrtMemTypeCPUInput>(RNN_Input_Index::sequence_lens), \
|
||||
.InputMemoryType(OrtMemTypeCPUInput, RNN_Input_Index::sequence_lens), \
|
||||
GRU<T>);
|
||||
|
||||
#define REGISTER_KERNEL_TYPED(T) \
|
||||
|
|
|
|||
|
|
@ -15,10 +15,10 @@ namespace cuda {
|
|||
13, \
|
||||
T, \
|
||||
kCudaExecutionProvider, \
|
||||
KernelDefBuilder() \
|
||||
(*KernelDefBuilder::Create()) \
|
||||
.TypeConstraint("T", DataTypeImpl::GetTensorType<T>()) \
|
||||
.TypeConstraint("T1", DataTypeImpl::GetTensorType<int32_t>()) \
|
||||
.InputMemoryType<OrtMemTypeCPUInput>(RNN_Input_Index::sequence_lens), \
|
||||
.InputMemoryType(OrtMemTypeCPUInput, RNN_Input_Index::sequence_lens), \
|
||||
LSTM<T>);
|
||||
|
||||
#define REGISTER_KERNEL_TYPED(T) \
|
||||
|
|
|
|||
|
|
@ -17,10 +17,10 @@ namespace cuda {
|
|||
13, \
|
||||
T, \
|
||||
kCudaExecutionProvider, \
|
||||
KernelDefBuilder() \
|
||||
(*KernelDefBuilder::Create()) \
|
||||
.TypeConstraint("T", DataTypeImpl::GetTensorType<T>()) \
|
||||
.TypeConstraint("T1", DataTypeImpl::GetTensorType<int32_t>()) \
|
||||
.InputMemoryType<OrtMemTypeCPUInput>(RNN_Input_Index::sequence_lens), \
|
||||
.InputMemoryType(OrtMemTypeCPUInput, RNN_Input_Index::sequence_lens), \
|
||||
RNN<T>);
|
||||
|
||||
#define REGISTER_KERNEL_TYPED(T) \
|
||||
|
|
|
|||
|
|
@ -11,11 +11,11 @@ ONNX_OPERATOR_KERNEL_EX(
|
|||
kOnnxDomain,
|
||||
14,
|
||||
kCudaExecutionProvider,
|
||||
KernelDefBuilder()
|
||||
(*KernelDefBuilder::Create())
|
||||
.TypeConstraint("T", DataTypeImpl::AllFixedSizeTensorTypes())
|
||||
.TypeConstraint("shape", DataTypeImpl::GetTensorType<int64_t>())
|
||||
.Alias(0, 0)
|
||||
.InputMemoryType<OrtMemTypeCPUInput>(1),
|
||||
.InputMemoryType(OrtMemTypeCPUInput, 1),
|
||||
Reshape);
|
||||
|
||||
ONNX_OPERATOR_VERSIONED_KERNEL_EX(
|
||||
|
|
|
|||
|
|
@ -206,23 +206,6 @@ using NameMLValMap = std::unordered_map<std::string, OrtValue>;
|
|||
|
||||
namespace onnxruntime {
|
||||
|
||||
// From Tensor.h
|
||||
class BufferDeleter {
|
||||
public:
|
||||
BufferDeleter() : alloc_(nullptr) {}
|
||||
BufferDeleter(AllocatorPtr alloc) : alloc_(alloc) {}
|
||||
|
||||
void operator()(void* p) const {
|
||||
if (alloc_)
|
||||
alloc_->Free(p);
|
||||
}
|
||||
|
||||
private:
|
||||
AllocatorPtr alloc_;
|
||||
};
|
||||
|
||||
using BufferUniquePtr = std::unique_ptr<void, BufferDeleter>;
|
||||
|
||||
// The function passed in will be run on provider DLL unload. This is used to free thread_local variables that live in threads we don't own.
|
||||
// Since these are not destroyed when the DLL unloads we have to do it manually. Search for usage for an example.
|
||||
void RunOnUnload(std::function<void()> function);
|
||||
|
|
|
|||
|
|
@ -8,6 +8,35 @@
|
|||
#include <mutex>
|
||||
#include "core/providers/shared/common.h"
|
||||
|
||||
#include "core/framework/random_generator.h"
|
||||
#include "core/providers/cpu/controlflow/if.h"
|
||||
#include "core/providers/cpu/controlflow/loop.h"
|
||||
#include "core/providers/cpu/controlflow/scan.h"
|
||||
#include "core/providers/cpu/math/einsum.h"
|
||||
#include "core/providers/cpu/object_detection/non_max_suppression.h"
|
||||
#include "core/providers/cpu/tensor/concatbase.h"
|
||||
#include "core/providers/cpu/tensor/padbase.h"
|
||||
#include "core/providers/cpu/tensor/gatherbase.h"
|
||||
#include "core/providers/cpu/tensor/slice.h"
|
||||
#include "core/providers/cpu/tensor/split.h"
|
||||
#include "core/providers/cpu/tensor/size.h"
|
||||
#include "core/providers/cpu/tensor/scatter_nd.h"
|
||||
#include "core/providers/cpu/tensor/unsqueeze.h"
|
||||
#include "core/providers/cpu/tensor/tile.h"
|
||||
|
||||
#ifndef DISABLE_CONTRIB_OPS
|
||||
#include "contrib_ops/cpu/bert/attention_base.h"
|
||||
#include "contrib_ops/cpu/bert/bias_gelu_helper.h"
|
||||
#include "contrib_ops/cpu/bert/embed_layer_norm_helper.h"
|
||||
#include "contrib_ops/cpu/bert/longformer_attention_base.h"
|
||||
#endif
|
||||
|
||||
#ifdef ENABLE_TRAINING
|
||||
#include "orttraining/training_ops/cpu/aten_ops/aten_op.h"
|
||||
#include "orttraining/training_ops/cpu/controlflow/group.h"
|
||||
#include "orttraining/training_ops/cpu/controlflow/yield.h"
|
||||
#endif
|
||||
|
||||
#ifndef _Ret_notnull_
|
||||
#define _Ret_notnull_
|
||||
#endif
|
||||
|
|
@ -311,39 +340,6 @@ std::unique_ptr<OpKernelInfo> CopyOpKernelInfo(const OpKernelInfo& info) {
|
|||
return g_host->CopyOpKernelInfo(info);
|
||||
}
|
||||
|
||||
} // namespace onnxruntime
|
||||
|
||||
#include "core/providers/cpu/tensor/unsqueeze.h"
|
||||
#include "core/providers/cpu/tensor/slice.h"
|
||||
#include "core/providers/cpu/tensor/split.h"
|
||||
#include "core/providers/cpu/tensor/size.h"
|
||||
#include "core/providers/cpu/tensor/scatter_nd.h"
|
||||
#include "core/providers/cpu/tensor/padbase.h"
|
||||
#include "core/providers/cpu/tensor/concatbase.h"
|
||||
#include "core/providers/cpu/tensor/gatherbase.h"
|
||||
#include "core/providers/cpu/controlflow/scan.h"
|
||||
#include "core/providers/cpu/controlflow/loop.h"
|
||||
#include "core/providers/cpu/tensor/tile.h"
|
||||
#include "core/providers/cpu/object_detection/non_max_suppression.h"
|
||||
#include "core/framework/random_generator.h"
|
||||
#include "core/providers/cpu/math/einsum.h"
|
||||
#include "core/providers/cpu/controlflow/if.h"
|
||||
|
||||
#ifndef DISABLE_CONTRIB_OPS
|
||||
#include "contrib_ops/cpu/bert/bias_gelu_helper.h"
|
||||
#include "contrib_ops/cpu/bert/embed_layer_norm_helper.h"
|
||||
#include "contrib_ops/cpu/bert/longformer_attention_base.h"
|
||||
#include "contrib_ops/cpu/bert/attention_base.h"
|
||||
#endif
|
||||
|
||||
#ifdef ENABLE_TRAINING
|
||||
#include "orttraining/training_ops/cpu/aten_ops/aten_op.h"
|
||||
#include "orttraining/training_ops/cpu/controlflow/group.h"
|
||||
#include "orttraining/training_ops/cpu/controlflow/yield.h"
|
||||
#endif
|
||||
|
||||
namespace onnxruntime {
|
||||
|
||||
namespace utils {
|
||||
template <>
|
||||
Status UnpackTensor(const ONNX_NAMESPACE::TensorProto& tensor, const void* raw_data, size_t raw_data_len, /*out*/ bool* p_data, size_t expected_size) { return g_host->UnpackTensor(tensor, raw_data, raw_data_len, p_data, expected_size); }
|
||||
|
|
|
|||
Loading…
Reference in a new issue