Merge related issues and code review feedback.

This commit is contained in:
Ryan Hill 2021-05-16 19:22:03 -07:00
parent 67adb7bfe0
commit 9ddbb046a2
11 changed files with 55 additions and 69 deletions

View file

@ -193,6 +193,7 @@ if(onnxruntime_USE_VALGRIND AND NOT WIN32)
endif()
if (onnxruntime_ENABLE_NVTX_PROFILE)
  # NVTX profiling was requested, but the define below is intentionally left commented out,
  # so all this branch does is warn the user that the option currently has no effect.
  message(WARNING "NVTX profile temporarily disabled, will be fixed soon")
  # TODO: This doesn't work with the shared cuda provider. Disabling temporarily to do a clean fix later as it wasn't trivial
  # add_definitions(-DENABLE_NVTX_PROFILE=1)
endif()

View file

@ -34,8 +34,12 @@ namespace onnxruntime {
*/
class Tensor final {
public:
static std::unique_ptr<Tensor> Create(MLDataType p_type, const TensorShape& shape, std::shared_ptr<IAllocator> allocator) { return std::make_unique<Tensor>(p_type, shape, allocator); }
static std::unique_ptr<Tensor> Create(MLDataType p_type, const TensorShape& shape, void* p_data, const OrtMemoryInfo& alloc, ptrdiff_t offset = 0) { return std::make_unique<Tensor>(p_type, shape, p_data, alloc, offset); }
static std::unique_ptr<Tensor> Create(MLDataType p_type, const TensorShape& shape, std::shared_ptr<IAllocator> allocator) {
return std::make_unique<Tensor>(p_type, shape, allocator);
}
static std::unique_ptr<Tensor> Create(MLDataType p_type, const TensorShape& shape, void* p_data, const OrtMemoryInfo& alloc, ptrdiff_t offset = 0) {
return std::make_unique<Tensor>(p_type, shape, p_data, alloc, offset);
}
Tensor() = default; // to allow creating vector<Tensor> to support seq(tensor)

View file

@ -120,6 +120,7 @@ final class OnnxRuntime {
* in time.
*
* @param file The file to remove.
* @param onExitOnly If true, only schedule the file for deletion on exit instead of trying to delete it immediately.
*/
private static void cleanUp(File file, boolean onExitOnly) {
if (!file.exists()) {
@ -146,6 +147,7 @@ final class OnnxRuntime {
*
* @param tempDirectory The temp directory to write the library resource to.
* @param library The bare name of the library.
* @param systemLoad Whether System.load(..) should be called on the library, as opposed to only preparing it.
* @throws IOException If the file failed to read or write.
*/
private static void load(Path tempDirectory, String library, boolean systemLoad)

View file

@ -1,10 +1,10 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
#include "core/providers/cuda/cuda_common.h"
#include "core/providers/cuda/shared_inc/fpgeneric.h"
#include "attention.h"
#include "attention_impl.h"
#include "core/providers/cuda/cuda_common.h"
#include "core/providers/cuda/shared_inc/fpgeneric.h"
using namespace onnxruntime::cuda;
using namespace ::onnxruntime::common;

View file

@ -1007,48 +1007,48 @@ std::unique_ptr<IAllocator> CreateCUDAPinnedAllocator(int16_t device_id, const c
}
// Asks the provider obtained from s_library_cuda to create an execution provider factory
// for the given options. Returns nullptr when s_library_cuda.Get() yields no provider.
std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_Cuda(const OrtCUDAProviderOptions* provider_options) {
  // Get() is queried exactly once; the result is only used inside the if.
  if (auto* provider = s_library_cuda.Get())
    return provider->CreateExecutionProviderFactory(provider_options);

  return nullptr;
}
// Asks the provider obtained from s_library_dnnl to create an execution provider factory,
// forwarding use_arena unchanged. Returns nullptr when s_library_dnnl.Get() yields no provider.
std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_Dnnl(int use_arena) {
  // Get() is queried exactly once; the result is only used inside the if.
  if (auto* provider = s_library_dnnl.Get())
    return provider->CreateExecutionProviderFactory(use_arena);

  return nullptr;
}
// Asks the provider obtained from s_library_tensorrt to create an execution provider factory,
// forwarding device_id unchanged. Returns nullptr when s_library_tensorrt.Get() yields no provider.
std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_Tensorrt(int device_id) {
  // Get() is queried exactly once; the result is only used inside the if.
  if (auto* provider = s_library_tensorrt.Get())
    return provider->CreateExecutionProviderFactory(device_id);

  return nullptr;
}
// Overload taking a provider options struct: asks the provider obtained from s_library_tensorrt
// to create an execution provider factory for those options.
// Returns nullptr when s_library_tensorrt.Get() yields no provider.
std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_Tensorrt(const OrtTensorRTProviderOptions* provider_options) {
  // Get() is queried exactly once; the result is only used inside the if.
  if (auto* provider = s_library_tensorrt.Get())
    return provider->CreateExecutionProviderFactory(provider_options);

  return nullptr;
}
// Asks the provider obtained from s_library_openvino to create an execution provider factory
// for the given options. Returns nullptr when s_library_openvino.Get() yields no provider.
std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_OpenVINO(const OrtOpenVINOProviderOptions* provider_options) {
  // Get() is queried exactly once; the result is only used inside the if.
  if (auto* provider = s_library_openvino.Get())
    return provider->CreateExecutionProviderFactory(provider_options);

  return nullptr;
}
// Returns the pointer reported by the OpenVINO provider's GetInfo(), reinterpreted as
// ProviderInfo_OpenVINO*, or nullptr when s_library_openvino.Get() yields no provider.
ProviderInfo_OpenVINO* GetProviderInfo_OpenVINO() {
  // Get() is queried exactly once; the result is only used inside the if.
  if (auto* provider = s_library_openvino.Get())
    return reinterpret_cast<ProviderInfo_OpenVINO*>(provider->GetInfo());

  return nullptr;
}
ProviderInfo_CUDA* GetProviderInfo_CUDA() {
if (auto provider = s_library_cuda.Get())
if (auto* provider = s_library_cuda.Get())
return reinterpret_cast<ProviderInfo_CUDA*>(provider->GetInfo());
LOGS_DEFAULT(WARNING) << "GetProviderInfo_CUDA called, returning nullptr";
ORT_THROW("CUDA Provider not available, can't get interface for it");

View file

@ -17,10 +17,10 @@ namespace cuda {
13, \
T, \
kCudaExecutionProvider, \
KernelDefBuilder() \
(*KernelDefBuilder::Create()) \
.TypeConstraint("T", DataTypeImpl::GetTensorType<T>()) \
.TypeConstraint("T1", DataTypeImpl::GetTensorType<int32_t>()) \
.InputMemoryType<OrtMemTypeCPUInput>(RNN_Input_Index::sequence_lens), \
.InputMemoryType(OrtMemTypeCPUInput, RNN_Input_Index::sequence_lens), \
GRU<T>);
#define REGISTER_KERNEL_TYPED(T) \

View file

@ -15,10 +15,10 @@ namespace cuda {
13, \
T, \
kCudaExecutionProvider, \
KernelDefBuilder() \
(*KernelDefBuilder::Create()) \
.TypeConstraint("T", DataTypeImpl::GetTensorType<T>()) \
.TypeConstraint("T1", DataTypeImpl::GetTensorType<int32_t>()) \
.InputMemoryType<OrtMemTypeCPUInput>(RNN_Input_Index::sequence_lens), \
.InputMemoryType(OrtMemTypeCPUInput, RNN_Input_Index::sequence_lens), \
LSTM<T>);
#define REGISTER_KERNEL_TYPED(T) \

View file

@ -17,10 +17,10 @@ namespace cuda {
13, \
T, \
kCudaExecutionProvider, \
KernelDefBuilder() \
(*KernelDefBuilder::Create()) \
.TypeConstraint("T", DataTypeImpl::GetTensorType<T>()) \
.TypeConstraint("T1", DataTypeImpl::GetTensorType<int32_t>()) \
.InputMemoryType<OrtMemTypeCPUInput>(RNN_Input_Index::sequence_lens), \
.InputMemoryType(OrtMemTypeCPUInput, RNN_Input_Index::sequence_lens), \
RNN<T>);
#define REGISTER_KERNEL_TYPED(T) \

View file

@ -11,11 +11,11 @@ ONNX_OPERATOR_KERNEL_EX(
kOnnxDomain,
14,
kCudaExecutionProvider,
KernelDefBuilder()
(*KernelDefBuilder::Create())
.TypeConstraint("T", DataTypeImpl::AllFixedSizeTensorTypes())
.TypeConstraint("shape", DataTypeImpl::GetTensorType<int64_t>())
.Alias(0, 0)
.InputMemoryType<OrtMemTypeCPUInput>(1),
.InputMemoryType(OrtMemTypeCPUInput, 1),
Reshape);
ONNX_OPERATOR_VERSIONED_KERNEL_EX(

View file

@ -206,23 +206,6 @@ using NameMLValMap = std::unordered_map<std::string, OrtValue>;
namespace onnxruntime {
// From Tensor.h
// Deleter functor that returns a buffer to the allocator it was constructed with.
// A default-constructed deleter holds no allocator, so invoking it does nothing.
class BufferDeleter {
 public:
  BufferDeleter() : alloc_(nullptr) {}
  BufferDeleter(AllocatorPtr alloc) : alloc_(alloc) {}

  // Frees p through the stored allocator; a no-op when no allocator is set.
  void operator()(void* p) const {
    if (!alloc_)
      return;
    alloc_->Free(p);
  }

 private:
  AllocatorPtr alloc_;
};

// Owning pointer to an untyped buffer that is released through BufferDeleter.
using BufferUniquePtr = std::unique_ptr<void, BufferDeleter>;
// The function passed in will be run on provider DLL unload.
// This is used to free thread_local variables that live in threads we don't own:
// they are not destroyed when the DLL unloads, so we have to do it manually.
// Search for usages of RunOnUnload for an example.
void RunOnUnload(std::function<void()> function);

View file

@ -8,6 +8,35 @@
#include <mutex>
#include "core/providers/shared/common.h"
#include "core/framework/random_generator.h"
#include "core/providers/cpu/controlflow/if.h"
#include "core/providers/cpu/controlflow/loop.h"
#include "core/providers/cpu/controlflow/scan.h"
#include "core/providers/cpu/math/einsum.h"
#include "core/providers/cpu/object_detection/non_max_suppression.h"
#include "core/providers/cpu/tensor/concatbase.h"
#include "core/providers/cpu/tensor/padbase.h"
#include "core/providers/cpu/tensor/gatherbase.h"
#include "core/providers/cpu/tensor/slice.h"
#include "core/providers/cpu/tensor/split.h"
#include "core/providers/cpu/tensor/size.h"
#include "core/providers/cpu/tensor/scatter_nd.h"
#include "core/providers/cpu/tensor/unsqueeze.h"
#include "core/providers/cpu/tensor/tile.h"
#ifndef DISABLE_CONTRIB_OPS
#include "contrib_ops/cpu/bert/attention_base.h"
#include "contrib_ops/cpu/bert/bias_gelu_helper.h"
#include "contrib_ops/cpu/bert/embed_layer_norm_helper.h"
#include "contrib_ops/cpu/bert/longformer_attention_base.h"
#endif
#ifdef ENABLE_TRAINING
#include "orttraining/training_ops/cpu/aten_ops/aten_op.h"
#include "orttraining/training_ops/cpu/controlflow/group.h"
#include "orttraining/training_ops/cpu/controlflow/yield.h"
#endif
#ifndef _Ret_notnull_
#define _Ret_notnull_
#endif
@ -311,39 +340,6 @@ std::unique_ptr<OpKernelInfo> CopyOpKernelInfo(const OpKernelInfo& info) {
return g_host->CopyOpKernelInfo(info);
}
} // namespace onnxruntime
#include "core/providers/cpu/tensor/unsqueeze.h"
#include "core/providers/cpu/tensor/slice.h"
#include "core/providers/cpu/tensor/split.h"
#include "core/providers/cpu/tensor/size.h"
#include "core/providers/cpu/tensor/scatter_nd.h"
#include "core/providers/cpu/tensor/padbase.h"
#include "core/providers/cpu/tensor/concatbase.h"
#include "core/providers/cpu/tensor/gatherbase.h"
#include "core/providers/cpu/controlflow/scan.h"
#include "core/providers/cpu/controlflow/loop.h"
#include "core/providers/cpu/tensor/tile.h"
#include "core/providers/cpu/object_detection/non_max_suppression.h"
#include "core/framework/random_generator.h"
#include "core/providers/cpu/math/einsum.h"
#include "core/providers/cpu/controlflow/if.h"
#ifndef DISABLE_CONTRIB_OPS
#include "contrib_ops/cpu/bert/bias_gelu_helper.h"
#include "contrib_ops/cpu/bert/embed_layer_norm_helper.h"
#include "contrib_ops/cpu/bert/longformer_attention_base.h"
#include "contrib_ops/cpu/bert/attention_base.h"
#endif
#ifdef ENABLE_TRAINING
#include "orttraining/training_ops/cpu/aten_ops/aten_op.h"
#include "orttraining/training_ops/cpu/controlflow/group.h"
#include "orttraining/training_ops/cpu/controlflow/yield.h"
#endif
namespace onnxruntime {
namespace utils {
// bool specialization of UnpackTensor: forwards every argument unchanged to
// g_host->UnpackTensor and returns its Status.
template <>
Status UnpackTensor(const ONNX_NAMESPACE::TensorProto& tensor,
                    const void* raw_data,
                    size_t raw_data_len,
                    /*out*/ bool* p_data,
                    size_t expected_size) {
  return g_host->UnpackTensor(tensor, raw_data, raw_data_len, p_data, expected_size);
}