From 9ddbb046a2c1a26b26ea4628f25c2f6147ae4004 Mon Sep 17 00:00:00 2001 From: Ryan Hill Date: Sun, 16 May 2021 19:22:03 -0700 Subject: [PATCH] Merge related issues and code review feedback. --- cmake/CMakeLists.txt | 1 + include/onnxruntime/core/framework/tensor.h | 8 ++- .../main/java/ai/onnxruntime/OnnxRuntime.java | 2 + .../contrib_ops/cuda/bert/attention.cc | 4 +- .../core/framework/provider_bridge_ort.cc | 14 ++--- onnxruntime/core/providers/cuda/rnn/gru.cc | 4 +- onnxruntime/core/providers/cuda/rnn/lstm.cc | 4 +- onnxruntime/core/providers/cuda/rnn/rnn.cc | 4 +- .../core/providers/cuda/tensor/reshape.cc | 4 +- .../providers/shared_library/provider_api.h | 17 ----- .../provider_bridge_provider.cc | 62 +++++++++---------- 11 files changed, 55 insertions(+), 69 deletions(-) diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt index fb7a898085..9e7a537d01 100644 --- a/cmake/CMakeLists.txt +++ b/cmake/CMakeLists.txt @@ -193,6 +193,7 @@ if(onnxruntime_USE_VALGRIND AND NOT WIN32) endif() if (onnxruntime_ENABLE_NVTX_PROFILE) + message(WARNING "NVTX profile temporarily disabled, will be fixed soon") # TODO: This doesn't work with the shared cuda provider. 
Disabling temporarily to do a clean fix later as it wasn't trivial # add_definitions(-DENABLE_NVTX_PROFILE=1) endif() diff --git a/include/onnxruntime/core/framework/tensor.h b/include/onnxruntime/core/framework/tensor.h index cea531a6e1..4b09260856 100644 --- a/include/onnxruntime/core/framework/tensor.h +++ b/include/onnxruntime/core/framework/tensor.h @@ -34,8 +34,12 @@ namespace onnxruntime { */ class Tensor final { public: - static std::unique_ptr Create(MLDataType p_type, const TensorShape& shape, std::shared_ptr allocator) { return std::make_unique(p_type, shape, allocator); } - static std::unique_ptr Create(MLDataType p_type, const TensorShape& shape, void* p_data, const OrtMemoryInfo& alloc, ptrdiff_t offset = 0) { return std::make_unique(p_type, shape, p_data, alloc, offset); } + static std::unique_ptr Create(MLDataType p_type, const TensorShape& shape, std::shared_ptr allocator) { + return std::make_unique(p_type, shape, allocator); + } + static std::unique_ptr Create(MLDataType p_type, const TensorShape& shape, void* p_data, const OrtMemoryInfo& alloc, ptrdiff_t offset = 0) { + return std::make_unique(p_type, shape, p_data, alloc, offset); + } Tensor() = default; // to allow creating vector to support seq(tensor) diff --git a/java/src/main/java/ai/onnxruntime/OnnxRuntime.java b/java/src/main/java/ai/onnxruntime/OnnxRuntime.java index a6b7085e96..4e6db62ace 100644 --- a/java/src/main/java/ai/onnxruntime/OnnxRuntime.java +++ b/java/src/main/java/ai/onnxruntime/OnnxRuntime.java @@ -120,6 +120,7 @@ final class OnnxRuntime { * in time. * * @param file The file to remove. + * @param onExitOnly Delete the file on exit only, vs trying to do it immediately */ private static void cleanUp(File file, boolean onExitOnly) { if (!file.exists()) { @@ -146,6 +147,7 @@ final class OnnxRuntime { * * @param tempDirectory The temp directory to write the library resource to. * @param library The bare name of the library. + * @param systemLoad If System.load(..) 
should be called on the library vs just preparing it * @throws IOException If the file failed to read or write. */ private static void load(Path tempDirectory, String library, boolean systemLoad) diff --git a/onnxruntime/contrib_ops/cuda/bert/attention.cc b/onnxruntime/contrib_ops/cuda/bert/attention.cc index 8cc92094c4..733b98ea1c 100644 --- a/onnxruntime/contrib_ops/cuda/bert/attention.cc +++ b/onnxruntime/contrib_ops/cuda/bert/attention.cc @@ -1,10 +1,10 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. -#include "core/providers/cuda/cuda_common.h" -#include "core/providers/cuda/shared_inc/fpgeneric.h" #include "attention.h" #include "attention_impl.h" +#include "core/providers/cuda/cuda_common.h" +#include "core/providers/cuda/shared_inc/fpgeneric.h" using namespace onnxruntime::cuda; using namespace ::onnxruntime::common; diff --git a/onnxruntime/core/framework/provider_bridge_ort.cc b/onnxruntime/core/framework/provider_bridge_ort.cc index 09f665eb8a..0d26c876ab 100644 --- a/onnxruntime/core/framework/provider_bridge_ort.cc +++ b/onnxruntime/core/framework/provider_bridge_ort.cc @@ -1007,48 +1007,48 @@ std::unique_ptr CreateCUDAPinnedAllocator(int16_t device_id, const c } std::shared_ptr CreateExecutionProviderFactory_Cuda(const OrtCUDAProviderOptions* provider_options) { - if (auto provider = s_library_cuda.Get()) + if (auto* provider = s_library_cuda.Get()) return provider->CreateExecutionProviderFactory(provider_options); return nullptr; } std::shared_ptr CreateExecutionProviderFactory_Dnnl(int use_arena) { - if (auto provider = s_library_dnnl.Get()) + if (auto* provider = s_library_dnnl.Get()) return provider->CreateExecutionProviderFactory(use_arena); return nullptr; } std::shared_ptr CreateExecutionProviderFactory_Tensorrt(int device_id) { - if (auto provider = s_library_tensorrt.Get()) + if (auto* provider = s_library_tensorrt.Get()) return provider->CreateExecutionProviderFactory(device_id); return 
nullptr; } std::shared_ptr CreateExecutionProviderFactory_Tensorrt(const OrtTensorRTProviderOptions* provider_options) { - if (auto provider = s_library_tensorrt.Get()) + if (auto* provider = s_library_tensorrt.Get()) return provider->CreateExecutionProviderFactory(provider_options); return nullptr; } std::shared_ptr CreateExecutionProviderFactory_OpenVINO(const OrtOpenVINOProviderOptions* provider_options) { - if (auto provider = s_library_openvino.Get()) + if (auto* provider = s_library_openvino.Get()) return provider->CreateExecutionProviderFactory(provider_options); return nullptr; } ProviderInfo_OpenVINO* GetProviderInfo_OpenVINO() { - if (auto provider = s_library_openvino.Get()) + if (auto* provider = s_library_openvino.Get()) return reinterpret_cast(provider->GetInfo()); return nullptr; } ProviderInfo_CUDA* GetProviderInfo_CUDA() { - if (auto provider = s_library_cuda.Get()) + if (auto* provider = s_library_cuda.Get()) return reinterpret_cast(provider->GetInfo()); LOGS_DEFAULT(WARNING) << "GetProviderInfo_CUDA called, returning nullptr"; ORT_THROW("CUDA Provider not available, can't get interface for it"); diff --git a/onnxruntime/core/providers/cuda/rnn/gru.cc b/onnxruntime/core/providers/cuda/rnn/gru.cc index 578c1dd27b..964aebf560 100644 --- a/onnxruntime/core/providers/cuda/rnn/gru.cc +++ b/onnxruntime/core/providers/cuda/rnn/gru.cc @@ -17,10 +17,10 @@ namespace cuda { 13, \ T, \ kCudaExecutionProvider, \ - KernelDefBuilder() \ + (*KernelDefBuilder::Create()) \ .TypeConstraint("T", DataTypeImpl::GetTensorType()) \ .TypeConstraint("T1", DataTypeImpl::GetTensorType()) \ - .InputMemoryType(RNN_Input_Index::sequence_lens), \ + .InputMemoryType(OrtMemTypeCPUInput, RNN_Input_Index::sequence_lens), \ GRU); #define REGISTER_KERNEL_TYPED(T) \ diff --git a/onnxruntime/core/providers/cuda/rnn/lstm.cc b/onnxruntime/core/providers/cuda/rnn/lstm.cc index ad9e2ddecc..890d15cef6 100644 --- a/onnxruntime/core/providers/cuda/rnn/lstm.cc +++ 
b/onnxruntime/core/providers/cuda/rnn/lstm.cc @@ -15,10 +15,10 @@ namespace cuda { 13, \ T, \ kCudaExecutionProvider, \ - KernelDefBuilder() \ + (*KernelDefBuilder::Create()) \ .TypeConstraint("T", DataTypeImpl::GetTensorType()) \ .TypeConstraint("T1", DataTypeImpl::GetTensorType()) \ - .InputMemoryType(RNN_Input_Index::sequence_lens), \ + .InputMemoryType(OrtMemTypeCPUInput, RNN_Input_Index::sequence_lens), \ LSTM); #define REGISTER_KERNEL_TYPED(T) \ diff --git a/onnxruntime/core/providers/cuda/rnn/rnn.cc b/onnxruntime/core/providers/cuda/rnn/rnn.cc index b438f981c9..4bd22340ef 100644 --- a/onnxruntime/core/providers/cuda/rnn/rnn.cc +++ b/onnxruntime/core/providers/cuda/rnn/rnn.cc @@ -17,10 +17,10 @@ namespace cuda { 13, \ T, \ kCudaExecutionProvider, \ - KernelDefBuilder() \ + (*KernelDefBuilder::Create()) \ .TypeConstraint("T", DataTypeImpl::GetTensorType()) \ .TypeConstraint("T1", DataTypeImpl::GetTensorType()) \ - .InputMemoryType(RNN_Input_Index::sequence_lens), \ + .InputMemoryType(OrtMemTypeCPUInput, RNN_Input_Index::sequence_lens), \ RNN); #define REGISTER_KERNEL_TYPED(T) \ diff --git a/onnxruntime/core/providers/cuda/tensor/reshape.cc b/onnxruntime/core/providers/cuda/tensor/reshape.cc index 297768fcf4..61bca5bfe7 100644 --- a/onnxruntime/core/providers/cuda/tensor/reshape.cc +++ b/onnxruntime/core/providers/cuda/tensor/reshape.cc @@ -11,11 +11,11 @@ ONNX_OPERATOR_KERNEL_EX( kOnnxDomain, 14, kCudaExecutionProvider, - KernelDefBuilder() + (*KernelDefBuilder::Create()) .TypeConstraint("T", DataTypeImpl::AllFixedSizeTensorTypes()) .TypeConstraint("shape", DataTypeImpl::GetTensorType()) .Alias(0, 0) - .InputMemoryType(1), + .InputMemoryType(OrtMemTypeCPUInput, 1), Reshape); ONNX_OPERATOR_VERSIONED_KERNEL_EX( diff --git a/onnxruntime/core/providers/shared_library/provider_api.h b/onnxruntime/core/providers/shared_library/provider_api.h index e21882f5df..3c58150e9f 100644 --- a/onnxruntime/core/providers/shared_library/provider_api.h +++ 
b/onnxruntime/core/providers/shared_library/provider_api.h @@ -206,23 +206,6 @@ using NameMLValMap = std::unordered_map; namespace onnxruntime { -// From Tensor.h -class BufferDeleter { - public: - BufferDeleter() : alloc_(nullptr) {} - BufferDeleter(AllocatorPtr alloc) : alloc_(alloc) {} - - void operator()(void* p) const { - if (alloc_) - alloc_->Free(p); - } - - private: - AllocatorPtr alloc_; -}; - -using BufferUniquePtr = std::unique_ptr; - // The function passed in will be run on provider DLL unload. This is used to free thread_local variables that are in threads we don't own // Since these are not destroyed when the DLL unloads we have to do it manually. Search for usage for an example. void RunOnUnload(std::function function); diff --git a/onnxruntime/core/providers/shared_library/provider_bridge_provider.cc b/onnxruntime/core/providers/shared_library/provider_bridge_provider.cc index 26ab338108..058211379d 100644 --- a/onnxruntime/core/providers/shared_library/provider_bridge_provider.cc +++ b/onnxruntime/core/providers/shared_library/provider_bridge_provider.cc @@ -8,6 +8,35 @@ #include #include "core/providers/shared/common.h" +#include "core/framework/random_generator.h" +#include "core/providers/cpu/controlflow/if.h" +#include "core/providers/cpu/controlflow/loop.h" +#include "core/providers/cpu/controlflow/scan.h" +#include "core/providers/cpu/math/einsum.h" +#include "core/providers/cpu/object_detection/non_max_suppression.h" +#include "core/providers/cpu/tensor/concatbase.h" +#include "core/providers/cpu/tensor/padbase.h" +#include "core/providers/cpu/tensor/gatherbase.h" +#include "core/providers/cpu/tensor/slice.h" +#include "core/providers/cpu/tensor/split.h" +#include "core/providers/cpu/tensor/size.h" +#include "core/providers/cpu/tensor/scatter_nd.h" +#include "core/providers/cpu/tensor/unsqueeze.h" +#include "core/providers/cpu/tensor/tile.h" + +#ifndef DISABLE_CONTRIB_OPS +#include "contrib_ops/cpu/bert/attention_base.h" +#include 
"contrib_ops/cpu/bert/bias_gelu_helper.h" +#include "contrib_ops/cpu/bert/embed_layer_norm_helper.h" +#include "contrib_ops/cpu/bert/longformer_attention_base.h" +#endif + +#ifdef ENABLE_TRAINING +#include "orttraining/training_ops/cpu/aten_ops/aten_op.h" +#include "orttraining/training_ops/cpu/controlflow/group.h" +#include "orttraining/training_ops/cpu/controlflow/yield.h" +#endif + #ifndef _Ret_notnull_ #define _Ret_notnull_ #endif @@ -311,39 +340,6 @@ std::unique_ptr CopyOpKernelInfo(const OpKernelInfo& info) { return g_host->CopyOpKernelInfo(info); } -} // namespace onnxruntime - -#include "core/providers/cpu/tensor/unsqueeze.h" -#include "core/providers/cpu/tensor/slice.h" -#include "core/providers/cpu/tensor/split.h" -#include "core/providers/cpu/tensor/size.h" -#include "core/providers/cpu/tensor/scatter_nd.h" -#include "core/providers/cpu/tensor/padbase.h" -#include "core/providers/cpu/tensor/concatbase.h" -#include "core/providers/cpu/tensor/gatherbase.h" -#include "core/providers/cpu/controlflow/scan.h" -#include "core/providers/cpu/controlflow/loop.h" -#include "core/providers/cpu/tensor/tile.h" -#include "core/providers/cpu/object_detection/non_max_suppression.h" -#include "core/framework/random_generator.h" -#include "core/providers/cpu/math/einsum.h" -#include "core/providers/cpu/controlflow/if.h" - -#ifndef DISABLE_CONTRIB_OPS -#include "contrib_ops/cpu/bert/bias_gelu_helper.h" -#include "contrib_ops/cpu/bert/embed_layer_norm_helper.h" -#include "contrib_ops/cpu/bert/longformer_attention_base.h" -#include "contrib_ops/cpu/bert/attention_base.h" -#endif - -#ifdef ENABLE_TRAINING -#include "orttraining/training_ops/cpu/aten_ops/aten_op.h" -#include "orttraining/training_ops/cpu/controlflow/group.h" -#include "orttraining/training_ops/cpu/controlflow/yield.h" -#endif - -namespace onnxruntime { - namespace utils { template <> Status UnpackTensor(const ONNX_NAMESPACE::TensorProto& tensor, const void* raw_data, size_t raw_data_len, /*out*/ bool* p_data, 
size_t expected_size) { return g_host->UnpackTensor(tensor, raw_data, raw_data_len, p_data, expected_size); }