Merge related issues and code review feedback.

2026-07-10 17:37:14 +00:00 · 2021-05-16 19:22:03 -07:00 · 2021-05-16 19:22:03 -07:00 · 9ddbb046a2
commit 9ddbb046a2
parent 67adb7bfe0
11 changed files with 55 additions and 69 deletions
--- a/cmake/CMakeLists.txt
+++ b/cmake/CMakeLists.txt
@ -193,6 +193,7 @@ if(onnxruntime_USE_VALGRIND AND NOT WIN32)
 endif()

 if (onnxruntime_ENABLE_NVTX_PROFILE)
+  message(WARNING "NVTX profile temporarily disabled, will be fixed soon")
 # TODO: This doesn't work with the shared cuda provider. Disabling temporarily to do a clean fix later as it wasn't trivial
 #  add_definitions(-DENABLE_NVTX_PROFILE=1)
 endif()
--- a/include/onnxruntime/core/framework/tensor.h
+++ b/include/onnxruntime/core/framework/tensor.h
@ -34,8 +34,12 @@ namespace onnxruntime {
 */
 class Tensor final {
 public:
-  static std::unique_ptr<Tensor> Create(MLDataType p_type, const TensorShape& shape, std::shared_ptr<IAllocator> allocator) { return std::make_unique<Tensor>(p_type, shape, allocator); }
-  static std::unique_ptr<Tensor> Create(MLDataType p_type, const TensorShape& shape, void* p_data, const OrtMemoryInfo& alloc, ptrdiff_t offset = 0) { return std::make_unique<Tensor>(p_type, shape, p_data, alloc, offset); }
+  static std::unique_ptr<Tensor> Create(MLDataType p_type, const TensorShape& shape, std::shared_ptr<IAllocator> allocator) {
+    return std::make_unique<Tensor>(p_type, shape, allocator);
+  }
+  static std::unique_ptr<Tensor> Create(MLDataType p_type, const TensorShape& shape, void* p_data, const OrtMemoryInfo& alloc, ptrdiff_t offset = 0) {
+    return std::make_unique<Tensor>(p_type, shape, p_data, alloc, offset);
+  }

  Tensor() = default;  // to allow creating vector<Tensor> to support seq(tensor)

--- a/java/src/main/java/ai/onnxruntime/OnnxRuntime.java
+++ b/java/src/main/java/ai/onnxruntime/OnnxRuntime.java
@ -120,6 +120,7 @@ final class OnnxRuntime {
   * in time.
   *
   * @param file The file to remove.
+   * @param onExitOnly Delete the file on exit only, vs trying to do it immediately
   */
  private static void cleanUp(File file, boolean onExitOnly) {
    if (!file.exists()) {
@ -146,6 +147,7 @@ final class OnnxRuntime {
   *
   * @param tempDirectory The temp directory to write the library resource to.
   * @param library The bare name of the library.
+   * @param systemLoad If System.load(..) should be called on the library vs just preparing it
   * @throws IOException If the file failed to read or write.
   */
  private static void load(Path tempDirectory, String library, boolean systemLoad)
--- a/onnxruntime/contrib_ops/cuda/bert/attention.cc
+++ b/onnxruntime/contrib_ops/cuda/bert/attention.cc
@ -1,10 +1,10 @@
 // Copyright (c) Microsoft Corporation. All rights reserved.
 // Licensed under the MIT License.

-#include "core/providers/cuda/cuda_common.h"
-#include "core/providers/cuda/shared_inc/fpgeneric.h"
 #include "attention.h"
 #include "attention_impl.h"
+#include "core/providers/cuda/cuda_common.h"
+#include "core/providers/cuda/shared_inc/fpgeneric.h"

 using namespace onnxruntime::cuda;
 using namespace ::onnxruntime::common;
--- a/onnxruntime/core/framework/provider_bridge_ort.cc
+++ b/onnxruntime/core/framework/provider_bridge_ort.cc
@ -1007,48 +1007,48 @@ std::unique_ptr<IAllocator> CreateCUDAPinnedAllocator(int16_t device_id, const c
 }

 std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_Cuda(const OrtCUDAProviderOptions* provider_options) {
-  if (auto provider = s_library_cuda.Get())
+  if (auto* provider = s_library_cuda.Get())
    return provider->CreateExecutionProviderFactory(provider_options);

  return nullptr;
 }

 std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_Dnnl(int use_arena) {
-  if (auto provider = s_library_dnnl.Get())
+  if (auto* provider = s_library_dnnl.Get())
    return provider->CreateExecutionProviderFactory(use_arena);

  return nullptr;
 }

 std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_Tensorrt(int device_id) {
-  if (auto provider = s_library_tensorrt.Get())
+  if (auto* provider = s_library_tensorrt.Get())
    return provider->CreateExecutionProviderFactory(device_id);

  return nullptr;
 }

 std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_Tensorrt(const OrtTensorRTProviderOptions* provider_options) {
-  if (auto provider = s_library_tensorrt.Get())
+  if (auto* provider = s_library_tensorrt.Get())
    return provider->CreateExecutionProviderFactory(provider_options);

  return nullptr;
 }

 std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_OpenVINO(const OrtOpenVINOProviderOptions* provider_options) {
-  if (auto provider = s_library_openvino.Get())
+  if (auto* provider = s_library_openvino.Get())
    return provider->CreateExecutionProviderFactory(provider_options);

  return nullptr;
 }

 ProviderInfo_OpenVINO* GetProviderInfo_OpenVINO() {
-  if (auto provider = s_library_openvino.Get())
+  if (auto* provider = s_library_openvino.Get())
    return reinterpret_cast<ProviderInfo_OpenVINO*>(provider->GetInfo());
  return nullptr;
 }

 ProviderInfo_CUDA* GetProviderInfo_CUDA() {
-  if (auto provider = s_library_cuda.Get())
+  if (auto* provider = s_library_cuda.Get())
    return reinterpret_cast<ProviderInfo_CUDA*>(provider->GetInfo());
  LOGS_DEFAULT(WARNING) << "GetProviderInfo_CUDA called, returning nullptr";
  ORT_THROW("CUDA Provider not available, can't get interface for it");
--- a/onnxruntime/core/providers/cuda/rnn/gru.cc
+++ b/onnxruntime/core/providers/cuda/rnn/gru.cc
@ -17,10 +17,10 @@ namespace cuda {
      13,                                                                       \
      T,                                                                        \
      kCudaExecutionProvider,                                                   \
-      KernelDefBuilder()                                                        \
+      (*KernelDefBuilder::Create())                                             \
          .TypeConstraint("T", DataTypeImpl::GetTensorType<T>())                \
          .TypeConstraint("T1", DataTypeImpl::GetTensorType<int32_t>())         \
-          .InputMemoryType<OrtMemTypeCPUInput>(RNN_Input_Index::sequence_lens), \
+          .InputMemoryType(OrtMemTypeCPUInput, RNN_Input_Index::sequence_lens), \
      GRU<T>);

 #define REGISTER_KERNEL_TYPED(T)                                                \
--- a/onnxruntime/core/providers/cuda/rnn/lstm.cc
+++ b/onnxruntime/core/providers/cuda/rnn/lstm.cc
@ -15,10 +15,10 @@ namespace cuda {
      13,                                                                       \
      T,                                                                        \
      kCudaExecutionProvider,                                                   \
-      KernelDefBuilder()                                                        \
+      (*KernelDefBuilder::Create())                                             \
          .TypeConstraint("T", DataTypeImpl::GetTensorType<T>())                \
          .TypeConstraint("T1", DataTypeImpl::GetTensorType<int32_t>())         \
-          .InputMemoryType<OrtMemTypeCPUInput>(RNN_Input_Index::sequence_lens), \
+          .InputMemoryType(OrtMemTypeCPUInput, RNN_Input_Index::sequence_lens), \
      LSTM<T>);

 #define REGISTER_KERNEL_TYPED(T)                                                \
--- a/onnxruntime/core/providers/cuda/rnn/rnn.cc
+++ b/onnxruntime/core/providers/cuda/rnn/rnn.cc
@ -17,10 +17,10 @@ namespace cuda {
      13,                                                                       \
      T,                                                                        \
      kCudaExecutionProvider,                                                   \
-      KernelDefBuilder()                                                        \
+      (*KernelDefBuilder::Create())                                             \
          .TypeConstraint("T", DataTypeImpl::GetTensorType<T>())                \
          .TypeConstraint("T1", DataTypeImpl::GetTensorType<int32_t>())         \
-          .InputMemoryType<OrtMemTypeCPUInput>(RNN_Input_Index::sequence_lens), \
+          .InputMemoryType(OrtMemTypeCPUInput, RNN_Input_Index::sequence_lens), \
      RNN<T>);

 #define REGISTER_KERNEL_TYPED(T)                                                \
--- a/onnxruntime/core/providers/cuda/tensor/reshape.cc
+++ b/onnxruntime/core/providers/cuda/tensor/reshape.cc
@ -11,11 +11,11 @@ ONNX_OPERATOR_KERNEL_EX(
    kOnnxDomain,
    14,
    kCudaExecutionProvider,
-    KernelDefBuilder()
+    (*KernelDefBuilder::Create())
        .TypeConstraint("T", DataTypeImpl::AllFixedSizeTensorTypes())
        .TypeConstraint("shape", DataTypeImpl::GetTensorType<int64_t>())
        .Alias(0, 0)
-        .InputMemoryType<OrtMemTypeCPUInput>(1),
+        .InputMemoryType(OrtMemTypeCPUInput, 1),
    Reshape);

 ONNX_OPERATOR_VERSIONED_KERNEL_EX(
--- a/onnxruntime/core/providers/shared_library/provider_api.h
+++ b/onnxruntime/core/providers/shared_library/provider_api.h
@ -206,23 +206,6 @@ using NameMLValMap = std::unordered_map<std::string, OrtValue>;

 namespace onnxruntime {

-// From Tensor.h
-class BufferDeleter {
- public:
-  BufferDeleter() : alloc_(nullptr) {}
-  BufferDeleter(AllocatorPtr alloc) : alloc_(alloc) {}
-
-  void operator()(void* p) const {
-    if (alloc_)
-      alloc_->Free(p);
-  }
-
- private:
-  AllocatorPtr alloc_;
-};
-
-using BufferUniquePtr = std::unique_ptr<void, BufferDeleter>;
-
 // The function passed in will be run on provider DLL unload. This is used to free thread_local variables that are in threads we don't own
 // Since these are not destroyed when the DLL unloads we have to do it manually. Search for usage for an example.
 void RunOnUnload(std::function<void()> function);
--- a/onnxruntime/core/providers/shared_library/provider_bridge_provider.cc
+++ b/onnxruntime/core/providers/shared_library/provider_bridge_provider.cc
@ -8,6 +8,35 @@
 #include <mutex>
 #include "core/providers/shared/common.h"

+#include "core/framework/random_generator.h"
+#include "core/providers/cpu/controlflow/if.h"
+#include "core/providers/cpu/controlflow/loop.h"
+#include "core/providers/cpu/controlflow/scan.h"
+#include "core/providers/cpu/math/einsum.h"
+#include "core/providers/cpu/object_detection/non_max_suppression.h"
+#include "core/providers/cpu/tensor/concatbase.h"
+#include "core/providers/cpu/tensor/padbase.h"
+#include "core/providers/cpu/tensor/gatherbase.h"
+#include "core/providers/cpu/tensor/slice.h"
+#include "core/providers/cpu/tensor/split.h"
+#include "core/providers/cpu/tensor/size.h"
+#include "core/providers/cpu/tensor/scatter_nd.h"
+#include "core/providers/cpu/tensor/unsqueeze.h"
+#include "core/providers/cpu/tensor/tile.h"
+
+#ifndef DISABLE_CONTRIB_OPS
+#include "contrib_ops/cpu/bert/attention_base.h"
+#include "contrib_ops/cpu/bert/bias_gelu_helper.h"
+#include "contrib_ops/cpu/bert/embed_layer_norm_helper.h"
+#include "contrib_ops/cpu/bert/longformer_attention_base.h"
+#endif
+
+#ifdef ENABLE_TRAINING
+#include "orttraining/training_ops/cpu/aten_ops/aten_op.h"
+#include "orttraining/training_ops/cpu/controlflow/group.h"
+#include "orttraining/training_ops/cpu/controlflow/yield.h"
+#endif
+
 #ifndef _Ret_notnull_
 #define _Ret_notnull_
 #endif
@ -311,39 +340,6 @@ std::unique_ptr<OpKernelInfo> CopyOpKernelInfo(const OpKernelInfo& info) {
  return g_host->CopyOpKernelInfo(info);
 }

-}  // namespace onnxruntime
-
-#include "core/providers/cpu/tensor/unsqueeze.h"
-#include "core/providers/cpu/tensor/slice.h"
-#include "core/providers/cpu/tensor/split.h"
-#include "core/providers/cpu/tensor/size.h"
-#include "core/providers/cpu/tensor/scatter_nd.h"
-#include "core/providers/cpu/tensor/padbase.h"
-#include "core/providers/cpu/tensor/concatbase.h"
-#include "core/providers/cpu/tensor/gatherbase.h"
-#include "core/providers/cpu/controlflow/scan.h"
-#include "core/providers/cpu/controlflow/loop.h"
-#include "core/providers/cpu/tensor/tile.h"
-#include "core/providers/cpu/object_detection/non_max_suppression.h"
-#include "core/framework/random_generator.h"
-#include "core/providers/cpu/math/einsum.h"
-#include "core/providers/cpu/controlflow/if.h"
-
-#ifndef DISABLE_CONTRIB_OPS
-#include "contrib_ops/cpu/bert/bias_gelu_helper.h"
-#include "contrib_ops/cpu/bert/embed_layer_norm_helper.h"
-#include "contrib_ops/cpu/bert/longformer_attention_base.h"
-#include "contrib_ops/cpu/bert/attention_base.h"
-#endif
-
-#ifdef ENABLE_TRAINING
-#include "orttraining/training_ops/cpu/aten_ops/aten_op.h"
-#include "orttraining/training_ops/cpu/controlflow/group.h"
-#include "orttraining/training_ops/cpu/controlflow/yield.h"
-#endif
-
-namespace onnxruntime {
-
 namespace utils {
 template <>
 Status UnpackTensor(const ONNX_NAMESPACE::TensorProto& tensor, const void* raw_data, size_t raw_data_len, /*out*/ bool* p_data, size_t expected_size) { return g_host->UnpackTensor(tensor, raw_data, raw_data_len, p_data, expected_size); }