[TensorRT EP] Update TRT10.0 deprecated api (#20989)

### Description  Note: * This PR would remove C4996 suppression in tensorrt_execution_provider.cc only (according to Nvidia, places with nvinfer.h included need C4996 suppression, when /Zc:__cplusplus is enabled in ORT win build) * A follow-up PR will be raised to update deprecated TRT Plugin api usage. Here are deprecated apis to be updated in this PR: | deprecated api | Update | | ------------------------------------------------------------ | ------------------------------------------------------------ | | [kCUBLAS](https://docs.nvidia.com/deeplearning/tensorrt/api/c_api/namespacenvinfer1.html#a9e1d81e5a8bfeb38b86e22a66d5f836a) | / | | [kCUBLAS_LT](https://docs.nvidia.com/deeplearning/tensorrt/api/c_api/namespacenvinfer1.html#a9e1d81e5a8bfeb38b86e22a66d5f836a) | / | | [kCUDNN](https://docs.nvidia.com/deeplearning/tensorrt/api/c_api/namespacenvinfer1.html#a9e1d81e5a8bfeb38b86e22a66d5f836a) | / | | [reallocateOutput](https://docs.nvidia.com/deeplearning/tensorrt/api/c_api/classnvinfer1_1_1v__1__0_1_1_i_output_allocator.html#acae6441d4029584cc1c6550917518691) | Superseded by [reallocateOutputAsync](https://docs.nvidia.com/deeplearning/tensorrt/api/c_api/classnvinfer1_1_1v__1__0_1_1_i_output_allocator.html#aa40eeb891c1dfe4c1bbf1eabe8c705ab) with cudaStream_t argument | | [createExecutionContextWithoutDeviceMemory](https://docs.nvidia.com/deeplearning/tensorrt/api/c_api/classnvinfer1_1_1_i_cuda_engine.html#adc86bcc42b098204997396ef2b1093fb) | Superseded by [createExecutionContext()](https://docs.nvidia.com/deeplearning/tensorrt/api/c_api/classnvinfer1_1_1_i_cuda_engine.html#a35de29aa6134165a5b14a537e6d99e82) with parameter.<br />Check [ExecutionContextAllocationStrategy::kUSER_MANAGED](https://docs.nvidia.com/deeplearning/tensorrt/api/c_api/namespacenvinfer1.html#ac6251a050df629edfc0ce037fa366503) for more detail | ### Motivation and Context  TRT deprecated api list: https://docs.nvidia.com/deeplearning/tensorrt/api/c_api/deprecated.html
2026-07-05 04:17:53 +00:00 · 2024-07-01 22:55:20 -07:00 · 2024-07-01 22:55:20 -07:00 · 7be1d4aad3
commit 7be1d4aad3
parent beb2496748
2 changed files with 43 additions and 20 deletions
--- a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc
+++ b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc
@ -169,11 +169,20 @@ nvinfer1::TacticSources GetTacticSourceFromString(std::string& tactic_string) {
    nvinfer1::TacticSource source{};
    t = toUpper(t);
    if (t == "CUBLAS") {
+      LOGS_DEFAULT(WARNING) << "[TensorRT EP] Tactic kCUBLAS is deprecated in TensorRT 10.0";
+#if NV_TENSORRT_MAJOR < 10
      source = nvinfer1::TacticSource::kCUBLAS;
+#endif
    } else if (t == "CUBLASLT" || t == "CUBLAS_LT") {
+      LOGS_DEFAULT(WARNING) << "[TensorRT EP] Tactic kCUBLAS_LT is deprecated in TensorRT 9.0";
+#if NV_TENSORRT_MAJOR < 9
      source = nvinfer1::TacticSource::kCUBLAS_LT;
+#endif
    } else if (t == "CUDNN") {
+      LOGS_DEFAULT(WARNING) << "[TensorRT EP] Tactic kCUDNN is deprecated in TensorRT 10.0";
+#if NV_TENSORRT_MAJOR < 10
      source = nvinfer1::TacticSource::kCUDNN;
+#endif
    } else if (t == "EDGE_MASK_CONVOLUTIONS") {
      source = nvinfer1::TacticSource::kEDGE_MASK_CONVOLUTIONS;
    } else if (t == "JIT_CONVOLUTIONS") {
@ -298,6 +307,25 @@ void CudaCall<cudnnStatus_t, true>(cudnnStatus_t retCode, const char* exprString
  return g_host->CudaCall_true(retCode, exprString, libName, successCode, msg, file, line);
 }

+#if NV_TENSORRT_MAJOR >= 10
+void* OutputAllocator::reallocateOutputAsync(char const* /*tensorName*/, void* /*currentMemory*/, uint64_t size,
+                                             uint64_t /*alignment*/, cudaStream_t /*stream*/) noexcept {
+  // Some memory allocators return nullptr when allocating zero bytes, but TensorRT requires a non-null ptr
+  // even for empty tensors, so allocate a dummy byte.
+  size = std::max(size, static_cast<uint64_t>(1));
+  if (size > allocated_size) {
+    cudaFree(outputPtr);
+    outputPtr = nullptr;
+    allocated_size = 0;
+    if (cudaMalloc(&outputPtr, size) == cudaSuccess) {
+      allocated_size = size;
+    }
+  }
+  // if cudaMalloc fails, returns nullptr.
+  return outputPtr;
+}
+#else
+// Only override this method when TensorRT <= 8.6
 void* OutputAllocator::reallocateOutput(char const* /*tensorName*/, void* /*currentMemory*/, uint64_t size,
                                        uint64_t /*alignment*/) noexcept {
  // Some memory allocators return nullptr when allocating zero bytes, but TensorRT requires a non-null ptr
@ -314,6 +342,7 @@ void* OutputAllocator::reallocateOutput(char const* /*tensorName*/, void* /*curr
  // if cudaMalloc fails, returns nullptr.
  return outputPtr;
 }
+#endif

 void OutputAllocator::notifyShape(char const* /*tensorName*/, nvinfer1::Dims const& dims) noexcept {
  output_shapes.clear();
@ -3152,14 +3181,10 @@ Status TensorrtExecutionProvider::CreateNodeComputeInfoFromGraph(const GraphView
      if (mem_size > max_ctx_mem_size_) {
        max_ctx_mem_size_ = mem_size;
      }
-
-#if defined(_MSC_VER)
-#pragma warning(push)
-#pragma warning(disable : 4996)  // nvinfer1::ICudaEngine::createExecutionContextWithoutDeviceMemory was deprecated
-#endif
+#if NV_TENSORRT_MAJOR < 10
      trt_context = std::unique_ptr<nvinfer1::IExecutionContext>(trt_engine->createExecutionContextWithoutDeviceMemory());
-#if defined(_MSC_VER)
-#pragma warning(pop)
+#else
+      trt_context = std::unique_ptr<nvinfer1::IExecutionContext>(trt_engine->createExecutionContext(nvinfer1::ExecutionContextAllocationStrategy::kUSER_MANAGED));
 #endif
    } else {
      trt_context = std::unique_ptr<nvinfer1::IExecutionContext>(trt_engine->createExecutionContext());
@ -3606,14 +3631,12 @@ Status TensorrtExecutionProvider::CreateNodeComputeInfoFromGraph(const GraphView

    if (context_update) {
      if (trt_state->context_memory_sharing_enable) {
-#if defined(_MSC_VER)
-#pragma warning(push)
-#pragma warning(disable : 4996)  // nvinfer1::ICudaEngine::createExecutionContextWithoutDeviceMemory was deprecated
-#endif
+#if NV_TENSORRT_MAJOR < 10
        *(trt_state->context) = std::unique_ptr<nvinfer1::IExecutionContext>(
            trt_state->engine->get()->createExecutionContextWithoutDeviceMemory());
-#if defined(_MSC_VER)
-#pragma warning(pop)
+#else
+        *(trt_state->context) = std::unique_ptr<nvinfer1::IExecutionContext>(
+            trt_state->engine->get()->createExecutionContext(nvinfer1::ExecutionContextAllocationStrategy::kUSER_MANAGED));
 #endif
      } else {
        *(trt_state->context) = std::unique_ptr<nvinfer1::IExecutionContext>(
@ -3830,13 +3853,10 @@ Status TensorrtExecutionProvider::CreateNodeComputeInfoFromPrecompiledEngine(con
    if (mem_size > max_ctx_mem_size_) {
      max_ctx_mem_size_ = mem_size;
    }
-#if defined(_MSC_VER)
-#pragma warning(push)
-#pragma warning(disable : 4996)  // nvinfer1::ICudaEngine::createExecutionContextWithoutDeviceMemory was deprecated
-#endif
+#if NV_TENSORRT_MAJOR < 10
    trt_context = std::unique_ptr<nvinfer1::IExecutionContext>(trt_engine->createExecutionContextWithoutDeviceMemory());
-#if defined(_MSC_VER)
-#pragma warning(pop)
+#else
+    trt_context = std::unique_ptr<nvinfer1::IExecutionContext>(trt_engine->createExecutionContext(nvinfer1::ExecutionContextAllocationStrategy::kUSER_MANAGED));
 #endif
  } else {
    trt_context = std::unique_ptr<nvinfer1::IExecutionContext>(trt_engine->createExecutionContext());
--- a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.h
+++ b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.h
@ -116,8 +116,11 @@ using unique_pointer = std::unique_ptr<T, TensorrtInferDeleter>;
 //
 class OutputAllocator : public nvinfer1::IOutputAllocator {
 public:
+#if NV_TENSORRT_MAJOR >= 10
+  void* reallocateOutputAsync(char const* tensorName, void* currentMemory, uint64_t size, uint64_t alignment, cudaStream_t stream) noexcept override;
+#else
  void* reallocateOutput(char const* tensorName, void* currentMemory, uint64_t size, uint64_t alignment) noexcept override;
-
+#endif
  void notifyShape(char const* tensorName, nvinfer1::Dims const& dims) noexcept override;

  void* getBuffer() {