Fix issues in TensorRT provider options (#7738)

* add legacy env variable support in pybind * formatting code
2026-06-29 03:30:52 +00:00 · 2021-05-17 23:07:27 -07:00 · 2021-05-17 23:07:27 -07:00 · a6972c8782
commit a6972c8782
parent e9057d2e49
4 changed files with 66 additions and 82 deletions
--- a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc
+++ b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc
@ -145,7 +145,7 @@ bool FindCycleHelper(int i, const std::list<int>* adjacency_map, bool visited[],
 * Read calibration table for INT8 quantization
 * Two kind of calibration tables are supported,
 * 1. ORT generated calibration table
-* The table is pre-serialized by flatbuffers. 
+* The table is pre-serialized by flatbuffers.
 * Each entry in the table is a key-value pair,
 * key: tensor name, value: maximum absolute value in floating point
 * For example,
@ -404,7 +404,7 @@ TensorrtExecutionProvider::TensorrtExecutionProvider(const TensorrtExecutionProv
      int8_calibration_cache_name_ = info.int8_calibration_table_name;
      int8_use_native_tensorrt_calibration_table_ = info.int8_use_native_calibration_table;
    }
-    if (fp16_enable_ || int8_enable_) { // DLA can only be enabled with FP16 or INT8
+    if (fp16_enable_ || int8_enable_) {  // DLA can only be enabled with FP16 or INT8
      dla_enable_ = info.dla_enable;
      dla_core_ = info.dla_core;
    }
@ -456,13 +456,13 @@ TensorrtExecutionProvider::TensorrtExecutionProvider(const TensorrtExecutionProv
      }
    }

-    if (fp16_enable_ || int8_enable_) { // DLA can only be enabled with FP16 or INT8
+    if (fp16_enable_ || int8_enable_) {  // DLA can only be enabled with FP16 or INT8
      const std::string dla_enable_env = onnxruntime::GetEnvironmentVar(tensorrt_env_vars::kDLAEnable);
      if (!dla_enable_env.empty()) {
        dla_enable_ = (std::stoi(dla_enable_env) == 0 ? false : true);
      }
-	  
-	  if (dla_enable_) {
+
+      if (dla_enable_) {
        const std::string dla_core_env = onnxruntime::GetEnvironmentVar(tensorrt_env_vars::kDLACore);
        if (!dla_core_env.empty()) {
          dla_core_ = std::stoi(dla_core_env);
@ -488,12 +488,12 @@ TensorrtExecutionProvider::TensorrtExecutionProvider(const TensorrtExecutionProv
        LOGS_DEFAULT(WARNING) << "[TensorRT EP] ORT_TENSORRT_ENGINE_CACHE_PATH is deprecated! Please use ORT_TENSORRT_CACHE_PATH to specify engine cache path";
      }
    }
-    
+
    const std::string engine_decryption_enable_env = onnxruntime::GetEnvironmentVar(tensorrt_env_vars::kDecryptionEnable);
    if (!engine_decryption_enable_env.empty()) {
      engine_decryption_enable_ = (std::stoi(engine_decryption_enable_env) == 0 ? false : true);
    }
-    
+
    if (engine_decryption_enable_) {
      engine_decryption_lib_path_ = onnxruntime::GetEnvironmentVar(tensorrt_env_vars::kDecryptionLibPath);
    }
@ -510,15 +510,15 @@ TensorrtExecutionProvider::TensorrtExecutionProvider(const TensorrtExecutionProv
    max_partition_iterations_ = 1000;
  }
  if (min_subgraph_size_ <= 0) {
-	LOGS_DEFAULT(WARNING) << "[TensorRT EP] TensorRT option trt_min_subgraph_size must be a positive integer value. Set it to 1";
+    LOGS_DEFAULT(WARNING) << "[TensorRT EP] TensorRT option trt_min_subgraph_size must be a positive integer value. Set it to 1";
    min_subgraph_size_ = 1;
-  }	
+  }
  if (max_workspace_size_ <= 0) {
-	LOGS_DEFAULT(WARNING) << "[TensorRT EP] TensorRT option trt_max_workspace_size must be a positive integer value. Set it to 1073741824 (1GB)";
+    LOGS_DEFAULT(WARNING) << "[TensorRT EP] TensorRT option trt_max_workspace_size must be a positive integer value. Set it to 1073741824 (1GB)";
    max_workspace_size_ = 1 << 30;
  }
  if (dla_core_ < 0) {
-	LOGS_DEFAULT(WARNING) << "[TensorRT EP] TensorRT option trt_dla_core must be a non-negative integer value. Set it to 0";
+    LOGS_DEFAULT(WARNING) << "[TensorRT EP] TensorRT option trt_dla_core must be a non-negative integer value. Set it to 0";
    dla_core_ = 0;
  }

@ -534,11 +534,28 @@ TensorrtExecutionProvider::TensorrtExecutionProvider(const TensorrtExecutionProv
  if (engine_decryption_enable_) {
    LIBTYPE handle = OPENLIB(engine_decryption_lib_path_.c_str());
    if (handle == nullptr) {
-      ORT_MAKE_STATUS(ONNXRUNTIME, EP_FAIL,
-                      "TensorRT EP could not open shared library from " + engine_decryption_lib_path_);
+      ORT_THROW_IF_ERROR(ORT_MAKE_STATUS(ONNXRUNTIME, EP_FAIL,
+                                         "TensorRT EP could not open shared library from " + engine_decryption_lib_path_));
    }
    engine_decryption_ = (int (*)(const char*, char*, size_t*))LIBFUNC(handle, "decrypt");
  }
+  LOGS_DEFAULT(VERBOSE) << "[TensorRT EP] TensorRT provider options: "
+                        << "device_id: " << device_id_
+                        << ", trt_max_partition_iterations: " << max_partition_iterations_
+                        << ", trt_min_subgraph_size: " << min_subgraph_size_
+                        << ", trt_max_workspace_size: " << max_workspace_size_
+                        << ", trt_fp16_enable: " << fp16_enable_
+                        << ", trt_int8_enable: " << int8_enable_
+                        << ", trt_int8_calibration_cache_name: " << int8_calibration_cache_name_
+                        << ", trt_int8_use_native_tensorrt_calibration_table: " << int8_use_native_tensorrt_calibration_table_
+                        << ", trt_dla_enable: " << dla_enable_
+                        << ", trt_dla_core: " << dla_core_
+                        << ", trt_dump_subgraphs: " << dump_subgraphs_
+                        << ", trt_engine_cache_enable: " << engine_cache_enable_
+                        << ", trt_cache_path: " << cache_path_
+                        << ", trt_engine_decryption_enable: " << engine_decryption_enable_
+                        << ", trt_engine_decryption_lib_path: " << engine_decryption_lib_path_
+                        << ", trt_force_sequential_engine_build: " << force_sequential_engine_build_;
 }

 TensorrtExecutionProvider::~TensorrtExecutionProvider() {
@ -862,7 +879,7 @@ SubGraphCollection_t TensorrtExecutionProvider::GetSupportedList(SubGraphCollect
        model_proto->SerializeToString(string_buf);

        if (dump_subgraphs_) {
-          // Dump TensorRT subgraph for debugging if enabled via ORT_TENSORRT_DUMP_SUBGRAPHS env variable.
+          // Dump TensorRT subgraph for debugging
          std::fstream dump("TensorrtExecutionProvider_TRT_Subgraph.onnx", std::ios::out | std::ios::trunc | std::ios::binary);
          model_proto->SerializeToOstream(dump);
        }
@ -1098,7 +1115,7 @@ common::Status TensorrtExecutionProvider::Compile(const std::vector<Node*>& fuse
    model_proto->SerializeToString(string_buf);

    if (dump_subgraphs_) {
-      // Dump the TensorRT subgraph if enabled via ORT_TENSORRT_DUMP_SUBGRAPHS env variable.
+      // Dump TensorRT subgraphs
      std::fstream dump(fused_node->Name() + ".onnx", std::ios::out | std::ios::trunc | std::ios::binary);
      model_proto->SerializeToOstream(dump);
    }
@ -1183,11 +1200,11 @@ common::Status TensorrtExecutionProvider::Compile(const std::vector<Node*>& fuse

    // Set DLA
    if (fp16_enable_ || int8_enable_) {
-      if (dla_enable_ && dla_core_ >= 0) {//DLA can only run with FP16 and INT8
+      if (dla_enable_ && dla_core_ >= 0) {  //DLA can only run with FP16 and INT8
        int number_of_dla_core = trt_builder->getNbDLACores();
        if (number_of_dla_core == 0) {
          LOGS_DEFAULT(WARNING) << "[TensorRT EP] Try to use DLA core, but platform doesn't have any DLA core";
-		  dla_enable_ = false;
+          dla_enable_ = false;
        } else {
          if (dla_core_ >= number_of_dla_core) {
            LOGS_DEFAULT(WARNING) << "[TensorRT EP] Try to use DLA core #" << dla_core_ << ", but it exceeds platform's maximum DLA core number " << number_of_dla_core << ". Use DLA core 0 instead.";
@ -1198,7 +1215,7 @@ common::Status TensorrtExecutionProvider::Compile(const std::vector<Node*>& fuse
          trt_config->setDefaultDeviceType(nvinfer1::DeviceType::kDLA);
          trt_config->setDLACore(dla_core_);
          trt_node_name_with_precision += "_dlacore" + std::to_string(dla_core_);
-		}
+        }
      }
    }

@ -1318,7 +1335,7 @@ common::Status TensorrtExecutionProvider::Compile(const std::vector<Node*>& fuse
      *p = {context->allocate_func, context->release_func, context->allocator_handle, &parsers_[context->node_name],
            &engines_[context->node_name], &contexts_[context->node_name], &builders_[context->node_name],
            &networks_[context->node_name], input_info_[context->node_name], output_info_[context->node_name],
-            input_shape_ranges_[context->node_name], &tensorrt_mu_, fp16_enable_, int8_enable_, dla_enable_, 
+            input_shape_ranges_[context->node_name], &tensorrt_mu_, fp16_enable_, int8_enable_, dla_enable_,
            dla_core_, &max_workspace_size_, trt_node_name_with_precision, engine_cache_enable_, cache_path_, runtime_.get(), nullptr,
            allocator_, dynamic_range_map, engine_decryption_enable_, engine_decryption_};
      *state = p.release();
@ -1579,10 +1596,10 @@ common::Status TensorrtExecutionProvider::Compile(const std::vector<Node*>& fuse

        // Set DLA (DLA can only run with FP16 or INT8)
        if ((trt_state->fp16_enable || trt_state->int8_enable) && trt_state->dla_enable) {
-            LOGS_DEFAULT(VERBOSE) << "[TensorRT EP] use DLA core " << trt_state->dla_core;
-            trt_config->setFlag(nvinfer1::BuilderFlag::kGPU_FALLBACK);
-            trt_config->setDefaultDeviceType(nvinfer1::DeviceType::kDLA);
-            trt_config->setDLACore(trt_state->dla_core);
+          LOGS_DEFAULT(VERBOSE) << "[TensorRT EP] use DLA core " << trt_state->dla_core;
+          trt_config->setFlag(nvinfer1::BuilderFlag::kGPU_FALLBACK);
+          trt_config->setDefaultDeviceType(nvinfer1::DeviceType::kDLA);
+          trt_config->setDLACore(trt_state->dla_core);
        }

        // Build engine
@ -1883,4 +1900,4 @@ common::Status TensorrtExecutionProvider::Compile(const std::vector<Node*>& fuse
  }
  return Status::OK();
 }
-}  // namespace onnxruntime
+}  // namespace onnxruntime
--- a/onnxruntime/python/onnxruntime_pybind_state.cc
+++ b/onnxruntime/python/onnxruntime_pybind_state.cc
@ -208,6 +208,7 @@ const OrtDevice::DeviceType OrtDevice::GPU;
 namespace onnxruntime {

 std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_Tensorrt(const OrtTensorRTProviderOptions* params);
+std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_Tensorrt(int device_id);
 std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_MIGraphX(int device_id);
 std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_Dnnl(int use_arena);
 std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_OpenVINO(const OrtOpenVINOProviderOptions* params);
@ -575,29 +576,28 @@ static void RegisterExecutionProviders(InferenceSession* sess, const std::vector
                                          sess->GetSessionOptions().enable_cpu_mem_arena));
    } else if (type == kTensorrtExecutionProvider) {
 #ifdef USE_TENSORRT
-      OrtTensorRTProviderOptions params{
-          0,
-          0,
-          nullptr,
-          1000,
-          1,
-          1 << 30,
-          0,
-          0,
-          nullptr,
-          0,
-          0,
-          0,
-          0,
-          0,
-          nullptr,
-          0,
-          nullptr,
-          0};
-
      std::string calibration_table, cache_path, lib_path;
      auto it = provider_options_map.find(type);
      if (it != provider_options_map.end()) {
+        OrtTensorRTProviderOptions params{
+            0,
+            0,
+            nullptr,
+            1000,
+            1,
+            1 << 30,
+            0,
+            0,
+            nullptr,
+            0,
+            0,
+            0,
+            0,
+            0,
+            nullptr,
+            0,
+            nullptr,
+            0};
        for (auto option : it->second) {
          if (option.first == "device_id") {
            if (!option.second.empty()) {
@ -718,8 +718,10 @@ static void RegisterExecutionProviders(InferenceSession* sess, const std::vector
            ORT_THROW("Invalid TensorRT EP option: ", option.first);
          }
        }
+        RegisterExecutionProvider(sess, *onnxruntime::CreateExecutionProviderFactory_Tensorrt(&params));
+      } else {
+        RegisterExecutionProvider(sess, *onnxruntime::CreateExecutionProviderFactory_Tensorrt(cuda_device_id));
      }
-      RegisterExecutionProvider(sess, *onnxruntime::CreateExecutionProviderFactory_Tensorrt(&params));
 #endif
    } else if (type == kMIGraphXExecutionProvider) {
 #ifdef USE_MIGRAPHX
--- a/onnxruntime/test/perftest/command_args_parser.cc
+++ b/onnxruntime/test/perftest/command_args_parser.cc
@ -76,8 +76,6 @@ namespace perftest {
      "\t    [TensorRT only] [trt_dump_subgraphs]: Dump TRT subgraph to onnx model.\n"
      "\t    [TensorRT only] [trt_engine_cache_enable]: Enable engine caching.\n"
      "\t    [TensorRT only] [trt_engine_cache_path]: Specify engine cache path.\n"
-      "\t    [TensorRT only] [trt_engine_decryption_enable]: (experimental feature) Enable engine decryption.\n"
-      "\t    [TensorRT only] [trt_engine_decryption_lib_path]: (experimental feature) Specify engine decryption library path.\n"
      "\t    [TensorRT only] [trt_force_sequential_engine_build]: Force TensorRT engines to be built sequentially.\n"
      "\t [Usage]: -e <provider_name> -i '<key1>|<value1> <key2>|<value2>'\n\n"
      "\t [Example] [For TensorRT EP] -e tensorrt -i 'trt_fp16_enable|true trt_int8_enable|true trt_int8_calibration_table_name|calibration.flatbuffers trt_int8_use_native_calibration_table|false trt_force_sequential_engine_build|false'\n"
--- a/onnxruntime/test/python/onnxruntime_test_python.py
+++ b/onnxruntime/test/python/onnxruntime_test_python.py
@ -72,53 +72,28 @@ class TestInferenceSession(unittest.TestCase):
            self.assertIn('trt_max_partition_iterations', option)
            self.assertIn('trt_min_subgraph_size', option)
            self.assertIn('trt_max_workspace_size', option)
-            self.assertIn('trt_fp16_enable', option)
-            self.assertIn('trt_int8_enable', option)
-            self.assertIn('trt_int8_calibration_table_name', option)
-            self.assertIn('trt_int8_use_native_calibration_table', option)
-            self.assertIn('trt_dla_enable', option)      
-            self.assertIn('trt_dla_core', option)
            self.assertIn('trt_dump_subgraphs', option)
            self.assertIn('trt_engine_cache_enable', option)
            self.assertIn('trt_engine_cache_path', option)
-            self.assertIn('trt_engine_decryption_enable', option)
-            self.assertIn('trt_engine_decryption_lib_path', option)
            self.assertIn('trt_force_sequential_engine_build', option)

            max_partition_iterations = option['trt_max_partition_iterations']
            new_max_partition_iterations = int(max_partition_iterations) + 1
            min_subgraph_size = option['trt_min_subgraph_size']
-            new_min_subgraph_size = int(max_partition_iterations) + 1
+            new_min_subgraph_size = int(min_subgraph_size) + 1
            ori_max_workspace_size = option['trt_max_workspace_size']
            new_max_workspace_size = int(ori_max_workspace_size) // 2
-            dla_core = option['trt_dla_core']
-            new_dla_core = int(dla_core) + 1

            option = {}
            option['trt_max_partition_iterations'] = new_max_partition_iterations
            option['trt_min_subgraph_size'] = new_min_subgraph_size
            option['trt_max_workspace_size'] = new_max_workspace_size
-            fp16_enable = "true"
-            option['trt_fp16_enable'] = fp16_enable
-            int8_enable = "false"
-            option['trt_int8_enable'] = int8_enable
-            calib_table_name = '/home/onnxruntime/table.flatbuffers'
-            option['trt_int8_calibration_table_name'] = calib_table_name
-            int8_use_native_calibration_table = "true"
-            option['trt_int8_use_native_calibration_table'] = int8_use_native_calibration_table
-            dla_enable = "true"
-            option['trt_dla_enable'] = dla_enable
-            option['trt_dla_core'] = new_dla_core
            dump_subgraphs = "true"
            option['trt_dump_subgraphs'] = dump_subgraphs
            engine_cache_enable = "true"
            option['trt_engine_cache_enable'] = engine_cache_enable
-            engine_cache_path = '/home/onnxruntime/engine_cache'
+            engine_cache_path = './engine_cache'
            option['trt_engine_cache_path'] = engine_cache_path
-            engine_decryption_enable = "true"
-            option['trt_engine_decryption_enable'] = engine_decryption_enable
-            engine_decryption_lib_path = '/home/onnxruntime/decryption_lib'
-            option['trt_engine_decryption_lib_path'] = engine_decryption_lib_path
            force_sequential_engine_build = "true"
            option['trt_force_sequential_engine_build'] = force_sequential_engine_build
            sess.set_providers(['TensorrtExecutionProvider'], [option])
@ -128,17 +103,9 @@ class TestInferenceSession(unittest.TestCase):
            self.assertEqual(option['trt_max_partition_iterations'], str(new_max_partition_iterations))
            self.assertEqual(option['trt_min_subgraph_size'], str(new_min_subgraph_size))
            self.assertEqual(option['trt_max_workspace_size'], str(new_max_workspace_size))
-            self.assertEqual(option['trt_int8_calibration_table_name'], str(calib_table_name))
-            self.assertEqual(option['trt_fp16_enable'], '1')
-            self.assertEqual(option['trt_int8_enable'], '0')
-            self.assertEqual(option['trt_int8_use_native_calibration_table'], '1')
-            self.assertEqual(option['trt_dla_enable'], '1')
-            self.assertEqual(option['trt_dla_core'], str(new_dla_core))
            self.assertEqual(option['trt_dump_subgraphs'], '1')
            self.assertEqual(option['trt_engine_cache_enable'], '1')
            self.assertEqual(option['trt_engine_cache_path'], str(engine_cache_path))
-            self.assertEqual(option['trt_engine_decryption_enable'], '1')
-            self.assertEqual(option['engine_decryption_lib_path'], str(engine_decryption_lib_path))
            self.assertEqual(option['trt_force_sequential_engine_build'], '1')

            # We currently disable following test code since that not all test machines/GPUs have nvidia int8 capability