diff --git a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc index a24f40f9e3..d9184367f4 100644 --- a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc +++ b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc @@ -1325,72 +1325,74 @@ common::Status TensorrtExecutionProvider::Compile(const std::vector engine_buf{new char[engine_size]}; - engine_file.read((char*)engine_buf.get(), engine_size); - trt_engine = tensorrt_ptr::unique_pointer(runtime_->deserializeCudaEngine(engine_buf.get(), engine_size, nullptr)); - LOGS_DEFAULT(VERBOSE) << "[TensorRT EP] DeSerialized " + engine_cache_path; - if (trt_engine == nullptr) { - return ORT_MAKE_STATUS(ONNXRUNTIME, EP_FAIL, - "TensorRT EP could not deserialize engine from cache: " + engine_cache_path); - } - } else if (engine_decryption_enable_ && engine_cache_enable_ && !engine_file) { - // Decrypt engine - size_t engine_size = 0; - if (!engine_decryption_(engine_cache_path.c_str(), nullptr, &engine_size)) { - return ORT_MAKE_STATUS(ONNXRUNTIME, EP_FAIL, - "TensorRT EP could not get engine buffer size"); - } - std::unique_ptr engine_buf{new char[engine_size]}; - if (!engine_decryption_(engine_cache_path.c_str(), &engine_buf[0], &engine_size)) { - return ORT_MAKE_STATUS(ONNXRUNTIME, EP_FAIL, - "TensorRT EP could not call engine decryption function decrypt"); - } - // Deserialize engine - trt_engine = tensorrt_ptr::unique_pointer(runtime_->deserializeCudaEngine(engine_buf.get(), engine_size, nullptr)); - LOGS_DEFAULT(VERBOSE) << "[TensorRT EP] DeSerialized " + engine_cache_path; - if (trt_engine == nullptr) { - return ORT_MAKE_STATUS(ONNXRUNTIME, EP_FAIL, - "TensorRT EP could not deserialize engine from encrypted cache: " + engine_cache_path); - } - } else { - // Set INT8 per tensor dynamic range - if (int8_enable_ && trt_builder->platformHasFastInt8() && int8_calibration_cache_available_) { - trt_config->setInt8Calibrator(nullptr); - if (!SetDynamicRange(*trt_network, dynamic_range_map)) { - return ORT_MAKE_STATUS(ONNXRUNTIME, EP_FAIL, - "TensorRT EP could not set INT8 dynamic range for fused node: " + fused_node.Name()); - } - } + { + // ifstream file check, engine serialization/deserialization and engine build are in critical section. It needs lock protection to prevent race condition when inferencing with multithreading. + auto lock = GetApiLock(); - // Build engine - { - auto lock = GetApiLock(); - trt_engine = tensorrt_ptr::unique_pointer(trt_builder->buildEngineWithConfig(*trt_network, *trt_config)); - } - if (trt_engine == nullptr) { - return ORT_MAKE_STATUS(ONNXRUNTIME, EP_FAIL, - "TensorRT EP could not build engine for fused node: " + fused_node.Name()); - } - if (engine_cache_enable_) { - nvinfer1::IHostMemory* serializedModel = trt_engine->serialize(); - size_t engine_size = serializedModel->size(); - if (engine_decryption_enable_) { - // Encrypt engine - if (!engine_encryption_(engine_cache_path.c_str(), reinterpret_cast(serializedModel->data()), engine_size)) { - return ORT_MAKE_STATUS(ONNXRUNTIME, EP_FAIL, - "TensorRT EP could not call engine encryption function encrypt"); - } - } else { - std::ofstream file(engine_cache_path, std::ios::binary | std::ios::out); - file.write(reinterpret_cast(serializedModel->data()), engine_size); + std::ifstream engine_file(engine_cache_path, std::ios::binary | std::ios::in); + if (engine_cache_enable_ && engine_file) { + engine_file.seekg(0, std::ios::end); + size_t engine_size = engine_file.tellg(); + engine_file.seekg(0, std::ios::beg); + std::unique_ptr engine_buf{new char[engine_size]}; + engine_file.read((char*)engine_buf.get(), engine_size); + trt_engine = tensorrt_ptr::unique_pointer(runtime_->deserializeCudaEngine(engine_buf.get(), engine_size, nullptr)); + LOGS_DEFAULT(VERBOSE) << "[TensorRT EP] DeSerialized " + engine_cache_path; + if (trt_engine == nullptr) { + return ORT_MAKE_STATUS(ONNXRUNTIME, EP_FAIL, + "TensorRT EP could not deserialize engine from cache: " + engine_cache_path); + } + } else if (engine_decryption_enable_ && engine_cache_enable_ && !engine_file) { + // Decrypt engine + size_t engine_size = 0; + if (!engine_decryption_(engine_cache_path.c_str(), nullptr, &engine_size)) { + return ORT_MAKE_STATUS(ONNXRUNTIME, EP_FAIL, + "TensorRT EP could not get engine buffer size"); + } + std::unique_ptr engine_buf{new char[engine_size]}; + if (!engine_decryption_(engine_cache_path.c_str(), &engine_buf[0], &engine_size)) { + return ORT_MAKE_STATUS(ONNXRUNTIME, EP_FAIL, + "TensorRT EP could not call engine decryption function decrypt"); + } + // Deserialize engine + trt_engine = tensorrt_ptr::unique_pointer(runtime_->deserializeCudaEngine(engine_buf.get(), engine_size, nullptr)); + LOGS_DEFAULT(VERBOSE) << "[TensorRT EP] DeSerialized " + engine_cache_path; + if (trt_engine == nullptr) { + return ORT_MAKE_STATUS(ONNXRUNTIME, EP_FAIL, + "TensorRT EP could not deserialize engine from encrypted cache: " + engine_cache_path); + } + } else { + // Set INT8 per tensor dynamic range + if (int8_enable_ && trt_builder->platformHasFastInt8() && int8_calibration_cache_available_) { + trt_config->setInt8Calibrator(nullptr); + if (!SetDynamicRange(*trt_network, dynamic_range_map)) { + return ORT_MAKE_STATUS(ONNXRUNTIME, EP_FAIL, + "TensorRT EP could not set INT8 dynamic range for fused node: " + fused_node.Name()); + } + } + + // Build engine + trt_engine = tensorrt_ptr::unique_pointer(trt_builder->buildEngineWithConfig(*trt_network, *trt_config)); + if (trt_engine == nullptr) { + return ORT_MAKE_STATUS(ONNXRUNTIME, EP_FAIL, + "TensorRT EP could not build engine for fused node: " + fused_node.Name()); + } + if (engine_cache_enable_) { + nvinfer1::IHostMemory* serializedModel = trt_engine->serialize(); + size_t engine_size = serializedModel->size(); + if (engine_decryption_enable_) { + // Encrypt engine + if (!engine_encryption_(engine_cache_path.c_str(), reinterpret_cast(serializedModel->data()), engine_size)) { + return ORT_MAKE_STATUS(ONNXRUNTIME, EP_FAIL, + "TensorRT EP could not call engine encryption function encrypt"); + } + } else { + std::ofstream file(engine_cache_path, std::ios::binary | std::ios::out); + file.write(reinterpret_cast(serializedModel->data()), engine_size); + } + serializedModel->destroy(); + LOGS_DEFAULT(VERBOSE) << "[TensorRT EP] Serialized " + engine_cache_path; } - serializedModel->destroy(); - LOGS_DEFAULT(VERBOSE) << "[TensorRT EP] Serialized " + engine_cache_path; } } diff --git a/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc b/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc index f39d67d259..d6f6b7f0f5 100644 --- a/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc +++ b/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc @@ -247,7 +247,7 @@ void RunWithOneSessionMultiThreadsInference(std::string model_name, std::string th.join(); } -TEST(TensorrtExecutionProviderTest, MultiThreadsTestWithOneSessionSingleThreadInference) { +TEST(TensorrtExecutionProviderTest, SessionCreationWithMultiThreadsAndInferenceWithMultiThreads) { std::vector threads; std::string model_name = "trt_execution_provider_multithreading_test.onnx"; std::string graph_name = "multithreading_test"; @@ -264,7 +264,7 @@ TEST(TensorrtExecutionProviderTest, MultiThreadsTestWithOneSessionSingleThreadIn th.join(); } -TEST(TensorrtExecutionProviderTest, MultiThreadsTestWithOneSessionMultiThreadsInference) { +TEST(TensorrtExecutionProviderTest, SessionCreationWithSingleThreadAndInferenceWithMultiThreads) { std::string model_name = "trt_execution_provider_multithreading_test.onnx"; std::string graph_name = "multithreading_test"; std::string sess_log_id = "TRTEPMultiThreadingTestWithOneSessionMultiThreads";