From 49cfb56cc3d405a37047de5b26b2dfdf6198a72a Mon Sep 17 00:00:00 2001
From: stevenlix <38092805+stevenlix@users.noreply.github.com>
Date: Mon, 16 Jan 2023 14:40:41 -0800
Subject: [PATCH] Fix subgraph index issue in TRT (#14305)

The subgraph index in the TRT engine name keeps increasing when multiple
sessions are created for the same model, which prevents the existing TRT
engine from being reused and causes a new engine to be created each time.
The cause is that trt_model_id_generator_ is defined globally.
This PR makes the following changes and improvements:
1. Define the subgraph index as a local variable so that it is not shared
across sessions.
2. Decouple the subgraph index from the hash id generator.
3. Call the hash id generator once at the beginning of GetCapability, since
the hash id is shared between TRT subgraphs and there is no need to call it
for each subgraph.

fix https://github.com/microsoft/onnxruntime/issues/14269
---
 .../tensorrt/tensorrt_execution_provider.cc   |  27 ++-
 .../tensorrt/tensorrt_execution_provider.h    |  10 +-
 .../tensorrt_execution_provider_utils.h       | 200 ++++++++----------
 .../providers/tensorrt/tensorrt_basic_test.cc |  49 +----
 4 files changed, 120 insertions(+), 166 deletions(-)
diff --git a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc
index c668e65c2c..5c3eb51a03 100644
--- a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc
+++ b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc
@@ -623,7 +623,7 @@ bool TensorrtExecutionProvider::IsSubGraphFullySupported(SubGraphCollection_t su
   return number_of_trt_nodes == number_of_ort_nodes;
 }
 
-std::unique_ptr<IndexedSubGraph> TensorrtExecutionProvider::GetSubGraph(SubGraph_t graph_nodes_index, const GraphViewer& graph) const {
+std::unique_ptr<IndexedSubGraph> TensorrtExecutionProvider::GetSubGraph(SubGraph_t graph_nodes_index, const GraphViewer& graph, const HashValue& model_hash, int subgraph_index) const {
   const std::vector<NodeIndex>& node_index = graph.GetNodesInTopologicalOrder();
   std::unordered_set<size_t> node_set;
   node_set.reserve(graph_nodes_index.first.size());
@@ -742,12 +742,11 @@ std::unique_ptr<IndexedSubGraph> TensorrtExecutionProvider::GetSubGraph(SubGraph
   }
 
   // Generate unique kernel name for TRT subgraph
-  HashValue model_hash = 0;
-  int id = TRTGenerateModelId(graph, model_hash);
-  std::string subgraph_id = std::to_string(model_hash) + "_" + std::to_string(id);
+  std::string subgraph_id = std::to_string(model_hash) + "_" + std::to_string(subgraph_index);
   auto meta_def = IndexedSubGraph_MetaDef::Create();
   const std::string graph_type = graph.IsSubgraph() ? "subgraph" : "graph";
   meta_def->name() = "TRTKernel_" + graph_type + "_" + graph.Name() + "_" + subgraph_id;
+  LOGS_DEFAULT(INFO) << "[TensorRT EP] TensorRT subgraph MetaDef name " + meta_def->name();
 
   // Assign inputs and outputs to subgraph's meta_def
   for (const auto& input : inputs) {
@@ -945,7 +944,7 @@ SubGraphCollection_t TensorrtExecutionProvider::GetSupportedList(SubGraphCollect
 }
 
 // Detect and remove cycles from supported node list
-bool TensorrtExecutionProvider::DetectTensorRTGraphCycles(SubGraphCollection_t& supported_nodes_vector, const GraphViewer& graph, bool remove_cycles) const {
+bool TensorrtExecutionProvider::DetectTensorRTGraphCycles(SubGraphCollection_t& supported_nodes_vector, const GraphViewer& graph, const HashValue& model_hash, bool remove_cycles) const {
   const std::vector<NodeIndex>& node_index = graph.GetNodesInTopologicalOrder();
   bool trt_cycle = true, cycle_detected = false;
   while (trt_cycle) {
@@ -955,10 +954,11 @@ bool TensorrtExecutionProvider::DetectTensorRTGraphCycles(SubGraphCollection_t&
     std::unordered_map<std::string, std::unordered_set<std::string>> input_to_nodes_map, node_to_outputs_map;
     std::unordered_set<size_t> non_trt_node_index(node_index.begin(), node_index.end());
     size_t id = 0;
+    int subgraph_index = 0;
     for (const auto& group : supported_nodes_vector) {
       if (!group.first.empty()) {
         // Construct subgraph from node list
-        std::unique_ptr<IndexedSubGraph> sub_graph = GetSubGraph(group, graph);
+        std::unique_ptr<IndexedSubGraph> sub_graph = GetSubGraph(group, graph, model_hash, subgraph_index);
 
         // Create node to inputs/outputs/index maps
         const auto& meta_def = sub_graph->GetMetaDef();
@@ -981,6 +981,7 @@ bool TensorrtExecutionProvider::DetectTensorRTGraphCycles(SubGraphCollection_t&
         for (const auto& index : group.first) {
           non_trt_node_index.erase(node_index[index]);
         }
+        subgraph_index++;
       }
     }
 
@@ -1070,6 +1071,9 @@ TensorrtExecutionProvider::GetCapability(const GraphViewer& graph,
   strcpy(model_path_, path_string.c_str());
 #endif
 
+  // Generate the model hash id once; it is shared by all TRT subgraphs of this graph
+  HashValue model_hash = TRTGenerateId(graph);
+
   // Get supported node list from TensorRT parser
   const int number_of_ort_nodes = graph.NumberOfNodes();
   std::vector<size_t> nodes_vector(number_of_ort_nodes);
@@ -1124,7 +1128,7 @@ TensorrtExecutionProvider::GetCapability(const GraphViewer& graph,
   }
 
   // Detect and remove cycles from supported node list
-  DetectTensorRTGraphCycles(supported_nodes_vector, graph);
+  DetectTensorRTGraphCycles(supported_nodes_vector, graph, model_hash);
 
   // Consolidate supported node list
   if (supported_nodes_vector.size() > 1) {
@@ -1135,7 +1139,7 @@ TensorrtExecutionProvider::GetCapability(const GraphViewer& graph,
       }
     }
     SubGraphCollection_t consolidated_supported_nodes_vector = {{nodes_vector, true}};
-    if (DetectTensorRTGraphCycles(consolidated_supported_nodes_vector, graph, false)) {
+    if (DetectTensorRTGraphCycles(consolidated_supported_nodes_vector, graph, model_hash, false)) {
       LOGS_DEFAULT(INFO) << "[TensorRT EP] TensorRT nodes are not consolidated because graph will have cycles after consolidation";
     } else {
       LOGS_DEFAULT(INFO) << "[TensorRT EP] TensorRT nodes are consolidated into one subgraph";
@@ -1191,12 +1195,13 @@ TensorrtExecutionProvider::GetCapability(const GraphViewer& graph,
     }
   }
 
-  int number_of_trt_nodes = 0;
+  int number_of_trt_nodes = 0, subgraph_index = 0;
   for (const auto& group : supported_nodes_vector) {
     if (!group.first.empty()) {
-      std::unique_ptr<IndexedSubGraph> sub_graph = GetSubGraph(group, graph);
+      std::unique_ptr<IndexedSubGraph> sub_graph = GetSubGraph(group, graph, model_hash, subgraph_index);
       result.push_back(ComputeCapability::Create(std::move(sub_graph)));
       number_of_trt_nodes += static_cast<int>(group.first.size());
+      subgraph_index++;
     }
   }
 
@@ -1264,7 +1269,7 @@ common::Status TensorrtExecutionProvider::Compile(const std::vector<FusedNodeAnd
         if (layer->getType() == nvinfer1::LayerType::kELEMENTWISE && next_layer->getType() == nvinfer1::LayerType::kREDUCE && (static_cast<nvinfer1::IElementWiseLayer*>(layer))->getOperation() == nvinfer1::ElementWiseOperation::kPOW) {
           LOGS_DEFAULT(VERBOSE) << "[TensorRT EP] Force Pow + Reduce ops in layer norm to run in FP32 to avoid overflow";
           layer->setPrecision(nvinfer1::DataType::kFLOAT);
-          next_layer->setPrecision(nvinfer1::DataType::kFLOAT);		
+          next_layer->setPrecision(nvinfer1::DataType::kFLOAT);
           layer->setOutputType(0, nvinfer1::DataType::kFLOAT);
           next_layer->setOutputType(0, nvinfer1::DataType::kFLOAT);
         }
diff --git a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.h b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.h
index 8dacd84e64..2cb9826b89 100644
--- a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.h
+++ b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.h
@@ -27,9 +27,9 @@ static const std::string kEngineCacheEnable = "ORT_TENSORRT_ENGINE_CACHE_ENABLE"
 static const std::string kCachePath = "ORT_TENSORRT_CACHE_PATH";
 static const std::string kDecryptionEnable = "ORT_TENSORRT_ENGINE_DECRYPTION_ENABLE";
 static const std::string kDecryptionLibPath = "ORT_TENSORRT_ENGINE_DECRYPTION_LIB_PATH";
-static const std::string kForceSequentialEngineBuild= "ORT_TENSORRT_FORCE_SEQUENTIAL_ENGINE_BUILD";
-static const std::string kContextMemorySharingEnable= "ORT_TENSORRT_CONTEXT_MEMORY_SHARING_ENABLE";
-static const std::string kLayerNormFP32Fallback= "ORT_TENSORRT_LAYER_NORM_FP32_FALLBACK";
+static const std::string kForceSequentialEngineBuild = "ORT_TENSORRT_FORCE_SEQUENTIAL_ENGINE_BUILD";
+static const std::string kContextMemorySharingEnable = "ORT_TENSORRT_CONTEXT_MEMORY_SHARING_ENABLE";
+static const std::string kLayerNormFP32Fallback = "ORT_TENSORRT_LAYER_NORM_FP32_FALLBACK";
 // Old env variable for backward compatibility
 static const std::string kEngineCachePath = "ORT_TENSORRT_ENGINE_CACHE_PATH";
 }  // namespace tensorrt_env_vars
@@ -193,7 +193,7 @@ class TensorrtExecutionProvider : public IExecutionProvider {
 
   /**Get IndexedSubGraph based on node list of the subgraph*/
   std::unique_ptr<IndexedSubGraph> GetSubGraph(SubGraph_t graph_nodes_index,
-                                               const GraphViewer& graph) const;
+                                               const GraphViewer& graph, const HashValue& model_hash, int subgraph_index) const;
 
   /**
   Get TensorRT supported node lists by calling Onnx-TensorRT parser recursively. Since each time the parser
@@ -205,7 +205,7 @@ class TensorrtExecutionProvider : public IExecutionProvider {
   SubGraphCollection_t GetSupportedList(SubGraphCollection_t supported_nodes_list, int iterations, const int max_iterations,
                                         const GraphViewer& graph, bool* early_termination) const;
 
-  bool DetectTensorRTGraphCycles(SubGraphCollection_t& supported_nodes_vector, const GraphViewer& graph, bool remove_cycles = true) const;
+  bool DetectTensorRTGraphCycles(SubGraphCollection_t& supported_nodes_vector, const GraphViewer& graph, const HashValue& model_hash, bool remove_cycles = true) const;
 
   /**
   Get a unique_lock object to control the concurrency behavior.
diff --git a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_utils.h b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_utils.h
index a04b971c6a..7edeb80248 100644
--- a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_utils.h
+++ b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_utils.h
@@ -28,23 +28,23 @@ float ConvertSinglePrecisionIEEE754ToFloat(unsigned long input) {
 }
 
 /*
-* Read calibration table for INT8 quantization
-* Two kind of calibration tables are supported,
-* 1. ORT generated calibration table
-* The table is pre-serialized by flatbuffers.
-* Each entry in the table is a key-value pair,
-* key: tensor name, value: maximum absolute value in floating point
-* For example,
-*   data_0 2.008338
-*   ...
-* 2. Native TensorRT generated calibration table
-* Data format is defined by TensorRT as,
-* tensor name : scale in 32-bit single precision IEEE754 format
-* For example,
-*   TRT-7103-EntropyCalibration2
-*   data_0: 4000889d
-*   ...
-*/
+ * Read calibration table for INT8 quantization
+ * Two kinds of calibration tables are supported,
+ * 1. ORT generated calibration table
+ * The table is pre-serialized by flatbuffers.
+ * Each entry in the table is a key-value pair,
+ * key: tensor name, value: maximum absolute value in floating point
+ * For example,
+ *   data_0 2.008338
+ *   ...
+ * 2. Native TensorRT generated calibration table
+ * Data format is defined by TensorRT as,
+ * tensor name : scale in 32-bit single precision IEEE754 format
+ * For example,
+ *   TRT-7103-EntropyCalibration2
+ *   data_0: 4000889d
+ *   ...
+ */
 bool ReadDynamicRange(const std::string file_name, const bool is_trt_calibration_table, std::unordered_map<std::string, float>& dynamic_range_map) {
   std::ifstream infile(file_name, std::ios::binary | std::ios::in);
   if (!infile) {
@@ -95,13 +95,13 @@ bool ReadDynamicRange(const std::string file_name, const bool is_trt_calibration
 }
 
 /*
-* Seralize engine profile
-* The profile contains min/max shape ranges of dynamic shape dimensions of each input tensor
-* For example, assume tensor_a has two dynamic shape dimensions: dim_0 and dim_2, and tensor_b
-* has one dynamic shape dimension: dim_1. The data in profile will be,
-* key: tensor_a, value: dim_0 min_shape max_shape dim_2 min_shape max_shape
-* key: tensor_b, value: dim_1 min_shape max_shape
-*/
+ * Serialize engine profile
+ * The profile contains min/max shape ranges of dynamic shape dimensions of each input tensor
+ * For example, assume tensor_a has two dynamic shape dimensions: dim_0 and dim_2, and tensor_b
+ * has one dynamic shape dimension: dim_1. The data in profile will be,
+ * key: tensor_a, value: dim_0 min_shape max_shape dim_2 min_shape max_shape
+ * key: tensor_b, value: dim_1 min_shape max_shape
+ */
 void SerializeProfile(const std::string& file_name, std::unordered_map<std::string, std::unordered_map<size_t, std::pair<int64_t, int64_t>>>& shape_ranges) {
   // Serialize profile
   flexbuffers::Builder builder;
@@ -170,15 +170,15 @@ std::string GetCachePath(const std::string& root, const std::string& name) {
 /*
  * Get cache by type
  *
- * \param root root path of the cache  
+ * \param root root path of the cache
  * \param file_extension It could be ".engine", ".profile" or ".timing"
-*/
+ */
 std::vector<fs::path> GetCachesByType(const std::string& root, std::string file_extension) {
   std::vector<fs::path> cache_files;
-  for (const auto & entry : fs::directory_iterator(root)) {
-      if (fs::path(file_extension) == fs::path(entry).extension()) {
-        cache_files.push_back(fs::path(entry));
-      }
+  for (const auto& entry : fs::directory_iterator(root)) {
+    if (fs::path(file_extension) == fs::path(entry).extension()) {
+      cache_files.push_back(fs::path(entry));
+    }
   }
   return cache_files;
 }
@@ -186,118 +186,106 @@ std::vector<fs::path> GetCachesByType(const std::string& root, std::string file_
 bool IsCacheExistedByType(const std::string& root, std::string file_extension) {
   auto cache_files = GetCachesByType(root, file_extension);
   if (cache_files.size() == 0) {
-          return false;
+    return false;
   }
   return true;
 }
 
 void RemoveCachesByType(const std::string& root, std::string file_extension) {
   auto cache_files = GetCachesByType(root, file_extension);
-  for (const auto & entry : cache_files) {
+  for (const auto& entry : cache_files) {
     fs::remove(entry);
   }
 }
 
 // Helper class to generate engine id via model name/model content/env metadata
-class TRTModelIdGenerator {
- public:
-  int TRTGenerateId(const GraphViewer& graph_viewer, HashValue& model_hash) {
-    model_hash = 0;
+HashValue TRTGenerateId(const GraphViewer& graph_viewer) {
+  HashValue model_hash = 0;
 
-    // find the top level graph
-    const Graph* cur_graph = &graph_viewer.GetGraph();
-    while (cur_graph->IsSubgraph()) {
-      cur_graph = cur_graph->ParentGraph();
-    }
+  // find the top level graph
+  const Graph* cur_graph = &graph_viewer.GetGraph();
+  while (cur_graph->IsSubgraph()) {
+    cur_graph = cur_graph->ParentGraph();
+  }
 
-    const Graph& main_graph = *cur_graph;
-    uint32_t hash[4] = {0, 0, 0, 0};
+  const Graph& main_graph = *cur_graph;
+  uint32_t hash[4] = {0, 0, 0, 0};
 
-    auto hash_str = [&hash](const std::string& str) {
-      MurmurHash3::x86_128(str.data(), gsl::narrow_cast<int32_t>(str.size()), hash[0], &hash);
-    };
+  auto hash_str = [&hash](const std::string& str) {
+    MurmurHash3::x86_128(str.data(), gsl::narrow_cast<int32_t>(str.size()), hash[0], &hash);
+  };
 
-    // Use model name instead of path to avoid cache regeneration if path changes
-    const auto& model_path = main_graph.ModelPath();
-    if (!model_path.IsEmpty()) {
-      // Get model name
-      PathString path_string = model_path.GetComponents().back();
-      char arr[256];
+  // Use model name instead of path to avoid cache regeneration if path changes
+  const auto& model_path = main_graph.ModelPath();
+  if (!model_path.IsEmpty()) {
+    // Get model name
+    PathString path_string = model_path.GetComponents().back();
+    char arr[256];
 #ifdef _WIN32
-      wcstombs_s(nullptr, arr, sizeof(arr), path_string.c_str(), sizeof(arr));
+    wcstombs_s(nullptr, arr, sizeof(arr), path_string.c_str(), sizeof(arr));
 #else
-      strcpy(arr, path_string.c_str());
+    strcpy(arr, path_string.c_str());
 #endif
-      std::string model_name(arr);
-      LOGS_DEFAULT(INFO) << "[TensorRT EP] Model name is " << model_name;
-      // Ensure enough characters are hashed in case model names are too short
-      int32_t model_name_length = gsl::narrow_cast<int32_t>(model_name.size());
-      constexpr int32_t hash_string_length = 500;
-      std::string repeat_model_name = model_name;
-      for (int i = model_name_length; i > 0 && i < hash_string_length; i += model_name_length) {
-        repeat_model_name += model_name;
-      }
-      hash_str(repeat_model_name);
-    } else {
-      LOGS_DEFAULT(INFO) << "[TensorRT EP] Model path is empty";
+    std::string model_name(arr);
+    LOGS_DEFAULT(INFO) << "[TensorRT EP] Model name is " << model_name;
+    // Ensure enough characters are hashed in case model names are too short
+    int32_t model_name_length = gsl::narrow_cast<int32_t>(model_name.size());
+    constexpr int32_t hash_string_length = 500;
+    std::string repeat_model_name = model_name;
+    for (int i = model_name_length; i > 0 && i < hash_string_length; i += model_name_length) {
+      repeat_model_name += model_name;
     }
+    hash_str(repeat_model_name);
+  } else {
+    LOGS_DEFAULT(INFO) << "[TensorRT EP] Model path is empty";
+  }
 
-    // fingerprint the main graph by hashing graph inputs
-    for (const auto* node_arg : main_graph.GetInputsIncludingInitializers()) {
-      hash_str(node_arg->Name());
-    }
+  // fingerprint the main graph by hashing graph inputs
+  for (const auto* node_arg : main_graph.GetInputsIncludingInitializers()) {
+    hash_str(node_arg->Name());
+  }
 
-    // hashing output of each node
-    const int number_of_ort_nodes = graph_viewer.NumberOfNodes();
-    std::vector<size_t> nodes_vector(number_of_ort_nodes);
-    std::iota(std::begin(nodes_vector), std::end(nodes_vector), 0);
-    const std::vector<NodeIndex>& node_index = graph_viewer.GetNodesInTopologicalOrder();
-    for (const auto& index : nodes_vector) {
-      const auto& node = graph_viewer.GetNode(node_index[index]);
-      for (const auto* node_arg : node->OutputDefs()) {
-        if (node_arg->Exists()) {
-          hash_str(node_arg->Name());
-        }
+  // fingerprint current graph by hashing graph inputs
+  for (const auto* node_arg : graph_viewer.GetInputsIncludingInitializers()) {
+    hash_str(node_arg->Name());
+  }
+
+  // hashing output of each node
+  const int number_of_ort_nodes = graph_viewer.NumberOfNodes();
+  std::vector<size_t> nodes_vector(number_of_ort_nodes);
+  std::iota(std::begin(nodes_vector), std::end(nodes_vector), 0);
+  const std::vector<NodeIndex>& node_index = graph_viewer.GetNodesInTopologicalOrder();
+  for (const auto& index : nodes_vector) {
+    const auto& node = graph_viewer.GetNode(node_index[index]);
+    for (const auto* node_arg : node->OutputDefs()) {
+      if (node_arg->Exists()) {
+        hash_str(node_arg->Name());
       }
     }
+  }
 
 #ifdef __linux__
-    hash_str("LINUX");
+  hash_str("LINUX");
 #elif defined(_WIN32)
-    hash_str("WINDOWS");
+  hash_str("WINDOWS");
 #endif
 
 #ifdef ORT_VERSION
-    hash_str(ORT_VERSION);
+  hash_str(ORT_VERSION);
 #endif
 
 #ifdef CUDA_VERSION
-    hash_str(std::to_string(CUDA_VERSION));
+  hash_str(std::to_string(CUDA_VERSION));
 #endif
 
 #if defined(NV_TENSORRT_MAJOR) && defined(NV_TENSORRT_MINOR)
-    std::string TRT_VERSION = std::to_string(NV_TENSORRT_MAJOR) + "." + std::to_string(NV_TENSORRT_MINOR);
-    hash_str(TRT_VERSION);
+  std::string TRT_VERSION = std::to_string(NV_TENSORRT_MAJOR) + "." + std::to_string(NV_TENSORRT_MINOR);
+  hash_str(TRT_VERSION);
 #endif
 
-    model_hash = hash[0] | (uint64_t(hash[1]) << 32);
+  model_hash = hash[0] | (uint64_t(hash[1]) << 32);
 
-    // return the current unique id, and increment to update
-    return trt_model_id_[model_hash]++;
-  }
-
- private:
-  std::unordered_map<HashValue, int> trt_model_id_;       // current unique id for model
-};
-
-std::unique_ptr<TRTModelIdGenerator> trt_model_id_generator_ = std::make_unique<TRTModelIdGenerator>();
-
-// Calll TRTGenerateModelId to generate hash id for TRT engine cache
-int TRTGenerateModelId(const GraphViewer& graph_viewer, HashValue& model_hash) {
-  // if the EP is shared across multiple sessions there's a very small potential for concurrency issues.
-  // use a lock when generating an id to be paranoid
-  static OrtMutex mutex;
-  std::lock_guard<OrtMutex> lock(mutex);
-  return trt_model_id_generator_->TRTGenerateId(graph_viewer, model_hash);
-}
+  // return the model hash id
+  return model_hash;
 }
+}  // namespace onnxruntime
diff --git a/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc b/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc
index 86024b9646..f74ecd9213 100644
--- a/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc
+++ b/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc
@@ -287,9 +287,7 @@ TEST(TensorrtExecutionProviderTest, TRTModelIdGeneratorUsingModelHashing) {
   GraphViewer viewer(graph);
 
   // get the hash for the model when loaded from file
-  HashValue model_hash;
-  int id = TRTGenerateModelId(viewer, model_hash);
-  ASSERT_EQ(id, 0);
+  HashValue model_hash = TRTGenerateId(viewer);
   ASSERT_NE(model_hash, 0);
 
   // now load the model from bytes and check the hash differs
@@ -301,14 +299,11 @@ TEST(TensorrtExecutionProviderTest, TRTModelIdGeneratorUsingModelHashing) {
   ASSERT_STATUS_OK(Model::Load(std::move(model_proto), PathString(), model2, nullptr,
                                DefaultLoggingManager().DefaultLogger()));
 
+  // Test loading same model from file and byte stream. Hash values should be different
   Graph& graph2 = model2->MainGraph();
   GraphViewer viewer2(graph2);
-
-  HashValue model_hash2;
-  int id2 = TRTGenerateModelId(viewer2, model_hash2);
-
-  // test comparing model 1 & 2
-  ASSERT_EQ(id2, 0) << "id2 should be 0";
+  HashValue model_hash2= TRTGenerateId(viewer2);
+  ASSERT_NE(model_hash, model_hash2);
 
   // Test loading same model from different path, see if hash values are same as well
   model_path = ORT_TSTR("testdata/TRTEP_test_model/mnist.onnx");
@@ -316,42 +311,8 @@ TEST(TensorrtExecutionProviderTest, TRTModelIdGeneratorUsingModelHashing) {
   ASSERT_TRUE(Model::Load(model_path, model3, nullptr, DefaultLoggingManager().DefaultLogger()).IsOK());
   Graph& graph3 = model3->MainGraph();
   GraphViewer viewer3(graph3);
-  HashValue model_hash3;
-  int id3 = TRTGenerateModelId(viewer3, model_hash3);
+  HashValue model_hash3 = TRTGenerateId(viewer3);
   ASSERT_EQ(model_hash, model_hash3) << "model 1&3 are same models and they have same hash, no matter where they are loaded";
-  ASSERT_EQ(id3, 1) << "id3 should be 1 as model 1 & 3 have same hash";
-}
-
-// Compare on TRT subgraph id when repeatedly calling TRTGenerateModelId
-TEST(TensorrtExecutionProviderTest, TRTSubgraphIdGeneratorUsingModelHashing) {
-  // Load model
-  auto model_path = ORT_TSTR("testdata/mnist.onnx");
-  std::shared_ptr<Model> model;
-  ASSERT_TRUE(Model::Load(model_path, model, nullptr, DefaultLoggingManager().DefaultLogger()).IsOK());
-
-  Graph& main_graph = model->MainGraph();
-  GraphViewer graph(main_graph);
-  HashValue model_hash;
-
-  // Graph id acquired
-  int graph_id = TRTGenerateModelId(graph, model_hash);
-  int asserted_subgraph_id = graph_id + 1;
-
-  // mock fetching subgraphs and generate id by calling TRTGenerateModelId repeatedly
-  const int number_of_ort_nodes = graph.NumberOfNodes();
-  std::vector<size_t> nodes_vector(number_of_ort_nodes);
-  std::iota(std::begin(nodes_vector), std::end(nodes_vector), 0);
-  const std::vector<NodeIndex>& node_index = graph.GetNodesInTopologicalOrder();
-
-  for (const auto& index : nodes_vector) {
-    const auto& node = graph.GetNode(node_index[index]);
-    std::cout << "->" << node->Name(); 
-
-    // Check if id increment each time TRTGenerateModelId is called
-    int subgraph_id = TRTGenerateModelId(graph, model_hash);
-    ASSERT_EQ(subgraph_id, asserted_subgraph_id) << "id will increment as TRTGenerateModelId is repeatedly called";
-    asserted_subgraph_id++;
-  }
 }
 
 TEST_P(TensorrtExecutionProviderCacheTest, Run) {