// Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. #include #include #include "asserts.h" #include "core/framework/allocator_utils.h" #include "core/framework/execution_providers.h" #include "core/framework/graph_partitioner.h" #include "core/framework/kernel_registry.h" #include "core/framework/op_kernel.h" #include "core/framework/bfc_arena.h" #include "core/framework/session_state.h" #include "core/graph/graph_utils.h" #include "core/graph/graph_viewer.h" #include "core/graph/model.h" #include "core/graph/model_saving_options.h" #include "core/graph/op.h" #include "core/providers/cpu/cpu_execution_provider.h" #include "core/session/onnxruntime_session_options_config_keys.h" #include "core/util/thread_utils.h" #include "gtest/gtest.h" #include "test/test_environment.h" #include "test/util/include/default_providers.h" #include "test/util/include/file_util.h" #include "core/optimizer/layout_transformation/layout_transformation.h" using namespace ONNX_NAMESPACE; namespace onnxruntime { namespace test { #ifndef ENABLE_TRAINING_CORE #ifndef __wasm__ static void TestSavedPrepacks(const Model& model) { auto inspect = [](const Graph& graph) { const auto& prepacked_for_graph = graph.GetPrepacked(); const auto& key_to_blob = prepacked_for_graph.GetKeyToBlob(); ASSERT_EQ(1U, key_to_blob.size()); const size_t expected_prepacks_for_writing = (graph.ParentGraph() == nullptr) ? 1U : 0U; ASSERT_EQ(expected_prepacks_for_writing, prepacked_for_graph.GetNumberOfWeightsForWriting()); const size_t expected_blobs_for_writing = (graph.ParentGraph() == nullptr) ? 1U : 0U; ASSERT_EQ(expected_blobs_for_writing, prepacked_for_graph.GetNumberOfKeyedBlobsForWriting()); if (graph.ParentGraph() == nullptr) { const auto* blob_keys = prepacked_for_graph.GetKeysForWeightForSaving("if_shared"); ASSERT_TRUE(blob_keys != nullptr); ASSERT_EQ(blob_keys->size(), 1U); const auto* prepacked_weights = prepacked_for_graph.GetPrepackedWeights(*blob_keys->cbegin()); ASSERT_TRUE(prepacked_weights != nullptr); ASSERT_EQ(prepacked_weights->buffer_sizes_.size(), 1U); ASSERT_EQ(prepacked_weights->buffer_sizes_[0], sizeof(float) * 2); } }; const auto& main_graph = model.MainGraph(); inspect(main_graph); const auto& nodes = main_graph.Nodes(); auto if_node_hit = std::find_if(nodes.begin(), nodes.end(), [](const Node& node) { return node.Name() == "if"; }); ASSERT_FALSE(if_node_hit == nodes.end()); const Node& if_node = *if_node_hit; for (const auto& [_, subgraph] : if_node.GetAttributeNameToSubgraphMap()) { inspect(*subgraph); } } static void TestLoadedSharedUserSupplied(const Model& model) { auto inspect = [](const Graph& graph) { const auto& prepacked_for_graph = graph.GetPrepacked(); constexpr size_t expected_prepacks_for_writing = 0U; ASSERT_EQ(expected_prepacks_for_writing, prepacked_for_graph.GetNumberOfWeightsForWriting()); // We have not loaded anything since this initializer is user supplied const auto& key_to_blob = prepacked_for_graph.GetKeyToBlob(); ASSERT_EQ(0U, key_to_blob.size()); }; const auto& main_graph = model.MainGraph(); inspect(main_graph); const auto& nodes = main_graph.Nodes(); auto if_node_hit = std::find_if(nodes.begin(), nodes.end(), [](const Node& node) { return node.Name() == "if"; }); ASSERT_FALSE(if_node_hit == nodes.end()); const Node& if_node = *if_node_hit; for (const auto& [_, subgraph] : if_node.GetAttributeNameToSubgraphMap()) { inspect(*subgraph); } } static void TestLoadedSharedNoUserSupplied(const Model& model) { auto inspect = [](const Graph& graph) { const auto& prepacked_for_graph = graph.GetPrepacked(); constexpr size_t expected_prepacks_for_writing = 0U; ASSERT_EQ(expected_prepacks_for_writing, prepacked_for_graph.GetNumberOfWeightsForWriting()); // We have not loaded anything since this initializer is user supplied const auto& key_to_blob = prepacked_for_graph.GetKeyToBlob(); ASSERT_EQ(1U, key_to_blob.size()); }; const auto& main_graph = model.MainGraph(); inspect(main_graph); const auto& nodes = main_graph.Nodes(); auto if_node_hit = std::find_if(nodes.begin(), nodes.end(), [](const Node& node) { return node.Name() == "if"; }); ASSERT_FALSE(if_node_hit == nodes.end()); const Node& if_node = *if_node_hit; for (const auto& [_, subgraph] : if_node.GetAttributeNameToSubgraphMap()) { inspect(*subgraph); } } #endif // __wasm__ #endif // ENABLE_TRAINING_CORE class TestOpKernel : public OpKernel { public: TestOpKernel(const OpKernelInfo& p) : OpKernel(p) { } Status Compute(OpKernelContext* context) const override { ORT_UNUSED_PARAMETER(context); return Status::OK(); } Status ComputeAsync(OpKernelContext* context, DoneCallback done) const override { ORT_UNUSED_PARAMETER(context); ORT_UNUSED_PARAMETER(done); return Status::OK(); } }; class SessionStateAddGetKernelTest : public testing::TestWithParam {}; TEST_P(SessionStateAddGetKernelTest, AddGetKernelTest) { OrtThreadPoolParams to; to.thread_pool_size = GetParam(); auto tp = concurrency::CreateThreadPool(&onnxruntime::Env::Default(), to, concurrency::ThreadPoolType::INTRA_OP); ONNX_OPERATOR_SCHEMA(Variable) .SetDoc("Input variable.") .Output(0, "output_1", "docstr for output_1.", "tensor(int32)"); onnxruntime::Model model("graph_1", false, DefaultLoggingManager().DefaultLogger()); auto& graph = model.MainGraph(); ExecutionProviders execution_providers; auto tmp_cpu_execution_provider = std::make_unique(CPUExecutionProviderInfo(false)); auto* cpu_execution_provider = tmp_cpu_execution_provider.get(); ASSERT_STATUS_OK(execution_providers.Add(kCpuExecutionProvider, std::move(tmp_cpu_execution_provider))); DataTransferManager dtm; ExternalDataLoaderManager edlm; profiling::Profiler profiler; SessionOptions sess_options; sess_options.enable_mem_pattern = true; sess_options.execution_mode = ExecutionMode::ORT_SEQUENTIAL; sess_options.use_deterministic_compute = false; sess_options.enable_mem_reuse = true; SessionState s(graph, execution_providers, tp.get(), nullptr, dtm, edlm, DefaultLoggingManager().DefaultLogger(), profiler, sess_options); std::vector inputs; std::vector outputs; TypeProto output_type; output_type.mutable_tensor_type()->set_elem_type(TensorProto_DataType_INT32); output_type.mutable_tensor_type()->mutable_shape()->add_dim()->set_dim_value(1); onnxruntime::NodeArg output_arg("node_1_out_1", &output_type); outputs.push_back(&output_arg); onnxruntime::Node& node = graph.AddNode("node_1", "Variable", "node 1.", inputs, outputs); auto status = graph.Resolve(); ASSERT_TRUE(status.IsOK()); auto kernel_def = KernelDefBuilder().SetName("Variable").Provider(kCpuExecutionProvider).SinceVersion(1, 10).Build(); OpKernelInfo p_info(node, *kernel_def, *cpu_execution_provider, s.GetConstantInitializedTensors(), s.GetOrtValueNameIdxMap(), s.GetDataTransferMgr(), s.GetAllocators(), s.GetSessionOptions().config_options); std::unique_ptr p_kernel = std::make_unique(p_info); size_t orig_num_outputs = p_kernel->Node().OutputDefs().size(); std::cout << "node_idx: " << node.Index() << std::endl; KernelRegistryManager kernel_registry_manager; status = kernel_registry_manager.RegisterKernels(execution_providers); ASSERT_TRUE(status.IsOK()) << status.ErrorMessage(); node.SetExecutionProviderType(kCpuExecutionProvider); std::shared_ptr kernel_registry = std::make_shared(); ASSERT_STATUS_OK(kernel_registry->Register( KernelCreateInfo(std::move(kernel_def), [](FuncManager&, const OpKernelInfo& info, std::unique_ptr& out) -> Status { out = std::make_unique(info); return Status::OK(); }))); kernel_registry_manager.RegisterKernelRegistry(kernel_registry); ASSERT_STATUS_OK(s.FinalizeSessionState(ORT_TSTR(""), kernel_registry_manager)); auto test_kernel = s.GetKernel(node.Index()); std::cout << "orig: " << orig_num_outputs << " new: " << test_kernel->Node().OutputDefs().size() << std::endl; EXPECT_EQ(orig_num_outputs, test_kernel->Node().OutputDefs().size()); } INSTANTIATE_TEST_SUITE_P(SessionStateTests, SessionStateAddGetKernelTest, testing::Values(0, 1)); class TestParam { public: int ir_version; bool enable_mem_pattern; int thread_count; }; TestParam param_list[] = { {3, true, 0}, {4, true, 0}, {3, false, 0}, {4, false, 0}, {3, true, 1}, {4, true, 1}, {3, false, 1}, {4, false, 1}}; class SessionStateTestP : public testing::TestWithParam {}; // Test that we separate out constant and non-constant initializers correctly TEST_P(SessionStateTestP, TestInitializerProcessing) { const TestParam& param = GetParam(); OrtThreadPoolParams to; to.thread_pool_size = to.thread_pool_size; auto tp = concurrency::CreateThreadPool(&onnxruntime::Env::Default(), to, concurrency::ThreadPoolType::INTRA_OP); std::basic_ostringstream oss; oss << ORT_TSTR("testdata/optional_inputs_ir") << param.ir_version << ORT_TSTR(".onnx"); Status status; std::shared_ptr model; ASSERT_TRUE((status = Model::Load(oss.str(), model, nullptr, DefaultLoggingManager().DefaultLogger())).IsOK()) << status; Graph& graph = model->MainGraph(); // take a copy as this gets cleared during session state initialization InitializedTensorSet initializers = graph.GetAllInitializedTensors(); ExecutionProviders execution_providers; CPUExecutionProviderInfo epi{false}; status = execution_providers.Add(onnxruntime::kCpuExecutionProvider, std::make_unique(epi)); ASSERT_TRUE(status.IsOK()) << status; KernelRegistryManager krm; status = krm.RegisterKernels(execution_providers); ASSERT_TRUE(status.IsOK()) << status; DataTransferManager dtm; ExternalDataLoaderManager edlm; profiling::Profiler profiler; SessionOptions sess_options; sess_options.enable_mem_pattern = param.enable_mem_pattern; sess_options.execution_mode = ExecutionMode::ORT_SEQUENTIAL; sess_options.use_deterministic_compute = false; sess_options.enable_mem_reuse = true; SessionState session_state(graph, execution_providers, tp.get(), nullptr, dtm, edlm, DefaultLoggingManager().DefaultLogger(), profiler, sess_options); GraphPartitioner partitioner(krm, execution_providers); ASSERT_STATUS_OK( partitioner.Partition( graph, session_state.GetMutableFuncMgr(), [](Graph& graph, bool& modified, const IExecutionProvider& execution_provider, const layout_transformation::DebugGraphFn& debug_graph_fn) -> Status { AllocatorPtr cpu_allocator = std::make_shared(); return layout_transformation::TransformLayoutForEP( graph, modified, execution_provider, std::move(cpu_allocator), debug_graph_fn); }, sess_options.config_options, DefaultLoggingManager().DefaultLogger())); ASSERT_STATUS_OK(session_state.FinalizeSessionState(oss.str(), krm)); const auto& initialized_tensors = session_state.GetInitializedTensors(); const auto& const_initialized_tensors = session_state.GetConstantInitializedTensors(); ASSERT_EQ(initializers.size(), initialized_tensors.size()) << "SessionState should have an entry for all initializers in Graph."; if (param.ir_version < 4) { ASSERT_EQ(initialized_tensors.size(), const_initialized_tensors.size()) << "All initializers should be considered constant if IR version < 4."; } else { const auto& name_to_idx = session_state.GetOrtValueNameIdxMap(); for (const auto& entry : initializers) { int idx; ASSERT_STATUS_OK(name_to_idx.GetIdx(entry.first, idx)); bool found = initialized_tensors.find(idx) != initialized_tensors.cend(); ASSERT_TRUE(found) << "Missing entry for " << entry.first << " in session state initialized tensors"; if (graph_utils::IsConstantInitializer(graph, entry.first, false)) { found = const_initialized_tensors.find(idx) != const_initialized_tensors.cend(); ASSERT_TRUE(found) << "Missing entry for " << entry.first << " in session state const initialized tensors"; } } } } // Test that we allocate memory for an initializer from non-arena memory even if we provide an arena-based allocator // if the relevant session option config flag is set TEST(SessionStateTest, TestInitializerMemoryAllocatedUsingNonArenaMemory) { // For this test we need to enable the arena-based allocator. if (!DoesCpuAllocatorSupportArenaUsage()) { GTEST_SKIP() << "CPU allocator does not support arena usage."; } AllocatorPtr cpu_allocator = std::make_shared(); // Part 1: Feature turned ON (i.e.) allocate from non-arena memory { std::basic_ostringstream oss; oss << ORT_TSTR("testdata/mul_1.onnx"); Status status; std::shared_ptr model; ASSERT_TRUE((status = Model::Load(oss.str(), model, nullptr, DefaultLoggingManager().DefaultLogger())).IsOK()) << status; Graph& graph = model->MainGraph(); ExecutionProviders execution_providers; CPUExecutionProviderInfo epi{true}; // use an arena-based allocator for this EP status = execution_providers.Add(onnxruntime::kCpuExecutionProvider, std::make_unique(epi)); ASSERT_TRUE(status.IsOK()) << status; KernelRegistryManager krm; status = krm.RegisterKernels(execution_providers); ASSERT_TRUE(status.IsOK()) << status; DataTransferManager dtm; ExternalDataLoaderManager edlm; profiling::Profiler profiler; SessionOptions sess_options; sess_options.enable_mem_pattern = false; sess_options.execution_mode = ExecutionMode::ORT_SEQUENTIAL; sess_options.use_deterministic_compute = false; sess_options.enable_mem_reuse = true; // disable allocating initialized tensor memory from the arena(by default it will be allocated by the arena) ASSERT_STATUS_OK(sess_options.config_options.AddConfigEntry(kOrtSessionOptionsUseDeviceAllocatorForInitializers, "1")); SessionState session_state(graph, execution_providers, nullptr, nullptr, dtm, edlm, DefaultLoggingManager().DefaultLogger(), profiler, sess_options); // Partition the graph GraphPartitioner partitioner(krm, execution_providers); ASSERT_STATUS_OK(partitioner.Partition( graph, session_state.GetMutableFuncMgr(), [&cpu_allocator](Graph& graph, bool& modified, const IExecutionProvider& execution_provider, const layout_transformation::DebugGraphFn& debug_graph_fn) -> Status { return layout_transformation::TransformLayoutForEP(graph, modified, execution_provider, cpu_allocator, debug_graph_fn); }, sess_options.config_options, DefaultLoggingManager().DefaultLogger())); ASSERT_STATUS_OK(session_state.FinalizeSessionState(oss.str(), krm)); // Fetch the CPU arena-allocator from the session state OrtMemoryInfo mem_info(CPU, OrtArenaAllocator); AllocatorPtr alloc = session_state.GetAllocator(mem_info); ASSERT_TRUE(alloc != nullptr); // Get stats for the CPU arena-based allocator AllocatorStats alloc_stats; static_cast(alloc.get())->GetStats(&alloc_stats); // Assert that we have made 1 Reserve() call (for allocating memory for the sole initializer in the model) ASSERT_EQ(alloc_stats.num_reserves, 1); } // Part 2: Feature turned OFF (i.e.) allocate from arena memory (default behavior) { std::basic_ostringstream oss; oss << ORT_TSTR("testdata/mul_1.onnx"); Status status; std::shared_ptr model; ASSERT_TRUE((status = Model::Load(oss.str(), model, nullptr, DefaultLoggingManager().DefaultLogger())).IsOK()) << status; Graph& graph = model->MainGraph(); ExecutionProviders execution_providers; CPUExecutionProviderInfo epi{true}; // use an arena-based allocator for this EP status = execution_providers.Add(onnxruntime::kCpuExecutionProvider, std::make_unique(epi)); ASSERT_TRUE(status.IsOK()) << status; KernelRegistryManager krm; status = krm.RegisterKernels(execution_providers); ASSERT_TRUE(status.IsOK()) << status; DataTransferManager dtm; ExternalDataLoaderManager edlm; profiling::Profiler profiler; SessionOptions sess_options; sess_options.enable_mem_pattern = false; sess_options.execution_mode = ExecutionMode::ORT_SEQUENTIAL; sess_options.use_deterministic_compute = false; sess_options.enable_mem_reuse = true; SessionState session_state(graph, execution_providers, nullptr, nullptr, dtm, edlm, DefaultLoggingManager().DefaultLogger(), profiler, sess_options); // Partition the graph GraphPartitioner partitioner(krm, execution_providers); ASSERT_STATUS_OK(partitioner.Partition( graph, session_state.GetMutableFuncMgr(), [&cpu_allocator](Graph& graph, bool& modified, const IExecutionProvider& execution_provider, const layout_transformation::DebugGraphFn& debug_graph_fn) -> Status { return layout_transformation::TransformLayoutForEP( graph, modified, execution_provider, cpu_allocator, debug_graph_fn); }, sess_options.config_options, DefaultLoggingManager().DefaultLogger())); // Finalize the session state ASSERT_STATUS_OK(session_state.FinalizeSessionState(oss.str(), krm)); // Fetch the CPU arena-allocator from the session state OrtMemoryInfo mem_info(CPU, OrtArenaAllocator); AllocatorPtr alloc = session_state.GetAllocator(mem_info); ASSERT_TRUE(alloc != nullptr); // Get stats for the CPU arena-based allocator AllocatorStats alloc_stats; static_cast(alloc.get())->GetStats(&alloc_stats); // Assert that we have made no Reserve() calls ASSERT_EQ(alloc_stats.num_reserves, 0); // Assert to ensure an allocation was made for the initializer through the arena allocator (Alloc() was invoked) ASSERT_EQ(alloc_stats.num_allocs, 1); } } INSTANTIATE_TEST_SUITE_P(SessionStateTests, SessionStateTestP, testing::ValuesIn(param_list)); #ifndef ENABLE_TRAINING_CORE class PrePackingTestOpKernel : public OpKernel { public: PrePackingTestOpKernel(const OpKernelInfo& info) : OpKernel(info) {} Status Compute(OpKernelContext* context) const override { ORT_UNUSED_PARAMETER(context); return Status::OK(); } Status UseSharedPrePackedBuffers(std::vector& prepacked_buffers, int input_idx, /*out*/ bool& used_shared_buffers) override { ORT_UNUSED_PARAMETER(input_idx); weight_packed_ = std::move(prepacked_buffers[0]); used_shared_buffers = true; ++store_pre_packed_weight_calls_count; return Status::OK(); } Status PrePack(const Tensor& tensor, int input_idx, AllocatorPtr alloc, /*out*/ bool& is_packed, /*out*/ PrePackedWeights* prepacked_weights) override { ORT_UNUSED_PARAMETER(tensor); ORT_UNUSED_PARAMETER(input_idx); constexpr const size_t weight_packed_len = sizeof(float) * 2; weight_packed_ = IAllocator::MakeUniquePtr(alloc, weight_packed_len, true); float* data_weights_packed = reinterpret_cast(weight_packed_.get()); data_weights_packed[0] = 1.2345f; data_weights_packed[1] = data_weights_packed[0] * 2.f; if (prepacked_weights != nullptr) { prepacked_weights->buffers_.push_back(std::move(weight_packed_)); prepacked_weights->buffer_sizes_.push_back(weight_packed_len); } is_packed = true; ++prepack_calls_count; return Status::OK(); } int prepack_calls_count = 0; int store_pre_packed_weight_calls_count = 0; IAllocatorUniquePtr weight_packed_; }; static void CreateSimpleGraph(Graph& graph) { // node creation and placement TypeProto type; type.mutable_tensor_type()->set_elem_type(TensorProto_DataType_FLOAT); type.mutable_tensor_type()->mutable_shape()->add_dim()->set_dim_value(1); std::vector inputs; onnxruntime::NodeArg input_0_arg("node_0_input_0", &type); onnxruntime::NodeArg input_1_arg("node_0_input_1", &type); inputs.push_back(&input_0_arg); inputs.push_back(&input_1_arg); std::vector outputs; onnxruntime::NodeArg output_arg("node_0_output_0", &type); outputs.push_back(&output_arg); graph.AddNode("node_0", "PrePackingTest", "node 0", inputs, outputs); // add an initializer ONNX_NAMESPACE::TensorProto tensor; tensor.add_dims(1); tensor.add_float_data(1.0f); tensor.set_data_type(TensorProto_DataType_FLOAT); tensor.set_name("node_0_input_1"); graph.AddInitializedTensor(tensor); auto status = graph.Resolve(); ASSERT_TRUE(status.IsOK()); } static const ONNX_NAMESPACE::GraphProto CreateSubgraph(bool then_branch) { Model model(then_branch ? "If_then" : "If_else", false, DefaultLoggingManager().DefaultLogger()); auto& graph = model.MainGraph(); std::vector inputs; std::vector outputs; const std::string suffix = then_branch ? "0" : "1"; // graph input has to have type and rank even though it's an outer scope value. TypeProto type_float; type_float.mutable_tensor_type()->set_elem_type(TensorProto_DataType_FLOAT); type_float.mutable_tensor_type()->mutable_shape()->add_dim()->set_dim_value(1); // outer scope values auto& if_shared = graph.GetOrCreateNodeArg("if_shared", &type_float); auto& if_input = graph.GetOrCreateNodeArg("if_input_" + suffix, &type_float); // add so that we don't end up with it being considered a graph input graph.AddOuterScopeNodeArg("if_shared"); graph.AddOuterScopeNodeArg("if_input_" + suffix); auto& if_out = graph.GetOrCreateNodeArg("if_output_" + suffix, &type_float); inputs = {&if_shared, &if_input}; outputs = {&if_out}; graph.AddNode("if_node_" + suffix, "PrePackingTest", "if node " + suffix, inputs, outputs); auto status = graph.Resolve(); EXPECT_EQ(status, Status::OK()); auto& proto = graph.ToGraphProto(); return proto; } static void CreateGraphWithSubgraph(Graph& graph) { TypeProto type_float; type_float.mutable_tensor_type()->set_elem_type(TensorProto_DataType_FLOAT); type_float.mutable_tensor_type()->mutable_shape()->add_dim()->set_dim_value(1); { std::vector inputs; onnxruntime::NodeArg input_0_arg("if_input_0", &type_float); onnxruntime::NodeArg input_1_arg("if_input_1", &type_float); inputs.push_back(&input_0_arg); inputs.push_back(&input_1_arg); std::vector outputs; onnxruntime::NodeArg output_arg("node_0_output_0", &type_float); outputs.push_back(&output_arg); graph.AddNode("node_0", "PrePackingTest", "node 0", inputs, outputs); } { TypeProto type_bool; type_bool.mutable_tensor_type()->set_elem_type(TensorProto_DataType_BOOL); type_bool.mutable_tensor_type()->mutable_shape()->add_dim()->set_dim_value(1); onnxruntime::NodeArg bool_arg("bool_arg", &type_bool); std::vector outputs; onnxruntime::NodeArg output_arg("output_arg", &type_float); outputs.push_back(&output_arg); auto& if_node = graph.AddNode("if", "If", "If node", {&bool_arg}, outputs); auto then_proto = CreateSubgraph(true); auto else_proto = CreateSubgraph(false); if_node.AddAttribute("then_branch", then_proto); if_node.AddAttribute("else_branch", else_proto); } // add an initializer ONNX_NAMESPACE::TensorProto tensor; tensor.add_dims(1); tensor.add_float_data(1.0f); tensor.set_data_type(TensorProto_DataType_FLOAT); tensor.set_name("if_shared"); graph.AddInitializedTensor(tensor); auto status = graph.Resolve(); ASSERT_TRUE(status.IsOK()); } static void PlaceAllNodesToCPUEP(Graph& graph) { for (auto& node : graph.Nodes()) { node.SetExecutionProviderType(kCpuExecutionProvider); if (node.ContainsSubgraph()) { for (auto& entry : node.GetAttributeNameToMutableSubgraphMap()) { Graph* subgraph = entry.second; PlaceAllNodesToCPUEP(*subgraph); } } } } struct PrepackingTestParam { bool test_subgraph; bool test_prepacking; }; class SessionStatePrepackingTest : public testing::TestWithParam {}; TEST_P(SessionStatePrepackingTest, PrePackingTest) { PrepackingTestParam test_param = GetParam(); OrtThreadPoolParams to; auto tp = concurrency::CreateThreadPool(&onnxruntime::Env::Default(), to, concurrency::ThreadPoolType::INTRA_OP); ONNX_OPERATOR_SCHEMA(PrePackingTest) .SetDoc("Faking Node for PrePacking") .Input(0, "Input_0", "input 0", "tensor(float)") .Input(1, "Input_1", "input 1", "tensor(float)") .Output(0, "output_0", "docstr for output_0.", "tensor(float)"); ExecutionProviders execution_providers; auto cpu_execution_provider = std::make_unique(CPUExecutionProviderInfo(false)); ASSERT_STATUS_OK(execution_providers.Add(kCpuExecutionProvider, std::move(cpu_execution_provider))); DataTransferManager dtm; ExternalDataLoaderManager edlm; profiling::Profiler profiler; std::unordered_map domain_to_version; domain_to_version[kOnnxDomain] = 11; Model model("graph_main", false, ModelMetaData(), PathString(), IOnnxRuntimeOpSchemaRegistryList(), domain_to_version, std::vector(), DefaultLoggingManager().DefaultLogger()); // onnxruntime::Model model("graph_main", false, DefaultLoggingManager().DefaultLogger()); if (test_param.test_subgraph) { CreateGraphWithSubgraph(model.MainGraph()); } else { CreateSimpleGraph(model.MainGraph()); } SessionOptions sess_options; sess_options.enable_mem_pattern = true; sess_options.execution_mode = ExecutionMode::ORT_SEQUENTIAL; sess_options.use_deterministic_compute = false; sess_options.enable_mem_reuse = true; sess_options.config_options.configurations[kOrtSessionOptionsConfigDisablePrepacking] = test_param.test_prepacking ? "0" : "1"; SessionState session_state(model.MainGraph(), execution_providers, tp.get(), nullptr, /*inter_op_thread_pool*/ dtm, edlm, DefaultLoggingManager().DefaultLogger(), profiler, sess_options); KernelRegistryManager kernel_registry_manager; Status status = kernel_registry_manager.RegisterKernels(execution_providers); ASSERT_TRUE(status.IsOK()) << status.ErrorMessage(); std::shared_ptr kernel_registry = std::make_shared(); auto kernel_def = KernelDefBuilder().SetName("PrePackingTest").Provider(kCpuExecutionProvider).SinceVersion(1).Build(); ASSERT_STATUS_OK(kernel_registry->Register( KernelCreateInfo(std::move(kernel_def), [](FuncManager&, const OpKernelInfo& info, std::unique_ptr& out) -> Status { out = std::make_unique(info); return Status::OK(); }))); kernel_registry_manager.RegisterKernelRegistry(kernel_registry); PlaceAllNodesToCPUEP(model.MainGraph()); ASSERT_STATUS_OK(session_state.FinalizeSessionState(std::basic_string(), kernel_registry_manager)); const auto& const_initialized_tensors = session_state.GetConstantInitializedTensors(); // check prepacking ASSERT_EQ(const_initialized_tensors.size(), size_t(test_param.test_prepacking ? 0 : 1)); } class SessionStateTestSharedInitalizersWithPrePacking : public ::testing::Test { protected: ExecutionProviders execution_providers; std::unordered_map domain_to_version; DataTransferManager dtm; ExternalDataLoaderManager edlm; profiling::Profiler profiler; KernelRegistryManager kernel_registry_manager; std::unique_ptr tp; void SetUp() override { OrtThreadPoolParams to; tp = concurrency::CreateThreadPool(&onnxruntime::Env::Default(), to, concurrency::ThreadPoolType::INTRA_OP); ONNX_OPERATOR_SCHEMA(PrePackingTest) .SetDoc("Faking Node for PrePacking") .Input(0, "Input_0", "input 0", "tensor(float)") .Input(1, "Input_1", "input 1", "tensor(float)") .Output(0, "output_0", "docstr for output_0.", "tensor(float)"); auto cpu_execution_provider = std::make_unique(CPUExecutionProviderInfo(false)); ASSERT_STATUS_OK(execution_providers.Add(kCpuExecutionProvider, std::move(cpu_execution_provider))); domain_to_version[kOnnxDomain] = 11; ASSERT_STATUS_OK(kernel_registry_manager.RegisterKernels(execution_providers)); std::shared_ptr kernel_registry = std::make_shared(); auto kernel_def = KernelDefBuilder() .SetName("PrePackingTest") .Provider(kCpuExecutionProvider) .SinceVersion(1) .Build(); ASSERT_STATUS_OK(kernel_registry->Register( KernelCreateInfo(std::move(kernel_def), [](FuncManager&, const OpKernelInfo& info, std::unique_ptr& out) -> Status { out = std::make_unique(info); return Status::OK(); }))); kernel_registry_manager.RegisterKernelRegistry(kernel_registry); } }; // Pre-packing enabled + no shared initializers, however, we put all the pre-packs // in a session_state container for ownership. TEST_F(SessionStateTestSharedInitalizersWithPrePacking, test1) { SessionOptions sess_options; sess_options.enable_mem_pattern = true; sess_options.execution_mode = ExecutionMode::ORT_SEQUENTIAL; sess_options.use_deterministic_compute = false; sess_options.enable_mem_reuse = true; // Enable pre-packing sess_options.config_options.configurations[kOrtSessionOptionsConfigDisablePrepacking] = "0"; // First session/model Model model_1("graph_main", false, ModelMetaData(), PathString(), IOnnxRuntimeOpSchemaRegistryList(), domain_to_version, std::vector(), DefaultLoggingManager().DefaultLogger()); CreateSimpleGraph(model_1.MainGraph()); PlaceAllNodesToCPUEP(model_1.MainGraph()); SessionState session_state_1(model_1.MainGraph(), execution_providers, tp.get(), nullptr, /*inter_op_thread_pool*/ dtm, edlm, DefaultLoggingManager().DefaultLogger(), profiler, sess_options); ASSERT_STATUS_OK(session_state_1.FinalizeSessionState(std::basic_string(), kernel_registry_manager)); const auto* kernel = reinterpret_cast(session_state_1.GetKernel(0)); // Assert that a pre-pack call was made. However, they sharing call is still made from a serialized container. ASSERT_EQ(session_state_1.GetNumberOfPrepacksCounter(), static_cast(1)); ASSERT_EQ(kernel->prepack_calls_count, 1); // In this case the sharing comes from the serialized container ASSERT_EQ(kernel->store_pre_packed_weight_calls_count, 1); // Second session/model Model model_2("graph_main", false, ModelMetaData(), PathString(), IOnnxRuntimeOpSchemaRegistryList(), domain_to_version, std::vector(), DefaultLoggingManager().DefaultLogger()); CreateSimpleGraph(model_2.MainGraph()); PlaceAllNodesToCPUEP(model_2.MainGraph()); SessionState session_state_2(model_2.MainGraph(), execution_providers, tp.get(), nullptr, /*inter_op_thread_pool*/ dtm, edlm, DefaultLoggingManager().DefaultLogger(), profiler, sess_options); ASSERT_STATUS_OK(session_state_2.FinalizeSessionState(std::basic_string(), kernel_registry_manager)); kernel = reinterpret_cast(session_state_2.GetKernel(0)); // Assert that a pre-pack call was made. The weights are still shared from the serialized container // either because they are loaded from disk or because the container takes ownership of them. ASSERT_EQ(session_state_2.GetNumberOfPrepacksCounter(), static_cast(1)); ASSERT_EQ(kernel->prepack_calls_count, 1); ASSERT_EQ(kernel->store_pre_packed_weight_calls_count, 1); } // Pre-packing enabled + shared initializers + no pre-packed weights container = no pre-packed weights caching TEST_F(SessionStateTestSharedInitalizersWithPrePacking, test2) { SessionOptions sess_options; sess_options.enable_mem_pattern = true; sess_options.execution_mode = ExecutionMode::ORT_SEQUENTIAL; sess_options.use_deterministic_compute = false; sess_options.enable_mem_reuse = true; // Enable pre-packing sess_options.config_options.configurations[kOrtSessionOptionsConfigDisablePrepacking] = "0"; // Enable shared initializer OrtMemoryInfo mem_info(CPU, OrtDeviceAllocator); std::vector float_data(1, 1); auto value = std::make_unique(); Tensor::InitOrtValue(DataTypeImpl::GetType(), TensorShape(std::vector{1}), reinterpret_cast(float_data.data()), mem_info, *value); ASSERT_STATUS_OK(sess_options.AddInitializer("node_0_input_1", value.get())); // First session/model Model model_1("graph_main", false, ModelMetaData(), PathString(), IOnnxRuntimeOpSchemaRegistryList(), domain_to_version, std::vector(), DefaultLoggingManager().DefaultLogger()); CreateSimpleGraph(model_1.MainGraph()); PlaceAllNodesToCPUEP(model_1.MainGraph()); SessionState session_state_1(model_1.MainGraph(), execution_providers, tp.get(), nullptr, /*inter_op_thread_pool*/ dtm, edlm, DefaultLoggingManager().DefaultLogger(), profiler, sess_options); ASSERT_STATUS_OK(session_state_1.FinalizeSessionState(std::basic_string(), kernel_registry_manager)); const auto* kernel = reinterpret_cast(session_state_1.GetKernel(0)); // Assert that a pre-pack call was made, but sharing still takes place from the serialized container ASSERT_EQ(session_state_1.GetNumberOfPrepacksCounter(), static_cast(1)); ASSERT_EQ(kernel->prepack_calls_count, 1); ASSERT_EQ(kernel->store_pre_packed_weight_calls_count, 1); // Second session/model Model model_2("graph_main", false, ModelMetaData(), PathString(), IOnnxRuntimeOpSchemaRegistryList(), domain_to_version, std::vector(), DefaultLoggingManager().DefaultLogger()); CreateSimpleGraph(model_2.MainGraph()); PlaceAllNodesToCPUEP(model_2.MainGraph()); SessionState session_state_2(model_2.MainGraph(), execution_providers, tp.get(), nullptr, /*inter_op_thread_pool*/ dtm, edlm, DefaultLoggingManager().DefaultLogger(), profiler, sess_options); ASSERT_STATUS_OK(session_state_2.FinalizeSessionState(std::basic_string(), kernel_registry_manager)); kernel = reinterpret_cast(session_state_2.GetKernel(0)); // Assert that a pre-pack call was made, but sharing still takes place from the serialized container ASSERT_EQ(session_state_2.GetNumberOfPrepacksCounter(), static_cast(1)); ASSERT_EQ(kernel->prepack_calls_count, 1); ASSERT_EQ(kernel->store_pre_packed_weight_calls_count, 1); } // Pre-packing enabled + shared initializers + pre-packed weights container = pre-packed weights caching enabled TEST_F(SessionStateTestSharedInitalizersWithPrePacking, test3) { SessionOptions sess_options; sess_options.enable_mem_pattern = true; sess_options.execution_mode = ExecutionMode::ORT_SEQUENTIAL; sess_options.use_deterministic_compute = false; sess_options.enable_mem_reuse = true; // Enable pre-packing sess_options.config_options.configurations[kOrtSessionOptionsConfigDisablePrepacking] = "0"; // Enable shared initializer OrtMemoryInfo mem_info(CPU, OrtDeviceAllocator); std::vector float_data(1, 1); auto value = std::make_unique(); Tensor::InitOrtValue(DataTypeImpl::GetType(), TensorShape(std::vector{1}), reinterpret_cast(float_data.data()), mem_info, *value); ASSERT_STATUS_OK(sess_options.AddInitializer("node_0_input_1", value.get())); // Enable pre-packed weights container PrepackedWeightsContainer prepacked_weights_container; // First session/model Model model_1("graph_main", false, ModelMetaData(), PathString(), IOnnxRuntimeOpSchemaRegistryList(), domain_to_version, std::vector(), DefaultLoggingManager().DefaultLogger()); CreateSimpleGraph(model_1.MainGraph()); PlaceAllNodesToCPUEP(model_1.MainGraph()); SessionState session_state_1(model_1.MainGraph(), execution_providers, tp.get(), nullptr, /*inter_op_thread_pool*/ dtm, edlm, DefaultLoggingManager().DefaultLogger(), profiler, sess_options, &prepacked_weights_container); ASSERT_STATUS_OK(session_state_1.FinalizeSessionState(std::basic_string(), kernel_registry_manager)); const auto* kernel = reinterpret_cast(session_state_1.GetKernel(0)); // Assert that a pre-pack call was made ASSERT_EQ(session_state_1.GetNumberOfPrepacksCounter(), static_cast(1)); ASSERT_EQ(kernel->prepack_calls_count, 1); // Assert that we made a call to store pre-packed weight from a shared container ASSERT_EQ(kernel->store_pre_packed_weight_calls_count, 1); // The weight to be "stored" is the same weight that we got by invoking PrePack() in the step above. // Hence, assert that it wasn't a "cached" pre-packed weight (i.e.) pre-packed weight // from another instance of the same op_type consuming the same constant initializer. ASSERT_EQ(session_state_1.GetUsedSharedPrePackedWeightCounter(), static_cast(0)); // Second session/model Model model_2("graph_main", false, ModelMetaData(), PathString(), IOnnxRuntimeOpSchemaRegistryList(), domain_to_version, std::vector(), DefaultLoggingManager().DefaultLogger()); CreateSimpleGraph(model_2.MainGraph()); PlaceAllNodesToCPUEP(model_2.MainGraph()); SessionState session_state_2(model_2.MainGraph(), execution_providers, tp.get(), nullptr, /*inter_op_thread_pool*/ dtm, edlm, DefaultLoggingManager().DefaultLogger(), profiler, sess_options, &prepacked_weights_container); ASSERT_STATUS_OK(session_state_2.FinalizeSessionState(std::basic_string(), kernel_registry_manager)); // Assert that a pre-pack call was made ASSERT_EQ(session_state_2.GetNumberOfPrepacksCounter(), static_cast(1)); ASSERT_EQ(kernel->prepack_calls_count, 1); // Assert that we made a call to store pre-packed weight from a shared container ASSERT_EQ(kernel->store_pre_packed_weight_calls_count, 1); // The weight to be "stored" is a "cached" weight (i.e.) a pre-packed weight // from another instance of the same op_type consuming the same constant initializer. // Assert this. ASSERT_EQ(session_state_2.GetUsedSharedPrePackedWeightCounter(), static_cast(1)); } // Pre-packing enabled + shared initializers + // pre-packed weights container + subgraphs = // caching enabled in pre-packed weights used in subgraphs TEST_F(SessionStateTestSharedInitalizersWithPrePacking, test4) { SessionOptions sess_options; sess_options.enable_mem_pattern = true; sess_options.execution_mode = ExecutionMode::ORT_SEQUENTIAL; sess_options.use_deterministic_compute = false; sess_options.enable_mem_reuse = true; // Enable pre-packing sess_options.config_options.configurations[kOrtSessionOptionsConfigDisablePrepacking] = "0"; // Enable shared initializer OrtMemoryInfo mem_info(CPU, OrtDeviceAllocator); std::vector float_data(1, 1); auto value = std::make_unique(); Tensor::InitOrtValue(DataTypeImpl::GetType(), TensorShape(std::vector{1}), reinterpret_cast(float_data.data()), mem_info, *value); ASSERT_STATUS_OK(sess_options.AddInitializer("if_shared", value.get())); // Enable pre-packed weights container PrepackedWeightsContainer prepacked_weights_container; // First session/model Model model_1("graph_main", false, ModelMetaData(), PathString(), IOnnxRuntimeOpSchemaRegistryList(), domain_to_version, std::vector(), DefaultLoggingManager().DefaultLogger()); CreateGraphWithSubgraph(model_1.MainGraph()); PlaceAllNodesToCPUEP(model_1.MainGraph()); SessionState session_state_1(model_1.MainGraph(), execution_providers, tp.get(), nullptr, /*inter_op_thread_pool*/ dtm, edlm, DefaultLoggingManager().DefaultLogger(), profiler, sess_options, &prepacked_weights_container); ASSERT_STATUS_OK(session_state_1.FinalizeSessionState(std::basic_string(), kernel_registry_manager)); // At the main graph level, there should be no pre-packing calls as there are // no initializers (shared or otherwise) consumed by any nodes in the main graph ASSERT_EQ(session_state_1.GetNumberOfPrepacksCounter(), static_cast(0)); auto if_index_1 = 1; if (session_state_1.GetKernel(0)->Node().OpType() == "If") { if_index_1 = 0; } const auto& subgraph_session_states = session_state_1.GetSubgraphSessionStateMap(); const auto& if_node_session_states = subgraph_session_states.at(if_index_1); const auto& session_state_1_then_branch_session_state = *if_node_session_states.at("then_branch"); const auto& session_state_1_else_branch_session_state = *if_node_session_states.at("else_branch"); auto if_node_branches_prepack_counter_1 = session_state_1_then_branch_session_state.GetNumberOfPrepacksCounter() + session_state_1_else_branch_session_state.GetNumberOfPrepacksCounter(); // We should be seeing 2 pre-pack calls in the "If" node (one in each subgraph) ASSERT_EQ(if_node_branches_prepack_counter_1, static_cast(2)); auto if_node_branches_shared_prepack_counter_1 = session_state_1_then_branch_session_state.GetUsedSharedPrePackedWeightCounter() + session_state_1_else_branch_session_state.GetUsedSharedPrePackedWeightCounter(); // We should only be seeing 1 shared pre-pack weights usage in the "If" node // Either the "then branch" or "else branch" will be using the shared version // depending on which branch writes to the shared container ASSERT_EQ(if_node_branches_shared_prepack_counter_1, static_cast(1)); // Second session/model Model model_2("graph_main", false, ModelMetaData(), PathString(), IOnnxRuntimeOpSchemaRegistryList(), domain_to_version, std::vector(), DefaultLoggingManager().DefaultLogger()); CreateGraphWithSubgraph(model_2.MainGraph()); PlaceAllNodesToCPUEP(model_2.MainGraph()); SessionState session_state_2(model_2.MainGraph(), execution_providers, tp.get(), nullptr, /*inter_op_thread_pool*/ dtm, edlm, DefaultLoggingManager().DefaultLogger(), profiler, sess_options, &prepacked_weights_container); ASSERT_STATUS_OK(session_state_2.FinalizeSessionState(std::basic_string(), kernel_registry_manager)); // At the main graph level, there should be no pre-packing calls as there are // no initializers (shared or otherwise) consumed by any nodes in the main graph ASSERT_EQ(session_state_2.GetNumberOfPrepacksCounter(), static_cast(0)); auto if_index_2 = 1; if (session_state_2.GetKernel(0)->Node().OpType() == "If") { if_index_2 = 0; } const auto& subgraph_session_states_2 = session_state_2.GetSubgraphSessionStateMap(); const auto& if_node_session_states_2 = subgraph_session_states_2.at(if_index_2); const auto& session_state_2_then_branch_session_state = *if_node_session_states_2.at("then_branch"); const auto& session_state_2_else_branch_session_state = *if_node_session_states_2.at("else_branch"); auto if_node_branches_prepack_counter_2 = session_state_2_then_branch_session_state.GetNumberOfPrepacksCounter() + session_state_2_else_branch_session_state.GetNumberOfPrepacksCounter(); // We should be seeing 2 pre-pack calls in the "If" node (one in each subgraph) ASSERT_EQ(if_node_branches_prepack_counter_2, static_cast(2)); auto if_node_branches_shared_prepack_counter_2 = session_state_2_then_branch_session_state.GetUsedSharedPrePackedWeightCounter() + session_state_2_else_branch_session_state.GetUsedSharedPrePackedWeightCounter(); // We should be seeing 2 shared pre-pack weights calls in the "If" node // Both branches will be using the shared version coming from the first model. ASSERT_EQ(if_node_branches_shared_prepack_counter_2, static_cast(2)); } #ifndef __wasm__ // sharing is on TEST_F(SessionStateTestSharedInitalizersWithPrePacking, TestPrepackedSerialization) { const std::filesystem::path model_with_external_initializers = "testdata/test_prepacked_serialization_optimized_model.onnx"; const std::filesystem::path external_initializers_file = "test_prepacked_serialization_optimized_model.bin"; { SessionOptions sess_options; sess_options.enable_mem_pattern = true; sess_options.execution_mode = ExecutionMode::ORT_SEQUENTIAL; sess_options.use_deterministic_compute = false; sess_options.enable_mem_reuse = true; sess_options.optimized_model_filepath = model_with_external_initializers; // Enable pre-packing sess_options.config_options.configurations[kOrtSessionOptionsConfigDisablePrepacking] = "0"; // Enable saving model with pre-packed weights sess_options.config_options.configurations[kOrtSessionOptionsSavePrePackedConstantInitializers] = "1"; // Enable shared initializer OrtMemoryInfo mem_info(CPU, OrtDeviceAllocator); std::vector float_data(1, 1); auto value = std::make_unique(); Tensor::InitOrtValue(DataTypeImpl::GetType(), TensorShape(std::vector{1}), reinterpret_cast(float_data.data()), mem_info, *value); ASSERT_STATUS_OK(sess_options.AddInitializer("if_shared", value.get())); // Enable pre-packed weights container for shared initializers PrepackedWeightsContainer prepacked_weights_container; Model model_1("graph_main", false, ModelMetaData(), PathString(), IOnnxRuntimeOpSchemaRegistryList(), domain_to_version, std::vector(), DefaultLoggingManager().DefaultLogger()); CreateGraphWithSubgraph(model_1.MainGraph()); PlaceAllNodesToCPUEP(model_1.MainGraph()); SessionState session_state_1(model_1.MainGraph(), execution_providers, tp.get(), nullptr, /*inter_op_thread_pool*/ dtm, edlm, DefaultLoggingManager().DefaultLogger(), profiler, sess_options, &prepacked_weights_container); constexpr const bool saving_model_true = true; ASSERT_STATUS_OK(session_state_1.FinalizeSessionState(std::basic_string(), kernel_registry_manager, !saving_model_true)); TestSavedPrepacks(model_1); ModelSavingOptions model_saving_options{4}; model_saving_options.align_offset = true; ASSERT_STATUS_OK(Model::SaveWithExternalInitializers(model_1, model_with_external_initializers, external_initializers_file, model_saving_options)); } ScopedFileDeleter test_model_deleter(model_with_external_initializers); ScopedFileDeleter binary_file_deleter(external_initializers_file); // Now let's load the model along with the initializers { SessionOptions sess_options; sess_options.enable_mem_pattern = true; sess_options.execution_mode = ExecutionMode::ORT_SEQUENTIAL; sess_options.use_deterministic_compute = false; sess_options.enable_mem_reuse = true; // Enable pre-packing sess_options.config_options.configurations[kOrtSessionOptionsConfigDisablePrepacking] = "0"; // We are expecting this weight to be loaded from disk along // with its pre-packed version // Enable shared initializer OrtMemoryInfo mem_info(CPU, OrtDeviceAllocator); std::vector float_data(1, 1); auto value = std::make_unique(); Tensor::InitOrtValue(DataTypeImpl::GetType(), TensorShape(std::vector{1}), reinterpret_cast(float_data.data()), mem_info, *value); ASSERT_STATUS_OK(sess_options.AddInitializer("if_shared", value.get())); // Enable pre-packed weights container for shared initializers PrepackedWeightsContainer prepacked_weights_container; std::shared_ptr model; ASSERT_STATUS_OK(Model::Load(model_with_external_initializers, model, nullptr, DefaultLoggingManager().DefaultLogger())); PlaceAllNodesToCPUEP(model->MainGraph()); SessionState session_state(model->MainGraph(), execution_providers, tp.get(), nullptr, /*inter_op_thread_pool*/ dtm, edlm, DefaultLoggingManager().DefaultLogger(), profiler, sess_options, &prepacked_weights_container); ASSERT_STATUS_OK(session_state.FinalizeSessionState(std::basic_string(), kernel_registry_manager, false)); TestLoadedSharedUserSupplied(*model); } // Load again, this time sharing is enabled, but no shared initializer in the map { SessionOptions sess_options; sess_options.enable_mem_pattern = true; sess_options.execution_mode = ExecutionMode::ORT_SEQUENTIAL; sess_options.use_deterministic_compute = false; sess_options.enable_mem_reuse = true; // Enable pre-packing sess_options.config_options.configurations[kOrtSessionOptionsConfigDisablePrepacking] = "0"; // Enable pre-packed weights container for shared initializers PrepackedWeightsContainer prepacked_weights_container; std::shared_ptr model; ASSERT_STATUS_OK(Model::Load(model_with_external_initializers, model, nullptr, DefaultLoggingManager().DefaultLogger())); PlaceAllNodesToCPUEP(model->MainGraph()); SessionState session_state(model->MainGraph(), execution_providers, tp.get(), nullptr, /*inter_op_thread_pool*/ dtm, edlm, DefaultLoggingManager().DefaultLogger(), profiler, sess_options, &prepacked_weights_container); ASSERT_STATUS_OK(session_state.FinalizeSessionState(model_with_external_initializers, kernel_registry_manager, false)); TestLoadedSharedNoUserSupplied(*model); } // Load again, sharing is disabled { SessionOptions sess_options; sess_options.enable_mem_pattern = true; sess_options.execution_mode = ExecutionMode::ORT_SEQUENTIAL; sess_options.use_deterministic_compute = false; sess_options.enable_mem_reuse = true; // Enable pre-packing sess_options.config_options.configurations[kOrtSessionOptionsConfigDisablePrepacking] = "0"; std::shared_ptr model; ASSERT_STATUS_OK(Model::Load(model_with_external_initializers, model, nullptr, DefaultLoggingManager().DefaultLogger())); PlaceAllNodesToCPUEP(model->MainGraph()); SessionState session_state(model->MainGraph(), execution_providers, tp.get(), nullptr, /*inter_op_thread_pool*/ dtm, edlm, DefaultLoggingManager().DefaultLogger(), profiler, sess_options, nullptr); ASSERT_STATUS_OK(session_state.FinalizeSessionState(model_with_external_initializers, kernel_registry_manager, false)); const auto& prepacked_for_main_graph = model->MainGraph().GetPrepacked(); ASSERT_FALSE(prepacked_for_main_graph.IsSaveModeOn()); ASSERT_EQ(1U, prepacked_for_main_graph.GetKeyToBlob().size()); } } #endif // __wasm__ INSTANTIATE_TEST_SUITE_P(SessionStateTests, SessionStatePrepackingTest, testing::Values(PrepackingTestParam{false, false}, PrepackingTestParam{false, true}, PrepackingTestParam{true, false}, PrepackingTestParam{true, true})); #endif } // namespace test } // namespace onnxruntime