diff --git a/docs/ONNX_Runtime_Graph_Optimizations.md b/docs/ONNX_Runtime_Graph_Optimizations.md index fc8114a114..0c347f87e4 100644 --- a/docs/ONNX_Runtime_Graph_Optimizations.md +++ b/docs/ONNX_Runtime_Graph_Optimizations.md @@ -55,7 +55,7 @@ All optimizations can be performed either online or offline. In online mode, whe **Notes**: * When running in offline mode, make sure to use the exact same options (e.g., execution providers, optimization level) and hardware as the target machine that the model inference will run on (e.g., you cannot run a model pre-optimized for a GPU execution provider on a machine that is equipped only with CPU). -* When layout optimizations are enabled, the offline mode cannot be used. +* When layout optimizations are enabled, the offline mode can only be used on hardware compatible with the environment in which the offline model was saved. For example, if the model has its layout optimized for AVX2, the offline model would require CPUs that support AVX2. ## Usage diff --git a/onnxruntime/core/session/inference_session.cc b/onnxruntime/core/session/inference_session.cc index ac8810d835..233b176ce9 100644 --- a/onnxruntime/core/session/inference_session.cc +++ b/onnxruntime/core/session/inference_session.cc @@ -608,12 +608,14 @@ common::Status InferenceSession::Initialize() { ORT_RETURN_IF_ERROR_SESSIONID_(graph.Resolve()); if (!session_options_.optimized_model_filepath.empty()) { - if (session_options_.graph_optimization_level < TransformerLevel::Level3) { - // Serialize optimized ONNX model. - ORT_RETURN_IF_ERROR_SESSIONID_(Model::Save(*model_, session_options_.optimized_model_filepath)); - } else { + // Serialize optimized ONNX model. 
+ ORT_RETURN_IF_ERROR_SESSIONID_(Model::Save(*model_, session_options_.optimized_model_filepath)); + if (session_options_.graph_optimization_level >= TransformerLevel::Level3) { LOGS(*session_logger_, WARNING) << "Serializing Optimized ONNX model with Graph Optimization" - " level greater than 2 is not supported."; + " level greater than ORT_ENABLE_EXTENDED. The generated" + " model may contain hardware and execution provider specific" + " optimizations, and should only be used in the same environment" + " the model was optimized for."; } } diff --git a/onnxruntime/test/framework/inference_session_test.cc b/onnxruntime/test/framework/inference_session_test.cc index 1a7fd86ed6..2ecfc15e9b 100644 --- a/onnxruntime/test/framework/inference_session_test.cc +++ b/onnxruntime/test/framework/inference_session_test.cc @@ -415,15 +415,6 @@ TEST(InferenceSessionTests, TestModelSerialization) { InferenceSession session_object_emptyValidation{so_opt, &DefaultLoggingManager()}; ASSERT_TRUE(session_object_emptyValidation.Load(test_model).IsOK()); ASSERT_TRUE(session_object_emptyValidation.Initialize().IsOK()); - - // Assert that level 3 optimization doesn't result in serialized model. - so_opt.optimized_model_filepath = ToWideString("ShouldNotSerialize"); - so_opt.graph_optimization_level = TransformerLevel::Level3; - InferenceSession session_object_Level3Test{so_opt, &DefaultLoggingManager()}; - ASSERT_TRUE(session_object_Level3Test.Load(test_model).IsOK()); - ASSERT_TRUE(session_object_Level3Test.Initialize().IsOK()); - std::ifstream model_fs_Level3(so_opt.optimized_model_filepath, ios::in | ios::binary); - ASSERT_TRUE(model_fs_Level3.fail()); } #ifdef ORT_RUN_EXTERNAL_ONNX_TESTS