From 5daeb5e0b0b10d8839addaf35bf28d610bce75da Mon Sep 17 00:00:00 2001 From: Hector Li Date: Wed, 17 Apr 2024 19:01:01 -0700 Subject: [PATCH] enable model with external data be loaded from memory buffer (#19089) ### Description Background: Users can save a large model with its initializer data in an external file, e.g.: onnx.save_model(onnx_model, "path/to/save/the/model.onnx", save_as_external_data=True, all_tensors_to_one_file=True, location="filename", size_threshold=1024). In that case, Ort loads the model, gets the external initializer information (external file name, offset, length), uses the model path to find the external file, and locates the tensor data via the offset and length. But this does not work if the user loads the model from memory, since Ort loses track of the model path. This PR adds an API/session option that lets the user provide a table with the external initializer file name as the key, and the pointer to the loaded external file in memory plus the buffer length as the value. So that 1. the user can load the model from a memory buffer with the external initializers in a memory buffer too. 2. the initializers can be shared across sessions, for different EPs. 3. the user can load the file in any way they want, e.g. mmap. Internally, 1. at session creation time, Ort goes through the external initializers in the graph and gets the file name, offset, and data length of each external initializer from its TensorProto. 2. With the file name, Ort gets the in-memory file buffer and the buffer length from the table the user provided. 3. Ort locates the tensor buffer within the in-memory file buffer (user provided) using the offset and data length (from the TensorProto). 4. Ort creates the Tensor and replaces the existing Tensor in the graph. ### Motivation and Context https://github.com/onnx/onnx/blob/main/docs/ExternalData.md For a model with external data, the TensorProto may have initializer data in a separate file. The external file location is set via the file path relative to the model path. 
When the model is loaded through the load-from-memory-buffer API, Ort loses track of the model path, which causes an error if the model has external data. By adding a session option to set the external data buffer, Ort can find the external data correctly when the model is loaded from a memory buffer. --- include/onnxruntime/core/graph/graph.h | 6 + .../core/session/onnxruntime_c_api.h | 31 ++- .../core/session/onnxruntime_cxx_api.h | 3 + .../core/session/onnxruntime_cxx_inline.h | 21 ++ onnxruntime/core/framework/session_options.cc | 28 +++ onnxruntime/core/framework/session_options.h | 4 + onnxruntime/core/graph/graph.cc | 54 +++++ .../core/session/abi_session_options.cc | 39 +++ onnxruntime/core/session/inference_session.cc | 7 + onnxruntime/core/session/onnxruntime_c_api.cc | 4 +- onnxruntime/core/session/ort_apis.h | 6 + .../test/shared_lib/test_model_loading.cc | 228 ++++++++++++++++++ .../test/testdata/conv_qdq_external_ini.bin | Bin 2000 -> 992 bytes .../test/testdata/conv_qdq_external_ini.onnx | Bin 2204 -> 2142 bytes orttraining/orttraining/models/bert/main.cc | 1 + .../orttraining/models/pipeline_poc/main.cc | 1 + .../models/runner/training_runner.cc | 1 + 17 files changed, 429 insertions(+), 5 deletions(-) diff --git a/include/onnxruntime/core/graph/graph.h b/include/onnxruntime/core/graph/graph.h index 3b417a362d..d3c29e6a5d 100644 --- a/include/onnxruntime/core/graph/graph.h +++ b/include/onnxruntime/core/graph/graph.h @@ -727,6 +727,12 @@ class Graph { // NOLINT(clang-analyzer-optin.performance.Padding): preserve exi * and replaces graph initializers with its content. */ common::Status InjectExternalInitializedTensors(const InlinedHashMap& external_initializers); + + /** This function takes externally provided files in memory for initializers with external + * data and replaces graph initializers with its content. 
+ */ + common::Status InjectExternalInitializersFromFilesInMemory( + const InlinedHashMap>& external_initializer_files); #endif // !defined(DISABLE_EXTERNAL_INITIALIZERS) #endif // !defined(ORT_MINIMAL_BUILD) diff --git a/include/onnxruntime/core/session/onnxruntime_c_api.h b/include/onnxruntime/core/session/onnxruntime_c_api.h index e7b8f14871..867adfc65c 100644 --- a/include/onnxruntime/core/session/onnxruntime_c_api.h +++ b/include/onnxruntime/core/session/onnxruntime_c_api.h @@ -3506,15 +3506,15 @@ struct OrtApi { * \param[in] options * \param[in] initializer_names Array of null terminated UTF-8 encoded strings of the initializers names. * \param[in] initializers Array of ::OrtValue type - * \param[in] initializers_num Number of elements in the initializer_names and initializers + * \param[in] num_initializers Number of elements in the initializer_names and initializers * * \snippet{doc} snippets.dox OrtStatus Return Value * * \since Version 1.12. */ ORT_API2_STATUS(AddExternalInitializers, _In_ OrtSessionOptions* options, - _In_reads_(input_len) const char* const* initializer_names, - _In_reads_(input_len) const OrtValue* const* initializers, size_t initializers_num); + _In_reads_(num_initializers) const char* const* initializer_names, + _In_reads_(num_initializers) const OrtValue* const* initializers, size_t num_initializers); /** \brief: Create attribute of onnxruntime operator * @@ -4631,6 +4631,31 @@ struct OrtApi { * \snippet{doc} snippets.dox OrtStatus Return Value */ ORT_API2_STATUS(KernelInfoGetAllocator, _In_ const OrtKernelInfo* info, _In_ OrtMemType mem_type, _Outptr_ OrtAllocator** out); + + /** \brief Replace initialized Tensors with external data with the provided files in memory + * + * The function will find the initialized TensorProtos with external data in the graph with the provided + * external file names and the file content in memory. 
The API gets the external file name, offset, data length + * from TensorProto, and locates the tensor data from the file in memory buffer. + * It creates a Tensor to replace the existing Tensor in graph. The replacement + * will occur before any of the optimizations take place. The data will be copied into the graph + * since TensorProto can't refer to the user provided buffers. + * + * \param[in] options + * \param[in] external_initializer_file_names Array of null terminated UTF-8 encoded strings of the file names + * which hold the external initializers. + * \param[in] external_initializer_file_buffer_array Array of pointers to the buffer of the file content. + * The buffer can be freed after session creation. + * \param[in] external_initializer_file_lengths Array of size_t to indicate the length of file content + * \param[in] num_external_initializer_files Number of external files + * + * \snippet{doc} snippets.dox OrtStatus Return Value + */ + ORT_API2_STATUS(AddExternalInitializersFromFilesInMemory, _In_ OrtSessionOptions* options, + _In_reads_(num_external_initializer_files) const ORTCHAR_T* const* external_initializer_file_names, + _In_reads_(num_external_initializer_files) char* const* external_initializer_file_buffer_array, + _In_reads_(num_external_initializer_files) const size_t* external_initializer_file_lengths, + size_t num_external_initializer_files); }; /* diff --git a/include/onnxruntime/core/session/onnxruntime_cxx_api.h b/include/onnxruntime/core/session/onnxruntime_cxx_api.h index fd0e349042..8091fd4cfc 100644 --- a/include/onnxruntime/core/session/onnxruntime_cxx_api.h +++ b/include/onnxruntime/core/session/onnxruntime_cxx_api.h @@ -873,6 +873,9 @@ struct SessionOptionsImpl : ConstSessionOptionsImpl { SessionOptionsImpl& AddInitializer(const char* name, const OrtValue* ort_val); ///< Wraps OrtApi::AddInitializer SessionOptionsImpl& AddExternalInitializers(const std::vector& names, const std::vector& ort_values); ///< Wraps 
OrtApi::AddExternalInitializers + SessionOptionsImpl& AddExternalInitializersFromFilesInMemory(const std::vector>& external_initializer_file_names, + const std::vector& external_initializer_file_buffer_array, + const std::vector& external_initializer_file_lengths); ///< Wraps OrtApi::AddExternalInitializersFromFilesInMemory SessionOptionsImpl& AppendExecutionProvider_CUDA(const OrtCUDAProviderOptions& provider_options); ///< Wraps OrtApi::SessionOptionsAppendExecutionProvider_CUDA SessionOptionsImpl& AppendExecutionProvider_CUDA_V2(const OrtCUDAProviderOptionsV2& provider_options); ///< Wraps OrtApi::SessionOptionsAppendExecutionProvider_CUDA_V2 diff --git a/include/onnxruntime/core/session/onnxruntime_cxx_inline.h b/include/onnxruntime/core/session/onnxruntime_cxx_inline.h index 9d1e8c9443..a732bf169d 100644 --- a/include/onnxruntime/core/session/onnxruntime_cxx_inline.h +++ b/include/onnxruntime/core/session/onnxruntime_cxx_inline.h @@ -781,6 +781,27 @@ inline SessionOptionsImpl& SessionOptionsImpl::AddExternalInitializers(con return *this; } +template +inline SessionOptionsImpl& SessionOptionsImpl::AddExternalInitializersFromFilesInMemory(const std::vector>& file_names, + const std::vector& buffer_array, + const std::vector& file_lengths) { + const size_t inputs_num = file_names.size(); + if (inputs_num != buffer_array.size()) { + ORT_CXX_API_THROW("Expecting names and buffer_array to have the same length", ORT_INVALID_ARGUMENT); + } + if (inputs_num != file_lengths.size()) { + ORT_CXX_API_THROW("Expecting names and file_lengths to have the same length", ORT_INVALID_ARGUMENT); + } + std::vector names_ptr; + names_ptr.reserve(inputs_num); + for (size_t i = 0; i < inputs_num; ++i) { + names_ptr.push_back(file_names[i].c_str()); + } + ThrowOnError(GetApi().AddExternalInitializersFromFilesInMemory(this->p_, names_ptr.data(), buffer_array.data(), + file_lengths.data(), inputs_num)); + return *this; +} + template inline SessionOptionsImpl& 
SessionOptionsImpl::AppendExecutionProvider_CUDA(const OrtCUDAProviderOptions& provider_options) { ThrowOnError(GetApi().SessionOptionsAppendExecutionProvider_CUDA(this->p_, &provider_options)); diff --git a/onnxruntime/core/framework/session_options.cc b/onnxruntime/core/framework/session_options.cc index c5f0ca1ec0..9d6cd3e582 100644 --- a/onnxruntime/core/framework/session_options.cc +++ b/onnxruntime/core/framework/session_options.cc @@ -56,6 +56,34 @@ Status SessionOptions::AddExternalInitializers(gsl::span name } return Status::OK(); } + +Status SessionOptions::AddExternalInitializersFromFilesInMemory(gsl::span file_names, + gsl::span> files_buffers) { + const auto num_files = file_names.size(); + ORT_ENFORCE(num_files == files_buffers.size(), "Expecting same size spans"); + external_initializer_files_mmap.reserve(external_initializer_files_mmap.size() + num_files); + static constexpr std::array, 4> prefix_list{ + ORT_TSTR(".//"), + ORT_TSTR("./"), + ORT_TSTR(".\\\\"), + ORT_TSTR(".\\")}; + for (size_t i = 0; i < num_files; ++i) { + // ignore "./" from file name if it has + auto file_name = file_names[i]; + for (auto prefix : prefix_list) { + if (file_name.rfind(prefix, 0) == 0) { + file_name = file_name.substr(prefix.length()); + break; + } + } + bool result = external_initializer_files_mmap.emplace(file_name, files_buffers[i]).second; + if (!result) { + return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, "An entry for this name has already been added: ", + ORT_TSTR_CONVERT_TO_PRINTABLE_STRING(file_name)); + } + } + return Status::OK(); +} #endif // !defined(ORT_MINIMAL_BUILD) && !defined(DISABLE_EXTERNAL_INITIALIZERS) #if !defined(ORT_MINIMAL_BUILD) || defined(ORT_MINIMAL_BUILD_CUSTOM_OPS) diff --git a/onnxruntime/core/framework/session_options.h b/onnxruntime/core/framework/session_options.h index 796a018ac0..295d02a42f 100644 --- a/onnxruntime/core/framework/session_options.h +++ b/onnxruntime/core/framework/session_options.h @@ -155,6 +155,9 @@ struct 
SessionOptions { // Customer supplied pre-processed data for external initializers InlinedHashMap external_initializers; Status AddExternalInitializers(gsl::span names, gsl::span values); + InlinedHashMap> external_initializer_files_mmap; + Status AddExternalInitializersFromFilesInMemory(gsl::span file_names, + gsl::span> files_buffers); #endif // custom function callback to create a thread @@ -203,6 +206,7 @@ inline std::ostream& operator<<(std::ostream& os, const SessionOptions& session_ //<< " initializers_to_share_map:" << session_options.initializers_to_share_map #if !defined(ORT_MINIMAL_BUILD) && !defined(DISABLE_EXTERNAL_INITIALIZERS) //<< " external_initializers:" << session_options.external_initializers + //<< " external_initializer_files:" << session_options.external_initializer_files #endif #if !defined(ORT_MINIMAL_BUILD) || defined(ORT_MINIMAL_BUILD_CUSTOM_OPS) //<< " custom_op_libs:" << session_options.custom_op_libs diff --git a/onnxruntime/core/graph/graph.cc b/onnxruntime/core/graph/graph.cc index 2220b9cd1d..62b5f7ad5d 100644 --- a/onnxruntime/core/graph/graph.cc +++ b/onnxruntime/core/graph/graph.cc @@ -3005,6 +3005,60 @@ Status Graph::InjectExternalInitializedTensors(const InlinedHashMap>& external_initializer_files) { + for (const auto& [tensor_name, tensor_proto] : name_to_initial_tensor_) { + if (tensor_proto->data_location() == TensorProto_DataLocation_EXTERNAL) { + std::unique_ptr external_data_info; + ORT_RETURN_IF_ERROR(onnxruntime::ExternalDataInfo::Create(tensor_proto->external_data(), external_data_info)); + + const auto& external_file = external_data_info->GetRelPath(); + onnxruntime::FileOffsetType file_offset = external_data_info->GetOffset(); + const size_t external_data_length = external_data_info->GetLength(); + SafeInt tensor_byte_size; + ORT_RETURN_IF_ERROR(onnxruntime::utils::GetSizeInBytesFromTensorProto<0>(*tensor_proto, &tensor_byte_size)); + ORT_RETURN_IF_NOT(external_data_length == 0 || external_data_length == 
tensor_byte_size, + "TensorProto: ", tensor_name, " external data size mismatch. Computed size: ", + *&tensor_byte_size, ", external_data.length: ", external_data_length); + + SafeInt end_of_read(file_offset); + end_of_read += tensor_byte_size; + + auto external_file_pos = external_initializer_files.find(external_file); + ORT_RETURN_IF(external_file_pos == external_initializer_files.end(), + "External file: ", ORT_TSTR_CONVERT_TO_PRINTABLE_STRING(external_file), + " not found from the table user provided."); + auto external_file_length = external_file_pos->second.second; + + ORT_RETURN_IF(file_offset < 0 || end_of_read > narrow(external_file_length), + "External initializer: ", tensor_name, + " offset: ", file_offset, " size to read: ", external_data_length, + " given file_length: ", external_file_length, " are out of bounds or can not be read in full."); + char* external_file_buffer = static_cast(external_file_pos->second.first); + char* tensor_buffer = external_file_buffer + file_offset; + + const auto& old_initializer = *(tensor_proto); + auto& mutable_initializers = *(graph_proto_->mutable_initializer()); + // use cheaper pointer comparison to find old entry + auto existing_entry = std::find(mutable_initializers.pointer_begin(), mutable_initializers.pointer_end(), + &old_initializer); + + // these should always be in sync as the pointer in name_to_initial_tensor_ is to memory owned by graph_proto_ + ORT_ENFORCE(existing_entry != mutable_initializers.pointer_end(), + "graph_proto_ is not in sync with name_to_initial_tensor_"); + (**existing_entry).clear_data_location(); + const DataTypeImpl* const type = DataTypeImpl::TensorTypeFromONNXEnum(old_initializer.data_type())->GetElementType(); + TensorShape tensor_shape = utils::GetTensorShapeFromTensorProto(old_initializer); + auto tensor = Tensor(type, tensor_shape, tensor_buffer, + OrtMemoryInfo(CPU, OrtAllocatorType::OrtDeviceAllocator)); + auto new_tensor_proto = utils::TensorToTensorProto(tensor, tensor_name); + 
**existing_entry = std::move(new_tensor_proto); + } + } + + return Status::OK(); +} #endif // DISABLE_EXTERNAL_INITIALIZERS #endif // !defined(ORT_MINIMAL_BUILD) diff --git a/onnxruntime/core/session/abi_session_options.cc b/onnxruntime/core/session/abi_session_options.cc index e2084e9ef4..7ef23d6c9e 100644 --- a/onnxruntime/core/session/abi_session_options.cc +++ b/onnxruntime/core/session/abi_session_options.cc @@ -294,6 +294,45 @@ ORT_API_STATUS_IMPL(OrtApis::AddExternalInitializers, _In_ OrtSessionOptions* op #endif } +ORT_API_STATUS_IMPL(OrtApis::AddExternalInitializersFromFilesInMemory, _In_ OrtSessionOptions* options, + _In_reads_(num_external_initializer_files) const ORTCHAR_T* const* file_names, + _In_reads_(num_external_initializer_files) char* const* buffer_array, + _In_reads_(num_external_initializer_files) const size_t* file_lengths, + size_t num_external_initializer_files) { +#if !defined(ORT_MINIMAL_BUILD) && !defined(DISABLE_EXTERNAL_INITIALIZERS) + API_IMPL_BEGIN + onnxruntime::InlinedVector names; + onnxruntime::InlinedVector> buffers; + onnxruntime::InlinedVector lengths; + names.reserve(num_external_initializer_files); + buffers.reserve(num_external_initializer_files); + lengths.reserve(num_external_initializer_files); + for (size_t i = 0; i < num_external_initializer_files; ++i) { + if (file_names[i] == nullptr || buffer_array[i] == nullptr) { + auto message = onnxruntime::MakeString("Input index: ", i, " contains null pointers"); + return OrtApis::CreateStatus(ORT_INVALID_ARGUMENT, message.c_str()); + } + names.emplace_back(file_names[i]); + buffers.emplace_back(std::make_pair(buffer_array[i], file_lengths[i])); + } + + auto st = options->value.AddExternalInitializersFromFilesInMemory(names, buffers); + if (!st.IsOK()) { + return onnxruntime::ToOrtStatus(st); + } + return nullptr; + API_IMPL_END +#else + ORT_UNUSED_PARAMETER(options); + ORT_UNUSED_PARAMETER(file_names); + ORT_UNUSED_PARAMETER(buffer_array); + 
ORT_UNUSED_PARAMETER(file_lengths); + ORT_UNUSED_PARAMETER(num_external_initializer_files); + return OrtApis::CreateStatus(ORT_NOT_IMPLEMENTED, + "AddExternalInitializersFromFilesInMemory is not supported in this build"); +#endif +} + ORT_API_STATUS_IMPL(OrtApis::SetDeterministicCompute, _Inout_ OrtSessionOptions* options, bool value) { API_IMPL_BEGIN options->value.use_deterministic_compute = value; diff --git a/onnxruntime/core/session/inference_session.cc b/onnxruntime/core/session/inference_session.cc index dbb4bc5bfe..49d16cf6b1 100644 --- a/onnxruntime/core/session/inference_session.cc +++ b/onnxruntime/core/session/inference_session.cc @@ -1650,6 +1650,13 @@ common::Status InferenceSession::Initialize() { ORT_RETURN_IF_ERROR_SESSIONID_(graph.InjectExternalInitializedTensors(session_options_.external_initializers)); InlinedHashMap{}.swap(session_options_.external_initializers); } + + if (!session_options_.external_initializer_files_mmap.empty()) { + ORT_RETURN_IF_ERROR_SESSIONID_( + graph.InjectExternalInitializersFromFilesInMemory(session_options_.external_initializer_files_mmap)); + InlinedHashMap, std::pair>{}.swap( + session_options_.external_initializer_files_mmap); + } #endif #ifdef ONNXRUNTIME_ENABLE_INSTRUMENT diff --git a/onnxruntime/core/session/onnxruntime_c_api.cc b/onnxruntime/core/session/onnxruntime_c_api.cc index 49e3f0a021..52798ea573 100644 --- a/onnxruntime/core/session/onnxruntime_c_api.cc +++ b/onnxruntime/core/session/onnxruntime_c_api.cc @@ -683,7 +683,6 @@ static ORT_STATUS_PTR CreateSessionAndLoadModel(_In_ const OrtSessionOptions* op _In_opt_z_ const ORTCHAR_T* model_path, _In_opt_ const void* model_data, size_t model_data_length, - std::unique_ptr& sess) { // quick check here to decide load path. InferenceSession will provide error message for invalid values. 
// TODO: Could move to a helper @@ -2726,7 +2725,8 @@ static constexpr OrtApi ort_api_1_to_18 = { &OrtApis::SessionOptionsAppendExecutionProvider_OpenVINO_V2, &OrtApis::SessionOptionsAppendExecutionProvider_VitisAI, &OrtApis::KernelContext_GetScratchBuffer, - &OrtApis::KernelInfoGetAllocator}; + &OrtApis::KernelInfoGetAllocator, + &OrtApis::AddExternalInitializersFromFilesInMemory}; // OrtApiBase can never change as there is no way to know what version of OrtApiBase is returned by OrtGetApiBase. static_assert(sizeof(OrtApiBase) == sizeof(void*) * 2, "New methods can't be added to OrtApiBase as it is not versioned"); diff --git a/onnxruntime/core/session/ort_apis.h b/onnxruntime/core/session/ort_apis.h index 3591c96234..fcae173e6c 100644 --- a/onnxruntime/core/session/ort_apis.h +++ b/onnxruntime/core/session/ort_apis.h @@ -345,6 +345,12 @@ ORT_API_STATUS_IMPL(AddExternalInitializers, _In_ OrtSessionOptions* options, _In_reads_(initializers_num) const char* const* initializer_names, _In_reads_(initializers_num) const OrtValue* const* initializers, size_t initializers_num); +ORT_API_STATUS_IMPL(AddExternalInitializersFromFilesInMemory, _In_ OrtSessionOptions* options, + _In_reads_(num_external_initializer_files) const ORTCHAR_T* const* file_names, + _In_reads_(num_external_initializer_files) char* const* buffer_array, + _In_reads_(num_external_initializer_files) const size_t* file_lengths, + size_t num_external_initializer_files); + ORT_API_STATUS_IMPL(CreateOpAttr, _In_ const char* name, _In_ const void* data, diff --git a/onnxruntime/test/shared_lib/test_model_loading.cc b/onnxruntime/test/shared_lib/test_model_loading.cc index 8ae106bb0c..921441f90b 100644 --- a/onnxruntime/test/shared_lib/test_model_loading.cc +++ b/onnxruntime/test/shared_lib/test_model_loading.cc @@ -11,6 +11,15 @@ #include "gmock/gmock.h" +#ifdef _WIN32 +#include +#else +#include +#include +#include +#include "core/platform/scoped_resource.h" +#endif + extern std::unique_ptr ort_env; namespace 
onnxruntime { @@ -101,10 +110,229 @@ TEST(CApiTest, TestExternalInitializersInjection) { initializer_data.push_back(std::move(init_tensor)); Ort::SessionOptions so; + const ORTCHAR_T* optimized_model_path = ORT_TSTR("testdata/model_with_external_initializer_come_from_user_opt.onnx"); + so.SetOptimizedModelFilePath(optimized_model_path); so.AddExternalInitializers(init_names, initializer_data); + // Dump the optimized model with external data so that it will unpack the external data from the loaded model + so.AddConfigEntry(kOrtSessionOptionsOptimizedModelExternalInitializersFileName, "model_with_external_initializer_come_from_user_opt.bin"); + so.AddConfigEntry(kOrtSessionOptionsOptimizedModelExternalInitializersMinSizeInBytes, "10"); EXPECT_NO_THROW(Ort::Session(*ort_env, model_path, so)); } +static void ReadFileToBuffer(const char* file_path, std::vector& buffer) { + std::ifstream file(file_path, std::ios::binary | std::ios::ate); + if (!file) + ORT_THROW("Error reading file."); + buffer.resize(narrow(file.tellg())); + file.seekg(0, std::ios::beg); + if (!file.read(buffer.data(), buffer.size())) + ORT_THROW("Error reading file"); +} + +void TestLoadModelFromArrayWithExternalInitializerFromFileArray(const std::string& model_file_name, + const std::string& external_data_file_name, + const std::string& external_ini_min_size_bytes = "10", + bool compare_external_bin_file = true) { + std::string test_folder = "testdata/"; + std::string model_path = test_folder + model_file_name; + std::vector buffer; + ReadFileToBuffer(model_path.c_str(), buffer); + + std::vector external_bin_buffer; + std::string external_bin_path = test_folder + external_data_file_name; + ReadFileToBuffer(external_bin_path.c_str(), external_bin_buffer); + + Ort::SessionOptions so; + std::string optimized_model_file_name(model_file_name); + auto length = optimized_model_file_name.length(); + optimized_model_file_name.insert(length - 5, "_opt"); + std::string optimized_file_path(test_folder + 
optimized_model_file_name); + PathString optimized_file_path_t(optimized_file_path.begin(), optimized_file_path.end()); + + so.SetOptimizedModelFilePath(optimized_file_path_t.c_str()); + // Dump the optimized model with external data so that it will unpack the external data from the loaded model + std::string opt_bin_file_name(optimized_model_file_name); + opt_bin_file_name.replace(optimized_model_file_name.length() - 4, 4, "bin"); + so.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_DISABLE_ALL); + so.AddConfigEntry(kOrtSessionOptionsOptimizedModelExternalInitializersFileName, opt_bin_file_name.c_str()); + so.AddConfigEntry(kOrtSessionOptionsOptimizedModelExternalInitializersMinSizeInBytes, external_ini_min_size_bytes.c_str()); + + PathString external_file_name(external_data_file_name.begin(), external_data_file_name.end()); + std::vector file_names{external_file_name}; + std::vector file_buffers{external_bin_buffer.data()}; + std::vector lengths{external_bin_buffer.size()}; + so.AddExternalInitializersFromFilesInMemory(file_names, file_buffers, lengths); + + Ort::Session session(*ort_env.get(), buffer.data(), buffer.size(), so); + + std::string generated_bin_path = test_folder + opt_bin_file_name; + // If there are multiple initializers in the external bin file + // It's hard to guarantee the generated bin for optimized model is exactly same with original one for some cases + if (compare_external_bin_file) { + std::vector generated_bin_buffer; + ReadFileToBuffer(generated_bin_path.c_str(), generated_bin_buffer); + + ASSERT_EQ(external_bin_buffer, generated_bin_buffer); + } + + // Cleanup. 
+ ASSERT_EQ(std::remove(optimized_file_path.c_str()), 0); + ASSERT_EQ(std::remove(generated_bin_path.c_str()), 0); +} + +// Single initializer from single bin file +TEST(CApiTest, TestLoadModelFromArrayWithExternalInitializerFromFileArray) { + std::string model_file_name = "model_with_external_initializers.onnx"; + std::string external_bin_name = "Pads.bin"; + TestLoadModelFromArrayWithExternalInitializerFromFileArray(model_file_name, external_bin_name); +} + +// Several external initializers from same file +// Use offset from tensor proto to locate the buffer location +TEST(CApiTest, TestLoadModelFromArrayWithExternalInitializersFromFileArray) { + std::string model_file_name = "conv_qdq_external_ini.onnx"; + std::string external_bin_name = "conv_qdq_external_ini.bin"; + TestLoadModelFromArrayWithExternalInitializerFromFileArray(model_file_name, external_bin_name); +} + +// Several external initializers from same file +// Use offset from tensor proto to locate the buffer location +TEST(CApiTest, TestLoadModelFromArrayWithExternalInitializersFromFileArrayPathRobust) { + std::string model_file_name = "conv_qdq_external_ini.onnx"; + std::string external_bin_name = "./conv_qdq_external_ini.bin"; + TestLoadModelFromArrayWithExternalInitializerFromFileArray(model_file_name, external_bin_name); + + external_bin_name = ".//conv_qdq_external_ini.bin"; + TestLoadModelFromArrayWithExternalInitializerFromFileArray(model_file_name, external_bin_name); + +#ifdef _WIN32 + external_bin_name = ".\\\\conv_qdq_external_ini.bin"; + TestLoadModelFromArrayWithExternalInitializerFromFileArray(model_file_name, external_bin_name); + + external_bin_name = ".\\conv_qdq_external_ini.bin"; + TestLoadModelFromArrayWithExternalInitializerFromFileArray(model_file_name, external_bin_name); +#endif +} + +#ifndef _WIN32 +struct FileDescriptorTraits { + using Handle = int; + static Handle GetInvalidHandleValue() { return -1; } + static void CleanUp(Handle h) { + ASSERT_TRUE(close(h) != -1); + } +}; 
+using ScopedFileDescriptor = ScopedResource; +#endif + +void FileMmap(const ORTCHAR_T* file_path, void*& mapped_base) { +#ifdef _WIN32 + wil::unique_hfile file_handle{CreateFile2(file_path, GENERIC_READ, FILE_SHARE_READ, OPEN_EXISTING, NULL)}; + ASSERT_TRUE(file_handle.get() != INVALID_HANDLE_VALUE); + + wil::unique_hfile file_mapping_handle{ + CreateFileMappingW(file_handle.get(), + nullptr, + PAGE_READONLY, + 0, + 0, + nullptr)}; + ASSERT_TRUE(file_mapping_handle.get() != INVALID_HANDLE_VALUE); + mapped_base = MapViewOfFile(file_mapping_handle.get(), + FILE_MAP_READ, + 0, + 0, + 0); +#else + ScopedFileDescriptor file_descriptor{open(file_path, O_RDONLY)}; + ASSERT_TRUE(file_descriptor.IsValid()); + struct stat sb; + stat(file_path, &sb); + mapped_base = mmap(nullptr, sb.st_size, PROT_READ | PROT_WRITE, MAP_PRIVATE, file_descriptor.Get(), 0); +#endif + return; +} + +void TestLoadModelFromArrayWithExternalInitializerFromFileMmap(const std::string& model_file_name, + const std::string& external_data_file_name, + const std::string& external_ini_min_size_bytes = "10", + bool compare_external_bin_file = true) { + std::string test_folder = "testdata/"; + std::string model_path = test_folder + model_file_name; + std::vector buffer; + ReadFileToBuffer(model_path.c_str(), buffer); + + std::string external_bin_path = test_folder + external_data_file_name; + PathString external_bin_path_t(external_bin_path.begin(), external_bin_path.end()); + + void* mapped_base = nullptr; + FileMmap(external_bin_path_t.c_str(), mapped_base); + ASSERT_TRUE(mapped_base); + + std::ifstream bin_file(external_bin_path, std::ios::binary | std::ios::ate); + ASSERT_TRUE(bin_file); + size_t bin_file_length = narrow(bin_file.tellg()); + + Ort::SessionOptions so; + std::string optimized_model_file_name(model_file_name); + auto length = optimized_model_file_name.length(); + optimized_model_file_name.insert(length - 5, "_opt"); + std::string optimized_file_path(test_folder + optimized_model_file_name); 
+ PathString optimized_file_path_t(optimized_file_path.begin(), optimized_file_path.end()); + + so.SetOptimizedModelFilePath(optimized_file_path_t.c_str()); + // Dump the optimized model with external data so that it will unpack the external data from the loaded model + std::string opt_bin_file_name(optimized_model_file_name); + opt_bin_file_name.replace(optimized_model_file_name.length() - 4, 4, "bin"); + so.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_DISABLE_ALL); + so.AddConfigEntry(kOrtSessionOptionsOptimizedModelExternalInitializersFileName, opt_bin_file_name.c_str()); + so.AddConfigEntry(kOrtSessionOptionsOptimizedModelExternalInitializersMinSizeInBytes, external_ini_min_size_bytes.c_str()); + + PathString external_file_name(external_data_file_name.begin(), external_data_file_name.end()); + std::vector file_names{external_file_name}; + std::vector file_buffers{static_cast(mapped_base)}; + std::vector lengths{bin_file_length}; + so.AddExternalInitializersFromFilesInMemory(file_names, file_buffers, lengths); + + Ort::Session session(*ort_env.get(), buffer.data(), buffer.size(), so); + +#ifdef _WIN32 + bool ret = UnmapViewOfFile(mapped_base); + ASSERT_TRUE(ret); +#else + struct stat sb; + stat(external_bin_path.c_str(), &sb); + int ret = munmap(mapped_base, sb.st_size); + ASSERT_TRUE(ret == 0); +#endif + + std::string generated_bin_path = test_folder + opt_bin_file_name; + // If there are multiple initializers in the external bin file + // It's hard to guarantee the generated bin for optimized model is exactly same with original one for some cases + if (compare_external_bin_file) { + std::vector external_bin_buffer; + ReadFileToBuffer(external_bin_path.c_str(), external_bin_buffer); + + std::vector generated_bin_buffer; + ReadFileToBuffer(generated_bin_path.c_str(), generated_bin_buffer); + + ASSERT_EQ(external_bin_buffer, generated_bin_buffer); + } + + // Cleanup. 
+ ASSERT_EQ(std::remove(optimized_file_path.c_str()), 0); + ASSERT_EQ(std::remove(generated_bin_path.c_str()), 0); +} + +// Load external bin file using mmap +// Several external initializers from same file +// Use offset from tensor proto to locate the buffer location +TEST(CApiTest, TestLoadModelFromArrayWithExternalInitializersFromFileMmap) { + std::string model_file_name = "conv_qdq_external_ini.onnx"; + std::string external_bin_name = "conv_qdq_external_ini.bin"; + TestLoadModelFromArrayWithExternalInitializerFromFileMmap(model_file_name, external_bin_name); +} + #endif } // namespace test } // namespace onnxruntime diff --git a/onnxruntime/test/testdata/conv_qdq_external_ini.bin b/onnxruntime/test/testdata/conv_qdq_external_ini.bin index e749ab5af29c58c932a40d432f236cf3ae5d5be3..89eea0dba1fa41685be9304369c327dc3b235b5b 100644 GIT binary patch delta 7 Ocmcb>|A2kN17-jYU;|nJ delta 28 YcmaFBeu00(17?nWQ?6Sx00A-^0Hn$Z#Q*>R diff --git a/onnxruntime/test/testdata/conv_qdq_external_ini.onnx b/onnxruntime/test/testdata/conv_qdq_external_ini.onnx index fad6074aea133f2d5c3ecbf5b597bae70bdde4ce..c53e1f3ad4d9b7b1cf6bcf43ec2657b3a62b56ea 100644 GIT binary patch delta 331 zcmbOucu!!0B=E?{KhYGIuGflUl3 znaGbSv1sDDX=p;TC(8DtX@A5xS%F<*vJF$1m?#%VVrfZ!d_iK05}TiYNU&?j0!HS^ z6Peb2cxErIT+mH!)^Tj$u(^N@AE?$D+Z_&Bd0VmR6ivBE&d( z3yUjTGKYcyo6|&jQK$k%Ubq5g3p0}}AU7v9FTEr~h}qD{qJU9LZE_r|0obc@o)7fUJ2u0~f$XM}x3fP5 E03sPyXaE2J delta 308 zcmca7Fh_8LB+DFruD;2;82eecrZRF(KF1K14!>Yl@$HkVPmR6iv zBE({7U@%#V)s-=6assOqNRkIxa`JjsQT8+rMgc}AsmbpdMJGRCWd$ixKvraFX_Upy z#g>zrmtK+~#As5$sFgkWEvqU>p&Y70Gc<*hrPy55RJj<9g%r5tI2eVbxOjxPk`haj zGvbRgt5Ug`I23?b3WO*3vxP9~P3#w&tj#VtIgZU}vJ1N@OO`&=lfQxfuVjA;0L?p4 AD*ylh diff --git a/orttraining/orttraining/models/bert/main.cc b/orttraining/orttraining/models/bert/main.cc index 3e7d9a0714..33d0d0346a 100644 --- a/orttraining/orttraining/models/bert/main.cc +++ b/orttraining/orttraining/models/bert/main.cc @@ -62,6 +62,7 @@ static SessionOptions session_options = { {}, // 
initializers_to_share_map #if !defined(ORT_MINIMAL_BUILD) && !defined(DISABLE_EXTERNAL_INITIALIZERS) {}, // external_initializers + {}, // external_initializer_files #endif nullptr, // custom_create_thread_fn nullptr, // custom_thread_creation_options diff --git a/orttraining/orttraining/models/pipeline_poc/main.cc b/orttraining/orttraining/models/pipeline_poc/main.cc index d1e09265f3..c461e4bbf3 100644 --- a/orttraining/orttraining/models/pipeline_poc/main.cc +++ b/orttraining/orttraining/models/pipeline_poc/main.cc @@ -109,6 +109,7 @@ int main(int argc, char* argv[]) { {}, // initializers_to_share_map #if !defined(ORT_MINIMAL_BUILD) && !defined(DISABLE_EXTERNAL_INITIALIZERS) {}, // external_initializers + {}, // external_initializer_files #endif nullptr, // custom_create_thread_fn nullptr, // custom_thread_creation_options diff --git a/orttraining/orttraining/models/runner/training_runner.cc b/orttraining/orttraining/models/runner/training_runner.cc index 9ac9f3ee09..15c74d4092 100644 --- a/orttraining/orttraining/models/runner/training_runner.cc +++ b/orttraining/orttraining/models/runner/training_runner.cc @@ -57,6 +57,7 @@ static SessionOptions SESSION_OPTION = { {}, // initializers_to_share_map #if !defined(ORT_MINIMAL_BUILD) && !defined(DISABLE_EXTERNAL_INITIALIZERS) {}, // external_initializers + {}, // external_initializer_files #endif nullptr, // custom_create_thread_fn nullptr, // custom_thread_creation_options