// Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. #include #include "serializing/mem_buffer.h" #include "serializing/tensorprotoutils.h" #include "onnx-ml.pb.h" #include "predict.pb.h" #include "converter.h" #include "executor.h" #include "util.h" namespace onnxruntime { namespace server { namespace protobufutil = google::protobuf::util; protobufutil::Status Executor::SetMLValue(const onnx::TensorProto& input_tensor, MemBufferArray& buffers, OrtMemoryInfo* cpu_memory_info, /* out */ Ort::Value& ml_value) { auto logger = env_->GetLogger(request_id_); size_t cpu_tensor_length = 0; try { onnxruntime::server::GetSizeInBytesFromTensorProto<0>(input_tensor, &cpu_tensor_length); } catch (const Ort::Exception& e) { logger->error("GetSizeInBytesFromTensorProto() failed. Error Message: {}", e.what()); return GenerateProtobufStatus(e.GetOrtErrorCode(), e.what()); } auto* buf = buffers.AllocNewBuffer(cpu_tensor_length); try { onnxruntime::server::TensorProtoToMLValue(input_tensor, onnxruntime::server::MemBuffer(buf, cpu_tensor_length, *cpu_memory_info), ml_value); } catch (const Ort::Exception& e) { logger->error("TensorProtoToMLValue() failed. Message: {}", e.what()); return GenerateProtobufStatus(e.GetOrtErrorCode(), e.what()); } return protobufutil::Status::OK; } protobufutil::Status Executor::SetNameMLValueMap(std::vector& input_names, std::vector& input_values, const onnxruntime::server::PredictRequest& request, MemBufferArray& buffers) { auto logger = env_->GetLogger(request_id_); OrtMemoryInfo* memory_info = nullptr; auto ort_status = Ort::GetApi().CreateCpuMemoryInfo(OrtArenaAllocator, OrtMemTypeDefault, &memory_info); if (ort_status != nullptr || memory_info == nullptr) { logger->error("OrtCreateCpuMemoryInfo failed"); return protobufutil::Status(protobufutil::error::Code::RESOURCE_EXHAUSTED, "OrtCreateCpuMemoryInfo() failed"); } // Prepare the Value object for (const auto& input : request.inputs()) { using_raw_data_ = using_raw_data_ && input.second.has_raw_data(); Ort::Value ml_value{nullptr}; auto status = SetMLValue(input.second, buffers, memory_info, ml_value); if (status != protobufutil::Status::OK) { Ort::GetApi().ReleaseMemoryInfo(memory_info); logger->error("SetMLValue() failed! Input name: {}", input.first); return status; } input_names.push_back(input.first); input_values.push_back(std::move(ml_value)); } Ort::GetApi().ReleaseMemoryInfo(memory_info); return protobufutil::Status::OK; } std::vector Run(const Ort::Session& session, const Ort::RunOptions& options, const std::vector& input_names, const std::vector& input_values, const std::vector& output_names) { size_t input_count = input_names.size(); size_t output_count = output_names.size(); std::vector input_ptrs{}; input_ptrs.reserve(input_count); for (const auto& input : input_names) { input_ptrs.push_back(input.data()); } std::vector output_ptrs{}; output_ptrs.reserve(output_count); for (const auto& output : output_names) { output_ptrs.push_back(output.data()); } return const_cast(session).Run(options, input_ptrs.data(), const_cast(input_values.data()), input_count, output_ptrs.data(), output_count); } protobufutil::Status Executor::Predict(const std::string& model_name, const std::string& model_version, const onnxruntime::server::PredictRequest& request, /* out */ onnxruntime::server::PredictResponse& response) { auto logger = env_->GetLogger(request_id_); // Convert PredictRequest to NameMLValMap MemBufferArray buffer_array; std::vector input_names; std::vector input_values; auto conversion_status = SetNameMLValueMap(input_names, input_values, request, buffer_array); if (conversion_status != protobufutil::Status::OK) { return conversion_status; } Ort::RunOptions run_options{}; run_options.SetRunLogVerbosityLevel(static_cast(env_->GetLogSeverity())); run_options.SetRunTag(request_id_.c_str()); // Prepare the output names std::vector output_names; if (!request.output_filter().empty()) { output_names.reserve(request.output_filter_size()); for (const auto& name : request.output_filter()) { output_names.push_back(name); } } else { output_names = env_->GetModelOutputNames(model_name, model_version); } std::vector outputs; try { outputs = Run(env_->GetSession(model_name, model_version), run_options, input_names, input_values, output_names); } catch (const Ort::Exception& e) { return GenerateProtobufStatus(e.GetOrtErrorCode(), e.what()); } // Build the response for (size_t i = 0, sz = outputs.size(); i < sz; ++i) { onnx::TensorProto output_tensor{}; try { MLValueToTensorProto(outputs[i], using_raw_data_, logger, output_tensor); } catch (const Ort::Exception& e) { logger = env_->GetLogger(request_id_); logger->error("MLValueToTensorProto() failed. Output name: {}. Error Message: {}", output_names[i], e.what()); return GenerateProtobufStatus(e.GetOrtErrorCode(), e.what()); } auto insertion_result = response.mutable_outputs()->insert({output_names[i], output_tensor}); if (!insertion_result.second) { logger->error("SetNameMLValueMap() failed. Output name: {}. Trying to overwrite existing output value", output_names[i]); return protobufutil::Status(protobufutil::error::Code::INVALID_ARGUMENT, "SetNameMLValueMap() failed: Cannot have two outputs with the same name"); } } return protobufutil::Status::OK; } } // namespace server } // namespace onnxruntime