mirror of
https://github.com/saymrwulf/onnxruntime.git
synced 2026-05-17 21:10:43 +00:00
Implement numpy array over CPU OrtValues on return values (#20539)
### Description Create numpy arrays based on the native buffers of returned OrtValues. Hold on to the OrtValue until the numpy array is garbage collected. ### Motivation and Context This saves cpu on tensor copies and addresses customer concerns.
This commit is contained in:
parent
156d52163d
commit
08ecf30e0b
7 changed files with 174 additions and 117 deletions
|
|
@ -646,7 +646,7 @@ class IOBinding:
|
|||
return self._iobinding.get_outputs()
|
||||
|
||||
def copy_outputs_to_cpu(self):
|
||||
"""Copy output contents to CPU (if on another device). No-op if already on the CPU."""
|
||||
"""Copy output contents to CPU."""
|
||||
return self._iobinding.copy_outputs_to_cpu()
|
||||
|
||||
def clear_binding_inputs(self):
|
||||
|
|
|
|||
|
|
@ -161,23 +161,27 @@ void addIoBindingMethods(pybind11::module& m) {
|
|||
return io_binding->Get()->GetOutputs();
|
||||
},
|
||||
py::return_value_policy::reference_internal)
|
||||
.def("copy_outputs_to_cpu", [](const SessionIOBinding* io_binding) -> std::vector<py::object> {
|
||||
.def("copy_outputs_to_cpu", [](const SessionIOBinding* io_binding) -> py::list {
|
||||
const std::vector<OrtValue>& outputs = io_binding->Get()->GetOutputs();
|
||||
std::vector<py::object> rfetch;
|
||||
rfetch.reserve(outputs.size());
|
||||
|
||||
size_t pos = 0;
|
||||
const auto& dtm = io_binding->GetInferenceSession()->GetDataTransferManager();
|
||||
|
||||
py::list result;
|
||||
for (const auto& ort_value : outputs) {
|
||||
if (ort_value.IsTensor()) {
|
||||
rfetch.push_back(AddTensorAsPyObj(ort_value, &dtm, nullptr));
|
||||
// We make a copy of the tensor to CPU even if it is already on CPU
|
||||
// as the function name implies using DataTransferManager.
|
||||
py::array arr = PrimitiveTensorToNumpyFromDevice(ort_value, &dtm);
|
||||
result.append(py::cast<py::object>(arr));
|
||||
} else if (ort_value.IsSparseTensor()) {
|
||||
rfetch.push_back(GetPyObjectFromSparseTensor(pos, ort_value, &dtm));
|
||||
result.append(GetPyObjectFromSparseTensor(pos, ort_value, &dtm));
|
||||
} else {
|
||||
rfetch.push_back(AddNonTensorAsPyObj(ort_value, &dtm, nullptr));
|
||||
result.append(AddNonTensorAsPyObj(ort_value, &dtm, nullptr));
|
||||
}
|
||||
++pos;
|
||||
}
|
||||
return rfetch;
|
||||
return result;
|
||||
});
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -16,6 +16,8 @@
|
|||
#include "core/framework/ort_value.h"
|
||||
#include "core/session/inference_session.h"
|
||||
|
||||
#include <variant>
|
||||
|
||||
PYBIND11_MAKE_OPAQUE(std::vector<OrtValue>);
|
||||
|
||||
namespace onnxruntime {
|
||||
|
|
@ -40,6 +42,8 @@ MLDataType NumpyTypeToOnnxRuntimeTensorType(int numpy_type);
|
|||
|
||||
using MemCpyFunc = void (*)(void*, const void*, size_t);
|
||||
|
||||
using DataTransferAlternative = std::variant<const DataTransferManager*, MemCpyFunc>;
|
||||
|
||||
void CpuToCpuMemCpy(void*, const void*, size_t);
|
||||
|
||||
void CopyDataToTensor(const pybind11::array& py_array, int npy_type, Tensor& tensor, MemCpyFunc mem_cpy_to_device = CpuToCpuMemCpy);
|
||||
|
|
@ -117,9 +121,42 @@ void CreateGenericMLValue(const onnxruntime::InputDefList* input_def_list, const
|
|||
const std::string& name_input, const pybind11::object& value, OrtValue* p_mlvalue,
|
||||
bool accept_only_numpy_array = false, bool use_numpy_data_memory = true, MemCpyFunc mem_cpy_to_device = CpuToCpuMemCpy);
|
||||
|
||||
void GetPyObjFromTensor(const Tensor& rtensor, pybind11::object& obj,
|
||||
const DataTransferManager* data_transfer_manager = nullptr,
|
||||
const std::unordered_map<OrtDevice::DeviceType, MemCpyFunc>* mem_cpy_to_host_functions = nullptr);
|
||||
pybind11::object GetPyObjFromTensor(const OrtValue& rtensor,
|
||||
const DataTransferManager* data_transfer_manager = nullptr,
|
||||
const std::unordered_map<OrtDevice::DeviceType, MemCpyFunc>* mem_cpy_to_host_functions = nullptr);
|
||||
|
||||
// The below two functions are used to convert OrtValue to numpy arrays
|
||||
|
||||
/// <summary>
|
||||
/// This function operates on string tensors. Strings are always
|
||||
/// copied to python and converted to UTF-16/UCS-4/32 depending on the platform.
|
||||
/// This is accomplished using py::cast()
|
||||
///
|
||||
/// It is an error to pass a non-tensor or a non-string tensor to this function.
|
||||
/// </summary>
|
||||
/// <param name="tensor">Tensor that contains strings</param>
|
||||
/// <returns>py::array object</returns>
|
||||
pybind11::array StringTensorToNumpyArray(const Tensor& tensor);
|
||||
|
||||
/// <summary>
|
||||
/// Creates a numpy array with shape over OrtValue memory. Numpy array
|
||||
/// does not own the memory, but it holds a copy or OrtValue in a py::capsule.
|
||||
/// OrtValue is destroyed when the numpy array is garbage collected.
|
||||
/// This is used when the OrtValue memory is on CPU.
|
||||
/// </summary>
|
||||
/// <param name="ort_value">OrtValue with data</param>
|
||||
/// <returns>numpy array</returns>
|
||||
pybind11::array PrimitiveTensorToNumpyOverOrtValue(const OrtValue& ort_value);
|
||||
|
||||
/// <summary>
|
||||
/// Creates a numpy array with shape with a copy of OrtValue data.
|
||||
/// This function is used when the OrtValue memory is not on CPU.
|
||||
/// </summary>
|
||||
/// <param name="ort_value">Source memory that is not on CPU.</param>
|
||||
/// <param name="data_transfer">a variant encapsulating alternatives for copying data</param>
|
||||
/// <returns></returns>
|
||||
pybind11::array PrimitiveTensorToNumpyFromDevice(const OrtValue& ort_value,
|
||||
const DataTransferAlternative& data_transfer);
|
||||
|
||||
template <class T>
|
||||
struct DecRefFn {
|
||||
|
|
|
|||
|
|
@ -233,20 +233,20 @@ void addOrtValueMethods(pybind11::module& m) {
|
|||
#endif
|
||||
})
|
||||
.def("shape", [](const OrtValue* ort_value) -> py::list {
|
||||
py::list shape_arr;
|
||||
#if !defined(DISABLE_SPARSE_TENSORS)
|
||||
// OrtValue can only be a Tensor/SparseTensor, make this generic to handle non-Tensors
|
||||
ORT_ENFORCE(ort_value->IsTensor() || ort_value->IsSparseTensor(),
|
||||
"Only OrtValues that are Tensors/SpareTensors are currently supported");
|
||||
|
||||
const auto& dims = (ort_value->IsTensor())
|
||||
? ort_value->Get<Tensor>().Shape().GetDims()
|
||||
: ort_value->Get<SparseTensor>().DenseShape().GetDims();
|
||||
const auto dims = (ort_value->IsTensor())
|
||||
? ort_value->Get<Tensor>().Shape().GetDims()
|
||||
: ort_value->Get<SparseTensor>().DenseShape().GetDims();
|
||||
#else
|
||||
ORT_ENFORCE(ort_value->IsTensor(), "Only OrtValues that are Tensors are supported in this build");
|
||||
const auto& dims = ort_value->Get<Tensor>().Shape().GetDims();
|
||||
const auto dims = ort_value->Get<Tensor>().Shape().GetDims();
|
||||
#endif
|
||||
|
||||
py::list shape_arr;
|
||||
for (auto dim : dims) {
|
||||
// For sequence tensors - we would append a list of dims to the outermost list
|
||||
// For now only tensors are supported in OrtValue
|
||||
|
|
@ -302,18 +302,16 @@ void addOrtValueMethods(pybind11::module& m) {
|
|||
.def("numpy", [](const OrtValue* ml_value) -> py::object {
|
||||
ORT_ENFORCE(ml_value->IsTensor(), "Only OrtValues that are Tensors are convertible to Numpy objects");
|
||||
|
||||
py::object obj;
|
||||
|
||||
#ifdef USE_CUDA
|
||||
GetPyObjFromTensor(ml_value->Get<Tensor>(), obj, nullptr, GetCudaToHostMemCpyFunction());
|
||||
py::object obj = GetPyObjFromTensor(*ml_value, nullptr, GetCudaToHostMemCpyFunction());
|
||||
#elif USE_ROCM
|
||||
GetPyObjFromTensor(ml_value->Get<Tensor>(), obj, nullptr, GetRocmToHostMemCpyFunction());
|
||||
py::object obj = GetPyObjFromTensor(*ml_value, nullptr, GetRocmToHostMemCpyFunction());
|
||||
#elif USE_CANN
|
||||
GetPyObjFromTensor(ml_value->Get<Tensor>(), obj, nullptr, GetCannToHostMemCpyFunction());
|
||||
py::object obj = GetPyObjFromTensor(*ml_value, nullptr, GetCannToHostMemCpyFunction());
|
||||
#elif USE_DML
|
||||
GetPyObjFromTensor(ml_value->Get<Tensor>(), obj, nullptr, GetDmlToHostMemCpyFunction());
|
||||
py::object obj = GetPyObjFromTensor(*ml_value, nullptr, GetDmlToHostMemCpyFunction());
|
||||
#else
|
||||
GetPyObjFromTensor(ml_value->Get<Tensor>(), obj, nullptr, nullptr);
|
||||
py::object obj = GetPyObjFromTensor(*ml_value, nullptr, nullptr);
|
||||
#endif
|
||||
return obj;
|
||||
})
|
||||
|
|
|
|||
|
|
@ -305,18 +305,7 @@ void addSparseTensorMethods(pybind11::module& m) {
|
|||
if (sparse_tensor.IsDataTypeString()) {
|
||||
// Strings can not be on GPU and require conversion UTF-8 to Python UNICODE
|
||||
// We need to create a copy.
|
||||
const int numpy_type = OnnxRuntimeTensorToNumpyType(DataTypeImpl::GetType<std::string>());
|
||||
ORT_ENFORCE(NPY_OBJECT == numpy_type, "We are expecting to map strings to NPY_OBJECT type");
|
||||
const auto& values_shape = sparse_tensor.Values().Shape();
|
||||
py::dtype dtype("object");
|
||||
py::array result(dtype, values_shape.GetDims(), {});
|
||||
auto* out_ptr = static_cast<py::object*>(
|
||||
PyArray_DATA(reinterpret_cast<PyArrayObject*>(result.ptr())));
|
||||
const std::string* src = sparse_tensor.Values().Data<std::string>();
|
||||
for (int64_t i = 0, size = values_shape.Size(); i < size; ++i, src++) {
|
||||
out_ptr[i] = py::cast(*src);
|
||||
}
|
||||
return result;
|
||||
return StringTensorToNumpyArray(sparse_tensor.Values());
|
||||
} else {
|
||||
utils::MLTypeCallDispatcher<float, double, int8_t, uint8_t, int16_t, uint16_t, int32_t, uint32_t, int64_t, uint64_t>
|
||||
t_disp(sparse_tensor.GetElementType());
|
||||
|
|
@ -386,7 +375,7 @@ void addSparseTensorMethods(pybind11::module& m) {
|
|||
})
|
||||
.def("dense_shape", [](const PySparseTensor* py_tensor) -> py::list {
|
||||
const SparseTensor& st = py_tensor->Instance();
|
||||
const auto& dims = st.DenseShape().GetDims();
|
||||
const auto dims = st.DenseShape().GetDims();
|
||||
// We create a copy of dimensions, it is small
|
||||
py::list py_dims;
|
||||
for (auto d : dims) {
|
||||
|
|
|
|||
|
|
@ -166,66 +166,96 @@ static py::object AddNonTensor(const OrtValue& val,
|
|||
return py::cast(val.Get<T>());
|
||||
}
|
||||
|
||||
// This function is used to return strings from a string tensor to python
|
||||
// as a numpy array of strings
|
||||
// Strings are always on CPU and must always be copied to python memory
|
||||
py::array StringTensorToNumpyArray(const Tensor& tensor) {
|
||||
// Create the result and allocate memory with the right size
|
||||
py::array result(py::dtype(NPY_OBJECT), tensor.Shape().GetDims());
|
||||
const auto span = tensor.DataAsSpan<std::string>();
|
||||
auto* mutable_data = reinterpret_cast<py::object*>(result.mutable_data());
|
||||
for (size_t i = 0, lim = span.size(); i < lim; ++i) {
|
||||
mutable_data[i] = py::cast(span[i]);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
pybind11::array PrimitiveTensorToNumpyOverOrtValue(const OrtValue& ort_value) {
|
||||
const Tensor& tensor = ort_value.Get<Tensor>();
|
||||
// The capsule destructor must be stateless
|
||||
// We create a copy of OrtValue on the heap.
|
||||
auto memory_release = [](void* data) {
|
||||
auto* ort_value = reinterpret_cast<OrtValue*>(data);
|
||||
delete ort_value;
|
||||
};
|
||||
|
||||
const int numpy_type = OnnxRuntimeTensorToNumpyType(tensor.DataType());
|
||||
auto ort_value_ptr = std::make_unique<OrtValue>(ort_value);
|
||||
pybind11::capsule caps(ort_value_ptr.get(), memory_release);
|
||||
ort_value_ptr.release();
|
||||
|
||||
// Not using array_t<T> because it may not handle MLFloat16 properly
|
||||
pybind11::array result(py::dtype(numpy_type), tensor.Shape().GetDims(),
|
||||
tensor.DataRaw(),
|
||||
caps);
|
||||
return result;
|
||||
}
|
||||
|
||||
pybind11::array PrimitiveTensorToNumpyFromDevice(const OrtValue& ort_value, const DataTransferAlternative& dtm) {
|
||||
const Tensor& tensor = ort_value.Get<Tensor>();
|
||||
const int numpy_type = OnnxRuntimeTensorToNumpyType(tensor.DataType());
|
||||
pybind11::array result(py::dtype(numpy_type), tensor.Shape().GetDims());
|
||||
void* data = result.mutable_data();
|
||||
|
||||
if (std::holds_alternative<const DataTransferManager*>(dtm)) {
|
||||
const DataTransferManager* data_transfer = std::get<const DataTransferManager*>(dtm);
|
||||
static const OrtMemoryInfo cpu_alloc_info{onnxruntime::CPU, OrtDeviceAllocator};
|
||||
const auto span = gsl::make_span<char>(reinterpret_cast<char*>(data), tensor.SizeInBytes());
|
||||
ORT_THROW_IF_ERROR(CopyTensorDataToByteSpan(*data_transfer, tensor, cpu_alloc_info, span));
|
||||
} else {
|
||||
std::get<MemCpyFunc>(dtm)(data, tensor.DataRaw(), tensor.SizeInBytes());
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
// In all cases, we may not have access to a DataTransferManager, hence the user may specify functions that
|
||||
// pretty much does what a DataTransferManager does - copy data from device(s) to the host
|
||||
void GetPyObjFromTensor(const Tensor& rtensor, py::object& obj,
|
||||
const DataTransferManager* data_transfer_manager,
|
||||
const std::unordered_map<OrtDevice::DeviceType, MemCpyFunc>* mem_cpy_to_host_functions) {
|
||||
std::vector<npy_intp> npy_dims;
|
||||
const TensorShape& shape = rtensor.Shape();
|
||||
py::object GetPyObjFromTensor(const OrtValue& ort_value,
|
||||
const DataTransferManager* data_transfer_manager,
|
||||
const std::unordered_map<OrtDevice::DeviceType, MemCpyFunc>* mem_cpy_to_host_functions) {
|
||||
ORT_ENFORCE(ort_value.IsTensor(), "This function only supports tensors");
|
||||
|
||||
for (size_t n = 0; n < shape.NumDimensions(); ++n) {
|
||||
npy_dims.push_back(shape[n]);
|
||||
const auto& tensor = ort_value.Get<Tensor>();
|
||||
if (tensor.IsDataTypeString()) {
|
||||
ORT_ENFORCE(tensor.Location().device.Type() == OrtDevice::CPU, "Strings can only be on CPU");
|
||||
// Create a numpy array of strings (python objects) by copy/converting them
|
||||
py::array result = StringTensorToNumpyArray(tensor);
|
||||
return py::cast<py::object>(result);
|
||||
}
|
||||
|
||||
MLDataType dtype = rtensor.DataType();
|
||||
const int numpy_type = OnnxRuntimeTensorToNumpyType(dtype);
|
||||
obj = py::reinterpret_steal<py::object>(PyArray_SimpleNew(
|
||||
narrow<int>(shape.NumDimensions()), npy_dims.data(), numpy_type));
|
||||
const auto device_type = tensor.Location().device.Type();
|
||||
// Create an numpy array on top of the OrtValue memory, no copy
|
||||
if (device_type == OrtDevice::CPU) {
|
||||
py::array result = PrimitiveTensorToNumpyOverOrtValue(ort_value);
|
||||
return py::cast<py::object>(result);
|
||||
}
|
||||
|
||||
void* out_ptr = static_cast<void*>(
|
||||
PyArray_DATA(reinterpret_cast<PyArrayObject*>(obj.ptr())));
|
||||
if (!data_transfer_manager && !mem_cpy_to_host_functions) {
|
||||
throw std::runtime_error(
|
||||
"GetPyObjFromTensor: Either data transfer manager or a "
|
||||
"function to copy data to the host is needed to convert non-CPU tensor to numpy array");
|
||||
}
|
||||
|
||||
if (numpy_type != NPY_OBJECT) {
|
||||
// if it is not cpu tensor, need to copy to host
|
||||
auto device_type = rtensor.Location().device.Type();
|
||||
if (device_type != OrtDevice::CPU) {
|
||||
if (!data_transfer_manager && !mem_cpy_to_host_functions)
|
||||
throw std::runtime_error(
|
||||
"GetPyObjFromTensor: Either data transfer manager or a "
|
||||
"function to copy data to the host is needed to convert non-CPU tensor to numpy array");
|
||||
static const OrtMemoryInfo cpu_alloc_info{onnxruntime::CPU, OrtDeviceAllocator};
|
||||
|
||||
// Prefer DataTransferManager if available
|
||||
if (data_transfer_manager) {
|
||||
auto span = gsl::make_span<char>(reinterpret_cast<char*>(out_ptr), dtype->Size() * shape.Size());
|
||||
ORT_THROW_IF_ERROR(CopyTensorDataToByteSpan(
|
||||
*data_transfer_manager, rtensor, cpu_alloc_info, span));
|
||||
} else {
|
||||
auto mem_cpy_to_host = mem_cpy_to_host_functions->find(device_type);
|
||||
|
||||
ORT_ENFORCE(mem_cpy_to_host != mem_cpy_to_host_functions->end(),
|
||||
"Unable to locate a function that can copy data to the host from the device");
|
||||
|
||||
ORT_ENFORCE(mem_cpy_to_host->second != 0,
|
||||
"No function that can copy data to the host from the device provided");
|
||||
|
||||
mem_cpy_to_host->second(out_ptr, rtensor.DataRaw(), dtype->Size() * shape.Size());
|
||||
}
|
||||
|
||||
} else
|
||||
memcpy(out_ptr, rtensor.DataRaw(dtype), dtype->Size() * shape.Size());
|
||||
py::array result;
|
||||
if (data_transfer_manager != nullptr) {
|
||||
result = PrimitiveTensorToNumpyFromDevice(ort_value, data_transfer_manager);
|
||||
} else {
|
||||
// Handle string type.
|
||||
// Copying strings to cpu from device is currently not supported
|
||||
ORT_ENFORCE(rtensor.Location().device.Type() == OrtDevice::CPU,
|
||||
"Copying string tensors located on another device to the host is currently not supported");
|
||||
py::object* outObj = static_cast<py::object*>(out_ptr);
|
||||
const std::string* src = rtensor.Data<std::string>();
|
||||
for (int i = 0; i < rtensor.Shape().Size(); i++, src++) {
|
||||
outObj[i] = py::cast(*src);
|
||||
}
|
||||
auto mem_cpy_to_host = mem_cpy_to_host_functions->find(device_type);
|
||||
ORT_ENFORCE(mem_cpy_to_host != mem_cpy_to_host_functions->end(),
|
||||
"Unable to locate a function that can copy data to the host from the device");
|
||||
result = PrimitiveTensorToNumpyFromDevice(ort_value, mem_cpy_to_host->second);
|
||||
}
|
||||
return py::cast<py::object>(result);
|
||||
}
|
||||
|
||||
const char* GetDeviceName(const OrtDevice& device) {
|
||||
|
|
@ -292,10 +322,9 @@ py::object AddNonTensor<TensorSeq>(const OrtValue& val,
|
|||
const std::unordered_map<OrtDevice::DeviceType, MemCpyFunc>* mem_cpy_to_host_functions) {
|
||||
const auto& seq_tensors = val.Get<TensorSeq>();
|
||||
py::list py_list;
|
||||
for (const auto& rtensor : seq_tensors) {
|
||||
py::object obj;
|
||||
GetPyObjFromTensor(rtensor.Get<Tensor>(), obj, data_transfer_manager, mem_cpy_to_host_functions);
|
||||
py_list.append(obj);
|
||||
for (const auto& ort_value : seq_tensors) {
|
||||
py::object obj = GetPyObjFromTensor(ort_value, data_transfer_manager, mem_cpy_to_host_functions);
|
||||
py_list.append(std::move(obj));
|
||||
}
|
||||
// XToolChain kills the build
|
||||
// local variable 'py_list' will be copied despite being returned by name [-Werror,-Wreturn-std-move]
|
||||
|
|
@ -347,10 +376,7 @@ py::object AddNonTensorAsPyObj(const OrtValue& val,
|
|||
|
||||
py::object AddTensorAsPyObj(const OrtValue& val, const DataTransferManager* data_transfer_manager,
|
||||
const std::unordered_map<OrtDevice::DeviceType, MemCpyFunc>* mem_cpy_to_host_functions) {
|
||||
const Tensor& rtensor = val.Get<Tensor>();
|
||||
py::object obj;
|
||||
GetPyObjFromTensor(rtensor, obj, data_transfer_manager, mem_cpy_to_host_functions);
|
||||
return obj;
|
||||
return GetPyObjFromTensor(val, data_transfer_manager, mem_cpy_to_host_functions);
|
||||
}
|
||||
|
||||
static std::unique_ptr<onnxruntime::IExecutionProvider> LoadExecutionProvider(
|
||||
|
|
@ -1863,11 +1889,12 @@ including arg name, arg type (contains both type and shape).)pbdoc")
|
|||
},
|
||||
R"pbdoc(Load a model saved in ONNX or ORT format.)pbdoc")
|
||||
.def("run",
|
||||
[](PyInferenceSession* sess, std::vector<std::string> output_names,
|
||||
std::map<std::string, py::object> pyfeeds, RunOptions* run_options = nullptr)
|
||||
-> std::vector<py::object> {
|
||||
[](PyInferenceSession* sess, const std::vector<std::string>& output_names,
|
||||
const std::map<std::string, const py::object>& pyfeeds, RunOptions* run_options = nullptr)
|
||||
-> py::list {
|
||||
NameMLValMap feeds;
|
||||
for (auto feed : pyfeeds) {
|
||||
feeds.reserve(pyfeeds.size());
|
||||
for (const auto& feed : pyfeeds) {
|
||||
// No need to process 'None's sent in by the user
|
||||
// to feed Optional inputs in the graph.
|
||||
// We just won't include anything in the feed and ORT
|
||||
|
|
@ -1885,6 +1912,7 @@ including arg name, arg type (contains both type and shape).)pbdoc")
|
|||
}
|
||||
|
||||
std::vector<OrtValue> fetches;
|
||||
fetches.reserve(output_names.size());
|
||||
common::Status status;
|
||||
|
||||
{
|
||||
|
|
@ -1897,29 +1925,28 @@ including arg name, arg type (contains both type and shape).)pbdoc")
|
|||
}
|
||||
}
|
||||
|
||||
std::vector<py::object> rfetch;
|
||||
rfetch.reserve(fetches.size());
|
||||
py::list result;
|
||||
size_t pos = 0;
|
||||
for (auto fet : fetches) {
|
||||
for (const auto& fet : fetches) {
|
||||
if (fet.IsAllocated()) {
|
||||
if (fet.IsTensor()) {
|
||||
rfetch.push_back(AddTensorAsPyObj(fet, nullptr, nullptr));
|
||||
result.append(AddTensorAsPyObj(fet, nullptr, nullptr));
|
||||
} else if (fet.IsSparseTensor()) {
|
||||
rfetch.push_back(GetPyObjectFromSparseTensor(pos, fet, nullptr));
|
||||
result.append(GetPyObjectFromSparseTensor(pos, fet, nullptr));
|
||||
} else {
|
||||
rfetch.push_back(AddNonTensorAsPyObj(fet, nullptr, nullptr));
|
||||
result.append(AddNonTensorAsPyObj(fet, nullptr, nullptr));
|
||||
}
|
||||
} else { // Send back None because the corresponding OrtValue was empty
|
||||
rfetch.push_back(py::none());
|
||||
result.append(py::none());
|
||||
}
|
||||
++pos;
|
||||
}
|
||||
return rfetch;
|
||||
return result;
|
||||
})
|
||||
.def("run_async",
|
||||
[](PyInferenceSession* sess,
|
||||
std::vector<std::string> output_names,
|
||||
std::map<std::string, py::object> pyfeeds,
|
||||
const std::vector<std::string>& output_names,
|
||||
const std::map<std::string, py::object>& pyfeeds,
|
||||
PyCallback callback, py::object user_data = {},
|
||||
RunOptions* run_options = nullptr)
|
||||
-> void {
|
||||
|
|
@ -1928,7 +1955,7 @@ including arg name, arg type (contains both type and shape).)pbdoc")
|
|||
async_resource->user_data = user_data;
|
||||
// prepare feeds
|
||||
async_resource->ReserveFeeds(pyfeeds.size());
|
||||
for (auto feed : pyfeeds) {
|
||||
for (const auto& feed : pyfeeds) {
|
||||
if (!feed.second.is(py::none())) {
|
||||
OrtValue ml_value;
|
||||
auto px = sess->GetSessionHandle()->GetModelInputs();
|
||||
|
|
@ -1945,7 +1972,7 @@ including arg name, arg type (contains both type and shape).)pbdoc")
|
|||
}
|
||||
// prepare fetches
|
||||
async_resource->ReserveFetches(output_names.size());
|
||||
for (auto& output_name : output_names) {
|
||||
for (const auto& output_name : output_names) {
|
||||
async_resource->fetch_names.push_back(output_name);
|
||||
async_resource->fetch_names_raw.push_back(async_resource->fetch_names.back().c_str());
|
||||
async_resource->fetches_raw.push_back({});
|
||||
|
|
@ -1968,15 +1995,17 @@ including arg name, arg type (contains both type and shape).)pbdoc")
|
|||
/// a Tensor, SparseTensor or a TensorSequence.
|
||||
.def("run_with_ort_values", [](PyInferenceSession* sess, const py::dict& feeds, const std::vector<std::string>& output_names, RunOptions* run_options = nullptr) -> std::vector<OrtValue> {
|
||||
NameMLValMap ort_feeds;
|
||||
ort_feeds.reserve(feeds.size());
|
||||
// item is always a copy since dict returns a value and not a ref
|
||||
// and Apple XToolChain barks
|
||||
for (const auto item : feeds) {
|
||||
for (const auto& item : feeds) {
|
||||
auto name = item.first.cast<std::string>();
|
||||
const OrtValue* ort_value = item.second.cast<const OrtValue*>();
|
||||
ort_feeds.emplace(name, *ort_value);
|
||||
}
|
||||
|
||||
std::vector<OrtValue> fetches;
|
||||
fetches.reserve(output_names.size());
|
||||
{
|
||||
// release GIL to allow multiple python threads to invoke Run() in parallel.
|
||||
py::gil_scoped_release release;
|
||||
|
|
@ -2057,8 +2086,9 @@ including arg name, arg type (contains both type and shape).)pbdoc")
|
|||
})
|
||||
.def("get_tuning_results", [](PyInferenceSession* sess) -> py::list {
|
||||
#if !defined(ORT_MINIMAL_BUILD)
|
||||
auto results = sess->GetSessionHandle()->GetTuningResults();
|
||||
py::list ret;
|
||||
for (const auto& trs : sess->GetSessionHandle()->GetTuningResults()) {
|
||||
for (const auto& trs : results) {
|
||||
py::dict py_trs;
|
||||
py_trs["ep"] = trs.ep;
|
||||
py_trs["results"] = trs.results;
|
||||
|
|
|
|||
|
|
@ -232,10 +232,9 @@ std::unordered_map<std::string, std::unordered_map<std::string, py::object>> Con
|
|||
py_tensor_state[layer1_item.first] = {};
|
||||
for (const auto& layer2_item : layer1_item.second) {
|
||||
assert(layer2_item.second.IsTensor());
|
||||
py::object obj;
|
||||
const Tensor& rtensor = layer2_item.second.Get<Tensor>();
|
||||
GetPyObjFromTensor(rtensor, obj, &data_transfer_manager);
|
||||
py_tensor_state[layer1_item.first].insert({layer2_item.first, obj});
|
||||
py::array arr = PrimitiveTensorToNumpyFromDevice(layer2_item.second,
|
||||
&data_transfer_manager);
|
||||
py_tensor_state[layer1_item.first].insert({layer2_item.first, py::cast<py::object>(arr)});
|
||||
}
|
||||
}
|
||||
return py_tensor_state;
|
||||
|
|
|
|||
Loading…
Reference in a new issue