mirror of
https://github.com/saymrwulf/onnxruntime.git
synced 2026-05-19 21:32:23 +00:00
Directly use the Python numpy array's memory if it is already contiguous. (#2355)
* Directly use the Python numpy array's memory if it is already contiguous. This can greatly improve performance for sessions with large inputs, such as a big 1920x1080 image with fastrcnn, where a 30~40% speedup can be achieved. * Add a test case that exercises both contiguous and non-contiguous numpy arrays as inputs.
This commit is contained in:
parent
ed6da0d191
commit
aa37e2de8f
2 changed files with 84 additions and 58 deletions
|
|
@ -117,66 +117,72 @@ std::unique_ptr<Tensor> CreateTensor(AllocatorPtr alloc, const std::string& name
|
|||
|
||||
TensorShape shape(dims);
|
||||
auto element_type = NumpyToOnnxRuntimeTensorType(npy_type);
|
||||
p_tensor = onnxruntime::make_unique<Tensor>(element_type, shape, alloc);
|
||||
if (npy_type == NPY_UNICODE) {
|
||||
// Copy string data which needs to be done after Tensor is allocated.
|
||||
// Strings are Python strings or numpy.unicode string.
|
||||
std::string* dst = p_tensor->MutableData<std::string>();
|
||||
auto item_size = PyArray_ITEMSIZE(darray);
|
||||
auto num_chars = item_size / PyUnicode_4BYTE_KIND;
|
||||
char* src = static_cast<char*>(PyArray_DATA(darray));
|
||||
const char* str;
|
||||
Py_ssize_t size;
|
||||
PyObject* pStr;
|
||||
for (int i = 0; i < shape.Size(); i++, src += item_size) {
|
||||
// Python unicode strings are assumed to be USC-4. Strings are stored as UTF-8.
|
||||
pStr = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, src, num_chars);
|
||||
str = PyUnicode_AsUTF8AndSize(pStr, &size);
|
||||
if (str == NULL) {
|
||||
dst[i] = "";
|
||||
} else {
|
||||
// Size is equal to the longest string size, numpy stores
|
||||
// strings in a single array. Those code assumes a string ends with a final 0.
|
||||
dst[i] = str;
|
||||
}
|
||||
Py_XDECREF(pStr);
|
||||
}
|
||||
} else if (npy_type == NPY_STRING || npy_type == NPY_VOID) {
|
||||
// Copy string data which needs to be done after Tensor is allocated.
|
||||
// Strings are given as bytes (encoded strings).
|
||||
// NPY_VOID does not trim final 0.
|
||||
// NPY_STRING assumes bytes string ends with a final 0.
|
||||
std::string* dst = p_tensor->MutableData<std::string>();
|
||||
auto item_size = PyArray_ITEMSIZE(darray);
|
||||
char* src = static_cast<char*>(PyArray_DATA(darray));
|
||||
for (int i = 0; i < shape.Size(); i++, src += item_size) {
|
||||
if (npy_type == NPY_STRING) {
|
||||
dst[i] = src;
|
||||
} else {
|
||||
dst[i].resize(item_size);
|
||||
memcpy((void*)dst[i].c_str(), src, item_size);
|
||||
}
|
||||
}
|
||||
} else if (npy_type == NPY_OBJECT) {
|
||||
// Converts object into string.
|
||||
std::string* dst = p_tensor->MutableData<std::string>();
|
||||
auto item_size = PyArray_ITEMSIZE(darray);
|
||||
char* src = static_cast<char*>(PyArray_DATA(darray));
|
||||
PyObject *item, *pStr;
|
||||
for (int i = 0; i < shape.Size(); ++i, src += item_size) {
|
||||
// Python unicode strings are assumed to be USC-4. Strings are stored as UTF-8.
|
||||
item = PyArray_GETITEM(darray, src);
|
||||
pStr = PyObject_Str(item);
|
||||
dst[i] = py::reinterpret_borrow<py::str>(pStr);
|
||||
Py_XDECREF(pStr);
|
||||
}
|
||||
if (pyObject == darray && npy_type != NPY_UNICODE && npy_type != NPY_STRING &&
|
||||
npy_type != NPY_VOID && npy_type != NPY_OBJECT) {
|
||||
p_tensor = onnxruntime::make_unique<Tensor>(
|
||||
element_type, shape, static_cast<void*>(PyArray_DATA(darray)), alloc->Info());
|
||||
} else {
|
||||
void* buffer = p_tensor->MutableDataRaw();
|
||||
size_t len;
|
||||
if (!IAllocator::CalcMemSizeForArray(element_type->Size(), shape.Size(), &len)) {
|
||||
throw std::runtime_error("length overflow");
|
||||
p_tensor = onnxruntime::make_unique<Tensor>(element_type, shape, alloc);
|
||||
if (npy_type == NPY_UNICODE) {
|
||||
// Copy string data which needs to be done after Tensor is allocated.
|
||||
// Strings are Python strings or numpy.unicode string.
|
||||
std::string* dst = p_tensor->MutableData<std::string>();
|
||||
auto item_size = PyArray_ITEMSIZE(darray);
|
||||
auto num_chars = item_size / PyUnicode_4BYTE_KIND;
|
||||
char* src = static_cast<char*>(PyArray_DATA(darray));
|
||||
const char* str;
|
||||
Py_ssize_t size;
|
||||
PyObject* pStr;
|
||||
for (int i = 0; i < shape.Size(); i++, src += item_size) {
|
||||
// Python unicode strings are assumed to be USC-4. Strings are stored as UTF-8.
|
||||
pStr = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, src, num_chars);
|
||||
str = PyUnicode_AsUTF8AndSize(pStr, &size);
|
||||
if (str == NULL) {
|
||||
dst[i] = "";
|
||||
} else {
|
||||
// Size is equal to the longest string size, numpy stores
|
||||
// strings in a single array. Those code assumes a string ends with a final 0.
|
||||
dst[i] = str;
|
||||
}
|
||||
Py_XDECREF(pStr);
|
||||
}
|
||||
} else if (npy_type == NPY_STRING || npy_type == NPY_VOID) {
|
||||
// Copy string data which needs to be done after Tensor is allocated.
|
||||
// Strings are given as bytes (encoded strings).
|
||||
// NPY_VOID does not trim final 0.
|
||||
// NPY_STRING assumes bytes string ends with a final 0.
|
||||
std::string* dst = p_tensor->MutableData<std::string>();
|
||||
auto item_size = PyArray_ITEMSIZE(darray);
|
||||
char* src = static_cast<char*>(PyArray_DATA(darray));
|
||||
for (int i = 0; i < shape.Size(); i++, src += item_size) {
|
||||
if (npy_type == NPY_STRING) {
|
||||
dst[i] = src;
|
||||
} else {
|
||||
dst[i].resize(item_size);
|
||||
memcpy((void*)dst[i].c_str(), src, item_size);
|
||||
}
|
||||
}
|
||||
} else if (npy_type == NPY_OBJECT) {
|
||||
// Converts object into string.
|
||||
std::string* dst = p_tensor->MutableData<std::string>();
|
||||
auto item_size = PyArray_ITEMSIZE(darray);
|
||||
char* src = static_cast<char*>(PyArray_DATA(darray));
|
||||
PyObject *item, *pStr;
|
||||
for (int i = 0; i < shape.Size(); ++i, src += item_size) {
|
||||
// Python unicode strings are assumed to be USC-4. Strings are stored as UTF-8.
|
||||
item = PyArray_GETITEM(darray, src);
|
||||
pStr = PyObject_Str(item);
|
||||
dst[i] = py::reinterpret_borrow<py::str>(pStr);
|
||||
Py_XDECREF(pStr);
|
||||
}
|
||||
} else {
|
||||
void* buffer = p_tensor->MutableDataRaw();
|
||||
size_t len;
|
||||
if (!IAllocator::CalcMemSizeForArray(element_type->Size(), shape.Size(), &len)) {
|
||||
throw std::runtime_error("length overflow");
|
||||
}
|
||||
memcpy(buffer, static_cast<void*>(PyArray_DATA(darray)), len);
|
||||
}
|
||||
memcpy(buffer, static_cast<void*>(PyArray_DATA(darray)), len);
|
||||
}
|
||||
} catch (...) {
|
||||
if (!dref) {
|
||||
|
|
|
|||
|
|
@ -118,6 +118,26 @@ class TestInferenceSession(unittest.TestCase):
|
|||
np.testing.assert_allclose(
|
||||
output_expected, res[0], rtol=1e-05, atol=1e-08)
|
||||
|
||||
def testRunModel2Contiguous(self):
|
||||
sess = onnxrt.InferenceSession(self.get_name("matmul_1.onnx"))
|
||||
x = np.array([[2.0, 1.0], [4.0, 3.0], [6.0, 5.0]], dtype=np.float32)[:,[1,0]]
|
||||
input_name = sess.get_inputs()[0].name
|
||||
self.assertEqual(input_name, "X")
|
||||
input_shape = sess.get_inputs()[0].shape
|
||||
self.assertEqual(input_shape, [3, 2])
|
||||
output_name = sess.get_outputs()[0].name
|
||||
self.assertEqual(output_name, "Y")
|
||||
output_shape = sess.get_outputs()[0].shape
|
||||
self.assertEqual(output_shape, [3, 1])
|
||||
res = sess.run([output_name], {input_name: x})
|
||||
output_expected = np.array([[5.0], [11.0], [17.0]], dtype=np.float32)
|
||||
np.testing.assert_allclose(
|
||||
output_expected, res[0], rtol=1e-05, atol=1e-08)
|
||||
xcontiguous = np.ascontiguousarray(x)
|
||||
rescontiguous = sess.run([output_name], {input_name: xcontiguous})
|
||||
np.testing.assert_allclose(
|
||||
output_expected, rescontiguous[0], rtol=1e-05, atol=1e-08)
|
||||
|
||||
def testRunModelMultipleThreads(self):
|
||||
so = onnxrt.SessionOptions()
|
||||
so.log_verbosity_level = 1
|
||||
|
|
|
|||
Loading…
Reference in a new issue