onnxruntime/onnxruntime/python/onnxruntime_pybind_sparse_tensor.cc
Scott McKay 912e50f61c
Add CI minimal build with all options disabled. Fix python binding code if sparse tensors are disabled. (#9898)
* Add 2 builds to validate the cmake defines for excluding optional components work in both full and minimal builds.

* Create empty config for no-ops build

* Create empty config for no-ops build - attempt #2

* Create empty config for no-ops build - attempt #3

* Update python binding code to work when sparse tensors are disabled.
2021-12-03 06:56:51 +10:00

435 lines
23 KiB
C++

// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
#include "onnxruntime_pybind_mlvalue.h"
#include "python/onnxruntime_pybind_state_common.h"
#include "pybind11/numpy.h"
#define NO_IMPORT_ARRAY
#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION
#define PY_ARRAY_UNIQUE_SYMBOL onnxruntime_python_ARRAY_API
#include <numpy/arrayobject.h>
#include "core/framework/tensor_shape.h"
#include "core/framework/tensor.h"
#include "core/framework/sparse_tensor.h"
#include "core/framework/allocator.h"
#include "core/framework/data_types.h"
#include "core/framework/data_types_internal.h"
#include "core/providers/get_execution_providers.h"
#include "core/framework/kernel_registry.h"
#include "core/framework/provider_options_utils.h"
#include "core/session/provider_bridge_ort.h"
namespace onnxruntime {
namespace python {
namespace py = pybind11;
using namespace onnxruntime::logging;
#if !defined(DISABLE_SPARSE_TENSORS)
namespace {
// Functor producing the pybind11 numpy dtype (py::dtype) that corresponds to the
// C++ element type T. It is a class template with a nullary call operator so that
// it can be supplied as a template-template argument to a type dispatcher.
template <typename T>
struct MakeDType {
  py::dtype operator()() const {
    py::dtype element_dtype = py::dtype::of<T>();
    return element_dtype;
  }
};
/// <summary>
/// Creates a read-only numpy array that points directly at the data stored within
/// the corresponding indices tensor (no copy is made). The parent object is attached
/// as the array base so that whatever owns the data does not disappear while the
/// array is alive.
/// The numpy element type follows the tensor element type: int32_t when the tensor
/// holds int32_t indices (the block-sparse format supplies its indices as int32_t),
/// int64_t otherwise (COO and CSR indices).
/// </summary>
/// <param name="indices">tensor holding the sparse indices</param>
/// <param name="parent">Python object that keeps the indices storage alive</param>
/// <returns>numpy array</returns>
py::array MakeNumpyArrayFromIndices(const Tensor& indices, const py::object& parent) {
  // See https://github.com/pybind/pybind11/issues/2271 for more information on parent
  // Pick the accessor that matches the stored element type. Reading int32_t indices
  // through an int64_t pointer would expose a wrong dtype and element size.
  py::array result = indices.IsDataType<int32_t>()
                         ? py::array(indices.Shape().GetDims(), indices.Data<int32_t>(), parent)
                         : py::array(indices.Shape().GetDims(), indices.Data<int64_t>(), parent);
  // With a base object supplied the array must only reference the memory, never own it.
  assert(!result.owndata());
  // Set a read-only flag
  PyArray_CLEARFLAGS(reinterpret_cast<PyArrayObject*>(result.ptr()), NPY_ARRAY_WRITEABLE);
  return result;
}
} // namespace
class PySparseCooView : public SparseTensor::CooView {
py::object parent_;
public:
PySparseCooView(const SparseTensor::CooView& view, const py::object& parent) noexcept
: SparseTensor::CooView(view), parent_(parent) {}
};
class PySparseCsrView : public SparseTensor::CsrView {
py::object parent_;
public:
PySparseCsrView(const SparseTensor::CsrView& view, const py::object& parent) noexcept
: SparseTensor::CsrView(view), parent_(parent) {}
};
class PySparseBlockSparseView : public SparseTensor::BlockSparseView {
py::object parent_;
public:
PySparseBlockSparseView(const SparseTensor::BlockSparseView& view, const py::object& parent) noexcept
: SparseTensor::BlockSparseView(view), parent_(parent) {}
};
#endif // !defined(DISABLE_SPARSE_TENSORS)
// Registers the sparse tensor Python bindings on module `m`.
// The OrtSparseFormat enum is always registered. The SparseCooView, SparseCsrView,
// SparseBlockSparseView and SparseTensor classes are registered only when
// DISABLE_SPARSE_TENSORS is not defined.
void addSparseTensorMethods(pybind11::module& m) {
// This enum is exported via __init__.py, so it has to exist even when sparse tensors are disabled.
py::enum_<OrtSparseFormat>(m, "OrtSparseFormat")
.value("ORT_SPARSE_UNDEFINED", OrtSparseFormat::ORT_SPARSE_UNDEFINED)
.value("ORT_SPARSE_COO", OrtSparseFormat::ORT_SPARSE_COO)
.value("ORT_SPARSE_CSRC", OrtSparseFormat::ORT_SPARSE_CSRC)
.value("ORT_SPARSE_BLOCK_SPARSE", OrtSparseFormat::ORT_SPARSE_BLOCK_SPARSE);
#if !defined(DISABLE_SPARSE_TENSORS)
py::class_<PySparseCooView>(m, "SparseCooView")
// Returns a numpy array of COO indices backed by Sparse Tensor memory.
// Be aware that indices may reside on GPU if Sparse Tensor is on GPU.
// The view itself is passed as the parent of the returned array.
.def("indices", [](const PySparseCooView* view) -> py::array {
const auto& indices = view->Indices();
return MakeNumpyArrayFromIndices(indices, py::cast(*view));
});
py::class_<PySparseCsrView>(m, "SparseCsrView")
// Returns a numpy array of CSR inner indices; the view is passed as the array parent.
.def("inner", [](const PySparseCsrView* view) -> py::array {
const auto& indices = view->Inner();
return MakeNumpyArrayFromIndices(indices, py::cast(*view));
})
// Returns a numpy array of CSR outer indices; the view is passed as the array parent.
.def("outer", [](const PySparseCsrView* view) -> py::array {
const auto& indices = view->Outer();
return MakeNumpyArrayFromIndices(indices, py::cast(*view));
});
py::class_<PySparseBlockSparseView>(m, "SparseBlockSparseView")
// Returns a numpy array of block-sparse indices; the view is passed as the array parent.
// NOTE(review): block-sparse indices are supplied as int32_t in blocksparse_from_numpy below;
// confirm MakeNumpyArrayFromIndices exposes them with a matching element type.
.def("indices", [](const PySparseBlockSparseView* view) -> py::array {
const auto& indices = view->Indices();
return MakeNumpyArrayFromIndices(indices, py::cast(*view));
});
py::class_<PySparseTensor> sparse_bind(m, "SparseTensor");
// Factory method to create a COO Sparse Tensor from numpy arrays acting as backing storage.
// Numeric arrays memory is used as is with reference count increment. All other supported
// types (strings) are copied and supported only on CPU.
// Use numpy.ascontiguousarray() to obtain contiguous array of values and indices if necessary.
// py_dense_shape - dense shape of the sparse tensor
// py_values - contiguous and homogeneous 1-D numpy array of values
// py_indices - contiguous numpy array of int64_t indices
// ort_device - where the value and indices buffers are allocated. For non-primitive types,
// only cpu device is supported. There is not a way to verify that ort_device
// accurately describes the memory that is backing values and indices.
// Throws if py_values is not 1-D, if a numeric values/indices array is not contiguous,
// if string values are requested on a non-CPU device, or if the values type is unsupported.
sparse_bind
.def_static("sparse_coo_from_numpy",
[](const std::vector<int64_t>& py_dense_shape,
const py::array& py_values,
const py::array_t<int64_t>& py_indices,
const OrtDevice& ort_device) -> std::unique_ptr<PySparseTensor> {
if (1 != py_values.ndim()) {
ORT_THROW("Expecting values 1-D numpy values array for COO format. Got dims: ", py_values.ndim());
}
TensorShape dense_shape(py_dense_shape);
auto values_type = GetNumpyArrayType(py_values);
auto ml_type = NumpyToOnnxRuntimeTensorType(values_type);
std::unique_ptr<PySparseTensor> result;
if (IsNumericNumpyType(values_type)) {
// Numeric path: the sparse tensor is built directly over the numpy buffers,
// so both arrays must be contiguous.
if (!PyArray_ISCONTIGUOUS(reinterpret_cast<PyArrayObject*>(py_values.ptr()))) {
throw std::runtime_error("Require contiguous numpy array of values");
}
if (!PyArray_ISCONTIGUOUS(reinterpret_cast<PyArrayObject*>(py_indices.ptr()))) {
throw std::runtime_error("Require contiguous numpy array of indices");
}
// create references to make sure storage does not disappear
std::vector<py::object> reference_holders = {py_values, py_indices};
OrtMemoryInfo mem_info = GetMemoryInfoPerDeviceType(ort_device);
TensorShape values_shape{py_values.size()};
auto sparse_tensor = std::make_unique<SparseTensor>(ml_type, dense_shape, values_shape,
const_cast<void*>(py_values.data()), mem_info);
auto index_span = gsl::make_span(const_cast<int64_t*>(py_indices.data()), py_indices.size());
ORT_THROW_IF_ERROR(sparse_tensor->UseCooIndices(index_span));
// The reference holders travel with the resulting PySparseTensor.
result = std::make_unique<PySparseTensor>(std::move(sparse_tensor), std::move(reference_holders));
} else if (values_type == NPY_UNICODE || values_type == NPY_STRING) {
// String path: data is copied into tensor-owned storage, CPU only.
if (ort_device.Type() != OrtDevice::CPU) {
throw std::runtime_error("Only CPU based devices are supported for non-numeric datatypes");
}
auto sparse_tensor = std::make_unique<SparseTensor>(ml_type, dense_shape, GetAllocator());
auto mutator = sparse_tensor->MakeCooData(py_values.size(), py_indices.size());
CopyDataToTensor(py_values, values_type, mutator.Values());
CopyDataToTensor(py_indices, GetNumpyArrayType(py_indices), mutator.Indices());
result = std::make_unique<PySparseTensor>(std::move(sparse_tensor));
} else {
ORT_THROW("Unsupported values data type: ", values_type);
}
return result;
})
// Factory method to create a CSR Sparse Tensor from numpy arrays acting as backing storage.
// Numeric arrays memory is used as is with reference count increment. All other supported
// types (strings) are copied and supported only on CPU.
// Use numpy.ascontiguousarray() to obtain contiguous array of values and indices if necessary.
// py_dense_shape - dense shape of the sparse tensor
// py_values - contiguous and homogeneous 1-D numpy array of values
// py_inner_indices - contiguous 1-D numpy array of int64_t indices
// py_outer_indices - contiguous 1-D numpy array of int64_t indices
// ort_device - where the value and indices buffers are allocated. For non-primitive types,
// only cpu device is supported. There is not a way to verify that ort_device
// accurately describes the memory that is backing values and indices.
// Throws if any of the arrays is not 1-D, if a numeric values/indices array is not contiguous,
// if string values are requested on a non-CPU device, or if the values type is unsupported.
.def_static(
"sparse_csr_from_numpy",
[](const std::vector<int64_t>& py_dense_shape,
const py::array& py_values,
const py::array_t<int64_t>& py_inner_indices,
const py::array_t<int64_t>& py_outer_indices,
const OrtDevice& ort_device) -> std::unique_ptr<PySparseTensor> {
if (1 != py_values.ndim() || 1 != py_inner_indices.ndim() || 1 != py_outer_indices.ndim()) {
ORT_THROW("Expecting all data to be 1-D numpy arrays for CSR format.");
}
TensorShape dense_shape(py_dense_shape);
auto values_type = GetNumpyArrayType(py_values);
auto ml_type = NumpyToOnnxRuntimeTensorType(values_type);
std::unique_ptr<PySparseTensor> result;
if (IsNumericNumpyType(values_type)) {
// Numeric path: the sparse tensor is built directly over the numpy buffers,
// so all three arrays must be contiguous.
if (!PyArray_ISCONTIGUOUS(reinterpret_cast<PyArrayObject*>(py_values.ptr()))) {
throw std::runtime_error("Require contiguous numpy array of values");
}
if (!PyArray_ISCONTIGUOUS(reinterpret_cast<PyArrayObject*>(py_inner_indices.ptr()))) {
throw std::runtime_error("Require contiguous numpy array of indices");
}
if (!PyArray_ISCONTIGUOUS(reinterpret_cast<PyArrayObject*>(py_outer_indices.ptr()))) {
throw std::runtime_error("Require contiguous numpy array of indices");
}
// go ahead and create references to make sure storage does not disappear
std::vector<py::object> reference_holders = {py_values, py_inner_indices, py_outer_indices};
OrtMemoryInfo mem_info = GetMemoryInfoPerDeviceType(ort_device);
TensorShape values_shape{py_values.size()};
auto sparse_tensor = std::make_unique<SparseTensor>(ml_type, dense_shape, values_shape,
const_cast<void*>(py_values.data()), mem_info);
auto inner_span = gsl::make_span<int64_t>(const_cast<int64_t*>(py_inner_indices.data()), py_inner_indices.size());
auto outer_span = gsl::make_span<int64_t>(const_cast<int64_t*>(py_outer_indices.data()), py_outer_indices.size());
ORT_THROW_IF_ERROR(sparse_tensor->UseCsrIndices(inner_span, outer_span));
// The reference holders travel with the resulting PySparseTensor.
result = std::make_unique<PySparseTensor>(std::move(sparse_tensor), std::move(reference_holders));
} else if (values_type == NPY_UNICODE || values_type == NPY_STRING) {
// String path: data is copied into tensor-owned storage, CPU only.
if (ort_device.Type() != OrtDevice::CPU) {
throw std::runtime_error("Only CPU based devices are supported for non-numeric datatypes");
}
auto sparse_tensor = std::make_unique<SparseTensor>(ml_type, dense_shape, GetAllocator());
auto mutator = sparse_tensor->MakeCsrData(py_values.size(), py_inner_indices.size(), py_outer_indices.size());
CopyDataToTensor(py_values, values_type, mutator.Values());
CopyDataToTensor(py_inner_indices, GetNumpyArrayType(py_inner_indices), mutator.Inner());
CopyDataToTensor(py_outer_indices, GetNumpyArrayType(py_outer_indices), mutator.Outer());
result = std::make_unique<PySparseTensor>(std::move(sparse_tensor));
} else {
ORT_THROW("Unsupported values data type: ", values_type);
}
return result;
})
// Factory method to create a BlockSparse Tensor from numpy arrays acting as backing storage.
// Numeric arrays memory is used as is with reference count increment. All other supported
// types (strings) are copied and supported only on CPU.
// Use numpy.ascontiguousarray() to obtain contiguous array of values and indices if necessary.
// py_dense_shape - dense shape of the sparse tensor
// py_values - contiguous and homogeneous numpy array of values
// py_indices - contiguous numpy array of int32_t indices
// ort_device - where the value and indices buffers are allocated. For non-primitive types,
// only cpu device is supported. There is not a way to verify that ort_device
// accurately describes the memory that is backing values and indices.
// Unlike the COO/CSR factories, the values and indices shapes are taken from the numpy arrays.
// Throws if a numeric values/indices array is not contiguous, if string values are requested
// on a non-CPU device, or if the values type is unsupported.
.def_static(
"blocksparse_from_numpy",
[](const std::vector<int64_t>& py_dense_shape,
const py::array& py_values,
const py::array_t<int32_t>& py_indices,
const OrtDevice& ort_device) -> std::unique_ptr<PySparseTensor> {
TensorShape dense_shape(py_dense_shape);
TensorShape values_shape = GetShape(py_values);
TensorShape index_shape = GetShape(py_indices);
auto values_type = GetNumpyArrayType(py_values);
auto ml_type = NumpyToOnnxRuntimeTensorType(values_type);
std::unique_ptr<PySparseTensor> result;
if (IsNumericNumpyType(values_type)) {
// Numeric path: the sparse tensor is built directly over the numpy buffers,
// so both arrays must be contiguous.
if (!PyArray_ISCONTIGUOUS(reinterpret_cast<PyArrayObject*>(py_values.ptr()))) {
throw std::runtime_error("Require contiguous numpy array of values");
}
if (!PyArray_ISCONTIGUOUS(reinterpret_cast<PyArrayObject*>(py_indices.ptr()))) {
throw std::runtime_error("Require contiguous numpy array of indices");
}
// create references to make sure storage does not disappear
std::vector<py::object> reference_holders = {py_values, py_indices};
OrtMemoryInfo mem_info = GetMemoryInfoPerDeviceType(ort_device);
auto sparse_tensor = std::make_unique<SparseTensor>(ml_type, dense_shape, values_shape,
const_cast<void*>(py_values.data()), mem_info);
ORT_THROW_IF_ERROR(sparse_tensor->UseBlockSparseIndices(index_shape, const_cast<int32_t*>(py_indices.data())));
// The reference holders travel with the resulting PySparseTensor.
result = std::make_unique<PySparseTensor>(std::move(sparse_tensor), std::move(reference_holders));
} else if (values_type == NPY_UNICODE || values_type == NPY_STRING) {
// String path: data is copied into tensor-owned storage, CPU only.
if (ort_device.Type() != OrtDevice::CPU) {
throw std::runtime_error("Only CPU based devices are supported for non-numeric datatypes");
}
auto sparse_tensor = std::make_unique<SparseTensor>(ml_type, dense_shape, GetAllocator());
auto mutator = sparse_tensor->MakeBlockSparseData(values_shape, index_shape);
CopyDataToTensor(py_values, values_type, mutator.Values());
CopyDataToTensor(py_indices, GetNumpyArrayType(py_indices), mutator.Indices());
result = std::make_unique<PySparseTensor>(std::move(sparse_tensor));
} else {
ORT_THROW("Unsupported values data type: ", values_type);
}
return result;
})
// Returns a numpy array that is backed by SparseTensor values memory;
// be aware that it may be on GPU. String values are the exception: they are
// returned as a newly created array of Python objects (a copy).
// Throws if the sparse tensor format is undefined (no data).
.def("values", [](const PySparseTensor* py_tensor) -> py::array {
const SparseTensor& sparse_tensor = py_tensor->Instance();
if (sparse_tensor.Format() == SparseFormat::kUndefined) {
ORT_THROW("This sparse tensor instance does not contain data");
}
if (sparse_tensor.IsDataTypeString()) {
// Strings can not be on GPU and require conversion UTF-8 to Python UNICODE
// We need to create a copy.
const int numpy_type = OnnxRuntimeTensorToNumpyType(DataTypeImpl::GetType<std::string>());
ORT_ENFORCE(NPY_OBJECT == numpy_type, "We are expecting to map strings to NPY_OBJECT type");
const auto& values_shape = sparse_tensor.Values().Shape();
py::dtype dtype("object");
py::array result(dtype, values_shape.GetDims(), {});
// Fill the object array element by element with Python objects created from the strings.
auto* out_ptr = static_cast<py::object*>(
PyArray_DATA(reinterpret_cast<PyArrayObject*>(result.ptr())));
const std::string* src = sparse_tensor.Values().Data<std::string>();
for (int64_t i = 0, size = values_shape.Size(); i < size; ++i, src++) {
out_ptr[i] = py::cast(*src);
}
return result;
} else {
// Map the tensor element type to a numpy dtype; only the types listed here are dispatched.
utils::MLTypeCallDispatcher<float, double, int8_t, uint8_t, int16_t, uint16_t, int32_t, uint32_t, int64_t, uint64_t>
t_disp(sparse_tensor.GetElementType());
auto dtype = t_disp.InvokeRet<py::dtype, MakeDType>();
const auto& values = sparse_tensor.Values();
// See https://github.com/pybind/pybind11/issues/2271
// The Python wrapper of this tensor is passed as the parent of the returned array.
py::array result(dtype, values.Shape().GetDims(), values.DataRaw(), py::cast(*py_tensor));
assert(!result.owndata());
// Set a read-only flag
PyArray_CLEARFLAGS(reinterpret_cast<PyArrayObject*>(result.ptr()), NPY_ARRAY_WRITEABLE);
return result;
}
})
// Returns a Coo view of data. Throws if the tensor is not in COO format.
.def("get_coo_data", [](const PySparseTensor* py_tensor) -> std::unique_ptr<PySparseCooView> {
const SparseTensor& sparse_tensor = py_tensor->Instance();
if (sparse_tensor.Format() != SparseFormat::kCoo) {
ORT_THROW("This sparse tensor does not contain COO format");
}
return std::make_unique<PySparseCooView>(sparse_tensor.AsCoo(), py::cast(*py_tensor));
})
// Returns a CSR view of data. Throws if the tensor is not in CSR(C) format.
.def("get_csrc_data", [](const PySparseTensor* py_tensor) -> std::unique_ptr<PySparseCsrView> {
const SparseTensor& sparse_tensor = py_tensor->Instance();
if (sparse_tensor.Format() != SparseFormat::kCsrc) {
ORT_THROW("This sparse tensor does not contain CSR(C) format");
}
return std::make_unique<PySparseCsrView>(sparse_tensor.AsCsr(), py::cast(*py_tensor));
})
// Returns a blocksparse view of data. Throws if the tensor is not in BlockSparse format.
.def("get_blocksparse_data", [](const PySparseTensor* py_tensor) -> std::unique_ptr<PySparseBlockSparseView> {
const SparseTensor& sparse_tensor = py_tensor->Instance();
if (sparse_tensor.Format() != SparseFormat::kBlockSparse) {
ORT_THROW("This sparse tensor does not contain BlockSparse format");
}
return std::make_unique<PySparseBlockSparseView>(sparse_tensor.AsBlockSparse(), py::cast(*py_tensor));
})
/// This will copy SparseTensor into a new instance on a specified CUDA device or throw:
/// - if this sparse tensor contains strings
/// - if this sparse tensor is already on GPU
/// - if CUDA is not present in this build
/// - if the specified device is not valid
// The preprocessor selects one of two lambda heads/bodies below; both branches
// share the closing `})` that follows the #endif.
#ifdef USE_CUDA
.def("to_cuda", [](const PySparseTensor* py_tensor, const OrtDevice& ort_device) -> std::unique_ptr<PySparseTensor> {
const SparseTensor& sparse_tensor = py_tensor->Instance();
if (sparse_tensor.IsDataTypeString()) {
ORT_THROW("Can not copy string tensor to GPU devices.");
}
if (sparse_tensor.Location().device.Type() == OrtDevice::GPU) {
ORT_THROW("This sparse_tensor is already allocated on cuda. Cross device copy not supported.");
}
if (!IsCudaDeviceIdValid(logging::LoggingManager::DefaultLogger(), ort_device.Id())) {
ORT_THROW("The provided device id doesn't match any available GPUs on the machine: ", ort_device.Id());
}
auto cuda_allocator = GetCudaAllocator(ort_device.Id());
auto gpu_transfer = GetGPUDataTransfer();
// Destination tensor allocates from the CUDA allocator; the source is then copied into it.
auto dest_tensor = std::make_unique<SparseTensor>(sparse_tensor.DataType(), sparse_tensor.DenseShape(),
std::move(cuda_allocator));
ORT_THROW_IF_ERROR(sparse_tensor.Copy(*gpu_transfer, *dest_tensor, 0));
auto result = std::make_unique<PySparseTensor>(std::move(dest_tensor));
return result;
#else
.def("to_cuda", [](const PySparseTensor*, const OrtDevice&) {
ORT_THROW("Cuda is not available in this build");
#endif // USE_CUDA
})
// Returns the dense shape of the sparse tensor as a Python list.
.def("dense_shape", [](const PySparseTensor* py_tensor) -> py::list {
const SparseTensor& st = py_tensor->Instance();
const auto& dims = st.DenseShape().GetDims();
// We create a copy of dimensions, it is small
py::list py_dims;
for (auto d : dims) {
py_dims.append(d);
}
return py_dims;
})
// Returns the name of the device reported by the tensor's memory location.
.def("device_name", [](const PySparseTensor* py_tensor) -> std::string {
return std::string(GetDeviceName(py_tensor->Instance().Location().device));
})
// Returns the sparse tensor type as a string derived from its ONNX type proto.
// Throws if no type proto is available for the element type.
.def("data_type", [](const PySparseTensor* py_tensor) -> std::string {
const SparseTensor& tensor = py_tensor->Instance();
const auto elem_type = tensor.GetElementType();
const auto* type_proto = DataTypeImpl::SparseTensorTypeFromONNXEnum(elem_type)->GetTypeProto();
if (type_proto == nullptr) {
ORT_THROW("Unknown type of SparseTensor: ", tensor.DataType());
}
return *ONNX_NAMESPACE::Utils::DataTypeUtils::ToType(*type_proto);
})
// pybind apparently has a bug with returning enums from def_property_readonly or methods
// returning a method object instead of the enumeration value
// so we are using def_property and throw on a potential modification
.def_property(
"format", [](const PySparseTensor* py_tensor) -> OrtSparseFormat {
const SparseTensor& tensor = py_tensor->Instance();
// Translate the internal SparseFormat into the public OrtSparseFormat enum;
// kUndefined keeps the ORT_SPARSE_UNDEFINED default.
auto retval = OrtSparseFormat::ORT_SPARSE_UNDEFINED;
switch (tensor.Format()) {
case SparseFormat::kUndefined:
break;
case SparseFormat::kCoo:
retval = OrtSparseFormat::ORT_SPARSE_COO;
break;
case SparseFormat::kCsrc:
retval = OrtSparseFormat::ORT_SPARSE_CSRC;
break;
case SparseFormat::kBlockSparse:
retval = OrtSparseFormat::ORT_SPARSE_BLOCK_SPARSE;
break;
default:
throw std::runtime_error("Can't switch on FormatFlags()");
}
return retval; }, [](PySparseTensor*, OrtSparseFormat) -> void { throw std::runtime_error("This is a readonly property"); });
#endif // !defined(DISABLE_SPARSE_TENSORS)
}
} // namespace python
} // namespace onnxruntime