// Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. #include "onnxruntime_pybind_mlvalue.h" #include "python/onnxruntime_pybind_state_common.h" #include "pybind11/numpy.h" #define NO_IMPORT_ARRAY #define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION #define PY_ARRAY_UNIQUE_SYMBOL onnxruntime_python_ARRAY_API #include #include "core/framework/tensor_shape.h" #include "core/framework/tensor.h" #include "core/framework/sparse_tensor.h" #include "core/framework/allocator.h" #include "core/framework/data_types.h" #include "core/framework/data_types_internal.h" #include "core/providers/get_execution_providers.h" #include "core/framework/kernel_registry.h" #include "core/framework/provider_options_utils.h" #include "core/session/provider_bridge_ort.h" namespace onnxruntime { namespace python { namespace py = pybind11; using namespace onnxruntime::logging; #if !defined(DISABLE_SPARSE_TENSORS) namespace { // Create a pybind11:dtype numpy instance using ONNX Tensor Element Type template struct MakeDType { py::dtype operator()() const { return py::dtype::of(); } }; /// /// The function creates a numpy array that points to /// data stored within the corresponing tensor. Parent object /// holds a reference to the object that owns the data so it /// does not disappear. /// /// numpy array py::array MakeNumpyArrayFromIndices(const Tensor& indices, const py::object& parent) { // See https://github.com/pybind/pybind11/issues/2271 for more information on parent py::array result(indices.Shape().GetDims(), indices.Data(), parent); assert(!result.owndata()); // Set a read-only flag PyArray_CLEARFLAGS(reinterpret_cast(result.ptr()), NPY_ARRAY_WRITEABLE); return result; } } // namespace class PySparseCooView : public SparseTensor::CooView { py::object parent_; public: PySparseCooView(const SparseTensor::CooView& view, const py::object& parent) noexcept : SparseTensor::CooView(view), parent_(parent) {} }; class PySparseCsrView : public SparseTensor::CsrView { py::object parent_; public: PySparseCsrView(const SparseTensor::CsrView& view, const py::object& parent) noexcept : SparseTensor::CsrView(view), parent_(parent) {} }; class PySparseBlockSparseView : public SparseTensor::BlockSparseView { py::object parent_; public: PySparseBlockSparseView(const SparseTensor::BlockSparseView& view, const py::object& parent) noexcept : SparseTensor::BlockSparseView(view), parent_(parent) {} }; #endif // !defined(DISABLE_SPARSE_TENSORS) void addSparseTensorMethods(pybind11::module& m) { // this is exported via __init__.py so has to exist py::enum_(m, "OrtSparseFormat") .value("ORT_SPARSE_UNDEFINED", OrtSparseFormat::ORT_SPARSE_UNDEFINED) .value("ORT_SPARSE_COO", OrtSparseFormat::ORT_SPARSE_COO) .value("ORT_SPARSE_CSRC", OrtSparseFormat::ORT_SPARSE_CSRC) .value("ORT_SPARSE_BLOCK_SPARSE", OrtSparseFormat::ORT_SPARSE_BLOCK_SPARSE); #if !defined(DISABLE_SPARSE_TENSORS) py::class_(m, "SparseCooView") // Returns a numpy array of COO indices backed by Sparse Tensor memory // be aware that indices may reside on GPU if Sparse Tensor is on GPU .def("indices", [](const PySparseCooView* view) -> py::array { const auto& indices = view->Indices(); return MakeNumpyArrayFromIndices(indices, py::cast(*view)); }); py::class_(m, "SparseCsrView") .def("inner", [](const PySparseCsrView* view) -> py::array { const auto& indices = view->Inner(); return MakeNumpyArrayFromIndices(indices, py::cast(*view)); }) .def("outer", [](const PySparseCsrView* view) -> py::array { const auto& indices = view->Outer(); return MakeNumpyArrayFromIndices(indices, py::cast(*view)); }); py::class_(m, "SparseBlockSparseView") .def("indices", [](const PySparseBlockSparseView* view) -> py::array { const auto& indices = view->Indices(); return MakeNumpyArrayFromIndices(indices, py::cast(*view)); }); py::class_ sparse_bind(m, "SparseTensor"); // Factory method to create a COO Sparse Tensor from numpy arrays acting as backing storage. // Numeric arrays memory is used as is with reference count increment. All other supported // types are copied and supported only on CPU. // Use numpy.ascontiguousarray() to obtain contiguous array of values and indices if necessary // py_dense_shape - numpy dense shape of the sparse tensor // py_values - contiguous and homogeneous numpy array of values // py_indices - contiguous numpy array of int64_t indices // ort_device - where the value and indices buffers are allocated. For non-primitive types, // only cpu device is supported. There is not a way to verify that ort_device // accurately describes the memory that is backing values and indices. sparse_bind .def_static("sparse_coo_from_numpy", [](const std::vector& py_dense_shape, const py::array& py_values, const py::array_t& py_indices, const OrtDevice& ort_device) -> std::unique_ptr { if (1 != py_values.ndim()) { ORT_THROW("Expecting values 1-D numpy values array for COO format. Got dims: ", py_values.ndim()); } TensorShape dense_shape(py_dense_shape); auto values_type = GetNumpyArrayType(py_values); auto ml_type = NumpyToOnnxRuntimeTensorType(values_type); std::unique_ptr result; if (IsNumericNumpyType(values_type)) { if (!PyArray_ISCONTIGUOUS(reinterpret_cast(py_values.ptr()))) { throw std::runtime_error("Require contiguous numpy array of values"); } if (!PyArray_ISCONTIGUOUS(reinterpret_cast(py_indices.ptr()))) { throw std::runtime_error("Require contiguous numpy array of indices"); } // create references to make sure storage does not disappear std::vector reference_holders = {py_values, py_indices}; OrtMemoryInfo mem_info = GetMemoryInfoPerDeviceType(ort_device); TensorShape values_shape{py_values.size()}; auto sparse_tensor = std::make_unique(ml_type, dense_shape, values_shape, const_cast(py_values.data()), mem_info); auto index_span = gsl::make_span(const_cast(py_indices.data()), py_indices.size()); ORT_THROW_IF_ERROR(sparse_tensor->UseCooIndices(index_span)); result = std::make_unique(std::move(sparse_tensor), std::move(reference_holders)); } else if (values_type == NPY_UNICODE || values_type == NPY_STRING) { if (ort_device.Type() != OrtDevice::CPU) { throw std::runtime_error("Only CPU based devices are supported for non-numeric datatypes"); } auto sparse_tensor = std::make_unique(ml_type, dense_shape, GetAllocator()); auto mutator = sparse_tensor->MakeCooData(py_values.size(), py_indices.size()); CopyDataToTensor(py_values, values_type, mutator.Values()); CopyDataToTensor(py_indices, GetNumpyArrayType(py_indices), mutator.Indices()); result = std::make_unique(std::move(sparse_tensor)); } else { ORT_THROW("Unsupported values data type: ", values_type); } return result; }) // Factory method to create a CSR Sparse Tensor from numpy arrays acting as backing storage. // Numeric arrays memory is used as is with reference count increment. All other supported // types are copied and supported only on CPU. // Use numpy.ascontiguousarray() to obtain contiguous array of values and indices if necessary // py_dense_shape - numpy dense shape of the sparse tensor // py_values - contiguous and homogeneous numpy array of values // py_inner_indices - contiguous numpy array of int64_t indices // py_outer_indices - contiguous numpy array of int64_t indices // ort_device - where the value and indices buffers are allocated. For non-primitive types, // only cpu device is supported. There is not a way to verify that ort_device // accurately describes the memory that is backing values and indices. .def_static( "sparse_csr_from_numpy", [](const std::vector& py_dense_shape, const py::array& py_values, const py::array_t& py_inner_indices, const py::array_t& py_outer_indices, const OrtDevice& ort_device) -> std::unique_ptr { if (1 != py_values.ndim() || 1 != py_inner_indices.ndim() || 1 != py_outer_indices.ndim()) { ORT_THROW("Expecting all data to be 1-D numpy arrays for CSR format."); } TensorShape dense_shape(py_dense_shape); auto values_type = GetNumpyArrayType(py_values); auto ml_type = NumpyToOnnxRuntimeTensorType(values_type); std::unique_ptr result; if (IsNumericNumpyType(values_type)) { if (!PyArray_ISCONTIGUOUS(reinterpret_cast(py_values.ptr()))) { throw std::runtime_error("Require contiguous numpy array of values"); } if (!PyArray_ISCONTIGUOUS(reinterpret_cast(py_inner_indices.ptr()))) { throw std::runtime_error("Require contiguous numpy array of indices"); } if (!PyArray_ISCONTIGUOUS(reinterpret_cast(py_outer_indices.ptr()))) { throw std::runtime_error("Require contiguous numpy array of indices"); } // go ahead and create references to make sure storage does not disappear std::vector reference_holders = {py_values, py_inner_indices, py_outer_indices}; OrtMemoryInfo mem_info = GetMemoryInfoPerDeviceType(ort_device); TensorShape values_shape{py_values.size()}; auto sparse_tensor = std::make_unique(ml_type, dense_shape, values_shape, const_cast(py_values.data()), mem_info); auto inner_span = gsl::make_span(const_cast(py_inner_indices.data()), py_inner_indices.size()); auto outer_span = gsl::make_span(const_cast(py_outer_indices.data()), py_outer_indices.size()); ORT_THROW_IF_ERROR(sparse_tensor->UseCsrIndices(inner_span, outer_span)); result = std::make_unique(std::move(sparse_tensor), std::move(reference_holders)); } else if (values_type == NPY_UNICODE || values_type == NPY_STRING) { if (ort_device.Type() != OrtDevice::CPU) { throw std::runtime_error("Only CPU based devices are supported for non-numeric datatypes"); } auto sparse_tensor = std::make_unique(ml_type, dense_shape, GetAllocator()); auto mutator = sparse_tensor->MakeCsrData(py_values.size(), py_inner_indices.size(), py_outer_indices.size()); CopyDataToTensor(py_values, values_type, mutator.Values()); CopyDataToTensor(py_inner_indices, GetNumpyArrayType(py_inner_indices), mutator.Inner()); CopyDataToTensor(py_outer_indices, GetNumpyArrayType(py_outer_indices), mutator.Outer()); result = std::make_unique(std::move(sparse_tensor)); } else { ORT_THROW("Unsupported values data type: ", values_type); } return result; }) // Factory method to create a BlockSparse Tensor from numpy arrays acting as backing storage. // Numeric arrays memory is used as is with reference count increment. All other supported // types are copied and supported only on CPU. // Use numpy.ascontiguousarray() to obtain contiguous array of values and indices if necessary // py_dense_shape - numpy dense shape of the sparse tensor // ort_device - desribes the allocation. Only primitive types allocations can be mapped to // py_values - contiguous and homogeneous numpy array of values // py_indices - contiguous numpy array of int32_t indices // ort_device - where the value and indices buffers are allocated. For non-primitive types, // only cpu device is supported. There is not a way to verify that ort_device // accurately describes the memory that is backing values and indices. .def_static( "blocksparse_from_numpy", [](const std::vector& py_dense_shape, const py::array& py_values, const py::array_t& py_indices, const OrtDevice& ort_device) -> std::unique_ptr { TensorShape dense_shape(py_dense_shape); TensorShape values_shape = GetShape(py_values); TensorShape index_shape = GetShape(py_indices); auto values_type = GetNumpyArrayType(py_values); auto ml_type = NumpyToOnnxRuntimeTensorType(values_type); std::unique_ptr result; if (IsNumericNumpyType(values_type)) { if (!PyArray_ISCONTIGUOUS(reinterpret_cast(py_values.ptr()))) { throw std::runtime_error("Require contiguous numpy array of values"); } if (!PyArray_ISCONTIGUOUS(reinterpret_cast(py_indices.ptr()))) { throw std::runtime_error("Require contiguous numpy array of indices"); } // create references to make sure storage does not disappear std::vector reference_holders = {py_values, py_indices}; OrtMemoryInfo mem_info = GetMemoryInfoPerDeviceType(ort_device); auto sparse_tensor = std::make_unique(ml_type, dense_shape, values_shape, const_cast(py_values.data()), mem_info); ORT_THROW_IF_ERROR(sparse_tensor->UseBlockSparseIndices(index_shape, const_cast(py_indices.data()))); result = std::make_unique(std::move(sparse_tensor), std::move(reference_holders)); } else if (values_type == NPY_UNICODE || values_type == NPY_STRING) { if (ort_device.Type() != OrtDevice::CPU) { throw std::runtime_error("Only CPU based devices are supported for non-numeric datatypes"); } auto sparse_tensor = std::make_unique(ml_type, dense_shape, GetAllocator()); auto mutator = sparse_tensor->MakeBlockSparseData(values_shape, index_shape); CopyDataToTensor(py_values, values_type, mutator.Values()); CopyDataToTensor(py_indices, GetNumpyArrayType(py_indices), mutator.Indices()); result = std::make_unique(std::move(sparse_tensor)); } else { ORT_THROW("Unsupported values data type: ", values_type); } return result; }) // Returns a numpy array that is backed by SparseTensor values memory // be aware that it may be on GPU .def("values", [](const PySparseTensor* py_tensor) -> py::array { const SparseTensor& sparse_tensor = py_tensor->Instance(); if (sparse_tensor.Format() == SparseFormat::kUndefined) { ORT_THROW("This sparse tensor instance does not contain data"); } if (sparse_tensor.IsDataTypeString()) { // Strings can not be on GPU and require conversion UTF-8 to Python UNICODE // We need to create a copy. const int numpy_type = OnnxRuntimeTensorToNumpyType(DataTypeImpl::GetType()); ORT_ENFORCE(NPY_OBJECT == numpy_type, "We are expecting to map strings to NPY_OBJECT type"); const auto& values_shape = sparse_tensor.Values().Shape(); py::dtype dtype("object"); py::array result(dtype, values_shape.GetDims(), {}); auto* out_ptr = static_cast( PyArray_DATA(reinterpret_cast(result.ptr()))); const std::string* src = sparse_tensor.Values().Data(); for (int64_t i = 0, size = values_shape.Size(); i < size; ++i, src++) { out_ptr[i] = py::cast(*src); } return result; } else { utils::MLTypeCallDispatcher t_disp(sparse_tensor.GetElementType()); auto dtype = t_disp.InvokeRet(); const auto& values = sparse_tensor.Values(); // See https://github.com/pybind/pybind11/issues/2271 py::array result(dtype, values.Shape().GetDims(), values.DataRaw(), py::cast(*py_tensor)); assert(!result.owndata()); // Set a read-only flag PyArray_CLEARFLAGS(reinterpret_cast(result.ptr()), NPY_ARRAY_WRITEABLE); return result; } }) // Returns a Coo view of data .def("get_coo_data", [](const PySparseTensor* py_tensor) -> std::unique_ptr { const SparseTensor& sparse_tensor = py_tensor->Instance(); if (sparse_tensor.Format() != SparseFormat::kCoo) { ORT_THROW("This sparse tensor does not contain COO format"); } return std::make_unique(sparse_tensor.AsCoo(), py::cast(*py_tensor)); }) // Returns a CSR view of data .def("get_csrc_data", [](const PySparseTensor* py_tensor) -> std::unique_ptr { const SparseTensor& sparse_tensor = py_tensor->Instance(); if (sparse_tensor.Format() != SparseFormat::kCsrc) { ORT_THROW("This sparse tensor does not contain CSR(C) format"); } return std::make_unique(sparse_tensor.AsCsr(), py::cast(*py_tensor)); }) // Returns a blocksparse view of data .def("get_blocksparse_data", [](const PySparseTensor* py_tensor) -> std::unique_ptr { const SparseTensor& sparse_tensor = py_tensor->Instance(); if (sparse_tensor.Format() != SparseFormat::kBlockSparse) { ORT_THROW("This sparse tensor does not contain BlockSparse format"); } return std::make_unique(sparse_tensor.AsBlockSparse(), py::cast(*py_tensor)); }) /// This will copy SparseTensor into a new instance on a specified CUDA device or throw: /// - if this sparse tensor contains strings /// - if this sparse tensor is already on GPU /// - if CUDA is not present in this build /// - if the specified device is not valid #ifdef USE_CUDA .def("to_cuda", [](const PySparseTensor* py_tensor, const OrtDevice& ort_device) -> std::unique_ptr { const SparseTensor& sparse_tensor = py_tensor->Instance(); if (sparse_tensor.IsDataTypeString()) { ORT_THROW("Can not copy string tensor to GPU devices."); } if (sparse_tensor.Location().device.Type() == OrtDevice::GPU) { ORT_THROW("This sparse_tensor is already allocated on cuda. Cross device copy not supported."); } if (!IsCudaDeviceIdValid(logging::LoggingManager::DefaultLogger(), ort_device.Id())) { ORT_THROW("The provided device id doesn't match any available GPUs on the machine: ", ort_device.Id()); } auto cuda_allocator = GetCudaAllocator(ort_device.Id()); auto gpu_transfer = GetGPUDataTransfer(); auto dest_tensor = std::make_unique(sparse_tensor.DataType(), sparse_tensor.DenseShape(), std::move(cuda_allocator)); ORT_THROW_IF_ERROR(sparse_tensor.Copy(*gpu_transfer, *dest_tensor, 0)); auto result = std::make_unique(std::move(dest_tensor)); return result; #else .def("to_cuda", [](const PySparseTensor*, const OrtDevice&) { ORT_THROW("Cuda is not available in this build"); #endif // USE_CUDA }) .def("dense_shape", [](const PySparseTensor* py_tensor) -> py::list { const SparseTensor& st = py_tensor->Instance(); const auto& dims = st.DenseShape().GetDims(); // We create a copy of dimensions, it is small py::list py_dims; for (auto d : dims) { py_dims.append(d); } return py_dims; }) .def("device_name", [](const PySparseTensor* py_tensor) -> std::string { return std::string(GetDeviceName(py_tensor->Instance().Location().device)); }) .def("data_type", [](const PySparseTensor* py_tensor) -> std::string { const SparseTensor& tensor = py_tensor->Instance(); const auto elem_type = tensor.GetElementType(); const auto* type_proto = DataTypeImpl::SparseTensorTypeFromONNXEnum(elem_type)->GetTypeProto(); if (type_proto == nullptr) { ORT_THROW("Unknown type of SparseTensor: ", tensor.DataType()); } return *ONNX_NAMESPACE::Utils::DataTypeUtils::ToType(*type_proto); }) // pybind apparently has a bug with returning enums from def_property_readonly or methods // returning a method object instead of the enumeration value // so we are using def_property and throw on a potential modificaiton .def_property( "format", [](const PySparseTensor* py_tensor) -> OrtSparseFormat { const SparseTensor& tensor = py_tensor->Instance(); auto retval = OrtSparseFormat::ORT_SPARSE_UNDEFINED; switch (tensor.Format()) { case SparseFormat::kUndefined: break; case SparseFormat::kCoo: retval = OrtSparseFormat::ORT_SPARSE_COO; break; case SparseFormat::kCsrc: retval = OrtSparseFormat::ORT_SPARSE_CSRC; break; case SparseFormat::kBlockSparse: retval = OrtSparseFormat::ORT_SPARSE_BLOCK_SPARSE; break; default: throw std::runtime_error("Can't switch on FormatFlags()"); } return retval; }, [](PySparseTensor*, OrtSparseFormat) -> void { throw std::runtime_error("This is a readonly property"); }); #endif // !defined(DISABLED_SPARSE_TENSORS) } } // namespace python } // namespace onnxruntime