// Note(jiayq): the import_array function is done inside
// caffe2_python.cc. Read
// http://docs.scipy.org/doc/numpy-1.10.1/reference/c-api.array.html#miscellaneous
// for more details.
|
|
|
#define NO_IMPORT_ARRAY
|
|
|
|
|
|
|
|
|
|
#include "pybind_state.h"
|
|
|
|
|
|
|
|
|
|
#include <pybind11/pybind11.h>
|
|
|
|
|
#include <pybind11/stl.h>
|
|
|
|
|
|
2018-09-12 04:07:03 +00:00
|
|
|
#ifdef CAFFE2_USE_CUDNN
|
2017-03-01 01:38:44 +00:00
|
|
|
#include "caffe2/core/common_cudnn.h"
|
2018-09-12 04:07:03 +00:00
|
|
|
#endif // CAFFE2_USE_CUDNN
|
2017-12-22 00:54:07 +00:00
|
|
|
#include "caffe2/core/context_gpu.h"
|
2016-11-14 22:58:04 +00:00
|
|
|
#include "caffe2/operators/operator_fallback_gpu.h"
|
2018-08-25 00:18:39 +00:00
|
|
|
#include "caffe2/python/pybind_state_registry.h"
|
2016-09-06 22:54:56 +00:00
|
|
|
|
2018-04-12 00:03:54 +00:00
|
|
|
#ifdef CAFFE2_USE_TRT
|
|
|
|
|
#include "caffe2/contrib/tensorrt/tensorrt_tranformer.h"
|
|
|
|
|
#endif // CAFFE2_USE_TRT
|
|
|
|
|
|
2016-09-06 22:54:56 +00:00
|
|
|
namespace caffe2 {
|
2016-11-14 22:58:04 +00:00
|
|
|
namespace python {
|
|
|
|
|
|
2018-09-06 22:01:07 +00:00
|
|
|
// Expose the Python op on CUDA via the CPU fallback wrapper (see
// caffe2/operators/operator_fallback_gpu.h): the op itself runs on CPU.
REGISTER_CUDA_OPERATOR(Python, GPUFallbackOp);
REGISTER_CUDA_OPERATOR(
    PythonGradient,
    GPUFallbackOp);

// The DLPack variants run natively with CUDAContext; the `true` template
// argument selects the DLPack interchange path of PythonOp.
REGISTER_CUDA_OPERATOR(PythonDLPack, PythonOp<CUDAContext, true>);
REGISTER_CUDA_OPERATOR(
    PythonDLPackGradient,
    PythonGradientOp<CUDAContext, true>);

// Route Python-side feeds of CUDA blobs through TensorFeeder<CUDAContext>.
REGISTER_BLOB_FEEDER(CUDA, TensorFeeder<CUDAContext>);

namespace py = pybind11;
|
|
|
|
|
|
|
|
|
|
/// Registers CUDA-specific module-level functions and attributes on the
/// pybind11 module `m`: device/version queries, the peer-access matrix,
/// per-device properties, and the TensorRT conversion entry points (the
/// latter throw unless the build defines CAFFE2_USE_TRT).
void addCUDAGlobalMethods(py::module& m) {
  m.def("num_cuda_devices", &NumCudaDevices);
  m.def("get_cuda_version", &CudaVersion);
#ifdef CAFFE2_USE_CUDNN
  m.def("get_cudnn_version", &cudnnCompiledVersion);
  // Export the algorithm counts so Python callers can size per-algorithm
  // arrays without hard-coding cuDNN enum values.
  m.attr("cudnn_convolution_fwd_algo_count") =
      py::int_(static_cast<int>(CUDNN_CONVOLUTION_FWD_ALGO_COUNT));
  m.attr("cudnn_convolution_bwd_data_algo_count") =
      py::int_(static_cast<int>(CUDNN_CONVOLUTION_BWD_DATA_ALGO_COUNT));
  m.attr("cudnn_convolution_bwd_filter_algo_count") =
      py::int_(static_cast<int>(CUDNN_CONVOLUTION_BWD_FILTER_ALGO_COUNT));
#endif
  // Returns the device-to-device access matrix produced by
  // GetCudaPeerAccessPattern; throws if the query fails.
  m.def("get_cuda_peer_access_pattern", []() {
    std::vector<std::vector<bool>> pattern;
    CAFFE_ENFORCE(caffe2::GetCudaPeerAccessPattern(&pattern));
    return pattern;
  });
  // Returns a dict with the name, compute capability (major/minor) and
  // total global memory of the given device.
  m.def("get_device_properties", [](int deviceid) {
    auto& prop = GetDeviceProperty(deviceid);
    std::map<std::string, py::object> obj;
    obj["name"] = py::cast(prop.name);
    obj["major"] = py::cast(prop.major);
    obj["minor"] = py::cast(prop.minor);
    obj["totalGlobalMem"] = py::cast(prop.totalGlobalMem);
    return obj;
  });
  // Converts a serialized ONNX model into a single serialized TensorRT
  // OperatorDef. Only available when built with USE_TENSORRT=1.
  m.def(
      "onnx_to_trt_op",
      [](const py::bytes& onnx_model_str,
         const std::unordered_map<std::string, std::vector<int>>&
             output_size_hints,
         int max_batch_size,
         int max_workspace_size,
         int verbosity,
         bool debug_builder) -> py::bytes {
#ifdef CAFFE2_USE_TRT
        TensorRTTransformer t(
            max_batch_size, max_workspace_size, verbosity, debug_builder);
        auto op_def =
            t.BuildTrtOp(onnx_model_str.cast<std::string>(), output_size_hints);
        std::string out;
        op_def.SerializeToString(&out);
        return py::bytes(out);
#else
        CAFFE_THROW("Please build Caffe2 with USE_TENSORRT=1");
#endif // CAFFE2_USE_TRT
      });
  // Rewrites a serialized predict net in place with TensorRT-backed ops,
  // using `shapes` (name -> dims, assumed FLOAT) as input shape hints.
  m.def(
      "transform_trt",
      [](const py::bytes& pred_net_str,
         const std::unordered_map<std::string, std::vector<int>>& shapes,
         int max_batch_size,
         int max_workspace_size,
         int verbosity,
         bool debug_builder,
         bool build_serializable_op) -> py::bytes {
#ifdef CAFFE2_USE_TRT
        caffe2::NetDef pred_net;
        if (!ParseProtoFromLargeString(
                pred_net_str.cast<std::string>(), &pred_net)) {
          // NOTE(review): this logs and then transforms an empty/partial
          // net — consider CAFFE_THROW here; kept as-is to preserve the
          // existing best-effort behavior.
          LOG(ERROR) << "broken pred_net protobuf";
        }
        std::unordered_map<std::string, TensorShape> tensor_shapes;
        for (const auto& it : shapes) {
          tensor_shapes.emplace(
              it.first, CreateTensorShape(it.second, TensorProto::FLOAT));
        }
        TensorRTTransformer ts(
            max_batch_size,
            max_workspace_size,
            verbosity,
            debug_builder,
            build_serializable_op);
        ts.Transform(GetCurrentWorkspace(), &pred_net, tensor_shapes);
        std::string pred_net_str2;
        pred_net.SerializeToString(&pred_net_str2);
        return py::bytes(pred_net_str2);
#else
        CAFFE_THROW("Please build Caffe2 with USE_TENSORRT=1");
#endif // CAFFE2_USE_TRT
      });
}
|
|
|
|
|
|
2017-12-22 00:54:07 +00:00
|
|
|
/// Registers the DLPackTensorCUDA class on the pybind11 module `m`.
/// Each binding first enforces that the wrapper's device option is
/// PROTO_CUDA before touching the underlying tensor.
void addCUDAObjectMethods(py::module& m) {
  py::class_<DLPackWrapper<CUDAContext>>(m, "DLPackTensorCUDA")
      .def_property_readonly(
          "data",
          [](DLPackWrapper<CUDAContext>* wrapper) -> py::object {
            CAFFE_ENFORCE_EQ(
                wrapper->device_option.device_type(),
                PROTO_CUDA,
                "Expected CUDA device option for CUDA tensor");
            // Hand back the wrapper's DLPack view of the tensor data.
            return wrapper->data();
          },
          "Return DLPack tensor with tensor's data.")
      .def(
          "feed",
          [](DLPackWrapper<CUDAContext>* wrapper, py::object data_obj) {
            CAFFE_ENFORCE_EQ(
                wrapper->device_option.device_type(),
                PROTO_CUDA,
                "Expected CUDA device option for CUDA tensor");
            wrapper->feed(data_obj);
          },
          "Copy data from given DLPack tensor into this tensor.")
      .def_property_readonly(
          "_shape",
          [](const DLPackWrapper<CUDAContext>& wrapper) {
            return wrapper.tensor->dims();
          })
      .def(
          "_reshape",
          [](DLPackWrapper<CUDAContext>* wrapper,
             std::vector<TIndex> new_dims) {
            wrapper->tensor->Resize(new_dims);
          });
}
|
|
|
|
|
|
2017-10-22 20:01:37 +00:00
|
|
|
PYBIND11_MODULE(caffe2_pybind11_state_gpu, m) {
  m.doc() = "pybind11 stateful interface to Caffe2 workspaces - GPU edition";

  // Install the CPU bindings first, then layer the CUDA-specific ones on
  // top of the same module.
  addGlobalMethods(m);
  addCUDAGlobalMethods(m);
  addObjectMethods(m);
  addCUDAObjectMethods(m);

  // Apply every registered pybind addition to this module.
  auto* registry = PybindAdditionRegistry();
  for (const auto& key : registry->Keys()) {
    registry->Create(key, m);
  }
}
|
2016-11-14 22:58:04 +00:00
|
|
|
} // namespace python
|
|
|
|
|
} // namespace caffe2
|