// Note(jiayq): the import_array function is done inside
// caffe2_python.cc. Read
// http://docs.scipy.org/doc/numpy-1.10.1/reference/c-api.array.html#miscellaneous
// for more details.
|
|
|
#define NO_IMPORT_ARRAY
|
|
|
|
|
|
|
|
|
|
#include "pybind_state.h"
|
|
|
|
|
|
|
|
|
|
#include <pybind11/pybind11.h>
|
|
|
|
|
#include <pybind11/stl.h>
|
|
|
|
|
|
2018-09-12 04:07:03 +00:00
|
|
|
#ifdef CAFFE2_USE_CUDNN
|
2017-03-01 01:38:44 +00:00
|
|
|
#include "caffe2/core/common_cudnn.h"
|
2018-09-12 04:07:03 +00:00
|
|
|
#endif // CAFFE2_USE_CUDNN
|
2017-12-22 00:54:07 +00:00
|
|
|
#include "caffe2/core/context_gpu.h"
|
2016-11-14 22:58:04 +00:00
|
|
|
#include "caffe2/operators/operator_fallback_gpu.h"
|
2018-08-25 00:18:39 +00:00
|
|
|
#include "caffe2/python/pybind_state_registry.h"
|
2016-09-06 22:54:56 +00:00
|
|
|
|
2018-04-12 00:03:54 +00:00
|
|
|
#ifdef CAFFE2_USE_TRT
|
|
|
|
|
#include "caffe2/contrib/tensorrt/tensorrt_tranformer.h"
|
|
|
|
|
#endif // CAFFE2_USE_TRT
|
|
|
|
|
|
2016-09-06 22:54:56 +00:00
|
|
|
namespace caffe2 {
|
2016-11-14 22:58:04 +00:00
|
|
|
namespace python {
|
|
|
|
|
|
2018-09-06 22:01:07 +00:00
|
|
|
// Expose the Python op on CUDA via the CPU fallback wrapper (see
// caffe2/operators/operator_fallback_gpu.h): the op itself runs on CPU.
REGISTER_CUDA_OPERATOR(Python, GPUFallbackOp);
REGISTER_CUDA_OPERATOR(
    PythonGradient,
    GPUFallbackOp);

// The DLPack variants run natively with CUDAContext; the `true` template
// argument selects the DLPack interchange path of PythonOp.
REGISTER_CUDA_OPERATOR(PythonDLPack, PythonOp<CUDAContext, true>);
REGISTER_CUDA_OPERATOR(
    PythonDLPackGradient,
    PythonGradientOp<CUDAContext, true>);

// Route Python-side feeds of CUDA blobs through TensorFeeder<CUDAContext>.
REGISTER_BLOB_FEEDER(CUDA, TensorFeeder<CUDAContext>);

namespace py = pybind11;
|
|
|
|
|
|
|
|
|
|
/// Registers CUDA-specific module-level functions and attributes on the
/// pybind11 module `m`: device/version queries, the peer-access matrix,
/// per-device properties, and the TensorRT conversion entry points (the
/// latter throw unless the build defines CAFFE2_USE_TRT).
void addCUDAGlobalMethods(py::module& m) {
  m.def("num_cuda_devices", &NumCudaDevices);
  m.def("get_cuda_version", &CudaVersion);
#ifdef CAFFE2_USE_CUDNN
  m.def("get_cudnn_version", &cudnnCompiledVersion);
  // Export the algorithm counts so Python callers can size per-algorithm
  // arrays without hard-coding cuDNN enum values.
  m.attr("cudnn_convolution_fwd_algo_count") =
      py::int_(static_cast<int>(CUDNN_CONVOLUTION_FWD_ALGO_COUNT));
  m.attr("cudnn_convolution_bwd_data_algo_count") =
      py::int_(static_cast<int>(CUDNN_CONVOLUTION_BWD_DATA_ALGO_COUNT));
  m.attr("cudnn_convolution_bwd_filter_algo_count") =
      py::int_(static_cast<int>(CUDNN_CONVOLUTION_BWD_FILTER_ALGO_COUNT));
#endif
  // Returns the device-to-device access matrix produced by
  // GetCudaPeerAccessPattern; throws if the query fails.
  m.def("get_cuda_peer_access_pattern", []() {
    std::vector<std::vector<bool>> pattern;
    CAFFE_ENFORCE(caffe2::GetCudaPeerAccessPattern(&pattern));
    return pattern;
  });
  // Returns a dict with the name, compute capability (major/minor) and
  // total global memory of the given device.
  m.def("get_device_properties", [](int deviceid) {
    auto& prop = GetDeviceProperty(deviceid);
    std::map<std::string, py::object> obj;
    obj["name"] = py::cast(prop.name);
    obj["major"] = py::cast(prop.major);
    obj["minor"] = py::cast(prop.minor);
    obj["totalGlobalMem"] = py::cast(prop.totalGlobalMem);
    return obj;
  });
  // Converts a serialized ONNX model into a single serialized TensorRT
  // OperatorDef. Only available when built with USE_TENSORRT=1.
  m.def(
      "onnx_to_trt_op",
      [](const py::bytes& onnx_model_str,
         const std::unordered_map<std::string, std::vector<int>>&
             output_size_hints,
         int max_batch_size,
         int max_workspace_size,
         int verbosity,
         bool debug_builder) -> py::bytes {
#ifdef CAFFE2_USE_TRT
        TensorRTTransformer t(
            max_batch_size, max_workspace_size, verbosity, debug_builder);
        auto op_def =
            t.BuildTrtOp(onnx_model_str.cast<std::string>(), output_size_hints);
        std::string out;
        op_def.SerializeToString(&out);
        return py::bytes(out);
#else
        CAFFE_THROW("Please build Caffe2 with USE_TENSORRT=1");
#endif // CAFFE2_USE_TRT
      });
  // Rewrites a serialized predict net in place with TensorRT-backed ops,
  // using `shapes` (name -> dims, assumed FLOAT) as input shape hints.
  m.def(
      "transform_trt",
      [](const py::bytes& pred_net_str,
         const std::unordered_map<std::string, std::vector<int>>& shapes,
         int max_batch_size,
         int max_workspace_size,
         int verbosity,
         bool debug_builder,
         bool build_serializable_op) -> py::bytes {
#ifdef CAFFE2_USE_TRT
        caffe2::NetDef pred_net;
        if (!ParseProtoFromLargeString(
                pred_net_str.cast<std::string>(), &pred_net)) {
          // NOTE(review): this logs and then transforms an empty/partial
          // net — consider CAFFE_THROW here; kept as-is to preserve the
          // existing best-effort behavior.
          LOG(ERROR) << "broken pred_net protobuf";
        }
        std::unordered_map<std::string, TensorShape> tensor_shapes;
        for (const auto& it : shapes) {
          tensor_shapes.emplace(
              it.first, CreateTensorShape(it.second, TensorProto::FLOAT));
        }
        TensorRTTransformer ts(
            max_batch_size,
            max_workspace_size,
            verbosity,
            debug_builder,
            build_serializable_op);
        ts.Transform(GetCurrentWorkspace(), &pred_net, tensor_shapes);
        std::string pred_net_str2;
        pred_net.SerializeToString(&pred_net_str2);
        return py::bytes(pred_net_str2);
#else
        CAFFE_THROW("Please build Caffe2 with USE_TENSORRT=1");
#endif // CAFFE2_USE_TRT
      });
}
|
|
|
|
|
|
2017-12-22 00:54:07 +00:00
|
|
|
/// Registers the DLPackTensorCUDA class on the pybind11 module `m`.
/// Each binding first enforces that the wrapper's device option is
/// PROTO_CUDA before touching the underlying tensor.
void addCUDAObjectMethods(py::module& m) {
  py::class_<DLPackWrapper<CUDAContext>>(m, "DLPackTensorCUDA")
      .def_property_readonly(
          "data",
          [](DLPackWrapper<CUDAContext>* wrapper) -> py::object {
            CAFFE_ENFORCE_EQ(
                wrapper->device_option.device_type(),
                PROTO_CUDA,
                "Expected CUDA device option for CUDA tensor");
            // Hand back the wrapper's DLPack view of the tensor data.
            return wrapper->data();
          },
          "Return DLPack tensor with tensor's data.")
      .def(
          "feed",
          [](DLPackWrapper<CUDAContext>* wrapper, py::object data_obj) {
            CAFFE_ENFORCE_EQ(
                wrapper->device_option.device_type(),
                PROTO_CUDA,
                "Expected CUDA device option for CUDA tensor");
            wrapper->feed(data_obj);
          },
          "Copy data from given DLPack tensor into this tensor.")
      .def_property_readonly(
          "_shape",
          [](const DLPackWrapper<CUDAContext>& wrapper) {
            return wrapper.tensor->dims();
          })
      .def(
          "_reshape",
          [](DLPackWrapper<CUDAContext>* wrapper,
             std::vector<TIndex> new_dims) {
            wrapper->tensor->Resize(new_dims);
          });
}
|
|
|
|
|
|
2017-10-22 20:01:37 +00:00
|
|
|
PYBIND11_MODULE(caffe2_pybind11_state_gpu, m) {
  m.doc() = "pybind11 stateful interface to Caffe2 workspaces - GPU edition";

  // Install the CPU bindings first, then layer the CUDA-specific ones on
  // top of the same module.
  addGlobalMethods(m);
  addCUDAGlobalMethods(m);
  addObjectMethods(m);
  addCUDAObjectMethods(m);

  // Apply every registered pybind addition to this module.
  auto* registry = PybindAdditionRegistry();
  for (const auto& key : registry->Keys()) {
    registry->Create(key, m);
  }
}
|
2016-11-14 22:58:04 +00:00
|
|
|
} // namespace python
|
|
|
|
|
} // namespace caffe2
|