// pytorch/test/cpp_extensions/msnpu_extension.cpp

#include <torch/extension.h>
#include <torch/library.h>
using namespace at;
// Records which override kernel ran most recently; read back from Python
// via get_test_int() below.
static int test_int;
// Builds a tensor that carries the MSNPU dispatch key and the requested
// metadata, but no real storage: MSNPU kernels in this test only need shape
// and dtype, never actual data.
Tensor get_tensor(caffe2::TypeMeta dtype, IntArrayRef size) {
  auto tensor_impl = c10::make_intrusive<TensorImpl, UndefinedTensorImpl>(
      Storage(
          Storage::use_byte_size_t(),
          0,
          at::DataPtr(nullptr, Device(DeviceType::MSNPU, 0)),
          nullptr,
          false),
      DispatchKey::MSNPU,
      dtype);
  // This is a hack to work around the shape checks in _convolution.
  tensor_impl->set_sizes_contiguous(size);
  return Tensor(std::move(tensor_impl));
}
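// For illustration, the kind of metadata-only tensor this produces
// (hypothetical call, not part of the test itself):
//
//   Tensor t = get_tensor(caffe2::TypeMeta::Make<float>(), {2, 3});
//   // t.device() -> msnpu:0, t.sizes() -> [2, 3], no backing memory.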
Tensor empty_override(
    IntArrayRef size,
    const TensorOptions& options,
    c10::optional<c10::MemoryFormat> optional_memory_format) {
  test_int = 0;
  return get_tensor(options.dtype(), size);
}
Tensor add_override(const Tensor& a, const Tensor& b, Scalar c) {
  test_int = 1;
  return get_tensor(a.dtype(), a.sizes());
}
Tensor fake_convolution(
    const Tensor& input, const Tensor& weight, const c10::optional<Tensor>& bias,
    IntArrayRef stride, IntArrayRef padding, IntArrayRef dilation,
    bool transposed, IntArrayRef output_padding, int64_t groups) {
  test_int = 2;
  // Only the first two dimensions of the output shape are correct.
  return get_tensor(
      input.dtype(), {input.size(0), weight.size(0), input.size(2), input.size(3)});
}
std::tuple<Tensor, Tensor, Tensor> fake_convolution_backward(
    const Tensor& grad_output, const Tensor& input, const Tensor& weight,
    IntArrayRef stride, IntArrayRef padding,
    IntArrayRef dilation, bool transposed, IntArrayRef output_padding,
    int64_t groups, std::array<bool, 3> output_mask) {
  test_int = 3;
  return std::tuple<Tensor, Tensor, Tensor>(
      get_tensor(input.dtype(), input.sizes()),
      get_tensor(weight.dtype(), weight.sizes()),
      get_tensor(input.dtype(), {}));
}
// Register the fake kernels above as the MSNPU implementations of the
// corresponding aten operators.
TORCH_LIBRARY_IMPL(aten, MSNPU, m) {
  m.impl_UNBOXED("empty.memory_format", empty_override);
  m.impl_UNBOXED("add.Tensor", add_override);
  m.impl_UNBOXED("convolution_overrideable", fake_convolution);
  m.impl_UNBOXED("convolution_backward_overrideable", fake_convolution_backward);
}
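// A rough sketch of how Python code would hit these kernels once the
// extension is built (module/variable names are illustrative, not the
// actual test file):
//
//   a = torch.empty(5, 5, device='msnpu')  # dispatches to empty_override
//   b = a + a                              # dispatches to add_override
//   assert msnpu_extension.get_test_int() == 1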
// TODO: Extend this to exercise multi-device setting. In that case,
// we need to add a thread local variable to track the current device.
struct MSNPUGuardImpl final : public c10::impl::DeviceGuardImplInterface {
  static constexpr DeviceType static_type = DeviceType::MSNPU;

  MSNPUGuardImpl() {}
  MSNPUGuardImpl(DeviceType t) {
    AT_ASSERT(t == DeviceType::MSNPU);
  }
  DeviceType type() const override {
    return DeviceType::MSNPU;
  }
  Device exchangeDevice(Device d) const override {
    AT_ASSERT(d.type() == DeviceType::MSNPU);
    AT_ASSERT(d.index() == 0);
    return d;
  }
  Device getDevice() const override {
    return Device(DeviceType::MSNPU, 0);
  }
  void setDevice(Device d) const override {
    AT_ASSERT(d.type() == DeviceType::MSNPU);
    AT_ASSERT(d.index() == 0);
  }
  void uncheckedSetDevice(Device d) const noexcept override {}
  Stream getStream(Device d) const noexcept override {
    return Stream(Stream::DEFAULT, Device(DeviceType::MSNPU, 0));
  }
  Stream exchangeStream(Stream s) const noexcept override {
    return Stream(Stream::DEFAULT, Device(DeviceType::MSNPU, 0));
  }
  DeviceIndex deviceCount() const noexcept override {
    return 1;
  }

  // Event-related functions
  void record(
      void** event,
      const Stream& stream,
      const DeviceIndex device_index,
      const EventFlag flag) const override {
    TORCH_CHECK(false, "MSNPU backend doesn't support events.");
  }
  void block(void* event, const Stream& stream) const override {
    TORCH_CHECK(false, "MSNPU backend doesn't support events.");
  }
  bool queryEvent(void* event) const override {
    TORCH_CHECK(false, "MSNPU backend doesn't support events.");
  }
  void destroyEvent(void* event, const DeviceIndex device_index)
      const noexcept override {}
};
constexpr DeviceType MSNPUGuardImpl::static_type;
C10_REGISTER_GUARD_IMPL(MSNPU, MSNPUGuardImpl);
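// Once the guard impl is registered, the generic device-guard machinery works
// for MSNPU devices; a minimal sketch (illustrative, would run inside some
// function):
//
//   c10::DeviceGuard guard(c10::Device(c10::DeviceType::MSNPU, 0));
//   // The guard calls getDevice()/setDevice() above under the hood.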
int get_test_int() {
  return test_int;
}
PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
  m.def("get_test_int", &get_test_int);
}
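// A minimal load-and-poke sketch from Python (build flow is illustrative;
// the real test harness lives elsewhere in the repo):
//
//   import torch
//   from torch.utils.cpp_extension import load
//   msnpu = load(name='msnpu_extension', sources=['msnpu_extension.cpp'])
//   torch.empty(2, 2, device='msnpu')
//   assert msnpu.get_test_int() == 0  # empty_override ran last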