From ef07bb65e9529e8a842f770c81940e6b63e5a893 Mon Sep 17 00:00:00 2001 From: Edward Yang Date: Thu, 9 Apr 2020 14:54:13 -0700 Subject: [PATCH] [RELAND] Add DispatchKey impl overload; remove use of torch::dispatch (#36222) Summary: Pull Request resolved: https://github.com/pytorch/pytorch/pull/36222 Reland of #35706, with fixes to code analyzer. It is extremely common to define implementations of operators at a specific dispatch key, so we add an overload to impl specifically for this case. I then delete most uses of torch::dispatch dispatch_autograd call sites can't make use of this overload. So instead the new preferred way to specify something as autograd is to pass kAutograd as the dispatch key (short form, analogous to kCPU/kCUDA which we support today). I flip flopped about whether or not kAutograd should have the type DispatchKey or some other type (to help better encapsulate the DispatchKey enum); this is more direct and I can't think of any BC problems from this usage. Some other reorganization I did: - I renamed all of the worker functions in op_registration to have a leading underscore and made them private, just to make it more clear what the public versus private API were (the private API shouldn't be used by users because it doesn't come with && overloads) Note that this means I needed to adjust the regex in the code analyzer, because - In a few places where I was touching lines already, I replaced full DispatchKey typed out enums with shorter kFoo names, similar to kAutograd but I didn't publish these globally. - Code analyzer now prints a unified diff, and in the other order (because I tend to think of the diff as reporting how the /new/ result is different) Signed-off-by: Edward Z. Yang Test Plan: Imported from OSS Differential Revision: D20929256 Pulled By: ezyang fbshipit-source-id: c69b803d2b3a1a8aff70e14da33d3adec5239f13 --- aten/src/ATen/autocast_mode.cpp | 25 +++---- .../ATen/core/BackendSelectFallbackKernel.cpp | 2 +- .../core/op_registration/op_registration.cpp | 8 +-- .../core/op_registration/op_registration.h | 67 +++++++++++++------ .../op_registration/op_registration_test.cpp | 26 +++---- aten/src/ATen/function_wrapper.py | 6 +- aten/src/ATen/test/backend_fallback_test.cpp | 9 ++- aten/src/ATen/test/cpu_rng_test.cpp | 29 ++++---- c10/core/DispatchKey.h | 13 ++++ test/cpp_extensions/msnpu_extension.cpp | 8 +-- test/mobile/op_deps/simple_ops.cpp | 34 ++++++---- test/test_dispatch.py | 8 +-- tools/autograd/gen_variable_type.py | 10 ++- tools/code_analyzer/build.sh | 2 +- tools/code_analyzer/run_analyzer.sh | 2 +- .../jit/mobile/register_mobile_autograd.cpp | 44 ++++++------ 16 files changed, 169 insertions(+), 124 deletions(-) diff --git a/aten/src/ATen/autocast_mode.cpp b/aten/src/ATen/autocast_mode.cpp index d899bce578f..194b2c209ac 100644 --- a/aten/src/ATen/autocast_mode.cpp +++ b/aten/src/ATen/autocast_mode.cpp @@ -332,8 +332,7 @@ I think Option 2 is the right answer for all ops, not just convolutions. Option *****************************************************************************************************************/ auto register_fallthrough = c10::import() - .fallback(c10::dispatch(c10::DispatchKey::AutocastTensorId, - c10::CppFunction::makeFallthrough())); + .fallback(c10::DispatchKey::AutocastTensorId, c10::CppFunction::makeFallthrough()); /******************************************************************************************************************** Explicit registration for out-of-place ops @@ -361,17 +360,17 @@ Therefore, for the moment, this is all copy pasted in from VariableTypeEverythin // Common cases where registration signature matches redispatch signature // (that's why SIGNATURE is repeated in the WrapFunction instantiation) #define KERNEL(FUNC, REGISTER_NAME, SIGNATURE, POLICY) \ - .impl(REGISTER_NAME, c10::dispatch(DispatchKey::AutocastTensorId, \ - &WrapFunction::type::call)) + .impl(REGISTER_NAME, DispatchKey::AutocastTensorId, \ + &WrapFunction::type::call) #define KERNEL_UNBOXED_ONLY(FUNC, REGISTER_NAME, SIGNATURE, POLICY) \ - .impl(REGISTER_NAME, c10::dispatch(DispatchKey::AutocastTensorId, \ - c10::CppFunction::makeUnboxedOnly(&WrapFunction::type::call))) + .impl(REGISTER_NAME, DispatchKey::AutocastTensorId, \ + c10::CppFunction::makeUnboxedOnly(&WrapFunction::type::call)) // Less-common but still useful case: redispatching to a function with a new signature (e.g. appending a dtype) #define KERNEL_UNBOXED_ONLY_DIFFERENT_REDISPATCH_SIGNATURE(REDISPATCH_FUNC, REGISTER_NAME, REGISTER_SIGNATURE, REDISPATCH_SIGNATURE, POLICY) \ - .impl(REGISTER_NAME, c10::dispatch(DispatchKey::AutocastTensorId, \ - c10::CppFunction::makeUnboxedOnly(&WrapFunction::type::call))) + .impl(REGISTER_NAME, DispatchKey::AutocastTensorId, \ + c10::CppFunction::makeUnboxedOnly(&WrapFunction::type::call)) /***************************************** Explicit registration for out-of-place ops @@ -426,9 +425,8 @@ auto register_out_of_place = c10::import() KERNEL(ADD_NS(gelu), "aten::gelu", Tensor (const Tensor &), fp32) KERNEL_UNBOXED_ONLY(ADD_NS(layer_norm), "aten::layer_norm", Tensor (const Tensor &, IntArrayRef, const Tensor &, const Tensor &, double, bool), fp32) // The macro doesn't like this one so I had to write it out manually. - .impl("aten::native_layer_norm", - c10::dispatch(DispatchKey::AutocastTensorId, - CppFunction::makeUnboxedOnly(&WrapFunction (const Tensor &, const Tensor &, const Tensor &, int64_t, int64_t, double), std::tuple (const Tensor &, const Tensor &, const Tensor &, int64_t, int64_t, double), &ADD_NS(native_layer_norm)>::type::call))) + .impl("aten::native_layer_norm", DispatchKey::AutocastTensorId, + CppFunction::makeUnboxedOnly(&WrapFunction (const Tensor &, const Tensor &, const Tensor &, int64_t, int64_t, double), std::tuple (const Tensor &, const Tensor &, const Tensor &, int64_t, int64_t, double), &ADD_NS(native_layer_norm)>::type::call)) KERNEL_UNBOXED_ONLY(ADD_NS(group_norm), "aten::group_norm", Tensor (const Tensor &, int64_t, const Tensor &, const Tensor &, double, bool), fp32) KERNEL_UNBOXED_ONLY(ADD_NS(frobenius_norm), "aten::frobenius_norm", Tensor (const Tensor &), fp32) KERNEL_UNBOXED_ONLY(ADD_NS(frobenius_norm), "aten::frobenius_norm.dim", Tensor (const Tensor &, IntArrayRef, bool), fp32) @@ -496,9 +494,8 @@ auto register_out_of_place = c10::import() ; auto register_banned = torch::import() - .impl("aten::binary_cross_entropy", - torch::dispatch(DispatchKey::AutocastTensorId, - CppFunction::makeUnboxedOnly(&at::autocast::binary_cross_entropy_banned))); + .impl("aten::binary_cross_entropy", DispatchKey::AutocastTensorId, + CppFunction::makeUnboxedOnly(&at::autocast::binary_cross_entropy_banned)); } #endif diff --git a/aten/src/ATen/core/BackendSelectFallbackKernel.cpp b/aten/src/ATen/core/BackendSelectFallbackKernel.cpp index a95b5f87df1..5c1c1aeea36 100644 --- a/aten/src/ATen/core/BackendSelectFallbackKernel.cpp +++ b/aten/src/ATen/core/BackendSelectFallbackKernel.cpp @@ -3,7 +3,7 @@ namespace { static auto registry = c10::import() - .fallback(c10::dispatch(c10::DispatchKey::BackendSelect, c10::CppFunction::makeFallthrough())) + .fallback(c10::DispatchKey::BackendSelect, c10::CppFunction::makeFallthrough()) ; } diff --git a/aten/src/ATen/core/op_registration/op_registration.cpp b/aten/src/ATen/core/op_registration/op_registration.cpp index a8eedd13bbf..35decf9d0b8 100644 --- a/aten/src/ATen/core/op_registration/op_registration.cpp +++ b/aten/src/ATen/core/op_registration/op_registration.cpp @@ -128,13 +128,13 @@ Module& Module::operator=(Module&&) = default; // TODO: Error if an operator is def'ed multiple times. Right now we just // merge everything -Module& Module::def(FunctionSchema&& schema) & { +Module& Module::_def(FunctionSchema&& schema) & { if (ns_.has_value()) schema.setNamespaceIfNotSet(ns_->c_str()); registrars_.emplace_back(Dispatcher::singleton().registerDef(std::move(schema))); return *this; } -Module& Module::def(c10::either&& name_or_schema, CppFunction&& f) & { +Module& Module::_def(c10::either&& name_or_schema, CppFunction&& f) & { FunctionSchema schema = [&] { if (name_or_schema.is_right()) { return std::move(name_or_schema).right(); @@ -156,7 +156,7 @@ Module& Module::def(c10::either&& name_or_schema, return *this; } -Module& Module::impl(const char* name_str, CppFunction&& f) & { +Module& Module::_impl(const char* name_str, CppFunction&& f) & { auto name = torch::jit::parseName(name_str); if (ns_.has_value()) name.setNamespaceIfNotSet(ns_->c_str()); registrars_.emplace_back( @@ -171,7 +171,7 @@ Module& Module::impl(const char* name_str, CppFunction&& f) & { return *this; } -Module& Module::fallback(CppFunction&& f) & { +Module& Module::_fallback(CppFunction&& f) & { TORCH_CHECK(!ns_, "Cannot define fallbacks from namespaces, use c10::import().fallback() instead"); TORCH_CHECK(f.dispatch_key_, "Fallback for catch all function not supported"); registrars_.emplace_back(Dispatcher::singleton().registerFallback(*f.dispatch_key_, std::move(f.func_))); diff --git a/aten/src/ATen/core/op_registration/op_registration.h b/aten/src/ATen/core/op_registration/op_registration.h index dd7fcce58e1..5f34ebe8ea6 100644 --- a/aten/src/ATen/core/op_registration/op_registration.h +++ b/aten/src/ATen/core/op_registration/op_registration.h @@ -617,8 +617,8 @@ private: // // provide multiple; one per backend). We'll take care of calling // // the correct implementation depending on if we get a CPU // // tensor or a CUDA tensor -// .impl("aten::mul", torch::dispatch(torch::kCPU, &mul_cpu_impl)) -// .impl("aten::mul", torch::dispatch(torch::kCUDA, &mul_cuda_impl)) +// .impl("aten::mul", torch::kCPU, &mul_cpu_impl) +// .impl("aten::mul", torch::kCUDA, &mul_cuda_impl) // // Also, you can omit the top level namespace and specify it explicitly in // the sub-definitions, e.g., torch::import().impl("aten::mul", ...) @@ -726,12 +726,17 @@ template inline CppFunction dispatch(DeviceType type, Func&& raw_f) { auto deviceTypeToDispatchKey = [](DeviceType t){ switch (t) { + // This list is synchronized with the k-constants in c10/core/DeviceType.h case DeviceType::CPU: return c10::DispatchKey::CPUTensorId; case DeviceType::CUDA: return c10::DispatchKey::CUDATensorId; case DeviceType::XLA: return c10::DispatchKey::XLATensorId; + case DeviceType::HIP: + return c10::DispatchKey::HIPTensorId; + case DeviceType::MSNPU: + return c10::DispatchKey::MSNPUTensorId; default: TORCH_CHECK(false, "Device type ", t, " cannot be overloaded at dispatch time, " @@ -741,12 +746,6 @@ inline CppFunction dispatch(DeviceType type, Func&& raw_f) { return dispatch(deviceTypeToDispatchKey(type), std::forward(raw_f)); } -// Convenience for overriding autograd functionality -template -inline CppFunction dispatch_autograd(Func&& raw_f) { - return dispatch(c10::DispatchKey::VariableTensorId, std::forward(raw_f)); -} - inline FunctionSchema schema(const char* str, AliasAnalysisKind k) { FunctionSchema s = torch::jit::parseSchema(str); s.setAliasAnalysis(k); @@ -794,6 +793,14 @@ class CAFFE2_API Module final { friend Module _import_DOES_NOT_WORK_WITH_MOBILE_CUSTOM_BUILD(std::string ns); friend Module import(); +private: + // Non-user visible actual implementations of functions. These aren't + // public because we only implement & qualifier and not && qualifier + Module& _def(FunctionSchema&& schema) &; + Module& _def(c10::either&&, CppFunction&& f) &; + Module& _impl(const char* name, CppFunction&& f) &; + Module& _fallback(CppFunction&& f) &; + public: Module(const Module&) = delete; Module& operator=(const Module&) = delete; @@ -832,11 +839,10 @@ public: // Declare an operator with a schema, but don't provide any implementations // for it. You're expected to then provide implementations using the // impl() method. - Module& def(FunctionSchema&& schema) &; template Module& def(Schema&& raw_schema) & { FunctionSchema s = schema(std::forward(raw_schema)); - return def(std::move(s)); + return _def(std::move(s)); } template Module&& def(Schema&& raw_schema) && { @@ -848,12 +854,11 @@ public: // an implementation for it. def(n, f) is almost equivalent to def(n).impl(f), // except that if n is not a schema, then the schema is inferred from the // static type of f. - Module& def(c10::either&&, CppFunction&& f) &; template Module& def(NameOrSchema&& raw_name_or_schema, Func&& raw_f) & { CppFunction f(std::forward(raw_f)); auto name_or_schema = detail::constructSchemaOrName(std::forward(raw_name_or_schema)); - return def(std::move(name_or_schema), std::move(f)); + return _def(std::move(name_or_schema), std::move(f)); } template Module&& def(NameOrSchema&& raw_name_or_schema, Func&& raw_f) && { @@ -865,27 +870,50 @@ public: // implementations for a single operator at different dispatch keys // (see torch::dispatch). Implementations must have a corresponding // declaration (from def), otherwise they are invalid. - Module& impl(const char* name, CppFunction&& f) &; template Module& impl(const char* name, Func&& raw_f) & { CppFunction f(std::forward(raw_f)); - return impl(name, std::move(f)); + return _impl(name, std::move(f)); } template Module&& impl(const char* name, Func&& raw_f) && { impl(name, std::forward(raw_f)); return std::move(*this); } + // Convenience overload for directly specifying the dispatch key. Dispatch + // can validly be either DeviceType or DispatchKey; check torch::dispatch for + // the canonical list of accepted overloads. + template + Module& impl(const char* name, Dispatch&& key, Func&& raw_f) & { + return impl(name, dispatch(std::forward(key), std::forward(raw_f))); + } + template + Module&& impl(const char* name, Dispatch&& key, Func&& raw_f) && { + impl(name, std::forward(key), std::forward(raw_f)); + return std::move(*this); + } // Register a fallback implementation for all operators which will be used // if there is not a specific implementation for an operator available. - // At the moment, you must specify a dispatch key (see torch::dispatch) for - // your fallback. - Module& fallback(CppFunction&& f) &; + // Providing a DispatchKey is MANDATORY for fallback at the moment. + // + // Dispatch can validly be either DeviceType or DispatchKey; check + // torch::dispatch for the canonical list of accepted overloads. + template + Module& fallback(Dispatch&& key, Func&& raw_f) & { + return fallback(c10::dispatch(std::forward(key), std::forward(raw_f))); + } + template + Module&& fallback(Dispatch&& key, Func&& raw_f) && { + fallback(std::forward(key), std::forward(raw_f)); + return std::move(*this); + } + // NB: these overloads are here for completeness, but you'll probably want to + // use the direct Dispatch overload template Module& fallback(Func&& raw_f) & { - CppFunction f(std::forward(raw_f)); - return fallback(std::move(f)); + CppFunction f((std::forward(raw_f))); + return _fallback(std::move(f)); } template Module&& fallback(Func&& raw_f) && { @@ -921,7 +949,6 @@ namespace torch { // New-style API using c10::dispatch; - using c10::dispatch_autograd; using c10::schema; using c10::import; } diff --git a/aten/src/ATen/core/op_registration/op_registration_test.cpp b/aten/src/ATen/core/op_registration/op_registration_test.cpp index 622817c3b35..e141432a667 100644 --- a/aten/src/ATen/core/op_registration/op_registration_test.cpp +++ b/aten/src/ATen/core/op_registration/op_registration_test.cpp @@ -1297,11 +1297,11 @@ TEST(NewOperatorRegistrationTest, testBasics) { .def("_test::dummy2(Tensor self) -> Tensor") .def("_test::dummy3(Tensor self, Tensor other) -> Tensor", [](const Tensor& self, const Tensor& other) { return self; }) .def("_test::dummy4", [](const Tensor& self, const Tensor& other) { return other; }) - .impl("_test::dummy", c10::dispatch(c10::DeviceType::CPU, [](const Tensor& self) { return self; })) - .impl("_test::dummy", c10::dispatch(c10::DeviceType::XLA, [](const Tensor& self) { return self; })) + .impl("_test::dummy", c10::DeviceType::CPU, [](const Tensor& self) { return self; }) + .impl("_test::dummy", c10::DeviceType::XLA, [](const Tensor& self) { return self; }) // Internal API - .impl("_test::dummy2", c10::dispatch(c10::DispatchKey::CPUTensorId, [](const Tensor& self) { return self; })) - .impl("_test::dummy2", c10::dispatch(c10::DispatchKey::XLATensorId, [](const Tensor& self) { return self; })); + .impl("_test::dummy2", c10::DispatchKey::CPUTensorId, [](const Tensor& self) { return self; }) + .impl("_test::dummy2", c10::DispatchKey::XLATensorId, [](const Tensor& self) { return self; }); ASSERT_TRUE(Dispatcher::singleton().findSchema({"_test::dummy", ""}).has_value()); // Should have a schema even if there are no impls @@ -1382,7 +1382,7 @@ TEST(NewOperatorRegistrationTest, dispatch) { auto registrar = c10::import() .def("test::fn_cpu", torch::dispatch(c10::DispatchKey::CPUTensorId, [&](const Tensor& x) { cpu_called = true; return x; })) .def("test::fn_cuda", torch::dispatch(c10::kCUDA, [&](const Tensor& x) { cuda_called = true; return x; })) - .def("test::fn_autograd", torch::dispatch_autograd([&](const Tensor& x) { autograd_called = true; return x; })); + .def("test::fn_autograd", torch::dispatch(c10::kAutograd, [&](const Tensor& x) { autograd_called = true; return x; })); { auto op = Dispatcher::singleton().findSchema({"test::fn_cpu", ""}); @@ -1415,9 +1415,11 @@ TEST(NewOperatorRegistrationTest, dispatchMultiple) { bool autograd_called = false; auto registrar = c10::import() .def("test::fn(Tensor self) -> Tensor") - .impl("test::fn", torch::dispatch(c10::DispatchKey::CPUTensorId, [&](const Tensor& x) { cpu_called = true; return x; })) - .impl("test::fn", torch::dispatch(c10::kCUDA, [&](const Tensor& x) { cuda_called = true; return x; })) - .impl("test::fn", torch::dispatch_autograd([&](const Tensor& x) { autograd_called = true; return x; })); + // NB: Direct use of DispatchKey is discouraged; use the DeviceType + // k-synonyms instead + .impl("test::fn", c10::DispatchKey::CPUTensorId, [&](const Tensor& x) { cpu_called = true; return x; }) + .impl("test::fn", c10::kCUDA, [&](const Tensor& x) { cuda_called = true; return x; }) + .impl("test::fn", c10::kAutograd, [&](const Tensor& x) { autograd_called = true; return x; }); auto op = Dispatcher::singleton().findSchema({"test::fn", ""}); ASSERT_TRUE(op.has_value()); @@ -1440,7 +1442,7 @@ TEST(NewOperatorRegistrationTest, dispatchMultiple) { TEST(NewOperatorRegistrationTest, fallback) { auto registrar = c10::import() - .fallback(torch::dispatch(c10::kCPU, c10::CppFunction::makeFromBoxedFunction<&backend_fallback_kernel>())); + .fallback(c10::kCPU, c10::CppFunction::makeFromBoxedFunction<&backend_fallback_kernel>()); auto registrar1 = c10::RegisterOperators().op("_test::dummy(Tensor dummy, str input) -> ()"); auto op = Dispatcher::singleton().findSchema({"_test::dummy", ""}); @@ -1454,12 +1456,12 @@ TEST(NewOperatorRegistrationTest, BackendSelectRedispatchesToCPU) { bool backend_generic_called = false; auto registrar = c10::import() .def("test::fn(Tensor self) -> Tensor") - .impl("test::fn", torch::dispatch(c10::kCPU, [&](const Tensor& x) { cpu_called = true; return x; })) - .impl("test::fn", torch::dispatch(c10::DispatchKey::BackendSelect, [&](const Tensor& x) { + .impl("test::fn", c10::kCPU, [&](const Tensor& x) { cpu_called = true; return x; }) + .impl("test::fn", c10::DispatchKey::BackendSelect, [&](const Tensor& x) { backend_generic_called = true; auto op = c10::Dispatcher::singleton().findSchema({"test::fn", ""}); return c10::Dispatcher::singleton().callUnboxedRedispatch(*op, c10::DispatchKey::BackendSelect, x); - })) + }) ; auto op = Dispatcher::singleton().findSchema({"test::fn", ""}); ASSERT_TRUE(op.has_value()); diff --git a/aten/src/ATen/function_wrapper.py b/aten/src/ATen/function_wrapper.py index adc30a73919..fcf2f83bd76 100644 --- a/aten/src/ATen/function_wrapper.py +++ b/aten/src/ATen/function_wrapper.py @@ -117,16 +117,16 @@ DEFAULT_UNBOXEDONLY_FUNCTION_REGISTRATION = CodeTemplate("""\ CppFunction::makeUnboxedOnly(TypeDefault::${type_wrapper_name})) """) BACKEND_UNBOXEDONLY_FUNCTION_REGISTRATION = CodeTemplate("""\ -.impl("${operator_name_with_overload}", torch::dispatch( +.impl("${operator_name_with_overload}", DispatchKey::${Backend}TensorId, - CppFunction::makeUnboxedOnly(${Type}::${type_wrapper_name}))) + CppFunction::makeUnboxedOnly(${Type}::${type_wrapper_name})) """) DEFAULT_FUNCTION_REGISTRATION = CodeTemplate("""\ .impl("${operator_name_with_overload}", &TypeDefault::${type_wrapper_name}) """) BACKEND_FUNCTION_REGISTRATION = CodeTemplate("""\ .impl("${operator_name_with_overload}", - torch::dispatch(DispatchKey::${Backend}TensorId, &${Type}::${type_wrapper_name})) + DispatchKey::${Backend}TensorId, &${Type}::${type_wrapper_name}) """) # add non-virtual declaration to TensorBody.h diff --git a/aten/src/ATen/test/backend_fallback_test.cpp b/aten/src/ATen/test/backend_fallback_test.cpp index 5e8ccd20d70..a1e0c0871fe 100644 --- a/aten/src/ATen/test/backend_fallback_test.cpp +++ b/aten/src/ATen/test/backend_fallback_test.cpp @@ -138,12 +138,11 @@ TEST(BackendFallbackTest, TestBackendFallbackWithWrapper) { TEST(BackendFallbackTest, TestFallthroughBackendFallback) { // By default fallthrough auto registry = c10::import() - .fallback( - c10::dispatch(DispatchKey::TESTING_ONLY_GenericModeTensorId, - c10::CppFunction::makeFallthrough())) + .fallback(DispatchKey::TESTING_ONLY_GenericModeTensorId, + c10::CppFunction::makeFallthrough()) .impl("aten::mul.Tensor", - c10::dispatch(DispatchKey::TESTING_ONLY_GenericModeTensorId, - c10::CppFunction::makeFromBoxedFunction<&generic_mode_fallback>())); + DispatchKey::TESTING_ONLY_GenericModeTensorId, + c10::CppFunction::makeFromBoxedFunction<&generic_mode_fallback>()); c10::impl::IncludeDispatchKeyGuard guard(DispatchKey::TESTING_ONLY_GenericModeTensorId); diff --git a/aten/src/ATen/test/cpu_rng_test.cpp b/aten/src/ATen/test/cpu_rng_test.cpp index 238658e9a17..735cd60cb03 100644 --- a/aten/src/ATen/test/cpu_rng_test.cpp +++ b/aten/src/ATen/test/cpu_rng_test.cpp @@ -14,8 +14,10 @@ using namespace at; namespace { +constexpr auto kCustomRNG = DispatchKey::CustomRNGKeyId; + struct TestCPUGenerator : public c10::GeneratorImpl { - TestCPUGenerator(uint64_t value) : GeneratorImpl{Device(DeviceType::CPU), DispatchKeySet(DispatchKey::CustomRNGKeyId)}, value_(value) { } + TestCPUGenerator(uint64_t value) : GeneratorImpl{Device(DeviceType::CPU), DispatchKeySet(kCustomRNG)}, value_(value) { } ~TestCPUGenerator() = default; uint32_t random() { return value_; } uint64_t random64() { return value_; } @@ -98,21 +100,20 @@ class RNGTest : public ::testing::Test { void SetUp() override { static auto registry = torch::import() // Random - .impl("aten::random_.from", torch::dispatch(DispatchKey::CustomRNGKeyId, CppFunction::makeUnboxedOnly(random_from_to))) - .impl("aten::random_.to", torch::dispatch(DispatchKey::CustomRNGKeyId, CppFunction::makeUnboxedOnly(random_to))) - .impl("aten::random_", torch::dispatch(DispatchKey::CustomRNGKeyId, CppFunction::makeUnboxedOnly(random_))) + .impl("aten::random_.from", kCustomRNG, CppFunction::makeUnboxedOnly(random_from_to)) + .impl("aten::random_.to", kCustomRNG, CppFunction::makeUnboxedOnly(random_to)) + .impl("aten::random_", kCustomRNG, CppFunction::makeUnboxedOnly(random_)) // Normal - .impl("aten::normal_", torch::dispatch(DispatchKey::CustomRNGKeyId, CppFunction::makeUnboxedOnly(normal_))) - .impl("aten::normal.Tensor_float_out", torch::dispatch(DispatchKey::CustomRNGKeyId, CppFunction::makeUnboxedOnly(normal_Tensor_float_out))) - .impl("aten::normal.float_Tensor_out", torch::dispatch(DispatchKey::CustomRNGKeyId, CppFunction::makeUnboxedOnly(normal_float_Tensor_out))) - .impl("aten::normal.Tensor_Tensor_out", torch::dispatch(DispatchKey::CustomRNGKeyId, CppFunction::makeUnboxedOnly(normal_Tensor_Tensor_out))) - .impl("aten::normal.Tensor_float", torch::dispatch(DispatchKey::CustomRNGKeyId, CppFunction::makeUnboxedOnly(normal_Tensor_float))) - .impl("aten::normal.float_Tensor", torch::dispatch(DispatchKey::CustomRNGKeyId, CppFunction::makeUnboxedOnly(normal_float_Tensor))) - .impl("aten::normal.Tensor_Tensor", torch::dispatch(DispatchKey::CustomRNGKeyId, CppFunction::makeUnboxedOnly(normal_Tensor_Tensor))) - // Uniform - .impl("aten::uniform_", torch::dispatch(DispatchKey::CustomRNGKeyId, CppFunction::makeUnboxedOnly(uniform_))) + .impl("aten::normal_", kCustomRNG, CppFunction::makeUnboxedOnly(normal_)) + .impl("aten::normal.Tensor_float_out", kCustomRNG, CppFunction::makeUnboxedOnly(normal_Tensor_float_out)) + .impl("aten::normal.float_Tensor_out", kCustomRNG, CppFunction::makeUnboxedOnly(normal_float_Tensor_out)) + .impl("aten::normal.Tensor_Tensor_out", kCustomRNG, CppFunction::makeUnboxedOnly(normal_Tensor_Tensor_out)) + .impl("aten::normal.Tensor_float", kCustomRNG, CppFunction::makeUnboxedOnly(normal_Tensor_float)) + .impl("aten::normal.float_Tensor", kCustomRNG, CppFunction::makeUnboxedOnly(normal_float_Tensor)) + .impl("aten::normal.Tensor_Tensor", kCustomRNG, CppFunction::makeUnboxedOnly(normal_Tensor_Tensor)) + .impl("aten::uniform_", kCustomRNG, CppFunction::makeUnboxedOnly(uniform_)) // Cauchy - .impl("aten::cauchy_", torch::dispatch(DispatchKey::CustomRNGKeyId, CppFunction::makeUnboxedOnly(custom_rng_cauchy_))) + .impl("aten::cauchy_", kCustomRNG, CppFunction::makeUnboxedOnly(custom_rng_cauchy_)) ; } }; diff --git a/c10/core/DispatchKey.h b/c10/core/DispatchKey.h index fe9eddb5d18..75e3784c4fd 100644 --- a/c10/core/DispatchKey.h +++ b/c10/core/DispatchKey.h @@ -204,8 +204,21 @@ static inline DispatchKey XLATensorId() { return DispatchKey::XLATensorId; } +// These are some convenience identifiers for dispatch keys which are +// shorter to type than their long counterparts. Note that some of these +// dispatch keys directly correspond to DeviceType; and most APIs that +// accept DispatchKey also accept DeviceType; e.g., +// torch::dispatch(torch::kCPU, ...) is also valid. +constexpr DispatchKey kAutograd = DispatchKey::VariableTensorId; + } // namespace c10 +namespace torch { + // Expose the constant, but not the TYPE (DispatchKey is an implementation + // detail!) + using c10::kAutograd; +} + // NB: You really shouldn't use this instance; this enum is guaranteed // to be pretty small so a regular array should be acceptable. namespace std { diff --git a/test/cpp_extensions/msnpu_extension.cpp b/test/cpp_extensions/msnpu_extension.cpp index b69faa66be8..b0e035d0e4e 100644 --- a/test/cpp_extensions/msnpu_extension.cpp +++ b/test/cpp_extensions/msnpu_extension.cpp @@ -49,10 +49,10 @@ std::tuple fake_convolution_backward( void init_msnpu_extension() { static auto registry = torch::import() - .impl("aten::empty.memory_format", torch::dispatch(DispatchKey::MSNPUTensorId, CppFunction::makeUnboxedOnly(empty_override))) - .impl("aten::add.Tensor", torch::dispatch(DispatchKey::MSNPUTensorId, CppFunction::makeUnboxedOnly(add_override))) - .impl("aten::convolution_overrideable", torch::dispatch(DispatchKey::MSNPUTensorId, CppFunction::makeUnboxedOnly(fake_convolution))) - .impl("aten::convolution_backward_overrideable", torch::dispatch(DispatchKey::MSNPUTensorId, CppFunction::makeUnboxedOnly(fake_convolution_backward))) + .impl("aten::empty.memory_format", kMSNPU, CppFunction::makeUnboxedOnly(empty_override)) + .impl("aten::add.Tensor", kMSNPU, CppFunction::makeUnboxedOnly(add_override)) + .impl("aten::convolution_overrideable", kMSNPU, CppFunction::makeUnboxedOnly(fake_convolution)) + .impl("aten::convolution_backward_overrideable", kMSNPU, CppFunction::makeUnboxedOnly(fake_convolution_backward)) ; } diff --git a/test/mobile/op_deps/simple_ops.cpp b/test/mobile/op_deps/simple_ops.cpp index 27c91e28fde..b8f8090f1df 100644 --- a/test/mobile/op_deps/simple_ops.cpp +++ b/test/mobile/op_deps/simple_ops.cpp @@ -62,23 +62,29 @@ Tensor FF_op(const Tensor& self) { namespace { +// NB: Some of these registrations (AA, EE) are not what you +// actually expect to see in practice, but we cover them here +// as they are technically "valid" API calls and we want to +// make sure the analyzer catches them. (The analyzer is very +// generic, so actually there isn't any reason it shouldn't work, +// but it's good to test them!) +// +// Additionally, the code in this file is not really runnable; for +// example we are missing schemas for all of the impl registrations +// here. The analyzer doesn't really care, as it only really +// cares about the name auto registerer = torch::import() - .def("aten::AA(Tensor self) -> Tensor", - torch::dispatch(DispatchKey::CPUTensorId, &AA_op)) + .def("aten::AA(Tensor self) -> Tensor", torch::dispatch(kCPU, &AA_op)) .def("aten::BB(Tensor self) -> Tensor", &BB_op) - .impl("aten::CC(Tensor self) -> Tensor", - torch::dispatch(DispatchKey::CPUTensorId, &CC_op)) - .impl("aten::DD(Tensor self) -> Tensor", &DD_op) - .def("aten::EE(Tensor self) -> Tensor", torch::dispatch( - DispatchKey::CPUTensorId, - CppFunction::makeUnboxedOnly(EE_op))) - .def("aten::FF(Tensor self) -> Tensor", - CppFunction::makeUnboxedOnly(FF_op)) - .impl("aten::GG(Tensor self) -> Tensor", torch::dispatch( - DispatchKey::CPUTensorId, [] (Tensor a) -> Tensor { + .impl("aten::CC", kCPU, &CC_op) + .impl("aten::DD", &DD_op) + .def("aten::EE(Tensor self) -> Tensor", torch::dispatch(kCPU, CppFunction::makeUnboxedOnly(EE_op))) + .def("aten::FF(Tensor self) -> Tensor", CppFunction::makeUnboxedOnly(FF_op)) + .impl("aten::GG", + kCPU, [] (Tensor a) -> Tensor { return call_FF_op(a); - })) - .impl("aten::HH(Tensor self) -> Tensor", + }) + .impl("aten::HH", [] (Tensor a) -> Tensor { return a; }); diff --git a/test/test_dispatch.py b/test/test_dispatch.py index 54d2bb59afb..d22b8b71a65 100644 --- a/test/test_dispatch.py +++ b/test/test_dispatch.py @@ -190,8 +190,7 @@ class TestDispatch(TestCase): lambda m: m.def_("foo(Tensor x) -> Tensor"), # m.impl("test_def", [](const Tensor& x) { return x }) lambda m: m.impl_t_t("foo"), - # m.impl("test_def", - # torch::dispatch_autograd([](const Tensor& x) { return x })) + # m.impl("test_def", kAutograd, [](const Tensor& x) { return x }) lambda m: m.impl_t_t("foo", dispatch="autograd") ]) self.assertExpectedInline(r, '''\ @@ -217,7 +216,7 @@ catchall: impl_t_t :: (Tensor _0) -> (Tensor _0) [ boxed unboxed ] r = self.commute("foo", [ # m.def("foo", [](const Tensor & x) { return x }) lambda m: m.def_name_t_t("foo"), - # m.impl("foo", torch::dispatch_autograd([](const Tensor & x) { return x })) + # m.impl("foo", torch::kAutograd, [](const Tensor & x) { return x }) lambda m: m.impl_t_t("foo", "autograd") ]) self.assertExpectedInline(r, '''\ @@ -243,8 +242,7 @@ alias analysis kind: FROM_SCHEMA r = self.commute("foo", [ # m.impl("foo", [](const Tensor& x) { return x }) lambda m: m.impl_t_t("foo"), - # m.impl("foo", - # torch::dispatch_autograd([](const Tensor& x) { return x })) + # m.impl("foo", torch::kAutograd, [](const Tensor& x) { return x }) lambda m: m.impl_t_t("foo", "autograd") ]) self.assertExpectedInline(r, '''\ diff --git a/tools/autograd/gen_variable_type.py b/tools/autograd/gen_variable_type.py index d4836677fd7..9935df1dcb9 100644 --- a/tools/autograd/gen_variable_type.py +++ b/tools/autograd/gen_variable_type.py @@ -183,17 +183,15 @@ ${return_type} ${type_wrapper_name}(${type_method_formals}) { """) UNBOXEDONLY_WRAPPER_REGISTRATION = CodeTemplate("""\ -.impl("${operator_name_with_overload}", - torch::dispatch_autograd( +.impl("${operator_name_with_overload}", torch::kAutograd, CppFunction::makeUnboxedOnly(VariableType::${type_wrapper_name}) - )) + ) """) WRAPPER_REGISTRATION = CodeTemplate("""\ -.impl("${operator_name_with_overload}", - torch::dispatch_autograd( +.impl("${operator_name_with_overload}", torch::kAutograd, &VariableType::${type_wrapper_name} - )) + ) """) UNPACK_TENSOR = CodeTemplate("""\ diff --git a/tools/code_analyzer/build.sh b/tools/code_analyzer/build.sh index 170fe36f668..c59c8f5e332 100755 --- a/tools/code_analyzer/build.sh +++ b/tools/code_analyzer/build.sh @@ -140,7 +140,7 @@ check_test_result() { echo "Test result is the same as expected." else echo "Test result is DIFFERENT from expected!" - diff "${OUTPUT}" "${TEST_SRC_ROOT}/expected_deps.yaml" + diff -u "${TEST_SRC_ROOT}/expected_deps.yaml" "${OUTPUT}" exit 1 fi } diff --git a/tools/code_analyzer/run_analyzer.sh b/tools/code_analyzer/run_analyzer.sh index 68e532d05b4..9f857f0886b 100755 --- a/tools/code_analyzer/run_analyzer.sh +++ b/tools/code_analyzer/run_analyzer.sh @@ -10,7 +10,7 @@ echo "Analyze: ${INPUT}" "${ANALYZER_BIN}" \ -op_schema_pattern="^(_aten|_prim|aten|quantized|profiler|_test)::[^ ]+" \ - -op_register_pattern="c10::RegisterOperators::(op|checkSchemaAndRegisterOp_)|c10::Module::(def|impl)" \ + -op_register_pattern="c10::RegisterOperators::(op|checkSchemaAndRegisterOp_)|c10::Module::(_?def|_?impl)" \ -op_invoke_pattern="c10::Dispatcher::findSchema|callOp" \ -format="${FORMAT}" \ ${EXTRA_ANALYZER_FLAGS} \ diff --git a/torch/csrc/jit/mobile/register_mobile_autograd.cpp b/torch/csrc/jit/mobile/register_mobile_autograd.cpp index 377aaf4120a..76bbc47eee3 100644 --- a/torch/csrc/jit/mobile/register_mobile_autograd.cpp +++ b/torch/csrc/jit/mobile/register_mobile_autograd.cpp @@ -105,30 +105,34 @@ static auto registry = torch::import() .impl( "_aten::add.Scalar", - torch::dispatch_autograd(torch::autograd::VariableType::add_Scalar)) + torch::kAutograd, + torch::autograd::VariableType::add_Scalar) .impl( "_aten::mul.Tensor", - torch::dispatch_autograd(torch::autograd::VariableType::mul_Tensor)) + torch::kAutograd, + torch::autograd::VariableType::mul_Tensor) .impl( "_aten::conv2d", - torch::dispatch_autograd( - CppFunction::makeFromBoxedFunction())) - .impl("_aten::dropout", torch::dispatch_autograd(VariableType::dropout)) + torch::kAutograd, + CppFunction::makeFromBoxedFunction()) + .impl("_aten::dropout", torch::kAutograd, VariableType::dropout) .impl( "_aten::feature_dropout", - torch::dispatch_autograd(VariableType::feature_dropout)) + torch::kAutograd, + VariableType::feature_dropout) .impl( "_aten::log_softmax.int", - torch::dispatch_autograd( - CppFunction::makeFromBoxedFunction())) + torch::kAutograd, + CppFunction::makeFromBoxedFunction()) .impl( "_aten::max_pool2d", - torch::dispatch_autograd([](const Tensor& self, - c10::List kernel_size, - c10::List stride, - c10::List padding, - c10::List dilation, - bool ceil_mode = false) { + torch::kAutograd, + [](const Tensor& self, + c10::List kernel_size, + c10::List stride, + c10::List padding, + c10::List dilation, + bool ceil_mode = false) { return VariableType::max_pool2d( self, kernel_size.vec(), @@ -136,12 +140,12 @@ static auto registry = padding.vec(), dilation.vec(), ceil_mode); - })) - .impl("_aten::relu", torch::dispatch_autograd(VariableType::relu)) + }) + .impl("_aten::relu", torch::kAutograd, VariableType::relu) .impl( "_aten::view", - torch::dispatch_autograd( - CppFunction::makeFromBoxedFunction())) - .impl("_aten::t", torch::dispatch_autograd(VariableType::t)) - .impl("_aten::addmm", torch::dispatch_autograd(VariableType::addmm)); + torch::kAutograd, + CppFunction::makeFromBoxedFunction()) + .impl("_aten::t", torch::kAutograd, VariableType::t) + .impl("_aten::addmm", torch::kAutograd, VariableType::addmm); } // anonymous namespace