Revert "Deprecate registering autograd kernels at not an autograd key (#104481)"
This reverts commit ed13ab6664.
Reverted https://github.com/pytorch/pytorch/pull/104481 on behalf of https://github.com/atalman due to failed in periodic tests ([comment](https://github.com/pytorch/pytorch/pull/104481#issuecomment-1631552846))
parent 2f95a3d0fc
commit 24aa8b9b9a
10 changed files with 28 additions and 651 deletions
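For context: the change being reverted switched the default Autograd-key fallback for custom operators from a silent fallthrough to torch::autograd::basicAutogradNotImplementedFallback(), which warns when backward reaches an operator that never registered an autograd kernel. Below is a minimal sketch (not part of this commit) of that behavior, adapted from the test/autograd/test_fallback.py file removed in this diff; the private hooks torch._C._set_autograd_fallback_mode / _get_autograd_fallback_mode and the "_fallback_sketch" namespace are illustrative and only exist while #104481 is applied, so this snippet does not run after the revert.

import torch
from torch.library import Library

# Pre-revert, the C++ default is already AutogradFallbackMode::Warn; set it explicitly here.
torch._C._set_autograd_fallback_mode("warn")

lib = Library("_fallback_sketch", "FRAGMENT")  # hypothetical namespace for illustration
lib.define("foo(Tensor a, Tensor b) -> Tensor")
# CPU-only kernel: nothing is registered to any Autograd key for this op.
lib.impl("foo", lambda a, b: torch.tensor(a.detach().numpy() + b.detach().numpy()), "CPU")

a = torch.randn([], requires_grad=True)
b = torch.randn([], requires_grad=True)
out = torch.ops._fallback_sketch.foo(a, b).sum()
# With the deprecation in place, this backward() warns:
#   "foo: an autograd kernel was not registered to the Autograd key(s) ..."
# With the fallthrough restored by this revert, `out` simply does not require grad.
out.backward()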
@@ -268,7 +268,6 @@ test_dynamo_shard() {
test_package \
test_legacy_vmap \
test_custom_op_testing \
test_content_store \
export/test_db \
functorch/test_dims \
functorch/test_aotdispatch \

@@ -1,7 +1,6 @@
#include <ATen/core/dispatch/Dispatcher.h>
#include <ATen/core/LegacyTypeDispatch.h>
#include <torch/library.h>
#include <torch/csrc/autograd/autograd_not_implemented_fallback.h>

/*
 * This file implements a variable fallback kernel for custom operators.

@@ -28,38 +27,36 @@ namespace {
// NB: But not the private use ones; maybe the extension wants
// to override it themselves!

#define AUTOGRAD_FALLBACK torch::autograd::basicAutogradNotImplementedFallback()

TORCH_LIBRARY_IMPL(_, AutogradOther, m) {
  m.fallback(AUTOGRAD_FALLBACK);
  m.fallback(torch::CppFunction::makeFallthrough());
}

TORCH_LIBRARY_IMPL(_, AutogradCPU, m) {
  m.fallback(AUTOGRAD_FALLBACK);
  m.fallback(torch::CppFunction::makeFallthrough());
}

TORCH_LIBRARY_IMPL(_, AutogradXPU, m) {
  m.fallback(AUTOGRAD_FALLBACK);
  m.fallback(torch::CppFunction::makeFallthrough());
}

TORCH_LIBRARY_IMPL(_, AutogradCUDA, m) {
  m.fallback(AUTOGRAD_FALLBACK);
  m.fallback(torch::CppFunction::makeFallthrough());
}

TORCH_LIBRARY_IMPL(_, AutogradXLA, m) {
  m.fallback(AUTOGRAD_FALLBACK);
  m.fallback(torch::CppFunction::makeFallthrough());
}

TORCH_LIBRARY_IMPL(_, AutogradLazy, m) {
  m.fallback(AUTOGRAD_FALLBACK);
  m.fallback(torch::CppFunction::makeFallthrough());
}

TORCH_LIBRARY_IMPL(_, AutogradMPS, m) {
  m.fallback(AUTOGRAD_FALLBACK);
  m.fallback(torch::CppFunction::makeFallthrough());
}

TORCH_LIBRARY_IMPL(_, AutogradMeta, m) {
  m.fallback(AUTOGRAD_FALLBACK);
  m.fallback(torch::CppFunction::makeFallthrough());
}

// see Note [ADInplaceOrView key]

@@ -68,9 +65,7 @@ TORCH_LIBRARY_IMPL(_, ADInplaceOrView, m) {
}

TORCH_LIBRARY_IMPL(_, AutogradHPU, m) {
  m.fallback(AUTOGRAD_FALLBACK);
  m.fallback(torch::CppFunction::makeFallthrough());
}

#undef AUTOGRAD_FALLBACK

}

@@ -1,372 +0,0 @@
# Owner(s): ["module: autograd"]

import torch
from torch.library import Library
from torch.testing._internal.common_utils import (
    TestCase,
    parametrize,
    instantiate_parametrized_tests,
    run_tests,
)
import contextlib
import numpy as np
import warnings

@contextlib.contextmanager
def autograd_fallback_mode(mode):
    prev = torch._C._get_autograd_fallback_mode()
    try:
        torch._C._set_autograd_fallback_mode(mode)
        yield
    finally:
        torch._C._set_autograd_fallback_mode(prev)

class TestAutogradFallback(TestCase):
    test_ns = '_test_autograd_fallback'

    def tearDown(self):
        if hasattr(torch.ops, self.test_ns):
            delattr(torch.ops, self.test_ns)
        if hasattr(self, 'lib'):
            del self.lib.m
            del self.lib

    def get_op(self, name):
        return getattr(getattr(torch.ops, self.test_ns), name).default

    def get_lib(self):
        lib = Library(self.test_ns, "FRAGMENT")
        self.lib = lib
        return lib

    @parametrize("mode", ("nothing", "warn"))
    def test_no_grad(self, mode):
        with autograd_fallback_mode(mode):
            lib = self.get_lib()
            lib.define("foo(Tensor a, Tensor b, int c) -> Tensor")
            lib.impl("foo", lambda a, b, c: a + b + c, "CPU")
            op = self.get_op("foo")

            with warnings.catch_warnings():
                warnings.simplefilter("error")
                with torch.no_grad():
                    a = torch.randn([], requires_grad=True)
                    b = torch.randn([], requires_grad=True)
                    out = op(a, b, 1)
                self.assertFalse(out.requires_grad)

            with warnings.catch_warnings():
                warnings.simplefilter("error")
                a = torch.randn([])
                b = torch.randn([])
                out = op(a, b, 1)
                self.assertFalse(out.requires_grad)

    @parametrize("mode", ("nothing", "warn"))
    def test_no_autograd_kernel(self, mode):
        with autograd_fallback_mode(mode):
            lib = self.get_lib()
            lib.define("foo(Tensor a, Tensor b, int c) -> Tensor")
            op = self.get_op("foo")

            def foo_impl(a, b, c):
                result = a.detach().numpy() + b.detach().numpy() + c
                return torch.tensor(result)

            lib.impl("foo", foo_impl, "CPU")

            # Some inputs requiring grad
            a = torch.randn([], requires_grad=False)
            b = torch.randn([], requires_grad=True)
            out = op(a, b, 1).sum()
            with self._check_ctx(mode, mode_nothing_raises=True):
                out.backward()
            self.assertIsNone(b.grad)

    def _check_ctx(self, mode, *, mode_nothing_raises=False):
        if mode == "warn":
            return self.assertWarnsRegex(UserWarning, 'an autograd kernel was not registered')
        assert mode == "nothing"
        if mode_nothing_raises:
            return self.assertRaisesRegex(RuntimeError, "does not require grad")
        return contextlib.nullcontext()

    @parametrize("mode", ("nothing", "warn"))
    def test_no_autograd_kernel_inplace(self, mode):
        with autograd_fallback_mode(mode):
            # input modified in-place gets returned as output
            lib = self.get_lib()
            lib.define("foo(Tensor(a!) self, Tensor(b!) y) -> (Tensor(a!), Tensor(b!))")
            op = self.get_op("foo")

            def foo_impl(x, y):
                with torch.no_grad():
                    x.sin_()
                    y.cos_()
                return x, y

            lib.impl("foo", foo_impl, "CPU")

            x = torch.randn(3, requires_grad=True)
            w = x.clone()
            v = x.clone()
            y0 = w[0]
            y1 = v[1]
            z0, z1 = op(y0, y1)
            for tensor in [w, v, z0, z1, y0, y1]:
                with self._check_ctx(mode):
                    tensor.sum().backward(retain_graph=True)

            # no outputs: we don't do anything. Maybe we should in the future.
            # This is not a common failure mode.
            lib.define("bar(Tensor(a!) self) -> ()")
            op = self.get_op("bar")

            def bar_impl(x):
                with torch.no_grad():
                    x.sin_()

            lib.impl("bar", bar_impl, "CPU")
            with warnings.catch_warnings():
                warnings.simplefilter("error")
                x = torch.randn([], requires_grad=True)
                y = x.clone()
                z = op(y)
                y.backward()
                self.assertEqual(x.grad, torch.ones_like(x))

    @parametrize("mode", ("nothing", "warn"))
    def test_cpu_return_self(self, mode):
        with autograd_fallback_mode(mode):
            # To be clear, none of these situations are OK and will lead
            # to other problems down the line. We're testing them because
            # it is fairly common to actually do these things.
            lib = Library(self.test_ns, "FRAGMENT")
            lib.define("foo(Tensor self) -> Tensor")
            lib.impl("foo", lambda x: x, "CPU")
            op = self.get_op("foo")

            x = torch.randn(3, requires_grad=True)
            y = op(x).sum()
            with self._check_ctx(mode):
                y.backward()
            self.assertEqual(x.grad, torch.ones_like(x))

            lib.define("bar(Tensor(a!) self) -> Tensor(a!)")
            lib.impl("bar", lambda x: x, "CPU")
            op = self.get_op("bar")

            x = torch.randn(3, requires_grad=True)
            y = op(x).sum()
            with self._check_ctx(mode):
                y.backward()
            self.assertEqual(x.grad, torch.ones_like(x))

    @parametrize("mode", ("nothing", "warn"))
    def test_composite_registered_to_cpu(self, mode):
        with autograd_fallback_mode(mode):
            lib = Library(self.test_ns, "FRAGMENT")
            lib.define("foo(Tensor self) -> Tensor")
            lib.impl("foo", lambda x: x.sin().sum(), "CPU")
            op = self.get_op("foo")

            x = torch.randn(3, requires_grad=True)
            y = op(x)
            with self._check_ctx(mode):
                y.backward()
            self.assertEqual(x.grad, x.cos())

    @parametrize("mode", ("nothing", "warn"))
    def test_autograd_function_registered_to_cpu(self, mode):
        with autograd_fallback_mode(mode):
            lib = Library(self.test_ns, "FRAGMENT")
            lib.define("foo(Tensor self) -> Tensor")

            class NumpySin(torch.autograd.Function):
                @staticmethod
                def forward(ctx, x):
                    ctx.save_for_backward(x)
                    return torch.tensor(np.sin(x.cpu().numpy()))

                @staticmethod
                def backward(ctx, gx):
                    x, = ctx.saved_tensors
                    return gx * x.cos()

            lib.impl("foo", NumpySin.apply, "CPU")
            op = self.get_op("foo")

            x = torch.randn(3, requires_grad=True)
            y = op(x).sum()
            with self._check_ctx(mode):
                y.backward()
            self.assertEqual(x.grad, x.cos())

    @parametrize("mode", ("nothing", "warn"))
    def test_inplace_autograd_function_registered_to_cpu(self, mode):
        with autograd_fallback_mode(mode):
            lib = Library(self.test_ns, "FRAGMENT")
            lib.define("foo(Tensor(a!) self) -> Tensor(a!)")

            class NumpySin_(torch.autograd.Function):
                @staticmethod
                def forward(ctx, x):
                    ctx.save_for_backward(x.clone())
                    x_np = x.detach().numpy()
                    np.sin(x_np, out=x_np)
                    ctx.mark_dirty(x)
                    return x

                @staticmethod
                def backward(ctx, gx):
                    x, = ctx.saved_tensors
                    return gx * x.cos()

            lib.impl("foo", NumpySin_.apply, "CPU")
            op = self.get_op("foo")

            x = torch.randn(3, requires_grad=True)
            z = x.clone()
            w = z[0]
            y = op(w)

            expected = torch.zeros_like(x)
            expected[0] = x[0].cos()
            with self._check_ctx(mode):
                gx, = torch.autograd.grad(y, x, torch.ones_like(y), retain_graph=True)
            self.assertEqual(gx, expected)

            expected = torch.ones_like(x)
            expected[0] = x[0].cos()
            with self._check_ctx(mode):
                gx, = torch.autograd.grad(z, x, torch.ones_like(z))
            self.assertEqual(gx, expected)

    @parametrize("mode", ("nothing", "warn"))
    def test_inplace_on_tensor_that_does_not_require_grad(self, mode):
        # We don't do anything special (that is, we don't rebase history).
        # See NOTE [autograd fallback and in-place operations] for why
        with autograd_fallback_mode(mode):
            lib = Library(self.test_ns, "FRAGMENT")

            # Correct usage of (a!)
            lib.define("foo(Tensor(a!) self, Tensor other) -> Tensor(a!)")

            def foo_impl(x, y):
                x_d = x.detach()
                y = y.detach()
                x_d.add_(y)
                return x

            lib.impl("foo", foo_impl, "CPU")
            foo = self.get_op("foo")

            # Incorrect usage of (a!): user doesn't return tensor as-is
            lib.define("bar(Tensor(a!) self, Tensor other) -> Tensor(a!)")

            def bar_impl(x, y):
                x_d = x.detach()
                y = y.detach()
                x_d.add_(y)
                return x_d.clone()

            lib.impl("bar", bar_impl, "CPU")
            bar = self.get_op("bar")

            # User mutated input tensor but didn't return it.
            lib.define("baz(Tensor(a!) self, Tensor other) -> ()")

            def baz_impl(x, y):
                x_d = x.detach()
                y = y.detach()
                x_d.add_(y)

            lib.impl("baz", baz_impl, "CPU")
            baz = self.get_op("baz")

            # Test in-place on non-view
            for op in (foo, bar, baz):
                x = torch.randn(3)
                y = torch.randn(3, requires_grad=True)
                with self.assertRaisesRegex(RuntimeError, "does not require grad"):
                    z = x.clone()
                    op(z, y)
                    torch.autograd.grad(z, y, torch.ones_like(z), allow_unused=True)

            # Test in-place on view
            for op in (foo, bar, baz):
                x = torch.randn(3)
                y = torch.randn(3, requires_grad=True)
                with self.assertRaisesRegex(RuntimeError, "does not require grad"):
                    z = x[:]
                    op(z, y)
                    torch.autograd.grad(z, x, torch.ones_like(z), allow_unused=True)

    @parametrize("mode", ("nothing", "warn"))
    def test_post_autograd_returns_leaf(self, mode):
        with autograd_fallback_mode(mode):
            lib = self.get_lib()
            lib.define("foo(Tensor a) -> (Tensor, Tensor)")
            op = self.get_op("foo")

            lib.impl("foo", lambda a: (a.clone(), a.clone().detach().requires_grad_()), "CPU")
            x = torch.randn(3, requires_grad=True)
            y, z = op(x)
            with self._check_ctx(mode):
                z.sum().backward()

    @parametrize("mode", ("nothing", "warn"))
    def test_post_autograd_returns_mix_of_requires_grad_tensors(self, mode):
        with autograd_fallback_mode(mode):
            lib = self.get_lib()
            lib.define("foo(Tensor a, Tensor b) -> (Tensor, Tensor, Tensor)")
            op = self.get_op("foo")

            def foo_impl(a, b):
                with torch.no_grad():
                    x = a.clone()
                    z = b.clone()
                y = a * b
                return x, y, z

            lib.impl("foo", foo_impl, "CPU")
            a = torch.randn(3, requires_grad=True)
            b = torch.randn(3, requires_grad=True)
            x, y, z = op(a, b)

            with self._check_ctx(mode, mode_nothing_raises=True):
                torch.autograd.grad(x, (a, b), torch.ones_like(x), allow_unused=True, retain_graph=True)

            with self._check_ctx(mode, mode_nothing_raises=False):
                torch.autograd.grad(y, (a, b), torch.ones_like(y), allow_unused=True, retain_graph=True)

            with self._check_ctx(mode, mode_nothing_raises=True):
                torch.autograd.grad(z, (a, b), torch.ones_like(z), allow_unused=True, retain_graph=True)

    @parametrize("mode", ("nothing", "warn"))
    def test_supports_tensor_lists(self, mode):
        with autograd_fallback_mode(mode):
            lib = self.get_lib()
            lib.define("foo(Tensor[] a) -> Tensor[]")
            op = self.get_op("foo")

            def foo_impl(a):
                x, y, z = a
                with torch.no_grad():
                    return x + y + z, x * y * z

            lib.impl("foo", foo_impl, "CPU")
            x = torch.randn(3, requires_grad=True)
            y = torch.randn(1, requires_grad=True)
            z = torch.randn(2, 1, requires_grad=True)
            a, b = op([x, y, z])
            with self._check_ctx(mode, mode_nothing_raises=True):
                torch.autograd.grad(a, (x, y, z), torch.ones_like(a), allow_unused=True, retain_graph=True)
            with self._check_ctx(mode, mode_nothing_raises=True):
                torch.autograd.grad(b, (x, y, z), torch.ones_like(b), allow_unused=True, retain_graph=True)


instantiate_parametrized_tests(TestAutogradFallback)

if __name__ == '__main__':
    run_tests()

@@ -11267,7 +11267,6 @@ class TestAutogradMultipleDispatch(TestCase):

from autograd.test_complex import TestAutogradComplex  # noqa: F401
from autograd.test_functional import TestAutogradFunctional  # noqa: F401
from autograd.test_fallback import TestAutogradFallback  # noqa: F401

# e.g., TestAutogradDeviceTypeCPU and TestAutogradDeviceTypeCUDA
instantiate_device_type_tests(

@@ -391,7 +391,7 @@ CPU: impl_t_t [kernel]
CUDA: default_def_name_t_t [math kernel]
XLA: default_def_name_t_t [math kernel]
AutogradOther: default_def_name_t_t [math kernel]
AutogradCPU: registered in pytorch framework [backend fallback]
AutogradCPU: fallthrough registered in pytorch framework [backend fallback]
AutogradCUDA: default_def_name_t_t [math kernel]
AutogradXLA: default_def_name_t_t [math kernel]
''')

@@ -456,7 +456,7 @@ CPU: fn_cpu [kernel]
CUDA: fn_math [math kernel]
XLA: fn_math [math kernel]
AutogradOther: fn_math [math kernel]
AutogradCPU: registered in pytorch framework [backend fallback]
AutogradCPU: fallthrough registered in pytorch framework [backend fallback]
AutogradCUDA: fn_math [math kernel]
AutogradXLA: fn_math [math kernel]
''')

@@ -587,10 +587,10 @@ Undefined: fn_defaultbackend [default backend kernel]
CPU: fn_cpu [kernel]
CUDA: fn_defaultbackend [default backend kernel]
XLA: fn_defaultbackend [default backend kernel]
AutogradOther: registered in pytorch framework [backend fallback]
AutogradCPU: registered in pytorch framework [backend fallback]
AutogradCUDA: registered in pytorch framework [backend fallback]
AutogradXLA: registered in pytorch framework [backend fallback]
AutogradOther: fallthrough registered in pytorch framework [backend fallback]
AutogradCPU: fallthrough registered in pytorch framework [backend fallback]
AutogradCUDA: fallthrough registered in pytorch framework [backend fallback]
AutogradXLA: fallthrough registered in pytorch framework [backend fallback]
''')

def test_computed_table_with_cpu_autograd_defaultbackend(self):

@@ -814,9 +814,9 @@ XLA fn_XLA [kernel]
Lazy fn_Lazy [kernel]
FPGA fn_CompositeImplicitAutograd [math kernel]
AutogradOther fn_CompositeImplicitAutograd [math kernel]
AutogradCPU [backend fallback]
AutogradXLA [backend fallback]
AutogradLazy [backend fallback]
AutogradCPU fallthrough [backend fallback]
AutogradXLA fallthrough [backend fallback]
AutogradLazy fallthrough [backend fallback]
'''
)

@@ -836,8 +836,8 @@ Lazy fn_Lazy [kernel]
FPGA fn_CompositeImplicitAutograd [math kernel]
AutogradOther fn_CompositeImplicitAutograd [math kernel]
AutogradCPU fn_AutogradCPU [kernel]
AutogradXLA [backend fallback]
AutogradLazy [backend fallback]
AutogradXLA fallthrough [backend fallback]
AutogradLazy fallthrough [backend fallback]
'''
)
self.assertExpectedInline(

@@ -869,10 +869,10 @@ CPU fn_CPU [kernel]
XLA fn_XLA [kernel]
Lazy fn_Lazy [kernel]
FPGA fn_CompositeExplicitAutograd [default backend kernel]
AutogradOther [backend fallback]
AutogradOther fallthrough [backend fallback]
AutogradCPU fn_AutogradCPU [kernel]
AutogradXLA [backend fallback]
AutogradLazy [backend fallback]
AutogradXLA fallthrough [backend fallback]
AutogradLazy fallthrough [backend fallback]
'''
)

@@ -906,7 +906,7 @@ XLA fn_CompositeImplicitAutograd [math kernel]
Lazy fn_CompositeImplicitAutograd [math kernel]
FPGA fn_FPGA [kernel]
AutogradOther ambiguous_autogradother [ambiguous autogradother]
AutogradCPU [backend fallback]
AutogradCPU fallthrough [backend fallback]
AutogradXLA fn_CompositeImplicitAutograd [math kernel]
AutogradLazy fn_CompositeImplicitAutograd [math kernel]
'''

@@ -1229,9 +1229,6 @@ class _InferenceMode:
    def __enter__(self): ...
    def __exit__(self, exc_type, exc_value, traceback): ...

def _set_autograd_fallback_mode(mode: str) -> None: ...
def _get_autograd_fallback_mode() -> str: ...

# Defined in torch/csrc/jit/python/script_init.cpp
class LoggerBase: ...
class NoopLogger(LoggerBase): ...

@@ -45,184 +45,8 @@ void _foreach_tensor(
  }
}

AutogradFallbackMode kAutogradFallbackMode = AutogradFallbackMode::Warn;

} // namespace

void setAutogradFallbackMode(AutogradFallbackMode mode) {
  TORCH_CHECK(mode != AutogradFallbackMode::Error, "NYI: mode='error'");
  kAutogradFallbackMode = mode;
}

AutogradFallbackMode getAutogradFallbackMode() {
  return kAutogradFallbackMode;
}

static void warnAutogradNotImplemented(const std::string& op_name) {
  TORCH_WARN(
      op_name,
      ": an autograd kernel was not registered to the Autograd key(s) ",
      "but we are trying to backprop through it. This may lead to silently incorrect behavior. ",
      "This behavior is deprecated and will be removed in a future version of PyTorch. ",
      "If your operator is differentiable, please ensure you have registered an "
      "autograd kernel to the correct Autograd key (e.g. DispatchKey::Autograd, "
      "DispatchKey::CompositeImplicitAutograd). If your operator is not "
      "differentiable, or to squash this warning and use the previous behavior, "
      "please register torch::CppFunction::makeFallthrough() to DispatchKey::Autograd.");
}

struct WarnNotImplemented : public Node {
  WarnNotImplemented(
      std::string op_name,
      int64_t num_outputs,
      edge_list&& next_edges)
      : Node(std::move(next_edges)),
        op_name(std::move(op_name)),
        num_outputs(num_outputs) {}

  WarnNotImplemented(std::string op_name, int64_t num_outputs)
      : op_name(std::move(op_name)), num_outputs(num_outputs) {}

  variable_list apply(variable_list&& inputs) override;

  std::string op_name;
  int64_t num_outputs;
};

auto WarnNotImplemented::apply(variable_list&& inputs) -> variable_list {
  warnAutogradNotImplemented(op_name);
  std::vector<at::Tensor> output(num_outputs);
  return output;
}

static void basicAutogradNotImplementedFallbackImpl(
    const c10::OperatorHandle& op,
    c10::DispatchKeySet dispatch_keys,
    torch::jit::Stack* stack) {
  const auto& schema = op.schema();
  const auto& op_name = schema.operator_name().name;
  const auto num_arguments = schema.arguments().size();
  const auto num_returns = schema.returns().size();
  const auto stack_start = stack->size() - num_arguments;
  const bool grad_mode = GradMode::is_enabled();

  if (getAutogradFallbackMode() == AutogradFallbackMode::Nothing) {
    op.redispatchBoxed(dispatch_keys & c10::after_autograd_keyset, stack);
    return;
  }
  TORCH_INTERNAL_ASSERT(
      getAutogradFallbackMode() == AutogradFallbackMode::Warn);

  bool any_input_requires_grad = false;
  if (grad_mode) {
    _foreach_tensor(
        [&](size_t _, size_t idx_arg, const at::Tensor& t) {
          if (t.requires_grad()) {
            any_input_requires_grad = true;
          }
        },
        stack,
        stack_start,
        num_arguments);
  }

  std::shared_ptr<WarnNotImplemented> grad_fn;
  if (any_input_requires_grad) {
    // NB: It is standard to collect edges from all tensors
    // (see generated/VariableTypeEverything.cpp for examples)
    std::vector<const at::Tensor*> all_tensors_on_stack;
    _foreach_tensor(
        [&](size_t _, size_t idx_arg, const at::Tensor& t) {
          all_tensors_on_stack.push_back(&t);
        },
        stack,
        stack_start,
        num_arguments);
    grad_fn = std::shared_ptr<WarnNotImplemented>(
        new WarnNotImplemented(op_name, all_tensors_on_stack.size()),
        deleteNode);
    grad_fn->set_next_edges(collect_next_edges(all_tensors_on_stack));
  }

  op.redispatchBoxed(dispatch_keys & c10::after_autograd_keyset, stack);

  if (any_input_requires_grad) {
    // NB: if the operator mutates any inputs in-place and does not return them
    // as outputs, we are unable to lazily raise a warning. This is OK because
    // we don't expect many existing operators to do this because of the amount
    // of technical expertise necessary (you would need to manually register an
    // autograd kernel without using autograd.Function)
    _foreach_tensor(
        [&](size_t _, size_t idx_ret, const at::Tensor& t) {
          if (!isDifferentiableType(t.scalar_type())) {
            return;
          }
          const bool is_mutable_output =
              schema.is_aliasing({c10::SchemaArgType::output, idx_ret}) &&
              schema.is_mutable({c10::SchemaArgType::output, idx_ret});

          // If the post-autograd implementation returns Tensors that require
          // grad, then we install a hook that will warn during the backwards.
          //
          // NB: If the operation is inplace and the inputs were views,
          // it is possible that the history was rebased and the hook will
          // not warn in all places where it should. That is, the following
          // won't warn:
          // >>> x = torch.randn(3, 3, requires_grad=True)
          // >>> z = x.clone()
          // >>> w = z[0]
          // >>> k = w[0]
          // >>> y = op(k)
          // >>> torch.autograd.grad(z.sum(), w)
          if (t.requires_grad()) {
            t.register_hook([op_name](const at::Tensor& grad) {
              warnAutogradNotImplemented(op_name);
              return grad;
            });
            // If history is rebased, then we will attempt to warn
            // on the view's base. This will catch most cases (because
            // users typically call .backward() and backprop through
            // the entire program).
            if (t.is_view() && is_mutable_output) {
              // NOLINTNEXTLINE(cppcoreguidelines-pro-type-const-cast)
              const_cast<at::TensorBase&>(t._base()).register_hook(
                  [op_name](const at::TensorBase& grad) {
                    warnAutogradNotImplemented(op_name);
                    return grad;
                  });
            }
            return;
          }

          // If the post-autograd implementation returns any Tensors that
          // don't require grad, then we install the WarnNotImplemented grad_fn.
          // This grad_fn warns in backward and returns undefined tensor
          // gradients.
          //
          // NOTE [autograd fallback and in-place operations]
          // If the schema says the output is mutable, and the output
          // is an input, and the input is a view Tensor, then...
          // we're not sure if set_history is OK to do, so we just skip
          // adding the grad_fn. Builtin operators do rebase_history here,
          // but custom operators may have multiple Tensor(a!) returns,
          // rebase_history assumes single Tensor(a!) return, and in general
          // custom ops don't have a good in-place story.
          if (!is_mutable_output) {
            // NOLINTNEXTLINE(cppcoreguidelines-pro-type-const-cast)
            set_history(const_cast<at::Tensor&>(t), grad_fn);
          }
        },
        stack,
        stack->size() - num_returns,
        num_returns);
  }
}

torch::CppFunction basicAutogradNotImplementedFallback() {
  return torch::CppFunction::makeFromBoxedFunction<
      &basicAutogradNotImplementedFallbackImpl>();
}

static void autogradNotImplementedFallbackImpl(
    const c10::OperatorHandle& op,
    c10::DispatchKeySet dispatch_keys,

@@ -5,30 +5,9 @@
namespace torch {
namespace autograd {

// Default DispatchKey::Autograd fallback for built-in operators.
// Can be registered for custom operators.
TORCH_API torch::CppFunction autogradNotImplementedFallback();

// Default DispatchKey::AdInplaceOrView fallback for built-in operators
// Can be registered for custom operators.
TORCH_API torch::CppFunction autogradNotImplementedInplaceOrViewFallback();

// Default DispatchKey::Autograd fallback for all other operators (i.e. custom
// operators)
TORCH_API torch::CppFunction basicAutogradNotImplementedFallback();

enum class AutogradFallbackMode {
  Nothing, // Fallback is a redispatch
  Warn, // Fallback raises a warning if backward is called
  Error, // Fallback raises an error if backward is called
};

// Change the behavior of "basicAutogradNotImplementedFallback"
// In Python this is:
// - torch._C._set_autograd_fallback_mode(str) -> None
// - torch._C._get_autograd_fallback_mode() -> str
TORCH_API void setAutogradFallbackMode(AutogradFallbackMode mode);
TORCH_API AutogradFallbackMode getAutogradFallbackMode();

} // namespace autograd
} // namespace torch

@@ -12,7 +12,6 @@
#include <torch/csrc/Exceptions.h>
#include <torch/csrc/autograd/VariableTypeUtils.h>
#include <torch/csrc/autograd/autograd.h>
#include <torch/csrc/autograd/autograd_not_implemented_fallback.h>
#include <torch/csrc/autograd/function.h>
#include <torch/csrc/autograd/grad_mode.h>
#include <torch/csrc/autograd/profiler.h>

@@ -409,37 +408,6 @@ PyObject* THPAutograd_initExtension(PyObject* _unused, PyObject* unused) {
    auto cls = python_type_class.ptr();
    registerPythonTensorClass(device, cls);
  });
  _C_m.def("_set_autograd_fallback_mode", [](const std::string& mode) {
    if (mode == "nothing") {
      torch::autograd::setAutogradFallbackMode(
          torch::autograd::AutogradFallbackMode::Nothing);
      return;
    }
    if (mode == "warn") {
      torch::autograd::setAutogradFallbackMode(
          torch::autograd::AutogradFallbackMode::Warn);
      return;
    }
    if (mode == "error") {
      torch::autograd::setAutogradFallbackMode(
          torch::autograd::AutogradFallbackMode::Error);
      return;
    }
    TORCH_INTERNAL_ASSERT(false, "Unsupported AutogradFallbackMode: ", mode);
  });
  _C_m.def("_get_autograd_fallback_mode", []() {
    auto mode = torch::autograd::getAutogradFallbackMode();
    switch (mode) {
      case torch::autograd::AutogradFallbackMode::Nothing:
        return "nothing";
      case torch::autograd::AutogradFallbackMode::Warn:
        return "warn";
      case torch::autograd::AutogradFallbackMode::Error:
        return "error";
      default:
        TORCH_INTERNAL_ASSERT(false, "Unsupported AutogradFallbackMode");
    }
  });

  _C_m.def("_activate_cuda_trace", []() { activateCUDATrace(); });

@@ -485,19 +485,11 @@ namespace {
    m.impl(#FUNC, FUNC##DEV); \
  }

#define REGISTER_C10D_FALLTHROUGH(FUNC, KEY) \
  TORCH_LIBRARY_IMPL(c10d, KEY, m) { \
    m.impl(#FUNC, torch::CppFunction::makeFallthrough()); \
  }

// 1st level expansion
#define REGISTER_C10D_OP(FUNC) \
  REGISTER_C10D_OP1(FUNC, CPU) \
  REGISTER_C10D_FALLTHROUGH(FUNC, AutogradCPU) \
  REGISTER_C10D_OP1(FUNC, CUDA) \
  REGISTER_C10D_FALLTHROUGH(FUNC, AutogradCUDA) \
  REGISTER_C10D_OP1(FUNC, PrivateUse1) \
  REGISTER_C10D_FALLTHROUGH(FUNC, AutogradPrivateUse1)
#define REGISTER_C10D_OP(FUNC) \
  REGISTER_C10D_OP1(FUNC, CPU) \
  REGISTER_C10D_OP1(FUNC, CUDA) \
  REGISTER_C10D_OP1(FUNC, PrivateUse1)

// Now we start to register ops with the three device keys

@@ -527,10 +519,6 @@ TORCH_LIBRARY_IMPL(c10d, CPU, m) {
  m.impl("monitored_barrier_", monitored_barrier_CPU);
}

TORCH_LIBRARY_IMPL(c10d, AutogradCPU, m) {
  m.impl("monitored_barrier_", torch::CppFunction::makeFallthrough());
}

// TODO: The SparseCPU/SparseCUDA dispatched methods are only used to support
// sparse all_reduce in the Gloo backend
TORCH_LIBRARY_IMPL(c10d, SparseCPU, m) {