2018-12-12 04:40:32 +00:00
|
|
|
#include <c10/core/TensorImpl.h>
|
2018-05-25 00:31:41 +00:00
|
|
|
|
2018-12-04 05:48:46 +00:00
|
|
|
#include <c10/core/Backend.h>
|
2021-05-01 04:22:23 +00:00
|
|
|
#include <c10/core/InferenceMode.h>
|
2018-12-10 23:06:30 +00:00
|
|
|
#include <c10/core/WrapDimMinimal.h>
|
2020-01-15 19:12:17 +00:00
|
|
|
#include <c10/core/impl/LocalDispatchKeySet.h>
|
2018-12-08 00:18:20 +00:00
|
|
|
#include <c10/util/Optional.h>
|
2018-04-28 19:54:05 +00:00
|
|
|
|
2018-12-12 04:40:32 +00:00
|
|
|
// Defines the boolean setting `caffe2_keep_on_shrink` through the
// C10_DEFINE_bool macro. The arguments are, in order: the setting's name, the
// value `true` (presumably its default -- confirm against the macro), and the
// human-readable description string.
C10_DEFINE_bool(
    caffe2_keep_on_shrink,
    true,
    "If set, keeps memory when a tensor is shrinking its size.");
|
2018-08-04 01:20:19 +00:00
|
|
|
|
2018-12-12 04:40:32 +00:00
|
|
|
// Defines the 64-bit integer setting `caffe2_max_keep_on_shrink_memory`
// through the C10_DEFINE_int64 macro. The arguments are, in order: the
// setting's name, the value LLONG_MAX (presumably its default -- confirm
// against the macro), and the description string, which is written as two
// adjacent literals that the compiler concatenates.
C10_DEFINE_int64(
    caffe2_max_keep_on_shrink_memory,
    LLONG_MAX,
    "The maximum memory in bytes to keep on shrink, if the difference between "
    "tensor sizes is bigger than this then tensor will be reset.");
|
|
|
|
|
|
|
|
|
|
namespace c10 {
|
|
|
|
|
|
2021-05-21 01:15:21 +00:00
|
|
|
namespace impl {
|
|
|
|
|
|
|
|
|
|
// Placeholder name callback that PyInterpreter::disarm() installs in name_fn_.
// The interpreter argument is ignored; the result is always the same fixed
// placeholder string.
static std::string noop_name_fn(const PyInterpreter*) {
  return std::string("<unloaded interpreter>");
}
|
|
|
|
|
|
Preserve PyObject even when it goes dead (#56017)
Summary:
Pull Request resolved: https://github.com/pytorch/pytorch/pull/56017
Fixes #55686
This patch is seemingly straightforward but some of the changes are very
subtle. For the general algorithmic approach, please first read the
quoted issue. Based on the algorithm, there are some fairly
straightforward changes:
- New boolean on TensorImpl tracking if we own the pyobj or not
- PythonHooks virtual interface for requesting deallocation of pyobj
when TensorImpl is being released and we own its pyobj, and
implementation of the hooks in python_tensor.cpp
- Modification of THPVariable to MaybeOwned its C++ tensor, directly
using swolchok's nice new class
And then, there is python_variable.cpp. Some of the changes follow the
general algorithmic approach:
- THPVariable_NewWithVar is simply adjusted to handle MaybeOwned and
initializes as owned (like before)
- THPVariable_Wrap adds the logic for reverting ownership back to
PyObject when we take out an owning reference to the Python object
- THPVariable_dealloc attempts to resurrect the Python object if
the C++ tensor is live, and otherwise does the same old implementation
as before
- THPVariable_tryResurrect implements the resurrection logic. It is
modeled after CPython code so read the cited logic and see if
it is faithfully replicated
- THPVariable_clear is slightly updated for MaybeOwned and also to
preserve the invariant that if owns_pyobj, then pyobj_ is not null.
This change is slightly dodgy: the previous implementation has a
comment mentioning that the pyobj nulling is required to ensure we
don't try to reuse the dead pyobj. I don't think, in this new world,
this is possible, because the invariant says that the pyobj only
dies if the C++ object is dead too. But I still unset the field
for safety.
And then... there is THPVariableMetaType. colesbury explained in the
issue why this is necessary: when destructing an object in Python, you
start off by running the tp_dealloc of the subclass before moving up
to the parent class (much in the same way C++ destructors work). The
deallocation process for a vanilla Python-defined class does irreparable
harm to the PyObject instance (e.g., the finalizers get run) making it
no longer valid to attempt to resurrect later in the tp_dealloc chain.
(BTW, the fact that objects can resurrect but in an invalid state is
one of the reasons why it's so frickin' hard to write correct __del__
implementations). So we need to make sure that we actually override
the tp_dealloc of the bottom most *subclass* of Tensor to make sure
we attempt a resurrection before we start finalizing. To do this,
we need to define a metaclass for Tensor that can override tp_dealloc
whenever we create a new subclass of Tensor. By the way, it was totally
not documented how to create metaclasses in the C++ API, and it took
a good bit of trial and error to figure it out (and the answer is now
immortalized in https://stackoverflow.com/q/67077317/23845 -- the things
that I got wrong in earlier versions of the PR included setting
tp_basicsize incorrectly, incorrectly setting Py_TPFLAGS_HAVE_GC on
the metaclass--you want to leave it unset so that it inherits, and
determining that tp_init is what actually gets called when you construct
a class, not tp_call as another not-to-be-named StackOverflow question
suggests).
Aside: Ordinarily, adding a metaclass to a class is a user visible
change, as it means that it is no longer valid to mixin another class
with a different metaclass. However, because _C._TensorBase is a C
extension object, it will typically conflict with most other
metaclasses, so this is not BC breaking.
The desired new behavior of a subclass tp_dealloc is to first test if
we should resurrect, and otherwise do the same old behavior. In an
initial implementation of this patch, I implemented this by saving the
original tp_dealloc (which references subtype_dealloc, the "standard"
dealloc for all Python defined classes) and invoking it. However, this
results in an infinite loop, as it attempts to call the dealloc function
of the base type, but incorrectly chooses subclass type (because it is
not a subtype_dealloc, as we have overridden it; see
https://github.com/python/cpython/blob/b38601d49675d90e1ee6faa47f7adaeca992d02d/Objects/typeobject.c#L1261 )
So, with great reluctance, I must duplicate the behavior of
subtype_dealloc in our implementation. Note that this is not entirely
unheard of in Python binding code; for example, Cython
https://github.com/cython/cython/blob/c25c3ccc4b862592b06e66fd0fc508e4d388437b/Cython/Compiler/ModuleNode.py#L1560
also does similar things. This logic makes up the bulk of
THPVariable_subclass_dealloc
To review this, you should pull up the CPython copy of subtype_dealloc
https://github.com/python/cpython/blob/b38601d49675d90e1ee6faa47f7adaeca992d02d/Objects/typeobject.c#L1230
and verify that I have specialized the implementation for our case
appropriately. Among the simplifications I made:
- I assume PyType_IS_GC, because I assume that Tensor subclasses are
only ever done in Python and those classes are always subject to GC.
(BTW, yes! This means I have broken anyone who has extended PyTorch
tensor from C API directly. I'm going to guess no one has actually
done this.)
- I don't bother walking up the type bases to find the parent dealloc;
I know it is always THPVariable_dealloc. Similarly, I can get rid
of some parent type tests based on knowledge of how
THPVariable_dealloc is defined
- The CPython version calls some private APIs which I can't call, so
I use the public PyObject_GC_UnTrack APIs.
- I don't allow the finalizer of a Tensor to change its type (but
more on this shortly)
One alternative I discussed with colesbury was instead of copy pasting
the subtype_dealloc, we could transmute the type of the object that was
dying to turn it into a different object whose tp_dealloc is
subtype_dealloc, so the stock subtype_dealloc would then be applicable.
We decided this would be kind of weird and didn't do it that way.
TODO:
- More code comments
- Figure out how not to increase the size of TensorImpl with the new
bool field
- Add some torture tests for the THPVariable_subclass_dealloc, e.g.,
involving subclasses of Tensors that do strange things with finalizers
- Benchmark the impact of taking the GIL to release C++ side tensors
(e.g., from autograd)
- Benchmark the impact of adding a new metaclass to Tensor (probably
will be done by separating out the metaclass change into its own
change)
- Benchmark the impact of changing THPVariable to conditionally own
Tensor (as opposed to unconditionally owning it, as before)
- Add tests that this actually indeed preserves the Python object
Signed-off-by: Edward Z. Yang <ezyang@fb.com>
Test Plan: Imported from OSS
Reviewed By: albanD
Differential Revision: D27765125
Pulled By: ezyang
fbshipit-source-id: 857f14bdcca2900727412aff4c2e2d7f0af1415a
2021-06-03 17:47:19 +00:00
|
|
|
// Placeholder decref callback that PyInterpreter::disarm() installs in
// decref_fn_. Both arguments are ignored and the body intentionally does
// nothing.
static void noop_decref_fn(const PyInterpreter*, PyObject*) {}
|
|
|
|
|
|
Dispatch to Python via __torch_dispatch__ (#59760)
Summary:
Pull Request resolved: https://github.com/pytorch/pytorch/pull/59760
See https://github.com/pytorch/pytorch/issues/59049
There are some moving parts to this PR, I'll structure this explanation so the straightforward parts go first, and then the less straightforward parts.
**The actual dispatch to Python.** The core logic of dispatch to Python lives in `concrete_dispatch_fn` in `torch/csrc/autograd/python_variable.cpp`. It takes the input IValue stack, scans all the arguments for Tensor arguments, and defers most of the heavy lifting to `handle_torch_function_no_python_arg_parser` which actually does all of the logic for calling out to torch dispatch (in particular, this function handles multiple dispatch situations for you). Because we have a different function name than regular `__torch_function__` handling, `handle_torch_function_no_python_arg_parser` is generalized to accept a magic method name to look for when testing if Tensors have custom handling or not. Unlike `__torch_function__`, by default there is no `__torch_dispatch__` on Tensor classes.
**Maintaining the Python dispatch key.** In order to get to the dispatch to Python logic, we must tag Tensors with the `__torch_dispatch__` magic method with the newly added Python dispatch key (separated from PythonFuncTorch to allow for a transitional period while they migrate to this mechanism). We expose a new private property `_is_python_dispatch` that assists in debugging if a Tensor is participating in Python dispatch or not. We apply the Python dispatch key the first time a PyObject for a Tensor is constructed (THPVariable_NewWithVar), testing if `__torch_dispatch__` exists with the newly added `check_has_torch_dispatch`.
**Shallow copy and detach.** For the simple examples tested in this PR, most creations of Tensor route through the dispatcher. The exception to this is `shallow_copy_and_detach`, which bypasses the dispatcher and is used when saving tensors for backwards. When a Tensor is Python dispatch, we override the behavior of `shallow_copy_and_detach` to instead directly call into `__torch_dispatch__` to perform a `detach` operation (in the same way it would be invoked if you called `detach` directly). Because this Python call is triggered directly from c10::TensorImpl, it must be indirected through `PyInterpreter::detach`, which is the general mechanism for dynamic dispatching to the Python interpreter associated with a TensorImpl.
**torchdeploy compatibility.** The dispatch to Python logic cannot be directly registered to the dispatcher as it is compiled in the Python library, which will get loaded multiple times per torchdeploy interpreter. Thus, we must employ a two phase process. First, we register a fallback inside a non-Python library (aten/src/ATen/core/PythonFallbackKernel.cpp). Its job is to determine the appropriate PyInterpreter to handle the Python dispatch by going through all of the arguments and finding the first argument that has a PyObject/PyInterpreter. With this PyInterpreter, it makes another dynamic dispatch via "dispatch" which will go to the correct torchdeploy interpreter to handle dispatching to actual Python.
**Testing.** We provide a simple example of a LoggingTensor for testing, which can be used to generate TorchScript-like traces to observe what operations are being called when a Tensor is invoked. Although a LoggingTensor would be better implemented via an is-a relationship rather than a has-a relationship (as is done in the test), we've done it this way to show that arbitrarily complex compositions of tensors inside a tensor work properly.
**Known limitations.**
* We haven't adjusted any operator code, so some patterns may not work (as they lose the Python subclass in an unrecoverable way)
* `__torch_function__` must be explicitly disabled with `_disabled_torch_function_impl` otherwise things don't work quite correctly (in particular, what is being disabled is default subclass preservation behavior.)
* We don't ever populate kwargs, even when an argument is kwarg-only
Signed-off-by: Edward Z. Yang <ezyang@fb.com>
Differential Revision:
D29017912
D29017912
Test Plan: Imported from OSS
Reviewed By: bdhirsh
Pulled By: ezyang
fbshipit-source-id: a67714d9e541d09203a8cfc85345b8967db86238
2021-06-25 18:49:20 +00:00
|
|
|
// Placeholder detach callback that PyInterpreter::disarm() installs in
// detach_fn_. Neither argument is inspected. The body consists solely of an
// internal assert whose condition is the constant 0, so the call is reported
// as an error with the message below; no return statement follows it.
static c10::intrusive_ptr<TensorImpl> noop_detach_fn(
    const PyInterpreter*,
    const TensorImpl*) {
  TORCH_INTERNAL_ASSERT(
      0,
      "attempted to detach (shallow_copy_and_detach) Tensor with nontrivial PyObject after corresponding interpreter died");
}
|
|
|
|
|
|
|
|
|
|
static void noop_dispatch_fn(
|
|
|
|
|
const PyInterpreter*,
|
|
|
|
|
const c10::OperatorHandle& op,
|
2021-08-31 21:53:01 +00:00
|
|
|
torch::jit::Stack* stack) {
|
Dispatch to Python via __torch_dispatch__ (#59760)
Summary:
Pull Request resolved: https://github.com/pytorch/pytorch/pull/59760
See https://github.com/pytorch/pytorch/issues/59049
There are some moving parts to this PR, I'll structure this explanation so the straightforward parts go first, and then the less straightforward parts.
**The actual dispatch to Python.** The core logic of dispatch to Python lives in `concrete_dispatch_fn` in `torch/csrc/autograd/python_variable.cpp`. It takes the input IValue stack, scans all the arguments for Tensor arguments, and defers most of the heavy lifting to `handle_torch_function_no_python_arg_parser` which actually does all of the logic for calling out to torch dispatch (in particular, this function handles multiple dispatch situations for you). Because we have a different function name than regular `__torch_function__` handling, `handle_torch_function_no_python_arg_parser` is generalized to accept a magic method name to look for when testing if Tensors have custom handling or not. Unlike `__torch_function__`, by default there is no `__torch_dispatch__` on Tensor classes.
**Maintaining the Python dispatch key.** In order to get to the dispatch to Python logic, we must tag Tensors with the `__torch_dispatch__` magic method with the newly added Python dispatch key (separated from PythonFuncTorch to allow for a transitional period while they migrate to this mechanism). We expose a new private property `_is_python_dispatch` that assists in debugging if a Tensor is participating in Python dispatch or not. We apply the Python dispatch key the first time a PyObject for a Tensor is constructed (THPVariable_NewWithVar), testing if `__torch_dispatch__` exists with then newly added `check_has_torch_dispatch`.
**Shallow copy and detach.** For the simple examples tested in this PR, most creations of Tensor route through the dispatcher. The exception to this is `shallow_copy_and_detach`, which bypasses the dispatcher and is used when saving tensors for backwards. When a Tensor is Python dispatch, we override the behavior of `shallow_copy_and_detach` to instead directly call into `__torch_dispatch__` to perform a `detach` operation (in the same way it would be invoked if you called `detach` directly). Because this Python call is triggered directly from c10::TensorImpl, it must be indirected through `PyInterpreter::detach`, which is the general mechanism for dynamic dispatching to the Python interpreter associated with a TensorImpl.
**torchdeploy compatibility.** The dispatch to Python logic cannot be directly registered to the dispatcher as it is compiled in the Python library, which will get loaded multiple times per torchdeploy interpreter. Thus, we must employ a two phase process. First, we register a fallback inside a non-Python library (aten/src/ATen/core/PythonFallbackKernel.cpp). Its job is to determine the appropriate PyInterpreter to handle the Python dispatch by going through all of the arguments and finding the first argument that has a PyObject/PyInterpreter. With this PyInterpreter, it makes another dynamic dispatch via "dispatch" which will go to the correct torchdeploy interpreter to handle dispatching to actual Python.
**Testing.** We provide a simple example of a LoggingTensor for testing, which can be used to generate TorchScript-like traces to observe what operations are being called when a Tensor is invoked. Although a LoggingTensor would be better implemented via an is-a relationship rather than a has-a relationship (as is done in the test), we've done it this way to show that arbitrarily complex compositions of tensors inside a tensor work properly.
**Known limitations.**
* We haven't adjusted any operator code, so some patterns may not work (as they lose the Python subclass in an unrecoverable way)
* `__torch_function__` must be explicitly disabled with `_disabled_torch_function_impl` otherwise things don't work quite correctly (in particular, what is being disabled is default subclass preservation behavior.)
* We don't ever populate kwargs, even when an argument is kwarg-only
Signed-off-by: Edward Z. Yang <ezyang@fb.com>
Differential Revision:
D29017912
D29017912
Test Plan: Imported from OSS
Reviewed By: bdhirsh
Pulled By: ezyang
fbshipit-source-id: a67714d9e541d09203a8cfc85345b8967db86238
2021-06-25 18:49:20 +00:00
|
|
|
TORCH_INTERNAL_ASSERT(
|
|
|
|
|
0,
|
|
|
|
|
"attempted to dispatch (__torch_dispatch__) an operator on Tensor with nontrivial PyObject after corresponding interpreter died");
|
|
|
|
|
}
|
|
|
|
|
|
2021-05-21 01:15:21 +00:00
|
|
|
void PyInterpreter::disarm() noexcept {
|
|
|
|
|
name_fn_ = &noop_name_fn;
|
Preserve PyObject even when it goes dead (#56017)
Summary:
Pull Request resolved: https://github.com/pytorch/pytorch/pull/56017
Fixes #55686
This patch is seemingly straightforward but some of the changes are very
subtle. For the general algorithmic approach, please first read the
quoted issue. Based on the algorithm, there are some fairly
straightforward changes:
- New boolean on TensorImpl tracking if we own the pyobj or not
- PythonHooks virtual interface for requesting deallocation of pyobj
when TensorImpl is being released and we own its pyobj, and
implementation of the hooks in python_tensor.cpp
- Modification of THPVariable to MaybeOwned its C++ tensor, directly
using swolchok's nice new class
And then, there is python_variable.cpp. Some of the changes follow the
general algorithmic approach:
- THPVariable_NewWithVar is simply adjusted to handle MaybeOwned and
initializes as owend (like before)
- THPVariable_Wrap adds the logic for reverting ownership back to
PyObject when we take out an owning reference to the Python object
- THPVariable_dealloc attempts to resurrect the Python object if
the C++ tensor is live, and otherwise does the same old implementation
as before
- THPVariable_tryResurrect implements the resurrection logic. It is
modeled after CPython code so read the cited logic and see if
it is faithfully replicated
- THPVariable_clear is slightly updated for MaybeOwned and also to
preserve the invariant that if owns_pyobj, then pyobj_ is not null.
This change is slightly dodgy: the previous implementation has a
comment mentioning that the pyobj nulling is required to ensure we
don't try to reuse the dead pyobj. I don't think, in this new world,
this is possible, because the invariant says that the pyobj only
dies if the C++ object is dead too. But I still unset the field
for safety.
And then... there is THPVariableMetaType. colesbury explained in the
issue why this is necessary: when destructing an object in Python, you
start off by running the tp_dealloc of the subclass before moving up
to the parent class (much in the same way C++ destructors work). The
deallocation process for a vanilla Python-defined class does irreparable
harm to the PyObject instance (e.g., the finalizers get run) making it
no longer valid attempt to resurrect later in the tp_dealloc chain.
(BTW, the fact that objects can resurrect but in an invalid state is
one of the reasons why it's so frickin' hard to write correct __del__
implementations). So we need to make sure that we actually override
the tp_dealloc of the bottom most *subclass* of Tensor to make sure
we attempt a resurrection before we start finalizing. To do this,
we need to define a metaclass for Tensor that can override tp_dealloc
whenever we create a new subclass of Tensor. By the way, it was totally
not documented how to create metaclasses in the C++ API, and it took
a good bit of trial error to figure it out (and the answer is now
immortalized in https://stackoverflow.com/q/67077317/23845 -- the things
that I got wrong in earlier versions of the PR included setting
tp_basicsize incorrectly, incorrectly setting Py_TPFLAGS_HAVE_GC on
the metaclass--you want to leave it unset so that it inherits, and
determining that tp_init is what actually gets called when you construct
a class, not tp_call as another not-to-be-named StackOverflow question
suggests).
Aside: Ordinarily, adding a metaclass to a class is a user visible
change, as it means that it is no longer valid to mixin another class
with a different metaclass. However, because _C._TensorBase is a C
extension object, it will typically conflict with most other
metaclasses, so this is not BC breaking.
The desired new behavior of a subclass tp_dealloc is to first test if
we should resurrect, and otherwise do the same old behavior. In an
initial implementation of this patch, I implemented this by saving the
original tp_dealloc (which references subtype_dealloc, the "standard"
dealloc for all Python defined classes) and invoking it. However, this
results in an infinite loop, as it attempts to call the dealloc function
of the base type, but incorrectly chooses subclass type (because it is
not a subtype_dealloc, as we have overridden it; see
https://github.com/python/cpython/blob/b38601d49675d90e1ee6faa47f7adaeca992d02d/Objects/typeobject.c#L1261 )
So, with great reluctance, I must duplicate the behavior of
subtype_dealloc in our implementation. Note that this is not entirely
unheard of in Python binding code; for example, Cython
https://github.com/cython/cython/blob/c25c3ccc4b862592b06e66fd0fc508e4d388437b/Cython/Compiler/ModuleNode.py#L1560
also does similar things. This logic makes up the bulk of
THPVariable_subclass_dealloc
To review this, you should pull up the CPython copy of subtype_dealloc
https://github.com/python/cpython/blob/b38601d49675d90e1ee6faa47f7adaeca992d02d/Objects/typeobject.c#L1230
and verify that I have specialized the implementation for our case
appropriately. Among the simplifications I made:
- I assume PyType_IS_GC, because I assume that Tensor subclasses are
only ever done in Python and those classes are always subject to GC.
(BTW, yes! This means I have broken anyone who has extend PyTorch
tensor from C API directly. I'm going to guess no one has actually
done this.)
- I don't bother walking up the type bases to find the parent dealloc;
I know it is always THPVariable_dealloc. Similarly, I can get rid
of some parent type tests based on knowledge of how
THPVariable_dealloc is defined
- The CPython version calls some private APIs which I can't call, so
I use the public PyObject_GC_UnTrack APIs.
- I don't allow the finalizer of a Tensor to change its type (but
more on this shortly)
One alternative I discussed with colesbury was instead of copy pasting
the subtype_dealloc, we could transmute the type of the object that was
dying to turn it into a different object whose tp_dealloc is
subtype_dealloc, so the stock subtype_dealloc would then be applicable.
We decided this would be kind of weird and didn't do it that way.
TODO:
- More code comments
- Figure out how not to increase the size of TensorImpl with the new
bool field
- Add some torture tests for the THPVariable_subclass_dealloc, e.g.,
involving subclasses of Tensors that do strange things with finalizers
- Benchmark the impact of taking the GIL to release C++ side tensors
(e.g., from autograd)
- Benchmark the impact of adding a new metaclass to Tensor (probably
will be done by separating out the metaclass change into its own
change)
- Benchmark the impact of changing THPVariable to conditionally own
Tensor (as opposed to unconditionally owning it, as before)
- Add tests that this actually indeed preserves the Python object
Signed-off-by: Edward Z. Yang <ezyang@fb.com>
Test Plan: Imported from OSS
Reviewed By: albanD
Differential Revision: D27765125
Pulled By: ezyang
fbshipit-source-id: 857f14bdcca2900727412aff4c2e2d7f0af1415a
2021-06-03 17:47:19 +00:00
|
|
|
decref_fn_ = &noop_decref_fn;
|
Dispatch to Python via __torch_dispatch__ (#59760)
Summary:
Pull Request resolved: https://github.com/pytorch/pytorch/pull/59760
See https://github.com/pytorch/pytorch/issues/59049
There are some moving parts to this PR, I'll structure this explanation so the straightforward parts go first, and then the less straightforward parts.
**The actual dispatch to Python.** The core logic of dispatch to Python lives in `concrete_dispatch_fn` in `torch/csrc/autograd/python_variable.cpp`. It takes the input IValue stack, scans all the arguments for Tensor arguments, and defers most of the heavy lifting to `handle_torch_function_no_python_arg_parser` which actually does all of the logic for calling out to torch dispatch (in particular, this function handles multiple dispatch situations for you). Because we have a different function name than regular `__torch_function__` handling, `handle_torch_function_no_python_arg_parser` is generalized to accept a magic method name to look for when testing if Tensors have custom handling or not. Unlike `__torch_function__`, by default there is no `__torch_dispatch__` on Tensor classes.
**Maintaining the Python dispatch key.** In order to get to the dispatch to Python logic, we must tag Tensors with the `__torch_dispatch__` magic method with the newly added Python dispatch key (separated from PythonFuncTorch to allow for a transitional period while they migrate to this mechanism). We expose a new private property `_is_python_dispatch` that assists in debugging if a Tensor is participating in Python dispatch or not. We apply the Python dispatch key the first time a PyObject for a Tensor is constructed (THPVariable_NewWithVar), testing if `__torch_dispatch__` exists with then newly added `check_has_torch_dispatch`.
**Shallow copy and detach.** For the simple examples tested in this PR, most creations of Tensor route through the dispatcher. The exception to this is `shallow_copy_and_detach`, which bypasses the dispatcher and is used when saving tensors for backwards. When a Tensor is Python dispatch, we override the behavior of `shallow_copy_and_detach` to instead directly call into `__torch_dispatch__` to perform a `detach` operation (in the same way it would be invoked if you called `detach` directly). Because this Python call is triggered directly from c10::TensorImpl, it must be indirected through `PyInterpreter::detach`, which is the general mechanism for dynamic dispatching to the Python interpreter associated with a TensorImpl.
**torchdeploy compatibility.** The dispatch to Python logic cannot be directly registered to the dispatcher as it is compiled in the Python library, which will get loaded multiple times per torchdeploy interpreter. Thus, we must employ a two phase process. First, we register a fallback inside a non-Python library (aten/src/ATen/core/PythonFallbackKernel.cpp). Its job is to determine the appropriate PyInterpreter to handle the Python dispatch by going through all of the arguments and finding the first argument that has a PyObject/PyInterpreter. With this PyInterpreter, it makes another dynamic dispatch via "dispatch" which will go to the correct torchdeploy interpreter to handle dispatching to actual Python.
**Testing.** We provide a simple example of a LoggingTensor for testing, which can be used to generate TorchScript-like traces to observe what operations are being called when a Tensor is invoked. Although a LoggingTensor would be better implemented via an is-a relationship rather than a has-a relationship (as is done in the test), we've done it this way to show that arbitrarily complex compositions of tensors inside a tensor work properly.
**Known limitations.**
* We haven't adjusted any operator code, so some patterns may not work (as they lose the Python subclass in an unrecoverable way)
* `__torch_function__` must be explicitly disabled with `_disabled_torch_function_impl` otherwise things don't work quite correctly (in particular, what is being disabled is default subclass preservation behavior.)
* We don't ever populate kwargs, even when an argument is kwarg-only
Signed-off-by: Edward Z. Yang <ezyang@fb.com>
Differential Revision:
D29017912
D29017912
Test Plan: Imported from OSS
Reviewed By: bdhirsh
Pulled By: ezyang
fbshipit-source-id: a67714d9e541d09203a8cfc85345b8967db86238
2021-06-25 18:49:20 +00:00
|
|
|
detach_fn_ = &noop_detach_fn;
|
|
|
|
|
dispatch_fn_ = &noop_dispatch_fn;
|
2021-05-21 01:15:21 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
} // namespace impl
|
|
|
|
|
|
2021-05-01 04:22:23 +00:00
|
|
|
// Definition of the static error-message constant
// TensorImpl::err_msg_tensor_metadata_change_not_allowed. The text is spelled
// as a run of adjacent string literals (one per output line) that the compiler
// concatenates into a single message. It starts mid-sentence ("is not allowed
// on ..."), so callers presumably prepend the name of the rejected operation
// -- confirm at the use sites.
const char* const TensorImpl::err_msg_tensor_metadata_change_not_allowed =
    "is not allowed on a Tensor created from .data or .detach().\n"
    "If your intent is to change the metadata of a Tensor (such as sizes / strides / storage / storage_offset)\n"
    "without autograd tracking the change, remove the .data / .detach() call and wrap the change in a `with torch.no_grad():` block.\n"
    "For example, change:\n"
    " x.data.set_(y)\n"
    "to:\n"
    " with torch.no_grad():\n"
    " x.set_(y)";
|
|
|
|
|
|
2020-07-16 16:09:19 +00:00
|
|
|
// Returns a mutable reference to this tensor's gradient, as provided by
// autograd_meta_->mutable_grad(). If no autograd metadata is attached yet,
// one is first created through the registered autograd-meta factory's make().
at::Tensor& TensorImpl::mutable_grad() {
  if (!autograd_meta_) {
    // No metadata yet: create it on demand so there is something to return
    // a reference into.
    autograd_meta_ = impl::GetAutogradMetaFactory()->make();
  }
  return autograd_meta_->mutable_grad();
}
|
|
|
|
|
|
2018-12-12 04:40:32 +00:00
|
|
|
// Returns a const reference to this tensor's gradient.
//
// With autograd metadata attached, this forwards to autograd_meta_->grad().
// Without it, the result is whatever the autograd-meta factory's
// undefined_tensor() returns.
//
// The fallback looks odd, but it follows from the signature: the function
// returns a const reference to a Tensor, so it needs an existing object to
// refer to even when there is no metadata. Redesigning the API is not easy
// right now, because the mutable counterpart of this function has to keep
// working so that "x.grad() = ..." stays valid (it is part of the public API).
const at::Tensor& TensorImpl::grad() const {
  if (autograd_meta_) {
    return autograd_meta_->grad();
  }
  return impl::GetAutogradMetaFactory()->undefined_tensor();
}
|
|
|
|
|
|
2021-09-08 20:25:42 +00:00
|
|
|
/// Returns a const reference to the forward gradient that the autograd
/// metadata reports for `self` at the given `level`.
///
/// @param level  forward-AD level, passed through to the autograd metadata.
/// @param self   tensor handle, passed through to the autograd metadata.
/// @return the metadata's `fw_grad(level, self)`, or the factory's
///         `undefined_tensor()` when no autograd metadata is attached.
const at::Tensor& TensorImpl::_fw_grad(
    uint64_t level,
    const at::TensorBase& self) const {
  // See TensorImpl::grad() above for why a reference to the factory's
  // undefined tensor is returned in the no-metadata case.
  if (autograd_meta_) {
    return autograd_meta_->fw_grad(level, self);
  }
  return impl::GetAutogradMetaFactory()->undefined_tensor();
}
|
|
|
|
|
|
2021-05-01 04:22:23 +00:00
|
|
|
void TensorImpl::_set_fw_grad(
|
2021-09-08 20:25:42 +00:00
|
|
|
const at::TensorBase& new_grad,
|
|
|
|
|
const at::TensorBase& self,
|
2021-05-01 04:22:23 +00:00
|
|
|
uint64_t level,
|
|
|
|
|
bool is_inplace_op) {
|
|
|
|
|
if (!autograd_meta_)
|
|
|
|
|
autograd_meta_ = impl::GetAutogradMetaFactory()->make();
|
Reland: Add base forward grad logic (#49734)
Summary:
Pull Request resolved: https://github.com/pytorch/pytorch/pull/49734
RFC: https://github.com/pytorch/rfcs/pull/11
This PR add the basic logic to handle forward grad as dual Tensors.
It contains the following:
- Mechanism to save dual state on a Tensor and clear it up when the dual level ends
- C++ and python user facing API
- Updated view system that is able to track both forward and backward views
The current PR has the following limitations:
- Extensive tests are in the next PR in the stack as formulas are needed to write full tests.
- Only the manual formulas have been audited and no other formula is actually implemented here (they are in the next PR in the stack)
- Only level 0 is allowed for now. This was discussed and agreed that it is not needed for the first version of this PR.
- We can save one ViewInfo creation when both the forward and backward views have the same base. This can be done by adding a boolean flag to the DifferentiableViewMeta and extra logic in the `as_view` method. This is left out to keep this PR concise.
- We can skip tracking forward views if the base has a forward grad. This can be done by adding extra logic in the `as_view` method. This is left out to keep this PR concise.
Reading guide:
- Updated view handling in [gen_variable_type.py](https://github.com/pytorch/pytorch/pull/49097/files#diff-f6553cec68caeaea36f6c8b14ff76a6d39dfd774e0ea9ef2f76e8d81fd9af5df), [VariableTypeUtils.h](https://github.com/pytorch/pytorch/pull/49097/files#diff-ec71cfa45954dece1236c661d170e6341879c5be637f4abf52e826d61b40695a), [variable.cpp](https://github.com/pytorch/pytorch/pull/49097/files#diff-60e3bfe444e89efc7149f25b38e472710525984789934ab83f1bd5671b8ff285) (skip code below "[Forward Grad View]" for now), [variable.h](https://github.com/pytorch/pytorch/pull/49097/files#diff-1604bcd0e4350ed99ec45e437cee7ac9ebe337392c9ea16a236247aeeb35b02bR266-R542) and [custom_function.cpp](https://github.com/pytorch/pytorch/pull/49097/files#diff-dd85f452082b5bb6612bbc12adb496f8827defa228509f7b493de1d517522d5d). This introduces the new ViewInfo to hold view informations shared for forward and backward. It also updates the differentiable view meta to use this. And it updates the as_view function to handle both forward and backward view.
- New forward grad class that handle storing gradients and tracking at each level [forward_grad.h](https://github.com/pytorch/pytorch/pull/49097/files#diff-c6c5b9ab2d7e5dde4102495faa1b6bbbfc23aa3e47deb7359c0bfe1eb004c0cb), [forward_grad.cpp](https://github.com/pytorch/pytorch/pull/49097/files#diff-de2ab54ade7312701850d71a119a4f4ee4b9fc5a9c42a467cdd4e73c033531dd) and [build_variables.bzl](https://github.com/pytorch/pytorch/pull/49097/files#diff-dfdfa2efb17beddfd9094524f95351fd197db6c8857e96b436fb599870359325). EDIT: These files also contain the new flag to globally disable forward AD that allows us to reduce performance issues while this is in development.
- Lowest level API and binding between Tensor and AutogradMeta in [TensorBody.h](https://github.com/pytorch/pytorch/pull/49097/files#diff-7554853205392fa743357bf845ecc350a974ec049383248c12daaf2f4de04911), [TensorImpl.cpp](https://github.com/pytorch/pytorch/pull/49097/files#diff-052bd9150ef8e09289ddf644b5a6830ede49207201cd41728f6d7cc6d9cead94), [TensorImpl.h](https://github.com/pytorch/pytorch/pull/49097/files#diff-a15aae4cf23da44970db7cece62ff981265575c798c62f7b52d87c8809dfe2e1) and the rest of [variable.cpp](https://github.com/pytorch/pytorch/pull/49097/files#diff-60e3bfe444e89efc7149f25b38e472710525984789934ab83f1bd5671b8ff285R557-R677)
- API to access the forward primal that needs to be a differentiable function (and so in native_functions.yaml) [native_functions.yaml](https://github.com/pytorch/pytorch/pull/49097/files#diff-2f3dbd85efb9b5172f2264eedd3be47dd765e6ab7cc8bf3ade5e62c28ae35991) [NamedRegistrations.cpp](https://github.com/pytorch/pytorch/pull/49097/files#diff-69bd3bea510c9b64e1633fa18c3ea63d4b8348dbad3a78ad9de844ab3e43dc1d), [VariableMethodsStub.cpp](https://github.com/pytorch/pytorch/pull/49097/files#diff-23f5fcb737a2b289811fe0f4b65aef775e7c824b2e629ecd343df51405cd434f), [derivatives.yaml](https://github.com/pytorch/pytorch/pull/49097/files#diff-e4c2f99a2404e98c3586e07425da73008f36b1bada790648a7297af141d37f8c), [gen_python_functions.py](https://github.com/pytorch/pytorch/pull/49097/files#diff-e4c2f99a2404e98c3586e07425da73008f36b1bada790648a7297af141d37f8c), [gen_trace_type.py](https://github.com/pytorch/pytorch/pull/49097/files#diff-54e0b976027bf8debefb959ff360b89ae93466970c843365b1b3a03806d868ce), [TraceTypeManual.cpp](https://github.com/pytorch/pytorch/pull/49097/files#diff-f34636741ad4a23d018e0c289bc750c3bad887b45660e1d6eaf440d234a78fbf) and [part of VariableTypeManual.cpp](https://github.com/pytorch/pytorch/pull/49097/files#diff-6e19a1bce8cbdba8714b6e2c794a76bc0864b64a49cfa757cb0b5afdc937d1a4R198-R243)
- c++ API [autograd.h](https://github.com/pytorch/pytorch/pull/49097/files#diff-349028fbe8291a965a7a263c323b208fe071c35c66179ee997ef84fa81aa4b1e), [autograd.cpp](https://github.com/pytorch/pytorch/pull/49097/files#diff-a3fe908d67dfec16a1fcde300de68b0701bf68b88db7451f29f2bee255cf30c9)
- python binding [init.cpp](https://github.com/pytorch/pytorch/pull/49097/files#diff-c58a67c85191c22c9b3bb439117d8053edfd9dea839fa010cf967d404c3c630d)
- python API [forward_ad.py](https://github.com/pytorch/pytorch/pull/49097/files#diff-a4efad4ba18fffdfb264c21e5475997a24a743089a899f8ec1a5ff962c6738d9), [autograd/__init__.py](https://github.com/pytorch/pytorch/pull/49097/files#diff-743abcafd32ad0e69f39ac5a91df4197b7e1921c135cacee7ef6dc829a8a7af8)
- c++ and python printing [Formatting.cpp](https://github.com/pytorch/pytorch/pull/49097/files#diff-881dba501e71662e2e4818b4b016f739b344c8aed2f5edc6b871eda47a2aced0), [_tensor_str.py](https://github.com/pytorch/pytorch/pull/49097/files#diff-a7911f8d5e73adbff914d99fd7818ace2a7030b6a3748abe06ec6fc6e3df9cc3)
- Utility for formulas and updated manual functions to respect new view system as well as forward grad [FunctionsManual.h](https://github.com/pytorch/pytorch/pull/49097/files#diff-6378bb6dc81a64dab676d61731341fa5d1088418f32a1473a33a0ccfc2357dc1), [FunctionsManual.cpp](https://github.com/pytorch/pytorch/pull/49097/files#diff-4adbd88239afcd60e8198aab65d4f5e43b62314e34b80551e997a1ea503adea5) [rest of VariableTypeManual.cpp](https://github.com/pytorch/pytorch/pull/49097/files#diff-6e19a1bce8cbdba8714b6e2c794a76bc0864b64a49cfa757cb0b5afdc937d1a4R264-R433)
- Ensure SavedVariable save forward grad properly [saved_variable.h](https://github.com/pytorch/pytorch/pull/49097/files#diff-c1b8039d776241abe177d5aa99b79dd9489a9b3e529da8ab24c2e386c1238ae2), [saved_variable.cpp](https://github.com/pytorch/pytorch/pull/49097/files#diff-cc9fba479b5beae06b2eea2e390d17796e0341c5b037a20b5bcaccbb0c341030)
Test Plan: Imported from OSS
Reviewed By: gchanan
Differential Revision: D25678797
Pulled By: albanD
fbshipit-source-id: 3d58550c11b5f58b9b73fd30596d042b857fb9dd
2020-12-22 20:07:00 +00:00
|
|
|
autograd_meta_->set_fw_grad(new_grad, self, level, is_inplace_op);
|
|
|
|
|
}
|
|
|
|
|
|
2020-05-21 22:21:23 +00:00
|
|
|
/// Constructs a TensorImpl from a storage, taking the device from the
/// storage itself.
///
/// Delegates to the four-argument constructor, supplying `storage.device()`
/// as the device argument.
TensorImpl::TensorImpl(
    Storage&& storage,
    DispatchKeySet key_set,
    const caffe2::TypeMeta data_type)
    // std::forward (rather than std::move) is used here only to suppress a
    // static analyzer false positive.
    : TensorImpl(
          std::forward<Storage>(storage),
          key_set,
          data_type,
          storage.device()) {
}
|
2019-04-05 14:18:38 +00:00
|
|
|
|
Dispatch to Python via __torch_dispatch__ (#59760)
Summary:
Pull Request resolved: https://github.com/pytorch/pytorch/pull/59760
See https://github.com/pytorch/pytorch/issues/59049
There are some moving parts to this PR, I'll structure this explanation so the straightforward parts go first, and then the less straightforward parts.
**The actual dispatch to Python.** The core logic of dispatch to Python lives in `concrete_dispatch_fn` in `torch/csrc/autograd/python_variable.cpp`. It takes the input IValue stack, scans all the arguments for Tensor arguments, and defers most of the heavy lifting to `handle_torch_function_no_python_arg_parser` which actually does all of the logic for calling out to torch dispatch (in particular, this function handles multiple dispatch situations for you). Because we have a different function name than regular `__torch_function__` handling, `handle_torch_function_no_python_arg_parser` is generalized to accept a magic method name to look for when testing if Tensors have custom handling or not. Unlike `__torch_function__`, by default there is no `__torch_dispatch__` on Tensor classes.
**Maintaining the Python dispatch key.** In order to get to the dispatch to Python logic, we must tag Tensors with the `__torch_dispatch__` magic method with the newly added Python dispatch key (separated from PythonFuncTorch to allow for a transitional period while they migrate to this mechanism). We expose a new private property `_is_python_dispatch` that assists in debugging if a Tensor is participating in Python dispatch or not. We apply the Python dispatch key the first time a PyObject for a Tensor is constructed (THPVariable_NewWithVar), testing if `__torch_dispatch__` exists with then newly added `check_has_torch_dispatch`.
**Shallow copy and detach.** For the simple examples tested in this PR, most creations of Tensor route through the dispatcher. The exception to this is `shallow_copy_and_detach`, which bypasses the dispatcher and is used when saving tensors for backwards. When a Tensor is Python dispatch, we override the behavior of `shallow_copy_and_detach` to instead directly call into `__torch_dispatch__` to perform a `detach` operation (in the same way it would be invoked if you called `detach` directly). Because this Python call is triggered directly from c10::TensorImpl, it must be indirected through `PyInterpreter::detach`, which is the general mechanism for dynamic dispatching to the Python interpreter associated with a TensorImpl.
**torchdeploy compatibility.** The dispatch to Python logic cannot be directly registered to the dispatcher as it is compiled in the Python library, which will get loaded multiple times per torchdeploy interpreter. Thus, we must employ a two phase process. First, we register a fallback inside a non-Python library (aten/src/ATen/core/PythonFallbackKernel.cpp). Its job is to determine the appropriate PyInterpreter to handle the Python dispatch by going through all of the arguments and finding the first argument that has a PyObject/PyInterpreter. With this PyInterpreter, it makes another dynamic dispatch via "dispatch" which will go to the correct torchdeploy interpreter to handle dispatching to actual Python.
**Testing.** We provide a simple example of a LoggingTensor for testing, which can be used to generate TorchScript-like traces to observe what operations are being called when a Tensor is invoked. Although a LoggingTensor would be better implemented via an is-a relationship rather than a has-a relationship (as is done in the test), we've done it this way to show that arbitrarily complex compositions of tensors inside a tensor work properly.
**Known limitations.**
* We haven't adjusted any operator code, so some patterns may not work (as they lose the Python subclass in an unrecoverable way)
* `__torch_function__` must be explicitly disabled with `_disabled_torch_function_impl` otherwise things don't work quite correctly (in particular, what is being disabled is default subclass preservation behavior.)
* We don't ever populate kwargs, even when an argument is kwarg-only
Signed-off-by: Edward Z. Yang <ezyang@fb.com>
Differential Revision:
D29017912
D29017912
Test Plan: Imported from OSS
Reviewed By: bdhirsh
Pulled By: ezyang
fbshipit-source-id: a67714d9e541d09203a8cfc85345b8967db86238
2021-06-25 18:49:20 +00:00
|
|
|
// [Note: Python key removal]
|
|
|
|
|
// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
|
|
|
// In most constructors for TensorImpl, you will see Python key is removed from
|
|
|
|
|
// the passed in DispatchKeySet. Why?
|
|
|
|
|
//
|
|
|
|
|
// INVARIANT: Python dispatch key is set iff PyObject for the Tensor has a
|
|
|
|
|
// nontrivial __torch_dispatch__ implementation.
|
|
|
|
|
//
|
|
|
|
|
// When a fresh TensorImpl is created, there is *no* PyObject (this only gets
|
|
|
|
|
// initialized lazily at the first point in time the Tensor passes into Python).
|
|
|
|
|
// So keeping the Python key at construction time would violate the invariant.
|
|
|
|
|
//
|
|
|
|
|
// In practice, what will happen shortly afterwards is that the TensorImpl
|
|
|
|
|
// will get its PyObject initialized by Tensor._make_subclass; at this point
|
|
|
|
|
// the Python dispatch key will be set and all is well. The point is to delay
|
|
|
|
|
// the dispatch key setting until that point.
|
|
|
|
|
|
Make PyTorch code-base clang-tidy compliant (#56892)
Summary:
This is an automatic change generated by the following script:
```
#!/usr/bin/env python3
from subprocess import check_output, check_call
import os
def get_compiled_files_list():
import json
with open("build/compile_commands.json") as f:
data = json.load(f)
files = [os.path.relpath(node['file']) for node in data]
for idx, fname in enumerate(files):
if fname.startswith('build/') and fname.endswith('.DEFAULT.cpp'):
files[idx] = fname[len('build/'):-len('.DEFAULT.cpp')]
return files
def run_clang_tidy(fname):
check_call(["python3", "tools/clang_tidy.py", "-c", "build", "-x", fname,"-s"])
changes = check_output(["git", "ls-files", "-m"])
if len(changes) == 0:
return
check_call(["git", "commit","--all", "-m", f"NOLINT stubs for {fname}"])
def main():
git_files = check_output(["git", "ls-files"]).decode("ascii").split("\n")
compiled_files = get_compiled_files_list()
for idx, fname in enumerate(git_files):
if fname not in compiled_files:
continue
if fname.startswith("caffe2/contrib/aten/"):
continue
print(f"[{idx}/{len(git_files)}] Processing {fname}")
run_clang_tidy(fname)
if __name__ == "__main__":
main()
```
Pull Request resolved: https://github.com/pytorch/pytorch/pull/56892
Reviewed By: H-Huang
Differential Revision: D27991944
Pulled By: malfet
fbshipit-source-id: 5415e1eb2c1b34319a4f03024bfaa087007d7179
2021-04-28 21:09:06 +00:00
|
|
|
// NOLINTNEXTLINE(cppcoreguidelines-pro-type-member-init)
|
2021-04-09 21:39:14 +00:00
|
|
|
TensorImpl::TensorImpl(
|
|
|
|
|
ImplType type,
|
|
|
|
|
Storage&& storage,
|
|
|
|
|
DispatchKeySet key_set,
|
|
|
|
|
const caffe2::TypeMeta data_type)
|
|
|
|
|
: storage_(std::move(storage)),
|
2021-05-21 01:15:21 +00:00
|
|
|
pyobj_interpreter_(nullptr),
|
|
|
|
|
pyobj_(nullptr),
|
2021-04-09 21:39:14 +00:00
|
|
|
storage_offset_(0),
|
|
|
|
|
numel_(0),
|
|
|
|
|
data_type_(data_type),
|
|
|
|
|
device_opt_(storage_.device()),
|
Dispatch to Python via __torch_dispatch__ (#59760)
Summary:
Pull Request resolved: https://github.com/pytorch/pytorch/pull/59760
See https://github.com/pytorch/pytorch/issues/59049
There are some moving parts to this PR, I'll structure this explanation so the straightforward parts go first, and then the less straightforward parts.
**The actual dispatch to Python.** The core logic of dispatch to Python lives in `concrete_dispatch_fn` in `torch/csrc/autograd/python_variable.cpp`. It takes the input IValue stack, scans all the arguments for Tensor arguments, and defers most of the heavy lifting to `handle_torch_function_no_python_arg_parser` which actually does all of the logic for calling out to torch dispatch (in particular, this function handles multiple dispatch situations for you). Because we have a different function name than regular `__torch_function__` handling, `handle_torch_function_no_python_arg_parser` is generalized to accept a magic method name to look for when testing if Tensors have custom handling or not. Unlike `__torch_function__`, by default there is no `__torch_dispatch__` on Tensor classes.
**Maintaining the Python dispatch key.** In order to get to the dispatch to Python logic, we must tag Tensors with the `__torch_dispatch__` magic method with the newly added Python dispatch key (separated from PythonFuncTorch to allow for a transitional period while they migrate to this mechanism). We expose a new private property `_is_python_dispatch` that assists in debugging if a Tensor is participating in Python dispatch or not. We apply the Python dispatch key the first time a PyObject for a Tensor is constructed (THPVariable_NewWithVar), testing if `__torch_dispatch__` exists with then newly added `check_has_torch_dispatch`.
**Shallow copy and detach.** For the simple examples tested in this PR, most creations of Tensor route through the dispatcher. The exception to this is `shallow_copy_and_detach`, which bypasses the dispatcher and is used when saving tensors for backwards. When a Tensor is Python dispatch, we override the behavior of `shallow_copy_and_detach` to instead directly call into `__torch_dispatch__` to perform a `detach` operation (in the same way it would be invoked if you called `detach` directly). Because this Python call is triggered directly from c10::TensorImpl, it must be indirected through `PyInterpreter::detach`, which is the general mechanism for dynamic dispatching to the Python interpreter associated with a TensorImpl.
**torchdeploy compatibility.** The dispatch to Python logic cannot be directly registered to the dispatcher as it is compiled in the Python library, which will get loaded multiple times per torchdeploy interpreter. Thus, we must employ a two phase process. First, we register a fallback inside a non-Python library (aten/src/ATen/core/PythonFallbackKernel.cpp). Its job is to determine the appropriate PyInterpreter to handle the Python dispatch by going through all of the arguments and finding the first argument that has a PyObject/PyInterpreter. With this PyInterpreter, it makes another dynamic dispatch via "dispatch" which will go to the correct torchdeploy interpreter to handle dispatching to actual Python.
**Testing.** We provide a simple example of a LoggingTensor for testing, which can be used to generate TorchScript-like traces to observe what operations are being called when a Tensor is invoked. Although a LoggingTensor would be better implemented via an is-a relationship rather than a has-a relationship (as is done in the test), we've done it this way to show that arbitrarily complex compositions of tensors inside a tensor work properly.
**Known limitations.**
* We haven't adjusted any operator code, so some patterns may not work (as they lose the Python subclass in an unrecoverable way)
* `__torch_function__` must be explicitly disabled with `_disabled_torch_function_impl` otherwise things don't work quite correctly (in particular, what is being disabled is default subclass preservation behavior.)
* We don't ever populate kwargs, even when an argument is kwarg-only
Signed-off-by: Edward Z. Yang <ezyang@fb.com>
Differential Revision:
D29017912
D29017912
Test Plan: Imported from OSS
Reviewed By: bdhirsh
Pulled By: ezyang
fbshipit-source-id: a67714d9e541d09203a8cfc85345b8967db86238
2021-06-25 18:49:20 +00:00
|
|
|
key_set_(key_set.remove(
|
|
|
|
|
DispatchKey::Python)) { // See [Note: Python key removal]
|
2021-04-09 21:39:14 +00:00
|
|
|
init_bitfields();
|
|
|
|
|
// Inference tensor doesn't have version counter.
|
2021-06-04 15:58:06 +00:00
|
|
|
if (!is_inference()) {
|
2021-04-09 22:53:13 +00:00
|
|
|
version_counter_ = VariableVersion(/*version=*/0);
|
2021-04-09 21:39:14 +00:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2021-05-01 04:22:23 +00:00
|
|
|
// Storage-less constructor: forwards to the storage-taking overload, handing
// it a default-constructed Storage together with the caller's key set, dtype
// and optional device.
TensorImpl::TensorImpl(
    DispatchKeySet key_set,
    const caffe2::TypeMeta data_type,
    c10::optional<c10::Device> device_opt)
    // NOLINTNEXTLINE(performance-move-const-arg)
    : TensorImpl(Storage(), key_set, data_type, std::move(device_opt)) {}
|
2018-09-13 23:26:34 +00:00
|
|
|
|
2021-04-14 18:16:51 +00:00
|
|
|
// NOLINTNEXTLINE(cppcoreguidelines-pro-type-member-init)
|
2021-05-01 04:22:23 +00:00
|
|
|
TensorImpl::TensorImpl(
|
|
|
|
|
Storage&& storage,
|
|
|
|
|
DispatchKeySet key_set,
|
|
|
|
|
const caffe2::TypeMeta data_type,
|
|
|
|
|
c10::optional<c10::Device> device_opt)
|
2018-09-13 23:26:34 +00:00
|
|
|
: storage_(std::move(storage)),
|
2021-05-21 01:15:21 +00:00
|
|
|
pyobj_interpreter_(nullptr),
|
|
|
|
|
pyobj_(nullptr),
|
2018-10-03 21:12:15 +00:00
|
|
|
storage_offset_(0),
|
2018-09-13 23:26:34 +00:00
|
|
|
numel_(0),
|
2018-09-17 16:08:34 +00:00
|
|
|
data_type_(data_type),
|
2020-09-01 16:03:45 +00:00
|
|
|
device_opt_(device_opt) {
|
2020-10-30 17:11:22 +00:00
|
|
|
init_bitfields();
|
|
|
|
|
|
2020-01-15 19:12:17 +00:00
|
|
|
if (!key_set.empty()) {
|
2021-05-01 04:22:23 +00:00
|
|
|
TORCH_INTERNAL_ASSERT(
|
|
|
|
|
data_type == ScalarType::Undefined || device_opt_.has_value());
|
2019-05-24 06:14:48 +00:00
|
|
|
// UndefinedTensorImpl is a singleton, so we skip logging it
|
|
|
|
|
C10_LOG_API_USAGE_ONCE("tensor.create");
|
|
|
|
|
}
|
2021-04-09 21:39:14 +00:00
|
|
|
|
|
|
|
|
bool inference_mode = c10::InferenceMode::is_enabled();
|
|
|
|
|
|
2021-04-30 15:45:08 +00:00
|
|
|
// TODO: be more explicit about the full key set at call sites so we
|
|
|
|
|
// don't have to keep recomputing it here
|
|
|
|
|
DispatchKey k = key_set.highestPriorityBackendTypeId();
|
|
|
|
|
|
|
|
|
|
key_set = key_set | getAutocastRelatedKeySetFromBackend(k);
|
|
|
|
|
|
Dispatch to Python via __torch_dispatch__ (#59760)
Summary:
Pull Request resolved: https://github.com/pytorch/pytorch/pull/59760
See https://github.com/pytorch/pytorch/issues/59049
There are some moving parts to this PR, I'll structure this explanation so the straightforward parts go first, and then the less straightforward parts.
**The actual dispatch to Python.** The core logic of dispatch to Python lives in `concrete_dispatch_fn` in `torch/csrc/autograd/python_variable.cpp`. It takes the input IValue stack, scans all the arguments for Tensor arguments, and defers most of the heavy lifting to `handle_torch_function_no_python_arg_parser` which actually does all of the logic for calling out to torch dispatch (in particular, this function handles multiple dispatch situations for you). Because we have a different function name than regular `__torch_function__` handling, `handle_torch_function_no_python_arg_parser` is generalized to accept a magic method name to look for when testing if Tensors have custom handling or not. Unlike `__torch_function__`, by default there is no `__torch_dispatch__` on Tensor classes.
**Maintaining the Python dispatch key.** In order to get to the dispatch to Python logic, we must tag Tensors with the `__torch_dispatch__` magic method with the newly added Python dispatch key (separated from PythonFuncTorch to allow for a transitional period while they migrate to this mechanism). We expose a new private property `_is_python_dispatch` that assists in debugging if a Tensor is participating in Python dispatch or not. We apply the Python dispatch key the first time a PyObject for a Tensor is constructed (THPVariable_NewWithVar), testing if `__torch_dispatch__` exists with then newly added `check_has_torch_dispatch`.
**Shallow copy and detach.** For the simple examples tested in this PR, most creations of Tensor route through the dispatcher. The exception to this is `shallow_copy_and_detach`, which bypasses the dispatcher and is used when saving tensors for backwards. When a Tensor is Python dispatch, we override the behavior of `shallow_copy_and_detach` to instead directly call into `__torch_dispatch__` to perform a `detach` operation (in the same way it would be invoked if you called `detach` directly). Because this Python call is triggered directly from c10::TensorImpl, it must be indirected through `PyInterpreter::detach`, which is the general mechanism for dynamic dispatching to the Python interpreter associated with a TensorImpl.
**torchdeploy compatibility.** The dispatch to Python logic cannot be directly registered to the dispatcher as it is compiled in the Python library, which will get loaded multiple times per torchdeploy interpreter. Thus, we must employ a two phase process. First, we register a fallback inside a non-Python library (aten/src/ATen/core/PythonFallbackKernel.cpp). Its job is to determine the appropriate PyInterpreter to handle the Python dispatch by going through all of the arguments and finding the first argument that has a PyObject/PyInterpreter. With this PyInterpreter, it makes another dynamic dispatch via "dispatch" which will go to the correct torchdeploy interpreter to handle dispatching to actual Python.
**Testing.** We provide a simple example of a LoggingTensor for testing, which can be used to generate TorchScript-like traces to observe what operations are being called when a Tensor is invoked. Although a LoggingTensor would be better implemented via an is-a relationship rather than a has-a relationship (as is done in the test), we've done it this way to show that arbitrarily complex compositions of tensors inside a tensor work properly.
**Known limitations.**
* We haven't adjusted any operator code, so some patterns may not work (as they lose the Python subclass in an unrecoverable way)
* `__torch_function__` must be explicitly disabled with `_disabled_torch_function_impl` otherwise things don't work quite correctly (in particular, what is being disabled is default subclass preservation behavior.)
* We don't ever populate kwargs, even when an argument is kwarg-only
Signed-off-by: Edward Z. Yang <ezyang@fb.com>
Differential Revision:
D29017912
D29017912
Test Plan: Imported from OSS
Reviewed By: bdhirsh
Pulled By: ezyang
fbshipit-source-id: a67714d9e541d09203a8cfc85345b8967db86238
2021-06-25 18:49:20 +00:00
|
|
|
key_set =
|
|
|
|
|
key_set.remove(DispatchKey::Python); // See [Note: Python key removal]
|
|
|
|
|
|
2021-04-09 21:39:14 +00:00
|
|
|
// Inference tensor doesn't have autograd related keys.
|
|
|
|
|
if (inference_mode) {
|
2021-05-01 04:22:23 +00:00
|
|
|
// See Note [Expected TLS state in InferenceMode] for why we exclude
|
2021-05-02 05:55:12 +00:00
|
|
|
// Autograd & ADInplaceOrView keys. Normally key_set only contains backend
|
2021-05-01 04:22:23 +00:00
|
|
|
// keys but we do the substraction here to make sure.
|
2021-05-02 05:55:12 +00:00
|
|
|
key_set_ = key_set - c10::autograd_dispatch_keyset_with_ADInplaceOrView;
|
2021-03-31 17:46:38 +00:00
|
|
|
} else {
|
2021-05-01 04:22:23 +00:00
|
|
|
// TODO: Ideally we only add AutogradBackend key when the tensor requires
|
|
|
|
|
// grad.
|
2021-03-31 17:46:38 +00:00
|
|
|
// See Note [Dream: skip VariableType kernel when requires_grad=false]
|
|
|
|
|
key_set_ = key_set | getAutogradRelatedKeySetFromBackend(k);
|
|
|
|
|
}
|
2020-09-01 16:03:45 +00:00
|
|
|
|
2021-04-09 21:39:14 +00:00
|
|
|
// Inference tensor doesn't have version counter.
|
2021-06-04 15:58:06 +00:00
|
|
|
if (!is_inference()) {
|
2021-04-09 22:53:13 +00:00
|
|
|
version_counter_ = VariableVersion(/*version=*/0);
|
2021-04-09 21:39:14 +00:00
|
|
|
}
|
|
|
|
|
|
2021-05-01 04:22:23 +00:00
|
|
|
// we would also like to check that non-cpu devices have an index, but some
|
|
|
|
|
// Caffe2 operators create Storages with default devices.
|
2018-10-03 21:12:15 +00:00
|
|
|
}
|
2018-08-16 15:08:43 +00:00
|
|
|
|
2021-01-12 18:31:12 +00:00
|
|
|
#ifndef C10_DISABLE_TENSORIMPL_EXTENSIBILITY
|
Rename IntList to IntArrayRef. (#16751)
Summary:
Pull Request resolved: https://github.com/pytorch/pytorch/pull/16751
This was made more complicated by the fact that ivalue::IntList
is a thing. So I had to fix all of the sites where we referring
to IValue post facto.
The following codemods were run, in this order:
```
codemod -m -d . --extensions cc,cpp,cu,cuh,h,hpp,py,cwrap,yaml,in IntList IntArrayRef
codemod -m -d . --extensions cc,cpp,cu,cuh,h,hpp,py,cwrap,yaml,in IntArrayRef::create IntList::create
codemod -m -d . --extensions cc,cpp,cu,cuh,h,hpp,py,cwrap,yaml,in ivalue::IntArrayRef ivalue::IntList
codemod -m -d . --extensions cc,cpp,cu,cuh,h,hpp,py,cwrap,yaml,in Tag::IntArrayRef Tag::IntList
codemod -m -d . --extensions cc,cpp,cu,cuh,h,hpp,py,cwrap,yaml,in isIntArrayRef isIntList
codemod -m -d . --extensions cc,cpp,cu,cuh,h,hpp,py,cwrap,yaml,in toIntArrayRef toIntList
codemod -m -d . --extensions cc,cpp,cu,cuh,h,hpp,py,cwrap,yaml,in 'Shared<IntArrayRef>' 'Shared<IntList>'
codemod -m -d . --extensions cc,cpp,cu,cuh,h,hpp,py,cwrap,yaml,in 'intrusive_ptr<IntArrayRef>' 'intrusive_ptr<IntList>'
```
Some manual fixups were done afterwards; they can be reviewed separately
at https://github.com/pytorch/pytorch/pull/16752
Reviewed By: dzhulgakov
Differential Revision: D13954363
fbshipit-source-id: b5c40aacba042402155a2f5a229fa6db7992ac64
2019-02-05 22:39:43 +00:00
|
|
|
IntArrayRef TensorImpl::sizes() const {
|
2021-01-08 04:54:20 +00:00
|
|
|
return sizes_and_strides_.sizes_arrayref();
|
Tensor merge PRs from July 20 (#9713)
Summary:
Constituent PRs:
- [x] #9553 Remove unnecessary functions from StorageDerived.h (by cpuhrsch, reviewed by ezyang)
- [x] #9588 Use THTensor/Storage for THVoidTensor/Storage (by cpuhrsch , reviewed by gchanan)
- [x] #9627 Delete context from tensor (by ezyang, reviewed by gchanan)
- [x] #9641 Tensor reorganization (by ezyang, reviewed by gchanan )
- [x] #9647 Remove dim_ from THTensor (by cpuhrsch, reviewed by ezyang)
- [x] #9650 Remove context (by cpuhrsch, reviewed by gchanan and ezyang)
- [x] #9715 Fix Windows build in tensor merge PR (by ezyang, reviewed by gchanan and SsnL)
Upcoming PRs which didn't make this cut:
- [x] #9644 Stride move to TensorImpl, and nits (by ezyang, reviewed by gchanan)
- [ ] #9652 Native localScalar (by ezyang, **UNREVIEWED AND FAILING TESTS**)
- [x] #9710 Devirtualize TensorImpl::toString (by ezyang, reviewed by gchanan)
- [ ] #9654 Use int64_t instead of ptrdiff_t for size / Rename flag to resizable_ (by cpuhrsch, **CHANGES REQUESTED AND FAILING TESTS**)
Pull Request resolved: https://github.com/pytorch/pytorch/pull/9713
Reviewed By: gchanan
Differential Revision: D8960882
Pulled By: ezyang
fbshipit-source-id: 99747b2c5462c7ff6809b67aacb4197626408204
2018-07-24 00:40:19 +00:00
|
|
|
}
|
2021-01-12 18:31:12 +00:00
|
|
|
#endif
|
Tensor merge PRs from July 20 (#9713)
Summary:
Constituent PRs:
- [x] #9553 Remove unnecessary functions from StorageDerived.h (by cpuhrsch, reviewed by ezyang)
- [x] #9588 Use THTensor/Storage for THVoidTensor/Storage (by cpuhrsch , reviewed by gchanan)
- [x] #9627 Delete context from tensor (by ezyang, reviewed by gchanan)
- [x] #9641 Tensor reorganization (by ezyang, reviewed by gchanan )
- [x] #9647 Remove dim_ from THTensor (by cpuhrsch, reviewed by ezyang)
- [x] #9650 Remove context (by cpuhrsch, reviewed by gchanan and ezyang)
- [x] #9715 Fix Windows build in tensor merge PR (by ezyang, reviewed by gchanan and SsnL)
Upcoming PRs which didn't make this cut:
- [x] #9644 Stride move to TensorImpl, and nits (by ezyang, reviewed by gchanan)
- [ ] #9652 Native localScalar (by ezyang, **UNREVIEWED AND FAILING TESTS**)
- [x] #9710 Devirtualize TensorImpl::toString (by ezyang, reviewed by gchanan)
- [ ] #9654 Use int64_t instead of ptrdiff_t for size / Rename flag to resizable_ (by cpuhrsch, **CHANGES REQUESTED AND FAILING TESTS**)
Pull Request resolved: https://github.com/pytorch/pytorch/pull/9713
Reviewed By: gchanan
Differential Revision: D8960882
Pulled By: ezyang
fbshipit-source-id: 99747b2c5462c7ff6809b67aacb4197626408204
2018-07-24 00:40:19 +00:00
|
|
|
|
Rename IntList to IntArrayRef. (#16751)
Summary:
Pull Request resolved: https://github.com/pytorch/pytorch/pull/16751
This was made more complicated by the fact that ivalue::IntList
is a thing. So I had to fix all of the sites where we referring
to IValue post facto.
The following codemods were run, in this order:
```
codemod -m -d . --extensions cc,cpp,cu,cuh,h,hpp,py,cwrap,yaml,in IntList IntArrayRef
codemod -m -d . --extensions cc,cpp,cu,cuh,h,hpp,py,cwrap,yaml,in IntArrayRef::create IntList::create
codemod -m -d . --extensions cc,cpp,cu,cuh,h,hpp,py,cwrap,yaml,in ivalue::IntArrayRef ivalue::IntList
codemod -m -d . --extensions cc,cpp,cu,cuh,h,hpp,py,cwrap,yaml,in Tag::IntArrayRef Tag::IntList
codemod -m -d . --extensions cc,cpp,cu,cuh,h,hpp,py,cwrap,yaml,in isIntArrayRef isIntList
codemod -m -d . --extensions cc,cpp,cu,cuh,h,hpp,py,cwrap,yaml,in toIntArrayRef toIntList
codemod -m -d . --extensions cc,cpp,cu,cuh,h,hpp,py,cwrap,yaml,in 'Shared<IntArrayRef>' 'Shared<IntList>'
codemod -m -d . --extensions cc,cpp,cu,cuh,h,hpp,py,cwrap,yaml,in 'intrusive_ptr<IntArrayRef>' 'intrusive_ptr<IntList>'
```
Some manual fixups were done afterwards; they can be reviewed separately
at https://github.com/pytorch/pytorch/pull/16752
Reviewed By: dzhulgakov
Differential Revision: D13954363
fbshipit-source-id: b5c40aacba042402155a2f5a229fa6db7992ac64
2019-02-05 22:39:43 +00:00
|
|
|
// Returns a view of this tensor's strides as held in `sizes_and_strides_`.
IntArrayRef TensorImpl::strides() const {
  const IntArrayRef stride_view = sizes_and_strides_.strides_arrayref();
  return stride_view;
}
|
|
|
|
|
|
2021-03-08 20:25:37 +00:00
|
|
|
void TensorImpl::HandleResize() {
|
|
|
|
|
// If needed, we will free the data. the next mutable_data() call
|
|
|
|
|
// will create the data storage.
|
|
|
|
|
bool reset_tensor = false;
|
|
|
|
|
if (reserved_) {
|
|
|
|
|
// If tensor is reserved then don't claim its memeory unless nbytes()
|
|
|
|
|
// is smaller than new size
|
2021-05-01 04:22:23 +00:00
|
|
|
reset_tensor =
|
|
|
|
|
storage_.nbytes() < (storage_offset_ + numel_) * data_type_.itemsize();
|
2021-03-08 20:25:37 +00:00
|
|
|
} else {
|
|
|
|
|
reset_tensor = storage_.nbytes() <
|
2021-05-01 04:22:23 +00:00
|
|
|
(storage_offset_ + numel_) * data_type_.itemsize() ||
|
|
|
|
|
!FLAGS_caffe2_keep_on_shrink ||
|
|
|
|
|
storage_.nbytes() - (storage_offset_ + numel_) * data_type_.itemsize() >
|
|
|
|
|
static_cast<size_t>(FLAGS_caffe2_max_keep_on_shrink_memory);
|
2021-03-08 20:25:37 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (reset_tensor && storage_initialized()) {
|
|
|
|
|
FreeMemory();
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2018-08-25 05:29:37 +00:00
|
|
|
bool TensorImpl::compute_contiguous() const {
|
|
|
|
|
bool is_contiguous = true;
|
|
|
|
|
if (is_empty())
|
|
|
|
|
return is_contiguous;
|
|
|
|
|
int64_t z = 1;
|
|
|
|
|
for (int64_t d = dim() - 1; d >= 0; d--) {
|
2021-01-08 04:54:20 +00:00
|
|
|
const auto size_d = sizes_and_strides_.size_at_unchecked(d);
|
|
|
|
|
if (size_d != 1) {
|
|
|
|
|
if (sizes_and_strides_.stride_at_unchecked(d) == z) {
|
|
|
|
|
z *= size_d;
|
2018-08-25 05:29:37 +00:00
|
|
|
} else {
|
|
|
|
|
is_contiguous = false;
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
return is_contiguous;
|
|
|
|
|
}
|
|
|
|
|
|
2020-03-06 13:59:20 +00:00
|
|
|
bool TensorImpl::compute_channels_last_contiguous_2d() const {
|
|
|
|
|
// Please don't combine these code, constant array is used here to let
|
|
|
|
|
// compiler fully unroll the loop to get better performance
|
2021-01-08 04:54:20 +00:00
|
|
|
switch (sizes_and_strides_.size()) {
|
2021-05-01 04:22:23 +00:00
|
|
|
case 4: {
|
|
|
|
|
int64_t expected = 1;
|
|
|
|
|
for (auto& d : {1, 3, 2, 0}) {
|
|
|
|
|
const auto size_d = sizes_and_strides_.size_at_unchecked(d);
|
|
|
|
|
if (size_d != 1) {
|
|
|
|
|
if (sizes_and_strides_.stride_at_unchecked(d) != expected) {
|
|
|
|
|
return false;
|
2020-03-06 13:59:20 +00:00
|
|
|
}
|
2021-05-01 04:22:23 +00:00
|
|
|
expected *= size_d;
|
2019-08-05 18:42:48 +00:00
|
|
|
}
|
|
|
|
|
}
|
2021-05-01 04:22:23 +00:00
|
|
|
return true;
|
|
|
|
|
}
|
2021-04-14 18:16:51 +00:00
|
|
|
// NOLINTNEXTLINE(bugprone-branch-clone)
|
2020-03-06 13:59:20 +00:00
|
|
|
case 3:
|
|
|
|
|
// TODO dim == 3 case will be enabled once it is fully tested
|
|
|
|
|
return false;
|
|
|
|
|
default:
|
|
|
|
|
return false;
|
2019-08-05 18:42:48 +00:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2020-03-06 13:59:20 +00:00
|
|
|
bool TensorImpl::compute_channels_last_contiguous_3d() const {
|
|
|
|
|
// Please don't combine these code, constant array is used here to let
|
|
|
|
|
// compiler fully unroll the loop to get better performance
|
2021-01-08 04:54:20 +00:00
|
|
|
switch (sizes_and_strides_.size()) {
|
2021-05-01 04:22:23 +00:00
|
|
|
case 5: {
|
|
|
|
|
int64_t expected = 1;
|
|
|
|
|
for (auto& d : {1, 4, 3, 2, 0}) {
|
|
|
|
|
const auto size_d = sizes_and_strides_.size_at_unchecked(d);
|
|
|
|
|
if (size_d != 1) {
|
|
|
|
|
if (sizes_and_strides_.stride_at_unchecked(d) != expected) {
|
|
|
|
|
return false;
|
2020-03-06 13:59:20 +00:00
|
|
|
}
|
2021-05-01 04:22:23 +00:00
|
|
|
expected *= size_d;
|
2020-03-06 13:59:20 +00:00
|
|
|
}
|
|
|
|
|
}
|
2021-05-01 04:22:23 +00:00
|
|
|
return true;
|
|
|
|
|
}
|
2021-04-14 18:16:51 +00:00
|
|
|
// NOLINTNEXTLINE(bugprone-branch-clone)
|
2020-03-06 13:59:20 +00:00
|
|
|
case 4:
|
|
|
|
|
// TODO dim == 4 case will be enabled once it is fully tested
|
|
|
|
|
return false;
|
|
|
|
|
default:
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
bool TensorImpl::compute_strides_like_channels_last_2d() const {
|
2021-05-01 04:22:23 +00:00
|
|
|
return is_channels_last_strides_2d(
|
|
|
|
|
TensorImpl::sizes(), TensorImpl::strides());
|
2020-03-06 13:59:20 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
bool TensorImpl::compute_strides_like_channels_last_3d() const {
|
2021-05-01 04:22:23 +00:00
|
|
|
return is_channels_last_strides_3d(
|
|
|
|
|
TensorImpl::sizes(), TensorImpl::strides());
|
2019-08-05 18:42:48 +00:00
|
|
|
}
|
|
|
|
|
|
2019-10-03 19:04:42 +00:00
|
|
|
bool TensorImpl::compute_non_overlapping_and_dense() const {
|
|
|
|
|
if (dim() == 1) {
|
2021-05-01 04:22:23 +00:00
|
|
|
return sizes_and_strides_.size_at_unchecked(0) < 2 ||
|
|
|
|
|
sizes_and_strides_.stride_at_unchecked(0) == 1;
|
2019-10-03 19:04:42 +00:00
|
|
|
}
|
2021-05-01 04:22:23 +00:00
|
|
|
SmallVector<int64_t, 5> perm;
|
2019-10-03 19:04:42 +00:00
|
|
|
perm.resize(dim());
|
2021-05-01 04:22:23 +00:00
|
|
|
for (int64_t i = 0; i < dim(); i++) {
|
2019-10-03 19:04:42 +00:00
|
|
|
perm[i] = i;
|
|
|
|
|
}
|
|
|
|
|
// Sort by strides, leaving 0 and 1 sized dims at the end of the array
|
|
|
|
|
std::sort(perm.begin(), perm.end(), [&](int64_t a, int64_t b) {
|
2021-05-01 04:22:23 +00:00
|
|
|
if (sizes_and_strides_.size_at_unchecked(a) < 2) {
|
|
|
|
|
return false;
|
|
|
|
|
} else if (sizes_and_strides_.size_at_unchecked(b) < 2) {
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
return sizes_and_strides_.stride_at_unchecked(a) <
|
|
|
|
|
sizes_and_strides_.stride_at_unchecked(b);
|
2019-10-03 19:04:42 +00:00
|
|
|
});
|
|
|
|
|
auto require_stride = 1;
|
2021-05-01 04:22:23 +00:00
|
|
|
for (int64_t i = 0; i < dim(); i++) {
|
2021-01-08 04:54:20 +00:00
|
|
|
const auto size_perm_i = sizes_and_strides_.size_at_unchecked(perm[i]);
|
|
|
|
|
if (size_perm_i < 2) {
|
2019-10-03 19:04:42 +00:00
|
|
|
return true;
|
|
|
|
|
}
|
2021-01-08 04:54:20 +00:00
|
|
|
if (sizes_and_strides_.stride_at_unchecked(perm[i]) != require_stride) {
|
2019-10-03 19:04:42 +00:00
|
|
|
return false;
|
|
|
|
|
}
|
2021-01-08 04:54:20 +00:00
|
|
|
require_stride *= size_perm_i;
|
2019-10-03 19:04:42 +00:00
|
|
|
}
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
|
Tensor merge PRs from July 20 (#9713)
Summary:
Constituent PRs:
- [x] #9553 Remove unnecessary functions from StorageDerived.h (by cpuhrsch, reviewed by ezyang)
- [x] #9588 Use THTensor/Storage for THVoidTensor/Storage (by cpuhrsch , reviewed by gchanan)
- [x] #9627 Delete context from tensor (by ezyang, reviewed by gchanan)
- [x] #9641 Tensor reorganization (by ezyang, reviewed by gchanan )
- [x] #9647 Remove dim_ from THTensor (by cpuhrsch, reviewed by ezyang)
- [x] #9650 Remove context (by cpuhrsch, reviewed by gchanan and ezyang)
- [x] #9715 Fix Windows build in tensor merge PR (by ezyang, reviewed by gchanan and SsnL)
Upcoming PRs which didn't make this cut:
- [x] #9644 Stride move to TensorImpl, and nits (by ezyang, reviewed by gchanan)
- [ ] #9652 Native localScalar (by ezyang, **UNREVIEWED AND FAILING TESTS**)
- [x] #9710 Devirtualize TensorImpl::toString (by ezyang, reviewed by gchanan)
- [ ] #9654 Use int64_t instead of ptrdiff_t for size / Rename flag to resizable_ (by cpuhrsch, **CHANGES REQUESTED AND FAILING TESTS**)
Pull Request resolved: https://github.com/pytorch/pytorch/pull/9713
Reviewed By: gchanan
Differential Revision: D8960882
Pulled By: ezyang
fbshipit-source-id: 99747b2c5462c7ff6809b67aacb4197626408204
2018-07-24 00:40:19 +00:00
|
|
|
void TensorImpl::release_resources() {
|
Remove Variable::Impl and DifferentiableViewImpl (#17072)
Summary:
As part of the Variable/Tensor merge work: https://github.com/pytorch/pytorch/issues/13638, we make the following changes in this PR:
1. Remove the `Variable::Impl` class and the `DifferentiableViewImpl` class
2. Change all `Variable.data()` call sites to either use `Variable` directly, or use `Variable.tensor_data()`
3. Remove `Variable.data()` API
3. Add `Variable.variable_data()` that matches `tensor.data` in Python API, which creates a new `Variable` that shares the same storage and tensor metadata with the original `Variable`, but with a completely new autograd history.
After this PR, Variable doesn't wrap a Tensor internally anymore, and both Variable and Tensor use the same TensorImpl class as its `impl_`. The only difference is that Variable always has AutogradMeta in its TensorImpl, but Tensor doesn't.
**Note that this PR is BC-breaking in the following use cases:**
**Use Case 1:**
Previously, `x.data = y` works even if `x` and `y` are of different TensorImpl type (e.g. `x` is a CPU dense tensor whose impl is of type TensorImpl, while `y` is a CPU sparse tensor whose impl is of type SparseTensorImpl). However, after this PR, `x.data = y` doesn't work anymore if `x` and `y` are of different TensorImpl type, because the underlying implementation `variable.set_data(tensor)` no longer works if `variable` and `tensor` have different TensorImpl type.
**Use Case 2:**
If a tensor `x`'s `grad` is sparse, accumulating dense gradients to `x` will change the tensor that `x.grad` is pointing to. This is better illustrated with the following example:
```python
params = torch.tensor([1.5, 1.5]).requires_grad_()
with torch.no_grad():
# Change gradient to a sparse tensor
params.grad = torch.sparse_coo_tensor(torch.tensor([[1, 1]]).long(), torch.tensor([1., 1.]))
grad_saved = params.grad
params.backward(torch.tensor([1.5, 1.5]))
assert id(grad_saved) == id(params.grad) # This will fail after this PR
```
The assertion in the last line will fail after this PR, because adding dense gradients to sparse gradients will change the `params.grad` tensor reference.
Pull Request resolved: https://github.com/pytorch/pytorch/pull/17072
Differential Revision: D14075257
Pulled By: yf225
fbshipit-source-id: 0e681df641270dea586042dd26db59f2e76b5957
2019-05-24 04:03:29 +00:00
|
|
|
autograd_meta_.reset();
|
2018-08-16 15:08:43 +00:00
|
|
|
if (storage_) {
|
Use intrusive_ptr in Storage; replace unique_ptr<Storage> with Storage (#10488)
Summary:
```
Use intrusive_ptr in Storage; replace unique_ptr<Storage> with Storage
This patch does two major changes:
- It replaces the use of Retainable in Storage with a new implementation
based on intrusive_ptr. This will be necessary because Caffe2 will
be using this class to implement intrusive_ptrs, and we need to
line these up for the merge. One good thing about the new implementation is
that the default copy/move constructors/assignment operators and destructor
work automatically, instead of needing to be hardcoded into Storage/Tensor.
- It replaces all places where we returned std::unique_ptr<Storage> with
Storage, collapsing an unnecessary double indirection that is no longer
necessary now that we have correctly working copy/move constructors.
I didn't initially want to do step (2), but it was very important to
eliminate all bare uses of new Storage and new StorageImpl, and this making
the API change was the most straightforward way to do this.
HOW TO FIX YOUR CODE IN THE NEW API
- You no longer need to dereference the result of tensor.storage() to pass
it to set. So, instead of:
x.set_(*y.storage());
just write:
x.set_(y.storage());
- If you were accessing methods on StorageImpl via the pImpl() method, you
must use the dot operator to run pImpl(). Even better; just drop pImpl,
we now have method forwarding. So, instead of:
storage->pImpl()->data();
just do:
storage->data();
// storage.pImpl()->data() works too but is not as recommended
- storage->getDevice() is no more; instead use storage->device().index()
MISC CODE UPDATES
- retain, release, weak_retain, weak_release and weak_lock are now
reimplemented using the "blessed API", and renamed to make it
clearer that their use is discouraged.
- nvcc OS X and general OS X portability improvements to intrusive_ptr
- A new comment in intrusive_ptr describing how stack allocated
intrusive_ptr_targets work differently than heap allocated ones
from c10::make_intrusive
CAVEAT EMPTOR
- THStorage_weakRetain used to work on strong pointers, but it NO LONGER
works with intrusive_ptr. You must reclaim the strong pointer into a
real strong pointer, construct a weak pointer from it, and then release
the strong and weak pointers. See StorageSharing.cpp for an example.
```
Pull Request resolved: https://github.com/pytorch/pytorch/pull/10488
Reviewed By: gchanan
Differential Revision: D9306134
Pulled By: ezyang
fbshipit-source-id: 02d58ef62dab8e4da6131e1a24834a65c21048e2
2018-08-22 04:29:30 +00:00
|
|
|
storage_ = {};
|
Tensor merge PRs from July 20 (#9713)
Summary:
Constituent PRs:
- [x] #9553 Remove unnecessary functions from StorageDerived.h (by cpuhrsch, reviewed by ezyang)
- [x] #9588 Use THTensor/Storage for THVoidTensor/Storage (by cpuhrsch , reviewed by gchanan)
- [x] #9627 Delete context from tensor (by ezyang, reviewed by gchanan)
- [x] #9641 Tensor reorganization (by ezyang, reviewed by gchanan )
- [x] #9647 Remove dim_ from THTensor (by cpuhrsch, reviewed by ezyang)
- [x] #9650 Remove context (by cpuhrsch, reviewed by gchanan and ezyang)
- [x] #9715 Fix Windows build in tensor merge PR (by ezyang, reviewed by gchanan and SsnL)
Upcoming PRs which didn't make this cut:
- [x] #9644 Stride move to TensorImpl, and nits (by ezyang, reviewed by gchanan)
- [ ] #9652 Native localScalar (by ezyang, **UNREVIEWED AND FAILING TESTS**)
- [x] #9710 Devirtualize TensorImpl::toString (by ezyang, reviewed by gchanan)
- [ ] #9654 Use int64_t instead of ptrdiff_t for size / Rename flag to resizable_ (by cpuhrsch, **CHANGES REQUESTED AND FAILING TESTS**)
Pull Request resolved: https://github.com/pytorch/pytorch/pull/9713
Reviewed By: gchanan
Differential Revision: D8960882
Pulled By: ezyang
fbshipit-source-id: 99747b2c5462c7ff6809b67aacb4197626408204
2018-07-24 00:40:19 +00:00
|
|
|
}
|
Preserve PyObject even when it goes dead (#56017)
Summary:
Pull Request resolved: https://github.com/pytorch/pytorch/pull/56017
Fixes #55686
This patch is seemingly straightforward but some of the changes are very
subtle. For the general algorithmic approach, please first read the
quoted issue. Based on the algorithm, there are some fairly
straightforward changes:
- New boolean on TensorImpl tracking if we own the pyobj or not
- PythonHooks virtual interface for requesting deallocation of pyobj
when TensorImpl is being released and we own its pyobj, and
implementation of the hooks in python_tensor.cpp
- Modification of THPVariable to MaybeOwned its C++ tensor, directly
using swolchok's nice new class
And then, there is python_variable.cpp. Some of the changes follow the
general algorithmic approach:
- THPVariable_NewWithVar is simply adjusted to handle MaybeOwned and
initializes as owned (like before)
- THPVariable_Wrap adds the logic for reverting ownership back to
PyObject when we take out an owning reference to the Python object
- THPVariable_dealloc attempts to resurrect the Python object if
the C++ tensor is live, and otherwise does the same old implementation
as before
- THPVariable_tryResurrect implements the resurrection logic. It is
modeled after CPython code so read the cited logic and see if
it is faithfully replicated
- THPVariable_clear is slightly updated for MaybeOwned and also to
preserve the invariant that if owns_pyobj, then pyobj_ is not null.
This change is slightly dodgy: the previous implementation has a
comment mentioning that the pyobj nulling is required to ensure we
don't try to reuse the dead pyobj. I don't think, in this new world,
this is possible, because the invariant says that the pyobj only
dies if the C++ object is dead too. But I still unset the field
for safety.
And then... there is THPVariableMetaType. colesbury explained in the
issue why this is necessary: when destructing an object in Python, you
start off by running the tp_dealloc of the subclass before moving up
to the parent class (much in the same way C++ destructors work). The
deallocation process for a vanilla Python-defined class does irreparable
harm to the PyObject instance (e.g., the finalizers get run) making it
no longer valid to attempt to resurrect later in the tp_dealloc chain.
(BTW, the fact that objects can resurrect but in an invalid state is
one of the reasons why it's so frickin' hard to write correct __del__
implementations). So we need to make sure that we actually override
the tp_dealloc of the bottom most *subclass* of Tensor to make sure
we attempt a resurrection before we start finalizing. To do this,
we need to define a metaclass for Tensor that can override tp_dealloc
whenever we create a new subclass of Tensor. By the way, it was totally
not documented how to create metaclasses in the C++ API, and it took
a good bit of trial error to figure it out (and the answer is now
immortalized in https://stackoverflow.com/q/67077317/23845 -- the things
that I got wrong in earlier versions of the PR included setting
tp_basicsize incorrectly, incorrectly setting Py_TPFLAGS_HAVE_GC on
the metaclass--you want to leave it unset so that it inherits, and
determining that tp_init is what actually gets called when you construct
a class, not tp_call as another not-to-be-named StackOverflow question
suggests).
Aside: Ordinarily, adding a metaclass to a class is a user visible
change, as it means that it is no longer valid to mixin another class
with a different metaclass. However, because _C._TensorBase is a C
extension object, it will typically conflict with most other
metaclasses, so this is not BC breaking.
The desired new behavior of a subclass tp_dealloc is to first test if
we should resurrect, and otherwise do the same old behavior. In an
initial implementation of this patch, I implemented this by saving the
original tp_dealloc (which references subtype_dealloc, the "standard"
dealloc for all Python defined classes) and invoking it. However, this
results in an infinite loop, as it attempts to call the dealloc function
of the base type, but incorrectly chooses subclass type (because it is
not a subtype_dealloc, as we have overridden it; see
https://github.com/python/cpython/blob/b38601d49675d90e1ee6faa47f7adaeca992d02d/Objects/typeobject.c#L1261 )
So, with great reluctance, I must duplicate the behavior of
subtype_dealloc in our implementation. Note that this is not entirely
unheard of in Python binding code; for example, Cython
https://github.com/cython/cython/blob/c25c3ccc4b862592b06e66fd0fc508e4d388437b/Cython/Compiler/ModuleNode.py#L1560
also does similar things. This logic makes up the bulk of
THPVariable_subclass_dealloc
To review this, you should pull up the CPython copy of subtype_dealloc
https://github.com/python/cpython/blob/b38601d49675d90e1ee6faa47f7adaeca992d02d/Objects/typeobject.c#L1230
and verify that I have specialized the implementation for our case
appropriately. Among the simplifications I made:
- I assume PyType_IS_GC, because I assume that Tensor subclasses are
only ever done in Python and those classes are always subject to GC.
(BTW, yes! This means I have broken anyone who has extend PyTorch
tensor from C API directly. I'm going to guess no one has actually
done this.)
- I don't bother walking up the type bases to find the parent dealloc;
I know it is always THPVariable_dealloc. Similarly, I can get rid
of some parent type tests based on knowledge of how
THPVariable_dealloc is defined
- The CPython version calls some private APIs which I can't call, so
I use the public PyObject_GC_UnTrack APIs.
- I don't allow the finalizer of a Tensor to change its type (but
more on this shortly)
One alternative I discussed with colesbury was instead of copy pasting
the subtype_dealloc, we could transmute the type of the object that was
dying to turn it into a different object whose tp_dealloc is
subtype_dealloc, so the stock subtype_dealloc would then be applicable.
We decided this would be kind of weird and didn't do it that way.
TODO:
- More code comments
- Figure out how not to increase the size of TensorImpl with the new
bool field
- Add some torture tests for the THPVariable_subclass_dealloc, e.g.,
involving subclasses of Tensors that do strange things with finalizers
- Benchmark the impact of taking the GIL to release C++ side tensors
(e.g., from autograd)
- Benchmark the impact of adding a new metaclass to Tensor (probably
will be done by separating out the metaclass change into its own
change)
- Benchmark the impact of changing THPVariable to conditionally own
Tensor (as opposed to unconditionally owning it, as before)
- Add tests that this actually indeed preserves the Python object
Signed-off-by: Edward Z. Yang <ezyang@fb.com>
Test Plan: Imported from OSS
Reviewed By: albanD
Differential Revision: D27765125
Pulled By: ezyang
fbshipit-source-id: 857f14bdcca2900727412aff4c2e2d7f0af1415a
2021-06-03 17:47:19 +00:00
|
|
|
if (owns_pyobj_) {
|
|
|
|
|
TORCH_INTERNAL_ASSERT(pyobj_interpreter_ != nullptr);
|
|
|
|
|
TORCH_INTERNAL_ASSERT(pyobj_ != nullptr);
|
|
|
|
|
pyobj_interpreter_.load(std::memory_order_acquire)->decref(pyobj_);
|
|
|
|
|
// NB: this destructor can only be entered when there are no
|
|
|
|
|
// references to this C++ object (obviously), NOR any references
|
|
|
|
|
// to the PyObject (if there are references to the PyObject,
|
|
|
|
|
// then the PyObject holds an owning reference to the tensor).
|
|
|
|
|
// So it is OK to clear pyobj_ here as it is impossible for it to
|
|
|
|
|
// be used again (modulo weak reference races)
|
|
|
|
|
pyobj_ = nullptr; // for safety
|
|
|
|
|
}
|
Tensor merge PRs from July 20 (#9713)
Summary:
Constituent PRs:
- [x] #9553 Remove unnecessary functions from StorageDerived.h (by cpuhrsch, reviewed by ezyang)
- [x] #9588 Use THTensor/Storage for THVoidTensor/Storage (by cpuhrsch , reviewed by gchanan)
- [x] #9627 Delete context from tensor (by ezyang, reviewed by gchanan)
- [x] #9641 Tensor reorganization (by ezyang, reviewed by gchanan )
- [x] #9647 Remove dim_ from THTensor (by cpuhrsch, reviewed by ezyang)
- [x] #9650 Remove context (by cpuhrsch, reviewed by gchanan and ezyang)
- [x] #9715 Fix Windows build in tensor merge PR (by ezyang, reviewed by gchanan and SsnL)
Upcoming PRs which didn't make this cut:
- [x] #9644 Stride move to TensorImpl, and nits (by ezyang, reviewed by gchanan)
- [ ] #9652 Native localScalar (by ezyang, **UNREVIEWED AND FAILING TESTS**)
- [x] #9710 Devirtualize TensorImpl::toString (by ezyang, reviewed by gchanan)
- [ ] #9654 Use int64_t instead of ptrdiff_t for size / Rename flag to resizable_ (by cpuhrsch, **CHANGES REQUESTED AND FAILING TESTS**)
Pull Request resolved: https://github.com/pytorch/pytorch/pull/9713
Reviewed By: gchanan
Differential Revision: D8960882
Pulled By: ezyang
fbshipit-source-id: 99747b2c5462c7ff6809b67aacb4197626408204
2018-07-24 00:40:19 +00:00
|
|
|
}
|
|
|
|
|
|
2021-01-13 23:13:28 +00:00
|
|
|
#ifndef C10_DISABLE_TENSORIMPL_EXTENSIBILITY
|
Tensor merge PRs from July 20 (#9713)
Summary:
Constituent PRs:
- [x] #9553 Remove unnecessary functions from StorageDerived.h (by cpuhrsch, reviewed by ezyang)
- [x] #9588 Use THTensor/Storage for THVoidTensor/Storage (by cpuhrsch , reviewed by gchanan)
- [x] #9627 Delete context from tensor (by ezyang, reviewed by gchanan)
- [x] #9641 Tensor reorganization (by ezyang, reviewed by gchanan )
- [x] #9647 Remove dim_ from THTensor (by cpuhrsch, reviewed by ezyang)
- [x] #9650 Remove context (by cpuhrsch, reviewed by gchanan and ezyang)
- [x] #9715 Fix Windows build in tensor merge PR (by ezyang, reviewed by gchanan and SsnL)
Upcoming PRs which didn't make this cut:
- [x] #9644 Stride move to TensorImpl, and nits (by ezyang, reviewed by gchanan)
- [ ] #9652 Native localScalar (by ezyang, **UNREVIEWED AND FAILING TESTS**)
- [x] #9710 Devirtualize TensorImpl::toString (by ezyang, reviewed by gchanan)
- [ ] #9654 Use int64_t instead of ptrdiff_t for size / Rename flag to resizable_ (by cpuhrsch, **CHANGES REQUESTED AND FAILING TESTS**)
Pull Request resolved: https://github.com/pytorch/pytorch/pull/9713
Reviewed By: gchanan
Differential Revision: D8960882
Pulled By: ezyang
fbshipit-source-id: 99747b2c5462c7ff6809b67aacb4197626408204
2018-07-24 00:40:19 +00:00
|
|
|
// Returns the number of dimensions of this tensor, taken from
// sizes_and_strides_.size(). This definition sits between
// `#ifndef C10_DISABLE_TENSORIMPL_EXTENSIBILITY` and `#endif`, so it is only
// compiled when that macro is not defined.
int64_t TensorImpl::dim() const {
|
2021-01-08 04:54:20 +00:00
|
|
|
return sizes_and_strides_.size();
|
Tensor merge PRs from July 20 (#9713)
Summary:
Constituent PRs:
- [x] #9553 Remove unnecessary functions from StorageDerived.h (by cpuhrsch, reviewed by ezyang)
- [x] #9588 Use THTensor/Storage for THVoidTensor/Storage (by cpuhrsch , reviewed by gchanan)
- [x] #9627 Delete context from tensor (by ezyang, reviewed by gchanan)
- [x] #9641 Tensor reorganization (by ezyang, reviewed by gchanan )
- [x] #9647 Remove dim_ from THTensor (by cpuhrsch, reviewed by ezyang)
- [x] #9650 Remove context (by cpuhrsch, reviewed by gchanan and ezyang)
- [x] #9715 Fix Windows build in tensor merge PR (by ezyang, reviewed by gchanan and SsnL)
Upcoming PRs which didn't make this cut:
- [x] #9644 Stride move to TensorImpl, and nits (by ezyang, reviewed by gchanan)
- [ ] #9652 Native localScalar (by ezyang, **UNREVIEWED AND FAILING TESTS**)
- [x] #9710 Devirtualize TensorImpl::toString (by ezyang, reviewed by gchanan)
- [ ] #9654 Use int64_t instead of ptrdiff_t for size / Rename flag to resizable_ (by cpuhrsch, **CHANGES REQUESTED AND FAILING TESTS**)
Pull Request resolved: https://github.com/pytorch/pytorch/pull/9713
Reviewed By: gchanan
Differential Revision: D8960882
Pulled By: ezyang
fbshipit-source-id: 99747b2c5462c7ff6809b67aacb4197626408204
2018-07-24 00:40:19 +00:00
|
|
|
}
|
2021-01-13 23:13:28 +00:00
|
|
|
#endif
|
Tensor merge PRs from July 20 (#9713)
Summary:
Constituent PRs:
- [x] #9553 Remove unnecessary functions from StorageDerived.h (by cpuhrsch, reviewed by ezyang)
- [x] #9588 Use THTensor/Storage for THVoidTensor/Storage (by cpuhrsch , reviewed by gchanan)
- [x] #9627 Delete context from tensor (by ezyang, reviewed by gchanan)
- [x] #9641 Tensor reorganization (by ezyang, reviewed by gchanan )
- [x] #9647 Remove dim_ from THTensor (by cpuhrsch, reviewed by ezyang)
- [x] #9650 Remove context (by cpuhrsch, reviewed by gchanan and ezyang)
- [x] #9715 Fix Windows build in tensor merge PR (by ezyang, reviewed by gchanan and SsnL)
Upcoming PRs which didn't make this cut:
- [x] #9644 Stride move to TensorImpl, and nits (by ezyang, reviewed by gchanan)
- [ ] #9652 Native localScalar (by ezyang, **UNREVIEWED AND FAILING TESTS**)
- [x] #9710 Devirtualize TensorImpl::toString (by ezyang, reviewed by gchanan)
- [ ] #9654 Use int64_t instead of ptrdiff_t for size / Rename flag to resizable_ (by cpuhrsch, **CHANGES REQUESTED AND FAILING TESTS**)
Pull Request resolved: https://github.com/pytorch/pytorch/pull/9713
Reviewed By: gchanan
Differential Revision: D8960882
Pulled By: ezyang
fbshipit-source-id: 99747b2c5462c7ff6809b67aacb4197626408204
2018-07-24 00:40:19 +00:00
|
|
|
|
2018-08-16 15:08:43 +00:00
|
|
|
int64_t TensorImpl::size(int64_t d) const {
|
|
|
|
|
d = at::maybe_wrap_dim(d, dim(), false);
|
2021-01-08 04:54:20 +00:00
|
|
|
return sizes_and_strides_.size_at_unchecked(d);
|
2018-08-16 15:08:43 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
int64_t TensorImpl::stride(int64_t d) const {
|
|
|
|
|
d = at::maybe_wrap_dim(d, dim(), false);
|
2021-01-08 04:54:20 +00:00
|
|
|
return sizes_and_strides_.stride_at_unchecked(d);
|
2018-07-25 16:14:22 +00:00
|
|
|
}
|
|
|
|
|
|
2021-02-01 19:23:23 +00:00
|
|
|
#ifndef C10_DISABLE_TENSORIMPL_EXTENSIBILITY
|
2019-02-11 20:48:17 +00:00
|
|
|
bool TensorImpl::has_storage() const {
|
|
|
|
|
return storage_;
|
|
|
|
|
}
|
2021-02-01 19:23:23 +00:00
|
|
|
#endif
|
2019-02-11 20:48:17 +00:00
|
|
|
|
[PyTorch] Devirtualize TensorImpl::storage() (#51050)
Summary:
Pull Request resolved: https://github.com/pytorch/pytorch/pull/51050
Subclasses want to be able to make storage() calls throw, so
we find some free space in TensorImpl to add a flag that they can set
to make that happen without making storage() virtual. It should still
be inlineable.
ghstack-source-id: 121819684
Test Plan:
Compared `perf stat` on 1M iterations on AdIndexer benchmark before/after
Before:
```
74,483.15 msec task-clock # 0.999 CPUs utilized ( +- 0.14% )
16,637 context-switches # 0.223 K/sec ( +- 11.97% )
3 cpu-migrations # 0.000 K/sec ( +- 7.20% )
107,085 page-faults # 0.001 M/sec ( +- 2.39% )
147,356,440,831 cycles # 1.978 GHz ( +- 0.14% ) (50.06%)
278,678,430,378 instructions # 1.89 insn per cycle ( +- 0.01% ) (50.05%)
43,540,698,177 branches # 584.571 M/sec ( +- 0.01% ) (50.05%)
141,028,843 branch-misses # 0.32% of all branches ( +- 1.00% ) (50.05%)
```
After:
```
74,178.77 msec task-clock # 0.999 CPUs utilized ( +- 0.31% )
17,125 context-switches # 0.231 K/sec ( +- 3.41% )
3 cpu-migrations # 0.000 K/sec
109,535 page-faults # 0.001 M/sec ( +- 1.04% )
146,803,364,372 cycles # 1.979 GHz ( +- 0.30% ) (50.03%)
277,726,600,254 instructions # 1.89 insn per cycle ( +- 0.02% ) (50.03%)
43,299,659,815 branches # 583.720 M/sec ( +- 0.03% ) (50.03%)
130,504,094 branch-misses # 0.30% of all branches ( +- 1.14% ) (50.03%)
```
Looks like approximately 0.3% instruction count win (and similarly for cycles, but that's within noise).
Reviewed By: ezyang
Differential Revision: D26013815
fbshipit-source-id: 07939957929070e18b9981d492d8279c9bb33c55
2021-02-17 19:41:49 +00:00
|
|
|
// Reports that storage cannot be accessed on this tensor type. The check below
// is given the literal `false` as its condition, so it fails on every call;
// the message names the concrete type via tensorimpl_type_name().
void TensorImpl::throw_storage_access_error() const {
|
2021-05-01 04:22:23 +00:00
|
|
|
TORCH_CHECK_NOT_IMPLEMENTED(
|
|
|
|
|
false, "Cannot access storage of ", tensorimpl_type_name());
|
[PyTorch] Devirtualize TensorImpl::storage() (#51050)
Summary:
Pull Request resolved: https://github.com/pytorch/pytorch/pull/51050
Subclasses want to be able to make storage() calls throw, so
we find some free space in TensorImpl to add a flag that they can set
to make that happen without making storage() virtual. It should still
be inlineable.
ghstack-source-id: 121819684
Test Plan:
Compared `perf stat` on 1M iterations on AdIndexer benchmark before/after
Before:
```
74,483.15 msec task-clock # 0.999 CPUs utilized ( +- 0.14% )
16,637 context-switches # 0.223 K/sec ( +- 11.97% )
3 cpu-migrations # 0.000 K/sec ( +- 7.20% )
107,085 page-faults # 0.001 M/sec ( +- 2.39% )
147,356,440,831 cycles # 1.978 GHz ( +- 0.14% ) (50.06%)
278,678,430,378 instructions # 1.89 insn per cycle ( +- 0.01% ) (50.05%)
43,540,698,177 branches # 584.571 M/sec ( +- 0.01% ) (50.05%)
141,028,843 branch-misses # 0.32% of all branches ( +- 1.00% ) (50.05%)
```
After:
```
74,178.77 msec task-clock # 0.999 CPUs utilized ( +- 0.31% )
17,125 context-switches # 0.231 K/sec ( +- 3.41% )
3 cpu-migrations # 0.000 K/sec
109,535 page-faults # 0.001 M/sec ( +- 1.04% )
146,803,364,372 cycles # 1.979 GHz ( +- 0.30% ) (50.03%)
277,726,600,254 instructions # 1.89 insn per cycle ( +- 0.02% ) (50.03%)
43,299,659,815 branches # 583.720 M/sec ( +- 0.03% ) (50.03%)
130,504,094 branch-misses # 0.30% of all branches ( +- 1.14% ) (50.03%)
```
Looks like approximately 0.3% instruction count win (and similarly for cycles, but that's within noise).
Reviewed By: ezyang
Differential Revision: D26013815
fbshipit-source-id: 07939957929070e18b9981d492d8279c9bb33c55
2021-02-17 19:41:49 +00:00
|
|
|
}
|
|
|
|
|
|
2021-05-01 04:22:23 +00:00
|
|
|
// Answers is_contiguous for tensors whose has_contiguity_ policy is not the
// default one.
//
// - ContiguityNotSupported: fails a TORCH_CHECK_NOT_IMPLEMENTED whose message
//   names the concrete type via tensorimpl_type_name().
// - Otherwise the policy is expected to be CustomBehavior (checked by a
//   debug-only assert) and the query is forwarded to is_contiguous_custom().
bool TensorImpl::is_contiguous_nondefault_policy_impl(
    at::MemoryFormat memory_format) const {
  const bool contiguity_unsupported = has_contiguity_ ==
      static_cast<uint8_t>(HasContiguityPolicy::ContiguityNotSupported);
  if (contiguity_unsupported) {
    // The condition is the literal false, so this check always fails.
    TORCH_CHECK_NOT_IMPLEMENTED(
        false,
        "Tensors of type ",
        tensorimpl_type_name(),
        " do not have is_contiguous");
  }

  TORCH_INTERNAL_ASSERT_DEBUG_ONLY(
      has_contiguity_ ==
      static_cast<uint8_t>(HasContiguityPolicy::CustomBehavior));
  return is_contiguous_custom(memory_format);
}
|
|
|
|
|
|
|
|
|
|
// Base implementation of the custom-contiguity hook. It is not meant to be
// reached: the internal assert below is given the literal `false`, and its
// message tells the caller to override this method after calling
// set_has_contiguity_policy. `memory_format` is not used here.
bool TensorImpl::is_contiguous_custom(at::MemoryFormat memory_format) const {
  TORCH_INTERNAL_ASSERT(
      false,
      "TensorImpl::is_contiguous_custom should never be called; did you set_has_contiguity_policy and forget to override is_contiguous_custom?");
}
|
|
|
|
|
|
2018-10-04 02:06:54 +00:00
|
|
|
static void deletePlacementDeleteContext(void* ptr) {
|
|
|
|
|
delete static_cast<PlacementDeleteContext*>(ptr);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Wraps `data_ptr` in a new DataPtr whose context is a heap-allocated
// PlacementDeleteContext.
//
// The returned DataPtr exposes the same raw pointer as `data_ptr`, carries a
// PlacementDeleteContext built from (data_ptr, placement_dtor, size) as its
// context, uses deletePlacementDeleteContext as its deleter, and is tagged
// with `device`.
at::DataPtr PlacementDeleteContext::makeDataPtr(
    at::DataPtr&& data_ptr,
    PlacementDtor placement_dtor,
    size_t size,
    at::Device device) {
  // Read the raw pointer before data_ptr is moved into the context.
  auto* raw_data = data_ptr.get();
  auto* context =
      new PlacementDeleteContext(std::move(data_ptr), placement_dtor, size);
  return at::DataPtr(raw_data, context, &deletePlacementDeleteContext, device);
}
|
|
|
|
|
|
2021-07-06 16:45:04 +00:00
|
|
|
// Out-of-line definition of the defaulted AutogradMetaInterface destructor.
AutogradMetaInterface::~AutogradMetaInterface() = default;
|
2018-12-27 00:31:47 +00:00
|
|
|
|
2021-04-09 21:39:14 +00:00
|
|
|
// Setting requires_grad to true on inference tensor outside InferenceMode
|
|
|
|
|
// is forbidden. Ideally it would also be illegal inside InferenceMode.
|
|
|
|
|
// But there's no way that we can directly allocate a tensor to have
|
|
|
|
|
// requires_grad = true in C++ constructor so set_requires_grad is widely
|
|
|
|
|
// used in C++ frontend. Forbidding it inside InferenceMode will force users
|
|
|
|
|
// to delete these setter code in their code which is not ideal.
|
2019-10-31 18:18:47 +00:00
|
|
|
// Sets the requires_grad flag on this tensor.
//
// - Fails a TORCH_CHECK when requires_grad is true, is_inference() is true and
//   c10::InferenceMode is not enabled.
// - When requires_grad is false and autograd_meta_ is null, returns early
//   without creating an autograd_meta_.
// - Otherwise creates autograd_meta_ on demand through
//   impl::GetAutogradMetaFactory()->make() and forwards the flag to it.
void TensorImpl::set_requires_grad(bool requires_grad) {
|
2021-05-01 04:22:23 +00:00
|
|
|
TORCH_CHECK(
|
2021-06-04 15:58:06 +00:00
|
|
|
!(requires_grad && is_inference() && !c10::InferenceMode::is_enabled()),
|
2021-05-01 04:22:23 +00:00
|
|
|
"Setting requires_grad=True on inference tensor outside InferenceMode is not allowed.");
|
|
|
|
|
if (!requires_grad && !autograd_meta_)
|
|
|
|
|
return;
|
|
|
|
|
if (!autograd_meta_)
|
|
|
|
|
autograd_meta_ = impl::GetAutogradMetaFactory()->make();
|
Null AutogradMeta optimization (#28610)
Summary:
Pull Request resolved: https://github.com/pytorch/pytorch/pull/28610
The basic idea is, in some cases where we stored a pointer to a full AutogradMeta object, instead store a nullptr. We let a nullptr represent a default-constructed AutogradMeta object, and simply populate it with a real AutogradMeta if there is ever a situation where we need to modify it.
The primary technical contrivance in this diff is I have to use AutogradMetaFactory to lazily initialize the AutogradMeta, as it is not available in the dynamic library that TensorImpl is in. (I spent a while trying to put them in the same compilation unit, but gave up in the end as it pushed us over the Windows linking binary size limit. Eep.)
Some other notes:
- `set_autograd_meta` now unconditionally turns a tensor into a variable. I audited all call sites and observed there are no occurrences where nullptr is passed (after this patch, there are now!)
- `copy_tensor_metadata` is updated to unconditionally preserve the VariableTensorId-ness of the destination tensor. I think this is the more correct semantics; we can't do the old semantics anymore.
- There's a bunch of places in the API where we return const references to objects. This is pretty weird to me, but I didn't feel like cleaning it up. But sometimes I don't conveniently have something that's the right lifetime, so I introduced a number of singletons to handle this correctly.
You might wonder why I'm doing the optimization before the variable-tensor dynamic merge. The reason is simple: this change is semantics preserving, while variable-tensor dynamic merge is not. So it is easier to get right, and prevents us from regressing performance if we do it the other way.
Signed-off-by: Edward Z. Yang <ezyang@fb.com>
Test Plan: Imported from OSS
Differential Revision: D18171162
Pulled By: ezyang
fbshipit-source-id: 580df729e4d04881b2b9caa0f0c00785b3afbb92
2019-10-31 18:18:47 +00:00
|
|
|
// NB: In principle, setting requires_grad to false could result in
|
|
|
|
|
// the AutogradMeta becoming equal to a default constructed state,
|
|
|
|
|
// in which case we could apply the nullptr AutogradMeta optimization
|
|
|
|
|
// (see autograd_meta_ docs). But we don't do this right now. Note
|
|
|
|
|
// that it is unsound to unconditionally set AutogradMeta to false
|
|
|
|
|
// when you set requires_grad to False, as there may be nontrivial
|
|
|
|
|
// information content in the other fields; for example, we may
|
|
|
|
|
// have set the string name for a Variable, or there may be hooks
|
|
|
|
|
// registered for it.
|
|
|
|
|
autograd_meta_->set_requires_grad(requires_grad, this);
|
2019-10-31 18:18:47 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Returns whether this tensor requires grad. A null autograd_meta_ yields
// false; otherwise the answer comes from autograd_meta_->requires_grad().
bool TensorImpl::requires_grad() const {
|
2021-05-01 04:22:23 +00:00
|
|
|
if (!autograd_meta_)
|
|
|
|
|
return false;
|
Null AutogradMeta optimization (#28610)
Summary:
Pull Request resolved: https://github.com/pytorch/pytorch/pull/28610
The basic idea is, in some cases where we stored a pointer to a full AutogradMeta object, instead store a nullptr. We let a nullptr represent a default-constructed AutogradMeta object, and simply populate it with a real AutogradMeta if there is ever a situation where we need to modify it.
The primary technical contrivance in this diff is I have to use AutogradMetaFactory to lazily initialize the AutogradMeta, as it is not available in the dynamic library that TensorImpl is in. (I spent a while trying to put them in the same compilation unit, but gave up in the end as it pushed us over the Windows linking binary size limit. Eep.)
Some other notes:
- `set_autograd_meta` now unconditionally turns a tensor into a variable. I audited all call sites and observed there are no occurrences where nullptr is passed (after this patch, there are now!)
- `copy_tensor_metadata` is updated to unconditionally preserve the VariableTensorId-ness of the destination tensor. I think this is the more correct semantics; we can't do the old semantics anymore.
- There's a bunch of places in the API where we return const references to objects. This is pretty weird to me, but I didn't feel like cleaning it up. But sometimes I don't conveniently have something that's the right lifetime, so I introduced a number of singletons to handle this correctly.
You might wonder why I'm doing the optimization before the variable-tensor dynamic merge. The reason is simple: this change is semantics preserving, while variable-tensor dynamic merge is not. So it is easier to get right, and prevents us from regressing performance if we do it the other way.
Signed-off-by: Edward Z. Yang <ezyang@fb.com>
Test Plan: Imported from OSS
Differential Revision: D18171162
Pulled By: ezyang
fbshipit-source-id: 580df729e4d04881b2b9caa0f0c00785b3afbb92
2019-10-31 18:18:47 +00:00
|
|
|
return autograd_meta_->requires_grad();
|
2019-10-31 18:18:47 +00:00
|
|
|
}
|
|
|
|
|
|
2021-05-01 04:22:23 +00:00
|
|
|
// Replaces autograd_meta_ with `autograd_meta`, taking ownership by moving the
// unique_ptr into the member. A null argument is accepted.
void TensorImpl::set_autograd_meta(
|
|
|
|
|
std::unique_ptr<c10::AutogradMetaInterface> autograd_meta) {
|
Null AutogradMeta optimization (#28610)
Summary:
Pull Request resolved: https://github.com/pytorch/pytorch/pull/28610
The basic idea is, in some cases where we stored a pointer to a full AutogradMeta object, instead store a nullptr. We let a nullptr represent a default-constructed AutogradMeta object, and simply populate it with a real AutogradMeta if there is ever a situation where we need to modify it.
The primary technical contrivance in this diff is I have to use AutogradMetaFactory to lazily initialize the AutogradMeta, as it is not available in the dynamic library that TensorImpl is in. (I spent a while trying to put them in the same compilation unit, but gave up in the end as it pushed us over the Windows linking binary size limit. Eep.)
Some other notes:
- `set_autograd_meta` now unconditionally turns a tensor into a variable. I audited all call sites and observed there are no occurrences where nullptr is passed (after this patch, there are now!)
- `copy_tensor_metadata` is updated to unconditionally preserve the VariableTensorId-ness of the destination tensor. I think this is the more correct semantics; we can't do the old semantics anymore.
- There's a bunch of places in the API where we return const references to objects. This is pretty weird to me, but I didn't feel like cleaning it up. But sometimes I don't conveniently have something that's the right lifetime, so I introduced a number of singletons to handle this correctly.
You might wonder why I'm doing the optimization before the variable-tensor dynamic merge. The reason is simple: this change is semantics preserving, while variable-tensor dynamic merge is not. So it is easier to get right, and prevents us from regressing performance if we do it the other way.
Signed-off-by: Edward Z. Yang <ezyang@fb.com>
Test Plan: Imported from OSS
Differential Revision: D18171162
Pulled By: ezyang
fbshipit-source-id: 580df729e4d04881b2b9caa0f0c00785b3afbb92
2019-10-31 18:18:47 +00:00
|
|
|
// NB: autograd_meta may be null! That just means it's the default
|
|
|
|
|
// constructor
|
2019-10-31 18:18:47 +00:00
|
|
|
autograd_meta_ = std::move(autograd_meta);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Returns the raw pointer held by autograd_meta_ (via .get()); ownership stays
// with this TensorImpl. The result is null when no autograd_meta_ is set.
c10::AutogradMetaInterface* TensorImpl::autograd_meta() const {
|
Null AutogradMeta optimization (#28610)
Summary:
Pull Request resolved: https://github.com/pytorch/pytorch/pull/28610
The basic idea is, in some cases where we stored a pointer to a full AutogradMeta object, instead store a nullptr. We let a nullptr represent a default-constructed AutogradMeta object, and simply populate it with a real AutogradMeta if there is ever a situation where we need to modify it.
The primary technical contrivance in this diff is I have to use AutogradMetaFactory to lazily initialize the AutogradMeta, as it is not available in the dynamic library that TensorImpl is in. (I spent a while trying to put them in the same compilation unit, but gave up in the end as it pushed us over the Windows linking binary size limit. Eep.)
Some other notes:
- `set_autograd_meta` now unconditionally turns a tensor into a variable. I audited all call sites and observed there are no occurrences where nullptr is passed (after this patch, there are now!)
- `copy_tensor_metadata` is updated to unconditionally preserve the VariableTensorId-ness of the destination tensor. I think this is the more correct semantics; we can't do the old semantics anymore.
- There's a bunch of places in the API where we return const references to objects. This is pretty weird to me, but I didn't feel like cleaning it up. But sometimes I don't conveniently have something that's the right lifetime, so I introduced a number of singletons to handle this correctly.
You might wonder why I'm doing the optimization before the variable-tensor dynamic merge. The reason is simple: this change is semantics preserving, while variable-tensor dynamic merge is not. So it is easier to get right, and prevents us from regressing performance if we do it the other way.
Signed-off-by: Edward Z. Yang <ezyang@fb.com>
Test Plan: Imported from OSS
Differential Revision: D18171162
Pulled By: ezyang
fbshipit-source-id: 580df729e4d04881b2b9caa0f0c00785b3afbb92
2019-10-31 18:18:47 +00:00
|
|
|
// NB: Might return null!
|
2019-10-31 18:18:47 +00:00
|
|
|
return autograd_meta_.get();
|
|
|
|
|
}
|
|
|
|
|
|
2020-12-05 02:33:16 +00:00
|
|
|
// Creates a detached shallow copy of this TensorImpl (const-reference
// version_counter overload).
//
// - If key_set_ has DispatchKey::Python and that key is not excluded by
//   c10::impl::tls_is_dispatch_key_excluded, detach(this) is called on the
//   loaded pyobj_interpreter_ first. A non-null result receives
//   `version_counter` and `allow_tensor_metadata_change` and is returned.
// - Otherwise, or when detach returned null, a new TensorImpl is built from
//   key_set_, data_type_ and device_opt_, filled in by copy_tensor_metadata,
//   and refresh_numel() / refresh_contiguous() are called on it before it is
//   returned.
c10::intrusive_ptr<TensorImpl> TensorImpl::shallow_copy_and_detach(
|
|
|
|
|
const c10::VariableVersion& version_counter,
|
|
|
|
|
bool allow_tensor_metadata_change) const {
|
Dispatch to Python via __torch_dispatch__ (#59760)
Summary:
Pull Request resolved: https://github.com/pytorch/pytorch/pull/59760
See https://github.com/pytorch/pytorch/issues/59049
There are some moving parts to this PR, I'll structure this explanation so the straightforward parts go first, and then the less straightforward parts.
**The actual dispatch to Python.** The core logic of dispatch to Python lives in `concrete_dispatch_fn` in `torch/csrc/autograd/python_variable.cpp`. It takes the input IValue stack, scans all the arguments for Tensor arguments, and defers most of the heavy lifting to `handle_torch_function_no_python_arg_parser` which actually does all of the logic for calling out to torch dispatch (in particular, this function handles multiple dispatch situations for you). Because we have a different function name than regular `__torch_function__` handling, `handle_torch_function_no_python_arg_parser` is generalized to accept a magic method name to look for when testing if Tensors have custom handling or not. Unlike `__torch_function__`, by default there is no `__torch_dispatch__` on Tensor classes.
**Maintaining the Python dispatch key.** In order to get to the dispatch to Python logic, we must tag Tensors with the `__torch_dispatch__` magic method with the newly added Python dispatch key (separated from PythonFuncTorch to allow for a transitional period while they migrate to this mechanism). We expose a new private property `_is_python_dispatch` that assists in debugging if a Tensor is participating in Python dispatch or not. We apply the Python dispatch key the first time a PyObject for a Tensor is constructed (THPVariable_NewWithVar), testing if `__torch_dispatch__` exists with then newly added `check_has_torch_dispatch`.
**Shallow copy and detach.** For the simple examples tested in this PR, most creations of Tensor route through the dispatcher. The exception to this is `shallow_copy_and_detach`, which bypasses the dispatcher and is used when saving tensors for backwards. When a Tensor is Python dispatch, we override the behavior of `shallow_copy_and_detach` to instead directly call into `__torch_dispatch__` to perform a `detach` operation (in the same way it would be invoked if you called `detach` directly). Because this Python call is triggered directly from c10::TensorImpl, it must be indirected through `PyInterpreter::detach`, which is the general mechanism for dynamic dispatching to the Python interpreter associated with a TensorImpl.
**torchdeploy compatibility.** The dispatch to Python logic cannot be directly registered to the dispatcher as it is compiled in the Python library, which will get loaded multiple times per torchdeploy interpreter. Thus, we must employ a two phase process. First, we register a fallback inside a non-Python library (aten/src/ATen/core/PythonFallbackKernel.cpp). Its job is to determine the appropriate PyInterpreter to handle the Python dispatch by going through all of the arguments and finding the first argument that has a PyObject/PyInterpreter. With this PyInterpreter, it makes another dynamic dispatch via "dispatch" which will go to the correct torchdeploy interpreter to handle dispatching to actual Python.
**Testing.** We provide a simple example of a LoggingTensor for testing, which can be used to generate TorchScript-like traces to observe what operations are being called when a Tensor is invoked. Although a LoggingTensor would be better implemented via an is-a relationship rather than a has-a relationship (as is done in the test), we've done it this way to show that arbitrarily complex compositions of tensors inside a tensor work properly.
**Known limitations.**
* We haven't adjusted any operator code, so some patterns may not work (as they lose the Python subclass in an unrecoverable way)
* `__torch_function__` must be explicitly disabled with `_disabled_torch_function_impl` otherwise things don't work quite correctly (in particular, what is being disabled is default subclass preservation behavior.)
* We don't ever populate kwargs, even when an argument is kwarg-only
Signed-off-by: Edward Z. Yang <ezyang@fb.com>
Differential Revision:
D29017912
D29017912
Test Plan: Imported from OSS
Reviewed By: bdhirsh
Pulled By: ezyang
fbshipit-source-id: a67714d9e541d09203a8cfc85345b8967db86238
2021-06-25 18:49:20 +00:00
|
|
|
if (key_set_.has(DispatchKey::Python) &&
|
|
|
|
|
!c10::impl::tls_is_dispatch_key_excluded(DispatchKey::Python)) {
|
|
|
|
|
auto r = pyobj_interpreter_.load(std::memory_order_acquire)->detach(this);
|
|
|
|
|
if (r) {
|
|
|
|
|
r->set_version_counter(version_counter);
|
|
|
|
|
r->set_allow_tensor_metadata_change(allow_tensor_metadata_change);
|
|
|
|
|
return r;
|
|
|
|
|
}
|
|
|
|
|
// otherwise just copy the TensorImpl and not the PyObject. Since
|
|
|
|
|
// the interpreter is dead no one can call us out on it
|
|
|
|
|
}
|
2020-12-05 02:33:16 +00:00
|
|
|
auto impl = c10::make_intrusive<TensorImpl>(
|
2020-12-10 01:43:51 +00:00
|
|
|
// No need to populate Storage; copy_tensor_metadata will do it for us.
|
2021-05-01 04:22:23 +00:00
|
|
|
key_set_,
|
|
|
|
|
data_type_,
|
|
|
|
|
device_opt_);
|
2020-12-05 02:33:16 +00:00
|
|
|
copy_tensor_metadata(
|
|
|
|
|
/*src_impl=*/this,
|
|
|
|
|
/*dest_impl=*/impl.get(),
|
|
|
|
|
/*version_counter=*/version_counter,
|
|
|
|
|
/*allow_tensor_metadata_change=*/allow_tensor_metadata_change);
|
|
|
|
|
impl->refresh_numel();
|
|
|
|
|
impl->refresh_contiguous();
|
|
|
|
|
return impl;
|
|
|
|
|
}
|
|
|
|
|
|
2020-12-05 02:33:16 +00:00
|
|
|
c10::intrusive_ptr<TensorImpl> TensorImpl::shallow_copy_and_detach(
|
|
|
|
|
c10::VariableVersion&& version_counter,
|
|
|
|
|
bool allow_tensor_metadata_change) const {
|
Dispatch to Python via __torch_dispatch__ (#59760)
Summary:
Pull Request resolved: https://github.com/pytorch/pytorch/pull/59760
See https://github.com/pytorch/pytorch/issues/59049
There are some moving parts to this PR, I'll structure this explanation so the straightforward parts go first, and then the less straightforward parts.
**The actual dispatch to Python.** The core logic of dispatch to Python lives in `concrete_dispatch_fn` in `torch/csrc/autograd/python_variable.cpp`. It takes the input IValue stack, scans all the arguments for Tensor arguments, and defers most of the heavy lifting to `handle_torch_function_no_python_arg_parser` which actually does all of the logic for calling out to torch dispatch (in particular, this function handles multiple dispatch situations for you). Because we have a different function name than regular `__torch_function__` handling, `handle_torch_function_no_python_arg_parser` is generalized to accept a magic method name to look for when testing if Tensors have custom handling or not. Unlike `__torch_function__`, by default there is no `__torch_dispatch__` on Tensor classes.
**Maintaining the Python dispatch key.** In order to get to the dispatch to Python logic, we must tag Tensors with the `__torch_dispatch__` magic method with the newly added Python dispatch key (separated from PythonFuncTorch to allow for a transitional period while they migrate to this mechanism). We expose a new private property `_is_python_dispatch` that assists in debugging if a Tensor is participating in Python dispatch or not. We apply the Python dispatch key the first time a PyObject for a Tensor is constructed (THPVariable_NewWithVar), testing if `__torch_dispatch__` exists with then newly added `check_has_torch_dispatch`.
**Shallow copy and detach.** For the simple examples tested in this PR, most creations of Tensor route through the dispatcher. The exception to this is `shallow_copy_and_detach`, which bypasses the dispatcher and is used when saving tensors for backwards. When a Tensor is Python dispatch, we override the behavior of `shallow_copy_and_detach` to instead directly call into `__torch_dispatch__` to perform a `detach` operation (in the same way it would be invoked if you called `detach` directly). Because this Python call is triggered directly from c10::TensorImpl, it must be indirected through `PyInterpreter::detach`, which is the general mechanism for dynamic dispatching to the Python interpreter associated with a TensorImpl.
**torchdeploy compatibility.** The dispatch to Python logic cannot be directly registered to the dispatcher as it is compiled in the Python library, which will get loaded multiple times per torchdeploy interpreter. Thus, we must employ a two phase process. First, we register a fallback inside a non-Python library (aten/src/ATen/core/PythonFallbackKernel.cpp). Its job is to determine the appropriate PyInterpreter to handle the Python dispatch by going through all of the arguments and finding the first argument that has a PyObject/PyInterpreter. With this PyInterpreter, it makes another dynamic dispatch via "dispatch" which will go to the correct torchdeploy interpreter to handle dispatching to actual Python.
**Testing.** We provide a simple example of a LoggingTensor for testing, which can be used to generate TorchScript-like traces to observe what operations are being called when a Tensor is invoked. Although a LoggingTensor would be better implemented via an is-a relationship rather than a has-a relationship (as is done in the test), we've done it this way to show that arbitrarily complex compositions of tensors inside a tensor work properly.
**Known limitations.**
* We haven't adjusted any operator code, so some patterns may not work (as they lose the Python subclass in an unrecoverable way)
* `__torch_function__` must be explicitly disabled with `_disabled_torch_function_impl` otherwise things don't work quite correctly (in particular, what is being disabled is default subclass preservation behavior.)
* We don't ever populate kwargs, even when an argument is kwarg-only
Signed-off-by: Edward Z. Yang <ezyang@fb.com>
Differential Revision:
D29017912
D29017912
Test Plan: Imported from OSS
Reviewed By: bdhirsh
Pulled By: ezyang
fbshipit-source-id: a67714d9e541d09203a8cfc85345b8967db86238
2021-06-25 18:49:20 +00:00
|
|
|
if (key_set_.has(DispatchKey::Python) &&
|
|
|
|
|
!c10::impl::tls_is_dispatch_key_excluded(DispatchKey::Python)) {
|
|
|
|
|
auto r = pyobj_interpreter_.load(std::memory_order_acquire)->detach(this);
|
|
|
|
|
if (r) {
|
|
|
|
|
r->set_version_counter(std::move(version_counter));
|
|
|
|
|
r->set_allow_tensor_metadata_change(allow_tensor_metadata_change);
|
|
|
|
|
return r;
|
|
|
|
|
}
|
|
|
|
|
// otherwise just copy the TensorImpl and not the PyObject. Since
|
|
|
|
|
// the interpreter is dead no one can call us out on it
|
|
|
|
|
}
|
2020-12-05 02:33:16 +00:00
|
|
|
auto impl = c10::make_intrusive<TensorImpl>(
|
2020-12-10 01:43:51 +00:00
|
|
|
// No need to populate Storage; copy_tensor_metadata will do it for us.
|
2021-05-01 04:22:23 +00:00
|
|
|
key_set_,
|
|
|
|
|
data_type_,
|
|
|
|
|
device_opt_);
|
2020-12-05 02:33:16 +00:00
|
|
|
copy_tensor_metadata(
|
|
|
|
|
/*src_impl=*/this,
|
|
|
|
|
/*dest_impl=*/impl.get(),
|
|
|
|
|
/*version_counter=*/std::move(version_counter),
|
|
|
|
|
/*allow_tensor_metadata_change=*/allow_tensor_metadata_change);
|
|
|
|
|
impl->refresh_numel();
|
|
|
|
|
impl->refresh_contiguous();
|
|
|
|
|
return impl;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void TensorImpl::copy_tensor_metadata_except_version_counter(
|
2019-10-31 18:18:47 +00:00
|
|
|
const TensorImpl* src_impl,
|
|
|
|
|
TensorImpl* dest_impl,
|
|
|
|
|
bool allow_tensor_metadata_change) {
|
|
|
|
|
dest_impl->storage_ = src_impl->storage_;
|
2021-01-08 04:54:20 +00:00
|
|
|
dest_impl->sizes_and_strides_ = src_impl->sizes_and_strides_;
|
2019-10-31 18:18:47 +00:00
|
|
|
dest_impl->storage_offset_ = src_impl->storage_offset_;
|
|
|
|
|
dest_impl->data_type_ = src_impl->data_type_;
|
|
|
|
|
dest_impl->device_opt_ = src_impl->device_opt_;
|
Dispatch to Python via __torch_dispatch__ (#59760)
Summary:
Pull Request resolved: https://github.com/pytorch/pytorch/pull/59760
See https://github.com/pytorch/pytorch/issues/59049
There are some moving parts to this PR, I'll structure this explanation so the straightforward parts go first, and then the less straightforward parts.
**The actual dispatch to Python.** The core logic of dispatch to Python lives in `concrete_dispatch_fn` in `torch/csrc/autograd/python_variable.cpp`. It takes the input IValue stack, scans all the arguments for Tensor arguments, and defers most of the heavy lifting to `handle_torch_function_no_python_arg_parser` which actually does all of the logic for calling out to torch dispatch (in particular, this function handles multiple dispatch situations for you). Because we have a different function name than regular `__torch_function__` handling, `handle_torch_function_no_python_arg_parser` is generalized to accept a magic method name to look for when testing if Tensors have custom handling or not. Unlike `__torch_function__`, by default there is no `__torch_dispatch__` on Tensor classes.
**Maintaining the Python dispatch key.** In order to get to the dispatch to Python logic, we must tag Tensors with the `__torch_dispatch__` magic method with the newly added Python dispatch key (separated from PythonFuncTorch to allow for a transitional period while they migrate to this mechanism). We expose a new private property `_is_python_dispatch` that assists in debugging if a Tensor is participating in Python dispatch or not. We apply the Python dispatch key the first time a PyObject for a Tensor is constructed (THPVariable_NewWithVar), testing if `__torch_dispatch__` exists with then newly added `check_has_torch_dispatch`.
**Shallow copy and detach.** For the simple examples tested in this PR, most creations of Tensor route through the dispatcher. The exception to this is `shallow_copy_and_detach`, which bypasses the dispatcher and is used when saving tensors for backwards. When a Tensor is Python dispatch, we override the behavior of `shallow_copy_and_detach` to instead directly call into `__torch_dispatch__` to perform a `detach` operation (in the same way it would be invoked if you called `detach` directly). Because this Python call is triggered directly from c10::TensorImpl, it must be indirected through `PyInterpreter::detach`, which is the general mechanism for dynamic dispatching to the Python interpreter associated with a TensorImpl.
**torchdeploy compatibility.** The dispatch to Python logic cannot be directly registered to the dispatcher as it is compiled in the Python library, which will get loaded multiple times per torchdeploy interpreter. Thus, we must employ a two phase process. First, we register a fallback inside a non-Python library (aten/src/ATen/core/PythonFallbackKernel.cpp). Its job is to determine the appropriate PyInterpreter to handle the Python dispatch by going through all of the arguments and finding the first argument that has a PyObject/PyInterpreter. With this PyInterpreter, it makes another dynamic dispatch via "dispatch" which will go to the correct torchdeploy interpreter to handle dispatching to actual Python.
**Testing.** We provide a simple example of a LoggingTensor for testing, which can be used to generate TorchScript-like traces to observe what operations are being called when a Tensor is invoked. Although a LoggingTensor would be better implemented via an is-a relationship rather than a has-a relationship (as is done in the test), we've done it this way to show that arbitrarily complex compositions of tensors inside a tensor work properly.
**Known limitations.**
* We haven't adjusted any operator code, so some patterns may not work (as they lose the Python subclass in an unrecoverable way)
* `__torch_function__` must be explicitly disabled with `_disabled_torch_function_impl` otherwise things don't work quite correctly (in particular, what is being disabled is default subclass preservation behavior.)
* We don't ever populate kwargs, even when an argument is kwarg-only
Signed-off-by: Edward Z. Yang <ezyang@fb.com>
Differential Revision:
D29017912
D29017912
Test Plan: Imported from OSS
Reviewed By: bdhirsh
Pulled By: ezyang
fbshipit-source-id: a67714d9e541d09203a8cfc85345b8967db86238
2021-06-25 18:49:20 +00:00
|
|
|
dest_impl->key_set_ = src_impl->key_set_.remove(DispatchKey::Python);
|
2019-10-31 18:18:47 +00:00
|
|
|
dest_impl->is_contiguous_ = src_impl->is_contiguous_;
|
2021-04-08 01:19:27 +00:00
|
|
|
dest_impl->has_contiguity_ = src_impl->has_contiguity_;
|
2021-05-01 04:22:23 +00:00
|
|
|
dest_impl->is_channels_last_contiguous_ =
|
|
|
|
|
src_impl->is_channels_last_contiguous_;
|
|
|
|
|
dest_impl->is_channels_last_3d_contiguous_ =
|
|
|
|
|
src_impl->is_channels_last_3d_contiguous_;
|
2019-10-31 18:18:47 +00:00
|
|
|
dest_impl->is_channels_last_ = src_impl->is_channels_last_;
|
2020-03-06 13:59:20 +00:00
|
|
|
dest_impl->is_channels_last_3d_ = src_impl->is_channels_last_3d_;
|
2021-05-01 04:22:23 +00:00
|
|
|
dest_impl->is_non_overlapping_and_dense_ =
|
|
|
|
|
src_impl->is_non_overlapping_and_dense_;
|
2019-10-31 18:18:47 +00:00
|
|
|
dest_impl->is_wrapped_number_ = src_impl->is_wrapped_number_;
|
|
|
|
|
dest_impl->reserved_ = src_impl->reserved_;
|
|
|
|
|
dest_impl->set_allow_tensor_metadata_change(allow_tensor_metadata_change);
|
2021-05-01 04:22:23 +00:00
|
|
|
dest_impl->storage_access_should_throw_ =
|
|
|
|
|
src_impl->storage_access_should_throw_;
|
2019-10-31 18:18:47 +00:00
|
|
|
if (src_impl->named_tensor_meta_ != nullptr) {
|
|
|
|
|
dest_impl->named_tensor_meta_ = src_impl->named_tensor_meta_->clone();
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2020-12-05 02:33:16 +00:00
|
|
|
void TensorImpl::copy_tensor_metadata(
|
|
|
|
|
const TensorImpl* src_impl,
|
|
|
|
|
TensorImpl* dest_impl,
|
|
|
|
|
const c10::VariableVersion& version_counter,
|
|
|
|
|
bool allow_tensor_metadata_change) {
|
2021-05-01 04:22:23 +00:00
|
|
|
copy_tensor_metadata_except_version_counter(
|
|
|
|
|
src_impl, dest_impl, allow_tensor_metadata_change);
|
2021-04-09 21:39:14 +00:00
|
|
|
// TODO: In the ideal end state, it's okay to set disabled version_counter
|
2021-05-01 04:22:23 +00:00
|
|
|
// on inference tensor since it's a no-op. This requires refactor on call
|
|
|
|
|
// sites.
|
2021-06-04 15:58:06 +00:00
|
|
|
if (!dest_impl->is_inference()) {
|
2021-04-09 21:39:14 +00:00
|
|
|
dest_impl->set_version_counter(version_counter);
|
|
|
|
|
}
|
2020-12-05 02:33:16 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void TensorImpl::copy_tensor_metadata(
|
|
|
|
|
const TensorImpl* src_impl,
|
|
|
|
|
TensorImpl* dest_impl,
|
|
|
|
|
c10::VariableVersion&& version_counter,
|
|
|
|
|
bool allow_tensor_metadata_change) {
|
2021-05-01 04:22:23 +00:00
|
|
|
copy_tensor_metadata_except_version_counter(
|
|
|
|
|
src_impl, dest_impl, allow_tensor_metadata_change);
|
2021-06-04 15:58:06 +00:00
|
|
|
if (!dest_impl->is_inference()) {
|
2021-04-09 21:39:14 +00:00
|
|
|
dest_impl->set_version_counter(std::move(version_counter));
|
|
|
|
|
}
|
2020-12-05 02:33:16 +00:00
|
|
|
}
|
|
|
|
|
|
2019-10-31 18:18:47 +00:00
|
|
|
namespace impl {
|
|
|
|
|
|
|
|
|
|
namespace {
|
|
|
|
|
AutogradMetaFactory* meta_factory = nullptr;
|
2021-05-01 04:22:23 +00:00
|
|
|
} // namespace
|
2019-10-31 18:18:47 +00:00
|
|
|
|
|
|
|
|
void SetAutogradMetaFactory(AutogradMetaFactory* factory) {
|
|
|
|
|
meta_factory = factory;
|
|
|
|
|
}
|
|
|
|
|
AutogradMetaFactory* GetAutogradMetaFactory() {
|
2021-05-01 04:22:23 +00:00
|
|
|
TORCH_CHECK(
|
|
|
|
|
meta_factory,
|
|
|
|
|
"Support for autograd has not been loaded; have you linked against libtorch.so?")
|
2019-10-31 18:18:47 +00:00
|
|
|
return meta_factory;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
} // namespace impl
|
|
|
|
|
|
2018-12-12 04:40:32 +00:00
|
|
|
} // namespace c10
|