Canonicalize all includes in PyTorch. (#14849)
Summary:
Anywhere we used #include "foo.h", we now say #include <foo.h>
Paths are adjusted to be rooted out of aten/src, torch/lib, or
the root level directory.
I modified CMakeLists.txt by hand to remove TH and THC from
the include paths.
I used the following script to do the canonicalization:
```
import subprocess
import re
import os.path
files = subprocess.check_output(['git', 'ls-files']).decode('utf-8').rstrip().split('\n')
for fn in files:
if not any(fn.endswith(suff) for suff in ['.cu', '.cpp', '.in', '.h', '.hpp', '.cu', '.cuh', '.cc']):
continue
if not any(fn.startswith(pref) for pref in ["aten/", "torch/"]):
continue
with open(fn, 'r') as f:
c = f.read()
def fmt(p):
return "#include <{}>".format(p)
def repl(m):
p = m.group(1)
if p in ["dlfcn.h", "unistd.h", "nvrtc.h", "cuda.h", "cuda_runtime.h", "cstdint", "cudnn.h", "Python.h", "cusparse.h", "cuda_runtime_api.h", "cuda_fp16.h", "cublas_v2.h", "stdint.h", "curand_kernel.h"]:
return fmt(p)
if any(p.startswith(pref) for pref in ["torch/csrc", "c10/", "ATen/", "caffe2/", "TH/", "THC/", "Eigen/", "gtest/", "zdl/", "gloo/", "onnx/", "miopen/"]):
return fmt(p)
for root in ["aten/src", "torch/lib", ""]:
for bad_root in [os.path.dirname(fn), "aten/src/TH", "aten/src/THC", "torch/csrc"]:
new_p = os.path.relpath(os.path.join(bad_root, p), root)
if not new_p.startswith("../") and (os.path.exists(os.path.join(root, new_p)) or os.path.exists(os.path.join(root, new_p + ".in"))):
return fmt(new_p)
print("ERROR: ", fn, p)
return m.group(0)
new_c = re.sub(r'#include "([^"]+)"', repl, c)
if new_c != c:
print(fn)
with open(fn, 'w') as f:
f.write(new_c)
```
Signed-off-by: Edward Z. Yang <ezyang@fb.com>
Pull Request resolved: https://github.com/pytorch/pytorch/pull/14849
Reviewed By: dzhulgakov
Differential Revision: D13363445
Pulled By: ezyang
fbshipit-source-id: 52361f878a672785f9306c9e9ab2513128092b68
2018-12-09 03:32:01 +00:00
|
|
|
#include <torch/csrc/tensor/python_tensor.h>
|
2018-02-23 23:03:31 +00:00
|
|
|
|
|
|
|
|
#include <structmember.h>
|
|
|
|
|
#include <pybind11/pybind11.h>
|
|
|
|
|
|
Canonicalize all includes in PyTorch. (#14849)
Summary:
Anywhere we used #include "foo.h", we now say #include <foo.h>
Paths are adjusted to be rooted out of aten/src, torch/lib, or
the root level directory.
I modified CMakeLists.txt by hand to remove TH and THC from
the include paths.
I used the following script to do the canonicalization:
```
import subprocess
import re
import os.path
files = subprocess.check_output(['git', 'ls-files']).decode('utf-8').rstrip().split('\n')
for fn in files:
if not any(fn.endswith(suff) for suff in ['.cu', '.cpp', '.in', '.h', '.hpp', '.cu', '.cuh', '.cc']):
continue
if not any(fn.startswith(pref) for pref in ["aten/", "torch/"]):
continue
with open(fn, 'r') as f:
c = f.read()
def fmt(p):
return "#include <{}>".format(p)
def repl(m):
p = m.group(1)
if p in ["dlfcn.h", "unistd.h", "nvrtc.h", "cuda.h", "cuda_runtime.h", "cstdint", "cudnn.h", "Python.h", "cusparse.h", "cuda_runtime_api.h", "cuda_fp16.h", "cublas_v2.h", "stdint.h", "curand_kernel.h"]:
return fmt(p)
if any(p.startswith(pref) for pref in ["torch/csrc", "c10/", "ATen/", "caffe2/", "TH/", "THC/", "Eigen/", "gtest/", "zdl/", "gloo/", "onnx/", "miopen/"]):
return fmt(p)
for root in ["aten/src", "torch/lib", ""]:
for bad_root in [os.path.dirname(fn), "aten/src/TH", "aten/src/THC", "torch/csrc"]:
new_p = os.path.relpath(os.path.join(bad_root, p), root)
if not new_p.startswith("../") and (os.path.exists(os.path.join(root, new_p)) or os.path.exists(os.path.join(root, new_p + ".in"))):
return fmt(new_p)
print("ERROR: ", fn, p)
return m.group(0)
new_c = re.sub(r'#include "([^"]+)"', repl, c)
if new_c != c:
print(fn)
with open(fn, 'w') as f:
f.write(new_c)
```
Signed-off-by: Edward Z. Yang <ezyang@fb.com>
Pull Request resolved: https://github.com/pytorch/pytorch/pull/14849
Reviewed By: dzhulgakov
Differential Revision: D13363445
Pulled By: ezyang
fbshipit-source-id: 52361f878a672785f9306c9e9ab2513128092b68
2018-12-09 03:32:01 +00:00
|
|
|
#include <torch/csrc/Dtype.h>
|
|
|
|
|
#include <torch/csrc/DynamicTypes.h>
|
|
|
|
|
#include <torch/csrc/Exceptions.h>
|
|
|
|
|
#include <torch/csrc/Layout.h>
|
|
|
|
|
#include <torch/csrc/autograd/variable.h>
|
|
|
|
|
#include <torch/csrc/autograd/python_variable.h>
|
|
|
|
|
#include <torch/csrc/autograd/generated/VariableType.h>
|
|
|
|
|
#include <torch/csrc/autograd/utils/wrap_outputs.h>
|
|
|
|
|
#include <torch/csrc/utils/cuda_enabled.h>
|
|
|
|
|
#include <torch/csrc/utils/cuda_lazy_init.h>
|
|
|
|
|
#include <torch/csrc/utils/python_strings.h>
|
|
|
|
|
#include <torch/csrc/utils/tensor_new.h>
|
|
|
|
|
#include <torch/csrc/utils/tensor_types.h>
|
2018-02-23 23:03:31 +00:00
|
|
|
|
2018-06-16 07:40:35 +00:00
|
|
|
#include <ATen/ATen.h>
|
|
|
|
|
|
2018-05-04 15:04:57 +00:00
|
|
|
#include <sstream>
|
2018-06-16 07:40:35 +00:00
|
|
|
#include <string>
|
|
|
|
|
#include <type_traits>
|
2018-05-04 15:04:57 +00:00
|
|
|
#include <vector>
|
|
|
|
|
|
2018-06-26 04:11:49 +00:00
|
|
|
namespace torch { namespace tensors {
|
2018-02-23 23:03:31 +00:00
|
|
|
|
|
|
|
|
using namespace at;
|
|
|
|
|
using namespace torch::autograd;
|
|
|
|
|
|
|
|
|
|
struct PyTensorType {
|
|
|
|
|
PyTypeObject py_type;
|
Introduce torch.layout and split layout from dtypes. (#6145)
* Introduce torch.layout and split layout from dtypes.
Tensors (and tensor types) now have a 'layout' attribute that returns either 'torch.strided' or 'torch.sparse_coo'.
Previously, dtypes were 1-to-1 with ATen types/PyTensorTypes; the impetus behind this decision was to make things easy in the common case
(i.e. specifying a type in a factory function). But this doesn't really follow for sparity, which isn't a common case.
It also doesn't properly represent the concept or a dtype, which in numpy are proper scalar types (i.e. roughly the type returned from indexing the
last dimension of an n-d array). But this should be the same whether or not the tensor is represented via strides, sparsity, etc.
This is accomplished by:
1) having the dtype of tensor return the (device-type, scalar-type) combination, i.e. torch.cuda.float32, so both
torch.cuda.FloatTensor and torch.cuda.sparse.FloatTensor have the same dtype
2) Adding a layout parameter to python functions, where the combination of (dtype, layout) maps to an ATen type that is used for dispatch.
* Formatting, make init throw python_error.
* Fix cuda not enabled error message.
* Fix test.
2018-04-02 18:07:50 +00:00
|
|
|
THPDtype* dtype;
|
|
|
|
|
THPLayout* layout;
|
2018-04-12 18:05:44 +00:00
|
|
|
bool is_cuda;
|
Make PyTorch code-base clang-tidy compliant (#56892)
Summary:
This is an automatic change generated by the following script:
```
#!/usr/bin/env python3
from subprocess import check_output, check_call
import os
def get_compiled_files_list():
import json
with open("build/compile_commands.json") as f:
data = json.load(f)
files = [os.path.relpath(node['file']) for node in data]
for idx, fname in enumerate(files):
if fname.startswith('build/') and fname.endswith('.DEFAULT.cpp'):
files[idx] = fname[len('build/'):-len('.DEFAULT.cpp')]
return files
def run_clang_tidy(fname):
check_call(["python3", "tools/clang_tidy.py", "-c", "build", "-x", fname,"-s"])
changes = check_output(["git", "ls-files", "-m"])
if len(changes) == 0:
return
check_call(["git", "commit","--all", "-m", f"NOLINT stubs for {fname}"])
def main():
git_files = check_output(["git", "ls-files"]).decode("ascii").split("\n")
compiled_files = get_compiled_files_list()
for idx, fname in enumerate(git_files):
if fname not in compiled_files:
continue
if fname.startswith("caffe2/contrib/aten/"):
continue
print(f"[{idx}/{len(git_files)}] Processing {fname}")
run_clang_tidy(fname)
if __name__ == "__main__":
main()
```
Pull Request resolved: https://github.com/pytorch/pytorch/pull/56892
Reviewed By: H-Huang
Differential Revision: D27991944
Pulled By: malfet
fbshipit-source-id: 5415e1eb2c1b34319a4f03024bfaa087007d7179
2021-04-28 21:09:06 +00:00
|
|
|
// NOLINTNEXTLINE(cppcoreguidelines-avoid-c-arrays,cppcoreguidelines-avoid-magic-numbers,modernize-avoid-c-arrays)
|
2018-02-23 23:03:31 +00:00
|
|
|
char name[64];
|
Split libATen.so into libATen_cpu.so and libATen_cuda.so (#7275)
* Split libATen.so into libATen_cpu.so and libATen_cuda.so
Previously, ATen could be built with either CPU-only support, or
CPU/CUDA support, but only via a compile-time flag, requiring
two separate builds. This means that if you have a program which
indirectly uses a CPU-only build of ATen, and a CPU/CUDA-build of
ATen, you're gonna have a bad time. And you might want a CPU-only
build of ATen, because it is 15M (versus the 300M of a CUDA build).
This commit splits libATen.so into two libraries, CPU/CUDA, so
that it's not necessary to do a full rebuild to get CPU-only
support; instead, if you link against libATen_cpu.so only, you
are CPU-only; if you additionally link/dlopen libATen_cuda.so,
this enables CUDA support. This brings ATen's dynamic library
structure more similar to Caffe2's. libATen.so is no more
(this is BC BREAKING)
The general principle for how this works is that we introduce
a *hooks* interface, which introduces a dynamic dispatch indirection
between a call site and implementation site of CUDA functionality,
mediated by a static initialization registry. This means that we can continue
to, for example, lazily initialize CUDA from Context (a core, CPU class) without
having a direct dependency on the CUDA bits. Instead, we look up
in the registry if, e.g., CUDA hooks have been loaded (this loading
process happens at static initialization time), and if they
have been we dynamic dispatch to this class. We similarly use
the hooks interface to handle Variable registration.
We introduce a new invariant: if the backend of a type has not
been initialized (e.g., it's library has not been dlopened; for
CUDA, this also includes CUDA initialization), then the Type
pointers in the context registry are NULL. If you access the
registry directly you must maintain this invariant.
There are a few potholes along the way. I document them here:
- Previously, PyTorch maintained a separate registry for variable
types, because no provision for them was made in the Context's
type_registry. Now that we have the hooks mechanism, we can easily
have PyTorch register variables in the main registry. The code
has been refactored accordingly.
- There is a subtle ordering issue between Variable and CUDA.
We permit libATen_cuda.so and PyTorch to be loaded in either
order (in practice, CUDA is always loaded "after" PyTorch, because
it is lazily initialized.) This means that, when CUDA types are
loaded, we must subsequently also initialize their Variable equivalents.
Appropriate hooks were added to VariableHooks to make this possible;
similarly, getVariableHooks() is not referentially transparent, and
will change behavior after Variables are loaded. (This is different
to CUDAHooks, which is "burned in" after you try to initialize CUDA.)
- The cmake is adjusted to separate dependencies into either CPU
or CUDA dependencies. The generator scripts are adjusted to either
generate a file as a CUDA (cuda_file_manager) or CPU file (file_manager).
- I changed all native functions which were CUDA-only (the cudnn functions)
to have dispatches for CUDA only (making it permissible to not specify
all dispatch options.) This uncovered a bug in how we were handling
native functions which dispatch on a Type argument; I introduced a new
self_ty keyword to handle this case. I'm not 100% happy about it
but it fixed my problem.
This also exposed the fact that set_history incompletely handles
heterogenous return tuples combining Tensor and TensorList. I
swapped this codegen to use flatten() (at the possible cost of
a slight perf regression, since we're allocating another vector now
in this code path).
- thc_state is no longer a public member of Context; use getTHCState() instead
- This PR comes with Registry from Caffe2, for handling static initialization.
I needed to make a bunch of fixes to Registry to make it more portable
- No more ##__VA_ARGS__ token pasting; instead, it is mandatory to pass at
least one argument to the var-args. CUDAHooks and VariableHooks pass a nullary
struct CUDAHooksArgs/VariableHooksArgs to solve the problem. We must get rid of
token pasting because it does not work with MSVC.
- It seems MSVC is not willing to generate code for constructors of template
classes at use sites which cross DLL boundaries. So we explicitly instantiate
the class to get around the problem. This involved tweaks to the boilerplate
generating macros, and also required us to shuffle around namespaces a bit,
because you can't specialize a template unless you are in the same namespace as
the template.
- Insertion of AT_API to appropriate places where the registry must be exported
- We have a general problem which is that on recent Ubuntu distributions,
--as-needed is enabled for shared libraries, which is (cc @apaszke who was
worrying about this in #7160 see also #7160 (comment)). For now, I've hacked
this up in the PR to pass -Wl,--no-as-needed to all of the spots necessary to
make CI work, but a more sustainable solution is to attempt to dlopen
libATen_cuda.so when CUDA functionality is requested.
- The JIT tests somehow manage to try to touch CUDA without loading libATen_cuda.so. So
we pass -Wl,--no-as-needed when linking libATen_cuda.so to _C.so
- There is a very subtle linking issue with lapack, which is solved by making sure libATen_cuda.so links against LAPACK. There's a comment in aten/src/ATen/CMakeLists.txt about htis as well as a follow up bug at #7353
- autogradpp used AT_CUDA_ENABLED directly. We've expunged these uses and added
a few more things to CUDAHooks (getNumGPUs)
- Added manualSeedAll to Generator so that we can invoke it polymorphically (it
only does something different for CUDAGenerator)
- There's a new cuda/CUDAConfig.h header for CUDA-only ifdef macros (AT_CUDNN_ENABLED, most prominently)
- CUDAHooks/VariableHooks structs live in at namespace because Registry's
namespace support is not good enough to handle it otherwise (see Registry
changes above)
- There's some modest moving around of native functions in ReduceOps and
UnaryOps to get the CUDA-only function implementations into separate files, so
they are only compiled into libATen_cuda.so. sspaddmm needed a separate CUDA
function due to object linkage boundaries.
- Some direct uses of native functions in CUDA code has to go away, since these
functions are not exported, so you have to go through the dispatcher
(at::native::empty_like to at::empty_like)
- Code in THC/THCS/THCUNN now properly use THC_API macro instead of TH_API
(which matters now that TH and THC are not in the same library)
- Added code debt in torch/_thnn/utils.py and other THNN parsing code to handle
both TH_API and THC_API
- TensorUtils.h is now properly exported with AT_API
- Dead uses of TH_EXPORTS and co expunged; we now use ATen_cpu_exports and
ATen_cuda_exports (new, in ATenCUDAGeneral.h) consistently
- Fix some incorrect type annotations on _cudnn_rnn_backward, where we didn't
declare a type as possibly undefined when we should have. We didn't catch this
previously because optional annotations are not tested on "pass-through" native
ATen ops (which don't have dispatch). Upstream issue at #7316
- There's a new cmake macro aten_compile_options for applying all of our
per-target compile time options. We use this on the cpu and cuda libraries.
- test/test_cpp_extensions.py can be run directly by invoking in Python,
assuming you've setup your PYTHONPATH setup correctly
- type_from_string does some new funny business to only query for all valid CUDA
types (which causes CUDA initialization) when we see "torch.cuda." in the
requested string
Signed-off-by: Edward Z. Yang <ezyang@fb.com>
* Last mile libtorch fixes
Signed-off-by: Edward Z. Yang <ezyang@fb.com>
* pedantic fix
Signed-off-by: Edward Z. Yang <ezyang@fb.com>
2018-05-10 17:28:33 +00:00
|
|
|
int backend;
|
|
|
|
|
int scalar_type;
|
|
|
|
|
|
2019-06-30 11:07:46 +00:00
|
|
|
Backend get_backend() const {
|
|
|
|
|
return static_cast<Backend>(backend);
|
|
|
|
|
}
|
|
|
|
|
|
2020-01-15 19:12:17 +00:00
|
|
|
DispatchKey get_dispatch_key() const {
|
|
|
|
|
return backendToDispatchKey(static_cast<Backend>(backend));
|
2019-06-30 11:07:46 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
ScalarType get_scalar_type() const {
|
|
|
|
|
return static_cast<ScalarType>(scalar_type);
|
Split libATen.so into libATen_cpu.so and libATen_cuda.so (#7275)
* Split libATen.so into libATen_cpu.so and libATen_cuda.so
Previously, ATen could be built with either CPU-only support, or
CPU/CUDA support, but only via a compile-time flag, requiring
two separate builds. This means that if you have a program which
indirectly uses a CPU-only build of ATen, and a CPU/CUDA-build of
ATen, you're gonna have a bad time. And you might want a CPU-only
build of ATen, because it is 15M (versus the 300M of a CUDA build).
This commit splits libATen.so into two libraries, CPU/CUDA, so
that it's not necessary to do a full rebuild to get CPU-only
support; instead, if you link against libATen_cpu.so only, you
are CPU-only; if you additionally link/dlopen libATen_cuda.so,
this enables CUDA support. This brings ATen's dynamic library
structure more similar to Caffe2's. libATen.so is no more
(this is BC BREAKING)
The general principle for how this works is that we introduce
a *hooks* interface, which introduces a dynamic dispatch indirection
between a call site and implementation site of CUDA functionality,
mediated by a static initialization registry. This means that we can continue
to, for example, lazily initialize CUDA from Context (a core, CPU class) without
having a direct dependency on the CUDA bits. Instead, we look up
in the registry if, e.g., CUDA hooks have been loaded (this loading
process happens at static initialization time), and if they
have been we dynamic dispatch to this class. We similarly use
the hooks interface to handle Variable registration.
We introduce a new invariant: if the backend of a type has not
been initialized (e.g., it's library has not been dlopened; for
CUDA, this also includes CUDA initialization), then the Type
pointers in the context registry are NULL. If you access the
registry directly you must maintain this invariant.
There are a few potholes along the way. I document them here:
- Previously, PyTorch maintained a separate registry for variable
types, because no provision for them was made in the Context's
type_registry. Now that we have the hooks mechanism, we can easily
have PyTorch register variables in the main registry. The code
has been refactored accordingly.
- There is a subtle ordering issue between Variable and CUDA.
We permit libATen_cuda.so and PyTorch to be loaded in either
order (in practice, CUDA is always loaded "after" PyTorch, because
it is lazily initialized.) This means that, when CUDA types are
loaded, we must subsequently also initialize their Variable equivalents.
Appropriate hooks were added to VariableHooks to make this possible;
similarly, getVariableHooks() is not referentially transparent, and
will change behavior after Variables are loaded. (This is different
to CUDAHooks, which is "burned in" after you try to initialize CUDA.)
- The cmake is adjusted to separate dependencies into either CPU
or CUDA dependencies. The generator scripts are adjusted to either
generate a file as a CUDA (cuda_file_manager) or CPU file (file_manager).
- I changed all native functions which were CUDA-only (the cudnn functions)
to have dispatches for CUDA only (making it permissible to not specify
all dispatch options.) This uncovered a bug in how we were handling
native functions which dispatch on a Type argument; I introduced a new
self_ty keyword to handle this case. I'm not 100% happy about it
but it fixed my problem.
This also exposed the fact that set_history incompletely handles
heterogenous return tuples combining Tensor and TensorList. I
swapped this codegen to use flatten() (at the possible cost of
a slight perf regression, since we're allocating another vector now
in this code path).
- thc_state is no longer a public member of Context; use getTHCState() instead
- This PR comes with Registry from Caffe2, for handling static initialization.
I needed to make a bunch of fixes to Registry to make it more portable
- No more ##__VA_ARGS__ token pasting; instead, it is mandatory to pass at
least one argument to the var-args. CUDAHooks and VariableHooks pass a nullary
struct CUDAHooksArgs/VariableHooksArgs to solve the problem. We must get rid of
token pasting because it does not work with MSVC.
- It seems MSVC is not willing to generate code for constructors of template
classes at use sites which cross DLL boundaries. So we explicitly instantiate
the class to get around the problem. This involved tweaks to the boilerplate
generating macros, and also required us to shuffle around namespaces a bit,
because you can't specialize a template unless you are in the same namespace as
the template.
- Insertion of AT_API to appropriate places where the registry must be exported
- We have a general problem which is that on recent Ubuntu distributions,
--as-needed is enabled for shared libraries, which is (cc @apaszke who was
worrying about this in #7160 see also #7160 (comment)). For now, I've hacked
this up in the PR to pass -Wl,--no-as-needed to all of the spots necessary to
make CI work, but a more sustainable solution is to attempt to dlopen
libATen_cuda.so when CUDA functionality is requested.
- The JIT tests somehow manage to try to touch CUDA without loading libATen_cuda.so. So
we pass -Wl,--no-as-needed when linking libATen_cuda.so to _C.so
- There is a very subtle linking issue with lapack, which is solved by making sure libATen_cuda.so links against LAPACK. There's a comment in aten/src/ATen/CMakeLists.txt about htis as well as a follow up bug at #7353
- autogradpp used AT_CUDA_ENABLED directly. We've expunged these uses and added
a few more things to CUDAHooks (getNumGPUs)
- Added manualSeedAll to Generator so that we can invoke it polymorphically (it
only does something different for CUDAGenerator)
- There's a new cuda/CUDAConfig.h header for CUDA-only ifdef macros (AT_CUDNN_ENABLED, most prominently)
- CUDAHooks/VariableHooks structs live in at namespace because Registry's
namespace support is not good enough to handle it otherwise (see Registry
changes above)
- There's some modest moving around of native functions in ReduceOps and
UnaryOps to get the CUDA-only function implementations into separate files, so
they are only compiled into libATen_cuda.so. sspaddmm needed a separate CUDA
function due to object linkage boundaries.
- Some direct uses of native functions in CUDA code has to go away, since these
functions are not exported, so you have to go through the dispatcher
(at::native::empty_like to at::empty_like)
- Code in THC/THCS/THCUNN now properly use THC_API macro instead of TH_API
(which matters now that TH and THC are not in the same library)
- Added code debt in torch/_thnn/utils.py and other THNN parsing code to handle
both TH_API and THC_API
- TensorUtils.h is now properly exported with AT_API
- Dead uses of TH_EXPORTS and co expunged; we now use ATen_cpu_exports and
ATen_cuda_exports (new, in ATenCUDAGeneral.h) consistently
- Fix some incorrect type annotations on _cudnn_rnn_backward, where we didn't
declare a type as possibly undefined when we should have. We didn't catch this
previously because optional annotations are not tested on "pass-through" native
ATen ops (which don't have dispatch). Upstream issue at #7316
- There's a new cmake macro aten_compile_options for applying all of our
per-target compile time options. We use this on the cpu and cuda libraries.
- test/test_cpp_extensions.py can be run directly by invoking in Python,
assuming you've setup your PYTHONPATH setup correctly
- type_from_string does some new funny business to only query for all valid CUDA
types (which causes CUDA initialization) when we see "torch.cuda." in the
requested string
Signed-off-by: Edward Z. Yang <ezyang@fb.com>
* Last mile libtorch fixes
Signed-off-by: Edward Z. Yang <ezyang@fb.com>
* pedantic fix
Signed-off-by: Edward Z. Yang <ezyang@fb.com>
2018-05-10 17:28:33 +00:00
|
|
|
}
|
2018-02-23 23:03:31 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
|
|
static_assert(std::is_standard_layout<PyTensorType>::value, "PyTensorType must be standard layout");
|
|
|
|
|
|
2018-04-03 20:29:25 +00:00
|
|
|
// This is always an instance of VariableType
|
2019-06-30 11:07:46 +00:00
|
|
|
static PyTensorType* default_tensor_type;
|
2018-02-23 23:03:31 +00:00
|
|
|
|
2021-02-09 21:20:48 +00:00
|
|
|
static void py_bind_tensor_types(const std::vector<PyTensorType*>& tensor_types);
|
2018-02-23 23:03:31 +00:00
|
|
|
|
|
|
|
|
static TypeError unavailable_type(const PyTensorType& type) {
|
2019-06-30 11:07:46 +00:00
|
|
|
return TypeError("type %s not available. Torch not compiled with CUDA enabled.", type.name);
|
2018-02-23 23:03:31 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static PyObject* Tensor_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) {
|
|
|
|
|
HANDLE_TH_ERRORS
|
|
|
|
|
auto& tensor_type = *((PyTensorType*)type);
|
2019-06-30 11:07:46 +00:00
|
|
|
if (tensor_type.is_cuda && !torch::utils::cuda_enabled()) {
|
2018-02-23 23:03:31 +00:00
|
|
|
throw unavailable_type(tensor_type);
|
|
|
|
|
}
|
2020-01-15 19:12:17 +00:00
|
|
|
return THPVariable_Wrap(torch::utils::legacy_tensor_ctor(tensor_type.get_dispatch_key(), tensor_type.get_scalar_type(), args, kwargs));
|
2018-02-23 23:03:31 +00:00
|
|
|
END_HANDLE_TH_ERRORS
|
|
|
|
|
}
|
|
|
|
|
|
Tensor type set (#25308)
Summary:
Pull Request resolved: https://github.com/pytorch/pytorch/pull/25308
Instead of storing a single TensorTypeId in a Tensor, we store a bitset of tensor type IDs in a Tensor, TensorTypeSet. This class comes with some unit tests. This is in preparation for making Variable a TensorTypeId. In order to help flush out places where this makes a semantic difference, we rename `Tensor::type_id()` to `Tensor::type_set()` and smoke out all of the locations where this was semantically meaningful.
Because the new tensor type set is 64-bits, this increases the size of Tensor by a word.
Listing of semantic changes:
* Many TensorImpl related constructors just propagate TensorTypeId to a parent constructor. These are pretty simple to adjust.
* Backend extensions are now in the business of explicitly constructing a TensorTypeSet and then passing it in. This is probably OK for now but when Variable drops, these dispatch IDs may get immediately overwritten to have Variable set.
* `sparseTensorSetToDeviceType` and similar functions previously did an equality test with TensorTypeId, to determine what an appropriate device type is. This equality is now replaced with a set inclusion test. This is valid, under the assumption that we don't ever have weird sets like "this tensor is simultaneously a sparse CPU tensor and a sparse CUDA tensor", which will be true in the short term plan of adding Variable to the dispatch ID.
* `impl::dispatchTypeId` was generally introduced for cases where we legitimately need to convert from `TensorTypeSet -> TensorTypeId` in a dispatch related manner. At the moment, the implementation is trivial, but they will soon be adjusted to handle TLS. I've tried to make these call sites as forwards compatible as possible:
* `checked_tensor_unwrap` and co now use `dispatchTypeId`. When Variable is added to the type set, these will always be called in a context where the Variable type ID is disabled, so we will get the correct underlying tensor type ID.
* Uses of `Backend` in dispatch are now replaced with `TensorTypeSet`. The general heuristic here for whether or not to accept a `TensorTypeId` or `TensorTypeSet` is that we want to make the generated code as simple as possible. It is easier to retrieve a `TensorTypeSet`, so that's a more appropriate API in these cases.
* In some cases, I could not conveniently switch an implementation to the new semantics, because it was blocked on some other refactor. In this case, I introduced `legacyExtractTypeId`, which gives what would be a BC-compatible `TensorTypeSet` to `TensorTypeId` implementation that will continue to report the same values it would have prior to this change. This is **different** from `dispatchTypeId`, because this function does NOT respect TLS; it always ignores Variable type IDs.
* c10 dispatcher tests, which are oblivious to Variable dispatch, use this BC function (actually, they use `extractTypeId`, an overload for Tensor.
* The implementation of `new_*` methods heavily relies on tensor type ID, I chose not to unwind this. PR to refactor this at https://github.com/pytorch/pytorch/pull/25475
* Slicing also relies on tensor type ID, see `torch/csrc/autograd/python_variable_indexing.cpp` (though in some cases in this file, I was able to replace use of tensor type ID with TensorOptions)
* In some cases, there is an equality test on tensor type ID which would be better done by testing "tensor axes". In those cases, I replaced those equality tests with more equality tests.
* Example: `torch/csrc/nn/type_checks.h`
* There is a total punt in `torch/csrc/tensor/python_tensor.cpp` where "instance of" checking is done via dispatch ids. In general, the Variable-ness of a tensor doesn't participate in instanceof testing. It's not entirely clear what to do here.
* Instead of storing `Backend` in `VariableInfo`, we now just store Layout.
c10 dispatcher test updates were done with:
```
:%s/\([^ ]\+\)\.type_id()/extractTypeId(\1)/g
:%s/\([^( ]\+\)->type_id()/extractTypeId(*\1)/g
```
Pull Request resolved: https://github.com/pytorch/pytorch/pull/25308
Differential Revision: D17092791
Test Plan: sandcastle and ossci
Reviewed By: bwasti
Pulled By: ezyang
fbshipit-source-id: 22207d14fe62dd31ee19cc5011af22e3d9aabb5b
2019-09-10 17:24:43 +00:00
|
|
|
// TODO: Deprecate this instancecheck entirely. It's here to make
|
|
|
|
|
// instanceof(t, torch.FloatTensor) work, but we are not going to keep
|
|
|
|
|
// adding torch.QuantizedIntTensor classes for every new tensor type
|
|
|
|
|
// we add...
|
2020-10-20 21:59:49 +00:00
|
|
|
static PyObject* Tensor_instancecheck(PyObject* _self, PyObject* arg) {
|
2018-02-23 23:03:31 +00:00
|
|
|
HANDLE_TH_ERRORS
|
2020-10-20 21:59:49 +00:00
|
|
|
auto self = (PyTensorType*)_self;
|
2018-02-23 23:03:31 +00:00
|
|
|
if (THPVariable_Check(arg)) {
|
2021-04-15 15:48:00 +00:00
|
|
|
const auto& var = THPVariable_Unpack(arg);
|
Split libATen.so into libATen_cpu.so and libATen_cuda.so (#7275)
* Split libATen.so into libATen_cpu.so and libATen_cuda.so
Previously, ATen could be built with either CPU-only support, or
CPU/CUDA support, but only via a compile-time flag, requiring
two separate builds. This means that if you have a program which
indirectly uses a CPU-only build of ATen, and a CPU/CUDA-build of
ATen, you're gonna have a bad time. And you might want a CPU-only
build of ATen, because it is 15M (versus the 300M of a CUDA build).
This commit splits libATen.so into two libraries, CPU/CUDA, so
that it's not necessary to do a full rebuild to get CPU-only
support; instead, if you link against libATen_cpu.so only, you
are CPU-only; if you additionally link/dlopen libATen_cuda.so,
this enables CUDA support. This brings ATen's dynamic library
structure more similar to Caffe2's. libATen.so is no more
(this is BC BREAKING)
The general principle for how this works is that we introduce
a *hooks* interface, which introduces a dynamic dispatch indirection
between a call site and implementation site of CUDA functionality,
mediated by a static initialization registry. This means that we can continue
to, for example, lazily initialize CUDA from Context (a core, CPU class) without
having a direct dependency on the CUDA bits. Instead, we look up
in the registry if, e.g., CUDA hooks have been loaded (this loading
process happens at static initialization time), and if they
have been we dynamic dispatch to this class. We similarly use
the hooks interface to handle Variable registration.
We introduce a new invariant: if the backend of a type has not
been initialized (e.g., it's library has not been dlopened; for
CUDA, this also includes CUDA initialization), then the Type
pointers in the context registry are NULL. If you access the
registry directly you must maintain this invariant.
There are a few potholes along the way. I document them here:
- Previously, PyTorch maintained a separate registry for variable
types, because no provision for them was made in the Context's
type_registry. Now that we have the hooks mechanism, we can easily
have PyTorch register variables in the main registry. The code
has been refactored accordingly.
- There is a subtle ordering issue between Variable and CUDA.
We permit libATen_cuda.so and PyTorch to be loaded in either
order (in practice, CUDA is always loaded "after" PyTorch, because
it is lazily initialized.) This means that, when CUDA types are
loaded, we must subsequently also initialize their Variable equivalents.
Appropriate hooks were added to VariableHooks to make this possible;
similarly, getVariableHooks() is not referentially transparent, and
will change behavior after Variables are loaded. (This is different
to CUDAHooks, which is "burned in" after you try to initialize CUDA.)
- The cmake is adjusted to separate dependencies into either CPU
or CUDA dependencies. The generator scripts are adjusted to either
generate a file as a CUDA (cuda_file_manager) or CPU file (file_manager).
- I changed all native functions which were CUDA-only (the cudnn functions)
to have dispatches for CUDA only (making it permissible to not specify
all dispatch options.) This uncovered a bug in how we were handling
native functions which dispatch on a Type argument; I introduced a new
self_ty keyword to handle this case. I'm not 100% happy about it
but it fixed my problem.
This also exposed the fact that set_history incompletely handles
heterogenous return tuples combining Tensor and TensorList. I
swapped this codegen to use flatten() (at the possible cost of
a slight perf regression, since we're allocating another vector now
in this code path).
- thc_state is no longer a public member of Context; use getTHCState() instead
- This PR comes with Registry from Caffe2, for handling static initialization.
I needed to make a bunch of fixes to Registry to make it more portable
- No more ##__VA_ARGS__ token pasting; instead, it is mandatory to pass at
least one argument to the var-args. CUDAHooks and VariableHooks pass a nullary
struct CUDAHooksArgs/VariableHooksArgs to solve the problem. We must get rid of
token pasting because it does not work with MSVC.
- It seems MSVC is not willing to generate code for constructors of template
classes at use sites which cross DLL boundaries. So we explicitly instantiate
the class to get around the problem. This involved tweaks to the boilerplate
generating macros, and also required us to shuffle around namespaces a bit,
because you can't specialize a template unless you are in the same namespace as
the template.
- Insertion of AT_API to appropriate places where the registry must be exported
- We have a general problem which is that on recent Ubuntu distributions,
--as-needed is enabled for shared libraries, which is (cc @apaszke who was
worrying about this in #7160 see also #7160 (comment)). For now, I've hacked
this up in the PR to pass -Wl,--no-as-needed to all of the spots necessary to
make CI work, but a more sustainable solution is to attempt to dlopen
libATen_cuda.so when CUDA functionality is requested.
- The JIT tests somehow manage to try to touch CUDA without loading libATen_cuda.so. So
we pass -Wl,--no-as-needed when linking libATen_cuda.so to _C.so
- There is a very subtle linking issue with lapack, which is solved by making sure libATen_cuda.so links against LAPACK. There's a comment in aten/src/ATen/CMakeLists.txt about htis as well as a follow up bug at #7353
- autogradpp used AT_CUDA_ENABLED directly. We've expunged these uses and added
a few more things to CUDAHooks (getNumGPUs)
- Added manualSeedAll to Generator so that we can invoke it polymorphically (it
only does something different for CUDAGenerator)
- There's a new cuda/CUDAConfig.h header for CUDA-only ifdef macros (AT_CUDNN_ENABLED, most prominently)
- CUDAHooks/VariableHooks structs live in at namespace because Registry's
namespace support is not good enough to handle it otherwise (see Registry
changes above)
- There's some modest moving around of native functions in ReduceOps and
UnaryOps to get the CUDA-only function implementations into separate files, so
they are only compiled into libATen_cuda.so. sspaddmm needed a separate CUDA
function due to object linkage boundaries.
- Some direct uses of native functions in CUDA code has to go away, since these
functions are not exported, so you have to go through the dispatcher
(at::native::empty_like to at::empty_like)
- Code in THC/THCS/THCUNN now properly use THC_API macro instead of TH_API
(which matters now that TH and THC are not in the same library)
- Added code debt in torch/_thnn/utils.py and other THNN parsing code to handle
both TH_API and THC_API
- TensorUtils.h is now properly exported with AT_API
- Dead uses of TH_EXPORTS and co expunged; we now use ATen_cpu_exports and
ATen_cuda_exports (new, in ATenCUDAGeneral.h) consistently
- Fix some incorrect type annotations on _cudnn_rnn_backward, where we didn't
declare a type as possibly undefined when we should have. We didn't catch this
previously because optional annotations are not tested on "pass-through" native
ATen ops (which don't have dispatch). Upstream issue at #7316
- There's a new cmake macro aten_compile_options for applying all of our
per-target compile time options. We use this on the cpu and cuda libraries.
- test/test_cpp_extensions.py can be run directly by invoking in Python,
assuming you've setup your PYTHONPATH setup correctly
- type_from_string does some new funny business to only query for all valid CUDA
types (which causes CUDA initialization) when we see "torch.cuda." in the
requested string
Signed-off-by: Edward Z. Yang <ezyang@fb.com>
* Last mile libtorch fixes
Signed-off-by: Edward Z. Yang <ezyang@fb.com>
* pedantic fix
Signed-off-by: Edward Z. Yang <ezyang@fb.com>
2018-05-10 17:28:33 +00:00
|
|
|
// NB: This is a little unfortunate, in that if I do an isinstance check
|
|
|
|
|
// against torch.cuda.FloatTensor, this will immediately initialize CUDA.
|
|
|
|
|
// I originally thought that it would not be possible for aten_type_ to
|
|
|
|
|
// be nullptr if you had a tensor of some type, in which case you can
|
2020-01-18 00:01:29 +00:00
|
|
|
// skip initializing aten_type(), but TestAutograd.test_type_conversions
|
Split libATen.so into libATen_cpu.so and libATen_cuda.so (#7275)
* Split libATen.so into libATen_cpu.so and libATen_cuda.so
Previously, ATen could be built with either CPU-only support, or
CPU/CUDA support, but only via a compile-time flag, requiring
two separate builds. This means that if you have a program which
indirectly uses a CPU-only build of ATen, and a CPU/CUDA-build of
ATen, you're gonna have a bad time. And you might want a CPU-only
build of ATen, because it is 15M (versus the 300M of a CUDA build).
This commit splits libATen.so into two libraries, CPU/CUDA, so
that it's not necessary to do a full rebuild to get CPU-only
support; instead, if you link against libATen_cpu.so only, you
are CPU-only; if you additionally link/dlopen libATen_cuda.so,
this enables CUDA support. This brings ATen's dynamic library
structure more similar to Caffe2's. libATen.so is no more
(this is BC BREAKING)
The general principle for how this works is that we introduce
a *hooks* interface, which introduces a dynamic dispatch indirection
between a call site and implementation site of CUDA functionality,
mediated by a static initialization registry. This means that we can continue
to, for example, lazily initialize CUDA from Context (a core, CPU class) without
having a direct dependency on the CUDA bits. Instead, we look up
in the registry if, e.g., CUDA hooks have been loaded (this loading
process happens at static initialization time), and if they
have been we dynamic dispatch to this class. We similarly use
the hooks interface to handle Variable registration.
We introduce a new invariant: if the backend of a type has not
been initialized (e.g., it's library has not been dlopened; for
CUDA, this also includes CUDA initialization), then the Type
pointers in the context registry are NULL. If you access the
registry directly you must maintain this invariant.
There are a few potholes along the way. I document them here:
- Previously, PyTorch maintained a separate registry for variable
types, because no provision for them was made in the Context's
type_registry. Now that we have the hooks mechanism, we can easily
have PyTorch register variables in the main registry. The code
has been refactored accordingly.
- There is a subtle ordering issue between Variable and CUDA.
We permit libATen_cuda.so and PyTorch to be loaded in either
order (in practice, CUDA is always loaded "after" PyTorch, because
it is lazily initialized.) This means that, when CUDA types are
loaded, we must subsequently also initialize their Variable equivalents.
Appropriate hooks were added to VariableHooks to make this possible;
similarly, getVariableHooks() is not referentially transparent, and
will change behavior after Variables are loaded. (This is different
to CUDAHooks, which is "burned in" after you try to initialize CUDA.)
- The cmake is adjusted to separate dependencies into either CPU
or CUDA dependencies. The generator scripts are adjusted to either
generate a file as a CUDA (cuda_file_manager) or CPU file (file_manager).
- I changed all native functions which were CUDA-only (the cudnn functions)
to have dispatches for CUDA only (making it permissible to not specify
all dispatch options.) This uncovered a bug in how we were handling
native functions which dispatch on a Type argument; I introduced a new
self_ty keyword to handle this case. I'm not 100% happy about it
but it fixed my problem.
This also exposed the fact that set_history incompletely handles
heterogenous return tuples combining Tensor and TensorList. I
swapped this codegen to use flatten() (at the possible cost of
a slight perf regression, since we're allocating another vector now
in this code path).
- thc_state is no longer a public member of Context; use getTHCState() instead
- This PR comes with Registry from Caffe2, for handling static initialization.
I needed to make a bunch of fixes to Registry to make it more portable
- No more ##__VA_ARGS__ token pasting; instead, it is mandatory to pass at
least one argument to the var-args. CUDAHooks and VariableHooks pass a nullary
struct CUDAHooksArgs/VariableHooksArgs to solve the problem. We must get rid of
token pasting because it does not work with MSVC.
- It seems MSVC is not willing to generate code for constructors of template
classes at use sites which cross DLL boundaries. So we explicitly instantiate
the class to get around the problem. This involved tweaks to the boilerplate
generating macros, and also required us to shuffle around namespaces a bit,
because you can't specialize a template unless you are in the same namespace as
the template.
- Insertion of AT_API to appropriate places where the registry must be exported
- We have a general problem which is that on recent Ubuntu distributions,
--as-needed is enabled for shared libraries, which is (cc @apaszke who was
worrying about this in #7160 see also #7160 (comment)). For now, I've hacked
this up in the PR to pass -Wl,--no-as-needed to all of the spots necessary to
make CI work, but a more sustainable solution is to attempt to dlopen
libATen_cuda.so when CUDA functionality is requested.
- The JIT tests somehow manage to try to touch CUDA without loading libATen_cuda.so. So
we pass -Wl,--no-as-needed when linking libATen_cuda.so to _C.so
- There is a very subtle linking issue with lapack, which is solved by making sure libATen_cuda.so links against LAPACK. There's a comment in aten/src/ATen/CMakeLists.txt about htis as well as a follow up bug at #7353
- autogradpp used AT_CUDA_ENABLED directly. We've expunged these uses and added
a few more things to CUDAHooks (getNumGPUs)
- Added manualSeedAll to Generator so that we can invoke it polymorphically (it
only does something different for CUDAGenerator)
- There's a new cuda/CUDAConfig.h header for CUDA-only ifdef macros (AT_CUDNN_ENABLED, most prominently)
- CUDAHooks/VariableHooks structs live in at namespace because Registry's
namespace support is not good enough to handle it otherwise (see Registry
changes above)
- There's some modest moving around of native functions in ReduceOps and
UnaryOps to get the CUDA-only function implementations into separate files, so
they are only compiled into libATen_cuda.so. sspaddmm needed a separate CUDA
function due to object linkage boundaries.
- Some direct uses of native functions in CUDA code has to go away, since these
functions are not exported, so you have to go through the dispatcher
(at::native::empty_like to at::empty_like)
- Code in THC/THCS/THCUNN now properly use THC_API macro instead of TH_API
(which matters now that TH and THC are not in the same library)
- Added code debt in torch/_thnn/utils.py and other THNN parsing code to handle
both TH_API and THC_API
- TensorUtils.h is now properly exported with AT_API
- Dead uses of TH_EXPORTS and co expunged; we now use ATen_cpu_exports and
ATen_cuda_exports (new, in ATenCUDAGeneral.h) consistently
- Fix some incorrect type annotations on _cudnn_rnn_backward, where we didn't
declare a type as possibly undefined when we should have. We didn't catch this
previously because optional annotations are not tested on "pass-through" native
ATen ops (which don't have dispatch). Upstream issue at #7316
- There's a new cmake macro aten_compile_options for applying all of our
per-target compile time options. We use this on the cpu and cuda libraries.
- test/test_cpp_extensions.py can be run directly by invoking in Python,
assuming you've setup your PYTHONPATH setup correctly
- type_from_string does some new funny business to only query for all valid CUDA
types (which causes CUDA initialization) when we see "torch.cuda." in the
requested string
Signed-off-by: Edward Z. Yang <ezyang@fb.com>
* Last mile libtorch fixes
Signed-off-by: Edward Z. Yang <ezyang@fb.com>
* pedantic fix
Signed-off-by: Edward Z. Yang <ezyang@fb.com>
2018-05-10 17:28:33 +00:00
|
|
|
// seems to violate this property (for whatever reason.)
|
Tensor type set (#25308)
Summary:
Pull Request resolved: https://github.com/pytorch/pytorch/pull/25308
Instead of storing a single TensorTypeId in a Tensor, we store a bitset of tensor type IDs in a Tensor, TensorTypeSet. This class comes with some unit tests. This is in preparation for making Variable a TensorTypeId. In order to help flush out places where this makes a semantic difference, we rename `Tensor::type_id()` to `Tensor::type_set()` and smoke out all of the locations where this was semantically meaningful.
Because the new tensor type set is 64-bits, this increases the size of Tensor by a word.
Listing of semantic changes:
* Many TensorImpl related constructors just propagate TensorTypeId to a parent constructor. These are pretty simple to adjust.
* Backend extensions are now in the business of explicitly constructing a TensorTypeSet and then passing it in. This is probably OK for now but when Variable drops, these dispatch IDs may get immediately overwritten to have Variable set.
* `sparseTensorSetToDeviceType` and similar functions previously did an equality test with TensorTypeId, to determine what an appropriate device type is. This equality is now replaced with a set inclusion test. This is valid, under the assumption that we don't ever have weird sets like "this tensor is simultaneously a sparse CPU tensor and a sparse CUDA tensor", which will be true in the short term plan of adding Variable to the dispatch ID.
* `impl::dispatchTypeId` was generally introduced for cases where we legitimately need to convert from `TensorTypeSet -> TensorTypeId` in a dispatch related manner. At the moment, the implementation is trivial, but they will soon be adjusted to handle TLS. I've tried to make these call sites as forwards compatible as possible:
* `checked_tensor_unwrap` and co now use `dispatchTypeId`. When Variable is added to the type set, these will always be called in a context where the Variable type ID is disabled, so we will get the correct underlying tensor type ID.
* Uses of `Backend` in dispatch are now replaced with `TensorTypeSet`. The general heuristic here for whether or not to accept a `TensorTypeId` or `TensorTypeSet` is that we want to make the generated code as simple as possible. It is easier to retrieve a `TensorTypeSet`, so that's a more appropriate API in these cases.
* In some cases, I could not conveniently switch an implementation to the new semantics, because it was blocked on some other refactor. In this case, I introduced `legacyExtractTypeId`, which gives what would be a BC-compatible `TensorTypeSet` to `TensorTypeId` implementation that will continue to report the same values it would have prior to this change. This is **different** from `dispatchTypeId`, because this function does NOT respect TLS; it always ignores Variable type IDs.
* c10 dispatcher tests, which are oblivious to Variable dispatch, use this BC function (actually, they use `extractTypeId`, an overload for Tensor.
* The implementation of `new_*` methods heavily relies on tensor type ID, I chose not to unwind this. PR to refactor this at https://github.com/pytorch/pytorch/pull/25475
* Slicing also relies on tensor type ID, see `torch/csrc/autograd/python_variable_indexing.cpp` (though in some cases in this file, I was able to replace use of tensor type ID with TensorOptions)
* In some cases, there is an equality test on tensor type ID which would be better done by testing "tensor axes". In those cases, I replaced those equality tests with more equality tests.
* Example: `torch/csrc/nn/type_checks.h`
* There is a total punt in `torch/csrc/tensor/python_tensor.cpp` where "instance of" checking is done via dispatch ids. In general, the Variable-ness of a tensor doesn't participate in instanceof testing. It's not entirely clear what to do here.
* Instead of storing `Backend` in `VariableInfo`, we now just store Layout.
c10 dispatcher test updates were done with:
```
:%s/\([^ ]\+\)\.type_id()/extractTypeId(\1)/g
:%s/\([^( ]\+\)->type_id()/extractTypeId(*\1)/g
```
Pull Request resolved: https://github.com/pytorch/pytorch/pull/25308
Differential Revision: D17092791
Test Plan: sandcastle and ossci
Reviewed By: bwasti
Pulled By: ezyang
fbshipit-source-id: 22207d14fe62dd31ee19cc5011af22e3d9aabb5b
2019-09-10 17:24:43 +00:00
|
|
|
//
|
2020-01-15 19:12:17 +00:00
|
|
|
// TODO: Stop using legacyExtractDispatchKey here (probably need to build
|
Tensor type set (#25308)
Summary:
Pull Request resolved: https://github.com/pytorch/pytorch/pull/25308
Instead of storing a single TensorTypeId in a Tensor, we store a bitset of tensor type IDs in a Tensor, TensorTypeSet. This class comes with some unit tests. This is in preparation for making Variable a TensorTypeId. In order to help flush out places where this makes a semantic difference, we rename `Tensor::type_id()` to `Tensor::type_set()` and smoke out all of the locations where this was semantically meaningful.
Because the new tensor type set is 64-bits, this increases the size of Tensor by a word.
Listing of semantic changes:
* Many TensorImpl related constructors just propagate TensorTypeId to a parent constructor. These are pretty simple to adjust.
* Backend extensions are now in the business of explicitly constructing a TensorTypeSet and then passing it in. This is probably OK for now but when Variable drops, these dispatch IDs may get immediately overwritten to have Variable set.
* `sparseTensorSetToDeviceType` and similar functions previously did an equality test with TensorTypeId, to determine what an appropriate device type is. This equality is now replaced with a set inclusion test. This is valid, under the assumption that we don't ever have weird sets like "this tensor is simultaneously a sparse CPU tensor and a sparse CUDA tensor", which will be true in the short term plan of adding Variable to the dispatch ID.
* `impl::dispatchTypeId` was generally introduced for cases where we legitimately need to convert from `TensorTypeSet -> TensorTypeId` in a dispatch related manner. At the moment, the implementation is trivial, but they will soon be adjusted to handle TLS. I've tried to make these call sites as forwards compatible as possible:
* `checked_tensor_unwrap` and co now use `dispatchTypeId`. When Variable is added to the type set, these will always be called in a context where the Variable type ID is disabled, so we will get the correct underlying tensor type ID.
* Uses of `Backend` in dispatch are now replaced with `TensorTypeSet`. The general heuristic here for whether or not to accept a `TensorTypeId` or `TensorTypeSet` is that we want to make the generated code as simple as possible. It is easier to retrieve a `TensorTypeSet`, so that's a more appropriate API in these cases.
* In some cases, I could not conveniently switch an implementation to the new semantics, because it was blocked on some other refactor. In this case, I introduced `legacyExtractTypeId`, which gives what would be a BC-compatible `TensorTypeSet` to `TensorTypeId` implementation that will continue to report the same values it would have prior to this change. This is **different** from `dispatchTypeId`, because this function does NOT respect TLS; it always ignores Variable type IDs.
* c10 dispatcher tests, which are oblivious to Variable dispatch, use this BC function (actually, they use `extractTypeId`, an overload for Tensor.
* The implementation of `new_*` methods heavily relies on tensor type ID, I chose not to unwind this. PR to refactor this at https://github.com/pytorch/pytorch/pull/25475
* Slicing also relies on tensor type ID, see `torch/csrc/autograd/python_variable_indexing.cpp` (though in some cases in this file, I was able to replace use of tensor type ID with TensorOptions)
* In some cases, there is an equality test on tensor type ID which would be better done by testing "tensor axes". In those cases, I replaced those equality tests with more equality tests.
* Example: `torch/csrc/nn/type_checks.h`
* There is a total punt in `torch/csrc/tensor/python_tensor.cpp` where "instance of" checking is done via dispatch ids. In general, the Variable-ness of a tensor doesn't participate in instanceof testing. It's not entirely clear what to do here.
* Instead of storing `Backend` in `VariableInfo`, we now just store Layout.
c10 dispatcher test updates were done with:
```
:%s/\([^ ]\+\)\.type_id()/extractTypeId(\1)/g
:%s/\([^( ]\+\)->type_id()/extractTypeId(*\1)/g
```
Pull Request resolved: https://github.com/pytorch/pytorch/pull/25308
Differential Revision: D17092791
Test Plan: sandcastle and ossci
Reviewed By: bwasti
Pulled By: ezyang
fbshipit-source-id: 22207d14fe62dd31ee19cc5011af22e3d9aabb5b
2019-09-10 17:24:43 +00:00
|
|
|
// in instanceof checking to Tensor class itself)
|
2020-01-15 19:12:17 +00:00
|
|
|
if (legacyExtractDispatchKey(var.key_set()) == self->get_dispatch_key() &&
|
2019-04-22 04:12:21 +00:00
|
|
|
var.scalar_type() == static_cast<ScalarType>(self->scalar_type)) {
|
2018-02-23 23:03:31 +00:00
|
|
|
Py_RETURN_TRUE;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
Py_RETURN_FALSE;
|
|
|
|
|
END_HANDLE_TH_ERRORS
|
|
|
|
|
}
|
|
|
|
|
|
Fix remaining invalid function cast warnings that show up with GCC 8/9 (#26104)
Summary:
Follow-up to gh-25483, more of the same fixes for warnings like:
```
../torch/csrc/autograd/python_variable.cpp:503:31: warning: cast between incompatible function types from ‘PyObject* (*)(THPVariable*)’ {aka ‘_object* (*)(THPVariable*)’} to ‘getter’ {aka ‘_object* (*)(_object*, void*)’} [-Wcast-function-type]
503 | {"_backward_hooks", (getter)THPVariable_get_backwards_hooks, (setter)THPVariable_set_backwards_hooks, nullptr, nullptr},
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
```
This takes the build log output for a full rebuild with GCC 9.1 from ~10,000 to ~7,000 lines.
`clang-tidy` is going to complain, no way around that - see discussion at the end of gh-25483.
Pull Request resolved: https://github.com/pytorch/pytorch/pull/26104
Differential Revision: D17396831
Pulled By: ezyang
fbshipit-source-id: d71696bfe4dbe25519e4bcb7753151c118bd39f7
2019-09-17 14:40:38 +00:00
|
|
|
PyObject *Tensor_dtype(PyTensorType* self, void *unused) {
|
2018-03-01 19:06:55 +00:00
|
|
|
return torch::autograd::utils::wrap(self->dtype);
|
|
|
|
|
}
|
|
|
|
|
|
Fix remaining invalid function cast warnings that show up with GCC 8/9 (#26104)
Summary:
Follow-up to gh-25483, more of the same fixes for warnings like:
```
../torch/csrc/autograd/python_variable.cpp:503:31: warning: cast between incompatible function types from ‘PyObject* (*)(THPVariable*)’ {aka ‘_object* (*)(THPVariable*)’} to ‘getter’ {aka ‘_object* (*)(_object*, void*)’} [-Wcast-function-type]
503 | {"_backward_hooks", (getter)THPVariable_get_backwards_hooks, (setter)THPVariable_set_backwards_hooks, nullptr, nullptr},
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
```
This takes the build log output for a full rebuild with GCC 9.1 from ~10,000 to ~7,000 lines.
`clang-tidy` is going to complain, no way around that - see discussion at the end of gh-25483.
Pull Request resolved: https://github.com/pytorch/pytorch/pull/26104
Differential Revision: D17396831
Pulled By: ezyang
fbshipit-source-id: d71696bfe4dbe25519e4bcb7753151c118bd39f7
2019-09-17 14:40:38 +00:00
|
|
|
PyObject *Tensor_layout(PyTensorType* self, void *unused) {
|
Introduce torch.layout and split layout from dtypes. (#6145)
* Introduce torch.layout and split layout from dtypes.
Tensors (and tensor types) now have a 'layout' attribute that returns either 'torch.strided' or 'torch.sparse_coo'.
Previously, dtypes were 1-to-1 with ATen types/PyTensorTypes; the impetus behind this decision was to make things easy in the common case
(i.e. specifying a type in a factory function). But this doesn't really follow for sparity, which isn't a common case.
It also doesn't properly represent the concept or a dtype, which in numpy are proper scalar types (i.e. roughly the type returned from indexing the
last dimension of an n-d array). But this should be the same whether or not the tensor is represented via strides, sparsity, etc.
This is accomplished by:
1) having the dtype of tensor return the (device-type, scalar-type) combination, i.e. torch.cuda.float32, so both
torch.cuda.FloatTensor and torch.cuda.sparse.FloatTensor have the same dtype
2) Adding a layout parameter to python functions, where the combination of (dtype, layout) maps to an ATen type that is used for dispatch.
* Formatting, make init throw python_error.
* Fix cuda not enabled error message.
* Fix test.
2018-04-02 18:07:50 +00:00
|
|
|
return torch::autograd::utils::wrap(self->layout);
|
|
|
|
|
}
|
|
|
|
|
|
Fix remaining invalid function cast warnings that show up with GCC 8/9 (#26104)
Summary:
Follow-up to gh-25483, more of the same fixes for warnings like:
```
../torch/csrc/autograd/python_variable.cpp:503:31: warning: cast between incompatible function types from ‘PyObject* (*)(THPVariable*)’ {aka ‘_object* (*)(THPVariable*)’} to ‘getter’ {aka ‘_object* (*)(_object*, void*)’} [-Wcast-function-type]
503 | {"_backward_hooks", (getter)THPVariable_get_backwards_hooks, (setter)THPVariable_set_backwards_hooks, nullptr, nullptr},
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
```
This takes the build log output for a full rebuild with GCC 9.1 from ~10,000 to ~7,000 lines.
`clang-tidy` is going to complain, no way around that - see discussion at the end of gh-25483.
Pull Request resolved: https://github.com/pytorch/pytorch/pull/26104
Differential Revision: D17396831
Pulled By: ezyang
fbshipit-source-id: d71696bfe4dbe25519e4bcb7753151c118bd39f7
2019-09-17 14:40:38 +00:00
|
|
|
PyObject *Tensor_is_cuda(PyTensorType* self, void *unused) {
|
2018-04-12 18:05:44 +00:00
|
|
|
if (self->is_cuda) {
|
2018-03-01 19:06:55 +00:00
|
|
|
Py_RETURN_TRUE;
|
|
|
|
|
} else {
|
|
|
|
|
Py_RETURN_FALSE;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
Fix remaining invalid function cast warnings that show up with GCC 8/9 (#26104)
Summary:
Follow-up to gh-25483, more of the same fixes for warnings like:
```
../torch/csrc/autograd/python_variable.cpp:503:31: warning: cast between incompatible function types from ‘PyObject* (*)(THPVariable*)’ {aka ‘_object* (*)(THPVariable*)’} to ‘getter’ {aka ‘_object* (*)(_object*, void*)’} [-Wcast-function-type]
503 | {"_backward_hooks", (getter)THPVariable_get_backwards_hooks, (setter)THPVariable_set_backwards_hooks, nullptr, nullptr},
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
```
This takes the build log output for a full rebuild with GCC 9.1 from ~10,000 to ~7,000 lines.
`clang-tidy` is going to complain, no way around that - see discussion at the end of gh-25483.
Pull Request resolved: https://github.com/pytorch/pytorch/pull/26104
Differential Revision: D17396831
Pulled By: ezyang
fbshipit-source-id: d71696bfe4dbe25519e4bcb7753151c118bd39f7
2019-09-17 14:40:38 +00:00
|
|
|
PyObject *Tensor_is_sparse(PyTensorType *self, void *unused) {
|
2018-06-16 07:40:35 +00:00
|
|
|
if (self->layout->layout == at::Layout::Strided) {
|
2018-03-01 19:06:55 +00:00
|
|
|
Py_RETURN_FALSE;
|
2018-06-16 07:40:35 +00:00
|
|
|
} else {
|
|
|
|
|
Py_RETURN_TRUE;
|
2018-03-01 19:06:55 +00:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2021-04-12 17:07:56 +00:00
|
|
|
PyObject *Tensor_is_sparse_csr(PyTensorType *self, void *unused) {
|
|
|
|
|
if (self->layout->layout == at::Layout::SparseCsr) {
|
|
|
|
|
Py_RETURN_TRUE;
|
|
|
|
|
} else {
|
|
|
|
|
Py_RETURN_FALSE;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
Make PyTorch code-base clang-tidy compliant (#56892)
Summary:
This is an automatic change generated by the following script:
```
#!/usr/bin/env python3
from subprocess import check_output, check_call
import os
def get_compiled_files_list():
import json
with open("build/compile_commands.json") as f:
data = json.load(f)
files = [os.path.relpath(node['file']) for node in data]
for idx, fname in enumerate(files):
if fname.startswith('build/') and fname.endswith('.DEFAULT.cpp'):
files[idx] = fname[len('build/'):-len('.DEFAULT.cpp')]
return files
def run_clang_tidy(fname):
check_call(["python3", "tools/clang_tidy.py", "-c", "build", "-x", fname,"-s"])
changes = check_output(["git", "ls-files", "-m"])
if len(changes) == 0:
return
check_call(["git", "commit","--all", "-m", f"NOLINT stubs for {fname}"])
def main():
git_files = check_output(["git", "ls-files"]).decode("ascii").split("\n")
compiled_files = get_compiled_files_list()
for idx, fname in enumerate(git_files):
if fname not in compiled_files:
continue
if fname.startswith("caffe2/contrib/aten/"):
continue
print(f"[{idx}/{len(git_files)}] Processing {fname}")
run_clang_tidy(fname)
if __name__ == "__main__":
main()
```
Pull Request resolved: https://github.com/pytorch/pytorch/pull/56892
Reviewed By: H-Huang
Differential Revision: D27991944
Pulled By: malfet
fbshipit-source-id: 5415e1eb2c1b34319a4f03024bfaa087007d7179
2021-04-28 21:09:06 +00:00
|
|
|
// NOLINTNEXTLINE(cppcoreguidelines-avoid-c-arrays,cppcoreguidelines-avoid-non-const-global-variables,modernize-avoid-c-arrays)
|
2018-02-23 23:03:31 +00:00
|
|
|
static struct PyMethodDef metaclass_methods[] = {
|
2020-10-20 21:59:49 +00:00
|
|
|
{"__instancecheck__", Tensor_instancecheck, METH_O, nullptr},
|
2018-08-30 23:22:24 +00:00
|
|
|
{nullptr}
|
2018-02-23 23:03:31 +00:00
|
|
|
};
|
|
|
|
|
|
2018-03-01 19:06:55 +00:00
|
|
|
typedef PyObject *(*getter)(PyObject *, void *);
|
|
|
|
|
|
Make PyTorch code-base clang-tidy compliant (#56892)
Summary:
This is an automatic change generated by the following script:
```
#!/usr/bin/env python3
from subprocess import check_output, check_call
import os
def get_compiled_files_list():
import json
with open("build/compile_commands.json") as f:
data = json.load(f)
files = [os.path.relpath(node['file']) for node in data]
for idx, fname in enumerate(files):
if fname.startswith('build/') and fname.endswith('.DEFAULT.cpp'):
files[idx] = fname[len('build/'):-len('.DEFAULT.cpp')]
return files
def run_clang_tidy(fname):
check_call(["python3", "tools/clang_tidy.py", "-c", "build", "-x", fname,"-s"])
changes = check_output(["git", "ls-files", "-m"])
if len(changes) == 0:
return
check_call(["git", "commit","--all", "-m", f"NOLINT stubs for {fname}"])
def main():
git_files = check_output(["git", "ls-files"]).decode("ascii").split("\n")
compiled_files = get_compiled_files_list()
for idx, fname in enumerate(git_files):
if fname not in compiled_files:
continue
if fname.startswith("caffe2/contrib/aten/"):
continue
print(f"[{idx}/{len(git_files)}] Processing {fname}")
run_clang_tidy(fname)
if __name__ == "__main__":
main()
```
Pull Request resolved: https://github.com/pytorch/pytorch/pull/56892
Reviewed By: H-Huang
Differential Revision: D27991944
Pulled By: malfet
fbshipit-source-id: 5415e1eb2c1b34319a4f03024bfaa087007d7179
2021-04-28 21:09:06 +00:00
|
|
|
// NOLINTNEXTLINE(cppcoreguidelines-avoid-c-arrays,cppcoreguidelines-avoid-non-const-global-variables,modernize-avoid-c-arrays)
|
2018-03-01 19:06:55 +00:00
|
|
|
static struct PyGetSetDef metaclass_properties[] = {
|
|
|
|
|
{"dtype", (getter)Tensor_dtype, nullptr, nullptr, nullptr},
|
Introduce torch.layout and split layout from dtypes. (#6145)
* Introduce torch.layout and split layout from dtypes.
Tensors (and tensor types) now have a 'layout' attribute that returns either 'torch.strided' or 'torch.sparse_coo'.
Previously, dtypes were 1-to-1 with ATen types/PyTensorTypes; the impetus behind this decision was to make things easy in the common case
(i.e. specifying a type in a factory function). But this doesn't really follow for sparity, which isn't a common case.
It also doesn't properly represent the concept or a dtype, which in numpy are proper scalar types (i.e. roughly the type returned from indexing the
last dimension of an n-d array). But this should be the same whether or not the tensor is represented via strides, sparsity, etc.
This is accomplished by:
1) having the dtype of tensor return the (device-type, scalar-type) combination, i.e. torch.cuda.float32, so both
torch.cuda.FloatTensor and torch.cuda.sparse.FloatTensor have the same dtype
2) Adding a layout parameter to python functions, where the combination of (dtype, layout) maps to an ATen type that is used for dispatch.
* Formatting, make init throw python_error.
* Fix cuda not enabled error message.
* Fix test.
2018-04-02 18:07:50 +00:00
|
|
|
{"layout", (getter)Tensor_layout, nullptr, nullptr, nullptr},
|
2018-03-01 19:06:55 +00:00
|
|
|
{"is_cuda", (getter)Tensor_is_cuda, nullptr, nullptr, nullptr},
|
|
|
|
|
{"is_sparse", (getter)Tensor_is_sparse, nullptr, nullptr, nullptr},
|
2021-04-12 17:07:56 +00:00
|
|
|
{"is_sparse_csr",(getter)Tensor_is_sparse_csr, nullptr, nullptr, nullptr},
|
2018-03-01 19:06:55 +00:00
|
|
|
{nullptr}
|
2018-02-23 23:03:31 +00:00
|
|
|
};
|
|
|
|
|
|
2019-12-20 20:09:06 +00:00
|
|
|
static PyTypeObject metaclass = {
|
|
|
|
|
PyVarObject_HEAD_INIT(nullptr, 0)
|
|
|
|
|
"torch.tensortype", /* tp_name */
|
|
|
|
|
sizeof(PyTypeObject) /* tp_basicsize */
|
|
|
|
|
};
|
2018-02-23 23:03:31 +00:00
|
|
|
|
|
|
|
|
static void py_initialize_metaclass(PyTypeObject& metaclass) {
|
|
|
|
|
metaclass.tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE;
|
|
|
|
|
metaclass.tp_methods = metaclass_methods;
|
2018-03-01 19:06:55 +00:00
|
|
|
metaclass.tp_getset = metaclass_properties;
|
2018-02-23 23:03:31 +00:00
|
|
|
metaclass.tp_base = &PyType_Type;
|
|
|
|
|
if (PyType_Ready(&metaclass) < 0) {
|
|
|
|
|
throw python_error();
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2019-12-20 20:09:06 +00:00
|
|
|
static PyTypeObject tensor_type_prototype = {
|
|
|
|
|
PyVarObject_HEAD_INIT(&metaclass, 0)
|
|
|
|
|
nullptr, /* tp_name */
|
|
|
|
|
sizeof(PyTensorType) /* tp_basicsize */
|
|
|
|
|
};
|
|
|
|
|
|
2018-02-23 23:03:31 +00:00
|
|
|
static void py_initialize_tensor_type(PyTypeObject& type, const char* name, PyObject* tp_dict) {
|
|
|
|
|
// NOTE: we don't use the typical static declaration of PyTypeObject because
|
|
|
|
|
// we need to initialize as many types as there are VariableType instances.
|
2019-12-20 20:09:06 +00:00
|
|
|
// We copy the basic object fields from a prototype definition and initialize
|
|
|
|
|
// the remaining fields below.
|
|
|
|
|
memcpy(&type, &tensor_type_prototype, sizeof(PyTypeObject));
|
2019-05-09 14:04:23 +00:00
|
|
|
// Subclassing from torch.<ScalarType>Tensor isn't supported.
|
|
|
|
|
// (Py_TPFLAGS_BASETYPE omitted). Subclassing torch.Tensor still allowed.
|
|
|
|
|
type.tp_flags = Py_TPFLAGS_DEFAULT;
|
2018-02-23 23:03:31 +00:00
|
|
|
type.tp_name = name;
|
|
|
|
|
type.tp_new = Tensor_new;
|
|
|
|
|
if (PyType_Ready(&type) < 0) {
|
|
|
|
|
throw python_error();
|
|
|
|
|
}
|
|
|
|
|
if (PyDict_Merge(type.tp_dict, tp_dict, 0) < 0) {
|
|
|
|
|
throw python_error();
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static const char* get_module(Backend backend) {
|
|
|
|
|
switch (backend) {
|
2018-08-19 00:25:26 +00:00
|
|
|
case Backend::CPU: return "torch";
|
|
|
|
|
case Backend::CUDA: return "torch.cuda";
|
|
|
|
|
case Backend::SparseCPU: return "torch.sparse";
|
|
|
|
|
case Backend::SparseCUDA: return "torch.cuda.sparse";
|
Make AT_ASSERT/AT_ERROR non-printf based, other tweaks (#7104)
* Make AT_ASSERT/AT_ERROR non-printf based, other tweaks
- AT_ASSERT/AT_ERROR don't take printf strings anymore; instead,
they take a comma-separated list of things you wanted to print
(bringing it inline with Caffe2's conventions).
Instead of AT_ASSERT(x == 0, "%d is not zero", x)
you write AT_ASSERT(x == 0, x, " is not zero")
This is done by way of a new variadic template at::str(), which
takes a list of arguments and cats their string reps (as per
operator<<) together.
- A bunch of the demangling logic that was in Error.h is now
moved to Error.cpp (better header hygiene.) Also, demangle
has been moved out to its own helper function, and also
a new helper demangle_type (from Caffe2) added.
- A bunch of AT_ASSERT converted into AT_CHECK, to more properly
convey which checks can be caused by user error, and which are
due to logic error in ATen.
Signed-off-by: Edward Z. Yang <ezyang@fb.com>
* CR
Signed-off-by: Edward Z. Yang <ezyang@fb.com>
* Fix test failure.
Signed-off-by: Edward Z. Yang <ezyang@fb.com>
* buildfix
Signed-off-by: Edward Z. Yang <ezyang@fb.com>
* More fixes.
Signed-off-by: Edward Z. Yang <ezyang@fb.com>
* One more fix
Signed-off-by: Edward Z. Yang <ezyang@fb.com>
* Try harder
Signed-off-by: Edward Z. Yang <ezyang@fb.com>
2018-05-01 14:28:31 +00:00
|
|
|
default: AT_ERROR("invalid backend: ", toString(backend));
|
2018-02-23 23:03:31 +00:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static std::string get_name(Backend backend, ScalarType scalarType) {
|
|
|
|
|
std::ostringstream ss;
|
2018-11-22 07:04:43 +00:00
|
|
|
ss << get_module(backend) << "." << toString(scalarType) << "Tensor";
|
2018-02-23 23:03:31 +00:00
|
|
|
return ss.str();
|
|
|
|
|
}
|
|
|
|
|
|
2019-06-30 11:07:46 +00:00
|
|
|
static THPObjectPtr get_storage_obj(PyTensorType* type) {
|
|
|
|
|
auto module_name = get_module(type->get_backend());
|
2018-03-01 19:06:55 +00:00
|
|
|
auto module_obj = THPObjectPtr(PyImport_ImportModule(module_name));
|
|
|
|
|
if (!module_obj) throw python_error();
|
|
|
|
|
|
2019-06-30 11:07:46 +00:00
|
|
|
auto storage_name = std::string(toString(type->get_scalar_type())) + "Storage";
|
2018-03-01 19:06:55 +00:00
|
|
|
THPObjectPtr storage(PyObject_GetAttrString(module_obj.get(), storage_name.c_str()));
|
|
|
|
|
if (!storage.get()) {
|
|
|
|
|
throw TypeError("couldn't find storage object %s", storage_name.c_str());
|
|
|
|
|
}
|
|
|
|
|
return storage;
|
|
|
|
|
}
|
|
|
|
|
|
2018-02-23 23:03:31 +00:00
|
|
|
static void set_type(PyTensorType& type_obj, Backend backend, ScalarType scalarType) {
|
Split libATen.so into libATen_cpu.so and libATen_cuda.so (#7275)
* Split libATen.so into libATen_cpu.so and libATen_cuda.so
Previously, ATen could be built with either CPU-only support, or
CPU/CUDA support, but only via a compile-time flag, requiring
two separate builds. This means that if you have a program which
indirectly uses a CPU-only build of ATen, and a CPU/CUDA-build of
ATen, you're gonna have a bad time. And you might want a CPU-only
build of ATen, because it is 15M (versus the 300M of a CUDA build).
This commit splits libATen.so into two libraries, CPU/CUDA, so
that it's not necessary to do a full rebuild to get CPU-only
support; instead, if you link against libATen_cpu.so only, you
are CPU-only; if you additionally link/dlopen libATen_cuda.so,
this enables CUDA support. This brings ATen's dynamic library
structure more similar to Caffe2's. libATen.so is no more
(this is BC BREAKING)
The general principle for how this works is that we introduce
a *hooks* interface, which introduces a dynamic dispatch indirection
between a call site and implementation site of CUDA functionality,
mediated by a static initialization registry. This means that we can continue
to, for example, lazily initialize CUDA from Context (a core, CPU class) without
having a direct dependency on the CUDA bits. Instead, we look up
in the registry if, e.g., CUDA hooks have been loaded (this loading
process happens at static initialization time), and if they
have been we dynamic dispatch to this class. We similarly use
the hooks interface to handle Variable registration.
We introduce a new invariant: if the backend of a type has not
been initialized (e.g., it's library has not been dlopened; for
CUDA, this also includes CUDA initialization), then the Type
pointers in the context registry are NULL. If you access the
registry directly you must maintain this invariant.
There are a few potholes along the way. I document them here:
- Previously, PyTorch maintained a separate registry for variable
types, because no provision for them was made in the Context's
type_registry. Now that we have the hooks mechanism, we can easily
have PyTorch register variables in the main registry. The code
has been refactored accordingly.
- There is a subtle ordering issue between Variable and CUDA.
We permit libATen_cuda.so and PyTorch to be loaded in either
order (in practice, CUDA is always loaded "after" PyTorch, because
it is lazily initialized.) This means that, when CUDA types are
loaded, we must subsequently also initialize their Variable equivalents.
Appropriate hooks were added to VariableHooks to make this possible;
similarly, getVariableHooks() is not referentially transparent, and
will change behavior after Variables are loaded. (This is different
to CUDAHooks, which is "burned in" after you try to initialize CUDA.)
- The cmake is adjusted to separate dependencies into either CPU
or CUDA dependencies. The generator scripts are adjusted to either
generate a file as a CUDA (cuda_file_manager) or CPU file (file_manager).
- I changed all native functions which were CUDA-only (the cudnn functions)
to have dispatches for CUDA only (making it permissible to not specify
all dispatch options.) This uncovered a bug in how we were handling
native functions which dispatch on a Type argument; I introduced a new
self_ty keyword to handle this case. I'm not 100% happy about it
but it fixed my problem.
This also exposed the fact that set_history incompletely handles
heterogenous return tuples combining Tensor and TensorList. I
swapped this codegen to use flatten() (at the possible cost of
a slight perf regression, since we're allocating another vector now
in this code path).
- thc_state is no longer a public member of Context; use getTHCState() instead
- This PR comes with Registry from Caffe2, for handling static initialization.
I needed to make a bunch of fixes to Registry to make it more portable
- No more ##__VA_ARGS__ token pasting; instead, it is mandatory to pass at
least one argument to the var-args. CUDAHooks and VariableHooks pass a nullary
struct CUDAHooksArgs/VariableHooksArgs to solve the problem. We must get rid of
token pasting because it does not work with MSVC.
- It seems MSVC is not willing to generate code for constructors of template
classes at use sites which cross DLL boundaries. So we explicitly instantiate
the class to get around the problem. This involved tweaks to the boilerplate
generating macros, and also required us to shuffle around namespaces a bit,
because you can't specialize a template unless you are in the same namespace as
the template.
- Insertion of AT_API to appropriate places where the registry must be exported
- We have a general problem which is that on recent Ubuntu distributions,
--as-needed is enabled for shared libraries, which is (cc @apaszke who was
worrying about this in #7160 see also #7160 (comment)). For now, I've hacked
this up in the PR to pass -Wl,--no-as-needed to all of the spots necessary to
make CI work, but a more sustainable solution is to attempt to dlopen
libATen_cuda.so when CUDA functionality is requested.
- The JIT tests somehow manage to try to touch CUDA without loading libATen_cuda.so. So
we pass -Wl,--no-as-needed when linking libATen_cuda.so to _C.so
- There is a very subtle linking issue with lapack, which is solved by making sure libATen_cuda.so links against LAPACK. There's a comment in aten/src/ATen/CMakeLists.txt about htis as well as a follow up bug at #7353
- autogradpp used AT_CUDA_ENABLED directly. We've expunged these uses and added
a few more things to CUDAHooks (getNumGPUs)
- Added manualSeedAll to Generator so that we can invoke it polymorphically (it
only does something different for CUDAGenerator)
- There's a new cuda/CUDAConfig.h header for CUDA-only ifdef macros (AT_CUDNN_ENABLED, most prominently)
- CUDAHooks/VariableHooks structs live in at namespace because Registry's
namespace support is not good enough to handle it otherwise (see Registry
changes above)
- There's some modest moving around of native functions in ReduceOps and
UnaryOps to get the CUDA-only function implementations into separate files, so
they are only compiled into libATen_cuda.so. sspaddmm needed a separate CUDA
function due to object linkage boundaries.
- Some direct uses of native functions in CUDA code has to go away, since these
functions are not exported, so you have to go through the dispatcher
(at::native::empty_like to at::empty_like)
- Code in THC/THCS/THCUNN now properly use THC_API macro instead of TH_API
(which matters now that TH and THC are not in the same library)
- Added code debt in torch/_thnn/utils.py and other THNN parsing code to handle
both TH_API and THC_API
- TensorUtils.h is now properly exported with AT_API
- Dead uses of TH_EXPORTS and co expunged; we now use ATen_cpu_exports and
ATen_cuda_exports (new, in ATenCUDAGeneral.h) consistently
- Fix some incorrect type annotations on _cudnn_rnn_backward, where we didn't
declare a type as possibly undefined when we should have. We didn't catch this
previously because optional annotations are not tested on "pass-through" native
ATen ops (which don't have dispatch). Upstream issue at #7316
- There's a new cmake macro aten_compile_options for applying all of our
per-target compile time options. We use this on the cpu and cuda libraries.
- test/test_cpp_extensions.py can be run directly by invoking in Python,
assuming you've setup your PYTHONPATH setup correctly
- type_from_string does some new funny business to only query for all valid CUDA
types (which causes CUDA initialization) when we see "torch.cuda." in the
requested string
Signed-off-by: Edward Z. Yang <ezyang@fb.com>
* Last mile libtorch fixes
Signed-off-by: Edward Z. Yang <ezyang@fb.com>
* pedantic fix
Signed-off-by: Edward Z. Yang <ezyang@fb.com>
2018-05-10 17:28:33 +00:00
|
|
|
// This field is lazily initialized from backend and scalar_type
|
|
|
|
|
type_obj.backend = static_cast<int>(backend);
|
|
|
|
|
type_obj.scalar_type = static_cast<int>(scalarType);
|
2020-04-30 18:08:33 +00:00
|
|
|
type_obj.layout = torch::getTHPLayout(layout_from_backend(backend));
|
|
|
|
|
type_obj.dtype = torch::getTHPDtype(scalarType);
|
2018-04-12 18:05:44 +00:00
|
|
|
type_obj.is_cuda = (backend == at::Backend::CUDA || backend == at::Backend::SparseCUDA);
|
2018-02-23 23:03:31 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static void set_name(PyTensorType& type_obj, const std::string& name) {
|
|
|
|
|
size_t n = sizeof(type_obj.name);
|
|
|
|
|
strncpy(type_obj.name, name.c_str(), n);
|
|
|
|
|
type_obj.name[n - 1] = '\0';
|
|
|
|
|
}
|
|
|
|
|
|
2018-04-03 20:29:25 +00:00
|
|
|
static THPObjectPtr get_tensor_dict() {
|
|
|
|
|
auto torch = THPObjectPtr(PyImport_ImportModule("torch"));
|
|
|
|
|
if (!torch) throw python_error();
|
2018-02-23 23:03:31 +00:00
|
|
|
|
2018-04-03 20:29:25 +00:00
|
|
|
auto tensor_class = THPObjectPtr(PyObject_GetAttrString(torch, "Tensor"));
|
|
|
|
|
if (!tensor_class) throw python_error();
|
2018-02-23 23:03:31 +00:00
|
|
|
|
2018-04-03 20:29:25 +00:00
|
|
|
auto tensor_type = (PyTypeObject*)tensor_class.get();
|
2019-05-15 14:25:43 +00:00
|
|
|
TORCH_CHECK(tensor_type->tp_base, "missing base type for Tensor");
|
2018-02-23 23:03:31 +00:00
|
|
|
|
|
|
|
|
auto res = THPObjectPtr(PyDict_New());
|
|
|
|
|
if (!res) throw python_error();
|
|
|
|
|
|
2018-04-03 20:29:25 +00:00
|
|
|
if (PyDict_Merge(res.get(), tensor_type->tp_dict, 0) < 0) {
|
2018-02-23 23:03:31 +00:00
|
|
|
throw python_error();
|
|
|
|
|
}
|
2018-04-03 20:29:25 +00:00
|
|
|
if (PyDict_Merge(res.get(), tensor_type->tp_base->tp_dict, 0) < 0) {
|
2018-02-23 23:03:31 +00:00
|
|
|
throw python_error();
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return res;
|
|
|
|
|
}
|
|
|
|
|
|
2021-02-09 21:20:48 +00:00
|
|
|
// A note about the lifetime of the various PyTensorType: normally
|
|
|
|
|
// PyTypeObject instances are statically allocated, but we want to create them
|
|
|
|
|
// dynamically at init time, because their exact number depends on
|
|
|
|
|
// torch::utils::all_declared_types(). The memory for each PyTensorType is
|
|
|
|
|
// allocated by initialize_aten_types() and never freed: technically it's a
|
|
|
|
|
// leak, but it's not a problem since we want them to be alive for the whole time
|
|
|
|
|
// of the process anyway.
|
|
|
|
|
//
|
|
|
|
|
// An alternative is to use a std::vector<PyTensorType> instead, and let
|
|
|
|
|
// std::vector to manage the lifetime of its items. This is problematic
|
|
|
|
|
// though, because it means that the memory of PyTensorType is deallocated at
|
|
|
|
|
// some point during the exit: if by chance we have another global destructor
|
|
|
|
|
// and/or atexit() function which tries to access the PyTensorTypes, we risk
|
|
|
|
|
// an use-after-free error. This happens for example if we embed CPython and
|
|
|
|
|
// call Py_Finalize inside an atexit() function which was registered before
|
|
|
|
|
// importing torch.
|
|
|
|
|
static std::vector<PyTensorType*> tensor_types;
|
2018-02-23 23:03:31 +00:00
|
|
|
|
2019-06-30 11:07:46 +00:00
|
|
|
void set_default_tensor_type(PyTensorType* type) {
|
|
|
|
|
if (!at::isFloatingType(type->get_scalar_type())) {
|
|
|
|
|
throw TypeError("only floating-point types are supported as the default type");
|
|
|
|
|
}
|
|
|
|
|
if (type->get_backend() == Backend::Undefined) {
|
|
|
|
|
throw TypeError("default type cannot be undefined");
|
|
|
|
|
}
|
|
|
|
|
if (isSparse(type->get_backend())) {
|
|
|
|
|
throw TypeError("only dense types are supported as the default type");
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// get the storage first, so if it doesn't exist we don't change the default tensor type
|
|
|
|
|
THPObjectPtr storage = get_storage_obj(type);
|
|
|
|
|
// NOLINTNEXTLINE(cppcoreguidelines-pro-type-const-cast)
|
|
|
|
|
default_tensor_type = type;
|
|
|
|
|
at::set_default_dtype(scalarTypeToTypeMeta(type->get_scalar_type()));
|
|
|
|
|
|
|
|
|
|
auto torch_module = THPObjectPtr(PyImport_ImportModule("torch"));
|
|
|
|
|
if (!torch_module) throw python_error();
|
|
|
|
|
|
|
|
|
|
if (PyObject_SetAttrString(torch_module.get(), "Storage", storage) != 0) {
|
|
|
|
|
// technically, we should undo the change of default tensor type.
|
|
|
|
|
throw python_error();
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2021-02-09 21:20:48 +00:00
|
|
|
static void initialize_aten_types(std::vector<PyTensorType*>& tensor_types) {
|
2018-02-23 23:03:31 +00:00
|
|
|
// includes CUDA types even when PyTorch is not built with CUDA
|
|
|
|
|
auto declared_types = torch::utils::all_declared_types();
|
2018-04-03 20:29:25 +00:00
|
|
|
tensor_types.resize(declared_types.size());
|
2018-02-23 23:03:31 +00:00
|
|
|
|
|
|
|
|
for (size_t i = 0, end = declared_types.size(); i != end; i++) {
|
2021-02-09 21:20:48 +00:00
|
|
|
tensor_types[i] = new PyTensorType();
|
|
|
|
|
auto& tensor_type = *tensor_types[i];
|
2018-02-23 23:03:31 +00:00
|
|
|
Backend backend = declared_types[i].first;
|
|
|
|
|
ScalarType scalar_type = declared_types[i].second;
|
|
|
|
|
set_type(tensor_type, backend, scalar_type);
|
|
|
|
|
set_name(tensor_type, get_name(backend, scalar_type));
|
2019-06-30 11:07:46 +00:00
|
|
|
|
|
|
|
|
// Use torch.float32 as the default tensor type
|
|
|
|
|
if (backend == Backend::CPU && scalar_type == at::kFloat) {
|
|
|
|
|
set_default_tensor_type(&tensor_type);
|
|
|
|
|
}
|
2018-02-23 23:03:31 +00:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2018-02-27 22:58:09 +00:00
|
|
|
void initialize_python_bindings() {
|
2018-02-23 23:03:31 +00:00
|
|
|
// Initialize the at::Type* pointers, name, and properties of the PyTensorType
|
|
|
|
|
// vector. After this call, the vector must not be resized.
|
|
|
|
|
initialize_aten_types(tensor_types);
|
|
|
|
|
|
2018-04-03 20:29:25 +00:00
|
|
|
// Initialize the Python metaclass for the torch.FloatTensor, etc. types.
|
|
|
|
|
// The metaclass handles __instancecheck__ checks and binds the dtype property
|
|
|
|
|
// on the type objects.
|
2018-02-23 23:03:31 +00:00
|
|
|
py_initialize_metaclass(metaclass);
|
|
|
|
|
|
|
|
|
|
// Get the tp_dict of the Variable class. We copy function definitions
|
|
|
|
|
// onto each Tensor type object so that they can be accessed via e.g.
|
2018-04-03 20:29:25 +00:00
|
|
|
// `torch.FloatTensor.add`.
|
|
|
|
|
auto tensor_dict = get_tensor_dict();
|
2018-02-23 23:03:31 +00:00
|
|
|
|
2018-04-03 20:29:25 +00:00
|
|
|
// Initialize each Python type object torch.FloatTensor, torch.DoubleTensor, etc.
|
2018-02-23 23:03:31 +00:00
|
|
|
for (auto& tensor_type : tensor_types) {
|
2021-02-09 21:20:48 +00:00
|
|
|
py_initialize_tensor_type(tensor_type->py_type, tensor_type->name, tensor_dict.get());
|
2018-02-23 23:03:31 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Add the type objects to their corresponding modules. e.g. torch.FloatTensor
|
|
|
|
|
// is added to the `torch` module as `FloatTensor`. Also add all the type
|
|
|
|
|
// objects to the set torch._tensor_classes.
|
|
|
|
|
py_bind_tensor_types(tensor_types);
|
|
|
|
|
}
|
|
|
|
|
|
2021-02-09 21:20:48 +00:00
|
|
|
static void py_bind_tensor_types(const std::vector<PyTensorType*>& tensor_types) {
|
2018-02-23 23:03:31 +00:00
|
|
|
auto torch_module = THPObjectPtr(PyImport_ImportModule("torch"));
|
|
|
|
|
if (!torch_module) throw python_error();
|
|
|
|
|
|
|
|
|
|
auto tensor_classes = THPObjectPtr(PyObject_GetAttrString(torch_module.get(), "_tensor_classes"));
|
|
|
|
|
if (!tensor_classes) throw python_error();
|
|
|
|
|
|
|
|
|
|
for (auto& tensor_type : tensor_types) {
|
2021-02-09 21:20:48 +00:00
|
|
|
auto name = std::string(tensor_type->name);
|
2018-12-14 21:30:35 +00:00
|
|
|
auto idx = name.rfind('.');
|
2018-02-23 23:03:31 +00:00
|
|
|
auto type_name = name.substr(idx + 1);
|
|
|
|
|
auto module_name = name.substr(0, idx);
|
|
|
|
|
|
|
|
|
|
auto module_obj = THPObjectPtr(PyImport_ImportModule(module_name.c_str()));
|
|
|
|
|
if (!module_obj) throw python_error();
|
|
|
|
|
|
2021-02-09 21:20:48 +00:00
|
|
|
PyObject* type_obj = (PyObject*)tensor_type;
|
2018-02-23 23:03:31 +00:00
|
|
|
Py_INCREF(type_obj);
|
|
|
|
|
if (PyModule_AddObject(module_obj.get(), type_name.c_str(), type_obj) < 0) {
|
|
|
|
|
throw python_error();
|
|
|
|
|
}
|
|
|
|
|
if (PySet_Add(tensor_classes.get(), type_obj) < 0) {
|
|
|
|
|
throw python_error();
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static bool PyTensorType_Check(PyObject* obj) {
|
|
|
|
|
auto it = std::find_if(tensor_types.begin(), tensor_types.end(),
|
2021-02-09 21:20:48 +00:00
|
|
|
[obj](PyTensorType *x) {
|
|
|
|
|
return (PyObject*)x == obj;
|
2018-02-23 23:03:31 +00:00
|
|
|
});
|
|
|
|
|
return it != tensor_types.end();
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void py_set_default_tensor_type(PyObject* obj) {
|
Make PyTorch code-base clang-tidy compliant (#56892)
Summary:
This is an automatic change generated by the following script:
```
#!/usr/bin/env python3
from subprocess import check_output, check_call
import os
def get_compiled_files_list():
import json
with open("build/compile_commands.json") as f:
data = json.load(f)
files = [os.path.relpath(node['file']) for node in data]
for idx, fname in enumerate(files):
if fname.startswith('build/') and fname.endswith('.DEFAULT.cpp'):
files[idx] = fname[len('build/'):-len('.DEFAULT.cpp')]
return files
def run_clang_tidy(fname):
check_call(["python3", "tools/clang_tidy.py", "-c", "build", "-x", fname,"-s"])
changes = check_output(["git", "ls-files", "-m"])
if len(changes) == 0:
return
check_call(["git", "commit","--all", "-m", f"NOLINT stubs for {fname}"])
def main():
git_files = check_output(["git", "ls-files"]).decode("ascii").split("\n")
compiled_files = get_compiled_files_list()
for idx, fname in enumerate(git_files):
if fname not in compiled_files:
continue
if fname.startswith("caffe2/contrib/aten/"):
continue
print(f"[{idx}/{len(git_files)}] Processing {fname}")
run_clang_tidy(fname)
if __name__ == "__main__":
main()
```
Pull Request resolved: https://github.com/pytorch/pytorch/pull/56892
Reviewed By: H-Huang
Differential Revision: D27991944
Pulled By: malfet
fbshipit-source-id: 5415e1eb2c1b34319a4f03024bfaa087007d7179
2021-04-28 21:09:06 +00:00
|
|
|
// NOLINTNEXTLINE(cppcoreguidelines-init-variables)
|
2018-03-01 19:06:55 +00:00
|
|
|
PyTensorType *type;
|
|
|
|
|
if (PyTensorType_Check(obj)) {
|
|
|
|
|
type = (PyTensorType*)obj;
|
2018-04-16 17:49:00 +00:00
|
|
|
} else {
|
|
|
|
|
throw TypeError("invalid type object");
|
|
|
|
|
}
|
2019-06-30 11:07:46 +00:00
|
|
|
if (type->is_cuda && !torch::utils::cuda_enabled()) {
|
2018-04-16 17:49:00 +00:00
|
|
|
throw unavailable_type(*type);
|
|
|
|
|
}
|
2019-06-30 11:07:46 +00:00
|
|
|
set_default_tensor_type(type);
|
2018-04-16 17:49:00 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void py_set_default_dtype(PyObject* obj) {
|
|
|
|
|
if (THPDtype_Check(obj)) {
|
2019-06-30 11:07:46 +00:00
|
|
|
auto scalar_type = ((THPDtype*)obj)->scalar_type;
|
|
|
|
|
auto backend = default_tensor_type->get_backend();
|
|
|
|
|
auto it = std::find_if(tensor_types.begin(), tensor_types.end(),
|
2021-02-09 21:20:48 +00:00
|
|
|
[backend, scalar_type](PyTensorType *x) {
|
|
|
|
|
return x->get_backend() == backend && x->get_scalar_type() == scalar_type;
|
2019-06-30 11:07:46 +00:00
|
|
|
});
|
2021-02-09 21:20:48 +00:00
|
|
|
set_default_tensor_type(*it);
|
2018-03-01 19:06:55 +00:00
|
|
|
} else {
|
2019-06-30 11:07:46 +00:00
|
|
|
throw TypeError("invalid dtype object");
|
2018-02-23 23:03:31 +00:00
|
|
|
}
|
2018-04-03 20:29:25 +00:00
|
|
|
}
|
2018-03-01 19:06:55 +00:00
|
|
|
|
2020-01-15 19:12:17 +00:00
|
|
|
c10::DispatchKey get_default_dispatch_key() {
|
2018-07-25 00:59:44 +00:00
|
|
|
AT_ASSERT(default_tensor_type);
|
2020-01-15 19:12:17 +00:00
|
|
|
return default_tensor_type->get_dispatch_key();
|
2018-02-23 23:03:31 +00:00
|
|
|
}
|
2019-04-04 09:21:09 +00:00
|
|
|
|
|
|
|
|
ScalarType get_default_scalar_type() {
|
|
|
|
|
return typeMetaToScalarType(get_default_dtype());
|
|
|
|
|
}
|
2019-06-30 11:07:46 +00:00
|
|
|
|
2018-06-26 04:11:49 +00:00
|
|
|
}} // namespace torch::tensors
|