Canonicalize all includes in PyTorch. (#14849)
Summary:
Anywhere we used #include "foo.h", we now say #include <foo.h>
Paths are adjusted to be rooted out of aten/src, torch/lib, or
the root level directory.
I modified CMakeLists.txt by hand to remove TH and THC from
the include paths.
I used the following script to do the canonicalization:
```
import subprocess
import re
import os.path
files = subprocess.check_output(['git', 'ls-files']).decode('utf-8').rstrip().split('\n')
for fn in files:
if not any(fn.endswith(suff) for suff in ['.cu', '.cpp', '.in', '.h', '.hpp', '.cu', '.cuh', '.cc']):
continue
if not any(fn.startswith(pref) for pref in ["aten/", "torch/"]):
continue
with open(fn, 'r') as f:
c = f.read()
def fmt(p):
return "#include <{}>".format(p)
def repl(m):
p = m.group(1)
if p in ["dlfcn.h", "unistd.h", "nvrtc.h", "cuda.h", "cuda_runtime.h", "cstdint", "cudnn.h", "Python.h", "cusparse.h", "cuda_runtime_api.h", "cuda_fp16.h", "cublas_v2.h", "stdint.h", "curand_kernel.h"]:
return fmt(p)
if any(p.startswith(pref) for pref in ["torch/csrc", "c10/", "ATen/", "caffe2/", "TH/", "THC/", "Eigen/", "gtest/", "zdl/", "gloo/", "onnx/", "miopen/"]):
return fmt(p)
for root in ["aten/src", "torch/lib", ""]:
for bad_root in [os.path.dirname(fn), "aten/src/TH", "aten/src/THC", "torch/csrc"]:
new_p = os.path.relpath(os.path.join(bad_root, p), root)
if not new_p.startswith("../") and (os.path.exists(os.path.join(root, new_p)) or os.path.exists(os.path.join(root, new_p + ".in"))):
return fmt(new_p)
print("ERROR: ", fn, p)
return m.group(0)
new_c = re.sub(r'#include "([^"]+)"', repl, c)
if new_c != c:
print(fn)
with open(fn, 'w') as f:
f.write(new_c)
```
Signed-off-by: Edward Z. Yang <ezyang@fb.com>
Pull Request resolved: https://github.com/pytorch/pytorch/pull/14849
Reviewed By: dzhulgakov
Differential Revision: D13363445
Pulled By: ezyang
fbshipit-source-id: 52361f878a672785f9306c9e9ab2513128092b68
2018-12-09 03:32:01 +00:00
|
|
|
#include <torch/csrc/python_headers.h>
|
2024-06-14 17:31:21 +00:00
|
|
|
#include <vector>
|
2016-08-23 02:11:50 +00:00
|
|
|
|
2022-11-30 20:38:02 +00:00
|
|
|
#include <ATen/ops/from_blob.h>
|
2022-06-01 19:00:58 +00:00
|
|
|
#include <c10/core/CPUAllocator.h>
|
2024-11-19 04:24:14 +00:00
|
|
|
#include <c10/util/error.h>
|
Canonicalize all includes in PyTorch. (#14849)
Summary:
Anywhere we used #include "foo.h", we now say #include <foo.h>
Paths are adjusted to be rooted out of aten/src, torch/lib, or
the root level directory.
I modified CMakeLists.txt by hand to remove TH and THC from
the include paths.
I used the following script to do the canonicalization:
```
import subprocess
import re
import os.path
files = subprocess.check_output(['git', 'ls-files']).decode('utf-8').rstrip().split('\n')
for fn in files:
if not any(fn.endswith(suff) for suff in ['.cu', '.cpp', '.in', '.h', '.hpp', '.cu', '.cuh', '.cc']):
continue
if not any(fn.startswith(pref) for pref in ["aten/", "torch/"]):
continue
with open(fn, 'r') as f:
c = f.read()
def fmt(p):
return "#include <{}>".format(p)
def repl(m):
p = m.group(1)
if p in ["dlfcn.h", "unistd.h", "nvrtc.h", "cuda.h", "cuda_runtime.h", "cstdint", "cudnn.h", "Python.h", "cusparse.h", "cuda_runtime_api.h", "cuda_fp16.h", "cublas_v2.h", "stdint.h", "curand_kernel.h"]:
return fmt(p)
if any(p.startswith(pref) for pref in ["torch/csrc", "c10/", "ATen/", "caffe2/", "TH/", "THC/", "Eigen/", "gtest/", "zdl/", "gloo/", "onnx/", "miopen/"]):
return fmt(p)
for root in ["aten/src", "torch/lib", ""]:
for bad_root in [os.path.dirname(fn), "aten/src/TH", "aten/src/THC", "torch/csrc"]:
new_p = os.path.relpath(os.path.join(bad_root, p), root)
if not new_p.startswith("../") and (os.path.exists(os.path.join(root, new_p)) or os.path.exists(os.path.join(root, new_p + ".in"))):
return fmt(new_p)
print("ERROR: ", fn, p)
return m.group(0)
new_c = re.sub(r'#include "([^"]+)"', repl, c)
if new_c != c:
print(fn)
with open(fn, 'w') as f:
f.write(new_c)
```
Signed-off-by: Edward Z. Yang <ezyang@fb.com>
Pull Request resolved: https://github.com/pytorch/pytorch/pull/14849
Reviewed By: dzhulgakov
Differential Revision: D13363445
Pulled By: ezyang
fbshipit-source-id: 52361f878a672785f9306c9e9ab2513128092b68
2018-12-09 03:32:01 +00:00
|
|
|
#include <torch/csrc/THP.h>
|
|
|
|
|
#include <torch/csrc/serialization.h>
|
2018-03-09 03:18:55 +00:00
|
|
|
|
2018-09-28 02:01:29 +00:00
|
|
|
template <class io>
|
2022-01-18 20:37:16 +00:00
|
|
|
Py_ssize_t doPartialRead(io fildes, void* buf, size_t nbytes);
|
2018-09-28 02:01:29 +00:00
|
|
|
|
|
|
|
|
template <class io>
|
2022-01-18 20:37:16 +00:00
|
|
|
Py_ssize_t doPartialWrite(io fildes, void* buf, size_t nbytes);
|
2018-09-28 02:01:29 +00:00
|
|
|
|
2022-01-18 20:37:16 +00:00
|
|
|
static Py_ssize_t doPartialPythonReadBuffered(
|
|
|
|
|
PyObject* fildes,
|
|
|
|
|
void* buf,
|
|
|
|
|
size_t nbytes);
|
|
|
|
|
static Py_ssize_t doPartialPythonReadInto(
|
|
|
|
|
PyObject* fildes,
|
|
|
|
|
void* buf,
|
|
|
|
|
size_t nbytes);
|
|
|
|
|
static Py_ssize_t doPartialPythonWrite(
|
|
|
|
|
PyObject* fildes,
|
|
|
|
|
void* buf,
|
|
|
|
|
size_t nbytes);
|
2018-03-09 03:18:55 +00:00
|
|
|
|
|
|
|
|
template <>
|
2022-01-18 20:37:16 +00:00
|
|
|
Py_ssize_t doPartialRead<int>(int fildes, void* buf, size_t nbytes) {
|
2018-03-09 03:18:55 +00:00
|
|
|
return read(fildes, buf, nbytes);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
template <>
|
2022-01-18 20:37:16 +00:00
|
|
|
Py_ssize_t doPartialRead<PyObject*>(
|
|
|
|
|
PyObject* fildes,
|
|
|
|
|
void* buf,
|
|
|
|
|
size_t nbytes) {
|
2018-03-09 03:18:55 +00:00
|
|
|
// Try to use fildes.readinto() instead of fildes.read()
|
|
|
|
|
// because it is more memory efficient.
|
2018-09-28 02:01:29 +00:00
|
|
|
// TODO: Stop calling PyObject_HasAttrString() in a loop on our read loop
|
2018-03-09 03:18:55 +00:00
|
|
|
auto has_readinto = PyObject_HasAttrString(fildes, "readinto") == 1;
|
|
|
|
|
if (has_readinto) {
|
2018-09-28 02:01:29 +00:00
|
|
|
return doPartialPythonReadInto(fildes, buf, nbytes);
|
2018-03-09 03:18:55 +00:00
|
|
|
}
|
2018-09-28 02:01:29 +00:00
|
|
|
return doPartialPythonReadBuffered(fildes, buf, nbytes);
|
2018-03-09 03:18:55 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
template <>
|
2022-01-18 20:37:16 +00:00
|
|
|
Py_ssize_t doPartialWrite<int>(int fildes, void* buf, size_t nbytes) {
|
2018-03-09 03:18:55 +00:00
|
|
|
return write(fildes, buf, nbytes);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
template <>
|
2022-01-18 20:37:16 +00:00
|
|
|
Py_ssize_t doPartialWrite<PyObject*>(
|
|
|
|
|
PyObject* fildes,
|
|
|
|
|
void* buf,
|
|
|
|
|
size_t nbytes) {
|
2018-09-28 02:01:29 +00:00
|
|
|
return doPartialPythonWrite(fildes, buf, nbytes);
|
2018-03-09 03:18:55 +00:00
|
|
|
}
|
|
|
|
|
|
2024-11-07 23:58:18 +00:00
|
|
|
static bool isUnsupportedOperation() {
|
2018-03-09 03:18:55 +00:00
|
|
|
THPObjectPtr io(PyImport_ImportModule("io"));
|
|
|
|
|
if (!io)
|
|
|
|
|
throw python_error();
|
|
|
|
|
THPObjectPtr exception(PyObject_GetAttrString(io, "UnsupportedOperation"));
|
2018-12-07 20:22:49 +00:00
|
|
|
if (!exception)
|
|
|
|
|
throw python_error();
|
2018-03-09 03:18:55 +00:00
|
|
|
return PyErr_ExceptionMatches(exception.get());
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Call Python fildes.read(nbytes) and copy it to buf.
|
2024-11-07 23:58:18 +00:00
|
|
|
static Py_ssize_t doPartialPythonReadBuffered(
|
2022-01-18 20:37:16 +00:00
|
|
|
PyObject* fildes,
|
|
|
|
|
void* buf,
|
|
|
|
|
size_t raw_nbytes) {
|
2018-09-28 02:01:29 +00:00
|
|
|
// If we request a large amount of data, f.read() will internally try to
|
|
|
|
|
// allocate a buffer of that size. This is counterproductive, because
|
|
|
|
|
// it's not the buffer we ultimately want to write the data into. Read
|
|
|
|
|
// less than that and avoid allocating too much extra memory.
|
|
|
|
|
// TODO: Maybe 260 KB is a bit small...
|
|
|
|
|
const size_t nbytes = std::min<size_t>(raw_nbytes, 262144u); // 2^18 (~260 KB)
|
|
|
|
|
|
|
|
|
|
THPObjectPtr r(PyObject_CallMethod(fildes, "read", "i", nbytes));
|
|
|
|
|
if (!r)
|
|
|
|
|
throw python_error();
|
|
|
|
|
|
|
|
|
|
auto size = PyBytes_GET_SIZE(r.get());
|
|
|
|
|
const void* py_buf = PyBytes_AsString(r.get());
|
2018-03-09 03:18:55 +00:00
|
|
|
|
2018-09-28 02:01:29 +00:00
|
|
|
// we read EOF
|
|
|
|
|
if (size == 0) {
|
|
|
|
|
return 0;
|
|
|
|
|
}
|
2018-03-09 03:18:55 +00:00
|
|
|
|
2018-09-28 02:01:29 +00:00
|
|
|
// Slurp it into the buffer we actually want
|
|
|
|
|
memcpy(buf, py_buf, size);
|
2018-03-09 03:18:55 +00:00
|
|
|
|
2018-09-28 02:01:29 +00:00
|
|
|
return size;
|
2018-03-09 03:18:55 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Either does fildes.readinto(buf) or fildes.write(buf)
|
2024-11-07 23:58:18 +00:00
|
|
|
static Py_ssize_t doPartialPythonIO(
|
2022-01-18 20:37:16 +00:00
|
|
|
PyObject* fildes,
|
|
|
|
|
void* buf,
|
|
|
|
|
size_t nbytes,
|
|
|
|
|
bool is_read) {
|
2018-03-09 03:18:55 +00:00
|
|
|
auto rw_flag = is_read ? PyBUF_WRITE : PyBUF_READ;
|
2023-08-31 06:47:42 +00:00
|
|
|
THPObjectPtr memview(PyMemoryView_FromMemory(
|
|
|
|
|
reinterpret_cast<char*>(buf), static_cast<Py_ssize_t>(nbytes), rw_flag));
|
2018-03-09 03:18:55 +00:00
|
|
|
if (!memview)
|
|
|
|
|
throw python_error();
|
|
|
|
|
|
2021-06-15 20:13:43 +00:00
|
|
|
std::string method = "write";
|
2018-03-09 03:18:55 +00:00
|
|
|
if (is_read) {
|
|
|
|
|
method = "readinto";
|
|
|
|
|
}
|
2021-06-15 20:13:43 +00:00
|
|
|
THPObjectPtr r(
|
|
|
|
|
PyObject_CallMethod(fildes, method.c_str(), "O", memview.get()));
|
2018-03-09 03:18:55 +00:00
|
|
|
if (r) {
|
|
|
|
|
return PyLong_AsSsize_t(r.get());
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// fildes.readinto can return UnsupportedOperation so fall back to
|
|
|
|
|
// fildes.read.
|
|
|
|
|
if (is_read && isUnsupportedOperation()) {
|
|
|
|
|
PyErr_Clear();
|
2018-09-28 02:01:29 +00:00
|
|
|
return doPartialPythonReadBuffered(fildes, buf, nbytes);
|
2018-03-09 03:18:55 +00:00
|
|
|
}
|
|
|
|
|
throw python_error();
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Call Python fildes.readinto(buf)
|
2022-01-18 20:37:16 +00:00
|
|
|
static Py_ssize_t doPartialPythonReadInto(
|
|
|
|
|
PyObject* fildes,
|
|
|
|
|
void* buf,
|
|
|
|
|
size_t nbytes) {
|
2018-09-28 02:01:29 +00:00
|
|
|
return doPartialPythonIO(fildes, buf, nbytes, /* is_read */ true);
|
2018-03-09 03:18:55 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Call Python fildes.write(buf)
|
2022-01-18 20:37:16 +00:00
|
|
|
static Py_ssize_t doPartialPythonWrite(
|
|
|
|
|
PyObject* fildes,
|
|
|
|
|
void* buf,
|
|
|
|
|
size_t nbytes) {
|
2018-09-28 02:01:29 +00:00
|
|
|
return doPartialPythonIO(fildes, buf, nbytes, /* is_read */ false);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Requires that we read EXACTLY nbytes; fails if we don't.
|
|
|
|
|
template <typename io>
|
|
|
|
|
void doRead(io fildes, void* raw_buf, size_t nbytes) {
|
|
|
|
|
char* buf = static_cast<char*>(raw_buf);
|
|
|
|
|
while (nbytes > 0) {
|
|
|
|
|
errno = 0; // doPartialRead may not set errno
|
|
|
|
|
// we read in 1GB blocks to avoid bugs on Mac OS X Lion
|
|
|
|
|
// see https://github.com/pytorch/pytorch/issues/1031 for more details
|
2022-01-18 20:37:16 +00:00
|
|
|
Py_ssize_t r =
|
|
|
|
|
doPartialRead(fildes, buf, std::min<size_t>(nbytes, 1073741824));
|
2018-09-28 02:01:29 +00:00
|
|
|
if (r < 0) {
|
|
|
|
|
int err = errno;
|
2021-02-22 19:23:27 +00:00
|
|
|
TORCH_INTERNAL_ASSERT(
|
|
|
|
|
err != 0, "read(): impossible! r < 0, but no errno was set");
|
|
|
|
|
TORCH_INTERNAL_ASSERT(
|
|
|
|
|
err != EAGAIN,
|
|
|
|
|
"read(): non-blocking fd ",
|
|
|
|
|
fildes,
|
2018-09-28 02:01:29 +00:00
|
|
|
" read EAGAIN; cowardly refusing to spin-wait");
|
|
|
|
|
if (err == EINTR) {
|
|
|
|
|
continue;
|
|
|
|
|
} else {
|
2024-10-21 06:55:07 +00:00
|
|
|
TORCH_CHECK(
|
2024-11-19 04:24:14 +00:00
|
|
|
false,
|
|
|
|
|
"read(): fd ",
|
|
|
|
|
fildes,
|
|
|
|
|
" failed with ",
|
|
|
|
|
c10::utils::str_error(err));
|
2018-09-28 02:01:29 +00:00
|
|
|
}
|
|
|
|
|
} else if (r == 0) {
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
buf += r;
|
|
|
|
|
// This is guaranteed by POSIX, but I just want to be double-sure
|
|
|
|
|
// to not underflow a signed integer.
|
|
|
|
|
AT_ASSERT(static_cast<size_t>(r) <= nbytes);
|
|
|
|
|
nbytes -= r;
|
|
|
|
|
}
|
|
|
|
|
if (nbytes != 0) {
|
2024-10-21 06:55:07 +00:00
|
|
|
TORCH_CHECK(
|
|
|
|
|
false,
|
2018-09-28 02:01:29 +00:00
|
|
|
"unexpected EOF, expected ",
|
|
|
|
|
nbytes,
|
|
|
|
|
" more bytes. The file might be corrupted.");
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
template <typename io>
|
|
|
|
|
void doWrite(io fildes, void* raw_buf, size_t nbytes) {
|
|
|
|
|
char* buf = static_cast<char*>(raw_buf);
|
|
|
|
|
while (nbytes > 0) {
|
|
|
|
|
errno = 0; // doPartialWrite may not set errno
|
|
|
|
|
// we write in 1GB blocks to avoid bugs on Mac OS X Lion
|
|
|
|
|
// see https://github.com/pytorch/pytorch/issues/1031 for more details
|
2022-01-18 20:37:16 +00:00
|
|
|
Py_ssize_t r =
|
|
|
|
|
doPartialWrite(fildes, buf, std::min<size_t>(nbytes, 1073741824));
|
2018-09-28 02:01:29 +00:00
|
|
|
if (r < 0) {
|
|
|
|
|
int err = errno;
|
2021-02-22 19:23:27 +00:00
|
|
|
TORCH_INTERNAL_ASSERT(
|
|
|
|
|
err != 0, "write(): impossible! r < 0, but no errno was set");
|
|
|
|
|
TORCH_INTERNAL_ASSERT(
|
|
|
|
|
err != EAGAIN,
|
|
|
|
|
"write(): non-blocking fd ",
|
|
|
|
|
fildes,
|
2018-09-28 02:01:29 +00:00
|
|
|
" read EAGAIN; cowardly refusing to spin-wait");
|
|
|
|
|
if (err == EINTR) {
|
|
|
|
|
continue;
|
|
|
|
|
} else {
|
2024-10-21 06:55:07 +00:00
|
|
|
TORCH_CHECK(
|
2024-11-19 04:24:14 +00:00
|
|
|
false,
|
|
|
|
|
"write(): fd ",
|
|
|
|
|
fildes,
|
|
|
|
|
" failed with ",
|
|
|
|
|
c10::utils::str_error(err));
|
2018-09-28 02:01:29 +00:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
buf += r;
|
|
|
|
|
AT_ASSERT(static_cast<size_t>(r) <= nbytes);
|
|
|
|
|
nbytes -= r;
|
|
|
|
|
}
|
2018-03-09 03:18:55 +00:00
|
|
|
}
|
2016-08-23 02:11:50 +00:00
|
|
|
|
2022-06-01 19:00:58 +00:00
|
|
|
// save_save is necessary since the old eager format saved storages as
|
|
|
|
|
// [size + data], but the v1.5 eager format removes this since size is saved in
|
|
|
|
|
// the filesize.
|
|
|
|
|
template <class io>
|
|
|
|
|
void THPStorage_writeFileRaw(
|
|
|
|
|
c10::StorageImpl* self,
|
|
|
|
|
io fd,
|
|
|
|
|
bool save_size,
|
|
|
|
|
uint64_t element_size) {
|
|
|
|
|
c10::DeviceGuard guard(self->device());
|
2023-01-24 08:19:00 +00:00
|
|
|
uint8_t* data{};
|
2022-11-30 20:38:02 +00:00
|
|
|
at::Tensor cpu_tensor;
|
2023-08-31 06:47:42 +00:00
|
|
|
size_t size_bytes = self->nbytes();
|
|
|
|
|
size_t numel = size_bytes / element_size;
|
2022-06-01 19:00:58 +00:00
|
|
|
if (self->device_type() == at::kCPU) {
|
2023-03-31 07:54:56 +00:00
|
|
|
// We are using a mutable pointer here because we're ultimately
|
|
|
|
|
// calling into a Python API that requires that, even though it
|
|
|
|
|
// won't mutate the data.
|
2023-04-04 10:48:12 +00:00
|
|
|
data = static_cast<uint8_t*>(self->mutable_data());
|
2022-06-01 19:00:58 +00:00
|
|
|
} else {
|
2022-11-30 20:38:02 +00:00
|
|
|
// Here we use a tensor.to() to impl D2H for all non-CPU device.
|
|
|
|
|
auto device_tensor = at::from_blob(
|
2023-04-04 10:48:12 +00:00
|
|
|
self->mutable_data(),
|
2023-08-31 06:47:42 +00:00
|
|
|
{static_cast<int64_t>(size_bytes)},
|
2022-11-30 20:38:02 +00:00
|
|
|
{1},
|
2023-08-31 06:47:42 +00:00
|
|
|
nullptr,
|
2022-11-30 20:38:02 +00:00
|
|
|
at::device(self->device()).dtype(c10::kByte),
|
|
|
|
|
{self->device()});
|
|
|
|
|
cpu_tensor = device_tensor.to(at::kCPU);
|
|
|
|
|
data = (uint8_t*)cpu_tensor.data_ptr();
|
2022-06-01 19:00:58 +00:00
|
|
|
}
|
|
|
|
|
if (save_size) {
|
|
|
|
|
if (torch::utils::THP_nativeByteOrder() ==
|
|
|
|
|
torch::utils::THPByteOrder::THP_LITTLE_ENDIAN)
|
|
|
|
|
doWrite(fd, &numel, sizeof(int64_t));
|
|
|
|
|
else {
|
2023-01-24 08:19:00 +00:00
|
|
|
int64_t nsize{}; // convert big endian cpu to little endian storage
|
2024-09-10 08:31:26 +00:00
|
|
|
torch::utils::THP_encodeBuffer(
|
2022-06-01 19:00:58 +00:00
|
|
|
(uint8_t*)&nsize,
|
|
|
|
|
(const int64_t*)&numel,
|
|
|
|
|
torch::utils::THPByteOrder::THP_LITTLE_ENDIAN,
|
|
|
|
|
1);
|
|
|
|
|
doWrite(fd, &nsize, sizeof(int64_t));
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
// fast track for bytes and little endian
|
|
|
|
|
if (element_size == 1 ||
|
|
|
|
|
torch::utils::THP_nativeByteOrder() ==
|
|
|
|
|
torch::utils::THPByteOrder::THP_LITTLE_ENDIAN) {
|
|
|
|
|
doWrite(fd, data, size_bytes);
|
|
|
|
|
} else {
|
2023-08-31 06:47:42 +00:00
|
|
|
size_t buffer_size = std::min(numel, (size_t)5000);
|
2024-06-14 17:31:21 +00:00
|
|
|
std::vector<uint8_t> le_buffer;
|
|
|
|
|
le_buffer.resize(buffer_size * element_size);
|
2023-08-31 06:47:42 +00:00
|
|
|
for (size_t i = 0; i < numel; i += buffer_size) {
|
2022-06-01 19:00:58 +00:00
|
|
|
size_t to_convert = std::min(numel - i, buffer_size);
|
|
|
|
|
if (element_size == 2) {
|
2024-09-10 08:31:26 +00:00
|
|
|
torch::utils::THP_encodeBuffer(
|
2024-06-14 17:31:21 +00:00
|
|
|
le_buffer.data(),
|
2022-06-01 19:00:58 +00:00
|
|
|
(const int16_t*)data + i,
|
|
|
|
|
torch::utils::THPByteOrder::THP_LITTLE_ENDIAN,
|
|
|
|
|
to_convert);
|
|
|
|
|
} else if (element_size == 4) {
|
2024-09-10 08:31:26 +00:00
|
|
|
torch::utils::THP_encodeBuffer(
|
2024-06-14 17:31:21 +00:00
|
|
|
le_buffer.data(),
|
2022-06-01 19:00:58 +00:00
|
|
|
(const int32_t*)data + i,
|
|
|
|
|
torch::utils::THPByteOrder::THP_LITTLE_ENDIAN,
|
|
|
|
|
to_convert);
|
|
|
|
|
} else if (element_size == 8) {
|
2024-09-10 08:31:26 +00:00
|
|
|
torch::utils::THP_encodeBuffer(
|
2024-06-14 17:31:21 +00:00
|
|
|
le_buffer.data(),
|
2022-06-01 19:00:58 +00:00
|
|
|
(const int64_t*)data + i,
|
|
|
|
|
torch::utils::THPByteOrder::THP_LITTLE_ENDIAN,
|
|
|
|
|
to_convert);
|
|
|
|
|
}
|
2024-06-14 17:31:21 +00:00
|
|
|
doWrite(fd, le_buffer.data(), to_convert * element_size);
|
2022-06-01 19:00:58 +00:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
template void THPStorage_writeFileRaw<int>(
|
|
|
|
|
c10::StorageImpl* self,
|
|
|
|
|
int fd,
|
|
|
|
|
bool save_size,
|
|
|
|
|
uint64_t element_size);
|
|
|
|
|
template void THPStorage_writeFileRaw<PyObject*>(
|
|
|
|
|
c10::StorageImpl* self,
|
|
|
|
|
PyObject* fd,
|
|
|
|
|
bool save_size,
|
|
|
|
|
uint64_t element_size);
|
|
|
|
|
|
|
|
|
|
template <class io>
|
|
|
|
|
c10::intrusive_ptr<c10::StorageImpl> THPStorage_readFileRaw(
|
|
|
|
|
io file,
|
|
|
|
|
c10::intrusive_ptr<c10::StorageImpl> storage,
|
|
|
|
|
uint64_t element_size) {
|
|
|
|
|
c10::OptionalDeviceGuard guard;
|
|
|
|
|
if (storage.defined()) {
|
|
|
|
|
guard.reset_device(storage->device());
|
|
|
|
|
}
|
2023-01-24 08:19:00 +00:00
|
|
|
int64_t size{};
|
2022-06-01 19:00:58 +00:00
|
|
|
doRead(file, &size, sizeof(int64_t));
|
|
|
|
|
if (torch::utils::THP_nativeByteOrder() ==
|
|
|
|
|
torch::utils::THPByteOrder::THP_BIG_ENDIAN) {
|
2023-01-24 08:19:00 +00:00
|
|
|
int64_t tsize = size; // convert little endian storage to big endian cpu
|
2024-09-10 08:31:26 +00:00
|
|
|
torch::utils::THP_decodeBuffer(&size, (const uint8_t*)&tsize, true, 1);
|
2022-06-01 19:00:58 +00:00
|
|
|
}
|
2023-08-31 06:47:42 +00:00
|
|
|
size_t nbytes = element_size * size;
|
2022-06-01 19:00:58 +00:00
|
|
|
if (!storage.defined()) {
|
|
|
|
|
storage = c10::make_intrusive<at::StorageImpl>(
|
|
|
|
|
c10::StorageImpl::use_byte_size_t(),
|
|
|
|
|
nbytes,
|
|
|
|
|
c10::GetDefaultCPUAllocator(),
|
|
|
|
|
/*resizable=*/true);
|
|
|
|
|
} else {
|
2023-08-31 06:47:42 +00:00
|
|
|
size_t _storage_nbytes = storage->nbytes();
|
2022-06-01 19:00:58 +00:00
|
|
|
TORCH_CHECK(
|
|
|
|
|
_storage_nbytes == nbytes,
|
|
|
|
|
"storage has wrong byte size: expected %ld got %ld",
|
|
|
|
|
nbytes,
|
|
|
|
|
_storage_nbytes);
|
|
|
|
|
}
|
|
|
|
|
|
2023-08-31 06:47:42 +00:00
|
|
|
// NOLINTNEXTLINE(cppcoreguidelines-avoid-c-arrays,modernize-avoid-c-arrays)
|
2022-06-01 19:00:58 +00:00
|
|
|
std::unique_ptr<char[]> cpu_data;
|
|
|
|
|
|
2023-01-24 08:19:00 +00:00
|
|
|
uint8_t* data{};
|
2022-06-01 19:00:58 +00:00
|
|
|
if (storage->device_type() == at::kCPU) {
|
2023-04-04 10:48:12 +00:00
|
|
|
data = static_cast<uint8_t*>(storage->mutable_data());
|
2022-06-01 19:00:58 +00:00
|
|
|
} else {
|
2023-08-31 06:47:42 +00:00
|
|
|
// NOLINTNEXTLINE(cppcoreguidelines-avoid-c-arrays,modernize-avoid-c-arrays)
|
2022-06-01 19:00:58 +00:00
|
|
|
cpu_data = std::unique_ptr<char[]>(new char[nbytes]);
|
|
|
|
|
data = (uint8_t*)cpu_data.get();
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// fast track for bytes and little endian
|
|
|
|
|
if (element_size == 1 ||
|
|
|
|
|
torch::utils::THP_nativeByteOrder() ==
|
|
|
|
|
torch::utils::THPByteOrder::THP_LITTLE_ENDIAN) {
|
|
|
|
|
doRead(file, data, storage->nbytes());
|
|
|
|
|
} else {
|
|
|
|
|
int64_t buffer_size = std::min(size, (int64_t)5000);
|
|
|
|
|
// NOLINTNEXTLINE(cppcoreguidelines-avoid-c-arrays,modernize-avoid-c-arrays)
|
|
|
|
|
std::unique_ptr<uint8_t[]> le_buffer(
|
|
|
|
|
new uint8_t[buffer_size * element_size]);
|
|
|
|
|
|
|
|
|
|
for (int64_t i = 0; i < size; i += buffer_size) {
|
|
|
|
|
size_t to_convert = std::min(size - i, buffer_size);
|
|
|
|
|
doRead(file, le_buffer.get(), element_size * to_convert);
|
|
|
|
|
|
|
|
|
|
// NOLINTNEXTLINE(bugprone-branch-clone)
|
|
|
|
|
if (element_size == 2) {
|
2024-09-10 08:31:26 +00:00
|
|
|
torch::utils::THP_decodeBuffer(
|
2023-04-26 21:44:07 +00:00
|
|
|
(int16_t*)data + i, le_buffer.get(), true, to_convert);
|
2022-06-01 19:00:58 +00:00
|
|
|
} else if (element_size == 4) {
|
2024-09-10 08:31:26 +00:00
|
|
|
torch::utils::THP_decodeBuffer(
|
2023-04-26 21:44:07 +00:00
|
|
|
(int32_t*)data + i, le_buffer.get(), true, to_convert);
|
2022-06-01 19:00:58 +00:00
|
|
|
} else if (element_size == 8) {
|
2024-09-10 08:31:26 +00:00
|
|
|
torch::utils::THP_decodeBuffer(
|
2023-04-26 21:44:07 +00:00
|
|
|
(int64_t*)data + i, le_buffer.get(), true, to_convert);
|
2022-06-01 19:00:58 +00:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2022-11-30 20:38:02 +00:00
|
|
|
if (storage->device_type() != at::kCPU) {
|
|
|
|
|
// Here we use a tensor.copy_() to impl H2D for all non-CPU device.
|
|
|
|
|
auto cpu_tensor = at::from_blob(
|
2023-08-31 06:47:42 +00:00
|
|
|
(void*)data,
|
|
|
|
|
{static_cast<int64_t>(nbytes)},
|
|
|
|
|
at::device(at::kCPU).dtype(c10::kByte));
|
2022-11-30 20:38:02 +00:00
|
|
|
auto device_tensor = at::from_blob(
|
2023-04-04 10:48:12 +00:00
|
|
|
storage->mutable_data(),
|
2023-08-31 06:47:42 +00:00
|
|
|
{static_cast<int64_t>(nbytes)},
|
2022-11-30 20:38:02 +00:00
|
|
|
{1},
|
2023-08-31 06:47:42 +00:00
|
|
|
nullptr,
|
2022-11-30 20:38:02 +00:00
|
|
|
at::device(storage->device()).dtype(c10::kByte),
|
|
|
|
|
{storage->device()});
|
|
|
|
|
device_tensor.copy_(cpu_tensor);
|
2022-06-01 19:00:58 +00:00
|
|
|
}
|
|
|
|
|
return storage;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
template c10::intrusive_ptr<c10::StorageImpl> THPStorage_readFileRaw<int>(
|
|
|
|
|
int fd,
|
|
|
|
|
c10::intrusive_ptr<c10::StorageImpl> storage,
|
|
|
|
|
uint64_t element_size);
|
|
|
|
|
template c10::intrusive_ptr<c10::StorageImpl> THPStorage_readFileRaw<PyObject*>(
|
|
|
|
|
PyObject* fd,
|
|
|
|
|
c10::intrusive_ptr<c10::StorageImpl> storage,
|
|
|
|
|
uint64_t element_size);
|