mirror of
https://github.com/saymrwulf/pytorch.git
synced 2026-05-14 20:57:59 +00:00
extract out //c10/core:alloc_cpu (#70859)
Summary: Pull Request resolved: https://github.com/pytorch/pytorch/pull/70859 ghstack-source-id: 147642534 Test Plan: Extracting code unmodified to a new library: relying on CI to validate. Reviewed By: malfet Differential Revision: D33329688 fbshipit-source-id: f60327467d197ec1862fb3554f8b83e6c84cab5c (cherry picked from commit f82e7c0e9beba1113defe6d55cf8a232551e913b)
This commit is contained in:
parent
fc6a488e9a
commit
844a4b47df
10 changed files with 147 additions and 103 deletions
|
|
@ -77,6 +77,7 @@ cc_library(
|
|||
deps = [
|
||||
":headers",
|
||||
"//c10/core:ScalarType",
|
||||
"//c10/core:alloc_cpu",
|
||||
"//c10/core:base",
|
||||
"//c10/util:TypeCast",
|
||||
"//c10/util:base",
|
||||
|
|
|
|||
|
|
@ -1,106 +1,18 @@
|
|||
#include <c10/core/CPUAllocator.h>
|
||||
#include <c10/core/DeviceType.h>
|
||||
#include <c10/core/alignment.h>
|
||||
#include <c10/core/impl/alloc_cpu.h>
|
||||
#include <c10/mobile/CPUCachingAllocator.h>
|
||||
#include <c10/mobile/CPUProfilingAllocator.h>
|
||||
#include <c10/util/irange.h>
|
||||
|
||||
// TODO: rename flags to C10
|
||||
// TODO: rename flag to C10
|
||||
C10_DEFINE_bool(
|
||||
caffe2_report_cpu_memory_usage,
|
||||
false,
|
||||
"If set, print out detailed memory usage");
|
||||
|
||||
C10_DEFINE_bool(
|
||||
caffe2_cpu_allocator_do_zero_fill,
|
||||
false,
|
||||
"If set, do memory zerofilling when allocating on CPU");
|
||||
|
||||
C10_DEFINE_bool(
|
||||
caffe2_cpu_allocator_do_junk_fill,
|
||||
false,
|
||||
"If set, fill memory with deterministic junk when allocating on CPU");
|
||||
|
||||
namespace c10 {
|
||||
|
||||
// Fills the `num` bytes starting at `data` with a repeating junk pattern.
//
// The pattern is the 32-bit word 0x7fedbeef written twice per 64-bit store.
// Under IEEE-754 that word is a NaN when read as a 32-bit float, and it is a
// very large value when read as an integer, which makes reads of
// uninitialized memory easy to spot.
//
// `data` must be writable for `num` bytes and suitably aligned for int64_t
// stores. A `num` of zero writes nothing.
void memset_junk(void* data, size_t num) {
  static constexpr int32_t kJunkPattern = 0x7fedbeef;
  static constexpr int64_t kJunkPattern64 =
      static_cast<int64_t>(kJunkPattern) << 32 | kJunkPattern;
  // Keep the counts in size_t: narrowing them to a 32-bit int truncates the
  // word count (or makes it negative) once `num` reaches 16 GiB.
  const size_t int64_count = num / sizeof(kJunkPattern64);
  const size_t remaining_bytes = num % sizeof(kJunkPattern64);
  int64_t* data_i64 = reinterpret_cast<int64_t*>(data);
  for (size_t i = 0; i < int64_count; ++i) {
    data_i64[i] = kJunkPattern64;
  }
  // Tail of fewer than 8 bytes: copy just the leading bytes of the pattern.
  if (remaining_bytes > 0) {
    memcpy(data_i64 + int64_count, &kJunkPattern64, remaining_bytes);
  }
}
|
||||
|
||||
void* alloc_cpu(size_t nbytes) {
|
||||
if (nbytes == 0) {
|
||||
return nullptr;
|
||||
}
|
||||
// We might have clowny upstream code that tries to alloc a negative number
|
||||
// of bytes. Let's catch it early.
|
||||
CAFFE_ENFORCE(
|
||||
((ptrdiff_t)nbytes) >= 0,
|
||||
"alloc_cpu() seems to have been called with negative number: ",
|
||||
nbytes);
|
||||
|
||||
// NOLINTNEXTLINE(cppcoreguidelines-init-variables)
|
||||
void* data;
|
||||
#ifdef __ANDROID__
|
||||
data = memalign(gAlignment, nbytes);
|
||||
#elif defined(_MSC_VER)
|
||||
data = _aligned_malloc(nbytes, gAlignment);
|
||||
#else
|
||||
int err = posix_memalign(&data, gAlignment, nbytes);
|
||||
if (err != 0) {
|
||||
CAFFE_THROW(
|
||||
"DefaultCPUAllocator: can't allocate memory: you tried to allocate ",
|
||||
nbytes,
|
||||
" bytes. Error code ",
|
||||
err,
|
||||
" (",
|
||||
strerror(err),
|
||||
")");
|
||||
}
|
||||
#endif
|
||||
|
||||
CAFFE_ENFORCE(
|
||||
data,
|
||||
"DefaultCPUAllocator: not enough memory: you tried to allocate ",
|
||||
nbytes,
|
||||
" bytes.");
|
||||
|
||||
// move data to a thread's NUMA node
|
||||
NUMAMove(data, nbytes, GetCurrentNUMANode());
|
||||
CHECK(
|
||||
!FLAGS_caffe2_cpu_allocator_do_zero_fill ||
|
||||
!FLAGS_caffe2_cpu_allocator_do_junk_fill)
|
||||
<< "Cannot request both zero-fill and junk-fill at the same time";
|
||||
if (FLAGS_caffe2_cpu_allocator_do_zero_fill) {
|
||||
memset(data, 0, nbytes);
|
||||
} else if (FLAGS_caffe2_cpu_allocator_do_junk_fill) {
|
||||
memset_junk(data, nbytes);
|
||||
}
|
||||
|
||||
return data;
|
||||
}
|
||||
|
||||
// Releases memory obtained from alloc_cpu, using the deallocator that matches
// the platform branch alloc_cpu allocated with: _aligned_free under MSVC,
// free everywhere else.
void free_cpu(void* data) {
#if !defined(_MSC_VER)
  // NOLINTNEXTLINE(cppcoreguidelines-no-malloc)
  free(data);
#else
  _aligned_free(data);
#endif
}
|
||||
|
||||
struct C10_API DefaultCPUAllocator final : at::Allocator {
|
||||
DefaultCPUAllocator() = default;
|
||||
at::DataPtr allocate(size_t nbytes) const override {
|
||||
|
|
|
|||
|
|
@ -6,12 +6,9 @@
|
|||
#include <c10/core/Allocator.h>
|
||||
#include <c10/core/alignment.h> // legacy, update dependents to include this directly
|
||||
#include <c10/util/Logging.h>
|
||||
#include <c10/util/numa.h>
|
||||
|
||||
// TODO: rename to c10
|
||||
C10_DECLARE_bool(caffe2_report_cpu_memory_usage);
|
||||
C10_DECLARE_bool(caffe2_cpu_allocator_do_zero_fill);
|
||||
C10_DECLARE_bool(caffe2_cpu_allocator_do_junk_fill);
|
||||
|
||||
namespace c10 {
|
||||
|
||||
|
|
@ -20,14 +17,6 @@ using MemoryDeleter = void (*)(void*);
|
|||
// A helper function that is basically doing nothing.
|
||||
C10_API void NoDelete(void*);
|
||||
|
||||
// Fill the data memory region of num bytes with a particular garbage pattern.
|
||||
// The garbage value is chosen to be NaN if interpreted as floating point value,
|
||||
// or a very large integer.
|
||||
C10_API void memset_junk(void* data, size_t num);
|
||||
|
||||
C10_API void* alloc_cpu(size_t nbytes);
|
||||
C10_API void free_cpu(void* data);
|
||||
|
||||
// A simple struct that is used to report C10's memory allocation and
|
||||
// deallocation status to the profiler
|
||||
class C10_API ProfiledCPUMemoryReporter {
|
||||
|
|
|
|||
|
|
@ -16,6 +16,23 @@ def define_targets(rules):
|
|||
visibility = ["//visibility:public"],
|
||||
)
|
||||
|
||||
rules.cc_library(
|
||||
name = "alloc_cpu",
|
||||
srcs = ["impl/alloc_cpu.cpp"],
|
||||
hdrs = ["impl/alloc_cpu.h"],
|
||||
# This library defines flags. The use of alwayslink keeps them
|
||||
# from being stripped.
|
||||
alwayslink = True,
|
||||
linkstatic = True,
|
||||
local_defines = ["C10_BUILD_MAIN_LIB"],
|
||||
visibility = ["//visibility:public"],
|
||||
deps = [
|
||||
":alignment",
|
||||
"//c10/macros",
|
||||
"//c10/util:base",
|
||||
],
|
||||
)
|
||||
|
||||
rules.cc_library(
|
||||
name = "base",
|
||||
srcs = rules.glob(
|
||||
|
|
@ -25,6 +42,7 @@ def define_targets(rules):
|
|||
],
|
||||
exclude = [
|
||||
"CPUAllocator.cpp",
|
||||
"impl/alloc_cpu.cpp",
|
||||
],
|
||||
),
|
||||
hdrs = rules.glob(
|
||||
|
|
@ -34,6 +52,7 @@ def define_targets(rules):
|
|||
],
|
||||
exclude = [
|
||||
"CPUAllocator.h",
|
||||
"impl/alloc_cpu.h",
|
||||
],
|
||||
),
|
||||
# This library uses flags and registration. Do not let the
|
||||
|
|
|
|||
107
c10/core/impl/alloc_cpu.cpp
Normal file
107
c10/core/impl/alloc_cpu.cpp
Normal file
|
|
@ -0,0 +1,107 @@
|
|||
#include <c10/core/impl/alloc_cpu.h>
|
||||
|
||||
#include <c10/core/alignment.h>
|
||||
#include <c10/util/Flags.h>
|
||||
#include <c10/util/Logging.h>
|
||||
#include <c10/util/irange.h>
|
||||
#include <c10/util/numa.h>
|
||||
|
||||
// TODO: rename flags to C10
|
||||
C10_DEFINE_bool(
|
||||
caffe2_cpu_allocator_do_zero_fill,
|
||||
false,
|
||||
"If set, do memory zerofilling when allocating on CPU");
|
||||
|
||||
C10_DEFINE_bool(
|
||||
caffe2_cpu_allocator_do_junk_fill,
|
||||
false,
|
||||
"If set, fill memory with deterministic junk when allocating on CPU");
|
||||
|
||||
namespace c10 {
|
||||
|
||||
namespace {
|
||||
|
||||
// Fills the `num` bytes starting at `data` with a repeating junk pattern.
//
// The pattern is the 32-bit word 0x7fedbeef written twice per 64-bit store.
// Under IEEE-754 that word is a NaN when read as a 32-bit float, and it is a
// very large value when read as an integer, which makes reads of
// uninitialized memory easy to spot.
//
// `data` must be writable for `num` bytes and suitably aligned for int64_t
// stores. A `num` of zero writes nothing.
void memset_junk(void* data, size_t num) {
  static constexpr int32_t kJunkPattern = 0x7fedbeef;
  static constexpr int64_t kJunkPattern64 =
      static_cast<int64_t>(kJunkPattern) << 32 | kJunkPattern;
  // Keep the counts in size_t: narrowing them to a 32-bit int truncates the
  // word count (or makes it negative) once `num` reaches 16 GiB.
  const size_t int64_count = num / sizeof(kJunkPattern64);
  const size_t remaining_bytes = num % sizeof(kJunkPattern64);
  int64_t* data_i64 = reinterpret_cast<int64_t*>(data);
  for (size_t i = 0; i < int64_count; ++i) {
    data_i64[i] = kJunkPattern64;
  }
  // Tail of fewer than 8 bytes: copy just the leading bytes of the pattern.
  if (remaining_bytes > 0) {
    memcpy(data_i64 + int64_count, &kJunkPattern64, remaining_bytes);
  }
}
|
||||
|
||||
} // namespace
|
||||
|
||||
void* alloc_cpu(size_t nbytes) {
|
||||
if (nbytes == 0) {
|
||||
return nullptr;
|
||||
}
|
||||
// We might have clowny upstream code that tries to alloc a negative number
|
||||
// of bytes. Let's catch it early.
|
||||
CAFFE_ENFORCE(
|
||||
((ptrdiff_t)nbytes) >= 0,
|
||||
"alloc_cpu() seems to have been called with negative number: ",
|
||||
nbytes);
|
||||
|
||||
// NOLINTNEXTLINE(cppcoreguidelines-init-variables)
|
||||
void* data;
|
||||
#ifdef __ANDROID__
|
||||
data = memalign(gAlignment, nbytes);
|
||||
#elif defined(_MSC_VER)
|
||||
data = _aligned_malloc(nbytes, gAlignment);
|
||||
#else
|
||||
int err = posix_memalign(&data, gAlignment, nbytes);
|
||||
if (err != 0) {
|
||||
CAFFE_THROW(
|
||||
"DefaultCPUAllocator: can't allocate memory: you tried to allocate ",
|
||||
nbytes,
|
||||
" bytes. Error code ",
|
||||
err,
|
||||
" (",
|
||||
strerror(err),
|
||||
")");
|
||||
}
|
||||
#endif
|
||||
|
||||
CAFFE_ENFORCE(
|
||||
data,
|
||||
"DefaultCPUAllocator: not enough memory: you tried to allocate ",
|
||||
nbytes,
|
||||
" bytes.");
|
||||
|
||||
// move data to a thread's NUMA node
|
||||
NUMAMove(data, nbytes, GetCurrentNUMANode());
|
||||
CHECK(
|
||||
!FLAGS_caffe2_cpu_allocator_do_zero_fill ||
|
||||
!FLAGS_caffe2_cpu_allocator_do_junk_fill)
|
||||
<< "Cannot request both zero-fill and junk-fill at the same time";
|
||||
if (FLAGS_caffe2_cpu_allocator_do_zero_fill) {
|
||||
memset(data, 0, nbytes);
|
||||
} else if (FLAGS_caffe2_cpu_allocator_do_junk_fill) {
|
||||
memset_junk(data, nbytes);
|
||||
}
|
||||
|
||||
return data;
|
||||
}
|
||||
|
||||
// Releases memory obtained from alloc_cpu, using the deallocator that matches
// the platform branch alloc_cpu allocated with: _aligned_free under MSVC,
// free everywhere else.
void free_cpu(void* data) {
#if !defined(_MSC_VER)
  // NOLINTNEXTLINE(cppcoreguidelines-no-malloc)
  free(data);
#else
  _aligned_free(data);
#endif
}
|
||||
|
||||
} // namespace c10
|
||||
12
c10/core/impl/alloc_cpu.h
Normal file
12
c10/core/impl/alloc_cpu.h
Normal file
|
|
@ -0,0 +1,12 @@
|
|||
#pragma once
|
||||
|
||||
#include <c10/macros/Macros.h>
|
||||
|
||||
#include <cstddef>
|
||||
|
||||
namespace c10 {
|
||||
|
||||
C10_API void* alloc_cpu(size_t nbytes);
|
||||
C10_API void free_cpu(void* data);
|
||||
|
||||
} // namespace c10
|
||||
|
|
@ -1,5 +1,7 @@
|
|||
#include <c10/mobile/CPUCachingAllocator.h>
|
||||
|
||||
#include <c10/core/impl/alloc_cpu.h>
|
||||
|
||||
namespace c10 {
|
||||
|
||||
namespace {
|
||||
|
|
|
|||
|
|
@ -5,7 +5,6 @@
|
|||
#include <memory>
|
||||
#include <mutex>
|
||||
|
||||
#include <c10/core/CPUAllocator.h>
|
||||
#include <c10/util/Exception.h>
|
||||
#include <c10/util/SmallVector.h>
|
||||
#include <c10/util/flat_hash_map.h>
|
||||
|
|
|
|||
|
|
@ -1,8 +1,12 @@
|
|||
#include <climits>
|
||||
|
||||
#include <c10/core/impl/alloc_cpu.h>
|
||||
#include <c10/mobile/CPUProfilingAllocator.h>
|
||||
#include <c10/util/irange.h>
|
||||
|
||||
#include <map>
|
||||
#include <set>
|
||||
|
||||
namespace c10 {
|
||||
|
||||
namespace {
|
||||
|
|
|
|||
|
|
@ -5,7 +5,6 @@
|
|||
#include <memory>
|
||||
#include <mutex>
|
||||
|
||||
#include <c10/core/CPUAllocator.h>
|
||||
#include <c10/util/Exception.h>
|
||||
#include <c10/util/SmallVector.h>
|
||||
#include <c10/util/flat_hash_map.h>
|
||||
|
|
|
|||
Loading…
Reference in a new issue