extract out //c10/core:alloc_cpu (#70859)

Summary:
Pull Request resolved: https://github.com/pytorch/pytorch/pull/70859

ghstack-source-id: 147642534

Test Plan: Extracting code unmodified to a new library: relying on CI to validate.

Reviewed By: malfet

Differential Revision: D33329688

fbshipit-source-id: f60327467d197ec1862fb3554f8b83e6c84cab5c
(cherry picked from commit f82e7c0e9beba1113defe6d55cf8a232551e913b)
This commit is contained in:
mikey dagitses 2022-01-26 23:23:39 -08:00 committed by PyTorch MergeBot
parent fc6a488e9a
commit 844a4b47df
10 changed files with 147 additions and 103 deletions

View file

@ -77,6 +77,7 @@ cc_library(
deps = [
":headers",
"//c10/core:ScalarType",
"//c10/core:alloc_cpu",
"//c10/core:base",
"//c10/util:TypeCast",
"//c10/util:base",

View file

@ -1,106 +1,18 @@
#include <c10/core/CPUAllocator.h>
#include <c10/core/DeviceType.h>
#include <c10/core/alignment.h>
#include <c10/core/impl/alloc_cpu.h>
#include <c10/mobile/CPUCachingAllocator.h>
#include <c10/mobile/CPUProfilingAllocator.h>
#include <c10/util/irange.h>
// TODO: rename flags to C10
// TODO: rename flag to C10
// Command-line flag; per its help text it enables detailed memory usage
// printing.
C10_DEFINE_bool(
caffe2_report_cpu_memory_usage,
false,
"If set, print out detailed memory usage");
// Read by alloc_cpu(): when set, newly allocated memory is zeroed with memset.
C10_DEFINE_bool(
caffe2_cpu_allocator_do_zero_fill,
false,
"If set, do memory zerofilling when allocating on CPU");
// Read by alloc_cpu(): when set (and zero-fill is not), newly allocated memory
// is filled by memset_junk(). alloc_cpu() CHECKs that this flag and the
// zero-fill flag are not both enabled.
C10_DEFINE_bool(
caffe2_cpu_allocator_do_junk_fill,
false,
"If set, fill memory with deterministic junk when allocating on CPU");
namespace c10 {
// Fills the `num`-byte region starting at `data` with a fixed garbage pattern.
//
// The region is written as whole 64-bit words followed by a partial copy of
// the pattern for any trailing bytes (`num` need not be a multiple of 8).
// `data` is written through an int64_t*, so it is expected to be suitably
// aligned for int64_t. A `num` of 0 writes nothing.
void memset_junk(void* data, size_t num) {
  // This garbage pattern is NaN when interpreted as floating point values,
  // or as very large integer values.
  static constexpr int32_t kJunkPattern = 0x7fedbeef;
  static constexpr int64_t kJunkPattern64 =
      (static_cast<int64_t>(kJunkPattern) << 32) | kJunkPattern;
  // Keep the counts in size_t: narrowing them to int32_t would truncate the
  // word count for very large buffers and leave part of the region unfilled
  // (or place the tail copy at the wrong address).
  const size_t int64_count = num / sizeof(kJunkPattern64);
  const size_t remaining_bytes = num % sizeof(kJunkPattern64);
  int64_t* data_i64 = reinterpret_cast<int64_t*>(data);
  for (size_t i = 0; i < int64_count; ++i) {
    data_i64[i] = kJunkPattern64;
  }
  // Trailing bytes that do not make up a full word get the leading bytes of
  // the 64-bit pattern.
  if (remaining_bytes > 0) {
    memcpy(data_i64 + int64_count, &kJunkPattern64, remaining_bytes);
  }
}
// Allocates `nbytes` bytes of CPU memory aligned to `gAlignment` and returns
// a pointer to it. A request for 0 bytes returns nullptr without allocating.
// Invalid sizes and allocation failures are reported through the
// CAFFE_ENFORCE / CAFFE_THROW macros. Depending on the
// caffe2_cpu_allocator_do_zero_fill / caffe2_cpu_allocator_do_junk_fill flags
// the new memory is zeroed or junk-filled before it is returned.
void* alloc_cpu(size_t nbytes) {
if (nbytes == 0) {
return nullptr;
}
// We might have clowny upstream code that tries to alloc a negative number
// of bytes. Let's catch it early.
// (The size is reinterpreted as a signed ptrdiff_t for this test.)
CAFFE_ENFORCE(
((ptrdiff_t)nbytes) >= 0,
"alloc_cpu() seems to have been called with negative number: ",
nbytes);
// NOLINTNEXTLINE(cppcoreguidelines-init-variables)
void* data;
// Each platform branch below assigns `data` using that platform's aligned
// allocation call.
#ifdef __ANDROID__
data = memalign(gAlignment, nbytes);
#elif defined(_MSC_VER)
data = _aligned_malloc(nbytes, gAlignment);
#else
// posix_memalign reports failure through its return code, which is turned
// into a descriptive error here.
int err = posix_memalign(&data, gAlignment, nbytes);
if (err != 0) {
CAFFE_THROW(
"DefaultCPUAllocator: can't allocate memory: you tried to allocate ",
nbytes,
" bytes. Error code ",
err,
" (",
strerror(err),
")");
}
#endif
// Null-pointer check applied on every platform branch.
CAFFE_ENFORCE(
data,
"DefaultCPUAllocator: not enough memory: you tried to allocate ",
nbytes,
" bytes.");
// move data to a thread's NUMA node
NUMAMove(data, nbytes, GetCurrentNUMANode());
// The two fill modes are mutually exclusive. Note that this check runs after
// the allocation has already been made.
CHECK(
!FLAGS_caffe2_cpu_allocator_do_zero_fill ||
!FLAGS_caffe2_cpu_allocator_do_junk_fill)
<< "Cannot request both zero-fill and junk-fill at the same time";
if (FLAGS_caffe2_cpu_allocator_do_zero_fill) {
memset(data, 0, nbytes);
} else if (FLAGS_caffe2_cpu_allocator_do_junk_fill) {
memset_junk(data, nbytes);
}
return data;
}
// Releases a buffer, intended for memory handed out by alloc_cpu(). The
// release call is selected per toolchain: MSVC builds use _aligned_free (the
// counterpart of _aligned_malloc), every other build uses free.
void free_cpu(void* data) {
#ifndef _MSC_VER
  // NOLINTNEXTLINE(cppcoreguidelines-no-malloc)
  free(data);
#else
  _aligned_free(data);
#endif
}
struct C10_API DefaultCPUAllocator final : at::Allocator {
DefaultCPUAllocator() = default;
at::DataPtr allocate(size_t nbytes) const override {

View file

@ -6,12 +6,9 @@
#include <c10/core/Allocator.h>
#include <c10/core/alignment.h> // legacy, update dependents to include this directly
#include <c10/util/Logging.h>
#include <c10/util/numa.h>
// TODO: rename to c10
C10_DECLARE_bool(caffe2_report_cpu_memory_usage);
C10_DECLARE_bool(caffe2_cpu_allocator_do_zero_fill);
C10_DECLARE_bool(caffe2_cpu_allocator_do_junk_fill);
namespace c10 {
@ -20,14 +17,6 @@ using MemoryDeleter = void (*)(void*);
// A helper function that is basically doing nothing.
C10_API void NoDelete(void*);
// Fill the data memory region of num bytes with a particular garbage pattern.
// The garbage value is chosen to be NaN if interpreted as floating point value,
// or a very large integer.
C10_API void memset_junk(void* data, size_t num);
// Allocate nbytes bytes of CPU memory aligned to gAlignment. Returns nullptr
// when nbytes is 0; failures are reported via CAFFE_ENFORCE / CAFFE_THROW.
C10_API void* alloc_cpu(size_t nbytes);
// Release memory that was obtained from alloc_cpu().
C10_API void free_cpu(void* data);
// A simple struct that is used to report C10's memory allocation and
// deallocation status to the profiler
class C10_API ProfiledCPUMemoryReporter {

View file

@ -16,6 +16,23 @@ def define_targets(rules):
visibility = ["//visibility:public"],
)
# Standalone library for the CPU allocation helpers in impl/alloc_cpu.{h,cpp}.
rules.cc_library(
name = "alloc_cpu",
srcs = ["impl/alloc_cpu.cpp"],
hdrs = ["impl/alloc_cpu.h"],
# This library defines flags. The use of alwayslink keeps them
# from being stripped.
alwayslink = True,
linkstatic = True,
local_defines = ["C10_BUILD_MAIN_LIB"],
visibility = ["//visibility:public"],
deps = [
":alignment",
"//c10/macros",
"//c10/util:base",
],
)
rules.cc_library(
name = "base",
srcs = rules.glob(
@ -25,6 +42,7 @@ def define_targets(rules):
],
exclude = [
"CPUAllocator.cpp",
"impl/alloc_cpu.cpp",
],
),
hdrs = rules.glob(
@ -34,6 +52,7 @@ def define_targets(rules):
],
exclude = [
"CPUAllocator.h",
"impl/alloc_cpu.h",
],
),
# This library uses flags and registration. Do not let the

107
c10/core/impl/alloc_cpu.cpp Normal file
View file

@ -0,0 +1,107 @@
#include <c10/core/impl/alloc_cpu.h>
#include <c10/core/alignment.h>
#include <c10/util/Flags.h>
#include <c10/util/Logging.h>
#include <c10/util/irange.h>
#include <c10/util/numa.h>
// TODO: rename flags to C10
// Read by alloc_cpu(): when set, newly allocated memory is zeroed with memset.
C10_DEFINE_bool(
caffe2_cpu_allocator_do_zero_fill,
false,
"If set, do memory zerofilling when allocating on CPU");
// Read by alloc_cpu(): when set (and zero-fill is not), newly allocated memory
// is filled by memset_junk(). alloc_cpu() CHECKs that this flag and the
// zero-fill flag are not both enabled.
C10_DEFINE_bool(
caffe2_cpu_allocator_do_junk_fill,
false,
"If set, fill memory with deterministic junk when allocating on CPU");
namespace c10 {
namespace {
// Fill the data memory region of num bytes with a particular garbage pattern.
// The garbage value is chosen to be NaN if interpreted as floating point value,
// or a very large integer.
//
// The region is written as whole 64-bit words followed by a partial copy of
// the pattern for any trailing bytes (`num` need not be a multiple of 8).
// `data` is written through an int64_t*, so it is expected to be suitably
// aligned for int64_t. A `num` of 0 writes nothing.
void memset_junk(void* data, size_t num) {
  static constexpr int32_t kJunkPattern = 0x7fedbeef;
  static constexpr int64_t kJunkPattern64 =
      (static_cast<int64_t>(kJunkPattern) << 32) | kJunkPattern;
  // Keep the counts in size_t: narrowing them to int32_t would truncate the
  // word count for very large buffers and leave part of the region unfilled
  // (or place the tail copy at the wrong address).
  const size_t int64_count = num / sizeof(kJunkPattern64);
  const size_t remaining_bytes = num % sizeof(kJunkPattern64);
  int64_t* data_i64 = reinterpret_cast<int64_t*>(data);
  for (size_t i = 0; i < int64_count; ++i) {
    data_i64[i] = kJunkPattern64;
  }
  // Trailing bytes that do not make up a full word get the leading bytes of
  // the 64-bit pattern.
  if (remaining_bytes > 0) {
    memcpy(data_i64 + int64_count, &kJunkPattern64, remaining_bytes);
  }
}
} // namespace
// Allocates `nbytes` bytes of CPU memory aligned to `gAlignment` and returns
// a pointer to it. A request for 0 bytes returns nullptr without allocating.
// Invalid sizes and allocation failures are reported through the
// CAFFE_ENFORCE / CAFFE_THROW macros. Depending on the
// caffe2_cpu_allocator_do_zero_fill / caffe2_cpu_allocator_do_junk_fill flags
// the new memory is zeroed or junk-filled before it is returned.
void* alloc_cpu(size_t nbytes) {
if (nbytes == 0) {
return nullptr;
}
// We might have clowny upstream code that tries to alloc a negative number
// of bytes. Let's catch it early.
// (The size is reinterpreted as a signed ptrdiff_t for this test.)
CAFFE_ENFORCE(
((ptrdiff_t)nbytes) >= 0,
"alloc_cpu() seems to have been called with negative number: ",
nbytes);
// NOLINTNEXTLINE(cppcoreguidelines-init-variables)
void* data;
// Each platform branch below assigns `data` using that platform's aligned
// allocation call.
#ifdef __ANDROID__
data = memalign(gAlignment, nbytes);
#elif defined(_MSC_VER)
data = _aligned_malloc(nbytes, gAlignment);
#else
// posix_memalign reports failure through its return code, which is turned
// into a descriptive error here.
int err = posix_memalign(&data, gAlignment, nbytes);
if (err != 0) {
CAFFE_THROW(
"DefaultCPUAllocator: can't allocate memory: you tried to allocate ",
nbytes,
" bytes. Error code ",
err,
" (",
strerror(err),
")");
}
#endif
// Null-pointer check applied on every platform branch.
CAFFE_ENFORCE(
data,
"DefaultCPUAllocator: not enough memory: you tried to allocate ",
nbytes,
" bytes.");
// move data to a thread's NUMA node
NUMAMove(data, nbytes, GetCurrentNUMANode());
// The two fill modes are mutually exclusive. Note that this check runs after
// the allocation has already been made.
CHECK(
!FLAGS_caffe2_cpu_allocator_do_zero_fill ||
!FLAGS_caffe2_cpu_allocator_do_junk_fill)
<< "Cannot request both zero-fill and junk-fill at the same time";
if (FLAGS_caffe2_cpu_allocator_do_zero_fill) {
memset(data, 0, nbytes);
} else if (FLAGS_caffe2_cpu_allocator_do_junk_fill) {
memset_junk(data, nbytes);
}
return data;
}
// Releases a buffer, intended for memory handed out by alloc_cpu(). The
// release call is selected per toolchain: MSVC builds use _aligned_free (the
// counterpart of _aligned_malloc), every other build uses free.
void free_cpu(void* data) {
#ifndef _MSC_VER
  // NOLINTNEXTLINE(cppcoreguidelines-no-malloc)
  free(data);
#else
  _aligned_free(data);
#endif
}
} // namespace c10

12
c10/core/impl/alloc_cpu.h Normal file
View file

@ -0,0 +1,12 @@
#pragma once
#include <c10/macros/Macros.h>
#include <cstddef>
namespace c10 {
// Allocates nbytes bytes of CPU memory aligned to gAlignment. Returns nullptr
// when nbytes is 0; invalid sizes and allocation failures are reported via
// CAFFE_ENFORCE / CAFFE_THROW. The memory may be zero- or junk-filled
// depending on the caffe2_cpu_allocator_do_{zero,junk}_fill flags.
C10_API void* alloc_cpu(size_t nbytes);
// Releases memory that was obtained from alloc_cpu().
C10_API void free_cpu(void* data);
} // namespace c10

View file

@ -1,5 +1,7 @@
#include <c10/mobile/CPUCachingAllocator.h>
#include <c10/core/impl/alloc_cpu.h>
namespace c10 {
namespace {

View file

@ -5,7 +5,6 @@
#include <memory>
#include <mutex>
#include <c10/core/CPUAllocator.h>
#include <c10/util/Exception.h>
#include <c10/util/SmallVector.h>
#include <c10/util/flat_hash_map.h>

View file

@ -1,8 +1,12 @@
#include <climits>
#include <c10/core/impl/alloc_cpu.h>
#include <c10/mobile/CPUProfilingAllocator.h>
#include <c10/util/irange.h>
#include <map>
#include <set>
namespace c10 {
namespace {

View file

@ -5,7 +5,6 @@
#include <memory>
#include <mutex>
#include <c10/core/CPUAllocator.h>
#include <c10/util/Exception.h>
#include <c10/util/SmallVector.h>
#include <c10/util/flat_hash_map.h>