xref: /aosp_15_r20/external/pytorch/c10/core/impl/alloc_cpu.cpp (revision da0073e96a02ea20f0ac840b70461e3646d07c45)
1 #include <c10/core/impl/alloc_cpu.h>
2 
3 #include <c10/core/alignment.h>
4 #include <c10/util/Flags.h>
5 #include <c10/util/Logging.h>
6 #include <c10/util/irange.h>
7 #include <c10/util/numa.h>
8 
9 #ifdef USE_MIMALLOC
10 #include <mimalloc.h>
11 #endif
12 
13 #ifdef __linux__
14 #include <sys/mman.h>
15 #include <unistd.h>
16 #endif
17 
18 // TODO: rename flags to C10
// Flag consulted by alloc_cpu(): when it is set, the freshly allocated buffer
// is cleared with memset(data, 0, nbytes) before being returned. The `false`
// argument is the flag's initial value. alloc_cpu() CHECKs that this flag and
// caffe2_cpu_allocator_do_junk_fill are not both set.
19 C10_DEFINE_bool(
20     caffe2_cpu_allocator_do_zero_fill,
21     false,
22     "If set, do memory zerofilling when allocating on CPU");
23
// Flag consulted by alloc_cpu(): when it is set (and zero-fill is not), the
// freshly allocated buffer is passed to memset_junk() so that it holds a
// fixed, recognizable bit pattern. The `false` argument is the flag's initial
// value.
24 C10_DEFINE_bool(
25     caffe2_cpu_allocator_do_junk_fill,
26     false,
27     "If set, fill memory with deterministic junk when allocating on CPU");
28 
29 namespace c10 {
30 
31 namespace {
32 
// Fills the `num` bytes starting at `data` with a fixed, recognizable junk
// pattern so that reads of uninitialized memory are easy to spot.
//
// The pattern is the 32-bit word 0x7fedbeef repeated back to back. Read as a
// 32-bit IEEE-754 float that word is a NaN (all exponent bits set, non-zero
// mantissa); read as an integer it is a very large positive value. Note that
// the doubled 64-bit word is NOT a NaN when read as a 64-bit double: it is a
// very large finite value.
//
// `data` may have any alignment: the pattern is copied with memcpy instead of
// being stored through an int64_t*, so no alignment requirement is imposed on
// the caller. `num == 0` is a no-op.
void memset_junk(void* data, size_t num) {
  static constexpr int32_t kJunkPattern = 0x7fedbeef;
  // Two copies of the 32-bit pattern packed into one 64-bit word so the bulk
  // of the buffer can be written eight bytes at a time.
  static constexpr int64_t kJunkPattern64 =
      (static_cast<int64_t>(kJunkPattern) << 32) | kJunkPattern;
  constexpr size_t kWordSize = sizeof(kJunkPattern64);

  auto* cursor = static_cast<unsigned char*>(data);
  const size_t word_count = num / kWordSize;
  for (size_t i = 0; i < word_count; ++i) {
    // A fixed-size memcpy is alignment-safe and is normally lowered to a
    // single store by the compiler.
    memcpy(cursor, &kJunkPattern64, kWordSize);
    cursor += kWordSize;
  }

  // Tail: fewer than kWordSize bytes remain, so copy only the leading bytes
  // of the 64-bit pattern.
  const size_t remaining_bytes = num % kWordSize;
  if (remaining_bytes > 0) {
    memcpy(cursor, &kJunkPattern64, remaining_bytes);
  }
}
52 
53 #if defined(__linux__) && !defined(__ANDROID__)
// Reports whether the THP_MEM_ALLOC_ENABLE environment variable requests
// THP-aware allocation.
//
// The variable is read once, on the first call; the result is cached in a
// function-local static, so later changes to the environment are not
// observed. An unset variable, or a value that does not parse to a non-zero
// base-10 integer, yields false.
static inline bool is_thp_alloc_enabled() {
  static const bool enabled = [] {
    const char* raw = std::getenv("THP_MEM_ALLOC_ENABLE");
    if (raw == nullptr) {
      return false;
    }
    // strtol is used instead of atoi because atoi has undefined behavior when
    // the value does not fit in an int; strtol saturates instead, so any
    // non-zero number (however large) enables the feature.
    return std::strtol(raw, nullptr, 10) != 0;
  }();
  return enabled;
}
61 
c10_compute_alignment(size_t nbytes)62 inline size_t c10_compute_alignment(size_t nbytes) {
63   static const auto pagesize = sysconf(_SC_PAGESIZE);
64   // for kernels that don't provide page size, default it to 4K
65   const size_t thp_alignment = (pagesize < 0 ? gPagesize : pagesize);
66   return (is_thp_alloc_enabled() ? thp_alignment : gAlignment);
67 }
68 
is_thp_alloc(size_t nbytes)69 inline bool is_thp_alloc(size_t nbytes) {
70   // enable thp (transparent huge pages) for larger buffers
71   return (is_thp_alloc_enabled() && (nbytes >= gAlloc_threshold_thp));
72 }
73 #elif !defined(__ANDROID__) && !defined(_MSC_VER)
c10_compute_alignment(C10_UNUSED size_t nbytes)74 constexpr size_t c10_compute_alignment(C10_UNUSED size_t nbytes) {
75   return gAlignment;
76 }
77 
// Variant for builds that take the `#elif` branch: no allocation is ever
// treated as a THP allocation here, so the size parameter is left unnamed and
// the answer is a compile-time constant.
constexpr bool is_thp_alloc(size_t /*nbytes*/) {
  return false;
}
81 #endif
82 } // namespace
83 
alloc_cpu(size_t nbytes)84 void* alloc_cpu(size_t nbytes) {
85   if (nbytes == 0) {
86     return nullptr;
87   }
88   // We might have clowny upstream code that tries to alloc a negative number
89   // of bytes. Let's catch it early.
90   CAFFE_ENFORCE(
91       ((ptrdiff_t)nbytes) >= 0,
92       "alloc_cpu() seems to have been called with negative number: ",
93       nbytes);
94 
95   // NOLINTNEXTLINE(cppcoreguidelines-init-variables)
96   void* data;
97 #ifdef __ANDROID__
98   data = memalign(gAlignment, nbytes);
99   CAFFE_ENFORCE(
100       data,
101       "DefaultCPUAllocator: not enough memory: you tried to allocate ",
102       nbytes,
103       " bytes.");
104 #elif defined(_MSC_VER)
105 #ifdef USE_MIMALLOC
106   data = mi_malloc_aligned(nbytes, gAlignment);
107 #else
108   data = _aligned_malloc(nbytes, gAlignment);
109 #endif
110   CAFFE_ENFORCE(
111       data,
112       "DefaultCPUAllocator: not enough memory: you tried to allocate ",
113       nbytes,
114       " bytes.");
115 #else
116   int err = posix_memalign(&data, c10_compute_alignment(nbytes), nbytes);
117   CAFFE_ENFORCE(
118       err == 0,
119       "DefaultCPUAllocator: can't allocate memory: you tried to allocate ",
120       nbytes,
121       " bytes. Error code ",
122       err,
123       " (",
124       strerror(err),
125       ")");
126   if (is_thp_alloc(nbytes)) {
127 #ifdef __linux__
128     // MADV_HUGEPAGE advise is available only for linux.
129     // general posix compliant systems can check POSIX_MADV_SEQUENTIAL advise.
130     int ret = madvise(data, nbytes, MADV_HUGEPAGE);
131     if (ret != 0) {
132       TORCH_WARN_ONCE("thp madvise for HUGEPAGE failed with ", strerror(errno));
133     }
134 #endif
135   }
136 #endif
137 
138   // move data to a thread's NUMA node
139   NUMAMove(data, nbytes, GetCurrentNUMANode());
140   CHECK(
141       !FLAGS_caffe2_cpu_allocator_do_zero_fill ||
142       !FLAGS_caffe2_cpu_allocator_do_junk_fill)
143       << "Cannot request both zero-fill and junk-fill at the same time";
144   if (FLAGS_caffe2_cpu_allocator_do_zero_fill) {
145     memset(data, 0, nbytes);
146   } else if (FLAGS_caffe2_cpu_allocator_do_junk_fill) {
147     memset_junk(data, nbytes);
148   }
149 
150   return data;
151 }
152 
// Releases a buffer with the deallocator that pairs with the allocator
// alloc_cpu() uses on the same platform: mi_free for mi_malloc_aligned,
// _aligned_free for _aligned_malloc, and plain free for memalign /
// posix_memalign.
void free_cpu(void* data) {
#if defined(_MSC_VER) && defined(USE_MIMALLOC)
  mi_free(data);
#elif defined(_MSC_VER)
  _aligned_free(data);
#else
  // NOLINTNEXTLINE(cppcoreguidelines-no-malloc)
  free(data);
#endif
}
165 
166 } // namespace c10
167