1 #include "Python.h"
2 #include "pycore_pymem.h" // _PyTraceMalloc_Config
3 #include "pycore_code.h" // stats
4
5 #include <stdbool.h>
6 #include <stdlib.h> // malloc()
7
8
9 /* Defined in tracemalloc.c */
10 extern void _PyMem_DumpTraceback(int fd, const void *ptr);
11
12
13 /* Python's malloc wrappers (see pymem.h) */
14
15 #undef uint
16 #define uint unsigned int /* assuming >= 16 bits */
17
18 /* Forward declaration */
19 static void* _PyMem_DebugRawMalloc(void *ctx, size_t size);
20 static void* _PyMem_DebugRawCalloc(void *ctx, size_t nelem, size_t elsize);
21 static void* _PyMem_DebugRawRealloc(void *ctx, void *ptr, size_t size);
22 static void _PyMem_DebugRawFree(void *ctx, void *ptr);
23
24 static void* _PyMem_DebugMalloc(void *ctx, size_t size);
25 static void* _PyMem_DebugCalloc(void *ctx, size_t nelem, size_t elsize);
26 static void* _PyMem_DebugRealloc(void *ctx, void *ptr, size_t size);
27 static void _PyMem_DebugFree(void *ctx, void *p);
28
29 static void _PyObject_DebugDumpAddress(const void *p);
30 static void _PyMem_DebugCheckAddress(const char *func, char api_id, const void *p);
31
32 static void _PyMem_SetupDebugHooksDomain(PyMemAllocatorDomain domain);
33
34 #if defined(__has_feature) /* Clang */
35 # if __has_feature(address_sanitizer) /* is ASAN enabled? */
36 # define _Py_NO_SANITIZE_ADDRESS \
37 __attribute__((no_sanitize("address")))
38 # endif
39 # if __has_feature(thread_sanitizer) /* is TSAN enabled? */
40 # define _Py_NO_SANITIZE_THREAD __attribute__((no_sanitize_thread))
41 # endif
42 # if __has_feature(memory_sanitizer) /* is MSAN enabled? */
43 # define _Py_NO_SANITIZE_MEMORY __attribute__((no_sanitize_memory))
44 # endif
45 #elif defined(__GNUC__)
46 # if defined(__SANITIZE_ADDRESS__) /* GCC 4.8+, is ASAN enabled? */
47 # define _Py_NO_SANITIZE_ADDRESS \
48 __attribute__((no_sanitize_address))
49 # endif
50 // TSAN is supported since GCC 5.1, but __SANITIZE_THREAD__ macro
51 // is provided only since GCC 7.
52 # if __GNUC__ > 5 || (__GNUC__ == 5 && __GNUC_MINOR__ >= 1)
53 # define _Py_NO_SANITIZE_THREAD __attribute__((no_sanitize_thread))
54 # endif
55 #endif
56
57 #ifndef _Py_NO_SANITIZE_ADDRESS
58 # define _Py_NO_SANITIZE_ADDRESS
59 #endif
60 #ifndef _Py_NO_SANITIZE_THREAD
61 # define _Py_NO_SANITIZE_THREAD
62 #endif
63 #ifndef _Py_NO_SANITIZE_MEMORY
64 # define _Py_NO_SANITIZE_MEMORY
65 #endif
66
67 #ifdef WITH_PYMALLOC
68
69 #ifdef MS_WINDOWS
70 # include <windows.h>
71 #elif defined(HAVE_MMAP)
72 # include <sys/mman.h>
73 # ifdef MAP_ANONYMOUS
74 # define ARENAS_USE_MMAP
75 # endif
76 #endif
77
78 /* Forward declaration */
79 static void* _PyObject_Malloc(void *ctx, size_t size);
80 static void* _PyObject_Calloc(void *ctx, size_t nelem, size_t elsize);
81 static void _PyObject_Free(void *ctx, void *p);
82 static void* _PyObject_Realloc(void *ctx, void *ptr, size_t size);
83 #endif
84
85
86 /* bpo-35053: Declare tracemalloc configuration here rather than
87 Modules/_tracemalloc.c because _tracemalloc can be compiled as a dynamic
88 library, whereas _Py_NewReference() requires it. */
89 struct _PyTraceMalloc_Config _Py_tracemalloc_config = _PyTraceMalloc_Config_INIT;
90
91
92 static void *
93 _PyMem_RawMalloc(void *ctx, size_t size)
94 {
95 /* PyMem_RawMalloc(0) means malloc(1). Some systems would return NULL
96 for malloc(0), which would be treated as an error. Some platforms would
97 return a pointer with no memory behind it, which would break pymalloc.
98 To solve these problems, allocate an extra byte. */
99 if (size == 0)
100 size = 1;
101 return malloc(size);
102 }
103
104 static void *
105 _PyMem_RawCalloc(void *ctx, size_t nelem, size_t elsize)
106 {
107 /* PyMem_RawCalloc(0, 0) means calloc(1, 1). Some systems would return NULL
108 for calloc(0, 0), which would be treated as an error. Some platforms
109 would return a pointer with no memory behind it, which would break
110 pymalloc. To solve these problems, allocate an extra byte. */
111 if (nelem == 0 || elsize == 0) {
112 nelem = 1;
113 elsize = 1;
114 }
115 return calloc(nelem, elsize);
116 }
117
118 static void *
119 _PyMem_RawRealloc(void *ctx, void *ptr, size_t size)
120 {
121 if (size == 0)
122 size = 1;
123 return realloc(ptr, size);
124 }
125
126 static void
127 _PyMem_RawFree(void *ctx, void *ptr)
128 {
129 free(ptr);
130 }
131
132
133 #ifdef MS_WINDOWS
134 static void *
135 _PyObject_ArenaVirtualAlloc(void *ctx, size_t size)
136 {
137 return VirtualAlloc(NULL, size,
138 MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE);
139 }
140
141 static void
142 _PyObject_ArenaVirtualFree(void *ctx, void *ptr, size_t size)
143 {
144 VirtualFree(ptr, 0, MEM_RELEASE);
145 }
146
147 #elif defined(ARENAS_USE_MMAP)
148 static void *
149 _PyObject_ArenaMmap(void *ctx, size_t size)
150 {
151 void *ptr;
152 ptr = mmap(NULL, size, PROT_READ|PROT_WRITE,
153 MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
154 if (ptr == MAP_FAILED)
155 return NULL;
156 assert(ptr != NULL);
157 return ptr;
158 }
159
160 static void
161 _PyObject_ArenaMunmap(void *ctx, void *ptr, size_t size)
162 {
163 munmap(ptr, size);
164 }
165
166 #else
167 static void *
168 _PyObject_ArenaMalloc(void *ctx, size_t size)
169 {
170 return malloc(size);
171 }
172
173 static void
174 _PyObject_ArenaFree(void *ctx, void *ptr, size_t size)
175 {
176 free(ptr);
177 }
178 #endif
179
180 #define MALLOC_ALLOC {NULL, _PyMem_RawMalloc, _PyMem_RawCalloc, _PyMem_RawRealloc, _PyMem_RawFree}
181 #ifdef WITH_PYMALLOC
182 # define PYMALLOC_ALLOC {NULL, _PyObject_Malloc, _PyObject_Calloc, _PyObject_Realloc, _PyObject_Free}
183 #endif
184
185 #define PYRAW_ALLOC MALLOC_ALLOC
186 #ifdef WITH_PYMALLOC
187 # define PYOBJ_ALLOC PYMALLOC_ALLOC
188 #else
189 # define PYOBJ_ALLOC MALLOC_ALLOC
190 #endif
191 #define PYMEM_ALLOC PYOBJ_ALLOC
192
193 typedef struct {
194 /* We tag each block with an API ID in order to detect API violations */
195 char api_id;
196 PyMemAllocatorEx alloc;
197 } debug_alloc_api_t;
198 static struct {
199 debug_alloc_api_t raw;
200 debug_alloc_api_t mem;
201 debug_alloc_api_t obj;
202 } _PyMem_Debug = {
203 {'r', PYRAW_ALLOC},
204 {'m', PYMEM_ALLOC},
205 {'o', PYOBJ_ALLOC}
206 };
207
208 #define PYDBGRAW_ALLOC \
209 {&_PyMem_Debug.raw, _PyMem_DebugRawMalloc, _PyMem_DebugRawCalloc, _PyMem_DebugRawRealloc, _PyMem_DebugRawFree}
210 #define PYDBGMEM_ALLOC \
211 {&_PyMem_Debug.mem, _PyMem_DebugMalloc, _PyMem_DebugCalloc, _PyMem_DebugRealloc, _PyMem_DebugFree}
212 #define PYDBGOBJ_ALLOC \
213 {&_PyMem_Debug.obj, _PyMem_DebugMalloc, _PyMem_DebugCalloc, _PyMem_DebugRealloc, _PyMem_DebugFree}
214
215 #ifdef Py_DEBUG
216 static PyMemAllocatorEx _PyMem_Raw = PYDBGRAW_ALLOC;
217 static PyMemAllocatorEx _PyMem = PYDBGMEM_ALLOC;
218 static PyMemAllocatorEx _PyObject = PYDBGOBJ_ALLOC;
219 #else
220 static PyMemAllocatorEx _PyMem_Raw = PYRAW_ALLOC;
221 static PyMemAllocatorEx _PyMem = PYMEM_ALLOC;
222 static PyMemAllocatorEx _PyObject = PYOBJ_ALLOC;
223 #endif
224
225
226 static int
227 pymem_set_default_allocator(PyMemAllocatorDomain domain, int debug,
228 PyMemAllocatorEx *old_alloc)
229 {
230 if (old_alloc != NULL) {
231 PyMem_GetAllocator(domain, old_alloc);
232 }
233
234
235 PyMemAllocatorEx new_alloc;
236 switch(domain)
237 {
238 case PYMEM_DOMAIN_RAW:
239 new_alloc = (PyMemAllocatorEx)PYRAW_ALLOC;
240 break;
241 case PYMEM_DOMAIN_MEM:
242 new_alloc = (PyMemAllocatorEx)PYMEM_ALLOC;
243 break;
244 case PYMEM_DOMAIN_OBJ:
245 new_alloc = (PyMemAllocatorEx)PYOBJ_ALLOC;
246 break;
247 default:
248 /* unknown domain */
249 return -1;
250 }
251 PyMem_SetAllocator(domain, &new_alloc);
252 if (debug) {
253 _PyMem_SetupDebugHooksDomain(domain);
254 }
255 return 0;
256 }
257
258
259 int
260 _PyMem_SetDefaultAllocator(PyMemAllocatorDomain domain,
261 PyMemAllocatorEx *old_alloc)
262 {
263 #ifdef Py_DEBUG
264 const int debug = 1;
265 #else
266 const int debug = 0;
267 #endif
268 return pymem_set_default_allocator(domain, debug, old_alloc);
269 }
270
271
272 int
273 _PyMem_GetAllocatorName(const char *name, PyMemAllocatorName *allocator)
274 {
275 if (name == NULL || *name == '\0') {
276 /* PYTHONMALLOC is empty or is not set or ignored (-E/-I command line
277 options): use default memory allocators */
278 *allocator = PYMEM_ALLOCATOR_DEFAULT;
279 }
280 else if (strcmp(name, "default") == 0) {
281 *allocator = PYMEM_ALLOCATOR_DEFAULT;
282 }
283 else if (strcmp(name, "debug") == 0) {
284 *allocator = PYMEM_ALLOCATOR_DEBUG;
285 }
286 #ifdef WITH_PYMALLOC
287 else if (strcmp(name, "pymalloc") == 0) {
288 *allocator = PYMEM_ALLOCATOR_PYMALLOC;
289 }
290 else if (strcmp(name, "pymalloc_debug") == 0) {
291 *allocator = PYMEM_ALLOCATOR_PYMALLOC_DEBUG;
292 }
293 #endif
294 else if (strcmp(name, "malloc") == 0) {
295 *allocator = PYMEM_ALLOCATOR_MALLOC;
296 }
297 else if (strcmp(name, "malloc_debug") == 0) {
298 *allocator = PYMEM_ALLOCATOR_MALLOC_DEBUG;
299 }
300 else {
301 /* unknown allocator */
302 return -1;
303 }
304 return 0;
305 }
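/* Example: these names are exactly the values accepted by the PYTHONMALLOC
 * environment variable, which is what gets parsed through this function at
 * startup, e.g.:
 *
 *     PYTHONMALLOC=malloc ./python script.py
 *     PYTHONMALLOC=pymalloc_debug ./python script.py
 *
 * An unset or empty PYTHONMALLOC selects PYMEM_ALLOCATOR_DEFAULT, as handled
 * above.
 */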
306
307
308 int
309 _PyMem_SetupAllocators(PyMemAllocatorName allocator)
310 {
311 switch (allocator) {
312 case PYMEM_ALLOCATOR_NOT_SET:
313 /* do nothing */
314 break;
315
316 case PYMEM_ALLOCATOR_DEFAULT:
317 (void)_PyMem_SetDefaultAllocator(PYMEM_DOMAIN_RAW, NULL);
318 (void)_PyMem_SetDefaultAllocator(PYMEM_DOMAIN_MEM, NULL);
319 (void)_PyMem_SetDefaultAllocator(PYMEM_DOMAIN_OBJ, NULL);
320 break;
321
322 case PYMEM_ALLOCATOR_DEBUG:
323 (void)pymem_set_default_allocator(PYMEM_DOMAIN_RAW, 1, NULL);
324 (void)pymem_set_default_allocator(PYMEM_DOMAIN_MEM, 1, NULL);
325 (void)pymem_set_default_allocator(PYMEM_DOMAIN_OBJ, 1, NULL);
326 break;
327
328 #ifdef WITH_PYMALLOC
329 case PYMEM_ALLOCATOR_PYMALLOC:
330 case PYMEM_ALLOCATOR_PYMALLOC_DEBUG:
331 {
332 PyMemAllocatorEx malloc_alloc = MALLOC_ALLOC;
333 PyMem_SetAllocator(PYMEM_DOMAIN_RAW, &malloc_alloc);
334
335 PyMemAllocatorEx pymalloc = PYMALLOC_ALLOC;
336 PyMem_SetAllocator(PYMEM_DOMAIN_MEM, &pymalloc);
337 PyMem_SetAllocator(PYMEM_DOMAIN_OBJ, &pymalloc);
338
339 if (allocator == PYMEM_ALLOCATOR_PYMALLOC_DEBUG) {
340 PyMem_SetupDebugHooks();
341 }
342 break;
343 }
344 #endif
345
346 case PYMEM_ALLOCATOR_MALLOC:
347 case PYMEM_ALLOCATOR_MALLOC_DEBUG:
348 {
349 PyMemAllocatorEx malloc_alloc = MALLOC_ALLOC;
350 PyMem_SetAllocator(PYMEM_DOMAIN_RAW, &malloc_alloc);
351 PyMem_SetAllocator(PYMEM_DOMAIN_MEM, &malloc_alloc);
352 PyMem_SetAllocator(PYMEM_DOMAIN_OBJ, &malloc_alloc);
353
354 if (allocator == PYMEM_ALLOCATOR_MALLOC_DEBUG) {
355 PyMem_SetupDebugHooks();
356 }
357 break;
358 }
359
360 default:
361 /* unknown allocator */
362 return -1;
363 }
364 return 0;
365 }
366
367
368 static int
369 pymemallocator_eq(PyMemAllocatorEx *a, PyMemAllocatorEx *b)
370 {
371 return (memcmp(a, b, sizeof(PyMemAllocatorEx)) == 0);
372 }
373
374
375 const char*
376 _PyMem_GetCurrentAllocatorName(void)
377 {
378 PyMemAllocatorEx malloc_alloc = MALLOC_ALLOC;
379 #ifdef WITH_PYMALLOC
380 PyMemAllocatorEx pymalloc = PYMALLOC_ALLOC;
381 #endif
382
383 if (pymemallocator_eq(&_PyMem_Raw, &malloc_alloc) &&
384 pymemallocator_eq(&_PyMem, &malloc_alloc) &&
385 pymemallocator_eq(&_PyObject, &malloc_alloc))
386 {
387 return "malloc";
388 }
389 #ifdef WITH_PYMALLOC
390 if (pymemallocator_eq(&_PyMem_Raw, &malloc_alloc) &&
391 pymemallocator_eq(&_PyMem, &pymalloc) &&
392 pymemallocator_eq(&_PyObject, &pymalloc))
393 {
394 return "pymalloc";
395 }
396 #endif
397
398 PyMemAllocatorEx dbg_raw = PYDBGRAW_ALLOC;
399 PyMemAllocatorEx dbg_mem = PYDBGMEM_ALLOC;
400 PyMemAllocatorEx dbg_obj = PYDBGOBJ_ALLOC;
401
402 if (pymemallocator_eq(&_PyMem_Raw, &dbg_raw) &&
403 pymemallocator_eq(&_PyMem, &dbg_mem) &&
404 pymemallocator_eq(&_PyObject, &dbg_obj))
405 {
406 /* Debug hooks installed */
407 if (pymemallocator_eq(&_PyMem_Debug.raw.alloc, &malloc_alloc) &&
408 pymemallocator_eq(&_PyMem_Debug.mem.alloc, &malloc_alloc) &&
409 pymemallocator_eq(&_PyMem_Debug.obj.alloc, &malloc_alloc))
410 {
411 return "malloc_debug";
412 }
413 #ifdef WITH_PYMALLOC
414 if (pymemallocator_eq(&_PyMem_Debug.raw.alloc, &malloc_alloc) &&
415 pymemallocator_eq(&_PyMem_Debug.mem.alloc, &pymalloc) &&
416 pymemallocator_eq(&_PyMem_Debug.obj.alloc, &pymalloc))
417 {
418 return "pymalloc_debug";
419 }
420 #endif
421 }
422 return NULL;
423 }
424
425
426 #undef MALLOC_ALLOC
427 #undef PYMALLOC_ALLOC
428 #undef PYRAW_ALLOC
429 #undef PYMEM_ALLOC
430 #undef PYOBJ_ALLOC
431 #undef PYDBGRAW_ALLOC
432 #undef PYDBGMEM_ALLOC
433 #undef PYDBGOBJ_ALLOC
434
435
436 static PyObjectArenaAllocator _PyObject_Arena = {NULL,
437 #ifdef MS_WINDOWS
438 _PyObject_ArenaVirtualAlloc, _PyObject_ArenaVirtualFree
439 #elif defined(ARENAS_USE_MMAP)
440 _PyObject_ArenaMmap, _PyObject_ArenaMunmap
441 #else
442 _PyObject_ArenaMalloc, _PyObject_ArenaFree
443 #endif
444 };
445
446 #ifdef WITH_PYMALLOC
447 static int
448 _PyMem_DebugEnabled(void)
449 {
450 return (_PyObject.malloc == _PyMem_DebugMalloc);
451 }
452
453 static int
454 _PyMem_PymallocEnabled(void)
455 {
456 if (_PyMem_DebugEnabled()) {
457 return (_PyMem_Debug.obj.alloc.malloc == _PyObject_Malloc);
458 }
459 else {
460 return (_PyObject.malloc == _PyObject_Malloc);
461 }
462 }
463 #endif
464
465
466 static void
467 _PyMem_SetupDebugHooksDomain(PyMemAllocatorDomain domain)
468 {
469 PyMemAllocatorEx alloc;
470
471 if (domain == PYMEM_DOMAIN_RAW) {
472 if (_PyMem_Raw.malloc == _PyMem_DebugRawMalloc) {
473 return;
474 }
475
476 PyMem_GetAllocator(PYMEM_DOMAIN_RAW, &_PyMem_Debug.raw.alloc);
477 alloc.ctx = &_PyMem_Debug.raw;
478 alloc.malloc = _PyMem_DebugRawMalloc;
479 alloc.calloc = _PyMem_DebugRawCalloc;
480 alloc.realloc = _PyMem_DebugRawRealloc;
481 alloc.free = _PyMem_DebugRawFree;
482 PyMem_SetAllocator(PYMEM_DOMAIN_RAW, &alloc);
483 }
484 else if (domain == PYMEM_DOMAIN_MEM) {
485 if (_PyMem.malloc == _PyMem_DebugMalloc) {
486 return;
487 }
488
489 PyMem_GetAllocator(PYMEM_DOMAIN_MEM, &_PyMem_Debug.mem.alloc);
490 alloc.ctx = &_PyMem_Debug.mem;
491 alloc.malloc = _PyMem_DebugMalloc;
492 alloc.calloc = _PyMem_DebugCalloc;
493 alloc.realloc = _PyMem_DebugRealloc;
494 alloc.free = _PyMem_DebugFree;
495 PyMem_SetAllocator(PYMEM_DOMAIN_MEM, &alloc);
496 }
497 else if (domain == PYMEM_DOMAIN_OBJ) {
498 if (_PyObject.malloc == _PyMem_DebugMalloc) {
499 return;
500 }
501
502 PyMem_GetAllocator(PYMEM_DOMAIN_OBJ, &_PyMem_Debug.obj.alloc);
503 alloc.ctx = &_PyMem_Debug.obj;
504 alloc.malloc = _PyMem_DebugMalloc;
505 alloc.calloc = _PyMem_DebugCalloc;
506 alloc.realloc = _PyMem_DebugRealloc;
507 alloc.free = _PyMem_DebugFree;
508 PyMem_SetAllocator(PYMEM_DOMAIN_OBJ, &alloc);
509 }
510 }
511
512
513 void
514 PyMem_SetupDebugHooks(void)
515 {
516 _PyMem_SetupDebugHooksDomain(PYMEM_DOMAIN_RAW);
517 _PyMem_SetupDebugHooksDomain(PYMEM_DOMAIN_MEM);
518 _PyMem_SetupDebugHooksDomain(PYMEM_DOMAIN_OBJ);
519 }
520
521 void
522 PyMem_GetAllocator(PyMemAllocatorDomain domain, PyMemAllocatorEx *allocator)
523 {
524 switch(domain)
525 {
526 case PYMEM_DOMAIN_RAW: *allocator = _PyMem_Raw; break;
527 case PYMEM_DOMAIN_MEM: *allocator = _PyMem; break;
528 case PYMEM_DOMAIN_OBJ: *allocator = _PyObject; break;
529 default:
530 /* unknown domain: set all attributes to NULL */
531 allocator->ctx = NULL;
532 allocator->malloc = NULL;
533 allocator->calloc = NULL;
534 allocator->realloc = NULL;
535 allocator->free = NULL;
536 }
537 }
538
539 void
540 PyMem_SetAllocator(PyMemAllocatorDomain domain, PyMemAllocatorEx *allocator)
541 {
542 switch(domain)
543 {
544 case PYMEM_DOMAIN_RAW: _PyMem_Raw = *allocator; break;
545 case PYMEM_DOMAIN_MEM: _PyMem = *allocator; break;
546 case PYMEM_DOMAIN_OBJ: _PyObject = *allocator; break;
547 /* ignore unknown domain */
548 }
549 }
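/* A minimal sketch of how PyMem_GetAllocator() and PyMem_SetAllocator() are
 * meant to be combined to wrap a domain with a custom hook. The hook_* names
 * are hypothetical and exist only for illustration:
 *
 *     static PyMemAllocatorEx hook_orig;          // saved original allocator
 *
 *     static void *
 *     hook_malloc(void *ctx, size_t size)
 *     {
 *         // ... custom bookkeeping here ...
 *         return hook_orig.malloc(hook_orig.ctx, size);
 *     }
 *
 *     // install: save the current allocator, then redirect the domain
 *     PyMemAllocatorEx alloc = {NULL, hook_malloc, hook_calloc,
 *                               hook_realloc, hook_free};
 *     PyMem_GetAllocator(PYMEM_DOMAIN_MEM, &hook_orig);
 *     PyMem_SetAllocator(PYMEM_DOMAIN_MEM, &alloc);
 *
 * This save-then-replace pattern is the same one _PyMem_SetupDebugHooksDomain()
 * uses above to install the debug hooks.
 */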
550
551 void
552 PyObject_GetArenaAllocator(PyObjectArenaAllocator *allocator)
553 {
554 *allocator = _PyObject_Arena;
555 }
556
557 void *
558 _PyObject_VirtualAlloc(size_t size)
559 {
560 return _PyObject_Arena.alloc(_PyObject_Arena.ctx, size);
561 }
562
563 void
564 _PyObject_VirtualFree(void *obj, size_t size)
565 {
566 _PyObject_Arena.free(_PyObject_Arena.ctx, obj, size);
567 }
568
569 void
570 PyObject_SetArenaAllocator(PyObjectArenaAllocator *allocator)
571 {
572 _PyObject_Arena = *allocator;
573 }
574
575 void *
576 PyMem_RawMalloc(size_t size)
577 {
578 /*
579 * Limit ourselves to PY_SSIZE_T_MAX bytes to prevent security holes.
580 * Most python internals blindly use a signed Py_ssize_t to track
581 * things without checking for overflows or negatives.
582 * As size_t is unsigned, checking for size < 0 is not required.
583 */
584 if (size > (size_t)PY_SSIZE_T_MAX)
585 return NULL;
586 return _PyMem_Raw.malloc(_PyMem_Raw.ctx, size);
587 }
588
589 void *
590 PyMem_RawCalloc(size_t nelem, size_t elsize)
591 {
592 /* see PyMem_RawMalloc() */
593 if (elsize != 0 && nelem > (size_t)PY_SSIZE_T_MAX / elsize)
594 return NULL;
595 return _PyMem_Raw.calloc(_PyMem_Raw.ctx, nelem, elsize);
596 }
597
598 void*
599 PyMem_RawRealloc(void *ptr, size_t new_size)
600 {
601 /* see PyMem_RawMalloc() */
602 if (new_size > (size_t)PY_SSIZE_T_MAX)
603 return NULL;
604 return _PyMem_Raw.realloc(_PyMem_Raw.ctx, ptr, new_size);
605 }
606
607 void PyMem_RawFree(void *ptr)
608 {
609 _PyMem_Raw.free(_PyMem_Raw.ctx, ptr);
610 }
611
612
613 void *
614 PyMem_Malloc(size_t size)
615 {
616 /* see PyMem_RawMalloc() */
617 if (size > (size_t)PY_SSIZE_T_MAX)
618 return NULL;
619 OBJECT_STAT_INC_COND(allocations512, size < 512);
620 OBJECT_STAT_INC_COND(allocations4k, size >= 512 && size < 4094);
621 OBJECT_STAT_INC_COND(allocations_big, size >= 4094);
622 OBJECT_STAT_INC(allocations);
623 return _PyMem.malloc(_PyMem.ctx, size);
624 }
625
626 void *
627 PyMem_Calloc(size_t nelem, size_t elsize)
628 {
629 /* see PyMem_RawMalloc() */
630 if (elsize != 0 && nelem > (size_t)PY_SSIZE_T_MAX / elsize)
631 return NULL;
632 OBJECT_STAT_INC_COND(allocations512, elsize < 512);
633 OBJECT_STAT_INC_COND(allocations4k, elsize >= 512 && elsize < 4094);
634 OBJECT_STAT_INC_COND(allocations_big, elsize >= 4094);
635 OBJECT_STAT_INC(allocations);
636 return _PyMem.calloc(_PyMem.ctx, nelem, elsize);
637 }
638
639 void *
640 PyMem_Realloc(void *ptr, size_t new_size)
641 {
642 /* see PyMem_RawMalloc() */
643 if (new_size > (size_t)PY_SSIZE_T_MAX)
644 return NULL;
645 return _PyMem.realloc(_PyMem.ctx, ptr, new_size);
646 }
647
648 void
649 PyMem_Free(void *ptr)
650 {
651 OBJECT_STAT_INC(frees);
652 _PyMem.free(_PyMem.ctx, ptr);
653 }
654
655
656 wchar_t*
657 _PyMem_RawWcsdup(const wchar_t *str)
658 {
659 assert(str != NULL);
660
661 size_t len = wcslen(str);
662 if (len > (size_t)PY_SSIZE_T_MAX / sizeof(wchar_t) - 1) {
663 return NULL;
664 }
665
666 size_t size = (len + 1) * sizeof(wchar_t);
667 wchar_t *str2 = PyMem_RawMalloc(size);
668 if (str2 == NULL) {
669 return NULL;
670 }
671
672 memcpy(str2, str, size);
673 return str2;
674 }
675
676 char *
677 _PyMem_RawStrdup(const char *str)
678 {
679 assert(str != NULL);
680 size_t size = strlen(str) + 1;
681 char *copy = PyMem_RawMalloc(size);
682 if (copy == NULL) {
683 return NULL;
684 }
685 memcpy(copy, str, size);
686 return copy;
687 }
688
689 char *
690 _PyMem_Strdup(const char *str)
691 {
692 assert(str != NULL);
693 size_t size = strlen(str) + 1;
694 char *copy = PyMem_Malloc(size);
695 if (copy == NULL) {
696 return NULL;
697 }
698 memcpy(copy, str, size);
699 return copy;
700 }
701
702 void *
703 PyObject_Malloc(size_t size)
704 {
705 /* see PyMem_RawMalloc() */
706 if (size > (size_t)PY_SSIZE_T_MAX)
707 return NULL;
708 OBJECT_STAT_INC_COND(allocations512, size < 512);
709 OBJECT_STAT_INC_COND(allocations4k, size >= 512 && size < 4094);
710 OBJECT_STAT_INC_COND(allocations_big, size >= 4094);
711 OBJECT_STAT_INC(allocations);
712 return _PyObject.malloc(_PyObject.ctx, size);
713 }
714
715 void *
716 PyObject_Calloc(size_t nelem, size_t elsize)
717 {
718 /* see PyMem_RawMalloc() */
719 if (elsize != 0 && nelem > (size_t)PY_SSIZE_T_MAX / elsize)
720 return NULL;
721 OBJECT_STAT_INC_COND(allocations512, elsize < 512);
722 OBJECT_STAT_INC_COND(allocations4k, elsize >= 512 && elsize < 4094);
723 OBJECT_STAT_INC_COND(allocations_big, elsize >= 4094);
724 OBJECT_STAT_INC(allocations);
725 return _PyObject.calloc(_PyObject.ctx, nelem, elsize);
726 }
727
728 void *
729 PyObject_Realloc(void *ptr, size_t new_size)
730 {
731 /* see PyMem_RawMalloc() */
732 if (new_size > (size_t)PY_SSIZE_T_MAX)
733 return NULL;
734 return _PyObject.realloc(_PyObject.ctx, ptr, new_size);
735 }
736
737 void
738 PyObject_Free(void *ptr)
739 {
740 OBJECT_STAT_INC(frees);
741 _PyObject.free(_PyObject.ctx, ptr);
742 }
743
744
745 /* If we're using GCC, use __builtin_expect() to reduce overhead of
746 the valgrind checks */
747 #if defined(__GNUC__) && (__GNUC__ > 2) && defined(__OPTIMIZE__)
748 # define UNLIKELY(value) __builtin_expect((value), 0)
749 # define LIKELY(value) __builtin_expect((value), 1)
750 #else
751 # define UNLIKELY(value) (value)
752 # define LIKELY(value) (value)
753 #endif
754
755 #ifdef WITH_PYMALLOC
756
757 #ifdef WITH_VALGRIND
758 #include <valgrind/valgrind.h>
759
760 /* -1 indicates that we haven't checked that we're running on valgrind yet. */
761 static int running_on_valgrind = -1;
762 #endif
763
764
765 /* An object allocator for Python.
766
767 Here is an introduction to the layers of the Python memory architecture,
768 showing where the object allocator is actually used (layer +2). It is
769 called for every object allocation and deallocation (PyObject_New/Del),
770 unless the object-specific allocators implement a proprietary allocation
771 scheme (ex.: ints use a simple free list). This is also the place where
772 the cyclic garbage collector operates selectively on container objects.
773
774
775 Object-specific allocators
776 _____ ______ ______ ________
777 [ int ] [ dict ] [ list ] ... [ string ] Python core |
778 +3 | <----- Object-specific memory -----> | <-- Non-object memory --> |
779 _______________________________ | |
780 [ Python's object allocator ] | |
781 +2 | ####### Object memory ####### | <------ Internal buffers ------> |
782 ______________________________________________________________ |
783 [ Python's raw memory allocator (PyMem_ API) ] |
784 +1 | <----- Python memory (under PyMem manager's control) ------> | |
785 __________________________________________________________________
786 [ Underlying general-purpose allocator (ex: C library malloc) ]
787 0 | <------ Virtual memory allocated for the python process -------> |
788
789 =========================================================================
790 _______________________________________________________________________
791 [ OS-specific Virtual Memory Manager (VMM) ]
792 -1 | <--- Kernel dynamic storage allocation & management (page-based) ---> |
793 __________________________________ __________________________________
794 [ ] [ ]
795 -2 | <-- Physical memory: ROM/RAM --> | | <-- Secondary storage (swap) --> |
796
797 */
798 /*==========================================================================*/
799
800 /* A fast, special-purpose memory allocator for small blocks, to be used
801 on top of a general-purpose malloc -- heavily based on previous art. */
802
803 /* Vladimir Marangozov -- August 2000 */
804
805 /*
806 * "Memory management is where the rubber meets the road -- if we do the wrong
807 * thing at any level, the results will not be good. And if we don't make the
808 * levels work well together, we are in serious trouble." (1)
809 *
810 * (1) Paul R. Wilson, Mark S. Johnstone, Michael Neely, and David Boles,
811 * "Dynamic Storage Allocation: A Survey and Critical Review",
812 * in Proc. 1995 Int'l. Workshop on Memory Management, September 1995.
813 */
814
815 /* #undef WITH_MEMORY_LIMITS */ /* disable mem limit checks */
816
817 /*==========================================================================*/
818
819 /*
820 * Allocation strategy abstract:
821 *
822 * For small requests, the allocator sub-allocates <Big> blocks of memory.
823 * Requests greater than SMALL_REQUEST_THRESHOLD bytes are routed to the
824 * system's allocator.
825 *
826 * Small requests are grouped in size classes spaced 8 bytes apart, due
827 * to the required valid alignment of the returned address. Requests of
828 * a particular size are serviced from memory pools of 4K (one VMM page).
829 * Pools are fragmented on demand and contain free lists of blocks of one
830 * particular size class. In other words, there is a fixed-size allocator
831 * for each size class. Free pools are shared by the different allocators
832 * thus minimizing the space reserved for a particular size class.
833 *
834 * This allocation strategy is a variant of what is known as "simple
835 * segregated storage based on array of free lists". The main drawback of
836 * simple segregated storage is that we might end up with a lot of reserved
837 * memory for the different free lists, which degenerate in time. To avoid
838 * this, we partition each free list in pools and we share dynamically the
839 * reserved space between all free lists. This technique is quite efficient
840 * for memory intensive programs which allocate mainly small-sized blocks.
841 *
842 * For small requests we have the following table:
843 *
844 * Request in bytes Size of allocated block Size class idx
845 * ----------------------------------------------------------------
846 * 1-8 8 0
847 * 9-16 16 1
848 * 17-24 24 2
849 * 25-32 32 3
850 * 33-40 40 4
851 * 41-48 48 5
852 * 49-56 56 6
853 * 57-64 64 7
854 * 65-72 72 8
855 * ... ... ...
856 * 497-504 504 62
857 * 505-512 512 63
858 *
859 * 0, SMALL_REQUEST_THRESHOLD + 1 and up: routed to the underlying
860 * allocator.
861 */
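/* Worked example, assuming the 8-byte spacing shown in the table above:
 * a 20-byte request falls in the 17-24 row, i.e. size class idx 2, and is
 * served from a pool of 24-byte blocks. The mapping pymalloc applies is
 * essentially (using ALIGNMENT_SHIFT and INDEX2SIZE defined below):
 *
 *     size_class_idx = (uint)(nbytes - 1) >> ALIGNMENT_SHIFT;  // 19 >> 3 == 2
 *     block_size     = INDEX2SIZE(size_class_idx);             // (2+1) << 3 == 24
 *
 * With 16-byte ALIGNMENT (the 64-bit default below) classes are spaced 16
 * bytes apart instead, so the same 20-byte request maps to idx 1 and a
 * 32-byte block.
 */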
862
863 /*==========================================================================*/
864
865 /*
866 * -- Main tunable settings section --
867 */
868
869 /*
870 * Alignment of addresses returned to the user. 8-bytes alignment works
871 * on most current architectures (with 32-bit or 64-bit address buses).
872 * The alignment value is also used for grouping small requests in size
873 * classes spaced ALIGNMENT bytes apart.
874 *
875 * You shouldn't change this unless you know what you are doing.
876 */
877
878 #if SIZEOF_VOID_P > 4
879 #define ALIGNMENT 16 /* must be 2^N */
880 #define ALIGNMENT_SHIFT 4
881 #else
882 #define ALIGNMENT 8 /* must be 2^N */
883 #define ALIGNMENT_SHIFT 3
884 #endif
885
886 /* Return the number of bytes in size class I, as a uint. */
887 #define INDEX2SIZE(I) (((uint)(I) + 1) << ALIGNMENT_SHIFT)
888
889 /*
890 * Max size threshold below which malloc requests are considered to be
891 * small enough in order to use preallocated memory pools. You can tune
892 * this value according to your application behaviour and memory needs.
893 *
894 * Note: a size threshold of 512 guarantees that newly created dictionaries
895 * will be allocated from preallocated memory pools on 64-bit.
896 *
897 * The following invariants must hold:
898 * 1) ALIGNMENT <= SMALL_REQUEST_THRESHOLD <= 512
899 * 2) SMALL_REQUEST_THRESHOLD is evenly divisible by ALIGNMENT
900 *
901 * Although not required, for better performance and space efficiency,
902 * it is recommended that SMALL_REQUEST_THRESHOLD is set to a power of 2.
903 */
904 #define SMALL_REQUEST_THRESHOLD 512
905 #define NB_SMALL_SIZE_CLASSES (SMALL_REQUEST_THRESHOLD / ALIGNMENT)
906
907 /*
908 * The system's VMM page size can be obtained on most unices with a
909 * getpagesize() call or deduced from various header files. To make
910 * things simpler, we assume that it is 4K, which is OK for most systems.
911 * It is probably better if this is the native page size, but it doesn't
912 * have to be. In theory, if SYSTEM_PAGE_SIZE is larger than the native page
913 * size, then `POOL_ADDR(p)->arenaindex' could rarely cause a segmentation
914 * violation fault. 4K is apparently OK for all the platforms that python
915 * currently targets.
916 */
917 #define SYSTEM_PAGE_SIZE (4 * 1024)
918
919 /*
920 * Maximum amount of memory managed by the allocator for small requests.
921 */
922 #ifdef WITH_MEMORY_LIMITS
923 #ifndef SMALL_MEMORY_LIMIT
924 #define SMALL_MEMORY_LIMIT (64 * 1024 * 1024) /* 64 MB -- more? */
925 #endif
926 #endif
927
928 #if !defined(WITH_PYMALLOC_RADIX_TREE)
929 /* Use radix-tree to track arena memory regions, for address_in_range().
930 * Enable by default since it allows larger pool sizes. Can be disabled
931 * using -DWITH_PYMALLOC_RADIX_TREE=0 */
932 #define WITH_PYMALLOC_RADIX_TREE 1
933 #endif
934
935 #if SIZEOF_VOID_P > 4
936 /* on 64-bit platforms use larger pools and arenas if we can */
937 #define USE_LARGE_ARENAS
938 #if WITH_PYMALLOC_RADIX_TREE
939 /* large pools only supported if radix-tree is enabled */
940 #define USE_LARGE_POOLS
941 #endif
942 #endif
943
944 /*
945 * The allocator sub-allocates <Big> blocks of memory (called arenas) aligned
946 * on a page boundary. This is a reserved virtual address space for the
947 * current process (obtained through a malloc()/mmap() call). This in no way
948 * means that the memory arenas will be used entirely. A malloc(<Big>) is
949 * usually an address range reservation for <Big> bytes, unless all pages within
950 * this space are referenced subsequently. So malloc'ing big blocks and not
951 * using them does not mean "wasting memory". It's an addressable range
952 * wastage...
953 *
954 * Arenas are allocated with mmap() on systems supporting anonymous memory
955 * mappings to reduce heap fragmentation.
956 */
957 #ifdef USE_LARGE_ARENAS
958 #define ARENA_BITS 20 /* 1 MiB */
959 #else
960 #define ARENA_BITS 18 /* 256 KiB */
961 #endif
962 #define ARENA_SIZE (1 << ARENA_BITS)
963 #define ARENA_SIZE_MASK (ARENA_SIZE - 1)
964
965 #ifdef WITH_MEMORY_LIMITS
966 #define MAX_ARENAS (SMALL_MEMORY_LIMIT / ARENA_SIZE)
967 #endif
968
969 /*
970 * Size of the pools used for small blocks. Must be a power of 2.
971 */
972 #ifdef USE_LARGE_POOLS
973 #define POOL_BITS 14 /* 16 KiB */
974 #else
975 #define POOL_BITS 12 /* 4 KiB */
976 #endif
977 #define POOL_SIZE (1 << POOL_BITS)
978 #define POOL_SIZE_MASK (POOL_SIZE - 1)
979
980 #if !WITH_PYMALLOC_RADIX_TREE
981 #if POOL_SIZE != SYSTEM_PAGE_SIZE
982 # error "pool size must be equal to system page size"
983 #endif
984 #endif
985
986 #define MAX_POOLS_IN_ARENA (ARENA_SIZE / POOL_SIZE)
987 #if MAX_POOLS_IN_ARENA * POOL_SIZE != ARENA_SIZE
988 # error "arena size not an exact multiple of pool size"
989 #endif
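/* For the default layouts this arithmetic yields the same pool count either
 * way: 1 MiB arenas with 16 KiB pools (large arenas + radix tree) and
 * 256 KiB arenas with 4 KiB pools both give MAX_POOLS_IN_ARENA == 64. */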
990
991 /*
992 * -- End of tunable settings section --
993 */
994
995 /*==========================================================================*/
996
997 /* When you say memory, my mind reasons in terms of (pointers to) blocks */
998 typedef uint8_t block;
999
1000 /* Pool for small blocks. */
1001 struct pool_header {
1002 union { block *_padding;
1003 uint count; } ref; /* number of allocated blocks */
1004 block *freeblock; /* pool's free list head */
1005 struct pool_header *nextpool; /* next pool of this size class */
1006 struct pool_header *prevpool; /* previous pool "" */
1007 uint arenaindex; /* index into arenas of base adr */
1008 uint szidx; /* block size class index */
1009 uint nextoffset; /* bytes to virgin block */
1010 uint maxnextoffset; /* largest valid nextoffset */
1011 };
1012
1013 typedef struct pool_header *poolp;
1014
1015 /* Record keeping for arenas. */
1016 struct arena_object {
1017 /* The address of the arena, as returned by malloc. Note that 0
1018 * will never be returned by a successful malloc, and is used
1019 * here to mark an arena_object that doesn't correspond to an
1020 * allocated arena.
1021 */
1022 uintptr_t address;
1023
1024 /* Pool-aligned pointer to the next pool to be carved off. */
1025 block* pool_address;
1026
1027 /* The number of available pools in the arena: free pools + never-
1028 * allocated pools.
1029 */
1030 uint nfreepools;
1031
1032 /* The total number of pools in the arena, whether or not available. */
1033 uint ntotalpools;
1034
1035 /* Singly-linked list of available pools. */
1036 struct pool_header* freepools;
1037
1038 /* Whenever this arena_object is not associated with an allocated
1039 * arena, the nextarena member is used to link all unassociated
1040 * arena_objects in the singly-linked `unused_arena_objects` list.
1041 * The prevarena member is unused in this case.
1042 *
1043 * When this arena_object is associated with an allocated arena
1044 * with at least one available pool, both members are used in the
1045 * doubly-linked `usable_arenas` list, which is maintained in
1046 * increasing order of `nfreepools` values.
1047 *
1048 * Else this arena_object is associated with an allocated arena
1049 * all of whose pools are in use. `nextarena` and `prevarena`
1050 * are both meaningless in this case.
1051 */
1052 struct arena_object* nextarena;
1053 struct arena_object* prevarena;
1054 };
1055
1056 #define POOL_OVERHEAD _Py_SIZE_ROUND_UP(sizeof(struct pool_header), ALIGNMENT)
1057
1058 #define DUMMY_SIZE_IDX 0xffff /* size class of newly cached pools */
1059
1060 /* Round pointer P down to the closest pool-aligned address <= P, as a poolp */
1061 #define POOL_ADDR(P) ((poolp)_Py_ALIGN_DOWN((P), POOL_SIZE))
1062
1063 /* Return total number of blocks in pool of size index I, as a uint. */
1064 #define NUMBLOCKS(I) ((uint)(POOL_SIZE - POOL_OVERHEAD) / INDEX2SIZE(I))
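/* Worked example, assuming a 4 KiB pool and a 48-byte POOL_OVERHEAD (the
 * typical rounded-up size of a 64-bit pool_header): a pool carved into
 * 32-byte blocks holds
 *
 *     (4096 - 48) / 32 == 126 blocks,
 *
 * the first of which begins POOL_OVERHEAD bytes past the start of the pool.
 */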
1065
1066 /*==========================================================================*/
1067
1068 /*
1069 * Pool table -- headed, circular, doubly-linked lists of partially used pools.
1070
1071 This is involved. For an index i, usedpools[i+i] is the header for a list of
1072 all partially used pools holding small blocks with "size class idx" i. So
1073 usedpools[0] corresponds to blocks of size 8, usedpools[2] to blocks of size
1074 16, and so on: index 2*i <-> blocks of size (i+1)<<ALIGNMENT_SHIFT.
1075
1076 Pools are carved off an arena's highwater mark (an arena_object's pool_address
1077 member) as needed. Once carved off, a pool is in one of three states forever
1078 after:
1079
1080 used == partially used, neither empty nor full
1081 At least one block in the pool is currently allocated, and at least one
1082 block in the pool is not currently allocated (note this implies a pool
1083 has room for at least two blocks).
1084 This is a pool's initial state, as a pool is created only when malloc
1085 needs space.
1086 The pool holds blocks of a fixed size, and is in the circular list headed
1087 at usedpools[i] (see above). It's linked to the other used pools of the
1088 same size class via the pool_header's nextpool and prevpool members.
1089 If all but one block is currently allocated, a malloc can cause a
1090 transition to the full state. If all but one block is not currently
1091 allocated, a free can cause a transition to the empty state.
1092
1093 full == all the pool's blocks are currently allocated
1094 On transition to full, a pool is unlinked from its usedpools[] list.
1095 It's not linked to from anything then anymore, and its nextpool and
1096 prevpool members are meaningless until it transitions back to used.
1097 A free of a block in a full pool puts the pool back in the used state.
1098 Then it's linked in at the front of the appropriate usedpools[] list, so
1099 that the next allocation for its size class will reuse the freed block.
1100
1101 empty == all the pool's blocks are currently available for allocation
1102 On transition to empty, a pool is unlinked from its usedpools[] list,
1103 and linked to the front of its arena_object's singly-linked freepools list,
1104 via its nextpool member. The prevpool member has no meaning in this case.
1105 Empty pools have no inherent size class: the next time a malloc finds
1106 an empty list in usedpools[], it takes the first pool off of freepools.
1107 If the size class needed happens to be the same as the size class the pool
1108 last had, some pool initialization can be skipped.
1109
1110
1111 Block Management
1112
1113 Blocks within pools are again carved out as needed. pool->freeblock points to
1114 the start of a singly-linked list of free blocks within the pool. When a
1115 block is freed, it's inserted at the front of its pool's freeblock list. Note
1116 that the available blocks in a pool are *not* linked all together when a pool
1117 is initialized. Instead only "the first two" (lowest addresses) blocks are
1118 set up, returning the first such block, and setting pool->freeblock to a
1119 one-block list holding the second such block. This is consistent with how
1120 pymalloc strives at all levels (arena, pool, and block) never to touch a piece
1121 of memory until it's actually needed.
1122
1123 So long as a pool is in the used state, we're certain there *is* a block
1124 available for allocating, and pool->freeblock is not NULL. If pool->freeblock
1125 points to the end of the free list before we've carved the entire pool into
1126 blocks, that means we simply haven't yet gotten to one of the higher-address
1127 blocks. The offset from the pool_header to the start of "the next" virgin
1128 block is stored in the pool_header nextoffset member, and the largest value
1129 of nextoffset that makes sense is stored in the maxnextoffset member when a
1130 pool is initialized. All the blocks in a pool have been passed out at least
1131 once when and only when nextoffset > maxnextoffset.
1132
1133
1134 Major obscurity: While the usedpools vector is declared to have poolp
1135 entries, it doesn't really. It really contains two pointers per (conceptual)
1136 poolp entry, the nextpool and prevpool members of a pool_header. The
1137 excruciating initialization code below fools C so that
1138
1139 usedpool[i+i]
1140
1141 "acts like" a genuine poolp, but only so long as you only reference its
1142 nextpool and prevpool members. The "- 2*sizeof(block *)" gibberish is
1143 compensating for that a pool_header's nextpool and prevpool members
1144 immediately follow a pool_header's first two members:
1145
1146 union { block *_padding;
1147 uint count; } ref;
1148 block *freeblock;
1149
1150 each of which consume sizeof(block *) bytes. So what usedpools[i+i] really
1151 contains is a fudged-up pointer p such that *if* C believes it's a poolp
1152 pointer, then p->nextpool and p->prevpool are both p (meaning that the headed
1153 circular list is empty).
1154
1155 It's unclear why the usedpools setup is so convoluted. It could be to
1156 minimize the amount of cache required to hold this heavily-referenced table
1157 (which only *needs* the two interpool pointer members of a pool_header). OTOH,
1158 referencing code has to remember to "double the index" and doing so isn't
1159 free, usedpools[0] isn't a strictly legal pointer, and we're crucially relying
1160 on C not inserting any padding anywhere in a pool_header at or before
1161 the prevpool member.
1162 **************************************************************************** */
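/* Concretely, the only safe way to use one of these fudged-up entries is
 * through its nextpool/prevpool members. An illustrative sketch (pool, head
 * and i are hypothetical locals):
 *
 *     poolp head = usedpools[i + i];
 *     int empty = (head->nextpool == head);    // headed circular list
 *
 *     pool->nextpool = head->nextpool;         // push pool onto the front
 *     pool->prevpool = head;
 *     head->nextpool->prevpool = pool;
 *     head->nextpool = pool;
 *
 * Reading any other pool_header member through head would run past the two
 * pointers that actually back the table slot.
 */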
1163
1164 #define PTA(x) ((poolp )((uint8_t *)&(usedpools[2*(x)]) - 2*sizeof(block *)))
1165 #define PT(x) PTA(x), PTA(x)
1166
1167 static poolp usedpools[2 * ((NB_SMALL_SIZE_CLASSES + 7) / 8) * 8] = {
1168 PT(0), PT(1), PT(2), PT(3), PT(4), PT(5), PT(6), PT(7)
1169 #if NB_SMALL_SIZE_CLASSES > 8
1170 , PT(8), PT(9), PT(10), PT(11), PT(12), PT(13), PT(14), PT(15)
1171 #if NB_SMALL_SIZE_CLASSES > 16
1172 , PT(16), PT(17), PT(18), PT(19), PT(20), PT(21), PT(22), PT(23)
1173 #if NB_SMALL_SIZE_CLASSES > 24
1174 , PT(24), PT(25), PT(26), PT(27), PT(28), PT(29), PT(30), PT(31)
1175 #if NB_SMALL_SIZE_CLASSES > 32
1176 , PT(32), PT(33), PT(34), PT(35), PT(36), PT(37), PT(38), PT(39)
1177 #if NB_SMALL_SIZE_CLASSES > 40
1178 , PT(40), PT(41), PT(42), PT(43), PT(44), PT(45), PT(46), PT(47)
1179 #if NB_SMALL_SIZE_CLASSES > 48
1180 , PT(48), PT(49), PT(50), PT(51), PT(52), PT(53), PT(54), PT(55)
1181 #if NB_SMALL_SIZE_CLASSES > 56
1182 , PT(56), PT(57), PT(58), PT(59), PT(60), PT(61), PT(62), PT(63)
1183 #if NB_SMALL_SIZE_CLASSES > 64
1184 #error "NB_SMALL_SIZE_CLASSES should not be more than 64"
1185 #endif /* NB_SMALL_SIZE_CLASSES > 64 */
1186 #endif /* NB_SMALL_SIZE_CLASSES > 56 */
1187 #endif /* NB_SMALL_SIZE_CLASSES > 48 */
1188 #endif /* NB_SMALL_SIZE_CLASSES > 40 */
1189 #endif /* NB_SMALL_SIZE_CLASSES > 32 */
1190 #endif /* NB_SMALL_SIZE_CLASSES > 24 */
1191 #endif /* NB_SMALL_SIZE_CLASSES > 16 */
1192 #endif /* NB_SMALL_SIZE_CLASSES > 8 */
1193 };
1194
1195 /*==========================================================================
1196 Arena management.
1197
1198 `arenas` is a vector of arena_objects. It contains maxarenas entries, some of
1199 which may not be currently used (== they're arena_objects that aren't
1200 currently associated with an allocated arena). Note that arenas proper are
1201 separately malloc'ed.
1202
1203 Prior to Python 2.5, arenas were never free()'ed. Starting with Python 2.5,
1204 we do try to free() arenas, and use some mild heuristic strategies to increase
1205 the likelihood that arenas eventually can be freed.
1206
1207 unused_arena_objects
1208
1209 This is a singly-linked list of the arena_objects that are currently not
1210 being used (no arena is associated with them). Objects are taken off the
1211 head of the list in new_arena(), and are pushed on the head of the list in
1212 PyObject_Free() when the arena is empty. Key invariant: an arena_object
1213 is on this list if and only if its .address member is 0.
1214
1215 usable_arenas
1216
1217 This is a doubly-linked list of the arena_objects associated with arenas
1218 that have pools available. These pools are either waiting to be reused,
1219 or have not been used before. The list is sorted to have the most-
1220 allocated arenas first (ascending order based on the nfreepools member).
1221 This means that the next allocation will come from a heavily used arena,
1222 which gives the nearly empty arenas a chance to be returned to the system.
1223 In my unscientific tests this dramatically improved the number of arenas
1224 that could be freed.
1225
1226 Note that an arena_object associated with an arena all of whose pools are
1227 currently in use isn't on either list.
1228
1229 Changed in Python 3.8: keeping usable_arenas sorted by number of free pools
1230 used to be done by one-at-a-time linear search when an arena's number of
1231 free pools changed. That could, overall, consume time quadratic in the
1232 number of arenas. That didn't really matter when there were only a few
1233 hundred arenas (typical!), but could be a timing disaster when there were
1234 hundreds of thousands. See bpo-37029.
1235
1236 Now we have a vector of "search fingers" to eliminate the need to search:
1237 nfp2lasta[nfp] returns the last ("rightmost") arena in usable_arenas
1238 with nfp free pools. This is NULL if and only if there is no arena with
1239 nfp free pools in usable_arenas.
1240 */
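/* Example: if usable_arenas is ordered A(1 free pool) -> B(3) -> C(3) -> D(5),
 * then nfp2lasta[1] == A, nfp2lasta[3] == C, nfp2lasta[5] == D, and
 * nfp2lasta[2] == nfp2lasta[4] == NULL. When an arena's nfreepools changes it
 * can be unlinked and relinked next to the right "finger" in constant time
 * instead of being located by a linear scan.
 */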
1241
1242 /* Array of objects used to track chunks of memory (arenas). */
1243 static struct arena_object* arenas = NULL;
1244 /* Number of slots currently allocated in the `arenas` vector. */
1245 static uint maxarenas = 0;
1246
1247 /* The head of the singly-linked, NULL-terminated list of available
1248 * arena_objects.
1249 */
1250 static struct arena_object* unused_arena_objects = NULL;
1251
1252 /* The head of the doubly-linked, NULL-terminated at each end, list of
1253 * arena_objects associated with arenas that have pools available.
1254 */
1255 static struct arena_object* usable_arenas = NULL;
1256
1257 /* nfp2lasta[nfp] is the last arena in usable_arenas with nfp free pools */
1258 static struct arena_object* nfp2lasta[MAX_POOLS_IN_ARENA + 1] = { NULL };
1259
1260 /* How many arena_objects do we initially allocate?
1261 * 16 = can allocate 16 arenas = 16 * ARENA_SIZE = 4MB before growing the
1262 * `arenas` vector.
1263 */
1264 #define INITIAL_ARENA_OBJECTS 16
1265
1266 /* Number of arenas allocated that haven't been free()'d. */
1267 static size_t narenas_currently_allocated = 0;
1268
1269 /* Total number of times malloc() called to allocate an arena. */
1270 static size_t ntimes_arena_allocated = 0;
1271 /* High water mark (max value ever seen) for narenas_currently_allocated. */
1272 static size_t narenas_highwater = 0;
1273
1274 static Py_ssize_t raw_allocated_blocks;
1275
1276 Py_ssize_t
1277 _Py_GetAllocatedBlocks(void)
1278 {
1279 Py_ssize_t n = raw_allocated_blocks;
1280 /* add up allocated blocks for used pools */
1281 for (uint i = 0; i < maxarenas; ++i) {
1282 /* Skip arenas which are not allocated. */
1283 if (arenas[i].address == 0) {
1284 continue;
1285 }
1286
1287 uintptr_t base = (uintptr_t)_Py_ALIGN_UP(arenas[i].address, POOL_SIZE);
1288
1289 /* visit every pool in the arena */
1290 assert(base <= (uintptr_t) arenas[i].pool_address);
1291 for (; base < (uintptr_t) arenas[i].pool_address; base += POOL_SIZE) {
1292 poolp p = (poolp)base;
1293 n += p->ref.count;
1294 }
1295 }
1296 return n;
1297 }
1298
1299 #if WITH_PYMALLOC_RADIX_TREE
1300 /*==========================================================================*/
1301 /* radix tree for tracking arena usage. If enabled, used to implement
1302 address_in_range().
1303
1304 memory address bit allocation for keys
1305
1306 64-bit pointers, IGNORE_BITS=0 and 2^20 arena size:
1307 15 -> MAP_TOP_BITS
1308 15 -> MAP_MID_BITS
1309 14 -> MAP_BOT_BITS
1310 20 -> ideal aligned arena
1311 ----
1312 64
1313
1314 64-bit pointers, IGNORE_BITS=16, and 2^20 arena size:
1315 16 -> IGNORE_BITS
1316 10 -> MAP_TOP_BITS
1317 10 -> MAP_MID_BITS
1318 8 -> MAP_BOT_BITS
1319 20 -> ideal aligned arena
1320 ----
1321 64
1322
1323 32-bit pointers and 2^18 arena size:
1324 14 -> MAP_BOT_BITS
1325 18 -> ideal aligned arena
1326 ----
1327 32
1328
1329 */
1330
1331 #if SIZEOF_VOID_P == 8
1332
1333 /* number of bits in a pointer */
1334 #define POINTER_BITS 64
1335
1336 /* High bits of memory addresses that will be ignored when indexing into the
1337 * radix tree. Setting this to zero is the safe default. For most 64-bit
1338 * machines, setting this to 16 would be safe. The kernel would not give
1339 * user-space virtual memory addresses that have significant information in
1340 * those high bits. The main advantage to setting IGNORE_BITS > 0 is that less
1341 * virtual memory will be used for the top and middle radix tree arrays. Those
1342 * arrays are allocated in the BSS segment and so will typically consume real
1343 * memory only if actually accessed.
1344 */
1345 #define IGNORE_BITS 0
1346
1347 /* use the top and mid layers of the radix tree */
1348 #define USE_INTERIOR_NODES
1349
1350 #elif SIZEOF_VOID_P == 4
1351
1352 #define POINTER_BITS 32
1353 #define IGNORE_BITS 0
1354
1355 #else
1356
1357 /* Currently this code works for 64-bit or 32-bit pointers only. */
1358 #error "obmalloc radix tree requires 64-bit or 32-bit pointers."
1359
1360 #endif /* SIZEOF_VOID_P */
1361
1362 /* arena_coverage_t members require this to be true */
1363 #if ARENA_BITS >= 32
1364 # error "arena size must be < 2^32"
1365 #endif
1366
1367 /* the lower bits of the address that are not ignored */
1368 #define ADDRESS_BITS (POINTER_BITS - IGNORE_BITS)
1369
1370 #ifdef USE_INTERIOR_NODES
1371 /* number of bits used for MAP_TOP and MAP_MID nodes */
1372 #define INTERIOR_BITS ((ADDRESS_BITS - ARENA_BITS + 2) / 3)
1373 #else
1374 #define INTERIOR_BITS 0
1375 #endif
1376
1377 #define MAP_TOP_BITS INTERIOR_BITS
1378 #define MAP_TOP_LENGTH (1 << MAP_TOP_BITS)
1379 #define MAP_TOP_MASK (MAP_TOP_LENGTH - 1)
1380
1381 #define MAP_MID_BITS INTERIOR_BITS
1382 #define MAP_MID_LENGTH (1 << MAP_MID_BITS)
1383 #define MAP_MID_MASK (MAP_MID_LENGTH - 1)
1384
1385 #define MAP_BOT_BITS (ADDRESS_BITS - ARENA_BITS - 2*INTERIOR_BITS)
1386 #define MAP_BOT_LENGTH (1 << MAP_BOT_BITS)
1387 #define MAP_BOT_MASK (MAP_BOT_LENGTH - 1)
1388
1389 #define MAP_BOT_SHIFT ARENA_BITS
1390 #define MAP_MID_SHIFT (MAP_BOT_BITS + MAP_BOT_SHIFT)
1391 #define MAP_TOP_SHIFT (MAP_MID_BITS + MAP_MID_SHIFT)
1392
1393 #define AS_UINT(p) ((uintptr_t)(p))
1394 #define MAP_BOT_INDEX(p) ((AS_UINT(p) >> MAP_BOT_SHIFT) & MAP_BOT_MASK)
1395 #define MAP_MID_INDEX(p) ((AS_UINT(p) >> MAP_MID_SHIFT) & MAP_MID_MASK)
1396 #define MAP_TOP_INDEX(p) ((AS_UINT(p) >> MAP_TOP_SHIFT) & MAP_TOP_MASK)
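/* For the default 64-bit layout (IGNORE_BITS == 0, ARENA_BITS == 20) the
 * shifts above work out to MAP_BOT_SHIFT == 20, MAP_MID_SHIFT == 34 and
 * MAP_TOP_SHIFT == 49, so a pointer p decomposes as in this sketch:
 *
 *     i1   = ((uintptr_t)p >> 49) & 0x7fff;   // MAP_TOP_MASK, 15 bits
 *     i2   = ((uintptr_t)p >> 34) & 0x7fff;   // MAP_MID_MASK, 15 bits
 *     i3   = ((uintptr_t)p >> 20) & 0x3fff;   // MAP_BOT_MASK, 14 bits
 *     tail = (uintptr_t)p & ARENA_SIZE_MASK;  // low 20 bits within an arena
 *
 * which is the 15/15/14/20 split sketched at the top of this radix tree
 * section.
 */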
1397
1398 #if IGNORE_BITS > 0
1399 /* Return the ignored part of the pointer address. Those bits should be the same
1400 * for all valid pointers if IGNORE_BITS is set correctly.
1401 */
1402 #define HIGH_BITS(p) (AS_UINT(p) >> ADDRESS_BITS)
1403 #else
1404 #define HIGH_BITS(p) 0
1405 #endif
1406
1407
1408 /* This is the leaf of the radix tree. See arena_map_mark_used() for the
1409 * meaning of these members. */
1410 typedef struct {
1411 int32_t tail_hi;
1412 int32_t tail_lo;
1413 } arena_coverage_t;
1414
1415 typedef struct arena_map_bot {
1416 /* The members tail_hi and tail_lo are accessed together. So, it is
1417 * better to have them as an array of structs, rather than two
1418 * arrays.
1419 */
1420 arena_coverage_t arenas[MAP_BOT_LENGTH];
1421 } arena_map_bot_t;
1422
1423 #ifdef USE_INTERIOR_NODES
1424 typedef struct arena_map_mid {
1425 struct arena_map_bot *ptrs[MAP_MID_LENGTH];
1426 } arena_map_mid_t;
1427
1428 typedef struct arena_map_top {
1429 struct arena_map_mid *ptrs[MAP_TOP_LENGTH];
1430 } arena_map_top_t;
1431 #endif
1432
1433 /* The root of radix tree. Note that by initializing like this, the memory
1434 * should be in the BSS. The OS will only memory map pages as the MAP_MID
1435 * nodes get used (OS pages are demand loaded as needed).
1436 */
1437 #ifdef USE_INTERIOR_NODES
1438 static arena_map_top_t arena_map_root;
1439 /* accounting for number of used interior nodes */
1440 static int arena_map_mid_count;
1441 static int arena_map_bot_count;
1442 #else
1443 static arena_map_bot_t arena_map_root;
1444 #endif
1445
1446 /* Return a pointer to a bottom tree node, return NULL if it doesn't exist or
1447 * it cannot be created */
1448 static inline Py_ALWAYS_INLINE arena_map_bot_t *
1449 arena_map_get(block *p, int create)
1450 {
1451 #ifdef USE_INTERIOR_NODES
1452 /* sanity check that IGNORE_BITS is correct */
1453 assert(HIGH_BITS(p) == HIGH_BITS(&arena_map_root));
1454 int i1 = MAP_TOP_INDEX(p);
1455 if (arena_map_root.ptrs[i1] == NULL) {
1456 if (!create) {
1457 return NULL;
1458 }
1459 arena_map_mid_t *n = PyMem_RawCalloc(1, sizeof(arena_map_mid_t));
1460 if (n == NULL) {
1461 return NULL;
1462 }
1463 arena_map_root.ptrs[i1] = n;
1464 arena_map_mid_count++;
1465 }
1466 int i2 = MAP_MID_INDEX(p);
1467 if (arena_map_root.ptrs[i1]->ptrs[i2] == NULL) {
1468 if (!create) {
1469 return NULL;
1470 }
1471 arena_map_bot_t *n = PyMem_RawCalloc(1, sizeof(arena_map_bot_t));
1472 if (n == NULL) {
1473 return NULL;
1474 }
1475 arena_map_root.ptrs[i1]->ptrs[i2] = n;
1476 arena_map_bot_count++;
1477 }
1478 return arena_map_root.ptrs[i1]->ptrs[i2];
1479 #else
1480 return &arena_map_root;
1481 #endif
1482 }
1483
1484
1485 /* The radix tree only tracks arenas. So, for 16 MiB arenas, we throw
1486 * away 24 bits of the address. That reduces the space requirement of
1487 * the tree compared to similar radix tree page-map schemes. In
1488 * exchange for slashing the space requirement, it needs more
1489 * computation to check an address.
1490 *
1491 * Tracking coverage is done by "ideal" arena address. It is easier to
1492 * explain in decimal so let's say that the arena size is 100 bytes.
1493 * Then, ideal addresses are 100, 200, 300, etc. For checking if a
1494 * pointer address is inside an actual arena, we have to check two ideal
1495 * arena addresses. E.g. if pointer is 357, we need to check 200 and
1496 * 300. In the rare case that an arena is aligned in the ideal way
1497 * (e.g. base address of arena is 200) then we only have to check one
1498 * ideal address.
1499 *
1500 * The tree nodes for 200 and 300 both store the address of arena.
1501 * There are two cases: the arena starts at a lower ideal arena and
1502 * extends to this one, or the arena starts in this arena and extends to
1503 * the next ideal arena. The tail_lo and tail_hi members correspond to
1504 * these two cases.
1505 */
1506
1507
1508 /* mark or unmark addresses covered by arena */
1509 static int
1510 arena_map_mark_used(uintptr_t arena_base, int is_used)
1511 {
1512 /* sanity check that IGNORE_BITS is correct */
1513 assert(HIGH_BITS(arena_base) == HIGH_BITS(&arena_map_root));
1514 arena_map_bot_t *n_hi = arena_map_get((block *)arena_base, is_used);
1515 if (n_hi == NULL) {
1516 assert(is_used); /* otherwise node should already exist */
1517 return 0; /* failed to allocate space for node */
1518 }
1519 int i3 = MAP_BOT_INDEX((block *)arena_base);
1520 int32_t tail = (int32_t)(arena_base & ARENA_SIZE_MASK);
1521 if (tail == 0) {
1522 /* is ideal arena address */
1523 n_hi->arenas[i3].tail_hi = is_used ? -1 : 0;
1524 }
1525 else {
1526 /* arena_base address is not ideal (aligned to arena size) and
1527 * so it potentially covers two MAP_BOT nodes. Get the MAP_BOT node
1528 * for the next arena. Note that it might be in different MAP_TOP
1529 * and MAP_MID nodes as well so we need to call arena_map_get()
1530 * again (do the full tree traversal).
1531 */
1532 n_hi->arenas[i3].tail_hi = is_used ? tail : 0;
1533 uintptr_t arena_base_next = arena_base + ARENA_SIZE;
1534 /* If arena_base is a legit arena address, so is arena_base_next - 1
1535 * (last address in arena). If arena_base_next overflows then it
1536 * must overflow to 0. However, that would mean arena_base was
1537 * "ideal" and we should not be in this case. */
1538 assert(arena_base < arena_base_next);
1539 arena_map_bot_t *n_lo = arena_map_get((block *)arena_base_next, is_used);
1540 if (n_lo == NULL) {
1541 assert(is_used); /* otherwise should already exist */
1542 n_hi->arenas[i3].tail_hi = 0;
1543 return 0; /* failed to allocate space for node */
1544 }
1545 int i3_next = MAP_BOT_INDEX(arena_base_next);
1546 n_lo->arenas[i3_next].tail_lo = is_used ? tail : 0;
1547 }
1548 return 1;
1549 }
1550
1551 /* Return true if 'p' is a pointer inside an obmalloc arena.
1552 * _PyObject_Free() calls this so it needs to be very fast. */
1553 static int
1554 arena_map_is_used(block *p)
1555 {
1556 arena_map_bot_t *n = arena_map_get(p, 0);
1557 if (n == NULL) {
1558 return 0;
1559 }
1560 int i3 = MAP_BOT_INDEX(p);
1561 /* ARENA_BITS must be < 32 so that the tail is a non-negative int32_t. */
1562 int32_t hi = n->arenas[i3].tail_hi;
1563 int32_t lo = n->arenas[i3].tail_lo;
1564 int32_t tail = (int32_t)(AS_UINT(p) & ARENA_SIZE_MASK);
1565 return (tail < lo) || (tail >= hi && hi != 0);
1566 }
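/* Continuing the decimal analogy used above (arena size 100): suppose an
 * arena spans [250, 350) and no other arena touches those nodes. Marking it
 * stores tail_hi = 50 in the node for ideal address 200 and tail_lo = 50 in
 * the node for ideal address 300. The check above then gives:
 *
 *     p = 280: tail = 80, node 200: hi = 50 -> 80 >= 50 && hi != 0 -> used
 *     p = 320: tail = 20, node 300: lo = 50 -> 20 < 50             -> used
 *     p = 357: tail = 57, node 300: lo = 50, hi = 0                -> not used
 */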
1567
1568 /* end of radix tree logic */
1569 /*==========================================================================*/
1570 #endif /* WITH_PYMALLOC_RADIX_TREE */
1571
1572
1573 /* Allocate a new arena. If we run out of memory, return NULL. Else
1574 * allocate a new arena, and return the address of an arena_object
1575 * describing the new arena. It's expected that the caller will set
1576 * `usable_arenas` to the return value.
1577 */
1578 static struct arena_object*
1579 new_arena(void)
1580 {
1581 struct arena_object* arenaobj;
1582 uint excess; /* number of bytes above pool alignment */
1583 void *address;
1584 static int debug_stats = -1;
1585
1586 if (debug_stats == -1) {
1587 const char *opt = Py_GETENV("PYTHONMALLOCSTATS");
1588 debug_stats = (opt != NULL && *opt != '\0');
1589 }
1590 if (debug_stats) {
1591 _PyObject_DebugMallocStats(stderr);
1592 }
1593
1594 if (unused_arena_objects == NULL) {
1595 uint i;
1596 uint numarenas;
1597 size_t nbytes;
1598
1599 /* Double the number of arena objects on each allocation.
1600 * Note that it's possible for `numarenas` to overflow.
1601 */
1602 numarenas = maxarenas ? maxarenas << 1 : INITIAL_ARENA_OBJECTS;
1603 if (numarenas <= maxarenas)
1604 return NULL; /* overflow */
1605 #if SIZEOF_SIZE_T <= SIZEOF_INT
1606 if (numarenas > SIZE_MAX / sizeof(*arenas))
1607 return NULL; /* overflow */
1608 #endif
1609 nbytes = numarenas * sizeof(*arenas);
1610 arenaobj = (struct arena_object *)PyMem_RawRealloc(arenas, nbytes);
1611 if (arenaobj == NULL)
1612 return NULL;
1613 arenas = arenaobj;
1614
1615 /* We might need to fix pointers that were copied. However,
1616 * new_arena only gets called when all the pages in the
1617 * previous arenas are full. Thus, there are *no* pointers
1618 * into the old array. Thus, we don't have to worry about
1619 * invalid pointers. Just to be sure, some asserts:
1620 */
1621 assert(usable_arenas == NULL);
1622 assert(unused_arena_objects == NULL);
1623
1624 /* Put the new arenas on the unused_arena_objects list. */
1625 for (i = maxarenas; i < numarenas; ++i) {
1626 arenas[i].address = 0; /* mark as unassociated */
1627 arenas[i].nextarena = i < numarenas - 1 ?
1628 &arenas[i+1] : NULL;
1629 }
1630
1631 /* Update globals. */
1632 unused_arena_objects = &arenas[maxarenas];
1633 maxarenas = numarenas;
1634 }
1635
1636 /* Take the next available arena object off the head of the list. */
1637 assert(unused_arena_objects != NULL);
1638 arenaobj = unused_arena_objects;
1639 unused_arena_objects = arenaobj->nextarena;
1640 assert(arenaobj->address == 0);
1641 address = _PyObject_Arena.alloc(_PyObject_Arena.ctx, ARENA_SIZE);
1642 #if WITH_PYMALLOC_RADIX_TREE
1643 if (address != NULL) {
1644 if (!arena_map_mark_used((uintptr_t)address, 1)) {
1645 /* marking arena in radix tree failed, abort */
1646 _PyObject_Arena.free(_PyObject_Arena.ctx, address, ARENA_SIZE);
1647 address = NULL;
1648 }
1649 }
1650 #endif
1651 if (address == NULL) {
1652 /* The allocation failed: return NULL after putting the
1653 * arenaobj back.
1654 */
1655 arenaobj->nextarena = unused_arena_objects;
1656 unused_arena_objects = arenaobj;
1657 return NULL;
1658 }
1659 arenaobj->address = (uintptr_t)address;
1660
1661 ++narenas_currently_allocated;
1662 ++ntimes_arena_allocated;
1663 if (narenas_currently_allocated > narenas_highwater)
1664 narenas_highwater = narenas_currently_allocated;
1665 arenaobj->freepools = NULL;
1666 /* pool_address <- first pool-aligned address in the arena
1667 nfreepools <- number of whole pools that fit after alignment */
1668 arenaobj->pool_address = (block*)arenaobj->address;
1669 arenaobj->nfreepools = MAX_POOLS_IN_ARENA;
1670 excess = (uint)(arenaobj->address & POOL_SIZE_MASK);
1671 if (excess != 0) {
1672 --arenaobj->nfreepools;
1673 arenaobj->pool_address += POOL_SIZE - excess;
1674 }
1675 arenaobj->ntotalpools = arenaobj->nfreepools;
1676
1677 return arenaobj;
1678 }
1679
1680
1681
1682 #if WITH_PYMALLOC_RADIX_TREE
1683 /* Return true if and only if P is an address that was allocated by
1684 pymalloc. When the radix tree is used, 'poolp' is unused.
1685 */
1686 static bool
1687 address_in_range(void *p, poolp pool)
1688 {
1689 return arena_map_is_used(p);
1690 }
1691 #else
1692 /*
1693 address_in_range(P, POOL)
1694
1695 Return true if and only if P is an address that was allocated by pymalloc.
1696 POOL must be the pool address associated with P, i.e., POOL = POOL_ADDR(P)
1697 (the caller is asked to compute this because the macro expands POOL more than
1698 once, and for efficiency it's best for the caller to assign POOL_ADDR(P) to a
1699 variable and pass the latter to the macro; because address_in_range is
1700 called on every alloc/realloc/free, micro-efficiency is important here).
1701
1702 Tricky: Let B be the arena base address associated with the pool, B =
1703 arenas[(POOL)->arenaindex].address. Then P belongs to the arena if and only if
1704
1705 B <= P < B + ARENA_SIZE
1706
1707 Subtracting B throughout, this is true iff
1708
1709 0 <= P-B < ARENA_SIZE
1710
1711 By using unsigned arithmetic, the "0 <=" half of the test can be skipped.
1712
1713 Obscure: A PyMem "free memory" function can call the pymalloc free or realloc
1714 before the first arena has been allocated. `arenas` is still NULL in that
1715 case. We're relying on the fact that maxarenas is also 0 in that case, so that
1716 (POOL)->arenaindex < maxarenas must be false, saving us from trying to index
1717 into a NULL arenas.
1718
1719 Details: given P and POOL, the arena_object corresponding to P is AO =
1720 arenas[(POOL)->arenaindex]. Suppose obmalloc controls P. Then (barring wild
1721 stores, etc), POOL is the correct address of P's pool, AO.address is the
1722 correct base address of the pool's arena, and P must be within ARENA_SIZE of
1723 AO.address. In addition, AO.address is not 0 (no arena can start at address 0
1724 (NULL)). Therefore address_in_range correctly reports that obmalloc
1725 controls P.
1726
1727 Now suppose obmalloc does not control P (e.g., P was obtained via a direct
1728 call to the system malloc() or realloc()). (POOL)->arenaindex may be anything
1729 in this case -- it may even be uninitialized trash. If the trash arenaindex
1730 is >= maxarenas, the macro correctly concludes at once that obmalloc doesn't
1731 control P.
1732
1733 Else arenaindex is < maxarenas, and AO is read up. If AO corresponds to an
1734 allocated arena, obmalloc controls all the memory in slice AO.address :
1735 AO.address+ARENA_SIZE. By case assumption, P is not controlled by obmalloc,
1736 so P doesn't lie in that slice, so the macro correctly reports that P is not
1737 controlled by obmalloc.
1738
1739 Finally, if P is not controlled by obmalloc and AO corresponds to an unused
1740 arena_object (one not currently associated with an allocated arena),
1741 AO.address is 0, and the second test in the macro reduces to:
1742
1743 P < ARENA_SIZE
1744
1745 If P >= ARENA_SIZE (extremely likely), the macro again correctly concludes
1746 that P is not controlled by obmalloc. However, if P < ARENA_SIZE, this part
1747 of the test still passes, and the third clause (AO.address != 0) is necessary
1748 to get the correct result: AO.address is 0 in this case, so the macro
1749 correctly reports that P is not controlled by obmalloc (despite that P lies in
1750 slice AO.address : AO.address + ARENA_SIZE).
1751
1752 Note: The third (AO.address != 0) clause was added in Python 2.5. Before
1753 2.5, arenas were never free()'ed, and an arenaindex < maxarenas always
1754 corresponded to a currently-allocated arena, so the "P is not controlled by
1755 obmalloc, AO corresponds to an unused arena_object, and P < ARENA_SIZE" case
1756 was impossible.
1757
1758 Note that the logic is excruciating, and reading up possibly uninitialized
1759 memory when P is not controlled by obmalloc (to get at (POOL)->arenaindex)
1760 creates problems for some memory debuggers. The overwhelming advantage is
1761 that this test determines whether an arbitrary address is controlled by
1762 obmalloc in a small constant time, independent of the number of arenas
1763 obmalloc controls. Since this test is needed at every entry point, it's
1764 extremely desirable that it be this fast.
1765 */
1766
1767 static bool _Py_NO_SANITIZE_ADDRESS
1768 _Py_NO_SANITIZE_THREAD
1769 _Py_NO_SANITIZE_MEMORY
1770 address_in_range(void *p, poolp pool)
1771 {
1772 // Since address_in_range may be reading from memory which was not allocated
1773 // by Python, it is important that pool->arenaindex is read only once, as
1774 // another thread may be concurrently modifying the value without holding
1775 // the GIL. The following dance forces the compiler to read pool->arenaindex
1776 // only once.
1777 uint arenaindex = *((volatile uint *)&pool->arenaindex);
1778 return arenaindex < maxarenas &&
1779 (uintptr_t)p - arenas[arenaindex].address < ARENA_SIZE &&
1780 arenas[arenaindex].address != 0;
1781 }
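/* A quick numeric sketch of the unsigned-arithmetic trick described above
 * (hypothetical values): with arenas[arenaindex].address == 1000 and
 * ARENA_SIZE == 256,
 *
 *     p == 1100:  (uintptr_t)p - 1000 == 100, which is < 256   -> in range
 *     p ==  900:  (uintptr_t)p - 1000 wraps to a huge unsigned
 *                 value, which is >= 256                       -> rejected
 *
 * so the single unsigned comparison covers both "p < B" and
 * "p >= B + ARENA_SIZE" without a second branch. */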
1782
1783 #endif /* !WITH_PYMALLOC_RADIX_TREE */
1784
1785 /*==========================================================================*/
1786
1787 // Called when freelist is exhausted. Extend the freelist if there is
1788 // space for a block. Otherwise, remove this pool from usedpools.
1789 static void
1790 pymalloc_pool_extend(poolp pool, uint size)
1791 {
1792 if (UNLIKELY(pool->nextoffset <= pool->maxnextoffset)) {
1793 /* There is room for another block. */
1794 pool->freeblock = (block*)pool + pool->nextoffset;
1795 pool->nextoffset += INDEX2SIZE(size);
1796 *(block **)(pool->freeblock) = NULL;
1797 return;
1798 }
1799
1800 /* Pool is full, unlink from used pools. */
1801 poolp next;
1802 next = pool->nextpool;
1803 pool = pool->prevpool;
1804 next->prevpool = pool;
1805 pool->nextpool = next;
1806 }
1807
1808 /* Called when pymalloc_alloc cannot allocate a block from usedpools.
1809 * This function takes a new pool and allocates a block from it.
1810 */
1811 static void*
1812 allocate_from_new_pool(uint size)
1813 {
1814 /* There isn't a pool of the right size class immediately
1815 * available: use a free pool.
1816 */
1817 if (UNLIKELY(usable_arenas == NULL)) {
1818 /* No arena has a free pool: allocate a new arena. */
1819 #ifdef WITH_MEMORY_LIMITS
1820 if (narenas_currently_allocated >= MAX_ARENAS) {
1821 return NULL;
1822 }
1823 #endif
1824 usable_arenas = new_arena();
1825 if (usable_arenas == NULL) {
1826 return NULL;
1827 }
1828 usable_arenas->nextarena = usable_arenas->prevarena = NULL;
1829 assert(nfp2lasta[usable_arenas->nfreepools] == NULL);
1830 nfp2lasta[usable_arenas->nfreepools] = usable_arenas;
1831 }
1832 assert(usable_arenas->address != 0);
1833
1834 /* This arena already had the smallest nfreepools value, so decreasing
1835 * nfreepools doesn't change that, and we don't need to rearrange the
1836 * usable_arenas list. However, if the arena becomes wholly allocated,
1837 * we need to remove its arena_object from usable_arenas.
1838 */
1839 assert(usable_arenas->nfreepools > 0);
1840 if (nfp2lasta[usable_arenas->nfreepools] == usable_arenas) {
1841 /* It's the last arena with this nfreepools value, so there won't be any once we take a pool off. */
1842 nfp2lasta[usable_arenas->nfreepools] = NULL;
1843 }
1844 /* If any free pools will remain, it will be the new smallest. */
1845 if (usable_arenas->nfreepools > 1) {
1846 assert(nfp2lasta[usable_arenas->nfreepools - 1] == NULL);
1847 nfp2lasta[usable_arenas->nfreepools - 1] = usable_arenas;
1848 }
1849
1850 /* Try to get a cached free pool. */
1851 poolp pool = usable_arenas->freepools;
1852 if (LIKELY(pool != NULL)) {
1853 /* Unlink from cached pools. */
1854 usable_arenas->freepools = pool->nextpool;
1855 usable_arenas->nfreepools--;
1856 if (UNLIKELY(usable_arenas->nfreepools == 0)) {
1857 /* Wholly allocated: remove. */
1858 assert(usable_arenas->freepools == NULL);
1859 assert(usable_arenas->nextarena == NULL ||
1860 usable_arenas->nextarena->prevarena ==
1861 usable_arenas);
1862 usable_arenas = usable_arenas->nextarena;
1863 if (usable_arenas != NULL) {
1864 usable_arenas->prevarena = NULL;
1865 assert(usable_arenas->address != 0);
1866 }
1867 }
1868 else {
1869 /* nfreepools > 0: it must be that freepools
1870 * isn't NULL, or that we haven't yet carved
1871 * off all the arena's pools for the first
1872 * time.
1873 */
1874 assert(usable_arenas->freepools != NULL ||
1875 usable_arenas->pool_address <=
1876 (block*)usable_arenas->address +
1877 ARENA_SIZE - POOL_SIZE);
1878 }
1879 }
1880 else {
1881 /* Carve off a new pool. */
1882 assert(usable_arenas->nfreepools > 0);
1883 assert(usable_arenas->freepools == NULL);
1884 pool = (poolp)usable_arenas->pool_address;
1885 assert((block*)pool <= (block*)usable_arenas->address +
1886 ARENA_SIZE - POOL_SIZE);
1887 pool->arenaindex = (uint)(usable_arenas - arenas);
1888 assert(&arenas[pool->arenaindex] == usable_arenas);
1889 pool->szidx = DUMMY_SIZE_IDX;
1890 usable_arenas->pool_address += POOL_SIZE;
1891 --usable_arenas->nfreepools;
1892
1893 if (usable_arenas->nfreepools == 0) {
1894 assert(usable_arenas->nextarena == NULL ||
1895 usable_arenas->nextarena->prevarena ==
1896 usable_arenas);
1897 /* Unlink the arena: it is completely allocated. */
1898 usable_arenas = usable_arenas->nextarena;
1899 if (usable_arenas != NULL) {
1900 usable_arenas->prevarena = NULL;
1901 assert(usable_arenas->address != 0);
1902 }
1903 }
1904 }
1905
1906 /* Frontlink to used pools. */
1907 block *bp;
1908 poolp next = usedpools[size + size]; /* == prev */
1909 pool->nextpool = next;
1910 pool->prevpool = next;
1911 next->nextpool = pool;
1912 next->prevpool = pool;
1913 pool->ref.count = 1;
1914 if (pool->szidx == size) {
1915 /* Luckily, this pool last contained blocks
1916 * of the same size class, so its header
1917 * and free list are already initialized.
1918 */
1919 bp = pool->freeblock;
1920 assert(bp != NULL);
1921 pool->freeblock = *(block **)bp;
1922 return bp;
1923 }
1924 /*
1925 * Initialize the pool header, set up the free list to
1926 * contain just the second block, and return the first
1927 * block.
1928 */
1929 pool->szidx = size;
1930 size = INDEX2SIZE(size);
1931 bp = (block *)pool + POOL_OVERHEAD;
1932 pool->nextoffset = POOL_OVERHEAD + (size << 1);
1933 pool->maxnextoffset = POOL_SIZE - size;
1934 pool->freeblock = bp + size;
1935 *(block **)(pool->freeblock) = NULL;
1936 return bp;
1937 }
1938
1939 /* pymalloc allocator
1940
1941 Return a pointer to newly allocated memory if pymalloc allocated memory.
1942
1943 Return NULL if pymalloc failed to allocate the memory block: on bigger
1944 requests, on error in the code below (as a last chance to serve the request)
1945 or when the max memory limit has been reached.
1946 */
1947 static inline void*
1948 pymalloc_alloc(void *ctx, size_t nbytes)
1949 {
1950 #ifdef WITH_VALGRIND
1951 if (UNLIKELY(running_on_valgrind == -1)) {
1952 running_on_valgrind = RUNNING_ON_VALGRIND;
1953 }
1954 if (UNLIKELY(running_on_valgrind)) {
1955 return NULL;
1956 }
1957 #endif
1958
1959 if (UNLIKELY(nbytes == 0)) {
1960 return NULL;
1961 }
1962 if (UNLIKELY(nbytes > SMALL_REQUEST_THRESHOLD)) {
1963 return NULL;
1964 }
1965
1966 uint size = (uint)(nbytes - 1) >> ALIGNMENT_SHIFT;
1967 poolp pool = usedpools[size + size];
1968 block *bp;
1969
1970 if (LIKELY(pool != pool->nextpool)) {
1971 /*
1972 * There is a used pool for this size class.
1973 * Pick up the head block of its free list.
1974 */
1975 ++pool->ref.count;
1976 bp = pool->freeblock;
1977 assert(bp != NULL);
1978
1979 if (UNLIKELY((pool->freeblock = *(block **)bp) == NULL)) {
1980 // Reached the end of the free list, try to extend it.
1981 pymalloc_pool_extend(pool, size);
1982 }
1983 }
1984 else {
1985 /* There isn't a pool of the right size class immediately
1986 * available: use a free pool.
1987 */
1988 bp = allocate_from_new_pool(size);
1989 }
1990
1991 return (void *)bp;
1992 }
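/* Size-class arithmetic used above, spelled out (assuming ALIGNMENT == 16
 * and ALIGNMENT_SHIFT == 4, the usual 64-bit configuration):
 *
 *     nbytes  1..16   ->  size index 0   (16-byte blocks)
 *     nbytes 17..32   ->  size index 1   (32-byte blocks)
 *     nbytes 33..48   ->  size index 2   (48-byte blocks)
 *
 * i.e. (uint)(nbytes - 1) >> ALIGNMENT_SHIFT rounds the request up to the
 * next multiple of ALIGNMENT, and usedpools[size + size] is the list head
 * for that class (the doubled index reflects how the usedpools table is
 * laid out, two pointers per size class). */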
1993
1994
1995 static void *
1996 _PyObject_Malloc(void *ctx, size_t nbytes)
1997 {
1998 void* ptr = pymalloc_alloc(ctx, nbytes);
1999 if (LIKELY(ptr != NULL)) {
2000 return ptr;
2001 }
2002
2003 ptr = PyMem_RawMalloc(nbytes);
2004 if (ptr != NULL) {
2005 raw_allocated_blocks++;
2006 }
2007 return ptr;
2008 }
2009
2010
2011 static void *
2012 _PyObject_Calloc(void *ctx, size_t nelem, size_t elsize)
2013 {
2014 assert(elsize == 0 || nelem <= (size_t)PY_SSIZE_T_MAX / elsize);
2015 size_t nbytes = nelem * elsize;
2016
2017 void* ptr = pymalloc_alloc(ctx, nbytes);
2018 if (LIKELY(ptr != NULL)) {
2019 memset(ptr, 0, nbytes);
2020 return ptr;
2021 }
2022
2023 ptr = PyMem_RawCalloc(nelem, elsize);
2024 if (ptr != NULL) {
2025 raw_allocated_blocks++;
2026 }
2027 return ptr;
2028 }
2029
2030
2031 static void
2032 insert_to_usedpool(poolp pool)
2033 {
2034 assert(pool->ref.count > 0); /* else the pool is empty */
2035
2036 uint size = pool->szidx;
2037 poolp next = usedpools[size + size];
2038 poolp prev = next->prevpool;
2039
2040 /* insert pool before next: prev <-> pool <-> next */
2041 pool->nextpool = next;
2042 pool->prevpool = prev;
2043 next->prevpool = pool;
2044 prev->nextpool = pool;
2045 }
2046
2047 static void
2048 insert_to_freepool(poolp pool)
2049 {
2050 poolp next = pool->nextpool;
2051 poolp prev = pool->prevpool;
2052 next->prevpool = prev;
2053 prev->nextpool = next;
2054
2055 /* Link the pool to freepools. This is a singly-linked
2056 * list, and pool->prevpool isn't used there.
2057 */
2058 struct arena_object *ao = &arenas[pool->arenaindex];
2059 pool->nextpool = ao->freepools;
2060 ao->freepools = pool;
2061 uint nf = ao->nfreepools;
2062 /* If this is the rightmost arena with this number of free pools,
2063 * nfp2lasta[nf] needs to change. Caution: if nf is 0, there
2064 * are no arenas in usable_arenas with that value.
2065 */
2066 struct arena_object* lastnf = nfp2lasta[nf];
2067 assert((nf == 0 && lastnf == NULL) ||
2068 (nf > 0 &&
2069 lastnf != NULL &&
2070 lastnf->nfreepools == nf &&
2071 (lastnf->nextarena == NULL ||
2072 nf < lastnf->nextarena->nfreepools)));
2073 if (lastnf == ao) { /* it is the rightmost */
2074 struct arena_object* p = ao->prevarena;
2075 nfp2lasta[nf] = (p != NULL && p->nfreepools == nf) ? p : NULL;
2076 }
2077 ao->nfreepools = ++nf;
2078
2079 /* All the rest is arena management. We just freed
2080 * a pool, and there are 4 cases for arena mgmt:
2081 * 1. If all the pools are free, return the arena to
2082 * the system free(). Except if this is the last
2083 * arena in the list, keep it to avoid thrashing:
2084 * keeping one wholly free arena in the list avoids
2085 * pathological cases where a simple loop would
2086 * otherwise provoke needing to allocate and free an
2087 * arena on every iteration. See bpo-37257.
2088 * 2. If this is the only free pool in the arena,
2089 * add the arena back to the `usable_arenas` list.
2090 * 3. If the "next" arena has a smaller count of free
2091 * pools, we have to "slide this arena right" to
2092 * restore the invariant that usable_arenas is sorted in order of
2093 * nfreepools.
2094 * 4. Else there's nothing more to do.
2095 */
2096 if (nf == ao->ntotalpools && ao->nextarena != NULL) {
2097 /* Case 1. First unlink ao from usable_arenas.
2098 */
2099 assert(ao->prevarena == NULL ||
2100 ao->prevarena->address != 0);
2101 assert(ao ->nextarena == NULL ||
2102 ao->nextarena->address != 0);
2103
2104 /* Fix the pointer in the prevarena, or the
2105 * usable_arenas pointer.
2106 */
2107 if (ao->prevarena == NULL) {
2108 usable_arenas = ao->nextarena;
2109 assert(usable_arenas == NULL ||
2110 usable_arenas->address != 0);
2111 }
2112 else {
2113 assert(ao->prevarena->nextarena == ao);
2114 ao->prevarena->nextarena =
2115 ao->nextarena;
2116 }
2117 /* Fix the pointer in the nextarena. */
2118 if (ao->nextarena != NULL) {
2119 assert(ao->nextarena->prevarena == ao);
2120 ao->nextarena->prevarena =
2121 ao->prevarena;
2122 }
2123 /* Record that this arena_object slot is
2124 * available to be reused.
2125 */
2126 ao->nextarena = unused_arena_objects;
2127 unused_arena_objects = ao;
2128
2129 #if WITH_PYMALLOC_RADIX_TREE
2130 /* mark arena region as not under control of obmalloc */
2131 arena_map_mark_used(ao->address, 0);
2132 #endif
2133
2134 /* Free the entire arena. */
2135 _PyObject_Arena.free(_PyObject_Arena.ctx,
2136 (void *)ao->address, ARENA_SIZE);
2137 ao->address = 0; /* mark unassociated */
2138 --narenas_currently_allocated;
2139
2140 return;
2141 }
2142
2143 if (nf == 1) {
2144 /* Case 2. Put ao at the head of
2145 * usable_arenas. Note that because
2146 * ao->nfreepools was 0 before, ao isn't
2147 * currently on the usable_arenas list.
2148 */
2149 ao->nextarena = usable_arenas;
2150 ao->prevarena = NULL;
2151 if (usable_arenas)
2152 usable_arenas->prevarena = ao;
2153 usable_arenas = ao;
2154 assert(usable_arenas->address != 0);
2155 if (nfp2lasta[1] == NULL) {
2156 nfp2lasta[1] = ao;
2157 }
2158
2159 return;
2160 }
2161
2162 /* If this arena is now out of order, we need to keep
2163 * the list sorted. The list is kept sorted so that
2164 * the "most full" arenas are used first, which allows
2165 * the nearly empty arenas to be completely freed. In
2166 * a few un-scientific tests, it seems like this
2167 * approach allowed a lot more memory to be freed.
2168 */
2169 /* If this is the only arena with nf, record that. */
2170 if (nfp2lasta[nf] == NULL) {
2171 nfp2lasta[nf] = ao;
2172 } /* else the rightmost with nf doesn't change */
2173 /* If this was the rightmost of the old size, it remains in place. */
2174 if (ao == lastnf) {
2175 /* Case 4. Nothing to do. */
2176 return;
2177 }
2178 /* If ao were the only arena in the list, the last block would have
2179 * gotten us out.
2180 */
2181 assert(ao->nextarena != NULL);
2182
2183 /* Case 3: We have to move the arena towards the end of the list,
2184 * because it has more free pools than the arena to its right. It needs
2185 * to move to follow lastnf.
2186 * First unlink ao from usable_arenas.
2187 */
2188 if (ao->prevarena != NULL) {
2189 /* ao isn't at the head of the list */
2190 assert(ao->prevarena->nextarena == ao);
2191 ao->prevarena->nextarena = ao->nextarena;
2192 }
2193 else {
2194 /* ao is at the head of the list */
2195 assert(usable_arenas == ao);
2196 usable_arenas = ao->nextarena;
2197 }
2198 ao->nextarena->prevarena = ao->prevarena;
2199 /* And insert after lastnf. */
2200 ao->prevarena = lastnf;
2201 ao->nextarena = lastnf->nextarena;
2202 if (ao->nextarena != NULL) {
2203 ao->nextarena->prevarena = ao;
2204 }
2205 lastnf->nextarena = ao;
2206 /* Verify that the swaps worked. */
2207 assert(ao->nextarena == NULL || nf <= ao->nextarena->nfreepools);
2208 assert(ao->prevarena == NULL || nf > ao->prevarena->nfreepools);
2209 assert(ao->nextarena == NULL || ao->nextarena->prevarena == ao);
2210 assert((usable_arenas == ao && ao->prevarena == NULL)
2211 || ao->prevarena->nextarena == ao);
2212 }
2213
2214 /* Free a memory block allocated by pymalloc_alloc().
2215 Return 1 if it was freed.
2216 Return 0 if the block was not allocated by pymalloc_alloc(). */
2217 static inline int
2218 pymalloc_free(void *ctx, void *p)
2219 {
2220 assert(p != NULL);
2221
2222 #ifdef WITH_VALGRIND
2223 if (UNLIKELY(running_on_valgrind > 0)) {
2224 return 0;
2225 }
2226 #endif
2227
2228 poolp pool = POOL_ADDR(p);
2229 if (UNLIKELY(!address_in_range(p, pool))) {
2230 return 0;
2231 }
2232 /* We allocated this address. */
2233
2234 /* Link p to the start of the pool's freeblock list. Since
2235 * the pool had at least the p block outstanding, the pool
2236 * wasn't empty (so it's already in a usedpools[] list, or
2237 * was full and is in no list -- it's not in the freeblocks
2238 * list in any case).
2239 */
2240 assert(pool->ref.count > 0); /* else it was empty */
2241 block *lastfree = pool->freeblock;
2242 *(block **)p = lastfree;
2243 pool->freeblock = (block *)p;
2244 pool->ref.count--;
2245
2246 if (UNLIKELY(lastfree == NULL)) {
2247 /* Pool was full, so doesn't currently live in any list:
2248 * link it to the front of the appropriate usedpools[] list.
2249 * This mimics LRU pool usage for new allocations and
2250 * targets optimal filling when several pools contain
2251 * blocks of the same size class.
2252 */
2253 insert_to_usedpool(pool);
2254 return 1;
2255 }
2256
2257 /* freeblock wasn't NULL, so the pool wasn't full,
2258 * and the pool is in a usedpools[] list.
2259 */
2260 if (LIKELY(pool->ref.count != 0)) {
2261 /* pool isn't empty: leave it in usedpools */
2262 return 1;
2263 }
2264
2265 /* Pool is now empty: unlink from usedpools, and
2266 * link to the front of freepools. This ensures that
2267 * previously freed pools will be allocated later
2268 * (not being referenced, they may have been paged out).
2269 */
2270 insert_to_freepool(pool);
2271 return 1;
2272 }
2273
2274
2275 static void
2276 _PyObject_Free(void *ctx, void *p)
2277 {
2278 /* PyObject_Free(NULL) has no effect */
2279 if (p == NULL) {
2280 return;
2281 }
2282
2283 if (UNLIKELY(!pymalloc_free(ctx, p))) {
2284 /* pymalloc didn't allocate this address */
2285 PyMem_RawFree(p);
2286 raw_allocated_blocks--;
2287 }
2288 }
2289
2290
2291 /* pymalloc realloc.
2292
2293 If nbytes==0, then as the Python docs promise, we do not treat this like
2294 free(p), and return a non-NULL result.
2295
2296 Return 1 if pymalloc reallocated memory and wrote the new pointer into
2297 newptr_p.
2298
2299 Return 0 if pymalloc didn't allocate p. */
2300 static int
2301 pymalloc_realloc(void *ctx, void **newptr_p, void *p, size_t nbytes)
2302 {
2303 void *bp;
2304 poolp pool;
2305 size_t size;
2306
2307 assert(p != NULL);
2308
2309 #ifdef WITH_VALGRIND
2310 /* Treat running_on_valgrind == -1 the same as 0 */
2311 if (UNLIKELY(running_on_valgrind > 0)) {
2312 return 0;
2313 }
2314 #endif
2315
2316 pool = POOL_ADDR(p);
2317 if (!address_in_range(p, pool)) {
2318 /* pymalloc is not managing this block.
2319
2320 If nbytes <= SMALL_REQUEST_THRESHOLD, it's tempting to try to take
2321 over this block. However, if we do, we need to copy the valid data
2322 from the C-managed block to one of our blocks, and there's no
2323 portable way to know how much of the memory space starting at p is
2324 valid.
2325
2326 As bug 1185883 pointed out the hard way, it's possible that the
2327 C-managed block is "at the end" of allocated VM space, so that a
2328 memory fault can occur if we try to copy nbytes bytes starting at p.
2329 Instead we punt: let C continue to manage this block. */
2330 return 0;
2331 }
2332
2333 /* pymalloc is in charge of this block */
2334 size = INDEX2SIZE(pool->szidx);
2335 if (nbytes <= size) {
2336 /* The block is staying the same or shrinking.
2337
2338 If it's shrinking, there's a tradeoff: it costs cycles to copy the
2339 block to a smaller size class, but it wastes memory not to copy it.
2340
2341 The compromise here is to copy on shrink only if at least 25% of
2342 size can be shaved off. */
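        /* Worked example (illustrative): with size == 64, requests of
           49..64 bytes keep the block in place (4*nbytes > 192), while a
           request of 48 bytes or fewer is copied into a smaller class. */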
2343 if (4 * nbytes > 3 * size) {
2344 /* It's the same, or shrinking and new/old > 3/4. */
2345 *newptr_p = p;
2346 return 1;
2347 }
2348 size = nbytes;
2349 }
2350
2351 bp = _PyObject_Malloc(ctx, nbytes);
2352 if (bp != NULL) {
2353 memcpy(bp, p, size);
2354 _PyObject_Free(ctx, p);
2355 }
2356 *newptr_p = bp;
2357 return 1;
2358 }
2359
2360
2361 static void *
2362 _PyObject_Realloc(void *ctx, void *ptr, size_t nbytes)
2363 {
2364 void *ptr2;
2365
2366 if (ptr == NULL) {
2367 return _PyObject_Malloc(ctx, nbytes);
2368 }
2369
2370 if (pymalloc_realloc(ctx, &ptr2, ptr, nbytes)) {
2371 return ptr2;
2372 }
2373
2374 return PyMem_RawRealloc(ptr, nbytes);
2375 }
2376
2377 #else /* ! WITH_PYMALLOC */
2378
2379 /*==========================================================================*/
2380 /* pymalloc not enabled: Redirect the entry points to malloc. These will
2381 * only be used by extensions that are compiled with pymalloc enabled. */
2382
2383 Py_ssize_t
2384 _Py_GetAllocatedBlocks(void)
2385 {
2386 return 0;
2387 }
2388
2389 #endif /* WITH_PYMALLOC */
2390
2391
2392 /*==========================================================================*/
2393 /* A x-platform debugging allocator. This doesn't manage memory directly,
2394 * it wraps a real allocator, adding extra debugging info to the memory blocks.
2395 */
2396
2397 /* Uncomment this define to add the "serialno" field */
2398 /* #define PYMEM_DEBUG_SERIALNO */
2399
2400 #ifdef PYMEM_DEBUG_SERIALNO
2401 static size_t serialno = 0; /* incremented on each debug {m,re}alloc */
2402
2403 /* serialno is always incremented via calling this routine. The point is
2404 * to supply a single place to set a breakpoint.
2405 */
2406 static void
2407 bumpserialno(void)
2408 {
2409 ++serialno;
2410 }
2411 #endif
2412
2413 #define SST SIZEOF_SIZE_T
2414
2415 #ifdef PYMEM_DEBUG_SERIALNO
2416 # define PYMEM_DEBUG_EXTRA_BYTES 4 * SST
2417 #else
2418 # define PYMEM_DEBUG_EXTRA_BYTES 3 * SST
2419 #endif
2420
2421 /* Read sizeof(size_t) bytes at p as a big-endian size_t. */
2422 static size_t
2423 read_size_t(const void *p)
2424 {
2425 const uint8_t *q = (const uint8_t *)p;
2426 size_t result = *q++;
2427 int i;
2428
2429 for (i = SST; --i > 0; ++q)
2430 result = (result << 8) | *q;
2431 return result;
2432 }
2433
2434 /* Write n as a big-endian size_t, MSB at address p, LSB at
2435 * p + sizeof(size_t) - 1.
2436 */
2437 static void
2438 write_size_t(void *p, size_t n)
2439 {
2440 uint8_t *q = (uint8_t *)p + SST - 1;
2441 int i;
2442
2443 for (i = SST; --i >= 0; --q) {
2444 *q = (uint8_t)(n & 0xff);
2445 n >>= 8;
2446 }
2447 }
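/* For example, on a 64-bit build write_size_t(p, 0x0102) stores the bytes
 * 00 00 00 00 00 00 01 02 at p (most significant byte first), and
 * read_size_t(p) then returns 0x0102 regardless of the host byte order. */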
2448
2449 /* Let S = sizeof(size_t). The debug malloc asks for 4 * S extra bytes and
2450 fills them with useful stuff, here calling the underlying malloc's result p:
2451
2452 p[0: S]
2453 Number of bytes originally asked for. This is a size_t, big-endian (easier
2454 to read in a memory dump).
2455 p[S]
2456 API ID. See PEP 445. This is a single character: 'r', 'm' or 'o' for the
2456 raw, mem and object domains respectively.
2457 p[S+1: 2*S]
2458 Copies of PYMEM_FORBIDDENBYTE. Used to catch under- writes and reads.
2459 p[2*S: 2*S+n]
2460 The requested memory, filled with copies of PYMEM_CLEANBYTE.
2461 Used to catch reference to uninitialized memory.
2462 &p[2*S] is returned. Note that this is 8-byte aligned if pymalloc
2463 handled the request itself.
2464 p[2*S+n: 2*S+n+S]
2465 Copies of PYMEM_FORBIDDENBYTE. Used to catch over- writes and reads.
2466 p[2*S+n+S: 2*S+n+2*S]
2467 A serial number, incremented by 1 on each call to _PyMem_DebugMalloc
2468 and _PyMem_DebugRealloc.
2469 This is a big-endian size_t.
2470 If "bad memory" is detected later, the serial number gives an
2471 excellent way to set a breakpoint on the next run, to capture the
2472 instant at which this block was passed out.
2473
2474 If PYMEM_DEBUG_SERIALNO is not defined (default), the debug malloc only asks
2475 for 3 * S extra bytes, and omits the last serialno field.
2476 */
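/* Concrete example of the layout above (assuming SST == 8,
 * PYMEM_DEBUG_SERIALNO defined, and a 4-byte request through the object
 * domain, whose API ID is 'o'): the underlying malloc returns p, which is
 * 4 + 4*8 == 36 bytes long and is filled in as
 *
 *     p[0:8]     the value 4 as a big-endian size_t
 *     p[8]       'o'                      API ID
 *     p[9:16]    7 PYMEM_FORBIDDENBYTEs   leading pad
 *     p[16:20]   4 PYMEM_CLEANBYTEs       handed to the caller
 *     p[20:28]   8 PYMEM_FORBIDDENBYTEs   trailing pad
 *     p[28:36]   serial number as a big-endian size_t
 *
 * and &p[16] is the pointer the caller actually sees. */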
2477
2478 static void *
2479 _PyMem_DebugRawAlloc(int use_calloc, void *ctx, size_t nbytes)
2480 {
2481 debug_alloc_api_t *api = (debug_alloc_api_t *)ctx;
2482 uint8_t *p; /* base address of malloc'ed pad block */
2483 uint8_t *data; /* p + 2*SST == pointer to data bytes */
2484 uint8_t *tail; /* data + nbytes == pointer to tail pad bytes */
2485 size_t total; /* nbytes + PYMEM_DEBUG_EXTRA_BYTES */
2486
2487 if (nbytes > (size_t)PY_SSIZE_T_MAX - PYMEM_DEBUG_EXTRA_BYTES) {
2488 /* integer overflow: can't represent total as a Py_ssize_t */
2489 return NULL;
2490 }
2491 total = nbytes + PYMEM_DEBUG_EXTRA_BYTES;
2492
2493 /* Layout: [SSSS IFFF CCCC...CCCC FFFF NNNN]
2494 ^--- p ^--- data ^--- tail
2495 S: nbytes stored as size_t
2496 I: API identifier (1 byte)
2497 F: Forbidden bytes (size_t - 1 bytes before, size_t bytes after)
2498 C: Clean bytes used later to store actual data
2499 N: Serial number stored as size_t
2500
2501 If PYMEM_DEBUG_SERIALNO is not defined (default), the last NNNN field
2502 is omitted. */
2503
2504 if (use_calloc) {
2505 p = (uint8_t *)api->alloc.calloc(api->alloc.ctx, 1, total);
2506 }
2507 else {
2508 p = (uint8_t *)api->alloc.malloc(api->alloc.ctx, total);
2509 }
2510 if (p == NULL) {
2511 return NULL;
2512 }
2513 data = p + 2*SST;
2514
2515 #ifdef PYMEM_DEBUG_SERIALNO
2516 bumpserialno();
2517 #endif
2518
2519 /* at p, write size (SST bytes), id (1 byte), pad (SST-1 bytes) */
2520 write_size_t(p, nbytes);
2521 p[SST] = (uint8_t)api->api_id;
2522 memset(p + SST + 1, PYMEM_FORBIDDENBYTE, SST-1);
2523
2524 if (nbytes > 0 && !use_calloc) {
2525 memset(data, PYMEM_CLEANBYTE, nbytes);
2526 }
2527
2528 /* at tail, write pad (SST bytes) and serialno (SST bytes) */
2529 tail = data + nbytes;
2530 memset(tail, PYMEM_FORBIDDENBYTE, SST);
2531 #ifdef PYMEM_DEBUG_SERIALNO
2532 write_size_t(tail + SST, serialno);
2533 #endif
2534
2535 return data;
2536 }
2537
2538 static void *
2539 _PyMem_DebugRawMalloc(void *ctx, size_t nbytes)
2540 {
2541 return _PyMem_DebugRawAlloc(0, ctx, nbytes);
2542 }
2543
2544 static void *
2545 _PyMem_DebugRawCalloc(void *ctx, size_t nelem, size_t elsize)
2546 {
2547 size_t nbytes;
2548 assert(elsize == 0 || nelem <= (size_t)PY_SSIZE_T_MAX / elsize);
2549 nbytes = nelem * elsize;
2550 return _PyMem_DebugRawAlloc(1, ctx, nbytes);
2551 }
2552
2553
2554 /* The debug free first checks the 2*SST bytes on each end for sanity (in
2555 particular, that the FORBIDDENBYTEs with the api ID are still intact).
2556 Then fills the original bytes with PYMEM_DEADBYTE.
2557 Then calls the underlying free.
2558 */
2559 static void
2560 _PyMem_DebugRawFree(void *ctx, void *p)
2561 {
2562 /* PyMem_Free(NULL) has no effect */
2563 if (p == NULL) {
2564 return;
2565 }
2566
2567 debug_alloc_api_t *api = (debug_alloc_api_t *)ctx;
2568 uint8_t *q = (uint8_t *)p - 2*SST; /* address returned from malloc */
2569 size_t nbytes;
2570
2571 _PyMem_DebugCheckAddress(__func__, api->api_id, p);
2572 nbytes = read_size_t(q);
2573 nbytes += PYMEM_DEBUG_EXTRA_BYTES;
2574 memset(q, PYMEM_DEADBYTE, nbytes);
2575 api->alloc.free(api->alloc.ctx, q);
2576 }
2577
2578
2579 static void *
2580 _PyMem_DebugRawRealloc(void *ctx, void *p, size_t nbytes)
2581 {
2582 if (p == NULL) {
2583 return _PyMem_DebugRawAlloc(0, ctx, nbytes);
2584 }
2585
2586 debug_alloc_api_t *api = (debug_alloc_api_t *)ctx;
2587 uint8_t *head; /* base address of malloc'ed pad block */
2588 uint8_t *data; /* pointer to data bytes */
2589 uint8_t *r;
2590 uint8_t *tail; /* data + nbytes == pointer to tail pad bytes */
2591 size_t total; /* nbytes + PYMEM_DEBUG_EXTRA_BYTES */
2592 size_t original_nbytes;
2593 #define ERASED_SIZE 64
2594 uint8_t save[2*ERASED_SIZE]; /* A copy of erased bytes. */
2595
2596 _PyMem_DebugCheckAddress(__func__, api->api_id, p);
2597
2598 data = (uint8_t *)p;
2599 head = data - 2*SST;
2600 original_nbytes = read_size_t(head);
2601 if (nbytes > (size_t)PY_SSIZE_T_MAX - PYMEM_DEBUG_EXTRA_BYTES) {
2602 /* integer overflow: can't represent total as a Py_ssize_t */
2603 return NULL;
2604 }
2605 total = nbytes + PYMEM_DEBUG_EXTRA_BYTES;
2606
2607 tail = data + original_nbytes;
2608 #ifdef PYMEM_DEBUG_SERIALNO
2609 size_t block_serialno = read_size_t(tail + SST);
2610 #endif
2611 /* Mark the header, the trailer, ERASED_SIZE bytes at the beginning and
2612 ERASED_SIZE bytes at the end as dead, and save a copy of the erased bytes.
2613 */
2614 if (original_nbytes <= sizeof(save)) {
2615 memcpy(save, data, original_nbytes);
2616 memset(data - 2 * SST, PYMEM_DEADBYTE,
2617 original_nbytes + PYMEM_DEBUG_EXTRA_BYTES);
2618 }
2619 else {
2620 memcpy(save, data, ERASED_SIZE);
2621 memset(head, PYMEM_DEADBYTE, ERASED_SIZE + 2 * SST);
2622 memcpy(&save[ERASED_SIZE], tail - ERASED_SIZE, ERASED_SIZE);
2623 memset(tail - ERASED_SIZE, PYMEM_DEADBYTE,
2624 ERASED_SIZE + PYMEM_DEBUG_EXTRA_BYTES - 2 * SST);
2625 }
2626
2627 /* Resize and add decorations. */
2628 r = (uint8_t *)api->alloc.realloc(api->alloc.ctx, head, total);
2629 if (r == NULL) {
2630 /* if realloc() failed: rewrite header and footer which have
2631 just been erased */
2632 nbytes = original_nbytes;
2633 }
2634 else {
2635 head = r;
2636 #ifdef PYMEM_DEBUG_SERIALNO
2637 bumpserialno();
2638 block_serialno = serialno;
2639 #endif
2640 }
2641 data = head + 2*SST;
2642
2643 write_size_t(head, nbytes);
2644 head[SST] = (uint8_t)api->api_id;
2645 memset(head + SST + 1, PYMEM_FORBIDDENBYTE, SST-1);
2646
2647 tail = data + nbytes;
2648 memset(tail, PYMEM_FORBIDDENBYTE, SST);
2649 #ifdef PYMEM_DEBUG_SERIALNO
2650 write_size_t(tail + SST, block_serialno);
2651 #endif
2652
2653 /* Restore saved bytes. */
2654 if (original_nbytes <= sizeof(save)) {
2655 memcpy(data, save, Py_MIN(nbytes, original_nbytes));
2656 }
2657 else {
2658 size_t i = original_nbytes - ERASED_SIZE;
2659 memcpy(data, save, Py_MIN(nbytes, ERASED_SIZE));
2660 if (nbytes > i) {
2661 memcpy(data + i, &save[ERASED_SIZE],
2662 Py_MIN(nbytes - i, ERASED_SIZE));
2663 }
2664 }
2665
2666 if (r == NULL) {
2667 return NULL;
2668 }
2669
2670 if (nbytes > original_nbytes) {
2671 /* growing: mark new extra memory clean */
2672 memset(data + original_nbytes, PYMEM_CLEANBYTE,
2673 nbytes - original_nbytes);
2674 }
2675
2676 return data;
2677 }
2678
2679 static inline void
2680 _PyMem_DebugCheckGIL(const char *func)
2681 {
2682 if (!PyGILState_Check()) {
2683 _Py_FatalErrorFunc(func,
2684 "Python memory allocator called "
2685 "without holding the GIL");
2686 }
2687 }
2688
2689 static void *
2690 _PyMem_DebugMalloc(void *ctx, size_t nbytes)
2691 {
2692 _PyMem_DebugCheckGIL(__func__);
2693 return _PyMem_DebugRawMalloc(ctx, nbytes);
2694 }
2695
2696 static void *
2697 _PyMem_DebugCalloc(void *ctx, size_t nelem, size_t elsize)
2698 {
2699 _PyMem_DebugCheckGIL(__func__);
2700 return _PyMem_DebugRawCalloc(ctx, nelem, elsize);
2701 }
2702
2703
2704 static void
2705 _PyMem_DebugFree(void *ctx, void *ptr)
2706 {
2707 _PyMem_DebugCheckGIL(__func__);
2708 _PyMem_DebugRawFree(ctx, ptr);
2709 }
2710
2711
2712 static void *
2713 _PyMem_DebugRealloc(void *ctx, void *ptr, size_t nbytes)
2714 {
2715 _PyMem_DebugCheckGIL(__func__);
2716 return _PyMem_DebugRawRealloc(ctx, ptr, nbytes);
2717 }
2718
2719 /* Check the forbidden bytes on both ends of the memory allocated for p.
2720 * If anything is wrong, print info to stderr via _PyObject_DebugDumpAddress,
2721 * and call Py_FatalError to kill the program.
2722 * The API id is also checked.
2723 */
2724 static void
2725 _PyMem_DebugCheckAddress(const char *func, char api, const void *p)
2726 {
2727 assert(p != NULL);
2728
2729 const uint8_t *q = (const uint8_t *)p;
2730 size_t nbytes;
2731 const uint8_t *tail;
2732 int i;
2733 char id;
2734
2735 /* Check the API id */
2736 id = (char)q[-SST];
2737 if (id != api) {
2738 _PyObject_DebugDumpAddress(p);
2739 _Py_FatalErrorFormat(func,
2740 "bad ID: Allocated using API '%c', "
2741 "verified using API '%c'",
2742 id, api);
2743 }
2744
2745 /* Check the stuff at the start of p first: if there's underwrite
2746 * corruption, the number-of-bytes field may be nuts, and checking
2747 * the tail could lead to a segfault then.
2748 */
2749 for (i = SST-1; i >= 1; --i) {
2750 if (*(q-i) != PYMEM_FORBIDDENBYTE) {
2751 _PyObject_DebugDumpAddress(p);
2752 _Py_FatalErrorFunc(func, "bad leading pad byte");
2753 }
2754 }
2755
2756 nbytes = read_size_t(q - 2*SST);
2757 tail = q + nbytes;
2758 for (i = 0; i < SST; ++i) {
2759 if (tail[i] != PYMEM_FORBIDDENBYTE) {
2760 _PyObject_DebugDumpAddress(p);
2761 _Py_FatalErrorFunc(func, "bad trailing pad byte");
2762 }
2763 }
2764 }
2765
2766 /* Display info to stderr about the memory block at p. */
2767 static void
2768 _PyObject_DebugDumpAddress(const void *p)
2769 {
2770 const uint8_t *q = (const uint8_t *)p;
2771 const uint8_t *tail;
2772 size_t nbytes;
2773 int i;
2774 int ok;
2775 char id;
2776
2777 fprintf(stderr, "Debug memory block at address p=%p:", p);
2778 if (p == NULL) {
2779 fprintf(stderr, "\n");
2780 return;
2781 }
2782 id = (char)q[-SST];
2783 fprintf(stderr, " API '%c'\n", id);
2784
2785 nbytes = read_size_t(q - 2*SST);
2786 fprintf(stderr, " %zu bytes originally requested\n", nbytes);
2787
2788 /* In case this is nuts, check the leading pad bytes first. */
2789 fprintf(stderr, " The %d pad bytes at p-%d are ", SST-1, SST-1);
2790 ok = 1;
2791 for (i = 1; i <= SST-1; ++i) {
2792 if (*(q-i) != PYMEM_FORBIDDENBYTE) {
2793 ok = 0;
2794 break;
2795 }
2796 }
2797 if (ok)
2798 fputs("FORBIDDENBYTE, as expected.\n", stderr);
2799 else {
2800 fprintf(stderr, "not all FORBIDDENBYTE (0x%02x):\n",
2801 PYMEM_FORBIDDENBYTE);
2802 for (i = SST-1; i >= 1; --i) {
2803 const uint8_t byte = *(q-i);
2804 fprintf(stderr, " at p-%d: 0x%02x", i, byte);
2805 if (byte != PYMEM_FORBIDDENBYTE)
2806 fputs(" *** OUCH", stderr);
2807 fputc('\n', stderr);
2808 }
2809
2810 fputs(" Because memory is corrupted at the start, the "
2811 "count of bytes requested\n"
2812 " may be bogus, and checking the trailing pad "
2813 "bytes may segfault.\n", stderr);
2814 }
2815
2816 tail = q + nbytes;
2817 fprintf(stderr, " The %d pad bytes at tail=%p are ", SST, (void *)tail);
2818 ok = 1;
2819 for (i = 0; i < SST; ++i) {
2820 if (tail[i] != PYMEM_FORBIDDENBYTE) {
2821 ok = 0;
2822 break;
2823 }
2824 }
2825 if (ok)
2826 fputs("FORBIDDENBYTE, as expected.\n", stderr);
2827 else {
2828 fprintf(stderr, "not all FORBIDDENBYTE (0x%02x):\n",
2829 PYMEM_FORBIDDENBYTE);
2830 for (i = 0; i < SST; ++i) {
2831 const uint8_t byte = tail[i];
2832 fprintf(stderr, " at tail+%d: 0x%02x",
2833 i, byte);
2834 if (byte != PYMEM_FORBIDDENBYTE)
2835 fputs(" *** OUCH", stderr);
2836 fputc('\n', stderr);
2837 }
2838 }
2839
2840 #ifdef PYMEM_DEBUG_SERIALNO
2841 size_t serial = read_size_t(tail + SST);
2842 fprintf(stderr,
2843 " The block was made by call #%zu to debug malloc/realloc.\n",
2844 serial);
2845 #endif
2846
2847 if (nbytes > 0) {
2848 i = 0;
2849 fputs(" Data at p:", stderr);
2850 /* print up to 8 bytes at the start */
2851 while (q < tail && i < 8) {
2852 fprintf(stderr, " %02x", *q);
2853 ++i;
2854 ++q;
2855 }
2856 /* and up to 8 at the end */
2857 if (q < tail) {
2858 if (tail - q > 8) {
2859 fputs(" ...", stderr);
2860 q = tail - 8;
2861 }
2862 while (q < tail) {
2863 fprintf(stderr, " %02x", *q);
2864 ++q;
2865 }
2866 }
2867 fputc('\n', stderr);
2868 }
2869 fputc('\n', stderr);
2870
2871 fflush(stderr);
2872 _PyMem_DumpTraceback(fileno(stderr), p);
2873 }
2874
2875
2876 static size_t
2877 printone(FILE *out, const char* msg, size_t value)
2878 {
2879 int i, k;
2880 char buf[100];
2881 size_t origvalue = value;
2882
2883 fputs(msg, out);
2884 for (i = (int)strlen(msg); i < 35; ++i)
2885 fputc(' ', out);
2886 fputc('=', out);
2887
2888 /* Write the value with commas. */
2889 i = 22;
2890 buf[i--] = '\0';
2891 buf[i--] = '\n';
2892 k = 3;
2893 do {
2894 size_t nextvalue = value / 10;
2895 unsigned int digit = (unsigned int)(value - nextvalue * 10);
2896 value = nextvalue;
2897 buf[i--] = (char)(digit + '0');
2898 --k;
2899 if (k == 0 && value && i >= 0) {
2900 k = 3;
2901 buf[i--] = ',';
2902 }
2903 } while (value && i >= 0);
2904
2905 while (i >= 0)
2906 buf[i--] = ' ';
2907 fputs(buf, out);
2908
2909 return origvalue;
2910 }
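/* For instance, printone(out, "# bytes in allocated blocks", 1234567)
 * produces a line roughly like
 *
 *     # bytes in allocated blocks        =            1,234,567
 *
 * i.e. the message padded to 35 columns, then '=', then the value
 * right-aligned in a fixed-width field with thousands separators. */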
2911
2912 void
2913 _PyDebugAllocatorStats(FILE *out,
2914 const char *block_name, int num_blocks, size_t sizeof_block)
2915 {
2916 char buf1[128];
2917 char buf2[128];
2918 PyOS_snprintf(buf1, sizeof(buf1),
2919 "%d %ss * %zd bytes each",
2920 num_blocks, block_name, sizeof_block);
2921 PyOS_snprintf(buf2, sizeof(buf2),
2922 "%48s ", buf1);
2923 (void)printone(out, buf2, num_blocks * sizeof_block);
2924 }
2925
2926
2927 #ifdef WITH_PYMALLOC
2928
2929 #ifdef Py_DEBUG
2930 /* Is target in the list? The list is traversed via the nextpool pointers.
2931 * The list may be NULL-terminated, or circular. Return 1 if target is in
2932 * list, else 0.
2933 */
2934 static int
2935 pool_is_in_list(const poolp target, poolp list)
2936 {
2937 poolp origlist = list;
2938 assert(target != NULL);
2939 if (list == NULL)
2940 return 0;
2941 do {
2942 if (target == list)
2943 return 1;
2944 list = list->nextpool;
2945 } while (list != NULL && list != origlist);
2946 return 0;
2947 }
2948 #endif
2949
2950 /* Print summary info to "out" about the state of pymalloc's structures.
2951 * In Py_DEBUG mode, also perform some expensive internal consistency
2952 * checks.
2953 *
2954 * Return 0 if the memory debug hooks are not installed or no statistics were
2955 * written into out; return 1 otherwise.
2956 */
2957 int
2958 _PyObject_DebugMallocStats(FILE *out)
2959 {
2960 if (!_PyMem_PymallocEnabled()) {
2961 return 0;
2962 }
2963
2964 uint i;
2965 const uint numclasses = SMALL_REQUEST_THRESHOLD >> ALIGNMENT_SHIFT;
2966 /* # of pools, allocated blocks, and free blocks per class index */
2967 size_t numpools[SMALL_REQUEST_THRESHOLD >> ALIGNMENT_SHIFT];
2968 size_t numblocks[SMALL_REQUEST_THRESHOLD >> ALIGNMENT_SHIFT];
2969 size_t numfreeblocks[SMALL_REQUEST_THRESHOLD >> ALIGNMENT_SHIFT];
2970 /* total # of allocated bytes in used and full pools */
2971 size_t allocated_bytes = 0;
2972 /* total # of available bytes in used pools */
2973 size_t available_bytes = 0;
2974 /* # of free pools + pools not yet carved out of current arena */
2975 uint numfreepools = 0;
2976 /* # of bytes for arena alignment padding */
2977 size_t arena_alignment = 0;
2978 /* # of bytes in used and full pools used for pool_headers */
2979 size_t pool_header_bytes = 0;
2980 /* # of bytes in used and full pools wasted due to quantization,
2981 * i.e. the necessarily leftover space at the ends of used and
2982 * full pools.
2983 */
2984 size_t quantization = 0;
2985 /* # of arenas actually allocated. */
2986 size_t narenas = 0;
2987 /* running total -- should equal narenas * ARENA_SIZE */
2988 size_t total;
2989 char buf[128];
2990
2991 fprintf(out, "Small block threshold = %d, in %u size classes.\n",
2992 SMALL_REQUEST_THRESHOLD, numclasses);
2993
2994 for (i = 0; i < numclasses; ++i)
2995 numpools[i] = numblocks[i] = numfreeblocks[i] = 0;
2996
2997 /* Because full pools aren't linked to from anything, it's easiest
2998 * to march over all the arenas. If we're lucky, most of the memory
2999 * will be living in full pools -- would be a shame to miss them.
3000 */
3001 for (i = 0; i < maxarenas; ++i) {
3002 uintptr_t base = arenas[i].address;
3003
3004 /* Skip arenas which are not allocated. */
3005 if (arenas[i].address == (uintptr_t)NULL)
3006 continue;
3007 narenas += 1;
3008
3009 numfreepools += arenas[i].nfreepools;
3010
3011 /* round up to pool alignment */
3012 if (base & (uintptr_t)POOL_SIZE_MASK) {
3013 arena_alignment += POOL_SIZE;
3014 base &= ~(uintptr_t)POOL_SIZE_MASK;
3015 base += POOL_SIZE;
3016 }
3017
3018 /* visit every pool in the arena */
3019 assert(base <= (uintptr_t) arenas[i].pool_address);
3020 for (; base < (uintptr_t) arenas[i].pool_address; base += POOL_SIZE) {
3021 poolp p = (poolp)base;
3022 const uint sz = p->szidx;
3023 uint freeblocks;
3024
3025 if (p->ref.count == 0) {
3026 /* currently unused */
3027 #ifdef Py_DEBUG
3028 assert(pool_is_in_list(p, arenas[i].freepools));
3029 #endif
3030 continue;
3031 }
3032 ++numpools[sz];
3033 numblocks[sz] += p->ref.count;
3034 freeblocks = NUMBLOCKS(sz) - p->ref.count;
3035 numfreeblocks[sz] += freeblocks;
3036 #ifdef Py_DEBUG
3037 if (freeblocks > 0)
3038 assert(pool_is_in_list(p, usedpools[sz + sz]));
3039 #endif
3040 }
3041 }
3042 assert(narenas == narenas_currently_allocated);
3043
3044 fputc('\n', out);
3045 fputs("class size num pools blocks in use avail blocks\n"
3046 "----- ---- --------- ------------- ------------\n",
3047 out);
3048
3049 for (i = 0; i < numclasses; ++i) {
3050 size_t p = numpools[i];
3051 size_t b = numblocks[i];
3052 size_t f = numfreeblocks[i];
3053 uint size = INDEX2SIZE(i);
3054 if (p == 0) {
3055 assert(b == 0 && f == 0);
3056 continue;
3057 }
3058 fprintf(out, "%5u %6u %11zu %15zu %13zu\n",
3059 i, size, p, b, f);
3060 allocated_bytes += b * size;
3061 available_bytes += f * size;
3062 pool_header_bytes += p * POOL_OVERHEAD;
3063 quantization += p * ((POOL_SIZE - POOL_OVERHEAD) % size);
3064 }
3065 fputc('\n', out);
3066 #ifdef PYMEM_DEBUG_SERIALNO
3067 if (_PyMem_DebugEnabled()) {
3068 (void)printone(out, "# times object malloc called", serialno);
3069 }
3070 #endif
3071 (void)printone(out, "# arenas allocated total", ntimes_arena_allocated);
3072 (void)printone(out, "# arenas reclaimed", ntimes_arena_allocated - narenas);
3073 (void)printone(out, "# arenas highwater mark", narenas_highwater);
3074 (void)printone(out, "# arenas allocated current", narenas);
3075
3076 PyOS_snprintf(buf, sizeof(buf),
3077 "%zu arenas * %d bytes/arena",
3078 narenas, ARENA_SIZE);
3079 (void)printone(out, buf, narenas * ARENA_SIZE);
3080
3081 fputc('\n', out);
3082
3083 /* Account for what all of those arena bytes are being used for. */
3084 total = printone(out, "# bytes in allocated blocks", allocated_bytes);
3085 total += printone(out, "# bytes in available blocks", available_bytes);
3086
3087 PyOS_snprintf(buf, sizeof(buf),
3088 "%u unused pools * %d bytes", numfreepools, POOL_SIZE);
3089 total += printone(out, buf, (size_t)numfreepools * POOL_SIZE);
3090
3091 total += printone(out, "# bytes lost to pool headers", pool_header_bytes);
3092 total += printone(out, "# bytes lost to quantization", quantization);
3093 total += printone(out, "# bytes lost to arena alignment", arena_alignment);
3094 (void)printone(out, "Total", total);
3095 assert(narenas * ARENA_SIZE == total);
3096
3097 #if WITH_PYMALLOC_RADIX_TREE
3098 fputs("\narena map counts\n", out);
3099 #ifdef USE_INTERIOR_NODES
3100 (void)printone(out, "# arena map mid nodes", arena_map_mid_count);
3101 (void)printone(out, "# arena map bot nodes", arena_map_bot_count);
3102 fputc('\n', out);
3103 #endif
3104 total = printone(out, "# bytes lost to arena map root", sizeof(arena_map_root));
3105 #ifdef USE_INTERIOR_NODES
3106 total += printone(out, "# bytes lost to arena map mid",
3107 sizeof(arena_map_mid_t) * arena_map_mid_count);
3108 total += printone(out, "# bytes lost to arena map bot",
3109 sizeof(arena_map_bot_t) * arena_map_bot_count);
3110 (void)printone(out, "Total", total);
3111 #endif
3112 #endif
3113
3114 return 1;
3115 }
3116
3117 #endif /* #ifdef WITH_PYMALLOC */
3118