1 #include "Python.h"
2 #include "pycore_pymem.h"         // _PyTraceMalloc_Config
3 #include "pycore_code.h"         // stats
4 
5 #include <stdbool.h>
6 #include <stdlib.h>               // malloc()
7 
8 
9 /* Defined in tracemalloc.c */
10 extern void _PyMem_DumpTraceback(int fd, const void *ptr);
11 
12 
13 /* Python's malloc wrappers (see pymem.h) */
14 
15 #undef  uint
16 #define uint    unsigned int    /* assuming >= 16 bits */
17 
18 /* Forward declaration */
19 static void* _PyMem_DebugRawMalloc(void *ctx, size_t size);
20 static void* _PyMem_DebugRawCalloc(void *ctx, size_t nelem, size_t elsize);
21 static void* _PyMem_DebugRawRealloc(void *ctx, void *ptr, size_t size);
22 static void _PyMem_DebugRawFree(void *ctx, void *ptr);
23 
24 static void* _PyMem_DebugMalloc(void *ctx, size_t size);
25 static void* _PyMem_DebugCalloc(void *ctx, size_t nelem, size_t elsize);
26 static void* _PyMem_DebugRealloc(void *ctx, void *ptr, size_t size);
27 static void _PyMem_DebugFree(void *ctx, void *p);
28 
29 static void _PyObject_DebugDumpAddress(const void *p);
30 static void _PyMem_DebugCheckAddress(const char *func, char api_id, const void *p);
31 
32 static void _PyMem_SetupDebugHooksDomain(PyMemAllocatorDomain domain);
33 
34 #if defined(__has_feature)  /* Clang */
35 #  if __has_feature(address_sanitizer) /* is ASAN enabled? */
36 #    define _Py_NO_SANITIZE_ADDRESS \
37         __attribute__((no_sanitize("address")))
38 #  endif
39 #  if __has_feature(thread_sanitizer)  /* is TSAN enabled? */
40 #    define _Py_NO_SANITIZE_THREAD __attribute__((no_sanitize_thread))
41 #  endif
42 #  if __has_feature(memory_sanitizer)  /* is MSAN enabled? */
43 #    define _Py_NO_SANITIZE_MEMORY __attribute__((no_sanitize_memory))
44 #  endif
45 #elif defined(__GNUC__)
46 #  if defined(__SANITIZE_ADDRESS__)    /* GCC 4.8+, is ASAN enabled? */
47 #    define _Py_NO_SANITIZE_ADDRESS \
48         __attribute__((no_sanitize_address))
49 #  endif
50    // TSAN is supported since GCC 5.1, but __SANITIZE_THREAD__ macro
51    // is provided only since GCC 7.
52 #  if __GNUC__ > 5 || (__GNUC__ == 5 && __GNUC_MINOR__ >= 1)
53 #    define _Py_NO_SANITIZE_THREAD __attribute__((no_sanitize_thread))
54 #  endif
55 #endif
56 
57 #ifndef _Py_NO_SANITIZE_ADDRESS
58 #  define _Py_NO_SANITIZE_ADDRESS
59 #endif
60 #ifndef _Py_NO_SANITIZE_THREAD
61 #  define _Py_NO_SANITIZE_THREAD
62 #endif
63 #ifndef _Py_NO_SANITIZE_MEMORY
64 #  define _Py_NO_SANITIZE_MEMORY
65 #endif
66 
67 #ifdef WITH_PYMALLOC
68 
69 #ifdef MS_WINDOWS
70 #  include <windows.h>
71 #elif defined(HAVE_MMAP)
72 #  include <sys/mman.h>
73 #  ifdef MAP_ANONYMOUS
74 #    define ARENAS_USE_MMAP
75 #  endif
76 #endif
77 
78 /* Forward declaration */
79 static void* _PyObject_Malloc(void *ctx, size_t size);
80 static void* _PyObject_Calloc(void *ctx, size_t nelem, size_t elsize);
81 static void _PyObject_Free(void *ctx, void *p);
82 static void* _PyObject_Realloc(void *ctx, void *ptr, size_t size);
83 #endif
84 
85 
86 /* bpo-35053: Declare tracemalloc configuration here rather than in
87    Modules/_tracemalloc.c because _tracemalloc can be compiled as a dynamic
88    library, whereas _Py_NewReference() requires it. */
89 struct _PyTraceMalloc_Config _Py_tracemalloc_config = _PyTraceMalloc_Config_INIT;
90 
91 
92 static void *
93 _PyMem_RawMalloc(void *ctx, size_t size)
94 {
95     /* PyMem_RawMalloc(0) means malloc(1). Some systems would return NULL
96        for malloc(0), which would be treated as an error. Some platforms would
97        return a pointer with no memory behind it, which would break pymalloc.
98        To solve these problems, allocate an extra byte. */
99     if (size == 0)
100         size = 1;
101     return malloc(size);
102 }
103 
104 static void *
105 _PyMem_RawCalloc(void *ctx, size_t nelem, size_t elsize)
106 {
107     /* PyMem_RawCalloc(0, 0) means calloc(1, 1). Some systems would return NULL
108        for calloc(0, 0), which would be treated as an error. Some platforms
109        would return a pointer with no memory behind it, which would break
110        pymalloc.  To solve these problems, allocate an extra byte. */
111     if (nelem == 0 || elsize == 0) {
112         nelem = 1;
113         elsize = 1;
114     }
115     return calloc(nelem, elsize);
116 }
117 
118 static void *
119 _PyMem_RawRealloc(void *ctx, void *ptr, size_t size)
120 {
121     if (size == 0)
122         size = 1;
123     return realloc(ptr, size);
124 }
125 
126 static void
127 _PyMem_RawFree(void *ctx, void *ptr)
128 {
129     free(ptr);
130 }
131 
132 
133 #ifdef MS_WINDOWS
134 static void *
135 _PyObject_ArenaVirtualAlloc(void *ctx, size_t size)
136 {
137     return VirtualAlloc(NULL, size,
138                         MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE);
139 }
140 
141 static void
142 _PyObject_ArenaVirtualFree(void *ctx, void *ptr, size_t size)
143 {
144     VirtualFree(ptr, 0, MEM_RELEASE);
145 }
146 
147 #elif defined(ARENAS_USE_MMAP)
148 static void *
149 _PyObject_ArenaMmap(void *ctx, size_t size)
150 {
151     void *ptr;
152     ptr = mmap(NULL, size, PROT_READ|PROT_WRITE,
153                MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
154     if (ptr == MAP_FAILED)
155         return NULL;
156     assert(ptr != NULL);
157     return ptr;
158 }
159 
160 static void
161 _PyObject_ArenaMunmap(void *ctx, void *ptr, size_t size)
162 {
163     munmap(ptr, size);
164 }
165 
166 #else
167 static void *
168 _PyObject_ArenaMalloc(void *ctx, size_t size)
169 {
170     return malloc(size);
171 }
172 
173 static void
174 _PyObject_ArenaFree(void *ctx, void *ptr, size_t size)
175 {
176     free(ptr);
177 }
178 #endif
179 
180 #define MALLOC_ALLOC {NULL, _PyMem_RawMalloc, _PyMem_RawCalloc, _PyMem_RawRealloc, _PyMem_RawFree}
181 #ifdef WITH_PYMALLOC
182 #  define PYMALLOC_ALLOC {NULL, _PyObject_Malloc, _PyObject_Calloc, _PyObject_Realloc, _PyObject_Free}
183 #endif
184 
185 #define PYRAW_ALLOC MALLOC_ALLOC
186 #ifdef WITH_PYMALLOC
187 #  define PYOBJ_ALLOC PYMALLOC_ALLOC
188 #else
189 #  define PYOBJ_ALLOC MALLOC_ALLOC
190 #endif
191 #define PYMEM_ALLOC PYOBJ_ALLOC
192 
193 typedef struct {
194     /* We tag each block with an API ID in order to detect API violations */
195     char api_id;
196     PyMemAllocatorEx alloc;
197 } debug_alloc_api_t;
198 static struct {
199     debug_alloc_api_t raw;
200     debug_alloc_api_t mem;
201     debug_alloc_api_t obj;
202 } _PyMem_Debug = {
203     {'r', PYRAW_ALLOC},
204     {'m', PYMEM_ALLOC},
205     {'o', PYOBJ_ALLOC}
206     };
207 
208 #define PYDBGRAW_ALLOC \
209     {&_PyMem_Debug.raw, _PyMem_DebugRawMalloc, _PyMem_DebugRawCalloc, _PyMem_DebugRawRealloc, _PyMem_DebugRawFree}
210 #define PYDBGMEM_ALLOC \
211     {&_PyMem_Debug.mem, _PyMem_DebugMalloc, _PyMem_DebugCalloc, _PyMem_DebugRealloc, _PyMem_DebugFree}
212 #define PYDBGOBJ_ALLOC \
213     {&_PyMem_Debug.obj, _PyMem_DebugMalloc, _PyMem_DebugCalloc, _PyMem_DebugRealloc, _PyMem_DebugFree}
214 
215 #ifdef Py_DEBUG
216 static PyMemAllocatorEx _PyMem_Raw = PYDBGRAW_ALLOC;
217 static PyMemAllocatorEx _PyMem = PYDBGMEM_ALLOC;
218 static PyMemAllocatorEx _PyObject = PYDBGOBJ_ALLOC;
219 #else
220 static PyMemAllocatorEx _PyMem_Raw = PYRAW_ALLOC;
221 static PyMemAllocatorEx _PyMem = PYMEM_ALLOC;
222 static PyMemAllocatorEx _PyObject = PYOBJ_ALLOC;
223 #endif
224 
225 
226 static int
227 pymem_set_default_allocator(PyMemAllocatorDomain domain, int debug,
228                             PyMemAllocatorEx *old_alloc)
229 {
230     if (old_alloc != NULL) {
231         PyMem_GetAllocator(domain, old_alloc);
232     }
233 
234 
235     PyMemAllocatorEx new_alloc;
236     switch(domain)
237     {
238     case PYMEM_DOMAIN_RAW:
239         new_alloc = (PyMemAllocatorEx)PYRAW_ALLOC;
240         break;
241     case PYMEM_DOMAIN_MEM:
242         new_alloc = (PyMemAllocatorEx)PYMEM_ALLOC;
243         break;
244     case PYMEM_DOMAIN_OBJ:
245         new_alloc = (PyMemAllocatorEx)PYOBJ_ALLOC;
246         break;
247     default:
248         /* unknown domain */
249         return -1;
250     }
251     PyMem_SetAllocator(domain, &new_alloc);
252     if (debug) {
253         _PyMem_SetupDebugHooksDomain(domain);
254     }
255     return 0;
256 }
257 
258 
259 int
260 _PyMem_SetDefaultAllocator(PyMemAllocatorDomain domain,
261                            PyMemAllocatorEx *old_alloc)
262 {
263 #ifdef Py_DEBUG
264     const int debug = 1;
265 #else
266     const int debug = 0;
267 #endif
268     return pymem_set_default_allocator(domain, debug, old_alloc);
269 }
270 
271 
272 int
273 _PyMem_GetAllocatorName(const char *name, PyMemAllocatorName *allocator)
274 {
275     if (name == NULL || *name == '\0') {
276         /* PYTHONMALLOC is empty or is not set or ignored (-E/-I command line
277            options): use default memory allocators */
278         *allocator = PYMEM_ALLOCATOR_DEFAULT;
279     }
280     else if (strcmp(name, "default") == 0) {
281         *allocator = PYMEM_ALLOCATOR_DEFAULT;
282     }
283     else if (strcmp(name, "debug") == 0) {
284         *allocator = PYMEM_ALLOCATOR_DEBUG;
285     }
286 #ifdef WITH_PYMALLOC
287     else if (strcmp(name, "pymalloc") == 0) {
288         *allocator = PYMEM_ALLOCATOR_PYMALLOC;
289     }
290     else if (strcmp(name, "pymalloc_debug") == 0) {
291         *allocator = PYMEM_ALLOCATOR_PYMALLOC_DEBUG;
292     }
293 #endif
294     else if (strcmp(name, "malloc") == 0) {
295         *allocator = PYMEM_ALLOCATOR_MALLOC;
296     }
297     else if (strcmp(name, "malloc_debug") == 0) {
298         *allocator = PYMEM_ALLOCATOR_MALLOC_DEBUG;
299     }
300     else {
301         /* unknown allocator */
302         return -1;
303     }
304     return 0;
305 }
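
/* Example (sketch): how the helper above pairs with _PyMem_SetupAllocators()
 * defined further below.  The real wiring lives in the interpreter's
 * pre-initialization code, which reads PYTHONMALLOC; this simplified
 * fragment (and the name apply_pythonmalloc_env) is illustrative only. */
#if 0
static int
apply_pythonmalloc_env(void)
{
    PyMemAllocatorName name;
    /* getenv() may return NULL; _PyMem_GetAllocatorName() treats that as
       "use the default allocators". */
    if (_PyMem_GetAllocatorName(getenv("PYTHONMALLOC"), &name) < 0) {
        return -1;              /* unknown allocator name */
    }
    return _PyMem_SetupAllocators(name);
}
#endif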
306 
307 
308 int
309 _PyMem_SetupAllocators(PyMemAllocatorName allocator)
310 {
311     switch (allocator) {
312     case PYMEM_ALLOCATOR_NOT_SET:
313         /* do nothing */
314         break;
315 
316     case PYMEM_ALLOCATOR_DEFAULT:
317         (void)_PyMem_SetDefaultAllocator(PYMEM_DOMAIN_RAW, NULL);
318         (void)_PyMem_SetDefaultAllocator(PYMEM_DOMAIN_MEM, NULL);
319         (void)_PyMem_SetDefaultAllocator(PYMEM_DOMAIN_OBJ, NULL);
320         break;
321 
322     case PYMEM_ALLOCATOR_DEBUG:
323         (void)pymem_set_default_allocator(PYMEM_DOMAIN_RAW, 1, NULL);
324         (void)pymem_set_default_allocator(PYMEM_DOMAIN_MEM, 1, NULL);
325         (void)pymem_set_default_allocator(PYMEM_DOMAIN_OBJ, 1, NULL);
326         break;
327 
328 #ifdef WITH_PYMALLOC
329     case PYMEM_ALLOCATOR_PYMALLOC:
330     case PYMEM_ALLOCATOR_PYMALLOC_DEBUG:
331     {
332         PyMemAllocatorEx malloc_alloc = MALLOC_ALLOC;
333         PyMem_SetAllocator(PYMEM_DOMAIN_RAW, &malloc_alloc);
334 
335         PyMemAllocatorEx pymalloc = PYMALLOC_ALLOC;
336         PyMem_SetAllocator(PYMEM_DOMAIN_MEM, &pymalloc);
337         PyMem_SetAllocator(PYMEM_DOMAIN_OBJ, &pymalloc);
338 
339         if (allocator == PYMEM_ALLOCATOR_PYMALLOC_DEBUG) {
340             PyMem_SetupDebugHooks();
341         }
342         break;
343     }
344 #endif
345 
346     case PYMEM_ALLOCATOR_MALLOC:
347     case PYMEM_ALLOCATOR_MALLOC_DEBUG:
348     {
349         PyMemAllocatorEx malloc_alloc = MALLOC_ALLOC;
350         PyMem_SetAllocator(PYMEM_DOMAIN_RAW, &malloc_alloc);
351         PyMem_SetAllocator(PYMEM_DOMAIN_MEM, &malloc_alloc);
352         PyMem_SetAllocator(PYMEM_DOMAIN_OBJ, &malloc_alloc);
353 
354         if (allocator == PYMEM_ALLOCATOR_MALLOC_DEBUG) {
355             PyMem_SetupDebugHooks();
356         }
357         break;
358     }
359 
360     default:
361         /* unknown allocator */
362         return -1;
363     }
364     return 0;
365 }
366 
367 
368 static int
369 pymemallocator_eq(PyMemAllocatorEx *a, PyMemAllocatorEx *b)
370 {
371     return (memcmp(a, b, sizeof(PyMemAllocatorEx)) == 0);
372 }
373 
374 
375 const char*
376 _PyMem_GetCurrentAllocatorName(void)
377 {
378     PyMemAllocatorEx malloc_alloc = MALLOC_ALLOC;
379 #ifdef WITH_PYMALLOC
380     PyMemAllocatorEx pymalloc = PYMALLOC_ALLOC;
381 #endif
382 
383     if (pymemallocator_eq(&_PyMem_Raw, &malloc_alloc) &&
384         pymemallocator_eq(&_PyMem, &malloc_alloc) &&
385         pymemallocator_eq(&_PyObject, &malloc_alloc))
386     {
387         return "malloc";
388     }
389 #ifdef WITH_PYMALLOC
390     if (pymemallocator_eq(&_PyMem_Raw, &malloc_alloc) &&
391         pymemallocator_eq(&_PyMem, &pymalloc) &&
392         pymemallocator_eq(&_PyObject, &pymalloc))
393     {
394         return "pymalloc";
395     }
396 #endif
397 
398     PyMemAllocatorEx dbg_raw = PYDBGRAW_ALLOC;
399     PyMemAllocatorEx dbg_mem = PYDBGMEM_ALLOC;
400     PyMemAllocatorEx dbg_obj = PYDBGOBJ_ALLOC;
401 
402     if (pymemallocator_eq(&_PyMem_Raw, &dbg_raw) &&
403         pymemallocator_eq(&_PyMem, &dbg_mem) &&
404         pymemallocator_eq(&_PyObject, &dbg_obj))
405     {
406         /* Debug hooks installed */
407         if (pymemallocator_eq(&_PyMem_Debug.raw.alloc, &malloc_alloc) &&
408             pymemallocator_eq(&_PyMem_Debug.mem.alloc, &malloc_alloc) &&
409             pymemallocator_eq(&_PyMem_Debug.obj.alloc, &malloc_alloc))
410         {
411             return "malloc_debug";
412         }
413 #ifdef WITH_PYMALLOC
414         if (pymemallocator_eq(&_PyMem_Debug.raw.alloc, &malloc_alloc) &&
415             pymemallocator_eq(&_PyMem_Debug.mem.alloc, &pymalloc) &&
416             pymemallocator_eq(&_PyMem_Debug.obj.alloc, &pymalloc))
417         {
418             return "pymalloc_debug";
419         }
420 #endif
421     }
422     return NULL;
423 }
424 
425 
426 #undef MALLOC_ALLOC
427 #undef PYMALLOC_ALLOC
428 #undef PYRAW_ALLOC
429 #undef PYMEM_ALLOC
430 #undef PYOBJ_ALLOC
431 #undef PYDBGRAW_ALLOC
432 #undef PYDBGMEM_ALLOC
433 #undef PYDBGOBJ_ALLOC
434 
435 
436 static PyObjectArenaAllocator _PyObject_Arena = {NULL,
437 #ifdef MS_WINDOWS
438     _PyObject_ArenaVirtualAlloc, _PyObject_ArenaVirtualFree
439 #elif defined(ARENAS_USE_MMAP)
440     _PyObject_ArenaMmap, _PyObject_ArenaMunmap
441 #else
442     _PyObject_ArenaMalloc, _PyObject_ArenaFree
443 #endif
444     };
445 
446 #ifdef WITH_PYMALLOC
447 static int
448 _PyMem_DebugEnabled(void)
449 {
450     return (_PyObject.malloc == _PyMem_DebugMalloc);
451 }
452 
453 static int
454 _PyMem_PymallocEnabled(void)
455 {
456     if (_PyMem_DebugEnabled()) {
457         return (_PyMem_Debug.obj.alloc.malloc == _PyObject_Malloc);
458     }
459     else {
460         return (_PyObject.malloc == _PyObject_Malloc);
461     }
462 }
463 #endif
464 
465 
466 static void
467 _PyMem_SetupDebugHooksDomain(PyMemAllocatorDomain domain)
468 {
469     PyMemAllocatorEx alloc;
470 
471     if (domain == PYMEM_DOMAIN_RAW) {
472         if (_PyMem_Raw.malloc == _PyMem_DebugRawMalloc) {
473             return;
474         }
475 
476         PyMem_GetAllocator(PYMEM_DOMAIN_RAW, &_PyMem_Debug.raw.alloc);
477         alloc.ctx = &_PyMem_Debug.raw;
478         alloc.malloc = _PyMem_DebugRawMalloc;
479         alloc.calloc = _PyMem_DebugRawCalloc;
480         alloc.realloc = _PyMem_DebugRawRealloc;
481         alloc.free = _PyMem_DebugRawFree;
482         PyMem_SetAllocator(PYMEM_DOMAIN_RAW, &alloc);
483     }
484     else if (domain == PYMEM_DOMAIN_MEM) {
485         if (_PyMem.malloc == _PyMem_DebugMalloc) {
486             return;
487         }
488 
489         PyMem_GetAllocator(PYMEM_DOMAIN_MEM, &_PyMem_Debug.mem.alloc);
490         alloc.ctx = &_PyMem_Debug.mem;
491         alloc.malloc = _PyMem_DebugMalloc;
492         alloc.calloc = _PyMem_DebugCalloc;
493         alloc.realloc = _PyMem_DebugRealloc;
494         alloc.free = _PyMem_DebugFree;
495         PyMem_SetAllocator(PYMEM_DOMAIN_MEM, &alloc);
496     }
497     else if (domain == PYMEM_DOMAIN_OBJ)  {
498         if (_PyObject.malloc == _PyMem_DebugMalloc) {
499             return;
500         }
501 
502         PyMem_GetAllocator(PYMEM_DOMAIN_OBJ, &_PyMem_Debug.obj.alloc);
503         alloc.ctx = &_PyMem_Debug.obj;
504         alloc.malloc = _PyMem_DebugMalloc;
505         alloc.calloc = _PyMem_DebugCalloc;
506         alloc.realloc = _PyMem_DebugRealloc;
507         alloc.free = _PyMem_DebugFree;
508         PyMem_SetAllocator(PYMEM_DOMAIN_OBJ, &alloc);
509     }
510 }
511 
512 
513 void
514 PyMem_SetupDebugHooks(void)
515 {
516     _PyMem_SetupDebugHooksDomain(PYMEM_DOMAIN_RAW);
517     _PyMem_SetupDebugHooksDomain(PYMEM_DOMAIN_MEM);
518     _PyMem_SetupDebugHooksDomain(PYMEM_DOMAIN_OBJ);
519 }
520 
521 void
522 PyMem_GetAllocator(PyMemAllocatorDomain domain, PyMemAllocatorEx *allocator)
523 {
524     switch(domain)
525     {
526     case PYMEM_DOMAIN_RAW: *allocator = _PyMem_Raw; break;
527     case PYMEM_DOMAIN_MEM: *allocator = _PyMem; break;
528     case PYMEM_DOMAIN_OBJ: *allocator = _PyObject; break;
529     default:
530         /* unknown domain: set all attributes to NULL */
531         allocator->ctx = NULL;
532         allocator->malloc = NULL;
533         allocator->calloc = NULL;
534         allocator->realloc = NULL;
535         allocator->free = NULL;
536     }
537 }
538 
539 void
540 PyMem_SetAllocator(PyMemAllocatorDomain domain, PyMemAllocatorEx *allocator)
541 {
542     switch(domain)
543     {
544     case PYMEM_DOMAIN_RAW: _PyMem_Raw = *allocator; break;
545     case PYMEM_DOMAIN_MEM: _PyMem = *allocator; break;
546     case PYMEM_DOMAIN_OBJ: _PyObject = *allocator; break;
547     /* ignore unknown domain */
548     }
549 }
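
/* Example (sketch): how an embedding application can hook a domain by
 * chaining to the previously installed allocator, here to count raw
 * allocations.  The PyMemAllocatorEx layout (ctx, malloc, calloc, realloc,
 * free) matches the tables above; the hook_* / install_raw_hook names are
 * illustrative only and not part of CPython. */
#if 0
typedef struct {
    PyMemAllocatorEx orig;      /* allocator being wrapped */
    size_t nallocs;             /* simple accounting */
} hook_ctx;

static void *hook_malloc(void *ctx, size_t size)
{ hook_ctx *h = ctx; h->nallocs++; return h->orig.malloc(h->orig.ctx, size); }

static void *hook_calloc(void *ctx, size_t nelem, size_t elsize)
{ hook_ctx *h = ctx; h->nallocs++; return h->orig.calloc(h->orig.ctx, nelem, elsize); }

static void *hook_realloc(void *ctx, void *ptr, size_t size)
{ hook_ctx *h = ctx; return h->orig.realloc(h->orig.ctx, ptr, size); }

static void hook_free(void *ctx, void *ptr)
{ hook_ctx *h = ctx; h->orig.free(h->orig.ctx, ptr); }

static void
install_raw_hook(hook_ctx *h)
{
    PyMemAllocatorEx alloc = {h, hook_malloc, hook_calloc,
                              hook_realloc, hook_free};
    PyMem_GetAllocator(PYMEM_DOMAIN_RAW, &h->orig);
    PyMem_SetAllocator(PYMEM_DOMAIN_RAW, &alloc);
}
#endif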
550 
551 void
552 PyObject_GetArenaAllocator(PyObjectArenaAllocator *allocator)
553 {
554     *allocator = _PyObject_Arena;
555 }
556 
557 void *
558 _PyObject_VirtualAlloc(size_t size)
559 {
560     return _PyObject_Arena.alloc(_PyObject_Arena.ctx, size);
561 }
562 
563 void
564 _PyObject_VirtualFree(void *obj, size_t size)
565 {
566     _PyObject_Arena.free(_PyObject_Arena.ctx, obj, size);
567 }
568 
569 void
570 PyObject_SetArenaAllocator(PyObjectArenaAllocator *allocator)
571 {
572     _PyObject_Arena = *allocator;
573 }
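
/* Example (sketch): installing a custom arena allocator, e.g. to route
 * arena requests through a custom mapping strategy.  The struct layout
 * (ctx, alloc, free) and the callback signatures match _PyObject_Arena
 * above; the my_arena_* names are illustrative only. */
#if 0
static void *
my_arena_alloc(void *ctx, size_t size)
{
    return malloc(size);        /* stand-in for mmap()/VirtualAlloc() */
}

static void
my_arena_free(void *ctx, void *ptr, size_t size)
{
    free(ptr);
}

static void
install_arena_allocator(void)
{
    PyObjectArenaAllocator arena = {NULL, my_arena_alloc, my_arena_free};
    PyObject_SetArenaAllocator(&arena);
}
#endif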
574 
575 void *
576 PyMem_RawMalloc(size_t size)
577 {
578     /*
579      * Limit ourselves to PY_SSIZE_T_MAX bytes to prevent security holes.
580      * Most python internals blindly use a signed Py_ssize_t to track
581      * things without checking for overflows or negatives.
582      * As size_t is unsigned, checking for size < 0 is not required.
583      */
584     if (size > (size_t)PY_SSIZE_T_MAX)
585         return NULL;
586     return _PyMem_Raw.malloc(_PyMem_Raw.ctx, size);
587 }
588 
589 void *
590 PyMem_RawCalloc(size_t nelem, size_t elsize)
591 {
592     /* see PyMem_RawMalloc() */
593     if (elsize != 0 && nelem > (size_t)PY_SSIZE_T_MAX / elsize)
594         return NULL;
595     return _PyMem_Raw.calloc(_PyMem_Raw.ctx, nelem, elsize);
596 }
597 
598 void*
599 PyMem_RawRealloc(void *ptr, size_t new_size)
600 {
601     /* see PyMem_RawMalloc() */
602     if (new_size > (size_t)PY_SSIZE_T_MAX)
603         return NULL;
604     return _PyMem_Raw.realloc(_PyMem_Raw.ctx, ptr, new_size);
605 }
606 
607 void PyMem_RawFree(void *ptr)
608 {
609     _PyMem_Raw.free(_PyMem_Raw.ctx, ptr);
610 }
611 
612 
613 void *
614 PyMem_Malloc(size_t size)
615 {
616     /* see PyMem_RawMalloc() */
617     if (size > (size_t)PY_SSIZE_T_MAX)
618         return NULL;
619     OBJECT_STAT_INC_COND(allocations512, size < 512);
620     OBJECT_STAT_INC_COND(allocations4k, size >= 512 && size < 4094);
621     OBJECT_STAT_INC_COND(allocations_big, size >= 4094);
622     OBJECT_STAT_INC(allocations);
623     return _PyMem.malloc(_PyMem.ctx, size);
624 }
625 
626 void *
627 PyMem_Calloc(size_t nelem, size_t elsize)
628 {
629     /* see PyMem_RawMalloc() */
630     if (elsize != 0 && nelem > (size_t)PY_SSIZE_T_MAX / elsize)
631         return NULL;
632     OBJECT_STAT_INC_COND(allocations512, elsize < 512);
633     OBJECT_STAT_INC_COND(allocations4k, elsize >= 512 && elsize < 4094);
634     OBJECT_STAT_INC_COND(allocations_big, elsize >= 4094);
635     OBJECT_STAT_INC(allocations);
636     return _PyMem.calloc(_PyMem.ctx, nelem, elsize);
637 }
638 
639 void *
640 PyMem_Realloc(void *ptr, size_t new_size)
641 {
642     /* see PyMem_RawMalloc() */
643     if (new_size > (size_t)PY_SSIZE_T_MAX)
644         return NULL;
645     return _PyMem.realloc(_PyMem.ctx, ptr, new_size);
646 }
647 
648 void
649 PyMem_Free(void *ptr)
650 {
651     OBJECT_STAT_INC(frees);
652     _PyMem.free(_PyMem.ctx, ptr);
653 }
654 
655 
656 wchar_t*
657 _PyMem_RawWcsdup(const wchar_t *str)
658 {
659     assert(str != NULL);
660 
661     size_t len = wcslen(str);
662     if (len > (size_t)PY_SSIZE_T_MAX / sizeof(wchar_t) - 1) {
663         return NULL;
664     }
665 
666     size_t size = (len + 1) * sizeof(wchar_t);
667     wchar_t *str2 = PyMem_RawMalloc(size);
668     if (str2 == NULL) {
669         return NULL;
670     }
671 
672     memcpy(str2, str, size);
673     return str2;
674 }
675 
676 char *
677 _PyMem_RawStrdup(const char *str)
678 {
679     assert(str != NULL);
680     size_t size = strlen(str) + 1;
681     char *copy = PyMem_RawMalloc(size);
682     if (copy == NULL) {
683         return NULL;
684     }
685     memcpy(copy, str, size);
686     return copy;
687 }
688 
689 char *
690 _PyMem_Strdup(const char *str)
691 {
692     assert(str != NULL);
693     size_t size = strlen(str) + 1;
694     char *copy = PyMem_Malloc(size);
695     if (copy == NULL) {
696         return NULL;
697     }
698     memcpy(copy, str, size);
699     return copy;
700 }
701 
702 void *
703 PyObject_Malloc(size_t size)
704 {
705     /* see PyMem_RawMalloc() */
706     if (size > (size_t)PY_SSIZE_T_MAX)
707         return NULL;
708     OBJECT_STAT_INC_COND(allocations512, size < 512);
709     OBJECT_STAT_INC_COND(allocations4k, size >= 512 && size < 4094);
710     OBJECT_STAT_INC_COND(allocations_big, size >= 4094);
711     OBJECT_STAT_INC(allocations);
712     return _PyObject.malloc(_PyObject.ctx, size);
713 }
714 
715 void *
716 PyObject_Calloc(size_t nelem, size_t elsize)
717 {
718     /* see PyMem_RawMalloc() */
719     if (elsize != 0 && nelem > (size_t)PY_SSIZE_T_MAX / elsize)
720         return NULL;
721     OBJECT_STAT_INC_COND(allocations512, elsize < 512);
722     OBJECT_STAT_INC_COND(allocations4k, elsize >= 512 && elsize < 4094);
723     OBJECT_STAT_INC_COND(allocations_big, elsize >= 4094);
724     OBJECT_STAT_INC(allocations);
725     return _PyObject.calloc(_PyObject.ctx, nelem, elsize);
726 }
727 
728 void *
729 PyObject_Realloc(void *ptr, size_t new_size)
730 {
731     /* see PyMem_RawMalloc() */
732     if (new_size > (size_t)PY_SSIZE_T_MAX)
733         return NULL;
734     return _PyObject.realloc(_PyObject.ctx, ptr, new_size);
735 }
736 
737 void
738 PyObject_Free(void *ptr)
739 {
740     OBJECT_STAT_INC(frees);
741     _PyObject.free(_PyObject.ctx, ptr);
742 }
743 
744 
745 /* If we're using GCC, use __builtin_expect() to reduce overhead of
746    the valgrind checks */
747 #if defined(__GNUC__) && (__GNUC__ > 2) && defined(__OPTIMIZE__)
748 #  define UNLIKELY(value) __builtin_expect((value), 0)
749 #  define LIKELY(value) __builtin_expect((value), 1)
750 #else
751 #  define UNLIKELY(value) (value)
752 #  define LIKELY(value) (value)
753 #endif
754 
755 #ifdef WITH_PYMALLOC
756 
757 #ifdef WITH_VALGRIND
758 #include <valgrind/valgrind.h>
759 
760 /* -1 indicates that we haven't checked that we're running on valgrind yet. */
761 static int running_on_valgrind = -1;
762 #endif
763 
764 
765 /* An object allocator for Python.
766 
767    Here is an introduction to the layers of the Python memory architecture,
768    showing where the object allocator is actually used (layer +2). It is
769    called for every object allocation and deallocation (PyObject_New/Del),
770    unless the object-specific allocators implement a proprietary allocation
771    scheme (ex.: ints use a simple free list). This is also the place where
772    the cyclic garbage collector operates selectively on container objects.
773 
774 
775     Object-specific allocators
776     _____   ______   ______       ________
777    [ int ] [ dict ] [ list ] ... [ string ]       Python core         |
778 +3 | <----- Object-specific memory -----> | <-- Non-object memory --> |
779     _______________________________       |                           |
780    [   Python's object allocator   ]      |                           |
781 +2 | ####### Object memory ####### | <------ Internal buffers ------> |
782     ______________________________________________________________    |
783    [          Python's raw memory allocator (PyMem_ API)          ]   |
784 +1 | <----- Python memory (under PyMem manager's control) ------> |   |
785     __________________________________________________________________
786    [    Underlying general-purpose allocator (ex: C library malloc)   ]
787  0 | <------ Virtual memory allocated for the python process -------> |
788 
789    =========================================================================
790     _______________________________________________________________________
791    [                OS-specific Virtual Memory Manager (VMM)               ]
792 -1 | <--- Kernel dynamic storage allocation & management (page-based) ---> |
793     __________________________________   __________________________________
794    [                                  ] [                                  ]
795 -2 | <-- Physical memory: ROM/RAM --> | | <-- Secondary storage (swap) --> |
796 
797 */
798 /*==========================================================================*/
799 
800 /* A fast, special-purpose memory allocator for small blocks, to be used
801    on top of a general-purpose malloc -- heavily based on previous art. */
802 
803 /* Vladimir Marangozov -- August 2000 */
804 
805 /*
806  * "Memory management is where the rubber meets the road -- if we do the wrong
807  * thing at any level, the results will not be good. And if we don't make the
808  * levels work well together, we are in serious trouble." (1)
809  *
810  * (1) Paul R. Wilson, Mark S. Johnstone, Michael Neely, and David Boles,
811  *    "Dynamic Storage Allocation: A Survey and Critical Review",
812  *    in Proc. 1995 Int'l. Workshop on Memory Management, September 1995.
813  */
814 
815 /* #undef WITH_MEMORY_LIMITS */         /* disable mem limit checks  */
816 
817 /*==========================================================================*/
818 
819 /*
820  * Allocation strategy abstract:
821  *
822  * For small requests, the allocator sub-allocates <Big> blocks of memory.
823  * Requests greater than SMALL_REQUEST_THRESHOLD bytes are routed to the
824  * system's allocator.
825  *
826  * Small requests are grouped in size classes spaced 8 bytes apart, due
827  * to the required valid alignment of the returned address. Requests of
828  * a particular size are serviced from memory pools of 4K (one VMM page).
829  * Pools are fragmented on demand and contain free lists of blocks of one
830  * particular size class. In other words, there is a fixed-size allocator
831  * for each size class. Free pools are shared by the different allocators
832  * thus minimizing the space reserved for a particular size class.
833  *
834  * This allocation strategy is a variant of what is known as "simple
835  * segregated storage based on array of free lists". The main drawback of
836  * simple segregated storage is that we might end up with a lot of reserved
837  * memory for the different free lists, which degenerate over time. To avoid
838  * this, we partition each free list in pools and we share dynamically the
839  * reserved space between all free lists. This technique is quite efficient
840  * for memory intensive programs which allocate mainly small-sized blocks.
841  *
842  * For small requests we have the following table:
843  *
844  * Request in bytes     Size of allocated block      Size class idx
845  * ----------------------------------------------------------------
846  *        1-8                     8                       0
847  *        9-16                   16                       1
848  *       17-24                   24                       2
849  *       25-32                   32                       3
850  *       33-40                   40                       4
851  *       41-48                   48                       5
852  *       49-56                   56                       6
853  *       57-64                   64                       7
854  *       65-72                   72                       8
855  *        ...                   ...                     ...
856  *      497-504                 504                      62
857  *      505-512                 512                      63
858  *
859  *      0, SMALL_REQUEST_THRESHOLD + 1 and up: routed to the underlying
860  *      allocator.
861  */
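
/* In code, the table above reduces to a shift (a sketch of the mapping the
 * allocator uses; nbytes is the request size with
 * 1 <= nbytes <= SMALL_REQUEST_THRESHOLD, and INDEX2SIZE() is defined below):
 *
 *     uint idx = (uint)(nbytes - 1) >> ALIGNMENT_SHIFT;
 *     uint block_size = INDEX2SIZE(idx);   // == (idx + 1) << ALIGNMENT_SHIFT
 *
 * For example, with 8-byte alignment a 20-byte request maps to size class
 * index 2 and is served from 24-byte blocks.
 */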
862 
863 /*==========================================================================*/
864 
865 /*
866  * -- Main tunable settings section --
867  */
868 
869 /*
870  * Alignment of addresses returned to the user. 8-bytes alignment works
871  * on most current architectures (with 32-bit or 64-bit address buses).
872  * The alignment value is also used for grouping small requests in size
873  * classes spaced ALIGNMENT bytes apart.
874  *
875  * You shouldn't change this unless you know what you are doing.
876  */
877 
878 #if SIZEOF_VOID_P > 4
879 #define ALIGNMENT              16               /* must be 2^N */
880 #define ALIGNMENT_SHIFT         4
881 #else
882 #define ALIGNMENT               8               /* must be 2^N */
883 #define ALIGNMENT_SHIFT         3
884 #endif
885 
886 /* Return the number of bytes in size class I, as a uint. */
887 #define INDEX2SIZE(I) (((uint)(I) + 1) << ALIGNMENT_SHIFT)
888 
889 /*
890  * Max size threshold below which malloc requests are considered to be
891  * small enough in order to use preallocated memory pools. You can tune
892  * this value according to your application behaviour and memory needs.
893  *
894  * Note: a size threshold of 512 guarantees that newly created dictionaries
895  * will be allocated from preallocated memory pools on 64-bit.
896  *
897  * The following invariants must hold:
898  *      1) ALIGNMENT <= SMALL_REQUEST_THRESHOLD <= 512
899  *      2) SMALL_REQUEST_THRESHOLD is evenly divisible by ALIGNMENT
900  *
901  * Although not required, for better performance and space efficiency,
902  * it is recommended that SMALL_REQUEST_THRESHOLD is set to a power of 2.
903  */
904 #define SMALL_REQUEST_THRESHOLD 512
905 #define NB_SMALL_SIZE_CLASSES   (SMALL_REQUEST_THRESHOLD / ALIGNMENT)
906 
907 /*
908  * The system's VMM page size can be obtained on most unices with a
909  * getpagesize() call or deduced from various header files. To make
910  * things simpler, we assume that it is 4K, which is OK for most systems.
911  * It is probably better if this is the native page size, but it doesn't
912  * have to be.  In theory, if SYSTEM_PAGE_SIZE is larger than the native page
913  * size, then `POOL_ADDR(p)->arenaindex' could rarely cause a segmentation
914  * violation fault.  4K is apparently OK for all the platforms that python
915  * currently targets.
916  */
917 #define SYSTEM_PAGE_SIZE        (4 * 1024)
918 
919 /*
920  * Maximum amount of memory managed by the allocator for small requests.
921  */
922 #ifdef WITH_MEMORY_LIMITS
923 #ifndef SMALL_MEMORY_LIMIT
924 #define SMALL_MEMORY_LIMIT      (64 * 1024 * 1024)      /* 64 MB -- more? */
925 #endif
926 #endif
927 
928 #if !defined(WITH_PYMALLOC_RADIX_TREE)
929 /* Use radix-tree to track arena memory regions, for address_in_range().
930  * Enable by default since it allows larger pool sizes.  Can be disabled
931  * using -DWITH_PYMALLOC_RADIX_TREE=0 */
932 #define WITH_PYMALLOC_RADIX_TREE 1
933 #endif
934 
935 #if SIZEOF_VOID_P > 4
936 /* on 64-bit platforms use larger pools and arenas if we can */
937 #define USE_LARGE_ARENAS
938 #if WITH_PYMALLOC_RADIX_TREE
939 /* large pools only supported if radix-tree is enabled */
940 #define USE_LARGE_POOLS
941 #endif
942 #endif
943 
944 /*
945  * The allocator sub-allocates <Big> blocks of memory (called arenas) aligned
946  * on a page boundary. This is a reserved virtual address space for the
947  * current process (obtained through a malloc()/mmap() call). In no way does
948  * this mean that the memory arenas will be used entirely. A malloc(<Big>) is
949  * usually an address range reservation for <Big> bytes, unless all pages within
950  * this space are referenced subsequently. So malloc'ing big blocks and not
951  * using them does not mean "wasting memory". It's an addressable range
952  * wastage...
953  *
954  * Arenas are allocated with mmap() on systems supporting anonymous memory
955  * mappings to reduce heap fragmentation.
956  */
957 #ifdef USE_LARGE_ARENAS
958 #define ARENA_BITS              20                    /* 1 MiB */
959 #else
960 #define ARENA_BITS              18                    /* 256 KiB */
961 #endif
962 #define ARENA_SIZE              (1 << ARENA_BITS)
963 #define ARENA_SIZE_MASK         (ARENA_SIZE - 1)
964 
965 #ifdef WITH_MEMORY_LIMITS
966 #define MAX_ARENAS              (SMALL_MEMORY_LIMIT / ARENA_SIZE)
967 #endif
968 
969 /*
970  * Size of the pools used for small blocks.  Must be a power of 2.
971  */
972 #ifdef USE_LARGE_POOLS
973 #define POOL_BITS               14                  /* 16 KiB */
974 #else
975 #define POOL_BITS               12                  /* 4 KiB */
976 #endif
977 #define POOL_SIZE               (1 << POOL_BITS)
978 #define POOL_SIZE_MASK          (POOL_SIZE - 1)
979 
980 #if !WITH_PYMALLOC_RADIX_TREE
981 #if POOL_SIZE != SYSTEM_PAGE_SIZE
982 #   error "pool size must be equal to system page size"
983 #endif
984 #endif
985 
986 #define MAX_POOLS_IN_ARENA  (ARENA_SIZE / POOL_SIZE)
987 #if MAX_POOLS_IN_ARENA * POOL_SIZE != ARENA_SIZE
988 #   error "arena size not an exact multiple of pool size"
989 #endif
990 
991 /*
992  * -- End of tunable settings section --
993  */
994 
995 /*==========================================================================*/
996 
997 /* When you say memory, my mind reasons in terms of (pointers to) blocks */
998 typedef uint8_t block;
999 
1000 /* Pool for small blocks. */
1001 struct pool_header {
1002     union { block *_padding;
1003             uint count; } ref;          /* number of allocated blocks    */
1004     block *freeblock;                   /* pool's free list head         */
1005     struct pool_header *nextpool;       /* next pool of this size class  */
1006     struct pool_header *prevpool;       /* previous pool       ""        */
1007     uint arenaindex;                    /* index into arenas of base adr */
1008     uint szidx;                         /* block size class index        */
1009     uint nextoffset;                    /* bytes to virgin block         */
1010     uint maxnextoffset;                 /* largest valid nextoffset      */
1011 };
1012 
1013 typedef struct pool_header *poolp;
1014 
1015 /* Record keeping for arenas. */
1016 struct arena_object {
1017     /* The address of the arena, as returned by malloc.  Note that 0
1018      * will never be returned by a successful malloc, and is used
1019      * here to mark an arena_object that doesn't correspond to an
1020      * allocated arena.
1021      */
1022     uintptr_t address;
1023 
1024     /* Pool-aligned pointer to the next pool to be carved off. */
1025     block* pool_address;
1026 
1027     /* The number of available pools in the arena:  free pools + never-
1028      * allocated pools.
1029      */
1030     uint nfreepools;
1031 
1032     /* The total number of pools in the arena, whether or not available. */
1033     uint ntotalpools;
1034 
1035     /* Singly-linked list of available pools. */
1036     struct pool_header* freepools;
1037 
1038     /* Whenever this arena_object is not associated with an allocated
1039      * arena, the nextarena member is used to link all unassociated
1040      * arena_objects in the singly-linked `unused_arena_objects` list.
1041      * The prevarena member is unused in this case.
1042      *
1043      * When this arena_object is associated with an allocated arena
1044      * with at least one available pool, both members are used in the
1045      * doubly-linked `usable_arenas` list, which is maintained in
1046      * increasing order of `nfreepools` values.
1047      *
1048      * Else this arena_object is associated with an allocated arena
1049      * all of whose pools are in use.  `nextarena` and `prevarena`
1050      * are both meaningless in this case.
1051      */
1052     struct arena_object* nextarena;
1053     struct arena_object* prevarena;
1054 };
1055 
1056 #define POOL_OVERHEAD   _Py_SIZE_ROUND_UP(sizeof(struct pool_header), ALIGNMENT)
1057 
1058 #define DUMMY_SIZE_IDX          0xffff  /* size class of newly cached pools */
1059 
1060 /* Round pointer P down to the closest pool-aligned address <= P, as a poolp */
1061 #define POOL_ADDR(P) ((poolp)_Py_ALIGN_DOWN((P), POOL_SIZE))
1062 
1063 /* Return total number of blocks in pool of size index I, as a uint. */
1064 #define NUMBLOCKS(I) ((uint)(POOL_SIZE - POOL_OVERHEAD) / INDEX2SIZE(I))
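
/* Example: on a typical 64-bit build with 16 KiB pools, 16-byte alignment
 * and sizeof(struct pool_header) rounded up to 48 bytes, the smallest size
 * class gets NUMBLOCKS(0) == (16384 - 48) / 16 == 1021 blocks per pool,
 * while the largest (512-byte) class gets 31.  Exact numbers depend on the
 * build's POOL_SIZE, ALIGNMENT and struct layout. */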
1065 
1066 /*==========================================================================*/
1067 
1068 /*
1069  * Pool table -- headed, circular, doubly-linked lists of partially used pools.
1070 
1071 This is involved.  For an index i, usedpools[i+i] is the header for a list of
1072 all partially used pools holding small blocks with "size class idx" i. So
1073 usedpools[0] corresponds to blocks of size 8, usedpools[2] to blocks of size
1074 16, and so on:  index 2*i <-> blocks of size (i+1)<<ALIGNMENT_SHIFT.
1075 
1076 Pools are carved off an arena's highwater mark (an arena_object's pool_address
1077 member) as needed.  Once carved off, a pool is in one of three states forever
1078 after:
1079 
1080 used == partially used, neither empty nor full
1081     At least one block in the pool is currently allocated, and at least one
1082     block in the pool is not currently allocated (note this implies a pool
1083     has room for at least two blocks).
1084     This is a pool's initial state, as a pool is created only when malloc
1085     needs space.
1086     The pool holds blocks of a fixed size, and is in the circular list headed
1087     at usedpools[i] (see above).  It's linked to the other used pools of the
1088     same size class via the pool_header's nextpool and prevpool members.
1089     If all but one block is currently allocated, a malloc can cause a
1090     transition to the full state.  If all but one block is not currently
1091     allocated, a free can cause a transition to the empty state.
1092 
1093 full == all the pool's blocks are currently allocated
1094     On transition to full, a pool is unlinked from its usedpools[] list.
1095     It's not linked to from anything then anymore, and its nextpool and
1096     prevpool members are meaningless until it transitions back to used.
1097     A free of a block in a full pool puts the pool back in the used state.
1098     Then it's linked in at the front of the appropriate usedpools[] list, so
1099     that the next allocation for its size class will reuse the freed block.
1100 
1101 empty == all the pool's blocks are currently available for allocation
1102     On transition to empty, a pool is unlinked from its usedpools[] list,
1103     and linked to the front of its arena_object's singly-linked freepools list,
1104     via its nextpool member.  The prevpool member has no meaning in this case.
1105     Empty pools have no inherent size class:  the next time a malloc finds
1106     an empty list in usedpools[], it takes the first pool off of freepools.
1107     If the size class needed happens to be the same as the size class the pool
1108     last had, some pool initialization can be skipped.
1109 
1110 
1111 Block Management
1112 
1113 Blocks within pools are again carved out as needed.  pool->freeblock points to
1114 the start of a singly-linked list of free blocks within the pool.  When a
1115 block is freed, it's inserted at the front of its pool's freeblock list.  Note
1116 that the available blocks in a pool are *not* linked all together when a pool
1117 is initialized.  Instead only "the first two" (lowest addresses) blocks are
1118 set up, returning the first such block, and setting pool->freeblock to a
1119 one-block list holding the second such block.  This is consistent with how
1120 pymalloc strives at all levels (arena, pool, and block) never to touch a piece
1121 of memory until it's actually needed.
1122 
1123 So long as a pool is in the used state, we're certain there *is* a block
1124 available for allocating, and pool->freeblock is not NULL.  If pool->freeblock
1125 points to the end of the free list before we've carved the entire pool into
1126 blocks, that means we simply haven't yet gotten to one of the higher-address
1127 blocks.  The offset from the pool_header to the start of "the next" virgin
1128 block is stored in the pool_header nextoffset member, and the largest value
1129 of nextoffset that makes sense is stored in the maxnextoffset member when a
1130 pool is initialized.  All the blocks in a pool have been passed out at least
1131 once when and only when nextoffset > maxnextoffset.
1132 
1133 
1134 Major obscurity:  While the usedpools vector is declared to have poolp
1135 entries, it doesn't really.  It really contains two pointers per (conceptual)
1136 poolp entry, the nextpool and prevpool members of a pool_header.  The
1137 excruciating initialization code below fools C so that
1138 
1139     usedpools[i+i]
1140 
1141 "acts like" a genuine poolp, but only so long as you only reference its
1142 nextpool and prevpool members.  The "- 2*sizeof(block *)" gibberish is
1143 compensating for the fact that a pool_header's nextpool and prevpool members
1144 immediately follow a pool_header's first two members:
1145 
1146     union { block *_padding;
1147             uint count; } ref;
1148     block *freeblock;
1149 
1150 each of which consumes sizeof(block *) bytes.  So what usedpools[i+i] really
1151 contains is a fudged-up pointer p such that *if* C believes it's a poolp
1152 pointer, then p->nextpool and p->prevpool are both p (meaning that the headed
1153 circular list is empty).
1154 
1155 It's unclear why the usedpools setup is so convoluted.  It could be to
1156 minimize the amount of cache required to hold this heavily-referenced table
1157 (which only *needs* the two interpool pointer members of a pool_header). OTOH,
1158 referencing code has to remember to "double the index" and doing so isn't
1159 free, usedpools[0] isn't a strictly legal pointer, and we're crucially relying
1160 on the fact that C inserts no padding anywhere in a pool_header at or before
1161 the prevpool member.
1162 **************************************************************************** */
1163 
1164 #define PTA(x)  ((poolp )((uint8_t *)&(usedpools[2*(x)]) - 2*sizeof(block *)))
1165 #define PT(x)   PTA(x), PTA(x)
1166 
1167 static poolp usedpools[2 * ((NB_SMALL_SIZE_CLASSES + 7) / 8) * 8] = {
1168     PT(0), PT(1), PT(2), PT(3), PT(4), PT(5), PT(6), PT(7)
1169 #if NB_SMALL_SIZE_CLASSES > 8
1170     , PT(8), PT(9), PT(10), PT(11), PT(12), PT(13), PT(14), PT(15)
1171 #if NB_SMALL_SIZE_CLASSES > 16
1172     , PT(16), PT(17), PT(18), PT(19), PT(20), PT(21), PT(22), PT(23)
1173 #if NB_SMALL_SIZE_CLASSES > 24
1174     , PT(24), PT(25), PT(26), PT(27), PT(28), PT(29), PT(30), PT(31)
1175 #if NB_SMALL_SIZE_CLASSES > 32
1176     , PT(32), PT(33), PT(34), PT(35), PT(36), PT(37), PT(38), PT(39)
1177 #if NB_SMALL_SIZE_CLASSES > 40
1178     , PT(40), PT(41), PT(42), PT(43), PT(44), PT(45), PT(46), PT(47)
1179 #if NB_SMALL_SIZE_CLASSES > 48
1180     , PT(48), PT(49), PT(50), PT(51), PT(52), PT(53), PT(54), PT(55)
1181 #if NB_SMALL_SIZE_CLASSES > 56
1182     , PT(56), PT(57), PT(58), PT(59), PT(60), PT(61), PT(62), PT(63)
1183 #if NB_SMALL_SIZE_CLASSES > 64
1184 #error "NB_SMALL_SIZE_CLASSES should not be more than 64"
1185 #endif /* NB_SMALL_SIZE_CLASSES > 64 */
1186 #endif /* NB_SMALL_SIZE_CLASSES > 56 */
1187 #endif /* NB_SMALL_SIZE_CLASSES > 48 */
1188 #endif /* NB_SMALL_SIZE_CLASSES > 40 */
1189 #endif /* NB_SMALL_SIZE_CLASSES > 32 */
1190 #endif /* NB_SMALL_SIZE_CLASSES > 24 */
1191 #endif /* NB_SMALL_SIZE_CLASSES > 16 */
1192 #endif /* NB_SMALL_SIZE_CLASSES >  8 */
1193 };
1194 
1195 /*==========================================================================
1196 Arena management.
1197 
1198 `arenas` is a vector of arena_objects.  It contains maxarenas entries, some of
1199 which may not be currently used (== they're arena_objects that aren't
1200 currently associated with an allocated arena).  Note that arenas proper are
1201 separately malloc'ed.
1202 
1203 Prior to Python 2.5, arenas were never free()'ed.  Starting with Python 2.5,
1204 we do try to free() arenas, and use some mild heuristic strategies to increase
1205 the likelihood that arenas eventually can be freed.
1206 
1207 unused_arena_objects
1208 
1209     This is a singly-linked list of the arena_objects that are currently not
1210     being used (no arena is associated with them).  Objects are taken off the
1211     head of the list in new_arena(), and are pushed on the head of the list in
1212     PyObject_Free() when the arena is empty.  Key invariant:  an arena_object
1213     is on this list if and only if its .address member is 0.
1214 
1215 usable_arenas
1216 
1217     This is a doubly-linked list of the arena_objects associated with arenas
1218     that have pools available.  These pools are either waiting to be reused,
1219     or have not been used before.  The list is sorted to have the most-
1220     allocated arenas first (ascending order based on the nfreepools member).
1221     This means that the next allocation will come from a heavily used arena,
1222     which gives the nearly empty arenas a chance to be returned to the system.
1223     In my unscientific tests this dramatically improved the number of arenas
1224     that could be freed.
1225 
1226 Note that an arena_object associated with an arena all of whose pools are
1227 currently in use isn't on either list.
1228 
1229 Changed in Python 3.8:  keeping usable_arenas sorted by number of free pools
1230 used to be done by one-at-a-time linear search when an arena's number of
1231 free pools changed.  That could, overall, consume time quadratic in the
1232 number of arenas.  That didn't really matter when there were only a few
1233 hundred arenas (typical!), but could be a timing disaster when there were
1234 hundreds of thousands.  See bpo-37029.
1235 
1236 Now we have a vector of "search fingers" to eliminate the need to search:
1237 nfp2lasta[nfp] returns the last ("rightmost") arena in usable_arenas
1238 with nfp free pools.  This is NULL if and only if there is no arena with
1239 nfp free pools in usable_arenas.
1240 */
1241 
1242 /* Array of objects used to track chunks of memory (arenas). */
1243 static struct arena_object* arenas = NULL;
1244 /* Number of slots currently allocated in the `arenas` vector. */
1245 static uint maxarenas = 0;
1246 
1247 /* The head of the singly-linked, NULL-terminated list of available
1248  * arena_objects.
1249  */
1250 static struct arena_object* unused_arena_objects = NULL;
1251 
1252 /* The head of the doubly-linked, NULL-terminated at each end, list of
1253  * arena_objects associated with arenas that have pools available.
1254  */
1255 static struct arena_object* usable_arenas = NULL;
1256 
1257 /* nfp2lasta[nfp] is the last arena in usable_arenas with nfp free pools */
1258 static struct arena_object* nfp2lasta[MAX_POOLS_IN_ARENA + 1] = { NULL };
1259 
1260 /* How many arena_objects do we initially allocate?
1261  * 16 = can allocate 16 arenas = 16 * ARENA_SIZE, i.e. 4 MiB with 256 KiB
1262  * arenas (or 16 MiB with 1 MiB arenas), before growing the `arenas` vector.
1263  */
1264 #define INITIAL_ARENA_OBJECTS 16
1265 
1266 /* Number of arenas allocated that haven't been free()'d. */
1267 static size_t narenas_currently_allocated = 0;
1268 
1269 /* Total number of times malloc() called to allocate an arena. */
1270 static size_t ntimes_arena_allocated = 0;
1271 /* High water mark (max value ever seen) for narenas_currently_allocated. */
1272 static size_t narenas_highwater = 0;
1273 
1274 static Py_ssize_t raw_allocated_blocks;
1275 
1276 Py_ssize_t
1277 _Py_GetAllocatedBlocks(void)
1278 {
1279     Py_ssize_t n = raw_allocated_blocks;
1280     /* add up allocated blocks for used pools */
1281     for (uint i = 0; i < maxarenas; ++i) {
1282         /* Skip arenas which are not allocated. */
1283         if (arenas[i].address == 0) {
1284             continue;
1285         }
1286 
1287         uintptr_t base = (uintptr_t)_Py_ALIGN_UP(arenas[i].address, POOL_SIZE);
1288 
1289         /* visit every pool in the arena */
1290         assert(base <= (uintptr_t) arenas[i].pool_address);
1291         for (; base < (uintptr_t) arenas[i].pool_address; base += POOL_SIZE) {
1292             poolp p = (poolp)base;
1293             n += p->ref.count;
1294         }
1295     }
1296     return n;
1297 }
1298 
1299 #if WITH_PYMALLOC_RADIX_TREE
1300 /*==========================================================================*/
1301 /* radix tree for tracking arena usage.  If enabled, used to implement
1302    address_in_range().
1303 
1304    memory address bit allocation for keys
1305 
1306    64-bit pointers, IGNORE_BITS=0 and 2^20 arena size:
1307      15 -> MAP_TOP_BITS
1308      15 -> MAP_MID_BITS
1309      14 -> MAP_BOT_BITS
1310      20 -> ideal aligned arena
1311    ----
1312      64
1313 
1314    64-bit pointers, IGNORE_BITS=16, and 2^20 arena size:
1315      16 -> IGNORE_BITS
1316      10 -> MAP_TOP_BITS
1317      10 -> MAP_MID_BITS
1318       8 -> MAP_BOT_BITS
1319      20 -> ideal aligned arena
1320    ----
1321      64
1322 
1323    32-bit pointers and 2^18 arena size:
1324      14 -> MAP_BOT_BITS
1325      18 -> ideal aligned arena
1326    ----
1327      32
1328 
1329 */
1330 
1331 #if SIZEOF_VOID_P == 8
1332 
1333 /* number of bits in a pointer */
1334 #define POINTER_BITS 64
1335 
1336 /* High bits of memory addresses that will be ignored when indexing into the
1337  * radix tree.  Setting this to zero is the safe default.  For most 64-bit
1338  * machines, setting this to 16 would be safe.  The kernel would not give
1339  * user-space virtual memory addresses that have significant information in
1340  * those high bits.  The main advantage to setting IGNORE_BITS > 0 is that less
1341  * virtual memory will be used for the top and middle radix tree arrays.  Those
1342  * arrays are allocated in the BSS segment and so will typically consume real
1343  * memory only if actually accessed.
1344  */
1345 #define IGNORE_BITS 0
1346 
1347 /* use the top and mid layers of the radix tree */
1348 #define USE_INTERIOR_NODES
1349 
1350 #elif SIZEOF_VOID_P == 4
1351 
1352 #define POINTER_BITS 32
1353 #define IGNORE_BITS 0
1354 
1355 #else
1356 
1357  /* Currently this code works for 64-bit or 32-bit pointers only.  */
1358 #error "obmalloc radix tree requires 64-bit or 32-bit pointers."
1359 
1360 #endif /* SIZEOF_VOID_P */
1361 
1362 /* arena_coverage_t members require this to be true  */
1363 #if ARENA_BITS >= 32
1364 #   error "arena size must be < 2^32"
1365 #endif
1366 
1367 /* the lower bits of the address that are not ignored */
1368 #define ADDRESS_BITS (POINTER_BITS - IGNORE_BITS)
1369 
1370 #ifdef USE_INTERIOR_NODES
1371 /* number of bits used for MAP_TOP and MAP_MID nodes */
1372 #define INTERIOR_BITS ((ADDRESS_BITS - ARENA_BITS + 2) / 3)
1373 #else
1374 #define INTERIOR_BITS 0
1375 #endif
1376 
1377 #define MAP_TOP_BITS INTERIOR_BITS
1378 #define MAP_TOP_LENGTH (1 << MAP_TOP_BITS)
1379 #define MAP_TOP_MASK (MAP_TOP_LENGTH - 1)
1380 
1381 #define MAP_MID_BITS INTERIOR_BITS
1382 #define MAP_MID_LENGTH (1 << MAP_MID_BITS)
1383 #define MAP_MID_MASK (MAP_MID_LENGTH - 1)
1384 
1385 #define MAP_BOT_BITS (ADDRESS_BITS - ARENA_BITS - 2*INTERIOR_BITS)
1386 #define MAP_BOT_LENGTH (1 << MAP_BOT_BITS)
1387 #define MAP_BOT_MASK (MAP_BOT_LENGTH - 1)
1388 
1389 #define MAP_BOT_SHIFT ARENA_BITS
1390 #define MAP_MID_SHIFT (MAP_BOT_BITS + MAP_BOT_SHIFT)
1391 #define MAP_TOP_SHIFT (MAP_MID_BITS + MAP_MID_SHIFT)
1392 
1393 #define AS_UINT(p) ((uintptr_t)(p))
1394 #define MAP_BOT_INDEX(p) ((AS_UINT(p) >> MAP_BOT_SHIFT) & MAP_BOT_MASK)
1395 #define MAP_MID_INDEX(p) ((AS_UINT(p) >> MAP_MID_SHIFT) & MAP_MID_MASK)
1396 #define MAP_TOP_INDEX(p) ((AS_UINT(p) >> MAP_TOP_SHIFT) & MAP_TOP_MASK)
1397 
1398 #if IGNORE_BITS > 0
1399 /* Return the ignored part of the pointer address.  Those bits should be the
1400  * same for all valid pointers if IGNORE_BITS is set correctly.
1401  */
1402 #define HIGH_BITS(p) (AS_UINT(p) >> ADDRESS_BITS)
1403 #else
1404 #define HIGH_BITS(p) 0
1405 #endif
1406 
1407 
1408 /* This is the leaf of the radix tree.  See arena_map_mark_used() for the
1409  * meaning of these members. */
1410 typedef struct {
1411     int32_t tail_hi;
1412     int32_t tail_lo;
1413 } arena_coverage_t;
1414 
1415 typedef struct arena_map_bot {
1416     /* The members tail_hi and tail_lo are accessed together.  So, it is
1417      * better to have them as an array of structs, rather than two
1418      * arrays.
1419      */
1420     arena_coverage_t arenas[MAP_BOT_LENGTH];
1421 } arena_map_bot_t;
1422 
1423 #ifdef USE_INTERIOR_NODES
1424 typedef struct arena_map_mid {
1425     struct arena_map_bot *ptrs[MAP_MID_LENGTH];
1426 } arena_map_mid_t;
1427 
1428 typedef struct arena_map_top {
1429     struct arena_map_mid *ptrs[MAP_TOP_LENGTH];
1430 } arena_map_top_t;
1431 #endif
1432 
1433 /* The root of radix tree.  Note that by initializing like this, the memory
1434  * should be in the BSS.  The OS will only memory map pages as the MAP_MID
1435  * nodes get used (OS pages are demand loaded as needed).
1436  */
1437 #ifdef USE_INTERIOR_NODES
1438 static arena_map_top_t arena_map_root;
1439 /* accounting for number of used interior nodes */
1440 static int arena_map_mid_count;
1441 static int arena_map_bot_count;
1442 #else
1443 static arena_map_bot_t arena_map_root;
1444 #endif
1445 
1446 /* Return a pointer to a bottom tree node, return NULL if it doesn't exist or
1447  * it cannot be created */
1448 static inline Py_ALWAYS_INLINE arena_map_bot_t *
1449 arena_map_get(block *p, int create)
1450 {
1451 #ifdef USE_INTERIOR_NODES
1452     /* sanity check that IGNORE_BITS is correct */
1453     assert(HIGH_BITS(p) == HIGH_BITS(&arena_map_root));
1454     int i1 = MAP_TOP_INDEX(p);
1455     if (arena_map_root.ptrs[i1] == NULL) {
1456         if (!create) {
1457             return NULL;
1458         }
1459         arena_map_mid_t *n = PyMem_RawCalloc(1, sizeof(arena_map_mid_t));
1460         if (n == NULL) {
1461             return NULL;
1462         }
1463         arena_map_root.ptrs[i1] = n;
1464         arena_map_mid_count++;
1465     }
1466     int i2 = MAP_MID_INDEX(p);
1467     if (arena_map_root.ptrs[i1]->ptrs[i2] == NULL) {
1468         if (!create) {
1469             return NULL;
1470         }
1471         arena_map_bot_t *n = PyMem_RawCalloc(1, sizeof(arena_map_bot_t));
1472         if (n == NULL) {
1473             return NULL;
1474         }
1475         arena_map_root.ptrs[i1]->ptrs[i2] = n;
1476         arena_map_bot_count++;
1477     }
1478     return arena_map_root.ptrs[i1]->ptrs[i2];
1479 #else
1480     return &arena_map_root;
1481 #endif
1482 }
1483 
1484 
1485 /* The radix tree only tracks arenas.  So, for 16 MiB arenas, we throw
1486  * away 24 bits of the address.  That reduces the space requirement of
1487  * the tree compared to similar radix tree page-map schemes.  In
1488  * exchange for slashing the space requirement, it needs more
1489  * computation to check an address.
1490  *
1491  * Tracking coverage is done by "ideal" arena address.  It is easier to
1492  * explain in decimal so let's say that the arena size is 100 bytes.
1493  * Then, ideal addresses are 100, 200, 300, etc.  For checking if a
1494  * pointer address is inside an actual arena, we have to check two ideal
1495  * arena addresses.  E.g. if pointer is 357, we need to check 200 and
1496  * 300.  In the rare case that an arena is aligned in the ideal way
1497  * (e.g. base address of arena is 200) then we only have to check one
1498  * ideal address.
1499  *
1500  * The tree nodes for 200 and 300 both store the address of the arena.
1501  * There are two cases: the arena starts at a lower ideal arena and
1502  * extends to this one, or the arena starts in this arena and extends to
1503  * the next ideal arena.  The tail_lo and tail_hi members correspond to
1504  * these two cases.
1505  */
1506 
1507 
1508 /* mark or unmark addresses covered by arena */
1509 static int
1510 arena_map_mark_used(uintptr_t arena_base, int is_used)
1511 {
1512     /* sanity check that IGNORE_BITS is correct */
1513     assert(HIGH_BITS(arena_base) == HIGH_BITS(&arena_map_root));
1514     arena_map_bot_t *n_hi = arena_map_get((block *)arena_base, is_used);
1515     if (n_hi == NULL) {
1516         assert(is_used); /* otherwise node should already exist */
1517         return 0; /* failed to allocate space for node */
1518     }
1519     int i3 = MAP_BOT_INDEX((block *)arena_base);
1520     int32_t tail = (int32_t)(arena_base & ARENA_SIZE_MASK);
1521     if (tail == 0) {
1522         /* is ideal arena address */
1523         n_hi->arenas[i3].tail_hi = is_used ? -1 : 0;
1524     }
1525     else {
1526         /* arena_base is not an ideal (arena-size aligned) address, so the arena
1527          * potentially covers two MAP_BOT nodes.  Get the MAP_BOT node
1528          * for the next arena.  Note that it might be in different MAP_TOP
1529          * and MAP_MID nodes as well so we need to call arena_map_get()
1530          * again (do the full tree traversal).
1531          */
1532         n_hi->arenas[i3].tail_hi = is_used ? tail : 0;
1533         uintptr_t arena_base_next = arena_base + ARENA_SIZE;
1534         /* If arena_base is a legit arena address, so is arena_base_next - 1
1535          * (last address in arena).  If arena_base_next overflows then it
1536          * must overflow to 0.  However, that would mean arena_base was
1537          * "ideal" and we should not be in this case. */
1538         assert(arena_base < arena_base_next);
1539         arena_map_bot_t *n_lo = arena_map_get((block *)arena_base_next, is_used);
1540         if (n_lo == NULL) {
1541             assert(is_used); /* otherwise should already exist */
1542             n_hi->arenas[i3].tail_hi = 0;
1543             return 0; /* failed to allocate space for node */
1544         }
1545         int i3_next = MAP_BOT_INDEX(arena_base_next);
1546         n_lo->arenas[i3_next].tail_lo = is_used ? tail : 0;
1547     }
1548     return 1;
1549 }
1550 
1551 /* Return true if 'p' is a pointer inside an obmalloc arena.
1552  * _PyObject_Free() calls this so it needs to be very fast. */
1553 static int
1554 arena_map_is_used(block *p)
1555 {
1556     arena_map_bot_t *n = arena_map_get(p, 0);
1557     if (n == NULL) {
1558         return 0;
1559     }
1560     int i3 = MAP_BOT_INDEX(p);
1561     /* ARENA_BITS must be < 32 so that the tail is a non-negative int32_t. */
1562     int32_t hi = n->arenas[i3].tail_hi;
1563     int32_t lo = n->arenas[i3].tail_lo;
1564     int32_t tail = (int32_t)(AS_UINT(p) & ARENA_SIZE_MASK);
1565     return (tail < lo) || (tail >= hi && hi != 0);
1566 }
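/* Worked example for the check above (illustrative only), reusing the
 * decimal analogy from the comment before arena_map_mark_used(): pretend
 * the arena size is 100 and an arena covers [250, 350).
 * arena_map_mark_used() then stores tail_hi = 50 in the leaf for ideal
 * block 200 and tail_lo = 50 in the leaf for ideal block 300.
 * For p = 277 (block 200, tail = 77): 77 >= hi (50) and hi != 0, so the
 * address is reported as used.  For p = 327 (block 300, tail = 27):
 * 27 < lo (50), so it is used as well.  For p = 357 (block 300,
 * tail = 57): 57 is not < lo and hi is 0, so the address is correctly
 * reported as not managed by obmalloc.
 */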
1567 
1568 /* end of radix tree logic */
1569 /*==========================================================================*/
1570 #endif /* WITH_PYMALLOC_RADIX_TREE */
1571 
1572 
1573 /* Allocate a new arena.  Return NULL if we run out of memory; otherwise
1574  * return the address of the arena_object describing the newly allocated
1575  * arena.  It's expected that the caller will set `usable_arenas` to the
1576  * return value.
1577  */
1578 static struct arena_object*
1579 new_arena(void)
1580 {
1581     struct arena_object* arenaobj;
1582     uint excess;        /* number of bytes above pool alignment */
1583     void *address;
1584     static int debug_stats = -1;
1585 
1586     if (debug_stats == -1) {
1587         const char *opt = Py_GETENV("PYTHONMALLOCSTATS");
1588         debug_stats = (opt != NULL && *opt != '\0');
1589     }
1590     if (debug_stats) {
1591         _PyObject_DebugMallocStats(stderr);
1592     }
1593 
1594     if (unused_arena_objects == NULL) {
1595         uint i;
1596         uint numarenas;
1597         size_t nbytes;
1598 
1599         /* Double the number of arena objects on each allocation.
1600          * Note that it's possible for `numarenas` to overflow.
1601          */
1602         numarenas = maxarenas ? maxarenas << 1 : INITIAL_ARENA_OBJECTS;
1603         if (numarenas <= maxarenas)
1604             return NULL;                /* overflow */
1605 #if SIZEOF_SIZE_T <= SIZEOF_INT
1606         if (numarenas > SIZE_MAX / sizeof(*arenas))
1607             return NULL;                /* overflow */
1608 #endif
1609         nbytes = numarenas * sizeof(*arenas);
1610         arenaobj = (struct arena_object *)PyMem_RawRealloc(arenas, nbytes);
1611         if (arenaobj == NULL)
1612             return NULL;
1613         arenas = arenaobj;
1614 
1615         /* We might need to fix pointers that were copied.  However,
1616          * new_arena only gets called when all the pages in the
1617          * previous arenas are full.  Thus, there are *no* pointers
1618          * into the old array. Thus, we don't have to worry about
1619          * invalid pointers.  Just to be sure, some asserts:
1620          */
1621         assert(usable_arenas == NULL);
1622         assert(unused_arena_objects == NULL);
1623 
1624         /* Put the new arenas on the unused_arena_objects list. */
1625         for (i = maxarenas; i < numarenas; ++i) {
1626             arenas[i].address = 0;              /* mark as unassociated */
1627             arenas[i].nextarena = i < numarenas - 1 ?
1628                                    &arenas[i+1] : NULL;
1629         }
1630 
1631         /* Update globals. */
1632         unused_arena_objects = &arenas[maxarenas];
1633         maxarenas = numarenas;
1634     }
1635 
1636     /* Take the next available arena object off the head of the list. */
1637     assert(unused_arena_objects != NULL);
1638     arenaobj = unused_arena_objects;
1639     unused_arena_objects = arenaobj->nextarena;
1640     assert(arenaobj->address == 0);
1641     address = _PyObject_Arena.alloc(_PyObject_Arena.ctx, ARENA_SIZE);
1642 #if WITH_PYMALLOC_RADIX_TREE
1643     if (address != NULL) {
1644         if (!arena_map_mark_used((uintptr_t)address, 1)) {
1645             /* marking arena in radix tree failed, abort */
1646             _PyObject_Arena.free(_PyObject_Arena.ctx, address, ARENA_SIZE);
1647             address = NULL;
1648         }
1649     }
1650 #endif
1651     if (address == NULL) {
1652         /* The allocation failed: return NULL after putting the
1653          * arenaobj back.
1654          */
1655         arenaobj->nextarena = unused_arena_objects;
1656         unused_arena_objects = arenaobj;
1657         return NULL;
1658     }
1659     arenaobj->address = (uintptr_t)address;
1660 
1661     ++narenas_currently_allocated;
1662     ++ntimes_arena_allocated;
1663     if (narenas_currently_allocated > narenas_highwater)
1664         narenas_highwater = narenas_currently_allocated;
1665     arenaobj->freepools = NULL;
1666     /* pool_address <- first pool-aligned address in the arena
1667        nfreepools <- number of whole pools that fit after alignment */
1668     arenaobj->pool_address = (block*)arenaobj->address;
1669     arenaobj->nfreepools = MAX_POOLS_IN_ARENA;
1670     excess = (uint)(arenaobj->address & POOL_SIZE_MASK);
1671     if (excess != 0) {
1672         --arenaobj->nfreepools;
1673         arenaobj->pool_address += POOL_SIZE - excess;
1674     }
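    /* Illustrative example (POOL_SIZE == 4 KiB is an assumption here; the
     * real value depends on the build configuration): if the arena address
     * ends in 0xd00, then excess == 0xd00, one whole pool is given up, and
     * pool_address is advanced by 0x1000 - 0xd00 == 0x300 so that the first
     * usable pool starts on a pool-aligned boundary. */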
1675     arenaobj->ntotalpools = arenaobj->nfreepools;
1676 
1677     return arenaobj;
1678 }
1679 
1680 
1681 
1682 #if WITH_PYMALLOC_RADIX_TREE
1683 /* Return true if and only if P is an address that was allocated by
1684    pymalloc.  When the radix tree is used, the 'pool' argument is unused.
1685  */
1686 static bool
1687 address_in_range(void *p, poolp pool)
1688 {
1689     return arena_map_is_used(p);
1690 }
1691 #else
1692 /*
1693 address_in_range(P, POOL)
1694 
1695 Return true if and only if P is an address that was allocated by pymalloc.
1696 POOL must be the pool address associated with P, i.e., POOL = POOL_ADDR(P)
1697 (the caller is asked to compute this because the macro expands POOL more than
1698 once, and for efficiency it's best for the caller to assign POOL_ADDR(P) to a
1699 variable and pass the latter to the macro; because address_in_range is
1700 called on every alloc/realloc/free, micro-efficiency is important here).
1701 
1702 Tricky:  Let B be the arena base address associated with the pool, B =
1703 arenas[(POOL)->arenaindex].address.  Then P belongs to the arena if and only if
1704 
1705     B <= P < B + ARENA_SIZE
1706 
1707 Subtracting B throughout, this is true iff
1708 
1709     0 <= P-B < ARENA_SIZE
1710 
1711 By using unsigned arithmetic, the "0 <=" half of the test can be skipped.
1712 
1713 Obscure:  A PyMem "free memory" function can call the pymalloc free or realloc
1714 before the first arena has been allocated.  `arenas` is still NULL in that
1715 case.  We're relying on that maxarenas is also 0 in that case, so that
1716 (POOL)->arenaindex < maxarenas  must be false, saving us from trying to index
1717 into a NULL arenas.
1718 
1719 Details:  given P and POOL, the arena_object corresponding to P is AO =
1720 arenas[(POOL)->arenaindex].  Suppose obmalloc controls P.  Then (barring wild
1721 stores, etc), POOL is the correct address of P's pool, AO.address is the
1722 correct base address of the pool's arena, and P must be within ARENA_SIZE of
1723 AO.address.  In addition, AO.address is not 0 (no arena can start at address 0
1724 (NULL)).  Therefore address_in_range correctly reports that obmalloc
1725 controls P.
1726 
1727 Now suppose obmalloc does not control P (e.g., P was obtained via a direct
1728 call to the system malloc() or realloc()).  (POOL)->arenaindex may be anything
1729 in this case -- it may even be uninitialized trash.  If the trash arenaindex
1730 is >= maxarenas, the macro correctly concludes at once that obmalloc doesn't
1731 control P.
1732 
1733 Else arenaindex is < maxarenas, and AO is read up.  If AO corresponds to an
1734 allocated arena, obmalloc controls all the memory in slice AO.address :
1735 AO.address+ARENA_SIZE.  By case assumption, P is not controlled by obmalloc,
1736 so P doesn't lie in that slice, so the macro correctly reports that P is not
1737 controlled by obmalloc.
1738 
1739 Finally, if P is not controlled by obmalloc and AO corresponds to an unused
1740 arena_object (one not currently associated with an allocated arena),
1741 AO.address is 0, and the second test in the macro reduces to:
1742 
1743     P < ARENA_SIZE
1744 
1745 If P >= ARENA_SIZE (extremely likely), the macro again correctly concludes
1746 that P is not controlled by obmalloc.  However, if P < ARENA_SIZE, this part
1747 of the test still passes, and the third clause (AO.address != 0) is necessary
1748 to get the correct result:  AO.address is 0 in this case, so the macro
1749 correctly reports that P is not controlled by obmalloc (despite that P lies in
1750 slice AO.address : AO.address + ARENA_SIZE).
1751 
1752 Note:  The third (AO.address != 0) clause was added in Python 2.5.  Before
1753 2.5, arenas were never free()'ed, and an arenaindex < maxarenas always
1754 corresponded to a currently-allocated arena, so the "P is not controlled by
1755 obmalloc, AO corresponds to an unused arena_object, and P < ARENA_SIZE" case
1756 was impossible.
1757 
1758 Note that the logic is excruciating, and reading up possibly uninitialized
1759 memory when P is not controlled by obmalloc (to get at (POOL)->arenaindex)
1760 creates problems for some memory debuggers.  The overwhelming advantage is
1761 that this test determines whether an arbitrary address is controlled by
1762 obmalloc in a small constant time, independent of the number of arenas
1763 obmalloc controls.  Since this test is needed at every entry point, it's
1764 extremely desirable that it be this fast.
1765 */
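/* A small numeric illustration of the unsigned-arithmetic trick above (the
   values are made up for the example):  with B == 0x100000 and
   ARENA_SIZE == 0x40000, a pointer P == 0xff000 lying just below the arena
   gives (uintptr_t)P - B == (uintptr_t)-0x1000, which wraps around to a
   huge unsigned value and therefore fails the "< ARENA_SIZE" comparison,
   so a single unsigned compare rejects addresses on either side of the
   arena. */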
1766 
1767 static bool _Py_NO_SANITIZE_ADDRESS
1768             _Py_NO_SANITIZE_THREAD
1769             _Py_NO_SANITIZE_MEMORY
1770 address_in_range(void *p, poolp pool)
1771 {
1772     // Since address_in_range may be reading from memory which was not allocated
1773     // by Python, it is important that pool->arenaindex is read only once, as
1774     // another thread may be concurrently modifying the value without holding
1775     // the GIL. The following dance forces the compiler to read pool->arenaindex
1776     // only once.
1777     uint arenaindex = *((volatile uint *)&pool->arenaindex);
1778     return arenaindex < maxarenas &&
1779         (uintptr_t)p - arenas[arenaindex].address < ARENA_SIZE &&
1780         arenas[arenaindex].address != 0;
1781 }
1782 
1783 #endif /* !WITH_PYMALLOC_RADIX_TREE */
1784 
1785 /*==========================================================================*/
1786 
1787 // Called when freelist is exhausted.  Extend the freelist if there is
1788 // space for a block.  Otherwise, remove this pool from usedpools.
1789 static void
1790 pymalloc_pool_extend(poolp pool, uint size)
1791 {
1792     if (UNLIKELY(pool->nextoffset <= pool->maxnextoffset)) {
1793         /* There is room for another block. */
1794         pool->freeblock = (block*)pool + pool->nextoffset;
1795         pool->nextoffset += INDEX2SIZE(size);
1796         *(block **)(pool->freeblock) = NULL;
1797         return;
1798     }
1799 
1800     /* Pool is full, unlink from used pools. */
1801     poolp next;
1802     next = pool->nextpool;
1803     pool = pool->prevpool;
1804     next->prevpool = pool;
1805     pool->nextpool = next;
1806 }
1807 
1808 /* Called when pymalloc_alloc cannot allocate a block from a used pool.
1809  * This function takes a new pool and allocates a block from it.
1810  */
1811 static void*
1812 allocate_from_new_pool(uint size)
1813 {
1814     /* There isn't a pool of the right size class immediately
1815      * available:  use a free pool.
1816      */
1817     if (UNLIKELY(usable_arenas == NULL)) {
1818         /* No arena has a free pool:  allocate a new arena. */
1819 #ifdef WITH_MEMORY_LIMITS
1820         if (narenas_currently_allocated >= MAX_ARENAS) {
1821             return NULL;
1822         }
1823 #endif
1824         usable_arenas = new_arena();
1825         if (usable_arenas == NULL) {
1826             return NULL;
1827         }
1828         usable_arenas->nextarena = usable_arenas->prevarena = NULL;
1829         assert(nfp2lasta[usable_arenas->nfreepools] == NULL);
1830         nfp2lasta[usable_arenas->nfreepools] = usable_arenas;
1831     }
1832     assert(usable_arenas->address != 0);
1833 
1834     /* This arena already had the smallest nfreepools value, so decreasing
1835      * nfreepools doesn't change that, and we don't need to rearrange the
1836      * usable_arenas list.  However, if the arena becomes wholly allocated,
1837      * we need to remove its arena_object from usable_arenas.
1838      */
1839     assert(usable_arenas->nfreepools > 0);
1840     if (nfp2lasta[usable_arenas->nfreepools] == usable_arenas) {
1841         /* It's the last arena with this nfreepools count, so there won't be any left. */
1842         nfp2lasta[usable_arenas->nfreepools] = NULL;
1843     }
1844     /* If any free pools will remain, it will be the new smallest. */
1845     if (usable_arenas->nfreepools > 1) {
1846         assert(nfp2lasta[usable_arenas->nfreepools - 1] == NULL);
1847         nfp2lasta[usable_arenas->nfreepools - 1] = usable_arenas;
1848     }
1849 
1850     /* Try to get a cached free pool. */
1851     poolp pool = usable_arenas->freepools;
1852     if (LIKELY(pool != NULL)) {
1853         /* Unlink from cached pools. */
1854         usable_arenas->freepools = pool->nextpool;
1855         usable_arenas->nfreepools--;
1856         if (UNLIKELY(usable_arenas->nfreepools == 0)) {
1857             /* Wholly allocated:  remove. */
1858             assert(usable_arenas->freepools == NULL);
1859             assert(usable_arenas->nextarena == NULL ||
1860                    usable_arenas->nextarena->prevarena ==
1861                    usable_arenas);
1862             usable_arenas = usable_arenas->nextarena;
1863             if (usable_arenas != NULL) {
1864                 usable_arenas->prevarena = NULL;
1865                 assert(usable_arenas->address != 0);
1866             }
1867         }
1868         else {
1869             /* nfreepools > 0:  it must be that freepools
1870              * isn't NULL, or that we haven't yet carved
1871              * off all the arena's pools for the first
1872              * time.
1873              */
1874             assert(usable_arenas->freepools != NULL ||
1875                    usable_arenas->pool_address <=
1876                    (block*)usable_arenas->address +
1877                        ARENA_SIZE - POOL_SIZE);
1878         }
1879     }
1880     else {
1881         /* Carve off a new pool. */
1882         assert(usable_arenas->nfreepools > 0);
1883         assert(usable_arenas->freepools == NULL);
1884         pool = (poolp)usable_arenas->pool_address;
1885         assert((block*)pool <= (block*)usable_arenas->address +
1886                                  ARENA_SIZE - POOL_SIZE);
1887         pool->arenaindex = (uint)(usable_arenas - arenas);
1888         assert(&arenas[pool->arenaindex] == usable_arenas);
1889         pool->szidx = DUMMY_SIZE_IDX;
1890         usable_arenas->pool_address += POOL_SIZE;
1891         --usable_arenas->nfreepools;
1892 
1893         if (usable_arenas->nfreepools == 0) {
1894             assert(usable_arenas->nextarena == NULL ||
1895                    usable_arenas->nextarena->prevarena ==
1896                    usable_arenas);
1897             /* Unlink the arena:  it is completely allocated. */
1898             usable_arenas = usable_arenas->nextarena;
1899             if (usable_arenas != NULL) {
1900                 usable_arenas->prevarena = NULL;
1901                 assert(usable_arenas->address != 0);
1902             }
1903         }
1904     }
1905 
1906     /* Frontlink to used pools. */
1907     block *bp;
1908     poolp next = usedpools[size + size]; /* == prev */
1909     pool->nextpool = next;
1910     pool->prevpool = next;
1911     next->nextpool = pool;
1912     next->prevpool = pool;
1913     pool->ref.count = 1;
1914     if (pool->szidx == size) {
1915         /* Luckily, this pool last contained blocks
1916          * of the same size class, so its header
1917          * and free list are already initialized.
1918          */
1919         bp = pool->freeblock;
1920         assert(bp != NULL);
1921         pool->freeblock = *(block **)bp;
1922         return bp;
1923     }
1924     /*
1925      * Initialize the pool header, set up the free list to
1926      * contain just the second block, and return the first
1927      * block.
1928      */
1929     pool->szidx = size;
1930     size = INDEX2SIZE(size);
1931     bp = (block *)pool + POOL_OVERHEAD;
1932     pool->nextoffset = POOL_OVERHEAD + (size << 1);
1933     pool->maxnextoffset = POOL_SIZE - size;
1934     pool->freeblock = bp + size;
1935     *(block **)(pool->freeblock) = NULL;
1936     return bp;
1937 }
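/* Worked example of the header initialization above (the numbers are
 * assumptions for illustration only: POOL_OVERHEAD == 48 and a 32-byte
 * size class):  the first block handed out is bp == (block *)pool + 48,
 * the initial free list holds the single block at pool + 80, nextoffset
 * is 48 + 2*32 == 112 (where the third block will be carved off), and
 * maxnextoffset is POOL_SIZE - 32, the last offset at which a whole block
 * still fits inside the pool.
 */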
1938 
1939 /* pymalloc allocator
1940 
1941    Return a pointer to newly allocated memory if pymalloc allocated memory.
1942 
1943    Return NULL if pymalloc failed to allocate the memory block: the request is
1944    too big, an error occurred in the code below, or the max memory limit has been
1945    reached (the caller then falls back to the raw allocator to serve the request).
1946 */
1947 static inline void*
1948 pymalloc_alloc(void *ctx, size_t nbytes)
1949 {
1950 #ifdef WITH_VALGRIND
1951     if (UNLIKELY(running_on_valgrind == -1)) {
1952         running_on_valgrind = RUNNING_ON_VALGRIND;
1953     }
1954     if (UNLIKELY(running_on_valgrind)) {
1955         return NULL;
1956     }
1957 #endif
1958 
1959     if (UNLIKELY(nbytes == 0)) {
1960         return NULL;
1961     }
1962     if (UNLIKELY(nbytes > SMALL_REQUEST_THRESHOLD)) {
1963         return NULL;
1964     }
1965 
1966     uint size = (uint)(nbytes - 1) >> ALIGNMENT_SHIFT;
1967     poolp pool = usedpools[size + size];
1968     block *bp;
1969 
1970     if (LIKELY(pool != pool->nextpool)) {
1971         /*
1972          * There is a used pool for this size class.
1973          * Pick up the head block of its free list.
1974          */
1975         ++pool->ref.count;
1976         bp = pool->freeblock;
1977         assert(bp != NULL);
1978 
1979         if (UNLIKELY((pool->freeblock = *(block **)bp) == NULL)) {
1980             // Reached the end of the free list, try to extend it.
1981             pymalloc_pool_extend(pool, size);
1982         }
1983     }
1984     else {
1985         /* There isn't a pool of the right size class immediately
1986          * available:  use a free pool.
1987          */
1988         bp = allocate_from_new_pool(size);
1989     }
1990 
1991     return (void *)bp;
1992 }
1993 
1994 
1995 static void *
1996 _PyObject_Malloc(void *ctx, size_t nbytes)
1997 {
1998     void* ptr = pymalloc_alloc(ctx, nbytes);
1999     if (LIKELY(ptr != NULL)) {
2000         return ptr;
2001     }
2002 
2003     ptr = PyMem_RawMalloc(nbytes);
2004     if (ptr != NULL) {
2005         raw_allocated_blocks++;
2006     }
2007     return ptr;
2008 }
2009 
2010 
2011 static void *
2012 _PyObject_Calloc(void *ctx, size_t nelem, size_t elsize)
2013 {
2014     assert(elsize == 0 || nelem <= (size_t)PY_SSIZE_T_MAX / elsize);
2015     size_t nbytes = nelem * elsize;
2016 
2017     void* ptr = pymalloc_alloc(ctx, nbytes);
2018     if (LIKELY(ptr != NULL)) {
2019         memset(ptr, 0, nbytes);
2020         return ptr;
2021     }
2022 
2023     ptr = PyMem_RawCalloc(nelem, elsize);
2024     if (ptr != NULL) {
2025         raw_allocated_blocks++;
2026     }
2027     return ptr;
2028 }
2029 
2030 
2031 static void
2032 insert_to_usedpool(poolp pool)
2033 {
2034     assert(pool->ref.count > 0);            /* else the pool is empty */
2035 
2036     uint size = pool->szidx;
2037     poolp next = usedpools[size + size];
2038     poolp prev = next->prevpool;
2039 
2040     /* insert pool before next:   prev <-> pool <-> next */
2041     pool->nextpool = next;
2042     pool->prevpool = prev;
2043     next->prevpool = pool;
2044     prev->nextpool = pool;
2045 }
2046 
2047 static void
2048 insert_to_freepool(poolp pool)
2049 {
2050     poolp next = pool->nextpool;
2051     poolp prev = pool->prevpool;
2052     next->prevpool = prev;
2053     prev->nextpool = next;
2054 
2055     /* Link the pool to freepools.  This is a singly-linked
2056      * list, and pool->prevpool isn't used there.
2057      */
2058     struct arena_object *ao = &arenas[pool->arenaindex];
2059     pool->nextpool = ao->freepools;
2060     ao->freepools = pool;
2061     uint nf = ao->nfreepools;
2062     /* If this is the rightmost arena with this number of free pools,
2063      * nfp2lasta[nf] needs to change.  Caution:  if nf is 0, there
2064      * are no arenas in usable_arenas with that value.
2065      */
2066     struct arena_object* lastnf = nfp2lasta[nf];
2067     assert((nf == 0 && lastnf == NULL) ||
2068            (nf > 0 &&
2069             lastnf != NULL &&
2070             lastnf->nfreepools == nf &&
2071             (lastnf->nextarena == NULL ||
2072              nf < lastnf->nextarena->nfreepools)));
2073     if (lastnf == ao) {  /* it is the rightmost */
2074         struct arena_object* p = ao->prevarena;
2075         nfp2lasta[nf] = (p != NULL && p->nfreepools == nf) ? p : NULL;
2076     }
2077     ao->nfreepools = ++nf;
2078 
2079     /* All the rest is arena management.  We just freed
2080      * a pool, and there are 4 cases for arena mgmt:
2081      * 1. If all the pools are free, return the arena to
2082      *    the system free().  Except if this is the last
2083      *    arena in the list, keep it to avoid thrashing:
2084      *    keeping one wholly free arena in the list avoids
2085      *    pathological cases where a simple loop would
2086      *    otherwise provoke needing to allocate and free an
2087      *    arena on every iteration.  See bpo-37257.
2088      * 2. If this is the only free pool in the arena,
2089      *    add the arena back to the `usable_arenas` list.
2090      * 3. If the "next" arena has a smaller count of free
2091      *    pools, we have to "slide this arena right" to
2092      *    restore that usable_arenas is sorted in order of
2093      *    nfreepools.
2094      * 4. Else there's nothing more to do.
2095      */
2096     if (nf == ao->ntotalpools && ao->nextarena != NULL) {
2097         /* Case 1.  First unlink ao from usable_arenas.
2098          */
2099         assert(ao->prevarena == NULL ||
2100                ao->prevarena->address != 0);
2101         assert(ao->nextarena == NULL ||
2102                ao->nextarena->address != 0);
2103 
2104         /* Fix the pointer in the prevarena, or the
2105          * usable_arenas pointer.
2106          */
2107         if (ao->prevarena == NULL) {
2108             usable_arenas = ao->nextarena;
2109             assert(usable_arenas == NULL ||
2110                    usable_arenas->address != 0);
2111         }
2112         else {
2113             assert(ao->prevarena->nextarena == ao);
2114             ao->prevarena->nextarena =
2115                 ao->nextarena;
2116         }
2117         /* Fix the pointer in the nextarena. */
2118         if (ao->nextarena != NULL) {
2119             assert(ao->nextarena->prevarena == ao);
2120             ao->nextarena->prevarena =
2121                 ao->prevarena;
2122         }
2123         /* Record that this arena_object slot is
2124          * available to be reused.
2125          */
2126         ao->nextarena = unused_arena_objects;
2127         unused_arena_objects = ao;
2128 
2129 #if WITH_PYMALLOC_RADIX_TREE
2130         /* mark arena region as not under control of obmalloc */
2131         arena_map_mark_used(ao->address, 0);
2132 #endif
2133 
2134         /* Free the entire arena. */
2135         _PyObject_Arena.free(_PyObject_Arena.ctx,
2136                              (void *)ao->address, ARENA_SIZE);
2137         ao->address = 0;                        /* mark unassociated */
2138         --narenas_currently_allocated;
2139 
2140         return;
2141     }
2142 
2143     if (nf == 1) {
2144         /* Case 2.  Put ao at the head of
2145          * usable_arenas.  Note that because
2146          * ao->nfreepools was 0 before, ao isn't
2147          * currently on the usable_arenas list.
2148          */
2149         ao->nextarena = usable_arenas;
2150         ao->prevarena = NULL;
2151         if (usable_arenas)
2152             usable_arenas->prevarena = ao;
2153         usable_arenas = ao;
2154         assert(usable_arenas->address != 0);
2155         if (nfp2lasta[1] == NULL) {
2156             nfp2lasta[1] = ao;
2157         }
2158 
2159         return;
2160     }
2161 
2162     /* If this arena is now out of order, we need to keep
2163      * the list sorted.  The list is kept sorted so that
2164      * the "most full" arenas are used first, which allows
2165      * the nearly empty arenas to be completely freed.  In
2166      * a few un-scientific tests, it seems like this
2167      * approach allowed a lot more memory to be freed.
2168      */
2169     /* If this is the only arena with nf, record that. */
2170     if (nfp2lasta[nf] == NULL) {
2171         nfp2lasta[nf] = ao;
2172     } /* else the rightmost with nf doesn't change */
2173     /* If this was the rightmost of the old size, it remains in place. */
2174     if (ao == lastnf) {
2175         /* Case 4.  Nothing to do. */
2176         return;
2177     }
2178     /* If ao were the only arena in the list, the last block would have
2179      * gotten us out.
2180      */
2181     assert(ao->nextarena != NULL);
2182 
2183     /* Case 3:  We have to move the arena towards the end of the list,
2184      * because it has more free pools than the arena to its right.  It needs
2185      * to move to follow lastnf.
2186      * First unlink ao from usable_arenas.
2187      */
2188     if (ao->prevarena != NULL) {
2189         /* ao isn't at the head of the list */
2190         assert(ao->prevarena->nextarena == ao);
2191         ao->prevarena->nextarena = ao->nextarena;
2192     }
2193     else {
2194         /* ao is at the head of the list */
2195         assert(usable_arenas == ao);
2196         usable_arenas = ao->nextarena;
2197     }
2198     ao->nextarena->prevarena = ao->prevarena;
2199     /* And insert after lastnf. */
2200     ao->prevarena = lastnf;
2201     ao->nextarena = lastnf->nextarena;
2202     if (ao->nextarena != NULL) {
2203         ao->nextarena->prevarena = ao;
2204     }
2205     lastnf->nextarena = ao;
2206     /* Verify that the swaps worked. */
2207     assert(ao->nextarena == NULL || nf <= ao->nextarena->nfreepools);
2208     assert(ao->prevarena == NULL || nf > ao->prevarena->nfreepools);
2209     assert(ao->nextarena == NULL || ao->nextarena->prevarena == ao);
2210     assert((usable_arenas == ao && ao->prevarena == NULL)
2211            || ao->prevarena->nextarena == ao);
2212 }
2213 
2214 /* Free a memory block allocated by pymalloc_alloc().
2215    Return 1 if it was freed.
2216    Return 0 if the block was not allocated by pymalloc_alloc(). */
2217 static inline int
2218 pymalloc_free(void *ctx, void *p)
2219 {
2220     assert(p != NULL);
2221 
2222 #ifdef WITH_VALGRIND
2223     if (UNLIKELY(running_on_valgrind > 0)) {
2224         return 0;
2225     }
2226 #endif
2227 
2228     poolp pool = POOL_ADDR(p);
2229     if (UNLIKELY(!address_in_range(p, pool))) {
2230         return 0;
2231     }
2232     /* We allocated this address. */
2233 
2234     /* Link p to the start of the pool's freeblock list.  Since
2235      * the pool had at least the p block outstanding, the pool
2236      * wasn't empty (so it's already in a usedpools[] list, or
2237      * was full and is in no list -- it's not in the freeblocks
2238      * list in any case).
2239      */
2240     assert(pool->ref.count > 0);            /* else it was empty */
2241     block *lastfree = pool->freeblock;
2242     *(block **)p = lastfree;
2243     pool->freeblock = (block *)p;
2244     pool->ref.count--;
2245 
2246     if (UNLIKELY(lastfree == NULL)) {
2247         /* Pool was full, so doesn't currently live in any list:
2248          * link it to the front of the appropriate usedpools[] list.
2249          * This mimics LRU pool usage for new allocations and
2250          * targets optimal filling when several pools contain
2251          * blocks of the same size class.
2252          */
2253         insert_to_usedpool(pool);
2254         return 1;
2255     }
2256 
2257     /* freeblock wasn't NULL, so the pool wasn't full,
2258      * and the pool is in a usedpools[] list.
2259      */
2260     if (LIKELY(pool->ref.count != 0)) {
2261         /* pool isn't empty:  leave it in usedpools */
2262         return 1;
2263     }
2264 
2265     /* Pool is now empty:  unlink from usedpools, and
2266      * link to the front of freepools.  This ensures that
2267      * previously freed pools will be allocated later
2268      * (being not referenced, they are perhaps paged out).
2269      */
2270     insert_to_freepool(pool);
2271     return 1;
2272 }
2273 
2274 
2275 static void
2276 _PyObject_Free(void *ctx, void *p)
2277 {
2278     /* PyObject_Free(NULL) has no effect */
2279     if (p == NULL) {
2280         return;
2281     }
2282 
2283     if (UNLIKELY(!pymalloc_free(ctx, p))) {
2284         /* pymalloc didn't allocate this address */
2285         PyMem_RawFree(p);
2286         raw_allocated_blocks--;
2287     }
2288 }
2289 
2290 
2291 /* pymalloc realloc.
2292 
2293    If nbytes==0, then as the Python docs promise, we do not treat this like
2294    free(p), and return a non-NULL result.
2295 
2296    Return 1 if pymalloc reallocated memory and wrote the new pointer into
2297    newptr_p.
2298 
2299    Return 0 if pymalloc didn't allocate p. */
2300 static int
2301 pymalloc_realloc(void *ctx, void **newptr_p, void *p, size_t nbytes)
2302 {
2303     void *bp;
2304     poolp pool;
2305     size_t size;
2306 
2307     assert(p != NULL);
2308 
2309 #ifdef WITH_VALGRIND
2310     /* Treat running_on_valgrind == -1 the same as 0 */
2311     if (UNLIKELY(running_on_valgrind > 0)) {
2312         return 0;
2313     }
2314 #endif
2315 
2316     pool = POOL_ADDR(p);
2317     if (!address_in_range(p, pool)) {
2318         /* pymalloc is not managing this block.
2319 
2320            If nbytes <= SMALL_REQUEST_THRESHOLD, it's tempting to try to take
2321            over this block.  However, if we do, we need to copy the valid data
2322            from the C-managed block to one of our blocks, and there's no
2323            portable way to know how much of the memory space starting at p is
2324            valid.
2325 
2326            As bug 1185883 pointed out the hard way, it's possible that the
2327            C-managed block is "at the end" of allocated VM space, so that a
2328            memory fault can occur if we try to copy nbytes bytes starting at p.
2329            Instead we punt: let C continue to manage this block. */
2330         return 0;
2331     }
2332 
2333     /* pymalloc is in charge of this block */
2334     size = INDEX2SIZE(pool->szidx);
2335     if (nbytes <= size) {
2336         /* The block is staying the same or shrinking.
2337 
2338            If it's shrinking, there's a tradeoff: it costs cycles to copy the
2339            block to a smaller size class, but it wastes memory not to copy it.
2340 
2341            The compromise here is to copy on shrink only if at least 25% of
2342            size can be shaved off. */
2343         if (4 * nbytes > 3 * size) {
2344             /* It's the same, or shrinking and new/old > 3/4. */
2345             *newptr_p = p;
2346             return 1;
2347         }
2348         size = nbytes;
2349     }
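    /* For example (illustrative numbers): with size == 512, a shrink to
     * nbytes == 400 keeps the block in place (4*400 == 1600 > 3*512 == 1536),
     * while a shrink to nbytes == 300 takes the copy path below
     * (4*300 == 1200 <= 1536) and moves the data to a smaller size class. */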
2350 
2351     bp = _PyObject_Malloc(ctx, nbytes);
2352     if (bp != NULL) {
2353         memcpy(bp, p, size);
2354         _PyObject_Free(ctx, p);
2355     }
2356     *newptr_p = bp;
2357     return 1;
2358 }
2359 
2360 
2361 static void *
2362 _PyObject_Realloc(void *ctx, void *ptr, size_t nbytes)
2363 {
2364     void *ptr2;
2365 
2366     if (ptr == NULL) {
2367         return _PyObject_Malloc(ctx, nbytes);
2368     }
2369 
2370     if (pymalloc_realloc(ctx, &ptr2, ptr, nbytes)) {
2371         return ptr2;
2372     }
2373 
2374     return PyMem_RawRealloc(ptr, nbytes);
2375 }
2376 
2377 #else   /* ! WITH_PYMALLOC */
2378 
2379 /*==========================================================================*/
2380 /* pymalloc not enabled:  Redirect the entry points to malloc.  These will
2381  * only be used by extensions that are compiled with pymalloc enabled. */
2382 
2383 Py_ssize_t
2384 _Py_GetAllocatedBlocks(void)
2385 {
2386     return 0;
2387 }
2388 
2389 #endif /* WITH_PYMALLOC */
2390 
2391 
2392 /*==========================================================================*/
2393 /* A cross-platform debugging allocator.  This doesn't manage memory directly;
2394  * it wraps a real allocator, adding extra debugging info to the memory blocks.
2395  */
2396 
2397 /* Uncomment this define to add the "serialno" field */
2398 /* #define PYMEM_DEBUG_SERIALNO */
2399 
2400 #ifdef PYMEM_DEBUG_SERIALNO
2401 static size_t serialno = 0;     /* incremented on each debug {m,re}alloc */
2402 
2403 /* serialno is always incremented via calling this routine.  The point is
2404  * to supply a single place to set a breakpoint.
2405  */
2406 static void
2407 bumpserialno(void)
2408 {
2409     ++serialno;
2410 }
2411 #endif
2412 
2413 #define SST SIZEOF_SIZE_T
2414 
2415 #ifdef PYMEM_DEBUG_SERIALNO
2416 #  define PYMEM_DEBUG_EXTRA_BYTES 4 * SST
2417 #else
2418 #  define PYMEM_DEBUG_EXTRA_BYTES 3 * SST
2419 #endif
2420 
2421 /* Read sizeof(size_t) bytes at p as a big-endian size_t. */
2422 static size_t
2423 read_size_t(const void *p)
2424 {
2425     const uint8_t *q = (const uint8_t *)p;
2426     size_t result = *q++;
2427     int i;
2428 
2429     for (i = SST; --i > 0; ++q)
2430         result = (result << 8) | *q;
2431     return result;
2432 }
2433 
2434 /* Write n as a big-endian size_t, MSB at address p, LSB at
2435  * p + sizeof(size_t) - 1.
2436  */
2437 static void
2438 write_size_t(void *p, size_t n)
2439 {
2440     uint8_t *q = (uint8_t *)p + SST - 1;
2441     int i;
2442 
2443     for (i = SST; --i >= 0; --q) {
2444         *q = (uint8_t)(n & 0xff);
2445         n >>= 8;
2446     }
2447 }
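/* Illustrative sketch of the round trip through the two helpers above
 * (not compiled; SST is sizeof(size_t) on the build machine, assumed to
 * be 8 here, and the function name is hypothetical):
 */
#if 0   /* example only, never compiled */
static void
example_size_t_roundtrip(void)
{
    uint8_t buf[SST];
    /* Stored big-endian: 00 00 00 00 00 00 12 34 when SST == 8. */
    write_size_t(buf, 0x1234);
    assert(read_size_t(buf) == 0x1234);
}
#endif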
2448 
2449 /* Let S = sizeof(size_t).  The debug malloc asks for 4 * S extra bytes and
2450    fills them with useful stuff, here calling the underlying malloc's result p:
2451 
2452 p[0: S]
2453     Number of bytes originally asked for.  This is a size_t, big-endian (easier
2454     to read in a memory dump).
2455 p[S]
2456     API ID.  See PEP 445.  This is a character, but seems undocumented.
2457 p[S+1: 2*S]
2458     Copies of PYMEM_FORBIDDENBYTE.  Used to catch under-writes and reads.
2459 p[2*S: 2*S+n]
2460     The requested memory, filled with copies of PYMEM_CLEANBYTE.
2461     Used to catch references to uninitialized memory.
2462     &p[2*S] is returned.  Note that this is 8-byte aligned if pymalloc
2463     handled the request itself.
2464 p[2*S+n: 2*S+n+S]
2465     Copies of PYMEM_FORBIDDENBYTE.  Used to catch over-writes and reads.
2466 p[2*S+n+S: 2*S+n+2*S]
2467     A serial number, incremented by 1 on each call to _PyMem_DebugMalloc
2468     and _PyMem_DebugRealloc.
2469     This is a big-endian size_t.
2470     If "bad memory" is detected later, the serial number gives an
2471     excellent way to set a breakpoint on the next run, to capture the
2472     instant at which this block was passed out.
2473 
2474 If PYMEM_DEBUG_SERIALNO is not defined (default), the debug malloc only asks
2475 for 3 * S extra bytes, and omits the last serialno field.
2476 */
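/* Worked example of the layout above (illustrative, assuming S == 8, a
   request of n == 10 bytes, and PYMEM_DEBUG_SERIALNO defined): the
   underlying allocator is asked for 10 + 4*8 == 42 bytes; p[0:8] holds 10
   as a big-endian size_t, p[8] is the API id, p[9:16] are FORBIDDENBYTEs,
   p[16:26] are the caller's bytes and &p[16] is returned, p[26:34] are
   FORBIDDENBYTEs, and p[34:42] hold the serial number. */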
2477 
2478 static void *
2479 _PyMem_DebugRawAlloc(int use_calloc, void *ctx, size_t nbytes)
2480 {
2481     debug_alloc_api_t *api = (debug_alloc_api_t *)ctx;
2482     uint8_t *p;           /* base address of malloc'ed pad block */
2483     uint8_t *data;        /* p + 2*SST == pointer to data bytes */
2484     uint8_t *tail;        /* data + nbytes == pointer to tail pad bytes */
2485     size_t total;         /* nbytes + PYMEM_DEBUG_EXTRA_BYTES */
2486 
2487     if (nbytes > (size_t)PY_SSIZE_T_MAX - PYMEM_DEBUG_EXTRA_BYTES) {
2488         /* integer overflow: can't represent total as a Py_ssize_t */
2489         return NULL;
2490     }
2491     total = nbytes + PYMEM_DEBUG_EXTRA_BYTES;
2492 
2493     /* Layout: [SSSS IFFF CCCC...CCCC FFFF NNNN]
2494                 ^--- p    ^--- data   ^--- tail
2495        S: nbytes stored as size_t
2496        I: API identifier (1 byte)
2497        F: Forbidden bytes (size_t - 1 bytes before, size_t bytes after)
2498        C: Clean bytes used later to store actual data
2499        N: Serial number stored as size_t
2500 
2501        If PYMEM_DEBUG_SERIALNO is not defined (default), the last NNNN field
2502        is omitted. */
2503 
2504     if (use_calloc) {
2505         p = (uint8_t *)api->alloc.calloc(api->alloc.ctx, 1, total);
2506     }
2507     else {
2508         p = (uint8_t *)api->alloc.malloc(api->alloc.ctx, total);
2509     }
2510     if (p == NULL) {
2511         return NULL;
2512     }
2513     data = p + 2*SST;
2514 
2515 #ifdef PYMEM_DEBUG_SERIALNO
2516     bumpserialno();
2517 #endif
2518 
2519     /* at p, write size (SST bytes), id (1 byte), pad (SST-1 bytes) */
2520     write_size_t(p, nbytes);
2521     p[SST] = (uint8_t)api->api_id;
2522     memset(p + SST + 1, PYMEM_FORBIDDENBYTE, SST-1);
2523 
2524     if (nbytes > 0 && !use_calloc) {
2525         memset(data, PYMEM_CLEANBYTE, nbytes);
2526     }
2527 
2528     /* at tail, write pad (SST bytes) and serialno (SST bytes) */
2529     tail = data + nbytes;
2530     memset(tail, PYMEM_FORBIDDENBYTE, SST);
2531 #ifdef PYMEM_DEBUG_SERIALNO
2532     write_size_t(tail + SST, serialno);
2533 #endif
2534 
2535     return data;
2536 }
2537 
2538 static void *
2539 _PyMem_DebugRawMalloc(void *ctx, size_t nbytes)
2540 {
2541     return _PyMem_DebugRawAlloc(0, ctx, nbytes);
2542 }
2543 
2544 static void *
2545 _PyMem_DebugRawCalloc(void *ctx, size_t nelem, size_t elsize)
2546 {
2547     size_t nbytes;
2548     assert(elsize == 0 || nelem <= (size_t)PY_SSIZE_T_MAX / elsize);
2549     nbytes = nelem * elsize;
2550     return _PyMem_DebugRawAlloc(1, ctx, nbytes);
2551 }
2552 
2553 
2554 /* The debug free first checks the 2*SST bytes on each end for sanity (in
2555    particular, that the FORBIDDENBYTEs with the api ID are still intact).
2556    Then fills the original bytes with PYMEM_DEADBYTE.
2557    Then calls the underlying free.
2558 */
2559 static void
2560 _PyMem_DebugRawFree(void *ctx, void *p)
2561 {
2562     /* PyMem_Free(NULL) has no effect */
2563     if (p == NULL) {
2564         return;
2565     }
2566 
2567     debug_alloc_api_t *api = (debug_alloc_api_t *)ctx;
2568     uint8_t *q = (uint8_t *)p - 2*SST;  /* address returned from malloc */
2569     size_t nbytes;
2570 
2571     _PyMem_DebugCheckAddress(__func__, api->api_id, p);
2572     nbytes = read_size_t(q);
2573     nbytes += PYMEM_DEBUG_EXTRA_BYTES;
2574     memset(q, PYMEM_DEADBYTE, nbytes);
2575     api->alloc.free(api->alloc.ctx, q);
2576 }
2577 
2578 
2579 static void *
2580 _PyMem_DebugRawRealloc(void *ctx, void *p, size_t nbytes)
2581 {
2582     if (p == NULL) {
2583         return _PyMem_DebugRawAlloc(0, ctx, nbytes);
2584     }
2585 
2586     debug_alloc_api_t *api = (debug_alloc_api_t *)ctx;
2587     uint8_t *head;        /* base address of malloc'ed pad block */
2588     uint8_t *data;        /* pointer to data bytes */
2589     uint8_t *r;
2590     uint8_t *tail;        /* data + nbytes == pointer to tail pad bytes */
2591     size_t total;         /* nbytes + PYMEM_DEBUG_EXTRA_BYTES */
2592     size_t original_nbytes;
2593 #define ERASED_SIZE 64
2594     uint8_t save[2*ERASED_SIZE];  /* A copy of erased bytes. */
2595 
2596     _PyMem_DebugCheckAddress(__func__, api->api_id, p);
2597 
2598     data = (uint8_t *)p;
2599     head = data - 2*SST;
2600     original_nbytes = read_size_t(head);
2601     if (nbytes > (size_t)PY_SSIZE_T_MAX - PYMEM_DEBUG_EXTRA_BYTES) {
2602         /* integer overflow: can't represent total as a Py_ssize_t */
2603         return NULL;
2604     }
2605     total = nbytes + PYMEM_DEBUG_EXTRA_BYTES;
2606 
2607     tail = data + original_nbytes;
2608 #ifdef PYMEM_DEBUG_SERIALNO
2609     size_t block_serialno = read_size_t(tail + SST);
2610 #endif
2611     /* Mark the header, the trailer, ERASED_SIZE bytes at the beginning and
2612        ERASED_SIZE bytes at the end as dead, and save a copy of the erased bytes.
2613      */
2614     if (original_nbytes <= sizeof(save)) {
2615         memcpy(save, data, original_nbytes);
2616         memset(data - 2 * SST, PYMEM_DEADBYTE,
2617                original_nbytes + PYMEM_DEBUG_EXTRA_BYTES);
2618     }
2619     else {
2620         memcpy(save, data, ERASED_SIZE);
2621         memset(head, PYMEM_DEADBYTE, ERASED_SIZE + 2 * SST);
2622         memcpy(&save[ERASED_SIZE], tail - ERASED_SIZE, ERASED_SIZE);
2623         memset(tail - ERASED_SIZE, PYMEM_DEADBYTE,
2624                ERASED_SIZE + PYMEM_DEBUG_EXTRA_BYTES - 2 * SST);
2625     }
2626 
2627     /* Resize and add decorations. */
2628     r = (uint8_t *)api->alloc.realloc(api->alloc.ctx, head, total);
2629     if (r == NULL) {
2630         /* if realloc() failed: rewrite header and footer which have
2631            just been erased */
2632         nbytes = original_nbytes;
2633     }
2634     else {
2635         head = r;
2636 #ifdef PYMEM_DEBUG_SERIALNO
2637         bumpserialno();
2638         block_serialno = serialno;
2639 #endif
2640     }
2641     data = head + 2*SST;
2642 
2643     write_size_t(head, nbytes);
2644     head[SST] = (uint8_t)api->api_id;
2645     memset(head + SST + 1, PYMEM_FORBIDDENBYTE, SST-1);
2646 
2647     tail = data + nbytes;
2648     memset(tail, PYMEM_FORBIDDENBYTE, SST);
2649 #ifdef PYMEM_DEBUG_SERIALNO
2650     write_size_t(tail + SST, block_serialno);
2651 #endif
2652 
2653     /* Restore saved bytes. */
2654     if (original_nbytes <= sizeof(save)) {
2655         memcpy(data, save, Py_MIN(nbytes, original_nbytes));
2656     }
2657     else {
2658         size_t i = original_nbytes - ERASED_SIZE;
2659         memcpy(data, save, Py_MIN(nbytes, ERASED_SIZE));
2660         if (nbytes > i) {
2661             memcpy(data + i, &save[ERASED_SIZE],
2662                    Py_MIN(nbytes - i, ERASED_SIZE));
2663         }
2664     }
2665 
2666     if (r == NULL) {
2667         return NULL;
2668     }
2669 
2670     if (nbytes > original_nbytes) {
2671         /* growing: mark new extra memory clean */
2672         memset(data + original_nbytes, PYMEM_CLEANBYTE,
2673                nbytes - original_nbytes);
2674     }
2675 
2676     return data;
2677 }
2678 
2679 static inline void
2680 _PyMem_DebugCheckGIL(const char *func)
2681 {
2682     if (!PyGILState_Check()) {
2683         _Py_FatalErrorFunc(func,
2684                            "Python memory allocator called "
2685                            "without holding the GIL");
2686     }
2687 }
2688 
2689 static void *
2690 _PyMem_DebugMalloc(void *ctx, size_t nbytes)
2691 {
2692     _PyMem_DebugCheckGIL(__func__);
2693     return _PyMem_DebugRawMalloc(ctx, nbytes);
2694 }
2695 
2696 static void *
2697 _PyMem_DebugCalloc(void *ctx, size_t nelem, size_t elsize)
2698 {
2699     _PyMem_DebugCheckGIL(__func__);
2700     return _PyMem_DebugRawCalloc(ctx, nelem, elsize);
2701 }
2702 
2703 
2704 static void
2705 _PyMem_DebugFree(void *ctx, void *ptr)
2706 {
2707     _PyMem_DebugCheckGIL(__func__);
2708     _PyMem_DebugRawFree(ctx, ptr);
2709 }
2710 
2711 
2712 static void *
2713 _PyMem_DebugRealloc(void *ctx, void *ptr, size_t nbytes)
2714 {
2715     _PyMem_DebugCheckGIL(__func__);
2716     return _PyMem_DebugRawRealloc(ctx, ptr, nbytes);
2717 }
2718 
2719 /* Check the forbidden bytes on both ends of the memory allocated for p.
2720  * If anything is wrong, print info to stderr via _PyObject_DebugDumpAddress,
2721  * and call Py_FatalError to kill the program.
2722  * The API id is also checked.
2723  */
2724 static void
2725 _PyMem_DebugCheckAddress(const char *func, char api, const void *p)
2726 {
2727     assert(p != NULL);
2728 
2729     const uint8_t *q = (const uint8_t *)p;
2730     size_t nbytes;
2731     const uint8_t *tail;
2732     int i;
2733     char id;
2734 
2735     /* Check the API id */
2736     id = (char)q[-SST];
2737     if (id != api) {
2738         _PyObject_DebugDumpAddress(p);
2739         _Py_FatalErrorFormat(func,
2740                              "bad ID: Allocated using API '%c', "
2741                              "verified using API '%c'",
2742                              id, api);
2743     }
2744 
2745     /* Check the stuff at the start of p first:  if there's underwrite
2746      * corruption, the number-of-bytes field may be nuts, and checking
2747      * the tail could lead to a segfault then.
2748      */
2749     for (i = SST-1; i >= 1; --i) {
2750         if (*(q-i) != PYMEM_FORBIDDENBYTE) {
2751             _PyObject_DebugDumpAddress(p);
2752             _Py_FatalErrorFunc(func, "bad leading pad byte");
2753         }
2754     }
2755 
2756     nbytes = read_size_t(q - 2*SST);
2757     tail = q + nbytes;
2758     for (i = 0; i < SST; ++i) {
2759         if (tail[i] != PYMEM_FORBIDDENBYTE) {
2760             _PyObject_DebugDumpAddress(p);
2761             _Py_FatalErrorFunc(func, "bad trailing pad byte");
2762         }
2763     }
2764 }
2765 
2766 /* Display info to stderr about the memory block at p. */
2767 static void
2768 _PyObject_DebugDumpAddress(const void *p)
2769 {
2770     const uint8_t *q = (const uint8_t *)p;
2771     const uint8_t *tail;
2772     size_t nbytes;
2773     int i;
2774     int ok;
2775     char id;
2776 
2777     fprintf(stderr, "Debug memory block at address p=%p:", p);
2778     if (p == NULL) {
2779         fprintf(stderr, "\n");
2780         return;
2781     }
2782     id = (char)q[-SST];
2783     fprintf(stderr, " API '%c'\n", id);
2784 
2785     nbytes = read_size_t(q - 2*SST);
2786     fprintf(stderr, "    %zu bytes originally requested\n", nbytes);
2787 
2788     /* In case this is nuts, check the leading pad bytes first. */
2789     fprintf(stderr, "    The %d pad bytes at p-%d are ", SST-1, SST-1);
2790     ok = 1;
2791     for (i = 1; i <= SST-1; ++i) {
2792         if (*(q-i) != PYMEM_FORBIDDENBYTE) {
2793             ok = 0;
2794             break;
2795         }
2796     }
2797     if (ok)
2798         fputs("FORBIDDENBYTE, as expected.\n", stderr);
2799     else {
2800         fprintf(stderr, "not all FORBIDDENBYTE (0x%02x):\n",
2801             PYMEM_FORBIDDENBYTE);
2802         for (i = SST-1; i >= 1; --i) {
2803             const uint8_t byte = *(q-i);
2804             fprintf(stderr, "        at p-%d: 0x%02x", i, byte);
2805             if (byte != PYMEM_FORBIDDENBYTE)
2806                 fputs(" *** OUCH", stderr);
2807             fputc('\n', stderr);
2808         }
2809 
2810         fputs("    Because memory is corrupted at the start, the "
2811               "count of bytes requested\n"
2812               "       may be bogus, and checking the trailing pad "
2813               "bytes may segfault.\n", stderr);
2814     }
2815 
2816     tail = q + nbytes;
2817     fprintf(stderr, "    The %d pad bytes at tail=%p are ", SST, (void *)tail);
2818     ok = 1;
2819     for (i = 0; i < SST; ++i) {
2820         if (tail[i] != PYMEM_FORBIDDENBYTE) {
2821             ok = 0;
2822             break;
2823         }
2824     }
2825     if (ok)
2826         fputs("FORBIDDENBYTE, as expected.\n", stderr);
2827     else {
2828         fprintf(stderr, "not all FORBIDDENBYTE (0x%02x):\n",
2829                 PYMEM_FORBIDDENBYTE);
2830         for (i = 0; i < SST; ++i) {
2831             const uint8_t byte = tail[i];
2832             fprintf(stderr, "        at tail+%d: 0x%02x",
2833                     i, byte);
2834             if (byte != PYMEM_FORBIDDENBYTE)
2835                 fputs(" *** OUCH", stderr);
2836             fputc('\n', stderr);
2837         }
2838     }
2839 
2840 #ifdef PYMEM_DEBUG_SERIALNO
2841     size_t serial = read_size_t(tail + SST);
2842     fprintf(stderr,
2843             "    The block was made by call #%zu to debug malloc/realloc.\n",
2844             serial);
2845 #endif
2846 
2847     if (nbytes > 0) {
2848         i = 0;
2849         fputs("    Data at p:", stderr);
2850         /* print up to 8 bytes at the start */
2851         while (q < tail && i < 8) {
2852             fprintf(stderr, " %02x", *q);
2853             ++i;
2854             ++q;
2855         }
2856         /* and up to 8 at the end */
2857         if (q < tail) {
2858             if (tail - q > 8) {
2859                 fputs(" ...", stderr);
2860                 q = tail - 8;
2861             }
2862             while (q < tail) {
2863                 fprintf(stderr, " %02x", *q);
2864                 ++q;
2865             }
2866         }
2867         fputc('\n', stderr);
2868     }
2869     fputc('\n', stderr);
2870 
2871     fflush(stderr);
2872     _PyMem_DumpTraceback(fileno(stderr), p);
2873 }
2874 
2875 
2876 static size_t
2877 printone(FILE *out, const char* msg, size_t value)
2878 {
2879     int i, k;
2880     char buf[100];
2881     size_t origvalue = value;
2882 
2883     fputs(msg, out);
2884     for (i = (int)strlen(msg); i < 35; ++i)
2885         fputc(' ', out);
2886     fputc('=', out);
2887 
2888     /* Write the value with commas. */
2889     i = 22;
2890     buf[i--] = '\0';
2891     buf[i--] = '\n';
2892     k = 3;
2893     do {
2894         size_t nextvalue = value / 10;
2895         unsigned int digit = (unsigned int)(value - nextvalue * 10);
2896         value = nextvalue;
2897         buf[i--] = (char)(digit + '0');
2898         --k;
2899         if (k == 0 && value && i >= 0) {
2900             k = 3;
2901             buf[i--] = ',';
2902         }
2903     } while (value && i >= 0);
2904 
2905     while (i >= 0)
2906         buf[i--] = ' ';
2907     fputs(buf, out);
2908 
2909     return origvalue;
2910 }
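/* Example of printone()'s output (illustrative): a call like
 *     printone(out, "# bytes in allocated blocks", 16384132);
 * pads the message to 35 columns, writes '=', then the value right-aligned
 * with thousands separators, roughly:
 *     # bytes in allocated blocks        =           16,384,132
 * and returns the value unchanged so callers can accumulate totals.
 */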
2911 
2912 void
2913 _PyDebugAllocatorStats(FILE *out,
2914                        const char *block_name, int num_blocks, size_t sizeof_block)
2915 {
2916     char buf1[128];
2917     char buf2[128];
2918     PyOS_snprintf(buf1, sizeof(buf1),
2919                   "%d %ss * %zd bytes each",
2920                   num_blocks, block_name, sizeof_block);
2921     PyOS_snprintf(buf2, sizeof(buf2),
2922                   "%48s ", buf1);
2923     (void)printone(out, buf2, num_blocks * sizeof_block);
2924 }
2925 
2926 
2927 #ifdef WITH_PYMALLOC
2928 
2929 #ifdef Py_DEBUG
2930 /* Is target in the list?  The list is traversed via the nextpool pointers.
2931  * The list may be NULL-terminated, or circular.  Return 1 if target is in
2932  * list, else 0.
2933  */
2934 static int
2935 pool_is_in_list(const poolp target, poolp list)
2936 {
2937     poolp origlist = list;
2938     assert(target != NULL);
2939     if (list == NULL)
2940         return 0;
2941     do {
2942         if (target == list)
2943             return 1;
2944         list = list->nextpool;
2945     } while (list != NULL && list != origlist);
2946     return 0;
2947 }
2948 #endif
2949 
/* Print summary info to "out" about the state of pymalloc's structures.
 * In Py_DEBUG mode, also perform some expensive internal consistency
 * checks.
 *
 * Return 0 if pymalloc is not in use (nothing is written to "out" in that
 * case); return 1 otherwise.
 */
int
_PyObject_DebugMallocStats(FILE *out)
{
    if (!_PyMem_PymallocEnabled()) {
        return 0;
    }

    uint i;
    const uint numclasses = SMALL_REQUEST_THRESHOLD >> ALIGNMENT_SHIFT;
    /* # of pools, allocated blocks, and free blocks per class index */
    size_t numpools[SMALL_REQUEST_THRESHOLD >> ALIGNMENT_SHIFT];
    size_t numblocks[SMALL_REQUEST_THRESHOLD >> ALIGNMENT_SHIFT];
    size_t numfreeblocks[SMALL_REQUEST_THRESHOLD >> ALIGNMENT_SHIFT];
    /* total # of allocated bytes in used and full pools */
    size_t allocated_bytes = 0;
    /* total # of available bytes in used pools */
    size_t available_bytes = 0;
    /* # of free pools + pools not yet carved out of current arena */
    uint numfreepools = 0;
    /* # of bytes for arena alignment padding */
    size_t arena_alignment = 0;
    /* # of bytes in used and full pools used for pool_headers */
    size_t pool_header_bytes = 0;
    /* # of bytes in used and full pools wasted due to quantization,
     * i.e. the necessarily leftover space at the ends of used and
     * full pools.
     */
    size_t quantization = 0;
    /* # of arenas actually allocated. */
    size_t narenas = 0;
    /* running total -- should equal narenas * ARENA_SIZE */
    size_t total;
    char buf[128];

    fprintf(out, "Small block threshold = %d, in %u size classes.\n",
            SMALL_REQUEST_THRESHOLD, numclasses);

    for (i = 0; i < numclasses; ++i)
        numpools[i] = numblocks[i] = numfreeblocks[i] = 0;

    /* Because full pools aren't linked to from anything, it's easiest
     * to march over all the arenas.  If we're lucky, most of the memory
     * will be living in full pools -- would be a shame to miss them.
     */
    for (i = 0; i < maxarenas; ++i) {
        uintptr_t base = arenas[i].address;

        /* Skip arenas which are not allocated. */
        if (arenas[i].address == (uintptr_t)NULL)
            continue;
        narenas += 1;

        numfreepools += arenas[i].nfreepools;

        /* round up to pool alignment */
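        /* (When the arena base is not pool-aligned, the unusable bytes before
         * the first pool boundary, together with the matching remainder at
         * the end of the arena, amount to one POOL_SIZE; that waste is
         * charged to arena_alignment here.) */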
        if (base & (uintptr_t)POOL_SIZE_MASK) {
            arena_alignment += POOL_SIZE;
            base &= ~(uintptr_t)POOL_SIZE_MASK;
            base += POOL_SIZE;
        }

        /* visit every pool in the arena */
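        /* (arenas[i].pool_address marks the start of the next pool to be
         * carved from this arena, so every pool-aligned address below it has
         * already been handed out as a pool.) */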
        assert(base <= (uintptr_t) arenas[i].pool_address);
        for (; base < (uintptr_t) arenas[i].pool_address; base += POOL_SIZE) {
            poolp p = (poolp)base;
            const uint sz = p->szidx;
            uint freeblocks;

            if (p->ref.count == 0) {
                /* currently unused */
#ifdef Py_DEBUG
                assert(pool_is_in_list(p, arenas[i].freepools));
#endif
                continue;
            }
            ++numpools[sz];
            numblocks[sz] += p->ref.count;
            freeblocks = NUMBLOCKS(sz) - p->ref.count;
            numfreeblocks[sz] += freeblocks;
#ifdef Py_DEBUG
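            /* A pool that still has free blocks must be on the used-pools
             * list for its size class; usedpools[] is indexed by twice the
             * class index. */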
            if (freeblocks > 0)
                assert(pool_is_in_list(p, usedpools[sz + sz]));
#endif
        }
    }
    assert(narenas == narenas_currently_allocated);

    fputc('\n', out);
    fputs("class   size   num pools   blocks in use  avail blocks\n"
          "-----   ----   ---------   -------------  ------------\n",
          out);

    for (i = 0; i < numclasses; ++i) {
        size_t p = numpools[i];
        size_t b = numblocks[i];
        size_t f = numfreeblocks[i];
        uint size = INDEX2SIZE(i);
        if (p == 0) {
            assert(b == 0 && f == 0);
            continue;
        }
        fprintf(out, "%5u %6u %11zu %15zu %13zu\n",
                i, size, p, b, f);
        allocated_bytes += b * size;
        available_bytes += f * size;
        pool_header_bytes += p * POOL_OVERHEAD;
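        /* Whatever is left of a pool after carving it into "size"-byte
         * blocks is too small to hold another block; count it as
         * quantization waste. */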
        quantization += p * ((POOL_SIZE - POOL_OVERHEAD) % size);
    }
    fputc('\n', out);
#ifdef PYMEM_DEBUG_SERIALNO
    if (_PyMem_DebugEnabled()) {
        (void)printone(out, "# times object malloc called", serialno);
    }
#endif
    (void)printone(out, "# arenas allocated total", ntimes_arena_allocated);
    (void)printone(out, "# arenas reclaimed", ntimes_arena_allocated - narenas);
    (void)printone(out, "# arenas highwater mark", narenas_highwater);
    (void)printone(out, "# arenas allocated current", narenas);

    PyOS_snprintf(buf, sizeof(buf),
                  "%zu arenas * %d bytes/arena",
                  narenas, ARENA_SIZE);
    (void)printone(out, buf, narenas * ARENA_SIZE);

    fputc('\n', out);

    /* Account for what all of those arena bytes are being used for. */
    total = printone(out, "# bytes in allocated blocks", allocated_bytes);
    total += printone(out, "# bytes in available blocks", available_bytes);

    PyOS_snprintf(buf, sizeof(buf),
        "%u unused pools * %d bytes", numfreepools, POOL_SIZE);
    total += printone(out, buf, (size_t)numfreepools * POOL_SIZE);

    total += printone(out, "# bytes lost to pool headers", pool_header_bytes);
    total += printone(out, "# bytes lost to quantization", quantization);
    total += printone(out, "# bytes lost to arena alignment", arena_alignment);
    (void)printone(out, "Total", total);
    assert(narenas * ARENA_SIZE == total);

#if WITH_PYMALLOC_RADIX_TREE
    fputs("\narena map counts\n", out);
#ifdef USE_INTERIOR_NODES
    (void)printone(out, "# arena map mid nodes", arena_map_mid_count);
    (void)printone(out, "# arena map bot nodes", arena_map_bot_count);
    fputc('\n', out);
#endif
    total = printone(out, "# bytes lost to arena map root", sizeof(arena_map_root));
#ifdef USE_INTERIOR_NODES
    total += printone(out, "# bytes lost to arena map mid",
                      sizeof(arena_map_mid_t) * arena_map_mid_count);
    total += printone(out, "# bytes lost to arena map bot",
                      sizeof(arena_map_bot_t) * arena_map_bot_count);
    (void)printone(out, "Total", total);
#endif
#endif

    return 1;
}
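
/* Note: from Python code, sys._debugmallocstats() is the usual way to trigger
 * this report; it writes the same statistics to stderr. */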

#endif /* #ifdef WITH_PYMALLOC */
