1 /*
2 
3   Reference Cycle Garbage Collection
4   ==================================
5 
6   Neil Schemenauer <[email protected]>
7 
8   Based on a post on the python-dev list.  Ideas from Guido van Rossum,
9   Eric Tiedemann, and various others.
10 
11   http://www.arctrix.com/nas/python/gc/
12 
13   The following mailing list threads provide a historical perspective on
14   the design of this module.  Note that a fair amount of refinement has
15   occurred since those discussions.
16 
17   http://mail.python.org/pipermail/python-dev/2000-March/002385.html
18   http://mail.python.org/pipermail/python-dev/2000-March/002434.html
19   http://mail.python.org/pipermail/python-dev/2000-March/002497.html
20 
  For a high-level view of the collection process, read the collect
  function.
23 
24 */
25 
26 #include "Python.h"
27 #include "pycore_context.h"
28 #include "pycore_initconfig.h"
29 #include "pycore_interp.h"      // PyInterpreterState.gc
30 #include "pycore_object.h"
31 #include "pycore_pyerrors.h"
32 #include "pycore_pystate.h"     // _PyThreadState_GET()
33 #include "pydtrace.h"
34 
35 typedef struct _gc_runtime_state GCState;
36 
37 /*[clinic input]
38 module gc
39 [clinic start generated code]*/
40 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=b5c9690ecc842d79]*/
41 
42 
43 #ifdef Py_DEBUG
44 #  define GC_DEBUG
45 #endif
46 
47 #define GC_NEXT _PyGCHead_NEXT
48 #define GC_PREV _PyGCHead_PREV
49 
// update_refs() sets this bit for all objects in the generation being
// collected.  subtract_refs() and move_unreachable() use it to tell
// whether a visited object belongs to that generation.
//
// move_unreachable() removes this flag from reachable objects;
// afterwards only unreachable objects carry it.
//
// No object in the interpreter has this flag once GC ends.
58 #define PREV_MASK_COLLECTING   _PyGC_PREV_MASK_COLLECTING
59 
// The lowest bit of _gc_next is used for the UNREACHABLE flag.
//
// This flag indicates that the object is on the unreachable list built by
// move_unreachable().
//
// Although the flag is used only in move_unreachable(), move_unreachable()
// doesn't clear it itself, to avoid an extra pass over the list;
// move_legacy_finalizers() removes it instead.  Between those two calls the
// unreachable list is not a normal list and most gc_list_* functions cannot
// be used on it.
69 #define NEXT_MASK_UNREACHABLE  (1)
70 
71 /* Get an object's GC head */
72 #define AS_GC(o) ((PyGC_Head *)(((char *)(o))-sizeof(PyGC_Head)))
73 
74 /* Get the object given the GC head */
75 #define FROM_GC(g) ((PyObject *)(((char *)(g))+sizeof(PyGC_Head)))
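
/* Illustrative sketch (not part of the module): AS_GC() and FROM_GC() rely on
 * the PyGC_Head being allocated immediately before the object itself, so the
 * two macros are exact inverses of each other:
 *
 *     PyObject  *op = some_tracked_object;   // hypothetical GC-tracked object
 *     PyGC_Head *g  = AS_GC(op);             // step back over the GC header
 *     assert(FROM_GC(g) == op);              // ... and forward again
 */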
76 
77 static inline int
gc_is_collecting(PyGC_Head *g)
79 {
80     return (g->_gc_prev & PREV_MASK_COLLECTING) != 0;
81 }
82 
83 static inline void
gc_clear_collecting(PyGC_Head *g)
85 {
86     g->_gc_prev &= ~PREV_MASK_COLLECTING;
87 }
88 
89 static inline Py_ssize_t
gc_get_refs(PyGC_Head *g)
91 {
92     return (Py_ssize_t)(g->_gc_prev >> _PyGC_PREV_SHIFT);
93 }
94 
95 static inline void
gc_set_refs(PyGC_Head *g, Py_ssize_t refs)
97 {
98     g->_gc_prev = (g->_gc_prev & ~_PyGC_PREV_MASK)
99         | ((uintptr_t)(refs) << _PyGC_PREV_SHIFT);
100 }
101 
102 static inline void
gc_reset_refs(PyGC_Head *g, Py_ssize_t refs)
104 {
105     g->_gc_prev = (g->_gc_prev & _PyGC_PREV_MASK_FINALIZED)
106         | PREV_MASK_COLLECTING
107         | ((uintptr_t)(refs) << _PyGC_PREV_SHIFT);
108 }
109 
110 static inline void
gc_decref(PyGC_Head *g)
112 {
113     _PyObject_ASSERT_WITH_MSG(FROM_GC(g),
114                               gc_get_refs(g) > 0,
115                               "refcount is too small");
116     g->_gc_prev -= 1 << _PyGC_PREV_SHIFT;
117 }
118 
/* Flags for debugging output, set via gc.set_debug() */
120 #define DEBUG_STATS             (1<<0) /* print collection statistics */
121 #define DEBUG_COLLECTABLE       (1<<1) /* print collectable objects */
122 #define DEBUG_UNCOLLECTABLE     (1<<2) /* print uncollectable objects */
123 #define DEBUG_SAVEALL           (1<<5) /* save all garbage in gc.garbage */
#define DEBUG_LEAK              (DEBUG_COLLECTABLE | \
                DEBUG_UNCOLLECTABLE | \
                DEBUG_SAVEALL)
127 
128 #define GEN_HEAD(gcstate, n) (&(gcstate)->generations[n].head)
129 
130 
131 static GCState *
get_gc_state(void)
133 {
134     PyInterpreterState *interp = _PyInterpreterState_GET();
135     return &interp->gc;
136 }
137 
138 
139 void
_PyGC_InitState(GCState *gcstate)
141 {
142 #define INIT_HEAD(GEN) \
143     do { \
144         GEN.head._gc_next = (uintptr_t)&GEN.head; \
145         GEN.head._gc_prev = (uintptr_t)&GEN.head; \
146     } while (0)
147 
148     for (int i = 0; i < NUM_GENERATIONS; i++) {
149         assert(gcstate->generations[i].count == 0);
150         INIT_HEAD(gcstate->generations[i]);
    }
152     gcstate->generation0 = GEN_HEAD(gcstate, 0);
153     INIT_HEAD(gcstate->permanent_generation);
154 
155 #undef INIT_HEAD
156 }
157 
158 
159 PyStatus
_PyGC_Init(PyInterpreterState *interp)
161 {
162     GCState *gcstate = &interp->gc;
163 
164     gcstate->garbage = PyList_New(0);
165     if (gcstate->garbage == NULL) {
166         return _PyStatus_NO_MEMORY();
167     }
168 
169     gcstate->callbacks = PyList_New(0);
170     if (gcstate->callbacks == NULL) {
171         return _PyStatus_NO_MEMORY();
172     }
173 
174     return _PyStatus_OK();
175 }
176 
177 
178 /*
179 _gc_prev values
180 ---------------
181 
Between collections, _gc_prev holds the previous-object pointer of the
doubly linked list.

The lowest two bits of _gc_prev are used for flags.
PREV_MASK_COLLECTING is used only while collecting; it is cleared before GC
ends or when _PyObject_GC_UNTRACK() is called.

During a collection, _gc_prev is temporarily used to store gc_refs, and the
gc list is singly linked until _gc_prev is restored.
190 
191 gc_refs
192     At the start of a collection, update_refs() copies the true refcount
193     to gc_refs, for each object in the generation being collected.
194     subtract_refs() then adjusts gc_refs so that it equals the number of
195     times an object is referenced directly from outside the generation
196     being collected.
197 
198 PREV_MASK_COLLECTING
    Objects in the generation being collected are marked PREV_MASK_COLLECTING
    in update_refs().
201 
202 
203 _gc_next values
204 ---------------
205 
206 _gc_next takes these values:
207 
208 0
209     The object is not tracked
210 
211 != 0
212     Pointer to the next object in the GC list.
    Additionally, the lowest bit is used temporarily for the
    NEXT_MASK_UNREACHABLE flag described below.
215 
NEXT_MASK_UNREACHABLE
    move_unreachable() moves objects that are not reachable (directly or
    indirectly) from outside the generation into the "unreachable" set and
    sets this flag on them.

    Objects that are found to be reachable have gc_refs set to 1.
    If this flag turns out to be set on a reachable object, the object must
    still be sitting in the "unreachable" set; the flag is then unset and the
    object is moved back to the "reachable" set.

    move_legacy_finalizers() removes this flag from the objects remaining in
    the "unreachable" set.
227 */
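
/* Worked example (illustrative only; assumes _PyGC_PREV_SHIFT == 2): for an
 * object whose true refcount is 5, update_refs() leaves
 *
 *     _gc_prev == ((uintptr_t)5 << _PyGC_PREV_SHIFT) | PREV_MASK_COLLECTING
 *
 * (plus the finalized bit, if it was already set), and gc_get_refs()
 * recovers 5 by shifting the flag bits back out.
 */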
228 
229 /*** list functions ***/
230 
231 static inline void
gc_list_init(PyGC_Head *list)
233 {
    // A list header must not have any flag bits set,
    // so the pointer can be assigned with a simple cast.
236     list->_gc_prev = (uintptr_t)list;
237     list->_gc_next = (uintptr_t)list;
238 }
239 
240 static inline int
gc_list_is_empty(PyGC_Head *list)
242 {
243     return (list->_gc_next == (uintptr_t)list);
244 }
245 
246 /* Append `node` to `list`. */
247 static inline void
gc_list_append(PyGC_Head *node, PyGC_Head *list)
249 {
250     PyGC_Head *last = (PyGC_Head *)list->_gc_prev;
251 
252     // last <-> node
253     _PyGCHead_SET_PREV(node, last);
254     _PyGCHead_SET_NEXT(last, node);
255 
256     // node <-> list
257     _PyGCHead_SET_NEXT(node, list);
258     list->_gc_prev = (uintptr_t)node;
259 }
260 
261 /* Remove `node` from the gc list it's currently in. */
262 static inline void
gc_list_remove(PyGC_Head *node)
264 {
265     PyGC_Head *prev = GC_PREV(node);
266     PyGC_Head *next = GC_NEXT(node);
267 
268     _PyGCHead_SET_NEXT(prev, next);
269     _PyGCHead_SET_PREV(next, prev);
270 
271     node->_gc_next = 0; /* object is not currently tracked */
272 }
273 
274 /* Move `node` from the gc list it's currently in (which is not explicitly
275  * named here) to the end of `list`.  This is semantically the same as
276  * gc_list_remove(node) followed by gc_list_append(node, list).
277  */
278 static void
gc_list_move(PyGC_Head *node, PyGC_Head *list)
280 {
281     /* Unlink from current list. */
282     PyGC_Head *from_prev = GC_PREV(node);
283     PyGC_Head *from_next = GC_NEXT(node);
284     _PyGCHead_SET_NEXT(from_prev, from_next);
285     _PyGCHead_SET_PREV(from_next, from_prev);
286 
287     /* Relink at end of new list. */
    // `list` must not have flag bits set, so we can skip the accessor macros.
289     PyGC_Head *to_prev = (PyGC_Head*)list->_gc_prev;
290     _PyGCHead_SET_PREV(node, to_prev);
291     _PyGCHead_SET_NEXT(to_prev, node);
292     list->_gc_prev = (uintptr_t)node;
293     _PyGCHead_SET_NEXT(node, list);
294 }
295 
296 /* append list `from` onto list `to`; `from` becomes an empty list */
297 static void
gc_list_merge(PyGC_Head *from, PyGC_Head *to)
299 {
300     assert(from != to);
301     if (!gc_list_is_empty(from)) {
302         PyGC_Head *to_tail = GC_PREV(to);
303         PyGC_Head *from_head = GC_NEXT(from);
304         PyGC_Head *from_tail = GC_PREV(from);
305         assert(from_head != from);
306         assert(from_tail != from);
307 
308         _PyGCHead_SET_NEXT(to_tail, from_head);
309         _PyGCHead_SET_PREV(from_head, to_tail);
310 
311         _PyGCHead_SET_NEXT(from_tail, to);
312         _PyGCHead_SET_PREV(to, from_tail);
313     }
314     gc_list_init(from);
315 }
316 
317 static Py_ssize_t
gc_list_size(PyGC_Head *list)
319 {
320     PyGC_Head *gc;
321     Py_ssize_t n = 0;
322     for (gc = GC_NEXT(list); gc != list; gc = GC_NEXT(gc)) {
323         n++;
324     }
325     return n;
326 }
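
/* Usage sketch (illustrative only): a PyGC_Head used as a list header is its
 * own prev and next until something is linked in, so:
 *
 *     PyGC_Head tmp;
 *     gc_list_init(&tmp);
 *     assert(gc_list_is_empty(&tmp));
 *     gc_list_move(AS_GC(op), &tmp);   // op: some tracked object
 *     assert(gc_list_size(&tmp) == 1);
 */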
327 
328 /* Walk the list and mark all objects as non-collecting */
329 static inline void
gc_list_clear_collecting(PyGC_Head *collectable)
331 {
332     PyGC_Head *gc;
333     for (gc = GC_NEXT(collectable); gc != collectable; gc = GC_NEXT(gc)) {
334         gc_clear_collecting(gc);
335     }
336 }
337 
338 /* Append objects in a GC list to a Python list.
339  * Return 0 if all OK, < 0 if error (out of memory for list)
340  */
341 static int
append_objects(PyObject *py_list, PyGC_Head *gc_list)
343 {
344     PyGC_Head *gc;
345     for (gc = GC_NEXT(gc_list); gc != gc_list; gc = GC_NEXT(gc)) {
346         PyObject *op = FROM_GC(gc);
347         if (op != py_list) {
348             if (PyList_Append(py_list, op)) {
349                 return -1; /* exception */
350             }
351         }
352     }
353     return 0;
354 }
355 
356 // Constants for validate_list's flags argument.
357 enum flagstates {collecting_clear_unreachable_clear,
358                  collecting_clear_unreachable_set,
359                  collecting_set_unreachable_clear,
360                  collecting_set_unreachable_set};
361 
362 #ifdef GC_DEBUG
// validate_list checks list consistency, and doubles as documentation of
// when the flags are expected to be set or clear.
// `head` must be a doubly linked gc list, although it's fine (expected!) if
// the prev and next pointers are "polluted" with flags.
// What's checked:
// - The `head` pointers are not polluted.
// - The objects' PREV_MASK_COLLECTING and NEXT_MASK_UNREACHABLE flags are all
//   set or clear, as specified by the 'flags' argument.
371 // - The prev and next pointers are mutually consistent.
372 static void
validate_list(PyGC_Head *head, enum flagstates flags)
374 {
375     assert((head->_gc_prev & PREV_MASK_COLLECTING) == 0);
376     assert((head->_gc_next & NEXT_MASK_UNREACHABLE) == 0);
377     uintptr_t prev_value = 0, next_value = 0;
378     switch (flags) {
379         case collecting_clear_unreachable_clear:
380             break;
381         case collecting_set_unreachable_clear:
382             prev_value = PREV_MASK_COLLECTING;
383             break;
384         case collecting_clear_unreachable_set:
385             next_value = NEXT_MASK_UNREACHABLE;
386             break;
387         case collecting_set_unreachable_set:
388             prev_value = PREV_MASK_COLLECTING;
389             next_value = NEXT_MASK_UNREACHABLE;
390             break;
391         default:
392             assert(! "bad internal flags argument");
393     }
394     PyGC_Head *prev = head;
395     PyGC_Head *gc = GC_NEXT(head);
396     while (gc != head) {
397         PyGC_Head *trueprev = GC_PREV(gc);
398         PyGC_Head *truenext = (PyGC_Head *)(gc->_gc_next  & ~NEXT_MASK_UNREACHABLE);
399         assert(truenext != NULL);
400         assert(trueprev == prev);
401         assert((gc->_gc_prev & PREV_MASK_COLLECTING) == prev_value);
402         assert((gc->_gc_next & NEXT_MASK_UNREACHABLE) == next_value);
403         prev = gc;
404         gc = truenext;
405     }
406     assert(prev == GC_PREV(head));
407 }
408 #else
409 #define validate_list(x, y) do{}while(0)
410 #endif
411 
412 /*** end of list stuff ***/
413 
414 
/* Set all gc_refs = ob_refcnt.  After this, gc_refs is > 0 and the
 * PREV_MASK_COLLECTING bit is set for all objects in containers.
417  */
418 static void
update_refs(PyGC_Head *containers)
420 {
421     PyGC_Head *gc = GC_NEXT(containers);
422     for (; gc != containers; gc = GC_NEXT(gc)) {
423         gc_reset_refs(gc, Py_REFCNT(FROM_GC(gc)));
424         /* Python's cyclic gc should never see an incoming refcount
425          * of 0:  if something decref'ed to 0, it should have been
426          * deallocated immediately at that time.
427          * Possible cause (if the assert triggers):  a tp_dealloc
428          * routine left a gc-aware object tracked during its teardown
429          * phase, and did something-- or allowed something to happen --
430          * that called back into Python.  gc can trigger then, and may
431          * see the still-tracked dying object.  Before this assert
432          * was added, such mistakes went on to allow gc to try to
433          * delete the object again.  In a debug build, that caused
434          * a mysterious segfault, when _Py_ForgetReference tried
435          * to remove the object from the doubly-linked list of all
436          * objects a second time.  In a release build, an actual
437          * double deallocation occurred, which leads to corruption
438          * of the allocator's internal bookkeeping pointers.  That's
439          * so serious that maybe this should be a release-build
440          * check instead of an assert?
441          */
442         _PyObject_ASSERT(FROM_GC(gc), gc_get_refs(gc) != 0);
443     }
444 }
445 
446 /* A traversal callback for subtract_refs. */
447 static int
visit_decref(PyObject *op, void *parent)
449 {
450     _PyObject_ASSERT(_PyObject_CAST(parent), !_PyObject_IsFreed(op));
451 
452     if (_PyObject_IS_GC(op)) {
453         PyGC_Head *gc = AS_GC(op);
454         /* We're only interested in gc_refs for objects in the
455          * generation being collected, which can be recognized
456          * because only they have positive gc_refs.
457          */
458         if (gc_is_collecting(gc)) {
459             gc_decref(gc);
460         }
461     }
462     return 0;
463 }
464 
/* Subtract internal references from gc_refs.  After this, gc_refs is >= 0
 * for all objects in containers; objects outside containers are untouched.
 * The ones with gc_refs > 0 are directly reachable from outside containers,
 * and so can't be collected.
469  */
470 static void
subtract_refs(PyGC_Head *containers)
472 {
473     traverseproc traverse;
474     PyGC_Head *gc = GC_NEXT(containers);
475     for (; gc != containers; gc = GC_NEXT(gc)) {
476         PyObject *op = FROM_GC(gc);
477         traverse = Py_TYPE(op)->tp_traverse;
478         (void) traverse(op,
479                         (visitproc)visit_decref,
480                         op);
481     }
482 }
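
/* Worked example (illustrative only): suppose the generation holds objects A
 * and B, where A references B, B references A, and one object outside the
 * generation also references A.  After update_refs(), gc_refs(A) == 2 and
 * gc_refs(B) == 1 (their true refcounts).  subtract_refs() then removes the
 * intra-generation edges, leaving gc_refs(A) == 1 (the external reference)
 * and gc_refs(B) == 0, so only B looks like candidate trash until
 * move_unreachable() proves otherwise.
 */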
483 
484 /* A traversal callback for move_unreachable. */
485 static int
visit_reachable(PyObject *op, PyGC_Head *reachable)
487 {
488     if (!_PyObject_IS_GC(op)) {
489         return 0;
490     }
491 
492     PyGC_Head *gc = AS_GC(op);
493     const Py_ssize_t gc_refs = gc_get_refs(gc);
494 
    // Ignore objects in other generations.
496     // This also skips objects "to the left" of the current position in
497     // move_unreachable's scan of the 'young' list - they've already been
498     // traversed, and no longer have the PREV_MASK_COLLECTING flag.
499     if (! gc_is_collecting(gc)) {
500         return 0;
501     }
502     // It would be a logic error elsewhere if the collecting flag were set on
503     // an untracked object.
504     assert(gc->_gc_next != 0);
505 
506     if (gc->_gc_next & NEXT_MASK_UNREACHABLE) {
507         /* This had gc_refs = 0 when move_unreachable got
508          * to it, but turns out it's reachable after all.
509          * Move it back to move_unreachable's 'young' list,
510          * and move_unreachable will eventually get to it
511          * again.
512          */
513         // Manually unlink gc from unreachable list because the list functions
514         // don't work right in the presence of NEXT_MASK_UNREACHABLE flags.
515         PyGC_Head *prev = GC_PREV(gc);
516         PyGC_Head *next = (PyGC_Head*)(gc->_gc_next & ~NEXT_MASK_UNREACHABLE);
517         _PyObject_ASSERT(FROM_GC(prev),
518                          prev->_gc_next & NEXT_MASK_UNREACHABLE);
519         _PyObject_ASSERT(FROM_GC(next),
520                          next->_gc_next & NEXT_MASK_UNREACHABLE);
521         prev->_gc_next = gc->_gc_next;  // copy NEXT_MASK_UNREACHABLE
522         _PyGCHead_SET_PREV(next, prev);
523 
524         gc_list_append(gc, reachable);
525         gc_set_refs(gc, 1);
526     }
527     else if (gc_refs == 0) {
528         /* This is in move_unreachable's 'young' list, but
529          * the traversal hasn't yet gotten to it.  All
530          * we need to do is tell move_unreachable that it's
531          * reachable.
532          */
533         gc_set_refs(gc, 1);
534     }
535     /* Else there's nothing to do.
536      * If gc_refs > 0, it must be in move_unreachable's 'young'
537      * list, and move_unreachable will eventually get to it.
538      */
539     else {
540         _PyObject_ASSERT_WITH_MSG(op, gc_refs > 0, "refcount is too small");
541     }
542     return 0;
543 }
544 
/* Move the unreachable objects from young to unreachable.  After this,
 * no object in young has the PREV_MASK_COLLECTING flag and every object
 * in unreachable does.
 * All objects remaining in young are directly or indirectly reachable from
 * outside the original young list, and all objects in unreachable are not.
 *
 * This function restores the _gc_prev pointers; young and unreachable are
 * proper doubly linked lists when it returns.
 * However, _gc_next in the unreachable list still carries the
 * NEXT_MASK_UNREACHABLE flag, so gc_list_* functions cannot be used on
 * unreachable until that flag is removed.
 */
557 static void
move_unreachable(PyGC_Head *young, PyGC_Head *unreachable)
559 {
    // Previous element in the young list, used to restore _gc_prev.
561     PyGC_Head *prev = young;
562     PyGC_Head *gc = GC_NEXT(young);
563 
564     /* Invariants:  all objects "to the left" of us in young are reachable
565      * (directly or indirectly) from outside the young list as it was at entry.
566      *
567      * All other objects from the original young "to the left" of us are in
568      * unreachable now, and have NEXT_MASK_UNREACHABLE.  All objects to the
569      * left of us in 'young' now have been scanned, and no objects here
570      * or to the right have been scanned yet.
571      */
572 
573     while (gc != young) {
574         if (gc_get_refs(gc)) {
575             /* gc is definitely reachable from outside the
576              * original 'young'.  Mark it as such, and traverse
577              * its pointers to find any other objects that may
578              * be directly reachable from it.  Note that the
579              * call to tp_traverse may append objects to young,
580              * so we have to wait until it returns to determine
581              * the next object to visit.
582              */
583             PyObject *op = FROM_GC(gc);
584             traverseproc traverse = Py_TYPE(op)->tp_traverse;
585             _PyObject_ASSERT_WITH_MSG(op, gc_get_refs(gc) > 0,
586                                       "refcount is too small");
587             // NOTE: visit_reachable may change gc->_gc_next when
588             // young->_gc_prev == gc.  Don't do gc = GC_NEXT(gc) before!
589             (void) traverse(op,
590                     (visitproc)visit_reachable,
591                     (void *)young);
592             // relink gc_prev to prev element.
593             _PyGCHead_SET_PREV(gc, prev);
594             // gc is not COLLECTING state after here.
595             gc_clear_collecting(gc);
596             prev = gc;
597         }
598         else {
599             /* This *may* be unreachable.  To make progress,
600              * assume it is.  gc isn't directly reachable from
601              * any object we've already traversed, but may be
602              * reachable from an object we haven't gotten to yet.
603              * visit_reachable will eventually move gc back into
604              * young if that's so, and we'll see it again.
605              */
            // Move gc to unreachable.
            // No need to set gc->next->prev = prev because the list is
            // singly linked at this point.
            prev->_gc_next = gc->_gc_next;

            // We can't use gc_list_append() here because we have to set
            // NEXT_MASK_UNREACHABLE as well.
            PyGC_Head *last = GC_PREV(unreachable);
            // NOTE: Since all objects in the unreachable set have the
            // NEXT_MASK_UNREACHABLE flag, we set it unconditionally.
            // But this may pollute the unreachable list head's 'next' pointer
            // too. That's semantically senseless but expedient here - the
            // damage is repaired when this function ends.
618             last->_gc_next = (NEXT_MASK_UNREACHABLE | (uintptr_t)gc);
619             _PyGCHead_SET_PREV(gc, last);
620             gc->_gc_next = (NEXT_MASK_UNREACHABLE | (uintptr_t)unreachable);
621             unreachable->_gc_prev = (uintptr_t)gc;
622         }
623         gc = (PyGC_Head*)prev->_gc_next;
624     }
    // young->_gc_prev must be the last element remaining in the list.
626     young->_gc_prev = (uintptr_t)prev;
627     // don't let the pollution of the list head's next pointer leak
628     unreachable->_gc_next &= ~NEXT_MASK_UNREACHABLE;
629 }
630 
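/* Try to untrack all currently tracked tuples whose contents turn out not to
 * need the cycle collector (mirrors untrack_dicts() below). */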
631 static void
untrack_tuples(PyGC_Head *head)
633 {
634     PyGC_Head *next, *gc = GC_NEXT(head);
635     while (gc != head) {
636         PyObject *op = FROM_GC(gc);
637         next = GC_NEXT(gc);
638         if (PyTuple_CheckExact(op)) {
639             _PyTuple_MaybeUntrack(op);
640         }
641         gc = next;
642     }
643 }
644 
645 /* Try to untrack all currently tracked dictionaries */
646 static void
untrack_dicts(PyGC_Head *head)
648 {
649     PyGC_Head *next, *gc = GC_NEXT(head);
650     while (gc != head) {
651         PyObject *op = FROM_GC(gc);
652         next = GC_NEXT(gc);
653         if (PyDict_CheckExact(op)) {
654             _PyDict_MaybeUntrack(op);
655         }
656         gc = next;
657     }
658 }
659 
660 /* Return true if object has a pre-PEP 442 finalization method. */
661 static int
has_legacy_finalizer(PyObject *op)
663 {
664     return Py_TYPE(op)->tp_del != NULL;
665 }
666 
667 /* Move the objects in unreachable with tp_del slots into `finalizers`.
668  *
669  * This function also removes NEXT_MASK_UNREACHABLE flag
670  * from _gc_next in unreachable.
671  */
672 static void
move_legacy_finalizers(PyGC_Head *unreachable, PyGC_Head *finalizers)
674 {
675     PyGC_Head *gc, *next;
676     assert((unreachable->_gc_next & NEXT_MASK_UNREACHABLE) == 0);
677 
678     /* March over unreachable.  Move objects with finalizers into
679      * `finalizers`.
680      */
681     for (gc = GC_NEXT(unreachable); gc != unreachable; gc = next) {
682         PyObject *op = FROM_GC(gc);
683 
684         _PyObject_ASSERT(op, gc->_gc_next & NEXT_MASK_UNREACHABLE);
685         gc->_gc_next &= ~NEXT_MASK_UNREACHABLE;
686         next = (PyGC_Head*)gc->_gc_next;
687 
688         if (has_legacy_finalizer(op)) {
689             gc_clear_collecting(gc);
690             gc_list_move(gc, finalizers);
691         }
692     }
693 }
694 
695 static inline void
clear_unreachable_mask(PyGC_Head *unreachable)
697 {
698     /* Check that the list head does not have the unreachable bit set */
699     assert(((uintptr_t)unreachable & NEXT_MASK_UNREACHABLE) == 0);
700 
701     PyGC_Head *gc, *next;
702     assert((unreachable->_gc_next & NEXT_MASK_UNREACHABLE) == 0);
703     for (gc = GC_NEXT(unreachable); gc != unreachable; gc = next) {
704         _PyObject_ASSERT((PyObject*)FROM_GC(gc), gc->_gc_next & NEXT_MASK_UNREACHABLE);
705         gc->_gc_next &= ~NEXT_MASK_UNREACHABLE;
706         next = (PyGC_Head*)gc->_gc_next;
707     }
708     validate_list(unreachable, collecting_set_unreachable_clear);
709 }
710 
711 /* A traversal callback for move_legacy_finalizer_reachable. */
712 static int
visit_move(PyObject *op, PyGC_Head *tolist)
714 {
715     if (_PyObject_IS_GC(op)) {
716         PyGC_Head *gc = AS_GC(op);
717         if (gc_is_collecting(gc)) {
718             gc_list_move(gc, tolist);
719             gc_clear_collecting(gc);
720         }
721     }
722     return 0;
723 }
724 
725 /* Move objects that are reachable from finalizers, from the unreachable set
726  * into finalizers set.
727  */
728 static void
move_legacy_finalizer_reachable(PyGC_Head *finalizers)
730 {
731     traverseproc traverse;
732     PyGC_Head *gc = GC_NEXT(finalizers);
733     for (; gc != finalizers; gc = GC_NEXT(gc)) {
734         /* Note that the finalizers list may grow during this. */
735         traverse = Py_TYPE(FROM_GC(gc))->tp_traverse;
736         (void) traverse(FROM_GC(gc),
737                         (visitproc)visit_move,
738                         (void *)finalizers);
739     }
740 }
741 
/* Clear all weakrefs to unreachable objects, and if such a weakref has a
 * callback, invoke it if necessary.  Note that it's possible for such
 * weakrefs to be outside the unreachable set -- indeed, those are precisely
 * the weakrefs whose callbacks must be invoked.  See gc_weakref.txt for an
 * overview & some details.  Some weakrefs with callbacks may be reclaimed
 * directly by this routine; the number reclaimed is the return value.  Other
 * weakrefs with callbacks may be moved into the `old` generation; the
 * objects remaining in `unreachable` stay tentatively unreachable.  When
 * this returns, no object in `unreachable` is weakly referenced anymore.
752  */
753 static int
handle_weakrefs(PyGC_Head *unreachable, PyGC_Head *old)
755 {
756     PyGC_Head *gc;
757     PyObject *op;               /* generally FROM_GC(gc) */
758     PyWeakReference *wr;        /* generally a cast of op */
759     PyGC_Head wrcb_to_call;     /* weakrefs with callbacks to call */
760     PyGC_Head *next;
761     int num_freed = 0;
762 
763     gc_list_init(&wrcb_to_call);
764 
765     /* Clear all weakrefs to the objects in unreachable.  If such a weakref
766      * also has a callback, move it into `wrcb_to_call` if the callback
767      * needs to be invoked.  Note that we cannot invoke any callbacks until
768      * all weakrefs to unreachable objects are cleared, lest the callback
769      * resurrect an unreachable object via a still-active weakref.  We
770      * make another pass over wrcb_to_call, invoking callbacks, after this
771      * pass completes.
772      */
773     for (gc = GC_NEXT(unreachable); gc != unreachable; gc = next) {
774         PyWeakReference **wrlist;
775 
776         op = FROM_GC(gc);
777         next = GC_NEXT(gc);
778 
779         if (PyWeakref_Check(op)) {
780             /* A weakref inside the unreachable set must be cleared.  If we
781              * allow its callback to execute inside delete_garbage(), it
782              * could expose objects that have tp_clear already called on
783              * them.  Or, it could resurrect unreachable objects.  One way
784              * this can happen is if some container objects do not implement
785              * tp_traverse.  Then, wr_object can be outside the unreachable
786              * set but can be deallocated as a result of breaking the
787              * reference cycle.  If we don't clear the weakref, the callback
788              * will run and potentially cause a crash.  See bpo-38006 for
789              * one example.
790              */
791             _PyWeakref_ClearRef((PyWeakReference *)op);
792         }
793 
794         if (! _PyType_SUPPORTS_WEAKREFS(Py_TYPE(op)))
795             continue;
796 
797         /* It supports weakrefs.  Does it have any? */
798         wrlist = (PyWeakReference **)
799                                 _PyObject_GET_WEAKREFS_LISTPTR(op);
800 
801         /* `op` may have some weakrefs.  March over the list, clear
802          * all the weakrefs, and move the weakrefs with callbacks
803          * that must be called into wrcb_to_call.
804          */
805         for (wr = *wrlist; wr != NULL; wr = *wrlist) {
806             PyGC_Head *wrasgc;                  /* AS_GC(wr) */
807 
808             /* _PyWeakref_ClearRef clears the weakref but leaves
809              * the callback pointer intact.  Obscure:  it also
810              * changes *wrlist.
811              */
812             _PyObject_ASSERT((PyObject *)wr, wr->wr_object == op);
813             _PyWeakref_ClearRef(wr);
814             _PyObject_ASSERT((PyObject *)wr, wr->wr_object == Py_None);
815             if (wr->wr_callback == NULL) {
816                 /* no callback */
817                 continue;
818             }
819 
820             /* Headache time.  `op` is going away, and is weakly referenced by
821              * `wr`, which has a callback.  Should the callback be invoked?  If wr
822              * is also trash, no:
823              *
824              * 1. There's no need to call it.  The object and the weakref are
825              *    both going away, so it's legitimate to pretend the weakref is
826              *    going away first.  The user has to ensure a weakref outlives its
827              *    referent if they want a guarantee that the wr callback will get
828              *    invoked.
829              *
830              * 2. It may be catastrophic to call it.  If the callback is also in
831              *    cyclic trash (CT), then although the CT is unreachable from
832              *    outside the current generation, CT may be reachable from the
833              *    callback.  Then the callback could resurrect insane objects.
834              *
835              * Since the callback is never needed and may be unsafe in this case,
836              * wr is simply left in the unreachable set.  Note that because we
837              * already called _PyWeakref_ClearRef(wr), its callback will never
838              * trigger.
839              *
840              * OTOH, if wr isn't part of CT, we should invoke the callback:  the
841              * weakref outlived the trash.  Note that since wr isn't CT in this
842              * case, its callback can't be CT either -- wr acted as an external
843              * root to this generation, and therefore its callback did too.  So
844              * nothing in CT is reachable from the callback either, so it's hard
845              * to imagine how calling it later could create a problem for us.  wr
846              * is moved to wrcb_to_call in this case.
847              */
848             if (gc_is_collecting(AS_GC(wr))) {
849                 /* it should already have been cleared above */
850                 assert(wr->wr_object == Py_None);
851                 continue;
852             }
853 
854             /* Create a new reference so that wr can't go away
855              * before we can process it again.
856              */
857             Py_INCREF(wr);
858 
859             /* Move wr to wrcb_to_call, for the next pass. */
860             wrasgc = AS_GC(wr);
861             assert(wrasgc != next); /* wrasgc is reachable, but
862                                        next isn't, so they can't
863                                        be the same */
864             gc_list_move(wrasgc, &wrcb_to_call);
865         }
866     }
867 
868     /* Invoke the callbacks we decided to honor.  It's safe to invoke them
869      * because they can't reference unreachable objects.
870      */
871     while (! gc_list_is_empty(&wrcb_to_call)) {
872         PyObject *temp;
873         PyObject *callback;
874 
875         gc = (PyGC_Head*)wrcb_to_call._gc_next;
876         op = FROM_GC(gc);
877         _PyObject_ASSERT(op, PyWeakref_Check(op));
878         wr = (PyWeakReference *)op;
879         callback = wr->wr_callback;
880         _PyObject_ASSERT(op, callback != NULL);
881 
882         /* copy-paste of weakrefobject.c's handle_callback() */
883         temp = PyObject_CallOneArg(callback, (PyObject *)wr);
884         if (temp == NULL)
885             PyErr_WriteUnraisable(callback);
886         else
887             Py_DECREF(temp);
888 
889         /* Give up the reference we created in the first pass.  When
890          * op's refcount hits 0 (which it may or may not do right now),
891          * op's tp_dealloc will decref op->wr_callback too.  Note
892          * that the refcount probably will hit 0 now, and because this
893          * weakref was reachable to begin with, gc didn't already
894          * add it to its count of freed objects.  Example:  a reachable
895          * weak value dict maps some key to this reachable weakref.
896          * The callback removes this key->weakref mapping from the
897          * dict, leaving no other references to the weakref (excepting
898          * ours).
899          */
900         Py_DECREF(op);
901         if (wrcb_to_call._gc_next == (uintptr_t)gc) {
902             /* object is still alive -- move it */
903             gc_list_move(gc, old);
904         }
905         else {
906             ++num_freed;
907         }
908     }
909 
910     return num_freed;
911 }
912 
913 static void
debug_cycle(const char *msg, PyObject *op)
915 {
916     PySys_FormatStderr("gc: %s <%s %p>\n",
917                        msg, Py_TYPE(op)->tp_name, op);
918 }
919 
920 /* Handle uncollectable garbage (cycles with tp_del slots, and stuff reachable
921  * only from such cycles).
922  * If DEBUG_SAVEALL, all objects in finalizers are appended to the module
923  * garbage list (a Python list), else only the objects in finalizers with
924  * __del__ methods are appended to garbage.  All objects in finalizers are
925  * merged into the old list regardless.
926  */
927 static void
handle_legacy_finalizers(PyThreadState *tstate,
929                          GCState *gcstate,
930                          PyGC_Head *finalizers, PyGC_Head *old)
931 {
932     assert(!_PyErr_Occurred(tstate));
933     assert(gcstate->garbage != NULL);
934 
935     PyGC_Head *gc = GC_NEXT(finalizers);
936     for (; gc != finalizers; gc = GC_NEXT(gc)) {
937         PyObject *op = FROM_GC(gc);
938 
939         if ((gcstate->debug & DEBUG_SAVEALL) || has_legacy_finalizer(op)) {
940             if (PyList_Append(gcstate->garbage, op) < 0) {
941                 _PyErr_Clear(tstate);
942                 break;
943             }
944         }
945     }
946 
947     gc_list_merge(finalizers, old);
948 }
949 
950 /* Run first-time finalizers (if any) on all the objects in collectable.
951  * Note that this may remove some (or even all) of the objects from the
952  * list, due to refcounts falling to 0.
953  */
954 static void
finalize_garbage(PyThreadState *tstate, PyGC_Head *collectable)
956 {
957     destructor finalize;
958     PyGC_Head seen;
959 
960     /* While we're going through the loop, `finalize(op)` may cause op, or
961      * other objects, to be reclaimed via refcounts falling to zero.  So
962      * there's little we can rely on about the structure of the input
963      * `collectable` list across iterations.  For safety, we always take the
964      * first object in that list and move it to a temporary `seen` list.
965      * If objects vanish from the `collectable` and `seen` lists we don't
966      * care.
967      */
968     gc_list_init(&seen);
969 
970     while (!gc_list_is_empty(collectable)) {
971         PyGC_Head *gc = GC_NEXT(collectable);
972         PyObject *op = FROM_GC(gc);
973         gc_list_move(gc, &seen);
974         if (!_PyGCHead_FINALIZED(gc) &&
975                 (finalize = Py_TYPE(op)->tp_finalize) != NULL) {
976             _PyGCHead_SET_FINALIZED(gc);
977             Py_INCREF(op);
978             finalize(op);
979             assert(!_PyErr_Occurred(tstate));
980             Py_DECREF(op);
981         }
982     }
983     gc_list_merge(&seen, collectable);
984 }
985 
986 /* Break reference cycles by clearing the containers involved.  This is
987  * tricky business as the lists can be changing and we don't know which
988  * objects may be freed.  It is possible I screwed something up here.
989  */
990 static void
delete_garbage(PyThreadState *tstate, GCState *gcstate,
992                PyGC_Head *collectable, PyGC_Head *old)
993 {
994     assert(!_PyErr_Occurred(tstate));
995 
996     while (!gc_list_is_empty(collectable)) {
997         PyGC_Head *gc = GC_NEXT(collectable);
998         PyObject *op = FROM_GC(gc);
999 
1000         _PyObject_ASSERT_WITH_MSG(op, Py_REFCNT(op) > 0,
1001                                   "refcount is too small");
1002 
1003         if (gcstate->debug & DEBUG_SAVEALL) {
1004             assert(gcstate->garbage != NULL);
1005             if (PyList_Append(gcstate->garbage, op) < 0) {
1006                 _PyErr_Clear(tstate);
1007             }
1008         }
1009         else {
1010             inquiry clear;
1011             if ((clear = Py_TYPE(op)->tp_clear) != NULL) {
1012                 Py_INCREF(op);
1013                 (void) clear(op);
1014                 if (_PyErr_Occurred(tstate)) {
1015                     _PyErr_WriteUnraisableMsg("in tp_clear of",
1016                                               (PyObject*)Py_TYPE(op));
1017                 }
1018                 Py_DECREF(op);
1019             }
1020         }
1021         if (GC_NEXT(collectable) == gc) {
1022             /* object is still alive, move it, it may die later */
1023             gc_clear_collecting(gc);
1024             gc_list_move(gc, old);
1025         }
1026     }
1027 }
1028 
1029 /* Clear all free lists
1030  * All free lists are cleared during the collection of the highest generation.
1031  * Allocated items in the free list may keep a pymalloc arena occupied.
1032  * Clearing the free lists may give back memory to the OS earlier.
1033  */
1034 static void
clear_freelists(PyInterpreterState *interp)
1036 {
1037     _PyTuple_ClearFreeList(interp);
1038     _PyFloat_ClearFreeList(interp);
1039     _PyList_ClearFreeList(interp);
1040     _PyDict_ClearFreeList(interp);
1041     _PyAsyncGen_ClearFreeLists(interp);
1042     _PyContext_ClearFreeList(interp);
1043 }
1044 
// Show stats for objects in each generation
1046 static void
show_stats_each_generations(GCState *gcstate)
1048 {
1049     char buf[100];
1050     size_t pos = 0;
1051 
1052     for (int i = 0; i < NUM_GENERATIONS && pos < sizeof(buf); i++) {
1053         pos += PyOS_snprintf(buf+pos, sizeof(buf)-pos,
1054                              " %zd",
1055                              gc_list_size(GEN_HEAD(gcstate, i)));
1056     }
1057 
1058     PySys_FormatStderr(
1059         "gc: objects in each generation:%s\n"
1060         "gc: objects in permanent generation: %zd\n",
1061         buf, gc_list_size(&gcstate->permanent_generation.head));
1062 }
1063 
/* Deduce which objects among "base" are unreachable from outside the list
   and move them to 'unreachable'. The process consists of the following steps:

1. Copy all reference counts to a different field (gc_prev is used to hold
   this copy to save memory).
2. Traverse all objects in "base" and visit all referenced objects using
   "tp_traverse", and for every visited object subtract 1 from the reference
   count (the one that we copied in the previous step). After this step, all
   objects that can be reached directly from outside must have a strictly
   positive reference count, while all unreachable objects must have a count
   of exactly 0.
3. Identify all unreachable objects (the ones with a 0 reference count) and
   move them to the "unreachable" list. This step also needs to move back to
   "base" all objects that were initially marked as unreachable but are
   referenced transitively by the reachable objects (the ones with a strictly
   positive reference count).
1078 
1079 Contracts:
1080 
1081     * The "base" has to be a valid list with no mask set.
1082 
1083     * The "unreachable" list must be uninitialized (this function calls
1084       gc_list_init over 'unreachable').
1085 
IMPORTANT: This function leaves the objects in 'unreachable' with the
NEXT_MASK_UNREACHABLE flag set; it does not clear the flag, to avoid an
extra pass over the list. Until the flag is cleared (for example, by the
'clear_unreachable_mask' function or by a call to 'move_legacy_finalizers'),
the 'unreachable' list is not a normal list and most gc_list_* functions
cannot be used on it. */
1091 static inline void
deduce_unreachable(PyGC_Head *base, PyGC_Head *unreachable) {
1093     validate_list(base, collecting_clear_unreachable_clear);
1094     /* Using ob_refcnt and gc_refs, calculate which objects in the
1095      * container set are reachable from outside the set (i.e., have a
1096      * refcount greater than 0 when all the references within the
1097      * set are taken into account).
1098      */
1099     update_refs(base);  // gc_prev is used for gc_refs
1100     subtract_refs(base);
1101 
1102     /* Leave everything reachable from outside base in base, and move
1103      * everything else (in base) to unreachable.
1104      *
1105      * NOTE:  This used to move the reachable objects into a reachable
1106      * set instead.  But most things usually turn out to be reachable,
1107      * so it's more efficient to move the unreachable things.  It "sounds slick"
1108      * to move the unreachable objects, until you think about it - the reason it
1109      * pays isn't actually obvious.
1110      *
1111      * Suppose we create objects A, B, C in that order.  They appear in the young
1112      * generation in the same order.  If B points to A, and C to B, and C is
1113      * reachable from outside, then the adjusted refcounts will be 0, 0, and 1
1114      * respectively.
1115      *
1116      * When move_unreachable finds A, A is moved to the unreachable list.  The
1117      * same for B when it's first encountered.  Then C is traversed, B is moved
1118      * _back_ to the reachable list.  B is eventually traversed, and then A is
1119      * moved back to the reachable list.
1120      *
1121      * So instead of not moving at all, the reachable objects B and A are moved
1122      * twice each.  Why is this a win?  A straightforward algorithm to move the
1123      * reachable objects instead would move A, B, and C once each.
1124      *
1125      * The key is that this dance leaves the objects in order C, B, A - it's
1126      * reversed from the original order.  On all _subsequent_ scans, none of
1127      * them will move.  Since most objects aren't in cycles, this can save an
1128      * unbounded number of moves across an unbounded number of later collections.
1129      * It can cost more only the first time the chain is scanned.
1130      *
1131      * Drawback:  move_unreachable is also used to find out what's still trash
1132      * after finalizers may resurrect objects.  In _that_ case most unreachable
1133      * objects will remain unreachable, so it would be more efficient to move
1134      * the reachable objects instead.  But this is a one-time cost, probably not
1135      * worth complicating the code to speed just a little.
1136      */
1137     gc_list_init(unreachable);
1138     move_unreachable(base, unreachable);  // gc_prev is pointer again
1139     validate_list(base, collecting_clear_unreachable_clear);
1140     validate_list(unreachable, collecting_set_unreachable_set);
1141 }
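
/* Caller's-eye sketch (illustrative only): both gc_collect_main() and
 * handle_resurrected_objects() drive this helper the same way:
 *
 *     PyGC_Head unreachable;
 *     deduce_unreachable(young, &unreachable);
 *     // 'young' now holds only reachable objects; 'unreachable' is a
 *     // flag-polluted list that must pass through move_legacy_finalizers()
 *     // or clear_unreachable_mask() before normal list functions apply.
 */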
1142 
1143 /* Handle objects that may have resurrected after a call to 'finalize_garbage', moving
1144    them to 'old_generation' and placing the rest on 'still_unreachable'.
1145 
1146    Contracts:
1147        * After this function 'unreachable' must not be used anymore and 'still_unreachable'
1148          will contain the objects that did not resurrect.
1149 
1150        * The "still_unreachable" list must be uninitialized (this function calls
1151          gc_list_init over 'still_unreachable').
1152 
IMPORTANT: After a call to this function, the 'still_unreachable' set will have
the PREV_MASK_COLLECTING flag set, but the objects in this set are going to be
removed, so we can skip the expense of clearing the flag and avoid an extra
iteration. */
1156 static inline void
handle_resurrected_objects(PyGC_Head *unreachable, PyGC_Head *still_unreachable,
1158                            PyGC_Head *old_generation)
1159 {
1160     // Remove the PREV_MASK_COLLECTING from unreachable
1161     // to prepare it for a new call to 'deduce_unreachable'
1162     gc_list_clear_collecting(unreachable);
1163 
    // After the call to deduce_unreachable, the 'still_unreachable' set will
    // have the PREV_MASK_COLLECTING flag set, but the objects are going to be
    // removed so we can skip the expense of clearing the flag.
1167     PyGC_Head* resurrected = unreachable;
1168     deduce_unreachable(resurrected, still_unreachable);
1169     clear_unreachable_mask(still_unreachable);
1170 
1171     // Move the resurrected objects to the old generation for future collection.
1172     gc_list_merge(resurrected, old_generation);
1173 }
1174 
1175 /* This is the main function.  Read this to understand how the
1176  * collection process works. */
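/* Rough phase outline for gc_collect_main() (a reading aid, not normative):
 *
 *   1. merge the younger generations into the one being collected
 *   2. deduce_unreachable()          -- split into reachable / unreachable
 *   3. move_legacy_finalizers()      -- set aside objects with tp_del
 *   4. handle_weakrefs()             -- clear weakrefs, run safe callbacks
 *   5. finalize_garbage()            -- run tp_finalize once per object
 *   6. handle_resurrected_objects()  -- re-check reachability
 *   7. delete_garbage()              -- tp_clear breaks the cycles
 *   8. handle_legacy_finalizers()    -- uncollectable objects -> gc.garbage
 */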
1177 static Py_ssize_t
gc_collect_main(PyThreadState *tstate, int generation,
1179                 Py_ssize_t *n_collected, Py_ssize_t *n_uncollectable,
1180                 int nofail)
1181 {
1182     int i;
1183     Py_ssize_t m = 0; /* # objects collected */
1184     Py_ssize_t n = 0; /* # unreachable objects that couldn't be collected */
1185     PyGC_Head *young; /* the generation we are examining */
1186     PyGC_Head *old; /* next older generation */
1187     PyGC_Head unreachable; /* non-problematic unreachable trash */
1188     PyGC_Head finalizers;  /* objects with, & reachable from, __del__ */
1189     PyGC_Head *gc;
1190     _PyTime_t t1 = 0;   /* initialize to prevent a compiler warning */
1191     GCState *gcstate = &tstate->interp->gc;
1192 
1193     // gc_collect_main() must not be called before _PyGC_Init
1194     // or after _PyGC_Fini()
1195     assert(gcstate->garbage != NULL);
1196     assert(!_PyErr_Occurred(tstate));
1197 
1198     if (gcstate->debug & DEBUG_STATS) {
1199         PySys_WriteStderr("gc: collecting generation %d...\n", generation);
1200         show_stats_each_generations(gcstate);
1201         t1 = _PyTime_GetPerfCounter();
1202     }
1203 
1204     if (PyDTrace_GC_START_ENABLED())
1205         PyDTrace_GC_START(generation);
1206 
1207     /* update collection and allocation counters */
1208     if (generation+1 < NUM_GENERATIONS)
1209         gcstate->generations[generation+1].count += 1;
1210     for (i = 0; i <= generation; i++)
1211         gcstate->generations[i].count = 0;
1212 
1213     /* merge younger generations with one we are currently collecting */
1214     for (i = 0; i < generation; i++) {
1215         gc_list_merge(GEN_HEAD(gcstate, i), GEN_HEAD(gcstate, generation));
1216     }
1217 
1218     /* handy references */
1219     young = GEN_HEAD(gcstate, generation);
1220     if (generation < NUM_GENERATIONS-1)
1221         old = GEN_HEAD(gcstate, generation+1);
1222     else
1223         old = young;
1224     validate_list(old, collecting_clear_unreachable_clear);
1225 
1226     deduce_unreachable(young, &unreachable);
1227 
1228     untrack_tuples(young);
1229     /* Move reachable objects to next generation. */
1230     if (young != old) {
1231         if (generation == NUM_GENERATIONS - 2) {
1232             gcstate->long_lived_pending += gc_list_size(young);
1233         }
1234         gc_list_merge(young, old);
1235     }
1236     else {
1237         /* We only un-track dicts in full collections, to avoid quadratic
1238            dict build-up. See issue #14775. */
1239         untrack_dicts(young);
1240         gcstate->long_lived_pending = 0;
1241         gcstate->long_lived_total = gc_list_size(young);
1242     }
1243 
1244     /* All objects in unreachable are trash, but objects reachable from
1245      * legacy finalizers (e.g. tp_del) can't safely be deleted.
1246      */
1247     gc_list_init(&finalizers);
1248     // NEXT_MASK_UNREACHABLE is cleared here.
1249     // After move_legacy_finalizers(), unreachable is normal list.
1250     move_legacy_finalizers(&unreachable, &finalizers);
1251     /* finalizers contains the unreachable objects with a legacy finalizer;
1252      * unreachable objects reachable *from* those are also uncollectable,
1253      * and we move those into the finalizers list too.
1254      */
1255     move_legacy_finalizer_reachable(&finalizers);
1256 
1257     validate_list(&finalizers, collecting_clear_unreachable_clear);
1258     validate_list(&unreachable, collecting_set_unreachable_clear);
1259 
1260     /* Print debugging information. */
1261     if (gcstate->debug & DEBUG_COLLECTABLE) {
1262         for (gc = GC_NEXT(&unreachable); gc != &unreachable; gc = GC_NEXT(gc)) {
1263             debug_cycle("collectable", FROM_GC(gc));
1264         }
1265     }
1266 
1267     /* Clear weakrefs and invoke callbacks as necessary. */
1268     m += handle_weakrefs(&unreachable, old);
1269 
1270     validate_list(old, collecting_clear_unreachable_clear);
1271     validate_list(&unreachable, collecting_set_unreachable_clear);
1272 
1273     /* Call tp_finalize on objects which have one. */
1274     finalize_garbage(tstate, &unreachable);
1275 
1276     /* Handle any objects that may have resurrected after the call
1277      * to 'finalize_garbage' and continue the collection with the
1278      * objects that are still unreachable */
1279     PyGC_Head final_unreachable;
1280     handle_resurrected_objects(&unreachable, &final_unreachable, old);
1281 
1282     /* Call tp_clear on objects in the final_unreachable set.  This will cause
1283     * the reference cycles to be broken.  It may also cause some objects
1284     * in finalizers to be freed.
1285     */
1286     m += gc_list_size(&final_unreachable);
1287     delete_garbage(tstate, gcstate, &final_unreachable, old);
1288 
1289     /* Collect statistics on uncollectable objects found and print
1290      * debugging information. */
1291     for (gc = GC_NEXT(&finalizers); gc != &finalizers; gc = GC_NEXT(gc)) {
1292         n++;
1293         if (gcstate->debug & DEBUG_UNCOLLECTABLE)
1294             debug_cycle("uncollectable", FROM_GC(gc));
1295     }
1296     if (gcstate->debug & DEBUG_STATS) {
1297         double d = _PyTime_AsSecondsDouble(_PyTime_GetPerfCounter() - t1);
1298         PySys_WriteStderr(
1299             "gc: done, %zd unreachable, %zd uncollectable, %.4fs elapsed\n",
1300             n+m, n, d);
1301     }
1302 
1303     /* Append instances in the uncollectable set to a Python
1304      * reachable list of garbage.  The programmer has to deal with
1305      * this if they insist on creating this type of structure.
1306      */
1307     handle_legacy_finalizers(tstate, gcstate, &finalizers, old);
1308     validate_list(old, collecting_clear_unreachable_clear);
1309 
1310     /* Clear free list only during the collection of the highest
1311      * generation */
1312     if (generation == NUM_GENERATIONS-1) {
1313         clear_freelists(tstate->interp);
1314     }
1315 
1316     if (_PyErr_Occurred(tstate)) {
1317         if (nofail) {
1318             _PyErr_Clear(tstate);
1319         }
1320         else {
1321             _PyErr_WriteUnraisableMsg("in garbage collection", NULL);
1322         }
1323     }
1324 
1325     /* Update stats */
1326     if (n_collected) {
1327         *n_collected = m;
1328     }
1329     if (n_uncollectable) {
1330         *n_uncollectable = n;
1331     }
1332 
1333     struct gc_generation_stats *stats = &gcstate->generation_stats[generation];
1334     stats->collections++;
1335     stats->collected += m;
1336     stats->uncollectable += n;
1337 
1338     if (PyDTrace_GC_DONE_ENABLED()) {
1339         PyDTrace_GC_DONE(n + m);
1340     }
1341 
1342     assert(!_PyErr_Occurred(tstate));
1343     return n + m;
1344 }
1345 
1346 /* Invoke progress callbacks to notify clients that garbage collection
1347  * is starting or stopping
1348  */
1349 static void
invoke_gc_callback(PyThreadState *tstate, const char *phase,
1351                    int generation, Py_ssize_t collected,
1352                    Py_ssize_t uncollectable)
1353 {
1354     assert(!_PyErr_Occurred(tstate));
1355 
1356     /* we may get called very early */
1357     GCState *gcstate = &tstate->interp->gc;
1358     if (gcstate->callbacks == NULL) {
1359         return;
1360     }
1361 
1362     /* The local variable cannot be rebound, check it for sanity */
1363     assert(PyList_CheckExact(gcstate->callbacks));
1364     PyObject *info = NULL;
1365     if (PyList_GET_SIZE(gcstate->callbacks) != 0) {
1366         info = Py_BuildValue("{sisnsn}",
1367             "generation", generation,
1368             "collected", collected,
1369             "uncollectable", uncollectable);
1370         if (info == NULL) {
1371             PyErr_WriteUnraisable(NULL);
1372             return;
1373         }
1374     }
1375     for (Py_ssize_t i=0; i<PyList_GET_SIZE(gcstate->callbacks); i++) {
1376         PyObject *r, *cb = PyList_GET_ITEM(gcstate->callbacks, i);
1377         Py_INCREF(cb); /* make sure cb doesn't go away */
1378         r = PyObject_CallFunction(cb, "sO", phase, info);
1379         if (r == NULL) {
1380             PyErr_WriteUnraisable(cb);
1381         }
1382         else {
1383             Py_DECREF(r);
1384         }
1385         Py_DECREF(cb);
1386     }
1387     Py_XDECREF(info);
1388     assert(!_PyErr_Occurred(tstate));
1389 }
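/* Editor's note (sketch, not part of this module): the callbacks invoked above
 * are plain Python callables stored in gc.callbacks; each one receives the
 * phase string ("start" or "stop") and the info dict built in
 * invoke_gc_callback().  A C extension can register one by wrapping a
 * PyCFunction, as sketched below; the names are illustrative and error
 * handling is abbreviated.
 *
 *   static PyObject *
 *   on_gc_phase(PyObject *Py_UNUSED(self), PyObject *args)
 *   {
 *       const char *phase;
 *       PyObject *info;
 *       if (!PyArg_ParseTuple(args, "sO", &phase, &info)) {
 *           return NULL;
 *       }
 *       // info maps "generation", "collected" and "uncollectable" to ints.
 *       fprintf(stderr, "gc callback: phase=%s\n", phase);
 *       Py_RETURN_NONE;
 *   }
 *
 *   static PyMethodDef on_gc_phase_def = {
 *       "on_gc_phase", on_gc_phase, METH_VARARGS, NULL
 *   };
 *
 *   static int
 *   register_gc_callback(void)
 *   {
 *       PyObject *gc_mod = PyImport_ImportModule("gc");
 *       if (gc_mod == NULL) {
 *           return -1;
 *       }
 *       PyObject *cbs = PyObject_GetAttrString(gc_mod, "callbacks");
 *       PyObject *cb = (cbs == NULL) ? NULL : PyCFunction_New(&on_gc_phase_def, NULL);
 *       int rc = (cb == NULL) ? -1 : PyList_Append(cbs, cb);
 *       Py_XDECREF(cb);
 *       Py_XDECREF(cbs);
 *       Py_DECREF(gc_mod);
 *       return rc;
 *   }
 */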
1390 
1391 /* Perform garbage collection of a generation and invoke
1392  * progress callbacks.
1393  */
1394 static Py_ssize_t
1395 gc_collect_with_callback(PyThreadState *tstate, int generation)
1396 {
1397     assert(!_PyErr_Occurred(tstate));
1398     Py_ssize_t result, collected, uncollectable;
1399     invoke_gc_callback(tstate, "start", generation, 0, 0);
1400     result = gc_collect_main(tstate, generation, &collected, &uncollectable, 0);
1401     invoke_gc_callback(tstate, "stop", generation, collected, uncollectable);
1402     assert(!_PyErr_Occurred(tstate));
1403     return result;
1404 }
1405 
1406 static Py_ssize_t
1407 gc_collect_generations(PyThreadState *tstate)
1408 {
1409     GCState *gcstate = &tstate->interp->gc;
1410     /* Find the oldest generation (highest numbered) where the count
1411      * exceeds the threshold.  Objects in that generation and
1412      * generations younger than it will be collected. */
1413     Py_ssize_t n = 0;
1414     for (int i = NUM_GENERATIONS-1; i >= 0; i--) {
1415         if (gcstate->generations[i].count > gcstate->generations[i].threshold) {
1416             /* Avoid quadratic performance degradation in the number
1417                of tracked objects (see also issue #4074):
1418 
1419                To limit the cost of garbage collection, there are two strategies:
1420                  - make each collection faster, e.g. by scanning fewer objects
1421                  - do fewer collections
1422                This heuristic is about the latter strategy.
1423 
1424                In addition to the various configurable thresholds, we only trigger a
1425                full collection if the ratio
1426 
1427                 long_lived_pending / long_lived_total
1428 
1429                is above a given value (hardwired to 25%; see the sketch after this function).
1430 
1431                The reason is that, while "non-full" collections (i.e., collections of
1432                the young and middle generations) will always examine roughly the same
1433                number of objects -- determined by the aforementioned thresholds --,
1434                the cost of a full collection is proportional to the total number of
1435                long-lived objects, which is virtually unbounded.
1436 
1437                Indeed, it has been remarked that doing a full collection every
1438                <constant number> of object creations entails a dramatic performance
1439                degradation in workloads which consist of creating and storing lots of
1440                long-lived objects (e.g. building a large list of GC-tracked objects would
1441                show quadratic performance, instead of linear as expected: see issue #4074).
1442 
1443                Using the above ratio, instead, yields amortized linear performance in
1444                the total number of objects (the effect of which can be summarized
1445                thusly: "each full garbage collection is more and more costly as the
1446                number of objects grows, but we do fewer and fewer of them").
1447 
1448                This heuristic was suggested by Martin von Löwis on python-dev in
1449                June 2008. His original analysis and proposal can be found at:
1450                http://mail.python.org/pipermail/python-dev/2008-June/080579.html
1451             */
1452             if (i == NUM_GENERATIONS - 1
1453                 && gcstate->long_lived_pending < gcstate->long_lived_total / 4)
1454                 continue;
1455             n = gc_collect_with_callback(tstate, i);
1456             break;
1457         }
1458     }
1459     return n;
1460 }
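/* Editor's note: a standalone sketch of the full-collection check described in
 * the comment above (hypothetical helper, not used by this file).  A full
 * collection is considered worthwhile only once the objects added to the
 * oldest generation since the last full collection exceed 25% of the objects
 * already surviving there.
 *
 *   static int
 *   full_collection_is_worthwhile(Py_ssize_t long_lived_pending,
 *                                 Py_ssize_t long_lived_total)
 *   {
 *       // Mirrors "long_lived_pending < long_lived_total / 4" above, e.g.:
 *       //   pending=30000, total=100000 -> 30000 >= 25000 -> collect
 *       //   pending=20000, total=100000 -> 20000 <  25000 -> skip
 *       return long_lived_pending >= long_lived_total / 4;
 *   }
 */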
1461 
1462 #include "clinic/gcmodule.c.h"
1463 
1464 /*[clinic input]
1465 gc.enable
1466 
1467 Enable automatic garbage collection.
1468 [clinic start generated code]*/
1469 
1470 static PyObject *
1471 gc_enable_impl(PyObject *module)
1472 /*[clinic end generated code: output=45a427e9dce9155c input=81ac4940ca579707]*/
1473 {
1474     PyGC_Enable();
1475     Py_RETURN_NONE;
1476 }
1477 
1478 /*[clinic input]
1479 gc.disable
1480 
1481 Disable automatic garbage collection.
1482 [clinic start generated code]*/
1483 
1484 static PyObject *
1485 gc_disable_impl(PyObject *module)
1486 /*[clinic end generated code: output=97d1030f7aa9d279 input=8c2e5a14e800d83b]*/
1487 {
1488     PyGC_Disable();
1489     Py_RETURN_NONE;
1490 }
1491 
1492 /*[clinic input]
1493 gc.isenabled -> bool
1494 
1495 Returns true if automatic garbage collection is enabled.
1496 [clinic start generated code]*/
1497 
1498 static int
1499 gc_isenabled_impl(PyObject *module)
1500 /*[clinic end generated code: output=1874298331c49130 input=30005e0422373b31]*/
1501 {
1502     return PyGC_IsEnabled();
1503 }
1504 
1505 /*[clinic input]
1506 gc.collect -> Py_ssize_t
1507 
1508     generation: int(c_default="NUM_GENERATIONS - 1") = 2
1509 
1510 Run the garbage collector.
1511 
1512 With no arguments, run a full collection.  The optional argument
1513 may be an integer specifying which generation to collect.  A ValueError
1514 is raised if the generation number is invalid.
1515 
1516 The number of unreachable objects is returned.
1517 [clinic start generated code]*/
1518 
1519 static Py_ssize_t
1520 gc_collect_impl(PyObject *module, int generation)
1521 /*[clinic end generated code: output=b697e633043233c7 input=40720128b682d879]*/
1522 {
1523     PyThreadState *tstate = _PyThreadState_GET();
1524 
1525     if (generation < 0 || generation >= NUM_GENERATIONS) {
1526         _PyErr_SetString(tstate, PyExc_ValueError, "invalid generation");
1527         return -1;
1528     }
1529 
1530     GCState *gcstate = &tstate->interp->gc;
1531     Py_ssize_t n;
1532     if (gcstate->collecting) {
1533         /* already collecting, don't do anything */
1534         n = 0;
1535     }
1536     else {
1537         gcstate->collecting = 1;
1538         n = gc_collect_with_callback(tstate, generation);
1539         gcstate->collecting = 0;
1540     }
1541     return n;
1542 }
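/* Editor's note (sketch): collecting a single generation from C goes through
 * the module-level function above; PyGC_Collect() (defined later in this file)
 * always collects the oldest generation.  Error handling is abbreviated and
 * the snippet assumes a running interpreter.
 *
 *   PyObject *gc_mod = PyImport_ImportModule("gc");
 *   if (gc_mod != NULL) {
 *       // Collect only the youngest generation; the result is the number of
 *       // unreachable objects found, as a Python int.
 *       PyObject *n = PyObject_CallMethod(gc_mod, "collect", "i", 0);
 *       if (n != NULL) {
 *           printf("unreachable objects: %ld\n", PyLong_AsLong(n));
 *           Py_DECREF(n);
 *       }
 *       Py_DECREF(gc_mod);
 *   }
 */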
1543 
1544 /*[clinic input]
1545 gc.set_debug
1546 
1547     flags: int
1548         An integer that can have the following bits turned on:
1549           DEBUG_STATS - Print statistics during collection.
1550           DEBUG_COLLECTABLE - Print collectable objects found.
1551           DEBUG_UNCOLLECTABLE - Print unreachable but uncollectable objects
1552             found.
1553           DEBUG_SAVEALL - Save objects to gc.garbage rather than freeing them.
1554           DEBUG_LEAK - Debug leaking programs (everything but STATS).
1555     /
1556 
1557 Set the garbage collection debugging flags.
1558 
1559 Debugging information is written to sys.stderr.
1560 [clinic start generated code]*/
1561 
1562 static PyObject *
1563 gc_set_debug_impl(PyObject *module, int flags)
1564 /*[clinic end generated code: output=7c8366575486b228 input=5e5ce15e84fbed15]*/
1565 {
1566     GCState *gcstate = get_gc_state();
1567     gcstate->debug = flags;
1568     Py_RETURN_NONE;
1569 }
1570 
1571 /*[clinic input]
1572 gc.get_debug -> int
1573 
1574 Get the garbage collection debugging flags.
1575 [clinic start generated code]*/
1576 
1577 static int
1578 gc_get_debug_impl(PyObject *module)
1579 /*[clinic end generated code: output=91242f3506cd1e50 input=91a101e1c3b98366]*/
1580 {
1581     GCState *gcstate = get_gc_state();
1582     return gcstate->debug;
1583 }
1584 
1585 PyDoc_STRVAR(gc_set_thresh__doc__,
1586 "set_threshold(threshold0, [threshold1, threshold2]) -> None\n"
1587 "\n"
1588 "Sets the collection thresholds.  Setting threshold0 to zero disables\n"
1589 "collection.\n");
1590 
1591 static PyObject *
1592 gc_set_threshold(PyObject *self, PyObject *args)
1593 {
1594     GCState *gcstate = get_gc_state();
1595     if (!PyArg_ParseTuple(args, "i|ii:set_threshold",
1596                           &gcstate->generations[0].threshold,
1597                           &gcstate->generations[1].threshold,
1598                           &gcstate->generations[2].threshold))
1599         return NULL;
1600     for (int i = 3; i < NUM_GENERATIONS; i++) {
1601         /* generations higher than 2 get the same threshold */
1602         gcstate->generations[i].threshold = gcstate->generations[2].threshold;
1603     }
1604     Py_RETURN_NONE;
1605 }
1606 
1607 /*[clinic input]
1608 gc.get_threshold
1609 
1610 Return the current collection thresholds.
1611 [clinic start generated code]*/
1612 
1613 static PyObject *
1614 gc_get_threshold_impl(PyObject *module)
1615 /*[clinic end generated code: output=7902bc9f41ecbbd8 input=286d79918034d6e6]*/
1616 {
1617     GCState *gcstate = get_gc_state();
1618     return Py_BuildValue("(iii)",
1619                          gcstate->generations[0].threshold,
1620                          gcstate->generations[1].threshold,
1621                          gcstate->generations[2].threshold);
1622 }
1623 
1624 /*[clinic input]
1625 gc.get_count
1626 
1627 Return a three-tuple of the current collection counts.
1628 [clinic start generated code]*/
1629 
1630 static PyObject *
1631 gc_get_count_impl(PyObject *module)
1632 /*[clinic end generated code: output=354012e67b16398f input=a392794a08251751]*/
1633 {
1634     GCState *gcstate = get_gc_state();
1635     return Py_BuildValue("(iii)",
1636                          gcstate->generations[0].count,
1637                          gcstate->generations[1].count,
1638                          gcstate->generations[2].count);
1639 }
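/* Editor's note (sketch): how the two functions above are typically used
 * together from C.  Raising threshold0 during a large, mostly cycle-free
 * build phase reduces the number of generation-0 passes; the numbers below
 * are purely illustrative and error handling is abbreviated.
 *
 *   PyObject *gc_mod = PyImport_ImportModule("gc");
 *   if (gc_mod != NULL) {
 *       // Both calls return 3-tuples, e.g. (700, 10, 10) and (421, 3, 1).
 *       PyObject *thresholds = PyObject_CallMethod(gc_mod, "get_threshold", NULL);
 *       PyObject *counts = PyObject_CallMethod(gc_mod, "get_count", NULL);
 *       Py_XDECREF(thresholds);
 *       Py_XDECREF(counts);
 *
 *       // Make generation-0 collections roughly 100x rarer for a while.
 *       PyObject *res = PyObject_CallMethod(gc_mod, "set_threshold", "iii",
 *                                           70000, 10, 10);
 *       Py_XDECREF(res);
 *       Py_DECREF(gc_mod);
 *   }
 */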
1640 
1641 static int
1642 referrersvisit(PyObject* obj, PyObject *objs)
1643 {
1644     Py_ssize_t i;
1645     for (i = 0; i < PyTuple_GET_SIZE(objs); i++)
1646         if (PyTuple_GET_ITEM(objs, i) == obj)
1647             return 1;
1648     return 0;
1649 }
1650 
1651 static int
1652 gc_referrers_for(PyObject *objs, PyGC_Head *list, PyObject *resultlist)
1653 {
1654     PyGC_Head *gc;
1655     PyObject *obj;
1656     traverseproc traverse;
1657     for (gc = GC_NEXT(list); gc != list; gc = GC_NEXT(gc)) {
1658         obj = FROM_GC(gc);
1659         traverse = Py_TYPE(obj)->tp_traverse;
1660         if (obj == objs || obj == resultlist)
1661             continue;
1662         if (traverse(obj, (visitproc)referrersvisit, objs)) {
1663             if (PyList_Append(resultlist, obj) < 0)
1664                 return 0; /* error */
1665         }
1666     }
1667     return 1; /* no error */
1668 }
1669 
1670 PyDoc_STRVAR(gc_get_referrers__doc__,
1671 "get_referrers(*objs) -> list\n\
1672 Return the list of objects that directly refer to any of objs.");
1673 
1674 static PyObject *
1675 gc_get_referrers(PyObject *self, PyObject *args)
1676 {
1677     if (PySys_Audit("gc.get_referrers", "(O)", args) < 0) {
1678         return NULL;
1679     }
1680 
1681     PyObject *result = PyList_New(0);
1682     if (!result) {
1683         return NULL;
1684     }
1685 
1686     GCState *gcstate = get_gc_state();
1687     for (int i = 0; i < NUM_GENERATIONS; i++) {
1688         if (!(gc_referrers_for(args, GEN_HEAD(gcstate, i), result))) {
1689             Py_DECREF(result);
1690             return NULL;
1691         }
1692     }
1693     return result;
1694 }
1695 
1696 /* Append obj to list; return true if error (out of memory), false if OK. */
1697 static int
1698 referentsvisit(PyObject *obj, PyObject *list)
1699 {
1700     return PyList_Append(list, obj) < 0;
1701 }
1702 
1703 PyDoc_STRVAR(gc_get_referents__doc__,
1704 "get_referents(*objs) -> list\n\
1705 Return the list of objects that are directly referred to by objs.");
1706 
1707 static PyObject *
1708 gc_get_referents(PyObject *self, PyObject *args)
1709 {
1710     Py_ssize_t i;
1711     if (PySys_Audit("gc.get_referents", "(O)", args) < 0) {
1712         return NULL;
1713     }
1714     PyObject *result = PyList_New(0);
1715 
1716     if (result == NULL)
1717         return NULL;
1718 
1719     for (i = 0; i < PyTuple_GET_SIZE(args); i++) {
1720         traverseproc traverse;
1721         PyObject *obj = PyTuple_GET_ITEM(args, i);
1722 
1723         if (!_PyObject_IS_GC(obj))
1724             continue;
1725         traverse = Py_TYPE(obj)->tp_traverse;
1726         if (! traverse)
1727             continue;
1728         if (traverse(obj, (visitproc)referentsvisit, result)) {
1729             Py_DECREF(result);
1730             return NULL;
1731         }
1732     }
1733     return result;
1734 }
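/* Editor's note: the tp_traverse/visitproc pattern used by gc_get_referents()
 * above, rewritten as a hypothetical standalone helper.  The visit function
 * must return 0 to continue; any nonzero value aborts the traversal and is
 * propagated back by tp_traverse.
 *
 *   static int
 *   append_referent(PyObject *referent, void *arg)
 *   {
 *       // PyList_Append() returns -1 on failure, which stops the traversal.
 *       return PyList_Append((PyObject *)arg, referent);
 *   }
 *
 *   static PyObject *
 *   direct_referents(PyObject *obj)
 *   {
 *       PyObject *out = PyList_New(0);
 *       if (out == NULL) {
 *           return NULL;
 *       }
 *       traverseproc traverse =
 *           PyObject_IS_GC(obj) ? Py_TYPE(obj)->tp_traverse : NULL;
 *       if (traverse != NULL && traverse(obj, append_referent, out) != 0) {
 *           Py_DECREF(out);
 *           return NULL;
 *       }
 *       return out;
 *   }
 */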
1735 
1736 /*[clinic input]
1737 gc.get_objects
1738     generation: Py_ssize_t(accept={int, NoneType}, c_default="-1") = None
1739         Generation to extract the objects from.
1740 
1741 Return a list of objects tracked by the collector (excluding the list returned).
1742 
1743 If generation is not None, return only the objects tracked by the collector
1744 that are in that generation.
1745 [clinic start generated code]*/
1746 
1747 static PyObject *
1748 gc_get_objects_impl(PyObject *module, Py_ssize_t generation)
1749 /*[clinic end generated code: output=48b35fea4ba6cb0e input=ef7da9df9806754c]*/
1750 {
1751     PyThreadState *tstate = _PyThreadState_GET();
1752     int i;
1753     PyObject* result;
1754     GCState *gcstate = &tstate->interp->gc;
1755 
1756     if (PySys_Audit("gc.get_objects", "n", generation) < 0) {
1757         return NULL;
1758     }
1759 
1760     result = PyList_New(0);
1761     if (result == NULL) {
1762         return NULL;
1763     }
1764 
1765     /* If generation is passed, we extract only that generation */
1766     if (generation != -1) {
1767         if (generation >= NUM_GENERATIONS) {
1768             _PyErr_Format(tstate, PyExc_ValueError,
1769                           "generation parameter must be less than the number of "
1770                           "available generations (%i)",
1771                            NUM_GENERATIONS);
1772             goto error;
1773         }
1774 
1775         if (generation < 0) {
1776             _PyErr_SetString(tstate, PyExc_ValueError,
1777                              "generation parameter cannot be negative");
1778             goto error;
1779         }
1780 
1781         if (append_objects(result, GEN_HEAD(gcstate, generation))) {
1782             goto error;
1783         }
1784 
1785         return result;
1786     }
1787 
1788     /* If generation is not passed or None, get all objects from all generations */
1789     for (i = 0; i < NUM_GENERATIONS; i++) {
1790         if (append_objects(result, GEN_HEAD(gcstate, i))) {
1791             goto error;
1792         }
1793     }
1794     return result;
1795 
1796 error:
1797     Py_DECREF(result);
1798     return NULL;
1799 }
1800 
1801 /*[clinic input]
1802 gc.get_stats
1803 
1804 Return a list of dictionaries containing per-generation statistics.
1805 [clinic start generated code]*/
1806 
1807 static PyObject *
1808 gc_get_stats_impl(PyObject *module)
1809 /*[clinic end generated code: output=a8ab1d8a5d26f3ab input=1ef4ed9d17b1a470]*/
1810 {
1811     int i;
1812     struct gc_generation_stats stats[NUM_GENERATIONS], *st;
1813 
1814     /* To get consistent values despite allocations while constructing
1815        the result list, we use a snapshot of the running stats. */
1816     GCState *gcstate = get_gc_state();
1817     for (i = 0; i < NUM_GENERATIONS; i++) {
1818         stats[i] = gcstate->generation_stats[i];
1819     }
1820 
1821     PyObject *result = PyList_New(0);
1822     if (result == NULL)
1823         return NULL;
1824 
1825     for (i = 0; i < NUM_GENERATIONS; i++) {
1826         PyObject *dict;
1827         st = &stats[i];
1828         dict = Py_BuildValue("{snsnsn}",
1829                              "collections", st->collections,
1830                              "collected", st->collected,
1831                              "uncollectable", st->uncollectable
1832                             );
1833         if (dict == NULL)
1834             goto error;
1835         if (PyList_Append(result, dict)) {
1836             Py_DECREF(dict);
1837             goto error;
1838         }
1839         Py_DECREF(dict);
1840     }
1841     return result;
1842 
1843 error:
1844     Py_XDECREF(result);
1845     return NULL;
1846 }
1847 
1848 
1849 /*[clinic input]
1850 gc.is_tracked
1851 
1852     obj: object
1853     /
1854 
1855 Returns true if the object is tracked by the garbage collector.
1856 
1857 Simple atomic objects will return false.
1858 [clinic start generated code]*/
1859 
1860 static PyObject *
1861 gc_is_tracked(PyObject *module, PyObject *obj)
1862 /*[clinic end generated code: output=14f0103423b28e31 input=d83057f170ea2723]*/
1863 {
1864     PyObject *result;
1865 
1866     if (_PyObject_IS_GC(obj) && _PyObject_GC_IS_TRACKED(obj))
1867         result = Py_True;
1868     else
1869         result = Py_False;
1870     Py_INCREF(result);
1871     return result;
1872 }
1873 
1874 /*[clinic input]
1875 gc.is_finalized
1876 
1877     obj: object
1878     /
1879 
1880 Returns true if the object has already been finalized by the GC.
1881 [clinic start generated code]*/
1882 
1883 static PyObject *
1884 gc_is_finalized(PyObject *module, PyObject *obj)
1885 /*[clinic end generated code: output=e1516ac119a918ed input=201d0c58f69ae390]*/
1886 {
1887     if (_PyObject_IS_GC(obj) && _PyGCHead_FINALIZED(AS_GC(obj))) {
1888          Py_RETURN_TRUE;
1889     }
1890     Py_RETURN_FALSE;
1891 }
1892 
1893 /*[clinic input]
1894 gc.freeze
1895 
1896 Freeze all current tracked objects and ignore them for future collections.
1897 
1898 This can be used before a POSIX fork() call to make the gc copy-on-write friendly.
1899 Note: running a collection before a POSIX fork() call may free pages for future
1900 allocation, which can itself trigger copy-on-write.
1901 [clinic start generated code]*/
1902 
1903 static PyObject *
1904 gc_freeze_impl(PyObject *module)
1905 /*[clinic end generated code: output=502159d9cdc4c139 input=b602b16ac5febbe5]*/
1906 {
1907     GCState *gcstate = get_gc_state();
1908     for (int i = 0; i < NUM_GENERATIONS; ++i) {
1909         gc_list_merge(GEN_HEAD(gcstate, i), &gcstate->permanent_generation.head);
1910         gcstate->generations[i].count = 0;
1911     }
1912     Py_RETURN_NONE;
1913 }
1914 
1915 /*[clinic input]
1916 gc.unfreeze
1917 
1918 Unfreeze all objects in the permanent generation.
1919 
1920 Put all objects in the permanent generation back into oldest generation.
1921 [clinic start generated code]*/
1922 
1923 static PyObject *
1924 gc_unfreeze_impl(PyObject *module)
1925 /*[clinic end generated code: output=1c15f2043b25e169 input=2dd52b170f4cef6c]*/
1926 {
1927     GCState *gcstate = get_gc_state();
1928     gc_list_merge(&gcstate->permanent_generation.head,
1929                   GEN_HEAD(gcstate, NUM_GENERATIONS-1));
1930     Py_RETURN_NONE;
1931 }
1932 
1933 /*[clinic input]
1934 gc.get_freeze_count -> Py_ssize_t
1935 
1936 Return the number of objects in the permanent generation.
1937 [clinic start generated code]*/
1938 
1939 static Py_ssize_t
1940 gc_get_freeze_count_impl(PyObject *module)
1941 /*[clinic end generated code: output=61cbd9f43aa032e1 input=45ffbc65cfe2a6ed]*/
1942 {
1943     GCState *gcstate = get_gc_state();
1944     return gc_list_size(&gcstate->permanent_generation.head);
1945 }
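/* Editor's note (sketch): the intended pre-fork usage of the three functions
 * above, driven from C through the module interface.  Collecting first keeps
 * garbage out of the permanent generation; freezing then hides the survivors
 * from later collections so their PyGC_Head fields are not rewritten in the
 * child (copy-on-write friendly).  Error handling is abbreviated.
 *
 *   PyObject *gc_mod = PyImport_ImportModule("gc");
 *   if (gc_mod != NULL) {
 *       Py_XDECREF(PyObject_CallMethod(gc_mod, "collect", NULL));
 *       Py_XDECREF(PyObject_CallMethod(gc_mod, "freeze", NULL));
 *
 *       PyObject *frozen = PyObject_CallMethod(gc_mod, "get_freeze_count", NULL);
 *       Py_XDECREF(frozen);              // number of objects now frozen
 *
 *       // ... fork() happens here; a parent that keeps running and wants the
 *       // frozen objects collectable again can undo the freeze:
 *       Py_XDECREF(PyObject_CallMethod(gc_mod, "unfreeze", NULL));
 *       Py_DECREF(gc_mod);
 *   }
 */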
1946 
1947 
1948 PyDoc_STRVAR(gc__doc__,
1949 "This module provides access to the garbage collector for reference cycles.\n"
1950 "\n"
1951 "enable() -- Enable automatic garbage collection.\n"
1952 "disable() -- Disable automatic garbage collection.\n"
1953 "isenabled() -- Returns true if automatic collection is enabled.\n"
1954 "collect() -- Do a full collection right now.\n"
1955 "get_count() -- Return the current collection counts.\n"
1956 "get_stats() -- Return list of dictionaries containing per-generation stats.\n"
1957 "set_debug() -- Set debugging flags.\n"
1958 "get_debug() -- Get debugging flags.\n"
1959 "set_threshold() -- Set the collection thresholds.\n"
1960 "get_threshold() -- Return the current the collection thresholds.\n"
1961 "get_objects() -- Return a list of all objects tracked by the collector.\n"
1962 "is_tracked() -- Returns true if a given object is tracked.\n"
1963 "is_finalized() -- Returns true if a given object has been already finalized.\n"
1964 "get_referrers() -- Return the list of objects that refer to an object.\n"
1965 "get_referents() -- Return the list of objects that an object refers to.\n"
1966 "freeze() -- Freeze all tracked objects and ignore them for future collections.\n"
1967 "unfreeze() -- Unfreeze all objects in the permanent generation.\n"
1968 "get_freeze_count() -- Return the number of objects in the permanent generation.\n");
1969 
1970 static PyMethodDef GcMethods[] = {
1971     GC_ENABLE_METHODDEF
1972     GC_DISABLE_METHODDEF
1973     GC_ISENABLED_METHODDEF
1974     GC_SET_DEBUG_METHODDEF
1975     GC_GET_DEBUG_METHODDEF
1976     GC_GET_COUNT_METHODDEF
1977     {"set_threshold",  gc_set_threshold, METH_VARARGS, gc_set_thresh__doc__},
1978     GC_GET_THRESHOLD_METHODDEF
1979     GC_COLLECT_METHODDEF
1980     GC_GET_OBJECTS_METHODDEF
1981     GC_GET_STATS_METHODDEF
1982     GC_IS_TRACKED_METHODDEF
1983     GC_IS_FINALIZED_METHODDEF
1984     {"get_referrers",  gc_get_referrers, METH_VARARGS,
1985         gc_get_referrers__doc__},
1986     {"get_referents",  gc_get_referents, METH_VARARGS,
1987         gc_get_referents__doc__},
1988     GC_FREEZE_METHODDEF
1989     GC_UNFREEZE_METHODDEF
1990     GC_GET_FREEZE_COUNT_METHODDEF
1991     {NULL,      NULL}           /* Sentinel */
1992 };
1993 
1994 static int
1995 gcmodule_exec(PyObject *module)
1996 {
1997     GCState *gcstate = get_gc_state();
1998 
1999     /* garbage and callbacks are initialized by _PyGC_Init() early in
2000      * interpreter lifecycle. */
2001     assert(gcstate->garbage != NULL);
2002     if (PyModule_AddObjectRef(module, "garbage", gcstate->garbage) < 0) {
2003         return -1;
2004     }
2005     assert(gcstate->callbacks != NULL);
2006     if (PyModule_AddObjectRef(module, "callbacks", gcstate->callbacks) < 0) {
2007         return -1;
2008     }
2009 
2010 #define ADD_INT(NAME) if (PyModule_AddIntConstant(module, #NAME, NAME) < 0) { return -1; }
2011     ADD_INT(DEBUG_STATS);
2012     ADD_INT(DEBUG_COLLECTABLE);
2013     ADD_INT(DEBUG_UNCOLLECTABLE);
2014     ADD_INT(DEBUG_SAVEALL);
2015     ADD_INT(DEBUG_LEAK);
2016 #undef ADD_INT
2017     return 0;
2018 }
2019 
2020 static PyModuleDef_Slot gcmodule_slots[] = {
2021     {Py_mod_exec, gcmodule_exec},
2022     {0, NULL}
2023 };
2024 
2025 static struct PyModuleDef gcmodule = {
2026     PyModuleDef_HEAD_INIT,
2027     .m_name = "gc",
2028     .m_doc = gc__doc__,
2029     .m_size = 0,  // per interpreter state, see: get_gc_state()
2030     .m_methods = GcMethods,
2031     .m_slots = gcmodule_slots
2032 };
2033 
2034 PyMODINIT_FUNC
2035 PyInit_gc(void)
2036 {
2037     return PyModuleDef_Init(&gcmodule);
2038 }
2039 
2040 /* C API for controlling the state of the garbage collector */
2041 int
2042 PyGC_Enable(void)
2043 {
2044     GCState *gcstate = get_gc_state();
2045     int old_state = gcstate->enabled;
2046     gcstate->enabled = 1;
2047     return old_state;
2048 }
2049 
2050 int
2051 PyGC_Disable(void)
2052 {
2053     GCState *gcstate = get_gc_state();
2054     int old_state = gcstate->enabled;
2055     gcstate->enabled = 0;
2056     return old_state;
2057 }
2058 
2059 int
2060 PyGC_IsEnabled(void)
2061 {
2062     GCState *gcstate = get_gc_state();
2063     return gcstate->enabled;
2064 }
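/* Editor's note (sketch): because PyGC_Enable() and PyGC_Disable() return the
 * previous state, a "pause the collector" guard can restore whatever state the
 * application had configured:
 *
 *   int was_enabled = PyGC_Disable();
 *   // ... build a large, cycle-free data structure without triggering
 *   //     automatic generation-0 collections ...
 *   if (was_enabled) {
 *       PyGC_Enable();
 *   }
 */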
2065 
2066 /* Public API to invoke gc.collect() from C */
2067 Py_ssize_t
2068 PyGC_Collect(void)
2069 {
2070     PyThreadState *tstate = _PyThreadState_GET();
2071     GCState *gcstate = &tstate->interp->gc;
2072 
2073     if (!gcstate->enabled) {
2074         return 0;
2075     }
2076 
2077     Py_ssize_t n;
2078     if (gcstate->collecting) {
2079         /* already collecting, don't do anything */
2080         n = 0;
2081     }
2082     else {
2083         PyObject *exc, *value, *tb;
2084         gcstate->collecting = 1;
2085         _PyErr_Fetch(tstate, &exc, &value, &tb);
2086         n = gc_collect_with_callback(tstate, NUM_GENERATIONS - 1);
2087         _PyErr_Restore(tstate, exc, value, tb);
2088         gcstate->collecting = 0;
2089     }
2090 
2091     return n;
2092 }
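/* Editor's note (sketch): a minimal embedding program that forces a full
 * collection before finalization, so cycles created by application code are
 * broken while the interpreter is still fully usable.
 *
 *   #include <Python.h>
 *
 *   int
 *   main(void)
 *   {
 *       Py_Initialize();
 *       PyRun_SimpleString("a = []; a.append(a); del a   # leave a cycle behind");
 *       Py_ssize_t n = PyGC_Collect();   // typically >= 1: the cycle above is found
 *       printf("unreachable objects found: %zd\n", n);
 *       if (Py_FinalizeEx() < 0) {
 *           return 1;
 *       }
 *       return 0;
 *   }
 */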
2093 
2094 Py_ssize_t
2095 _PyGC_CollectNoFail(PyThreadState *tstate)
2096 {
2097     /* Ideally, this function is only called on interpreter shutdown,
2098        and therefore not recursively.  Unfortunately, when there are daemon
2099        threads, a daemon thread can start a cyclic garbage collection
2100        during interpreter shutdown (and then never finish it).
2101        See http://bugs.python.org/issue8713#msg195178 for an example.
2102        */
2103     GCState *gcstate = &tstate->interp->gc;
2104     if (gcstate->collecting) {
2105         return 0;
2106     }
2107 
2108     Py_ssize_t n;
2109     gcstate->collecting = 1;
2110     n = gc_collect_main(tstate, NUM_GENERATIONS - 1, NULL, NULL, 1);
2111     gcstate->collecting = 0;
2112     return n;
2113 }
2114 
2115 void
2116 _PyGC_DumpShutdownStats(PyInterpreterState *interp)
2117 {
2118     GCState *gcstate = &interp->gc;
2119     if (!(gcstate->debug & DEBUG_SAVEALL)
2120         && gcstate->garbage != NULL && PyList_GET_SIZE(gcstate->garbage) > 0) {
2121         const char *message;
2122         if (gcstate->debug & DEBUG_UNCOLLECTABLE)
2123             message = "gc: %zd uncollectable objects at " \
2124                 "shutdown";
2125         else
2126             message = "gc: %zd uncollectable objects at " \
2127                 "shutdown; use gc.set_debug(gc.DEBUG_UNCOLLECTABLE) to list them";
2128         /* PyErr_WarnFormat does too many things, and at shutdown the
2129            warnings module's dependencies (e.g. linecache) may already be
2130            gone. */
2131         if (PyErr_WarnExplicitFormat(PyExc_ResourceWarning, "gc", 0,
2132                                      "gc", NULL, message,
2133                                      PyList_GET_SIZE(gcstate->garbage)))
2134             PyErr_WriteUnraisable(NULL);
2135         if (gcstate->debug & DEBUG_UNCOLLECTABLE) {
2136             PyObject *repr = NULL, *bytes = NULL;
2137             repr = PyObject_Repr(gcstate->garbage);
2138             if (!repr || !(bytes = PyUnicode_EncodeFSDefault(repr)))
2139                 PyErr_WriteUnraisable(gcstate->garbage);
2140             else {
2141                 PySys_WriteStderr(
2142                     "      %s\n",
2143                     PyBytes_AS_STRING(bytes)
2144                     );
2145             }
2146             Py_XDECREF(repr);
2147             Py_XDECREF(bytes);
2148         }
2149     }
2150 }
2151 
2152 
2153 static void
2154 gc_fini_untrack(PyGC_Head *list)
2155 {
2156     PyGC_Head *gc;
2157     for (gc = GC_NEXT(list); gc != list; gc = GC_NEXT(list)) {
2158         PyObject *op = FROM_GC(gc);
2159         _PyObject_GC_UNTRACK(op);
2160         // gh-92036: If a deallocator function expects the object to be tracked
2161         // by the GC (e.g. func_dealloc()), it can crash if called on an object
2162         // which is no longer tracked by the GC. Leak one strong reference on
2163         // purpose so the object is never deleted and its deallocator is not
2164         // called.
2165         Py_INCREF(op);
2166     }
2167 }
2168 
2169 
2170 void
2171 _PyGC_Fini(PyInterpreterState *interp)
2172 {
2173     GCState *gcstate = &interp->gc;
2174     Py_CLEAR(gcstate->garbage);
2175     Py_CLEAR(gcstate->callbacks);
2176 
2177     if (!_Py_IsMainInterpreter(interp)) {
2178         // bpo-46070: Explicitly untrack all objects currently tracked by the
2179         // GC. Otherwise, if an object is used later by another interpreter,
2180         // calling PyObject_GC_UnTrack() on the object crashes if the previous
2181         // or the next object of the PyGC_Head structure has become a dangling
2182         // pointer.
2183         for (int i = 0; i < NUM_GENERATIONS; i++) {
2184             PyGC_Head *gen = GEN_HEAD(gcstate, i);
2185             gc_fini_untrack(gen);
2186         }
2187     }
2188 }
2189 
2190 /* for debugging */
2191 void
2192 _PyGC_Dump(PyGC_Head *g)
2193 {
2194     _PyObject_Dump(FROM_GC(g));
2195 }
2196 
2197 
2198 #ifdef Py_DEBUG
2199 static int
2200 visit_validate(PyObject *op, void *parent_raw)
2201 {
2202     PyObject *parent = _PyObject_CAST(parent_raw);
2203     if (_PyObject_IsFreed(op)) {
2204         _PyObject_ASSERT_FAILED_MSG(parent,
2205                                     "PyObject_GC_Track() object is not valid");
2206     }
2207     return 0;
2208 }
2209 #endif
2210 
2211 
2212 /* extension modules might be compiled with GC support, so these
2213    functions must always be available */
2214 
2215 void
2216 PyObject_GC_Track(void *op_raw)
2217 {
2218     PyObject *op = _PyObject_CAST(op_raw);
2219     if (_PyObject_GC_IS_TRACKED(op)) {
2220         _PyObject_ASSERT_FAILED_MSG(op,
2221                                     "object already tracked "
2222                                     "by the garbage collector");
2223     }
2224     _PyObject_GC_TRACK(op);
2225 
2226 #ifdef Py_DEBUG
2227     /* Check that the object is valid: validate objects traversed
2228        by tp_traverse() */
2229     traverseproc traverse = Py_TYPE(op)->tp_traverse;
2230     (void)traverse(op, visit_validate, op);
2231 #endif
2232 }
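/* Editor's note (sketch): the usual shape of a C container type that
 * cooperates with this collector.  "PairObject" and "Pair_Type" are
 * hypothetical; the key points are that PyObject_GC_Track() is called only
 * after the object is fully initialized, and that tp_traverse visits every
 * owned PyObject* field.
 *
 *   typedef struct {
 *       PyObject_HEAD
 *       PyObject *first;    // owned references that may participate in cycles
 *       PyObject *second;
 *   } PairObject;
 *
 *   static PairObject *
 *   pair_create(PyObject *first, PyObject *second)
 *   {
 *       PairObject *op = PyObject_GC_New(PairObject, &Pair_Type);
 *       if (op == NULL) {
 *           return NULL;
 *       }
 *       op->first = Py_NewRef(first);
 *       op->second = Py_NewRef(second);
 *       PyObject_GC_Track((PyObject *)op);   // safe: all fields are set up
 *       return op;
 *   }
 *
 *   static int
 *   pair_traverse(PairObject *self, visitproc visit, void *arg)
 *   {
 *       Py_VISIT(self->first);
 *       Py_VISIT(self->second);
 *       return 0;
 *   }
 */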
2233 
2234 void
2235 PyObject_GC_UnTrack(void *op_raw)
2236 {
2237     PyObject *op = _PyObject_CAST(op_raw);
2238     /* Obscure:  the Py_TRASHCAN mechanism requires that we be able to
2239      * call PyObject_GC_UnTrack twice on an object.
2240      */
2241     if (_PyObject_GC_IS_TRACKED(op)) {
2242         _PyObject_GC_UNTRACK(op);
2243     }
2244 }
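/* Editor's note (sketch, continuing the hypothetical PairObject above):
 * deallocation untracks the object first, so the collector never runs
 * tp_traverse or tp_clear on a half-torn-down object, and tp_clear drops the
 * owned references so that delete_garbage() can break cycles.
 *
 *   static int
 *   pair_clear(PairObject *self)
 *   {
 *       Py_CLEAR(self->first);
 *       Py_CLEAR(self->second);
 *       return 0;
 *   }
 *
 *   static void
 *   pair_dealloc(PairObject *self)
 *   {
 *       PyObject_GC_UnTrack(self);   // must come before tearing down fields
 *       (void)pair_clear(self);
 *       PyObject_GC_Del(self);
 *   }
 */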
2245 
2246 int
2247 PyObject_IS_GC(PyObject *obj)
2248 {
2249     return _PyObject_IS_GC(obj);
2250 }
2251 
2252 void
2253 _PyObject_GC_Link(PyObject *op)
2254 {
2255     PyGC_Head *g = AS_GC(op);
2256     assert(((uintptr_t)g & (sizeof(uintptr_t)-1)) == 0);  // g must be correctly aligned
2257 
2258     PyThreadState *tstate = _PyThreadState_GET();
2259     GCState *gcstate = &tstate->interp->gc;
2260     g->_gc_next = 0;
2261     g->_gc_prev = 0;
2262     gcstate->generations[0].count++; /* number of allocated GC objects */
2263     if (gcstate->generations[0].count > gcstate->generations[0].threshold &&
2264         gcstate->enabled &&
2265         gcstate->generations[0].threshold &&
2266         !gcstate->collecting &&
2267         !_PyErr_Occurred(tstate))
2268     {
2269         gcstate->collecting = 1;
2270         gc_collect_generations(tstate);
2271         gcstate->collecting = 0;
2272     }
2273 }
2274 
2275 static PyObject *
2276 gc_alloc(size_t basicsize, size_t presize)
2277 {
2278     PyThreadState *tstate = _PyThreadState_GET();
2279     if (basicsize > PY_SSIZE_T_MAX - presize) {
2280         return _PyErr_NoMemory(tstate);
2281     }
2282     size_t size = presize + basicsize;
2283     char *mem = PyObject_Malloc(size);
2284     if (mem == NULL) {
2285         return _PyErr_NoMemory(tstate);
2286     }
2287     ((PyObject **)mem)[0] = NULL;
2288     ((PyObject **)mem)[1] = NULL;
2289     PyObject *op = (PyObject *)(mem + presize);
2290     _PyObject_GC_Link(op);
2291     return op;
2292 }
2293 
2294 PyObject *
2295 _PyObject_GC_New(PyTypeObject *tp)
2296 {
2297     size_t presize = _PyType_PreHeaderSize(tp);
2298     PyObject *op = gc_alloc(_PyObject_SIZE(tp), presize);
2299     if (op == NULL) {
2300         return NULL;
2301     }
2302     _PyObject_Init(op, tp);
2303     return op;
2304 }
2305 
2306 PyVarObject *
2307 _PyObject_GC_NewVar(PyTypeObject *tp, Py_ssize_t nitems)
2308 {
2309     size_t size;
2310     PyVarObject *op;
2311 
2312     if (nitems < 0) {
2313         PyErr_BadInternalCall();
2314         return NULL;
2315     }
2316     size_t presize = _PyType_PreHeaderSize(tp);
2317     size = _PyObject_VAR_SIZE(tp, nitems);
2318     op = (PyVarObject *)gc_alloc(size, presize);
2319     if (op == NULL) {
2320         return NULL;
2321     }
2322     _PyObject_InitVar(op, tp, nitems);
2323     return op;
2324 }
2325 
2326 PyVarObject *
2327 _PyObject_GC_Resize(PyVarObject *op, Py_ssize_t nitems)
2328 {
2329     const size_t basicsize = _PyObject_VAR_SIZE(Py_TYPE(op), nitems);
2330     _PyObject_ASSERT((PyObject *)op, !_PyObject_GC_IS_TRACKED(op));
2331     if (basicsize > PY_SSIZE_T_MAX - sizeof(PyGC_Head)) {
2332         return (PyVarObject *)PyErr_NoMemory();
2333     }
2334 
2335     PyGC_Head *g = AS_GC(op);
2336     g = (PyGC_Head *)PyObject_Realloc(g,  sizeof(PyGC_Head) + basicsize);
2337     if (g == NULL)
2338         return (PyVarObject *)PyErr_NoMemory();
2339     op = (PyVarObject *) FROM_GC(g);
2340     Py_SET_SIZE(op, nitems);
2341     return op;
2342 }
2343 
2344 void
2345 PyObject_GC_Del(void *op)
2346 {
2347     size_t presize = _PyType_PreHeaderSize(((PyObject *)op)->ob_type);
2348     PyGC_Head *g = AS_GC(op);
2349     if (_PyObject_GC_IS_TRACKED(op)) {
2350 #ifdef Py_DEBUG
2351         if (PyErr_WarnExplicitFormat(PyExc_ResourceWarning, "gc", 0,
2352                                      "gc", NULL, "Object of type %s is not untracked before destruction",
2353                                      ((PyObject*)op)->ob_type->tp_name)) {
2354             PyErr_WriteUnraisable(NULL);
2355         }
2356 #endif
2357         gc_list_remove(g);
2358     }
2359     GCState *gcstate = get_gc_state();
2360     if (gcstate->generations[0].count > 0) {
2361         gcstate->generations[0].count--;
2362     }
2363     PyObject_Free(((char *)op)-presize);
2364 }
2365 
2366 int
2367 PyObject_GC_IsTracked(PyObject* obj)
2368 {
2369     if (_PyObject_IS_GC(obj) && _PyObject_GC_IS_TRACKED(obj)) {
2370         return 1;
2371     }
2372     return 0;
2373 }
2374 
2375 int
2376 PyObject_GC_IsFinalized(PyObject *obj)
2377 {
2378     if (_PyObject_IS_GC(obj) && _PyGCHead_FINALIZED(AS_GC(obj))) {
2379          return 1;
2380     }
2381     return 0;
2382 }
2383