xref: /aosp_15_r20/external/pytorch/torch/csrc/dynamo/cpython_defs.c (revision da0073e96a02ea20f0ac840b70461e3646d07c45)
1 #include <torch/csrc/dynamo/cpython_defs.h>
2 #include <torch/csrc/dynamo/cpython_includes.h>
3 #include <torch/csrc/dynamo/debug_macros.h>
4 
5 #if IS_PYTHON_3_11_PLUS
6 
7 #define Py_BUILD_CORE
8 #define NEED_OPCODE_TABLES // To get _PyOpcode_Deopt, _PyOpcode_Caches
9 
10 #if IS_PYTHON_3_13_PLUS
11 #include <cpython/code.h> // To get PyUnstable_Code_GetFirstFree
12 #define NEED_OPCODE_METADATA
13 #include <internal/pycore_opcode_metadata.h>
14 #undef NEED_OPCODE_METADATA
15 #else
16 #include <internal/pycore_opcode.h>
17 #endif
18 
19 #undef NEED_OPCODE_TABLES
20 #undef Py_BUILD_CORE
21 
22 // As a simple way to reduce the impact of ABI changes on the CPython side, this check forces
23 // us to manually re-check that the function didn't change on the next major version
24 #if IS_PYTHON_3_14_PLUS
25 #error "Please ensure that the functions below still match the CPython implementation for 3.14"
26 #endif
27 
28 // https://github.com/python/cpython/blob/a7715ccfba5b86ab09f86ec56ac3755c93b46b48/Objects/frameobject.c#L1079
29 static int
THP_PyFrame_OpAlreadyRan(_PyInterpreterFrame * frame,int opcode,int oparg)30 THP_PyFrame_OpAlreadyRan(_PyInterpreterFrame *frame, int opcode, int oparg)
31 {
32     // This only works when opcode is a non-quickened form:
33     CHECK(_PyOpcode_Deopt[opcode] == opcode);
34     int check_oparg = 0;
35     for (_Py_CODEUNIT *instruction = _PyCode_CODE(F_CODE(frame));
36          instruction < PREV_INSTR(frame) ; instruction++)
37     {
38         int check_opcode = _PyOpcode_Deopt[_Py_OPCODE(*instruction)];
39         check_oparg |= _Py_OPARG(*instruction);
40         if (check_opcode == opcode && check_oparg == oparg) {
41             return 1;
42         }
43         if (check_opcode == EXTENDED_ARG) {
44             check_oparg <<= 8;
45         }
46         else {
47             check_oparg = 0;
48         }
49         instruction += _PyOpcode_Caches[check_opcode];
50     }
51     return 0;
52 }
53 
54 #if IS_PYTHON_3_12_PLUS
55 
56 int
THP_PyFrame_FastToLocalsWithError(_PyInterpreterFrame * frame,int * free_vars_copied)57 THP_PyFrame_FastToLocalsWithError(_PyInterpreterFrame *frame, int *free_vars_copied)
58 {
59     // functionality moved to framelocals_mapping.cpp
60     return 0;
61 }
62 
63 #else
64 
65 // https://github.com/python/cpython/blob/a7715ccfba5b86ab09f86ec56ac3755c93b46b48/Objects/frameobject.c#L1182
66 // free_vars_copied argument added in order to let caller know that the COPY_FREE_VARS
67 // codepath occurred.
68 int
THP_PyFrame_FastToLocalsWithError(_PyInterpreterFrame * frame,int * free_vars_copied)69 THP_PyFrame_FastToLocalsWithError(_PyInterpreterFrame *frame, int *free_vars_copied) {
70     /* Merge fast locals into f->f_locals */
71     PyObject *locals = NULL;
72     PyObject **fast = NULL;
73     PyCodeObject *co = NULL;
74     locals = frame->f_locals;
75     if (locals == NULL) {
76         locals = frame->f_locals = PyDict_New();
77         if (locals == NULL)
78             return -1;
79     }
80     co = F_CODE(frame);
81     fast = _PyFrame_GetLocalsArray(frame);
82     // COPY_FREE_VARS has no quickened forms, so no need to use _PyOpcode_Deopt
83     // here:
84     int lasti = _PyInterpreterFrame_LASTI(frame);
85     if (lasti < 0 && _Py_OPCODE(_PyCode_CODE(co)[0]) == COPY_FREE_VARS) {
86         /* Free vars have not been initialized -- Do that */
87         PyCodeObject *co = F_CODE(frame);
88         PyObject *closure = frame->f_func->func_closure;
89         int offset = co->co_nlocals + co->co_nplaincellvars;
90         for (int i = 0; i < co->co_nfreevars; ++i) {
91             PyObject *o = PyTuple_GET_ITEM(closure, i);
92             Py_INCREF(o);
93             frame->localsplus[offset + i] = o;
94         }
95         // COPY_FREE_VARS doesn't have inline CACHEs, either:
96         PREV_INSTR(frame) = _PyCode_CODE(F_CODE(frame));
97 
98         *free_vars_copied = 1;
99     }
100     for (int i = 0; i < co->co_nlocalsplus; i++) {
101         _PyLocals_Kind kind = _PyLocals_GetKind(co->co_localspluskinds, i);
102 
103         /* If the namespace is unoptimized, then one of the
104            following cases applies:
105            1. It does not contain free variables, because it
106               uses import * or is a top-level namespace.
107            2. It is a class namespace.
108            We don't want to accidentally copy free variables
109            into the locals dict used by the class.
110         */
111         if (kind & CO_FAST_FREE && !(co->co_flags & CO_OPTIMIZED)) {
112             continue;
113         }
114 
115         PyObject *name = PyTuple_GET_ITEM(co->co_localsplusnames, i);
116         PyObject *value = fast[i];
117         if (frame->stacktop) {
118             if (kind & CO_FAST_FREE) {
119                 // The cell was set by COPY_FREE_VARS.
120                 CHECK(value != NULL && PyCell_Check(value));
121                 value = PyCell_GET(value);
122             }
123             else if (kind & CO_FAST_CELL) {
124                 // Note that no *_DEREF ops can happen before MAKE_CELL
125                 // executes.  So there's no need to duplicate the work
126                 // that MAKE_CELL would otherwise do later, if it hasn't
127                 // run yet.
128                 if (value != NULL) {
129                     if (PyCell_Check(value) &&
130                             THP_PyFrame_OpAlreadyRan(frame, MAKE_CELL, i)) {
131                         // (likely) MAKE_CELL must have executed already.
132                         value = PyCell_GET(value);
133                     }
134                     // (likely) Otherwise it it is an arg (kind & CO_FAST_LOCAL),
135                     // with the initial value set when the frame was created...
136                     // (unlikely) ...or it was set to some initial value by
137                     // an earlier call to PyFrame_LocalsToFast().
138                 }
139             }
140         }
141         else {
142             CHECK(value == NULL);
143         }
144         if (value == NULL) {
145             if (PyObject_DelItem(locals, name) != 0) {
146                 if (PyErr_ExceptionMatches(PyExc_KeyError)) {
147                     PyErr_Clear();
148                 }
149                 else {
150                     return -1;
151                 }
152             }
153         }
154         else {
155             if (PyObject_SetItem(locals, name, value) != 0) {
156                 return -1;
157             }
158         }
159     }
160     return 0;
161 }
162 
163 #endif
164 
// Copies the `func_<field>` member from `f2` into `f1`, taking a new
// reference to it first (Py_XINCREF, so a NULL member is tolerated).
// e.g. COPY_FIELD(op, o, globals); becomes
//   do {
//     Py_XINCREF((o)->func_globals);
//     (op)->func_globals = (o)->func_globals;
//   } while (0);
// The do/while(0) wrapper makes the expansion a single statement, so the
// macro stays correct inside an unbraced if/else.
#define COPY_FIELD(f1, f2, field)            \
  do {                                       \
    Py_XINCREF((f2)->func_##field);          \
    (f1)->func_##field = (f2)->func_##field; \
  } while (0)
171 
172 // Not actually copied from CPython, but loosely based on
173 // https://github.com/python/cpython/blob/e715da6db1d1d70cd779dc48e1ba8110c51cc1bf/Objects/funcobject.c
174 // Makes a new PyFunctionObject copy of `o`, but with the code object fields
175 // determined from `code`.
176 // Ensure that all fields defined in the PyFunctionObject struct in
177 // https://github.com/python/cpython/blob/e715da6db1d1d70cd779dc48e1ba8110c51cc1bf/Include/cpython/funcobject.h
178 // are accounted for.
179 PyFunctionObject *
_PyFunction_CopyWithNewCode(PyFunctionObject * o,PyCodeObject * code)180 _PyFunction_CopyWithNewCode(PyFunctionObject *o, PyCodeObject* code)
181 {
182   PyFunctionObject *op = PyObject_GC_New(PyFunctionObject, &PyFunction_Type);
183   if (op == NULL) {
184     return NULL;
185   }
186   Py_XINCREF(code);
187   op->func_code = (PyObject *) code;
188   Py_XINCREF(code->co_name);
189   op->func_name = code->co_name;
190   Py_XINCREF(code->co_qualname);
191   op->func_qualname = code->co_qualname;
192   COPY_FIELD(op, o, globals);
193   COPY_FIELD(op, o, builtins);
194   COPY_FIELD(op, o, defaults);
195   COPY_FIELD(op, o, kwdefaults);
196   COPY_FIELD(op, o, closure);
197   COPY_FIELD(op, o, doc);
198   COPY_FIELD(op, o, dict);
199   op->func_weakreflist = NULL;
200   COPY_FIELD(op, o, module);
201   COPY_FIELD(op, o, annotations);
202   #if IS_PYTHON_3_12_PLUS
203   COPY_FIELD(op, o, typeparams);
204   #endif
205   op->vectorcall = o->vectorcall;
206   op->func_version = o->func_version;
207   PyObject_GC_Track(op);
208   return op;
209 }
210 
211 // From https://github.com/python/cpython/blob/e715da6db1d1d70cd779dc48e1ba8110c51cc1bf/Objects/frameobject.c#L1020
212 PyFrameObject*
THP_PyFrame_New_NoTrack(const PyCodeObject * code)213 THP_PyFrame_New_NoTrack(const PyCodeObject *code)
214 {
215     // DYNAMO: commented out
216     // CALL_STAT_INC(frame_objects_created);
217     int slots = code->co_nlocalsplus + code->co_stacksize;
218     PyFrameObject *f = PyObject_GC_NewVar(PyFrameObject, &PyFrame_Type, slots);
219     if (f == NULL) {
220         return NULL;
221     }
222     f->f_back = NULL;
223     f->f_trace = NULL;
224     f->f_trace_lines = 1;
225     f->f_trace_opcodes = 0;
226 #if IS_PYTHON_3_13_PLUS
227     f->f_extra_locals = NULL;
228 #else
229     f->f_fast_as_locals = 0;
230 #endif
231     f->f_lineno = 0;
232     return f;
233 }
234 
235 // From https://github.com/python/cpython/blob/e715da6db1d1d70cd779dc48e1ba8110c51cc1bf/Python/frame.c#L27
236 PyFrameObject *
THP_PyFrame_MakeAndSetFrameObject(_PyInterpreterFrame * frame)237 THP_PyFrame_MakeAndSetFrameObject(_PyInterpreterFrame *frame)
238 {
239     CHECK(frame->frame_obj == NULL);
240     PyObject *error_type = NULL, *error_value = NULL, *error_traceback = NULL;
241     PyErr_Fetch(&error_type, &error_value, &error_traceback);
242 
243     PyFrameObject *f = THP_PyFrame_New_NoTrack(F_CODE(frame));
244     if (f == NULL) {
245         Py_XDECREF(error_type);
246         Py_XDECREF(error_value);
247         Py_XDECREF(error_traceback);
248         return NULL;
249     }
250     PyErr_Restore(error_type, error_value, error_traceback);
251     if (frame->frame_obj) {
252         // GH-97002: How did we get into this horrible situation? Most likely,
253         // allocating f triggered a GC collection, which ran some code that
254         // *also* created the same frame... while we were in the middle of
255         // creating it! See test_sneaky_frame_object in test_frame.py for a
256         // concrete example.
257         //
258         // Regardless, just throw f away and use that frame instead, since it's
259         // already been exposed to user code. It's actually a bit tricky to do
260         // this, since we aren't backed by a real _PyInterpreterFrame anymore.
261         // Just pretend that we have an owned, cleared frame so frame_dealloc
262         // doesn't make the situation worse:
263         f->f_frame = (_PyInterpreterFrame *)f->_f_frame_data;
264         f->f_frame->owner = FRAME_CLEARED;
265         f->f_frame->frame_obj = f;
266         Py_DECREF(f);
267         return frame->frame_obj;
268     }
269     CHECK(frame->owner != FRAME_OWNED_BY_FRAME_OBJECT);
270     CHECK(frame->owner != FRAME_CLEARED);
271     f->f_frame = frame;
272     frame->frame_obj = f;
273     return f;
274 }
275 
276 // From https://github.com/python/cpython/blob/e715da6db1d1d70cd779dc48e1ba8110c51cc1bf/Include/internal/pycore_frame.h#L163
277 static inline PyFrameObject *
THP_PyFrame_GetFrameObject(_PyInterpreterFrame * frame)278 THP_PyFrame_GetFrameObject(_PyInterpreterFrame *frame)
279 {
280 
281     CHECK(!_PyFrame_IsIncomplete(frame));
282     PyFrameObject *res =  frame->frame_obj;
283     if (res != NULL) {
284         return res;
285     }
286     return THP_PyFrame_MakeAndSetFrameObject(frame);
287 }
288 
289 // From https://github.com/python/cpython/blob/e715da6db1d1d70cd779dc48e1ba8110c51cc1bf/Python/frame.c#L79
290 static void
THP_take_ownership(PyFrameObject * f,_PyInterpreterFrame * frame)291 THP_take_ownership(PyFrameObject *f, _PyInterpreterFrame *frame)
292 {
293     CHECK(frame->owner != FRAME_OWNED_BY_FRAME_OBJECT);
294     CHECK(frame->owner != FRAME_CLEARED);
295     Py_ssize_t size = ((char*)&frame->localsplus[frame->stacktop]) - (char *)frame;
296     memcpy((_PyInterpreterFrame *)f->_f_frame_data, frame, size);
297     frame = (_PyInterpreterFrame *)f->_f_frame_data;
298     f->f_frame = frame;
299     frame->owner = FRAME_OWNED_BY_FRAME_OBJECT;
300     if (_PyFrame_IsIncomplete(frame)) {
301         // This may be a newly-created generator or coroutine frame. Since it's
302         // dead anyways, just pretend that the first RESUME ran:
303         PyCodeObject *code = F_CODE(frame);
304         PREV_INSTR(frame) = _PyCode_CODE(code) + code->_co_firsttraceable;
305     }
306     CHECK(!_PyFrame_IsIncomplete(frame));
307     CHECK(f->f_back == NULL);
308     _PyInterpreterFrame *prev = frame->previous;
309     while (prev && _PyFrame_IsIncomplete(prev)) {
310         prev = prev->previous;
311     }
312     if (prev) {
313         /* Link PyFrameObjects.f_back and remove link through _PyInterpreterFrame.previous */
314         PyFrameObject *back = THP_PyFrame_GetFrameObject(prev);
315         if (back == NULL) {
316             /* Memory error here. */
317             CHECK(PyErr_ExceptionMatches(PyExc_MemoryError));
318             /* Nothing we can do about it */
319             PyErr_Clear();
320         }
321         else {
322             f->f_back = (PyFrameObject *)Py_NewRef(back);
323         }
324         frame->previous = NULL;
325     }
326     // DYNAMO: use public GC functions instead of internal ones
327     if (!PyObject_GC_IsTracked((PyObject *) f)) {
328         PyObject_GC_Track((PyObject *) f);
329     }
330 }
331 
332 // From https://github.com/python/cpython/blob/e715da6db1d1d70cd779dc48e1ba8110c51cc1bf/Python/frame.c#L120
333 void
THP_PyFrame_Clear(_PyInterpreterFrame * frame)334 THP_PyFrame_Clear(_PyInterpreterFrame *frame)
335 {
336     /* It is the responsibility of the owning generator/coroutine
337      * to have cleared the enclosing generator, if any. */
338     CHECK(frame->owner != FRAME_OWNED_BY_GENERATOR ||
339         _PyFrame_GetGenerator(frame)->gi_frame_state == FRAME_CLEARED);
340     // GH-99729: Clearing this frame can expose the stack (via finalizers). It's
341     // crucial that this frame has been unlinked, and is no longer visible:
342 #if IS_PYTHON_3_13_PLUS
343     CHECK(_PyThreadState_GET()->current_frame != frame);
344 #else
345     CHECK(_PyThreadState_GET()->cframe->current_frame != frame);
346 #endif
347     if (frame->frame_obj) {
348         PyFrameObject *f = frame->frame_obj;
349         frame->frame_obj = NULL;
350         if (Py_REFCNT(f) > 1) {
351             THP_take_ownership(f, frame);
352             Py_DECREF(f);
353             return;
354         }
355         Py_DECREF(f);
356     }
357     CHECK(frame->stacktop >= 0);
358     for (int i = 0; i < frame->stacktop; i++) {
359         Py_XDECREF(frame->localsplus[i]);
360     }
361     Py_XDECREF(frame->frame_obj);
362     Py_XDECREF(frame->f_locals);
363     // DYNAMO: additional field for 3.12
364     #if IS_PYTHON_3_12_PLUS
365     Py_DECREF(frame->f_funcobj);
366     #else
367     Py_DECREF(frame->f_func);
368     #endif
369     Py_DECREF(F_CODE(frame));
370 }
371 
372 // https://github.com/python/cpython/blob/fad48ea1816be3125ea51edcdfe2f999d6ade796/Objects/obmalloc.c#L635
373 void *
THP_PyObject_VirtualAlloc(size_t size)374 THP_PyObject_VirtualAlloc(size_t size)
375 {
376     PyObjectArenaAllocator arena;
377     PyObject_GetArenaAllocator(&arena);
378     return arena.alloc(arena.ctx, size);
379 }
380 
381 // https://github.com/python/cpython/blob/fad48ea1816be3125ea51edcdfe2f999d6ade796/Objects/obmalloc.c#L641
382 void
THP_PyObject_VirtualFree(void * obj,size_t size)383 THP_PyObject_VirtualFree(void *obj, size_t size)
384 {
385     PyObjectArenaAllocator arena;
386     PyObject_GetArenaAllocator(&arena);
387     return arena.free(arena.ctx, obj, size);
388 }
389 
390 // https://github.com/python/cpython/blob/051b8a2589ff28f0194c3701b21f729444691752/Python/pystate.c#L728
391 static _PyStackChunk*
allocate_chunk(int size_in_bytes,_PyStackChunk * previous)392 allocate_chunk(int size_in_bytes, _PyStackChunk* previous)
393 {
394     CHECK(size_in_bytes % sizeof(PyObject **) == 0);
395     _PyStackChunk *res = THP_PyObject_VirtualAlloc(size_in_bytes);
396     if (res == NULL) {
397         return NULL;
398     }
399     res->previous = previous;
400     res->size = size_in_bytes;
401     res->top = 0;
402     return res;
403 }
404 
405 #define DATA_STACK_CHUNK_SIZE (16*1024)
406 #define MINIMUM_OVERHEAD 1000
407 
408 // https://github.com/python/cpython/blob/051b8a2589ff28f0194c3701b21f729444691752/Python/pystate.c#L2182
409 static PyObject **
push_chunk(PyThreadState * tstate,int size)410 push_chunk(PyThreadState *tstate, int size)
411 {
412     int allocate_size = DATA_STACK_CHUNK_SIZE;
413     while (allocate_size < (int)sizeof(PyObject*)*(size + MINIMUM_OVERHEAD)) {
414         allocate_size *= 2;
415     }
416     _PyStackChunk *new = allocate_chunk(allocate_size, tstate->datastack_chunk);
417     if (new == NULL) {
418         return NULL;
419     }
420     if (tstate->datastack_chunk) {
421         tstate->datastack_chunk->top = tstate->datastack_top -
422                                        &tstate->datastack_chunk->data[0];
423     }
424     tstate->datastack_chunk = new;
425     tstate->datastack_limit = (PyObject **)(((char *)new) + allocate_size);
426     // When new is the "root" chunk (i.e. new->previous == NULL), we can keep
427     // _PyThreadState_PopFrame from freeing it later by "skipping" over the
428     // first element:
429     PyObject **res = &new->data[new->previous == NULL];
430     tstate->datastack_top = res + size;
431     return res;
432 }
433 
434 // https://github.com/python/cpython/blob/051b8a2589ff28f0194c3701b21f729444691752/Include/internal/pycore_frame.h#L199
435 static inline bool
THP_PyThreadState_HasStackSpace(PyThreadState * tstate,size_t size)436 THP_PyThreadState_HasStackSpace(PyThreadState *tstate, size_t size)
437 {
438     CHECK(
439         (tstate->datastack_top == NULL && tstate->datastack_limit == NULL)
440         ||
441         (tstate->datastack_top != NULL && tstate->datastack_limit != NULL)
442     );
443     return tstate->datastack_top != NULL &&
444         size < (size_t)(tstate->datastack_limit - tstate->datastack_top);
445 }
446 
447 // https://github.com/python/cpython/blob/051b8a2589ff28f0194c3701b21f729444691752/Python/pystate.c#L2207
448 _PyInterpreterFrame *
THP_PyThreadState_BumpFramePointerSlow(PyThreadState * tstate,size_t size)449 THP_PyThreadState_BumpFramePointerSlow(PyThreadState *tstate, size_t size)
450 {
451     if (THP_PyThreadState_HasStackSpace(tstate, size)) {
452         _PyInterpreterFrame *res = (_PyInterpreterFrame *)tstate->datastack_top;
453         tstate->datastack_top += size;
454         return res;
455     }
456     if (size > INT_MAX/2) {
457         PyErr_NoMemory();
458         return NULL;
459     }
460     return (_PyInterpreterFrame *)push_chunk(tstate, (int)size);
461 }
462 
463 // https://github.com/python/cpython/blob/051b8a2589ff28f0194c3701b21f729444691752/Python/pystate.c#L2222
464 void
THP_PyThreadState_PopFrame(PyThreadState * tstate,_PyInterpreterFrame * frame)465 THP_PyThreadState_PopFrame(PyThreadState *tstate, _PyInterpreterFrame * frame)
466 {
467     CHECK(tstate->datastack_chunk);
468     PyObject **base = (PyObject **)frame;
469     if (base == &tstate->datastack_chunk->data[0]) {
470         _PyStackChunk *chunk = tstate->datastack_chunk;
471         _PyStackChunk *previous = chunk->previous;
472         // push_chunk ensures that the root chunk is never popped:
473         CHECK(previous);
474         tstate->datastack_top = &previous->data[previous->top];
475         tstate->datastack_chunk = previous;
476         THP_PyObject_VirtualFree(chunk, chunk->size);
477         tstate->datastack_limit = (PyObject **)(((char *)previous) + previous->size);
478     }
479     else {
480         CHECK(tstate->datastack_top);
481         CHECK(tstate->datastack_top >= base);
482         tstate->datastack_top = base;
483     }
484 }
485 
486 
487 #endif
488 
// Exposes the _PyOpcode_Caches table and its number of entries under THP_
// names. On builds where IS_PYTHON_3_11_PLUS is false the table is reported
// as absent (NULL pointer, size 0).
#if IS_PYTHON_3_11_PLUS

const uint8_t* THP_PyOpcode_Caches = _PyOpcode_Caches;
// Element count derived from the array's own element type rather than a
// hard-coded uint8_t.
const int THP_PyOpcode_Caches_size =
    sizeof(_PyOpcode_Caches) / sizeof(_PyOpcode_Caches[0]);

#else

const uint8_t* THP_PyOpcode_Caches = NULL;
const int THP_PyOpcode_Caches_size = 0;

#endif
499 #endif
500