1 #include <torch/csrc/dynamo/cpython_defs.h>
2 #include <torch/csrc/dynamo/cpython_includes.h>
3 #include <torch/csrc/dynamo/debug_macros.h>
4
5 #if IS_PYTHON_3_11_PLUS
6
7 #define Py_BUILD_CORE
8 #define NEED_OPCODE_TABLES // To get _PyOpcode_Deopt, _PyOpcode_Caches
9
10 #if IS_PYTHON_3_13_PLUS
11 #include <cpython/code.h> // To get PyUnstable_Code_GetFirstFree
12 #define NEED_OPCODE_METADATA
13 #include <internal/pycore_opcode_metadata.h>
14 #undef NEED_OPCODE_METADATA
15 #else
16 #include <internal/pycore_opcode.h>
17 #endif
18
19 #undef NEED_OPCODE_TABLES
20 #undef Py_BUILD_CORE
21
22 // As a simple way to reduce the impact of ABI changes on the CPython side, this check forces
23 // us to manually re-check that the function didn't change on the next major version
24 #if IS_PYTHON_3_14_PLUS
25 #error "Please ensure that the functions below still match the CPython implementation for 3.14"
26 #endif
27
28 // https://github.com/python/cpython/blob/a7715ccfba5b86ab09f86ec56ac3755c93b46b48/Objects/frameobject.c#L1079
29 static int
THP_PyFrame_OpAlreadyRan(_PyInterpreterFrame * frame,int opcode,int oparg)30 THP_PyFrame_OpAlreadyRan(_PyInterpreterFrame *frame, int opcode, int oparg)
31 {
32 // This only works when opcode is a non-quickened form:
33 CHECK(_PyOpcode_Deopt[opcode] == opcode);
34 int check_oparg = 0;
35 for (_Py_CODEUNIT *instruction = _PyCode_CODE(F_CODE(frame));
36 instruction < PREV_INSTR(frame) ; instruction++)
37 {
38 int check_opcode = _PyOpcode_Deopt[_Py_OPCODE(*instruction)];
39 check_oparg |= _Py_OPARG(*instruction);
40 if (check_opcode == opcode && check_oparg == oparg) {
41 return 1;
42 }
43 if (check_opcode == EXTENDED_ARG) {
44 check_oparg <<= 8;
45 }
46 else {
47 check_oparg = 0;
48 }
49 instruction += _PyOpcode_Caches[check_opcode];
50 }
51 return 0;
52 }
53
54 #if IS_PYTHON_3_12_PLUS
55
56 int
THP_PyFrame_FastToLocalsWithError(_PyInterpreterFrame * frame,int * free_vars_copied)57 THP_PyFrame_FastToLocalsWithError(_PyInterpreterFrame *frame, int *free_vars_copied)
58 {
59 // functionality moved to framelocals_mapping.cpp
60 return 0;
61 }
62
63 #else
64
65 // https://github.com/python/cpython/blob/a7715ccfba5b86ab09f86ec56ac3755c93b46b48/Objects/frameobject.c#L1182
66 // free_vars_copied argument added in order to let caller know that the COPY_FREE_VARS
67 // codepath occurred.
68 int
THP_PyFrame_FastToLocalsWithError(_PyInterpreterFrame * frame,int * free_vars_copied)69 THP_PyFrame_FastToLocalsWithError(_PyInterpreterFrame *frame, int *free_vars_copied) {
70 /* Merge fast locals into f->f_locals */
71 PyObject *locals = NULL;
72 PyObject **fast = NULL;
73 PyCodeObject *co = NULL;
74 locals = frame->f_locals;
75 if (locals == NULL) {
76 locals = frame->f_locals = PyDict_New();
77 if (locals == NULL)
78 return -1;
79 }
80 co = F_CODE(frame);
81 fast = _PyFrame_GetLocalsArray(frame);
82 // COPY_FREE_VARS has no quickened forms, so no need to use _PyOpcode_Deopt
83 // here:
84 int lasti = _PyInterpreterFrame_LASTI(frame);
85 if (lasti < 0 && _Py_OPCODE(_PyCode_CODE(co)[0]) == COPY_FREE_VARS) {
86 /* Free vars have not been initialized -- Do that */
87 PyCodeObject *co = F_CODE(frame);
88 PyObject *closure = frame->f_func->func_closure;
89 int offset = co->co_nlocals + co->co_nplaincellvars;
90 for (int i = 0; i < co->co_nfreevars; ++i) {
91 PyObject *o = PyTuple_GET_ITEM(closure, i);
92 Py_INCREF(o);
93 frame->localsplus[offset + i] = o;
94 }
95 // COPY_FREE_VARS doesn't have inline CACHEs, either:
96 PREV_INSTR(frame) = _PyCode_CODE(F_CODE(frame));
97
98 *free_vars_copied = 1;
99 }
100 for (int i = 0; i < co->co_nlocalsplus; i++) {
101 _PyLocals_Kind kind = _PyLocals_GetKind(co->co_localspluskinds, i);
102
103 /* If the namespace is unoptimized, then one of the
104 following cases applies:
105 1. It does not contain free variables, because it
106 uses import * or is a top-level namespace.
107 2. It is a class namespace.
108 We don't want to accidentally copy free variables
109 into the locals dict used by the class.
110 */
111 if (kind & CO_FAST_FREE && !(co->co_flags & CO_OPTIMIZED)) {
112 continue;
113 }
114
115 PyObject *name = PyTuple_GET_ITEM(co->co_localsplusnames, i);
116 PyObject *value = fast[i];
117 if (frame->stacktop) {
118 if (kind & CO_FAST_FREE) {
119 // The cell was set by COPY_FREE_VARS.
120 CHECK(value != NULL && PyCell_Check(value));
121 value = PyCell_GET(value);
122 }
123 else if (kind & CO_FAST_CELL) {
124 // Note that no *_DEREF ops can happen before MAKE_CELL
125 // executes. So there's no need to duplicate the work
126 // that MAKE_CELL would otherwise do later, if it hasn't
127 // run yet.
128 if (value != NULL) {
129 if (PyCell_Check(value) &&
130 THP_PyFrame_OpAlreadyRan(frame, MAKE_CELL, i)) {
131 // (likely) MAKE_CELL must have executed already.
132 value = PyCell_GET(value);
133 }
134 // (likely) Otherwise it it is an arg (kind & CO_FAST_LOCAL),
135 // with the initial value set when the frame was created...
136 // (unlikely) ...or it was set to some initial value by
137 // an earlier call to PyFrame_LocalsToFast().
138 }
139 }
140 }
141 else {
142 CHECK(value == NULL);
143 }
144 if (value == NULL) {
145 if (PyObject_DelItem(locals, name) != 0) {
146 if (PyErr_ExceptionMatches(PyExc_KeyError)) {
147 PyErr_Clear();
148 }
149 else {
150 return -1;
151 }
152 }
153 }
154 else {
155 if (PyObject_SetItem(locals, name, value) != 0) {
156 return -1;
157 }
158 }
159 }
160 return 0;
161 }
162
163 #endif
164
// e.g. COPY_FIELD(op, o, globals) becomes
// Py_XINCREF((o)->func_globals);
// (op)->func_globals = (o)->func_globals;
// Wrapped in do { ... } while (0) so the expansion is a single statement and
// stays correct under an unbraced if/else. Invoke it with a trailing
// semicolon. Note that (f2) is evaluated more than once.
#define COPY_FIELD(f1, f2, field)                  \
    do {                                           \
        Py_XINCREF((f2)->func_##field);            \
        (f1)->func_##field = (f2)->func_##field;   \
    } while (0)
171
172 // Not actually copied from CPython, but loosely based on
173 // https://github.com/python/cpython/blob/e715da6db1d1d70cd779dc48e1ba8110c51cc1bf/Objects/funcobject.c
174 // Makes a new PyFunctionObject copy of `o`, but with the code object fields
175 // determined from `code`.
176 // Ensure that all fields defined in the PyFunctionObject struct in
177 // https://github.com/python/cpython/blob/e715da6db1d1d70cd779dc48e1ba8110c51cc1bf/Include/cpython/funcobject.h
178 // are accounted for.
179 PyFunctionObject *
_PyFunction_CopyWithNewCode(PyFunctionObject * o,PyCodeObject * code)180 _PyFunction_CopyWithNewCode(PyFunctionObject *o, PyCodeObject* code)
181 {
182 PyFunctionObject *op = PyObject_GC_New(PyFunctionObject, &PyFunction_Type);
183 if (op == NULL) {
184 return NULL;
185 }
186 Py_XINCREF(code);
187 op->func_code = (PyObject *) code;
188 Py_XINCREF(code->co_name);
189 op->func_name = code->co_name;
190 Py_XINCREF(code->co_qualname);
191 op->func_qualname = code->co_qualname;
192 COPY_FIELD(op, o, globals);
193 COPY_FIELD(op, o, builtins);
194 COPY_FIELD(op, o, defaults);
195 COPY_FIELD(op, o, kwdefaults);
196 COPY_FIELD(op, o, closure);
197 COPY_FIELD(op, o, doc);
198 COPY_FIELD(op, o, dict);
199 op->func_weakreflist = NULL;
200 COPY_FIELD(op, o, module);
201 COPY_FIELD(op, o, annotations);
202 #if IS_PYTHON_3_12_PLUS
203 COPY_FIELD(op, o, typeparams);
204 #endif
205 op->vectorcall = o->vectorcall;
206 op->func_version = o->func_version;
207 PyObject_GC_Track(op);
208 return op;
209 }
210
211 // From https://github.com/python/cpython/blob/e715da6db1d1d70cd779dc48e1ba8110c51cc1bf/Objects/frameobject.c#L1020
212 PyFrameObject*
THP_PyFrame_New_NoTrack(const PyCodeObject * code)213 THP_PyFrame_New_NoTrack(const PyCodeObject *code)
214 {
215 // DYNAMO: commented out
216 // CALL_STAT_INC(frame_objects_created);
217 int slots = code->co_nlocalsplus + code->co_stacksize;
218 PyFrameObject *f = PyObject_GC_NewVar(PyFrameObject, &PyFrame_Type, slots);
219 if (f == NULL) {
220 return NULL;
221 }
222 f->f_back = NULL;
223 f->f_trace = NULL;
224 f->f_trace_lines = 1;
225 f->f_trace_opcodes = 0;
226 #if IS_PYTHON_3_13_PLUS
227 f->f_extra_locals = NULL;
228 #else
229 f->f_fast_as_locals = 0;
230 #endif
231 f->f_lineno = 0;
232 return f;
233 }
234
235 // From https://github.com/python/cpython/blob/e715da6db1d1d70cd779dc48e1ba8110c51cc1bf/Python/frame.c#L27
236 PyFrameObject *
THP_PyFrame_MakeAndSetFrameObject(_PyInterpreterFrame * frame)237 THP_PyFrame_MakeAndSetFrameObject(_PyInterpreterFrame *frame)
238 {
239 CHECK(frame->frame_obj == NULL);
240 PyObject *error_type = NULL, *error_value = NULL, *error_traceback = NULL;
241 PyErr_Fetch(&error_type, &error_value, &error_traceback);
242
243 PyFrameObject *f = THP_PyFrame_New_NoTrack(F_CODE(frame));
244 if (f == NULL) {
245 Py_XDECREF(error_type);
246 Py_XDECREF(error_value);
247 Py_XDECREF(error_traceback);
248 return NULL;
249 }
250 PyErr_Restore(error_type, error_value, error_traceback);
251 if (frame->frame_obj) {
252 // GH-97002: How did we get into this horrible situation? Most likely,
253 // allocating f triggered a GC collection, which ran some code that
254 // *also* created the same frame... while we were in the middle of
255 // creating it! See test_sneaky_frame_object in test_frame.py for a
256 // concrete example.
257 //
258 // Regardless, just throw f away and use that frame instead, since it's
259 // already been exposed to user code. It's actually a bit tricky to do
260 // this, since we aren't backed by a real _PyInterpreterFrame anymore.
261 // Just pretend that we have an owned, cleared frame so frame_dealloc
262 // doesn't make the situation worse:
263 f->f_frame = (_PyInterpreterFrame *)f->_f_frame_data;
264 f->f_frame->owner = FRAME_CLEARED;
265 f->f_frame->frame_obj = f;
266 Py_DECREF(f);
267 return frame->frame_obj;
268 }
269 CHECK(frame->owner != FRAME_OWNED_BY_FRAME_OBJECT);
270 CHECK(frame->owner != FRAME_CLEARED);
271 f->f_frame = frame;
272 frame->frame_obj = f;
273 return f;
274 }
275
276 // From https://github.com/python/cpython/blob/e715da6db1d1d70cd779dc48e1ba8110c51cc1bf/Include/internal/pycore_frame.h#L163
277 static inline PyFrameObject *
THP_PyFrame_GetFrameObject(_PyInterpreterFrame * frame)278 THP_PyFrame_GetFrameObject(_PyInterpreterFrame *frame)
279 {
280
281 CHECK(!_PyFrame_IsIncomplete(frame));
282 PyFrameObject *res = frame->frame_obj;
283 if (res != NULL) {
284 return res;
285 }
286 return THP_PyFrame_MakeAndSetFrameObject(frame);
287 }
288
289 // From https://github.com/python/cpython/blob/e715da6db1d1d70cd779dc48e1ba8110c51cc1bf/Python/frame.c#L79
290 static void
THP_take_ownership(PyFrameObject * f,_PyInterpreterFrame * frame)291 THP_take_ownership(PyFrameObject *f, _PyInterpreterFrame *frame)
292 {
293 CHECK(frame->owner != FRAME_OWNED_BY_FRAME_OBJECT);
294 CHECK(frame->owner != FRAME_CLEARED);
295 Py_ssize_t size = ((char*)&frame->localsplus[frame->stacktop]) - (char *)frame;
296 memcpy((_PyInterpreterFrame *)f->_f_frame_data, frame, size);
297 frame = (_PyInterpreterFrame *)f->_f_frame_data;
298 f->f_frame = frame;
299 frame->owner = FRAME_OWNED_BY_FRAME_OBJECT;
300 if (_PyFrame_IsIncomplete(frame)) {
301 // This may be a newly-created generator or coroutine frame. Since it's
302 // dead anyways, just pretend that the first RESUME ran:
303 PyCodeObject *code = F_CODE(frame);
304 PREV_INSTR(frame) = _PyCode_CODE(code) + code->_co_firsttraceable;
305 }
306 CHECK(!_PyFrame_IsIncomplete(frame));
307 CHECK(f->f_back == NULL);
308 _PyInterpreterFrame *prev = frame->previous;
309 while (prev && _PyFrame_IsIncomplete(prev)) {
310 prev = prev->previous;
311 }
312 if (prev) {
313 /* Link PyFrameObjects.f_back and remove link through _PyInterpreterFrame.previous */
314 PyFrameObject *back = THP_PyFrame_GetFrameObject(prev);
315 if (back == NULL) {
316 /* Memory error here. */
317 CHECK(PyErr_ExceptionMatches(PyExc_MemoryError));
318 /* Nothing we can do about it */
319 PyErr_Clear();
320 }
321 else {
322 f->f_back = (PyFrameObject *)Py_NewRef(back);
323 }
324 frame->previous = NULL;
325 }
326 // DYNAMO: use public GC functions instead of internal ones
327 if (!PyObject_GC_IsTracked((PyObject *) f)) {
328 PyObject_GC_Track((PyObject *) f);
329 }
330 }
331
332 // From https://github.com/python/cpython/blob/e715da6db1d1d70cd779dc48e1ba8110c51cc1bf/Python/frame.c#L120
333 void
THP_PyFrame_Clear(_PyInterpreterFrame * frame)334 THP_PyFrame_Clear(_PyInterpreterFrame *frame)
335 {
336 /* It is the responsibility of the owning generator/coroutine
337 * to have cleared the enclosing generator, if any. */
338 CHECK(frame->owner != FRAME_OWNED_BY_GENERATOR ||
339 _PyFrame_GetGenerator(frame)->gi_frame_state == FRAME_CLEARED);
340 // GH-99729: Clearing this frame can expose the stack (via finalizers). It's
341 // crucial that this frame has been unlinked, and is no longer visible:
342 #if IS_PYTHON_3_13_PLUS
343 CHECK(_PyThreadState_GET()->current_frame != frame);
344 #else
345 CHECK(_PyThreadState_GET()->cframe->current_frame != frame);
346 #endif
347 if (frame->frame_obj) {
348 PyFrameObject *f = frame->frame_obj;
349 frame->frame_obj = NULL;
350 if (Py_REFCNT(f) > 1) {
351 THP_take_ownership(f, frame);
352 Py_DECREF(f);
353 return;
354 }
355 Py_DECREF(f);
356 }
357 CHECK(frame->stacktop >= 0);
358 for (int i = 0; i < frame->stacktop; i++) {
359 Py_XDECREF(frame->localsplus[i]);
360 }
361 Py_XDECREF(frame->frame_obj);
362 Py_XDECREF(frame->f_locals);
363 // DYNAMO: additional field for 3.12
364 #if IS_PYTHON_3_12_PLUS
365 Py_DECREF(frame->f_funcobj);
366 #else
367 Py_DECREF(frame->f_func);
368 #endif
369 Py_DECREF(F_CODE(frame));
370 }
371
372 // https://github.com/python/cpython/blob/fad48ea1816be3125ea51edcdfe2f999d6ade796/Objects/obmalloc.c#L635
373 void *
THP_PyObject_VirtualAlloc(size_t size)374 THP_PyObject_VirtualAlloc(size_t size)
375 {
376 PyObjectArenaAllocator arena;
377 PyObject_GetArenaAllocator(&arena);
378 return arena.alloc(arena.ctx, size);
379 }
380
381 // https://github.com/python/cpython/blob/fad48ea1816be3125ea51edcdfe2f999d6ade796/Objects/obmalloc.c#L641
382 void
THP_PyObject_VirtualFree(void * obj,size_t size)383 THP_PyObject_VirtualFree(void *obj, size_t size)
384 {
385 PyObjectArenaAllocator arena;
386 PyObject_GetArenaAllocator(&arena);
387 return arena.free(arena.ctx, obj, size);
388 }
389
390 // https://github.com/python/cpython/blob/051b8a2589ff28f0194c3701b21f729444691752/Python/pystate.c#L728
391 static _PyStackChunk*
allocate_chunk(int size_in_bytes,_PyStackChunk * previous)392 allocate_chunk(int size_in_bytes, _PyStackChunk* previous)
393 {
394 CHECK(size_in_bytes % sizeof(PyObject **) == 0);
395 _PyStackChunk *res = THP_PyObject_VirtualAlloc(size_in_bytes);
396 if (res == NULL) {
397 return NULL;
398 }
399 res->previous = previous;
400 res->size = size_in_bytes;
401 res->top = 0;
402 return res;
403 }
404
405 #define DATA_STACK_CHUNK_SIZE (16*1024)
406 #define MINIMUM_OVERHEAD 1000
407
408 // https://github.com/python/cpython/blob/051b8a2589ff28f0194c3701b21f729444691752/Python/pystate.c#L2182
409 static PyObject **
push_chunk(PyThreadState * tstate,int size)410 push_chunk(PyThreadState *tstate, int size)
411 {
412 int allocate_size = DATA_STACK_CHUNK_SIZE;
413 while (allocate_size < (int)sizeof(PyObject*)*(size + MINIMUM_OVERHEAD)) {
414 allocate_size *= 2;
415 }
416 _PyStackChunk *new = allocate_chunk(allocate_size, tstate->datastack_chunk);
417 if (new == NULL) {
418 return NULL;
419 }
420 if (tstate->datastack_chunk) {
421 tstate->datastack_chunk->top = tstate->datastack_top -
422 &tstate->datastack_chunk->data[0];
423 }
424 tstate->datastack_chunk = new;
425 tstate->datastack_limit = (PyObject **)(((char *)new) + allocate_size);
426 // When new is the "root" chunk (i.e. new->previous == NULL), we can keep
427 // _PyThreadState_PopFrame from freeing it later by "skipping" over the
428 // first element:
429 PyObject **res = &new->data[new->previous == NULL];
430 tstate->datastack_top = res + size;
431 return res;
432 }
433
434 // https://github.com/python/cpython/blob/051b8a2589ff28f0194c3701b21f729444691752/Include/internal/pycore_frame.h#L199
435 static inline bool
THP_PyThreadState_HasStackSpace(PyThreadState * tstate,size_t size)436 THP_PyThreadState_HasStackSpace(PyThreadState *tstate, size_t size)
437 {
438 CHECK(
439 (tstate->datastack_top == NULL && tstate->datastack_limit == NULL)
440 ||
441 (tstate->datastack_top != NULL && tstate->datastack_limit != NULL)
442 );
443 return tstate->datastack_top != NULL &&
444 size < (size_t)(tstate->datastack_limit - tstate->datastack_top);
445 }
446
447 // https://github.com/python/cpython/blob/051b8a2589ff28f0194c3701b21f729444691752/Python/pystate.c#L2207
448 _PyInterpreterFrame *
THP_PyThreadState_BumpFramePointerSlow(PyThreadState * tstate,size_t size)449 THP_PyThreadState_BumpFramePointerSlow(PyThreadState *tstate, size_t size)
450 {
451 if (THP_PyThreadState_HasStackSpace(tstate, size)) {
452 _PyInterpreterFrame *res = (_PyInterpreterFrame *)tstate->datastack_top;
453 tstate->datastack_top += size;
454 return res;
455 }
456 if (size > INT_MAX/2) {
457 PyErr_NoMemory();
458 return NULL;
459 }
460 return (_PyInterpreterFrame *)push_chunk(tstate, (int)size);
461 }
462
463 // https://github.com/python/cpython/blob/051b8a2589ff28f0194c3701b21f729444691752/Python/pystate.c#L2222
464 void
THP_PyThreadState_PopFrame(PyThreadState * tstate,_PyInterpreterFrame * frame)465 THP_PyThreadState_PopFrame(PyThreadState *tstate, _PyInterpreterFrame * frame)
466 {
467 CHECK(tstate->datastack_chunk);
468 PyObject **base = (PyObject **)frame;
469 if (base == &tstate->datastack_chunk->data[0]) {
470 _PyStackChunk *chunk = tstate->datastack_chunk;
471 _PyStackChunk *previous = chunk->previous;
472 // push_chunk ensures that the root chunk is never popped:
473 CHECK(previous);
474 tstate->datastack_top = &previous->data[previous->top];
475 tstate->datastack_chunk = previous;
476 THP_PyObject_VirtualFree(chunk, chunk->size);
477 tstate->datastack_limit = (PyObject **)(((char *)previous) + previous->size);
478 }
479 else {
480 CHECK(tstate->datastack_top);
481 CHECK(tstate->datastack_top >= base);
482 tstate->datastack_top = base;
483 }
484 }
485
486
487 #endif
488
#if IS_PYTHON_3_11_PLUS

// Expose CPython's _PyOpcode_Caches table and its element count under THP_
// names.
const uint8_t* THP_PyOpcode_Caches = _PyOpcode_Caches;
const int THP_PyOpcode_Caches_size = sizeof(_PyOpcode_Caches) / sizeof(_PyOpcode_Caches[0]);

#else

// Before Python 3.11 no table is exposed: NULL pointer, zero entries.
const uint8_t* THP_PyOpcode_Caches = NULL;
const int THP_PyOpcode_Caches_size = 0;

#endif
500