1
2 /* Traceback implementation */
3
4 #include "Python.h"
5
6 #include "pycore_ast.h" // asdl_seq_*
7 #include "pycore_call.h" // _PyObject_CallMethodFormat()
8 #include "pycore_compile.h" // _PyAST_Optimize
9 #include "pycore_fileutils.h" // _Py_BEGIN_SUPPRESS_IPH
10 #include "pycore_frame.h" // _PyFrame_GetCode()
11 #include "pycore_interp.h" // PyInterpreterState.gc
12 #include "pycore_parser.h" // _PyParser_ASTFromString
13 #include "pycore_pyarena.h" // _PyArena_Free()
14 #include "pycore_pyerrors.h" // _PyErr_Fetch()
15 #include "pycore_pystate.h" // _PyThreadState_GET()
16 #include "pycore_traceback.h" // EXCEPTION_TB_HEADER
17
18 #include "../Parser/pegen.h" // _PyPegen_byte_offset_to_character_offset()
19 #include "frameobject.h" // PyFrame_New()
20 #include "structmember.h" // PyMemberDef
21 #include "osdefs.h" // SEP
22 #ifdef HAVE_FCNTL_H
23 # include <fcntl.h>
24 #endif
25
26 #define OFF(x) offsetof(PyTracebackObject, x)
27
28 #define PUTS(fd, str) _Py_write_noraise(fd, str, (int)strlen(str))
29 #define MAX_STRING_LENGTH 500
30 #define MAX_FRAME_DEPTH 100
31 #define MAX_NTHREADS 100
32
33 /* Function from Parser/tokenizer.c */
34 extern char* _PyTokenizer_FindEncodingFilename(int, PyObject *);
35
36 /*[clinic input]
37 class TracebackType "PyTracebackObject *" "&PyTraceback_Type"
38 [clinic start generated code]*/
39 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=928fa06c10151120]*/
40
41 #include "clinic/traceback.c.h"
42
43 static PyObject *
tb_create_raw(PyTracebackObject * next,PyFrameObject * frame,int lasti,int lineno)44 tb_create_raw(PyTracebackObject *next, PyFrameObject *frame, int lasti,
45 int lineno)
46 {
47 PyTracebackObject *tb;
48 if ((next != NULL && !PyTraceBack_Check(next)) ||
49 frame == NULL || !PyFrame_Check(frame)) {
50 PyErr_BadInternalCall();
51 return NULL;
52 }
53 tb = PyObject_GC_New(PyTracebackObject, &PyTraceBack_Type);
54 if (tb != NULL) {
55 Py_XINCREF(next);
56 tb->tb_next = next;
57 Py_XINCREF(frame);
58 tb->tb_frame = frame;
59 tb->tb_lasti = lasti;
60 tb->tb_lineno = lineno;
61 PyObject_GC_Track(tb);
62 }
63 return (PyObject *)tb;
64 }
65
66 /*[clinic input]
67 @classmethod
68 TracebackType.__new__ as tb_new
69
70 tb_next: object
71 tb_frame: object(type='PyFrameObject *', subclass_of='&PyFrame_Type')
72 tb_lasti: int
73 tb_lineno: int
74
75 Create a new traceback object.
76 [clinic start generated code]*/
77
78 static PyObject *
tb_new_impl(PyTypeObject * type,PyObject * tb_next,PyFrameObject * tb_frame,int tb_lasti,int tb_lineno)79 tb_new_impl(PyTypeObject *type, PyObject *tb_next, PyFrameObject *tb_frame,
80 int tb_lasti, int tb_lineno)
81 /*[clinic end generated code: output=fa077debd72d861a input=01cbe8ec8783fca7]*/
82 {
83 if (tb_next == Py_None) {
84 tb_next = NULL;
85 } else if (!PyTraceBack_Check(tb_next)) {
86 return PyErr_Format(PyExc_TypeError,
87 "expected traceback object or None, got '%s'",
88 Py_TYPE(tb_next)->tp_name);
89 }
90
91 return tb_create_raw((PyTracebackObject *)tb_next, tb_frame, tb_lasti,
92 tb_lineno);
93 }
94
95 static PyObject *
tb_dir(PyTracebackObject * self,PyObject * Py_UNUSED (ignored))96 tb_dir(PyTracebackObject *self, PyObject *Py_UNUSED(ignored))
97 {
98 return Py_BuildValue("[ssss]", "tb_frame", "tb_next",
99 "tb_lasti", "tb_lineno");
100 }
101
102 static PyObject *
tb_next_get(PyTracebackObject * self,void * Py_UNUSED (_))103 tb_next_get(PyTracebackObject *self, void *Py_UNUSED(_))
104 {
105 PyObject* ret = (PyObject*)self->tb_next;
106 if (!ret) {
107 ret = Py_None;
108 }
109 Py_INCREF(ret);
110 return ret;
111 }
112
113 static int
tb_next_set(PyTracebackObject * self,PyObject * new_next,void * Py_UNUSED (_))114 tb_next_set(PyTracebackObject *self, PyObject *new_next, void *Py_UNUSED(_))
115 {
116 if (!new_next) {
117 PyErr_Format(PyExc_TypeError, "can't delete tb_next attribute");
118 return -1;
119 }
120
121 /* We accept None or a traceback object, and map None -> NULL (inverse of
122 tb_next_get) */
123 if (new_next == Py_None) {
124 new_next = NULL;
125 } else if (!PyTraceBack_Check(new_next)) {
126 PyErr_Format(PyExc_TypeError,
127 "expected traceback object, got '%s'",
128 Py_TYPE(new_next)->tp_name);
129 return -1;
130 }
131
132 /* Check for loops */
133 PyTracebackObject *cursor = (PyTracebackObject *)new_next;
134 while (cursor) {
135 if (cursor == self) {
136 PyErr_Format(PyExc_ValueError, "traceback loop detected");
137 return -1;
138 }
139 cursor = cursor->tb_next;
140 }
141
142 PyObject *old_next = (PyObject*)self->tb_next;
143 Py_XINCREF(new_next);
144 self->tb_next = (PyTracebackObject *)new_next;
145 Py_XDECREF(old_next);
146
147 return 0;
148 }
149
150
/* Method table for traceback objects: only __dir__ (see tb_dir), which
   takes no arguments. */
static PyMethodDef tb_methods[] = {
    {"__dir__", _PyCFunction_CAST(tb_dir), METH_NOARGS},
    {NULL, NULL, 0, NULL},      /* Sentinel */
};
155
/* Read-only struct members exposed as attributes.  tb_next is absent
   because it is handled by a getter/setter pair in tb_getsetters.
   tb_frame additionally carries the PY_AUDIT_READ flag. */
static PyMemberDef tb_memberlist[] = {
    {"tb_frame", T_OBJECT, OFF(tb_frame), READONLY|PY_AUDIT_READ},
    {"tb_lasti", T_INT, OFF(tb_lasti), READONLY},
    {"tb_lineno", T_INT, OFF(tb_lineno), READONLY},
    {NULL} /* Sentinel */
};
162
/* Computed attributes: tb_next is readable and writable through
   tb_next_get / tb_next_set (the setter validates the type and rejects
   loops). */
static PyGetSetDef tb_getsetters[] = {
    {"tb_next", (getter)tb_next_get, (setter)tb_next_set, NULL, NULL},
    {NULL} /* Sentinel */
};
167
/* tp_dealloc slot: release the references to the next entry and the
   frame, then free the traceback object itself. */
static void
tb_dealloc(PyTracebackObject *tb)
{
    /* Remove the object from GC tracking before its fields are torn down. */
    PyObject_GC_UnTrack(tb);
    /* NOTE(review): the trashcan macro pair presumably bounds C stack
       depth while a long tb_next chain is released -- confirm against the
       Py_TRASHCAN_BEGIN documentation. */
    Py_TRASHCAN_BEGIN(tb, tb_dealloc)
    Py_XDECREF(tb->tb_next);
    Py_XDECREF(tb->tb_frame);
    PyObject_GC_Del(tb);
    Py_TRASHCAN_END
}
178
/* tp_traverse slot: report the two object references held by a
   traceback entry (tb_next and tb_frame) to the visitor.  Returns 0
   unless a Py_VISIT() call returns early. */
static int
tb_traverse(PyTracebackObject *tb, visitproc visit, void *arg)
{
    Py_VISIT(tb->tb_next);
    Py_VISIT(tb->tb_frame);
    return 0;
}
186
/* tp_clear slot: drop the references to the next entry and the frame,
   leaving both fields cleared.  Always returns 0. */
static int
tb_clear(PyTracebackObject *tb)
{
    Py_CLEAR(tb->tb_next);
    Py_CLEAR(tb->tb_frame);
    return 0;
}
194
/* Type object for traceback entries (type name "traceback").

   Instances are GC-enabled (Py_TPFLAGS_HAVE_GC with tb_traverse and
   tb_clear), use generic attribute lookup, and are created through tb_new.
   The docstring is tb_new__doc__ and the attributes come from tb_methods,
   tb_memberlist and tb_getsetters.  Slots are filled positionally, so the
   order of the entries below must not change. */
PyTypeObject PyTraceBack_Type = {
    PyVarObject_HEAD_INIT(&PyType_Type, 0)
    "traceback",
    sizeof(PyTracebackObject),
    0,
    (destructor)tb_dealloc,                     /*tp_dealloc*/
    0,                                          /*tp_vectorcall_offset*/
    0,                                          /*tp_getattr*/
    0,                                          /*tp_setattr*/
    0,                                          /*tp_as_async*/
    0,                                          /*tp_repr*/
    0,                                          /*tp_as_number*/
    0,                                          /*tp_as_sequence*/
    0,                                          /*tp_as_mapping*/
    0,                                          /* tp_hash */
    0,                                          /* tp_call */
    0,                                          /* tp_str */
    PyObject_GenericGetAttr,                    /* tp_getattro */
    0,                                          /* tp_setattro */
    0,                                          /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
    tb_new__doc__,                              /* tp_doc */
    (traverseproc)tb_traverse,                  /* tp_traverse */
    (inquiry)tb_clear,                          /* tp_clear */
    0,                                          /* tp_richcompare */
    0,                                          /* tp_weaklistoffset */
    0,                                          /* tp_iter */
    0,                                          /* tp_iternext */
    tb_methods,                                 /* tp_methods */
    tb_memberlist,                              /* tp_members */
    tb_getsetters,                              /* tp_getset */
    0,                                          /* tp_base */
    0,                                          /* tp_dict */
    0,                                          /* tp_descr_get */
    0,                                          /* tp_descr_set */
    0,                                          /* tp_dictoffset */
    0,                                          /* tp_init */
    0,                                          /* tp_alloc */
    tb_new,                                     /* tp_new */
};
235
236
237 PyObject*
_PyTraceBack_FromFrame(PyObject * tb_next,PyFrameObject * frame)238 _PyTraceBack_FromFrame(PyObject *tb_next, PyFrameObject *frame)
239 {
240 assert(tb_next == NULL || PyTraceBack_Check(tb_next));
241 assert(frame != NULL);
242 int addr = _PyInterpreterFrame_LASTI(frame->f_frame) * sizeof(_Py_CODEUNIT);
243 return tb_create_raw((PyTracebackObject *)tb_next, frame, addr,
244 PyFrame_GetLineNumber(frame));
245 }
246
247
248 int
PyTraceBack_Here(PyFrameObject * frame)249 PyTraceBack_Here(PyFrameObject *frame)
250 {
251 PyObject *exc, *val, *tb, *newtb;
252 PyErr_Fetch(&exc, &val, &tb);
253 newtb = _PyTraceBack_FromFrame(tb, frame);
254 if (newtb == NULL) {
255 _PyErr_ChainExceptions(exc, val, tb);
256 return -1;
257 }
258 PyErr_Restore(exc, val, newtb);
259 Py_XDECREF(tb);
260 return 0;
261 }
262
263 /* Insert a frame into the traceback for (funcname, filename, lineno). */
_PyTraceback_Add(const char * funcname,const char * filename,int lineno)264 void _PyTraceback_Add(const char *funcname, const char *filename, int lineno)
265 {
266 PyObject *globals;
267 PyCodeObject *code;
268 PyFrameObject *frame;
269 PyObject *exc, *val, *tb;
270 PyThreadState *tstate = _PyThreadState_GET();
271
272 /* Save and clear the current exception. Python functions must not be
273 called with an exception set. Calling Python functions happens when
274 the codec of the filesystem encoding is implemented in pure Python. */
275 _PyErr_Fetch(tstate, &exc, &val, &tb);
276
277 globals = PyDict_New();
278 if (!globals)
279 goto error;
280 code = PyCode_NewEmpty(filename, funcname, lineno);
281 if (!code) {
282 Py_DECREF(globals);
283 goto error;
284 }
285 frame = PyFrame_New(tstate, code, globals, NULL);
286 Py_DECREF(globals);
287 Py_DECREF(code);
288 if (!frame)
289 goto error;
290 frame->f_lineno = lineno;
291
292 _PyErr_Restore(tstate, exc, val, tb);
293 PyTraceBack_Here(frame);
294 Py_DECREF(frame);
295 return;
296
297 error:
298 _PyErr_ChainExceptions(exc, val, tb);
299 }
300
301 static PyObject *
_Py_FindSourceFile(PyObject * filename,char * namebuf,size_t namelen,PyObject * io)302 _Py_FindSourceFile(PyObject *filename, char* namebuf, size_t namelen, PyObject *io)
303 {
304 Py_ssize_t i;
305 PyObject *binary;
306 PyObject *v;
307 Py_ssize_t npath;
308 size_t taillen;
309 PyObject *syspath;
310 PyObject *path;
311 const char* tail;
312 PyObject *filebytes;
313 const char* filepath;
314 Py_ssize_t len;
315 PyObject* result;
316 PyObject *open = NULL;
317
318 filebytes = PyUnicode_EncodeFSDefault(filename);
319 if (filebytes == NULL) {
320 PyErr_Clear();
321 return NULL;
322 }
323 filepath = PyBytes_AS_STRING(filebytes);
324
325 /* Search tail of filename in sys.path before giving up */
326 tail = strrchr(filepath, SEP);
327 if (tail == NULL)
328 tail = filepath;
329 else
330 tail++;
331 taillen = strlen(tail);
332
333 PyThreadState *tstate = _PyThreadState_GET();
334 syspath = _PySys_GetAttr(tstate, &_Py_ID(path));
335 if (syspath == NULL || !PyList_Check(syspath))
336 goto error;
337 npath = PyList_Size(syspath);
338
339 open = PyObject_GetAttr(io, &_Py_ID(open));
340 for (i = 0; i < npath; i++) {
341 v = PyList_GetItem(syspath, i);
342 if (v == NULL) {
343 PyErr_Clear();
344 break;
345 }
346 if (!PyUnicode_Check(v))
347 continue;
348 path = PyUnicode_EncodeFSDefault(v);
349 if (path == NULL) {
350 PyErr_Clear();
351 continue;
352 }
353 len = PyBytes_GET_SIZE(path);
354 if (len + 1 + (Py_ssize_t)taillen >= (Py_ssize_t)namelen - 1) {
355 Py_DECREF(path);
356 continue; /* Too long */
357 }
358 strcpy(namebuf, PyBytes_AS_STRING(path));
359 Py_DECREF(path);
360 if (strlen(namebuf) != (size_t)len)
361 continue; /* v contains '\0' */
362 if (len > 0 && namebuf[len-1] != SEP)
363 namebuf[len++] = SEP;
364 strcpy(namebuf+len, tail);
365
366 binary = _PyObject_CallMethodFormat(tstate, open, "ss", namebuf, "rb");
367 if (binary != NULL) {
368 result = binary;
369 goto finally;
370 }
371 PyErr_Clear();
372 }
373 goto error;
374
375 error:
376 result = NULL;
377 finally:
378 Py_XDECREF(open);
379 Py_DECREF(filebytes);
380 return result;
381 }
382
383 /* Writes indent spaces. Returns 0 on success and non-zero on failure.
384 */
385 int
_Py_WriteIndent(int indent,PyObject * f)386 _Py_WriteIndent(int indent, PyObject *f)
387 {
388 char buf[11] = " ";
389 assert(strlen(buf) == 10);
390 while (indent > 0) {
391 if (indent < 10) {
392 buf[indent] = '\0';
393 }
394 if (PyFile_WriteString(buf, f) < 0) {
395 return -1;
396 }
397 indent -= 10;
398 }
399 return 0;
400 }
401
402 /* Writes indent spaces, followed by the margin if it is not `\0`.
403 Returns 0 on success and non-zero on failure.
404 */
405 int
_Py_WriteIndentedMargin(int indent,const char * margin,PyObject * f)406 _Py_WriteIndentedMargin(int indent, const char *margin, PyObject *f)
407 {
408 if (_Py_WriteIndent(indent, f) < 0) {
409 return -1;
410 }
411 if (margin) {
412 if (PyFile_WriteString(margin, f) < 0) {
413 return -1;
414 }
415 }
416 return 0;
417 }
418
419 static int
display_source_line_with_margin(PyObject * f,PyObject * filename,int lineno,int indent,int margin_indent,const char * margin,int * truncation,PyObject ** line)420 display_source_line_with_margin(PyObject *f, PyObject *filename, int lineno, int indent,
421 int margin_indent, const char *margin,
422 int *truncation, PyObject **line)
423 {
424 int fd;
425 int i;
426 char *found_encoding;
427 const char *encoding;
428 PyObject *io;
429 PyObject *binary;
430 PyObject *fob = NULL;
431 PyObject *lineobj = NULL;
432 PyObject *res;
433 char buf[MAXPATHLEN+1];
434 int kind;
435 const void *data;
436
437 /* open the file */
438 if (filename == NULL)
439 return 0;
440
441 /* Do not attempt to open things like <string> or <stdin> */
442 assert(PyUnicode_Check(filename));
443 if (PyUnicode_READ_CHAR(filename, 0) == '<') {
444 Py_ssize_t len = PyUnicode_GET_LENGTH(filename);
445 if (len > 0 && PyUnicode_READ_CHAR(filename, len - 1) == '>') {
446 return 0;
447 }
448 }
449
450 io = PyImport_ImportModule("io");
451 if (io == NULL) {
452 return -1;
453 }
454
455 binary = _PyObject_CallMethod(io, &_Py_ID(open), "Os", filename, "rb");
456 if (binary == NULL) {
457 PyErr_Clear();
458
459 binary = _Py_FindSourceFile(filename, buf, sizeof(buf), io);
460 if (binary == NULL) {
461 Py_DECREF(io);
462 return -1;
463 }
464 }
465
466 /* use the right encoding to decode the file as unicode */
467 fd = PyObject_AsFileDescriptor(binary);
468 if (fd < 0) {
469 Py_DECREF(io);
470 Py_DECREF(binary);
471 return 0;
472 }
473 found_encoding = _PyTokenizer_FindEncodingFilename(fd, filename);
474 if (found_encoding == NULL)
475 PyErr_Clear();
476 encoding = (found_encoding != NULL) ? found_encoding : "utf-8";
477 /* Reset position */
478 if (lseek(fd, 0, SEEK_SET) == (off_t)-1) {
479 Py_DECREF(io);
480 Py_DECREF(binary);
481 PyMem_Free(found_encoding);
482 return 0;
483 }
484 fob = _PyObject_CallMethod(io, &_Py_ID(TextIOWrapper),
485 "Os", binary, encoding);
486 Py_DECREF(io);
487 PyMem_Free(found_encoding);
488
489 if (fob == NULL) {
490 PyErr_Clear();
491
492 res = PyObject_CallMethodNoArgs(binary, &_Py_ID(close));
493 Py_DECREF(binary);
494 if (res)
495 Py_DECREF(res);
496 else
497 PyErr_Clear();
498 return 0;
499 }
500 Py_DECREF(binary);
501
502 /* get the line number lineno */
503 for (i = 0; i < lineno; i++) {
504 Py_XDECREF(lineobj);
505 lineobj = PyFile_GetLine(fob, -1);
506 if (!lineobj) {
507 PyErr_Clear();
508 break;
509 }
510 }
511 res = PyObject_CallMethodNoArgs(fob, &_Py_ID(close));
512 if (res) {
513 Py_DECREF(res);
514 }
515 else {
516 PyErr_Clear();
517 }
518 Py_DECREF(fob);
519 if (!lineobj || !PyUnicode_Check(lineobj)) {
520 Py_XDECREF(lineobj);
521 return -1;
522 }
523
524 if (line) {
525 Py_INCREF(lineobj);
526 *line = lineobj;
527 }
528
529 /* remove the indentation of the line */
530 kind = PyUnicode_KIND(lineobj);
531 data = PyUnicode_DATA(lineobj);
532 for (i=0; i < PyUnicode_GET_LENGTH(lineobj); i++) {
533 Py_UCS4 ch = PyUnicode_READ(kind, data, i);
534 if (ch != ' ' && ch != '\t' && ch != '\014')
535 break;
536 }
537 if (i) {
538 PyObject *truncated;
539 truncated = PyUnicode_Substring(lineobj, i, PyUnicode_GET_LENGTH(lineobj));
540 if (truncated) {
541 Py_DECREF(lineobj);
542 lineobj = truncated;
543 } else {
544 PyErr_Clear();
545 }
546 }
547
548 if (truncation != NULL) {
549 *truncation = i - indent;
550 }
551
552 if (_Py_WriteIndentedMargin(margin_indent, margin, f) < 0) {
553 goto error;
554 }
555
556 /* Write some spaces before the line */
557 if (_Py_WriteIndent(indent, f) < 0) {
558 goto error;
559 }
560
561 /* finally display the line */
562 if (PyFile_WriteObject(lineobj, f, Py_PRINT_RAW) < 0) {
563 goto error;
564 }
565
566 if (PyFile_WriteString("\n", f) < 0) {
567 goto error;
568 }
569
570 Py_DECREF(lineobj);
571 return 0;
572 error:
573 Py_DECREF(lineobj);
574 return -1;
575 }
576
577 int
_Py_DisplaySourceLine(PyObject * f,PyObject * filename,int lineno,int indent,int * truncation,PyObject ** line)578 _Py_DisplaySourceLine(PyObject *f, PyObject *filename, int lineno, int indent,
579 int *truncation, PyObject **line)
580 {
581 return display_source_line_with_margin(f, filename, lineno, indent, 0,
582 NULL, truncation, line);
583 }
584
/* AST based Traceback Specialization
 *
 * When displaying a new traceback line, for certain syntactical constructs
 * (e.g. a subscript, an arithmetic operation) we try to create a representation
 * that separates the primary source of error from the rest.
 *
 * Example specialization of BinOp nodes:
 *  Traceback (most recent call last):
 *    File "/home/isidentical/cpython/cpython/t.py", line 10, in <module>
 *      add_values(1, 2, 'x', 3, 4)
 *    File "/home/isidentical/cpython/cpython/t.py", line 2, in add_values
 *      return a + b + c + d + e
 *             ~~~~~~^~~
 *  TypeError: unsupported operand type(s) for +: 'int' and 'str'
 */
600
601 #define IS_WHITESPACE(c) (((c) == ' ') || ((c) == '\t') || ((c) == '\f'))
602
603 static int
extract_anchors_from_expr(const char * segment_str,expr_ty expr,Py_ssize_t * left_anchor,Py_ssize_t * right_anchor,char ** primary_error_char,char ** secondary_error_char)604 extract_anchors_from_expr(const char *segment_str, expr_ty expr, Py_ssize_t *left_anchor, Py_ssize_t *right_anchor,
605 char** primary_error_char, char** secondary_error_char)
606 {
607 switch (expr->kind) {
608 case BinOp_kind: {
609 expr_ty left = expr->v.BinOp.left;
610 expr_ty right = expr->v.BinOp.right;
611 for (int i = left->end_col_offset; i < right->col_offset; i++) {
612 if (IS_WHITESPACE(segment_str[i])) {
613 continue;
614 }
615
616 *left_anchor = i;
617 *right_anchor = i + 1;
618
619 // Check whether if this a two-character operator (e.g //)
620 if (i + 1 < right->col_offset && !IS_WHITESPACE(segment_str[i + 1])) {
621 ++*right_anchor;
622 }
623
624 // Set the error characters
625 *primary_error_char = "~";
626 *secondary_error_char = "^";
627 break;
628 }
629 return 1;
630 }
631 case Subscript_kind: {
632 *left_anchor = expr->v.Subscript.value->end_col_offset;
633 *right_anchor = expr->v.Subscript.slice->end_col_offset + 1;
634
635 // Set the error characters
636 *primary_error_char = "~";
637 *secondary_error_char = "^";
638 return 1;
639 }
640 default:
641 return 0;
642 }
643 }
644
645 static int
extract_anchors_from_stmt(const char * segment_str,stmt_ty statement,Py_ssize_t * left_anchor,Py_ssize_t * right_anchor,char ** primary_error_char,char ** secondary_error_char)646 extract_anchors_from_stmt(const char *segment_str, stmt_ty statement, Py_ssize_t *left_anchor, Py_ssize_t *right_anchor,
647 char** primary_error_char, char** secondary_error_char)
648 {
649 switch (statement->kind) {
650 case Expr_kind: {
651 return extract_anchors_from_expr(segment_str, statement->v.Expr.value, left_anchor, right_anchor,
652 primary_error_char, secondary_error_char);
653 }
654 default:
655 return 0;
656 }
657 }
658
659 static int
extract_anchors_from_line(PyObject * filename,PyObject * line,Py_ssize_t start_offset,Py_ssize_t end_offset,Py_ssize_t * left_anchor,Py_ssize_t * right_anchor,char ** primary_error_char,char ** secondary_error_char)660 extract_anchors_from_line(PyObject *filename, PyObject *line,
661 Py_ssize_t start_offset, Py_ssize_t end_offset,
662 Py_ssize_t *left_anchor, Py_ssize_t *right_anchor,
663 char** primary_error_char, char** secondary_error_char)
664 {
665 int res = -1;
666 PyArena *arena = NULL;
667 PyObject *segment = PyUnicode_Substring(line, start_offset, end_offset);
668 if (!segment) {
669 goto done;
670 }
671
672 const char *segment_str = PyUnicode_AsUTF8(segment);
673 if (!segment_str) {
674 goto done;
675 }
676
677 arena = _PyArena_New();
678 if (!arena) {
679 goto done;
680 }
681
682 PyCompilerFlags flags = _PyCompilerFlags_INIT;
683
684 _PyASTOptimizeState state;
685 state.optimize = _Py_GetConfig()->optimization_level;
686 state.ff_features = 0;
687
688 mod_ty module = _PyParser_ASTFromString(segment_str, filename, Py_file_input,
689 &flags, arena);
690 if (!module) {
691 goto done;
692 }
693 if (!_PyAST_Optimize(module, arena, &state)) {
694 goto done;
695 }
696
697 assert(module->kind == Module_kind);
698 if (asdl_seq_LEN(module->v.Module.body) == 1) {
699 stmt_ty statement = asdl_seq_GET(module->v.Module.body, 0);
700 res = extract_anchors_from_stmt(segment_str, statement, left_anchor, right_anchor,
701 primary_error_char, secondary_error_char);
702 } else {
703 res = 0;
704 }
705
706 done:
707 if (res > 0) {
708 // Normalize the AST offsets to byte offsets and adjust them with the
709 // start of the actual line (instead of the source code segment).
710 assert(segment != NULL);
711 assert(*left_anchor >= 0);
712 assert(*right_anchor >= 0);
713 *left_anchor = _PyPegen_byte_offset_to_character_offset(segment, *left_anchor) + start_offset;
714 *right_anchor = _PyPegen_byte_offset_to_character_offset(segment, *right_anchor) + start_offset;
715 }
716 Py_XDECREF(segment);
717 if (arena) {
718 _PyArena_Free(arena);
719 }
720 return res;
721 }
722
723 #define _TRACEBACK_SOURCE_LINE_INDENT 4
724
725 static inline int
ignore_source_errors(void)726 ignore_source_errors(void) {
727 if (PyErr_Occurred()) {
728 if (PyErr_ExceptionMatches(PyExc_KeyboardInterrupt)) {
729 return -1;
730 }
731 PyErr_Clear();
732 }
733 return 0;
734 }
735
736 static inline int
print_error_location_carets(PyObject * f,int offset,Py_ssize_t start_offset,Py_ssize_t end_offset,Py_ssize_t right_start_offset,Py_ssize_t left_end_offset,const char * primary,const char * secondary)737 print_error_location_carets(PyObject *f, int offset, Py_ssize_t start_offset, Py_ssize_t end_offset,
738 Py_ssize_t right_start_offset, Py_ssize_t left_end_offset,
739 const char *primary, const char *secondary) {
740 int special_chars = (left_end_offset != -1 || right_start_offset != -1);
741 const char *str;
742 while (++offset <= end_offset) {
743 if (offset <= start_offset) {
744 str = " ";
745 } else if (special_chars && left_end_offset < offset && offset <= right_start_offset) {
746 str = secondary;
747 } else {
748 str = primary;
749 }
750 if (PyFile_WriteString(str, f) < 0) {
751 return -1;
752 }
753 }
754 if (PyFile_WriteString("\n", f) < 0) {
755 return -1;
756 }
757 return 0;
758 }
759
760 static int
tb_displayline(PyTracebackObject * tb,PyObject * f,PyObject * filename,int lineno,PyFrameObject * frame,PyObject * name,int margin_indent,const char * margin)761 tb_displayline(PyTracebackObject* tb, PyObject *f, PyObject *filename, int lineno,
762 PyFrameObject *frame, PyObject *name, int margin_indent, const char *margin)
763 {
764 if (filename == NULL || name == NULL) {
765 return -1;
766 }
767
768 if (_Py_WriteIndentedMargin(margin_indent, margin, f) < 0) {
769 return -1;
770 }
771
772 PyObject *line = PyUnicode_FromFormat(" File \"%U\", line %d, in %U\n",
773 filename, lineno, name);
774 if (line == NULL) {
775 return -1;
776 }
777
778 int res = PyFile_WriteObject(line, f, Py_PRINT_RAW);
779 Py_DECREF(line);
780 if (res < 0) {
781 return -1;
782 }
783
784 int err = 0;
785
786 int truncation = _TRACEBACK_SOURCE_LINE_INDENT;
787 PyObject* source_line = NULL;
788 int rc = display_source_line_with_margin(
789 f, filename, lineno, _TRACEBACK_SOURCE_LINE_INDENT,
790 margin_indent, margin, &truncation, &source_line);
791 if (rc != 0 || !source_line) {
792 /* ignore errors since we can't report them, can we? */
793 err = ignore_source_errors();
794 goto done;
795 }
796
797 int code_offset = tb->tb_lasti;
798 PyCodeObject* code = frame->f_frame->f_code;
799 const Py_ssize_t source_line_len = PyUnicode_GET_LENGTH(source_line);
800
801 int start_line;
802 int end_line;
803 int start_col_byte_offset;
804 int end_col_byte_offset;
805 if (!PyCode_Addr2Location(code, code_offset, &start_line, &start_col_byte_offset,
806 &end_line, &end_col_byte_offset)) {
807 goto done;
808 }
809
810 if (start_line < 0 || end_line < 0
811 || start_col_byte_offset < 0
812 || end_col_byte_offset < 0)
813 {
814 goto done;
815 }
816
817 // When displaying errors, we will use the following generic structure:
818 //
819 // ERROR LINE ERROR LINE ERROR LINE ERROR LINE ERROR LINE ERROR LINE ERROR LINE
820 // ~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^~~~~~~~~~~~~~~~~~~~
821 // | |-> left_end_offset | |-> end_offset
822 // |-> start_offset |-> right_start_offset
823 //
824 // In general we will only have (start_offset, end_offset) but we can gather more information
825 // by analyzing the AST of the text between *start_offset* and *end_offset*. If this succeeds
826 // we could get *left_end_offset* and *right_start_offset* and some selection of characters for
827 // the different ranges (primary_error_char and secondary_error_char). If we cannot obtain the
828 // AST information or we cannot identify special ranges within it, then left_end_offset and
829 // right_end_offset will be set to -1.
830 //
831 // To keep the column indicators pertinent, they are not shown when the primary character
832 // spans the whole line.
833
834 // Convert the utf-8 byte offset to the actual character offset so we print the right number of carets.
835 assert(source_line);
836 Py_ssize_t start_offset = _PyPegen_byte_offset_to_character_offset(source_line, start_col_byte_offset);
837 if (start_offset < 0) {
838 err = ignore_source_errors() < 0;
839 goto done;
840 }
841
842 Py_ssize_t end_offset = _PyPegen_byte_offset_to_character_offset(source_line, end_col_byte_offset);
843 if (end_offset < 0) {
844 err = ignore_source_errors() < 0;
845 goto done;
846 }
847
848 Py_ssize_t left_end_offset = -1;
849 Py_ssize_t right_start_offset = -1;
850
851 char *primary_error_char = "^";
852 char *secondary_error_char = primary_error_char;
853
854 if (start_line == end_line) {
855 int res = extract_anchors_from_line(filename, source_line, start_offset, end_offset,
856 &left_end_offset, &right_start_offset,
857 &primary_error_char, &secondary_error_char);
858 if (res < 0 && ignore_source_errors() < 0) {
859 goto done;
860 }
861 }
862 else {
863 // If this is a multi-line expression, then we will highlight until
864 // the last non-whitespace character.
865 const char *source_line_str = PyUnicode_AsUTF8(source_line);
866 if (!source_line_str) {
867 goto done;
868 }
869
870 Py_ssize_t i = source_line_len;
871 while (--i >= 0) {
872 if (!IS_WHITESPACE(source_line_str[i])) {
873 break;
874 }
875 }
876
877 end_offset = i + 1;
878 }
879
880 // Elide indicators if primary char spans the frame line
881 Py_ssize_t stripped_line_len = source_line_len - truncation - _TRACEBACK_SOURCE_LINE_INDENT;
882 bool has_secondary_ranges = (left_end_offset != -1 || right_start_offset != -1);
883 if (end_offset - start_offset == stripped_line_len && !has_secondary_ranges) {
884 goto done;
885 }
886
887 if (_Py_WriteIndentedMargin(margin_indent, margin, f) < 0) {
888 err = -1;
889 goto done;
890 }
891
892 if (print_error_location_carets(f, truncation, start_offset, end_offset,
893 right_start_offset, left_end_offset,
894 primary_error_char, secondary_error_char) < 0) {
895 err = -1;
896 goto done;
897 }
898
899 done:
900 Py_XDECREF(source_line);
901 return err;
902 }
903
904 static const int TB_RECURSIVE_CUTOFF = 3; // Also hardcoded in traceback.py.
905
906 static int
tb_print_line_repeated(PyObject * f,long cnt)907 tb_print_line_repeated(PyObject *f, long cnt)
908 {
909 cnt -= TB_RECURSIVE_CUTOFF;
910 PyObject *line = PyUnicode_FromFormat(
911 (cnt > 1)
912 ? " [Previous line repeated %ld more times]\n"
913 : " [Previous line repeated %ld more time]\n",
914 cnt);
915 if (line == NULL) {
916 return -1;
917 }
918 int err = PyFile_WriteObject(line, f, Py_PRINT_RAW);
919 Py_DECREF(line);
920 return err;
921 }
922
923 static int
tb_printinternal(PyTracebackObject * tb,PyObject * f,long limit,int indent,const char * margin)924 tb_printinternal(PyTracebackObject *tb, PyObject *f, long limit,
925 int indent, const char *margin)
926 {
927 PyCodeObject *code = NULL;
928 Py_ssize_t depth = 0;
929 PyObject *last_file = NULL;
930 int last_line = -1;
931 PyObject *last_name = NULL;
932 long cnt = 0;
933 PyTracebackObject *tb1 = tb;
934 while (tb1 != NULL) {
935 depth++;
936 tb1 = tb1->tb_next;
937 }
938 while (tb != NULL && depth > limit) {
939 depth--;
940 tb = tb->tb_next;
941 }
942 while (tb != NULL) {
943 code = PyFrame_GetCode(tb->tb_frame);
944 if (last_file == NULL ||
945 code->co_filename != last_file ||
946 last_line == -1 || tb->tb_lineno != last_line ||
947 last_name == NULL || code->co_name != last_name) {
948 if (cnt > TB_RECURSIVE_CUTOFF) {
949 if (tb_print_line_repeated(f, cnt) < 0) {
950 goto error;
951 }
952 }
953 last_file = code->co_filename;
954 last_line = tb->tb_lineno;
955 last_name = code->co_name;
956 cnt = 0;
957 }
958 cnt++;
959 if (cnt <= TB_RECURSIVE_CUTOFF) {
960 if (tb_displayline(tb, f, code->co_filename, tb->tb_lineno,
961 tb->tb_frame, code->co_name, indent, margin) < 0) {
962 goto error;
963 }
964
965 if (PyErr_CheckSignals() < 0) {
966 goto error;
967 }
968 }
969 Py_CLEAR(code);
970 tb = tb->tb_next;
971 }
972 if (cnt > TB_RECURSIVE_CUTOFF) {
973 if (tb_print_line_repeated(f, cnt) < 0) {
974 goto error;
975 }
976 }
977 return 0;
978 error:
979 Py_XDECREF(code);
980 return -1;
981 }
982
983 #define PyTraceBack_LIMIT 1000
984
985 int
_PyTraceBack_Print_Indented(PyObject * v,int indent,const char * margin,const char * header_margin,const char * header,PyObject * f)986 _PyTraceBack_Print_Indented(PyObject *v, int indent, const char *margin,
987 const char *header_margin, const char *header, PyObject *f)
988 {
989 PyObject *limitv;
990 long limit = PyTraceBack_LIMIT;
991
992 if (v == NULL) {
993 return 0;
994 }
995 if (!PyTraceBack_Check(v)) {
996 PyErr_BadInternalCall();
997 return -1;
998 }
999 limitv = PySys_GetObject("tracebacklimit");
1000 if (limitv && PyLong_Check(limitv)) {
1001 int overflow;
1002 limit = PyLong_AsLongAndOverflow(limitv, &overflow);
1003 if (overflow > 0) {
1004 limit = LONG_MAX;
1005 }
1006 else if (limit <= 0) {
1007 return 0;
1008 }
1009 }
1010 if (_Py_WriteIndentedMargin(indent, header_margin, f) < 0) {
1011 return -1;
1012 }
1013
1014 if (PyFile_WriteString(header, f) < 0) {
1015 return -1;
1016 }
1017
1018 if (tb_printinternal((PyTracebackObject *)v, f, limit, indent, margin) < 0) {
1019 return -1;
1020 }
1021
1022 return 0;
1023 }
1024
1025 int
PyTraceBack_Print(PyObject * v,PyObject * f)1026 PyTraceBack_Print(PyObject *v, PyObject *f)
1027 {
1028 int indent = 0;
1029 const char *margin = NULL;
1030 const char *header_margin = NULL;
1031 const char *header = EXCEPTION_TB_HEADER;
1032
1033 return _PyTraceBack_Print_Indented(v, indent, margin, header_margin, header, f);
1034 }
1035
/* Format the unsigned integer `value` (any size_t value) in decimal and
   write it into the file fd.

   This function is signal safe. */

void
_Py_DumpDecimal(int fd, size_t value)
{
    /* Room for every decimal digit of a size_t plus the null byte.
       53/22 is an upper bound for log10(256), so (sizeof(size_t)*53-1)/22
       bounds the number of digits contributed by the bytes of `value`. */
    char buffer[1 + (sizeof(size_t)*53-1) / 22 + 1];
    char *const end = &buffer[Py_ARRAY_LENGTH(buffer) - 1];
    char *cursor = end;

    *cursor = '\0';
    /* Produce the digits right to left; at least one digit is always
       written, so 0 is printed as "0". */
    for (;;) {
        cursor--;
        assert(cursor >= buffer);
        *cursor = '0' + (value % 10);
        value /= 10;
        if (value == 0) {
            break;
        }
    }

    _Py_write_noraise(fd, cursor, end - cursor);
}
1062
1063 /* Format an integer as hexadecimal with width digits into fd file descriptor.
1064 The function is signal safe. */
1065 void
_Py_DumpHexadecimal(int fd,uintptr_t value,Py_ssize_t width)1066 _Py_DumpHexadecimal(int fd, uintptr_t value, Py_ssize_t width)
1067 {
1068 char buffer[sizeof(uintptr_t) * 2 + 1], *ptr, *end;
1069 const Py_ssize_t size = Py_ARRAY_LENGTH(buffer) - 1;
1070
1071 if (width > size)
1072 width = size;
1073 /* it's ok if width is negative */
1074
1075 end = &buffer[size];
1076 ptr = end;
1077 *ptr = '\0';
1078 do {
1079 --ptr;
1080 assert(ptr >= buffer);
1081 *ptr = Py_hexdigits[value & 15];
1082 value >>= 4;
1083 } while ((end - ptr) < width || value);
1084
1085 _Py_write_noraise(fd, ptr, end - ptr);
1086 }
1087
1088 void
_Py_DumpASCII(int fd,PyObject * text)1089 _Py_DumpASCII(int fd, PyObject *text)
1090 {
1091 PyASCIIObject *ascii = _PyASCIIObject_CAST(text);
1092 Py_ssize_t i, size;
1093 int truncated;
1094 int kind;
1095 void *data = NULL;
1096 wchar_t *wstr = NULL;
1097 Py_UCS4 ch;
1098
1099 if (!PyUnicode_Check(text))
1100 return;
1101
1102 size = ascii->length;
1103 kind = ascii->state.kind;
1104 if (kind == PyUnicode_WCHAR_KIND) {
1105 wstr = ascii->wstr;
1106 if (wstr == NULL)
1107 return;
1108 size = _PyCompactUnicodeObject_CAST(text)->wstr_length;
1109 }
1110 else if (ascii->state.compact) {
1111 if (ascii->state.ascii)
1112 data = ascii + 1;
1113 else
1114 data = _PyCompactUnicodeObject_CAST(text) + 1;
1115 }
1116 else {
1117 data = _PyUnicodeObject_CAST(text)->data.any;
1118 if (data == NULL)
1119 return;
1120 }
1121
1122 if (MAX_STRING_LENGTH < size) {
1123 size = MAX_STRING_LENGTH;
1124 truncated = 1;
1125 }
1126 else {
1127 truncated = 0;
1128 }
1129
1130 // Is an ASCII string?
1131 if (ascii->state.ascii) {
1132 assert(kind == PyUnicode_1BYTE_KIND);
1133 char *str = data;
1134
1135 int need_escape = 0;
1136 for (i=0; i < size; i++) {
1137 ch = str[i];
1138 if (!(' ' <= ch && ch <= 126)) {
1139 need_escape = 1;
1140 break;
1141 }
1142 }
1143 if (!need_escape) {
1144 // The string can be written with a single write() syscall
1145 _Py_write_noraise(fd, str, size);
1146 goto done;
1147 }
1148 }
1149
1150 for (i=0; i < size; i++) {
1151 if (kind != PyUnicode_WCHAR_KIND)
1152 ch = PyUnicode_READ(kind, data, i);
1153 else
1154 ch = wstr[i];
1155 if (' ' <= ch && ch <= 126) {
1156 /* printable ASCII character */
1157 char c = (char)ch;
1158 _Py_write_noraise(fd, &c, 1);
1159 }
1160 else if (ch <= 0xff) {
1161 PUTS(fd, "\\x");
1162 _Py_DumpHexadecimal(fd, ch, 2);
1163 }
1164 else if (ch <= 0xffff) {
1165 PUTS(fd, "\\u");
1166 _Py_DumpHexadecimal(fd, ch, 4);
1167 }
1168 else {
1169 PUTS(fd, "\\U");
1170 _Py_DumpHexadecimal(fd, ch, 8);
1171 }
1172 }
1173
1174 done:
1175 if (truncated) {
1176 PUTS(fd, "...");
1177 }
1178 }
1179
1180 /* Write a frame into the file fd: "File "xxx", line xxx in xxx".
1181
1182 This function is signal safe. */
1183
1184 static void
dump_frame(int fd,_PyInterpreterFrame * frame)1185 dump_frame(int fd, _PyInterpreterFrame *frame)
1186 {
1187 PyCodeObject *code = frame->f_code;
1188 PUTS(fd, " File ");
1189 if (code->co_filename != NULL
1190 && PyUnicode_Check(code->co_filename))
1191 {
1192 PUTS(fd, "\"");
1193 _Py_DumpASCII(fd, code->co_filename);
1194 PUTS(fd, "\"");
1195 } else {
1196 PUTS(fd, "???");
1197 }
1198
1199 int lineno = _PyInterpreterFrame_GetLine(frame);
1200 PUTS(fd, ", line ");
1201 if (lineno >= 0) {
1202 _Py_DumpDecimal(fd, (size_t)lineno);
1203 }
1204 else {
1205 PUTS(fd, "???");
1206 }
1207 PUTS(fd, " in ");
1208
1209 if (code->co_name != NULL
1210 && PyUnicode_Check(code->co_name)) {
1211 _Py_DumpASCII(fd, code->co_name);
1212 }
1213 else {
1214 PUTS(fd, "???");
1215 }
1216
1217 PUTS(fd, "\n");
1218 }
1219
1220 static void
dump_traceback(int fd,PyThreadState * tstate,int write_header)1221 dump_traceback(int fd, PyThreadState *tstate, int write_header)
1222 {
1223 _PyInterpreterFrame *frame;
1224 unsigned int depth;
1225
1226 if (write_header) {
1227 PUTS(fd, "Stack (most recent call first):\n");
1228 }
1229
1230 frame = tstate->cframe->current_frame;
1231 if (frame == NULL) {
1232 PUTS(fd, " <no Python frame>\n");
1233 return;
1234 }
1235
1236 depth = 0;
1237 while (1) {
1238 if (MAX_FRAME_DEPTH <= depth) {
1239 PUTS(fd, " ...\n");
1240 break;
1241 }
1242 dump_frame(fd, frame);
1243 frame = frame->previous;
1244 if (frame == NULL) {
1245 break;
1246 }
1247 depth++;
1248 }
1249 }
1250
1251 /* Dump the traceback of a Python thread into fd. Use write() to write the
1252 traceback and retry if write() is interrupted by a signal (failed with
1253 EINTR), but don't call the Python signal handler.
1254
1255 The caller is responsible to call PyErr_CheckSignals() to call Python signal
1256 handlers if signals were received. */
1257 void
_Py_DumpTraceback(int fd,PyThreadState * tstate)1258 _Py_DumpTraceback(int fd, PyThreadState *tstate)
1259 {
1260 dump_traceback(fd, tstate, 1);
1261 }
1262
1263 /* Write the thread identifier into the file 'fd': "Current thread 0xHHHH:\" if
1264 is_current is true, "Thread 0xHHHH:\n" otherwise.
1265
1266 This function is signal safe. */
1267
1268 static void
write_thread_id(int fd,PyThreadState * tstate,int is_current)1269 write_thread_id(int fd, PyThreadState *tstate, int is_current)
1270 {
1271 if (is_current)
1272 PUTS(fd, "Current thread 0x");
1273 else
1274 PUTS(fd, "Thread 0x");
1275 _Py_DumpHexadecimal(fd,
1276 tstate->thread_id,
1277 sizeof(unsigned long) * 2);
1278 PUTS(fd, " (most recent call first):\n");
1279 }
1280
1281 /* Dump the traceback of all Python threads into fd. Use write() to write the
1282 traceback and retry if write() is interrupted by a signal (failed with
1283 EINTR), but don't call the Python signal handler.
1284
1285 The caller is responsible to call PyErr_CheckSignals() to call Python signal
1286 handlers if signals were received. */
1287 const char*
_Py_DumpTracebackThreads(int fd,PyInterpreterState * interp,PyThreadState * current_tstate)1288 _Py_DumpTracebackThreads(int fd, PyInterpreterState *interp,
1289 PyThreadState *current_tstate)
1290 {
1291 PyThreadState *tstate;
1292 unsigned int nthreads;
1293
1294 if (current_tstate == NULL) {
1295 /* _Py_DumpTracebackThreads() is called from signal handlers by
1296 faulthandler.
1297
1298 SIGSEGV, SIGFPE, SIGABRT, SIGBUS and SIGILL are synchronous signals
1299 and are thus delivered to the thread that caused the fault. Get the
1300 Python thread state of the current thread.
1301
1302 PyThreadState_Get() doesn't give the state of the thread that caused
1303 the fault if the thread released the GIL, and so
1304 _PyThreadState_GET() cannot be used. Read the thread specific
1305 storage (TSS) instead: call PyGILState_GetThisThreadState(). */
1306 current_tstate = PyGILState_GetThisThreadState();
1307 }
1308
1309 if (interp == NULL) {
1310 if (current_tstate == NULL) {
1311 interp = _PyGILState_GetInterpreterStateUnsafe();
1312 if (interp == NULL) {
1313 /* We need the interpreter state to get Python threads */
1314 return "unable to get the interpreter state";
1315 }
1316 }
1317 else {
1318 interp = current_tstate->interp;
1319 }
1320 }
1321 assert(interp != NULL);
1322
1323 /* Get the current interpreter from the current thread */
1324 tstate = PyInterpreterState_ThreadHead(interp);
1325 if (tstate == NULL)
1326 return "unable to get the thread head state";
1327
1328 /* Dump the traceback of each thread */
1329 tstate = PyInterpreterState_ThreadHead(interp);
1330 nthreads = 0;
1331 _Py_BEGIN_SUPPRESS_IPH
1332 do
1333 {
1334 if (nthreads != 0)
1335 PUTS(fd, "\n");
1336 if (nthreads >= MAX_NTHREADS) {
1337 PUTS(fd, "...\n");
1338 break;
1339 }
1340 write_thread_id(fd, tstate, tstate == current_tstate);
1341 if (tstate == current_tstate && tstate->interp->gc.collecting) {
1342 PUTS(fd, " Garbage-collecting\n");
1343 }
1344 dump_traceback(fd, tstate, 0);
1345 tstate = PyThreadState_Next(tstate);
1346 nthreads++;
1347 } while (tstate != NULL);
1348 _Py_END_SUPPRESS_IPH
1349
1350 return NULL;
1351 }
1352
1353