1#!/usr/bin/python 2''' 3From gdb 7 onwards, gdb's build can be configured --with-python, allowing gdb 4to be extended with Python code e.g. for library-specific data visualizations, 5such as for the C++ STL types. Documentation on this API can be seen at: 6http://sourceware.org/gdb/current/onlinedocs/gdb/Python-API.html 7 8 9This python module deals with the case when the process being debugged (the 10"inferior process" in gdb parlance) is itself python, or more specifically, 11linked against libpython. In this situation, almost every item of data is a 12(PyObject*), and having the debugger merely print their addresses is not very 13enlightening. 14 15This module embeds knowledge about the implementation details of libpython so 16that we can emit useful visualizations e.g. a string, a list, a dict, a frame 17giving file/line information and the state of local variables 18 19In particular, given a gdb.Value corresponding to a PyObject* in the inferior 20process, we can generate a "proxy value" within the gdb process. For example, 21given a PyObject* in the inferior process that is in fact a PyListObject* 22holding three PyObject* that turn out to be PyBytesObject* instances, we can 23generate a proxy value within the gdb process that is a list of bytes 24instances: 25 [b"foo", b"bar", b"baz"] 26 27Doing so can be expensive for complicated graphs of objects, and could take 28some time, so we also have a "write_repr" method that writes a representation 29of the data to a file-like object. This allows us to stop the traversal by 30having the file-like object raise an exception if it gets too much data. 31 32With both "proxyval" and "write_repr" we keep track of the set of all addresses 33visited so far in the traversal, to avoid infinite recursion due to cycles in 34the graph of object references. 35 36We try to defer gdb.lookup_type() invocations for python types until as late as 37possible: for a dynamically linked python binary, when the process starts in 38the debugger, the libpython.so hasn't been dynamically loaded yet, so none of 39the type names are known to the debugger 40 41The module also extends gdb with some python-specific commands. 42''' 43 44import gdb 45import os 46import locale 47import sys 48 49 50# Look up the gdb.Type for some standard types: 51# Those need to be refreshed as types (pointer sizes) may change when 52# gdb loads different executables 53 54def _type_char_ptr(): 55 return gdb.lookup_type('char').pointer() # char* 56 57 58def _type_unsigned_char_ptr(): 59 return gdb.lookup_type('unsigned char').pointer() # unsigned char* 60 61 62def _type_unsigned_short_ptr(): 63 return gdb.lookup_type('unsigned short').pointer() 64 65 66def _type_unsigned_int_ptr(): 67 return gdb.lookup_type('unsigned int').pointer() 68 69 70def _sizeof_void_p(): 71 return gdb.lookup_type('void').pointer().sizeof 72 73 74Py_TPFLAGS_MANAGED_DICT = (1 << 4) 75Py_TPFLAGS_HEAPTYPE = (1 << 9) 76Py_TPFLAGS_LONG_SUBCLASS = (1 << 24) 77Py_TPFLAGS_LIST_SUBCLASS = (1 << 25) 78Py_TPFLAGS_TUPLE_SUBCLASS = (1 << 26) 79Py_TPFLAGS_BYTES_SUBCLASS = (1 << 27) 80Py_TPFLAGS_UNICODE_SUBCLASS = (1 << 28) 81Py_TPFLAGS_DICT_SUBCLASS = (1 << 29) 82Py_TPFLAGS_BASE_EXC_SUBCLASS = (1 << 30) 83Py_TPFLAGS_TYPE_SUBCLASS = (1 << 31) 84 85 86MAX_OUTPUT_LEN=1024 87 88hexdigits = "0123456789abcdef" 89 90ENCODING = locale.getpreferredencoding() 91 92FRAME_INFO_OPTIMIZED_OUT = '(frame information optimized out)' 93UNABLE_READ_INFO_PYTHON_FRAME = 'Unable to read information on python frame' 94EVALFRAME = '_PyEval_EvalFrameDefault' 95 96class NullPyObjectPtr(RuntimeError): 97 pass 98 99 100def safety_limit(val): 101 # Given an integer value from the process being debugged, limit it to some 102 # safety threshold so that arbitrary breakage within said process doesn't 103 # break the gdb process too much (e.g. sizes of iterations, sizes of lists) 104 return min(val, 1000) 105 106 107def safe_range(val): 108 # As per range, but don't trust the value too much: cap it to a safety 109 # threshold in case the data was corrupted 110 return range(safety_limit(int(val))) 111 112try: 113 os_fsencode = os.fsencode 114except AttributeError: 115 def os_fsencode(filename): 116 if not isinstance(filename, unicode): 117 return filename 118 encoding = sys.getfilesystemencoding() 119 if encoding == 'mbcs': 120 # mbcs doesn't support surrogateescape 121 return filename.encode(encoding) 122 encoded = [] 123 for char in filename: 124 # surrogateescape error handler 125 if 0xDC80 <= ord(char) <= 0xDCFF: 126 byte = chr(ord(char) - 0xDC00) 127 else: 128 byte = char.encode(encoding) 129 encoded.append(byte) 130 return ''.join(encoded) 131 132class StringTruncated(RuntimeError): 133 pass 134 135class TruncatedStringIO(object): 136 '''Similar to io.StringIO, but can truncate the output by raising a 137 StringTruncated exception''' 138 def __init__(self, maxlen=None): 139 self._val = '' 140 self.maxlen = maxlen 141 142 def write(self, data): 143 if self.maxlen: 144 if len(data) + len(self._val) > self.maxlen: 145 # Truncation: 146 self._val += data[0:self.maxlen - len(self._val)] 147 raise StringTruncated() 148 149 self._val += data 150 151 def getvalue(self): 152 return self._val 153 154class PyObjectPtr(object): 155 """ 156 Class wrapping a gdb.Value that's either a (PyObject*) within the 157 inferior process, or some subclass pointer e.g. (PyBytesObject*) 158 159 There will be a subclass for every refined PyObject type that we care 160 about. 161 162 Note that at every stage the underlying pointer could be NULL, point 163 to corrupt data, etc; this is the debugger, after all. 164 """ 165 _typename = 'PyObject' 166 167 def __init__(self, gdbval, cast_to=None): 168 if cast_to: 169 self._gdbval = gdbval.cast(cast_to) 170 else: 171 self._gdbval = gdbval 172 173 def field(self, name): 174 ''' 175 Get the gdb.Value for the given field within the PyObject, coping with 176 some python 2 versus python 3 differences. 177 178 Various libpython types are defined using the "PyObject_HEAD" and 179 "PyObject_VAR_HEAD" macros. 180 181 In Python 2, this these are defined so that "ob_type" and (for a var 182 object) "ob_size" are fields of the type in question. 183 184 In Python 3, this is defined as an embedded PyVarObject type thus: 185 PyVarObject ob_base; 186 so that the "ob_size" field is located insize the "ob_base" field, and 187 the "ob_type" is most easily accessed by casting back to a (PyObject*). 188 ''' 189 if self.is_null(): 190 raise NullPyObjectPtr(self) 191 192 if name == 'ob_type': 193 pyo_ptr = self._gdbval.cast(PyObjectPtr.get_gdb_type()) 194 return pyo_ptr.dereference()[name] 195 196 if name == 'ob_size': 197 pyo_ptr = self._gdbval.cast(PyVarObjectPtr.get_gdb_type()) 198 return pyo_ptr.dereference()[name] 199 200 # General case: look it up inside the object: 201 return self._gdbval.dereference()[name] 202 203 def pyop_field(self, name): 204 ''' 205 Get a PyObjectPtr for the given PyObject* field within this PyObject, 206 coping with some python 2 versus python 3 differences. 207 ''' 208 return PyObjectPtr.from_pyobject_ptr(self.field(name)) 209 210 def write_field_repr(self, name, out, visited): 211 ''' 212 Extract the PyObject* field named "name", and write its representation 213 to file-like object "out" 214 ''' 215 field_obj = self.pyop_field(name) 216 field_obj.write_repr(out, visited) 217 218 def get_truncated_repr(self, maxlen): 219 ''' 220 Get a repr-like string for the data, but truncate it at "maxlen" bytes 221 (ending the object graph traversal as soon as you do) 222 ''' 223 out = TruncatedStringIO(maxlen) 224 try: 225 self.write_repr(out, set()) 226 except StringTruncated: 227 # Truncation occurred: 228 return out.getvalue() + '...(truncated)' 229 230 # No truncation occurred: 231 return out.getvalue() 232 233 def type(self): 234 return PyTypeObjectPtr(self.field('ob_type')) 235 236 def is_null(self): 237 return 0 == int(self._gdbval) 238 239 def is_optimized_out(self): 240 ''' 241 Is the value of the underlying PyObject* visible to the debugger? 242 243 This can vary with the precise version of the compiler used to build 244 Python, and the precise version of gdb. 245 246 See e.g. https://bugzilla.redhat.com/show_bug.cgi?id=556975 with 247 PyEval_EvalFrameEx's "f" 248 ''' 249 return self._gdbval.is_optimized_out 250 251 def safe_tp_name(self): 252 try: 253 ob_type = self.type() 254 tp_name = ob_type.field('tp_name') 255 return tp_name.string() 256 # NullPyObjectPtr: NULL tp_name? 257 # RuntimeError: Can't even read the object at all? 258 # UnicodeDecodeError: Failed to decode tp_name bytestring 259 except (NullPyObjectPtr, RuntimeError, UnicodeDecodeError): 260 return 'unknown' 261 262 def proxyval(self, visited): 263 ''' 264 Scrape a value from the inferior process, and try to represent it 265 within the gdb process, whilst (hopefully) avoiding crashes when 266 the remote data is corrupt. 267 268 Derived classes will override this. 269 270 For example, a PyIntObject* with ob_ival 42 in the inferior process 271 should result in an int(42) in this process. 272 273 visited: a set of all gdb.Value pyobject pointers already visited 274 whilst generating this value (to guard against infinite recursion when 275 visiting object graphs with loops). Analogous to Py_ReprEnter and 276 Py_ReprLeave 277 ''' 278 279 class FakeRepr(object): 280 """ 281 Class representing a non-descript PyObject* value in the inferior 282 process for when we don't have a custom scraper, intended to have 283 a sane repr(). 284 """ 285 286 def __init__(self, tp_name, address): 287 self.tp_name = tp_name 288 self.address = address 289 290 def __repr__(self): 291 # For the NULL pointer, we have no way of knowing a type, so 292 # special-case it as per 293 # http://bugs.python.org/issue8032#msg100882 294 if self.address == 0: 295 return '0x0' 296 return '<%s at remote 0x%x>' % (self.tp_name, self.address) 297 298 return FakeRepr(self.safe_tp_name(), 299 int(self._gdbval)) 300 301 def write_repr(self, out, visited): 302 ''' 303 Write a string representation of the value scraped from the inferior 304 process to "out", a file-like object. 305 ''' 306 # Default implementation: generate a proxy value and write its repr 307 # However, this could involve a lot of work for complicated objects, 308 # so for derived classes we specialize this 309 return out.write(repr(self.proxyval(visited))) 310 311 @classmethod 312 def subclass_from_type(cls, t): 313 ''' 314 Given a PyTypeObjectPtr instance wrapping a gdb.Value that's a 315 (PyTypeObject*), determine the corresponding subclass of PyObjectPtr 316 to use 317 318 Ideally, we would look up the symbols for the global types, but that 319 isn't working yet: 320 (gdb) python print gdb.lookup_symbol('PyList_Type')[0].value 321 Traceback (most recent call last): 322 File "<string>", line 1, in <module> 323 NotImplementedError: Symbol type not yet supported in Python scripts. 324 Error while executing Python code. 325 326 For now, we use tp_flags, after doing some string comparisons on the 327 tp_name for some special-cases that don't seem to be visible through 328 flags 329 ''' 330 try: 331 tp_name = t.field('tp_name').string() 332 tp_flags = int(t.field('tp_flags')) 333 # RuntimeError: NULL pointers 334 # UnicodeDecodeError: string() fails to decode the bytestring 335 except (RuntimeError, UnicodeDecodeError): 336 # Handle any kind of error e.g. NULL ptrs by simply using the base 337 # class 338 return cls 339 340 #print('tp_flags = 0x%08x' % tp_flags) 341 #print('tp_name = %r' % tp_name) 342 343 name_map = {'bool': PyBoolObjectPtr, 344 'classobj': PyClassObjectPtr, 345 'NoneType': PyNoneStructPtr, 346 'frame': PyFrameObjectPtr, 347 'set' : PySetObjectPtr, 348 'frozenset' : PySetObjectPtr, 349 'builtin_function_or_method' : PyCFunctionObjectPtr, 350 'method-wrapper': wrapperobject, 351 } 352 if tp_name in name_map: 353 return name_map[tp_name] 354 355 if tp_flags & Py_TPFLAGS_HEAPTYPE: 356 return HeapTypeObjectPtr 357 358 if tp_flags & Py_TPFLAGS_LONG_SUBCLASS: 359 return PyLongObjectPtr 360 if tp_flags & Py_TPFLAGS_LIST_SUBCLASS: 361 return PyListObjectPtr 362 if tp_flags & Py_TPFLAGS_TUPLE_SUBCLASS: 363 return PyTupleObjectPtr 364 if tp_flags & Py_TPFLAGS_BYTES_SUBCLASS: 365 return PyBytesObjectPtr 366 if tp_flags & Py_TPFLAGS_UNICODE_SUBCLASS: 367 return PyUnicodeObjectPtr 368 if tp_flags & Py_TPFLAGS_DICT_SUBCLASS: 369 return PyDictObjectPtr 370 if tp_flags & Py_TPFLAGS_BASE_EXC_SUBCLASS: 371 return PyBaseExceptionObjectPtr 372 #if tp_flags & Py_TPFLAGS_TYPE_SUBCLASS: 373 # return PyTypeObjectPtr 374 375 # Use the base class: 376 return cls 377 378 @classmethod 379 def from_pyobject_ptr(cls, gdbval): 380 ''' 381 Try to locate the appropriate derived class dynamically, and cast 382 the pointer accordingly. 383 ''' 384 try: 385 p = PyObjectPtr(gdbval) 386 cls = cls.subclass_from_type(p.type()) 387 return cls(gdbval, cast_to=cls.get_gdb_type()) 388 except RuntimeError: 389 # Handle any kind of error e.g. NULL ptrs by simply using the base 390 # class 391 pass 392 return cls(gdbval) 393 394 @classmethod 395 def get_gdb_type(cls): 396 return gdb.lookup_type(cls._typename).pointer() 397 398 def as_address(self): 399 return int(self._gdbval) 400 401class PyVarObjectPtr(PyObjectPtr): 402 _typename = 'PyVarObject' 403 404class ProxyAlreadyVisited(object): 405 ''' 406 Placeholder proxy to use when protecting against infinite recursion due to 407 loops in the object graph. 408 409 Analogous to the values emitted by the users of Py_ReprEnter and Py_ReprLeave 410 ''' 411 def __init__(self, rep): 412 self._rep = rep 413 414 def __repr__(self): 415 return self._rep 416 417 418def _write_instance_repr(out, visited, name, pyop_attrdict, address): 419 '''Shared code for use by all classes: 420 write a representation to file-like object "out"''' 421 out.write('<') 422 out.write(name) 423 424 # Write dictionary of instance attributes: 425 if isinstance(pyop_attrdict, (PyKeysValuesPair, PyDictObjectPtr)): 426 out.write('(') 427 first = True 428 items = pyop_attrdict.iteritems() 429 for pyop_arg, pyop_val in items: 430 if not first: 431 out.write(', ') 432 first = False 433 out.write(pyop_arg.proxyval(visited)) 434 out.write('=') 435 pyop_val.write_repr(out, visited) 436 out.write(')') 437 out.write(' at remote 0x%x>' % address) 438 439 440class InstanceProxy(object): 441 442 def __init__(self, cl_name, attrdict, address): 443 self.cl_name = cl_name 444 self.attrdict = attrdict 445 self.address = address 446 447 def __repr__(self): 448 if isinstance(self.attrdict, dict): 449 kwargs = ', '.join(["%s=%r" % (arg, val) 450 for arg, val in self.attrdict.items()]) 451 return '<%s(%s) at remote 0x%x>' % (self.cl_name, 452 kwargs, self.address) 453 else: 454 return '<%s at remote 0x%x>' % (self.cl_name, 455 self.address) 456 457def _PyObject_VAR_SIZE(typeobj, nitems): 458 if _PyObject_VAR_SIZE._type_size_t is None: 459 _PyObject_VAR_SIZE._type_size_t = gdb.lookup_type('size_t') 460 461 return ( ( typeobj.field('tp_basicsize') + 462 nitems * typeobj.field('tp_itemsize') + 463 (_sizeof_void_p() - 1) 464 ) & ~(_sizeof_void_p() - 1) 465 ).cast(_PyObject_VAR_SIZE._type_size_t) 466_PyObject_VAR_SIZE._type_size_t = None 467 468class HeapTypeObjectPtr(PyObjectPtr): 469 _typename = 'PyObject' 470 471 def get_attr_dict(self): 472 ''' 473 Get the PyDictObject ptr representing the attribute dictionary 474 (or None if there's a problem) 475 ''' 476 try: 477 typeobj = self.type() 478 dictoffset = int_from_int(typeobj.field('tp_dictoffset')) 479 if dictoffset != 0: 480 if dictoffset < 0: 481 type_PyVarObject_ptr = gdb.lookup_type('PyVarObject').pointer() 482 tsize = int_from_int(self._gdbval.cast(type_PyVarObject_ptr)['ob_size']) 483 if tsize < 0: 484 tsize = -tsize 485 size = _PyObject_VAR_SIZE(typeobj, tsize) 486 dictoffset += size 487 assert dictoffset % _sizeof_void_p() == 0 488 489 dictptr = self._gdbval.cast(_type_char_ptr()) + dictoffset 490 PyObjectPtrPtr = PyObjectPtr.get_gdb_type().pointer() 491 dictptr = dictptr.cast(PyObjectPtrPtr) 492 return PyObjectPtr.from_pyobject_ptr(dictptr.dereference()) 493 except RuntimeError: 494 # Corrupt data somewhere; fail safe 495 pass 496 497 # Not found, or some kind of error: 498 return None 499 500 def get_keys_values(self): 501 typeobj = self.type() 502 has_values = int_from_int(typeobj.field('tp_flags')) & Py_TPFLAGS_MANAGED_DICT 503 if not has_values: 504 return None 505 PyDictValuesPtrPtr = gdb.lookup_type("PyDictValues").pointer().pointer() 506 valuesptr = self._gdbval.cast(PyDictValuesPtrPtr) - 4 507 values = valuesptr.dereference() 508 if int(values) == 0: 509 return None 510 values = values['values'] 511 return PyKeysValuesPair(self.get_cached_keys(), values) 512 513 def get_cached_keys(self): 514 typeobj = self.type() 515 HeapTypePtr = gdb.lookup_type("PyHeapTypeObject").pointer() 516 return typeobj._gdbval.cast(HeapTypePtr)['ht_cached_keys'] 517 518 def proxyval(self, visited): 519 ''' 520 Support for classes. 521 522 Currently we just locate the dictionary using a transliteration to 523 python of _PyObject_GetDictPtr, ignoring descriptors 524 ''' 525 # Guard against infinite loops: 526 if self.as_address() in visited: 527 return ProxyAlreadyVisited('<...>') 528 visited.add(self.as_address()) 529 530 pyop_attr_dict = self.get_attr_dict() 531 keys_values = self.get_keys_values() 532 if keys_values: 533 attr_dict = keys_values.proxyval(visited) 534 elif pyop_attr_dict: 535 attr_dict = pyop_attr_dict.proxyval(visited) 536 else: 537 attr_dict = {} 538 tp_name = self.safe_tp_name() 539 540 # Class: 541 return InstanceProxy(tp_name, attr_dict, int(self._gdbval)) 542 543 def write_repr(self, out, visited): 544 # Guard against infinite loops: 545 if self.as_address() in visited: 546 out.write('<...>') 547 return 548 visited.add(self.as_address()) 549 550 pyop_attrs = self.get_keys_values() 551 if not pyop_attrs: 552 pyop_attrs = self.get_attr_dict() 553 _write_instance_repr(out, visited, 554 self.safe_tp_name(), pyop_attrs, self.as_address()) 555 556class ProxyException(Exception): 557 def __init__(self, tp_name, args): 558 self.tp_name = tp_name 559 self.args = args 560 561 def __repr__(self): 562 return '%s%r' % (self.tp_name, self.args) 563 564class PyBaseExceptionObjectPtr(PyObjectPtr): 565 """ 566 Class wrapping a gdb.Value that's a PyBaseExceptionObject* i.e. an exception 567 within the process being debugged. 568 """ 569 _typename = 'PyBaseExceptionObject' 570 571 def proxyval(self, visited): 572 # Guard against infinite loops: 573 if self.as_address() in visited: 574 return ProxyAlreadyVisited('(...)') 575 visited.add(self.as_address()) 576 arg_proxy = self.pyop_field('args').proxyval(visited) 577 return ProxyException(self.safe_tp_name(), 578 arg_proxy) 579 580 def write_repr(self, out, visited): 581 # Guard against infinite loops: 582 if self.as_address() in visited: 583 out.write('(...)') 584 return 585 visited.add(self.as_address()) 586 587 out.write(self.safe_tp_name()) 588 self.write_field_repr('args', out, visited) 589 590class PyClassObjectPtr(PyObjectPtr): 591 """ 592 Class wrapping a gdb.Value that's a PyClassObject* i.e. a <classobj> 593 instance within the process being debugged. 594 """ 595 _typename = 'PyClassObject' 596 597 598class BuiltInFunctionProxy(object): 599 def __init__(self, ml_name): 600 self.ml_name = ml_name 601 602 def __repr__(self): 603 return "<built-in function %s>" % self.ml_name 604 605class BuiltInMethodProxy(object): 606 def __init__(self, ml_name, pyop_m_self): 607 self.ml_name = ml_name 608 self.pyop_m_self = pyop_m_self 609 610 def __repr__(self): 611 return ('<built-in method %s of %s object at remote 0x%x>' 612 % (self.ml_name, 613 self.pyop_m_self.safe_tp_name(), 614 self.pyop_m_self.as_address()) 615 ) 616 617class PyCFunctionObjectPtr(PyObjectPtr): 618 """ 619 Class wrapping a gdb.Value that's a PyCFunctionObject* 620 (see Include/methodobject.h and Objects/methodobject.c) 621 """ 622 _typename = 'PyCFunctionObject' 623 624 def proxyval(self, visited): 625 m_ml = self.field('m_ml') # m_ml is a (PyMethodDef*) 626 try: 627 ml_name = m_ml['ml_name'].string() 628 except UnicodeDecodeError: 629 ml_name = '<ml_name:UnicodeDecodeError>' 630 631 pyop_m_self = self.pyop_field('m_self') 632 if pyop_m_self.is_null(): 633 return BuiltInFunctionProxy(ml_name) 634 else: 635 return BuiltInMethodProxy(ml_name, pyop_m_self) 636 637# Python implementation of location table parsing algorithm 638def read(it): 639 return ord(next(it)) 640 641def read_varint(it): 642 b = read(it) 643 val = b & 63; 644 shift = 0; 645 while b & 64: 646 b = read(it) 647 shift += 6 648 val |= (b&63) << shift 649 return val 650 651def read_signed_varint(it): 652 uval = read_varint(it) 653 if uval & 1: 654 return -(uval >> 1) 655 else: 656 return uval >> 1 657 658def parse_location_table(firstlineno, linetable): 659 line = firstlineno 660 addr = 0 661 it = iter(linetable) 662 while True: 663 try: 664 first_byte = read(it) 665 except StopIteration: 666 return 667 code = (first_byte >> 3) & 15 668 length = (first_byte & 7) + 1 669 end_addr = addr + length 670 if code == 15: 671 yield addr, end_addr, None 672 addr = end_addr 673 continue 674 elif code == 14: # Long form 675 line_delta = read_signed_varint(it) 676 line += line_delta 677 end_line = line + read_varint(it) 678 col = read_varint(it) 679 end_col = read_varint(it) 680 elif code == 13: # No column 681 line_delta = read_signed_varint(it) 682 line += line_delta 683 elif code in (10, 11, 12): # new line 684 line_delta = code - 10 685 line += line_delta 686 column = read(it) 687 end_column = read(it) 688 else: 689 assert (0 <= code < 10) 690 second_byte = read(it) 691 column = code << 3 | (second_byte >> 4) 692 yield addr, end_addr, line 693 addr = end_addr 694 695class PyCodeObjectPtr(PyObjectPtr): 696 """ 697 Class wrapping a gdb.Value that's a PyCodeObject* i.e. a <code> instance 698 within the process being debugged. 699 """ 700 _typename = 'PyCodeObject' 701 702 def addr2line(self, addrq): 703 ''' 704 Get the line number for a given bytecode offset 705 706 Analogous to PyCode_Addr2Line; translated from pseudocode in 707 Objects/lnotab_notes.txt 708 ''' 709 co_linetable = self.pyop_field('co_linetable').proxyval(set()) 710 711 # Initialize lineno to co_firstlineno as per PyCode_Addr2Line 712 # not 0, as lnotab_notes.txt has it: 713 lineno = int_from_int(self.field('co_firstlineno')) 714 715 if addrq < 0: 716 return lineno 717 addr = 0 718 for addr, end_addr, line in parse_location_table(lineno, co_linetable): 719 if addr <= addrq and end_addr > addrq: 720 return line 721 assert False, "Unreachable" 722 723 724def items_from_keys_and_values(keys, values): 725 entries, nentries = PyDictObjectPtr._get_entries(keys) 726 for i in safe_range(nentries): 727 ep = entries[i] 728 pyop_value = PyObjectPtr.from_pyobject_ptr(values[i]) 729 if not pyop_value.is_null(): 730 pyop_key = PyObjectPtr.from_pyobject_ptr(ep['me_key']) 731 yield (pyop_key, pyop_value) 732 733class PyKeysValuesPair: 734 735 def __init__(self, keys, values): 736 self.keys = keys 737 self.values = values 738 739 def iteritems(self): 740 return items_from_keys_and_values(self.keys, self.values) 741 742 def proxyval(self, visited): 743 result = {} 744 for pyop_key, pyop_value in self.iteritems(): 745 proxy_key = pyop_key.proxyval(visited) 746 proxy_value = pyop_value.proxyval(visited) 747 result[proxy_key] = proxy_value 748 return result 749 750class PyDictObjectPtr(PyObjectPtr): 751 """ 752 Class wrapping a gdb.Value that's a PyDictObject* i.e. a dict instance 753 within the process being debugged. 754 """ 755 _typename = 'PyDictObject' 756 757 def iteritems(self): 758 ''' 759 Yields a sequence of (PyObjectPtr key, PyObjectPtr value) pairs, 760 analogous to dict.iteritems() 761 ''' 762 keys = self.field('ma_keys') 763 values = self.field('ma_values') 764 has_values = int(values) 765 if has_values: 766 values = values['values'] 767 if has_values: 768 for item in items_from_keys_and_values(keys, values): 769 yield item 770 return 771 entries, nentries = self._get_entries(keys) 772 for i in safe_range(nentries): 773 ep = entries[i] 774 pyop_value = PyObjectPtr.from_pyobject_ptr(ep['me_value']) 775 if not pyop_value.is_null(): 776 pyop_key = PyObjectPtr.from_pyobject_ptr(ep['me_key']) 777 yield (pyop_key, pyop_value) 778 779 def proxyval(self, visited): 780 # Guard against infinite loops: 781 if self.as_address() in visited: 782 return ProxyAlreadyVisited('{...}') 783 visited.add(self.as_address()) 784 785 result = {} 786 for pyop_key, pyop_value in self.iteritems(): 787 proxy_key = pyop_key.proxyval(visited) 788 proxy_value = pyop_value.proxyval(visited) 789 result[proxy_key] = proxy_value 790 return result 791 792 def write_repr(self, out, visited): 793 # Guard against infinite loops: 794 if self.as_address() in visited: 795 out.write('{...}') 796 return 797 visited.add(self.as_address()) 798 799 out.write('{') 800 first = True 801 for pyop_key, pyop_value in self.iteritems(): 802 if not first: 803 out.write(', ') 804 first = False 805 pyop_key.write_repr(out, visited) 806 out.write(': ') 807 pyop_value.write_repr(out, visited) 808 out.write('}') 809 810 @staticmethod 811 def _get_entries(keys): 812 dk_nentries = int(keys['dk_nentries']) 813 dk_size = 1<<int(keys['dk_log2_size']) 814 815 if dk_size <= 0xFF: 816 offset = dk_size 817 elif dk_size <= 0xFFFF: 818 offset = 2 * dk_size 819 elif dk_size <= 0xFFFFFFFF: 820 offset = 4 * dk_size 821 else: 822 offset = 8 * dk_size 823 824 ent_addr = keys['dk_indices'].address 825 ent_addr = ent_addr.cast(_type_unsigned_char_ptr()) + offset 826 if int(keys['dk_kind']) == 0: # DICT_KEYS_GENERAL 827 ent_ptr_t = gdb.lookup_type('PyDictKeyEntry').pointer() 828 else: 829 ent_ptr_t = gdb.lookup_type('PyDictUnicodeEntry').pointer() 830 ent_addr = ent_addr.cast(ent_ptr_t) 831 832 return ent_addr, dk_nentries 833 834 835class PyListObjectPtr(PyObjectPtr): 836 _typename = 'PyListObject' 837 838 def __getitem__(self, i): 839 # Get the gdb.Value for the (PyObject*) with the given index: 840 field_ob_item = self.field('ob_item') 841 return field_ob_item[i] 842 843 def proxyval(self, visited): 844 # Guard against infinite loops: 845 if self.as_address() in visited: 846 return ProxyAlreadyVisited('[...]') 847 visited.add(self.as_address()) 848 849 result = [PyObjectPtr.from_pyobject_ptr(self[i]).proxyval(visited) 850 for i in safe_range(int_from_int(self.field('ob_size')))] 851 return result 852 853 def write_repr(self, out, visited): 854 # Guard against infinite loops: 855 if self.as_address() in visited: 856 out.write('[...]') 857 return 858 visited.add(self.as_address()) 859 860 out.write('[') 861 for i in safe_range(int_from_int(self.field('ob_size'))): 862 if i > 0: 863 out.write(', ') 864 element = PyObjectPtr.from_pyobject_ptr(self[i]) 865 element.write_repr(out, visited) 866 out.write(']') 867 868class PyLongObjectPtr(PyObjectPtr): 869 _typename = 'PyLongObject' 870 871 def proxyval(self, visited): 872 ''' 873 Python's Include/longobjrep.h has this declaration: 874 struct _longobject { 875 PyObject_VAR_HEAD 876 digit ob_digit[1]; 877 }; 878 879 with this description: 880 The absolute value of a number is equal to 881 SUM(for i=0 through abs(ob_size)-1) ob_digit[i] * 2**(SHIFT*i) 882 Negative numbers are represented with ob_size < 0; 883 zero is represented by ob_size == 0. 884 885 where SHIFT can be either: 886 #define PyLong_SHIFT 30 887 #define PyLong_SHIFT 15 888 ''' 889 ob_size = int(self.field('ob_size')) 890 if ob_size == 0: 891 return 0 892 893 ob_digit = self.field('ob_digit') 894 895 if gdb.lookup_type('digit').sizeof == 2: 896 SHIFT = 15 897 else: 898 SHIFT = 30 899 900 digits = [int(ob_digit[i]) * 2**(SHIFT*i) 901 for i in safe_range(abs(ob_size))] 902 result = sum(digits) 903 if ob_size < 0: 904 result = -result 905 return result 906 907 def write_repr(self, out, visited): 908 # Write this out as a Python 3 int literal, i.e. without the "L" suffix 909 proxy = self.proxyval(visited) 910 out.write("%s" % proxy) 911 912 913class PyBoolObjectPtr(PyLongObjectPtr): 914 """ 915 Class wrapping a gdb.Value that's a PyBoolObject* i.e. one of the two 916 <bool> instances (Py_True/Py_False) within the process being debugged. 917 """ 918 def proxyval(self, visited): 919 if PyLongObjectPtr.proxyval(self, visited): 920 return True 921 else: 922 return False 923 924class PyNoneStructPtr(PyObjectPtr): 925 """ 926 Class wrapping a gdb.Value that's a PyObject* pointing to the 927 singleton (we hope) _Py_NoneStruct with ob_type PyNone_Type 928 """ 929 _typename = 'PyObject' 930 931 def proxyval(self, visited): 932 return None 933 934class PyFrameObjectPtr(PyObjectPtr): 935 _typename = 'PyFrameObject' 936 937 def __init__(self, gdbval, cast_to=None): 938 PyObjectPtr.__init__(self, gdbval, cast_to) 939 940 if not self.is_optimized_out(): 941 self._frame = PyFramePtr(self.field('f_frame')) 942 943 def iter_locals(self): 944 ''' 945 Yield a sequence of (name,value) pairs of PyObjectPtr instances, for 946 the local variables of this frame 947 ''' 948 if self.is_optimized_out(): 949 return 950 return self._frame.iter_locals() 951 952 def iter_globals(self): 953 ''' 954 Yield a sequence of (name,value) pairs of PyObjectPtr instances, for 955 the global variables of this frame 956 ''' 957 if self.is_optimized_out(): 958 return () 959 return self._frame.iter_globals() 960 961 def iter_builtins(self): 962 ''' 963 Yield a sequence of (name,value) pairs of PyObjectPtr instances, for 964 the builtin variables 965 ''' 966 if self.is_optimized_out(): 967 return () 968 return self._frame.iter_builtins() 969 970 def get_var_by_name(self, name): 971 972 if self.is_optimized_out(): 973 return None, None 974 return self._frame.get_var_by_name(name) 975 976 def filename(self): 977 '''Get the path of the current Python source file, as a string''' 978 if self.is_optimized_out(): 979 return FRAME_INFO_OPTIMIZED_OUT 980 return self._frame.filename() 981 982 def current_line_num(self): 983 '''Get current line number as an integer (1-based) 984 985 Translated from PyFrame_GetLineNumber and PyCode_Addr2Line 986 987 See Objects/lnotab_notes.txt 988 ''' 989 if self.is_optimized_out(): 990 return None 991 return self._frame.current_line_num() 992 993 def current_line(self): 994 '''Get the text of the current source line as a string, with a trailing 995 newline character''' 996 if self.is_optimized_out(): 997 return FRAME_INFO_OPTIMIZED_OUT 998 return self._frame.current_line() 999 1000 def write_repr(self, out, visited): 1001 if self.is_optimized_out(): 1002 out.write(FRAME_INFO_OPTIMIZED_OUT) 1003 return 1004 return self._frame.write_repr(out, visited) 1005 1006 def print_traceback(self): 1007 if self.is_optimized_out(): 1008 sys.stdout.write(' %s\n' % FRAME_INFO_OPTIMIZED_OUT) 1009 return 1010 return self._frame.print_traceback() 1011 1012class PyFramePtr: 1013 1014 def __init__(self, gdbval): 1015 self._gdbval = gdbval 1016 1017 if not self.is_optimized_out(): 1018 self.co = self._f_code() 1019 self.co_name = self.co.pyop_field('co_name') 1020 self.co_filename = self.co.pyop_field('co_filename') 1021 1022 self.f_lasti = self._f_lasti() 1023 self.co_nlocals = int_from_int(self.co.field('co_nlocals')) 1024 pnames = self.co.field('co_localsplusnames') 1025 self.co_localsplusnames = PyTupleObjectPtr.from_pyobject_ptr(pnames) 1026 1027 def is_optimized_out(self): 1028 return self._gdbval.is_optimized_out 1029 1030 def iter_locals(self): 1031 ''' 1032 Yield a sequence of (name,value) pairs of PyObjectPtr instances, for 1033 the local variables of this frame 1034 ''' 1035 if self.is_optimized_out(): 1036 return 1037 1038 1039 obj_ptr_ptr = gdb.lookup_type("PyObject").pointer().pointer() 1040 1041 localsplus = self._gdbval["localsplus"].cast(obj_ptr_ptr) 1042 1043 for i in safe_range(self.co_nlocals): 1044 pyop_value = PyObjectPtr.from_pyobject_ptr(localsplus[i]) 1045 if pyop_value.is_null(): 1046 continue 1047 pyop_name = PyObjectPtr.from_pyobject_ptr(self.co_localsplusnames[i]) 1048 yield (pyop_name, pyop_value) 1049 1050 def _f_special(self, name, convert=PyObjectPtr.from_pyobject_ptr): 1051 return convert(self._gdbval[name]) 1052 1053 def _f_globals(self): 1054 return self._f_special("f_globals") 1055 1056 def _f_builtins(self): 1057 return self._f_special("f_builtins") 1058 1059 def _f_code(self): 1060 return self._f_special("f_code", PyCodeObjectPtr.from_pyobject_ptr) 1061 1062 def _f_nlocalsplus(self): 1063 return self._f_special("nlocalsplus", int_from_int) 1064 1065 def _f_lasti(self): 1066 codeunit_p = gdb.lookup_type("_Py_CODEUNIT").pointer() 1067 prev_instr = self._gdbval["prev_instr"] 1068 first_instr = self._f_code().field("co_code_adaptive").cast(codeunit_p) 1069 return int(prev_instr - first_instr) 1070 1071 def is_entry(self): 1072 return self._f_special("is_entry", bool) 1073 1074 def previous(self): 1075 return self._f_special("previous", PyFramePtr) 1076 1077 def iter_globals(self): 1078 ''' 1079 Yield a sequence of (name,value) pairs of PyObjectPtr instances, for 1080 the global variables of this frame 1081 ''' 1082 if self.is_optimized_out(): 1083 return () 1084 1085 pyop_globals = self._f_globals() 1086 return pyop_globals.iteritems() 1087 1088 def iter_builtins(self): 1089 ''' 1090 Yield a sequence of (name,value) pairs of PyObjectPtr instances, for 1091 the builtin variables 1092 ''' 1093 if self.is_optimized_out(): 1094 return () 1095 1096 pyop_builtins = self._f_builtins() 1097 return pyop_builtins.iteritems() 1098 1099 def get_var_by_name(self, name): 1100 ''' 1101 Look for the named local variable, returning a (PyObjectPtr, scope) pair 1102 where scope is a string 'local', 'global', 'builtin' 1103 1104 If not found, return (None, None) 1105 ''' 1106 for pyop_name, pyop_value in self.iter_locals(): 1107 if name == pyop_name.proxyval(set()): 1108 return pyop_value, 'local' 1109 for pyop_name, pyop_value in self.iter_globals(): 1110 if name == pyop_name.proxyval(set()): 1111 return pyop_value, 'global' 1112 for pyop_name, pyop_value in self.iter_builtins(): 1113 if name == pyop_name.proxyval(set()): 1114 return pyop_value, 'builtin' 1115 return None, None 1116 1117 def filename(self): 1118 '''Get the path of the current Python source file, as a string''' 1119 if self.is_optimized_out(): 1120 return FRAME_INFO_OPTIMIZED_OUT 1121 return self.co_filename.proxyval(set()) 1122 1123 def current_line_num(self): 1124 '''Get current line number as an integer (1-based) 1125 1126 Translated from PyFrame_GetLineNumber and PyCode_Addr2Line 1127 1128 See Objects/lnotab_notes.txt 1129 ''' 1130 if self.is_optimized_out(): 1131 return None 1132 try: 1133 return self.co.addr2line(self.f_lasti) 1134 except Exception as ex: 1135 # bpo-34989: addr2line() is a complex function, it can fail in many 1136 # ways. For example, it fails with a TypeError on "FakeRepr" if 1137 # gdb fails to load debug symbols. Use a catch-all "except 1138 # Exception" to make the whole function safe. The caller has to 1139 # handle None anyway for optimized Python. 1140 return None 1141 1142 def current_line(self): 1143 '''Get the text of the current source line as a string, with a trailing 1144 newline character''' 1145 if self.is_optimized_out(): 1146 return FRAME_INFO_OPTIMIZED_OUT 1147 1148 lineno = self.current_line_num() 1149 if lineno is None: 1150 return '(failed to get frame line number)' 1151 1152 filename = self.filename() 1153 try: 1154 with open(os_fsencode(filename), 'r', encoding="utf-8") as fp: 1155 lines = fp.readlines() 1156 except IOError: 1157 return None 1158 1159 try: 1160 # Convert from 1-based current_line_num to 0-based list offset 1161 return lines[lineno - 1] 1162 except IndexError: 1163 return None 1164 1165 def write_repr(self, out, visited): 1166 if self.is_optimized_out(): 1167 out.write(FRAME_INFO_OPTIMIZED_OUT) 1168 return 1169 lineno = self.current_line_num() 1170 lineno = str(lineno) if lineno is not None else "?" 1171 out.write('Frame 0x%x, for file %s, line %s, in %s (' 1172 % (self.as_address(), 1173 self.co_filename.proxyval(visited), 1174 lineno, 1175 self.co_name.proxyval(visited))) 1176 first = True 1177 for pyop_name, pyop_value in self.iter_locals(): 1178 if not first: 1179 out.write(', ') 1180 first = False 1181 1182 out.write(pyop_name.proxyval(visited)) 1183 out.write('=') 1184 pyop_value.write_repr(out, visited) 1185 1186 out.write(')') 1187 1188 def as_address(self): 1189 return int(self._gdbval) 1190 1191 def print_traceback(self): 1192 if self.is_optimized_out(): 1193 sys.stdout.write(' %s\n' % FRAME_INFO_OPTIMIZED_OUT) 1194 return 1195 visited = set() 1196 lineno = self.current_line_num() 1197 lineno = str(lineno) if lineno is not None else "?" 1198 sys.stdout.write(' File "%s", line %s, in %s\n' 1199 % (self.co_filename.proxyval(visited), 1200 lineno, 1201 self.co_name.proxyval(visited))) 1202 1203 def get_truncated_repr(self, maxlen): 1204 ''' 1205 Get a repr-like string for the data, but truncate it at "maxlen" bytes 1206 (ending the object graph traversal as soon as you do) 1207 ''' 1208 out = TruncatedStringIO(maxlen) 1209 try: 1210 self.write_repr(out, set()) 1211 except StringTruncated: 1212 # Truncation occurred: 1213 return out.getvalue() + '...(truncated)' 1214 1215 # No truncation occurred: 1216 return out.getvalue() 1217 1218class PySetObjectPtr(PyObjectPtr): 1219 _typename = 'PySetObject' 1220 1221 @classmethod 1222 def _dummy_key(self): 1223 return gdb.lookup_global_symbol('_PySet_Dummy').value() 1224 1225 def __iter__(self): 1226 dummy_ptr = self._dummy_key() 1227 table = self.field('table') 1228 for i in safe_range(self.field('mask') + 1): 1229 setentry = table[i] 1230 key = setentry['key'] 1231 if key != 0 and key != dummy_ptr: 1232 yield PyObjectPtr.from_pyobject_ptr(key) 1233 1234 def proxyval(self, visited): 1235 # Guard against infinite loops: 1236 if self.as_address() in visited: 1237 return ProxyAlreadyVisited('%s(...)' % self.safe_tp_name()) 1238 visited.add(self.as_address()) 1239 1240 members = (key.proxyval(visited) for key in self) 1241 if self.safe_tp_name() == 'frozenset': 1242 return frozenset(members) 1243 else: 1244 return set(members) 1245 1246 def write_repr(self, out, visited): 1247 # Emulate Python 3's set_repr 1248 tp_name = self.safe_tp_name() 1249 1250 # Guard against infinite loops: 1251 if self.as_address() in visited: 1252 out.write('(...)') 1253 return 1254 visited.add(self.as_address()) 1255 1256 # Python 3's set_repr special-cases the empty set: 1257 if not self.field('used'): 1258 out.write(tp_name) 1259 out.write('()') 1260 return 1261 1262 # Python 3 uses {} for set literals: 1263 if tp_name != 'set': 1264 out.write(tp_name) 1265 out.write('(') 1266 1267 out.write('{') 1268 first = True 1269 for key in self: 1270 if not first: 1271 out.write(', ') 1272 first = False 1273 key.write_repr(out, visited) 1274 out.write('}') 1275 1276 if tp_name != 'set': 1277 out.write(')') 1278 1279 1280class PyBytesObjectPtr(PyObjectPtr): 1281 _typename = 'PyBytesObject' 1282 1283 def __str__(self): 1284 field_ob_size = self.field('ob_size') 1285 field_ob_sval = self.field('ob_sval') 1286 char_ptr = field_ob_sval.address.cast(_type_unsigned_char_ptr()) 1287 return ''.join([chr(char_ptr[i]) for i in safe_range(field_ob_size)]) 1288 1289 def proxyval(self, visited): 1290 return str(self) 1291 1292 def write_repr(self, out, visited): 1293 # Write this out as a Python 3 bytes literal, i.e. with a "b" prefix 1294 1295 # Get a PyStringObject* within the Python 2 gdb process: 1296 proxy = self.proxyval(visited) 1297 1298 # Transliteration of Python 3's Objects/bytesobject.c:PyBytes_Repr 1299 # to Python 2 code: 1300 quote = "'" 1301 if "'" in proxy and not '"' in proxy: 1302 quote = '"' 1303 out.write('b') 1304 out.write(quote) 1305 for byte in proxy: 1306 if byte == quote or byte == '\\': 1307 out.write('\\') 1308 out.write(byte) 1309 elif byte == '\t': 1310 out.write('\\t') 1311 elif byte == '\n': 1312 out.write('\\n') 1313 elif byte == '\r': 1314 out.write('\\r') 1315 elif byte < ' ' or ord(byte) >= 0x7f: 1316 out.write('\\x') 1317 out.write(hexdigits[(ord(byte) & 0xf0) >> 4]) 1318 out.write(hexdigits[ord(byte) & 0xf]) 1319 else: 1320 out.write(byte) 1321 out.write(quote) 1322 1323class PyTupleObjectPtr(PyObjectPtr): 1324 _typename = 'PyTupleObject' 1325 1326 def __getitem__(self, i): 1327 # Get the gdb.Value for the (PyObject*) with the given index: 1328 field_ob_item = self.field('ob_item') 1329 return field_ob_item[i] 1330 1331 def proxyval(self, visited): 1332 # Guard against infinite loops: 1333 if self.as_address() in visited: 1334 return ProxyAlreadyVisited('(...)') 1335 visited.add(self.as_address()) 1336 1337 result = tuple(PyObjectPtr.from_pyobject_ptr(self[i]).proxyval(visited) 1338 for i in safe_range(int_from_int(self.field('ob_size')))) 1339 return result 1340 1341 def write_repr(self, out, visited): 1342 # Guard against infinite loops: 1343 if self.as_address() in visited: 1344 out.write('(...)') 1345 return 1346 visited.add(self.as_address()) 1347 1348 out.write('(') 1349 for i in safe_range(int_from_int(self.field('ob_size'))): 1350 if i > 0: 1351 out.write(', ') 1352 element = PyObjectPtr.from_pyobject_ptr(self[i]) 1353 element.write_repr(out, visited) 1354 if self.field('ob_size') == 1: 1355 out.write(',)') 1356 else: 1357 out.write(')') 1358 1359class PyTypeObjectPtr(PyObjectPtr): 1360 _typename = 'PyTypeObject' 1361 1362 1363def _unichr_is_printable(char): 1364 # Logic adapted from Python 3's Tools/unicode/makeunicodedata.py 1365 if char == u" ": 1366 return True 1367 import unicodedata 1368 return unicodedata.category(char) not in ("C", "Z") 1369 1370 1371class PyUnicodeObjectPtr(PyObjectPtr): 1372 _typename = 'PyUnicodeObject' 1373 1374 def char_width(self): 1375 _type_Py_UNICODE = gdb.lookup_type('Py_UNICODE') 1376 return _type_Py_UNICODE.sizeof 1377 1378 def proxyval(self, visited): 1379 may_have_surrogates = False 1380 compact = self.field('_base') 1381 ascii = compact['_base'] 1382 state = ascii['state'] 1383 is_compact_ascii = (int(state['ascii']) and int(state['compact'])) 1384 if not int(state['ready']): 1385 # string is not ready 1386 field_length = int(compact['wstr_length']) 1387 may_have_surrogates = True 1388 field_str = ascii['wstr'] 1389 else: 1390 field_length = int(ascii['length']) 1391 if is_compact_ascii: 1392 field_str = ascii.address + 1 1393 elif int(state['compact']): 1394 field_str = compact.address + 1 1395 else: 1396 field_str = self.field('data')['any'] 1397 repr_kind = int(state['kind']) 1398 if repr_kind == 1: 1399 field_str = field_str.cast(_type_unsigned_char_ptr()) 1400 elif repr_kind == 2: 1401 field_str = field_str.cast(_type_unsigned_short_ptr()) 1402 elif repr_kind == 4: 1403 field_str = field_str.cast(_type_unsigned_int_ptr()) 1404 1405 # Gather a list of ints from the Py_UNICODE array; these are either 1406 # UCS-1, UCS-2 or UCS-4 code points: 1407 if not may_have_surrogates: 1408 Py_UNICODEs = [int(field_str[i]) for i in safe_range(field_length)] 1409 else: 1410 # A more elaborate routine if sizeof(Py_UNICODE) is 2 in the 1411 # inferior process: we must join surrogate pairs. 1412 Py_UNICODEs = [] 1413 i = 0 1414 limit = safety_limit(field_length) 1415 while i < limit: 1416 ucs = int(field_str[i]) 1417 i += 1 1418 if ucs < 0xD800 or ucs >= 0xDC00 or i == field_length: 1419 Py_UNICODEs.append(ucs) 1420 continue 1421 # This could be a surrogate pair. 1422 ucs2 = int(field_str[i]) 1423 if ucs2 < 0xDC00 or ucs2 > 0xDFFF: 1424 continue 1425 code = (ucs & 0x03FF) << 10 1426 code |= ucs2 & 0x03FF 1427 code += 0x00010000 1428 Py_UNICODEs.append(code) 1429 i += 1 1430 1431 # Convert the int code points to unicode characters, and generate a 1432 # local unicode instance. 1433 result = u''.join(map(chr, Py_UNICODEs)) 1434 return result 1435 1436 def write_repr(self, out, visited): 1437 # Write this out as a Python 3 str literal, i.e. without a "u" prefix 1438 1439 # Get a PyUnicodeObject* within the Python 2 gdb process: 1440 proxy = self.proxyval(visited) 1441 1442 # Transliteration of Python 3's Object/unicodeobject.c:unicode_repr 1443 # to Python 2: 1444 if "'" in proxy and '"' not in proxy: 1445 quote = '"' 1446 else: 1447 quote = "'" 1448 out.write(quote) 1449 1450 i = 0 1451 while i < len(proxy): 1452 ch = proxy[i] 1453 i += 1 1454 1455 # Escape quotes and backslashes 1456 if ch == quote or ch == '\\': 1457 out.write('\\') 1458 out.write(ch) 1459 1460 # Map special whitespace to '\t', \n', '\r' 1461 elif ch == '\t': 1462 out.write('\\t') 1463 elif ch == '\n': 1464 out.write('\\n') 1465 elif ch == '\r': 1466 out.write('\\r') 1467 1468 # Map non-printable US ASCII to '\xhh' */ 1469 elif ch < ' ' or ord(ch) == 0x7F: 1470 out.write('\\x') 1471 out.write(hexdigits[(ord(ch) >> 4) & 0x000F]) 1472 out.write(hexdigits[ord(ch) & 0x000F]) 1473 1474 # Copy ASCII characters as-is 1475 elif ord(ch) < 0x7F: 1476 out.write(ch) 1477 1478 # Non-ASCII characters 1479 else: 1480 ucs = ch 1481 ch2 = None 1482 1483 printable = ucs.isprintable() 1484 if printable: 1485 try: 1486 ucs.encode(ENCODING) 1487 except UnicodeEncodeError: 1488 printable = False 1489 1490 # Map Unicode whitespace and control characters 1491 # (categories Z* and C* except ASCII space) 1492 if not printable: 1493 if ch2 is not None: 1494 # Match Python 3's representation of non-printable 1495 # wide characters. 1496 code = (ord(ch) & 0x03FF) << 10 1497 code |= ord(ch2) & 0x03FF 1498 code += 0x00010000 1499 else: 1500 code = ord(ucs) 1501 1502 # Map 8-bit characters to '\\xhh' 1503 if code <= 0xff: 1504 out.write('\\x') 1505 out.write(hexdigits[(code >> 4) & 0x000F]) 1506 out.write(hexdigits[code & 0x000F]) 1507 # Map 21-bit characters to '\U00xxxxxx' 1508 elif code >= 0x10000: 1509 out.write('\\U') 1510 out.write(hexdigits[(code >> 28) & 0x0000000F]) 1511 out.write(hexdigits[(code >> 24) & 0x0000000F]) 1512 out.write(hexdigits[(code >> 20) & 0x0000000F]) 1513 out.write(hexdigits[(code >> 16) & 0x0000000F]) 1514 out.write(hexdigits[(code >> 12) & 0x0000000F]) 1515 out.write(hexdigits[(code >> 8) & 0x0000000F]) 1516 out.write(hexdigits[(code >> 4) & 0x0000000F]) 1517 out.write(hexdigits[code & 0x0000000F]) 1518 # Map 16-bit characters to '\uxxxx' 1519 else: 1520 out.write('\\u') 1521 out.write(hexdigits[(code >> 12) & 0x000F]) 1522 out.write(hexdigits[(code >> 8) & 0x000F]) 1523 out.write(hexdigits[(code >> 4) & 0x000F]) 1524 out.write(hexdigits[code & 0x000F]) 1525 else: 1526 # Copy characters as-is 1527 out.write(ch) 1528 if ch2 is not None: 1529 out.write(ch2) 1530 1531 out.write(quote) 1532 1533 1534class wrapperobject(PyObjectPtr): 1535 _typename = 'wrapperobject' 1536 1537 def safe_name(self): 1538 try: 1539 name = self.field('descr')['d_base']['name'].string() 1540 return repr(name) 1541 except (NullPyObjectPtr, RuntimeError, UnicodeDecodeError): 1542 return '<unknown name>' 1543 1544 def safe_tp_name(self): 1545 try: 1546 return self.field('self')['ob_type']['tp_name'].string() 1547 except (NullPyObjectPtr, RuntimeError, UnicodeDecodeError): 1548 return '<unknown tp_name>' 1549 1550 def safe_self_addresss(self): 1551 try: 1552 address = int(self.field('self')) 1553 return '%#x' % address 1554 except (NullPyObjectPtr, RuntimeError): 1555 return '<failed to get self address>' 1556 1557 def proxyval(self, visited): 1558 name = self.safe_name() 1559 tp_name = self.safe_tp_name() 1560 self_address = self.safe_self_addresss() 1561 return ("<method-wrapper %s of %s object at %s>" 1562 % (name, tp_name, self_address)) 1563 1564 def write_repr(self, out, visited): 1565 proxy = self.proxyval(visited) 1566 out.write(proxy) 1567 1568 1569def int_from_int(gdbval): 1570 return int(gdbval) 1571 1572 1573def stringify(val): 1574 # TODO: repr() puts everything on one line; pformat can be nicer, but 1575 # can lead to v.long results; this function isolates the choice 1576 if True: 1577 return repr(val) 1578 else: 1579 from pprint import pformat 1580 return pformat(val) 1581 1582 1583class PyObjectPtrPrinter: 1584 "Prints a (PyObject*)" 1585 1586 def __init__ (self, gdbval): 1587 self.gdbval = gdbval 1588 1589 def to_string (self): 1590 pyop = PyObjectPtr.from_pyobject_ptr(self.gdbval) 1591 if True: 1592 return pyop.get_truncated_repr(MAX_OUTPUT_LEN) 1593 else: 1594 # Generate full proxy value then stringify it. 1595 # Doing so could be expensive 1596 proxyval = pyop.proxyval(set()) 1597 return stringify(proxyval) 1598 1599def pretty_printer_lookup(gdbval): 1600 type = gdbval.type.unqualified() 1601 if type.code != gdb.TYPE_CODE_PTR: 1602 return None 1603 1604 type = type.target().unqualified() 1605 t = str(type) 1606 if t in ("PyObject", "PyFrameObject", "PyUnicodeObject", "wrapperobject"): 1607 return PyObjectPtrPrinter(gdbval) 1608 1609""" 1610During development, I've been manually invoking the code in this way: 1611(gdb) python 1612 1613import sys 1614sys.path.append('/home/david/coding/python-gdb') 1615import libpython 1616end 1617 1618then reloading it after each edit like this: 1619(gdb) python reload(libpython) 1620 1621The following code should ensure that the prettyprinter is registered 1622if the code is autoloaded by gdb when visiting libpython.so, provided 1623that this python file is installed to the same path as the library (or its 1624.debug file) plus a "-gdb.py" suffix, e.g: 1625 /usr/lib/libpython2.6.so.1.0-gdb.py 1626 /usr/lib/debug/usr/lib/libpython2.6.so.1.0.debug-gdb.py 1627""" 1628def register (obj): 1629 if obj is None: 1630 obj = gdb 1631 1632 # Wire up the pretty-printer 1633 obj.pretty_printers.append(pretty_printer_lookup) 1634 1635register (gdb.current_objfile ()) 1636 1637 1638 1639# Unfortunately, the exact API exposed by the gdb module varies somewhat 1640# from build to build 1641# See http://bugs.python.org/issue8279?#msg102276 1642 1643class Frame(object): 1644 ''' 1645 Wrapper for gdb.Frame, adding various methods 1646 ''' 1647 def __init__(self, gdbframe): 1648 self._gdbframe = gdbframe 1649 1650 def older(self): 1651 older = self._gdbframe.older() 1652 if older: 1653 return Frame(older) 1654 else: 1655 return None 1656 1657 def newer(self): 1658 newer = self._gdbframe.newer() 1659 if newer: 1660 return Frame(newer) 1661 else: 1662 return None 1663 1664 def select(self): 1665 '''If supported, select this frame and return True; return False if unsupported 1666 1667 Not all builds have a gdb.Frame.select method; seems to be present on Fedora 12 1668 onwards, but absent on Ubuntu buildbot''' 1669 if not hasattr(self._gdbframe, 'select'): 1670 print ('Unable to select frame: ' 1671 'this build of gdb does not expose a gdb.Frame.select method') 1672 return False 1673 self._gdbframe.select() 1674 return True 1675 1676 def get_index(self): 1677 '''Calculate index of frame, starting at 0 for the newest frame within 1678 this thread''' 1679 index = 0 1680 # Go down until you reach the newest frame: 1681 iter_frame = self 1682 while iter_frame.newer(): 1683 index += 1 1684 iter_frame = iter_frame.newer() 1685 return index 1686 1687 # We divide frames into: 1688 # - "python frames": 1689 # - "bytecode frames" i.e. PyEval_EvalFrameEx 1690 # - "other python frames": things that are of interest from a python 1691 # POV, but aren't bytecode (e.g. GC, GIL) 1692 # - everything else 1693 1694 def is_python_frame(self): 1695 '''Is this a _PyEval_EvalFrameDefault frame, or some other important 1696 frame? (see is_other_python_frame for what "important" means in this 1697 context)''' 1698 if self.is_evalframe(): 1699 return True 1700 if self.is_other_python_frame(): 1701 return True 1702 return False 1703 1704 def is_evalframe(self): 1705 '''Is this a _PyEval_EvalFrameDefault frame?''' 1706 if self._gdbframe.name() == EVALFRAME: 1707 ''' 1708 I believe we also need to filter on the inline 1709 struct frame_id.inline_depth, only regarding frames with 1710 an inline depth of 0 as actually being this function 1711 1712 So we reject those with type gdb.INLINE_FRAME 1713 ''' 1714 if self._gdbframe.type() == gdb.NORMAL_FRAME: 1715 # We have a _PyEval_EvalFrameDefault frame: 1716 return True 1717 1718 return False 1719 1720 def is_other_python_frame(self): 1721 '''Is this frame worth displaying in python backtraces? 1722 Examples: 1723 - waiting on the GIL 1724 - garbage-collecting 1725 - within a CFunction 1726 If it is, return a descriptive string 1727 For other frames, return False 1728 ''' 1729 if self.is_waiting_for_gil(): 1730 return 'Waiting for the GIL' 1731 1732 if self.is_gc_collect(): 1733 return 'Garbage-collecting' 1734 1735 # Detect invocations of PyCFunction instances: 1736 frame = self._gdbframe 1737 caller = frame.name() 1738 if not caller: 1739 return False 1740 1741 if (caller.startswith('cfunction_vectorcall_') or 1742 caller == 'cfunction_call'): 1743 arg_name = 'func' 1744 # Within that frame: 1745 # "func" is the local containing the PyObject* of the 1746 # PyCFunctionObject instance 1747 # "f" is the same value, but cast to (PyCFunctionObject*) 1748 # "self" is the (PyObject*) of the 'self' 1749 try: 1750 # Use the prettyprinter for the func: 1751 func = frame.read_var(arg_name) 1752 return str(func) 1753 except ValueError: 1754 return ('PyCFunction invocation (unable to read %s: ' 1755 'missing debuginfos?)' % arg_name) 1756 except RuntimeError: 1757 return 'PyCFunction invocation (unable to read %s)' % arg_name 1758 1759 if caller == 'wrapper_call': 1760 arg_name = 'wp' 1761 try: 1762 func = frame.read_var(arg_name) 1763 return str(func) 1764 except ValueError: 1765 return ('<wrapper_call invocation (unable to read %s: ' 1766 'missing debuginfos?)>' % arg_name) 1767 except RuntimeError: 1768 return '<wrapper_call invocation (unable to read %s)>' % arg_name 1769 1770 # This frame isn't worth reporting: 1771 return False 1772 1773 def is_waiting_for_gil(self): 1774 '''Is this frame waiting on the GIL?''' 1775 # This assumes the _POSIX_THREADS version of Python/ceval_gil.h: 1776 name = self._gdbframe.name() 1777 if name: 1778 return (name == 'take_gil') 1779 1780 def is_gc_collect(self): 1781 '''Is this frame gc_collect_main() within the garbage-collector?''' 1782 return self._gdbframe.name() in ('collect', 'gc_collect_main') 1783 1784 def get_pyop(self): 1785 try: 1786 frame = self._gdbframe.read_var('frame') 1787 frame = PyFramePtr(frame) 1788 if not frame.is_optimized_out(): 1789 return frame 1790 cframe = self._gdbframe.read_var('cframe') 1791 if cframe is None: 1792 return None 1793 frame = PyFramePtr(cframe["current_frame"]) 1794 if frame and not frame.is_optimized_out(): 1795 return frame 1796 return None 1797 except ValueError: 1798 return None 1799 1800 @classmethod 1801 def get_selected_frame(cls): 1802 _gdbframe = gdb.selected_frame() 1803 if _gdbframe: 1804 return Frame(_gdbframe) 1805 return None 1806 1807 @classmethod 1808 def get_selected_python_frame(cls): 1809 '''Try to obtain the Frame for the python-related code in the selected 1810 frame, or None''' 1811 try: 1812 frame = cls.get_selected_frame() 1813 except gdb.error: 1814 # No frame: Python didn't start yet 1815 return None 1816 1817 while frame: 1818 if frame.is_python_frame(): 1819 return frame 1820 frame = frame.older() 1821 1822 # Not found: 1823 return None 1824 1825 @classmethod 1826 def get_selected_bytecode_frame(cls): 1827 '''Try to obtain the Frame for the python bytecode interpreter in the 1828 selected GDB frame, or None''' 1829 frame = cls.get_selected_frame() 1830 1831 while frame: 1832 if frame.is_evalframe(): 1833 return frame 1834 frame = frame.older() 1835 1836 # Not found: 1837 return None 1838 1839 def print_summary(self): 1840 if self.is_evalframe(): 1841 interp_frame = self.get_pyop() 1842 while True: 1843 if interp_frame: 1844 line = interp_frame.get_truncated_repr(MAX_OUTPUT_LEN) 1845 sys.stdout.write('#%i %s\n' % (self.get_index(), line)) 1846 if not interp_frame.is_optimized_out(): 1847 line = interp_frame.current_line() 1848 if line is not None: 1849 sys.stdout.write(' %s\n' % line.strip()) 1850 if interp_frame.is_entry(): 1851 break 1852 else: 1853 sys.stdout.write('#%i (unable to read python frame information)\n' % self.get_index()) 1854 break 1855 interp_frame = interp_frame.previous() 1856 else: 1857 info = self.is_other_python_frame() 1858 if info: 1859 sys.stdout.write('#%i %s\n' % (self.get_index(), info)) 1860 else: 1861 sys.stdout.write('#%i\n' % self.get_index()) 1862 1863 def print_traceback(self): 1864 if self.is_evalframe(): 1865 interp_frame = self.get_pyop() 1866 while True: 1867 if interp_frame: 1868 interp_frame.print_traceback() 1869 if not interp_frame.is_optimized_out(): 1870 line = interp_frame.current_line() 1871 if line is not None: 1872 sys.stdout.write(' %s\n' % line.strip()) 1873 if interp_frame.is_entry(): 1874 break 1875 else: 1876 sys.stdout.write(' (unable to read python frame information)\n') 1877 break 1878 interp_frame = interp_frame.previous() 1879 else: 1880 info = self.is_other_python_frame() 1881 if info: 1882 sys.stdout.write(' %s\n' % info) 1883 else: 1884 sys.stdout.write(' (not a python frame)\n') 1885 1886class PyList(gdb.Command): 1887 '''List the current Python source code, if any 1888 1889 Use 1890 py-list START 1891 to list at a different line number within the python source. 1892 1893 Use 1894 py-list START, END 1895 to list a specific range of lines within the python source. 1896 ''' 1897 1898 def __init__(self): 1899 gdb.Command.__init__ (self, 1900 "py-list", 1901 gdb.COMMAND_FILES, 1902 gdb.COMPLETE_NONE) 1903 1904 1905 def invoke(self, args, from_tty): 1906 import re 1907 1908 start = None 1909 end = None 1910 1911 m = re.match(r'\s*(\d+)\s*', args) 1912 if m: 1913 start = int(m.group(0)) 1914 end = start + 10 1915 1916 m = re.match(r'\s*(\d+)\s*,\s*(\d+)\s*', args) 1917 if m: 1918 start, end = map(int, m.groups()) 1919 1920 # py-list requires an actual PyEval_EvalFrameEx frame: 1921 frame = Frame.get_selected_bytecode_frame() 1922 if not frame: 1923 print('Unable to locate gdb frame for python bytecode interpreter') 1924 return 1925 1926 pyop = frame.get_pyop() 1927 if not pyop or pyop.is_optimized_out(): 1928 print(UNABLE_READ_INFO_PYTHON_FRAME) 1929 return 1930 1931 filename = pyop.filename() 1932 lineno = pyop.current_line_num() 1933 if lineno is None: 1934 print('Unable to read python frame line number') 1935 return 1936 1937 if start is None: 1938 start = lineno - 5 1939 end = lineno + 5 1940 1941 if start<1: 1942 start = 1 1943 1944 try: 1945 f = open(os_fsencode(filename), 'r', encoding="utf-8") 1946 except IOError as err: 1947 sys.stdout.write('Unable to open %s: %s\n' 1948 % (filename, err)) 1949 return 1950 with f: 1951 all_lines = f.readlines() 1952 # start and end are 1-based, all_lines is 0-based; 1953 # so [start-1:end] as a python slice gives us [start, end] as a 1954 # closed interval 1955 for i, line in enumerate(all_lines[start-1:end]): 1956 linestr = str(i+start) 1957 # Highlight current line: 1958 if i + start == lineno: 1959 linestr = '>' + linestr 1960 sys.stdout.write('%4s %s' % (linestr, line)) 1961 1962 1963# ...and register the command: 1964PyList() 1965 1966def move_in_stack(move_up): 1967 '''Move up or down the stack (for the py-up/py-down command)''' 1968 # Important: 1969 # The amount of frames that are printed out depends on how many frames are inlined 1970 # in the same evaluation loop. As this command links directly the C stack with the 1971 # Python stack, the results are sensitive to the number of inlined frames and this 1972 # is likely to change between versions and optimizations. 1973 frame = Frame.get_selected_python_frame() 1974 if not frame: 1975 print('Unable to locate python frame') 1976 return 1977 while frame: 1978 if move_up: 1979 iter_frame = frame.older() 1980 else: 1981 iter_frame = frame.newer() 1982 1983 if not iter_frame: 1984 break 1985 1986 if iter_frame.is_python_frame(): 1987 # Result: 1988 if iter_frame.select(): 1989 iter_frame.print_summary() 1990 return 1991 1992 frame = iter_frame 1993 1994 if move_up: 1995 print('Unable to find an older python frame') 1996 else: 1997 print('Unable to find a newer python frame') 1998 1999 2000class PyUp(gdb.Command): 2001 'Select and print all python stack frame in the same eval loop starting from the one that called this one (if any)' 2002 def __init__(self): 2003 gdb.Command.__init__ (self, 2004 "py-up", 2005 gdb.COMMAND_STACK, 2006 gdb.COMPLETE_NONE) 2007 2008 2009 def invoke(self, args, from_tty): 2010 move_in_stack(move_up=True) 2011 2012class PyDown(gdb.Command): 2013 'Select and print all python stack frame in the same eval loop starting from the one called this one (if any)' 2014 def __init__(self): 2015 gdb.Command.__init__ (self, 2016 "py-down", 2017 gdb.COMMAND_STACK, 2018 gdb.COMPLETE_NONE) 2019 2020 2021 def invoke(self, args, from_tty): 2022 move_in_stack(move_up=False) 2023 2024# Not all builds of gdb have gdb.Frame.select 2025if hasattr(gdb.Frame, 'select'): 2026 PyUp() 2027 PyDown() 2028 2029class PyBacktraceFull(gdb.Command): 2030 'Display the current python frame and all the frames within its call stack (if any)' 2031 def __init__(self): 2032 gdb.Command.__init__ (self, 2033 "py-bt-full", 2034 gdb.COMMAND_STACK, 2035 gdb.COMPLETE_NONE) 2036 2037 2038 def invoke(self, args, from_tty): 2039 frame = Frame.get_selected_python_frame() 2040 if not frame: 2041 print('Unable to locate python frame') 2042 return 2043 2044 while frame: 2045 if frame.is_python_frame(): 2046 frame.print_summary() 2047 frame = frame.older() 2048 2049PyBacktraceFull() 2050 2051class PyBacktrace(gdb.Command): 2052 'Display the current python frame and all the frames within its call stack (if any)' 2053 def __init__(self): 2054 gdb.Command.__init__ (self, 2055 "py-bt", 2056 gdb.COMMAND_STACK, 2057 gdb.COMPLETE_NONE) 2058 2059 2060 def invoke(self, args, from_tty): 2061 frame = Frame.get_selected_python_frame() 2062 if not frame: 2063 print('Unable to locate python frame') 2064 return 2065 2066 sys.stdout.write('Traceback (most recent call first):\n') 2067 while frame: 2068 if frame.is_python_frame(): 2069 frame.print_traceback() 2070 frame = frame.older() 2071 2072PyBacktrace() 2073 2074class PyPrint(gdb.Command): 2075 'Look up the given python variable name, and print it' 2076 def __init__(self): 2077 gdb.Command.__init__ (self, 2078 "py-print", 2079 gdb.COMMAND_DATA, 2080 gdb.COMPLETE_NONE) 2081 2082 2083 def invoke(self, args, from_tty): 2084 name = str(args) 2085 2086 frame = Frame.get_selected_python_frame() 2087 if not frame: 2088 print('Unable to locate python frame') 2089 return 2090 2091 pyop_frame = frame.get_pyop() 2092 if not pyop_frame: 2093 print(UNABLE_READ_INFO_PYTHON_FRAME) 2094 return 2095 2096 pyop_var, scope = pyop_frame.get_var_by_name(name) 2097 2098 if pyop_var: 2099 print('%s %r = %s' 2100 % (scope, 2101 name, 2102 pyop_var.get_truncated_repr(MAX_OUTPUT_LEN))) 2103 else: 2104 print('%r not found' % name) 2105 2106PyPrint() 2107 2108class PyLocals(gdb.Command): 2109 'Look up the given python variable name, and print it' 2110 def __init__(self): 2111 gdb.Command.__init__ (self, 2112 "py-locals", 2113 gdb.COMMAND_DATA, 2114 gdb.COMPLETE_NONE) 2115 2116 2117 def invoke(self, args, from_tty): 2118 name = str(args) 2119 2120 frame = Frame.get_selected_python_frame() 2121 if not frame: 2122 print('Unable to locate python frame') 2123 return 2124 2125 pyop_frame = frame.get_pyop() 2126 while True: 2127 if not pyop_frame: 2128 print(UNABLE_READ_INFO_PYTHON_FRAME) 2129 break 2130 2131 sys.stdout.write('Locals for %s\n' % (pyop_frame.co_name.proxyval(set()))) 2132 2133 for pyop_name, pyop_value in pyop_frame.iter_locals(): 2134 print('%s = %s' 2135 % (pyop_name.proxyval(set()), 2136 pyop_value.get_truncated_repr(MAX_OUTPUT_LEN))) 2137 2138 if pyop_frame.is_entry(): 2139 break 2140 2141 pyop_frame = pyop_frame.previous() 2142 2143PyLocals() 2144