1#!/usr/bin/python
2'''
3From gdb 7 onwards, gdb's build can be configured --with-python, allowing gdb
4to be extended with Python code e.g. for library-specific data visualizations,
5such as for the C++ STL types.  Documentation on this API can be seen at:
6http://sourceware.org/gdb/current/onlinedocs/gdb/Python-API.html
7
8
9This python module deals with the case when the process being debugged (the
10"inferior process" in gdb parlance) is itself python, or more specifically,
11linked against libpython.  In this situation, almost every item of data is a
12(PyObject*), and having the debugger merely print their addresses is not very
13enlightening.
14
This module embeds knowledge about the implementation details of libpython so
that we can emit useful visualizations e.g. a string, a list, a dict, a frame
giving file/line information and the state of local variables.
18
19In particular, given a gdb.Value corresponding to a PyObject* in the inferior
20process, we can generate a "proxy value" within the gdb process.  For example,
21given a PyObject* in the inferior process that is in fact a PyListObject*
22holding three PyObject* that turn out to be PyBytesObject* instances, we can
23generate a proxy value within the gdb process that is a list of bytes
24instances:
25  [b"foo", b"bar", b"baz"]
26
27Doing so can be expensive for complicated graphs of objects, and could take
28some time, so we also have a "write_repr" method that writes a representation
29of the data to a file-like object.  This allows us to stop the traversal by
30having the file-like object raise an exception if it gets too much data.
31
32With both "proxyval" and "write_repr" we keep track of the set of all addresses
33visited so far in the traversal, to avoid infinite recursion due to cycles in
34the graph of object references.
35
We try to defer gdb.lookup_type() invocations for python types until as late as
possible: for a dynamically linked python binary, when the process starts in
the debugger, the libpython.so hasn't been dynamically loaded yet, so none of
the type names are known to the debugger.
40
41The module also extends gdb with some python-specific commands.
42'''
43
44import gdb
45import os
46import locale
47import sys
48
49
50# Look up the gdb.Type for some standard types:
51# Those need to be refreshed as types (pointer sizes) may change when
52# gdb loads different executables
53
def _type_char_ptr():
    '''Look up and return the gdb.Type for (char*); not cached, so each call
    asks gdb afresh.'''
    char_type = gdb.lookup_type('char')
    return char_type.pointer()
56
57
def _type_unsigned_char_ptr():
    '''Look up and return the gdb.Type for (unsigned char*); not cached, so
    each call asks gdb afresh.'''
    uchar_type = gdb.lookup_type('unsigned char')
    return uchar_type.pointer()
60
61
def _type_unsigned_short_ptr():
    '''Look up and return the gdb.Type for (unsigned short*); not cached, so
    each call asks gdb afresh.'''
    ushort_type = gdb.lookup_type('unsigned short')
    return ushort_type.pointer()
64
65
def _type_unsigned_int_ptr():
    '''Look up and return the gdb.Type for (unsigned int*); not cached, so
    each call asks gdb afresh.'''
    uint_type = gdb.lookup_type('unsigned int')
    return uint_type.pointer()
68
69
def _sizeof_void_p():
    '''Return the size of a (void*) as reported by gdb for the types it
    currently knows about; looked up afresh on every call.'''
    void_ptr_type = gdb.lookup_type('void').pointer()
    return void_ptr_type.sizeof
72
73
# Bit masks for the "tp_flags" field of a type object.  They are tested by
# PyObjectPtr.subclass_from_type and HeapTypeObjectPtr.get_keys_values.
Py_TPFLAGS_MANAGED_DICT = 1 << 4
Py_TPFLAGS_HEAPTYPE = 1 << 9
Py_TPFLAGS_LONG_SUBCLASS = 1 << 24
Py_TPFLAGS_LIST_SUBCLASS = 1 << 25
Py_TPFLAGS_TUPLE_SUBCLASS = 1 << 26
Py_TPFLAGS_BYTES_SUBCLASS = 1 << 27
Py_TPFLAGS_UNICODE_SUBCLASS = 1 << 28
Py_TPFLAGS_DICT_SUBCLASS = 1 << 29
Py_TPFLAGS_BASE_EXC_SUBCLASS = 1 << 30
Py_TPFLAGS_TYPE_SUBCLASS = 1 << 31


# NOTE(review): presumably the cap passed as "maxlen" when generating
# truncated reprs -- confirm against the users of this constant
MAX_OUTPUT_LEN = 1024

hexdigits = "0123456789abcdef"

# Preferred encoding of the locale that gdb's own python is running under
ENCODING = locale.getpreferredencoding()

# Fixed strings shared by the frame-handling code
FRAME_INFO_OPTIMIZED_OUT = '(frame information optimized out)'
UNABLE_READ_INFO_PYTHON_FRAME = 'Unable to read information on python frame'
EVALFRAME = '_PyEval_EvalFrameDefault'
95
class NullPyObjectPtr(RuntimeError):
    '''Raised when a field is requested from a NULL (PyObject*); see
    PyObjectPtr.field'''
98
99
def safety_limit(val):
    '''
    Clamp an integer scraped from the process being debugged to at most 1000.

    Values such as iteration counts and list sizes come from a process that
    may be arbitrarily broken; capping them keeps that breakage from hurting
    the gdb process too much.
    '''
    return 1000 if 1000 < val else val
105
106
def safe_range(val):
    '''
    Like range(val), except that the upper bound is first converted to an int
    and capped by safety_limit(), in case the value came from corrupt data.
    '''
    upper_bound = safety_limit(int(val))
    return range(upper_bound)
111
try:
    os_fsencode = os.fsencode
except AttributeError:
    # This "os" module has no fsencode(): provide a stand-in
    def os_fsencode(filename):
        '''
        Encode a unicode filename using the filesystem encoding, mapping the
        code points U+DC80..U+DCFF back to raw bytes (the surrogateescape
        convention).  Anything that is not unicode is returned unchanged.
        '''
        if not isinstance(filename, unicode):
            return filename
        fs_encoding = sys.getfilesystemencoding()
        if fs_encoding == 'mbcs':
            # mbcs doesn't support surrogateescape
            return filename.encode(fs_encoding)
        pieces = []
        for ch in filename:
            codepoint = ord(ch)
            if 0xDC80 <= codepoint <= 0xDCFF:
                # surrogateescape error handler: recover the original byte
                pieces.append(chr(codepoint - 0xDC00))
            else:
                pieces.append(ch.encode(fs_encoding))
        return ''.join(pieces)
131
class StringTruncated(RuntimeError):
    '''Raised by TruncatedStringIO.write when a write would take the buffer
    past its maximum length'''
134
class TruncatedStringIO(object):
    '''
    Minimal write-only string buffer in the spirit of io.StringIO which
    enforces an optional maximum length: the write that would exceed it
    stores as much as fits and then raises StringTruncated.
    '''
    def __init__(self, maxlen=None):
        self.maxlen = maxlen
        self._val = ''

    def write(self, data):
        '''
        Append "data" to the buffer.

        With a truthy maxlen, a write that would make the buffer longer than
        maxlen appends only the part that still fits and raises
        StringTruncated.  A falsy maxlen (None or 0) means no limit.
        '''
        limit = self.maxlen
        if limit and len(data) + len(self._val) > limit:
            # Truncation: keep what still fits, then signal the caller
            room = limit - len(self._val)
            self._val += data[0:room]
            raise StringTruncated()

        self._val += data

    def getvalue(self):
        '''Return everything written so far'''
        return self._val
153
class PyObjectPtr(object):
    """
    Wrapper around a gdb.Value that is a (PyObject*) in the inferior
    process, or a pointer to a more specific struct e.g. (PyBytesObject*)

    Every refined PyObject type that we care about gets its own subclass.

    Bear in mind that at any point the wrapped pointer may be NULL, or may
    point at corrupt data: this is the debugger, after all.
    """
    _typename = 'PyObject'

    def __init__(self, gdbval, cast_to=None):
        # Only cast when a target type was actually supplied
        self._gdbval = gdbval.cast(cast_to) if cast_to else gdbval

    def field(self, name):
        '''
        Return the gdb.Value of the field called "name" within the PyObject,
        coping with some python 2 versus python 3 differences.

        Various libpython types are defined using the "PyObject_HEAD" and
        "PyObject_VAR_HEAD" macros.

        In Python 2, these are defined so that "ob_type" and (for a var
        object) "ob_size" are fields of the type in question.

        In Python 3, this is defined as an embedded PyVarObject type thus:
           PyVarObject ob_base;
        so that the "ob_size" field is located inside the "ob_base" field, and
        the "ob_type" is most easily accessed by casting back to a (PyObject*).

        Raises NullPyObjectPtr if the wrapped pointer is NULL.
        '''
        if self.is_null():
            raise NullPyObjectPtr(self)

        # The two header fields are read by viewing the object through the
        # struct that declares them directly
        if name == 'ob_type':
            header_type = PyObjectPtr.get_gdb_type()
        elif name == 'ob_size':
            header_type = PyVarObjectPtr.get_gdb_type()
        else:
            # General case: look it up inside the object:
            return self._gdbval.dereference()[name]

        return self._gdbval.cast(header_type).dereference()[name]

    def pyop_field(self, name):
        '''
        Like field(), but for a field that is itself a PyObject*: the result
        is wrapped via PyObjectPtr.from_pyobject_ptr.
        '''
        raw_field = self.field(name)
        return PyObjectPtr.from_pyobject_ptr(raw_field)

    def write_field_repr(self, name, out, visited):
        '''
        Write the representation of the PyObject* field called "name" to the
        file-like object "out"
        '''
        self.pyop_field(name).write_repr(out, visited)

    def get_truncated_repr(self, maxlen):
        '''
        Return a repr-like string for the data, cut off once it would exceed
        "maxlen" in length; the traversal of the object graph stops as soon
        as that happens, and '...(truncated)' is appended to what was
        collected.
        '''
        out = TruncatedStringIO(maxlen)
        try:
            self.write_repr(out, set())
        except StringTruncated:
            # Truncation occurred:
            return out.getvalue() + '...(truncated)'
        else:
            # No truncation occurred:
            return out.getvalue()

    def type(self):
        '''Return a PyTypeObjectPtr wrapping this object's "ob_type"'''
        ob_type = self.field('ob_type')
        return PyTypeObjectPtr(ob_type)

    def is_null(self):
        '''Is the wrapped pointer NULL?'''
        return int(self._gdbval) == 0

    def is_optimized_out(self):
        '''
        Report whether the debugger is unable to see the value of the
        underlying PyObject* (gdb's "is_optimized_out").

        This can vary with the precise version of the compiler used to build
        Python, and the precise version of gdb.

        See e.g. https://bugzilla.redhat.com/show_bug.cgi?id=556975 with
        PyEval_EvalFrameEx's "f"
        '''
        return self._gdbval.is_optimized_out

    def safe_tp_name(self):
        '''
        Return the "tp_name" of this object's type as a string, or 'unknown'
        if it cannot be read or decoded.
        '''
        try:
            return self.type().field('tp_name').string()
        # NullPyObjectPtr: NULL tp_name?
        # RuntimeError: Can't even read the object at all?
        # UnicodeDecodeError: Failed to decode tp_name bytestring
        except (NullPyObjectPtr, RuntimeError, UnicodeDecodeError):
            return 'unknown'

    def proxyval(self, visited):
        '''
        Build a value inside the gdb process that stands in for the value in
        the inferior process, whilst (hopefully) avoiding crashes when the
        remote data is corrupt.

        Derived classes override this; e.g. an integer object holding 42 in
        the inferior process should become an int(42) in this process.  This
        base implementation returns a placeholder whose repr() shows the type
        name and the remote address.

        visited: a set of all gdb.Value pyobject pointers already visited
        whilst generating this value (to guard against infinite recursion when
        visiting object graphs with loops).  Analogous to Py_ReprEnter and
        Py_ReprLeave
        '''

        class FakeRepr(object):
            """
            Stand-in for a non-descript PyObject* value in the inferior
            process for which there is no custom scraper; exists only to
            have a sane repr().
            """

            def __init__(self, tp_name, address):
                self.tp_name = tp_name
                self.address = address

            def __repr__(self):
                # For the NULL pointer, we have no way of knowing a type, so
                # special-case it as per
                # http://bugs.python.org/issue8032#msg100882
                if self.address == 0:
                    return '0x0'
                return '<%s at remote 0x%x>' % (self.tp_name, self.address)

        tp_name = self.safe_tp_name()
        return FakeRepr(tp_name, int(self._gdbval))

    def write_repr(self, out, visited):
        '''
        Write a string representation of the value scraped from the inferior
        process to "out", a file-like object.

        This default builds the whole proxy value and writes its repr();
        since that could involve a lot of work for complicated objects,
        derived classes specialize it.
        '''
        proxy = self.proxyval(visited)
        return out.write(repr(proxy))

    @classmethod
    def subclass_from_type(cls, t):
        '''
        Choose the PyObjectPtr subclass to use for objects whose type is "t",
        a PyTypeObjectPtr wrapping a gdb.Value that's a (PyTypeObject*).

        Ideally we would compare against the symbols for the global type
        objects (e.g. PyList_Type), but reading those through
        gdb.lookup_symbol(...).value was failing with
          NotImplementedError: Symbol type not yet supported in Python scripts.

        So instead: a few types are recognized by comparing tp_name (they
        don't seem to be distinguishable through flags), and the rest by
        testing bits of tp_flags.  If the type cannot be read at all, or
        nothing matches, "cls" itself is returned.
        '''
        try:
            tp_name = t.field('tp_name').string()
            tp_flags = int(t.field('tp_flags'))
        # RuntimeError: NULL pointers
        # UnicodeDecodeError: string() fails to decode the bytestring
        except (RuntimeError, UnicodeDecodeError):
            # Handle any kind of error e.g. NULL ptrs by simply using the base
            # class
            return cls

        name_map = {'bool': PyBoolObjectPtr,
                    'classobj': PyClassObjectPtr,
                    'NoneType': PyNoneStructPtr,
                    'frame': PyFrameObjectPtr,
                    'set' : PySetObjectPtr,
                    'frozenset' : PySetObjectPtr,
                    'builtin_function_or_method' : PyCFunctionObjectPtr,
                    'method-wrapper': wrapperobject,
                    }
        by_name = name_map.get(tp_name)
        if by_name is not None:
            return by_name

        # Checked in this order; the first flag that is set wins.
        # Py_TPFLAGS_TYPE_SUBCLASS is currently not mapped to PyTypeObjectPtr.
        flag_map = (
            (Py_TPFLAGS_HEAPTYPE, HeapTypeObjectPtr),
            (Py_TPFLAGS_LONG_SUBCLASS, PyLongObjectPtr),
            (Py_TPFLAGS_LIST_SUBCLASS, PyListObjectPtr),
            (Py_TPFLAGS_TUPLE_SUBCLASS, PyTupleObjectPtr),
            (Py_TPFLAGS_BYTES_SUBCLASS, PyBytesObjectPtr),
            (Py_TPFLAGS_UNICODE_SUBCLASS, PyUnicodeObjectPtr),
            (Py_TPFLAGS_DICT_SUBCLASS, PyDictObjectPtr),
            (Py_TPFLAGS_BASE_EXC_SUBCLASS, PyBaseExceptionObjectPtr),
        )
        for flag, subclass in flag_map:
            if tp_flags & flag:
                return subclass

        # Use the base class:
        return cls

    @classmethod
    def from_pyobject_ptr(cls, gdbval):
        '''
        Wrap "gdbval" in the most specific class that subclass_from_type can
        determine for it, casting the pointer to that class's gdb type.
        '''
        try:
            generic = PyObjectPtr(gdbval)
            cls = cls.subclass_from_type(generic.type())
            return cls(gdbval, cast_to=cls.get_gdb_type())
        except RuntimeError:
            # Handle any kind of error e.g. NULL ptrs: fall through and wrap
            # the pointer without a cast, using whichever class "cls" is
            # bound to at this point
            pass
        return cls(gdbval)

    @classmethod
    def get_gdb_type(cls):
        '''Return the gdb.Type for a pointer to the struct named by
        cls._typename'''
        struct_type = gdb.lookup_type(cls._typename)
        return struct_type.pointer()

    def as_address(self):
        '''Return the wrapped pointer as a plain integer'''
        return int(self._gdbval)
400
class PyVarObjectPtr(PyObjectPtr):
    """
    Class wrapping a gdb.Value that's a PyVarObject*; PyObjectPtr.field uses
    its gdb type to read "ob_size".
    """
    _typename = 'PyVarObject'
403
class ProxyAlreadyVisited(object):
    '''
    Proxy value returned in place of an object that has already been visited
    during the current traversal, so that loops in the object graph do not
    cause infinite recursion.  Its repr() is the text given at construction.

    Analogous to the values emitted by the users of Py_ReprEnter and Py_ReprLeave
    '''
    def __init__(self, rep):
        # Text to hand back from repr(), e.g. '[...]'
        self._rep = rep

    def __repr__(self):
        return self._rep
416
417
def _write_instance_repr(out, visited, name, pyop_attrdict, address):
    '''
    Shared code for use by all classes:
    write a representation of an instance to file-like object "out", in the
    form <NAME(KEY=VALUE, ...) at remote 0xADDRESS>

    out: file-like object receiving the text
    visited: set of addresses already visited in this traversal
    name: the type name to show
    pyop_attrdict: a PyKeysValuesPair or PyDictObjectPtr holding the instance
        attributes; for anything else (e.g. None) the parenthesized attribute
        list is left out altogether
    address: the remote address, as an integer
    '''
    out.write('<')
    out.write(name)

    # Write dictionary of instance attributes:
    if isinstance(pyop_attrdict, (PyKeysValuesPair, PyDictObjectPtr)):
        out.write('(')
        for index, (pyop_arg, pyop_val) in enumerate(pyop_attrdict.iteritems()):
            if index:
                out.write(', ')
            # The proxy for a key is not guaranteed to be a string (e.g. the
            # placeholder used for objects without a custom scraper), and
            # "out" only accepts strings.  Format it with %s, as
            # InstanceProxy.__repr__ does; a string key comes out unchanged.
            out.write('%s' % (pyop_arg.proxyval(visited),))
            out.write('=')
            pyop_val.write_repr(out, visited)
        out.write(')')
    out.write(' at remote 0x%x>' % address)
438
439
class InstanceProxy(object):
    '''
    Proxy value standing in for an instance of a class in the inferior
    process: records the class name, the attributes and the remote address,
    and renders them via repr().
    '''

    def __init__(self, cl_name, attrdict, address):
        self.cl_name = cl_name
        self.attrdict = attrdict
        self.address = address

    def __repr__(self):
        if not isinstance(self.attrdict, dict):
            # No usable attribute dictionary: show just the name and address
            return '<%s at remote 0x%x>' % (self.cl_name,
                                            self.address)

        kwargs = ', '.join("%s=%r" % (arg, val)
                           for arg, val in self.attrdict.items())
        return '<%s(%s) at remote 0x%x>' % (self.cl_name,
                                            kwargs, self.address)
456
def _PyObject_VAR_SIZE(typeobj, nitems):
    '''
    Compute tp_basicsize + nitems * tp_itemsize for the given type, rounded
    up to a multiple of the pointer size, as a gdb.Value cast to size_t.

    typeobj: wrapper for the type object, providing field()
    nitems: number of items in the variable-sized part

    The gdb.Type for size_t is looked up on first use and then cached on the
    function object.
    '''
    size_t = _PyObject_VAR_SIZE._type_size_t
    if size_t is None:
        size_t = _PyObject_VAR_SIZE._type_size_t = gdb.lookup_type('size_t')

    payload = (typeobj.field('tp_basicsize') +
               nitems * typeobj.field('tp_itemsize'))
    # Adding (pointer size - 1) and then clearing those low bits rounds up
    align_mask = _sizeof_void_p() - 1
    return ((payload + align_mask) & ~align_mask).cast(size_t)
_PyObject_VAR_SIZE._type_size_t = None
467
class HeapTypeObjectPtr(PyObjectPtr):
    """
    Class wrapping a gdb.Value for an instance of a heap type, i.e. a type
    whose tp_flags include Py_TPFLAGS_HEAPTYPE (this is the class chosen for
    such types by PyObjectPtr.subclass_from_type).
    """
    _typename = 'PyObject'

    def get_attr_dict(self):
        '''
        Find the instance's attribute dictionary through the type's
        "tp_dictoffset" and return it wrapped via
        PyObjectPtr.from_pyobject_ptr.

        Returns None if tp_dictoffset is zero, or if a RuntimeError is raised
        along the way.
        '''
        try:
            typeobj = self.type()
            dictoffset = int_from_int(typeobj.field('tp_dictoffset'))
            if dictoffset == 0:
                # Not found
                return None

            if dictoffset < 0:
                # A negative offset is made positive by adding the size of
                # the (variable-sized) object
                var_ptr_t = gdb.lookup_type('PyVarObject').pointer()
                nitems = int_from_int(self._gdbval.cast(var_ptr_t)['ob_size'])
                if nitems < 0:
                    nitems = -nitems
                dictoffset += _PyObject_VAR_SIZE(typeobj, nitems)
                assert dictoffset % _sizeof_void_p() == 0

            # Step "dictoffset" bytes from the start of the object, then
            # read the PyObject* stored there
            slot = self._gdbval.cast(_type_char_ptr()) + dictoffset
            slot = slot.cast(PyObjectPtr.get_gdb_type().pointer())
            return PyObjectPtr.from_pyobject_ptr(slot.dereference())
        except RuntimeError:
            # Corrupt data somewhere; fail safe
            return None

    def get_keys_values(self):
        '''
        Return a PyKeysValuesPair for the instance's attributes, or None if
        the type lacks Py_TPFLAGS_MANAGED_DICT or the instance's
        PyDictValues pointer is NULL.
        '''
        typeobj = self.type()
        tp_flags = int_from_int(typeobj.field('tp_flags'))
        if not tp_flags & Py_TPFLAGS_MANAGED_DICT:
            return None

        # View the object as an array of (PyDictValues*) and read the slot 4
        # pointers before the start of the object
        values_ptr_ptr_t = gdb.lookup_type("PyDictValues").pointer().pointer()
        slot = self._gdbval.cast(values_ptr_ptr_t) - 4
        values_ptr = slot.dereference()
        if int(values_ptr) == 0:
            return None
        values_array = values_ptr['values']
        return PyKeysValuesPair(self.get_cached_keys(), values_array)

    def get_cached_keys(self):
        '''Return the "ht_cached_keys" field of this object's type, viewing
        the type as a PyHeapTypeObject'''
        typeobj = self.type()
        heap_type_ptr_t = gdb.lookup_type("PyHeapTypeObject").pointer()
        return typeobj._gdbval.cast(heap_type_ptr_t)['ht_cached_keys']

    def proxyval(self, visited):
        '''
        Support for classes: return an InstanceProxy carrying the type name,
        the attributes and the remote address.

        Currently we just locate the dictionary using a transliteration to
        python of _PyObject_GetDictPtr, ignoring descriptors
        '''
        # Guard against infinite loops:
        address = self.as_address()
        if address in visited:
            return ProxyAlreadyVisited('<...>')
        visited.add(address)

        pyop_attr_dict = self.get_attr_dict()
        keys_values = self.get_keys_values()
        # The keys/values pair, when there is one, takes precedence over the
        # attribute dictionary
        if keys_values:
            attr_dict = keys_values.proxyval(visited)
        elif pyop_attr_dict:
            attr_dict = pyop_attr_dict.proxyval(visited)
        else:
            attr_dict = {}
        tp_name = self.safe_tp_name()

        # Class:
        return InstanceProxy(tp_name, attr_dict, int(self._gdbval))

    def write_repr(self, out, visited):
        '''Write the instance to "out" using _write_instance_repr, or
        '<...>' if it has already been visited'''
        # Guard against infinite loops:
        address = self.as_address()
        if address in visited:
            out.write('<...>')
            return
        visited.add(address)

        pyop_attrs = self.get_keys_values()
        if not pyop_attrs:
            pyop_attrs = self.get_attr_dict()
        _write_instance_repr(out, visited,
                             self.safe_tp_name(), pyop_attrs, self.as_address())
555
class ProxyException(Exception):
    '''
    Proxy value standing in for an exception in the inferior process; its
    repr() is the type name immediately followed by the repr of the args.
    '''
    def __init__(self, tp_name, args):
        self.tp_name = tp_name
        self.args = args

    def __repr__(self):
        return str(self.tp_name) + repr(self.args)
563
class PyBaseExceptionObjectPtr(PyObjectPtr):
    """
    Class wrapping a gdb.Value that's a PyBaseExceptionObject* i.e. an exception
    within the process being debugged.
    """
    _typename = 'PyBaseExceptionObject'

    def proxyval(self, visited):
        '''Return a ProxyException built from the type name and the proxy
        of the "args" field'''
        # Guard against infinite loops:
        address = self.as_address()
        if address in visited:
            return ProxyAlreadyVisited('(...)')
        visited.add(address)

        args_proxy = self.pyop_field('args').proxyval(visited)
        return ProxyException(self.safe_tp_name(), args_proxy)

    def write_repr(self, out, visited):
        '''Write the type name followed by the repr of the "args" field'''
        # Guard against infinite loops:
        address = self.as_address()
        if address in visited:
            out.write('(...)')
            return
        visited.add(address)

        out.write(self.safe_tp_name())
        self.write_field_repr('args', out, visited)
589
class PyClassObjectPtr(PyObjectPtr):
    """
    Class wrapping a gdb.Value that's a PyClassObject* i.e. an instance of
    <classobj> within the process being debugged; chosen by
    PyObjectPtr.subclass_from_type for types named 'classobj'.
    """
    _typename = 'PyClassObject'
596
597
class BuiltInFunctionProxy(object):
    '''Proxy value for a built-in function in the inferior process,
    identified by the name of its method definition'''
    def __init__(self, ml_name):
        self.ml_name = ml_name

    def __repr__(self):
        return "<built-in function %s>" % self.ml_name
604
class BuiltInMethodProxy(object):
    '''Proxy value for a built-in method in the inferior process: the
    method's name plus the wrapper for the object it is bound to'''
    def __init__(self, ml_name, pyop_m_self):
        self.ml_name = ml_name
        self.pyop_m_self = pyop_m_self

    def __repr__(self):
        bound_to = self.pyop_m_self
        details = (self.ml_name,
                   bound_to.safe_tp_name(),
                   bound_to.as_address())
        return '<built-in method %s of %s object at remote 0x%x>' % details
616
class PyCFunctionObjectPtr(PyObjectPtr):
    """
    Class wrapping a gdb.Value that's a PyCFunctionObject*
    (see Include/methodobject.h and Objects/methodobject.c)
    """
    _typename = 'PyCFunctionObject'

    def proxyval(self, visited):
        '''
        Return a BuiltInFunctionProxy when "m_self" is NULL, otherwise a
        BuiltInMethodProxy bound to the "m_self" object.
        '''
        method_def = self.field('m_ml') # m_ml is a (PyMethodDef*)
        try:
            ml_name = method_def['ml_name'].string()
        except UnicodeDecodeError:
            ml_name = '<ml_name:UnicodeDecodeError>'

        pyop_m_self = self.pyop_field('m_self')
        if pyop_m_self.is_null():
            return BuiltInFunctionProxy(ml_name)
        return BuiltInMethodProxy(ml_name, pyop_m_self)
636
637# Python implementation of location table parsing algorithm
def read(it):
    '''Take the next item from iterator "it" and return its ordinal;
    StopIteration propagates once the iterator is exhausted'''
    item = next(it)
    return ord(item)
640
def read_varint(it):
    '''
    Decode an unsigned variable-length integer from iterator "it".

    Each item read contributes its low 6 bits, least significant group
    first; bit 6 (value 64) being set means that another item follows.
    '''
    value = 0
    shift = 0
    while True:
        chunk = read(it)
        value |= (chunk & 63) << shift
        if not chunk & 64:
            return value
        shift += 6
650
def read_signed_varint(it):
    '''
    Decode a signed variable-length integer from iterator "it": the lowest
    bit of the unsigned value is the sign (set means negative) and the
    remaining bits are the magnitude.
    '''
    uval = read_varint(it)
    magnitude = uval >> 1
    return -magnitude if uval & 1 else magnitude
657
def parse_location_table(firstlineno, linetable):
    '''
    Generator decoding the location table "linetable", yielding one
    (addr, end_addr, line) tuple per entry.

    firstlineno: line number that the deltas in the table are relative to
    linetable: iterable whose items are accepted by read()

    In the first item of each entry, bits 3-6 are a code selecting the entry
    format and the low 3 bits hold (length - 1); "addr" starts at 0 and each
    entry covers the range from addr to addr + length.  "line" is None for
    entries with code 15.  Column information is read past but not reported.
    '''
    line = firstlineno
    addr = 0
    it = iter(linetable)
    while True:
        try:
            first_byte = read(it)
        except StopIteration:
            # Clean end of table
            return
        code = (first_byte >> 3) & 15
        end_addr = addr + (first_byte & 7) + 1
        if code == 15:
            # Entry carrying no line number
            entry_line = None
        else:
            if code == 14: # Long form
                line += read_signed_varint(it)
                read_varint(it)  # end line, as a delta from line (unused)
                read_varint(it)  # column (unused)
                read_varint(it)  # end column (unused)
            elif code == 13: # No column
                line += read_signed_varint(it)
            elif code in (10, 11, 12): # new line
                line += code - 10
                read(it)  # column (unused)
                read(it)  # end column (unused)
            else:
                assert (0 <= code < 10)
                read(it)  # second byte, holding column information (unused)
            entry_line = line
        yield addr, end_addr, entry_line
        addr = end_addr
694
class PyCodeObjectPtr(PyObjectPtr):
    """
    Class wrapping a gdb.Value that's a PyCodeObject* i.e. a <code> instance
    within the process being debugged.
    """
    _typename = 'PyCodeObject'

    def addr2line(self, addrq):
        '''
        Return the line number for the bytecode offset "addrq", found by
        scanning the entries decoded from "co_linetable".

        Analogous to PyCode_Addr2Line.  A negative "addrq" gives
        co_firstlineno.  The result can be None, for a table entry that
        carries no line number.
        '''
        co_linetable = self.pyop_field('co_linetable').proxyval(set())

        # The starting line is co_firstlineno as per PyCode_Addr2Line,
        # not 0
        lineno = int_from_int(self.field('co_firstlineno'))

        if addrq < 0:
            return lineno
        for start, stop, line in parse_location_table(lineno, co_linetable):
            if start <= addrq < stop:
                return line
        assert False, "Unreachable"
722
723
def items_from_keys_and_values(keys, values):
    '''
    Generator yielding (PyObjectPtr key, PyObjectPtr value) pairs, taking
    key i from the entries of "keys" and value i from the array "values";
    positions whose value is NULL are skipped.
    '''
    entries, nentries = PyDictObjectPtr._get_entries(keys)
    for index in safe_range(nentries):
        entry = entries[index]
        pyop_value = PyObjectPtr.from_pyobject_ptr(values[index])
        if pyop_value.is_null():
            continue
        pyop_key = PyObjectPtr.from_pyobject_ptr(entry['me_key'])
        yield (pyop_key, pyop_value)
732
class PyKeysValuesPair:
    '''
    A keys table together with a separate values array, offering the same
    iteritems()/proxyval() interface as PyDictObjectPtr.
    '''

    def __init__(self, keys, values):
        self.keys = keys
        self.values = values

    def iteritems(self):
        '''Yield (PyObjectPtr key, PyObjectPtr value) pairs'''
        return items_from_keys_and_values(self.keys, self.values)

    def proxyval(self, visited):
        '''Return a dict mapping the proxy of each key to the proxy of its
        value'''
        proxies = {}
        for pyop_key, pyop_value in self.iteritems():
            # Key first, then value
            key_proxy = pyop_key.proxyval(visited)
            value_proxy = pyop_value.proxyval(visited)
            proxies[key_proxy] = value_proxy
        return proxies
749
class PyDictObjectPtr(PyObjectPtr):
    """
    Class wrapping a gdb.Value that's a PyDictObject* i.e. a dict instance
    within the process being debugged.
    """
    _typename = 'PyDictObject'

    def iteritems(self):
        '''
        Generator over the dict's contents as (PyObjectPtr key,
        PyObjectPtr value) pairs, analogous to dict.iteritems(); entries
        whose value is NULL are skipped.
        '''
        keys = self.field('ma_keys')
        values = self.field('ma_values')
        if int(values):
            # ma_values is non-NULL: the values are held in a separate
            # array rather than in the key entries
            for pair in items_from_keys_and_values(keys, values['values']):
                yield pair
            return

        entries, nentries = self._get_entries(keys)
        for index in safe_range(nentries):
            entry = entries[index]
            pyop_value = PyObjectPtr.from_pyobject_ptr(entry['me_value'])
            if pyop_value.is_null():
                continue
            pyop_key = PyObjectPtr.from_pyobject_ptr(entry['me_key'])
            yield (pyop_key, pyop_value)

    def proxyval(self, visited):
        '''Return a dict mapping the proxy of each key to the proxy of its
        value, or a '{...}' placeholder if already visited'''
        # Guard against infinite loops:
        address = self.as_address()
        if address in visited:
            return ProxyAlreadyVisited('{...}')
        visited.add(address)

        proxies = {}
        for pyop_key, pyop_value in self.iteritems():
            # Key first, then value
            key_proxy = pyop_key.proxyval(visited)
            value_proxy = pyop_value.proxyval(visited)
            proxies[key_proxy] = value_proxy
        return proxies

    def write_repr(self, out, visited):
        '''Write the dict to "out" in {key: value, ...} form, or '{...}' if
        already visited'''
        # Guard against infinite loops:
        address = self.as_address()
        if address in visited:
            out.write('{...}')
            return
        visited.add(address)

        out.write('{')
        for index, (pyop_key, pyop_value) in enumerate(self.iteritems()):
            if index:
                out.write(', ')
            pyop_key.write_repr(out, visited)
            out.write(': ')
            pyop_value.write_repr(out, visited)
        out.write('}')

    @staticmethod
    def _get_entries(keys):
        '''
        Locate the entry array of the keys object "keys".

        Returns (pointer to the first entry, dk_nentries).  The entries
        begin after the index table at "dk_indices", whose per-slot width
        (1, 2, 4 or 8 bytes) depends on the table size 1 << dk_log2_size;
        the entry type is PyDictKeyEntry when dk_kind is 0 and
        PyDictUnicodeEntry otherwise.
        '''
        dk_nentries = int(keys['dk_nentries'])
        dk_size = 1<<int(keys['dk_log2_size'])

        if dk_size <= 0xFF:
            index_width = 1
        elif dk_size <= 0xFFFF:
            index_width = 2
        elif dk_size <= 0xFFFFFFFF:
            index_width = 4
        else:
            index_width = 8
        offset = index_width * dk_size

        # Skip over the index table, counting in bytes
        ent_addr = keys['dk_indices'].address
        ent_addr = ent_addr.cast(_type_unsigned_char_ptr()) + offset
        if int(keys['dk_kind']) == 0:  # DICT_KEYS_GENERAL
            entry_typename = 'PyDictKeyEntry'
        else:
            entry_typename = 'PyDictUnicodeEntry'
        ent_addr = ent_addr.cast(gdb.lookup_type(entry_typename).pointer())

        return ent_addr, dk_nentries
833
834
class PyListObjectPtr(PyObjectPtr):
    """
    Class wrapping a gdb.Value that's a PyListObject* i.e. a list instance
    within the process being debugged.
    """
    _typename = 'PyListObject'

    def __getitem__(self, i):
        # Get the gdb.Value for the (PyObject*) with the given index:
        items = self.field('ob_item')
        return items[i]

    def proxyval(self, visited):
        '''Return a list of the proxies of the elements, or a '[...]'
        placeholder if already visited'''
        # Guard against infinite loops:
        address = self.as_address()
        if address in visited:
            return ProxyAlreadyVisited('[...]')
        visited.add(address)

        length = int_from_int(self.field('ob_size'))
        proxies = []
        for index in safe_range(length):
            element = PyObjectPtr.from_pyobject_ptr(self[index])
            proxies.append(element.proxyval(visited))
        return proxies

    def write_repr(self, out, visited):
        '''Write the list to "out" in [a, b, ...] form, or '[...]' if
        already visited'''
        # Guard against infinite loops:
        address = self.as_address()
        if address in visited:
            out.write('[...]')
            return
        visited.add(address)

        out.write('[')
        length = int_from_int(self.field('ob_size'))
        for index in safe_range(length):
            if index > 0:
                out.write(', ')
            PyObjectPtr.from_pyobject_ptr(self[index]).write_repr(out, visited)
        out.write(']')
867
class PyLongObjectPtr(PyObjectPtr):
    """
    Class wrapping a gdb.Value that's a PyLongObject* i.e. an int instance
    within the process being debugged.
    """
    _typename = 'PyLongObject'

    def proxyval(self, visited):
        '''
        Return the value as an int in this process.

        Python's Include/longobjrep.h has this declaration:
           struct _longobject {
               PyObject_VAR_HEAD
               digit ob_digit[1];
           };

        with this description:
            The absolute value of a number is equal to
                 SUM(for i=0 through abs(ob_size)-1) ob_digit[i] * 2**(SHIFT*i)
            Negative numbers are represented with ob_size < 0;
            zero is represented by ob_size == 0.

        where SHIFT can be either:
            #define PyLong_SHIFT        30
            #define PyLong_SHIFT        15

        Here the shift is taken to be 15 when the "digit" type is 2 bytes
        wide and 30 otherwise; the number of digits read is capped by
        safe_range().
        '''
        ob_size = int(self.field('ob_size'))
        if ob_size == 0:
            return 0

        ob_digit = self.field('ob_digit')
        shift = 15 if gdb.lookup_type('digit').sizeof == 2 else 30

        magnitude = sum([int(ob_digit[i]) << (shift * i)
                         for i in safe_range(abs(ob_size))])
        return -magnitude if ob_size < 0 else magnitude

    def write_repr(self, out, visited):
        # Write this out as a Python 3 int literal, i.e. without the "L" suffix
        out.write(str(self.proxyval(visited)))
911
912
class PyBoolObjectPtr(PyLongObjectPtr):
    """
    Class wrapping a gdb.Value that's a PyBoolObject* i.e. one of the two
    <bool> instances (Py_True/Py_False) within the process being debugged.
    """
    def proxyval(self, visited):
        '''Return True or False according to the underlying integer value'''
        return bool(PyLongObjectPtr.proxyval(self, visited))
923
class PyNoneStructPtr(PyObjectPtr):
    """
    Class wrapping a gdb.Value that's a PyObject* pointing to the
    singleton (we hope) _Py_NoneStruct with ob_type PyNone_Type
    """
    _typename = 'PyObject'

    def proxyval(self, visited):
        '''Return None: the proxy value does not depend on the inferior's
        data, and "visited" is unused.'''
        return None
933
class PyFrameObjectPtr(PyObjectPtr):
    """
    Class wrapping a gdb.Value that's a (PyFrameObject*) within the process
    being debugged.

    Unless the value is optimized out, every method delegates to a PyFramePtr
    built from the "f_frame" field.  When it is optimized out, the methods
    return a placeholder instead (an empty sequence, None, or
    FRAME_INFO_OPTIMIZED_OUT).
    """
    _typename = 'PyFrameObject'

    def __init__(self, gdbval, cast_to=None):
        PyObjectPtr.__init__(self, gdbval, cast_to)

        # self._frame only exists when the value can be read; every method
        # below checks is_optimized_out() before touching it.
        if not self.is_optimized_out():
            self._frame = PyFramePtr(self.field('f_frame'))

    def iter_locals(self):
        '''
        Yield a sequence of (name,value) pairs of PyObjectPtr instances, for
        the local variables of this frame
        '''
        if self.is_optimized_out():
            # Return an empty sequence (not None) so that callers can always
            # iterate over the result, as with iter_globals/iter_builtins.
            return ()
        return self._frame.iter_locals()

    def iter_globals(self):
        '''
        Yield a sequence of (name,value) pairs of PyObjectPtr instances, for
        the global variables of this frame
        '''
        if self.is_optimized_out():
            return ()
        return self._frame.iter_globals()

    def iter_builtins(self):
        '''
        Yield a sequence of (name,value) pairs of PyObjectPtr instances, for
        the builtin variables
        '''
        if self.is_optimized_out():
            return ()
        return self._frame.iter_builtins()

    def get_var_by_name(self, name):
        '''
        Look up the named variable via the wrapped PyFramePtr, returning its
        (PyObjectPtr, scope) pair.

        Returns (None, None) if the frame is optimized out.
        '''
        if self.is_optimized_out():
            return None, None
        return self._frame.get_var_by_name(name)

    def filename(self):
        '''Get the path of the current Python source file, as a string'''
        if self.is_optimized_out():
            return FRAME_INFO_OPTIMIZED_OUT
        return self._frame.filename()

    def current_line_num(self):
        '''Get current line number as an integer (1-based)

        Translated from PyFrame_GetLineNumber and PyCode_Addr2Line

        See Objects/lnotab_notes.txt

        Returns None if the frame is optimized out.
        '''
        if self.is_optimized_out():
            return None
        return self._frame.current_line_num()

    def current_line(self):
        '''Get the text of the current source line as a string, with a trailing
        newline character'''
        if self.is_optimized_out():
            return FRAME_INFO_OPTIMIZED_OUT
        return self._frame.current_line()

    def write_repr(self, out, visited):
        '''Write a representation of the frame to the file-like object "out"'''
        if self.is_optimized_out():
            out.write(FRAME_INFO_OPTIMIZED_OUT)
            return
        return self._frame.write_repr(out, visited)

    def print_traceback(self):
        '''Write a traceback-style entry for this frame to sys.stdout'''
        if self.is_optimized_out():
            sys.stdout.write('  %s\n' % FRAME_INFO_OPTIMIZED_OUT)
            return
        return self._frame.print_traceback()
1011
class PyFramePtr:
    '''
    Wrapper around the gdb.Value of an interpreter frame structure: the value
    found in a PyFrameObject's "f_frame" field (see PyFrameObjectPtr) or in
    another frame's "previous" field (see previous()).
    '''

    def __init__(self, gdbval):
        self._gdbval = gdbval

        # Cache the code-object details used by the reporting methods.  These
        # attributes are not set when the value is optimized out.
        if not self.is_optimized_out():
            self.co = self._f_code()
            self.co_name = self.co.pyop_field('co_name')
            self.co_filename = self.co.pyop_field('co_filename')

            self.f_lasti = self._f_lasti()
            self.co_nlocals = int_from_int(self.co.field('co_nlocals'))
            pnames = self.co.field('co_localsplusnames')
            self.co_localsplusnames = PyTupleObjectPtr.from_pyobject_ptr(pnames)

    def is_optimized_out(self):
        '''Return the "is_optimized_out" attribute of the wrapped gdb.Value'''
        return self._gdbval.is_optimized_out

    def iter_locals(self):
        '''
        Yield a sequence of (name,value) pairs of PyObjectPtr instances, for
        the local variables of this frame
        '''
        if self.is_optimized_out():
            return


        obj_ptr_ptr = gdb.lookup_type("PyObject").pointer().pointer()

        localsplus = self._gdbval["localsplus"].cast(obj_ptr_ptr)

        for i in safe_range(self.co_nlocals):
            pyop_value = PyObjectPtr.from_pyobject_ptr(localsplus[i])
            if pyop_value.is_null():
                # Slots holding a NULL pointer are not reported
                continue
            pyop_name = PyObjectPtr.from_pyobject_ptr(self.co_localsplusnames[i])
            yield (pyop_name, pyop_value)

    def _f_special(self, name, convert=PyObjectPtr.from_pyobject_ptr):
        '''Read the field "name" of the frame and pass it through "convert"'''
        return convert(self._gdbval[name])

    def _f_globals(self):
        return self._f_special("f_globals")

    def _f_builtins(self):
        return self._f_special("f_builtins")

    def _f_code(self):
        return self._f_special("f_code", PyCodeObjectPtr.from_pyobject_ptr)

    def _f_nlocalsplus(self):
        return self._f_special("nlocalsplus", int_from_int)

    def _f_lasti(self):
        '''
        Return, as an int, the difference between "prev_instr" and the start
        of the code object's "co_code_adaptive", both taken as
        (_Py_CODEUNIT*) values.
        '''
        codeunit_p = gdb.lookup_type("_Py_CODEUNIT").pointer()
        prev_instr = self._gdbval["prev_instr"]
        first_instr = self._f_code().field("co_code_adaptive").cast(codeunit_p)
        return int(prev_instr - first_instr)

    def is_entry(self):
        '''Return the frame's "is_entry" field as a bool'''
        return self._f_special("is_entry", bool)

    def previous(self):
        '''Return a PyFramePtr wrapping the frame's "previous" field'''
        return self._f_special("previous", PyFramePtr)

    def iter_globals(self):
        '''
        Yield a sequence of (name,value) pairs of PyObjectPtr instances, for
        the global variables of this frame
        '''
        if self.is_optimized_out():
            return ()

        pyop_globals = self._f_globals()
        return pyop_globals.iteritems()

    def iter_builtins(self):
        '''
        Yield a sequence of (name,value) pairs of PyObjectPtr instances, for
        the builtin variables
        '''
        if self.is_optimized_out():
            return ()

        pyop_builtins = self._f_builtins()
        return pyop_builtins.iteritems()

    def get_var_by_name(self, name):
        '''
        Look for the named local variable, returning a (PyObjectPtr, scope) pair
        where scope is a string 'local', 'global', 'builtin'

        If not found, return (None, None)
        '''
        # Search order: locals first, then globals, then builtins
        for pyop_name, pyop_value in self.iter_locals():
            if name == pyop_name.proxyval(set()):
                return pyop_value, 'local'
        for pyop_name, pyop_value in self.iter_globals():
            if name == pyop_name.proxyval(set()):
                return pyop_value, 'global'
        for pyop_name, pyop_value in self.iter_builtins():
            if name == pyop_name.proxyval(set()):
                return pyop_value, 'builtin'
        return None, None

    def filename(self):
        '''Get the path of the current Python source file, as a string'''
        if self.is_optimized_out():
            return FRAME_INFO_OPTIMIZED_OUT
        return self.co_filename.proxyval(set())

    def current_line_num(self):
        '''Get current line number as an integer (1-based)

        Translated from PyFrame_GetLineNumber and PyCode_Addr2Line

        See Objects/lnotab_notes.txt

        Returns None if the frame is optimized out or if the line number
        cannot be computed.
        '''
        if self.is_optimized_out():
            return None
        try:
            return self.co.addr2line(self.f_lasti)
        except Exception:
            # bpo-34989: addr2line() is a complex function, it can fail in many
            # ways. For example, it fails with a TypeError on "FakeRepr" if
            # gdb fails to load debug symbols. Use a catch-all "except
            # Exception" to make the whole function safe. The caller has to
            # handle None anyway for optimized Python.
            return None

    def current_line(self):
        '''Get the text of the current source line as a string, with a trailing
        newline character

        Returns FRAME_INFO_OPTIMIZED_OUT for an optimized-out frame, a
        "(failed to get frame line number)" message if the line number is
        unknown, and None if the source file cannot be read or does not have
        that many lines.
        '''
        if self.is_optimized_out():
            return FRAME_INFO_OPTIMIZED_OUT

        lineno = self.current_line_num()
        if lineno is None:
            return '(failed to get frame line number)'

        filename = self.filename()
        try:
            with open(os_fsencode(filename), 'r', encoding="utf-8") as fp:
                lines = fp.readlines()
        except (IOError, UnicodeDecodeError):
            # The file is missing or unreadable, or it is not valid UTF-8
            # (decoding errors are raised by readlines(), not by open()).
            return None

        try:
            # Convert from 1-based current_line_num to 0-based list offset
            return lines[lineno - 1]
        except IndexError:
            return None

    def write_repr(self, out, visited):
        '''
        Write "Frame 0x..., for file ..., line ..., in ... (name=value, ...)"
        to the file-like object "out", listing the frame's local variables.
        '''
        if self.is_optimized_out():
            out.write(FRAME_INFO_OPTIMIZED_OUT)
            return
        lineno = self.current_line_num()
        # Show "?" when the line number could not be determined
        lineno = str(lineno) if lineno is not None else "?"
        out.write('Frame 0x%x, for file %s, line %s, in %s ('
                  % (self.as_address(),
                     self.co_filename.proxyval(visited),
                     lineno,
                     self.co_name.proxyval(visited)))
        first = True
        for pyop_name, pyop_value in self.iter_locals():
            if not first:
                out.write(', ')
            first = False

            out.write(pyop_name.proxyval(visited))
            out.write('=')
            pyop_value.write_repr(out, visited)

        out.write(')')

    def as_address(self):
        '''Return the wrapped gdb.Value converted to an int'''
        return int(self._gdbval)

    def print_traceback(self):
        '''Write a traceback-style 'File "...", line ..., in ...' entry for
        this frame to sys.stdout'''
        if self.is_optimized_out():
            sys.stdout.write('  %s\n' % FRAME_INFO_OPTIMIZED_OUT)
            return
        visited = set()
        lineno = self.current_line_num()
        # Show "?" when the line number could not be determined
        lineno = str(lineno) if lineno is not None else "?"
        sys.stdout.write('  File "%s", line %s, in %s\n'
                  % (self.co_filename.proxyval(visited),
                     lineno,
                     self.co_name.proxyval(visited)))

    def get_truncated_repr(self, maxlen):
        '''
        Get a repr-like string for the data, but truncate it at "maxlen" bytes
        (ending the object graph traversal as soon as you do)
        '''
        out = TruncatedStringIO(maxlen)
        try:
            self.write_repr(out, set())
        except StringTruncated:
            # Truncation occurred:
            return out.getvalue() + '...(truncated)'

        # No truncation occurred:
        return out.getvalue()
1217
class PySetObjectPtr(PyObjectPtr):
    """
    Class wrapping a gdb.Value that's a (PySetObject*) within the process
    being debugged.  The type name reported by safe_tp_name() decides whether
    it is presented as a set or as a frozenset.
    """
    _typename = 'PySetObject'

    @classmethod
    def _dummy_key(cls):
        '''Return the value of the inferior's "_PySet_Dummy" global symbol'''
        return gdb.lookup_global_symbol('_PySet_Dummy').value()

    def __iter__(self):
        '''Yield a PyObjectPtr for each key stored in the hash table'''
        dummy_ptr = self._dummy_key()
        table = self.field('table')
        for i in safe_range(self.field('mask') + 1):
            key = table[i]['key']
            # Skip slots whose key is NULL or the dummy key
            if key != 0 and key != dummy_ptr:
                yield PyObjectPtr.from_pyobject_ptr(key)

    def proxyval(self, visited):
        '''
        Return a set (or a frozenset, if the type name is 'frozenset') of the
        proxy values of the members.
        '''
        # Guard against infinite loops:
        if self.as_address() in visited:
            return ProxyAlreadyVisited('%s(...)' % self.safe_tp_name())
        visited.add(self.as_address())

        members = (key.proxyval(visited) for key in self)
        if self.safe_tp_name() == 'frozenset':
            return frozenset(members)
        else:
            return set(members)

    def write_repr(self, out, visited):
        '''Write a representation of the set to the file-like object "out"'''
        # Emulate Python 3's set_repr
        tp_name = self.safe_tp_name()

        # Guard against infinite loops.  Like set_repr (and like proxyval
        # above), an already-visited set is written as "<type name>(...)".
        if self.as_address() in visited:
            out.write('%s(...)' % tp_name)
            return
        visited.add(self.as_address())

        # Python 3's set_repr special-cases the empty set:
        if not self.field('used'):
            out.write(tp_name)
            out.write('()')
            return

        # Python 3 uses {} for set literals; any other type is written as
        # "<type name>({...})":
        if tp_name != 'set':
            out.write(tp_name)
            out.write('(')

        out.write('{')
        first = True
        for key in self:
            if not first:
                out.write(', ')
            first = False
            key.write_repr(out, visited)
        out.write('}')

        if tp_name != 'set':
            out.write(')')
1278
1279
class PyBytesObjectPtr(PyObjectPtr):
    """
    Class wrapping a gdb.Value that's a (PyBytesObject*) within the process
    being debugged.
    """
    _typename = 'PyBytesObject'

    def __str__(self):
        '''Return a str holding one character per byte of the object's data'''
        length = self.field('ob_size')
        data = self.field('ob_sval').address.cast(_type_unsigned_char_ptr())
        return ''.join(chr(data[i]) for i in safe_range(length))

    def proxyval(self, visited):
        '''The proxy value is the str produced by __str__'''
        return str(self)

    def write_repr(self, out, visited):
        '''
        Write the data as a Python 3 bytes literal, i.e. with a "b" prefix.

        Transliteration of Python 3's Objects/bytesobject.c:PyBytes_Repr
        '''
        text = self.proxyval(visited)

        # Use double quotes only if that avoids escaping a single quote
        if "'" in text and '"' not in text:
            quote = '"'
        else:
            quote = "'"

        whitespace_escapes = {'\t': '\\t', '\n': '\\n', '\r': '\\r'}

        out.write('b')
        out.write(quote)
        for ch in text:
            code = ord(ch)
            if ch in (quote, '\\'):
                out.write('\\')
                out.write(ch)
            elif ch in whitespace_escapes:
                out.write(whitespace_escapes[ch])
            elif code < 0x20 or code >= 0x7f:
                # Anything else outside printable ASCII becomes \xhh
                out.write('\\x')
                out.write(hexdigits[(code >> 4) & 0xf])
                out.write(hexdigits[code & 0xf])
            else:
                out.write(ch)
        out.write(quote)
1322
class PyTupleObjectPtr(PyObjectPtr):
    """
    Class wrapping a gdb.Value that's a (PyTupleObject*) within the process
    being debugged.
    """
    _typename = 'PyTupleObject'

    def __getitem__(self, i):
        '''Return the gdb.Value for the (PyObject*) stored at index i'''
        return self.field('ob_item')[i]

    def proxyval(self, visited):
        '''
        Build a tuple of the proxy values of the elements, or return
        ProxyAlreadyVisited('(...)') for a tuple already in "visited".
        '''
        address = self.as_address()
        if address in visited:
            # Already seen: stop here rather than recursing forever
            return ProxyAlreadyVisited('(...)')
        visited.add(address)

        length = int_from_int(self.field('ob_size'))
        items = []
        for index in safe_range(length):
            pyop = PyObjectPtr.from_pyobject_ptr(self[index])
            items.append(pyop.proxyval(visited))
        return tuple(items)

    def write_repr(self, out, visited):
        '''
        Write a "(a, b, c)" style representation of the tuple to "out",
        writing "(...)" for a tuple already present in "visited".
        '''
        address = self.as_address()
        if address in visited:
            # Already seen: stop here rather than recursing forever
            out.write('(...)')
            return
        visited.add(address)

        out.write('(')
        length = int_from_int(self.field('ob_size'))
        for index in safe_range(length):
            if index:
                out.write(', ')
            pyop = PyObjectPtr.from_pyobject_ptr(self[index])
            pyop.write_repr(out, visited)

        # A 1-tuple needs its trailing comma: "(x,)"
        closing = ',)' if self.field('ob_size') == 1 else ')'
        out.write(closing)
1358
class PyTypeObjectPtr(PyObjectPtr):
    """
    Class wrapping a gdb.Value that's a (PyTypeObject*) within the process
    being debugged.  It only sets the type name; everything else is inherited
    from PyObjectPtr.
    """
    _typename = 'PyTypeObject'
1361
1362
1363def _unichr_is_printable(char):
1364    # Logic adapted from Python 3's Tools/unicode/makeunicodedata.py
1365    if char == u" ":
1366        return True
1367    import unicodedata
1368    return unicodedata.category(char) not in ("C", "Z")
1369
1370
class PyUnicodeObjectPtr(PyObjectPtr):
    """
    Class wrapping a gdb.Value that's a (PyUnicodeObject*) within the process
    being debugged.
    """
    _typename = 'PyUnicodeObject'

    def char_width(self):
        '''Return the size of the inferior's Py_UNICODE type'''
        _type_Py_UNICODE = gdb.lookup_type('Py_UNICODE')
        return _type_Py_UNICODE.sizeof

    def proxyval(self, visited):
        '''
        Return the inferior's string as a str within the gdb process.

        The character data is located using the "state" flags of the object
        (ready, compact, ascii, kind).
        '''
        may_have_surrogates = False
        compact = self.field('_base')
        ascii = compact['_base']
        state = ascii['state']
        is_compact_ascii = (int(state['ascii']) and int(state['compact']))
        if not int(state['ready']):
            # string is not ready
            field_length = int(compact['wstr_length'])
            may_have_surrogates = True
            field_str = ascii['wstr']
        else:
            field_length = int(ascii['length'])
            # Compact strings are read from just past the ascii/compact
            # struct; otherwise the data pointer is used.
            if is_compact_ascii:
                field_str = ascii.address + 1
            elif int(state['compact']):
                field_str = compact.address + 1
            else:
                field_str = self.field('data')['any']
            # "kind" is used as the width in bytes of each code point
            repr_kind = int(state['kind'])
            if repr_kind == 1:
                field_str = field_str.cast(_type_unsigned_char_ptr())
            elif repr_kind == 2:
                field_str = field_str.cast(_type_unsigned_short_ptr())
            elif repr_kind == 4:
                field_str = field_str.cast(_type_unsigned_int_ptr())

        # Gather a list of ints from the Py_UNICODE array; these are either
        # UCS-1, UCS-2 or UCS-4 code points:
        if not may_have_surrogates:
            Py_UNICODEs = [int(field_str[i]) for i in safe_range(field_length)]
        else:
            # A more elaborate routine if sizeof(Py_UNICODE) is 2 in the
            # inferior process: we must join surrogate pairs.
            Py_UNICODEs = []
            i = 0
            limit = safety_limit(field_length)
            while i < limit:
                ucs = int(field_str[i])
                i += 1
                if ucs < 0xD800 or ucs >= 0xDC00 or i == field_length:
                    Py_UNICODEs.append(ucs)
                    continue
                # This could be a surrogate pair.
                ucs2 = int(field_str[i])
                if ucs2 < 0xDC00 or ucs2 > 0xDFFF:
                    continue
                code = (ucs & 0x03FF) << 10
                code |= ucs2 & 0x03FF
                code += 0x00010000
                Py_UNICODEs.append(code)
                i += 1

        # Convert the int code points to unicode characters, and generate a
        # local unicode instance.
        result = u''.join(map(chr, Py_UNICODEs))
        return result

    def write_repr(self, out, visited):
        '''
        Write the string as a Python 3 str literal, i.e. without a "u"
        prefix.

        Transliteration of Python 3's Object/unicodeobject.c:unicode_repr
        '''
        proxy = self.proxyval(visited)

        # Use double quotes only if that avoids escaping a single quote
        if "'" in proxy and '"' not in proxy:
            quote = '"'
        else:
            quote = "'"
        out.write(quote)

        for ch in proxy:
            # Escape quotes and backslashes
            if ch == quote or ch == '\\':
                out.write('\\')
                out.write(ch)

            #  Map special whitespace to '\t', \n', '\r'
            elif ch == '\t':
                out.write('\\t')
            elif ch == '\n':
                out.write('\\n')
            elif ch == '\r':
                out.write('\\r')

            # Map non-printable US ASCII to '\xhh'
            elif ch < ' ' or ord(ch) == 0x7F:
                out.write('\\x')
                out.write(hexdigits[(ord(ch) >> 4) & 0x000F])
                out.write(hexdigits[ord(ch) & 0x000F])

            # Copy ASCII characters as-is
            elif ord(ch) < 0x7F:
                out.write(ch)

            # Non-ASCII characters
            else:
                # Only copy the character through if it is printable and can
                # be encoded with ENCODING
                printable = ch.isprintable()
                if printable:
                    try:
                        ch.encode(ENCODING)
                    except UnicodeEncodeError:
                        printable = False

                if printable:
                    # Copy characters as-is
                    out.write(ch)
                    continue

                # Map Unicode whitespace and control characters
                # (categories Z* and C* except ASCII space)
                code = ord(ch)

                # Map 8-bit characters to '\\xhh'
                if code <= 0xff:
                    out.write('\\x')
                    out.write(hexdigits[(code >> 4) & 0x000F])
                    out.write(hexdigits[code & 0x000F])
                # Map 21-bit characters to '\U00xxxxxx'
                elif code >= 0x10000:
                    out.write('\\U')
                    out.write(hexdigits[(code >> 28) & 0x0000000F])
                    out.write(hexdigits[(code >> 24) & 0x0000000F])
                    out.write(hexdigits[(code >> 20) & 0x0000000F])
                    out.write(hexdigits[(code >> 16) & 0x0000000F])
                    out.write(hexdigits[(code >> 12) & 0x0000000F])
                    out.write(hexdigits[(code >> 8) & 0x0000000F])
                    out.write(hexdigits[(code >> 4) & 0x0000000F])
                    out.write(hexdigits[code & 0x0000000F])
                # Map 16-bit characters to '\uxxxx'
                else:
                    out.write('\\u')
                    out.write(hexdigits[(code >> 12) & 0x000F])
                    out.write(hexdigits[(code >> 8) & 0x000F])
                    out.write(hexdigits[(code >> 4) & 0x000F])
                    out.write(hexdigits[code & 0x000F])

        out.write(quote)
1532
1533
class wrapperobject(PyObjectPtr):
    """
    Class wrapping a gdb.Value that's a (wrapperobject*) within the process
    being debugged, presented as "<method-wrapper NAME of TYPE object at
    ADDRESS>".
    """
    _typename = 'wrapperobject'

    def safe_name(self):
        '''Return the repr of the descriptor's name, or a placeholder if it
        cannot be read'''
        try:
            return repr(self.field('descr')['d_base']['name'].string())
        except (NullPyObjectPtr, RuntimeError, UnicodeDecodeError):
            return '<unknown name>'

    def safe_tp_name(self):
        '''Return the type name of the wrapped "self" object, or a
        placeholder if it cannot be read'''
        try:
            bound_self = self.field('self')
            return bound_self['ob_type']['tp_name'].string()
        except (NullPyObjectPtr, RuntimeError, UnicodeDecodeError):
            return '<unknown tp_name>'

    def safe_self_addresss(self):
        '''Return the address of the wrapped "self" object formatted as hex,
        or a placeholder if it cannot be read'''
        try:
            return '%#x' % int(self.field('self'))
        except (NullPyObjectPtr, RuntimeError):
            return '<failed to get self address>'

    def proxyval(self, visited):
        '''Return the "<method-wrapper ...>" description as a str'''
        parts = (self.safe_name(),
                 self.safe_tp_name(),
                 self.safe_self_addresss())
        return "<method-wrapper %s of %s object at %s>" % parts

    def write_repr(self, out, visited):
        '''Write the same text as proxyval() to "out"'''
        out.write(self.proxyval(visited))
1567
1568
def int_from_int(gdbval):
    '''Convert "gdbval" to a Python int by calling int() on it'''
    return int(gdbval)
1571
1572
def stringify(val):
    '''
    Return a string representation of "val".

    TODO: repr() puts everything on one line; pformat can be nicer, but
    can lead to v.long results; this function isolates the choice
    '''
    use_repr = True
    if not use_repr:
        from pprint import pformat
        return pformat(val)
    return repr(val)
1581
1582
class PyObjectPtrPrinter:
    "Prints a (PyObject*)"

    def __init__(self, gdbval):
        # The gdb.Value to be printed by to_string()
        self.gdbval = gdbval

    def to_string(self):
        '''Return the repr of the object, truncated at MAX_OUTPUT_LEN'''
        pyop = PyObjectPtr.from_pyobject_ptr(self.gdbval)
        truncate = True
        if not truncate:
            # Generate full proxy value then stringify it.
            # Doing so could be expensive
            return stringify(pyop.proxyval(set()))
        return pyop.get_truncated_repr(MAX_OUTPUT_LEN)
1598
def pretty_printer_lookup(gdbval):
    '''
    Return a PyObjectPtrPrinter for "gdbval" if it is a pointer to one of the
    handled struct types; otherwise return None.
    '''
    handled = ("PyObject", "PyFrameObject", "PyUnicodeObject", "wrapperobject")

    value_type = gdbval.type.unqualified()
    if value_type.code != gdb.TYPE_CODE_PTR:
        return None

    # Match on the name of the type being pointed at, ignoring qualifiers
    target_name = str(value_type.target().unqualified())
    if target_name not in handled:
        return None
    return PyObjectPtrPrinter(gdbval)
1608
1609"""
1610During development, I've been manually invoking the code in this way:
1611(gdb) python
1612
1613import sys
1614sys.path.append('/home/david/coding/python-gdb')
1615import libpython
1616end
1617
1618then reloading it after each edit like this:
(gdb) python import importlib; importlib.reload(libpython)
1620
1621The following code should ensure that the prettyprinter is registered
1622if the code is autoloaded by gdb when visiting libpython.so, provided
1623that this python file is installed to the same path as the library (or its
1624.debug file) plus a "-gdb.py" suffix, e.g:
1625  /usr/lib/libpython2.6.so.1.0-gdb.py
1626  /usr/lib/debug/usr/lib/libpython2.6.so.1.0.debug-gdb.py
1627"""
def register(obj):
    '''
    Append pretty_printer_lookup to the pretty_printers list of "obj", or of
    the gdb module itself when "obj" is None.
    '''
    target = gdb if obj is None else obj

    # Wire up the pretty-printer
    target.pretty_printers.append(pretty_printer_lookup)

register(gdb.current_objfile())
1636
1637
1638
1639# Unfortunately, the exact API exposed by the gdb module varies somewhat
1640# from build to build
1641# See http://bugs.python.org/issue8279?#msg102276
1642
1643class Frame(object):
1644    '''
1645    Wrapper for gdb.Frame, adding various methods
1646    '''
    def __init__(self, gdbframe):
        # The wrapped gdb.Frame; every other method works through it
        self._gdbframe = gdbframe
1649
1650    def older(self):
1651        older = self._gdbframe.older()
1652        if older:
1653            return Frame(older)
1654        else:
1655            return None
1656
1657    def newer(self):
1658        newer = self._gdbframe.newer()
1659        if newer:
1660            return Frame(newer)
1661        else:
1662            return None
1663
1664    def select(self):
1665        '''If supported, select this frame and return True; return False if unsupported
1666
1667        Not all builds have a gdb.Frame.select method; seems to be present on Fedora 12
1668        onwards, but absent on Ubuntu buildbot'''
1669        if not hasattr(self._gdbframe, 'select'):
1670            print ('Unable to select frame: '
1671                   'this build of gdb does not expose a gdb.Frame.select method')
1672            return False
1673        self._gdbframe.select()
1674        return True
1675
1676    def get_index(self):
1677        '''Calculate index of frame, starting at 0 for the newest frame within
1678        this thread'''
1679        index = 0
1680        # Go down until you reach the newest frame:
1681        iter_frame = self
1682        while iter_frame.newer():
1683            index += 1
1684            iter_frame = iter_frame.newer()
1685        return index
1686
1687    # We divide frames into:
1688    #   - "python frames":
1689    #       - "bytecode frames" i.e. PyEval_EvalFrameEx
1690    #       - "other python frames": things that are of interest from a python
1691    #         POV, but aren't bytecode (e.g. GC, GIL)
1692    #   - everything else
1693
1694    def is_python_frame(self):
1695        '''Is this a _PyEval_EvalFrameDefault frame, or some other important
1696        frame? (see is_other_python_frame for what "important" means in this
1697        context)'''
1698        if self.is_evalframe():
1699            return True
1700        if self.is_other_python_frame():
1701            return True
1702        return False
1703
1704    def is_evalframe(self):
1705        '''Is this a _PyEval_EvalFrameDefault frame?'''
1706        if self._gdbframe.name() == EVALFRAME:
1707            '''
1708            I believe we also need to filter on the inline
1709            struct frame_id.inline_depth, only regarding frames with
1710            an inline depth of 0 as actually being this function
1711
1712            So we reject those with type gdb.INLINE_FRAME
1713            '''
1714            if self._gdbframe.type() == gdb.NORMAL_FRAME:
1715                # We have a _PyEval_EvalFrameDefault frame:
1716                return True
1717
1718        return False
1719
1720    def is_other_python_frame(self):
1721        '''Is this frame worth displaying in python backtraces?
1722        Examples:
1723          - waiting on the GIL
1724          - garbage-collecting
1725          - within a CFunction
1726         If it is, return a descriptive string
1727         For other frames, return False
1728         '''
1729        if self.is_waiting_for_gil():
1730            return 'Waiting for the GIL'
1731
1732        if self.is_gc_collect():
1733            return 'Garbage-collecting'
1734
1735        # Detect invocations of PyCFunction instances:
1736        frame = self._gdbframe
1737        caller = frame.name()
1738        if not caller:
1739            return False
1740
1741        if (caller.startswith('cfunction_vectorcall_') or
1742            caller == 'cfunction_call'):
1743            arg_name = 'func'
1744            # Within that frame:
1745            #   "func" is the local containing the PyObject* of the
1746            # PyCFunctionObject instance
1747            #   "f" is the same value, but cast to (PyCFunctionObject*)
1748            #   "self" is the (PyObject*) of the 'self'
1749            try:
1750                # Use the prettyprinter for the func:
1751                func = frame.read_var(arg_name)
1752                return str(func)
1753            except ValueError:
1754                return ('PyCFunction invocation (unable to read %s: '
1755                        'missing debuginfos?)' % arg_name)
1756            except RuntimeError:
1757                return 'PyCFunction invocation (unable to read %s)' % arg_name
1758
1759        if caller == 'wrapper_call':
1760            arg_name = 'wp'
1761            try:
1762                func = frame.read_var(arg_name)
1763                return str(func)
1764            except ValueError:
1765                return ('<wrapper_call invocation (unable to read %s: '
1766                        'missing debuginfos?)>' % arg_name)
1767            except RuntimeError:
1768                return '<wrapper_call invocation (unable to read %s)>' % arg_name
1769
1770        # This frame isn't worth reporting:
1771        return False
1772
1773    def is_waiting_for_gil(self):
1774        '''Is this frame waiting on the GIL?'''
1775        # This assumes the _POSIX_THREADS version of Python/ceval_gil.h:
1776        name = self._gdbframe.name()
1777        if name:
1778            return (name == 'take_gil')
1779
1780    def is_gc_collect(self):
1781        '''Is this frame gc_collect_main() within the garbage-collector?'''
1782        return self._gdbframe.name() in ('collect', 'gc_collect_main')
1783
1784    def get_pyop(self):
1785        try:
1786            frame = self._gdbframe.read_var('frame')
1787            frame = PyFramePtr(frame)
1788            if not frame.is_optimized_out():
1789                return frame
1790            cframe = self._gdbframe.read_var('cframe')
1791            if cframe is None:
1792                return None
1793            frame = PyFramePtr(cframe["current_frame"])
1794            if frame and not frame.is_optimized_out():
1795                return frame
1796            return None
1797        except ValueError:
1798            return None
1799
1800    @classmethod
1801    def get_selected_frame(cls):
1802        _gdbframe = gdb.selected_frame()
1803        if _gdbframe:
1804            return Frame(_gdbframe)
1805        return None
1806
1807    @classmethod
1808    def get_selected_python_frame(cls):
1809        '''Try to obtain the Frame for the python-related code in the selected
1810        frame, or None'''
1811        try:
1812            frame = cls.get_selected_frame()
1813        except gdb.error:
1814            # No frame: Python didn't start yet
1815            return None
1816
1817        while frame:
1818            if frame.is_python_frame():
1819                return frame
1820            frame = frame.older()
1821
1822        # Not found:
1823        return None
1824
1825    @classmethod
1826    def get_selected_bytecode_frame(cls):
1827        '''Try to obtain the Frame for the python bytecode interpreter in the
1828        selected GDB frame, or None'''
1829        frame = cls.get_selected_frame()
1830
1831        while frame:
1832            if frame.is_evalframe():
1833                return frame
1834            frame = frame.older()
1835
1836        # Not found:
1837        return None
1838
1839    def print_summary(self):
1840        if self.is_evalframe():
1841            interp_frame = self.get_pyop()
1842            while True:
1843                if interp_frame:
1844                    line = interp_frame.get_truncated_repr(MAX_OUTPUT_LEN)
1845                    sys.stdout.write('#%i %s\n' % (self.get_index(), line))
1846                    if not interp_frame.is_optimized_out():
1847                        line = interp_frame.current_line()
1848                        if line is not None:
1849                            sys.stdout.write('    %s\n' % line.strip())
1850                    if interp_frame.is_entry():
1851                        break
1852                else:
1853                    sys.stdout.write('#%i (unable to read python frame information)\n' % self.get_index())
1854                    break
1855                interp_frame = interp_frame.previous()
1856        else:
1857            info = self.is_other_python_frame()
1858            if info:
1859                sys.stdout.write('#%i %s\n' % (self.get_index(), info))
1860            else:
1861                sys.stdout.write('#%i\n' % self.get_index())
1862
1863    def print_traceback(self):
1864        if self.is_evalframe():
1865            interp_frame = self.get_pyop()
1866            while True:
1867                if interp_frame:
1868                    interp_frame.print_traceback()
1869                    if not interp_frame.is_optimized_out():
1870                        line = interp_frame.current_line()
1871                        if line is not None:
1872                            sys.stdout.write('    %s\n' % line.strip())
1873                    if interp_frame.is_entry():
1874                        break
1875                else:
1876                    sys.stdout.write('  (unable to read python frame information)\n')
1877                    break
1878                interp_frame = interp_frame.previous()
1879        else:
1880            info = self.is_other_python_frame()
1881            if info:
1882                sys.stdout.write('  %s\n' % info)
1883            else:
1884                sys.stdout.write('  (not a python frame)\n')
1885
class PyList(gdb.Command):
    '''List the current Python source code, if any

    Use
       py-list START
    to list at a different line number within the python source.

    Use
       py-list START, END
    to list a specific range of lines within the python source.
    '''

    def __init__(self):
        gdb.Command.__init__ (self,
                              "py-list",
                              gdb.COMMAND_FILES,
                              gdb.COMPLETE_NONE)


    def invoke(self, args, from_tty):
        '''Print a window of the python source file of the selected bytecode
        frame to sys.stdout, marking the current line with ">".

        args is the command's argument string: empty (list the lines around
        the current one), "START" (list from START to START + 10) or
        "START, END" (list the closed interval [START, END]).  Arguments
        not matching either form are ignored.
        '''
        import itertools
        import re

        start = None
        end = None

        # Accept "START" or "START, END".  The capture groups hold only the
        # digits, so int() is never handed the surrounding whitespace.
        m = re.match(r'\s*(\d+)\s*(?:,\s*(\d+)\s*)?', args)
        if m:
            start = int(m.group(1))
            if m.group(2) is not None:
                end = int(m.group(2))
            else:
                end = start + 10

        # py-list requires an actual bytecode interpreter frame:
        frame = Frame.get_selected_bytecode_frame()
        if not frame:
            print('Unable to locate gdb frame for python bytecode interpreter')
            return

        pyop = frame.get_pyop()
        if not pyop or pyop.is_optimized_out():
            print(UNABLE_READ_INFO_PYTHON_FRAME)
            return

        filename = pyop.filename()
        lineno = pyop.current_line_num()
        if lineno is None:
            print('Unable to read python frame line number')
            return

        if start is None:
            start = lineno - 5
            end = lineno + 5

        if start<1:
            start = 1

        try:
            # errors="replace": a source file that is not valid UTF-8 should
            # still be listed, rather than aborting the command with a
            # UnicodeDecodeError.
            f = open(os_fsencode(filename), 'r', encoding="utf-8",
                     errors="replace")
        except IOError as err:
            sys.stdout.write('Unable to open %s: %s\n'
                             % (filename, err))
            return
        with f:
            # start and end are 1-based and inclusive, whereas islice() is
            # 0-based with an exclusive stop; so [start-1, end) gives us
            # [start, end] as a closed interval.  Only the lines up to end
            # are read, not the whole file.  The max() keeps the stop valid
            # for islice() and yields nothing when end precedes start.
            wanted = itertools.islice(f, start - 1, max(end, start - 1))
            for num, line in enumerate(wanted, start):
                linestr = str(num)
                # Highlight current line:
                if num == lineno:
                    linestr = '>' + linestr
                sys.stdout.write('%4s    %s' % (linestr, line))


# ...and register the command:
PyList()
1965
def move_in_stack(move_up):
    '''Select and summarise the nearest python frame above or below the
    current one (shared implementation of the py-up/py-down commands).

    move_up: true to walk towards older frames, false to walk towards newer
    ones.  A message is printed if no starting frame or no further python
    frame can be found.'''
    # Important:
    # The amount of frames that are printed out depends on how many frames are inlined
    # in the same evaluation loop. As this command links directly the C stack with the
    # Python stack, the results are sensitive to the number of inlined frames and this
    # is likely to change between versions and optimizations.
    current = Frame.get_selected_python_frame()
    if not current:
        print('Unable to locate python frame')
        return

    direction = 'older' if move_up else 'newer'

    candidate = getattr(current, direction)()
    while candidate:
        if candidate.is_python_frame():
            # Result:
            if candidate.select():
                candidate.print_summary()
            return
        candidate = getattr(candidate, direction)()

    # Ran off the end of the stack without meeting a python frame:
    if move_up:
        print('Unable to find an older python frame')
    else:
        print('Unable to find a newer python frame')
1998
1999
class PyUp(gdb.Command):
    'Select and print all python stack frame in the same eval loop starting from the one that called this one (if any)'

    def __init__(self):
        # Register under the name "py-up" among gdb's stack commands;
        # the command takes no completable arguments.
        gdb.Command.__init__(self, "py-up",
                             gdb.COMMAND_STACK, gdb.COMPLETE_NONE)

    def invoke(self, args, from_tty):
        # Both arguments are ignored: simply walk towards older frames.
        move_in_stack(move_up=True)
2011
class PyDown(gdb.Command):
    'Select and print all python stack frame in the same eval loop starting from the one called this one (if any)'

    def __init__(self):
        # Register under the name "py-down" among gdb's stack commands;
        # the command takes no completable arguments.
        gdb.Command.__init__(self, "py-down",
                             gdb.COMMAND_STACK, gdb.COMPLETE_NONE)

    def invoke(self, args, from_tty):
        # Both arguments are ignored: simply walk towards newer frames.
        move_in_stack(move_up=False)
2023
# Not all builds of gdb have gdb.Frame.select
# py-up and py-down change the selected frame (move_in_stack() calls
# select() on the frame it finds), so the two commands are only registered
# when that API is available.
if hasattr(gdb.Frame, 'select'):
    PyUp()
    PyDown()
2028
class PyBacktraceFull(gdb.Command):
    'Display the current python frame and all the frames within its call stack (if any)'

    def __init__(self):
        # Register under the name "py-bt-full" among gdb's stack commands.
        gdb.Command.__init__(self, "py-bt-full",
                             gdb.COMMAND_STACK, gdb.COMPLETE_NONE)

    def invoke(self, args, from_tty):
        # Both arguments are ignored.
        cursor = Frame.get_selected_python_frame()
        if not cursor:
            print('Unable to locate python frame')
            return

        # Walk towards the oldest frame, summarising only the frames that
        # are of interest from a python POV:
        while cursor:
            if cursor.is_python_frame():
                cursor.print_summary()
            cursor = cursor.older()


# ...and register the command:
PyBacktraceFull()
2050
class PyBacktrace(gdb.Command):
    'Display the current python frame and all the frames within its call stack (if any)'

    def __init__(self):
        # Register under the name "py-bt" among gdb's stack commands.
        gdb.Command.__init__(self, "py-bt",
                             gdb.COMMAND_STACK, gdb.COMPLETE_NONE)

    def invoke(self, args, from_tty):
        # Both arguments are ignored.
        cursor = Frame.get_selected_python_frame()
        if not cursor:
            print('Unable to locate python frame')
            return

        sys.stdout.write('Traceback (most recent call first):\n')

        # Walk towards the oldest frame, letting each frame that is of
        # interest from a python POV print its traceback entry:
        while cursor:
            if cursor.is_python_frame():
                cursor.print_traceback()
            cursor = cursor.older()


# ...and register the command:
PyBacktrace()
2073
class PyPrint(gdb.Command):
    'Look up the given python variable name, and print it'

    def __init__(self):
        # Register under the name "py-print" among gdb's data commands.
        gdb.Command.__init__(self, "py-print",
                             gdb.COMMAND_DATA, gdb.COMPLETE_NONE)

    def invoke(self, args, from_tty):
        # The whole argument string is taken as the variable name.
        var_name = str(args)

        gdb_frame = Frame.get_selected_python_frame()
        if not gdb_frame:
            print('Unable to locate python frame')
            return

        py_frame = gdb_frame.get_pyop()
        if not py_frame:
            print(UNABLE_READ_INFO_PYTHON_FRAME)
            return

        value, scope = py_frame.get_var_by_name(var_name)
        if not value:
            print('%r not found' % var_name)
            return

        print('%s %r = %s'
              % (scope, var_name, value.get_truncated_repr(MAX_OUTPUT_LEN)))


# ...and register the command:
PyPrint()
2107
class PyLocals(gdb.Command):
    'Print the local variables of the selected python frame(s), if any'
    def __init__(self):
        gdb.Command.__init__ (self,
                              "py-locals",
                              gdb.COMMAND_DATA,
                              gdb.COMPLETE_NONE)


    def invoke(self, args, from_tty):
        '''Print "name = value" for every local of the selected python
        frame, then of each interpreter frame reachable via previous(),
        stopping after an entry frame.  Both arguments are ignored.'''
        frame = Frame.get_selected_python_frame()
        if not frame:
            print('Unable to locate python frame')
            return

        pyop_frame = frame.get_pyop()
        while True:
            if not pyop_frame:
                print(UNABLE_READ_INFO_PYTHON_FRAME)
                break

            sys.stdout.write('Locals for %s\n' % (pyop_frame.co_name.proxyval(set())))

            for pyop_name, pyop_value in pyop_frame.iter_locals():
                print('%s = %s'
                    % (pyop_name.proxyval(set()),
                        pyop_value.get_truncated_repr(MAX_OUTPUT_LEN)))

            # An entry frame is the last interpreter frame to report:
            if pyop_frame.is_entry():
                break

            pyop_frame = pyop_frame.previous()

PyLocals()
2144