1// Copyright 2009 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5// DWARF debug information entry parser.
6// An entry is a sequence of data items of a given format.
7// The first word in the entry is an index into what DWARF
8// calls the ``abbreviation table.''  An abbreviation is really
9// just a type descriptor: it's an array of attribute tag/value format pairs.
10
11package dwarf
12
13import (
14	"encoding/binary"
15	"errors"
16	"fmt"
17	"strconv"
18)
19
// An abbrev is the description of a single entry format: the tag of the
// entry, whether entries of this format are followed by children, and the
// sequence of attributes each such entry carries.
type abbrev struct {
	tag      Tag      // tag given to every entry that uses this abbreviation
	children bool     // whether such entries are followed by children
	field    []afield // attribute descriptions, in the order they are encoded
}
26
// An afield describes one attribute of an abbrev: the attribute itself,
// the form used to encode its value, and the class derived from the two.
type afield struct {
	attr  Attr   // attribute being described
	fmt   format // form in which the value is encoded
	class Class  // class computed by formToClass while parsing the table
	val   int64  // for formImplicitConst
}
33
// An abbrevTable maps entry format ids (the number that starts each
// encoded entry) to their descriptions.
type abbrevTable map[uint32]abbrev
36
37// parseAbbrev returns the abbreviation table that starts at byte off
38// in the .debug_abbrev section.
39func (d *Data) parseAbbrev(off uint64, vers int) (abbrevTable, error) {
40	if m, ok := d.abbrevCache[off]; ok {
41		return m, nil
42	}
43
44	data := d.abbrev
45	if off > uint64(len(data)) {
46		data = nil
47	} else {
48		data = data[off:]
49	}
50	b := makeBuf(d, unknownFormat{}, "abbrev", 0, data)
51
52	// Error handling is simplified by the buf getters
53	// returning an endless stream of 0s after an error.
54	m := make(abbrevTable)
55	for {
56		// Table ends with id == 0.
57		id := uint32(b.uint())
58		if id == 0 {
59			break
60		}
61
62		// Walk over attributes, counting.
63		n := 0
64		b1 := b // Read from copy of b.
65		b1.uint()
66		b1.uint8()
67		for {
68			tag := b1.uint()
69			fmt := b1.uint()
70			if tag == 0 && fmt == 0 {
71				break
72			}
73			if format(fmt) == formImplicitConst {
74				b1.int()
75			}
76			n++
77		}
78		if b1.err != nil {
79			return nil, b1.err
80		}
81
82		// Walk over attributes again, this time writing them down.
83		var a abbrev
84		a.tag = Tag(b.uint())
85		a.children = b.uint8() != 0
86		a.field = make([]afield, n)
87		for i := range a.field {
88			a.field[i].attr = Attr(b.uint())
89			a.field[i].fmt = format(b.uint())
90			a.field[i].class = formToClass(a.field[i].fmt, a.field[i].attr, vers, &b)
91			if a.field[i].fmt == formImplicitConst {
92				a.field[i].val = b.int()
93			}
94		}
95		b.uint()
96		b.uint()
97
98		m[id] = a
99	}
100	if b.err != nil {
101		return nil, b.err
102	}
103	d.abbrevCache[off] = m
104	return m, nil
105}
106
// attrIsExprloc indicates attributes that allow exprloc values that
// are encoded as block values in DWARF 2 and 3. See DWARF 4, Figure
// 20.
//
// formToClass consults this set so that block-encoded values of these
// attributes are reported as ClassExprLoc instead of ClassBlock.
var attrIsExprloc = map[Attr]bool{
	AttrLocation:      true,
	AttrByteSize:      true,
	AttrBitOffset:     true,
	AttrBitSize:       true,
	AttrStringLength:  true,
	AttrLowerBound:    true,
	AttrReturnAddr:    true,
	AttrStrideSize:    true,
	AttrUpperBound:    true,
	AttrCount:         true,
	AttrDataMemberLoc: true,
	AttrFrameBase:     true,
	AttrSegment:       true,
	AttrStaticLink:    true,
	AttrUseLocation:   true,
	AttrVtableElemLoc: true,
	AttrAllocated:     true,
	AttrAssociated:    true,
	AttrDataLocation:  true,
	AttrStride:        true,
}
132
// attrPtrClass indicates the *ptr class of attributes that have
// encoding formSecOffset in DWARF 4 or formData* in DWARF 2 and 3.
//
// formToClass applies it to formSecOffset values regardless of version,
// and to the constant forms only when the DWARF version is below 4.
var attrPtrClass = map[Attr]Class{
	AttrLocation:      ClassLocListPtr,
	AttrStmtList:      ClassLinePtr,
	AttrStringLength:  ClassLocListPtr,
	AttrReturnAddr:    ClassLocListPtr,
	AttrStartScope:    ClassRangeListPtr,
	AttrDataMemberLoc: ClassLocListPtr,
	AttrFrameBase:     ClassLocListPtr,
	AttrMacroInfo:     ClassMacPtr,
	AttrSegment:       ClassLocListPtr,
	AttrStaticLink:    ClassLocListPtr,
	AttrUseLocation:   ClassLocListPtr,
	AttrVtableElemLoc: ClassLocListPtr,
	AttrRanges:        ClassRangeListPtr,
	// The following are new in DWARF 5.
	AttrStrOffsetsBase: ClassStrOffsetsPtr,
	AttrAddrBase:       ClassAddrPtr,
	AttrRnglistsBase:   ClassRngListsPtr,
	AttrLoclistsBase:   ClassLocListPtr,
}
155
156// formToClass returns the DWARF 4 Class for the given form. If the
157// DWARF version is less then 4, it will disambiguate some forms
158// depending on the attribute.
159func formToClass(form format, attr Attr, vers int, b *buf) Class {
160	switch form {
161	default:
162		b.error("cannot determine class of unknown attribute form")
163		return 0
164
165	case formIndirect:
166		return ClassUnknown
167
168	case formAddr, formAddrx, formAddrx1, formAddrx2, formAddrx3, formAddrx4:
169		return ClassAddress
170
171	case formDwarfBlock1, formDwarfBlock2, formDwarfBlock4, formDwarfBlock:
172		// In DWARF 2 and 3, ClassExprLoc was encoded as a
173		// block. DWARF 4 distinguishes ClassBlock and
174		// ClassExprLoc, but there are no attributes that can
175		// be both, so we also promote ClassBlock values in
176		// DWARF 4 that should be ClassExprLoc in case
177		// producers get this wrong.
178		if attrIsExprloc[attr] {
179			return ClassExprLoc
180		}
181		return ClassBlock
182
183	case formData1, formData2, formData4, formData8, formSdata, formUdata, formData16, formImplicitConst:
184		// In DWARF 2 and 3, ClassPtr was encoded as a
185		// constant. Unlike ClassExprLoc/ClassBlock, some
186		// DWARF 4 attributes need to distinguish Class*Ptr
187		// from ClassConstant, so we only do this promotion
188		// for versions 2 and 3.
189		if class, ok := attrPtrClass[attr]; vers < 4 && ok {
190			return class
191		}
192		return ClassConstant
193
194	case formFlag, formFlagPresent:
195		return ClassFlag
196
197	case formRefAddr, formRef1, formRef2, formRef4, formRef8, formRefUdata, formRefSup4, formRefSup8:
198		return ClassReference
199
200	case formRefSig8:
201		return ClassReferenceSig
202
203	case formString, formStrp, formStrx, formStrpSup, formLineStrp, formStrx1, formStrx2, formStrx3, formStrx4:
204		return ClassString
205
206	case formSecOffset:
207		// DWARF 4 defines four *ptr classes, but doesn't
208		// distinguish them in the encoding. Disambiguate
209		// these classes using the attribute.
210		if class, ok := attrPtrClass[attr]; ok {
211			return class
212		}
213		return ClassUnknown
214
215	case formExprloc:
216		return ClassExprLoc
217
218	case formGnuRefAlt:
219		return ClassReferenceAlt
220
221	case formGnuStrpAlt:
222		return ClassStringAlt
223
224	case formLoclistx:
225		return ClassLocList
226
227	case formRnglistx:
228		return ClassRngList
229	}
230}
231
// An Entry is a sequence of attribute/value pairs.
//
// The entry decoder returns an Entry with all fields zero (in particular
// Tag 0) for abbreviation code 0; the Reader documentation describes an
// Entry with Tag 0 as the terminator of a list of children.
type Entry struct {
	Offset   Offset  // offset of Entry in DWARF info
	Tag      Tag     // tag (kind of Entry)
	Children bool    // whether Entry is followed by children
	Field    []Field // attribute/value pairs, in the order they are encoded
}
239
// A Field is a single attribute/value pair in an [Entry].
//
// A value can be one of several "attribute classes" defined by DWARF.
// The Go types corresponding to each class are:
//
//	DWARF class       Go type        Class
//	-----------       -------        -----
//	address           uint64         ClassAddress
//	block             []byte         ClassBlock
//	constant          int64          ClassConstant
//	flag              bool           ClassFlag
//	reference
//	  to info         dwarf.Offset   ClassReference
//	  to type unit    uint64         ClassReferenceSig
//	string            string         ClassString
//	exprloc           []byte         ClassExprLoc
//	lineptr           int64          ClassLinePtr
//	loclistptr        int64          ClassLocListPtr
//	macptr            int64          ClassMacPtr
//	rangelistptr      int64          ClassRangeListPtr
//	addrptr           int64          ClassAddrPtr
//	rnglistsptr       int64          ClassRngListsPtr
//	stroffsetsptr     int64          ClassStrOffsetsPtr
//	loclist           uint64         ClassLocList
//	rnglist           uint64         ClassRngList
//
// A few forms produce a different Go type than the table suggests; for
// example, a 16-byte constant (formData16) is stored as a []byte.
//
// For unrecognized or vendor-defined attributes, [Class] may be
// [ClassUnknown].
type Field struct {
	Attr  Attr  // attribute this value belongs to
	Val   any   // decoded value; its dynamic type depends on Class
	Class Class // class of Val
}
268
// A Class is the DWARF 4 class of an attribute value.
//
// In general, a given attribute's value may take on one of several
// possible classes defined by DWARF, each of which leads to a
// slightly different interpretation of the attribute.
//
// DWARF version 4 distinguishes attribute value classes more finely
// than previous versions of DWARF. The reader will disambiguate
// coarser classes from earlier versions of DWARF into the appropriate
// DWARF 4 class. For example, DWARF 2 uses "constant" for constants
// as well as all types of section offsets, but the reader will
// canonicalize attributes in DWARF 2 files that refer to section
// offsets to one of the Class*Ptr classes, even though these classes
// were only defined in DWARF 3.
//
// The zero value of Class is [ClassUnknown].
type Class int
284
const (
	// ClassUnknown represents values of unknown DWARF class.
	ClassUnknown Class = iota

	// ClassAddress represents values of type uint64 that are
	// addresses on the target machine.
	ClassAddress

	// ClassBlock represents values of type []byte whose
	// interpretation depends on the attribute.
	ClassBlock

	// ClassConstant represents values of type int64 that are
	// constants. The interpretation of this constant depends on
	// the attribute.
	ClassConstant

	// ClassExprLoc represents values of type []byte that contain
	// an encoded DWARF expression or location description.
	ClassExprLoc

	// ClassFlag represents values of type bool.
	ClassFlag

	// ClassLinePtr represents values that are an int64 offset
	// into the "line" section.
	ClassLinePtr

	// ClassLocListPtr represents values that are an int64 offset
	// into the "loclist" section.
	ClassLocListPtr

	// ClassMacPtr represents values that are an int64 offset into
	// the "mac" section.
	ClassMacPtr

	// ClassRangeListPtr represents values that are an int64 offset into
	// the "rangelist" section.
	ClassRangeListPtr

	// ClassReference represents values that are an Offset offset
	// of an Entry in the info section (for use with Reader.Seek).
	// The DWARF specification combines ClassReference and
	// ClassReferenceSig into class "reference".
	ClassReference

	// ClassReferenceSig represents values that are a uint64 type
	// signature referencing a type Entry.
	ClassReferenceSig

	// ClassString represents values that are strings. If the
	// compilation unit specifies the AttrUseUTF8 flag (strongly
	// recommended), the string value will be encoded in UTF-8.
	// Otherwise, the encoding is unspecified.
	ClassString

	// ClassReferenceAlt represents values of type int64 that are
	// an offset into the DWARF "info" section of an alternate
	// object file.
	ClassReferenceAlt

	// ClassStringAlt represents values of type int64 that are an
	// offset into the DWARF string section of an alternate object
	// file.
	ClassStringAlt

	// ClassAddrPtr represents values that are an int64 offset
	// into the "addr" section.
	ClassAddrPtr

	// ClassLocList represents values of type uint64 that refer
	// to the "loclists" section. The entry decoder stores the
	// value exactly as encoded by formLoclistx, without adjusting
	// it by any base.
	ClassLocList

	// ClassRngList represents values that are a uint64 offset
	// into the "rnglists" section. The entry decoder computes it
	// from the rnglists base of the compilation unit and the
	// offset it reads from the table at that base.
	ClassRngList

	// ClassRngListsPtr represents values that are an int64 offset
	// into the "rnglists" section. These are used as the base for
	// ClassRngList values.
	ClassRngListsPtr

	// ClassStrOffsetsPtr represents values that are an int64
	// offset into the "str_offsets" section.
	ClassStrOffsetsPtr
)
372
373//go:generate stringer -type=Class
374
375func (i Class) GoString() string {
376	return "dwarf." + i.String()
377}
378
379// Val returns the value associated with attribute [Attr] in [Entry],
380// or nil if there is no such attribute.
381//
382// A common idiom is to merge the check for nil return with
383// the check that the value has the expected dynamic type, as in:
384//
385//	v, ok := e.Val(AttrSibling).(int64)
386func (e *Entry) Val(a Attr) any {
387	if f := e.AttrField(a); f != nil {
388		return f.Val
389	}
390	return nil
391}
392
393// AttrField returns the [Field] associated with attribute [Attr] in
394// [Entry], or nil if there is no such attribute.
395func (e *Entry) AttrField(a Attr) *Field {
396	for i, f := range e.Field {
397		if f.Attr == a {
398			return &e.Field[i]
399		}
400	}
401	return nil
402}
403
// An Offset represents the location of an [Entry] within the DWARF info.
// (See [Reader.Seek].) Reader.Seek treats Offset 0 as the first entry.
type Offset uint32
407
// entry reads a single entry from b, decoding it according to the
// abbreviation table atab.
//
// cu is the entry of the enclosing compilation unit, or nil if it has not
// been read yet; it supplies the base attributes (AttrAddrBase,
// AttrStrOffsetsBase, AttrRnglistsBase) used to resolve the indexed
// addrx, strx and rnglistx forms. ubase is added to the unit-relative
// reference forms. vers is passed to formToClass to classify attributes
// encoded with formIndirect.
//
// An abbreviation code of 0 yields an empty, non-nil Entry. When decoding
// fails, entry returns nil and b.err is left set for the caller to
// inspect.
func (b *buf) entry(cu *Entry, atab abbrevTable, ubase Offset, vers int) *Entry {
	off := b.off
	id := uint32(b.uint())
	if id == 0 {
		return &Entry{}
	}
	a, ok := atab[id]
	if !ok {
		b.error("unknown abbreviation table index")
		return nil
	}
	e := &Entry{
		Offset:   off,
		Tag:      a.tag,
		Children: a.children,
		Field:    make([]Field, len(a.field)),
	}

	// If we are currently parsing the compilation unit,
	// we can't evaluate Addrx or Strx until we've seen the
	// relevant base entry.
	//
	// A delayed value records the index of the field to fill in (idx),
	// the value read from the entry (off), and which resolver to use
	// (fmt). The sized variants are recorded under the generic form:
	// formAddrx, formStrx or formRnglistx.
	type delayed struct {
		idx int
		off uint64
		fmt format
	}
	var delay []delayed

	// resolveStrx reads the string offset stored at strBase+off in the
	// str_offsets data, then returns the string at that offset in the
	// str data. off must already be scaled by the offset size.
	resolveStrx := func(strBase, off uint64) string {
		off += strBase
		if uint64(int(off)) != off {
			b.error("DW_FORM_strx offset out of range")
		}

		b1 := makeBuf(b.dwarf, b.format, "str_offsets", 0, b.dwarf.strOffsets)
		b1.skip(int(off))
		is64, _ := b.format.dwarf64()
		if is64 {
			off = b1.uint64()
		} else {
			off = uint64(b1.uint32())
		}
		if b1.err != nil {
			b.err = b1.err
			return ""
		}
		if uint64(int(off)) != off {
			b.error("DW_FORM_strx indirect offset out of range")
		}
		b1 = makeBuf(b.dwarf, b.format, "str", 0, b.dwarf.str)
		b1.skip(int(off))
		val := b1.string()
		if b1.err != nil {
			b.err = b1.err
		}
		return val
	}

	// resolveRnglistx scales the index off by the offset size, reads
	// the offset stored at rnglistsBase plus that position in the
	// rnglists data, and returns rnglistsBase plus the offset read.
	resolveRnglistx := func(rnglistsBase, off uint64) uint64 {
		is64, _ := b.format.dwarf64()
		if is64 {
			off *= 8
		} else {
			off *= 4
		}
		off += rnglistsBase
		if uint64(int(off)) != off {
			b.error("DW_FORM_rnglistx offset out of range")
		}

		b1 := makeBuf(b.dwarf, b.format, "rnglists", 0, b.dwarf.rngLists)
		b1.skip(int(off))
		if is64 {
			off = b1.uint64()
		} else {
			off = uint64(b1.uint32())
		}
		if b1.err != nil {
			b.err = b1.err
			return 0
		}
		if uint64(int(off)) != off {
			b.error("DW_FORM_rnglistx indirect offset out of range")
		}
		return rnglistsBase + off
	}

	for i := range e.Field {
		e.Field[i].Attr = a.field[i].attr
		e.Field[i].Class = a.field[i].class
		fmt := a.field[i].fmt
		if fmt == formIndirect {
			// The actual form is stored in the entry itself, so the
			// class has to be recomputed from it.
			fmt = format(b.uint())
			e.Field[i].Class = formToClass(fmt, a.field[i].attr, vers, b)
		}
		var val any
		switch fmt {
		default:
			b.error("unknown entry attr format 0x" + strconv.FormatInt(int64(fmt), 16))

		// address
		case formAddr:
			val = b.addr()
		case formAddrx, formAddrx1, formAddrx2, formAddrx3, formAddrx4:
			var off uint64
			switch fmt {
			case formAddrx:
				off = b.uint()
			case formAddrx1:
				off = uint64(b.uint8())
			case formAddrx2:
				off = uint64(b.uint16())
			case formAddrx3:
				off = uint64(b.uint24())
			case formAddrx4:
				off = uint64(b.uint32())
			}
			if b.dwarf.addr == nil {
				b.error("DW_FORM_addrx with no .debug_addr section")
			}
			if b.err != nil {
				return nil
			}

			// We have to adjust by the offset of the
			// compilation unit. This won't work if the
			// program uses Reader.Seek to skip over the
			// unit. Not much we can do about that.
			var addrBase int64
			if cu != nil {
				addrBase, _ = cu.Val(AttrAddrBase).(int64)
			} else if a.tag == TagCompileUnit {
				// The base may appear later in this same entry;
				// resolve after all fields have been read.
				delay = append(delay, delayed{i, off, formAddrx})
				break
			}

			var err error
			val, err = b.dwarf.debugAddr(b.format, uint64(addrBase), off)
			if err != nil {
				if b.err == nil {
					b.err = err
				}
				return nil
			}

		// block
		case formDwarfBlock1:
			val = b.bytes(int(b.uint8()))
		case formDwarfBlock2:
			val = b.bytes(int(b.uint16()))
		case formDwarfBlock4:
			val = b.bytes(int(b.uint32()))
		case formDwarfBlock:
			val = b.bytes(int(b.uint()))

		// constant
		case formData1:
			val = int64(b.uint8())
		case formData2:
			val = int64(b.uint16())
		case formData4:
			val = int64(b.uint32())
		case formData8:
			val = int64(b.uint64())
		case formData16:
			val = b.bytes(16)
		case formSdata:
			val = int64(b.int())
		case formUdata:
			val = int64(b.uint())
		case formImplicitConst:
			// The value lives in the abbreviation, not in the entry.
			val = a.field[i].val

		// flag
		case formFlag:
			val = b.uint8() == 1
		// New in DWARF 4.
		case formFlagPresent:
			// The attribute is implicitly indicated as present, and no value is
			// encoded in the debugging information entry itself.
			val = true

		// reference to other entry
		case formRefAddr:
			vers := b.format.version()
			if vers == 0 {
				b.error("unknown version for DW_FORM_ref_addr")
			} else if vers == 2 {
				val = Offset(b.addr())
			} else {
				is64, known := b.format.dwarf64()
				if !known {
					b.error("unknown size for DW_FORM_ref_addr")
				} else if is64 {
					val = Offset(b.uint64())
				} else {
					val = Offset(b.uint32())
				}
			}
		case formRef1:
			val = Offset(b.uint8()) + ubase
		case formRef2:
			val = Offset(b.uint16()) + ubase
		case formRef4:
			val = Offset(b.uint32()) + ubase
		case formRef8:
			val = Offset(b.uint64()) + ubase
		case formRefUdata:
			val = Offset(b.uint()) + ubase

		// string
		case formString:
			val = b.string()
		case formStrp, formLineStrp:
			var off uint64 // offset into .debug_str
			is64, known := b.format.dwarf64()
			if !known {
				b.error("unknown size for DW_FORM_strp/line_strp")
			} else if is64 {
				off = b.uint64()
			} else {
				off = uint64(b.uint32())
			}
			if uint64(int(off)) != off {
				b.error("DW_FORM_strp/line_strp offset out of range")
			}
			if b.err != nil {
				return nil
			}
			var b1 buf
			if fmt == formStrp {
				b1 = makeBuf(b.dwarf, b.format, "str", 0, b.dwarf.str)
			} else {
				if len(b.dwarf.lineStr) == 0 {
					b.error("DW_FORM_line_strp with no .debug_line_str section")
					return nil
				}
				b1 = makeBuf(b.dwarf, b.format, "line_str", 0, b.dwarf.lineStr)
			}
			b1.skip(int(off))
			val = b1.string()
			if b1.err != nil {
				b.err = b1.err
				return nil
			}
		case formStrx, formStrx1, formStrx2, formStrx3, formStrx4:
			var off uint64
			switch fmt {
			case formStrx:
				off = b.uint()
			case formStrx1:
				off = uint64(b.uint8())
			case formStrx2:
				off = uint64(b.uint16())
			case formStrx3:
				off = uint64(b.uint24())
			case formStrx4:
				off = uint64(b.uint32())
			}
			if len(b.dwarf.strOffsets) == 0 {
				b.error("DW_FORM_strx with no .debug_str_offsets section")
			}
			is64, known := b.format.dwarf64()
			if !known {
				b.error("unknown offset size for DW_FORM_strx")
			}
			if b.err != nil {
				return nil
			}
			// Turn the index into a byte offset; a delayed strx
			// therefore carries an already scaled offset.
			if is64 {
				off *= 8
			} else {
				off *= 4
			}

			// We have to adjust by the offset of the
			// compilation unit. This won't work if the
			// program uses Reader.Seek to skip over the
			// unit. Not much we can do about that.
			var strBase int64
			if cu != nil {
				strBase, _ = cu.Val(AttrStrOffsetsBase).(int64)
			} else if a.tag == TagCompileUnit {
				delay = append(delay, delayed{i, off, formStrx})
				break
			}

			val = resolveStrx(uint64(strBase), off)

		case formStrpSup:
			is64, known := b.format.dwarf64()
			if !known {
				b.error("unknown size for DW_FORM_strp_sup")
			} else if is64 {
				val = b.uint64()
			} else {
				val = b.uint32()
			}

		// lineptr, loclistptr, macptr, rangelistptr
		// New in DWARF 4, but clang can generate them with -gdwarf-2.
		// Section reference, replacing use of formData4 and formData8.
		case formSecOffset, formGnuRefAlt, formGnuStrpAlt:
			is64, known := b.format.dwarf64()
			if !known {
				b.error("unknown size for form 0x" + strconv.FormatInt(int64(fmt), 16))
			} else if is64 {
				val = int64(b.uint64())
			} else {
				val = int64(b.uint32())
			}

		// exprloc
		// New in DWARF 4.
		case formExprloc:
			val = b.bytes(int(b.uint()))

		// reference
		// New in DWARF 4.
		case formRefSig8:
			// 64-bit type signature.
			val = b.uint64()
		case formRefSup4:
			val = b.uint32()
		case formRefSup8:
			val = b.uint64()

		// loclist
		case formLoclistx:
			val = b.uint()

		// rnglist
		case formRnglistx:
			off := b.uint()

			// We have to adjust by the rnglists_base of
			// the compilation unit. This won't work if
			// the program uses Reader.Seek to skip over
			// the unit. Not much we can do about that.
			var rnglistsBase int64
			if cu != nil {
				rnglistsBase, _ = cu.Val(AttrRnglistsBase).(int64)
			} else if a.tag == TagCompileUnit {
				delay = append(delay, delayed{i, off, formRnglistx})
				break
			}

			val = resolveRnglistx(uint64(rnglistsBase), off)
		}

		e.Field[i].Val = val
	}
	if b.err != nil {
		return nil
	}

	// Every field of the compilation unit entry has been read, so the
	// base attributes can now be taken from e itself to resolve the
	// values that were postponed above.
	for _, del := range delay {
		switch del.fmt {
		case formAddrx:
			addrBase, _ := e.Val(AttrAddrBase).(int64)
			val, err := b.dwarf.debugAddr(b.format, uint64(addrBase), del.off)
			if err != nil {
				b.err = err
				return nil
			}
			e.Field[del.idx].Val = val
		case formStrx:
			strBase, _ := e.Val(AttrStrOffsetsBase).(int64)
			e.Field[del.idx].Val = resolveStrx(uint64(strBase), del.off)
			if b.err != nil {
				return nil
			}
		case formRnglistx:
			rnglistsBase, _ := e.Val(AttrRnglistsBase).(int64)
			e.Field[del.idx].Val = resolveRnglistx(uint64(rnglistsBase), del.off)
			if b.err != nil {
				return nil
			}
		}
	}

	return e
}
793
// A Reader allows reading [Entry] structures from a DWARF “info” section.
// The [Entry] structures are arranged in a tree. The [Reader.Next] function
// returns successive entries from a pre-order traversal of the tree.
// If an entry has children, its Children field will be true, and the children
// follow, terminated by an [Entry] with [Tag] 0.
type Reader struct {
	b            buf    // remaining data of the current unit
	d            *Data  // DWARF data being read
	err          error  // sticky error returned by Next; cleared by Seek and SeekPC
	unit         int    // index into d.unit of the current unit
	lastUnit     bool   // set if last entry returned by Next is TagCompileUnit/TagPartialUnit
	lastChildren bool   // .Children of last entry returned by Next
	lastSibling  Offset // .Val(AttrSibling) of last entry returned by Next
	cu           *Entry // current compilation unit
}
809
810// Reader returns a new Reader for [Data].
811// The reader is positioned at byte offset 0 in the DWARF “info” section.
812func (d *Data) Reader() *Reader {
813	r := &Reader{d: d}
814	r.Seek(0)
815	return r
816}
817
818// AddressSize returns the size in bytes of addresses in the current compilation
819// unit.
820func (r *Reader) AddressSize() int {
821	return r.d.unit[r.unit].asize
822}
823
824// ByteOrder returns the byte order in the current compilation unit.
825func (r *Reader) ByteOrder() binary.ByteOrder {
826	return r.b.order
827}
828
829// Seek positions the [Reader] at offset off in the encoded entry stream.
830// Offset 0 can be used to denote the first entry.
831func (r *Reader) Seek(off Offset) {
832	d := r.d
833	r.err = nil
834	r.lastChildren = false
835	if off == 0 {
836		if len(d.unit) == 0 {
837			return
838		}
839		u := &d.unit[0]
840		r.unit = 0
841		r.b = makeBuf(r.d, u, "info", u.off, u.data)
842		r.cu = nil
843		return
844	}
845
846	i := d.offsetToUnit(off)
847	if i == -1 {
848		r.err = errors.New("offset out of range")
849		return
850	}
851	if i != r.unit {
852		r.cu = nil
853	}
854	u := &d.unit[i]
855	r.unit = i
856	r.b = makeBuf(r.d, u, "info", off, u.data[off-u.off:])
857}
858
859// maybeNextUnit advances to the next unit if this one is finished.
860func (r *Reader) maybeNextUnit() {
861	for len(r.b.data) == 0 && r.unit+1 < len(r.d.unit) {
862		r.nextUnit()
863	}
864}
865
866// nextUnit advances to the next unit.
867func (r *Reader) nextUnit() {
868	r.unit++
869	u := &r.d.unit[r.unit]
870	r.b = makeBuf(r.d, u, "info", u.off, u.data)
871	r.cu = nil
872}
873
874// Next reads the next entry from the encoded entry stream.
875// It returns nil, nil when it reaches the end of the section.
876// It returns an error if the current offset is invalid or the data at the
877// offset cannot be decoded as a valid [Entry].
878func (r *Reader) Next() (*Entry, error) {
879	if r.err != nil {
880		return nil, r.err
881	}
882	r.maybeNextUnit()
883	if len(r.b.data) == 0 {
884		return nil, nil
885	}
886	u := &r.d.unit[r.unit]
887	e := r.b.entry(r.cu, u.atable, u.base, u.vers)
888	if r.b.err != nil {
889		r.err = r.b.err
890		return nil, r.err
891	}
892	r.lastUnit = false
893	if e != nil {
894		r.lastChildren = e.Children
895		if r.lastChildren {
896			r.lastSibling, _ = e.Val(AttrSibling).(Offset)
897		}
898		if e.Tag == TagCompileUnit || e.Tag == TagPartialUnit {
899			r.lastUnit = true
900			r.cu = e
901		}
902	} else {
903		r.lastChildren = false
904	}
905	return e, nil
906}
907
908// SkipChildren skips over the child entries associated with
909// the last [Entry] returned by [Reader.Next]. If that [Entry] did not have
910// children or [Reader.Next] has not been called, SkipChildren is a no-op.
911func (r *Reader) SkipChildren() {
912	if r.err != nil || !r.lastChildren {
913		return
914	}
915
916	// If the last entry had a sibling attribute,
917	// that attribute gives the offset of the next
918	// sibling, so we can avoid decoding the
919	// child subtrees.
920	if r.lastSibling >= r.b.off {
921		r.Seek(r.lastSibling)
922		return
923	}
924
925	if r.lastUnit && r.unit+1 < len(r.d.unit) {
926		r.nextUnit()
927		return
928	}
929
930	for {
931		e, err := r.Next()
932		if err != nil || e == nil || e.Tag == 0 {
933			break
934		}
935		if e.Children {
936			r.SkipChildren()
937		}
938	}
939}
940
941// clone returns a copy of the reader. This is used by the typeReader
942// interface.
943func (r *Reader) clone() typeReader {
944	return r.d.Reader()
945}
946
947// offset returns the current buffer offset. This is used by the
948// typeReader interface.
949func (r *Reader) offset() Offset {
950	return r.b.off
951}
952
953// SeekPC returns the [Entry] for the compilation unit that includes pc,
954// and positions the reader to read the children of that unit.  If pc
955// is not covered by any unit, SeekPC returns [ErrUnknownPC] and the
956// position of the reader is undefined.
957//
958// Because compilation units can describe multiple regions of the
959// executable, in the worst case SeekPC must search through all the
960// ranges in all the compilation units. Each call to SeekPC starts the
961// search at the compilation unit of the last call, so in general
962// looking up a series of PCs will be faster if they are sorted. If
963// the caller wishes to do repeated fast PC lookups, it should build
964// an appropriate index using the Ranges method.
965func (r *Reader) SeekPC(pc uint64) (*Entry, error) {
966	unit := r.unit
967	for i := 0; i < len(r.d.unit); i++ {
968		if unit >= len(r.d.unit) {
969			unit = 0
970		}
971		r.err = nil
972		r.lastChildren = false
973		r.unit = unit
974		r.cu = nil
975		u := &r.d.unit[unit]
976		r.b = makeBuf(r.d, u, "info", u.off, u.data)
977		e, err := r.Next()
978		if err != nil {
979			return nil, err
980		}
981		if e == nil || e.Tag == 0 {
982			return nil, ErrUnknownPC
983		}
984		ranges, err := r.d.Ranges(e)
985		if err != nil {
986			return nil, err
987		}
988		for _, pcs := range ranges {
989			if pcs[0] <= pc && pc < pcs[1] {
990				return e, nil
991			}
992		}
993		unit++
994	}
995	return nil, ErrUnknownPC
996}
997
998// Ranges returns the PC ranges covered by e, a slice of [low,high) pairs.
999// Only some entry types, such as [TagCompileUnit] or [TagSubprogram], have PC
1000// ranges; for others, this will return nil with no error.
1001func (d *Data) Ranges(e *Entry) ([][2]uint64, error) {
1002	var ret [][2]uint64
1003
1004	low, lowOK := e.Val(AttrLowpc).(uint64)
1005
1006	var high uint64
1007	var highOK bool
1008	highField := e.AttrField(AttrHighpc)
1009	if highField != nil {
1010		switch highField.Class {
1011		case ClassAddress:
1012			high, highOK = highField.Val.(uint64)
1013		case ClassConstant:
1014			off, ok := highField.Val.(int64)
1015			if ok {
1016				high = low + uint64(off)
1017				highOK = true
1018			}
1019		}
1020	}
1021
1022	if lowOK && highOK {
1023		ret = append(ret, [2]uint64{low, high})
1024	}
1025
1026	var u *unit
1027	if uidx := d.offsetToUnit(e.Offset); uidx >= 0 && uidx < len(d.unit) {
1028		u = &d.unit[uidx]
1029	}
1030
1031	if u != nil && u.vers >= 5 && d.rngLists != nil {
1032		// DWARF version 5 and later
1033		field := e.AttrField(AttrRanges)
1034		if field == nil {
1035			return ret, nil
1036		}
1037		switch field.Class {
1038		case ClassRangeListPtr:
1039			ranges, rangesOK := field.Val.(int64)
1040			if !rangesOK {
1041				return ret, nil
1042			}
1043			cu, base, err := d.baseAddressForEntry(e)
1044			if err != nil {
1045				return nil, err
1046			}
1047			return d.dwarf5Ranges(u, cu, base, ranges, ret)
1048
1049		case ClassRngList:
1050			rnglist, ok := field.Val.(uint64)
1051			if !ok {
1052				return ret, nil
1053			}
1054			cu, base, err := d.baseAddressForEntry(e)
1055			if err != nil {
1056				return nil, err
1057			}
1058			return d.dwarf5Ranges(u, cu, base, int64(rnglist), ret)
1059
1060		default:
1061			return ret, nil
1062		}
1063	}
1064
1065	// DWARF version 2 through 4
1066	ranges, rangesOK := e.Val(AttrRanges).(int64)
1067	if rangesOK && d.ranges != nil {
1068		_, base, err := d.baseAddressForEntry(e)
1069		if err != nil {
1070			return nil, err
1071		}
1072		return d.dwarf2Ranges(u, base, ranges, ret)
1073	}
1074
1075	return ret, nil
1076}
1077
1078// baseAddressForEntry returns the initial base address to be used when
1079// looking up the range list of entry e.
1080// DWARF specifies that this should be the lowpc attribute of the enclosing
1081// compilation unit, however comments in gdb/dwarf2read.c say that some
1082// versions of GCC use the entrypc attribute, so we check that too.
1083func (d *Data) baseAddressForEntry(e *Entry) (*Entry, uint64, error) {
1084	var cu *Entry
1085	if e.Tag == TagCompileUnit {
1086		cu = e
1087	} else {
1088		i := d.offsetToUnit(e.Offset)
1089		if i == -1 {
1090			return nil, 0, errors.New("no unit for entry")
1091		}
1092		u := &d.unit[i]
1093		b := makeBuf(d, u, "info", u.off, u.data)
1094		cu = b.entry(nil, u.atable, u.base, u.vers)
1095		if b.err != nil {
1096			return nil, 0, b.err
1097		}
1098	}
1099
1100	if cuEntry, cuEntryOK := cu.Val(AttrEntrypc).(uint64); cuEntryOK {
1101		return cu, cuEntry, nil
1102	} else if cuLow, cuLowOK := cu.Val(AttrLowpc).(uint64); cuLowOK {
1103		return cu, cuLow, nil
1104	}
1105
1106	return cu, 0, nil
1107}
1108
1109func (d *Data) dwarf2Ranges(u *unit, base uint64, ranges int64, ret [][2]uint64) ([][2]uint64, error) {
1110	if ranges < 0 || ranges > int64(len(d.ranges)) {
1111		return nil, fmt.Errorf("invalid range offset %d (max %d)", ranges, len(d.ranges))
1112	}
1113	buf := makeBuf(d, u, "ranges", Offset(ranges), d.ranges[ranges:])
1114	for len(buf.data) > 0 {
1115		low := buf.addr()
1116		high := buf.addr()
1117
1118		if low == 0 && high == 0 {
1119			break
1120		}
1121
1122		if low == ^uint64(0)>>uint((8-u.addrsize())*8) {
1123			base = high
1124		} else {
1125			ret = append(ret, [2]uint64{base + low, base + high})
1126		}
1127	}
1128
1129	return ret, nil
1130}
1131
1132// dwarf5Ranges interprets a debug_rnglists sequence, see DWARFv5 section
1133// 2.17.3 (page 53).
1134func (d *Data) dwarf5Ranges(u *unit, cu *Entry, base uint64, ranges int64, ret [][2]uint64) ([][2]uint64, error) {
1135	if ranges < 0 || ranges > int64(len(d.rngLists)) {
1136		return nil, fmt.Errorf("invalid rnglist offset %d (max %d)", ranges, len(d.ranges))
1137	}
1138	var addrBase int64
1139	if cu != nil {
1140		addrBase, _ = cu.Val(AttrAddrBase).(int64)
1141	}
1142
1143	buf := makeBuf(d, u, "rnglists", 0, d.rngLists)
1144	buf.skip(int(ranges))
1145	for {
1146		opcode := buf.uint8()
1147		switch opcode {
1148		case rleEndOfList:
1149			if buf.err != nil {
1150				return nil, buf.err
1151			}
1152			return ret, nil
1153
1154		case rleBaseAddressx:
1155			baseIdx := buf.uint()
1156			var err error
1157			base, err = d.debugAddr(u, uint64(addrBase), baseIdx)
1158			if err != nil {
1159				return nil, err
1160			}
1161
1162		case rleStartxEndx:
1163			startIdx := buf.uint()
1164			endIdx := buf.uint()
1165
1166			start, err := d.debugAddr(u, uint64(addrBase), startIdx)
1167			if err != nil {
1168				return nil, err
1169			}
1170			end, err := d.debugAddr(u, uint64(addrBase), endIdx)
1171			if err != nil {
1172				return nil, err
1173			}
1174			ret = append(ret, [2]uint64{start, end})
1175
1176		case rleStartxLength:
1177			startIdx := buf.uint()
1178			len := buf.uint()
1179			start, err := d.debugAddr(u, uint64(addrBase), startIdx)
1180			if err != nil {
1181				return nil, err
1182			}
1183			ret = append(ret, [2]uint64{start, start + len})
1184
1185		case rleOffsetPair:
1186			off1 := buf.uint()
1187			off2 := buf.uint()
1188			ret = append(ret, [2]uint64{base + off1, base + off2})
1189
1190		case rleBaseAddress:
1191			base = buf.addr()
1192
1193		case rleStartEnd:
1194			start := buf.addr()
1195			end := buf.addr()
1196			ret = append(ret, [2]uint64{start, end})
1197
1198		case rleStartLength:
1199			start := buf.addr()
1200			len := buf.uint()
1201			ret = append(ret, [2]uint64{start, start + len})
1202		}
1203	}
1204}
1205
1206// debugAddr returns the address at idx in debug_addr
1207func (d *Data) debugAddr(format dataFormat, addrBase, idx uint64) (uint64, error) {
1208	off := idx*uint64(format.addrsize()) + addrBase
1209
1210	if uint64(int(off)) != off {
1211		return 0, errors.New("offset out of range")
1212	}
1213
1214	b := makeBuf(d, format, "addr", 0, d.addr)
1215	b.skip(int(off))
1216	val := b.addr()
1217	if b.err != nil {
1218		return 0, b.err
1219	}
1220	return val, nil
1221}
1222