1// Copyright 2009 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5// Package gosym implements access to the Go symbol
6// and line number tables embedded in Go binaries generated
7// by the gc compilers.
8package gosym
9
10import (
11	"bytes"
12	"encoding/binary"
13	"fmt"
14	"strconv"
15	"strings"
16)
17
18/*
19 * Symbols
20 */
21
// A Sym represents a single symbol table entry.
//
// NewTable copies Value, Type and GoType straight from the decoded entry and
// points Func at the Table's Func for each text symbol it turns into a
// function. The unexported goVersion field records the version of the line
// table passed to NewTable; PackageName consults it to decide which reserved
// prefixes mark compiler-generated symbols.
type Sym struct {
	Value  uint64
	Type   byte
	Name   string
	GoType uint64
	// If this symbol is a function symbol, the corresponding Func
	Func *Func

	goVersion version
}
33
34// Static reports whether this symbol is static (not visible outside its file).
35func (s *Sym) Static() bool { return s.Type >= 'a' }
36
37// nameWithoutInst returns s.Name if s.Name has no brackets (does not reference an
38// instantiated type, function, or method). If s.Name contains brackets, then it
39// returns s.Name with all the contents between (and including) the outermost left
40// and right bracket removed. This is useful to ignore any extra slashes or dots
41// inside the brackets from the string searches below, where needed.
42func (s *Sym) nameWithoutInst() string {
43	start := strings.Index(s.Name, "[")
44	if start < 0 {
45		return s.Name
46	}
47	end := strings.LastIndex(s.Name, "]")
48	if end < 0 {
49		// Malformed name, should contain closing bracket too.
50		return s.Name
51	}
52	return s.Name[0:start] + s.Name[end+1:]
53}
54
55// PackageName returns the package part of the symbol name,
56// or the empty string if there is none.
57func (s *Sym) PackageName() string {
58	name := s.nameWithoutInst()
59
60	// Since go1.20, a prefix of "type:" and "go:" is a compiler-generated symbol,
61	// they do not belong to any package.
62	//
63	// See cmd/compile/internal/base/link.go:ReservedImports variable.
64	if s.goVersion >= ver120 && (strings.HasPrefix(name, "go:") || strings.HasPrefix(name, "type:")) {
65		return ""
66	}
67
68	// For go1.18 and below, the prefix are "type." and "go." instead.
69	if s.goVersion <= ver118 && (strings.HasPrefix(name, "go.") || strings.HasPrefix(name, "type.")) {
70		return ""
71	}
72
73	pathend := strings.LastIndex(name, "/")
74	if pathend < 0 {
75		pathend = 0
76	}
77
78	if i := strings.Index(name[pathend:], "."); i != -1 {
79		return name[:pathend+i]
80	}
81	return ""
82}
83
84// ReceiverName returns the receiver type name of this symbol,
85// or the empty string if there is none.  A receiver name is only detected in
86// the case that s.Name is fully-specified with a package name.
87func (s *Sym) ReceiverName() string {
88	name := s.nameWithoutInst()
89	// If we find a slash in name, it should precede any bracketed expression
90	// that was removed, so pathend will apply correctly to name and s.Name.
91	pathend := strings.LastIndex(name, "/")
92	if pathend < 0 {
93		pathend = 0
94	}
95	// Find the first dot after pathend (or from the beginning, if there was
96	// no slash in name).
97	l := strings.Index(name[pathend:], ".")
98	// Find the last dot after pathend (or the beginning).
99	r := strings.LastIndex(name[pathend:], ".")
100	if l == -1 || r == -1 || l == r {
101		// There is no receiver if we didn't find two distinct dots after pathend.
102		return ""
103	}
104	// Given there is a trailing '.' that is in name, find it now in s.Name.
105	// pathend+l should apply to s.Name, because it should be the dot in the
106	// package name.
107	r = strings.LastIndex(s.Name[pathend:], ".")
108	return s.Name[pathend+l+1 : pathend+r]
109}
110
111// BaseName returns the symbol name without the package or receiver name.
112func (s *Sym) BaseName() string {
113	name := s.nameWithoutInst()
114	if i := strings.LastIndex(name, "."); i != -1 {
115		if s.Name != name {
116			brack := strings.Index(s.Name, "[")
117			if i > brack {
118				// BaseName is a method name after the brackets, so
119				// recalculate for s.Name. Otherwise, i applies
120				// correctly to s.Name, since it is before the
121				// brackets.
122				i = strings.LastIndex(s.Name, ".")
123			}
124		}
125		return s.Name[i+1:]
126	}
127	return s.Name
128}
129
// A Func collects information about a single function.
//
// When NewTable builds a Func from symbol data, Entry is the value of the
// function's text symbol and End is set to the value of the text symbol that
// follows it. FrameSize, Params and Locals are taken from the 'm', 'p' and
// 'a' symbols that sit between the two.
type Func struct {
	Entry uint64
	*Sym
	End       uint64
	Params    []*Sym // nil for Go 1.3 and later binaries
	Locals    []*Sym // nil for Go 1.3 and later binaries
	FrameSize int
	LineTable *LineTable
	Obj       *Obj
}
141
// An Obj represents a collection of functions in a symbol table.
//
// The exact method of division of a binary into separate Objs is an internal detail
// of the symbol table format.
//
// In early versions of Go each source file became a different Obj.
//
// In Go 1 and Go 1.1, each package produced one Obj for all Go sources
// and one Obj per C source file.
//
// In Go 1.2, there is a single Obj for the entire program.
type Obj struct {
	// Funcs is a list of functions in the Obj.
	// NewTable sets it to a sub-slice of Table.Funcs rather than a copy.
	Funcs []Func

	// In Go 1.1 and earlier, Paths is a list of symbols corresponding
	// to the source file names that produced the Obj.
	// In Go 1.2, Paths is nil.
	// Use the keys of Table.Files to obtain a list of source files.
	Paths []Sym // meta
}
163
164/*
165 * Symbol tables
166 */
167
// Table represents a Go symbol table. It stores all of the
// symbols decoded from the program and provides methods to translate
// between symbols, names, and addresses.
//
// The unexported go12line field is set by NewTable when the supplied
// LineTable reports the Go 1.2 format. When it is set, PCToLine and LineToPC
// answer from that line table instead of from the per-Obj path symbols.
type Table struct {
	Syms  []Sym // nil for Go 1.3 and later binaries
	Funcs []Func
	Files map[string]*Obj // for Go 1.2 and later all files map to one Obj
	Objs  []Obj           // for Go 1.2 and later only one Obj in slice

	go12line *LineTable // Go 1.2 line number table
}
179
// sym is one raw symbol table entry as decoded by walksymtab, before
// NewTable converts it into a Sym. The name field is a sub-slice of the
// table data passed to walksymtab, not a copy.
type sym struct {
	value  uint64
	gotype uint64
	typ    byte
	name   []byte
}
186
// Magic prefixes that walksymtab uses to recognize the symbol table layout.
// The two 7-byte prefixes select the newer layout, in which the byte that
// follows the prefix gives the pointer size. The 6-byte prefix selects the
// older layout with little-endian values. Data that starts with none of them
// is decoded as the older layout with big-endian values.
var (
	littleEndianSymtab    = []byte{0xFD, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00}
	bigEndianSymtab       = []byte{0xFF, 0xFF, 0xFF, 0xFD, 0x00, 0x00, 0x00}
	oldLittleEndianSymtab = []byte{0xFE, 0xFF, 0xFF, 0xFF, 0x00, 0x00}
)
192
193func walksymtab(data []byte, fn func(sym) error) error {
194	if len(data) == 0 { // missing symtab is okay
195		return nil
196	}
197	var order binary.ByteOrder = binary.BigEndian
198	newTable := false
199	switch {
200	case bytes.HasPrefix(data, oldLittleEndianSymtab):
201		// Same as Go 1.0, but little endian.
202		// Format was used during interim development between Go 1.0 and Go 1.1.
203		// Should not be widespread, but easy to support.
204		data = data[6:]
205		order = binary.LittleEndian
206	case bytes.HasPrefix(data, bigEndianSymtab):
207		newTable = true
208	case bytes.HasPrefix(data, littleEndianSymtab):
209		newTable = true
210		order = binary.LittleEndian
211	}
212	var ptrsz int
213	if newTable {
214		if len(data) < 8 {
215			return &DecodingError{len(data), "unexpected EOF", nil}
216		}
217		ptrsz = int(data[7])
218		if ptrsz != 4 && ptrsz != 8 {
219			return &DecodingError{7, "invalid pointer size", ptrsz}
220		}
221		data = data[8:]
222	}
223	var s sym
224	p := data
225	for len(p) >= 4 {
226		var typ byte
227		if newTable {
228			// Symbol type, value, Go type.
229			typ = p[0] & 0x3F
230			wideValue := p[0]&0x40 != 0
231			goType := p[0]&0x80 != 0
232			if typ < 26 {
233				typ += 'A'
234			} else {
235				typ += 'a' - 26
236			}
237			s.typ = typ
238			p = p[1:]
239			if wideValue {
240				if len(p) < ptrsz {
241					return &DecodingError{len(data), "unexpected EOF", nil}
242				}
243				// fixed-width value
244				if ptrsz == 8 {
245					s.value = order.Uint64(p[0:8])
246					p = p[8:]
247				} else {
248					s.value = uint64(order.Uint32(p[0:4]))
249					p = p[4:]
250				}
251			} else {
252				// varint value
253				s.value = 0
254				shift := uint(0)
255				for len(p) > 0 && p[0]&0x80 != 0 {
256					s.value |= uint64(p[0]&0x7F) << shift
257					shift += 7
258					p = p[1:]
259				}
260				if len(p) == 0 {
261					return &DecodingError{len(data), "unexpected EOF", nil}
262				}
263				s.value |= uint64(p[0]) << shift
264				p = p[1:]
265			}
266			if goType {
267				if len(p) < ptrsz {
268					return &DecodingError{len(data), "unexpected EOF", nil}
269				}
270				// fixed-width go type
271				if ptrsz == 8 {
272					s.gotype = order.Uint64(p[0:8])
273					p = p[8:]
274				} else {
275					s.gotype = uint64(order.Uint32(p[0:4]))
276					p = p[4:]
277				}
278			}
279		} else {
280			// Value, symbol type.
281			s.value = uint64(order.Uint32(p[0:4]))
282			if len(p) < 5 {
283				return &DecodingError{len(data), "unexpected EOF", nil}
284			}
285			typ = p[4]
286			if typ&0x80 == 0 {
287				return &DecodingError{len(data) - len(p) + 4, "bad symbol type", typ}
288			}
289			typ &^= 0x80
290			s.typ = typ
291			p = p[5:]
292		}
293
294		// Name.
295		var i int
296		var nnul int
297		for i = 0; i < len(p); i++ {
298			if p[i] == 0 {
299				nnul = 1
300				break
301			}
302		}
303		switch typ {
304		case 'z', 'Z':
305			p = p[i+nnul:]
306			for i = 0; i+2 <= len(p); i += 2 {
307				if p[i] == 0 && p[i+1] == 0 {
308					nnul = 2
309					break
310				}
311			}
312		}
313		if len(p) < i+nnul {
314			return &DecodingError{len(data), "unexpected EOF", nil}
315		}
316		s.name = p[0:i]
317		i += nnul
318		p = p[i:]
319
320		if !newTable {
321			if len(p) < 4 {
322				return &DecodingError{len(data), "unexpected EOF", nil}
323			}
324			// Go type.
325			s.gotype = uint64(order.Uint32(p[:4]))
326			p = p[4:]
327		}
328		fn(s)
329	}
330	return nil
331}
332
333// NewTable decodes the Go symbol table (the ".gosymtab" section in ELF),
334// returning an in-memory representation.
335// Starting with Go 1.3, the Go symbol table no longer includes symbol data.
336func NewTable(symtab []byte, pcln *LineTable) (*Table, error) {
337	var n int
338	err := walksymtab(symtab, func(s sym) error {
339		n++
340		return nil
341	})
342	if err != nil {
343		return nil, err
344	}
345
346	var t Table
347	if pcln.isGo12() {
348		t.go12line = pcln
349	}
350	fname := make(map[uint16]string)
351	t.Syms = make([]Sym, 0, n)
352	nf := 0
353	nz := 0
354	lasttyp := uint8(0)
355	err = walksymtab(symtab, func(s sym) error {
356		n := len(t.Syms)
357		t.Syms = t.Syms[0 : n+1]
358		ts := &t.Syms[n]
359		ts.Type = s.typ
360		ts.Value = s.value
361		ts.GoType = s.gotype
362		ts.goVersion = pcln.version
363		switch s.typ {
364		default:
365			// rewrite name to use . instead of · (c2 b7)
366			w := 0
367			b := s.name
368			for i := 0; i < len(b); i++ {
369				if b[i] == 0xc2 && i+1 < len(b) && b[i+1] == 0xb7 {
370					i++
371					b[i] = '.'
372				}
373				b[w] = b[i]
374				w++
375			}
376			ts.Name = string(s.name[0:w])
377		case 'z', 'Z':
378			if lasttyp != 'z' && lasttyp != 'Z' {
379				nz++
380			}
381			for i := 0; i < len(s.name); i += 2 {
382				eltIdx := binary.BigEndian.Uint16(s.name[i : i+2])
383				elt, ok := fname[eltIdx]
384				if !ok {
385					return &DecodingError{-1, "bad filename code", eltIdx}
386				}
387				if n := len(ts.Name); n > 0 && ts.Name[n-1] != '/' {
388					ts.Name += "/"
389				}
390				ts.Name += elt
391			}
392		}
393		switch s.typ {
394		case 'T', 't', 'L', 'l':
395			nf++
396		case 'f':
397			fname[uint16(s.value)] = ts.Name
398		}
399		lasttyp = s.typ
400		return nil
401	})
402	if err != nil {
403		return nil, err
404	}
405
406	t.Funcs = make([]Func, 0, nf)
407	t.Files = make(map[string]*Obj)
408
409	var obj *Obj
410	if t.go12line != nil {
411		// Put all functions into one Obj.
412		t.Objs = make([]Obj, 1)
413		obj = &t.Objs[0]
414		t.go12line.go12MapFiles(t.Files, obj)
415	} else {
416		t.Objs = make([]Obj, 0, nz)
417	}
418
419	// Count text symbols and attach frame sizes, parameters, and
420	// locals to them. Also, find object file boundaries.
421	lastf := 0
422	for i := 0; i < len(t.Syms); i++ {
423		sym := &t.Syms[i]
424		switch sym.Type {
425		case 'Z', 'z': // path symbol
426			if t.go12line != nil {
427				// Go 1.2 binaries have the file information elsewhere. Ignore.
428				break
429			}
430			// Finish the current object
431			if obj != nil {
432				obj.Funcs = t.Funcs[lastf:]
433			}
434			lastf = len(t.Funcs)
435
436			// Start new object
437			n := len(t.Objs)
438			t.Objs = t.Objs[0 : n+1]
439			obj = &t.Objs[n]
440
441			// Count & copy path symbols
442			var end int
443			for end = i + 1; end < len(t.Syms); end++ {
444				if c := t.Syms[end].Type; c != 'Z' && c != 'z' {
445					break
446				}
447			}
448			obj.Paths = t.Syms[i:end]
449			i = end - 1 // loop will i++
450
451			// Record file names
452			depth := 0
453			for j := range obj.Paths {
454				s := &obj.Paths[j]
455				if s.Name == "" {
456					depth--
457				} else {
458					if depth == 0 {
459						t.Files[s.Name] = obj
460					}
461					depth++
462				}
463			}
464
465		case 'T', 't', 'L', 'l': // text symbol
466			if n := len(t.Funcs); n > 0 {
467				t.Funcs[n-1].End = sym.Value
468			}
469			if sym.Name == "runtime.etext" || sym.Name == "etext" {
470				continue
471			}
472
473			// Count parameter and local (auto) syms
474			var np, na int
475			var end int
476		countloop:
477			for end = i + 1; end < len(t.Syms); end++ {
478				switch t.Syms[end].Type {
479				case 'T', 't', 'L', 'l', 'Z', 'z':
480					break countloop
481				case 'p':
482					np++
483				case 'a':
484					na++
485				}
486			}
487
488			// Fill in the function symbol
489			n := len(t.Funcs)
490			t.Funcs = t.Funcs[0 : n+1]
491			fn := &t.Funcs[n]
492			sym.Func = fn
493			fn.Params = make([]*Sym, 0, np)
494			fn.Locals = make([]*Sym, 0, na)
495			fn.Sym = sym
496			fn.Entry = sym.Value
497			fn.Obj = obj
498			if t.go12line != nil {
499				// All functions share the same line table.
500				// It knows how to narrow down to a specific
501				// function quickly.
502				fn.LineTable = t.go12line
503			} else if pcln != nil {
504				fn.LineTable = pcln.slice(fn.Entry)
505				pcln = fn.LineTable
506			}
507			for j := i; j < end; j++ {
508				s := &t.Syms[j]
509				switch s.Type {
510				case 'm':
511					fn.FrameSize = int(s.Value)
512				case 'p':
513					n := len(fn.Params)
514					fn.Params = fn.Params[0 : n+1]
515					fn.Params[n] = s
516				case 'a':
517					n := len(fn.Locals)
518					fn.Locals = fn.Locals[0 : n+1]
519					fn.Locals[n] = s
520				}
521			}
522			i = end - 1 // loop will i++
523		}
524	}
525
526	if t.go12line != nil && nf == 0 {
527		t.Funcs = t.go12line.go12Funcs()
528	}
529	if obj != nil {
530		obj.Funcs = t.Funcs[lastf:]
531	}
532	return &t, nil
533}
534
535// PCToFunc returns the function containing the program counter pc,
536// or nil if there is no such function.
537func (t *Table) PCToFunc(pc uint64) *Func {
538	funcs := t.Funcs
539	for len(funcs) > 0 {
540		m := len(funcs) / 2
541		fn := &funcs[m]
542		switch {
543		case pc < fn.Entry:
544			funcs = funcs[0:m]
545		case fn.Entry <= pc && pc < fn.End:
546			return fn
547		default:
548			funcs = funcs[m+1:]
549		}
550	}
551	return nil
552}
553
554// PCToLine looks up line number information for a program counter.
555// If there is no information, it returns fn == nil.
556func (t *Table) PCToLine(pc uint64) (file string, line int, fn *Func) {
557	if fn = t.PCToFunc(pc); fn == nil {
558		return
559	}
560	if t.go12line != nil {
561		file = t.go12line.go12PCToFile(pc)
562		line = t.go12line.go12PCToLine(pc)
563	} else {
564		file, line = fn.Obj.lineFromAline(fn.LineTable.PCToLine(pc))
565	}
566	return
567}
568
569// LineToPC looks up the first program counter on the given line in
570// the named file. It returns [UnknownFileError] or [UnknownLineError] if
571// there is an error looking up this line.
572func (t *Table) LineToPC(file string, line int) (pc uint64, fn *Func, err error) {
573	obj, ok := t.Files[file]
574	if !ok {
575		return 0, nil, UnknownFileError(file)
576	}
577
578	if t.go12line != nil {
579		pc := t.go12line.go12LineToPC(file, line)
580		if pc == 0 {
581			return 0, nil, &UnknownLineError{file, line}
582		}
583		return pc, t.PCToFunc(pc), nil
584	}
585
586	abs, err := obj.alineFromLine(file, line)
587	if err != nil {
588		return
589	}
590	for i := range obj.Funcs {
591		f := &obj.Funcs[i]
592		pc := f.LineTable.LineToPC(abs, f.End)
593		if pc != 0 {
594			return pc, f, nil
595		}
596	}
597	return 0, nil, &UnknownLineError{file, line}
598}
599
600// LookupSym returns the text, data, or bss symbol with the given name,
601// or nil if no such symbol is found.
602func (t *Table) LookupSym(name string) *Sym {
603	// TODO(austin) Maybe make a map
604	for i := range t.Syms {
605		s := &t.Syms[i]
606		switch s.Type {
607		case 'T', 't', 'L', 'l', 'D', 'd', 'B', 'b':
608			if s.Name == name {
609				return s
610			}
611		}
612	}
613	return nil
614}
615
616// LookupFunc returns the text, data, or bss symbol with the given name,
617// or nil if no such symbol is found.
618func (t *Table) LookupFunc(name string) *Func {
619	for i := range t.Funcs {
620		f := &t.Funcs[i]
621		if f.Sym.Name == name {
622			return f
623		}
624	}
625	return nil
626}
627
628// SymByAddr returns the text, data, or bss symbol starting at the given address.
629func (t *Table) SymByAddr(addr uint64) *Sym {
630	for i := range t.Syms {
631		s := &t.Syms[i]
632		switch s.Type {
633		case 'T', 't', 'L', 'l', 'D', 'd', 'B', 'b':
634			if s.Value == addr {
635				return s
636			}
637		}
638	}
639	return nil
640}
641
642/*
643 * Object files
644 */
645
646// This is legacy code for Go 1.1 and earlier, which used the
647// Plan 9 format for pc-line tables. This code was never quite
648// correct. It's probably very close, and it's usually correct, but
649// we never quite found all the corner cases.
650//
651// Go 1.2 and later use a simpler format, documented at golang.org/s/go12symtab.
652
653func (o *Obj) lineFromAline(aline int) (string, int) {
654	type stackEnt struct {
655		path   string
656		start  int
657		offset int
658		prev   *stackEnt
659	}
660
661	noPath := &stackEnt{"", 0, 0, nil}
662	tos := noPath
663
664pathloop:
665	for _, s := range o.Paths {
666		val := int(s.Value)
667		switch {
668		case val > aline:
669			break pathloop
670
671		case val == 1:
672			// Start a new stack
673			tos = &stackEnt{s.Name, val, 0, noPath}
674
675		case s.Name == "":
676			// Pop
677			if tos == noPath {
678				return "<malformed symbol table>", 0
679			}
680			tos.prev.offset += val - tos.start
681			tos = tos.prev
682
683		default:
684			// Push
685			tos = &stackEnt{s.Name, val, 0, tos}
686		}
687	}
688
689	if tos == noPath {
690		return "", 0
691	}
692	return tos.path, aline - tos.start - tos.offset + 1
693}
694
695func (o *Obj) alineFromLine(path string, line int) (int, error) {
696	if line < 1 {
697		return 0, &UnknownLineError{path, line}
698	}
699
700	for i, s := range o.Paths {
701		// Find this path
702		if s.Name != path {
703			continue
704		}
705
706		// Find this line at this stack level
707		depth := 0
708		var incstart int
709		line += int(s.Value)
710	pathloop:
711		for _, s := range o.Paths[i:] {
712			val := int(s.Value)
713			switch {
714			case depth == 1 && val >= line:
715				return line - 1, nil
716
717			case s.Name == "":
718				depth--
719				if depth == 0 {
720					break pathloop
721				} else if depth == 1 {
722					line += val - incstart
723				}
724
725			default:
726				if depth == 1 {
727					incstart = val
728				}
729				depth++
730			}
731		}
732		return 0, &UnknownLineError{path, line}
733	}
734	return 0, UnknownFileError(path)
735}
736
737/*
738 * Errors
739 */
740
// UnknownFileError represents a failure to find the specific file in
// the symbol table. Its value is the name of the file that was not found.
type UnknownFileError string

// Error returns the message "unknown file: " followed by the file name.
func (e UnknownFileError) Error() string {
	file := string(e)
	return "unknown file: " + file
}
746
// UnknownLineError represents a failure to map a line to a program
// counter, either because the line is beyond the bounds of the file
// or because there is no code on the given line.
type UnknownLineError struct {
	File string
	Line int
}

// Error returns a message of the form "no code at FILE:LINE".
func (e *UnknownLineError) Error() string {
	lineText := strconv.Itoa(e.Line)
	return "no code at " + e.File + ":" + lineText
}
758
// DecodingError represents an error during the decoding of
// the symbol table.
type DecodingError struct {
	off int
	msg string
	val any
}

// Error returns the message, followed by the offending value in single
// quotes when one was recorded, followed by the byte offset in hexadecimal.
func (e *DecodingError) Error() string {
	var detail string
	if e.val != nil {
		detail = fmt.Sprintf(" '%v'", e.val)
	}
	return e.msg + detail + fmt.Sprintf(" at byte %#x", e.off)
}
775