1// Copyright 2014 Google Inc. All Rights Reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15package binutils
16
17import (
18	"bufio"
19	"fmt"
20	"io"
21	"os/exec"
22	"strconv"
23	"strings"
24	"sync"
25
26	"github.com/google/pprof/internal/plugin"
27)
28
// defaultAddr2line is the command run when the caller does not name one.
const defaultAddr2line = "addr2line"

// sentinel is the all-ones 64-bit address. addr2line may print several
// lines for a single address, so this value is sent after every real
// query and its appearance in the output marks the end of the reply.
const sentinel = ^uint64(0)
36
37// addr2Liner is a connection to an addr2line command for obtaining
38// address and line number information from a binary.
39type addr2Liner struct {
40	mu   sync.Mutex
41	rw   lineReaderWriter
42	base uint64
43
44	// nm holds an addr2Liner using nm tool. Certain versions of addr2line
45	// produce incomplete names due to
46	// https://sourceware.org/bugzilla/show_bug.cgi?id=17541. As a workaround,
47	// the names from nm are used when they look more complete. See addrInfo()
48	// code below for the exact heuristic.
49	nm *addr2LinerNM
50}
51
// lineReaderWriter abstracts the line-oriented I/O with an addr2line
// process: input is written to the job one line at a time and its
// output is read back one line at a time.
type lineReaderWriter interface {
	// write sends a single line of input to the job.
	write(line string) error
	// readLine returns the next line of the job's output.
	readLine() (line string, err error)
	// close releases whatever the implementation holds open.
	close()
}
60
// addr2LinerJob talks to an external command through its standard input
// and standard output, one line at a time.
type addr2LinerJob struct {
	cmd *exec.Cmd      // the command; waited on by close
	in  io.WriteCloser // destination for write; closed by close
	out *bufio.Reader  // source for readLine
}

// write sends s to the command followed by a newline.
func (a *addr2LinerJob) write(s string) error {
	_, err := fmt.Fprintln(a.in, s)
	return err
}

// readLine reads up to and including the next newline and returns the
// text with surrounding white space removed. If the read fails, including
// when the input ends before a newline is seen, it returns an empty
// string and the error.
func (a *addr2LinerJob) readLine() (string, error) {
	line, err := a.out.ReadString('\n')
	if err == nil {
		return strings.TrimSpace(line), nil
	}
	return "", err
}

// close releases any resources used by the addr2LinerJob: it closes the
// command's input and then waits for the command. Both results are
// discarded, as close has no return value to report them through.
func (a *addr2LinerJob) close() {
	_ = a.in.Close()
	_ = a.cmd.Wait()
}
85
86// newAddr2Liner starts the given addr2liner command reporting
87// information about the given executable file. If file is a shared
88// library, base should be the address at which it was mapped in the
89// program under consideration.
90func newAddr2Liner(cmd, file string, base uint64) (*addr2Liner, error) {
91	if cmd == "" {
92		cmd = defaultAddr2line
93	}
94
95	j := &addr2LinerJob{
96		cmd: exec.Command(cmd, "-aif", "-e", file),
97	}
98
99	var err error
100	if j.in, err = j.cmd.StdinPipe(); err != nil {
101		return nil, err
102	}
103
104	outPipe, err := j.cmd.StdoutPipe()
105	if err != nil {
106		return nil, err
107	}
108
109	j.out = bufio.NewReader(outPipe)
110	if err := j.cmd.Start(); err != nil {
111		return nil, err
112	}
113
114	a := &addr2Liner{
115		rw:   j,
116		base: base,
117	}
118
119	return a, nil
120}
121
122// readFrame parses the addr2line output for a single address. It
123// returns a populated plugin.Frame and whether it has reached the end of the
124// data.
125func (d *addr2Liner) readFrame() (plugin.Frame, bool) {
126	funcname, err := d.rw.readLine()
127	if err != nil {
128		return plugin.Frame{}, true
129	}
130	if strings.HasPrefix(funcname, "0x") {
131		// If addr2line returns a hex address we can assume it is the
132		// sentinel. Read and ignore next two lines of output from
133		// addr2line
134		d.rw.readLine()
135		d.rw.readLine()
136		return plugin.Frame{}, true
137	}
138
139	fileline, err := d.rw.readLine()
140	if err != nil {
141		return plugin.Frame{}, true
142	}
143
144	linenumber := 0
145
146	if funcname == "??" {
147		funcname = ""
148	}
149
150	if fileline == "??:0" {
151		fileline = ""
152	} else {
153		if i := strings.LastIndex(fileline, ":"); i >= 0 {
154			// Remove discriminator, if present
155			if disc := strings.Index(fileline, " (discriminator"); disc > 0 {
156				fileline = fileline[:disc]
157			}
158			// If we cannot parse a number after the last ":", keep it as
159			// part of the filename.
160			if line, err := strconv.Atoi(fileline[i+1:]); err == nil {
161				linenumber = line
162				fileline = fileline[:i]
163			}
164		}
165	}
166
167	return plugin.Frame{
168		Func: funcname,
169		File: fileline,
170		Line: linenumber}, false
171}
172
173func (d *addr2Liner) rawAddrInfo(addr uint64) ([]plugin.Frame, error) {
174	d.mu.Lock()
175	defer d.mu.Unlock()
176
177	if err := d.rw.write(fmt.Sprintf("%x", addr-d.base)); err != nil {
178		return nil, err
179	}
180
181	if err := d.rw.write(fmt.Sprintf("%x", sentinel)); err != nil {
182		return nil, err
183	}
184
185	resp, err := d.rw.readLine()
186	if err != nil {
187		return nil, err
188	}
189
190	if !strings.HasPrefix(resp, "0x") {
191		return nil, fmt.Errorf("unexpected addr2line output: %s", resp)
192	}
193
194	var stack []plugin.Frame
195	for {
196		frame, end := d.readFrame()
197		if end {
198			break
199		}
200
201		if frame != (plugin.Frame{}) {
202			stack = append(stack, frame)
203		}
204	}
205	return stack, err
206}
207
208// addrInfo returns the stack frame information for a specific program
209// address. It returns nil if the address could not be identified.
210func (d *addr2Liner) addrInfo(addr uint64) ([]plugin.Frame, error) {
211	stack, err := d.rawAddrInfo(addr)
212	if err != nil {
213		return nil, err
214	}
215
216	// Certain versions of addr2line produce incomplete names due to
217	// https://sourceware.org/bugzilla/show_bug.cgi?id=17541. Attempt to replace
218	// the name with a better one from nm.
219	if len(stack) > 0 && d.nm != nil {
220		nm, err := d.nm.addrInfo(addr)
221		if err == nil && len(nm) > 0 {
222			// Last entry in frame list should match since it is non-inlined. As a
223			// simple heuristic, we only switch to the nm-based name if it is longer
224			// by 2 or more characters. We consider nm names that are longer by 1
225			// character insignificant to avoid replacing foo with _foo on MacOS (for
226			// unknown reasons read2line produces the former and nm produces the
227			// latter on MacOS even though both tools are asked to produce mangled
228			// names).
229			nmName := nm[len(nm)-1].Func
230			a2lName := stack[len(stack)-1].Func
231			if len(nmName) > len(a2lName)+1 {
232				stack[len(stack)-1].Func = nmName
233			}
234		}
235	}
236
237	return stack, nil
238}
239