1// Copyright 2014 Google Inc. All Rights Reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15package binutils
16
17import (
18	"bufio"
19	"fmt"
20	"io"
21	"os/exec"
22	"strconv"
23	"strings"
24	"sync"
25
26	"github.com/google/pprof/internal/plugin"
27)
28
// defaultLLVMSymbolizer is the command newLLVMSymbolizer runs when it is
// called with an empty cmd.
const defaultLLVMSymbolizer = "llvm-symbolizer"
32
// llvmSymbolizer is a connection to an llvm-symbolizer command for
// obtaining address and line number information from a binary.
//
// The embedded mutex is held by addrInfo for the whole of one
// request/response exchange. filename is written in front of every request,
// rw is the line-oriented channel used to send requests and read replies,
// and base is subtracted from each address before it is sent.
type llvmSymbolizer struct {
	sync.Mutex
	filename string
	rw       lineReaderWriter
	base     uint64
}
41
// llvmSymbolizerJob bundles an llvm-symbolizer command with the two pipes
// used to talk to it: requests are written to in and replies are read, one
// line at a time, from out. newLLVMSymbolizer stores it in
// llvmSymbolizer.rw.
type llvmSymbolizerJob struct {
	cmd *exec.Cmd
	in  io.WriteCloser
	out *bufio.Reader
	// llvm-symbolizer requires the symbol type, CODE or DATA, for symbolization.
	// It is written as the first word of every request.
	symType string
}
49
50func (a *llvmSymbolizerJob) write(s string) error {
51	_, err := fmt.Fprintln(a.in, a.symType, s)
52	return err
53}
54
55func (a *llvmSymbolizerJob) readLine() (string, error) {
56	s, err := a.out.ReadString('\n')
57	if err != nil {
58		return "", err
59	}
60	return strings.TrimSpace(s), nil
61}
62
63// close releases any resources used by the llvmSymbolizer object.
64func (a *llvmSymbolizerJob) close() {
65	a.in.Close()
66	a.cmd.Wait()
67}
68
69// newLLVMSymbolizer starts the given llvmSymbolizer command reporting
70// information about the given executable file. If file is a shared
71// library, base should be the address at which it was mapped in the
72// program under consideration.
73func newLLVMSymbolizer(cmd, file string, base uint64, isData bool) (*llvmSymbolizer, error) {
74	if cmd == "" {
75		cmd = defaultLLVMSymbolizer
76	}
77
78	j := &llvmSymbolizerJob{
79		cmd:     exec.Command(cmd, "--inlining", "-demangle=false"),
80		symType: "CODE",
81	}
82	if isData {
83		j.symType = "DATA"
84	}
85
86	var err error
87	if j.in, err = j.cmd.StdinPipe(); err != nil {
88		return nil, err
89	}
90
91	outPipe, err := j.cmd.StdoutPipe()
92	if err != nil {
93		return nil, err
94	}
95
96	j.out = bufio.NewReader(outPipe)
97	if err := j.cmd.Start(); err != nil {
98		return nil, err
99	}
100
101	a := &llvmSymbolizer{
102		filename: file,
103		rw:       j,
104		base:     base,
105	}
106
107	return a, nil
108}
109
110// readFrame parses the llvm-symbolizer output for a single address. It
111// returns a populated plugin.Frame and whether it has reached the end of the
112// data.
113func (d *llvmSymbolizer) readFrame() (plugin.Frame, bool) {
114	funcname, err := d.rw.readLine()
115	if err != nil {
116		return plugin.Frame{}, true
117	}
118
119	switch funcname {
120	case "":
121		return plugin.Frame{}, true
122	case "??":
123		funcname = ""
124	}
125
126	fileline, err := d.rw.readLine()
127	if err != nil {
128		return plugin.Frame{Func: funcname}, true
129	}
130
131	linenumber := 0
132	columnnumber := 0
133	// The llvm-symbolizer outputs the <file_name>:<line_number>:<column_number>.
134	// When it cannot identify the source code location, it outputs "??:0:0".
135	// Older versions output just the filename and line number, so we check for
136	// both conditions here.
137	if fileline == "??:0" || fileline == "??:0:0" {
138		fileline = ""
139	} else {
140		switch split := strings.Split(fileline, ":"); len(split) {
141		case 3:
142			// filename:line:column
143			if col, err := strconv.Atoi(split[2]); err == nil {
144				columnnumber = col
145			}
146			fallthrough
147		case 2:
148			// filename:line
149			if line, err := strconv.Atoi(split[1]); err == nil {
150				linenumber = line
151			}
152			fallthrough
153		case 1:
154			// filename
155			fileline = split[0]
156		default:
157			// Unrecognized, ignore
158		}
159	}
160
161	return plugin.Frame{Func: funcname, File: fileline, Line: linenumber, Column: columnnumber}, false
162}
163
164// addrInfo returns the stack frame information for a specific program
165// address. It returns nil if the address could not be identified.
166func (d *llvmSymbolizer) addrInfo(addr uint64) ([]plugin.Frame, error) {
167	d.Lock()
168	defer d.Unlock()
169
170	if err := d.rw.write(fmt.Sprintf("%s 0x%x", d.filename, addr-d.base)); err != nil {
171		return nil, err
172	}
173
174	var stack []plugin.Frame
175	for {
176		frame, end := d.readFrame()
177		if end {
178			break
179		}
180
181		if frame != (plugin.Frame{}) {
182			stack = append(stack, frame)
183		}
184	}
185
186	return stack, nil
187}
188