1// Copyright 2014 Google Inc. All Rights Reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15package binutils
16
17import (
18	"bytes"
19	"io"
20	"regexp"
21	"strconv"
22	"strings"
23
24	"github.com/google/pprof/internal/plugin"
25	"github.com/ianlancetaylor/demangle"
26)
27
// Regular expressions used to parse the textual output of nm and objdump.
// They are applied to lines that have already been passed through
// strings.TrimSpace.
var (
	// nmOutputRE matches a symbol line of nm output. Group 1 is a
	// hexadecimal address, group 2 is a single character (presumably the
	// nm symbol-type column) and group 3 is the rest of the line, which is
	// used as the symbol name.
	nmOutputRE                = regexp.MustCompile(`^\s*([[:xdigit:]]+)\s+(.)\s+(.*)`)
	// objdumpAsmOutputRE matches an instruction line: a hexadecimal
	// address (group 1) followed by a colon and the instruction text
	// (group 2).
	objdumpAsmOutputRE        = regexp.MustCompile(`^\s*([[:xdigit:]]+):\s+(.*)`)
	// objdumpOutputFileLine matches a "file:line" annotation, optionally
	// prefixed by ";". Group 1 is the file and group 2 the decimal line
	// number.
	objdumpOutputFileLine     = regexp.MustCompile(`^;?\s?(.*):([0-9]+)`)
	// objdumpOutputFunction matches a "name():" annotation, optionally
	// prefixed by ";". Group 1 is the function name.
	objdumpOutputFunction     = regexp.MustCompile(`^;?\s?(\S.*)\(\):`)
	// objdumpOutputFunctionLLVM matches an optional hexadecimal prefix
	// (group 1) followed by text ending in ":" (group 2, the function
	// name). It is very permissive: any line containing a colon matches,
	// so it is only meaningful when tried after the patterns above.
	objdumpOutputFunctionLLVM = regexp.MustCompile(`^([[:xdigit:]]+)?\s?(.*):`)
)
35
36func findSymbols(syms []byte, file string, r *regexp.Regexp, address uint64) ([]*plugin.Sym, error) {
37	// Collect all symbols from the nm output, grouping names mapped to
38	// the same address into a single symbol.
39
40	// The symbols to return.
41	var symbols []*plugin.Sym
42
43	// The current group of symbol names, and the address they are all at.
44	names, start := []string{}, uint64(0)
45
46	buf := bytes.NewBuffer(syms)
47
48	for {
49		symAddr, name, err := nextSymbol(buf)
50		if err == io.EOF {
51			// Done. If there was an unfinished group, append it.
52			if len(names) != 0 {
53				if match := matchSymbol(names, start, symAddr-1, r, address); match != nil {
54					symbols = append(symbols, &plugin.Sym{Name: match, File: file, Start: start, End: symAddr - 1})
55				}
56			}
57
58			// And return the symbols.
59			return symbols, nil
60		}
61
62		if err != nil {
63			// There was some kind of serious error reading nm's output.
64			return nil, err
65		}
66
67		// If this symbol is at the same address as the current group, add it to the group.
68		if symAddr == start {
69			names = append(names, name)
70			continue
71		}
72
73		// Otherwise append the current group to the list of symbols.
74		if match := matchSymbol(names, start, symAddr-1, r, address); match != nil {
75			symbols = append(symbols, &plugin.Sym{Name: match, File: file, Start: start, End: symAddr - 1})
76		}
77
78		// And start a new group.
79		names, start = []string{name}, symAddr
80	}
81}
82
83// matchSymbol checks if a symbol is to be selected by checking its
84// name to the regexp and optionally its address. It returns the name(s)
85// to be used for the matched symbol, or nil if no match
86func matchSymbol(names []string, start, end uint64, r *regexp.Regexp, address uint64) []string {
87	if address != 0 && address >= start && address <= end {
88		return names
89	}
90	for _, name := range names {
91		if r == nil || r.MatchString(name) {
92			return []string{name}
93		}
94
95		// Match all possible demangled versions of the name.
96		for _, o := range [][]demangle.Option{
97			{demangle.NoClones},
98			{demangle.NoParams, demangle.NoEnclosingParams},
99			{demangle.NoParams, demangle.NoEnclosingParams, demangle.NoTemplateParams},
100		} {
101			if demangled, err := demangle.ToString(name, o...); err == nil && r.MatchString(demangled) {
102				return []string{demangled}
103			}
104		}
105	}
106	return nil
107}
108
109// disassemble parses the output of the objdump command and returns
110// the assembly instructions in a slice.
111func disassemble(asm []byte) ([]plugin.Inst, error) {
112	buf := bytes.NewBuffer(asm)
113	function, file, line := "", "", 0
114	var assembly []plugin.Inst
115	for {
116		input, err := buf.ReadString('\n')
117		if err != nil {
118			if err != io.EOF {
119				return nil, err
120			}
121			if input == "" {
122				break
123			}
124		}
125		input = strings.TrimSpace(input)
126
127		if fields := objdumpAsmOutputRE.FindStringSubmatch(input); len(fields) == 3 {
128			if address, err := strconv.ParseUint(fields[1], 16, 64); err == nil {
129				assembly = append(assembly,
130					plugin.Inst{
131						Addr:     address,
132						Text:     fields[2],
133						Function: function,
134						File:     file,
135						Line:     line,
136					})
137				continue
138			}
139		}
140		if fields := objdumpOutputFileLine.FindStringSubmatch(input); len(fields) == 3 {
141			if l, err := strconv.ParseUint(fields[2], 10, 32); err == nil {
142				file, line = fields[1], int(l)
143			}
144			continue
145		}
146		if fields := objdumpOutputFunction.FindStringSubmatch(input); len(fields) == 2 {
147			function = fields[1]
148			continue
149		} else {
150			if fields := objdumpOutputFunctionLLVM.FindStringSubmatch(input); len(fields) == 3 {
151				function = fields[2]
152				continue
153			}
154		}
155		// Reset on unrecognized lines.
156		function, file, line = "", "", 0
157	}
158
159	return assembly, nil
160}
161
162// nextSymbol parses the nm output to find the next symbol listed.
163// Skips over any output it cannot recognize.
164func nextSymbol(buf *bytes.Buffer) (uint64, string, error) {
165	for {
166		line, err := buf.ReadString('\n')
167		if err != nil {
168			if err != io.EOF || line == "" {
169				return 0, "", err
170			}
171		}
172		line = strings.TrimSpace(line)
173
174		if fields := nmOutputRE.FindStringSubmatch(line); len(fields) == 4 {
175			if address, err := strconv.ParseUint(fields[1], 16, 64); err == nil {
176				return address, fields[3], nil
177			}
178		}
179	}
180}
181