1// Copyright 2016 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package pprof
6
7import (
8	"bytes"
9	"compress/gzip"
10	"fmt"
11	"internal/abi"
12	"io"
13	"runtime"
14	"strconv"
15	"strings"
16	"time"
17	"unsafe"
18)
19
// lostProfileEvent is the function to which lost profiling
// events are attributed.
// (The name shows up in the pprof graphs.)
//
// addCPUData never calls it; it only takes the function's entry PC to
// build a one-frame stack for overflow records. Calling it would recurse
// without end.
func lostProfileEvent() { lostProfileEvent() }
24
// A profileBuilder writes a profile incrementally from a
// stream of profile samples delivered by the runtime.
//
// start is set by newProfileBuilder and end by build. havePeriod and
// period are filled in by addCPUData from the first record of the CPU
// profile; period is the sampling period in nanoseconds. m accumulates
// the per-stack sample counts until build writes them out.
type profileBuilder struct {
	start      time.Time
	end        time.Time
	havePeriod bool
	period     int64
	m          profMap

	// encoding state
	//
	// w is the destination passed to newProfileBuilder and zw is the gzip
	// writer layered on top of it. pb buffers encoded protobuf data that
	// flush and build hand to zw. strings and stringMap together form the
	// string table (see stringIndex). mem holds the memory mappings and
	// deck the pending inlined-frame state used while emitting locations.
	w         io.Writer
	zw        *gzip.Writer
	pb        protobuf
	strings   []string
	stringMap map[string]int
	locs      map[uintptr]locInfo // list of locInfo starting with the given PC.
	funcs     map[string]int      // Package path-qualified function name to Function.ID
	mem       []memMap
	deck      pcDeck
}
45
// memMap describes one memory mapping of the process, as recorded by
// addMappingEntry, together with the symbolization state accumulated for
// the locations that fall inside it.
type memMap struct {
	// initialized as reading mapping
	start   uintptr // Address at which the binary (or DLL) is loaded into memory.
	end     uintptr // The limit of the address range occupied by this mapping.
	offset  uint64  // Offset in the binary that corresponds to the first mapped address.
	file    string  // The object this entry is loaded from.
	buildID string  // A string that uniquely identifies a particular program version with high probability.

	// funcs is the union of the symbolization results of every location
	// emitted for this mapping (see emitLocation); build reports
	// HasFunctions only when it equals lookupTried exactly.
	funcs symbolizeFlag
	fake  bool // map entry was faked; /proc/self/maps wasn't available
}
57
// symbolizeFlag is a bit set describing the outcome of symbol lookups.
//
//	0                  : no symbol lookup was performed
//	1<<0 (lookupTried) : symbol lookup was performed
//	1<<1 (lookupFailed): symbol lookup was performed but failed
type symbolizeFlag uint8

const (
	// lookupTried is set once a symbol lookup has been attempted.
	lookupTried symbolizeFlag = 1 << iota
	// lookupFailed is set when an attempted symbol lookup did not succeed.
	lookupFailed
)
69
// Field tags handed to the protobuf encoder, grouped by the message they
// belong to and named tag<Message>_<Field>. The trailing comment on each
// constant gives the field's type in that message.
const (
	// message Profile
	tagProfile_SampleType        = 1  // repeated ValueType
	tagProfile_Sample            = 2  // repeated Sample
	tagProfile_Mapping           = 3  // repeated Mapping
	tagProfile_Location          = 4  // repeated Location
	tagProfile_Function          = 5  // repeated Function
	tagProfile_StringTable       = 6  // repeated string
	tagProfile_DropFrames        = 7  // int64 (string table index)
	tagProfile_KeepFrames        = 8  // int64 (string table index)
	tagProfile_TimeNanos         = 9  // int64
	tagProfile_DurationNanos     = 10 // int64
	tagProfile_PeriodType        = 11 // ValueType (really optional string???)
	tagProfile_Period            = 12 // int64
	tagProfile_Comment           = 13 // repeated int64
	tagProfile_DefaultSampleType = 14 // int64

	// message ValueType
	tagValueType_Type = 1 // int64 (string table index)
	tagValueType_Unit = 2 // int64 (string table index)

	// message Sample
	tagSample_Location = 1 // repeated uint64
	tagSample_Value    = 2 // repeated int64
	tagSample_Label    = 3 // repeated Label

	// message Label
	tagLabel_Key = 1 // int64 (string table index)
	tagLabel_Str = 2 // int64 (string table index)
	tagLabel_Num = 3 // int64

	// message Mapping
	tagMapping_ID              = 1  // uint64
	tagMapping_Start           = 2  // uint64
	tagMapping_Limit           = 3  // uint64
	tagMapping_Offset          = 4  // uint64
	tagMapping_Filename        = 5  // int64 (string table index)
	tagMapping_BuildID         = 6  // int64 (string table index)
	tagMapping_HasFunctions    = 7  // bool
	tagMapping_HasFilenames    = 8  // bool
	tagMapping_HasLineNumbers  = 9  // bool
	tagMapping_HasInlineFrames = 10 // bool

	// message Location
	tagLocation_ID        = 1 // uint64
	tagLocation_MappingID = 2 // uint64
	tagLocation_Address   = 3 // uint64
	tagLocation_Line      = 4 // repeated Line

	// message Line
	tagLine_FunctionID = 1 // uint64
	tagLine_Line       = 2 // int64

	// message Function
	tagFunction_ID         = 1 // uint64
	tagFunction_Name       = 2 // int64 (string table index)
	tagFunction_SystemName = 3 // int64 (string table index)
	tagFunction_Filename   = 4 // int64 (string table index)
	tagFunction_StartLine  = 5 // int64
)
130
131// stringIndex adds s to the string table if not already present
132// and returns the index of s in the string table.
133func (b *profileBuilder) stringIndex(s string) int64 {
134	id, ok := b.stringMap[s]
135	if !ok {
136		id = len(b.strings)
137		b.strings = append(b.strings, s)
138		b.stringMap[s] = id
139	}
140	return int64(id)
141}
142
143func (b *profileBuilder) flush() {
144	const dataFlush = 4096
145	if b.pb.nest == 0 && len(b.pb.data) > dataFlush {
146		b.zw.Write(b.pb.data)
147		b.pb.data = b.pb.data[:0]
148	}
149}
150
151// pbValueType encodes a ValueType message to b.pb.
152func (b *profileBuilder) pbValueType(tag int, typ, unit string) {
153	start := b.pb.startMessage()
154	b.pb.int64(tagValueType_Type, b.stringIndex(typ))
155	b.pb.int64(tagValueType_Unit, b.stringIndex(unit))
156	b.pb.endMessage(tag, start)
157}
158
159// pbSample encodes a Sample message to b.pb.
160func (b *profileBuilder) pbSample(values []int64, locs []uint64, labels func()) {
161	start := b.pb.startMessage()
162	b.pb.int64s(tagSample_Value, values)
163	b.pb.uint64s(tagSample_Location, locs)
164	if labels != nil {
165		labels()
166	}
167	b.pb.endMessage(tagProfile_Sample, start)
168	b.flush()
169}
170
171// pbLabel encodes a Label message to b.pb.
172func (b *profileBuilder) pbLabel(tag int, key, str string, num int64) {
173	start := b.pb.startMessage()
174	b.pb.int64Opt(tagLabel_Key, b.stringIndex(key))
175	b.pb.int64Opt(tagLabel_Str, b.stringIndex(str))
176	b.pb.int64Opt(tagLabel_Num, num)
177	b.pb.endMessage(tag, start)
178}
179
180// pbLine encodes a Line message to b.pb.
181func (b *profileBuilder) pbLine(tag int, funcID uint64, line int64) {
182	start := b.pb.startMessage()
183	b.pb.uint64Opt(tagLine_FunctionID, funcID)
184	b.pb.int64Opt(tagLine_Line, line)
185	b.pb.endMessage(tag, start)
186}
187
188// pbMapping encodes a Mapping message to b.pb.
189func (b *profileBuilder) pbMapping(tag int, id, base, limit, offset uint64, file, buildID string, hasFuncs bool) {
190	start := b.pb.startMessage()
191	b.pb.uint64Opt(tagMapping_ID, id)
192	b.pb.uint64Opt(tagMapping_Start, base)
193	b.pb.uint64Opt(tagMapping_Limit, limit)
194	b.pb.uint64Opt(tagMapping_Offset, offset)
195	b.pb.int64Opt(tagMapping_Filename, b.stringIndex(file))
196	b.pb.int64Opt(tagMapping_BuildID, b.stringIndex(buildID))
197	// TODO: we set HasFunctions if all symbols from samples were symbolized (hasFuncs).
198	// Decide what to do about HasInlineFrames and HasLineNumbers.
199	// Also, another approach to handle the mapping entry with
200	// incomplete symbolization results is to duplicate the mapping
201	// entry (but with different Has* fields values) and use
202	// different entries for symbolized locations and unsymbolized locations.
203	if hasFuncs {
204		b.pb.bool(tagMapping_HasFunctions, true)
205	}
206	b.pb.endMessage(tag, start)
207}
208
209func allFrames(addr uintptr) ([]runtime.Frame, symbolizeFlag) {
210	// Expand this one address using CallersFrames so we can cache
211	// each expansion. In general, CallersFrames takes a whole
212	// stack, but in this case we know there will be no skips in
213	// the stack and we have return PCs anyway.
214	frames := runtime.CallersFrames([]uintptr{addr})
215	frame, more := frames.Next()
216	if frame.Function == "runtime.goexit" {
217		// Short-circuit if we see runtime.goexit so the loop
218		// below doesn't allocate a useless empty location.
219		return nil, 0
220	}
221
222	symbolizeResult := lookupTried
223	if frame.PC == 0 || frame.Function == "" || frame.File == "" || frame.Line == 0 {
224		symbolizeResult |= lookupFailed
225	}
226
227	if frame.PC == 0 {
228		// If we failed to resolve the frame, at least make up
229		// a reasonable call PC. This mostly happens in tests.
230		frame.PC = addr - 1
231	}
232	ret := []runtime.Frame{frame}
233	for frame.Function != "runtime.goexit" && more {
234		frame, more = frames.Next()
235		ret = append(ret, frame)
236	}
237	return ret, symbolizeResult
238}
239
// locInfo is the cached description of a Location that has already been
// emitted. emitLocation stores one entry in profileBuilder.locs, keyed by
// the first PC of the location.
type locInfo struct {
	// location id assigned by the profileBuilder
	id uint64

	// sequence of PCs, including the fake PCs returned by the traceback
	// to represent inlined functions
	// https://github.com/golang/go/blob/d6f2f833c93a41ec1c68e49804b8387a06b131c5/src/runtime/traceback.go#L347-L368
	pcs []uintptr

	// firstPCFrames and firstPCSymbolizeResult hold the results of the
	// allFrames call for the first (leaf-most) PC this locInfo represents
	firstPCFrames          []runtime.Frame
	firstPCSymbolizeResult symbolizeFlag
}
254
255// newProfileBuilder returns a new profileBuilder.
256// CPU profiling data obtained from the runtime can be added
257// by calling b.addCPUData, and then the eventual profile
258// can be obtained by calling b.finish.
259func newProfileBuilder(w io.Writer) *profileBuilder {
260	zw, _ := gzip.NewWriterLevel(w, gzip.BestSpeed)
261	b := &profileBuilder{
262		w:         w,
263		zw:        zw,
264		start:     time.Now(),
265		strings:   []string{""},
266		stringMap: map[string]int{"": 0},
267		locs:      map[uintptr]locInfo{},
268		funcs:     map[string]int{},
269	}
270	b.readMapping()
271	return b
272}
273
274// addCPUData adds the CPU profiling data to the profile.
275//
276// The data must be a whole number of records, as delivered by the runtime.
277// len(tags) must be equal to the number of records in data.
278func (b *profileBuilder) addCPUData(data []uint64, tags []unsafe.Pointer) error {
279	if !b.havePeriod {
280		// first record is period
281		if len(data) < 3 {
282			return fmt.Errorf("truncated profile")
283		}
284		if data[0] != 3 || data[2] == 0 {
285			return fmt.Errorf("malformed profile")
286		}
287		// data[2] is sampling rate in Hz. Convert to sampling
288		// period in nanoseconds.
289		b.period = 1e9 / int64(data[2])
290		b.havePeriod = true
291		data = data[3:]
292		// Consume tag slot. Note that there isn't a meaningful tag
293		// value for this record.
294		tags = tags[1:]
295	}
296
297	// Parse CPU samples from the profile.
298	// Each sample is 3+n uint64s:
299	//	data[0] = 3+n
300	//	data[1] = time stamp (ignored)
301	//	data[2] = count
302	//	data[3:3+n] = stack
303	// If the count is 0 and the stack has length 1,
304	// that's an overflow record inserted by the runtime
305	// to indicate that stack[0] samples were lost.
306	// Otherwise the count is usually 1,
307	// but in a few special cases like lost non-Go samples
308	// there can be larger counts.
309	// Because many samples with the same stack arrive,
310	// we want to deduplicate immediately, which we do
311	// using the b.m profMap.
312	for len(data) > 0 {
313		if len(data) < 3 || data[0] > uint64(len(data)) {
314			return fmt.Errorf("truncated profile")
315		}
316		if data[0] < 3 || tags != nil && len(tags) < 1 {
317			return fmt.Errorf("malformed profile")
318		}
319		if len(tags) < 1 {
320			return fmt.Errorf("mismatched profile records and tags")
321		}
322		count := data[2]
323		stk := data[3:data[0]]
324		data = data[data[0]:]
325		tag := tags[0]
326		tags = tags[1:]
327
328		if count == 0 && len(stk) == 1 {
329			// overflow record
330			count = uint64(stk[0])
331			stk = []uint64{
332				// gentraceback guarantees that PCs in the
333				// stack can be unconditionally decremented and
334				// still be valid, so we must do the same.
335				uint64(abi.FuncPCABIInternal(lostProfileEvent) + 1),
336			}
337		}
338		b.m.lookup(stk, tag).count += int64(count)
339	}
340
341	if len(tags) != 0 {
342		return fmt.Errorf("mismatched profile records and tags")
343	}
344	return nil
345}
346
// build completes the profile: it encodes the header fields, one Sample
// per accumulated stack, the mappings and the string table, writes the
// remaining buffered data to the gzip writer and closes it.
// It returns nothing; the results of the final Write and Close are not
// checked.
func (b *profileBuilder) build() {
	b.end = time.Now()

	b.pb.int64Opt(tagProfile_TimeNanos, b.start.UnixNano())
	if b.havePeriod { // must be CPU profile
		b.pbValueType(tagProfile_SampleType, "samples", "count")
		b.pbValueType(tagProfile_SampleType, "cpu", "nanoseconds")
		b.pb.int64Opt(tagProfile_DurationNanos, b.end.Sub(b.start).Nanoseconds())
		b.pbValueType(tagProfile_PeriodType, "cpu", "nanoseconds")
		b.pb.int64Opt(tagProfile_Period, b.period)
	}

	// values and locs are scratch buffers reused for every sample.
	values := []int64{0, 0}
	var locs []uint64

	for e := b.m.all; e != nil; e = e.nextAll {
		// values[0] is the sample count, values[1] the same count
		// scaled by the sampling period (nanoseconds).
		values[0] = e.count
		values[1] = e.count * b.period

		var labels func()
		if e.tag != nil {
			labels = func() {
				for k, v := range *(*labelMap)(e.tag) {
					b.pbLabel(tagSample_Label, k, v, 0)
				}
			}
		}

		// appendLocsForStack may itself emit Location and Function
		// messages, so it has to run before pbSample opens the Sample.
		locs = b.appendLocsForStack(locs[:0], e.stk)

		b.pbSample(values, locs, labels)
	}

	// Mapping IDs are 1-based indexes into b.mem, matching the IDs
	// emitLocation writes into each Location.
	for i, m := range b.mem {
		hasFunctions := m.funcs == lookupTried // lookupTried but not lookupFailed
		b.pbMapping(tagProfile_Mapping, uint64(i+1), uint64(m.start), uint64(m.end), m.offset, m.file, m.buildID, hasFunctions)
	}

	// TODO: Anything for tagProfile_DropFrames?
	// TODO: Anything for tagProfile_KeepFrames?

	b.pb.strings(tagProfile_StringTable, b.strings)
	b.zw.Write(b.pb.data)
	b.zw.Close()
}
393
// appendLocsForStack appends the location IDs for the given stack trace to the given
// location ID slice, locs. The addresses in the stack are return PCs or 1 + the PC of
// an inline marker as the runtime traceback function returns.
//
// It may append nothing even if stk is non-empty, for example if stk consists
// solely of runtime.goexit. We still count these empty stacks in profiles in order to
// get the right cumulative sample count.
//
// It may emit to b.pb, so there must be no message encoding in progress.
func (b *profileBuilder) appendLocsForStack(locs []uint64, stk []uintptr) (newLocs []uint64) {
	b.deck.reset()

	// The last frame might be truncated. Recover lost inline frames.
	stk = runtime_expandFinalInlineFrame(stk)

	for len(stk) > 0 {
		addr := stk[0]
		if l, ok := b.locs[addr]; ok {
			// When generating code for an inlined function, the compiler adds
			// NOP instructions to the outermost function as a placeholder for
			// each layer of inlining. When the runtime generates tracebacks for
			// stacks that include inlined functions, it uses the addresses of
			// those NOPs as "fake" PCs on the stack as if they were regular
			// function call sites. But if a profiling signal arrives while the
			// CPU is executing one of those NOPs, its PC will show up as a leaf
			// in the profile with its own Location entry. So, always check
			// whether addr is a "fake" PC in the context of the current call
			// stack by trying to add it to the inlining deck before assuming
			// that the deck is complete.
			if len(b.deck.pcs) > 0 {
				if added := b.deck.tryAdd(addr, l.firstPCFrames, l.firstPCSymbolizeResult); added {
					stk = stk[1:]
					continue
				}
			}

			// first record the location if there is any pending accumulated info.
			if id := b.emitLocation(); id > 0 {
				locs = append(locs, id)
			}

			// then, record the cached location.
			locs = append(locs, l.id)

			// Skip the matching pcs.
			//
			// Even if stk was truncated due to the stack depth
			// limit, expandFinalInlineFrame above has already
			// fixed the truncation, ensuring it is long enough.
			stk = stk[len(l.pcs):]
			continue
		}

		// addr has no cached location yet: expand it into frames.
		frames, symbolizeResult := allFrames(addr)
		if len(frames) == 0 { // runtime.goexit.
			if id := b.emitLocation(); id > 0 {
				locs = append(locs, id)
			}
			stk = stk[1:]
			continue
		}

		if added := b.deck.tryAdd(addr, frames, symbolizeResult); added {
			stk = stk[1:]
			continue
		}
		// add failed because this addr is not inlined with the
		// existing PCs in the deck. Flush the deck and retry handling
		// this pc.
		if id := b.emitLocation(); id > 0 {
			locs = append(locs, id)
		}

		// check cache again - previous emitLocation added a new entry
		if l, ok := b.locs[addr]; ok {
			locs = append(locs, l.id)
			stk = stk[len(l.pcs):] // skip the matching pcs.
		} else {
			b.deck.tryAdd(addr, frames, symbolizeResult) // must succeed.
			stk = stk[1:]
		}
	}
	if id := b.emitLocation(); id > 0 { // emit remaining location.
		locs = append(locs, id)
	}
	return locs
}
481
482// Here's an example of how Go 1.17 writes out inlined functions, compiled for
483// linux/amd64. The disassembly of main.main shows two levels of inlining: main
484// calls b, b calls a, a does some work.
485//
486//   inline.go:9   0x4553ec  90              NOPL                 // func main()    { b(v) }
487//   inline.go:6   0x4553ed  90              NOPL                 // func b(v *int) { a(v) }
488//   inline.go:5   0x4553ee  48c7002a000000  MOVQ $0x2a, 0(AX)    // func a(v *int) { *v = 42 }
489//
490// If a profiling signal arrives while executing the MOVQ at 0x4553ee (for line
491// 5), the runtime will report the stack as the MOVQ frame being called by the
492// NOPL at 0x4553ed (for line 6) being called by the NOPL at 0x4553ec (for line
493// 9).
494//
495// The role of pcDeck is to collapse those three frames back into a single
496// location at 0x4553ee, with file/line/function symbolization info representing
497// the three layers of calls. It does that via sequential calls to pcDeck.tryAdd
498// starting with the leaf-most address. The fourth call to pcDeck.tryAdd will be
499// for the caller of main.main. Because main.main was not inlined in its caller,
500// the deck will reject the addition, and the fourth PC on the stack will get
501// its own location.
502
// pcDeck is a helper to detect a sequence of inlined functions from
// a stack trace returned by the runtime.
//
// The stack traces returned by runtime's traceback functions are fully
// expanded (at least for Go functions) and include the fake pcs representing
// inlined functions. The profile proto expects the inlined functions to be
// encoded in one Location message.
// https://github.com/google/pprof/blob/5e965273ee43930341d897407202dd5e10e952cb/proto/profile.proto#L177-L184
//
// Runtime does not directly expose whether a frame is for an inlined function
// and looking up debug info is not ideal, so we use a heuristic to filter
// the fake pcs and restore the inlined and entry functions. Inlined functions
// have the following properties:
//
//	Frame's Func is nil (note: also true for non-Go functions), and
//	Frame's Entry matches its entry function frame's Entry (note: could also be true for recursive calls and non-Go functions), and
//	Frame's Name does not match its entry function frame's name (note: inlined functions cannot be directly recursive).
//
// While reading and processing the pcs in a stack trace one by one (from the leaf to the root),
// we use pcDeck to temporarily hold the observed pcs and their expanded frames
// until we observe the entry function frame.
type pcDeck struct {
	// pcs and frames hold the PCs added so far and the frames expanded
	// from them; symbolizeResult is the union of their symbolization
	// results (see tryAdd).
	pcs             []uintptr
	frames          []runtime.Frame
	symbolizeResult symbolizeFlag

	// firstPCFrames indicates the number of frames associated with the first
	// (leaf-most) PC in the deck
	firstPCFrames int
	// firstPCSymbolizeResult holds the results of the allFrames call for the
	// first (leaf-most) PC in the deck
	firstPCSymbolizeResult symbolizeFlag
}
536
537func (d *pcDeck) reset() {
538	d.pcs = d.pcs[:0]
539	d.frames = d.frames[:0]
540	d.symbolizeResult = 0
541	d.firstPCFrames = 0
542	d.firstPCSymbolizeResult = 0
543}
544
545// tryAdd tries to add the pc and Frames expanded from it (most likely one,
546// since the stack trace is already fully expanded) and the symbolizeResult
547// to the deck. If it fails the caller needs to flush the deck and retry.
548func (d *pcDeck) tryAdd(pc uintptr, frames []runtime.Frame, symbolizeResult symbolizeFlag) (success bool) {
549	if existing := len(d.frames); existing > 0 {
550		// 'd.frames' are all expanded from one 'pc' and represent all
551		// inlined functions so we check only the last one.
552		newFrame := frames[0]
553		last := d.frames[existing-1]
554		if last.Func != nil { // the last frame can't be inlined. Flush.
555			return false
556		}
557		if last.Entry == 0 || newFrame.Entry == 0 { // Possibly not a Go function. Don't try to merge.
558			return false
559		}
560
561		if last.Entry != newFrame.Entry { // newFrame is for a different function.
562			return false
563		}
564		if runtime_FrameSymbolName(&last) == runtime_FrameSymbolName(&newFrame) { // maybe recursion.
565			return false
566		}
567	}
568	d.pcs = append(d.pcs, pc)
569	d.frames = append(d.frames, frames...)
570	d.symbolizeResult |= symbolizeResult
571	if len(d.pcs) == 1 {
572		d.firstPCFrames = len(d.frames)
573		d.firstPCSymbolizeResult = symbolizeResult
574	}
575	return true
576}
577
// emitLocation emits the new location and function information recorded in the deck
// and returns the location ID encoded in the profile protobuf.
// It returns 0 without emitting anything when the deck is empty.
// It emits to b.pb, so there must be no message encoding in progress.
// It resets the deck.
func (b *profileBuilder) emitLocation() uint64 {
	if len(b.deck.pcs) == 0 {
		return 0
	}
	defer b.deck.reset()

	addr := b.deck.pcs[0]
	firstFrame := b.deck.frames[0]

	// We can't write out functions while in the middle of the
	// Location message, so record new functions we encounter and
	// write them out after the Location.
	type newFunc struct {
		id         uint64
		name, file string
		startLine  int64
	}
	newFuncs := make([]newFunc, 0, 8)

	// Location IDs are assigned sequentially starting at 1. The deck's
	// slices are copied because the deck reuses its storage after reset.
	id := uint64(len(b.locs)) + 1
	b.locs[addr] = locInfo{
		id:                     id,
		pcs:                    append([]uintptr{}, b.deck.pcs...),
		firstPCSymbolizeResult: b.deck.firstPCSymbolizeResult,
		firstPCFrames:          append([]runtime.Frame{}, b.deck.frames[:b.deck.firstPCFrames]...),
	}

	start := b.pb.startMessage()
	b.pb.uint64Opt(tagLocation_ID, id)
	b.pb.uint64Opt(tagLocation_Address, uint64(firstFrame.PC))
	for _, frame := range b.deck.frames {
		// Write out each line in frame expansion.
		funcName := runtime_FrameSymbolName(&frame)
		funcID := uint64(b.funcs[funcName])
		if funcID == 0 {
			// First time this function is seen: assign the next
			// 1-based ID and queue its Function message.
			funcID = uint64(len(b.funcs)) + 1
			b.funcs[funcName] = int(funcID)
			newFuncs = append(newFuncs, newFunc{
				id:        funcID,
				name:      funcName,
				file:      frame.File,
				startLine: int64(runtime_FrameStartLine(&frame)),
			})
		}
		b.pbLine(tagLocation_Line, funcID, int64(frame.Line))
	}
	// Attach the location to the first mapping that contains addr, or to
	// the first fake mapping, and fold the deck's symbolization result
	// into that mapping's state.
	for i := range b.mem {
		if b.mem[i].start <= addr && addr < b.mem[i].end || b.mem[i].fake {
			b.pb.uint64Opt(tagLocation_MappingID, uint64(i+1))

			m := b.mem[i]
			m.funcs |= b.deck.symbolizeResult
			b.mem[i] = m
			break
		}
	}
	b.pb.endMessage(tagProfile_Location, start)

	// Write out functions we found during frame expansion.
	for _, fn := range newFuncs {
		start := b.pb.startMessage()
		b.pb.uint64Opt(tagFunction_ID, fn.id)
		b.pb.int64Opt(tagFunction_Name, b.stringIndex(fn.name))
		b.pb.int64Opt(tagFunction_SystemName, b.stringIndex(fn.name))
		b.pb.int64Opt(tagFunction_Filename, b.stringIndex(fn.file))
		b.pb.int64Opt(tagFunction_StartLine, fn.startLine)
		b.pb.endMessage(tagProfile_Function, start)
	}

	b.flush()
	return id
}
654
// space and newline are the separators parseProcSelfMaps passes to
// bytes.Cut to split its input into lines and fields.
var space = []byte(" ")
var newline = []byte("\n")
657
658func parseProcSelfMaps(data []byte, addMapping func(lo, hi, offset uint64, file, buildID string)) {
659	// $ cat /proc/self/maps
660	// 00400000-0040b000 r-xp 00000000 fc:01 787766                             /bin/cat
661	// 0060a000-0060b000 r--p 0000a000 fc:01 787766                             /bin/cat
662	// 0060b000-0060c000 rw-p 0000b000 fc:01 787766                             /bin/cat
663	// 014ab000-014cc000 rw-p 00000000 00:00 0                                  [heap]
664	// 7f7d76af8000-7f7d7797c000 r--p 00000000 fc:01 1318064                    /usr/lib/locale/locale-archive
665	// 7f7d7797c000-7f7d77b36000 r-xp 00000000 fc:01 1180226                    /lib/x86_64-linux-gnu/libc-2.19.so
666	// 7f7d77b36000-7f7d77d36000 ---p 001ba000 fc:01 1180226                    /lib/x86_64-linux-gnu/libc-2.19.so
667	// 7f7d77d36000-7f7d77d3a000 r--p 001ba000 fc:01 1180226                    /lib/x86_64-linux-gnu/libc-2.19.so
668	// 7f7d77d3a000-7f7d77d3c000 rw-p 001be000 fc:01 1180226                    /lib/x86_64-linux-gnu/libc-2.19.so
669	// 7f7d77d3c000-7f7d77d41000 rw-p 00000000 00:00 0
670	// 7f7d77d41000-7f7d77d64000 r-xp 00000000 fc:01 1180217                    /lib/x86_64-linux-gnu/ld-2.19.so
671	// 7f7d77f3f000-7f7d77f42000 rw-p 00000000 00:00 0
672	// 7f7d77f61000-7f7d77f63000 rw-p 00000000 00:00 0
673	// 7f7d77f63000-7f7d77f64000 r--p 00022000 fc:01 1180217                    /lib/x86_64-linux-gnu/ld-2.19.so
674	// 7f7d77f64000-7f7d77f65000 rw-p 00023000 fc:01 1180217                    /lib/x86_64-linux-gnu/ld-2.19.so
675	// 7f7d77f65000-7f7d77f66000 rw-p 00000000 00:00 0
676	// 7ffc342a2000-7ffc342c3000 rw-p 00000000 00:00 0                          [stack]
677	// 7ffc34343000-7ffc34345000 r-xp 00000000 00:00 0                          [vdso]
678	// ffffffffff600000-ffffffffff601000 r-xp 00000000 00:00 0                  [vsyscall]
679
680	var line []byte
681	// next removes and returns the next field in the line.
682	// It also removes from line any spaces following the field.
683	next := func() []byte {
684		var f []byte
685		f, line, _ = bytes.Cut(line, space)
686		line = bytes.TrimLeft(line, " ")
687		return f
688	}
689
690	for len(data) > 0 {
691		line, data, _ = bytes.Cut(data, newline)
692		addr := next()
693		loStr, hiStr, ok := strings.Cut(string(addr), "-")
694		if !ok {
695			continue
696		}
697		lo, err := strconv.ParseUint(loStr, 16, 64)
698		if err != nil {
699			continue
700		}
701		hi, err := strconv.ParseUint(hiStr, 16, 64)
702		if err != nil {
703			continue
704		}
705		perm := next()
706		if len(perm) < 4 || perm[2] != 'x' {
707			// Only interested in executable mappings.
708			continue
709		}
710		offset, err := strconv.ParseUint(string(next()), 16, 64)
711		if err != nil {
712			continue
713		}
714		next()          // dev
715		inode := next() // inode
716		if line == nil {
717			continue
718		}
719		file := string(line)
720
721		// Trim deleted file marker.
722		deletedStr := " (deleted)"
723		deletedLen := len(deletedStr)
724		if len(file) >= deletedLen && file[len(file)-deletedLen:] == deletedStr {
725			file = file[:len(file)-deletedLen]
726		}
727
728		if len(inode) == 1 && inode[0] == '0' && file == "" {
729			// Huge-page text mappings list the initial fragment of
730			// mapped but unpopulated memory as being inode 0.
731			// Don't report that part.
732			// But [vdso] and [vsyscall] are inode 0, so let non-empty file names through.
733			continue
734		}
735
736		// TODO: pprof's remapMappingIDs makes one adjustment:
737		// 1. If there is an /anon_hugepage mapping first and it is
738		// consecutive to a next mapping, drop the /anon_hugepage.
739		// There's no indication why this is needed.
740		// Let's try not doing this and see what breaks.
741		// If we do need it, it would go here, before we
742		// enter the mappings into b.mem in the first place.
743
744		buildID, _ := elfBuildID(file)
745		addMapping(lo, hi, offset, file, buildID)
746	}
747}
748
749func (b *profileBuilder) addMapping(lo, hi, offset uint64, file, buildID string) {
750	b.addMappingEntry(lo, hi, offset, file, buildID, false)
751}
752
753func (b *profileBuilder) addMappingEntry(lo, hi, offset uint64, file, buildID string, fake bool) {
754	b.mem = append(b.mem, memMap{
755		start:   uintptr(lo),
756		end:     uintptr(hi),
757		offset:  offset,
758		file:    file,
759		buildID: buildID,
760		fake:    fake,
761	})
762}
763