// Copyright 2024 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Package oldtrace implements a parser for Go execution traces from versions
// 1.11–1.21.
//
// The package started as a copy of Go 1.19's internal/trace, but has been
// optimized to be faster while using less memory and fewer allocations. It has
// been further modified for the specific purpose of converting traces to the
// new 1.22+ format.
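//
// A minimal usage sketch (a hypothetical caller; error handling elided). It
// assumes r is an io.Reader positioned at the start of a trace produced by Go
// 1.11–1.21, whose header is consumed with version.ReadHeader as described on
// Parse:
//
//	ver, _ := version.ReadHeader(r)
//	tr, _ := Parse(r, ver)
//	for i := 0; i < tr.Events.Len(); i++ {
//		_ = tr.Events.Ptr(i) // process each *Event in global order
//	}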
package oldtrace

import (
	"bytes"
	"cmp"
	"encoding/binary"
	"errors"
	"fmt"
	"internal/trace/event"
	"internal/trace/version"
	"io"
	"math"
	"slices"
	"sort"
)

// Timestamp represents a count of nanoseconds since the beginning of the trace.
// Timestamps can only be meaningfully compared with other timestamps from the
// same trace.
type Timestamp int64

// Event describes one event in the trace.
type Event struct {
	// The Event type is carefully laid out to optimize its size and to avoid
	// pointers, the latter so that the garbage collector won't have to scan any
	// memory of our millions of events.

	Ts    Timestamp  // timestamp in nanoseconds
	G     uint64     // G on which the event happened
	Args  [4]uint64  // event-type-specific arguments
	StkID uint32     // unique stack ID
	P     int32      // P on which the event happened (can be a real P or one of TimerP, NetpollP, SyscallP)
	Type  event.Type // one of Ev*
}

// Frame is a frame in a stack trace.
type Frame struct {
	PC uint64
	// string ID of the function name
	Fn uint64
	// string ID of the file name
	File uint64
	Line int
}

const (
	// Special P identifiers:
	FakeP    = 1000000 + iota
	TimerP   // contains timer unblocks
	NetpollP // contains network unblocks
	SyscallP // contains returns from syscalls
	GCP      // contains GC state
	ProfileP // contains recording of CPU profile samples
)

// Trace is the result of Parse.
type Trace struct {
	Version version.Version

	// Events is the sorted list of Events in the trace.
	Events Events
	// Stacks is the stack traces (stored as slices of PCs), keyed by stack IDs
	// from the trace.
	Stacks        map[uint32][]uint64
	PCs           map[uint64]Frame
	Strings       map[uint64]string
	InlineStrings []string
}

// batchOffset records the byte offset of, and number of events in, a batch. A
// batch is a sequence of events emitted by a P. Events within a single batch
// are sorted by time.
type batchOffset struct {
	offset    int
	numEvents int
}

type parser struct {
	ver  version.Version
	data []byte
	off  int

	strings              map[uint64]string
	inlineStrings        []string
	inlineStringsMapping map[string]int
	// map from Ps to their batch offsets
	batchOffsets map[int32][]batchOffset
	stacks       map[uint32][]uint64
	stacksData   []uint64
	ticksPerSec  int64
	pcs          map[uint64]Frame
	cpuSamples   []Event
	timerGoids   map[uint64]bool

	// state for readRawEvent
	args []uint64

	// state for parseEvent
	lastTs Timestamp
	lastG  uint64
	// map from Ps to the last Gs that ran on them
	lastGs map[int32]uint64
	lastP  int32
}

func (p *parser) discard(n uint64) bool {
	if n > math.MaxInt {
		return false
	}
	if noff := p.off + int(n); noff < p.off || noff > len(p.data) {
		return false
	} else {
		p.off = noff
	}
	return true
}

func newParser(r io.Reader, ver version.Version) (*parser, error) {
	var buf []byte
	if seeker, ok := r.(io.Seeker); ok {
		// Determine the size of the reader so that we can allocate a buffer
		// without having to grow it later.
		cur, err := seeker.Seek(0, io.SeekCurrent)
		if err != nil {
			return nil, err
		}
		end, err := seeker.Seek(0, io.SeekEnd)
		if err != nil {
			return nil, err
		}
		_, err = seeker.Seek(cur, io.SeekStart)
		if err != nil {
			return nil, err
		}

		buf = make([]byte, end-cur)
		_, err = io.ReadFull(r, buf)
		if err != nil {
			return nil, err
		}
	} else {
		var err error
		buf, err = io.ReadAll(r)
		if err != nil {
			return nil, err
		}
	}
	return &parser{data: buf, ver: ver, timerGoids: make(map[uint64]bool)}, nil
}

// Parse parses Go execution traces from versions 1.11–1.21. The provided reader
// will be read to completion and the entire trace will be materialized in
// memory. That is, this function does not allow incremental parsing.
//
// The reader has to be positioned just after the trace header and vers needs to
// be the version of the trace. This can be achieved by using
// version.ReadHeader.
func Parse(r io.Reader, vers version.Version) (Trace, error) {
	// We accept the version as an argument because internal/trace will have
	// already read the version to determine which parser to use.
	p, err := newParser(r, vers)
	if err != nil {
		return Trace{}, err
	}
	return p.parse()
}

// parse parses, post-processes and verifies the trace.
func (p *parser) parse() (Trace, error) {
	defer func() {
		p.data = nil
	}()

	// We parse a trace by running the following steps in order:
	//
	// 1. In the initial pass we collect information about batches (their
	//    locations and sizes). We also parse CPU profiling samples in this
	//    step, simply to reduce the number of full passes that we need.
	//
	// 2. In the second pass we parse batches and merge them into a globally
	//    ordered event stream. This uses the batch information from the first
	//    pass to quickly find batches.
	//
	// 3. After all events have been parsed we convert their timestamps from CPU
	//    ticks to wall time. Furthermore we move timers and syscalls to
	//    dedicated, fake Ps.
	//
	// 4. Finally, we validate the trace.

	p.strings = make(map[uint64]string)
	p.batchOffsets = make(map[int32][]batchOffset)
	p.lastGs = make(map[int32]uint64)
	p.stacks = make(map[uint32][]uint64)
	p.pcs = make(map[uint64]Frame)
	p.inlineStringsMapping = make(map[string]int)

	if err := p.collectBatchesAndCPUSamples(); err != nil {
		return Trace{}, err
	}

	events, err := p.parseEventBatches()
	if err != nil {
		return Trace{}, err
	}

	if p.ticksPerSec == 0 {
		return Trace{}, errors.New("no EvFrequency event")
	}

	if events.Len() > 0 {
		// Translate CPU ticks to real time.
		minTs := events.Ptr(0).Ts
		// Use floating point to avoid integer overflows.
		freq := 1e9 / float64(p.ticksPerSec)
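		// For example (illustrative numbers, not from a real trace): with
		// ticksPerSec = 2,500,000,000, freq is 0.4, so an event recorded 1000
		// ticks after minTs ends up with Ts = 400ns.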
		for i := 0; i < events.Len(); i++ {
			ev := events.Ptr(i)
			ev.Ts = Timestamp(float64(ev.Ts-minTs) * freq)
			// Move timers and syscalls to separate fake Ps.
			if p.timerGoids[ev.G] && ev.Type == EvGoUnblock {
				ev.P = TimerP
			}
			if ev.Type == EvGoSysExit {
				ev.P = SyscallP
			}
		}
	}

	if err := p.postProcessTrace(events); err != nil {
		return Trace{}, err
	}

	res := Trace{
		Version:       p.ver,
		Events:        events,
		Stacks:        p.stacks,
		Strings:       p.strings,
		InlineStrings: p.inlineStrings,
		PCs:           p.pcs,
	}
	return res, nil
}

// rawEvent is a helper type used during parsing.
type rawEvent struct {
	typ   event.Type
	args  []uint64
	sargs []string

	// if typ == EvBatch, these fields describe the batch.
	batchPid    int32
	batchOffset int
}

type proc struct {
	pid int32
	// the remaining events in the current batch
	events []Event
	// buffer for reading batches into, aliased by proc.events
	buf []Event

	// there are no more batches left
	done bool
}

const eventsBucketSize = 524288 // 32 MiB of events

type Events struct {
	// Events is a slice of slices that grows one slice of size eventsBucketSize
	// at a time. This avoids the O(n) cost of slice growth in append, and
	// additionally allows consumers to drop references to parts of the data,
	// freeing memory piecewise.
	n       int
	buckets []*[eventsBucketSize]Event
	off     int
}

// grow grows the slice by one and returns a pointer to the new element, without
// overwriting it.
func (l *Events) grow() *Event {
	a, b := l.index(l.n)
	if a >= len(l.buckets) {
		l.buckets = append(l.buckets, new([eventsBucketSize]Event))
	}
	ptr := &l.buckets[a][b]
	l.n++
	return ptr
}

// append appends v to the slice and returns a pointer to the new element.
func (l *Events) append(v Event) *Event {
	ptr := l.grow()
	*ptr = v
	return ptr
}

func (l *Events) Ptr(i int) *Event {
	a, b := l.index(i + l.off)
	return &l.buckets[a][b]
}

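// index converts a logical event index into a bucket index and an index within
// that bucket. For example (with the current eventsBucketSize of 524288),
// index(524290) returns (1, 2).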
func (l *Events) index(i int) (int, int) {
	// Doing the division on uint instead of int compiles this function to a
	// shift and an AND (for power of 2 bucket sizes), versus a whole bunch of
	// instructions for int.
	return int(uint(i) / eventsBucketSize), int(uint(i) % eventsBucketSize)
}

func (l *Events) Len() int {
	return l.n - l.off
}

func (l *Events) Less(i, j int) bool {
	return l.Ptr(i).Ts < l.Ptr(j).Ts
}

func (l *Events) Swap(i, j int) {
	*l.Ptr(i), *l.Ptr(j) = *l.Ptr(j), *l.Ptr(i)
}

func (l *Events) Pop() (*Event, bool) {
	if l.off == l.n {
		return nil, false
	}
	a, b := l.index(l.off)
	ptr := &l.buckets[a][b]
	l.off++
	if b == eventsBucketSize-1 || l.off == l.n {
		// We've consumed the last event from the bucket, so drop the bucket and
		// allow GC to collect it.
		l.buckets[a] = nil
	}
	return ptr, true
}

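// All returns an iterator over all events, in order. The returned function
// follows the range-over-func convention, so with a Go 1.23 or newer toolchain
// it can be used directly in a range loop (a sketch, assuming such a
// toolchain):
//
//	for ev := range l.All() {
//		_ = ev
//	}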
func (l *Events) All() func(yield func(ev *Event) bool) {
	return func(yield func(ev *Event) bool) {
		for i := 0; i < l.Len(); i++ {
			a, b := l.index(i + l.off)
			ptr := &l.buckets[a][b]
			if !yield(ptr) {
				return
			}
		}
	}
}

// parseEventBatches reads per-P event batches and merges them into a single,
// consistent stream. The high-level idea is as follows. Events within an
// individual batch are in correct order, because they are emitted by a single
// P. So we need to produce a correct interleaving of the batches. To do this
// we take the first unmerged event from each batch (the frontier), then choose
// the subset that is "ready" to be merged, that is, events for which all
// dependencies have already been merged. Then we choose the event with the
// lowest timestamp from the subset, merge it and repeat. This approach ensures
// that we form a consistent stream even if timestamps are incorrect (a
// condition observed on some machines).
func (p *parser) parseEventBatches() (Events, error) {
	// The ordering of CPU profile sample events in the data stream is based on
	// when each run of the signal handler was able to acquire the spinlock,
	// with original timestamps corresponding to when ReadTrace pulled the data
	// off of the profBuf queue. Re-sort them by the timestamp we captured
	// inside the signal handler.
	slices.SortFunc(p.cpuSamples, func(a, b Event) int {
		return cmp.Compare(a.Ts, b.Ts)
	})

	allProcs := make([]proc, 0, len(p.batchOffsets))
	for pid := range p.batchOffsets {
		allProcs = append(allProcs, proc{pid: pid})
	}
	allProcs = append(allProcs, proc{pid: ProfileP, events: p.cpuSamples})

	events := Events{}

	// Merge events as long as at least one P has more events
	gs := make(map[uint64]gState)
	// Note: technically we don't need a priority queue here. We're only ever
	// interested in the earliest eligible event, which means we just have to
	// track the smallest element. However, in practice, the priority queue
	// performs better, because for each event we only have to compute its state
	// transition once, not on each iteration. If it was eligible before, it'll
	// already be in the queue. Furthermore, on average, we only have one P to
	// look at in each iteration, because all other Ps are already in the queue.
	var frontier orderEventList

	availableProcs := make([]*proc, len(allProcs))
	for i := range allProcs {
		availableProcs[i] = &allProcs[i]
	}
	for {
	pidLoop:
		for i := 0; i < len(availableProcs); i++ {
			proc := availableProcs[i]

			for len(proc.events) == 0 {
				// Call loadBatch in a loop because sometimes batches are empty
				evs, err := p.loadBatch(proc.pid, proc.buf[:0])
				proc.buf = evs[:0]
				if err == io.EOF {
					// This P has no more events
					proc.done = true
					availableProcs[i], availableProcs[len(availableProcs)-1] = availableProcs[len(availableProcs)-1], availableProcs[i]
					availableProcs = availableProcs[:len(availableProcs)-1]
					// We swapped the element at i with another proc, so look at
					// the index again
					i--
					continue pidLoop
				} else if err != nil {
					return Events{}, err
				} else {
					proc.events = evs
				}
			}

			ev := &proc.events[0]
			g, init, _ := stateTransition(ev)

			// TODO(dh): This implementation matches the behavior of the
			// upstream 'go tool trace', and works in practice, but has run into
			// the following inconsistency during fuzzing: what happens if
			// multiple Ps have events for the same G? While building the
			// frontier we will check all of the events against the current
			// state of the G. However, when we process the frontier, the state
			// of the G changes, and a transition that was valid while building
			// the frontier may no longer be valid when processing the frontier.
			// Is this something that can happen for real, valid traces, or is
			// this only possible with corrupt data?
			if !transitionReady(g, gs[g], init) {
				continue
			}
			proc.events = proc.events[1:]
			availableProcs[i], availableProcs[len(availableProcs)-1] = availableProcs[len(availableProcs)-1], availableProcs[i]
			availableProcs = availableProcs[:len(availableProcs)-1]
			frontier.Push(orderEvent{*ev, proc})

			// We swapped the element at i with another proc, so look at the
			// index again
			i--
		}

		if len(frontier) == 0 {
			for i := range allProcs {
				if !allProcs[i].done {
					return Events{}, fmt.Errorf("no consistent ordering of events possible")
				}
			}
			break
		}
		f := frontier.Pop()

		// We're computing the state transition twice, once when computing the
		// frontier, and now to apply the transition. This is fine because
		// stateTransition is a pure function. Computing it again is cheaper
		// than storing large items in the frontier.
		g, init, next := stateTransition(&f.ev)

		// Get rid of "Local" events, they are intended merely for ordering.
		switch f.ev.Type {
		case EvGoStartLocal:
			f.ev.Type = EvGoStart
		case EvGoUnblockLocal:
			f.ev.Type = EvGoUnblock
		case EvGoSysExitLocal:
			f.ev.Type = EvGoSysExit
		}
		events.append(f.ev)

		if err := transition(gs, g, init, next); err != nil {
			return Events{}, err
		}
		availableProcs = append(availableProcs, f.proc)
	}

	// At this point we have a consistent stream of events. Make sure time
	// stamps respect the ordering. The tests will skip (not fail) the test case
	// if they see this error.
	if !sort.IsSorted(&events) {
		return Events{}, ErrTimeOrder
	}

	// The last part is giving correct timestamps to EvGoSysExit events. The
	// problem with EvGoSysExit is that the actual syscall exit timestamp
	// (ev.Args[2]) is potentially acquired long before event emission. So far
	// we've used the timestamp of event emission (ev.Ts). We could not set
	// ev.Ts = ev.Args[2] earlier, because it would produce seemingly broken
	// timestamps (misplaced event). We also can't simply update the timestamp
	// and resort events, because if timestamps are broken we will misplace the
	// event and later report a logically broken trace (instead of reporting
	// broken timestamps).
	lastSysBlock := make(map[uint64]Timestamp)
	for i := 0; i < events.Len(); i++ {
		ev := events.Ptr(i)
		switch ev.Type {
		case EvGoSysBlock, EvGoInSyscall:
			lastSysBlock[ev.G] = ev.Ts
		case EvGoSysExit:
			ts := Timestamp(ev.Args[2])
			if ts == 0 {
				continue
			}
			block := lastSysBlock[ev.G]
			if block == 0 {
				return Events{}, fmt.Errorf("stray syscall exit")
			}
			if ts < block {
				return Events{}, ErrTimeOrder
			}
			ev.Ts = ts
		}
	}
	sort.Stable(&events)

	return events, nil
}

// collectBatchesAndCPUSamples records the offsets of batches and parses CPU samples.
func (p *parser) collectBatchesAndCPUSamples() error {
	// Read events.
	var raw rawEvent
	var curP int32
	for n := uint64(0); ; n++ {
		err := p.readRawEvent(skipArgs|skipStrings, &raw)
		if err == io.EOF {
			break
		}
		if err != nil {
			return err
		}
		if raw.typ == EvNone {
			continue
		}

		if raw.typ == EvBatch {
			bo := batchOffset{offset: raw.batchOffset}
			p.batchOffsets[raw.batchPid] = append(p.batchOffsets[raw.batchPid], bo)
			curP = raw.batchPid
		}

		batches := p.batchOffsets[curP]
		if len(batches) == 0 {
			return fmt.Errorf("read event %d with current P of %d, but P has no batches yet",
				raw.typ, curP)
		}
		batches[len(batches)-1].numEvents++

		if raw.typ == EvCPUSample {
			e := Event{Type: raw.typ}

			argOffset := 1
			narg := raw.argNum()
			if len(raw.args) != narg {
				return fmt.Errorf("CPU sample has wrong number of arguments: want %d, got %d", narg, len(raw.args))
			}
			for i := argOffset; i < narg; i++ {
				if i == narg-1 {
					e.StkID = uint32(raw.args[i])
				} else {
					e.Args[i-argOffset] = raw.args[i]
				}
			}

			e.Ts = Timestamp(e.Args[0])
			e.P = int32(e.Args[1])
			e.G = e.Args[2]
			e.Args[0] = 0

			// Most events are written out by the active P at the exact moment
			// they describe. CPU profile samples are different because they're
			// written to the tracing log after some delay, by a separate worker
			// goroutine, into a separate buffer.
			//
			// We keep these in their own batch until all of the batches are
			// merged in timestamp order. We also (right before the merge)
			// re-sort these events by the timestamp captured in the profiling
			// signal handler.
			//
			// Note that we're not concerned about the memory usage of storing
			// all CPU samples during the indexing phase. There are orders of
			// magnitude fewer CPU samples than runtime events.
			p.cpuSamples = append(p.cpuSamples, e)
		}
	}

	return nil
}

const (
	skipArgs = 1 << iota
	skipStrings
)

func (p *parser) readByte() (byte, bool) {
	if p.off < len(p.data) && p.off >= 0 {
		b := p.data[p.off]
		p.off++
		return b, true
	} else {
		return 0, false
	}
}

func (p *parser) readFull(n int) ([]byte, error) {
	if p.off >= len(p.data) || p.off < 0 || p.off+n > len(p.data) {
		// p.off < 0 is impossible but makes BCE happy.
		//
		// We fail outright if there's not enough data; we don't care about
		// partial results.
		return nil, io.ErrUnexpectedEOF
	}
	buf := p.data[p.off : p.off+n]
	p.off += n
	return buf, nil
}

// readRawEvent reads a raw event into ev. The slices in ev are only valid until
// the next call to readRawEvent, even when storing to a different location.
func (p *parser) readRawEvent(flags uint, ev *rawEvent) error {
	// The number of arguments is encoded using two bits and can thus only
	// represent the values 0–3. The value 3 (on the wire) indicates that
	// arguments are prefixed by their byte length, to encode >=3 arguments.
	const inlineArgs = 3

	// Read event type and number of arguments (1 byte).
	b, ok := p.readByte()
	if !ok {
		return io.EOF
	}
	typ := event.Type(b << 2 >> 2)
	// Most events have a timestamp before the actual arguments, so we add 1 and
	// parse it like it's the first argument. EvString has a special format and
	// the number of arguments doesn't matter. EvBatch writes '1' as the number
	// of arguments, but actually has two: a pid and a timestamp, but here the
	// timestamp is the second argument, not the first; adding 1 happens to come
	// up with the correct number, but it doesn't matter, because EvBatch has
	// custom logic for parsing.
	//
	// Note that because we're adding 1, inlineArgs == 3 describes the largest
	// number of logical arguments that isn't length-prefixed, even though the
	// value 3 on the wire indicates length-prefixing. For us, that becomes narg
	// == 4.
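	//
	// For example (a made-up byte, not from a real trace): b = 0b01_000101
	// carries typ = 5 (EvProcStart) in its low six bits and 1 in its top two
	// bits, so narg = 1 + 1 = 2, matching EvProcStart's timestamp and thread
	// id.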
	narg := b>>6 + 1
	if typ == EvNone || typ >= EvCount || EventDescriptions[typ].minVersion > p.ver {
		return fmt.Errorf("unknown event type %d", typ)
	}

	switch typ {
	case EvString:
		if flags&skipStrings != 0 {
			// String dictionary entry [ID, length, string].
			if _, err := p.readVal(); err != nil {
				return errMalformedVarint
			}
			ln, err := p.readVal()
			if err != nil {
				return err
			}
			if !p.discard(ln) {
				return fmt.Errorf("failed to read trace: %w", io.EOF)
			}
		} else {
			// String dictionary entry [ID, length, string].
			id, err := p.readVal()
			if err != nil {
				return err
			}
			if id == 0 {
				return errors.New("string has invalid id 0")
			}
			if p.strings[id] != "" {
				return fmt.Errorf("string has duplicate id %d", id)
			}
			var ln uint64
			ln, err = p.readVal()
			if err != nil {
				return err
			}
			if ln == 0 {
				return errors.New("string has invalid length 0")
			}
			if ln > 1e6 {
				return fmt.Errorf("string has too large length %d", ln)
			}
			buf, err := p.readFull(int(ln))
			if err != nil {
				return fmt.Errorf("failed to read trace: %w", err)
			}
			p.strings[id] = string(buf)
		}

		ev.typ = EvNone
		return nil
	case EvBatch:
		if want := byte(2); narg != want {
			return fmt.Errorf("EvBatch has wrong number of arguments: got %d, want %d", narg, want)
		}

		// -1 because we've already read the first byte of the batch
		off := p.off - 1

		pid, err := p.readVal()
		if err != nil {
			return err
		}
		if pid != math.MaxUint64 && pid > math.MaxInt32 {
			return fmt.Errorf("processor ID %d is larger than maximum of %d", pid, uint64(math.MaxInt32))
		}

		var pid32 int32
		if pid == math.MaxUint64 {
			pid32 = -1
		} else {
			pid32 = int32(pid)
		}

		v, err := p.readVal()
		if err != nil {
			return err
		}

		*ev = rawEvent{
			typ:         EvBatch,
			args:        p.args[:0],
			batchPid:    pid32,
			batchOffset: off,
		}
		ev.args = append(ev.args, pid, v)
		return nil
	default:
		*ev = rawEvent{typ: typ, args: p.args[:0]}
		if narg <= inlineArgs {
			if flags&skipArgs == 0 {
				for i := 0; i < int(narg); i++ {
					v, err := p.readVal()
					if err != nil {
						return fmt.Errorf("failed to read event %d argument: %w", typ, err)
					}
					ev.args = append(ev.args, v)
				}
			} else {
				for i := 0; i < int(narg); i++ {
					if _, err := p.readVal(); err != nil {
						return fmt.Errorf("failed to read event %d argument: %w", typ, errMalformedVarint)
					}
				}
			}
		} else {
			// More than inlineArgs args; the first value is the length of the
			// event in bytes.
			v, err := p.readVal()
			if err != nil {
				return fmt.Errorf("failed to read event %d argument: %w", typ, err)
			}

			if limit := uint64(2048); v > limit {
				// At the time of Go 1.19, v seems to be at most 128. Set 2048
				// as a generous upper limit and guard against malformed traces.
				return fmt.Errorf("failed to read event %d argument: length-prefixed argument too big: %d bytes, limit is %d", typ, v, limit)
			}

			if flags&skipArgs == 0 || typ == EvCPUSample {
				buf, err := p.readFull(int(v))
				if err != nil {
					return fmt.Errorf("failed to read trace: %w", err)
				}
				for len(buf) > 0 {
					var v uint64
					v, buf, err = readValFrom(buf)
					if err != nil {
						return err
					}
					ev.args = append(ev.args, v)
				}
			} else {
				// Skip over arguments
				if !p.discard(v) {
					return fmt.Errorf("failed to read trace: %w", io.EOF)
				}
			}
			if typ == EvUserLog {
				// EvUserLog records are followed by a value string
				if flags&skipArgs == 0 {
					// Read string
					s, err := p.readStr()
					if err != nil {
						return err
					}
					ev.sargs = append(ev.sargs, s)
				} else {
					// Skip string
					v, err := p.readVal()
					if err != nil {
						return err
					}
					if !p.discard(v) {
						return io.EOF
					}
				}
			}
		}

		p.args = ev.args[:0]
		return nil
	}
}

// loadBatch loads the next batch for pid and appends its contents to events.
func (p *parser) loadBatch(pid int32, events []Event) ([]Event, error) {
	offsets := p.batchOffsets[pid]
	if len(offsets) == 0 {
		return nil, io.EOF
	}
	n := offsets[0].numEvents
	offset := offsets[0].offset
	offsets = offsets[1:]
	p.batchOffsets[pid] = offsets

	p.off = offset

	if cap(events) < n {
		events = make([]Event, 0, n)
	}

	gotHeader := false
	var raw rawEvent
	var ev Event
	for {
		err := p.readRawEvent(0, &raw)
		if err == io.EOF {
			break
		}
		if err != nil {
			return nil, err
		}
		if raw.typ == EvNone || raw.typ == EvCPUSample {
			continue
		}
		if raw.typ == EvBatch {
			if gotHeader {
				break
			} else {
				gotHeader = true
			}
		}

		err = p.parseEvent(&raw, &ev)
		if err != nil {
			return nil, err
		}
		if ev.Type != EvNone {
			events = append(events, ev)
		}
	}

	return events, nil
}

func (p *parser) readStr() (s string, err error) {
	sz, err := p.readVal()
	if err != nil {
		return "", err
	}
	if sz == 0 {
		return "", nil
	}
	if sz > 1e6 {
		return "", fmt.Errorf("string is too large (len=%d)", sz)
	}
	buf, err := p.readFull(int(sz))
	if err != nil {
		return "", fmt.Errorf("failed to read trace: %w", err)
	}
	return string(buf), nil
}

// parseEvent transforms raw events into events.
// It analyzes and verifies per-event-type arguments.
func (p *parser) parseEvent(raw *rawEvent, ev *Event) error {
	desc := &EventDescriptions[raw.typ]
	if desc.Name == "" {
		return fmt.Errorf("missing description for event type %d", raw.typ)
	}
	narg := raw.argNum()
	if len(raw.args) != narg {
		return fmt.Errorf("%s has wrong number of arguments: want %d, got %d", desc.Name, narg, len(raw.args))
	}
	switch raw.typ {
	case EvBatch:
		p.lastGs[p.lastP] = p.lastG
		if raw.args[0] != math.MaxUint64 && raw.args[0] > math.MaxInt32 {
			return fmt.Errorf("processor ID %d is larger than maximum of %d", raw.args[0], uint64(math.MaxInt32))
		}
		if raw.args[0] == math.MaxUint64 {
			p.lastP = -1
		} else {
			p.lastP = int32(raw.args[0])
		}
		p.lastG = p.lastGs[p.lastP]
		p.lastTs = Timestamp(raw.args[1])
	case EvFrequency:
		p.ticksPerSec = int64(raw.args[0])
		if p.ticksPerSec <= 0 {
			// The most likely cause for this is tick skew on different CPUs.
			// For example, solaris/amd64 seems to have wildly different
			// ticks on different CPUs.
			return ErrTimeOrder
		}
	case EvTimerGoroutine:
		p.timerGoids[raw.args[0]] = true
	case EvStack:
		if len(raw.args) < 2 {
			return fmt.Errorf("EvStack has wrong number of arguments: want at least 2, got %d", len(raw.args))
		}
		size := raw.args[1]
		if size > 1000 {
			return fmt.Errorf("EvStack has bad number of frames: %d", size)
		}
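		// Each frame contributes four values ({PC, fn, file, line}), so, for
		// example, a two-frame stack carries 2 + 4*2 = 10 args in total.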
		want := 2 + 4*size
		if uint64(len(raw.args)) != want {
			return fmt.Errorf("EvStack has wrong number of arguments: want %d, got %d", want, len(raw.args))
		}
		id := uint32(raw.args[0])
		if id != 0 && size > 0 {
			stk := p.allocateStack(size)
			for i := 0; i < int(size); i++ {
				pc := raw.args[2+i*4+0]
				fn := raw.args[2+i*4+1]
				file := raw.args[2+i*4+2]
				line := raw.args[2+i*4+3]
				stk[i] = pc

				if _, ok := p.pcs[pc]; !ok {
					p.pcs[pc] = Frame{PC: pc, Fn: fn, File: file, Line: int(line)}
				}
			}
			p.stacks[id] = stk
		}
	case EvCPUSample:
		// These events get parsed during the indexing step and don't strictly
		// belong to the batch.
	default:
		*ev = Event{Type: raw.typ, P: p.lastP, G: p.lastG}
		var argOffset int
		ev.Ts = p.lastTs + Timestamp(raw.args[0])
		argOffset = 1
		p.lastTs = ev.Ts
		for i := argOffset; i < narg; i++ {
			if i == narg-1 && desc.Stack {
				ev.StkID = uint32(raw.args[i])
			} else {
				ev.Args[i-argOffset] = raw.args[i]
			}
		}
		switch raw.typ {
		case EvGoStart, EvGoStartLocal, EvGoStartLabel:
			p.lastG = ev.Args[0]
			ev.G = p.lastG
		case EvGoEnd, EvGoStop, EvGoSched, EvGoPreempt,
			EvGoSleep, EvGoBlock, EvGoBlockSend, EvGoBlockRecv,
			EvGoBlockSelect, EvGoBlockSync, EvGoBlockCond, EvGoBlockNet,
			EvGoSysBlock, EvGoBlockGC:
			p.lastG = 0
		case EvGoSysExit, EvGoWaiting, EvGoInSyscall:
			ev.G = ev.Args[0]
		case EvUserTaskCreate:
			// e.Args 0: taskID, 1: parentID, 2: nameID
		case EvUserRegion:
			// e.Args 0: taskID, 1: mode, 2: nameID
		case EvUserLog:
			// e.Args 0: taskID, 1: keyID, 2: stackID, 3: messageID
			// raw.sargs 0: message

			if id, ok := p.inlineStringsMapping[raw.sargs[0]]; ok {
				ev.Args[3] = uint64(id)
			} else {
				id := len(p.inlineStrings)
				p.inlineStringsMapping[raw.sargs[0]] = id
				p.inlineStrings = append(p.inlineStrings, raw.sargs[0])
				ev.Args[3] = uint64(id)
			}
		}

		return nil
	}

	ev.Type = EvNone
	return nil
}

// ErrTimeOrder is returned by Parse when the trace contains
// time stamps that do not respect actual event ordering.
var ErrTimeOrder = errors.New("time stamps out of order")

// postProcessTrace does inter-event verification and information restoration.
// The resulting trace is guaranteed to be consistent
// (for example, a P does not run two Gs at the same time, or a G is indeed
// blocked before an unblock event).
func (p *parser) postProcessTrace(events Events) error {
	const (
		gDead = iota
		gRunnable
		gRunning
		gWaiting
	)
	type gdesc struct {
		state        int
		ev           *Event
		evStart      *Event
		evCreate     *Event
		evMarkAssist *Event
	}
	type pdesc struct {
		running bool
		g       uint64
		evSweep *Event
	}

	gs := make(map[uint64]gdesc)
	ps := make(map[int32]pdesc)
	tasks := make(map[uint64]*Event)           // task id to task creation events
	activeRegions := make(map[uint64][]*Event) // goroutine id to stack of regions
	gs[0] = gdesc{state: gRunning}
	var evGC, evSTW *Event

	checkRunning := func(p pdesc, g gdesc, ev *Event, allowG0 bool) error {
		name := EventDescriptions[ev.Type].Name
		if g.state != gRunning {
			return fmt.Errorf("g %d is not running while %s (time %d)", ev.G, name, ev.Ts)
		}
		if p.g != ev.G {
			return fmt.Errorf("p %d is not running g %d while %s (time %d)", ev.P, ev.G, name, ev.Ts)
		}
		if !allowG0 && ev.G == 0 {
			return fmt.Errorf("g 0 did %s (time %d)", name, ev.Ts)
		}
		return nil
	}

	for evIdx := 0; evIdx < events.Len(); evIdx++ {
		ev := events.Ptr(evIdx)

		switch ev.Type {
		case EvProcStart:
			p := ps[ev.P]
			if p.running {
				return fmt.Errorf("p %d is running before start (time %d)", ev.P, ev.Ts)
			}
			p.running = true

			ps[ev.P] = p
		case EvProcStop:
			p := ps[ev.P]
			if !p.running {
				return fmt.Errorf("p %d is not running before stop (time %d)", ev.P, ev.Ts)
			}
			if p.g != 0 {
				return fmt.Errorf("p %d is running a goroutine %d during stop (time %d)", ev.P, p.g, ev.Ts)
			}
			p.running = false

			ps[ev.P] = p
		case EvGCStart:
			if evGC != nil {
				return fmt.Errorf("previous GC is not ended before a new one (time %d)", ev.Ts)
			}
			evGC = ev
			// Attribute this to the global GC state.
			ev.P = GCP
		case EvGCDone:
			if evGC == nil {
				return fmt.Errorf("bogus GC end (time %d)", ev.Ts)
			}
			evGC = nil
		case EvSTWStart:
			evp := &evSTW
			if *evp != nil {
				return fmt.Errorf("previous STW is not ended before a new one (time %d)", ev.Ts)
			}
			*evp = ev
		case EvSTWDone:
			evp := &evSTW
			if *evp == nil {
				return fmt.Errorf("bogus STW end (time %d)", ev.Ts)
			}
			*evp = nil
		case EvGCSweepStart:
			p := ps[ev.P]
			if p.evSweep != nil {
				return fmt.Errorf("previous sweeping is not ended before a new one (time %d)", ev.Ts)
			}
			p.evSweep = ev

			ps[ev.P] = p
		case EvGCMarkAssistStart:
			g := gs[ev.G]
			if g.evMarkAssist != nil {
				return fmt.Errorf("previous mark assist is not ended before a new one (time %d)", ev.Ts)
			}
			g.evMarkAssist = ev

			gs[ev.G] = g
		case EvGCMarkAssistDone:
			// Unlike most events, mark assists can be in progress when a
			// goroutine starts tracing, so we can't report an error here.
			g := gs[ev.G]
			if g.evMarkAssist != nil {
				g.evMarkAssist = nil
			}

			gs[ev.G] = g
		case EvGCSweepDone:
			p := ps[ev.P]
			if p.evSweep == nil {
				return fmt.Errorf("bogus sweeping end (time %d)", ev.Ts)
			}
			p.evSweep = nil

			ps[ev.P] = p
		case EvGoWaiting:
			g := gs[ev.G]
			if g.state != gRunnable {
				return fmt.Errorf("g %d is not runnable before EvGoWaiting (time %d)", ev.G, ev.Ts)
			}
			g.state = gWaiting
			g.ev = ev

			gs[ev.G] = g
		case EvGoInSyscall:
			g := gs[ev.G]
			if g.state != gRunnable {
				return fmt.Errorf("g %d is not runnable before EvGoInSyscall (time %d)", ev.G, ev.Ts)
			}
			g.state = gWaiting
			g.ev = ev

			gs[ev.G] = g
		case EvGoCreate:
			g := gs[ev.G]
			p := ps[ev.P]
			if err := checkRunning(p, g, ev, true); err != nil {
				return err
			}
			if _, ok := gs[ev.Args[0]]; ok {
				return fmt.Errorf("g %d already exists (time %d)", ev.Args[0], ev.Ts)
			}
			gs[ev.Args[0]] = gdesc{state: gRunnable, ev: ev, evCreate: ev}

		case EvGoStart, EvGoStartLabel:
			g := gs[ev.G]
			p := ps[ev.P]
			if g.state != gRunnable {
				return fmt.Errorf("g %d is not runnable before start (time %d)", ev.G, ev.Ts)
			}
			if p.g != 0 {
				return fmt.Errorf("p %d is already running g %d while start g %d (time %d)", ev.P, p.g, ev.G, ev.Ts)
			}
			g.state = gRunning
			g.evStart = ev
			p.g = ev.G
			if g.evCreate != nil {
				ev.StkID = uint32(g.evCreate.Args[1])
				g.evCreate = nil
			}

			if g.ev != nil {
				g.ev = nil
			}

			gs[ev.G] = g
			ps[ev.P] = p
		case EvGoEnd, EvGoStop:
			g := gs[ev.G]
			p := ps[ev.P]
			if err := checkRunning(p, g, ev, false); err != nil {
				return err
			}
			g.evStart = nil
			g.state = gDead
			p.g = 0

			if ev.Type == EvGoEnd { // flush all active regions
				delete(activeRegions, ev.G)
			}

			gs[ev.G] = g
			ps[ev.P] = p
		case EvGoSched, EvGoPreempt:
			g := gs[ev.G]
			p := ps[ev.P]
			if err := checkRunning(p, g, ev, false); err != nil {
				return err
			}
			g.state = gRunnable
			g.evStart = nil
			p.g = 0
			g.ev = ev

			gs[ev.G] = g
			ps[ev.P] = p
		case EvGoUnblock:
			g := gs[ev.G]
			p := ps[ev.P]
			if g.state != gRunning {
				return fmt.Errorf("g %d is not running while unpark (time %d)", ev.G, ev.Ts)
			}
			if ev.P != TimerP && p.g != ev.G {
				return fmt.Errorf("p %d is not running g %d while unpark (time %d)", ev.P, ev.G, ev.Ts)
			}
			g1 := gs[ev.Args[0]]
			if g1.state != gWaiting {
				return fmt.Errorf("g %d is not waiting before unpark (time %d)", ev.Args[0], ev.Ts)
			}
			if g1.ev != nil && g1.ev.Type == EvGoBlockNet {
				ev.P = NetpollP
			}
			g1.state = gRunnable
			g1.ev = ev
			gs[ev.Args[0]] = g1

		case EvGoSysCall:
			g := gs[ev.G]
			p := ps[ev.P]
			if err := checkRunning(p, g, ev, false); err != nil {
				return err
			}
			g.ev = ev

			gs[ev.G] = g
		case EvGoSysBlock:
			g := gs[ev.G]
			p := ps[ev.P]
			if err := checkRunning(p, g, ev, false); err != nil {
				return err
			}
			g.state = gWaiting
			g.evStart = nil
			p.g = 0

			gs[ev.G] = g
			ps[ev.P] = p
		case EvGoSysExit:
			g := gs[ev.G]
			if g.state != gWaiting {
				return fmt.Errorf("g %d is not waiting during syscall exit (time %d)", ev.G, ev.Ts)
			}
			g.state = gRunnable
			g.ev = ev

			gs[ev.G] = g
		case EvGoSleep, EvGoBlock, EvGoBlockSend, EvGoBlockRecv,
			EvGoBlockSelect, EvGoBlockSync, EvGoBlockCond, EvGoBlockNet, EvGoBlockGC:
			g := gs[ev.G]
			p := ps[ev.P]
			if err := checkRunning(p, g, ev, false); err != nil {
				return err
			}
			g.state = gWaiting
			g.ev = ev
			g.evStart = nil
			p.g = 0

			gs[ev.G] = g
			ps[ev.P] = p
		case EvUserTaskCreate:
			taskid := ev.Args[0]
			if prevEv, ok := tasks[taskid]; ok {
				return fmt.Errorf("task id conflicts (id:%d), %q vs %q", taskid, ev, prevEv)
			}
			tasks[ev.Args[0]] = ev

		case EvUserTaskEnd:
			taskid := ev.Args[0]
			delete(tasks, taskid)

		case EvUserRegion:
			mode := ev.Args[1]
			regions := activeRegions[ev.G]
			if mode == 0 { // region start
				activeRegions[ev.G] = append(regions, ev) // push
			} else if mode == 1 { // region end
				n := len(regions)
				if n > 0 { // matching region start event is in the trace.
					s := regions[n-1]
					if s.Args[0] != ev.Args[0] || s.Args[2] != ev.Args[2] { // task id, region name mismatch
						return fmt.Errorf("misuse of region in goroutine %d: span end %q when the inner-most active span start event is %q", ev.G, ev, s)
					}

					if n > 1 {
						activeRegions[ev.G] = regions[:n-1]
					} else {
						delete(activeRegions, ev.G)
					}
				}
			} else {
				return fmt.Errorf("invalid user region mode: %q", ev)
			}
		}

		if ev.StkID != 0 && len(p.stacks[ev.StkID]) == 0 {
			// Make sure events don't refer to stacks that don't exist or to
			// stacks with zero frames. Neither of these should be possible, but
			// better be safe than sorry.
			ev.StkID = 0
		}
	}

	// TODO(mknyszek): restore stacks for EvGoStart events.
	return nil
}

var errMalformedVarint = errors.New("malformed base-128 varint")

// readVal reads an unsigned base-128 value from the parser's buffer.
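// For example, the bytes 0x96 0x01 decode to the value 150.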
func (p *parser) readVal() (uint64, error) {
	v, n := binary.Uvarint(p.data[p.off:])
	if n <= 0 {
		return 0, errMalformedVarint
	}
	p.off += n
	return v, nil
}

func readValFrom(buf []byte) (v uint64, rem []byte, err error) {
	v, n := binary.Uvarint(buf)
	if n <= 0 {
		return 0, nil, errMalformedVarint
	}
	return v, buf[n:], nil
}

func (ev *Event) String() string {
	desc := &EventDescriptions[ev.Type]
	w := new(bytes.Buffer)
	fmt.Fprintf(w, "%d %s p=%d g=%d stk=%d", ev.Ts, desc.Name, ev.P, ev.G, ev.StkID)
	for i, a := range desc.Args {
		fmt.Fprintf(w, " %s=%d", a, ev.Args[i])
	}
	return w.String()
}

// argNum returns the total number of args for the event, accounting for
// timestamps, sequence numbers and differences between trace format versions.
func (raw *rawEvent) argNum() int {
	desc := &EventDescriptions[raw.typ]
	if raw.typ == EvStack {
		return len(raw.args)
	}
	narg := len(desc.Args)
	if desc.Stack {
		narg++
	}
	switch raw.typ {
	case EvBatch, EvFrequency, EvTimerGoroutine:
		return narg
	}
	narg++ // timestamp
	return narg
}

// Event types in the trace.
// Verbatim copy from src/runtime/trace.go with the "trace" prefix removed.
const (
	EvNone              event.Type = 0  // unused
	EvBatch             event.Type = 1  // start of per-P batch of events [pid, timestamp]
	EvFrequency         event.Type = 2  // contains tracer timer frequency [frequency (ticks per second)]
	EvStack             event.Type = 3  // stack [stack id, number of PCs, array of {PC, func string ID, file string ID, line}]
	EvGomaxprocs        event.Type = 4  // current value of GOMAXPROCS [timestamp, GOMAXPROCS, stack id]
	EvProcStart         event.Type = 5  // start of P [timestamp, thread id]
	EvProcStop          event.Type = 6  // stop of P [timestamp]
	EvGCStart           event.Type = 7  // GC start [timestamp, seq, stack id]
	EvGCDone            event.Type = 8  // GC done [timestamp]
	EvSTWStart          event.Type = 9  // GC mark termination start [timestamp, kind]
	EvSTWDone           event.Type = 10 // GC mark termination done [timestamp]
	EvGCSweepStart      event.Type = 11 // GC sweep start [timestamp, stack id]
	EvGCSweepDone       event.Type = 12 // GC sweep done [timestamp, swept, reclaimed]
	EvGoCreate          event.Type = 13 // goroutine creation [timestamp, new goroutine id, new stack id, stack id]
	EvGoStart           event.Type = 14 // goroutine starts running [timestamp, goroutine id, seq]
	EvGoEnd             event.Type = 15 // goroutine ends [timestamp]
	EvGoStop            event.Type = 16 // goroutine stops (like in select{}) [timestamp, stack]
	EvGoSched           event.Type = 17 // goroutine calls Gosched [timestamp, stack]
	EvGoPreempt         event.Type = 18 // goroutine is preempted [timestamp, stack]
	EvGoSleep           event.Type = 19 // goroutine calls Sleep [timestamp, stack]
	EvGoBlock           event.Type = 20 // goroutine blocks [timestamp, stack]
	EvGoUnblock         event.Type = 21 // goroutine is unblocked [timestamp, goroutine id, seq, stack]
	EvGoBlockSend       event.Type = 22 // goroutine blocks on chan send [timestamp, stack]
	EvGoBlockRecv       event.Type = 23 // goroutine blocks on chan recv [timestamp, stack]
	EvGoBlockSelect     event.Type = 24 // goroutine blocks on select [timestamp, stack]
	EvGoBlockSync       event.Type = 25 // goroutine blocks on Mutex/RWMutex [timestamp, stack]
	EvGoBlockCond       event.Type = 26 // goroutine blocks on Cond [timestamp, stack]
	EvGoBlockNet        event.Type = 27 // goroutine blocks on network [timestamp, stack]
	EvGoSysCall         event.Type = 28 // syscall enter [timestamp, stack]
	EvGoSysExit         event.Type = 29 // syscall exit [timestamp, goroutine id, seq, real timestamp]
	EvGoSysBlock        event.Type = 30 // syscall blocks [timestamp]
	EvGoWaiting         event.Type = 31 // denotes that goroutine is blocked when tracing starts [timestamp, goroutine id]
	EvGoInSyscall       event.Type = 32 // denotes that goroutine is in syscall when tracing starts [timestamp, goroutine id]
	EvHeapAlloc         event.Type = 33 // gcController.heapLive change [timestamp, heap live bytes]
	EvHeapGoal          event.Type = 34 // gcController.heapGoal change [timestamp, heap goal bytes]
	EvTimerGoroutine    event.Type = 35 // denotes timer goroutine [timer goroutine id]
	EvFutileWakeup      event.Type = 36 // denotes that the previous wakeup of this goroutine was futile [timestamp]
	EvString            event.Type = 37 // string dictionary entry [ID, length, string]
	EvGoStartLocal      event.Type = 38 // goroutine starts running on the same P as the last event [timestamp, goroutine id]
	EvGoUnblockLocal    event.Type = 39 // goroutine is unblocked on the same P as the last event [timestamp, goroutine id, stack]
	EvGoSysExitLocal    event.Type = 40 // syscall exit on the same P as the last event [timestamp, goroutine id, real timestamp]
	EvGoStartLabel      event.Type = 41 // goroutine starts running with label [timestamp, goroutine id, seq, label string id]
	EvGoBlockGC         event.Type = 42 // goroutine blocks on GC assist [timestamp, stack]
	EvGCMarkAssistStart event.Type = 43 // GC mark assist start [timestamp, stack]
	EvGCMarkAssistDone  event.Type = 44 // GC mark assist done [timestamp]
	EvUserTaskCreate    event.Type = 45 // trace.NewTask [timestamp, internal task id, internal parent id, stack, name string]
	EvUserTaskEnd       event.Type = 46 // end of task [timestamp, internal task id, stack]
	EvUserRegion        event.Type = 47 // trace.WithRegion [timestamp, internal task id, mode(0:start, 1:end), name string]
	EvUserLog           event.Type = 48 // trace.Log [timestamp, internal id, key string id, stack, value string]
	EvCPUSample         event.Type = 49 // CPU profiling sample [timestamp, stack, real timestamp, real P id (-1 when absent), goroutine id]
	EvCount             event.Type = 50
)

var EventDescriptions = [256]struct {
	Name       string
	minVersion version.Version
	Stack      bool
	Args       []string
	SArgs      []string // string arguments
}{
	EvNone:              {"None", 5, false, []string{}, nil},
	EvBatch:             {"Batch", 5, false, []string{"p", "ticks"}, nil}, // in 1.5 format it was {"p", "seq", "ticks"}
	EvFrequency:         {"Frequency", 5, false, []string{"freq"}, nil},   // in 1.5 format it was {"freq", "unused"}
	EvStack:             {"Stack", 5, false, []string{"id", "siz"}, nil},
	EvGomaxprocs:        {"Gomaxprocs", 5, true, []string{"procs"}, nil},
	EvProcStart:         {"ProcStart", 5, false, []string{"thread"}, nil},
	EvProcStop:          {"ProcStop", 5, false, []string{}, nil},
	EvGCStart:           {"GCStart", 5, true, []string{"seq"}, nil}, // in 1.5 format it was {}
	EvGCDone:            {"GCDone", 5, false, []string{}, nil},
	EvSTWStart:          {"GCSTWStart", 5, false, []string{"kindid"}, []string{"kind"}}, // <= 1.9, args was {} (implicitly {0})
	EvSTWDone:           {"GCSTWDone", 5, false, []string{}, nil},
	EvGCSweepStart:      {"GCSweepStart", 5, true, []string{}, nil},
	EvGCSweepDone:       {"GCSweepDone", 5, false, []string{"swept", "reclaimed"}, nil}, // before 1.9, format was {}
	EvGoCreate:          {"GoCreate", 5, true, []string{"g", "stack"}, nil},
	EvGoStart:           {"GoStart", 5, false, []string{"g", "seq"}, nil}, // in 1.5 format it was {"g"}
	EvGoEnd:             {"GoEnd", 5, false, []string{}, nil},
	EvGoStop:            {"GoStop", 5, true, []string{}, nil},
	EvGoSched:           {"GoSched", 5, true, []string{}, nil},
	EvGoPreempt:         {"GoPreempt", 5, true, []string{}, nil},
	EvGoSleep:           {"GoSleep", 5, true, []string{}, nil},
	EvGoBlock:           {"GoBlock", 5, true, []string{}, nil},
	EvGoUnblock:         {"GoUnblock", 5, true, []string{"g", "seq"}, nil}, // in 1.5 format it was {"g"}
	EvGoBlockSend:       {"GoBlockSend", 5, true, []string{}, nil},
	EvGoBlockRecv:       {"GoBlockRecv", 5, true, []string{}, nil},
	EvGoBlockSelect:     {"GoBlockSelect", 5, true, []string{}, nil},
	EvGoBlockSync:       {"GoBlockSync", 5, true, []string{}, nil},
	EvGoBlockCond:       {"GoBlockCond", 5, true, []string{}, nil},
	EvGoBlockNet:        {"GoBlockNet", 5, true, []string{}, nil},
	EvGoSysCall:         {"GoSysCall", 5, true, []string{}, nil},
	EvGoSysExit:         {"GoSysExit", 5, false, []string{"g", "seq", "ts"}, nil},
	EvGoSysBlock:        {"GoSysBlock", 5, false, []string{}, nil},
	EvGoWaiting:         {"GoWaiting", 5, false, []string{"g"}, nil},
	EvGoInSyscall:       {"GoInSyscall", 5, false, []string{"g"}, nil},
	EvHeapAlloc:         {"HeapAlloc", 5, false, []string{"mem"}, nil},
	EvHeapGoal:          {"HeapGoal", 5, false, []string{"mem"}, nil},
	EvTimerGoroutine:    {"TimerGoroutine", 5, false, []string{"g"}, nil}, // in 1.5 format it was {"g", "unused"}
	EvFutileWakeup:      {"FutileWakeup", 5, false, []string{}, nil},
	EvString:            {"String", 7, false, []string{}, nil},
	EvGoStartLocal:      {"GoStartLocal", 7, false, []string{"g"}, nil},
	EvGoUnblockLocal:    {"GoUnblockLocal", 7, true, []string{"g"}, nil},
	EvGoSysExitLocal:    {"GoSysExitLocal", 7, false, []string{"g", "ts"}, nil},
	EvGoStartLabel:      {"GoStartLabel", 8, false, []string{"g", "seq", "labelid"}, []string{"label"}},
	EvGoBlockGC:         {"GoBlockGC", 8, true, []string{}, nil},
	EvGCMarkAssistStart: {"GCMarkAssistStart", 9, true, []string{}, nil},
	EvGCMarkAssistDone:  {"GCMarkAssistDone", 9, false, []string{}, nil},
	EvUserTaskCreate:    {"UserTaskCreate", 11, true, []string{"taskid", "pid", "typeid"}, []string{"name"}},
	EvUserTaskEnd:       {"UserTaskEnd", 11, true, []string{"taskid"}, nil},
	EvUserRegion:        {"UserRegion", 11, true, []string{"taskid", "mode", "typeid"}, []string{"name"}},
	EvUserLog:           {"UserLog", 11, true, []string{"id", "keyid"}, []string{"category", "message"}},
	EvCPUSample:         {"CPUSample", 19, true, []string{"ts", "p", "g"}, nil},
}

//gcassert:inline
func (p *parser) allocateStack(size uint64) []uint64 {
	if size == 0 {
		return nil
	}

	// Stacks are plentiful but small. For our "Staticcheck on std" trace with
	// 11e6 events, we have roughly 500,000 stacks, using 200 MiB of memory. To
	// avoid making 500,000 small allocations we allocate backing arrays 1 MiB
	// at a time.
	out := p.stacksData
	if uint64(len(out)) < size {
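		// 1024*128 uint64s are 1 MiB, the slab size described above.
		// Discarding the remainder of the previous slab wastes at most 999
		// entries, since parseEvent caps stack sizes at 1000 frames.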
		out = make([]uint64, 1024*128)
	}
	p.stacksData = out[size:]
	return out[:size:size]
}

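// STWReason maps a trace's version-dependent STW kind ID to an STWReason. For
// example, traces from before Go 1.21 only know the kind IDs 0 and 1, which
// map to STWGCMarkTermination and STWGCSweepTermination respectively.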
func (tr *Trace) STWReason(kindID uint64) STWReason {
	if tr.Version < 21 {
		if kindID == 0 || kindID == 1 {
			return STWReason(kindID + 1)
		} else {
			return STWUnknown
		}
	} else if tr.Version == 21 {
		if kindID < NumSTWReasons {
			return STWReason(kindID)
		} else {
			return STWUnknown
		}
	} else {
		return STWUnknown
	}
}

type STWReason int

const (
	STWUnknown                 STWReason = 0
	STWGCMarkTermination       STWReason = 1
	STWGCSweepTermination      STWReason = 2
	STWWriteHeapDump           STWReason = 3
	STWGoroutineProfile        STWReason = 4
	STWGoroutineProfileCleanup STWReason = 5
	STWAllGoroutinesStackTrace STWReason = 6
	STWReadMemStats            STWReason = 7
	STWAllThreadsSyscall       STWReason = 8
	STWGOMAXPROCS              STWReason = 9
	STWStartTrace              STWReason = 10
	STWStopTrace               STWReason = 11
	STWCountPagesInUse         STWReason = 12
	STWReadMetricsSlow         STWReason = 13
	STWReadMemStatsSlow        STWReason = 14
	STWPageCachePagesLeaked    STWReason = 15
	STWResetDebugLog           STWReason = 16

	NumSTWReasons = 17
)