1// Copyright 2023 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5// This file contains data types that all implementations of the trace format
6// parser need to provide to the rest of the package.
7
8package trace
9
10import (
11	"fmt"
12	"math"
13	"strings"
14
15	"internal/trace/event"
16	"internal/trace/event/go122"
17	"internal/trace/version"
18)
19
20// maxArgs is the maximum number of arguments for "plain" events,
21// i.e. anything that could reasonably be represented as a baseEvent.
22//
23// TODO(mknyszek): This is only 6 instead of 5 because GoStatusStack
24// has 5 arguments and needs to smuggle in a 6th. Figure out a way to
25// shrink this in the future.
26const maxArgs = 6
27
28// timedEventArgs is an array that is able to hold the arguments for any
29// timed event.
30type timedEventArgs [maxArgs - 1]uint64
31
32// baseEvent is the basic unprocessed event. This serves as a common
33// fundamental data structure across.
34type baseEvent struct {
35	typ  event.Type
36	time Time
37	args timedEventArgs
38}
39
40// extra returns a slice representing extra available space in args
41// that the parser can use to pass data up into Event.
42func (e *baseEvent) extra(v version.Version) []uint64 {
43	switch v {
44	case version.Go122:
45		return e.args[len(go122.Specs()[e.typ].Args)-1:]
46	}
47	panic(fmt.Sprintf("unsupported version: go 1.%d", v))
48}
49
50// evTable contains the per-generation data necessary to
51// interpret an individual event.
52type evTable struct {
53	freq    frequency
54	strings dataTable[stringID, string]
55	stacks  dataTable[stackID, stack]
56	pcs     map[uint64]frame
57
58	// extraStrings are strings that get generated during
59	// parsing but haven't come directly from the trace, so
60	// they don't appear in strings.
61	extraStrings   []string
62	extraStringIDs map[string]extraStringID
63	nextExtra      extraStringID
64
65	// expData contains extra unparsed data that is accessible
66	// only to ExperimentEvent via an EventExperimental event.
67	expData map[event.Experiment]*ExperimentalData
68}
69
70// addExtraString adds an extra string to the evTable and returns
71// a unique ID for the string in the table.
72func (t *evTable) addExtraString(s string) extraStringID {
73	if s == "" {
74		return 0
75	}
76	if t.extraStringIDs == nil {
77		t.extraStringIDs = make(map[string]extraStringID)
78	}
79	if id, ok := t.extraStringIDs[s]; ok {
80		return id
81	}
82	t.nextExtra++
83	id := t.nextExtra
84	t.extraStrings = append(t.extraStrings, s)
85	t.extraStringIDs[s] = id
86	return id
87}
88
89// getExtraString returns the extra string for the provided ID.
90// The ID must have been produced by addExtraString for this evTable.
91func (t *evTable) getExtraString(id extraStringID) string {
92	if id == 0 {
93		return ""
94	}
95	return t.extraStrings[id-1]
96}
97
98// dataTable is a mapping from EIs to Es.
99type dataTable[EI ~uint64, E any] struct {
100	present []uint8
101	dense   []E
102	sparse  map[EI]E
103}
104
105// insert tries to add a mapping from id to s.
106//
107// Returns an error if a mapping for id already exists, regardless
108// of whether or not s is the same in content. This should be used
109// for validation during parsing.
110func (d *dataTable[EI, E]) insert(id EI, data E) error {
111	if d.sparse == nil {
112		d.sparse = make(map[EI]E)
113	}
114	if existing, ok := d.get(id); ok {
115		return fmt.Errorf("multiple %Ts with the same ID: id=%d, new=%v, existing=%v", data, id, data, existing)
116	}
117	d.sparse[id] = data
118	return nil
119}
120
121// compactify attempts to compact sparse into dense.
122//
123// This is intended to be called only once after insertions are done.
124func (d *dataTable[EI, E]) compactify() {
125	if d.sparse == nil || len(d.dense) != 0 {
126		// Already compactified.
127		return
128	}
129	// Find the range of IDs.
130	maxID := EI(0)
131	minID := ^EI(0)
132	for id := range d.sparse {
133		if id > maxID {
134			maxID = id
135		}
136		if id < minID {
137			minID = id
138		}
139	}
140	if maxID >= math.MaxInt {
141		// We can't create a slice big enough to hold maxID elements
142		return
143	}
144	// We're willing to waste at most 2x memory.
145	if int(maxID-minID) > max(len(d.sparse), 2*len(d.sparse)) {
146		return
147	}
148	if int(minID) > len(d.sparse) {
149		return
150	}
151	size := int(maxID) + 1
152	d.present = make([]uint8, (size+7)/8)
153	d.dense = make([]E, size)
154	for id, data := range d.sparse {
155		d.dense[id] = data
156		d.present[id/8] |= uint8(1) << (id % 8)
157	}
158	d.sparse = nil
159}
160
161// get returns the E for id or false if it doesn't
162// exist. This should be used for validation during parsing.
163func (d *dataTable[EI, E]) get(id EI) (E, bool) {
164	if id == 0 {
165		return *new(E), true
166	}
167	if uint64(id) < uint64(len(d.dense)) {
168		if d.present[id/8]&(uint8(1)<<(id%8)) != 0 {
169			return d.dense[id], true
170		}
171	} else if d.sparse != nil {
172		if data, ok := d.sparse[id]; ok {
173			return data, true
174		}
175	}
176	return *new(E), false
177}
178
179// forEach iterates over all ID/value pairs in the data table.
180func (d *dataTable[EI, E]) forEach(yield func(EI, E) bool) bool {
181	for id, value := range d.dense {
182		if d.present[id/8]&(uint8(1)<<(id%8)) == 0 {
183			continue
184		}
185		if !yield(EI(id), value) {
186			return false
187		}
188	}
189	if d.sparse == nil {
190		return true
191	}
192	for id, value := range d.sparse {
193		if !yield(id, value) {
194			return false
195		}
196	}
197	return true
198}
199
200// mustGet returns the E for id or panics if it fails.
201//
202// This should only be used if id has already been validated.
203func (d *dataTable[EI, E]) mustGet(id EI) E {
204	data, ok := d.get(id)
205	if !ok {
206		panic(fmt.Sprintf("expected id %d in %T table", id, data))
207	}
208	return data
209}
210
211// frequency is nanoseconds per timestamp unit.
212type frequency float64
213
214// mul multiplies an unprocessed to produce a time in nanoseconds.
215func (f frequency) mul(t timestamp) Time {
216	return Time(float64(t) * float64(f))
217}
218
219// stringID is an index into the string table for a generation.
220type stringID uint64
221
222// extraStringID is an index into the extra string table for a generation.
223type extraStringID uint64
224
225// stackID is an index into the stack table for a generation.
226type stackID uint64
227
228// cpuSample represents a CPU profiling sample captured by the trace.
229type cpuSample struct {
230	schedCtx
231	time  Time
232	stack stackID
233}
234
235// asEvent produces a complete Event from a cpuSample. It needs
236// the evTable from the generation that created it.
237//
238// We don't just store it as an Event in generation to minimize
239// the amount of pointer data floating around.
240func (s cpuSample) asEvent(table *evTable) Event {
241	// TODO(mknyszek): This is go122-specific, but shouldn't be.
242	// Generalize this in the future.
243	e := Event{
244		table: table,
245		ctx:   s.schedCtx,
246		base: baseEvent{
247			typ:  go122.EvCPUSample,
248			time: s.time,
249		},
250	}
251	e.base.args[0] = uint64(s.stack)
252	return e
253}
254
255// stack represents a goroutine stack sample.
256type stack struct {
257	pcs []uint64
258}
259
260func (s stack) String() string {
261	var sb strings.Builder
262	for _, frame := range s.pcs {
263		fmt.Fprintf(&sb, "\t%#v\n", frame)
264	}
265	return sb.String()
266}
267
268// frame represents a single stack frame.
269type frame struct {
270	pc     uint64
271	funcID stringID
272	fileID stringID
273	line   uint64
274}
275