1// Copyright 2023 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package main
6
7import (
8	"fmt"
9	"internal/trace"
10	"internal/trace/traceviewer"
11	"internal/trace/traceviewer/format"
12	"strings"
13)
14
15// resource is a generic constraint interface for resource IDs.
16type resource interface {
17	trace.GoID | trace.ProcID | trace.ThreadID
18}
19
20// noResource indicates the lack of a resource.
21const noResource = -1
22
23// gState represents the trace viewer state of a goroutine in a trace.
24//
25// The type parameter on this type is the resource which is used to construct
26// a timeline of events. e.g. R=ProcID for a proc-oriented view, R=GoID for
27// a goroutine-oriented view, etc.
28type gState[R resource] struct {
29	baseName  string
30	named     bool   // Whether baseName has been set.
31	label     string // EventLabel extension.
32	isSystemG bool
33
34	executing R // The resource this goroutine is executing on. (Could be itself.)
35
36	// lastStopStack is the stack trace at the point of the last
37	// call to the stop method. This tends to be a more reliable way
38	// of picking up stack traces, since the parser doesn't provide
39	// a stack for every state transition event.
40	lastStopStack trace.Stack
41
42	// activeRanges is the set of all active ranges on the goroutine.
43	activeRanges map[string]activeRange
44
45	// completedRanges is a list of ranges that completed since before the
46	// goroutine stopped executing. These are flushed on every stop or block.
47	completedRanges []completedRange
48
49	// startRunningTime is the most recent event that caused a goroutine to
50	// transition to GoRunning.
51	startRunningTime trace.Time
52
53	// startSyscall is the most recent event that caused a goroutine to
54	// transition to GoSyscall.
55	syscall struct {
56		time   trace.Time
57		stack  trace.Stack
58		active bool
59	}
60
61	// startBlockReason is the StateTransition.Reason of the most recent
62	// event that caused a goroutine to transition to GoWaiting.
63	startBlockReason string
64
65	// startCause is the event that allowed this goroutine to start running.
66	// It's used to generate flow events. This is typically something like
67	// an unblock event or a goroutine creation event.
68	//
69	// startCause.resource is the resource on which startCause happened, but is
70	// listed separately because the cause may have happened on a resource that
71	// isn't R (or perhaps on some abstract nebulous resource, like trace.NetpollP).
72	startCause struct {
73		time     trace.Time
74		name     string
75		resource uint64
76		stack    trace.Stack
77	}
78}
79
80// newGState constructs a new goroutine state for the goroutine
81// identified by the provided ID.
82func newGState[R resource](goID trace.GoID) *gState[R] {
83	return &gState[R]{
84		baseName:     fmt.Sprintf("G%d", goID),
85		executing:    R(noResource),
86		activeRanges: make(map[string]activeRange),
87	}
88}
89
90// augmentName attempts to use stk to augment the name of the goroutine
91// with stack information. This stack must be related to the goroutine
92// in some way, but it doesn't really matter which stack.
93func (gs *gState[R]) augmentName(stk trace.Stack) {
94	if gs.named {
95		return
96	}
97	if stk == trace.NoStack {
98		return
99	}
100	name := lastFunc(stk)
101	gs.baseName += fmt.Sprintf(" %s", name)
102	gs.named = true
103	gs.isSystemG = trace.IsSystemGoroutine(name)
104}
105
106// setLabel adds an additional label to the goroutine's name.
107func (gs *gState[R]) setLabel(label string) {
108	gs.label = label
109}
110
111// name returns a name for the goroutine.
112func (gs *gState[R]) name() string {
113	name := gs.baseName
114	if gs.label != "" {
115		name += " (" + gs.label + ")"
116	}
117	return name
118}
119
120// setStartCause sets the reason a goroutine will be allowed to start soon.
121// For example, via unblocking or exiting a blocked syscall.
122func (gs *gState[R]) setStartCause(ts trace.Time, name string, resource uint64, stack trace.Stack) {
123	gs.startCause.time = ts
124	gs.startCause.name = name
125	gs.startCause.resource = resource
126	gs.startCause.stack = stack
127}
128
129// created indicates that this goroutine was just created by the provided creator.
130func (gs *gState[R]) created(ts trace.Time, creator R, stack trace.Stack) {
131	if creator == R(noResource) {
132		return
133	}
134	gs.setStartCause(ts, "go", uint64(creator), stack)
135}
136
137// start indicates that a goroutine has started running on a proc.
138func (gs *gState[R]) start(ts trace.Time, resource R, ctx *traceContext) {
139	// Set the time for all the active ranges.
140	for name := range gs.activeRanges {
141		gs.activeRanges[name] = activeRange{ts, trace.NoStack}
142	}
143
144	if gs.startCause.name != "" {
145		// It has a start cause. Emit a flow event.
146		ctx.Arrow(traceviewer.ArrowEvent{
147			Name:         gs.startCause.name,
148			Start:        ctx.elapsed(gs.startCause.time),
149			End:          ctx.elapsed(ts),
150			FromResource: uint64(gs.startCause.resource),
151			ToResource:   uint64(resource),
152			FromStack:    ctx.Stack(viewerFrames(gs.startCause.stack)),
153		})
154		gs.startCause.time = 0
155		gs.startCause.name = ""
156		gs.startCause.resource = 0
157		gs.startCause.stack = trace.NoStack
158	}
159	gs.executing = resource
160	gs.startRunningTime = ts
161}
162
163// syscallBegin indicates that the goroutine entered a syscall on a proc.
164func (gs *gState[R]) syscallBegin(ts trace.Time, resource R, stack trace.Stack) {
165	gs.syscall.time = ts
166	gs.syscall.stack = stack
167	gs.syscall.active = true
168	if gs.executing == R(noResource) {
169		gs.executing = resource
170		gs.startRunningTime = ts
171	}
172}
173
174// syscallEnd ends the syscall slice, wherever the syscall is at. This is orthogonal
175// to blockedSyscallEnd -- both must be called when a syscall ends and that syscall
176// blocked. They're kept separate because syscallEnd indicates the point at which the
177// goroutine is no longer executing on the resource (e.g. a proc) whereas blockedSyscallEnd
178// is the point at which the goroutine actually exited the syscall regardless of which
179// resource that happened on.
180func (gs *gState[R]) syscallEnd(ts trace.Time, blocked bool, ctx *traceContext) {
181	if !gs.syscall.active {
182		return
183	}
184	blockString := "no"
185	if blocked {
186		blockString = "yes"
187	}
188	gs.completedRanges = append(gs.completedRanges, completedRange{
189		name:       "syscall",
190		startTime:  gs.syscall.time,
191		endTime:    ts,
192		startStack: gs.syscall.stack,
193		arg:        format.BlockedArg{Blocked: blockString},
194	})
195	gs.syscall.active = false
196	gs.syscall.time = 0
197	gs.syscall.stack = trace.NoStack
198}
199
200// blockedSyscallEnd indicates the point at which the blocked syscall ended. This is distinct
201// and orthogonal to syscallEnd; both must be called if the syscall blocked. This sets up an instant
202// to emit a flow event from, indicating explicitly that this goroutine was unblocked by the system.
203func (gs *gState[R]) blockedSyscallEnd(ts trace.Time, stack trace.Stack, ctx *traceContext) {
204	name := "exit blocked syscall"
205	gs.setStartCause(ts, name, trace.SyscallP, stack)
206
207	// Emit an syscall exit instant event for the "Syscall" lane.
208	ctx.Instant(traceviewer.InstantEvent{
209		Name:     name,
210		Ts:       ctx.elapsed(ts),
211		Resource: trace.SyscallP,
212		Stack:    ctx.Stack(viewerFrames(stack)),
213	})
214}
215
216// unblock indicates that the goroutine gs represents has been unblocked.
217func (gs *gState[R]) unblock(ts trace.Time, stack trace.Stack, resource R, ctx *traceContext) {
218	name := "unblock"
219	viewerResource := uint64(resource)
220	if gs.startBlockReason != "" {
221		name = fmt.Sprintf("%s (%s)", name, gs.startBlockReason)
222	}
223	if strings.Contains(gs.startBlockReason, "network") {
224		// Attribute the network instant to the nebulous "NetpollP" if
225		// resource isn't a thread, because there's a good chance that
226		// resource isn't going to be valid in this case.
227		//
228		// TODO(mknyszek): Handle this invalidness in a more general way.
229		if _, ok := any(resource).(trace.ThreadID); !ok {
230			// Emit an unblock instant event for the "Network" lane.
231			viewerResource = trace.NetpollP
232		}
233		ctx.Instant(traceviewer.InstantEvent{
234			Name:     name,
235			Ts:       ctx.elapsed(ts),
236			Resource: viewerResource,
237			Stack:    ctx.Stack(viewerFrames(stack)),
238		})
239	}
240	gs.startBlockReason = ""
241	if viewerResource != 0 {
242		gs.setStartCause(ts, name, viewerResource, stack)
243	}
244}
245
246// block indicates that the goroutine has stopped executing on a proc -- specifically,
247// it blocked for some reason.
248func (gs *gState[R]) block(ts trace.Time, stack trace.Stack, reason string, ctx *traceContext) {
249	gs.startBlockReason = reason
250	gs.stop(ts, stack, ctx)
251}
252
253// stop indicates that the goroutine has stopped executing on a proc.
254func (gs *gState[R]) stop(ts trace.Time, stack trace.Stack, ctx *traceContext) {
255	// Emit the execution time slice.
256	var stk int
257	if gs.lastStopStack != trace.NoStack {
258		stk = ctx.Stack(viewerFrames(gs.lastStopStack))
259	}
260	// Check invariants.
261	if gs.startRunningTime == 0 {
262		panic("silently broken trace or generator invariant (startRunningTime != 0) not held")
263	}
264	if gs.executing == R(noResource) {
265		panic("non-executing goroutine stopped")
266	}
267	ctx.Slice(traceviewer.SliceEvent{
268		Name:     gs.name(),
269		Ts:       ctx.elapsed(gs.startRunningTime),
270		Dur:      ts.Sub(gs.startRunningTime),
271		Resource: uint64(gs.executing),
272		Stack:    stk,
273	})
274
275	// Flush completed ranges.
276	for _, cr := range gs.completedRanges {
277		ctx.Slice(traceviewer.SliceEvent{
278			Name:     cr.name,
279			Ts:       ctx.elapsed(cr.startTime),
280			Dur:      cr.endTime.Sub(cr.startTime),
281			Resource: uint64(gs.executing),
282			Stack:    ctx.Stack(viewerFrames(cr.startStack)),
283			EndStack: ctx.Stack(viewerFrames(cr.endStack)),
284			Arg:      cr.arg,
285		})
286	}
287	gs.completedRanges = gs.completedRanges[:0]
288
289	// Continue in-progress ranges.
290	for name, r := range gs.activeRanges {
291		// Check invariant.
292		if r.time == 0 {
293			panic("silently broken trace or generator invariant (activeRanges time != 0) not held")
294		}
295		ctx.Slice(traceviewer.SliceEvent{
296			Name:     name,
297			Ts:       ctx.elapsed(r.time),
298			Dur:      ts.Sub(r.time),
299			Resource: uint64(gs.executing),
300			Stack:    ctx.Stack(viewerFrames(r.stack)),
301		})
302	}
303
304	// Clear the range info.
305	for name := range gs.activeRanges {
306		gs.activeRanges[name] = activeRange{0, trace.NoStack}
307	}
308
309	gs.startRunningTime = 0
310	gs.lastStopStack = stack
311	gs.executing = R(noResource)
312}
313
314// finalize writes out any in-progress slices as if the goroutine stopped.
315// This must only be used once the trace has been fully processed and no
316// further events will be processed. This method may leave the gState in
317// an inconsistent state.
318func (gs *gState[R]) finish(ctx *traceContext) {
319	if gs.executing != R(noResource) {
320		gs.syscallEnd(ctx.endTime, false, ctx)
321		gs.stop(ctx.endTime, trace.NoStack, ctx)
322	}
323}
324
325// rangeBegin indicates the start of a special range of time.
326func (gs *gState[R]) rangeBegin(ts trace.Time, name string, stack trace.Stack) {
327	if gs.executing != R(noResource) {
328		// If we're executing, start the slice from here.
329		gs.activeRanges[name] = activeRange{ts, stack}
330	} else {
331		// If the goroutine isn't executing, there's no place for
332		// us to create a slice from. Wait until it starts executing.
333		gs.activeRanges[name] = activeRange{0, stack}
334	}
335}
336
337// rangeActive indicates that a special range of time has been in progress.
338func (gs *gState[R]) rangeActive(name string) {
339	if gs.executing != R(noResource) {
340		// If we're executing, and the range is active, then start
341		// from wherever the goroutine started running from.
342		gs.activeRanges[name] = activeRange{gs.startRunningTime, trace.NoStack}
343	} else {
344		// If the goroutine isn't executing, there's no place for
345		// us to create a slice from. Wait until it starts executing.
346		gs.activeRanges[name] = activeRange{0, trace.NoStack}
347	}
348}
349
350// rangeEnd indicates the end of a special range of time.
351func (gs *gState[R]) rangeEnd(ts trace.Time, name string, stack trace.Stack, ctx *traceContext) {
352	if gs.executing != R(noResource) {
353		r := gs.activeRanges[name]
354		gs.completedRanges = append(gs.completedRanges, completedRange{
355			name:       name,
356			startTime:  r.time,
357			endTime:    ts,
358			startStack: r.stack,
359			endStack:   stack,
360		})
361	}
362	delete(gs.activeRanges, name)
363}
364
365func lastFunc(s trace.Stack) string {
366	var last trace.StackFrame
367	s.Frames(func(f trace.StackFrame) bool {
368		last = f
369		return true
370	})
371	return last.Func
372}
373