// Copyright 2023 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package trace_test

import (
	"bufio"
	"bytes"
	"fmt"
	"internal/race"
	"internal/testenv"
	"internal/trace"
	"internal/trace/testtrace"
	"io"
	"os"
	"path/filepath"
	"runtime"
	"strings"
	"testing"
)

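// TestTraceAnnotations checks that the user annotation events (task begin/end,
// region begin/end, and log) generated by testdata/testprog/annotations.go all
// appear in the trace with the expected task IDs and arguments.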
func TestTraceAnnotations(t *testing.T) {
	testTraceProg(t, "annotations.go", func(t *testing.T, tb, _ []byte, _ bool) {
		type evDesc struct {
			kind trace.EventKind
			task trace.TaskID
			args []string
		}
		want := []evDesc{
			{trace.EventTaskBegin, trace.TaskID(1), []string{"task0"}},
			{trace.EventRegionBegin, trace.TaskID(1), []string{"region0"}},
			{trace.EventRegionBegin, trace.TaskID(1), []string{"region1"}},
			{trace.EventLog, trace.TaskID(1), []string{"key0", "0123456789abcdef"}},
			{trace.EventRegionEnd, trace.TaskID(1), []string{"region1"}},
			{trace.EventRegionEnd, trace.TaskID(1), []string{"region0"}},
			{trace.EventTaskEnd, trace.TaskID(1), []string{"task0"}},
			// Currently, a pre-existing region is not recorded, to avoid allocations.
			{trace.EventRegionBegin, trace.BackgroundTask, []string{"post-existing region"}},
		}
		r, err := trace.NewReader(bytes.NewReader(tb))
		if err != nil {
			// Stop here: with a nil reader, the loop below would panic.
			t.Fatal(err)
		}
		for {
			ev, err := r.ReadEvent()
			if err == io.EOF {
				break
			}
			if err != nil {
				t.Fatal(err)
			}
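			// Find a matching expectation and remove it from want; anything
			// still in want once the trace is exhausted was never matched.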
			for i, wantEv := range want {
				if wantEv.kind != ev.Kind() {
					continue
				}
				match := false
				switch ev.Kind() {
				case trace.EventTaskBegin, trace.EventTaskEnd:
					task := ev.Task()
					match = task.ID == wantEv.task && task.Type == wantEv.args[0]
				case trace.EventRegionBegin, trace.EventRegionEnd:
					reg := ev.Region()
					match = reg.Task == wantEv.task && reg.Type == wantEv.args[0]
				case trace.EventLog:
					log := ev.Log()
					match = log.Task == wantEv.task && log.Category == wantEv.args[0] && log.Message == wantEv.args[1]
				}
				if match {
					want[i] = want[len(want)-1]
					want = want[:len(want)-1]
					break
				}
			}
		}
		if len(want) != 0 {
			for _, ev := range want {
				t.Errorf("no match for %s TaskID=%d Args=%#v", ev.kind, ev.task, ev.args)
			}
		}
	})
}

func TestTraceAnnotationsStress(t *testing.T) {
	testTraceProg(t, "annotations-stress.go", nil)
}

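// TestTraceCgoCallback smoke-tests tracing a program in which C code calls
// back into Go via a cgo callback.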
func TestTraceCgoCallback(t *testing.T) {
	testenv.MustHaveCGO(t)

	switch runtime.GOOS {
	case "plan9", "windows":
		t.Skipf("cgo callback test requires pthreads and is not supported on %s", runtime.GOOS)
	}
	testTraceProg(t, "cgo-callback.go", nil)
}

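// TestTraceCPUProfile cross-checks the CPU profile summary that
// testdata/testprog/cpu-profile.go prints to stderr against the CPU profile
// samples recorded in the execution trace.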
func TestTraceCPUProfile(t *testing.T) {
	testTraceProg(t, "cpu-profile.go", func(t *testing.T, tb, stderr []byte, _ bool) {
		// Parse stderr, which has a CPU profile summary, if everything went well.
		// (If it didn't, we shouldn't even make it here.)
		scanner := bufio.NewScanner(bytes.NewReader(stderr))
		pprofSamples := 0
		pprofStacks := make(map[string]int)
		for scanner.Scan() {
			var stack string
			var samples int
			_, err := fmt.Sscanf(scanner.Text(), "%s\t%d", &stack, &samples)
			if err != nil {
				t.Fatalf("failed to parse CPU profile summary in stderr: %s\n\tfull:\n%s", scanner.Text(), stderr)
			}
			pprofStacks[stack] = samples
			pprofSamples += samples
		}
		if err := scanner.Err(); err != nil {
			t.Fatalf("failed to parse CPU profile summary in stderr: %v", err)
		}
		if pprofSamples == 0 {
			t.Skip("CPU profile did not include any samples while tracing was active")
		}

		// Examine the execution tracer's view of the CPU profile samples. Filter it
		// to only include samples from the single test goroutine. Use the goroutine
		// ID that was recorded in the events: that should reflect getg().m.curg,
		// same as the profiler's labels (even when the M is using its g0 stack).
		totalTraceSamples := 0
		traceSamples := 0
		traceStacks := make(map[string]int)
		r, err := trace.NewReader(bytes.NewReader(tb))
		if err != nil {
			// Stop here: with a nil reader, the loop below would panic.
			t.Fatal(err)
		}
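		// Track the "cpuHogger" region so samples can be attributed to its
		// goroutine below.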
		var hogRegion *trace.Event
		var hogRegionClosed bool
		for {
			ev, err := r.ReadEvent()
			if err == io.EOF {
				break
			}
			if err != nil {
				t.Fatal(err)
			}
			if ev.Kind() == trace.EventRegionBegin && ev.Region().Type == "cpuHogger" {
				hogRegion = &ev
			}
			if ev.Kind() == trace.EventStackSample {
				totalTraceSamples++
				if hogRegion != nil && ev.Goroutine() == hogRegion.Goroutine() {
					traceSamples++
					var fns []string
					ev.Stack().Frames(func(frame trace.StackFrame) bool {
						if frame.Func != "runtime.goexit" {
							fns = append(fns, fmt.Sprintf("%s:%d", frame.Func, frame.Line))
						}
						return true
					})
					stack := strings.Join(fns, "|")
					traceStacks[stack]++
				}
			}
			if ev.Kind() == trace.EventRegionEnd && ev.Region().Type == "cpuHogger" {
				hogRegionClosed = true
			}
		}
		if hogRegion == nil {
			t.Fatalf("execution trace did not identify cpuHogger goroutine")
		} else if !hogRegionClosed {
			t.Fatalf("execution trace did not close cpuHogger region")
		}

		// The execution trace may drop CPU profile samples if the profiling buffer
		// overflows. Based on the size of profBufWordCount, that takes a bit over
		// 1900 CPU samples or 19 thread-seconds at a 100 Hz sample rate. If we've
		// hit that case, then we definitely have at least one full buffer's worth
		// of CPU samples, so we'll call that success.
		overflowed := totalTraceSamples >= 1900
		if traceSamples < pprofSamples {
			t.Logf("execution trace did not include all CPU profile samples; %d in profile, %d in trace", pprofSamples, traceSamples)
			if !overflowed {
				t.Fail()
			}
		}

		for stack, traceSamples := range traceStacks {
			pprofSamples := pprofStacks[stack]
			delete(pprofStacks, stack)
			if traceSamples < pprofSamples {
				t.Logf("execution trace did not include all CPU profile samples for stack %q; %d in profile, %d in trace",
					stack, pprofSamples, traceSamples)
				if !overflowed {
					t.Fail()
				}
			}
		}
		for stack, pprofSamples := range pprofStacks {
			t.Logf("CPU profile included %d samples at stack %q not present in execution trace", pprofSamples, stack)
			if !overflowed {
				t.Fail()
			}
		}

		if t.Failed() {
			t.Logf("execution trace CPU samples:")
			for stack, samples := range traceStacks {
				t.Logf("%d: %q", samples, stack)
			}
			t.Logf("CPU profile:\n%s", stderr)
		}
	})
}

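// TestTraceFutileWakeup checks that goroutines in the test program's "special"
// region never block, unblock, run, and then immediately block again, since
// that would indicate the runtime generated extraneous wakeup events.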
func TestTraceFutileWakeup(t *testing.T) {
	testTraceProg(t, "futile-wakeup.go", func(t *testing.T, tb, _ []byte, _ bool) {
		// Check to make sure that no goroutine in the "special" trace region
		// ends up blocking, unblocking, then immediately blocking again.
		//
		// The goroutines are careful to call runtime.Gosched between blocking
		// operations, so there should never be a clean block/unblock on the
		// goroutine unless the runtime was generating extraneous events.
		const (
			entered = iota
			blocked
			runnable
			running
		)
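		// gs tracks where each goroutine in the "special" region currently is
		// in the entered -> blocked -> runnable -> running state machine.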
		gs := make(map[trace.GoID]int)
		seenSpecialGoroutines := false
		r, err := trace.NewReader(bytes.NewReader(tb))
		if err != nil {
			// Stop here: with a nil reader, the loop below would panic.
			t.Fatal(err)
		}
		for {
			ev, err := r.ReadEvent()
			if err == io.EOF {
				break
			}
			if err != nil {
				t.Fatal(err)
			}
			// Only track goroutines in the special region we control, so runtime
			// goroutines don't interfere (it's totally valid in traces for a
			// goroutine to block, run, and block again; that's not what we care about).
			if ev.Kind() == trace.EventRegionBegin && ev.Region().Type == "special" {
				seenSpecialGoroutines = true
				gs[ev.Goroutine()] = entered
			}
			if ev.Kind() == trace.EventRegionEnd && ev.Region().Type == "special" {
				delete(gs, ev.Goroutine())
			}
			// Track state transitions for goroutines we care about.
			//
			// The goroutines we care about will advance through the state machine
			// of entered -> blocked -> runnable -> running. If in the running state
			// we block, then we have a futile wakeup. Because of the runtime.Gosched
			// on these specially marked goroutines, we should end up back in runnable
			// first. If at any point we go to a different state, switch back to entered
			// and wait for the next time the goroutine blocks.
			if ev.Kind() != trace.EventStateTransition {
				continue
			}
			st := ev.StateTransition()
			if st.Resource.Kind != trace.ResourceGoroutine {
				continue
			}
			id := st.Resource.Goroutine()
			state, ok := gs[id]
			if !ok {
				continue
			}
			_, new := st.Goroutine()
			switch state {
			case entered:
				if new == trace.GoWaiting {
					state = blocked
				} else {
					state = entered
				}
			case blocked:
				if new == trace.GoRunnable {
					state = runnable
				} else {
					state = entered
				}
			case runnable:
				if new == trace.GoRunning {
					state = running
				} else {
					state = entered
				}
			case running:
				if new == trace.GoWaiting {
					t.Fatalf("found futile wakeup on goroutine %d", id)
				} else {
					state = entered
				}
			}
			gs[id] = state
		}
		if !seenSpecialGoroutines {
			t.Fatal("did not see a goroutine in the 'special' region")
		}
	})
}

func TestTraceGCStress(t *testing.T) {
	testTraceProg(t, "gc-stress.go", nil)
}

func TestTraceGOMAXPROCS(t *testing.T) {
	testTraceProg(t, "gomaxprocs.go", nil)
}

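// TestTraceStacks checks that the stacks attached to a sample of trace events
// generated by testdata/testprog/stacks.go contain the expected frames.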
func TestTraceStacks(t *testing.T) {
	testTraceProg(t, "stacks.go", func(t *testing.T, tb, _ []byte, stress bool) {
		type frame struct {
			fn   string
			line int
		}
		type evDesc struct {
			kind   trace.EventKind
			match  string
			frames []frame
		}
		// mainLine is the line number of `func main()` in testprog/stacks.go.
		const mainLine = 21
		want := []evDesc{
			{trace.EventStateTransition, "Goroutine Running->Runnable", []frame{
				{"main.main", mainLine + 82},
			}},
			{trace.EventStateTransition, "Goroutine NotExist->Runnable", []frame{
				{"main.main", mainLine + 11},
			}},
			{trace.EventStateTransition, "Goroutine Running->Waiting", []frame{
				{"runtime.block", 0},
				{"main.main.func1", 0},
			}},
			{trace.EventStateTransition, "Goroutine Running->Waiting", []frame{
				{"runtime.chansend1", 0},
				{"main.main.func2", 0},
			}},
			{trace.EventStateTransition, "Goroutine Running->Waiting", []frame{
				{"runtime.chanrecv1", 0},
				{"main.main.func3", 0},
			}},
			{trace.EventStateTransition, "Goroutine Running->Waiting", []frame{
				{"runtime.chanrecv1", 0},
				{"main.main.func4", 0},
			}},
			{trace.EventStateTransition, "Goroutine Waiting->Runnable", []frame{
				{"runtime.chansend1", 0},
				{"main.main", mainLine + 84},
			}},
			{trace.EventStateTransition, "Goroutine Running->Waiting", []frame{
				{"runtime.chansend1", 0},
				{"main.main.func5", 0},
			}},
			{trace.EventStateTransition, "Goroutine Waiting->Runnable", []frame{
				{"runtime.chanrecv1", 0},
				{"main.main", mainLine + 85},
			}},
			{trace.EventStateTransition, "Goroutine Running->Waiting", []frame{
				{"runtime.selectgo", 0},
				{"main.main.func6", 0},
			}},
			{trace.EventStateTransition, "Goroutine Waiting->Runnable", []frame{
				{"runtime.selectgo", 0},
				{"main.main", mainLine + 86},
			}},
			{trace.EventStateTransition, "Goroutine Running->Waiting", []frame{
				{"sync.(*Mutex).Lock", 0},
				{"main.main.func7", 0},
			}},
			{trace.EventStateTransition, "Goroutine Waiting->Runnable", []frame{
				{"sync.(*Mutex).Unlock", 0},
				{"main.main", 0},
			}},
			{trace.EventStateTransition, "Goroutine Running->Waiting", []frame{
				{"sync.(*WaitGroup).Wait", 0},
				{"main.main.func8", 0},
			}},
			{trace.EventStateTransition, "Goroutine Waiting->Runnable", []frame{
				{"sync.(*WaitGroup).Add", 0},
				{"sync.(*WaitGroup).Done", 0},
				{"main.main", mainLine + 91},
			}},
			{trace.EventStateTransition, "Goroutine Running->Waiting", []frame{
				{"sync.(*Cond).Wait", 0},
				{"main.main.func9", 0},
			}},
			{trace.EventStateTransition, "Goroutine Waiting->Runnable", []frame{
				{"sync.(*Cond).Signal", 0},
				{"main.main", 0},
			}},
			{trace.EventStateTransition, "Goroutine Running->Waiting", []frame{
				{"time.Sleep", 0},
				{"main.main", 0},
			}},
			{trace.EventMetric, "/sched/gomaxprocs:threads", []frame{
				{"runtime.startTheWorld", 0}, // this is when the current gomaxprocs is logged.
				{"runtime.startTheWorldGC", 0},
				{"runtime.GOMAXPROCS", 0},
				{"main.main", 0},
			}},
		}
		if !stress {
			// Only check for this stack if !stress because traceAdvance alone could
			// allocate enough memory to trigger a GC if called frequently enough.
			// This might cause the runtime.GC call we're trying to match against to
			// coalesce with an active GC triggered by traceAdvance. In that case
			// we won't have an EventRangeBegin event that matches the stack trace we're
			// looking for, since runtime.GC will not have triggered the GC.
			gcEv := evDesc{trace.EventRangeBegin, "GC concurrent mark phase", []frame{
				{"runtime.GC", 0},
				{"main.main", 0},
			}}
			want = append(want, gcEv)
		}
		if runtime.GOOS != "windows" && runtime.GOOS != "plan9" {
			want = append(want, []evDesc{
				{trace.EventStateTransition, "Goroutine Running->Waiting", []frame{
					{"internal/poll.(*FD).Accept", 0},
					{"net.(*netFD).accept", 0},
					{"net.(*TCPListener).accept", 0},
					{"net.(*TCPListener).Accept", 0},
					{"main.main.func10", 0},
				}},
				{trace.EventStateTransition, "Goroutine Running->Syscall", []frame{
					{"syscall.read", 0},
					{"syscall.Read", 0},
					{"internal/poll.ignoringEINTRIO", 0},
					{"internal/poll.(*FD).Read", 0},
					{"os.(*File).read", 0},
					{"os.(*File).Read", 0},
					{"main.main.func11", 0},
				}},
			}...)
		}
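		// stackMatches reports whether the frames of stk, from the leaf
		// outward, match the expected frames. An expected line of 0 matches
		// any line number.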
		stackMatches := func(stk trace.Stack, frames []frame) bool {
			i := 0
			match := true
			stk.Frames(func(f trace.StackFrame) bool {
				if i >= len(frames) {
					// The stack is deeper than expected; treat it as a
					// mismatch rather than indexing out of range.
					match = false
					return false
				}
				if f.Func != frames[i].fn {
					match = false
					return false
				}
				if line := uint64(frames[i].line); line != 0 && line != f.Line {
					match = false
					return false
				}
				i++
				return true
			})
			return match
		}
		r, err := trace.NewReader(bytes.NewReader(tb))
		if err != nil {
			// Stop here: with a nil reader, the loop below would panic.
			t.Fatal(err)
		}
		for {
			ev, err := r.ReadEvent()
			if err == io.EOF {
				break
			}
			if err != nil {
				t.Fatal(err)
			}
			for i, wantEv := range want {
				if wantEv.kind != ev.Kind() {
					continue
				}
				match := false
				switch ev.Kind() {
				case trace.EventStateTransition:
					st := ev.StateTransition()
					str := ""
					switch st.Resource.Kind {
					case trace.ResourceGoroutine:
						old, new := st.Goroutine()
						str = fmt.Sprintf("%s %s->%s", st.Resource.Kind, old, new)
					}
					match = str == wantEv.match
				case trace.EventRangeBegin:
					rng := ev.Range()
					match = rng.Name == wantEv.match
				case trace.EventMetric:
					metric := ev.Metric()
					match = metric.Name == wantEv.match
				}
				match = match && stackMatches(ev.Stack(), wantEv.frames)
				if match {
					want[i] = want[len(want)-1]
					want = want[:len(want)-1]
					break
				}
			}
		}
		if len(want) != 0 {
			for _, ev := range want {
				t.Errorf("no match for %s Match=%s Stack=%#v", ev.kind, ev.match, ev.frames)
			}
		}
	})
}

func TestTraceStress(t *testing.T) {
	switch runtime.GOOS {
	case "js", "wasip1":
		t.Skip("no os.Pipe on " + runtime.GOOS)
	}
	testTraceProg(t, "stress.go", nil)
}

func TestTraceStressStartStop(t *testing.T) {
	switch runtime.GOOS {
	case "js", "wasip1":
		t.Skip("no os.Pipe on " + runtime.GOOS)
	}
	testTraceProg(t, "stress-start-stop.go", nil)
}

func TestTraceManyStartStop(t *testing.T) {
	testTraceProg(t, "many-start-stop.go", nil)
}

func TestTraceWaitOnPipe(t *testing.T) {
	switch runtime.GOOS {
	case "dragonfly", "freebsd", "linux", "netbsd", "openbsd", "solaris":
		testTraceProg(t, "wait-on-pipe.go", nil)
		return
	}
	t.Skip("no applicable syscall.Pipe on " + runtime.GOOS)
}

func TestTraceIterPull(t *testing.T) {
	testTraceProg(t, "iter-pull.go", nil)
}

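// testTraceProg runs testdata/testprog/progName, which by convention writes
// its trace to stdout and any expectations to stderr, validates the trace with
// the parser, and then runs the optional extra checks over the trace bytes and
// stderr. The program is run three ways: with default settings, under tracer
// stress (traceadvanceperiod=0), and with alloc/free events enabled
// (traceallocfree=1); the latter two are skipped in short mode.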
func testTraceProg(t *testing.T, progName string, extra func(t *testing.T, trace, stderr []byte, stress bool)) {
	testenv.MustHaveGoRun(t)

	// Check if we're on a builder.
	onBuilder := testenv.Builder() != ""
	onOldBuilder := !strings.Contains(testenv.Builder(), "gotip") && !strings.Contains(testenv.Builder(), "go1")

	testPath := filepath.Join("./testdata/testprog", progName)
	testName := progName
	runTest := func(t *testing.T, stress bool, extraGODEBUG string) {
		// Run the program and capture the trace, which is always written to stdout.
		cmd := testenv.Command(t, testenv.GoToolPath(t), "run")
		if race.Enabled {
			cmd.Args = append(cmd.Args, "-race")
		}
		cmd.Args = append(cmd.Args, testPath)
		cmd.Env = append(os.Environ(), "GOEXPERIMENT=rangefunc")
		// Add a stack ownership check. This is cheap enough for testing.
		godebug := "tracecheckstackownership=1"
		if stress {
			// Advance a generation constantly to stress the tracer.
			godebug += ",traceadvanceperiod=0"
		}
		if extraGODEBUG != "" {
			// Add extra GODEBUG flags.
			godebug += "," + extraGODEBUG
		}
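		// The final value looks like, e.g.,
		// "tracecheckstackownership=1,traceadvanceperiod=0" for a stress run.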
		cmd.Env = append(cmd.Env, "GODEBUG="+godebug)

		// Capture stdout and stderr.
		//
		// The protocol for these programs is that stdout contains the trace data
		// and stderr is an expectation in string format.
		var traceBuf, errBuf bytes.Buffer
		cmd.Stdout = &traceBuf
		cmd.Stderr = &errBuf
		// Run the program.
		if err := cmd.Run(); err != nil {
			if errBuf.Len() != 0 {
				t.Logf("stderr: %s", errBuf.String())
			}
			t.Fatal(err)
		}
		tb := traceBuf.Bytes()

		// Test the trace and the parser.
		testReader(t, bytes.NewReader(tb), testtrace.ExpectSuccess())

		// Run some extra validation.
		if !t.Failed() && extra != nil {
			extra(t, tb, errBuf.Bytes(), stress)
		}

		// Dump some more information on failure.
		if t.Failed() && onBuilder {
			// Dump directly to the test log on the builder, since this
			// data is critical for debugging and this is the only way
			// we can currently make sure it's retained.
			t.Log("found bad trace; dumping to test log...")
			s := dumpTraceToText(t, tb)
			if onOldBuilder && len(s) > 1<<20+512<<10 {
				// The old build infrastructure truncates logs at ~2 MiB.
				// Let's assume we're the only failure and give ourselves
				// up to 1.5 MiB to dump the trace.
				//
				// TODO(mknyszek): Remove this when we've migrated off of
				// the old infrastructure.
				t.Logf("text trace too large to dump (%d bytes)", len(s))
			} else {
				t.Log(s)
			}
		} else if t.Failed() || *dumpTraces {
			// The test failed, or we were asked to dump the trace. Write it to a file.
			t.Logf("wrote trace to file: %s", dumpTraceToFile(t, testName, stress, tb))
		}
	}
	t.Run("Default", func(t *testing.T) {
		runTest(t, false, "")
	})
	t.Run("Stress", func(t *testing.T) {
		if testing.Short() {
			t.Skip("skipping trace stress tests in short mode")
		}
		runTest(t, true, "")
	})
	t.Run("AllocFree", func(t *testing.T) {
		if testing.Short() {
			t.Skip("skipping trace alloc/free tests in short mode")
		}
		runTest(t, false, "traceallocfree=1")
	})
}