// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

//go:build !js

package pprof

import (
	"bytes"
	"context"
	"fmt"
	"internal/abi"
	"internal/profile"
	"internal/syscall/unix"
	"internal/testenv"
	"io"
	"math"
	"math/big"
	"os"
	"regexp"
	"runtime"
	"runtime/debug"
	"strconv"
	"strings"
	"sync"
	"sync/atomic"
	"testing"
	"time"
	_ "unsafe"
)

func cpuHogger(f func(x int) int, y *int, dur time.Duration) {
	// We only need to get one 100 Hz clock tick, so we've got
	// a large safety buffer.
	// But do at least 500 iterations (which should take about 100ms),
	// otherwise TestCPUProfileMultithreaded can fail if only one
	// thread is scheduled during the testing period.
	t0 := time.Now()
	accum := *y
	for i := 0; i < 500 || time.Since(t0) < dur; i++ {
		accum = f(accum)
	}
	*y = accum
}

var (
	salt1 = 0
	salt2 = 0
)

// The actual CPU hogging function.
// Must not call other functions nor access heap/globals in the loop,
// otherwise under race detector the samples will be in the race runtime.
func cpuHog1(x int) int {
	return cpuHog0(x, 1e5)
}

func cpuHog0(x, n int) int {
	foo := x
	for i := 0; i < n; i++ {
		if foo > 0 {
			foo *= foo
		} else {
			foo *= foo + 1
		}
	}
	return foo
}

func cpuHog2(x int) int {
	foo := x
	for i := 0; i < 1e5; i++ {
		if foo > 0 {
			foo *= foo
		} else {
			foo *= foo + 2
		}
	}
	return foo
}

// Return a list of functions that we don't want to ever appear in CPU
// profiles. For gccgo, that list includes the sigprof handler itself.
func avoidFunctions() []string {
	if runtime.Compiler == "gccgo" {
		return []string{"runtime.sigprof"}
	}
	return nil
}

func TestCPUProfile(t *testing.T) {
	matches := matchAndAvoidStacks(stackContains, []string{"runtime/pprof.cpuHog1"}, avoidFunctions())
	testCPUProfile(t, matches, func(dur time.Duration) {
		cpuHogger(cpuHog1, &salt1, dur)
	})
}

func TestCPUProfileMultithreaded(t *testing.T) {
	defer runtime.GOMAXPROCS(runtime.GOMAXPROCS(2))
	matches := matchAndAvoidStacks(stackContains, []string{"runtime/pprof.cpuHog1", "runtime/pprof.cpuHog2"}, avoidFunctions())
	testCPUProfile(t, matches, func(dur time.Duration) {
		c := make(chan int)
		go func() {
			cpuHogger(cpuHog1, &salt1, dur)
			c <- 1
		}()
		cpuHogger(cpuHog2, &salt2, dur)
		<-c
	})
}

func TestCPUProfileMultithreadMagnitude(t *testing.T) {
	if runtime.GOOS != "linux" {
		t.Skip("issue 35057 is only confirmed on Linux")
	}

	// Linux [5.9,5.16) has a kernel bug that can break CPU timers on newly
	// created threads, breaking our CPU accounting.
	major, minor := unix.KernelVersion()
	t.Logf("Running on Linux %d.%d", major, minor)
	defer func() {
		if t.Failed() {
			t.Logf("Failure of this test may indicate that your system suffers from a known Linux kernel bug fixed on newer kernels. See https://golang.org/issue/49065.")
		}
	}()

	// Disable on affected builders to avoid flakiness, but otherwise keep
	// it enabled to potentially warn users that they are on a broken
	// kernel.
	if testenv.Builder() != "" && (runtime.GOARCH == "386" || runtime.GOARCH == "amd64") {
		have59 := major > 5 || (major == 5 && minor >= 9)
		have516 := major > 5 || (major == 5 && minor >= 16)
		if have59 && !have516 {
			testenv.SkipFlaky(t, 49065)
		}
	}

	// Run a workload in a single goroutine, then run copies of the same
	// workload in several goroutines. For both the serial and parallel cases,
	// the CPU time the process measures with its own profiler should match the
	// total CPU usage that the OS reports.
	//
	// We could also check that increases in parallelism (GOMAXPROCS) lead to a
	// linear increase in the CPU usage reported by both the OS and the
	// profiler, but without a guarantee of exclusive access to CPU resources
	// that is likely to be a flaky test.

	// Require the smaller value to be within 10%, or 40% in short mode.
	maxDiff := 0.10
	if testing.Short() {
		maxDiff = 0.40
	}

	compare := func(a, b time.Duration, maxDiff float64) error {
		if a <= 0 || b <= 0 {
			return fmt.Errorf("Expected both time reports to be positive")
		}

		if a < b {
			a, b = b, a
		}

		diff := float64(a-b) / float64(a)
		if diff > maxDiff {
			return fmt.Errorf("CPU usage reports are too different (limit -%.1f%%, got -%.1f%%)", maxDiff*100, diff*100)
		}

		return nil
	}
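
	// For example, with OS-reported CPU time a = 1000ms and profiler-measured
	// time b = 930ms, diff = (1000-930)/1000 = 7%, which passes the regular
	// 10% limit and the short-mode 40% limit alike.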

	for _, tc := range []struct {
		name    string
		workers int
	}{
		{
			name:    "serial",
			workers: 1,
		},
		{
			name:    "parallel",
			workers: runtime.GOMAXPROCS(0),
		},
	} {
		// check that the OS's perspective matches what the Go runtime measures.
		t.Run(tc.name, func(t *testing.T) {
			t.Logf("Running with %d workers", tc.workers)

			var userTime, systemTime time.Duration
			matches := matchAndAvoidStacks(stackContains, []string{"runtime/pprof.cpuHog1"}, avoidFunctions())
			acceptProfile := func(t *testing.T, p *profile.Profile) bool {
				if !matches(t, p) {
					return false
				}

				ok := true
				for i, unit := range []string{"count", "nanoseconds"} {
					if have, want := p.SampleType[i].Unit, unit; have != want {
						t.Logf("pN SampleType[%d]; %q != %q", i, have, want)
						ok = false
					}
				}

				// cpuHog1 called below is the primary source of CPU
				// load, but there may be some background work by the
				// runtime. Since the OS rusage measurement will
				// include all work done by the process, also compare
				// against all samples in our profile.
				var value time.Duration
				for _, sample := range p.Sample {
					value += time.Duration(sample.Value[1]) * time.Nanosecond
				}

				totalTime := userTime + systemTime
				t.Logf("compare %s user + %s system = %s vs %s", userTime, systemTime, totalTime, value)
				if err := compare(totalTime, value, maxDiff); err != nil {
					t.Logf("compare got %v want nil", err)
					ok = false
				}

				return ok
			}

			testCPUProfile(t, acceptProfile, func(dur time.Duration) {
				userTime, systemTime = diffCPUTime(t, func() {
					var wg sync.WaitGroup
					var once sync.Once
					for i := 0; i < tc.workers; i++ {
						wg.Add(1)
						go func() {
							defer wg.Done()
							var salt = 0
							cpuHogger(cpuHog1, &salt, dur)
							once.Do(func() { salt1 = salt })
						}()
					}
					wg.Wait()
				})
			})
		})
	}
}

// containsInlinedCall reports whether the function body for the function f is
// known to contain an inlined function call within the first maxBytes bytes.
func containsInlinedCall(f any, maxBytes int) bool {
	_, found := findInlinedCall(f, maxBytes)
	return found
}

// findInlinedCall returns the PC of an inlined function call within
// the function body for the function f if any.
func findInlinedCall(f any, maxBytes int) (pc uint64, found bool) {
	fFunc := runtime.FuncForPC(uintptr(abi.FuncPCABIInternal(f)))
	if fFunc == nil || fFunc.Entry() == 0 {
		panic("failed to locate function entry")
	}

	for offset := 0; offset < maxBytes; offset++ {
		innerPC := fFunc.Entry() + uintptr(offset)
		inner := runtime.FuncForPC(innerPC)
		if inner == nil {
			// No function known for this PC value.
			// It might simply be misaligned, so keep searching.
			continue
		}
		if inner.Entry() != fFunc.Entry() {
			// Scanned past f and didn't find any inlined functions.
			break
		}
		if inner.Name() != fFunc.Name() {
			// This PC has f as its entry-point, but is not f. Therefore, it must be a
			// function inlined into f.
			return uint64(innerPC), true
		}
	}

	return 0, false
}
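
// A minimal usage sketch for the helpers above (hypothetical; not called by
// any test): probe whether the compiler inlined a call somewhere in the
// first 4KiB of inlinedCaller's body and report the PC that was found.
func exampleFindInlinedCall() {
	if pc, found := findInlinedCall(inlinedCaller, 4<<10); found {
		fmt.Printf("inlined call in inlinedCaller at pc %#x\n", pc)
	}
}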

func TestCPUProfileInlining(t *testing.T) {
	if !containsInlinedCall(inlinedCaller, 4<<10) {
		t.Skip("Can't determine whether inlinedCallee was inlined into inlinedCaller.")
	}

	matches := matchAndAvoidStacks(stackContains, []string{"runtime/pprof.inlinedCallee", "runtime/pprof.inlinedCaller"}, avoidFunctions())
	p := testCPUProfile(t, matches, func(dur time.Duration) {
		cpuHogger(inlinedCaller, &salt1, dur)
	})

	// Check that inlined function locations are encoded correctly. The inlinedCallee and inlinedCaller should be in one Location.
	for _, loc := range p.Location {
		hasInlinedCallerAfterInlinedCallee, hasInlinedCallee := false, false
		for _, line := range loc.Line {
			if line.Function.Name == "runtime/pprof.inlinedCallee" {
				hasInlinedCallee = true
			}
			if hasInlinedCallee && line.Function.Name == "runtime/pprof.inlinedCaller" {
				hasInlinedCallerAfterInlinedCallee = true
			}
		}
		if hasInlinedCallee != hasInlinedCallerAfterInlinedCallee {
			t.Fatalf("want inlinedCallee followed by inlinedCaller, got separate Location entries:\n%v", p)
		}
	}
}

func inlinedCaller(x int) int {
	x = inlinedCallee(x, 1e5)
	return x
}

func inlinedCallee(x, n int) int {
	return cpuHog0(x, n)
}

//go:noinline
func dumpCallers(pcs []uintptr) {
	if pcs == nil {
		return
	}

	skip := 2 // Callers and dumpCallers
	runtime.Callers(skip, pcs)
}

//go:noinline
func inlinedCallerDump(pcs []uintptr) {
	inlinedCalleeDump(pcs)
}

func inlinedCalleeDump(pcs []uintptr) {
	dumpCallers(pcs)
}

type inlineWrapperInterface interface {
	dump(stack []uintptr)
}

type inlineWrapper struct {
}

func (h inlineWrapper) dump(pcs []uintptr) {
	dumpCallers(pcs)
}

func inlinedWrapperCallerDump(pcs []uintptr) {
	var h inlineWrapperInterface
	h = &inlineWrapper{}
	h.dump(pcs)
}

func TestCPUProfileRecursion(t *testing.T) {
	matches := matchAndAvoidStacks(stackContains, []string{"runtime/pprof.inlinedCallee", "runtime/pprof.recursionCallee", "runtime/pprof.recursionCaller"}, avoidFunctions())
	p := testCPUProfile(t, matches, func(dur time.Duration) {
		cpuHogger(recursionCaller, &salt1, dur)
	})

	// check the Location encoding was not confused by recursive calls.
	for i, loc := range p.Location {
		recursionFunc := 0
		for _, line := range loc.Line {
			if name := line.Function.Name; name == "runtime/pprof.recursionCaller" || name == "runtime/pprof.recursionCallee" {
				recursionFunc++
			}
		}
		if recursionFunc > 1 {
			t.Fatalf("want at most one recursionCaller or recursionCallee in one Location, got a violating Location (index: %d):\n%v", i, p)
		}
	}
}

func recursionCaller(x int) int {
	y := recursionCallee(3, x)
	return y
}

func recursionCallee(n, x int) int {
	if n == 0 {
		return 1
	}
	y := inlinedCallee(x, 1e4)
	return y * recursionCallee(n-1, x)
}

func recursionChainTop(x int, pcs []uintptr) {
	if x < 0 {
		return
	}
	recursionChainMiddle(x, pcs)
}

func recursionChainMiddle(x int, pcs []uintptr) {
	recursionChainBottom(x, pcs)
}

func recursionChainBottom(x int, pcs []uintptr) {
	// This will be called each time; we only care about the last call. We
	// can't make this conditional or this function won't be inlined.
	dumpCallers(pcs)

	recursionChainTop(x-1, pcs)
}

func parseProfile(t *testing.T, valBytes []byte, f func(uintptr, []*profile.Location, map[string][]string)) *profile.Profile {
	p, err := profile.Parse(bytes.NewReader(valBytes))
	if err != nil {
		t.Fatal(err)
	}
	for _, sample := range p.Sample {
		count := uintptr(sample.Value[0])
		f(count, sample.Location, sample.Label)
	}
	return p
}

func cpuProfilingBroken() bool {
	switch runtime.GOOS {
	case "plan9":
		// Profiling unimplemented.
		return true
	case "aix":
		// See https://golang.org/issue/45170.
		return true
	case "ios", "dragonfly", "netbsd", "illumos", "solaris":
		// See https://golang.org/issue/13841.
		return true
	case "openbsd":
		if runtime.GOARCH == "arm" || runtime.GOARCH == "arm64" {
			// See https://golang.org/issue/13841.
			return true
		}
	}

	return false
}

// testCPUProfile runs f under the CPU profiler, checks the conditions
// encoded in matches, and returns the parsed profile.
func testCPUProfile(t *testing.T, matches profileMatchFunc, f func(dur time.Duration)) *profile.Profile {
	switch runtime.GOOS {
	case "darwin":
		out, err := testenv.Command(t, "uname", "-a").CombinedOutput()
		if err != nil {
			t.Fatal(err)
		}
		vers := string(out)
		t.Logf("uname -a: %v", vers)
	case "plan9":
		t.Skip("skipping on plan9")
	case "wasip1":
		t.Skip("skipping on wasip1")
	}

	broken := cpuProfilingBroken()

	deadline, ok := t.Deadline()
	if broken || !ok {
		if broken && testing.Short() {
			// If it's expected to be broken, no point waiting around.
			deadline = time.Now().Add(1 * time.Second)
		} else {
			deadline = time.Now().Add(10 * time.Second)
		}
	}

	// If we're running a long test, start with a long duration
	// for tests that try to make sure something *doesn't* happen.
	duration := 5 * time.Second
	if testing.Short() {
		duration = 100 * time.Millisecond
	}

	// Profiling tests are inherently flaky, especially on a
	// loaded system, such as when this test is running with
	// several others under go test std. If a test fails in a way
	// that could mean it just didn't run long enough, try with a
	// longer duration.
	for {
		var prof bytes.Buffer
		if err := StartCPUProfile(&prof); err != nil {
			t.Fatal(err)
		}
		f(duration)
		StopCPUProfile()

		if p, ok := profileOk(t, matches, prof, duration); ok {
			return p
		}

		duration *= 2
		if time.Until(deadline) < duration {
			break
		}
		t.Logf("retrying with %s duration", duration)
	}

	if broken {
		t.Skipf("ignoring failure on %s/%s; see golang.org/issue/13841", runtime.GOOS, runtime.GOARCH)
	}

	// Ignore the failure if the tests are running in a QEMU-based emulator;
	// QEMU is not perfect at emulating everything.
	// The IN_QEMU environment variable is set by some of the Go builders.
	// IN_QEMU=1 indicates that the tests are running in QEMU. See issue 9605.
	if os.Getenv("IN_QEMU") == "1" {
		t.Skip("ignore the failure in QEMU; see golang.org/issue/9605")
	}
	t.FailNow()
	return nil
}

var diffCPUTimeImpl func(f func()) (user, system time.Duration)

func diffCPUTime(t *testing.T, f func()) (user, system time.Duration) {
	if fn := diffCPUTimeImpl; fn != nil {
		return fn(f)
	}
	t.Fatalf("cannot measure CPU time on GOOS=%s GOARCH=%s", runtime.GOOS, runtime.GOARCH)
	return 0, 0
}

func contains(slice []string, s string) bool {
	for i := range slice {
		if slice[i] == s {
			return true
		}
	}
	return false
}

// stackContains matches if a function named spec appears anywhere in the stack trace.
func stackContains(spec string, count uintptr, stk []*profile.Location, labels map[string][]string) bool {
	for _, loc := range stk {
		for _, line := range loc.Line {
			if strings.Contains(line.Function.Name, spec) {
				return true
			}
		}
	}
	return false
}

type sampleMatchFunc func(spec string, count uintptr, stk []*profile.Location, labels map[string][]string) bool

func profileOk(t *testing.T, matches profileMatchFunc, prof bytes.Buffer, duration time.Duration) (_ *profile.Profile, ok bool) {
	ok = true

	var samples uintptr
	var buf strings.Builder
	p := parseProfile(t, prof.Bytes(), func(count uintptr, stk []*profile.Location, labels map[string][]string) {
		fmt.Fprintf(&buf, "%d:", count)
		fprintStack(&buf, stk)
		fmt.Fprintf(&buf, " labels: %v\n", labels)
		samples += count
		fmt.Fprintf(&buf, "\n")
	})
	t.Logf("total %d CPU profile samples collected:\n%s", samples, buf.String())

	if samples < 10 && runtime.GOOS == "windows" {
		// On some Windows machines we end up with
		// not enough samples due to coarse timer
		// resolution. Let it go.
		t.Log("too few samples on Windows (golang.org/issue/10842)")
		return p, false
	}

	// Check that we got a reasonable number of samples.
	// We used to always require at least ideal/4 samples,
	// but that is too hard to guarantee on a loaded system.
	// Now we accept 10 or more samples, which we take to be
	// enough to show that at least some profiling is occurring.
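	// For example, a 5-second run sampled at 100 Hz has ideal = 500, so the
	// old rule would have demanded 125 samples where the current one accepts
	// just 10.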
	if ideal := uintptr(duration * 100 / time.Second); samples == 0 || (samples < ideal/4 && samples < 10) {
		t.Logf("too few samples; got %d, want at least %d, ideally %d", samples, ideal/4, ideal)
		ok = false
	}

	if matches != nil && !matches(t, p) {
		ok = false
	}

	return p, ok
}

type profileMatchFunc func(*testing.T, *profile.Profile) bool

func matchAndAvoidStacks(matches sampleMatchFunc, need []string, avoid []string) profileMatchFunc {
	return func(t *testing.T, p *profile.Profile) (ok bool) {
		ok = true

		// Check that profile is well formed, contains 'need', and does not contain
		// anything from 'avoid'.
		have := make([]uintptr, len(need))
		avoidSamples := make([]uintptr, len(avoid))

		for _, sample := range p.Sample {
			count := uintptr(sample.Value[0])
			for i, spec := range need {
				if matches(spec, count, sample.Location, sample.Label) {
					have[i] += count
				}
			}
			for i, name := range avoid {
				for _, loc := range sample.Location {
					for _, line := range loc.Line {
						if strings.Contains(line.Function.Name, name) {
							avoidSamples[i] += count
						}
					}
				}
			}
		}

		for i, name := range avoid {
			bad := avoidSamples[i]
			if bad != 0 {
				t.Logf("found %d samples in avoid-function %s\n", bad, name)
				ok = false
			}
		}

		if len(need) == 0 {
			return
		}

		var total uintptr
		for i, name := range need {
			total += have[i]
			t.Logf("found %d samples in expected function %s\n", have[i], name)
		}
		if total == 0 {
			t.Logf("no samples in expected functions")
			ok = false
		}

		// We'd like to check a reasonable minimum, like
		// total / len(have) / smallconstant, but this test is
		// pretty flaky (see bug 7095). So we'll just test to
		// make sure we got at least one sample.
		min := uintptr(1)
		for i, name := range need {
			if have[i] < min {
				t.Logf("%s has %d samples out of %d, want at least %d, ideally %d", name, have[i], total, min, total/uintptr(len(have)))
				ok = false
			}
		}
		return
	}
}

// Fork can hang if preempted with signals frequently enough (see issue 5517).
// Ensure that we do not do this.
func TestCPUProfileWithFork(t *testing.T) {
	testenv.MustHaveExec(t)

	exe, err := os.Executable()
	if err != nil {
		t.Fatal(err)
	}

	heap := 1 << 30
	if runtime.GOOS == "android" {
		// Use smaller size for Android to avoid crash.
		heap = 100 << 20
	}
	if runtime.GOOS == "windows" && runtime.GOARCH == "arm" {
		// Use smaller heap for Windows/ARM to avoid crash.
		heap = 100 << 20
	}
	if testing.Short() {
		heap = 100 << 20
	}
	// This makes fork slower.
	garbage := make([]byte, heap)
	// Need to touch the slice; otherwise it won't be paged in.
	done := make(chan bool)
	go func() {
		for i := range garbage {
			garbage[i] = 42
		}
		done <- true
	}()
	<-done

	var prof bytes.Buffer
	if err := StartCPUProfile(&prof); err != nil {
		t.Fatal(err)
	}
	defer StopCPUProfile()

	for i := 0; i < 10; i++ {
		testenv.Command(t, exe, "-h").CombinedOutput()
	}
}

// Test that the profiler does not observe runtime.gogo as "user" goroutine execution.
// If it did, it would see inconsistent state and would either record an incorrect stack
// or crash because the stack was malformed.
func TestGoroutineSwitch(t *testing.T) {
	if runtime.Compiler == "gccgo" {
		t.Skip("not applicable for gccgo")
	}
	// How much to try. These defaults take about 1 second
	// on a 2012 MacBook Pro. The ones in short mode take
	// about 0.1 seconds.
	tries := 10
	count := 1000000
	if testing.Short() {
		tries = 1
	}
	for try := 0; try < tries; try++ {
		var prof bytes.Buffer
		if err := StartCPUProfile(&prof); err != nil {
			t.Fatal(err)
		}
		for i := 0; i < count; i++ {
			runtime.Gosched()
		}
		StopCPUProfile()

		// Read profile to look for entries for gogo with an attempt at a traceback.
		// "runtime.gogo" is OK, because that's the part of the context switch
		// before the actual switch begins. But we should not see "gogo",
		// aka "gogo<>(SB)", which does the actual switch and is marked SPWRITE.
		parseProfile(t, prof.Bytes(), func(count uintptr, stk []*profile.Location, _ map[string][]string) {
			// An entry with two frames with 'System' in its top frame
			// exists to record a PC without a traceback. Those are okay.
			if len(stk) == 2 {
				name := stk[1].Line[0].Function.Name
				if name == "runtime._System" || name == "runtime._ExternalCode" || name == "runtime._GC" {
					return
				}
			}

			// An entry with just one frame is OK too:
			// it knew to stop at gogo.
			if len(stk) == 1 {
				return
			}

			// Otherwise, should not see gogo.
			// The place we'd see it would be the innermost frame.
			name := stk[0].Line[0].Function.Name
			if name == "gogo" {
				var buf strings.Builder
				fprintStack(&buf, stk)
				t.Fatalf("found profile entry for gogo:\n%s", buf.String())
			}
		})
	}
}

func fprintStack(w io.Writer, stk []*profile.Location) {
	if len(stk) == 0 {
		fmt.Fprintf(w, " (stack empty)")
	}
	for _, loc := range stk {
		fmt.Fprintf(w, " %#x", loc.Address)
		fmt.Fprintf(w, " (")
		for i, line := range loc.Line {
			if i > 0 {
				fmt.Fprintf(w, " ")
			}
			fmt.Fprintf(w, "%s:%d", line.Function.Name, line.Line)
		}
		fmt.Fprintf(w, ")")
	}
}

// Test that profiling of division operations is okay, especially on ARM. See issue 6681.
func TestMathBigDivide(t *testing.T) {
	testCPUProfile(t, nil, func(duration time.Duration) {
		t := time.After(duration)
		pi := new(big.Int)
		for {
			for i := 0; i < 100; i++ {
				n := big.NewInt(2646693125139304345)
				d := big.NewInt(842468587426513207)
				pi.Div(n, d)
			}
			select {
			case <-t:
				return
			default:
			}
		}
	})
}

// stackContainsAll matches if all functions in spec (comma-separated) appear somewhere in the stack trace.
func stackContainsAll(spec string, count uintptr, stk []*profile.Location, labels map[string][]string) bool {
	for _, f := range strings.Split(spec, ",") {
		if !stackContains(f, count, stk, labels) {
			return false
		}
	}
	return true
}

func TestMorestack(t *testing.T) {
	matches := matchAndAvoidStacks(stackContainsAll, []string{"runtime.newstack,runtime/pprof.growstack"}, avoidFunctions())
	testCPUProfile(t, matches, func(duration time.Duration) {
		t := time.After(duration)
		c := make(chan bool)
		for {
			go func() {
				growstack1()
				c <- true
			}()
			select {
			case <-t:
				return
			case <-c:
			}
		}
	})
}

//go:noinline
func growstack1() {
	growstack(10)
}

//go:noinline
func growstack(n int) {
	var buf [8 << 18]byte
	use(buf)
	if n > 0 {
		growstack(n - 1)
	}
}

//go:noinline
func use(x [8 << 18]byte) {}

func TestBlockProfile(t *testing.T) {
	type TestCase struct {
		name string
		f    func(*testing.T)
		stk  []string
		re   string
	}
	tests := [...]TestCase{
		{
			name: "chan recv",
			f:    blockChanRecv,
			stk: []string{
				"runtime.chanrecv1",
				"runtime/pprof.blockChanRecv",
				"runtime/pprof.TestBlockProfile",
			},
			re: `
[0-9]+ [0-9]+ @( 0x[[:xdigit:]]+)+
#	0x[0-9a-f]+	runtime\.chanrecv1\+0x[0-9a-f]+	.*runtime/chan.go:[0-9]+
#	0x[0-9a-f]+	runtime/pprof\.blockChanRecv\+0x[0-9a-f]+	.*runtime/pprof/pprof_test.go:[0-9]+
#	0x[0-9a-f]+	runtime/pprof\.TestBlockProfile\+0x[0-9a-f]+	.*runtime/pprof/pprof_test.go:[0-9]+
`},
		{
			name: "chan send",
			f:    blockChanSend,
			stk: []string{
				"runtime.chansend1",
				"runtime/pprof.blockChanSend",
				"runtime/pprof.TestBlockProfile",
			},
			re: `
[0-9]+ [0-9]+ @( 0x[[:xdigit:]]+)+
#	0x[0-9a-f]+	runtime\.chansend1\+0x[0-9a-f]+	.*runtime/chan.go:[0-9]+
#	0x[0-9a-f]+	runtime/pprof\.blockChanSend\+0x[0-9a-f]+	.*runtime/pprof/pprof_test.go:[0-9]+
#	0x[0-9a-f]+	runtime/pprof\.TestBlockProfile\+0x[0-9a-f]+	.*runtime/pprof/pprof_test.go:[0-9]+
`},
		{
			name: "chan close",
			f:    blockChanClose,
			stk: []string{
				"runtime.chanrecv1",
				"runtime/pprof.blockChanClose",
				"runtime/pprof.TestBlockProfile",
			},
			re: `
[0-9]+ [0-9]+ @( 0x[[:xdigit:]]+)+
#	0x[0-9a-f]+	runtime\.chanrecv1\+0x[0-9a-f]+	.*runtime/chan.go:[0-9]+
#	0x[0-9a-f]+	runtime/pprof\.blockChanClose\+0x[0-9a-f]+	.*runtime/pprof/pprof_test.go:[0-9]+
#	0x[0-9a-f]+	runtime/pprof\.TestBlockProfile\+0x[0-9a-f]+	.*runtime/pprof/pprof_test.go:[0-9]+
`},
		{
			name: "select recv async",
			f:    blockSelectRecvAsync,
			stk: []string{
				"runtime.selectgo",
				"runtime/pprof.blockSelectRecvAsync",
				"runtime/pprof.TestBlockProfile",
			},
			re: `
[0-9]+ [0-9]+ @( 0x[[:xdigit:]]+)+
#	0x[0-9a-f]+	runtime\.selectgo\+0x[0-9a-f]+	.*runtime/select.go:[0-9]+
#	0x[0-9a-f]+	runtime/pprof\.blockSelectRecvAsync\+0x[0-9a-f]+	.*runtime/pprof/pprof_test.go:[0-9]+
#	0x[0-9a-f]+	runtime/pprof\.TestBlockProfile\+0x[0-9a-f]+	.*runtime/pprof/pprof_test.go:[0-9]+
`},
		{
			name: "select send sync",
			f:    blockSelectSendSync,
			stk: []string{
				"runtime.selectgo",
				"runtime/pprof.blockSelectSendSync",
				"runtime/pprof.TestBlockProfile",
			},
			re: `
[0-9]+ [0-9]+ @( 0x[[:xdigit:]]+)+
#	0x[0-9a-f]+	runtime\.selectgo\+0x[0-9a-f]+	.*runtime/select.go:[0-9]+
#	0x[0-9a-f]+	runtime/pprof\.blockSelectSendSync\+0x[0-9a-f]+	.*runtime/pprof/pprof_test.go:[0-9]+
#	0x[0-9a-f]+	runtime/pprof\.TestBlockProfile\+0x[0-9a-f]+	.*runtime/pprof/pprof_test.go:[0-9]+
`},
		{
			name: "mutex",
			f:    blockMutex,
			stk: []string{
				"sync.(*Mutex).Lock",
				"runtime/pprof.blockMutex",
				"runtime/pprof.TestBlockProfile",
			},
			re: `
[0-9]+ [0-9]+ @( 0x[[:xdigit:]]+)+
#	0x[0-9a-f]+	sync\.\(\*Mutex\)\.Lock\+0x[0-9a-f]+	.*sync/mutex\.go:[0-9]+
#	0x[0-9a-f]+	runtime/pprof\.blockMutex\+0x[0-9a-f]+	.*runtime/pprof/pprof_test.go:[0-9]+
#	0x[0-9a-f]+	runtime/pprof\.TestBlockProfile\+0x[0-9a-f]+	.*runtime/pprof/pprof_test.go:[0-9]+
`},
		{
			name: "cond",
			f:    blockCond,
			stk: []string{
				"sync.(*Cond).Wait",
				"runtime/pprof.blockCond",
				"runtime/pprof.TestBlockProfile",
			},
			re: `
[0-9]+ [0-9]+ @( 0x[[:xdigit:]]+)+
#	0x[0-9a-f]+	sync\.\(\*Cond\)\.Wait\+0x[0-9a-f]+	.*sync/cond\.go:[0-9]+
#	0x[0-9a-f]+	runtime/pprof\.blockCond\+0x[0-9a-f]+	.*runtime/pprof/pprof_test.go:[0-9]+
#	0x[0-9a-f]+	runtime/pprof\.TestBlockProfile\+0x[0-9a-f]+	.*runtime/pprof/pprof_test.go:[0-9]+
`},
	}

	// Generate block profile
	runtime.SetBlockProfileRate(1)
	defer runtime.SetBlockProfileRate(0)
	for _, test := range tests {
		test.f(t)
	}

	t.Run("debug=1", func(t *testing.T) {
		var w strings.Builder
		Lookup("block").WriteTo(&w, 1)
		prof := w.String()

		if !strings.HasPrefix(prof, "--- contention:\ncycles/second=") {
			t.Fatalf("Bad profile header:\n%v", prof)
		}

		if strings.HasSuffix(prof, "#\t0x0\n\n") {
			t.Errorf("Useless 0 suffix:\n%v", prof)
		}

		for _, test := range tests {
			if !regexp.MustCompile(strings.ReplaceAll(test.re, "\t", "\t+")).MatchString(prof) {
				t.Errorf("Bad %v entry, expect:\n%v\ngot:\n%v", test.name, test.re, prof)
			}
		}
	})

	t.Run("proto", func(t *testing.T) {
		// proto format
		var w bytes.Buffer
		Lookup("block").WriteTo(&w, 0)
		p, err := profile.Parse(&w)
		if err != nil {
			t.Fatalf("failed to parse profile: %v", err)
		}
		t.Logf("parsed proto: %s", p)
		if err := p.CheckValid(); err != nil {
			t.Fatalf("invalid profile: %v", err)
		}

		stks := stacks(p)
		for _, test := range tests {
			if !containsStack(stks, test.stk) {
				t.Errorf("No matching stack entry for %v, want %+v", test.name, test.stk)
			}
		}
	})

}

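// stacks flattens each sample in p into the list of function names on its
// call stack, leaf-most frame first (inlined frames expand to multiple
// names per Location).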
func stacks(p *profile.Profile) (res [][]string) {
	for _, s := range p.Sample {
		var stk []string
		for _, l := range s.Location {
			for _, line := range l.Line {
				stk = append(stk, line.Function.Name)
			}
		}
		res = append(res, stk)
	}
	return res
}

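// containsStack reports whether any stack in got has want as a prefix,
// comparing function names frame by frame from the leaf outward.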
func containsStack(got [][]string, want []string) bool {
	for _, stk := range got {
		if len(stk) < len(want) {
			continue
		}
		for i, f := range want {
			if f != stk[i] {
				break
			}
			if i == len(want)-1 {
				return true
			}
		}
	}
	return false
}

// awaitBlockedGoroutine spins on runtime.Gosched until a runtime stack dump
// shows at least count goroutines in the given state with a stack frame in
// runtime/pprof.<fName>.
func awaitBlockedGoroutine(t *testing.T, state, fName string, count int) {
	re := fmt.Sprintf(`(?m)^goroutine \d+ \[%s\]:\n(?:.+\n\t.+\n)*runtime/pprof\.%s`, regexp.QuoteMeta(state), fName)
	r := regexp.MustCompile(re)
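
	// As an illustration, with state "chan receive" and fName "blockChanRecv"
	// the pattern is intended to match a dump entry shaped like:
	//
	//	goroutine 8 [chan receive]:
	//	runtime/pprof.blockChanRecv(...)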

	if deadline, ok := t.Deadline(); ok {
		if d := time.Until(deadline); d > 1*time.Second {
			timer := time.AfterFunc(d-1*time.Second, func() {
				debug.SetTraceback("all")
				panic(fmt.Sprintf("timed out waiting for %#q", re))
			})
			defer timer.Stop()
		}
	}

	buf := make([]byte, 64<<10)
	for {
		runtime.Gosched()
		n := runtime.Stack(buf, true)
		if n == len(buf) {
			// Buffer wasn't large enough for a full goroutine dump.
			// Resize it and try again.
			buf = make([]byte, 2*len(buf))
			continue
		}
		if len(r.FindAll(buf[:n], -1)) >= count {
			return
		}
	}
}

func blockChanRecv(t *testing.T) {
	c := make(chan bool)
	go func() {
		awaitBlockedGoroutine(t, "chan receive", "blockChanRecv", 1)
		c <- true
	}()
	<-c
}

func blockChanSend(t *testing.T) {
	c := make(chan bool)
	go func() {
		awaitBlockedGoroutine(t, "chan send", "blockChanSend", 1)
		<-c
	}()
	c <- true
}

func blockChanClose(t *testing.T) {
	c := make(chan bool)
	go func() {
		awaitBlockedGoroutine(t, "chan receive", "blockChanClose", 1)
		close(c)
	}()
	<-c
}

func blockSelectRecvAsync(t *testing.T) {
	const numTries = 3
	c := make(chan bool, 1)
	c2 := make(chan bool, 1)
	go func() {
		for i := 0; i < numTries; i++ {
			awaitBlockedGoroutine(t, "select", "blockSelectRecvAsync", 1)
			c <- true
		}
	}()
	for i := 0; i < numTries; i++ {
		select {
		case <-c:
		case <-c2:
		}
	}
}

func blockSelectSendSync(t *testing.T) {
	c := make(chan bool)
	c2 := make(chan bool)
	go func() {
		awaitBlockedGoroutine(t, "select", "blockSelectSendSync", 1)
		<-c
	}()
	select {
	case c <- true:
	case c2 <- true:
	}
}

func blockMutex(t *testing.T) {
	var mu sync.Mutex
	mu.Lock()
	go func() {
		awaitBlockedGoroutine(t, "sync.Mutex.Lock", "blockMutex", 1)
		mu.Unlock()
	}()
	// Note: Unlock releases mu before recording the mutex event,
	// so it's theoretically possible for this to proceed and
	// capture the profile before the event is recorded. As long
	// as this is blocked before the unlock happens, it's okay.
	mu.Lock()
}

func blockMutexN(t *testing.T, n int, d time.Duration) {
	var wg sync.WaitGroup
	var mu sync.Mutex
	mu.Lock()
	go func() {
		awaitBlockedGoroutine(t, "sync.Mutex.Lock", "blockMutex", n)
		time.Sleep(d)
		mu.Unlock()
	}()
	// Note: Unlock releases mu before recording the mutex event,
	// so it's theoretically possible for this to proceed and
	// capture the profile before the event is recorded. As long
	// as this is blocked before the unlock happens, it's okay.
	for i := 0; i < n; i++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			mu.Lock()
			mu.Unlock()
		}()
	}
	wg.Wait()
}

func blockCond(t *testing.T) {
	var mu sync.Mutex
	c := sync.NewCond(&mu)
	mu.Lock()
	go func() {
		awaitBlockedGoroutine(t, "sync.Cond.Wait", "blockCond", 1)
		mu.Lock()
		c.Signal()
		mu.Unlock()
	}()
	c.Wait()
	mu.Unlock()
}

// See http://golang.org/cl/299991.
func TestBlockProfileBias(t *testing.T) {
	rate := int(1000) // arbitrary value
	runtime.SetBlockProfileRate(rate)
	defer runtime.SetBlockProfileRate(0)

	// simulate blocking events
	blockFrequentShort(rate)
	blockInfrequentLong(rate)

	var w bytes.Buffer
	Lookup("block").WriteTo(&w, 0)
	p, err := profile.Parse(&w)
	if err != nil {
		t.Fatalf("failed to parse profile: %v", err)
	}
	t.Logf("parsed proto: %s", p)

	il := float64(-1) // blockInfrequentLong duration
	fs := float64(-1) // blockFrequentShort duration
	for _, s := range p.Sample {
		for _, l := range s.Location {
			for _, line := range l.Line {
				if len(s.Value) < 2 {
					t.Fatal("block profile has less than 2 sample types")
				}

				if line.Function.Name == "runtime/pprof.blockInfrequentLong" {
					il = float64(s.Value[1])
				} else if line.Function.Name == "runtime/pprof.blockFrequentShort" {
					fs = float64(s.Value[1])
				}
			}
		}
	}
	if il == -1 || fs == -1 {
		t.Fatal("block profile is missing expected functions")
	}

	// stddev of bias from 100 runs on local machine multiplied by 10x
	const threshold = 0.2
	if bias := (il - fs) / il; math.Abs(bias) > threshold {
		t.Fatalf("bias: abs(%f) > %f", bias, threshold)
	} else {
		t.Logf("bias: abs(%f) < %f", bias, threshold)
	}
}

// blockFrequentShort produces 100000 block events with an average duration of
// rate / 10.
func blockFrequentShort(rate int) {
	for i := 0; i < 100000; i++ {
		blockevent(int64(rate/10), 1)
	}
}
// blockInfrequentLong produces 10000 block events with an average duration of
// rate.
func blockInfrequentLong(rate int) {
	for i := 0; i < 10000; i++ {
		blockevent(int64(rate), 1)
	}
}
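
// The two helpers above are tuned to report the same total number of cycles
// (100000 * rate/10 == 10000 * rate), so an unbiased profiler should
// attribute roughly equal delay totals to each; TestBlockProfileBias checks
// exactly that ratio.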

// Used by TestBlockProfileBias.
//
//go:linkname blockevent runtime.blockevent
func blockevent(cycles int64, skip int)

func TestMutexProfile(t *testing.T) {
	// Generate mutex profile

	old := runtime.SetMutexProfileFraction(1)
	defer runtime.SetMutexProfileFraction(old)
	if old != 0 {
		t.Fatalf("need MutexProfileRate 0, got %d", old)
	}

	const (
		N = 100
		D = 100 * time.Millisecond
	)
	start := time.Now()
	blockMutexN(t, N, D)
	blockMutexNTime := time.Since(start)

	t.Run("debug=1", func(t *testing.T) {
		var w strings.Builder
		Lookup("mutex").WriteTo(&w, 1)
		prof := w.String()
		t.Logf("received profile: %v", prof)

		if !strings.HasPrefix(prof, "--- mutex:\ncycles/second=") {
			t.Errorf("Bad profile header:\n%v", prof)
		}
		prof = strings.Trim(prof, "\n")
		lines := strings.Split(prof, "\n")
		if len(lines) < 6 {
			t.Fatalf("expected >=6 lines, got %d %q\n%s", len(lines), prof, prof)
		}
		// checking that the line is like "35258904 1 @ 0x48288d 0x47cd28 0x458931"
		r2 := `^\d+ \d+ @(?: 0x[[:xdigit:]]+)+`
		if ok, err := regexp.MatchString(r2, lines[3]); err != nil || !ok {
			t.Errorf("%q didn't match %q", lines[3], r2)
		}
		r3 := "^#.*runtime/pprof.blockMutex.*$"
		if ok, err := regexp.MatchString(r3, lines[5]); err != nil || !ok {
			t.Errorf("%q didn't match %q", lines[5], r3)
		}
		t.Log(prof)
	})
	t.Run("proto", func(t *testing.T) {
		// proto format
		var w bytes.Buffer
		Lookup("mutex").WriteTo(&w, 0)
		p, err := profile.Parse(&w)
		if err != nil {
			t.Fatalf("failed to parse profile: %v", err)
		}
		t.Logf("parsed proto: %s", p)
		if err := p.CheckValid(); err != nil {
			t.Fatalf("invalid profile: %v", err)
		}

		stks := stacks(p)
		for _, want := range [][]string{
			{"sync.(*Mutex).Unlock", "runtime/pprof.blockMutexN.func1"},
		} {
			if !containsStack(stks, want) {
				t.Errorf("No matching stack entry for %+v", want)
			}
		}

		i := 0
		for ; i < len(p.SampleType); i++ {
			if p.SampleType[i].Unit == "nanoseconds" {
				break
			}
		}
		if i >= len(p.SampleType) {
			t.Fatalf("profile did not contain nanoseconds sample")
		}
		total := int64(0)
		for _, s := range p.Sample {
			total += s.Value[i]
		}
		// Want d to be at least N*D, but give some wiggle-room to avoid
		// a test flaking. Set an upper-bound proportional to the total
		// wall time spent in blockMutexN. Generally speaking, the total
		// contention time could be arbitrarily high when considering
		// OS scheduler delays, or any other delays from the environment:
		// time keeps ticking during these delays. By making the upper
		// bound proportional to the wall time in blockMutexN, in theory
		// we're accounting for all these possible delays.
		d := time.Duration(total)
		lo := time.Duration(N * D * 9 / 10)
		hi := time.Duration(N) * blockMutexNTime * 11 / 10
		if d < lo || d > hi {
			for _, s := range p.Sample {
				t.Logf("sample: %s", time.Duration(s.Value[i]))
			}
			t.Fatalf("profile samples total %v, want within range [%v, %v] (target: %v)", d, lo, hi, N*D)
		}
	})
}

func TestMutexProfileRateAdjust(t *testing.T) {
	old := runtime.SetMutexProfileFraction(1)
	defer runtime.SetMutexProfileFraction(old)
	if old != 0 {
		t.Fatalf("need MutexProfileRate 0, got %d", old)
	}

	readProfile := func() (contentions int64, delay int64) {
		var w bytes.Buffer
		Lookup("mutex").WriteTo(&w, 0)
		p, err := profile.Parse(&w)
		if err != nil {
			t.Fatalf("failed to parse profile: %v", err)
		}
		t.Logf("parsed proto: %s", p)
		if err := p.CheckValid(); err != nil {
			t.Fatalf("invalid profile: %v", err)
		}

		for _, s := range p.Sample {
			var match, runtimeInternal bool
			for _, l := range s.Location {
				for _, line := range l.Line {
					if line.Function.Name == "runtime/pprof.blockMutex.func1" {
						match = true
					}
					if line.Function.Name == "runtime.unlock" {
						runtimeInternal = true
					}
				}
			}
			if match && !runtimeInternal {
				contentions += s.Value[0]
				delay += s.Value[1]
			}
		}
		return
	}

	blockMutex(t)
	contentions, delay := readProfile()
	if contentions == 0 || delay == 0 {
		t.Fatal("did not see expected function in profile")
	}
	runtime.SetMutexProfileFraction(0)
	newContentions, newDelay := readProfile()
	if newContentions != contentions || newDelay != delay {
		t.Fatalf("sample value changed: got [%d, %d], want [%d, %d]", newContentions, newDelay, contentions, delay)
	}
}

func func1(c chan int) { <-c }
func func2(c chan int) { <-c }
func func3(c chan int) { <-c }
func func4(c chan int) { <-c }

func TestGoroutineCounts(t *testing.T) {
	// Setting GOMAXPROCS to 1 ensures we can force all goroutines to the
	// desired blocking point.
	defer runtime.GOMAXPROCS(runtime.GOMAXPROCS(1))

	c := make(chan int)
	for i := 0; i < 100; i++ {
		switch {
		case i%10 == 0:
			go func1(c)
		case i%2 == 0:
			go func2(c)
		default:
			go func3(c)
		}
		// Let goroutines block on channel
		for j := 0; j < 5; j++ {
			runtime.Gosched()
		}
	}
	ctx := context.Background()

	// ... and again, with labels this time (just with fewer iterations to keep
	// sorting deterministic).
	Do(ctx, Labels("label", "value"), func(context.Context) {
		for i := 0; i < 89; i++ {
			switch {
			case i%10 == 0:
				go func1(c)
			case i%2 == 0:
				go func2(c)
			default:
				go func3(c)
			}
			// Let goroutines block on channel
			for j := 0; j < 5; j++ {
				runtime.Gosched()
			}
		}
	})

	SetGoroutineLabels(WithLabels(context.Background(), Labels("self-label", "self-value")))
	defer SetGoroutineLabels(context.Background())

	garbage := new(*int)
	fingReady := make(chan struct{})
	runtime.SetFinalizer(garbage, func(v **int) {
		Do(context.Background(), Labels("fing-label", "fing-value"), func(ctx context.Context) {
			close(fingReady)
			<-c
		})
	})
	garbage = nil
	for i := 0; i < 2; i++ {
		runtime.GC()
	}
	<-fingReady

	var w bytes.Buffer
	goroutineProf := Lookup("goroutine")

	// Check debug profile
	goroutineProf.WriteTo(&w, 1)
	prof := w.String()

	labels := labelMap{"label": "value"}
	labelStr := "\n# labels: " + labels.String()
	selfLabel := labelMap{"self-label": "self-value"}
	selfLabelStr := "\n# labels: " + selfLabel.String()
	fingLabel := labelMap{"fing-label": "fing-value"}
	fingLabelStr := "\n# labels: " + fingLabel.String()
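	// The expected counts follow from the loops above: of the 100 unlabeled
	// goroutines, 50 run func3, 40 run func2, and 10 run func1; of the 89
	// labeled ones, 44 run func3, 36 run func2, and 9 run func1. The trailing
	// "1 @" entries cover the remaining singleton goroutines, including the
	// labeled finalizer goroutine.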
	orderedPrefix := []string{
		"\n50 @ ",
		"\n44 @", labelStr,
		"\n40 @",
		"\n36 @", labelStr,
		"\n10 @",
		"\n9 @", labelStr,
		"\n1 @"}
	if !containsInOrder(prof, append(orderedPrefix, selfLabelStr)...) {
		t.Errorf("expected sorted goroutine counts with Labels:\n%s", prof)
	}
	if !containsInOrder(prof, append(orderedPrefix, fingLabelStr)...) {
		t.Errorf("expected sorted goroutine counts with Labels:\n%s", prof)
	}

	// Check proto profile
	w.Reset()
	goroutineProf.WriteTo(&w, 0)
	p, err := profile.Parse(&w)
	if err != nil {
		t.Errorf("error parsing protobuf profile: %v", err)
	}
	if err := p.CheckValid(); err != nil {
		t.Errorf("protobuf profile is invalid: %v", err)
	}
	expectedLabels := map[int64]map[string]string{
		50: {},
		44: {"label": "value"},
		40: {},
		36: {"label": "value"},
		10: {},
		9:  {"label": "value"},
		1:  {"self-label": "self-value", "fing-label": "fing-value"},
	}
	if !containsCountsLabels(p, expectedLabels) {
		t.Errorf("expected count profile to contain goroutines with counts and labels %v, got %v",
			expectedLabels, p)
	}

	close(c)

	time.Sleep(10 * time.Millisecond) // let goroutines exit
}

func containsInOrder(s string, all ...string) bool {
	for _, t := range all {
		var ok bool
		if _, s, ok = strings.Cut(s, t); !ok {
			return false
		}
	}
	return true
}

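// containsCountsLabels reports whether prof matches the expectation in
// countLabels: for every count, a sample with that value carrying each
// listed label key/value pair. It tallies expectations in two multisets
// (m for counts, n for count/label triples), subtracts what the profile
// actually contains, and checks the remainders.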
func containsCountsLabels(prof *profile.Profile, countLabels map[int64]map[string]string) bool {
	m := make(map[int64]int)
	type nkey struct {
		count    int64
		key, val string
	}
	n := make(map[nkey]int)
	for c, kv := range countLabels {
		m[c]++
		for k, v := range kv {
			n[nkey{
				count: c,
				key:   k,
				val:   v,
			}]++

		}
	}
	for _, s := range prof.Sample {
		// The count is the single value in the sample
		if len(s.Value) != 1 {
			return false
		}
		m[s.Value[0]]--
		for k, vs := range s.Label {
			for _, v := range vs {
				n[nkey{
					count: s.Value[0],
					key:   k,
					val:   v,
				}]--
			}
		}
	}
	for _, n := range m {
		if n > 0 {
			return false
		}
	}
	for _, ncnt := range n {
		if ncnt != 0 {
			return false
		}
	}
	return true
}

func TestGoroutineProfileConcurrency(t *testing.T) {
	testenv.MustHaveParallelism(t)

	goroutineProf := Lookup("goroutine")

	profilerCalls := func(s string) int {
		return strings.Count(s, "\truntime/pprof.runtime_goroutineProfileWithLabels+")
	}

	includesFinalizer := func(s string) bool {
		return strings.Contains(s, "runtime.runfinq")
	}

	// Concurrent calls to the goroutine profiler should not trigger data races
	// or corruption.
	t.Run("overlapping profile requests", func(t *testing.T) {
		ctx := context.Background()
		ctx, cancel := context.WithTimeout(ctx, 10*time.Second)
		defer cancel()

		var wg sync.WaitGroup
		for i := 0; i < 2; i++ {
			wg.Add(1)
			Do(ctx, Labels("i", fmt.Sprint(i)), func(context.Context) {
				go func() {
					defer wg.Done()
					for ctx.Err() == nil {
						var w strings.Builder
						goroutineProf.WriteTo(&w, 1)
						prof := w.String()
						count := profilerCalls(prof)
						if count >= 2 {
							t.Logf("prof %d\n%s", count, prof)
							cancel()
						}
					}
				}()
			})
		}
		wg.Wait()
	})

	// The finalizer goroutine should not show up in most profiles, since it's
	// marked as a system goroutine when idle.
	t.Run("finalizer not present", func(t *testing.T) {
		var w strings.Builder
		goroutineProf.WriteTo(&w, 1)
		prof := w.String()
		if includesFinalizer(prof) {
			t.Errorf("profile includes finalizer (but finalizer should be marked as system):\n%s", prof)
		}
	})

	// The finalizer goroutine should show up when it's running user code.
	t.Run("finalizer present", func(t *testing.T) {
		// T is a pointer type so it won't be allocated by the tiny
		// allocator, which can lead to its finalizer not being called
		// during this test.
		type T *byte
		obj := new(T)
		ch1, ch2 := make(chan int), make(chan int)
		defer close(ch2)
		runtime.SetFinalizer(obj, func(_ interface{}) {
			close(ch1)
			<-ch2
		})
		obj = nil
		for i := 10; i >= 0; i-- {
			select {
			case <-ch1:
			default:
				if i == 0 {
					t.Fatalf("finalizer did not run")
				}
				runtime.GC()
			}
		}
		var w strings.Builder
		goroutineProf.WriteTo(&w, 1)
		prof := w.String()
		if !includesFinalizer(prof) {
			t.Errorf("profile does not include finalizer (and it should be marked as user):\n%s", prof)
		}
	})

	// Check that new goroutines only show up in order.
	testLaunches := func(t *testing.T) {
		var done sync.WaitGroup
		defer done.Wait()

		ctx := context.Background()
		ctx, cancel := context.WithCancel(ctx)
		defer cancel()

		ch := make(chan int)
		defer close(ch)

		var ready sync.WaitGroup

		// These goroutines all survive until the end of the subtest, so we can
		// check that a (numbered) goroutine appearing in the profile implies
		// that all older goroutines also appear in the profile.
		ready.Add(1)
		done.Add(1)
		go func() {
			defer done.Done()
			for i := 0; ctx.Err() == nil; i++ {
				// Use SetGoroutineLabels rather than Do, so we can always expect an
				// extra goroutine (this one) with the most recent label.
				SetGoroutineLabels(WithLabels(ctx, Labels(t.Name()+"-loop-i", fmt.Sprint(i))))
				done.Add(1)
				go func() {
					<-ch
					done.Done()
				}()
				for j := 0; j < i; j++ {
					// Spin for longer and longer as the test goes on. This
					// goroutine will do O(N^2) work with the number of
					// goroutines it launches. This should be slow relative to
					// the work involved in collecting a goroutine profile,
					// which is O(N) with the high-water mark of the number of
					// goroutines in this process (in the allgs slice).
					runtime.Gosched()
				}
				if i == 0 {
					ready.Done()
				}
			}
		}()

		// Short-lived goroutines exercise different code paths (goroutines with
		// status _Gdead, for instance). This churn doesn't have behavior that
		// we can test directly, but does help to shake out data races.
		ready.Add(1)
		var churn func(i int)
		churn = func(i int) {
			SetGoroutineLabels(WithLabels(ctx, Labels(t.Name()+"-churn-i", fmt.Sprint(i))))
			if i == 0 {
				ready.Done()
			} else if i%16 == 0 {
				// Yield on occasion so this sequence of goroutine launches
				// doesn't monopolize a P. See issue #52934.
				runtime.Gosched()
			}
			if ctx.Err() == nil {
				go churn(i + 1)
			}
		}
		go func() {
			churn(0)
		}()

		ready.Wait()

		var w [3]bytes.Buffer
		for i := range w {
			goroutineProf.WriteTo(&w[i], 0)
		}
		for i := range w {
			p, err := profile.Parse(bytes.NewReader(w[i].Bytes()))
			if err != nil {
				t.Errorf("error parsing protobuf profile: %v", err)
			}

			// High-numbered loop-i goroutines imply that every lower-numbered
			// loop-i goroutine should be present in the profile too.
			counts := make(map[string]int)
			for _, s := range p.Sample {
				label := s.Label[t.Name()+"-loop-i"]
				if len(label) > 0 {
					counts[label[0]]++
				}
			}
			for j, max := 0, len(counts)-1; j <= max; j++ {
				n := counts[fmt.Sprint(j)]
				if n == 1 || (n == 2 && j == max) {
					continue
				}
				t.Errorf("profile #%d's goroutines with label loop-i:%d; %d != 1 (or 2 for the last entry, %d)",
					i+1, j, n, max)
				t.Logf("counts %v", counts)
				break
			}
		}
	}

	runs := 100
	if testing.Short() {
		runs = 5
	}
	for i := 0; i < runs; i++ {
		// Run multiple times to shake out data races
		t.Run("goroutine launches", testLaunches)
	}
}

func BenchmarkGoroutine(b *testing.B) {
	withIdle := func(n int, fn func(b *testing.B)) func(b *testing.B) {
		return func(b *testing.B) {
			c := make(chan int)
			var ready, done sync.WaitGroup
			defer func() {
				close(c)
				done.Wait()
			}()

			for i := 0; i < n; i++ {
				ready.Add(1)
				done.Add(1)
				go func() {
					ready.Done()
					<-c
					done.Done()
				}()
			}
			// Let goroutines block on channel
			ready.Wait()
			for i := 0; i < 5; i++ {
				runtime.Gosched()
			}

			fn(b)
		}
	}

	withChurn := func(fn func(b *testing.B)) func(b *testing.B) {
		return func(b *testing.B) {
			ctx := context.Background()
			ctx, cancel := context.WithCancel(ctx)
			defer cancel()

			var ready sync.WaitGroup
			ready.Add(1)
			var count int64
			var churn func(i int)
			churn = func(i int) {
				SetGoroutineLabels(WithLabels(ctx, Labels("churn-i", fmt.Sprint(i))))
				atomic.AddInt64(&count, 1)
				if i == 0 {
					ready.Done()
				}
				if ctx.Err() == nil {
					go churn(i + 1)
				}
			}
			go func() {
				churn(0)
			}()
			ready.Wait()

			fn(b)
			b.ReportMetric(float64(atomic.LoadInt64(&count))/float64(b.N), "concurrent_launches/op")
		}
	}

	benchWriteTo := func(b *testing.B) {
		goroutineProf := Lookup("goroutine")
		b.ResetTimer()
		for i := 0; i < b.N; i++ {
			goroutineProf.WriteTo(io.Discard, 0)
		}
		b.StopTimer()
	}

	benchGoroutineProfile := func(b *testing.B) {
		p := make([]runtime.StackRecord, 10000)
		b.ResetTimer()
		for i := 0; i < b.N; i++ {
			runtime.GoroutineProfile(p)
		}
		b.StopTimer()
	}

	// Note that some costs of collecting a goroutine profile depend on the
	// length of the runtime.allgs slice, which never shrinks. Stay within the
	// race detector's 8k-goroutine limit.
1837	for _, n := range []int{50, 500, 5000} {
1838		b.Run(fmt.Sprintf("Profile.WriteTo idle %d", n), withIdle(n, benchWriteTo))
1839		b.Run(fmt.Sprintf("Profile.WriteTo churn %d", n), withIdle(n, withChurn(benchWriteTo)))
1840		b.Run(fmt.Sprintf("runtime.GoroutineProfile churn %d", n), withIdle(n, withChurn(benchGoroutineProfile)))
1841	}
1842}
1843
1844var emptyCallStackTestRun int64
1845
1846// Issue 18836.
1847func TestEmptyCallStack(t *testing.T) {
1848	name := fmt.Sprintf("test18836_%d", emptyCallStackTestRun)
1849	emptyCallStackTestRun++
1850
1851	t.Parallel()
1852	var buf strings.Builder
1853	p := NewProfile(name)
1854
1855	p.Add("foo", 47674)
1856	p.WriteTo(&buf, 1)
1857	p.Remove("foo")
1858	got := buf.String()
1859	prefix := name + " profile: total 1\n"
1860	if !strings.HasPrefix(got, prefix) {
1861		t.Fatalf("got:\n\t%q\nwant prefix:\n\t%q\n", got, prefix)
1862	}
1863	lostevent := "lostProfileEvent"
1864	if !strings.Contains(got, lostevent) {
1865		t.Fatalf("got:\n\t%q\ndoes not contain:\n\t%q\n", got, lostevent)
1866	}
1867}
1868
1869// stackContainsLabeled takes a spec like funcname;key=value and matches if the stack has that key
1870// and value and has funcname somewhere in the stack.
1871func stackContainsLabeled(spec string, count uintptr, stk []*profile.Location, labels map[string][]string) bool {
1872	base, kv, ok := strings.Cut(spec, ";")
1873	if !ok {
1874		panic("no semicolon in key/value spec")
1875	}
1876	k, v, ok := strings.Cut(kv, "=")
1877	if !ok {
1878		panic("missing = in key/value spec")
1879	}
1880	if !contains(labels[k], v) {
1881		return false
1882	}
1883	return stackContains(base, count, stk, labels)
1884}
1885
1886func TestCPUProfileLabel(t *testing.T) {
1887	matches := matchAndAvoidStacks(stackContainsLabeled, []string{"runtime/pprof.cpuHogger;key=value"}, avoidFunctions())
1888	testCPUProfile(t, matches, func(dur time.Duration) {
1889		Do(context.Background(), Labels("key", "value"), func(context.Context) {
1890			cpuHogger(cpuHog1, &salt1, dur)
1891		})
1892	})
1893}
1894
1895func TestLabelRace(t *testing.T) {
1896	testenv.MustHaveParallelism(t)
1897	// Test the race detector annotations for synchronization
1898	// between setting labels and consuming them from the
1899	// profile.
1900	matches := matchAndAvoidStacks(stackContainsLabeled, []string{"runtime/pprof.cpuHogger;key=value"}, nil)
1901	testCPUProfile(t, matches, func(dur time.Duration) {
1902		start := time.Now()
1903		var wg sync.WaitGroup
1904		for time.Since(start) < dur {
1905			var salts [10]int
1906			for i := 0; i < 10; i++ {
1907				wg.Add(1)
1908				go func(j int) {
1909					Do(context.Background(), Labels("key", "value"), func(context.Context) {
1910						cpuHogger(cpuHog1, &salts[j], time.Millisecond)
1911					})
1912					wg.Done()
1913				}(i)
1914			}
1915			wg.Wait()
1916		}
1917	})
1918}
1919
1920func TestGoroutineProfileLabelRace(t *testing.T) {
1921	testenv.MustHaveParallelism(t)
1922	// Test the race detector annotations for synchronization
1923	// between setting labels and consuming them from the
1924	// goroutine profile. See issue #50292.
1925
1926	t.Run("reset", func(t *testing.T) {
1927		ctx := context.Background()
1928		ctx, cancel := context.WithCancel(ctx)
1929		defer cancel()
1930
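		// A writer goroutine repeatedly collects the goroutine profile
		// while the loop below keeps replacing this goroutine's labels.
		// The writer cancels the context once it observes one of those
		// labels in the profile output.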
1931		go func() {
1932			goroutineProf := Lookup("goroutine")
1933			for ctx.Err() == nil {
1934				var w strings.Builder
1935				goroutineProf.WriteTo(&w, 1)
1936				prof := w.String()
1937				if strings.Contains(prof, "loop-i") {
1938					cancel()
1939				}
1940			}
1941		}()
1942
1943		for i := 0; ctx.Err() == nil; i++ {
1944			Do(ctx, Labels("loop-i", fmt.Sprint(i)), func(ctx context.Context) {
1945			})
1946		}
1947	})
1948
1949	t.Run("churn", func(t *testing.T) {
1950		ctx := context.Background()
1951		ctx, cancel := context.WithCancel(ctx)
1952		defer cancel()
1953
1954		var ready sync.WaitGroup
1955		ready.Add(1)
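		// churn spawns a chain of goroutines, each setting a fresh
		// label before starting its successor and exiting, so labeled
		// goroutines are constantly appearing and disappearing while
		// the profile is collected below.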
1956		var churn func(i int)
1957		churn = func(i int) {
1958			SetGoroutineLabels(WithLabels(ctx, Labels("churn-i", fmt.Sprint(i))))
1959			if i == 0 {
1960				ready.Done()
1961			}
1962			if ctx.Err() == nil {
1963				go churn(i + 1)
1964			}
1965		}
1966		go func() {
1967			churn(0)
1968		}()
1969		ready.Wait()
1970
1971		goroutineProf := Lookup("goroutine")
1972		for i := 0; i < 10; i++ {
1973			goroutineProf.WriteTo(io.Discard, 1)
1974		}
1975	})
1976}
1977
// TestLabelSystemstack makes sure CPU profiler samples of goroutines running
// on the system stack include the correct pprof labels. See issue #48577.
1980func TestLabelSystemstack(t *testing.T) {
1981	// Grab and re-set the initial value before continuing to ensure
1982	// GOGC doesn't actually change following the test.
1983	gogc := debug.SetGCPercent(100)
1984	debug.SetGCPercent(gogc)
1985
1986	matches := matchAndAvoidStacks(stackContainsLabeled, []string{"runtime.systemstack;key=value"}, avoidFunctions())
1987	p := testCPUProfile(t, matches, func(dur time.Duration) {
1988		Do(context.Background(), Labels("key", "value"), func(ctx context.Context) {
1989			parallelLabelHog(ctx, dur, gogc)
1990		})
1991	})
1992
1993	// Two conditions to check:
1994	// * labelHog should always be labeled.
1995	// * The label should _only_ appear on labelHog and the Do call above.
1996	for _, s := range p.Sample {
1997		isLabeled := s.Label != nil && contains(s.Label["key"], "value")
1998		var (
1999			mayBeLabeled     bool
2000			mustBeLabeled    string
2001			mustNotBeLabeled string
2002		)
2003		for _, loc := range s.Location {
2004			for _, l := range loc.Line {
2005				switch l.Function.Name {
2006				case "runtime/pprof.labelHog", "runtime/pprof.parallelLabelHog", "runtime/pprof.parallelLabelHog.func1":
2007					mustBeLabeled = l.Function.Name
2008				case "runtime/pprof.Do":
2009					// Do sets the labels, so samples may
2010					// or may not be labeled depending on
2011					// which part of the function they are
2012					// at.
2013					mayBeLabeled = true
2014				case "runtime.bgsweep", "runtime.bgscavenge", "runtime.forcegchelper", "runtime.gcBgMarkWorker", "runtime.runfinq", "runtime.sysmon":
2015					// Runtime system goroutines or threads
2016					// (such as those identified by
2017					// runtime.isSystemGoroutine). These
2018					// should never be labeled.
2019					mustNotBeLabeled = l.Function.Name
2020				case "gogo", "gosave_systemstack_switch", "racecall":
					// These are context-switch and race
					// critical sections that we can't do
					// a full traceback from. Typically
					// this would be covered by the
					// runtime check below, but these
					// symbols don't have the package
					// name.
2027					mayBeLabeled = true
2028				}
2029
2030				if strings.HasPrefix(l.Function.Name, "runtime.") {
2031					// There are many places in the runtime
2032					// where we can't do a full traceback.
					// Ideally we'd list them all, but
					// barring that, allow anything in the
2035					// runtime, unless explicitly excluded
2036					// above.
2037					mayBeLabeled = true
2038				}
2039			}
2040		}
2041		errorStack := func(f string, args ...any) {
2042			var buf strings.Builder
2043			fprintStack(&buf, s.Location)
2044			t.Errorf("%s: %s", fmt.Sprintf(f, args...), buf.String())
2045		}
2046		if mustBeLabeled != "" && mustNotBeLabeled != "" {
2047			errorStack("sample contains both %s, which must be labeled, and %s, which must not be labeled", mustBeLabeled, mustNotBeLabeled)
2048			continue
2049		}
2050		if mustBeLabeled != "" || mustNotBeLabeled != "" {
2051			// We found a definitive frame, so mayBeLabeled hints are not relevant.
2052			mayBeLabeled = false
2053		}
2054		if mayBeLabeled {
2055			// This sample may or may not be labeled, so there's nothing we can check.
2056			continue
2057		}
2058		if mustBeLabeled != "" && !isLabeled {
2059			errorStack("sample must be labeled because of %s, but is not", mustBeLabeled)
2060		}
2061		if mustNotBeLabeled != "" && isLabeled {
2062			errorStack("sample must not be labeled because of %s, but is", mustNotBeLabeled)
2063		}
2064	}
2065}
2066
// labelHog is designed to burn CPU time in a way that causes a high number of
// CPU samples to end up running on the system stack.
2069func labelHog(stop chan struct{}, gogc int) {
	// Regression test for issue 50032. We must give the GC an opportunity
	// to be initially triggered by a labeled goroutine.
2072	runtime.GC()
2073
2074	for i := 0; ; i++ {
2075		select {
2076		case <-stop:
2077			return
2078		default:
2079			debug.SetGCPercent(gogc)
2080		}
2081	}
2082}
2083
2084// parallelLabelHog runs GOMAXPROCS goroutines running labelHog.
2085func parallelLabelHog(ctx context.Context, dur time.Duration, gogc int) {
2086	var wg sync.WaitGroup
2087	stop := make(chan struct{})
2088	for i := 0; i < runtime.GOMAXPROCS(0); i++ {
2089		wg.Add(1)
2090		go func() {
2091			defer wg.Done()
2092			labelHog(stop, gogc)
2093		}()
2094	}
2095
2096	time.Sleep(dur)
2097	close(stop)
2098	wg.Wait()
2099}
2100
// Check that there is no deadlock when the program receives SIGPROF while in
// the critical section of 64-bit atomics. Used to happen on mips{,le}. See #20146.
2103func TestAtomicLoadStore64(t *testing.T) {
2104	f, err := os.CreateTemp("", "profatomic")
2105	if err != nil {
2106		t.Fatalf("TempFile: %v", err)
2107	}
2108	defer os.Remove(f.Name())
2109	defer f.Close()
2110
2111	if err := StartCPUProfile(f); err != nil {
2112		t.Fatal(err)
2113	}
2114	defer StopCPUProfile()
2115
2116	var flag uint64
2117	done := make(chan bool, 1)
2118
2119	go func() {
2120		for atomic.LoadUint64(&flag) == 0 {
2121			runtime.Gosched()
2122		}
2123		done <- true
2124	}()
2125	time.Sleep(50 * time.Millisecond)
2126	atomic.StoreUint64(&flag, 1)
2127	<-done
2128}
2129
2130func TestTracebackAll(t *testing.T) {
2131	// With gccgo, if a profiling signal arrives at the wrong time
2132	// during traceback, it may crash or hang. See issue #29448.
2133	f, err := os.CreateTemp("", "proftraceback")
2134	if err != nil {
2135		t.Fatalf("TempFile: %v", err)
2136	}
2137	defer os.Remove(f.Name())
2138	defer f.Close()
2139
2140	if err := StartCPUProfile(f); err != nil {
2141		t.Fatal(err)
2142	}
2143	defer StopCPUProfile()
2144
2145	ch := make(chan int)
2146	defer close(ch)
2147
2148	count := 10
2149	for i := 0; i < count; i++ {
2150		go func() {
2151			<-ch // block
2152		}()
2153	}
2154
2155	N := 10000
2156	if testing.Short() {
2157		N = 500
2158	}
2159	buf := make([]byte, 10*1024)
2160	for i := 0; i < N; i++ {
2161		runtime.Stack(buf, true)
2162	}
2163}
2164
2165// TestTryAdd tests the cases that are hard to test with real program execution.
2166//
// For example, the current Go compilers may not always inline functions
// involved in recursion, but that may not be true in future compilers. This
// tests such cases by using fake call sequences and building the profile
// directly via translateCPUProfile, defined in proto_test.go.
2171func TestTryAdd(t *testing.T) {
2172	if _, found := findInlinedCall(inlinedCallerDump, 4<<10); !found {
2173		t.Skip("Can't determine whether anything was inlined into inlinedCallerDump.")
2174	}
2175
2176	// inlinedCallerDump
2177	//   inlinedCalleeDump
2178	pcs := make([]uintptr, 2)
2179	inlinedCallerDump(pcs)
2180	inlinedCallerStack := make([]uint64, 2)
2181	for i := range pcs {
2182		inlinedCallerStack[i] = uint64(pcs[i])
2183	}
2184	wrapperPCs := make([]uintptr, 1)
2185	inlinedWrapperCallerDump(wrapperPCs)
2186
2187	if _, found := findInlinedCall(recursionChainBottom, 4<<10); !found {
2188		t.Skip("Can't determine whether anything was inlined into recursionChainBottom.")
2189	}
2190
2191	// recursionChainTop
2192	//   recursionChainMiddle
2193	//     recursionChainBottom
2194	//       recursionChainTop
2195	//         recursionChainMiddle
2196	//           recursionChainBottom
2197	pcs = make([]uintptr, 6)
2198	recursionChainTop(1, pcs)
2199	recursionStack := make([]uint64, len(pcs))
2200	for i := range pcs {
2201		recursionStack[i] = uint64(pcs[i])
2202	}
2203
	period := int64(2000 * 1000) // 1/500 s = 2e6 ns; must match the hz = 500 records below.
2205
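	// Each test case's input below is a stream of uint64 words in the
	// format consumed by profileBuilder.addCPUData: a record starts with
	// its total length in words and a word that is unused here, followed
	// by the profiling rate in hz (first record) or the sample count
	// (later records), and then the stack PCs. For example,
	// "5, 0, 50, pc0, pc1" is a 5-word record for a sample seen 50 times
	// with a two-frame stack. Sample Values in wantSamples are
	// [count, count*period], i.e. samples and nanoseconds.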
2206	testCases := []struct {
2207		name        string
2208		input       []uint64          // following the input format assumed by profileBuilder.addCPUData.
2209		count       int               // number of records in input.
2210		wantLocs    [][]string        // ordered location entries with function names.
2211		wantSamples []*profile.Sample // ordered samples, we care only about Value and the profile location IDs.
2212	}{{
2213		// Sanity test for a normal, complete stack trace.
2214		name: "full_stack_trace",
2215		input: []uint64{
2216			3, 0, 500, // hz = 500. Must match the period.
2217			5, 0, 50, inlinedCallerStack[0], inlinedCallerStack[1],
2218		},
2219		count: 2,
2220		wantLocs: [][]string{
2221			{"runtime/pprof.inlinedCalleeDump", "runtime/pprof.inlinedCallerDump"},
2222		},
2223		wantSamples: []*profile.Sample{
2224			{Value: []int64{50, 50 * period}, Location: []*profile.Location{{ID: 1}}},
2225		},
2226	}, {
2227		name: "bug35538",
2228		input: []uint64{
2229			3, 0, 500, // hz = 500. Must match the period.
2230			// Fake frame: tryAdd will have inlinedCallerDump
2231			// (stack[1]) on the deck when it encounters the next
2232			// inline function. It should accept this.
2233			7, 0, 10, inlinedCallerStack[0], inlinedCallerStack[1], inlinedCallerStack[0], inlinedCallerStack[1],
2234			5, 0, 20, inlinedCallerStack[0], inlinedCallerStack[1],
2235		},
2236		count:    3,
2237		wantLocs: [][]string{{"runtime/pprof.inlinedCalleeDump", "runtime/pprof.inlinedCallerDump"}},
2238		wantSamples: []*profile.Sample{
2239			{Value: []int64{10, 10 * period}, Location: []*profile.Location{{ID: 1}, {ID: 1}}},
2240			{Value: []int64{20, 20 * period}, Location: []*profile.Location{{ID: 1}}},
2241		},
2242	}, {
2243		name: "bug38096",
2244		input: []uint64{
2245			3, 0, 500, // hz = 500. Must match the period.
2246			// count (data[2]) == 0 && len(stk) == 1 is an overflow
2247			// entry. The "stk" entry is actually the count.
2248			4, 0, 0, 4242,
2249		},
2250		count:    2,
2251		wantLocs: [][]string{{"runtime/pprof.lostProfileEvent"}},
2252		wantSamples: []*profile.Sample{
2253			{Value: []int64{4242, 4242 * period}, Location: []*profile.Location{{ID: 1}}},
2254		},
2255	}, {
2256		// If a function is directly called recursively then it must
2257		// not be inlined in the caller.
2258		//
		// N.B. We're generating an impossible profile here, with a
		// recursive inlinedCalleeDump call. This is simulating a non-Go
		// function that looks like an inlined Go function other than
		// its recursive property. See pcDeck.tryAdd.
2263		name: "directly_recursive_func_is_not_inlined",
2264		input: []uint64{
2265			3, 0, 500, // hz = 500. Must match the period.
2266			5, 0, 30, inlinedCallerStack[0], inlinedCallerStack[0],
2267			4, 0, 40, inlinedCallerStack[0],
2268		},
2269		count: 3,
2270		// inlinedCallerDump shows up here because
2271		// runtime_expandFinalInlineFrame adds it to the stack frame.
2272		wantLocs: [][]string{{"runtime/pprof.inlinedCalleeDump"}, {"runtime/pprof.inlinedCallerDump"}},
2273		wantSamples: []*profile.Sample{
2274			{Value: []int64{30, 30 * period}, Location: []*profile.Location{{ID: 1}, {ID: 1}, {ID: 2}}},
2275			{Value: []int64{40, 40 * period}, Location: []*profile.Location{{ID: 1}, {ID: 2}}},
2276		},
2277	}, {
2278		name: "recursion_chain_inline",
2279		input: []uint64{
2280			3, 0, 500, // hz = 500. Must match the period.
2281			9, 0, 10, recursionStack[0], recursionStack[1], recursionStack[2], recursionStack[3], recursionStack[4], recursionStack[5],
2282		},
2283		count: 2,
2284		wantLocs: [][]string{
2285			{"runtime/pprof.recursionChainBottom"},
2286			{
2287				"runtime/pprof.recursionChainMiddle",
2288				"runtime/pprof.recursionChainTop",
2289				"runtime/pprof.recursionChainBottom",
2290			},
2291			{
2292				"runtime/pprof.recursionChainMiddle",
2293				"runtime/pprof.recursionChainTop",
2294				"runtime/pprof.TestTryAdd", // inlined into the test.
2295			},
2296		},
2297		wantSamples: []*profile.Sample{
2298			{Value: []int64{10, 10 * period}, Location: []*profile.Location{{ID: 1}, {ID: 2}, {ID: 3}}},
2299		},
2300	}, {
2301		name: "truncated_stack_trace_later",
2302		input: []uint64{
2303			3, 0, 500, // hz = 500. Must match the period.
2304			5, 0, 50, inlinedCallerStack[0], inlinedCallerStack[1],
2305			4, 0, 60, inlinedCallerStack[0],
2306		},
2307		count:    3,
2308		wantLocs: [][]string{{"runtime/pprof.inlinedCalleeDump", "runtime/pprof.inlinedCallerDump"}},
2309		wantSamples: []*profile.Sample{
2310			{Value: []int64{50, 50 * period}, Location: []*profile.Location{{ID: 1}}},
2311			{Value: []int64{60, 60 * period}, Location: []*profile.Location{{ID: 1}}},
2312		},
2313	}, {
2314		name: "truncated_stack_trace_first",
2315		input: []uint64{
2316			3, 0, 500, // hz = 500. Must match the period.
2317			4, 0, 70, inlinedCallerStack[0],
2318			5, 0, 80, inlinedCallerStack[0], inlinedCallerStack[1],
2319		},
2320		count:    3,
2321		wantLocs: [][]string{{"runtime/pprof.inlinedCalleeDump", "runtime/pprof.inlinedCallerDump"}},
2322		wantSamples: []*profile.Sample{
2323			{Value: []int64{70, 70 * period}, Location: []*profile.Location{{ID: 1}}},
2324			{Value: []int64{80, 80 * period}, Location: []*profile.Location{{ID: 1}}},
2325		},
2326	}, {
2327		// We can recover the inlined caller from a truncated stack.
2328		name: "truncated_stack_trace_only",
2329		input: []uint64{
2330			3, 0, 500, // hz = 500. Must match the period.
2331			4, 0, 70, inlinedCallerStack[0],
2332		},
2333		count:    2,
2334		wantLocs: [][]string{{"runtime/pprof.inlinedCalleeDump", "runtime/pprof.inlinedCallerDump"}},
2335		wantSamples: []*profile.Sample{
2336			{Value: []int64{70, 70 * period}, Location: []*profile.Location{{ID: 1}}},
2337		},
2338	}, {
2339		// The same location is used for duplicated stacks.
2340		name: "truncated_stack_trace_twice",
2341		input: []uint64{
2342			3, 0, 500, // hz = 500. Must match the period.
2343			4, 0, 70, inlinedCallerStack[0],
			// Fake frame: add a fake call to
			// inlinedCallerDump to prevent this sample
			// from being merged into the one above.
2347			5, 0, 80, inlinedCallerStack[1], inlinedCallerStack[0],
2348		},
2349		count: 3,
2350		wantLocs: [][]string{
2351			{"runtime/pprof.inlinedCalleeDump", "runtime/pprof.inlinedCallerDump"},
2352			{"runtime/pprof.inlinedCallerDump"},
2353		},
2354		wantSamples: []*profile.Sample{
2355			{Value: []int64{70, 70 * period}, Location: []*profile.Location{{ID: 1}}},
2356			{Value: []int64{80, 80 * period}, Location: []*profile.Location{{ID: 2}, {ID: 1}}},
2357		},
2358	}, {
2359		name: "expand_wrapper_function",
2360		input: []uint64{
2361			3, 0, 500, // hz = 500. Must match the period.
2362			4, 0, 50, uint64(wrapperPCs[0]),
2363		},
2364		count:    2,
2365		wantLocs: [][]string{{"runtime/pprof.inlineWrapper.dump"}},
2366		wantSamples: []*profile.Sample{
2367			{Value: []int64{50, 50 * period}, Location: []*profile.Location{{ID: 1}}},
2368		},
2369	}}
2370
2371	for _, tc := range testCases {
2372		t.Run(tc.name, func(t *testing.T) {
2373			p, err := translateCPUProfile(tc.input, tc.count)
2374			if err != nil {
2375				t.Fatalf("translating profile: %v", err)
2376			}
2377			t.Logf("Profile: %v\n", p)
2378
			// Collect the location entries; each lists the names of
			// all functions inlined at that location.
2380			var gotLoc [][]string
2381			for _, loc := range p.Location {
2382				var names []string
2383				for _, line := range loc.Line {
2384					names = append(names, line.Function.Name)
2385				}
2386				gotLoc = append(gotLoc, names)
2387			}
2388			if got, want := fmtJSON(gotLoc), fmtJSON(tc.wantLocs); got != want {
2389				t.Errorf("Got Location = %+v\n\twant %+v", got, want)
2390			}
			// Compare samples by Value and location IDs only.
2392			var gotSamples []*profile.Sample
2393			for _, sample := range p.Sample {
2394				var locs []*profile.Location
2395				for _, loc := range sample.Location {
2396					locs = append(locs, &profile.Location{ID: loc.ID})
2397				}
2398				gotSamples = append(gotSamples, &profile.Sample{Value: sample.Value, Location: locs})
2399			}
2400			if got, want := fmtJSON(gotSamples), fmtJSON(tc.wantSamples); got != want {
2401				t.Errorf("Got Samples = %+v\n\twant %+v", got, want)
2402			}
2403		})
2404	}
2405}
2406
2407func TestTimeVDSO(t *testing.T) {
	// Test that time functions have the right stack trace. In particular,
	// the stack trace shouldn't be recursive.
2410
2411	if runtime.GOOS == "android" {
2412		// Flaky on Android, issue 48655. VDSO may not be enabled.
2413		testenv.SkipFlaky(t, 48655)
2414	}
2415
2416	matches := matchAndAvoidStacks(stackContains, []string{"time.now"}, avoidFunctions())
2417	p := testCPUProfile(t, matches, func(dur time.Duration) {
2418		t0 := time.Now()
2419		for {
2420			t := time.Now()
2421			if t.Sub(t0) >= dur {
2422				return
2423			}
2424		}
2425	})
2426
2427	// Check for recursive time.now sample.
2428	for _, sample := range p.Sample {
2429		var seenNow bool
2430		for _, loc := range sample.Location {
2431			for _, line := range loc.Line {
2432				if line.Function.Name == "time.now" {
2433					if seenNow {
2434						t.Fatalf("unexpected recursive time.now")
2435					}
2436					seenNow = true
2437				}
2438			}
2439		}
2440	}
2441}
2442
2443func TestProfilerStackDepth(t *testing.T) {
	// Disable sampling; otherwise it's difficult to assert anything.
2445	oldMemRate := runtime.MemProfileRate
2446	runtime.MemProfileRate = 1
2447	runtime.SetBlockProfileRate(1)
2448	oldMutexRate := runtime.SetMutexProfileFraction(1)
2449	t.Cleanup(func() {
2450		runtime.MemProfileRate = oldMemRate
2451		runtime.SetBlockProfileRate(0)
2452		runtime.SetMutexProfileFraction(oldMutexRate)
2453	})
2454
2455	const depth = 128
2456	go produceProfileEvents(t, depth)
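	// Wait until produceProfileEvents has emitted its allocation, block,
	// and mutex events and parked in goroutineDeep before collecting any
	// profiles below.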
2457	awaitBlockedGoroutine(t, "chan receive", "goroutineDeep", 1)
2458
2459	tests := []struct {
2460		profiler string
2461		prefix   []string
2462	}{
2463		{"heap", []string{"runtime/pprof.allocDeep"}},
2464		{"block", []string{"runtime.chanrecv1", "runtime/pprof.blockChanDeep"}},
2465		{"mutex", []string{"sync.(*Mutex).Unlock", "runtime/pprof.blockMutexDeep"}},
2466		{"goroutine", []string{"runtime.gopark", "runtime.chanrecv", "runtime.chanrecv1", "runtime/pprof.goroutineDeep"}},
2467	}
2468
2469	for _, test := range tests {
2470		t.Run(test.profiler, func(t *testing.T) {
2471			var buf bytes.Buffer
2472			if err := Lookup(test.profiler).WriteTo(&buf, 0); err != nil {
2473				t.Fatalf("failed to write heap profile: %v", err)
2474			}
2475			p, err := profile.Parse(&buf)
2476			if err != nil {
2477				t.Fatalf("failed to parse heap profile: %v", err)
2478			}
2479			t.Logf("Profile = %v", p)
2480
2481			stks := stacks(p)
2482			var stk []string
2483			for _, s := range stks {
2484				if hasPrefix(s, test.prefix) {
2485					stk = s
2486					break
2487				}
2488			}
2489			if len(stk) != depth {
2490				t.Fatalf("want stack depth = %d, got %d", depth, len(stk))
2491			}
2492
2493			if rootFn, wantFn := stk[depth-1], "runtime/pprof.produceProfileEvents"; rootFn != wantFn {
2494				t.Fatalf("want stack stack root %s, got %v", wantFn, rootFn)
2495			}
2496		})
2497	}
2498}
2499
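// hasPrefix reports whether the stack stk starts with the frames in prefix.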
2500func hasPrefix(stk []string, prefix []string) bool {
2501	if len(prefix) > len(stk) {
2502		return false
2503	}
2504	for i := range prefix {
2505		if stk[i] != prefix[i] {
2506			return false
2507		}
2508	}
2509	return true
2510}
2511
// Ensure that stack records are valid map keys (comparable).
2513var _ = map[runtime.MemProfileRecord]struct{}{}
2514var _ = map[runtime.StackRecord]struct{}{}
2515
2516// allocDeep calls itself n times before calling fn.
2517func allocDeep(n int) {
2518	if n > 1 {
2519		allocDeep(n - 1)
2520		return
2521	}
2522	memSink = make([]byte, 1<<20)
2523}
2524
2525// blockChanDeep produces a block profile event at stack depth n, including the
2526// caller.
2527func blockChanDeep(t *testing.T, n int) {
2528	if n > 1 {
2529		blockChanDeep(t, n-1)
2530		return
2531	}
2532	ch := make(chan struct{})
2533	go func() {
2534		awaitBlockedGoroutine(t, "chan receive", "blockChanDeep", 1)
2535		ch <- struct{}{}
2536	}()
2537	<-ch
2538}
2539
2540// blockMutexDeep produces a block profile event at stack depth n, including the
2541// caller.
2542func blockMutexDeep(t *testing.T, n int) {
2543	if n > 1 {
2544		blockMutexDeep(t, n-1)
2545		return
2546	}
2547	var mu sync.Mutex
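	// The goroutine below acquires mu and then blocks trying to acquire
	// it again. The Unlock below releases that waiter, and the mutex
	// profile records the contention with sync.(*Mutex).Unlock and
	// blockMutexDeep on the stack, as TestProfilerStackDepth expects.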
2548	go func() {
2549		mu.Lock()
2550		mu.Lock()
2551	}()
2552	awaitBlockedGoroutine(t, "sync.Mutex.Lock", "blockMutexDeep", 1)
2553	mu.Unlock()
2554}
2555
// goroutineDeep blocks at stack depth n, including the caller, until the test
// is finished.
2558func goroutineDeep(t *testing.T, n int) {
2559	if n > 1 {
2560		goroutineDeep(t, n-1)
2561		return
2562	}
2563	wait := make(chan struct{}, 1)
2564	t.Cleanup(func() {
2565		wait <- struct{}{}
2566	})
2567	<-wait
2568}
2569
// produceProfileEvents produces pprof events at the given stack depth and then
// blocks in goroutineDeep until the test completes. The stack traces are
// guaranteed to have exactly the desired depth with produceProfileEvents as
// their root frame, which is what TestProfilerStackDepth expects.
2574func produceProfileEvents(t *testing.T, depth int) {
2575	allocDeep(depth - 1)       // -1 for produceProfileEvents, **
2576	blockChanDeep(t, depth-2)  // -2 for produceProfileEvents, **, chanrecv1
2577	blockMutexDeep(t, depth-2) // -2 for produceProfileEvents, **, Unlock
2578	memSink = nil
2579	runtime.GC()
	goroutineDeep(t, depth-4) // -4 for produceProfileEvents, **, chanrecv1, chanrecv, gopark
2581}
2582
2583func getProfileStacks(collect func([]runtime.BlockProfileRecord) (int, bool), fileLine bool) []string {
2584	var n int
2585	var ok bool
2586	var p []runtime.BlockProfileRecord
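	// Grow the record slice until collect reports that it captured the
	// entire profile; n is the number of records it needs.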
2587	for {
2588		p = make([]runtime.BlockProfileRecord, n)
2589		n, ok = collect(p)
2590		if ok {
2591			p = p[:n]
2592			break
2593		}
2594	}
2595	var stacks []string
2596	for _, r := range p {
2597		var stack strings.Builder
2598		for i, pc := range r.Stack() {
2599			if i > 0 {
2600				stack.WriteByte('\n')
2601			}
			// Use FuncForPC instead of CallersFrames,
			// because we want to see the info for exactly
			// the PCs returned by the profile to ensure
			// inlined calls have already been properly
			// expanded.
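			// Subtract 1 because each pc is a return
			// address, which points just past the call
			// instruction; pc-1 falls within the call
			// itself.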
2607			f := runtime.FuncForPC(pc - 1)
2608			stack.WriteString(f.Name())
2609			if fileLine {
2610				stack.WriteByte(' ')
2611				file, line := f.FileLine(pc - 1)
2612				stack.WriteString(file)
2613				stack.WriteByte(':')
2614				stack.WriteString(strconv.Itoa(line))
2615			}
2616		}
2617		stacks = append(stacks, stack.String())
2618	}
2619	return stacks
2620}
2621
2622func TestMutexBlockFullAggregation(t *testing.T) {
2623	// This regression test is adapted from
2624	// https://github.com/grafana/pyroscope-go/issues/103,
	// authored by Tolya Korniltsev.
2626
2627	var m sync.Mutex
2628
2629	prev := runtime.SetMutexProfileFraction(-1)
2630	defer runtime.SetMutexProfileFraction(prev)
2631
2632	const fraction = 1
2633	const iters = 100
2634	const workers = 2
2635
2636	runtime.SetMutexProfileFraction(fraction)
2637	runtime.SetBlockProfileRate(1)
2638	defer runtime.SetBlockProfileRate(0)
2639
2640	wg := sync.WaitGroup{}
2641	wg.Add(workers)
2642	for j := 0; j < workers; j++ {
2643		go func() {
2644			for i := 0; i < iters; i++ {
2645				m.Lock()
2646				// Wait at least 1 millisecond to pass the
				// starvation threshold for the mutex.
2648				time.Sleep(time.Millisecond)
2649				m.Unlock()
2650			}
2651			wg.Done()
2652		}()
2653	}
2654	wg.Wait()
2655
2656	assertNoDuplicates := func(name string, collect func([]runtime.BlockProfileRecord) (int, bool)) {
2657		stacks := getProfileStacks(collect, true)
2658		seen := make(map[string]struct{})
2659		for _, s := range stacks {
2660			if _, ok := seen[s]; ok {
2661				t.Errorf("saw duplicate entry in %s profile with stack:\n%s", name, s)
2662			}
2663			seen[s] = struct{}{}
2664		}
2665		if len(seen) == 0 {
2666			t.Errorf("did not see any samples in %s profile for this test", name)
2667		}
2668	}
2669	t.Run("mutex", func(t *testing.T) {
2670		assertNoDuplicates("mutex", runtime.MutexProfile)
2671	})
2672	t.Run("block", func(t *testing.T) {
2673		assertNoDuplicates("block", runtime.BlockProfile)
2674	})
2675}
2676
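// inlineA through inlineC and inlineD through inlineF are intended to be
// inlined into one another, so the block and mutex profiles must expand the
// inlined frames to report each full call chain.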
2677func inlineA(mu *sync.Mutex, wg *sync.WaitGroup) { inlineB(mu, wg) }
2678func inlineB(mu *sync.Mutex, wg *sync.WaitGroup) { inlineC(mu, wg) }
2679func inlineC(mu *sync.Mutex, wg *sync.WaitGroup) {
2680	defer wg.Done()
2681	mu.Lock()
2682	mu.Unlock()
2683}
2684
2685func inlineD(mu *sync.Mutex, wg *sync.WaitGroup) { inlineE(mu, wg) }
2686func inlineE(mu *sync.Mutex, wg *sync.WaitGroup) { inlineF(mu, wg) }
2687func inlineF(mu *sync.Mutex, wg *sync.WaitGroup) {
2688	defer wg.Done()
2689	mu.Unlock()
2690}
2691
2692func TestBlockMutexProfileInlineExpansion(t *testing.T) {
2693	runtime.SetBlockProfileRate(1)
2694	defer runtime.SetBlockProfileRate(0)
2695	prev := runtime.SetMutexProfileFraction(1)
2696	defer runtime.SetMutexProfileFraction(prev)
2697
2698	var mu sync.Mutex
2699	var wg sync.WaitGroup
2700	wg.Add(2)
2701	mu.Lock()
2702	go inlineA(&mu, &wg)
2703	awaitBlockedGoroutine(t, "sync.Mutex.Lock", "inlineC", 1)
2704	// inlineD will unblock inlineA
2705	go inlineD(&mu, &wg)
2706	wg.Wait()
2707
2708	tcs := []struct {
2709		Name     string
2710		Collect  func([]runtime.BlockProfileRecord) (int, bool)
2711		SubStack string
2712	}{
2713		{
2714			Name:    "mutex",
2715			Collect: runtime.MutexProfile,
2716			SubStack: `sync.(*Mutex).Unlock
2717runtime/pprof.inlineF
2718runtime/pprof.inlineE
2719runtime/pprof.inlineD`,
2720		},
2721		{
2722			Name:    "block",
2723			Collect: runtime.BlockProfile,
2724			SubStack: `sync.(*Mutex).Lock
2725runtime/pprof.inlineC
2726runtime/pprof.inlineB
2727runtime/pprof.inlineA`,
2728		},
2729	}
2730
2731	for _, tc := range tcs {
2732		t.Run(tc.Name, func(t *testing.T) {
2733			stacks := getProfileStacks(tc.Collect, false)
2734			for _, s := range stacks {
2735				if strings.Contains(s, tc.SubStack) {
2736					return
2737				}
2738			}
2739			t.Error("did not see expected stack")
2740			t.Logf("wanted:\n%s", tc.SubStack)
2741			t.Logf("got: %s", stacks)
2742		})
2743	}
2744}
2745