// Copyright 2020 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package runtime_test

import (
	"bytes"
	"fmt"
	"internal/abi"
	"internal/goexperiment"
	"internal/profile"
	"internal/testenv"
	"os"
	"reflect"
	"runtime"
	"runtime/debug"
	"runtime/metrics"
	"runtime/pprof"
	"runtime/trace"
	"slices"
	"sort"
	"strings"
	"sync"
	"sync/atomic"
	"testing"
	"time"
	"unsafe"
)

func prepareAllMetricsSamples() (map[string]metrics.Description, []metrics.Sample) {
	all := metrics.All()
	samples := make([]metrics.Sample, len(all))
	descs := make(map[string]metrics.Description)
	for i := range all {
		samples[i].Name = all[i].Name
		descs[all[i].Name] = all[i]
	}
	return descs, samples
}
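
// The helper above feeds the canonical runtime/metrics read pattern used
// throughout this file: name the samples up front, call metrics.Read once,
// then switch on each sample's reported Kind before extracting a value.
// A minimal sketch (illustrative only; not called by the tests):
//
//	_, samples := prepareAllMetricsSamples()
//	metrics.Read(samples)
//	for i := range samples {
//		switch samples[i].Value.Kind() {
//		case metrics.KindUint64:
//			_ = samples[i].Value.Uint64()
//		case metrics.KindFloat64:
//			_ = samples[i].Value.Float64()
//		case metrics.KindFloat64Histogram:
//			_ = samples[i].Value.Float64Histogram()
//		}
//	}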

func TestReadMetrics(t *testing.T) {
	// Run a GC cycle to get some of the stats to be non-zero.
	runtime.GC()

	// Set an arbitrary memory limit to check the metric for it
	limit := int64(512 * 1024 * 1024)
	oldLimit := debug.SetMemoryLimit(limit)
	defer debug.SetMemoryLimit(oldLimit)

	// Set a GC percent to check the metric for it
	gcPercent := 99
	oldGCPercent := debug.SetGCPercent(gcPercent)
	defer debug.SetGCPercent(oldGCPercent)

	// Tests whether readMetrics produces values aligning
	// with ReadMemStats while the world is stopped.
	var mstats runtime.MemStats
	_, samples := prepareAllMetricsSamples()
	runtime.ReadMetricsSlow(&mstats, unsafe.Pointer(&samples[0]), len(samples), cap(samples))
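	// (ReadMetricsSlow is a test-only export, defined in runtime's
	// export_test.go, that reads the samples and MemStats together while the
	// world is stopped, so the two views are mutually consistent. The samples
	// slice is passed as an unsafe.Pointer plus len/cap because package
	// runtime cannot reference the metrics package's Sample type directly.)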

	checkUint64 := func(t *testing.T, m string, got, want uint64) {
		t.Helper()
		if got != want {
			t.Errorf("metric %q: got %d, want %d", m, got, want)
		}
	}

	// Check to make sure the values we read line up with other values we read.
	var allocsBySize, gcPauses, schedPausesTotalGC *metrics.Float64Histogram
	var tinyAllocs uint64
	var mallocs, frees uint64
	for i := range samples {
		switch name := samples[i].Name; name {
		case "/cgo/go-to-c-calls:calls":
			checkUint64(t, name, samples[i].Value.Uint64(), uint64(runtime.NumCgoCall()))
		case "/memory/classes/heap/free:bytes":
			checkUint64(t, name, samples[i].Value.Uint64(), mstats.HeapIdle-mstats.HeapReleased)
		case "/memory/classes/heap/released:bytes":
			checkUint64(t, name, samples[i].Value.Uint64(), mstats.HeapReleased)
		case "/memory/classes/heap/objects:bytes":
			checkUint64(t, name, samples[i].Value.Uint64(), mstats.HeapAlloc)
		case "/memory/classes/heap/unused:bytes":
			checkUint64(t, name, samples[i].Value.Uint64(), mstats.HeapInuse-mstats.HeapAlloc)
		case "/memory/classes/heap/stacks:bytes":
			checkUint64(t, name, samples[i].Value.Uint64(), mstats.StackInuse)
		case "/memory/classes/metadata/mcache/free:bytes":
			checkUint64(t, name, samples[i].Value.Uint64(), mstats.MCacheSys-mstats.MCacheInuse)
		case "/memory/classes/metadata/mcache/inuse:bytes":
			checkUint64(t, name, samples[i].Value.Uint64(), mstats.MCacheInuse)
		case "/memory/classes/metadata/mspan/free:bytes":
			checkUint64(t, name, samples[i].Value.Uint64(), mstats.MSpanSys-mstats.MSpanInuse)
		case "/memory/classes/metadata/mspan/inuse:bytes":
			checkUint64(t, name, samples[i].Value.Uint64(), mstats.MSpanInuse)
		case "/memory/classes/metadata/other:bytes":
			checkUint64(t, name, samples[i].Value.Uint64(), mstats.GCSys)
		case "/memory/classes/os-stacks:bytes":
			checkUint64(t, name, samples[i].Value.Uint64(), mstats.StackSys-mstats.StackInuse)
		case "/memory/classes/other:bytes":
			checkUint64(t, name, samples[i].Value.Uint64(), mstats.OtherSys)
		case "/memory/classes/profiling/buckets:bytes":
			checkUint64(t, name, samples[i].Value.Uint64(), mstats.BuckHashSys)
		case "/memory/classes/total:bytes":
			checkUint64(t, name, samples[i].Value.Uint64(), mstats.Sys)
		case "/gc/heap/allocs-by-size:bytes":
			hist := samples[i].Value.Float64Histogram()
			// Skip size class 0 in BySize, because it's always empty and not represented
			// in the histogram.
			for i, sc := range mstats.BySize[1:] {
				if b, s := hist.Buckets[i+1], float64(sc.Size+1); b != s {
					t.Errorf("bucket does not match size class: got %f, want %f", b, s)
					// The rest of the checks aren't expected to work anyway.
					continue
				}
				if c, m := hist.Counts[i], sc.Mallocs; c != m {
					t.Errorf("histogram counts do not match BySize for class %d: got %d, want %d", i, c, m)
				}
			}
			allocsBySize = hist
		case "/gc/heap/allocs:bytes":
			checkUint64(t, name, samples[i].Value.Uint64(), mstats.TotalAlloc)
		case "/gc/heap/frees-by-size:bytes":
			hist := samples[i].Value.Float64Histogram()
			// Skip size class 0 in BySize, because it's always empty and not represented
			// in the histogram.
			for i, sc := range mstats.BySize[1:] {
				if b, s := hist.Buckets[i+1], float64(sc.Size+1); b != s {
					t.Errorf("bucket does not match size class: got %f, want %f", b, s)
					// The rest of the checks aren't expected to work anyway.
					continue
				}
				if c, f := hist.Counts[i], sc.Frees; c != f {
					t.Errorf("histogram counts do not match BySize for class %d: got %d, want %d", i, c, f)
				}
			}
		case "/gc/heap/frees:bytes":
			checkUint64(t, name, samples[i].Value.Uint64(), mstats.TotalAlloc-mstats.HeapAlloc)
		case "/gc/heap/tiny/allocs:objects":
			// Currently, MemStats adds the tiny alloc count to both Mallocs AND Frees.
			// This is because MemStats couldn't be extended at the time, but there was
			// a desire to have Mallocs at least be a little more representative, while
			// having Mallocs - Frees still represent a live object count.
			// Unfortunately, MemStats doesn't actually export a large allocation count,
			// so it's impossible to pull the tiny allocation count out directly.
			//
			// Instead, check the tiny allocation count outside of this loop, using the
			// allocs-by-size histogram to figure out how many large objects there are.
			tinyAllocs = samples[i].Value.Uint64()
			// The next two metrics are checked against Mallocs and Frees outside this
			// loop for the same reason: we need to account for the tiny allocations
			// included in both.
		case "/gc/heap/allocs:objects":
			mallocs = samples[i].Value.Uint64()
		case "/gc/heap/frees:objects":
			frees = samples[i].Value.Uint64()
		case "/gc/heap/live:bytes":
			// Check for "obviously wrong" values. We can't check a stronger invariant,
			// such as live <= HeapAlloc, because live is not 100% accurate. It's computed
			// under racy conditions, and some objects may be double-counted (this is
			// intentional and necessary for GC performance).
			//
			// Instead, check against a much more reasonable upper-bound: the amount of
			// mapped heap memory. We can't possibly overcount to the point of exceeding
			// total mapped heap memory, except if there's an accounting bug.
			if live := samples[i].Value.Uint64(); live > mstats.HeapSys {
				t.Errorf("live bytes: %d > heap sys: %d", live, mstats.HeapSys)
			} else if live == 0 {
				// Might happen if we don't call runtime.GC() above.
				t.Error("live bytes is 0")
			}
		case "/gc/gomemlimit:bytes":
			checkUint64(t, name, samples[i].Value.Uint64(), uint64(limit))
		case "/gc/heap/objects:objects":
			checkUint64(t, name, samples[i].Value.Uint64(), mstats.HeapObjects)
		case "/gc/heap/goal:bytes":
			checkUint64(t, name, samples[i].Value.Uint64(), mstats.NextGC)
		case "/gc/gogc:percent":
			checkUint64(t, name, samples[i].Value.Uint64(), uint64(gcPercent))
		case "/gc/cycles/automatic:gc-cycles":
			checkUint64(t, name, samples[i].Value.Uint64(), uint64(mstats.NumGC-mstats.NumForcedGC))
		case "/gc/cycles/forced:gc-cycles":
			checkUint64(t, name, samples[i].Value.Uint64(), uint64(mstats.NumForcedGC))
		case "/gc/cycles/total:gc-cycles":
			checkUint64(t, name, samples[i].Value.Uint64(), uint64(mstats.NumGC))
		case "/gc/pauses:seconds":
			gcPauses = samples[i].Value.Float64Histogram()
		case "/sched/pauses/total/gc:seconds":
			schedPausesTotalGC = samples[i].Value.Float64Histogram()
		}
	}

	// Check tinyAllocs.
	nonTinyAllocs := uint64(0)
	for _, c := range allocsBySize.Counts {
		nonTinyAllocs += c
	}
	checkUint64(t, "/gc/heap/tiny/allocs:objects", tinyAllocs, mstats.Mallocs-nonTinyAllocs)

	// Check allocation and free counts.
	checkUint64(t, "/gc/heap/allocs:objects", mallocs, mstats.Mallocs-tinyAllocs)
	checkUint64(t, "/gc/heap/frees:objects", frees, mstats.Frees-tinyAllocs)
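
	// In other words, with A = mstats.Mallocs (which includes tiny allocs),
	// H = the sum of the allocs-by-size histogram counts (which excludes
	// them), and T = /gc/heap/tiny/allocs:objects, the checks above assert
	// T == A - H and /gc/heap/allocs:objects == A - T (and similarly for
	// frees). For example, if A = 1000 and H = 900, then T must be 100 and
	// the allocs metric 900.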

	// Verify that /gc/pauses:seconds is a copy of /sched/pauses/total/gc:seconds.
	if !reflect.DeepEqual(gcPauses.Buckets, schedPausesTotalGC.Buckets) {
		t.Errorf("/gc/pauses:seconds buckets %v do not match /sched/pauses/total/gc:seconds buckets %v", gcPauses.Buckets, schedPausesTotalGC.Buckets)
	}
	if !reflect.DeepEqual(gcPauses.Counts, schedPausesTotalGC.Counts) {
		t.Errorf("/gc/pauses:seconds counts %v do not match /sched/pauses/total/gc:seconds counts %v", gcPauses.Counts, schedPausesTotalGC.Counts)
	}
}

func TestReadMetricsConsistency(t *testing.T) {
	// Tests whether readMetrics produces consistent, sensible values.
	// The values are read concurrently with the runtime doing other
	// things (e.g. allocating), so what we read can't reasonably be
	// compared to other runtime values (e.g. MemStats).

	// Run a few GC cycles to get some of the stats to be non-zero.
	runtime.GC()
	runtime.GC()
	runtime.GC()

	// Set GOMAXPROCS high then sleep briefly to ensure we generate
	// some idle time.
	oldmaxprocs := runtime.GOMAXPROCS(10)
	time.Sleep(time.Millisecond)
	runtime.GOMAXPROCS(oldmaxprocs)

	// Read all the supported metrics through the metrics package.
	descs, samples := prepareAllMetricsSamples()
	metrics.Read(samples)

	// Check to make sure the values we read make sense.
	var totalVirtual struct {
		got, want uint64
	}
	var objects struct {
		alloc, free             *metrics.Float64Histogram
		allocs, frees           uint64
		allocdBytes, freedBytes uint64
		total, totalBytes       uint64
	}
	var gc struct {
		numGC  uint64
		pauses uint64
	}
	var totalScan struct {
		got, want uint64
	}
	var cpu struct {
		gcAssist    float64
		gcDedicated float64
		gcIdle      float64
		gcPause     float64
		gcTotal     float64

		idle float64
		user float64

		scavengeAssist float64
		scavengeBg     float64
		scavengeTotal  float64

		total float64
	}
	for i := range samples {
		kind := samples[i].Value.Kind()
		if want := descs[samples[i].Name].Kind; kind != want {
			t.Errorf("supported metric %q has unexpected kind: got %d, want %d", samples[i].Name, kind, want)
			continue
		}
		if samples[i].Name != "/memory/classes/total:bytes" && strings.HasPrefix(samples[i].Name, "/memory/classes") {
			v := samples[i].Value.Uint64()
			totalVirtual.want += v

			// None of these stats should ever get this big.
			// If they do, there's probably overflow involved,
			// usually due to bad accounting.
			if int64(v) < 0 {
				t.Errorf("%q has high/negative value: %d", samples[i].Name, v)
			}
		}
		switch samples[i].Name {
		case "/cpu/classes/gc/mark/assist:cpu-seconds":
			cpu.gcAssist = samples[i].Value.Float64()
		case "/cpu/classes/gc/mark/dedicated:cpu-seconds":
			cpu.gcDedicated = samples[i].Value.Float64()
		case "/cpu/classes/gc/mark/idle:cpu-seconds":
			cpu.gcIdle = samples[i].Value.Float64()
		case "/cpu/classes/gc/pause:cpu-seconds":
			cpu.gcPause = samples[i].Value.Float64()
		case "/cpu/classes/gc/total:cpu-seconds":
			cpu.gcTotal = samples[i].Value.Float64()
		case "/cpu/classes/idle:cpu-seconds":
			cpu.idle = samples[i].Value.Float64()
		case "/cpu/classes/scavenge/assist:cpu-seconds":
			cpu.scavengeAssist = samples[i].Value.Float64()
		case "/cpu/classes/scavenge/background:cpu-seconds":
			cpu.scavengeBg = samples[i].Value.Float64()
		case "/cpu/classes/scavenge/total:cpu-seconds":
			cpu.scavengeTotal = samples[i].Value.Float64()
		case "/cpu/classes/total:cpu-seconds":
			cpu.total = samples[i].Value.Float64()
		case "/cpu/classes/user:cpu-seconds":
			cpu.user = samples[i].Value.Float64()
		case "/memory/classes/total:bytes":
			totalVirtual.got = samples[i].Value.Uint64()
		case "/memory/classes/heap/objects:bytes":
			objects.totalBytes = samples[i].Value.Uint64()
		case "/gc/heap/objects:objects":
			objects.total = samples[i].Value.Uint64()
		case "/gc/heap/allocs:bytes":
			objects.allocdBytes = samples[i].Value.Uint64()
		case "/gc/heap/allocs:objects":
			objects.allocs = samples[i].Value.Uint64()
		case "/gc/heap/allocs-by-size:bytes":
			objects.alloc = samples[i].Value.Float64Histogram()
		case "/gc/heap/frees:bytes":
			objects.freedBytes = samples[i].Value.Uint64()
		case "/gc/heap/frees:objects":
			objects.frees = samples[i].Value.Uint64()
		case "/gc/heap/frees-by-size:bytes":
			objects.free = samples[i].Value.Float64Histogram()
		case "/gc/cycles:gc-cycles":
			gc.numGC = samples[i].Value.Uint64()
		case "/gc/pauses:seconds":
			h := samples[i].Value.Float64Histogram()
			gc.pauses = 0
			for i := range h.Counts {
				gc.pauses += h.Counts[i]
			}
		case "/gc/scan/heap:bytes":
			totalScan.want += samples[i].Value.Uint64()
		case "/gc/scan/globals:bytes":
			totalScan.want += samples[i].Value.Uint64()
		case "/gc/scan/stack:bytes":
			totalScan.want += samples[i].Value.Uint64()
		case "/gc/scan/total:bytes":
			totalScan.got = samples[i].Value.Uint64()
		case "/sched/gomaxprocs:threads":
			if got, want := samples[i].Value.Uint64(), uint64(runtime.GOMAXPROCS(-1)); got != want {
				t.Errorf("gomaxprocs doesn't match runtime.GOMAXPROCS: got %d, want %d", got, want)
			}
		case "/sched/goroutines:goroutines":
			if samples[i].Value.Uint64() < 1 {
				t.Error("number of goroutines is less than one")
			}
		}
	}
	// Only check this on Linux where we can be reasonably sure we have a high-resolution timer.
	if runtime.GOOS == "linux" {
		if cpu.gcDedicated <= 0 && cpu.gcAssist <= 0 && cpu.gcIdle <= 0 {
			t.Errorf("found no time spent on GC work: %#v", cpu)
		}
		if cpu.gcPause <= 0 {
			t.Errorf("found no GC pauses: %f", cpu.gcPause)
		}
		if cpu.idle <= 0 {
			t.Errorf("found no idle time: %f", cpu.idle)
		}
		if total := cpu.gcDedicated + cpu.gcAssist + cpu.gcIdle + cpu.gcPause; !withinEpsilon(cpu.gcTotal, total, 0.001) {
			t.Errorf("calculated total GC CPU time not within 0.1%% of total: %f vs. %f", total, cpu.gcTotal)
		}
		if total := cpu.scavengeAssist + cpu.scavengeBg; !withinEpsilon(cpu.scavengeTotal, total, 0.001) {
			t.Errorf("calculated total scavenge CPU not within 0.1%% of total: %f vs. %f", total, cpu.scavengeTotal)
		}
		if cpu.total <= 0 {
			t.Errorf("found no total CPU time passed")
		}
		if cpu.user <= 0 {
			t.Errorf("found no user time passed")
		}
		if total := cpu.gcTotal + cpu.scavengeTotal + cpu.user + cpu.idle; !withinEpsilon(cpu.total, total, 0.001) {
			t.Errorf("calculated total CPU not within 0.1%% of total: %f vs. %f", total, cpu.total)
		}
	}
	if totalVirtual.got != totalVirtual.want {
		t.Errorf(`"/memory/classes/total:bytes" does not match sum of /memory/classes/**: got %d, want %d`, totalVirtual.got, totalVirtual.want)
	}
	if got, want := objects.allocs-objects.frees, objects.total; got != want {
		t.Errorf("mismatch between object alloc/free tallies and total: got %d, want %d", got, want)
	}
	if got, want := objects.allocdBytes-objects.freedBytes, objects.totalBytes; got != want {
		t.Errorf("mismatch between object alloc/free tallies and total: got %d, want %d", got, want)
	}
	if b, c := len(objects.alloc.Buckets), len(objects.alloc.Counts); b != c+1 {
		t.Errorf("allocs-by-size has wrong bucket or counts length: %d buckets, %d counts", b, c)
	}
	if b, c := len(objects.free.Buckets), len(objects.free.Counts); b != c+1 {
		t.Errorf("frees-by-size has wrong bucket or counts length: %d buckets, %d counts", b, c)
	}
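	// (A Float64Histogram with N counts carries N+1 bucket boundaries in
	// Buckets, one for each edge of the N counting intervals; hence the
	// c+1 in the checks above.)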
	if len(objects.alloc.Buckets) != len(objects.free.Buckets) {
		t.Error("allocs-by-size and frees-by-size buckets don't match in length")
	} else if len(objects.alloc.Counts) != len(objects.free.Counts) {
		t.Error("allocs-by-size and frees-by-size counts don't match in length")
	} else {
		for i := range objects.alloc.Buckets {
			ba := objects.alloc.Buckets[i]
			bf := objects.free.Buckets[i]
			if ba != bf {
				t.Errorf("bucket %d is different for alloc and free hists: %f != %f", i, ba, bf)
			}
		}
		if !t.Failed() {
			var gotAlloc, gotFree uint64
			want := objects.total
			for i := range objects.alloc.Counts {
				if objects.alloc.Counts[i] < objects.free.Counts[i] {
					t.Errorf("found more frees than allocs in object dist bucket %d", i)
					continue
				}
				gotAlloc += objects.alloc.Counts[i]
				gotFree += objects.free.Counts[i]
			}
			if got := gotAlloc - gotFree; got != want {
				t.Errorf("object distribution counts don't match count of live objects: got %d, want %d", got, want)
			}
			if gotAlloc != objects.allocs {
				t.Errorf("object distribution counts don't match total allocs: got %d, want %d", gotAlloc, objects.allocs)
			}
			if gotFree != objects.frees {
				t.Errorf("object distribution counts don't match total frees: got %d, want %d", gotFree, objects.frees)
			}
		}
	}
	// The current GC has at least 2 pauses per cycle.
	// Check that the observed pause count is consistent with that.
	if gc.pauses < gc.numGC*2 {
		t.Errorf("fewer pauses than expected: got %d, want at least %d", gc.pauses, gc.numGC*2)
	}
	if totalScan.got <= 0 {
		t.Errorf("scannable GC space is empty: %d", totalScan.got)
	}
	if totalScan.got != totalScan.want {
		t.Errorf("/gc/scan/total:bytes doesn't line up with sum of /gc/scan*: total %d vs. sum %d", totalScan.got, totalScan.want)
	}
}

func BenchmarkReadMetricsLatency(b *testing.B) {
	stop := applyGCLoad(b)

	// Collect latency measurements to compute percentiles from.
	latencies := make([]time.Duration, 0, 1024)
	_, samples := prepareAllMetricsSamples()

	// Hit metrics.Read continuously and measure.
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		start := time.Now()
		metrics.Read(samples)
		latencies = append(latencies, time.Since(start))
	}
	// Make sure to stop the timer before we wait! The load created above
	// is very heavy-weight and not easy to stop, so we could end up
	// confusing the benchmarking framework for small b.N.
	b.StopTimer()
	stop()

	// Disable the default */op metrics.
	// ns/op is an average over the whole run and isn't meaningful for
	// this benchmark; we report latency percentiles below instead.
	b.ReportMetric(0, "ns/op")
	b.ReportMetric(0, "B/op")
	b.ReportMetric(0, "allocs/op")

	// Sort latencies then report percentiles.
	sort.Slice(latencies, func(i, j int) bool {
		return latencies[i] < latencies[j]
	})
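	// Indexing the sorted slice at len*p/100 (integer division) selects an
	// approximate p-th percentile; e.g. with 1000 samples, p90 is
	// latencies[900].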
	b.ReportMetric(float64(latencies[len(latencies)*50/100]), "p50-ns")
	b.ReportMetric(float64(latencies[len(latencies)*90/100]), "p90-ns")
	b.ReportMetric(float64(latencies[len(latencies)*99/100]), "p99-ns")
}

var readMetricsSink [1024]interface{}

func TestReadMetricsCumulative(t *testing.T) {
	// Set up the set of metrics marked cumulative.
	descs := metrics.All()
	var samples [2][]metrics.Sample
	samples[0] = make([]metrics.Sample, len(descs))
	samples[1] = make([]metrics.Sample, len(descs))
	total := 0
	for i := range samples[0] {
		if !descs[i].Cumulative {
			continue
		}
		samples[0][total].Name = descs[i].Name
		total++
	}
	samples[0] = samples[0][:total]
	samples[1] = samples[1][:total]
	copy(samples[1], samples[0])

	// Start some noise in the background.
	var wg sync.WaitGroup
	wg.Add(1)
	done := make(chan struct{})
	go func() {
		defer wg.Done()
		for {
			// Add more things here that could influence metrics.
			for i := 0; i < len(readMetricsSink); i++ {
				readMetricsSink[i] = make([]byte, 1024)
				select {
				case <-done:
					return
				default:
				}
			}
			runtime.GC()
		}
	}()

	sum := func(us []uint64) uint64 {
		total := uint64(0)
		for _, u := range us {
			total += u
		}
		return total
	}

	// Populate the first generation.
	metrics.Read(samples[0])

	// Check to make sure that these metrics only grow monotonically.
	for gen := 1; gen < 10; gen++ {
		metrics.Read(samples[gen%2])
		for i := range samples[gen%2] {
			name := samples[gen%2][i].Name
			vNew, vOld := samples[gen%2][i].Value, samples[1-(gen%2)][i].Value

			switch vNew.Kind() {
			case metrics.KindUint64:
				new := vNew.Uint64()
				old := vOld.Uint64()
				if new < old {
					t.Errorf("%s decreased: %d < %d", name, new, old)
				}
			case metrics.KindFloat64:
				new := vNew.Float64()
				old := vOld.Float64()
				if new < old {
					t.Errorf("%s decreased: %f < %f", name, new, old)
				}
			case metrics.KindFloat64Histogram:
				new := sum(vNew.Float64Histogram().Counts)
				old := sum(vOld.Float64Histogram().Counts)
				if new < old {
					t.Errorf("%s counts decreased: %d < %d", name, new, old)
				}
			}
		}
	}
	close(done)

	wg.Wait()
}
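
// withinEpsilon reports whether v1 lies within a relative tolerance e of v2.
// The band is computed relative to v2, so the comparison is not symmetric:
// for example, withinEpsilon(100.05, 100, 0.001) checks 100.05 against
// [99.9, 100.1].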
func withinEpsilon(v1, v2, e float64) bool {
	return v2-v2*e <= v1 && v1 <= v2+v2*e
}

func TestMutexWaitTimeMetric(t *testing.T) {
	var sample [1]metrics.Sample
	sample[0].Name = "/sync/mutex/wait/total:seconds"

	locks := []locker2{
		new(mutex),
		new(rwmutexWrite),
		new(rwmutexReadWrite),
		new(rwmutexWriteRead),
	}
	for _, lock := range locks {
		t.Run(reflect.TypeOf(lock).Elem().Name(), func(t *testing.T) {
			metrics.Read(sample[:])
			before := time.Duration(sample[0].Value.Float64() * 1e9)
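			// (The metric reports seconds as a float64; multiplying by 1e9
			// converts it to nanoseconds, the unit time.Duration counts in.)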

			minMutexWaitTime := generateMutexWaitTime(lock)

			metrics.Read(sample[:])
			after := time.Duration(sample[0].Value.Float64() * 1e9)

			if wt := after - before; wt < minMutexWaitTime {
				t.Errorf("too little mutex wait time: got %s, want %s", wt, minMutexWaitTime)
			}
		})
	}
}

// locker2 represents an API surface of two concurrent goroutines
// locking the same resource, but through different APIs. It's intended
// to abstract over the relationship of two Lock calls or an RLock
// and a Lock call.
type locker2 interface {
	Lock1()
	Unlock1()
	Lock2()
	Unlock2()
}
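
// For example, rwmutexReadWrite below maps Lock1/Unlock1 to RLock/RUnlock and
// Lock2/Unlock2 to Lock/Unlock, modeling a writer blocking behind a reader.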

type mutex struct {
	mu sync.Mutex
}

func (m *mutex) Lock1()   { m.mu.Lock() }
func (m *mutex) Unlock1() { m.mu.Unlock() }
func (m *mutex) Lock2()   { m.mu.Lock() }
func (m *mutex) Unlock2() { m.mu.Unlock() }

type rwmutexWrite struct {
	mu sync.RWMutex
}

func (m *rwmutexWrite) Lock1()   { m.mu.Lock() }
func (m *rwmutexWrite) Unlock1() { m.mu.Unlock() }
func (m *rwmutexWrite) Lock2()   { m.mu.Lock() }
func (m *rwmutexWrite) Unlock2() { m.mu.Unlock() }

type rwmutexReadWrite struct {
	mu sync.RWMutex
}

func (m *rwmutexReadWrite) Lock1()   { m.mu.RLock() }
func (m *rwmutexReadWrite) Unlock1() { m.mu.RUnlock() }
func (m *rwmutexReadWrite) Lock2()   { m.mu.Lock() }
func (m *rwmutexReadWrite) Unlock2() { m.mu.Unlock() }

type rwmutexWriteRead struct {
	mu sync.RWMutex
}

func (m *rwmutexWriteRead) Lock1()   { m.mu.Lock() }
func (m *rwmutexWriteRead) Unlock1() { m.mu.Unlock() }
func (m *rwmutexWriteRead) Lock2()   { m.mu.RLock() }
func (m *rwmutexWriteRead) Unlock2() { m.mu.RUnlock() }

// generateMutexWaitTime causes a goroutine to block on mu for a known
// duration, returning the minimum amount of wait time that should be
// visible in the /sync/mutex/wait/total:seconds metric.
func generateMutexWaitTime(mu locker2) time.Duration {
	// Set up the runtime to always track casgstatus transitions for metrics.
	*runtime.CasGStatusAlwaysTrack = true

	mu.Lock1()

	// Start up a goroutine to wait on the lock.
	gc := make(chan *runtime.G)
	done := make(chan bool)
	go func() {
		gc <- runtime.Getg()

		for {
			mu.Lock2()
			mu.Unlock2()
			if <-done {
				return
			}
		}
	}()
	gp := <-gc

	// Set the block time high enough so that it will always show up, even
	// on systems with coarse timer granularity.
	const blockTime = 100 * time.Millisecond

	// Make sure the goroutine spawned above actually blocks on the lock.
	for {
		if runtime.GIsWaitingOnMutex(gp) {
			break
		}
		runtime.Gosched()
	}

	// Let some amount of time pass.
	time.Sleep(blockTime)

	// Let the other goroutine acquire the lock.
	mu.Unlock1()
	done <- true

	// Reset flag.
	*runtime.CasGStatusAlwaysTrack = false
	return blockTime
}

// See issue #60276.
func TestCPUMetricsSleep(t *testing.T) {
	if runtime.GOOS == "wasip1" {
		// Since wasip1 busy-waits in the scheduler, there's no meaningful idle
		// time. This is accurately reflected in the metrics, but it means this
		// test is basically meaningless on this platform.
		t.Skip("wasip1 currently busy-waits in idle time; test not applicable")
	}

	names := []string{
		"/cpu/classes/idle:cpu-seconds",

		"/cpu/classes/gc/mark/assist:cpu-seconds",
		"/cpu/classes/gc/mark/dedicated:cpu-seconds",
		"/cpu/classes/gc/mark/idle:cpu-seconds",
		"/cpu/classes/gc/pause:cpu-seconds",
		"/cpu/classes/gc/total:cpu-seconds",
		"/cpu/classes/scavenge/assist:cpu-seconds",
		"/cpu/classes/scavenge/background:cpu-seconds",
		"/cpu/classes/scavenge/total:cpu-seconds",
		"/cpu/classes/total:cpu-seconds",
		"/cpu/classes/user:cpu-seconds",
	}
	prep := func() []metrics.Sample {
		mm := make([]metrics.Sample, len(names))
		for i := range names {
			mm[i].Name = names[i]
		}
		return mm
	}
	m1, m2 := prep(), prep()

	const (
		// Expected time spent idle.
		dur = 100 * time.Millisecond

		// maxFailures is the number of consecutive failures required to cause the test to fail.
		maxFailures = 10
	)

	failureIdleTimes := make([]float64, 0, maxFailures)

	// If the bug we expect is happening, then the Sleep CPU time will be accounted for
	// as user time rather than idle time. In an ideal world we'd expect the whole application
	// to go instantly idle the moment this goroutine goes to sleep, and stay asleep for that
	// duration. However, the Go runtime can easily eat into idle time while this goroutine is
	// blocked in a sleep. For example, slow platforms might spend more time than expected in the
	// scheduler. Another example is that a Go runtime background goroutine could run while
	// everything else is idle. Lastly, if a running goroutine is descheduled by the OS, enough
	// time may pass such that the goroutine is ready to wake, even though the runtime couldn't
	// observe itself as idle with nanotime.
	//
	// To deal with all this, we give a half-proc's worth of leniency.
	//
	// We also retry multiple times to deal with the fact that the OS might deschedule us before
	// we yield and go idle. That has a rare enough chance that retries should resolve it.
	// If the issue we expect is happening, it should be persistent.
	minIdleCPUSeconds := dur.Seconds() * (float64(runtime.GOMAXPROCS(-1)) - 0.5)
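	// For example, with GOMAXPROCS=4 and dur=100ms, minIdleCPUSeconds is
	// 0.1s * 3.5 = 0.35s of expected idle CPU time across all Ps.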

	// Let's make sure there's no background scavenge work to do.
	//
	// The runtime.GC calls below ensure the background sweeper
	// will not run during the idle period.
	debug.FreeOSMemory()

	for retries := 0; retries < maxFailures; retries++ {
		// Read 1.
		runtime.GC() // Update /cpu/classes metrics.
		metrics.Read(m1)

		// Sleep.
		time.Sleep(dur)

		// Read 2.
		runtime.GC() // Update /cpu/classes metrics.
		metrics.Read(m2)

		dt := m2[0].Value.Float64() - m1[0].Value.Float64()
		if dt >= minIdleCPUSeconds {
			// All is well. Test passed.
			return
		}
		failureIdleTimes = append(failureIdleTimes, dt)
		// Try again.
	}

	// We couldn't observe the expected idle time even once.
	for i, dt := range failureIdleTimes {
		t.Logf("try %2d: idle time = %.5fs\n", i+1, dt)
	}
	t.Logf("try %d breakdown:\n", len(failureIdleTimes))
	for i := range names {
		if m1[i].Value.Kind() == metrics.KindBad {
			continue
		}
		t.Logf("\t%s %0.3f\n", names[i], m2[i].Value.Float64()-m1[i].Value.Float64())
	}
	t.Errorf(`time.Sleep did not contribute enough to "idle" class: minimum idle time = %.5fs`, minIdleCPUSeconds)
}

// Call fn and verify that the correct STW metrics increment. If isGC is true,
// fn triggers a GC STW. Otherwise, fn triggers a non-GC ("other") STW.
func testSchedPauseMetrics(t *testing.T, fn func(t *testing.T), isGC bool) {
	m := []metrics.Sample{
		{Name: "/sched/pauses/stopping/gc:seconds"},
		{Name: "/sched/pauses/stopping/other:seconds"},
		{Name: "/sched/pauses/total/gc:seconds"},
		{Name: "/sched/pauses/total/other:seconds"},
	}

	stoppingGC := &m[0]
	stoppingOther := &m[1]
	totalGC := &m[2]
	totalOther := &m[3]

	sampleCount := func(s *metrics.Sample) uint64 {
		h := s.Value.Float64Histogram()

		var n uint64
		for _, c := range h.Counts {
			n += c
		}
		return n
	}

	// Read baseline.
	metrics.Read(m)

	baselineStartGC := sampleCount(stoppingGC)
	baselineStartOther := sampleCount(stoppingOther)
	baselineTotalGC := sampleCount(totalGC)
	baselineTotalOther := sampleCount(totalOther)

	fn(t)

	metrics.Read(m)

	if isGC {
		if got := sampleCount(stoppingGC); got <= baselineStartGC {
			t.Errorf("/sched/pauses/stopping/gc:seconds sample count %d did not increase from baseline of %d", got, baselineStartGC)
		}
		if got := sampleCount(totalGC); got <= baselineTotalGC {
			t.Errorf("/sched/pauses/total/gc:seconds sample count %d did not increase from baseline of %d", got, baselineTotalGC)
		}

		if got := sampleCount(stoppingOther); got != baselineStartOther {
			t.Errorf("/sched/pauses/stopping/other:seconds sample count %d changed from baseline of %d", got, baselineStartOther)
		}
		if got := sampleCount(totalOther); got != baselineTotalOther {
			t.Errorf("/sched/pauses/total/other:seconds sample count %d changed from baseline of %d", got, baselineTotalOther)
		}
	} else {
		if got := sampleCount(stoppingGC); got != baselineStartGC {
			t.Errorf("/sched/pauses/stopping/gc:seconds sample count %d changed from baseline of %d", got, baselineStartGC)
		}
		if got := sampleCount(totalGC); got != baselineTotalGC {
			t.Errorf("/sched/pauses/total/gc:seconds sample count %d changed from baseline of %d", got, baselineTotalGC)
		}

		if got := sampleCount(stoppingOther); got <= baselineStartOther {
			t.Errorf("/sched/pauses/stopping/other:seconds sample count %d did not increase from baseline of %d", got, baselineStartOther)
		}
		if got := sampleCount(totalOther); got <= baselineTotalOther {
			t.Errorf("/sched/pauses/total/other:seconds sample count %d did not increase from baseline of %d", got, baselineTotalOther)
		}
	}
}

func TestSchedPauseMetrics(t *testing.T) {
	tests := []struct {
		name string
		isGC bool
		fn   func(t *testing.T)
	}{
		{
			name: "runtime.GC",
			isGC: true,
			fn: func(t *testing.T) {
				runtime.GC()
			},
		},
		{
			name: "runtime.GOMAXPROCS",
			fn: func(t *testing.T) {
				if runtime.GOARCH == "wasm" {
					t.Skip("GOMAXPROCS >1 not supported on wasm")
				}

				n := runtime.GOMAXPROCS(0)
				defer runtime.GOMAXPROCS(n)

				runtime.GOMAXPROCS(n + 1)
			},
		},
		{
			name: "runtime.GoroutineProfile",
			fn: func(t *testing.T) {
				var s [1]runtime.StackRecord
				runtime.GoroutineProfile(s[:])
			},
		},
		{
			name: "runtime.ReadMemStats",
			fn: func(t *testing.T) {
				var mstats runtime.MemStats
				runtime.ReadMemStats(&mstats)
			},
		},
		{
			name: "runtime.Stack",
			fn: func(t *testing.T) {
				var b [64]byte
				runtime.Stack(b[:], true)
			},
		},
		{
			name: "runtime/debug.WriteHeapDump",
			fn: func(t *testing.T) {
				if runtime.GOOS == "js" {
					t.Skip("WriteHeapDump not supported on js")
				}

				f, err := os.CreateTemp(t.TempDir(), "heapdumptest")
				if err != nil {
					t.Fatalf("os.CreateTemp failed: %v", err)
				}
				defer os.Remove(f.Name())
				defer f.Close()
				debug.WriteHeapDump(f.Fd())
			},
		},
		{
			name: "runtime/trace.Start",
			fn: func(t *testing.T) {
				if trace.IsEnabled() {
					t.Skip("tracing already enabled")
				}

				var buf bytes.Buffer
				if err := trace.Start(&buf); err != nil {
					t.Errorf("trace.Start err got %v want nil", err)
				}
				trace.Stop()
			},
		},
	}

	// These tests count STW pauses, classified based on whether they're related
	// to the GC or not. Disable automatic GC cycles during the test so we don't
	// have an incidental GC pause when we're trying to observe only
	// non-GC-related pauses. This is especially important for the
	// runtime/trace.Start test, since (as of this writing) that will block
	// until any active GC mark phase completes.
	defer debug.SetGCPercent(debug.SetGCPercent(-1))
	runtime.GC()

	for _, tc := range tests {
		t.Run(tc.name, func(t *testing.T) {
			testSchedPauseMetrics(t, tc.fn, tc.isGC)
		})
	}
}

func TestRuntimeLockMetricsAndProfile(t *testing.T) {
	old := runtime.SetMutexProfileFraction(0) // enabled during sub-tests
	defer runtime.SetMutexProfileFraction(old)
	if old != 0 {
		t.Fatalf("need MutexProfileFraction 0, got %d", old)
	}

	{
		before := os.Getenv("GODEBUG")
		for _, s := range strings.Split(before, ",") {
			if strings.HasPrefix(s, "runtimecontentionstacks=") {
				t.Logf("GODEBUG includes explicit setting %q", s)
			}
		}
		defer func() { os.Setenv("GODEBUG", before) }()
		os.Setenv("GODEBUG", fmt.Sprintf("%s,runtimecontentionstacks=1", before))
	}

	t.Logf("NumCPU %d", runtime.NumCPU())
	t.Logf("GOMAXPROCS %d", runtime.GOMAXPROCS(0))
	if minCPU := 2; runtime.NumCPU() < minCPU {
		t.Skipf("creating and observing contention on runtime-internal locks requires NumCPU >= %d", minCPU)
	}

	loadProfile := func(t *testing.T) *profile.Profile {
		var w bytes.Buffer
		pprof.Lookup("mutex").WriteTo(&w, 0)
		p, err := profile.Parse(&w)
		if err != nil {
			t.Fatalf("failed to parse profile: %v", err)
		}
		if err := p.CheckValid(); err != nil {
			t.Fatalf("invalid profile: %v", err)
		}
		return p
	}

	measureDelta := func(t *testing.T, fn func()) (metricGrowth, profileGrowth float64, p *profile.Profile) {
		beforeProfile := loadProfile(t)
		beforeMetrics := []metrics.Sample{{Name: "/sync/mutex/wait/total:seconds"}}
		metrics.Read(beforeMetrics)

		fn()

		afterProfile := loadProfile(t)
		afterMetrics := []metrics.Sample{{Name: "/sync/mutex/wait/total:seconds"}}
		metrics.Read(afterMetrics)

		sumSamples := func(p *profile.Profile, i int) int64 {
			var sum int64
			for _, s := range p.Sample {
				sum += s.Value[i]
			}
			return sum
		}

		metricGrowth = afterMetrics[0].Value.Float64() - beforeMetrics[0].Value.Float64()
		profileGrowth = float64(sumSamples(afterProfile, 1)-sumSamples(beforeProfile, 1)) * time.Nanosecond.Seconds()
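		// (In the mutex profile, sample value index 1 is contention delay in
		// nanoseconds; time.Nanosecond.Seconds() is 1e-9, so this converts
		// the delta to seconds for comparison with the metric above.)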

		// The internal/profile package does not support compaction; this delta
		// profile will include separate positive and negative entries.
		p = afterProfile.Copy()
		if len(beforeProfile.Sample) > 0 {
			err := p.Merge(beforeProfile, -1)
			if err != nil {
				t.Fatalf("Merge profiles: %v", err)
			}
		}

		return metricGrowth, profileGrowth, p
	}

	testcase := func(strictTiming bool, acceptStacks [][]string, workers int, fn func() bool) func(t *testing.T) (metricGrowth, profileGrowth float64, n, value int64) {
		return func(t *testing.T) (metricGrowth, profileGrowth float64, n, value int64) {
			metricGrowth, profileGrowth, p := measureDelta(t, func() {
				var started, stopped sync.WaitGroup
				started.Add(workers)
				stopped.Add(workers)
				for i := 0; i < workers; i++ {
					w := &contentionWorker{
						before: func() {
							started.Done()
							started.Wait()
						},
						after: func() {
							stopped.Done()
						},
						fn: fn,
					}
					go w.run()
				}
				stopped.Wait()
			})

			if profileGrowth == 0 {
				t.Errorf("no increase in mutex profile")
			}
			if metricGrowth == 0 && strictTiming {
				// If the critical section is very short, systems with low timer
				// resolution may be unable to measure it via nanotime.
				//
				// This is sampled at 1 per gTrackingPeriod, but the explicit
				// runtime.mutex tests create 200 contention events. Observing
				// zero of those has a probability of (7/8)^200 = 2.5e-12 which
				// is acceptably low (though the calculation has a tenuous
				// dependency on cheaprandn being a good-enough source of
				// entropy).
				t.Errorf("no increase in /sync/mutex/wait/total:seconds metric")
			}
			// This comparison is possible because the time measurements in support of
			// runtime/pprof and runtime/metrics for runtime-internal locks are so close
			// together. It doesn't work as well for user-space contention, where the
			// involved goroutines are not _Grunnable the whole time and so need to pass
			// through the scheduler.
			t.Logf("lock contention growth in runtime/pprof's view  (%fs)", profileGrowth)
			t.Logf("lock contention growth in runtime/metrics' view (%fs)", metricGrowth)

			acceptStacks = append([][]string(nil), acceptStacks...)
			for i, stk := range acceptStacks {
				if goexperiment.StaticLockRanking {
					if !slices.ContainsFunc(stk, func(s string) bool {
						return s == "runtime.systemstack" || s == "runtime.mcall" || s == "runtime.mstart"
					}) {
						// stk is a call stack that is still on the user stack when
						// it calls runtime.unlock. Add the extra function that
						// we'll see, when the static lock ranking implementation of
						// runtime.unlockWithRank switches to the system stack.
						stk = append([]string{"runtime.unlockWithRank"}, stk...)
					}
				}
				acceptStacks[i] = stk
			}

			var stks [][]string
			values := make([][2]int64, len(acceptStacks))
			for _, s := range p.Sample {
				var have []string
				for _, loc := range s.Location {
					for _, line := range loc.Line {
						have = append(have, line.Function.Name)
					}
				}
				stks = append(stks, have)
				for i, stk := range acceptStacks {
					if slices.Equal(have, stk) {
						values[i][0] += s.Value[0]
						values[i][1] += s.Value[1]
					}
				}
			}
			for i, stk := range acceptStacks {
				n += values[i][0]
				value += values[i][1]
				t.Logf("stack %v has samples totaling n=%d value=%d", stk, values[i][0], values[i][1])
			}
			if n == 0 && value == 0 {
				t.Logf("profile:\n%s", p)
				for _, have := range stks {
					t.Logf("have stack %v", have)
				}
				for _, stk := range acceptStacks {
					t.Errorf("want stack %v", stk)
				}
			}

			return metricGrowth, profileGrowth, n, value
		}
	}

	name := t.Name()

	t.Run("runtime.lock", func(t *testing.T) {
		mus := make([]runtime.Mutex, 200)
		var needContention atomic.Int64
		delay := 100 * time.Microsecond // large relative to system noise, for comparison between clocks
		delayMicros := delay.Microseconds()

		// The goroutine that acquires the lock will only proceed when it
		// detects that its partner is contended for the lock. That will lead to
		// live-lock if anything (such as a STW) prevents the partner goroutine
		// from running. Allowing the contention workers to pause and restart
		// (to allow a STW to proceed) makes it harder to confirm that we're
		// counting the correct number of contention events, since some locks
		// will end up contended twice. Instead, disable the GC.
		defer debug.SetGCPercent(debug.SetGCPercent(-1))

		const workers = 2
		if runtime.GOMAXPROCS(0) < workers {
			t.Skipf("contention on runtime-internal locks requires GOMAXPROCS >= %d", workers)
		}

		fn := func() bool {
			n := int(needContention.Load())
			if n < 0 {
				return false
			}
			mu := &mus[n]

			runtime.Lock(mu)
			for int(needContention.Load()) == n {
				if runtime.MutexContended(mu) {
					// make them wait a little while
					for start := runtime.Nanotime(); (runtime.Nanotime()-start)/1000 < delayMicros; {
						runtime.Usleep(uint32(delayMicros))
					}
					break
				}
			}
			runtime.Unlock(mu)
			needContention.Store(int64(n - 1))

			return true
		}

		stks := [][]string{{
			"runtime.unlock",
			"runtime_test." + name + ".func5.1",
			"runtime_test.(*contentionWorker).run",
		}}

		t.Run("sample-1", func(t *testing.T) {
			old := runtime.SetMutexProfileFraction(1)
			defer runtime.SetMutexProfileFraction(old)

			needContention.Store(int64(len(mus) - 1))
			metricGrowth, profileGrowth, n, _ := testcase(true, stks, workers, fn)(t)

			t.Run("metric", func(t *testing.T) {
				// The runtime/metrics view may be sampled at 1 per
				// gTrackingPeriod, so we don't have a hard lower bound here.
				testenv.SkipFlaky(t, 64253)

				if have, want := metricGrowth, delay.Seconds()*float64(len(mus)); have < want {
					// The test imposes a delay with usleep, verified with calls to
					// nanotime. Compare against the runtime/metrics package's view
					// (based on nanotime) rather than runtime/pprof's view (based
					// on cputicks).
					t.Errorf("runtime/metrics reported less than the known minimum contention duration (%fs < %fs)", have, want)
				}
			})
			if have, want := n, int64(len(mus)); have != want {
				t.Errorf("mutex profile reported contention count different from the known true count (%d != %d)", have, want)
			}

			const slop = 1.5 // account for nanotime vs cputicks
			t.Run("compare timers", func(t *testing.T) {
				testenv.SkipFlaky(t, 64253)
				if profileGrowth > slop*metricGrowth || metricGrowth > slop*profileGrowth {
					t.Errorf("views differ by more than %fx", slop)
				}
			})
		})

		t.Run("sample-2", func(t *testing.T) {
			testenv.SkipFlaky(t, 64253)

			old := runtime.SetMutexProfileFraction(2)
			defer runtime.SetMutexProfileFraction(old)

			needContention.Store(int64(len(mus) - 1))
			metricGrowth, profileGrowth, n, _ := testcase(true, stks, workers, fn)(t)

			// The test creates 200 contention events (one per mutex in mus),
			// and with a profile fraction of 2 we expect to capture about half
			// of them (runtime/pprof scales the reported counts back up by the
			// fraction). Allow the test to pass if we observe at least 1/2.5
			// of the expected amount; the CDF of the binomial distribution
			// says the chance of seeing less is well below 1e-9, which is an
			// acceptably low flakiness rate.
			const samplingSlop = 2.5

			if have, want := metricGrowth, delay.Seconds()*float64(len(mus)); samplingSlop*have < want {
				// The test imposes a delay with usleep, verified with calls to
				// nanotime. Compare against the runtime/metrics package's view
				// (based on nanotime) rather than runtime/pprof's view (based
				// on cputicks).
				t.Errorf("runtime/metrics reported less than the known minimum contention duration (%f * %fs < %fs)", samplingSlop, have, want)
			}
			if have, want := n, int64(len(mus)); float64(have) > float64(want)*samplingSlop || float64(want) > float64(have)*samplingSlop {
				t.Errorf("mutex profile reported contention count too different from the expected count (%d far from %d)", have, want)
			}

			const timerSlop = 1.5 * samplingSlop // account for nanotime vs cputicks, plus the two views' independent sampling
			if profileGrowth > timerSlop*metricGrowth || metricGrowth > timerSlop*profileGrowth {
				t.Errorf("views differ by more than %fx", timerSlop)
			}
		})
	})

	t.Run("runtime.semrelease", func(t *testing.T) {
		testenv.SkipFlaky(t, 64253)

		old := runtime.SetMutexProfileFraction(1)
		defer runtime.SetMutexProfileFraction(old)

		const workers = 3
		if runtime.GOMAXPROCS(0) < workers {
			t.Skipf("creating and observing contention on runtime-internal semaphores requires GOMAXPROCS >= %d", workers)
		}

		var sem uint32 = 1
		var tries atomic.Int32
		tries.Store(10_000_000) // prefer controlled failure to timeout
		var sawContention atomic.Int32
		var need int32 = 1
		fn := func() bool {
			if sawContention.Load() >= need {
				return false
			}
			if tries.Add(-1) < 0 {
				return false
			}

			runtime.Semacquire(&sem)
			runtime.Semrelease1(&sem, false, 0)
			if runtime.MutexContended(runtime.SemRootLock(&sem)) {
				sawContention.Add(1)
			}
			return true
		}

		stks := [][]string{
			{
				"runtime.unlock",
				"runtime.semrelease1",
				"runtime_test.TestRuntimeLockMetricsAndProfile.func6.1",
				"runtime_test.(*contentionWorker).run",
			},
			{
				"runtime.unlock",
				"runtime.semacquire1",
				"runtime.semacquire",
				"runtime_test.TestRuntimeLockMetricsAndProfile.func6.1",
				"runtime_test.(*contentionWorker).run",
			},
		}

		// Verify that we get the call stacks we expect, with anything more
		// than zero cycles / zero samples. The duration of each contention
		// event is too small relative to the expected overhead for us to
		// verify its value more directly. Leave that to the explicit
		// lock/unlock test.

		testcase(false, stks, workers, fn)(t)

		if remaining := tries.Load(); remaining >= 0 {
			t.Logf("finished test early (%d tries remaining)", remaining)
		}
	})
}

// contentionWorker provides cleaner call stacks for lock contention profile tests.
type contentionWorker struct {
	before func()
	fn     func() bool
	after  func()
}

func (w *contentionWorker) run() {
	defer w.after()
	w.before()

	for w.fn() {
	}
}

func TestCPUStats(t *testing.T) {
	// Run a few GC cycles to get some of the stats to be non-zero.
	runtime.GC()
	runtime.GC()
	runtime.GC()

	// Set GOMAXPROCS high then sleep briefly to ensure we generate
	// some idle time.
	oldmaxprocs := runtime.GOMAXPROCS(10)
	time.Sleep(time.Millisecond)
	runtime.GOMAXPROCS(oldmaxprocs)

	stats := runtime.ReadCPUStats()
	gcTotal := stats.GCAssistTime + stats.GCDedicatedTime + stats.GCIdleTime + stats.GCPauseTime
	if gcTotal != stats.GCTotalTime {
		t.Errorf("manually computed total does not match GCTotalTime: %d cpu-ns vs. %d cpu-ns", gcTotal, stats.GCTotalTime)
	}
	scavTotal := stats.ScavengeAssistTime + stats.ScavengeBgTime
	if scavTotal != stats.ScavengeTotalTime {
		t.Errorf("manually computed total does not match ScavengeTotalTime: %d cpu-ns vs. %d cpu-ns", scavTotal, stats.ScavengeTotalTime)
	}
	total := gcTotal + scavTotal + stats.IdleTime + stats.UserTime
	if total != stats.TotalTime {
		t.Errorf("manually computed overall total does not match TotalTime: %d cpu-ns vs. %d cpu-ns", total, stats.TotalTime)
	}
	if total == 0 {
		t.Error("total time is zero")
	}
	if gcTotal == 0 {
		t.Error("GC total time is zero")
	}
	if stats.IdleTime == 0 {
		t.Error("idle time is zero")
	}
}

func TestMetricHeapUnusedLargeObjectOverflow(t *testing.T) {
	// This test makes sure /memory/classes/heap/unused:bytes
	// doesn't overflow when allocating and deallocating large
	// objects. It is a regression test for #67019.
	done := make(chan struct{})
	var wg sync.WaitGroup
	wg.Add(1)
	go func() {
		defer wg.Done()
		for {
			for range 10 {
				abi.Escape(make([]byte, 1<<20))
			}
			runtime.GC()
			select {
			case <-done:
				return
			default:
			}
		}
	}()
	s := []metrics.Sample{
		{Name: "/memory/classes/heap/unused:bytes"},
	}
	for range 1000 {
		metrics.Read(s)
		if s[0].Value.Uint64() > 1<<40 {
			t.Errorf("overflow")
			break
		}
	}
	done <- struct{}{}
	wg.Wait()
}