// Copyright 2020 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package runtime_test

import (
	"bytes"
	"fmt"
	"internal/abi"
	"internal/goexperiment"
	"internal/profile"
	"internal/testenv"
	"os"
	"reflect"
	"runtime"
	"runtime/debug"
	"runtime/metrics"
	"runtime/pprof"
	"runtime/trace"
	"slices"
	"sort"
	"strings"
	"sync"
	"sync/atomic"
	"testing"
	"time"
	"unsafe"
)

func prepareAllMetricsSamples() (map[string]metrics.Description, []metrics.Sample) {
	all := metrics.All()
	samples := make([]metrics.Sample, len(all))
	descs := make(map[string]metrics.Description)
	for i := range all {
		samples[i].Name = all[i].Name
		descs[all[i].Name] = all[i]
	}
	return descs, samples
}

func TestReadMetrics(t *testing.T) {
	// Run a GC cycle to get some of the stats to be non-zero.
	runtime.GC()

	// Set an arbitrary memory limit to check the metric for it.
	limit := int64(512 * 1024 * 1024)
	oldLimit := debug.SetMemoryLimit(limit)
	defer debug.SetMemoryLimit(oldLimit)

	// Set a GC percent to check the metric for it.
	gcPercent := 99
	oldGCPercent := debug.SetGCPercent(gcPercent)
	defer debug.SetGCPercent(oldGCPercent)

	// Tests whether readMetrics produces values aligning
	// with ReadMemStats while the world is stopped.
	var mstats runtime.MemStats
	_, samples := prepareAllMetricsSamples()
	runtime.ReadMetricsSlow(&mstats, unsafe.Pointer(&samples[0]), len(samples), cap(samples))

	checkUint64 := func(t *testing.T, m string, got, want uint64) {
		t.Helper()
		if got != want {
			t.Errorf("metric %q: got %d, want %d", m, got, want)
		}
	}

	// Check to make sure the values we read line up with other values we read.
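	// A few metrics can't be checked inline in the loop below (the histograms
	// and the tiny allocation count feed cross-checks that need values from
	// several metrics at once), so capture them here and verify them after
	// the loop.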
	var allocsBySize, gcPauses, schedPausesTotalGC *metrics.Float64Histogram
	var tinyAllocs uint64
	var mallocs, frees uint64
	for i := range samples {
		switch name := samples[i].Name; name {
		case "/cgo/go-to-c-calls:calls":
			checkUint64(t, name, samples[i].Value.Uint64(), uint64(runtime.NumCgoCall()))
		case "/memory/classes/heap/free:bytes":
			checkUint64(t, name, samples[i].Value.Uint64(), mstats.HeapIdle-mstats.HeapReleased)
		case "/memory/classes/heap/released:bytes":
			checkUint64(t, name, samples[i].Value.Uint64(), mstats.HeapReleased)
		case "/memory/classes/heap/objects:bytes":
			checkUint64(t, name, samples[i].Value.Uint64(), mstats.HeapAlloc)
		case "/memory/classes/heap/unused:bytes":
			checkUint64(t, name, samples[i].Value.Uint64(), mstats.HeapInuse-mstats.HeapAlloc)
		case "/memory/classes/heap/stacks:bytes":
			checkUint64(t, name, samples[i].Value.Uint64(), mstats.StackInuse)
		case "/memory/classes/metadata/mcache/free:bytes":
			checkUint64(t, name, samples[i].Value.Uint64(), mstats.MCacheSys-mstats.MCacheInuse)
		case "/memory/classes/metadata/mcache/inuse:bytes":
			checkUint64(t, name, samples[i].Value.Uint64(), mstats.MCacheInuse)
		case "/memory/classes/metadata/mspan/free:bytes":
			checkUint64(t, name, samples[i].Value.Uint64(), mstats.MSpanSys-mstats.MSpanInuse)
		case "/memory/classes/metadata/mspan/inuse:bytes":
			checkUint64(t, name, samples[i].Value.Uint64(), mstats.MSpanInuse)
		case "/memory/classes/metadata/other:bytes":
			checkUint64(t, name, samples[i].Value.Uint64(), mstats.GCSys)
		case "/memory/classes/os-stacks:bytes":
			checkUint64(t, name, samples[i].Value.Uint64(), mstats.StackSys-mstats.StackInuse)
		case "/memory/classes/other:bytes":
			checkUint64(t, name, samples[i].Value.Uint64(), mstats.OtherSys)
		case "/memory/classes/profiling/buckets:bytes":
			checkUint64(t, name, samples[i].Value.Uint64(), mstats.BuckHashSys)
		case "/memory/classes/total:bytes":
			checkUint64(t, name, samples[i].Value.Uint64(), mstats.Sys)
		case "/gc/heap/allocs-by-size:bytes":
			hist := samples[i].Value.Float64Histogram()
			// Skip size class 0 in BySize, because it's always empty and not represented
			// in the histogram.
			for i, sc := range mstats.BySize[1:] {
				if b, s := hist.Buckets[i+1], float64(sc.Size+1); b != s {
					t.Errorf("bucket does not match size class: got %f, want %f", b, s)
					// The rest of the checks aren't expected to work anyway.
					continue
				}
				if c, m := hist.Counts[i], sc.Mallocs; c != m {
					t.Errorf("histogram counts do not match BySize for class %d: got %d, want %d", i, c, m)
				}
			}
			allocsBySize = hist
		case "/gc/heap/allocs:bytes":
			checkUint64(t, name, samples[i].Value.Uint64(), mstats.TotalAlloc)
		case "/gc/heap/frees-by-size:bytes":
			hist := samples[i].Value.Float64Histogram()
			// Skip size class 0 in BySize, because it's always empty and not represented
			// in the histogram.
			for i, sc := range mstats.BySize[1:] {
				if b, s := hist.Buckets[i+1], float64(sc.Size+1); b != s {
					t.Errorf("bucket does not match size class: got %f, want %f", b, s)
					// The rest of the checks aren't expected to work anyway.
					continue
				}
				if c, f := hist.Counts[i], sc.Frees; c != f {
					t.Errorf("histogram counts do not match BySize for class %d: got %d, want %d", i, c, f)
				}
			}
		case "/gc/heap/frees:bytes":
			checkUint64(t, name, samples[i].Value.Uint64(), mstats.TotalAlloc-mstats.HeapAlloc)
		case "/gc/heap/tiny/allocs:objects":
			// Currently, MemStats adds tiny alloc count to both Mallocs AND Frees.
			// The reason for this is that MemStats couldn't be extended at the time,
			// but there was a desire to have Mallocs at least be a little more representative,
			// while having Mallocs - Frees still represent a live object count.
			// Unfortunately, MemStats doesn't actually export a large allocation count,
			// so it's impossible to pull this number out directly.
			//
			// Check the tiny allocation count outside of this loop, by using the allocs-by-size
			// histogram in order to figure out how many large objects there are.
			tinyAllocs = samples[i].Value.Uint64()
			// Because the next two metrics tests are checking against Mallocs and Frees,
			// we can't check them directly for the same reason: we need to account for tiny
			// allocations included in Mallocs and Frees.
		case "/gc/heap/allocs:objects":
			mallocs = samples[i].Value.Uint64()
		case "/gc/heap/frees:objects":
			frees = samples[i].Value.Uint64()
		case "/gc/heap/live:bytes":
			// Check for "obviously wrong" values. We can't check a stronger invariant,
			// such as live <= HeapAlloc, because live is not 100% accurate. It's computed
			// under racy conditions, and some objects may be double-counted (this is
			// intentional and necessary for GC performance).
			//
			// Instead, check against a much more reasonable upper bound: the amount of
			// mapped heap memory. We can't possibly overcount to the point of exceeding
			// total mapped heap memory, except if there's an accounting bug.
			if live := samples[i].Value.Uint64(); live > mstats.HeapSys {
				t.Errorf("live bytes: %d > heap sys: %d", live, mstats.HeapSys)
			} else if live == 0 {
				// Might happen if we don't call runtime.GC() above.
				t.Error("live bytes is 0")
			}
		case "/gc/gomemlimit:bytes":
			checkUint64(t, name, samples[i].Value.Uint64(), uint64(limit))
		case "/gc/heap/objects:objects":
			checkUint64(t, name, samples[i].Value.Uint64(), mstats.HeapObjects)
		case "/gc/heap/goal:bytes":
			checkUint64(t, name, samples[i].Value.Uint64(), mstats.NextGC)
		case "/gc/gogc:percent":
			checkUint64(t, name, samples[i].Value.Uint64(), uint64(gcPercent))
		case "/gc/cycles/automatic:gc-cycles":
			checkUint64(t, name, samples[i].Value.Uint64(), uint64(mstats.NumGC-mstats.NumForcedGC))
		case "/gc/cycles/forced:gc-cycles":
			checkUint64(t, name, samples[i].Value.Uint64(), uint64(mstats.NumForcedGC))
		case "/gc/cycles/total:gc-cycles":
			checkUint64(t, name, samples[i].Value.Uint64(), uint64(mstats.NumGC))
		case "/gc/pauses:seconds":
			gcPauses = samples[i].Value.Float64Histogram()
		case "/sched/pauses/total/gc:seconds":
			schedPausesTotalGC = samples[i].Value.Float64Histogram()
		}
	}

	// Check tinyAllocs.
	nonTinyAllocs := uint64(0)
	for _, c := range allocsBySize.Counts {
		nonTinyAllocs += c
	}
	checkUint64(t, "/gc/heap/tiny/allocs:objects", tinyAllocs, mstats.Mallocs-nonTinyAllocs)

	// Check allocation and free counts.
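	// MemStats.Mallocs and MemStats.Frees both include the tiny allocation
	// count (see the /gc/heap/tiny/allocs:objects case above), while the
	// runtime/metrics object counters do not, so subtract tinyAllocs from
	// both sides before comparing.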
	checkUint64(t, "/gc/heap/allocs:objects", mallocs, mstats.Mallocs-tinyAllocs)
	checkUint64(t, "/gc/heap/frees:objects", frees, mstats.Frees-tinyAllocs)

	// Verify that /gc/pauses:seconds is a copy of /sched/pauses/total/gc:seconds.
	if !reflect.DeepEqual(gcPauses.Buckets, schedPausesTotalGC.Buckets) {
		t.Errorf("/gc/pauses:seconds buckets %v do not match /sched/pauses/total/gc:seconds buckets %v", gcPauses.Buckets, schedPausesTotalGC.Buckets)
	}
	if !reflect.DeepEqual(gcPauses.Counts, schedPausesTotalGC.Counts) {
		t.Errorf("/gc/pauses:seconds counts %v do not match /sched/pauses/total/gc:seconds counts %v", gcPauses.Counts, schedPausesTotalGC.Counts)
	}
}

func TestReadMetricsConsistency(t *testing.T) {
	// Tests whether readMetrics produces consistent, sensible values.
	// The values are read concurrently with the runtime doing other
	// things (e.g. allocating), so what we read can't reasonably be compared
	// to other runtime values (e.g. MemStats).

	// Run a few GC cycles to get some of the stats to be non-zero.
	runtime.GC()
	runtime.GC()
	runtime.GC()

	// Set GOMAXPROCS high then sleep briefly to ensure we generate
	// some idle time.
	oldmaxprocs := runtime.GOMAXPROCS(10)
	time.Sleep(time.Millisecond)
	runtime.GOMAXPROCS(oldmaxprocs)

	// Read all the supported metrics through the metrics package.
	descs, samples := prepareAllMetricsSamples()
	metrics.Read(samples)

	// Check to make sure the values we read make sense.
	var totalVirtual struct {
		got, want uint64
	}
	var objects struct {
		alloc, free             *metrics.Float64Histogram
		allocs, frees           uint64
		allocdBytes, freedBytes uint64
		total, totalBytes       uint64
	}
	var gc struct {
		numGC  uint64
		pauses uint64
	}
	var totalScan struct {
		got, want uint64
	}
	var cpu struct {
		gcAssist    float64
		gcDedicated float64
		gcIdle      float64
		gcPause     float64
		gcTotal     float64

		idle float64
		user float64

		scavengeAssist float64
		scavengeBg     float64
		scavengeTotal  float64

		total float64
	}
	for i := range samples {
		kind := samples[i].Value.Kind()
		if want := descs[samples[i].Name].Kind; kind != want {
			t.Errorf("supported metric %q has unexpected kind: got %d, want %d", samples[i].Name, kind, want)
			continue
		}
		if samples[i].Name != "/memory/classes/total:bytes" && strings.HasPrefix(samples[i].Name, "/memory/classes") {
			v := samples[i].Value.Uint64()
			totalVirtual.want += v

			// None of these stats should ever get this big.
			// If they do, there's probably overflow involved,
			// usually due to bad accounting.
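			// Reinterpreting the unsigned value as int64 turns any value with
			// the top bit set negative, which flags impossibly large values
			// cheaply.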
			if int64(v) < 0 {
				t.Errorf("%q has high/negative value: %d", samples[i].Name, v)
			}
		}
		switch samples[i].Name {
		case "/cpu/classes/gc/mark/assist:cpu-seconds":
			cpu.gcAssist = samples[i].Value.Float64()
		case "/cpu/classes/gc/mark/dedicated:cpu-seconds":
			cpu.gcDedicated = samples[i].Value.Float64()
		case "/cpu/classes/gc/mark/idle:cpu-seconds":
			cpu.gcIdle = samples[i].Value.Float64()
		case "/cpu/classes/gc/pause:cpu-seconds":
			cpu.gcPause = samples[i].Value.Float64()
		case "/cpu/classes/gc/total:cpu-seconds":
			cpu.gcTotal = samples[i].Value.Float64()
		case "/cpu/classes/idle:cpu-seconds":
			cpu.idle = samples[i].Value.Float64()
		case "/cpu/classes/scavenge/assist:cpu-seconds":
			cpu.scavengeAssist = samples[i].Value.Float64()
		case "/cpu/classes/scavenge/background:cpu-seconds":
			cpu.scavengeBg = samples[i].Value.Float64()
		case "/cpu/classes/scavenge/total:cpu-seconds":
			cpu.scavengeTotal = samples[i].Value.Float64()
		case "/cpu/classes/total:cpu-seconds":
			cpu.total = samples[i].Value.Float64()
		case "/cpu/classes/user:cpu-seconds":
			cpu.user = samples[i].Value.Float64()
		case "/memory/classes/total:bytes":
			totalVirtual.got = samples[i].Value.Uint64()
		case "/memory/classes/heap/objects:bytes":
			objects.totalBytes = samples[i].Value.Uint64()
		case "/gc/heap/objects:objects":
			objects.total = samples[i].Value.Uint64()
		case "/gc/heap/allocs:bytes":
			objects.allocdBytes = samples[i].Value.Uint64()
		case "/gc/heap/allocs:objects":
			objects.allocs = samples[i].Value.Uint64()
		case "/gc/heap/allocs-by-size:bytes":
			objects.alloc = samples[i].Value.Float64Histogram()
		case "/gc/heap/frees:bytes":
			objects.freedBytes = samples[i].Value.Uint64()
		case "/gc/heap/frees:objects":
			objects.frees = samples[i].Value.Uint64()
		case "/gc/heap/frees-by-size:bytes":
			objects.free = samples[i].Value.Float64Histogram()
		case "/gc/cycles:gc-cycles":
			gc.numGC = samples[i].Value.Uint64()
		case "/gc/pauses:seconds":
			h := samples[i].Value.Float64Histogram()
			gc.pauses = 0
			for i := range h.Counts {
				gc.pauses += h.Counts[i]
			}
		case "/gc/scan/heap:bytes":
			totalScan.want += samples[i].Value.Uint64()
		case "/gc/scan/globals:bytes":
			totalScan.want += samples[i].Value.Uint64()
		case "/gc/scan/stack:bytes":
			totalScan.want += samples[i].Value.Uint64()
		case "/gc/scan/total:bytes":
			totalScan.got = samples[i].Value.Uint64()
		case "/sched/gomaxprocs:threads":
			if got, want := samples[i].Value.Uint64(), uint64(runtime.GOMAXPROCS(-1)); got != want {
				t.Errorf("gomaxprocs doesn't match runtime.GOMAXPROCS: got %d, want %d", got, want)
			}
		case "/sched/goroutines:goroutines":
			if samples[i].Value.Uint64() < 1 {
				t.Error("number of goroutines is less than one")
			}
		}
	}
	// Only check this on Linux where we can be reasonably sure we have a high-resolution timer.
	if runtime.GOOS == "linux" {
		if cpu.gcDedicated <= 0 && cpu.gcAssist <= 0 && cpu.gcIdle <= 0 {
			t.Errorf("found no time spent on GC work: %#v", cpu)
		}
		if cpu.gcPause <= 0 {
			t.Errorf("found no GC pauses: %f", cpu.gcPause)
		}
		if cpu.idle <= 0 {
			t.Errorf("found no idle time: %f", cpu.idle)
		}
		if total := cpu.gcDedicated + cpu.gcAssist + cpu.gcIdle + cpu.gcPause; !withinEpsilon(cpu.gcTotal, total, 0.001) {
			t.Errorf("calculated total GC CPU time not within 0.1%% of total: %f vs. %f", total, cpu.gcTotal)
		}
		if total := cpu.scavengeAssist + cpu.scavengeBg; !withinEpsilon(cpu.scavengeTotal, total, 0.001) {
			t.Errorf("calculated total scavenge CPU not within 0.1%% of total: %f vs. %f", total, cpu.scavengeTotal)
		}
		if cpu.total <= 0 {
			t.Errorf("found no total CPU time passed")
		}
		if cpu.user <= 0 {
			t.Errorf("found no user time passed")
		}
		if total := cpu.gcTotal + cpu.scavengeTotal + cpu.user + cpu.idle; !withinEpsilon(cpu.total, total, 0.001) {
			t.Errorf("calculated total CPU not within 0.1%% of total: %f vs. %f", total, cpu.total)
		}
	}
	if totalVirtual.got != totalVirtual.want {
		t.Errorf(`"/memory/classes/total:bytes" does not match sum of /memory/classes/**: got %d, want %d`, totalVirtual.got, totalVirtual.want)
	}
	if got, want := objects.allocs-objects.frees, objects.total; got != want {
		t.Errorf("mismatch between object alloc/free tallies and total: got %d, want %d", got, want)
	}
	if got, want := objects.allocdBytes-objects.freedBytes, objects.totalBytes; got != want {
		t.Errorf("mismatch between object alloc/free tallies and total: got %d, want %d", got, want)
	}
	if b, c := len(objects.alloc.Buckets), len(objects.alloc.Counts); b != c+1 {
		t.Errorf("allocs-by-size has wrong bucket or counts length: %d buckets, %d counts", b, c)
	}
	if b, c := len(objects.free.Buckets), len(objects.free.Counts); b != c+1 {
		t.Errorf("frees-by-size has wrong bucket or counts length: %d buckets, %d counts", b, c)
	}
	if len(objects.alloc.Buckets) != len(objects.free.Buckets) {
		t.Error("allocs-by-size and frees-by-size buckets don't match in length")
	} else if len(objects.alloc.Counts) != len(objects.free.Counts) {
		t.Error("allocs-by-size and frees-by-size counts don't match in length")
	} else {
		for i := range objects.alloc.Buckets {
			ba := objects.alloc.Buckets[i]
			bf := objects.free.Buckets[i]
			if ba != bf {
				t.Errorf("bucket %d is different for alloc and free hists: %f != %f", i, ba, bf)
			}
		}
		if !t.Failed() {
			var gotAlloc, gotFree uint64
			want := objects.total
			for i := range objects.alloc.Counts {
				if objects.alloc.Counts[i] < objects.free.Counts[i] {
					t.Errorf("found more allocs than frees in object dist bucket %d", i)
					continue
				}
				gotAlloc += objects.alloc.Counts[i]
				gotFree += objects.free.Counts[i]
			}
			if got := gotAlloc - gotFree; got != want {
				t.Errorf("object distribution counts don't match count of live objects: got %d, want %d", got, want)
			}
			if gotAlloc != objects.allocs {
				t.Errorf("object distribution counts don't match total allocs: got %d, want %d", gotAlloc, objects.allocs)
			}
			if gotFree != objects.frees {
				t.Errorf("object distribution counts don't match total frees: got %d, want %d", gotFree, objects.frees)
			}
		}
	}
	// The current GC has at least 2 pauses per GC.
	// Check to see if that value makes sense.
	if gc.pauses < gc.numGC*2 {
		t.Errorf("fewer pauses than expected: got %d, want at least %d", gc.pauses, gc.numGC*2)
	}
	if totalScan.got <= 0 {
		t.Errorf("scannable GC space is empty: %d", totalScan.got)
	}
	if totalScan.got != totalScan.want {
		t.Errorf("/gc/scan/total:bytes doesn't line up with sum of /gc/scan*: total %d vs. sum %d", totalScan.got, totalScan.want)
	}
}

func BenchmarkReadMetricsLatency(b *testing.B) {
	stop := applyGCLoad(b)

	// Accumulate latency measurements here.
	latencies := make([]time.Duration, 0, 1024)
	_, samples := prepareAllMetricsSamples()

	// Hit metrics.Read continuously and measure.
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		start := time.Now()
		metrics.Read(samples)
		latencies = append(latencies, time.Since(start))
	}
	// Make sure to stop the timer before we wait! The load created above
	// is very heavyweight and not easy to stop, so we could end up
	// confusing the benchmarking framework for small b.N.
	b.StopTimer()
	stop()

	// Disable the default */op metrics.
	// ns/op doesn't mean anything because it's an average, and the heavy
	// background load above skews it significantly.
	b.ReportMetric(0, "ns/op")
	b.ReportMetric(0, "B/op")
	b.ReportMetric(0, "allocs/op")

	// Sort latencies, then report percentiles.
	sort.Slice(latencies, func(i, j int) bool {
		return latencies[i] < latencies[j]
	})
	b.ReportMetric(float64(latencies[len(latencies)*50/100]), "p50-ns")
	b.ReportMetric(float64(latencies[len(latencies)*90/100]), "p90-ns")
	b.ReportMetric(float64(latencies[len(latencies)*99/100]), "p99-ns")
}

var readMetricsSink [1024]interface{}

func TestReadMetricsCumulative(t *testing.T) {
	// Set up the set of metrics marked cumulative.
	descs := metrics.All()
	var samples [2][]metrics.Sample
	samples[0] = make([]metrics.Sample, len(descs))
	samples[1] = make([]metrics.Sample, len(descs))
	total := 0
	for i := range samples[0] {
		if !descs[i].Cumulative {
			continue
		}
		samples[0][total].Name = descs[i].Name
		total++
	}
	samples[0] = samples[0][:total]
	samples[1] = samples[1][:total]
	copy(samples[1], samples[0])

	// Start some noise in the background.
	var wg sync.WaitGroup
	wg.Add(1)
	done := make(chan struct{})
	go func() {
		defer wg.Done()
		for {
			// Add more things here that could influence metrics.
			for i := 0; i < len(readMetricsSink); i++ {
				readMetricsSink[i] = make([]byte, 1024)
				select {
				case <-done:
					return
				default:
				}
			}
			runtime.GC()
		}
	}()

	sum := func(us []uint64) uint64 {
		total := uint64(0)
		for _, u := range us {
			total += u
		}
		return total
	}

	// Populate the first generation.
	metrics.Read(samples[0])

	// Check to make sure that these metrics only grow monotonically.
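	// samples[gen%2] holds the generation about to be read and
	// samples[1-(gen%2)] holds the previous one; the two slices ping-pong
	// so that each fresh read can be compared against the last.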
	for gen := 1; gen < 10; gen++ {
		metrics.Read(samples[gen%2])
		for i := range samples[gen%2] {
			name := samples[gen%2][i].Name
			vNew, vOld := samples[gen%2][i].Value, samples[1-(gen%2)][i].Value

			switch vNew.Kind() {
			case metrics.KindUint64:
				new := vNew.Uint64()
				old := vOld.Uint64()
				if new < old {
					t.Errorf("%s decreased: %d < %d", name, new, old)
				}
			case metrics.KindFloat64:
				new := vNew.Float64()
				old := vOld.Float64()
				if new < old {
					t.Errorf("%s decreased: %f < %f", name, new, old)
				}
			case metrics.KindFloat64Histogram:
				new := sum(vNew.Float64Histogram().Counts)
				old := sum(vOld.Float64Histogram().Counts)
				if new < old {
					t.Errorf("%s counts decreased: %d < %d", name, new, old)
				}
			}
		}
	}
	close(done)

	wg.Wait()
}

// withinEpsilon reports whether v1 is within relative error e of v2.
func withinEpsilon(v1, v2, e float64) bool {
	return v2-v2*e <= v1 && v1 <= v2+v2*e
}

func TestMutexWaitTimeMetric(t *testing.T) {
	var sample [1]metrics.Sample
	sample[0].Name = "/sync/mutex/wait/total:seconds"

	locks := []locker2{
		new(mutex),
		new(rwmutexWrite),
		new(rwmutexReadWrite),
		new(rwmutexWriteRead),
	}
	for _, lock := range locks {
		t.Run(reflect.TypeOf(lock).Elem().Name(), func(t *testing.T) {
			metrics.Read(sample[:])
			before := time.Duration(sample[0].Value.Float64() * 1e9)

			minMutexWaitTime := generateMutexWaitTime(lock)

			metrics.Read(sample[:])
			after := time.Duration(sample[0].Value.Float64() * 1e9)

			if wt := after - before; wt < minMutexWaitTime {
				t.Errorf("too little mutex wait time: got %s, want %s", wt, minMutexWaitTime)
			}
		})
	}
}

// locker2 represents an API surface of two concurrent goroutines
// locking the same resource, but through different APIs. It's intended
// to abstract over the relationship of two Lock calls or an RLock
// and a Lock call.
type locker2 interface {
	Lock1()
	Unlock1()
	Lock2()
	Unlock2()
}

type mutex struct {
	mu sync.Mutex
}

func (m *mutex) Lock1()   { m.mu.Lock() }
func (m *mutex) Unlock1() { m.mu.Unlock() }
func (m *mutex) Lock2()   { m.mu.Lock() }
func (m *mutex) Unlock2() { m.mu.Unlock() }

type rwmutexWrite struct {
	mu sync.RWMutex
}

func (m *rwmutexWrite) Lock1()   { m.mu.Lock() }
func (m *rwmutexWrite) Unlock1() { m.mu.Unlock() }
func (m *rwmutexWrite) Lock2()   { m.mu.Lock() }
func (m *rwmutexWrite) Unlock2() { m.mu.Unlock() }

type rwmutexReadWrite struct {
	mu sync.RWMutex
}

func (m *rwmutexReadWrite) Lock1()   { m.mu.RLock() }
func (m *rwmutexReadWrite) Unlock1() { m.mu.RUnlock() }
func (m *rwmutexReadWrite) Lock2()   { m.mu.Lock() }
func (m *rwmutexReadWrite) Unlock2() { m.mu.Unlock() }

type rwmutexWriteRead struct {
	mu sync.RWMutex
}

func (m *rwmutexWriteRead) Lock1()   { m.mu.Lock() }
func (m *rwmutexWriteRead) Unlock1() { m.mu.Unlock() }
func (m *rwmutexWriteRead) Lock2()   { m.mu.RLock() }
func (m *rwmutexWriteRead) Unlock2() { m.mu.RUnlock() }

// generateMutexWaitTime causes a couple of goroutines
// to block a whole bunch of times on a sync.Mutex, returning
// the minimum amount of time that should be visible in the
// /sync/mutex/wait/total:seconds metric.
func generateMutexWaitTime(mu locker2) time.Duration {
	// Set up the runtime to always track casgstatus transitions for metrics.
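	// Without this, the runtime only samples goroutine-status transitions,
	// which could make the single short wait below invisible to the metric.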
	*runtime.CasGStatusAlwaysTrack = true

	mu.Lock1()

	// Start up a goroutine to wait on the lock.
	gc := make(chan *runtime.G)
	done := make(chan bool)
	go func() {
		gc <- runtime.Getg()

		for {
			mu.Lock2()
			mu.Unlock2()
			if <-done {
				return
			}
		}
	}()
	gp := <-gc

	// Set the block time high enough so that it will always show up, even
	// on systems with coarse timer granularity.
	const blockTime = 100 * time.Millisecond

	// Make sure the goroutine spawned above actually blocks on the lock.
	for {
		if runtime.GIsWaitingOnMutex(gp) {
			break
		}
		runtime.Gosched()
	}

	// Let some amount of time pass.
	time.Sleep(blockTime)

	// Let the other goroutine acquire the lock.
	mu.Unlock1()
	done <- true

	// Reset flag.
	*runtime.CasGStatusAlwaysTrack = false
	return blockTime
}

// See issue #60276.
func TestCPUMetricsSleep(t *testing.T) {
	if runtime.GOOS == "wasip1" {
		// Since wasip1 busy-waits in the scheduler, there's no meaningful idle
		// time. This is accurately reflected in the metrics, but it means this
		// test is basically meaningless on this platform.
		t.Skip("wasip1 currently busy-waits in idle time; test not applicable")
	}

	names := []string{
		"/cpu/classes/idle:cpu-seconds",

		"/cpu/classes/gc/mark/assist:cpu-seconds",
		"/cpu/classes/gc/mark/dedicated:cpu-seconds",
		"/cpu/classes/gc/mark/idle:cpu-seconds",
		"/cpu/classes/gc/pause:cpu-seconds",
		"/cpu/classes/gc/total:cpu-seconds",
		"/cpu/classes/scavenge/assist:cpu-seconds",
		"/cpu/classes/scavenge/background:cpu-seconds",
		"/cpu/classes/scavenge/total:cpu-seconds",
		"/cpu/classes/total:cpu-seconds",
		"/cpu/classes/user:cpu-seconds",
	}
	prep := func() []metrics.Sample {
		mm := make([]metrics.Sample, len(names))
		for i := range names {
			mm[i].Name = names[i]
		}
		return mm
	}
	m1, m2 := prep(), prep()

	const (
		// Expected time spent idle.
		dur = 100 * time.Millisecond

		// maxFailures is the number of consecutive failures required to cause the test to fail.
		maxFailures = 10
	)

	failureIdleTimes := make([]float64, 0, maxFailures)

	// If the bug we expect is happening, then the Sleep CPU time will be accounted for
	// as user time rather than idle time. In an ideal world we'd expect the whole application
	// to go instantly idle the moment this goroutine goes to sleep, and stay asleep for that
	// duration. However, the Go runtime can easily eat into idle time while this goroutine is
	// blocked in a sleep. For example, slow platforms might spend more time than expected in the
	// scheduler. Another example is that a Go runtime background goroutine could run while
	// everything else is idle. Lastly, if a running goroutine is descheduled by the OS, enough
	// time may pass such that the goroutine is ready to wake, even though the runtime couldn't
	// observe itself as idle with nanotime.
	//
	// To deal with all this, we give a half-proc's worth of leniency.
	//
	// We also retry multiple times to deal with the fact that the OS might deschedule us before
	// we yield and go idle. That has a rare enough chance that retries should resolve it.
	// If the issue we expect is happening, it should be persistent.
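	//
	// For example, with GOMAXPROCS=4 and dur=100ms, the threshold below works
	// out to (4 - 0.5) * 0.1s = 0.35s of measured idle time per attempt.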
	minIdleCPUSeconds := dur.Seconds() * (float64(runtime.GOMAXPROCS(-1)) - 0.5)

	// Let's make sure there's no background scavenge work to do.
	//
	// The runtime.GC calls below ensure the background sweeper
	// will not run during the idle period.
	debug.FreeOSMemory()

	for retries := 0; retries < maxFailures; retries++ {
		// Read 1.
		runtime.GC() // Update /cpu/classes metrics.
		metrics.Read(m1)

		// Sleep.
		time.Sleep(dur)

		// Read 2.
		runtime.GC() // Update /cpu/classes metrics.
		metrics.Read(m2)

		dt := m2[0].Value.Float64() - m1[0].Value.Float64()
		if dt >= minIdleCPUSeconds {
			// All is well. Test passed.
			return
		}
		failureIdleTimes = append(failureIdleTimes, dt)
		// Try again.
	}

	// We couldn't observe the expected idle time even once.
	for i, dt := range failureIdleTimes {
		t.Logf("try %2d: idle time = %.5fs\n", i+1, dt)
	}
	t.Logf("try %d breakdown:\n", len(failureIdleTimes))
	for i := range names {
		if m1[i].Value.Kind() == metrics.KindBad {
			continue
		}
		t.Logf("\t%s %0.3f\n", names[i], m2[i].Value.Float64()-m1[i].Value.Float64())
	}
	t.Errorf(`time.Sleep did not contribute enough to "idle" class: minimum idle time = %.5fs`, minIdleCPUSeconds)
}

// testSchedPauseMetrics calls fn and verifies that the correct STW metrics
// increment. If isGC is true, fn triggers a GC STW. Otherwise, fn triggers a
// non-GC ("other") STW.
func testSchedPauseMetrics(t *testing.T, fn func(t *testing.T), isGC bool) {
	m := []metrics.Sample{
		{Name: "/sched/pauses/stopping/gc:seconds"},
		{Name: "/sched/pauses/stopping/other:seconds"},
		{Name: "/sched/pauses/total/gc:seconds"},
		{Name: "/sched/pauses/total/other:seconds"},
	}

	stoppingGC := &m[0]
	stoppingOther := &m[1]
	totalGC := &m[2]
	totalOther := &m[3]

	sampleCount := func(s *metrics.Sample) uint64 {
		h := s.Value.Float64Histogram()

		var n uint64
		for _, c := range h.Counts {
			n += c
		}
		return n
	}

	// Read baseline.
	metrics.Read(m)

	baselineStartGC := sampleCount(stoppingGC)
	baselineStartOther := sampleCount(stoppingOther)
	baselineTotalGC := sampleCount(totalGC)
	baselineTotalOther := sampleCount(totalOther)

	fn(t)

	metrics.Read(m)

	if isGC {
		if got := sampleCount(stoppingGC); got <= baselineStartGC {
			t.Errorf("/sched/pauses/stopping/gc:seconds sample count %d did not increase from baseline of %d", got, baselineStartGC)
		}
		if got := sampleCount(totalGC); got <= baselineTotalGC {
			t.Errorf("/sched/pauses/total/gc:seconds sample count %d did not increase from baseline of %d", got, baselineTotalGC)
		}

		if got := sampleCount(stoppingOther); got != baselineStartOther {
			t.Errorf("/sched/pauses/stopping/other:seconds sample count %d changed from baseline of %d", got, baselineStartOther)
		}
		if got := sampleCount(totalOther); got != baselineTotalOther {
			t.Errorf("/sched/pauses/total/other:seconds sample count %d changed from baseline of %d", got, baselineTotalOther)
		}
	} else {
		if got := sampleCount(stoppingGC); got != baselineStartGC {
			t.Errorf("/sched/pauses/stopping/gc:seconds sample count %d changed from baseline of %d", got, baselineStartGC)
		}
		if got := sampleCount(totalGC); got != baselineTotalGC {
			t.Errorf("/sched/pauses/total/gc:seconds sample count %d changed from baseline of %d", got, baselineTotalGC)
		}

		if got := sampleCount(stoppingOther); got <= baselineStartOther {
			t.Errorf("/sched/pauses/stopping/other:seconds sample count %d did not increase from baseline of %d", got, baselineStartOther)
		}
		if got := sampleCount(totalOther); got <= baselineTotalOther {
			t.Errorf("/sched/pauses/total/other:seconds sample count %d did not increase from baseline of %d", got, baselineTotalOther)
		}
	}
}

func TestSchedPauseMetrics(t *testing.T) {
	tests := []struct {
		name string
		isGC bool
		fn   func(t *testing.T)
	}{
		{
			name: "runtime.GC",
			isGC: true,
			fn: func(t *testing.T) {
				runtime.GC()
			},
		},
		{
			name: "runtime.GOMAXPROCS",
			fn: func(t *testing.T) {
				if runtime.GOARCH == "wasm" {
					t.Skip("GOMAXPROCS >1 not supported on wasm")
				}

				n := runtime.GOMAXPROCS(0)
				defer runtime.GOMAXPROCS(n)

				runtime.GOMAXPROCS(n + 1)
			},
		},
		{
			name: "runtime.GoroutineProfile",
			fn: func(t *testing.T) {
				var s [1]runtime.StackRecord
				runtime.GoroutineProfile(s[:])
			},
		},
		{
			name: "runtime.ReadMemStats",
			fn: func(t *testing.T) {
				var mstats runtime.MemStats
				runtime.ReadMemStats(&mstats)
			},
		},
		{
			name: "runtime.Stack",
			fn: func(t *testing.T) {
				var b [64]byte
				runtime.Stack(b[:], true)
			},
		},
		{
			name: "runtime/debug.WriteHeapDump",
			fn: func(t *testing.T) {
				if runtime.GOOS == "js" {
					t.Skip("WriteHeapDump not supported on js")
				}

				f, err := os.CreateTemp(t.TempDir(), "heapdumptest")
				if err != nil {
					t.Fatalf("os.CreateTemp failed: %v", err)
				}
				defer os.Remove(f.Name())
				defer f.Close()
				debug.WriteHeapDump(f.Fd())
			},
		},
		{
			name: "runtime/trace.Start",
			fn: func(t *testing.T) {
				if trace.IsEnabled() {
					t.Skip("tracing already enabled")
				}

				var buf bytes.Buffer
				if err := trace.Start(&buf); err != nil {
					t.Errorf("trace.Start err got %v want nil", err)
				}
				trace.Stop()
			},
		},
	}

	// These tests count STW pauses, classified based on whether they're related
	// to the GC or not. Disable automatic GC cycles during the test so we don't
	// have an incidental GC pause when we're trying to observe only
	// non-GC-related pauses. This is especially important for the
	// runtime/trace.Start test, since (as of this writing) that will block
	// until any active GC mark phase completes.
	defer debug.SetGCPercent(debug.SetGCPercent(-1))
	runtime.GC()

	for _, tc := range tests {
		t.Run(tc.name, func(t *testing.T) {
			testSchedPauseMetrics(t, tc.fn, tc.isGC)
		})
	}
}

func TestRuntimeLockMetricsAndProfile(t *testing.T) {
	old := runtime.SetMutexProfileFraction(0) // enabled during sub-tests
	defer runtime.SetMutexProfileFraction(old)
	if old != 0 {
		t.Fatalf("need MutexProfileRate 0, got %d", old)
	}

	{
		before := os.Getenv("GODEBUG")
		for _, s := range strings.Split(before, ",") {
			if strings.HasPrefix(s, "runtimecontentionstacks=") {
				t.Logf("GODEBUG includes explicit setting %q", s)
			}
		}
		defer func() { os.Setenv("GODEBUG", before) }()
		os.Setenv("GODEBUG", fmt.Sprintf("%s,runtimecontentionstacks=1", before))
	}

	t.Logf("NumCPU %d", runtime.NumCPU())
	t.Logf("GOMAXPROCS %d", runtime.GOMAXPROCS(0))
	if minCPU := 2; runtime.NumCPU() < minCPU {
		t.Skipf("creating and observing contention on runtime-internal locks requires NumCPU >= %d", minCPU)
	}

	loadProfile := func(t *testing.T) *profile.Profile {
		var w bytes.Buffer
		pprof.Lookup("mutex").WriteTo(&w, 0)
		p, err := profile.Parse(&w)
		if err != nil {
			t.Fatalf("failed to parse profile: %v", err)
		}
		if err := p.CheckValid(); err != nil {
			t.Fatalf("invalid profile: %v", err)
		}
		return p
	}

	measureDelta := func(t *testing.T, fn func()) (metricGrowth, profileGrowth float64, p *profile.Profile) {
		beforeProfile := loadProfile(t)
		beforeMetrics := []metrics.Sample{{Name: "/sync/mutex/wait/total:seconds"}}
		metrics.Read(beforeMetrics)

		fn()

		afterProfile := loadProfile(t)
		afterMetrics := []metrics.Sample{{Name: "/sync/mutex/wait/total:seconds"}}
		metrics.Read(afterMetrics)

		sumSamples := func(p *profile.Profile, i int) int64 {
			var sum int64
			for _, s := range p.Sample {
				sum += s.Value[i]
			}
			return sum
		}

		metricGrowth = afterMetrics[0].Value.Float64() - beforeMetrics[0].Value.Float64()
		profileGrowth = float64(sumSamples(afterProfile, 1)-sumSamples(beforeProfile, 1)) * time.Nanosecond.Seconds()

		// The internal/profile package does not support compaction; this delta
		// profile will include separate positive and negative entries.
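		// Merging beforeProfile with a -1 scale subtracts the baseline
		// samples, so p describes only the growth during fn.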
		p = afterProfile.Copy()
		if len(beforeProfile.Sample) > 0 {
			err := p.Merge(beforeProfile, -1)
			if err != nil {
				t.Fatalf("Merge profiles: %v", err)
			}
		}

		return metricGrowth, profileGrowth, p
	}

	testcase := func(strictTiming bool, acceptStacks [][]string, workers int, fn func() bool) func(t *testing.T) (metricGrowth, profileGrowth float64, n, value int64) {
		return func(t *testing.T) (metricGrowth, profileGrowth float64, n, value int64) {
			metricGrowth, profileGrowth, p := measureDelta(t, func() {
				var started, stopped sync.WaitGroup
				started.Add(workers)
				stopped.Add(workers)
				for i := 0; i < workers; i++ {
					w := &contentionWorker{
						before: func() {
							started.Done()
							started.Wait()
						},
						after: func() {
							stopped.Done()
						},
						fn: fn,
					}
					go w.run()
				}
				stopped.Wait()
			})

			if profileGrowth == 0 {
				t.Errorf("no increase in mutex profile")
			}
			if metricGrowth == 0 && strictTiming {
				// If the critical section is very short, systems with low timer
				// resolution may be unable to measure it via nanotime.
				//
				// This is sampled at 1 per gTrackingPeriod, but the explicit
				// runtime.mutex tests create 200 contention events. Observing
				// zero of those has a probability of (7/8)^200 = 2.5e-12, which
				// is acceptably low (though the calculation has a tenuous
				// dependency on cheaprandn being a good-enough source of
				// entropy).
				t.Errorf("no increase in /sync/mutex/wait/total:seconds metric")
			}
			// This comparison is possible because the time measurements in support of
			// runtime/pprof and runtime/metrics for runtime-internal locks are so close
			// together. It doesn't work as well for user-space contention, where the
			// involved goroutines are not _Grunnable the whole time and so need to pass
			// through the scheduler.
			t.Logf("lock contention growth in runtime/pprof's view (%fs)", profileGrowth)
			t.Logf("lock contention growth in runtime/metrics' view (%fs)", metricGrowth)

			acceptStacks = append([][]string(nil), acceptStacks...)
			for i, stk := range acceptStacks {
				if goexperiment.StaticLockRanking {
					if !slices.ContainsFunc(stk, func(s string) bool {
						return s == "runtime.systemstack" || s == "runtime.mcall" || s == "runtime.mstart"
					}) {
						// stk is a call stack that is still on the user stack when
						// it calls runtime.unlock. Add the extra function that
						// we'll see, when the static lock ranking implementation of
						// runtime.unlockWithRank switches to the system stack.
						stk = append([]string{"runtime.unlockWithRank"}, stk...)
					}
				}
				acceptStacks[i] = stk
			}

			var stks [][]string
			values := make([][2]int64, len(acceptStacks))
			for _, s := range p.Sample {
				var have []string
				for _, loc := range s.Location {
					for _, line := range loc.Line {
						have = append(have, line.Function.Name)
					}
				}
				stks = append(stks, have)
				for i, stk := range acceptStacks {
					if slices.Equal(have, stk) {
						values[i][0] += s.Value[0]
						values[i][1] += s.Value[1]
					}
				}
			}
			for i, stk := range acceptStacks {
				n += values[i][0]
				value += values[i][1]
				t.Logf("stack %v has samples totaling n=%d value=%d", stk, values[i][0], values[i][1])
			}
			if n == 0 && value == 0 {
				t.Logf("profile:\n%s", p)
				for _, have := range stks {
					t.Logf("have stack %v", have)
				}
				for _, stk := range acceptStacks {
					t.Errorf("want stack %v", stk)
				}
			}

			return metricGrowth, profileGrowth, n, value
		}
	}

	name := t.Name()

	t.Run("runtime.lock", func(t *testing.T) {
		mus := make([]runtime.Mutex, 200)
		var needContention atomic.Int64
		delay := 100 * time.Microsecond // large relative to system noise, for comparison between clocks
		delayMicros := delay.Microseconds()

		// The goroutine that acquires the lock will only proceed when it
		// detects that its partner is contending for the lock. That will lead to
		// live-lock if anything (such as a STW) prevents the partner goroutine
		// from running. Allowing the contention workers to pause and restart
		// (to allow a STW to proceed) makes it harder to confirm that we're
		// counting the correct number of contention events, since some locks
		// will end up contended twice. Instead, disable the GC.
		defer debug.SetGCPercent(debug.SetGCPercent(-1))

		const workers = 2
		if runtime.GOMAXPROCS(0) < workers {
			t.Skipf("contention on runtime-internal locks requires GOMAXPROCS >= %d", workers)
		}

		fn := func() bool {
			n := int(needContention.Load())
			if n < 0 {
				return false
			}
			mu := &mus[n]

			runtime.Lock(mu)
			for int(needContention.Load()) == n {
				if runtime.MutexContended(mu) {
					// Make them wait a little while.
					for start := runtime.Nanotime(); (runtime.Nanotime()-start)/1000 < delayMicros; {
						runtime.Usleep(uint32(delayMicros))
					}
					break
				}
			}
			runtime.Unlock(mu)
			needContention.Store(int64(n - 1))

			return true
		}

		stks := [][]string{{
			"runtime.unlock",
			"runtime_test." + name + ".func5.1",
			"runtime_test.(*contentionWorker).run",
		}}

		t.Run("sample-1", func(t *testing.T) {
			old := runtime.SetMutexProfileFraction(1)
			defer runtime.SetMutexProfileFraction(old)

			needContention.Store(int64(len(mus) - 1))
			metricGrowth, profileGrowth, n, _ := testcase(true, stks, workers, fn)(t)

			t.Run("metric", func(t *testing.T) {
				// The runtime/metrics view may be sampled at 1 per
				// gTrackingPeriod, so we don't have a hard lower bound here.
				testenv.SkipFlaky(t, 64253)

				if have, want := metricGrowth, delay.Seconds()*float64(len(mus)); have < want {
					// The test imposes a delay with usleep, verified with calls to
					// nanotime. Compare against the runtime/metrics package's view
					// (based on nanotime) rather than runtime/pprof's view (based
					// on cputicks).
					t.Errorf("runtime/metrics reported less than the known minimum contention duration (%fs < %fs)", have, want)
				}
			})
			if have, want := n, int64(len(mus)); have != want {
				t.Errorf("mutex profile reported contention count different from the known true count (%d != %d)", have, want)
			}

			const slop = 1.5 // account for nanotime vs cputicks
			t.Run("compare timers", func(t *testing.T) {
				testenv.SkipFlaky(t, 64253)
				if profileGrowth > slop*metricGrowth || metricGrowth > slop*profileGrowth {
					t.Errorf("views differ by more than %fx", slop)
				}
			})
		})

		t.Run("sample-2", func(t *testing.T) {
			testenv.SkipFlaky(t, 64253)

			old := runtime.SetMutexProfileFraction(2)
			defer runtime.SetMutexProfileFraction(old)

			needContention.Store(int64(len(mus) - 1))
			metricGrowth, profileGrowth, n, _ := testcase(true, stks, workers, fn)(t)

			// With 100 trials and profile fraction of 2, we expect to capture
			// 50 samples. Allow the test to pass if we get at least 20 samples;
			// the CDF of the binomial distribution says there's less than a
			// 1e-9 chance of that, which is an acceptably low flakiness rate.
			const samplingSlop = 2.5

			if have, want := metricGrowth, delay.Seconds()*float64(len(mus)); samplingSlop*have < want {
				// The test imposes a delay with usleep, verified with calls to
				// nanotime. Compare against the runtime/metrics package's view
				// (based on nanotime) rather than runtime/pprof's view (based
				// on cputicks).
				t.Errorf("runtime/metrics reported less than the known minimum contention duration (%f * %fs < %fs)", samplingSlop, have, want)
			}
			if have, want := n, int64(len(mus)); float64(have) > float64(want)*samplingSlop || float64(want) > float64(have)*samplingSlop {
				t.Errorf("mutex profile reported contention count too different from the expected count (%d far from %d)", have, want)
			}

			const timerSlop = 1.5 * samplingSlop // account for nanotime vs cputicks, plus the two views' independent sampling
			if profileGrowth > timerSlop*metricGrowth || metricGrowth > timerSlop*profileGrowth {
				t.Errorf("views differ by more than %fx", timerSlop)
			}
		})
	})

	t.Run("runtime.semrelease", func(t *testing.T) {
		testenv.SkipFlaky(t, 64253)

		old := runtime.SetMutexProfileFraction(1)
		defer runtime.SetMutexProfileFraction(old)

		const workers = 3
		if runtime.GOMAXPROCS(0) < workers {
			t.Skipf("creating and observing contention on runtime-internal semaphores requires GOMAXPROCS >= %d", workers)
		}

		var sem uint32 = 1
		var tries atomic.Int32
		tries.Store(10_000_000) // prefer controlled failure to timeout
		var sawContention atomic.Int32
		var need int32 = 1
		fn := func() bool {
			if sawContention.Load() >= need {
				return false
			}
			if tries.Add(-1) < 0 {
				return false
			}

			runtime.Semacquire(&sem)
			runtime.Semrelease1(&sem, false, 0)
			if runtime.MutexContended(runtime.SemRootLock(&sem)) {
				sawContention.Add(1)
			}
			return true
		}

		stks := [][]string{
			{
				"runtime.unlock",
				"runtime.semrelease1",
				"runtime_test.TestRuntimeLockMetricsAndProfile.func6.1",
				"runtime_test.(*contentionWorker).run",
			},
			{
				"runtime.unlock",
				"runtime.semacquire1",
				"runtime.semacquire",
				"runtime_test.TestRuntimeLockMetricsAndProfile.func6.1",
"runtime_test.(*contentionWorker).run", 1282 }, 1283 } 1284 1285 // Verify that we get call stack we expect, with anything more than zero 1286 // cycles / zero samples. The duration of each contention event is too 1287 // small relative to the expected overhead for us to verify its value 1288 // more directly. Leave that to the explicit lock/unlock test. 1289 1290 testcase(false, stks, workers, fn)(t) 1291 1292 if remaining := tries.Load(); remaining >= 0 { 1293 t.Logf("finished test early (%d tries remaining)", remaining) 1294 } 1295 }) 1296} 1297 1298// contentionWorker provides cleaner call stacks for lock contention profile tests 1299type contentionWorker struct { 1300 before func() 1301 fn func() bool 1302 after func() 1303} 1304 1305func (w *contentionWorker) run() { 1306 defer w.after() 1307 w.before() 1308 1309 for w.fn() { 1310 } 1311} 1312 1313func TestCPUStats(t *testing.T) { 1314 // Run a few GC cycles to get some of the stats to be non-zero. 1315 runtime.GC() 1316 runtime.GC() 1317 runtime.GC() 1318 1319 // Set GOMAXPROCS high then sleep briefly to ensure we generate 1320 // some idle time. 1321 oldmaxprocs := runtime.GOMAXPROCS(10) 1322 time.Sleep(time.Millisecond) 1323 runtime.GOMAXPROCS(oldmaxprocs) 1324 1325 stats := runtime.ReadCPUStats() 1326 gcTotal := stats.GCAssistTime + stats.GCDedicatedTime + stats.GCIdleTime + stats.GCPauseTime 1327 if gcTotal != stats.GCTotalTime { 1328 t.Errorf("manually computed total does not match GCTotalTime: %d cpu-ns vs. %d cpu-ns", gcTotal, stats.GCTotalTime) 1329 } 1330 scavTotal := stats.ScavengeAssistTime + stats.ScavengeBgTime 1331 if scavTotal != stats.ScavengeTotalTime { 1332 t.Errorf("manually computed total does not match ScavengeTotalTime: %d cpu-ns vs. %d cpu-ns", scavTotal, stats.ScavengeTotalTime) 1333 } 1334 total := gcTotal + scavTotal + stats.IdleTime + stats.UserTime 1335 if total != stats.TotalTime { 1336 t.Errorf("manually computed overall total does not match TotalTime: %d cpu-ns vs. %d cpu-ns", total, stats.TotalTime) 1337 } 1338 if total == 0 { 1339 t.Error("total time is zero") 1340 } 1341 if gcTotal == 0 { 1342 t.Error("GC total time is zero") 1343 } 1344 if stats.IdleTime == 0 { 1345 t.Error("idle time is zero") 1346 } 1347} 1348 1349func TestMetricHeapUnusedLargeObjectOverflow(t *testing.T) { 1350 // This test makes sure /memory/classes/heap/unused:bytes 1351 // doesn't overflow when allocating and deallocating large 1352 // objects. It is a regression test for #67019. 1353 done := make(chan struct{}) 1354 var wg sync.WaitGroup 1355 wg.Add(1) 1356 go func() { 1357 defer wg.Done() 1358 for { 1359 for range 10 { 1360 abi.Escape(make([]byte, 1<<20)) 1361 } 1362 runtime.GC() 1363 select { 1364 case <-done: 1365 return 1366 default: 1367 } 1368 } 1369 }() 1370 s := []metrics.Sample{ 1371 {Name: "/memory/classes/heap/unused:bytes"}, 1372 } 1373 for range 1000 { 1374 metrics.Read(s) 1375 if s[0].Value.Uint64() > 1<<40 { 1376 t.Errorf("overflow") 1377 break 1378 } 1379 } 1380 done <- struct{}{} 1381 wg.Wait() 1382} 1383