// Copyright 2010 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Package pprof writes runtime profiling data in the format expected
// by the pprof visualization tool.
//
// # Profiling a Go program
//
// The first step to profiling a Go program is to enable profiling.
// Support for profiling benchmarks built with the standard testing
// package is built into go test. For example, the following command
// runs benchmarks in the current directory and writes the CPU and
// memory profiles to cpu.prof and mem.prof:
//
//	go test -cpuprofile cpu.prof -memprofile mem.prof -bench .
//
// To add equivalent profiling support to a standalone program, add
// code like the following to your main function:
//
//	var cpuprofile = flag.String("cpuprofile", "", "write cpu profile to `file`")
//	var memprofile = flag.String("memprofile", "", "write memory profile to `file`")
//
//	func main() {
//		flag.Parse()
//		if *cpuprofile != "" {
//			f, err := os.Create(*cpuprofile)
//			if err != nil {
//				log.Fatal("could not create CPU profile: ", err)
//			}
//			defer f.Close() // error handling omitted for example
//			if err := pprof.StartCPUProfile(f); err != nil {
//				log.Fatal("could not start CPU profile: ", err)
//			}
//			defer pprof.StopCPUProfile()
//		}
//
//		// ... rest of the program ...
//
//		if *memprofile != "" {
//			f, err := os.Create(*memprofile)
//			if err != nil {
//				log.Fatal("could not create memory profile: ", err)
//			}
//			defer f.Close() // error handling omitted for example
//			runtime.GC() // get up-to-date statistics
//			if err := pprof.WriteHeapProfile(f); err != nil {
//				log.Fatal("could not write memory profile: ", err)
//			}
//		}
//	}
//
// There is also a standard HTTP interface to profiling data. Adding
// the following line will install handlers under the /debug/pprof/
// URL to download live profiles:
//
//	import _ "net/http/pprof"
//
// See the net/http/pprof package for more details.
//
// Profiles can then be visualized with the pprof tool:
//
//	go tool pprof cpu.prof
//
// There are many commands available from the pprof command line.
// Commonly used commands include "top", which prints a summary of the
// top program hot-spots, and "web", which opens an interactive graph
// of hot-spots and their call graphs. Use "help" for information on
// all pprof commands.
//
// For more information about pprof, see
// https://github.com/google/pprof/blob/main/doc/README.md.
package pprof

import (
	"bufio"
	"cmp"
	"fmt"
	"internal/abi"
	"internal/profilerecord"
	"io"
	"runtime"
	"slices"
	"sort"
	"strings"
	"sync"
	"text/tabwriter"
	"time"
	"unsafe"
)

// BUG(rsc): Profiles are only as good as the kernel support used to generate them.
// See https://golang.org/issue/13841 for details about known problems.

// A Profile is a collection of stack traces showing the call sequences
// that led to instances of a particular event, such as allocation.
// Packages can create and maintain their own profiles; the most common
// use is for tracking resources that must be explicitly closed, such as files
// or network connections.
//
// A Profile's methods can be called from multiple goroutines simultaneously.
//
// Each Profile has a unique name.
// A few profiles are predefined:
//
//	goroutine    - stack traces of all current goroutines
//	heap         - a sampling of memory allocations of live objects
//	allocs       - a sampling of all past memory allocations
//	threadcreate - stack traces that led to the creation of new OS threads
//	block        - stack traces that led to blocking on synchronization primitives
//	mutex        - stack traces of holders of contended mutexes
//
// These predefined profiles maintain themselves and panic on an explicit
// [Profile.Add] or [Profile.Remove] method call.
//
// The CPU profile is not available as a Profile. It has a special API,
// the [StartCPUProfile] and [StopCPUProfile] functions, because it streams
// output to a writer during profiling.
//
// # Heap profile
//
// The heap profile reports statistics as of the most recently completed
// garbage collection; it elides more recent allocation to avoid skewing
// the profile away from live data and toward garbage.
// If there has been no garbage collection at all, the heap profile reports
// all known allocations. This exception helps mainly in programs running
// without garbage collection enabled, usually for debugging purposes.
//
// The heap profile tracks the allocation sites both for all live objects in
// the application's memory and for all objects allocated since the program
// started. Pprof's -inuse_space, -inuse_objects, -alloc_space, and
// -alloc_objects flags select which to display, defaulting to -inuse_space
// (live objects, scaled by size).
//
// # Allocs profile
//
// The allocs profile is the same as the heap profile but changes the default
// pprof display to -alloc_space, the total number of bytes allocated since
// the program began (including garbage-collected bytes).
//
// # Block profile
//
// The block profile tracks time spent blocked on synchronization primitives,
// such as [sync.Mutex], [sync.RWMutex], [sync.WaitGroup], [sync.Cond], and
// channel send/receive/select.
//
// Stack traces correspond to the location that blocked (for example,
// [sync.Mutex.Lock]).
//
// Sample values correspond to cumulative time spent blocked at that stack
// trace, subject to time-based sampling specified by
// [runtime.SetBlockProfileRate].
//
// # Mutex profile
//
// The mutex profile tracks contention on mutexes, such as [sync.Mutex],
// [sync.RWMutex], and runtime-internal locks.
//
// Stack traces correspond to the end of the critical section causing
// contention. For example, a lock held for a long time while other goroutines
// are waiting to acquire the lock will report contention when the lock is
// finally unlocked (that is, at [sync.Mutex.Unlock]).
//
// Sample values correspond to the approximate cumulative time other goroutines
// spent blocked waiting for the lock, subject to event-based sampling
// specified by [runtime.SetMutexProfileFraction]. For example, if a caller
// holds a lock for 1s while 5 other goroutines are waiting for the entire
// second to acquire the lock, its unlock call stack will report 5s of
// contention.
//
// Runtime-internal locks are always reported at the location
// "runtime._LostContendedRuntimeLock". More detailed stack traces for
// runtime-internal locks can be obtained by setting
// `GODEBUG=runtimecontentionstacks=1` (see package [runtime] docs for
// caveats).
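//
// # Custom profiles
//
// A rough sketch of the custom-profile pattern described above, tracking a
// resource with a user-created profile; the "mypkg.files" name and the
// trackedFile type are invented for this example:
//
//	var fileProfile = pprof.NewProfile("mypkg.files")
//
//	type trackedFile struct{ f *os.File }
//
//	func open(name string) (*trackedFile, error) {
//		f, err := os.Open(name)
//		if err != nil {
//			return nil, err
//		}
//		t := &trackedFile{f}
//		fileProfile.Add(t, 1) // skip=1: record the stack starting at open's caller
//		return t, nil
//	}
//
//	func (t *trackedFile) Close() error {
//		fileProfile.Remove(t)
//		return t.f.Close()
//	}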
type Profile struct {
	name  string
	mu    sync.Mutex
	m     map[any][]uintptr
	count func() int
	write func(io.Writer, int) error
}

// profiles records all registered profiles.
var profiles struct {
	mu sync.Mutex
	m  map[string]*Profile
}

var goroutineProfile = &Profile{
	name:  "goroutine",
	count: countGoroutine,
	write: writeGoroutine,
}

var threadcreateProfile = &Profile{
	name:  "threadcreate",
	count: countThreadCreate,
	write: writeThreadCreate,
}

var heapProfile = &Profile{
	name:  "heap",
	count: countHeap,
	write: writeHeap,
}

var allocsProfile = &Profile{
	name:  "allocs",
	count: countHeap, // identical to heap profile
	write: writeAlloc,
}

var blockProfile = &Profile{
	name:  "block",
	count: countBlock,
	write: writeBlock,
}

var mutexProfile = &Profile{
	name:  "mutex",
	count: countMutex,
	write: writeMutex,
}

func lockProfiles() {
	profiles.mu.Lock()
	if profiles.m == nil {
		// Initial built-in profiles.
		profiles.m = map[string]*Profile{
			"goroutine":    goroutineProfile,
			"threadcreate": threadcreateProfile,
			"heap":         heapProfile,
			"allocs":       allocsProfile,
			"block":        blockProfile,
			"mutex":        mutexProfile,
		}
	}
}

func unlockProfiles() {
	profiles.mu.Unlock()
}

// NewProfile creates a new profile with the given name.
// If a profile with that name already exists, NewProfile panics.
// The convention is to use an 'import/path.' prefix to create
// separate namespaces for each package.
// For compatibility with various tools that read pprof data,
// profile names should not contain spaces.
func NewProfile(name string) *Profile {
	lockProfiles()
	defer unlockProfiles()
	if name == "" {
		panic("pprof: NewProfile with empty name")
	}
	if profiles.m[name] != nil {
		panic("pprof: NewProfile name already in use: " + name)
	}
	p := &Profile{
		name: name,
		m:    map[any][]uintptr{},
	}
	profiles.m[name] = p
	return p
}

// Lookup returns the profile with the given name, or nil if no such profile exists.
func Lookup(name string) *Profile {
	lockProfiles()
	defer unlockProfiles()
	return profiles.m[name]
}

// Profiles returns a slice of all the known profiles, sorted by name.
func Profiles() []*Profile {
	lockProfiles()
	defer unlockProfiles()

	all := make([]*Profile, 0, len(profiles.m))
	for _, p := range profiles.m {
		all = append(all, p)
	}

	slices.SortFunc(all, func(a, b *Profile) int {
		return strings.Compare(a.name, b.name)
	})
	return all
}

// Name returns this profile's name, which can be passed to [Lookup] to reobtain the profile.
func (p *Profile) Name() string {
	return p.name
}

// Count returns the number of execution stacks currently in the profile.
func (p *Profile) Count() int {
	p.mu.Lock()
	defer p.mu.Unlock()
	if p.count != nil {
		return p.count()
	}
	return len(p.m)
}

// Add adds the current execution stack to the profile, associated with value.
// Add stores value in an internal map, so value must be suitable for use as
// a map key and will not be garbage collected until the corresponding
// call to [Profile.Remove]. Add panics if the profile already contains a stack for value.
//
// The skip parameter has the same meaning as [runtime.Caller]'s skip
// and controls where the stack trace begins. Passing skip=0 begins the
// trace in the function calling Add. For example, given this
// execution stack:
//
//	Add
//	called from rpc.NewClient
//	called from mypkg.Run
//	called from main.main
//
// Passing skip=0 begins the stack trace at the call to Add inside rpc.NewClient.
// Passing skip=1 begins the stack trace at the call to NewClient inside mypkg.Run.
func (p *Profile) Add(value any, skip int) {
	if p.name == "" {
		panic("pprof: use of uninitialized Profile")
	}
	if p.write != nil {
		panic("pprof: Add called on built-in Profile " + p.name)
	}

	stk := make([]uintptr, 32)
	n := runtime.Callers(skip+1, stk[:])
	stk = stk[:n]
	if len(stk) == 0 {
		// The value for skip is too large, and there's no stack trace to record.
		stk = []uintptr{abi.FuncPCABIInternal(lostProfileEvent)}
	}

	p.mu.Lock()
	defer p.mu.Unlock()
	if p.m[value] != nil {
		panic("pprof: Profile.Add of duplicate value")
	}
	p.m[value] = stk
}

// Remove removes the execution stack associated with value from the profile.
// It is a no-op if the value is not in the profile.
func (p *Profile) Remove(value any) {
	p.mu.Lock()
	defer p.mu.Unlock()
	delete(p.m, value)
}

// WriteTo writes a pprof-formatted snapshot of the profile to w.
// If a write to w returns an error, WriteTo returns that error.
// Otherwise, WriteTo returns nil.
//
// The debug parameter enables additional output.
// Passing debug=0 writes the gzip-compressed protocol buffer described
// in https://github.com/google/pprof/tree/main/proto#overview.
// Passing debug=1 writes the legacy text format with comments
// translating addresses to function names and line numbers, so that a
// programmer can read the profile without tools.
//
// The predefined profiles may assign meaning to other debug values;
// for example, when printing the "goroutine" profile, debug=2 means to
// print the goroutine stacks in the same form that a Go program uses
// when dying due to an unrecovered panic.
func (p *Profile) WriteTo(w io.Writer, debug int) error {
	if p.name == "" {
		panic("pprof: use of zero Profile")
	}
	if p.write != nil {
		return p.write(w, debug)
	}

	// Obtain consistent snapshot under lock; then process without lock.
	p.mu.Lock()
	all := make([][]uintptr, 0, len(p.m))
	for _, stk := range p.m {
		all = append(all, stk)
	}
	p.mu.Unlock()

	// Map order is non-deterministic; make output deterministic.
	slices.SortFunc(all, slices.Compare)

	return printCountProfile(w, debug, p.name, stackProfile(all))
}

type stackProfile [][]uintptr

func (x stackProfile) Len() int              { return len(x) }
func (x stackProfile) Stack(i int) []uintptr { return x[i] }
func (x stackProfile) Label(i int) *labelMap { return nil }

// A countProfile is a set of stack traces to be printed as counts
// grouped by stack trace. There are multiple implementations:
// all that matters is that we can find out how many traces there are
// and obtain each trace in turn.
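// Two implementations appear in this file: stackProfile (above), used for
// user-created Profiles, and runtimeProfile (below), used for profiles
// fetched directly from the runtime.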
type countProfile interface {
	Len() int
	Stack(i int) []uintptr
	Label(i int) *labelMap
}

// expandInlinedFrames copies the call stack from pcs into dst, expanding any
// PCs corresponding to inlined calls into the corresponding PCs for the inlined
// functions. Returns the number of frames copied to dst.
func expandInlinedFrames(dst, pcs []uintptr) int {
	cf := runtime.CallersFrames(pcs)
	var n int
	for n < len(dst) {
		f, more := cf.Next()
		// f.PC is a "call PC", but later consumers will expect
		// "return PCs".
		dst[n] = f.PC + 1
		n++
		if !more {
			break
		}
	}
	return n
}

// printCountCycleProfile outputs block profile records (for block or mutex profiles)
// in the pprof-proto format. Cycle counts are translated to time durations
// because the proto expects count and time (nanoseconds) rather than count
// and cycle count for block and mutex (contention) profiles.
func printCountCycleProfile(w io.Writer, countName, cycleName string, records []profilerecord.BlockProfileRecord) error {
	// Output profile in protobuf form.
	b := newProfileBuilder(w)
	b.pbValueType(tagProfile_PeriodType, countName, "count")
	b.pb.int64Opt(tagProfile_Period, 1)
	b.pbValueType(tagProfile_SampleType, countName, "count")
	b.pbValueType(tagProfile_SampleType, cycleName, "nanoseconds")

	cpuGHz := float64(pprof_cyclesPerSecond()) / 1e9

	values := []int64{0, 0}
	var locs []uint64
	expandedStack := pprof_makeProfStack()
	for _, r := range records {
		values[0] = r.Count
		values[1] = int64(float64(r.Cycles) / cpuGHz)
		// For count profiles, all stack addresses are
		// return PCs, which is what appendLocsForStack expects.
		n := expandInlinedFrames(expandedStack, r.Stack)
		locs = b.appendLocsForStack(locs[:0], expandedStack[:n])
		b.pbSample(values, locs, nil)
	}
	b.build()
	return nil
}

// printCountProfile prints a countProfile at the specified debug level.
// The profile will be in compressed proto format unless debug is nonzero.
func printCountProfile(w io.Writer, debug int, name string, p countProfile) error {
	// Build count of each stack.
	var buf strings.Builder
	key := func(stk []uintptr, lbls *labelMap) string {
		buf.Reset()
		fmt.Fprintf(&buf, "@")
		for _, pc := range stk {
			fmt.Fprintf(&buf, " %#x", pc)
		}
		if lbls != nil {
			buf.WriteString("\n# labels: ")
			buf.WriteString(lbls.String())
		}
		return buf.String()
	}
	count := map[string]int{}
	index := map[string]int{}
	var keys []string
	n := p.Len()
	for i := 0; i < n; i++ {
		k := key(p.Stack(i), p.Label(i))
		if count[k] == 0 {
			index[k] = i
			keys = append(keys, k)
		}
		count[k]++
	}

	sort.Sort(&keysByCount{keys, count})

	if debug > 0 {
		// Print debug profile in legacy format.
		tw := tabwriter.NewWriter(w, 1, 8, 1, '\t', 0)
		fmt.Fprintf(tw, "%s profile: total %d\n", name, p.Len())
		for _, k := range keys {
			fmt.Fprintf(tw, "%d %s\n", count[k], k)
			printStackRecord(tw, p.Stack(index[k]), false)
		}
		return tw.Flush()
	}

	// Output profile in protobuf form.
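	// The proto encoding mirrors the legacy text format above: a single
	// sample type named after the profile with unit "count", and one sample
	// per unique stack carrying its occurrence count and any pprof labels.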
	b := newProfileBuilder(w)
	b.pbValueType(tagProfile_PeriodType, name, "count")
	b.pb.int64Opt(tagProfile_Period, 1)
	b.pbValueType(tagProfile_SampleType, name, "count")

	values := []int64{0}
	var locs []uint64
	for _, k := range keys {
		values[0] = int64(count[k])
		// For count profiles, all stack addresses are
		// return PCs, which is what appendLocsForStack expects.
		locs = b.appendLocsForStack(locs[:0], p.Stack(index[k]))
		idx := index[k]
		var labels func()
		if p.Label(idx) != nil {
			labels = func() {
				for k, v := range *p.Label(idx) {
					b.pbLabel(tagSample_Label, k, v, 0)
				}
			}
		}
		b.pbSample(values, locs, labels)
	}
	b.build()
	return nil
}

// keysByCount sorts keys with higher counts first, breaking ties by key string order.
type keysByCount struct {
	keys  []string
	count map[string]int
}

func (x *keysByCount) Len() int      { return len(x.keys) }
func (x *keysByCount) Swap(i, j int) { x.keys[i], x.keys[j] = x.keys[j], x.keys[i] }
func (x *keysByCount) Less(i, j int) bool {
	ki, kj := x.keys[i], x.keys[j]
	ci, cj := x.count[ki], x.count[kj]
	if ci != cj {
		return ci > cj
	}
	return ki < kj
}

// printStackRecord prints the function + source line information
// for a single stack trace.
func printStackRecord(w io.Writer, stk []uintptr, allFrames bool) {
	show := allFrames
	frames := runtime.CallersFrames(stk)
	for {
		frame, more := frames.Next()
		name := frame.Function
		if name == "" {
			show = true
			fmt.Fprintf(w, "#\t%#x\n", frame.PC)
		} else if name != "runtime.goexit" && (show || !strings.HasPrefix(name, "runtime.")) {
			// Hide runtime.goexit and any runtime functions at the beginning.
			// This is useful mainly for allocation traces.
			show = true
			fmt.Fprintf(w, "#\t%#x\t%s+%#x\t%s:%d\n", frame.PC, name, frame.PC-frame.Entry, frame.File, frame.Line)
		}
		if !more {
			break
		}
	}
	if !show {
		// We didn't print anything; do it again,
		// and this time include runtime functions.
		printStackRecord(w, stk, true)
		return
	}
	fmt.Fprintf(w, "\n")
}

// Interface to system profiles.

// WriteHeapProfile is shorthand for [Lookup]("heap").WriteTo(w, 0).
// It is preserved for backwards compatibility.
func WriteHeapProfile(w io.Writer) error {
	return writeHeap(w, 0)
}

// countHeap returns the number of records in the heap profile.
func countHeap() int {
	n, _ := runtime.MemProfile(nil, true)
	return n
}

// writeHeap writes the current runtime heap profile to w.
func writeHeap(w io.Writer, debug int) error {
	return writeHeapInternal(w, debug, "")
}

// writeAlloc writes the current runtime heap profile to w
// with the total allocation space as the default sample type.
func writeAlloc(w io.Writer, debug int) error {
	return writeHeapInternal(w, debug, "alloc_space")
}

func writeHeapInternal(w io.Writer, debug int, defaultSampleType string) error {
	var memStats *runtime.MemStats
	if debug != 0 {
		// Read mem stats first, so that our other allocations
		// do not appear in the statistics.
		memStats = new(runtime.MemStats)
		runtime.ReadMemStats(memStats)
	}

	// Find out how many records there are (the call
	// pprof_memProfileInternal(nil, true) below),
	// allocate that many records, and get the data.
	// There's a race—more records might be added between
	// the two calls—so allocate a few extra records for safety
	// and also try again if we're very unlucky.
	// The loop should only execute one iteration in the common case.
	var p []profilerecord.MemProfileRecord
	n, ok := pprof_memProfileInternal(nil, true)
	for {
		// Allocate room for a slightly bigger profile,
		// in case a few more entries have been added
		// since the first call to pprof_memProfileInternal.
		p = make([]profilerecord.MemProfileRecord, n+50)
		n, ok = pprof_memProfileInternal(p, true)
		if ok {
			p = p[0:n]
			break
		}
		// Profile grew; try again.
	}

	if debug == 0 {
		return writeHeapProto(w, p, int64(runtime.MemProfileRate), defaultSampleType)
	}

	slices.SortFunc(p, func(a, b profilerecord.MemProfileRecord) int {
		return cmp.Compare(a.InUseBytes(), b.InUseBytes())
	})

	b := bufio.NewWriter(w)
	tw := tabwriter.NewWriter(b, 1, 8, 1, '\t', 0)
	w = tw

	var total runtime.MemProfileRecord
	for i := range p {
		r := &p[i]
		total.AllocBytes += r.AllocBytes
		total.AllocObjects += r.AllocObjects
		total.FreeBytes += r.FreeBytes
		total.FreeObjects += r.FreeObjects
	}

	// Technically the rate is MemProfileRate not 2*MemProfileRate,
	// but early versions of the C++ heap profiler reported 2*MemProfileRate,
	// so that's what pprof has come to expect.
	rate := 2 * runtime.MemProfileRate

	// pprof reads a profile with alloc == inuse as being a "2-column" profile
	// (objects and bytes, not distinguishing alloc from inuse),
	// but then such a profile can't be merged using pprof *.prof with
	// other 4-column profiles where alloc != inuse.
	// The easiest way to avoid this bug is to adjust allocBytes so it's never == inuseBytes.
	// pprof doesn't use these header values anymore except for checking equality.
	inUseBytes := total.InUseBytes()
	allocBytes := total.AllocBytes
	if inUseBytes == allocBytes {
		allocBytes++
	}

	fmt.Fprintf(w, "heap profile: %d: %d [%d: %d] @ heap/%d\n",
		total.InUseObjects(), inUseBytes,
		total.AllocObjects, allocBytes,
		rate)

	for i := range p {
		r := &p[i]
		fmt.Fprintf(w, "%d: %d [%d: %d] @",
			r.InUseObjects(), r.InUseBytes(),
			r.AllocObjects, r.AllocBytes)
		for _, pc := range r.Stack {
			fmt.Fprintf(w, " %#x", pc)
		}
		fmt.Fprintf(w, "\n")
		printStackRecord(w, r.Stack, false)
	}

	// Print memstats information too.
	// pprof will ignore it, but it's useful for people reading the profile.
	s := memStats
	fmt.Fprintf(w, "\n# runtime.MemStats\n")
	fmt.Fprintf(w, "# Alloc = %d\n", s.Alloc)
	fmt.Fprintf(w, "# TotalAlloc = %d\n", s.TotalAlloc)
	fmt.Fprintf(w, "# Sys = %d\n", s.Sys)
	fmt.Fprintf(w, "# Lookups = %d\n", s.Lookups)
	fmt.Fprintf(w, "# Mallocs = %d\n", s.Mallocs)
	fmt.Fprintf(w, "# Frees = %d\n", s.Frees)

	fmt.Fprintf(w, "# HeapAlloc = %d\n", s.HeapAlloc)
	fmt.Fprintf(w, "# HeapSys = %d\n", s.HeapSys)
	fmt.Fprintf(w, "# HeapIdle = %d\n", s.HeapIdle)
	fmt.Fprintf(w, "# HeapInuse = %d\n", s.HeapInuse)
	fmt.Fprintf(w, "# HeapReleased = %d\n", s.HeapReleased)
	fmt.Fprintf(w, "# HeapObjects = %d\n", s.HeapObjects)

	fmt.Fprintf(w, "# Stack = %d / %d\n", s.StackInuse, s.StackSys)
	fmt.Fprintf(w, "# MSpan = %d / %d\n", s.MSpanInuse, s.MSpanSys)
	fmt.Fprintf(w, "# MCache = %d / %d\n", s.MCacheInuse, s.MCacheSys)
	fmt.Fprintf(w, "# BuckHashSys = %d\n", s.BuckHashSys)
	fmt.Fprintf(w, "# GCSys = %d\n", s.GCSys)
	fmt.Fprintf(w, "# OtherSys = %d\n", s.OtherSys)

	fmt.Fprintf(w, "# NextGC = %d\n", s.NextGC)
	fmt.Fprintf(w, "# LastGC = %d\n", s.LastGC)
	fmt.Fprintf(w, "# PauseNs = %d\n", s.PauseNs)
	fmt.Fprintf(w, "# PauseEnd = %d\n", s.PauseEnd)
	fmt.Fprintf(w, "# NumGC = %d\n", s.NumGC)
	fmt.Fprintf(w, "# NumForcedGC = %d\n", s.NumForcedGC)
	fmt.Fprintf(w, "# GCCPUFraction = %v\n", s.GCCPUFraction)
	fmt.Fprintf(w, "# DebugGC = %v\n", s.DebugGC)

	// Also flush out MaxRSS on supported platforms.
	addMaxRSS(w)

	tw.Flush()
	return b.Flush()
}

// countThreadCreate returns the size of the current ThreadCreateProfile.
func countThreadCreate() int {
	n, _ := runtime.ThreadCreateProfile(nil)
	return n
}

// writeThreadCreate writes the current runtime ThreadCreateProfile to w.
func writeThreadCreate(w io.Writer, debug int) error {
	// Until https://golang.org/issues/6104 is addressed, wrap
	// ThreadCreateProfile because there's no point in tracking labels when we
	// don't get any stack traces.
	return writeRuntimeProfile(w, debug, "threadcreate", func(p []profilerecord.StackRecord, _ []unsafe.Pointer) (n int, ok bool) {
		return pprof_threadCreateInternal(p)
	})
}

// countGoroutine returns the number of goroutines.
func countGoroutine() int {
	return runtime.NumGoroutine()
}

// writeGoroutine writes the current runtime GoroutineProfile to w.
func writeGoroutine(w io.Writer, debug int) error {
	if debug >= 2 {
		return writeGoroutineStacks(w)
	}
	return writeRuntimeProfile(w, debug, "goroutine", pprof_goroutineProfileWithLabels)
}

func writeGoroutineStacks(w io.Writer) error {
	// We don't know how big the buffer needs to be to collect
	// all the goroutines. Start with 1 MB and try a few times, doubling each time.
	// Give up and use a truncated trace if 64 MB is not enough.
	buf := make([]byte, 1<<20)
	for {
		n := runtime.Stack(buf, true)
		if n < len(buf) {
			buf = buf[:n]
			break
		}
		if len(buf) >= 64<<20 {
			// Filled 64 MB - stop there.
			break
		}
		buf = make([]byte, 2*len(buf))
	}
	_, err := w.Write(buf)
	return err
}

func writeRuntimeProfile(w io.Writer, debug int, name string, fetch func([]profilerecord.StackRecord, []unsafe.Pointer) (int, bool)) error {
	// Find out how many records there are (fetch(nil, nil)),
	// allocate that many records, and get the data.
	// There's a race—more records might be added between
	// the two calls—so allocate a few extra records for safety
	// and also try again if we're very unlucky.
	// The loop should only execute one iteration in the common case.
	var p []profilerecord.StackRecord
	var labels []unsafe.Pointer
	n, ok := fetch(nil, nil)

	for {
		// Allocate room for a slightly bigger profile,
		// in case a few more entries have been added
		// since the first call to fetch.
		p = make([]profilerecord.StackRecord, n+10)
		labels = make([]unsafe.Pointer, n+10)
		n, ok = fetch(p, labels)
		if ok {
			p = p[0:n]
			break
		}
		// Profile grew; try again.
	}

	return printCountProfile(w, debug, name, &runtimeProfile{p, labels})
}

type runtimeProfile struct {
	stk    []profilerecord.StackRecord
	labels []unsafe.Pointer
}

func (p *runtimeProfile) Len() int              { return len(p.stk) }
func (p *runtimeProfile) Stack(i int) []uintptr { return p.stk[i].Stack }
func (p *runtimeProfile) Label(i int) *labelMap { return (*labelMap)(p.labels[i]) }

var cpu struct {
	sync.Mutex
	profiling bool
	done      chan bool
}

// StartCPUProfile enables CPU profiling for the current process.
// While profiling, the profile will be buffered and written to w.
// StartCPUProfile returns an error if profiling is already enabled.
//
// On Unix-like systems, StartCPUProfile does not work by default for
// Go code built with -buildmode=c-archive or -buildmode=c-shared.
// StartCPUProfile relies on the SIGPROF signal, but that signal will
// be delivered to the main program's SIGPROF signal handler (if any),
// not to the one used by Go. To make it work, call [os/signal.Notify]
// for [syscall.SIGPROF], but note that doing so may break any profiling
// being done by the main program.
func StartCPUProfile(w io.Writer) error {
	// The runtime routines allow a variable profiling rate,
	// but in practice operating systems cannot trigger signals
	// at more than about 500 Hz, and our processing of the
	// signal is not cheap (mostly getting the stack trace).
	// 100 Hz is a reasonable choice: it is frequent enough to
	// produce useful data, rare enough not to bog down the
	// system, and a nice round number to make it easy to
	// convert sample counts to seconds. Instead of requiring
	// each client to specify the frequency, we hard code it.
	const hz = 100

	cpu.Lock()
	defer cpu.Unlock()
	if cpu.done == nil {
		cpu.done = make(chan bool)
	}
	if cpu.profiling {
		return fmt.Errorf("cpu profiling already in use")
	}
	cpu.profiling = true
	runtime.SetCPUProfileRate(hz)
	go profileWriter(w)
	return nil
}

// readProfile, provided by the runtime, returns the next chunk of
// binary CPU profiling stack trace data, blocking until data is available.
// If profiling is turned off and all the profile data accumulated while it was
// on has been returned, readProfile returns eof=true.
// The caller must save the returned data and tags before calling readProfile again.
func readProfile() (data []uint64, tags []unsafe.Pointer, eof bool)

func profileWriter(w io.Writer) {
	b := newProfileBuilder(w)
	var err error
	for {
		time.Sleep(100 * time.Millisecond)
		data, tags, eof := readProfile()
		if e := b.addCPUData(data, tags); e != nil && err == nil {
			err = e
		}
		if eof {
			break
		}
	}
	if err != nil {
		// The runtime should never produce an invalid or truncated profile.
		// It drops records that can't fit into its log buffers.
		panic("runtime/pprof: converting profile: " + err.Error())
	}
	b.build()
	cpu.done <- true
}

// StopCPUProfile stops the current CPU profile, if any.
// StopCPUProfile only returns after all the writes for the
// profile have completed.
func StopCPUProfile() {
	cpu.Lock()
	defer cpu.Unlock()

	if !cpu.profiling {
		return
	}
	cpu.profiling = false
	runtime.SetCPUProfileRate(0)
	<-cpu.done
}

// countBlock returns the number of records in the blocking profile.
func countBlock() int {
	n, _ := runtime.BlockProfile(nil)
	return n
}

// countMutex returns the number of records in the mutex profile.
func countMutex() int {
	n, _ := runtime.MutexProfile(nil)
	return n
}

// writeBlock writes the current blocking profile to w.
func writeBlock(w io.Writer, debug int) error {
	return writeProfileInternal(w, debug, "contention", pprof_blockProfileInternal)
}

// writeMutex writes the current mutex profile to w.
func writeMutex(w io.Writer, debug int) error {
	return writeProfileInternal(w, debug, "mutex", pprof_mutexProfileInternal)
}

// writeProfileInternal writes the current blocking or mutex profile depending on the passed parameters.
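// Like writeHeapInternal and writeRuntimeProfile above, it snapshots the
// records by retrying with a slightly larger buffer until they all fit.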
func writeProfileInternal(w io.Writer, debug int, name string, runtimeProfile func([]profilerecord.BlockProfileRecord) (int, bool)) error {
	var p []profilerecord.BlockProfileRecord
	n, ok := runtimeProfile(nil)
	for {
		p = make([]profilerecord.BlockProfileRecord, n+50)
		n, ok = runtimeProfile(p)
		if ok {
			p = p[:n]
			break
		}
		// Profile grew; try again.
	}

	slices.SortFunc(p, func(a, b profilerecord.BlockProfileRecord) int {
		return cmp.Compare(b.Cycles, a.Cycles)
	})

	if debug <= 0 {
		return printCountCycleProfile(w, "contentions", "delay", p)
	}

	b := bufio.NewWriter(w)
	tw := tabwriter.NewWriter(b, 1, 8, 1, '\t', 0)
	w = tw

	fmt.Fprintf(w, "--- %v:\n", name)
	fmt.Fprintf(w, "cycles/second=%v\n", pprof_cyclesPerSecond())
	if name == "mutex" {
		fmt.Fprintf(w, "sampling period=%d\n", runtime.SetMutexProfileFraction(-1))
	}
	expandedStack := pprof_makeProfStack()
	for i := range p {
		r := &p[i]
		fmt.Fprintf(w, "%v %v @", r.Cycles, r.Count)
		n := expandInlinedFrames(expandedStack, r.Stack)
		stack := expandedStack[:n]
		for _, pc := range stack {
			fmt.Fprintf(w, " %#x", pc)
		}
		fmt.Fprint(w, "\n")
		printStackRecord(w, stack, true)
	}

	tw.Flush()
	return b.Flush()
}

//go:linkname pprof_goroutineProfileWithLabels runtime.pprof_goroutineProfileWithLabels
func pprof_goroutineProfileWithLabels(p []profilerecord.StackRecord, labels []unsafe.Pointer) (n int, ok bool)

//go:linkname pprof_cyclesPerSecond runtime/pprof.runtime_cyclesPerSecond
func pprof_cyclesPerSecond() int64

//go:linkname pprof_memProfileInternal runtime.pprof_memProfileInternal
func pprof_memProfileInternal(p []profilerecord.MemProfileRecord, inuseZero bool) (n int, ok bool)

//go:linkname pprof_blockProfileInternal runtime.pprof_blockProfileInternal
func pprof_blockProfileInternal(p []profilerecord.BlockProfileRecord) (n int, ok bool)

//go:linkname pprof_mutexProfileInternal runtime.pprof_mutexProfileInternal
func pprof_mutexProfileInternal(p []profilerecord.BlockProfileRecord) (n int, ok bool)

//go:linkname pprof_threadCreateInternal runtime.pprof_threadCreateInternal
func pprof_threadCreateInternal(p []profilerecord.StackRecord) (n int, ok bool)

//go:linkname pprof_fpunwindExpand runtime.pprof_fpunwindExpand
func pprof_fpunwindExpand(dst, src []uintptr) int

//go:linkname pprof_makeProfStack runtime.pprof_makeProfStack
func pprof_makeProfStack() []uintptr