1// Copyright 2011 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5// CPU profiling.
6//
7// The signal handler for the profiling clock tick adds a new stack trace
8// to a log of recent traces. The log is read by a user goroutine that
9// turns it into formatted profile data. If the reader does not keep up
10// with the log, those writes will be recorded as a count of lost records.
11// The actual profile buffer is in profbuf.go.
12
13package runtime
14
15import (
16	"internal/abi"
17	"runtime/internal/sys"
18	"unsafe"
19)
20
const (
	// maxCPUProfStack is the profiler's maximum stack length, in frames
	// (the 64-frame limit assumed by the sizing estimate below).
	// NOTE(review): not referenced in the code visible here; confirm
	// against its users.
	maxCPUProfStack = 64

	// profBufWordCount is the size of the CPU profile buffer's storage for the
	// header and stack of each sample, measured in 64-bit words. Every sample
	// has a required header of two words. With a small additional header (a
	// word or two) and stacks at the profiler's maximum length of 64 frames,
	// that capacity can support 1900 samples or 19 thread-seconds at a 100 Hz
	// sample rate, at a cost of 1 MiB.
	profBufWordCount = 1 << 17
	// profBufTagCount is the size of the CPU profile buffer's storage for the
	// goroutine tags associated with each sample. A capacity of 1<<14 means
	// room for 16k samples, or 160 thread-seconds at a 100 Hz sample rate.
	profBufTagCount = 1 << 14
)
36
// cpuProfile holds the CPU profiler's state: whether profiling is on,
// the log that samples are written to, and a staging area for samples
// taken on non-Go-created threads.
type cpuProfile struct {
	// lock is held by SetCPUProfileRate and runtime_pprof_readProfile
	// while they read or update on and log.
	lock mutex
	on   bool     // profiling is on
	log  *profBuf // profile events written here

	// extra holds extra stacks accumulated in addNonGo
	// corresponding to profiling signals arriving on
	// non-Go-created threads. Those stacks are written
	// to log the next time a normal Go thread gets the
	// signal handler.
	// Assuming the stacks are 2 words each (we don't get
	// a full traceback from those threads), plus one size
	// word for framing, 100 Hz profiling would generate
	// 300 words per second.
	// Hopefully a normal Go thread will get the profiling
	// signal at least once every few seconds.
	extra      [1000]uintptr
	numExtra   int    // number of words of extra currently in use
	lostExtra  uint64 // count of frames lost because extra is full
	lostAtomic uint64 // count of frames lost because of being in atomic64 on mips/arm; updated racily
}
58
// cpuprof is the package-level CPU profile state used by the
// functions in this file.
var cpuprof cpuProfile
60
61// SetCPUProfileRate sets the CPU profiling rate to hz samples per second.
62// If hz <= 0, SetCPUProfileRate turns off profiling.
63// If the profiler is on, the rate cannot be changed without first turning it off.
64//
65// Most clients should use the [runtime/pprof] package or
66// the [testing] package's -test.cpuprofile flag instead of calling
67// SetCPUProfileRate directly.
68func SetCPUProfileRate(hz int) {
69	// Clamp hz to something reasonable.
70	if hz < 0 {
71		hz = 0
72	}
73	if hz > 1000000 {
74		hz = 1000000
75	}
76
77	lock(&cpuprof.lock)
78	if hz > 0 {
79		if cpuprof.on || cpuprof.log != nil {
80			print("runtime: cannot set cpu profile rate until previous profile has finished.\n")
81			unlock(&cpuprof.lock)
82			return
83		}
84
85		cpuprof.on = true
86		cpuprof.log = newProfBuf(1, profBufWordCount, profBufTagCount)
87		hdr := [1]uint64{uint64(hz)}
88		cpuprof.log.write(nil, nanotime(), hdr[:], nil)
89		setcpuprofilerate(int32(hz))
90	} else if cpuprof.on {
91		setcpuprofilerate(0)
92		cpuprof.on = false
93		cpuprof.addExtra()
94		cpuprof.log.close()
95	}
96	unlock(&cpuprof.lock)
97}
98
99// add adds the stack trace to the profile.
100// It is called from signal handlers and other limited environments
101// and cannot allocate memory or acquire locks that might be
102// held at the time of the signal, nor can it use substantial amounts
103// of stack.
104//
105//go:nowritebarrierrec
106func (p *cpuProfile) add(tagPtr *unsafe.Pointer, stk []uintptr) {
107	// Simple cas-lock to coordinate with setcpuprofilerate.
108	for !prof.signalLock.CompareAndSwap(0, 1) {
109		// TODO: Is it safe to osyield here? https://go.dev/issue/52672
110		osyield()
111	}
112
113	if prof.hz.Load() != 0 { // implies cpuprof.log != nil
114		if p.numExtra > 0 || p.lostExtra > 0 || p.lostAtomic > 0 {
115			p.addExtra()
116		}
117		hdr := [1]uint64{1}
118		// Note: write "knows" that the argument is &gp.labels,
119		// because otherwise its write barrier behavior may not
120		// be correct. See the long comment there before
121		// changing the argument here.
122		cpuprof.log.write(tagPtr, nanotime(), hdr[:], stk)
123	}
124
125	prof.signalLock.Store(0)
126}
127
// addNonGo adds the non-Go stack trace to the profile.
// It is called from a non-Go thread, so we cannot use much stack at all,
// nor do anything that needs a g or an m.
// In particular, we can't call cpuprof.log.write.
// Instead, we copy the stack into cpuprof.extra,
// which will be drained the next time a Go thread
// gets the signal handling event.
// If the stack does not fit in the remaining space of cpuprof.extra,
// the sample is dropped and counted in cpuprof.lostExtra.
//
//go:nosplit
//go:nowritebarrierrec
func (p *cpuProfile) addNonGo(stk []uintptr) {
	// Simple cas-lock to coordinate with SetCPUProfileRate.
	// (Other calls to add or addNonGo should be blocked out
	// by the fact that only one SIGPROF can be handled by the
	// process at a time. If not, this lock will serialize those too.
	// The use of timer_create(2) on Linux to request process-targeted
	// signals may have changed this.)
	for !prof.signalLock.CompareAndSwap(0, 1) {
		// TODO: Is it safe to osyield here? https://go.dev/issue/52672
		osyield()
	}

	// Each queued record is one size word followed by the stack's PCs.
	// The size word counts itself, which lets addExtra step from one
	// record to the next.
	if cpuprof.numExtra+1+len(stk) < len(cpuprof.extra) {
		i := cpuprof.numExtra
		cpuprof.extra[i] = uintptr(1 + len(stk))
		copy(cpuprof.extra[i+1:], stk)
		cpuprof.numExtra += 1 + len(stk)
	} else {
		// No room: drop the sample; addExtra reports the count later.
		cpuprof.lostExtra++
	}

	prof.signalLock.Store(0)
}
161
162// addExtra adds the "extra" profiling events,
163// queued by addNonGo, to the profile log.
164// addExtra is called either from a signal handler on a Go thread
165// or from an ordinary goroutine; either way it can use stack
166// and has a g. The world may be stopped, though.
167func (p *cpuProfile) addExtra() {
168	// Copy accumulated non-Go profile events.
169	hdr := [1]uint64{1}
170	for i := 0; i < p.numExtra; {
171		p.log.write(nil, 0, hdr[:], p.extra[i+1:i+int(p.extra[i])])
172		i += int(p.extra[i])
173	}
174	p.numExtra = 0
175
176	// Report any lost events.
177	if p.lostExtra > 0 {
178		hdr := [1]uint64{p.lostExtra}
179		lostStk := [2]uintptr{
180			abi.FuncPCABIInternal(_LostExternalCode) + sys.PCQuantum,
181			abi.FuncPCABIInternal(_ExternalCode) + sys.PCQuantum,
182		}
183		p.log.write(nil, 0, hdr[:], lostStk[:])
184		p.lostExtra = 0
185	}
186
187	if p.lostAtomic > 0 {
188		hdr := [1]uint64{p.lostAtomic}
189		lostStk := [2]uintptr{
190			abi.FuncPCABIInternal(_LostSIGPROFDuringAtomic64) + sys.PCQuantum,
191			abi.FuncPCABIInternal(_System) + sys.PCQuantum,
192		}
193		p.log.write(nil, 0, hdr[:], lostStk[:])
194		p.lostAtomic = 0
195	}
196
197}
198
// CPUProfile panics.
// It used to hand out raw chunks of a pprof-format profile
// generated by the runtime. The way that format is generated
// has since changed, so this functionality has been removed.
//
// Deprecated: Use the [runtime/pprof] package,
// or the handlers in the [net/http/pprof] package,
// or the [testing] package's -test.cpuprofile flag instead.
func CPUProfile() []byte {
	const msg = "CPUProfile no longer available"
	panic(msg)
}
211
// pprof_cyclesPerSecond returns the result of ticksPerSecond. It is
// made available to runtime/pprof under the name
// runtime_cyclesPerSecond by the linkname directive below.
//
// runtime/pprof.runtime_cyclesPerSecond should be an internal detail,
// but widely used packages access it using linkname.
// Notable members of the hall of shame include:
//   - github.com/grafana/pyroscope-go/godeltaprof
//   - github.com/pyroscope-io/godeltaprof
//
// Do not remove or change the type signature.
// See go.dev/issue/67401.
//
//go:linkname pprof_cyclesPerSecond runtime/pprof.runtime_cyclesPerSecond
func pprof_cyclesPerSecond() int64 {
	return ticksPerSecond()
}
225
226// readProfile, provided to runtime/pprof, returns the next chunk of
227// binary CPU profiling stack trace data, blocking until data is available.
228// If profiling is turned off and all the profile data accumulated while it was
229// on has been returned, readProfile returns eof=true.
230// The caller must save the returned data and tags before calling readProfile again.
231// The returned data contains a whole number of records, and tags contains
232// exactly one entry per record.
233//
234// runtime_pprof_readProfile should be an internal detail,
235// but widely used packages access it using linkname.
236// Notable members of the hall of shame include:
237//   - github.com/pyroscope-io/pyroscope
238//
239// Do not remove or change the type signature.
240// See go.dev/issue/67401.
241//
242//go:linkname runtime_pprof_readProfile runtime/pprof.readProfile
243func runtime_pprof_readProfile() ([]uint64, []unsafe.Pointer, bool) {
244	lock(&cpuprof.lock)
245	log := cpuprof.log
246	unlock(&cpuprof.lock)
247	readMode := profBufBlocking
248	if GOOS == "darwin" || GOOS == "ios" {
249		readMode = profBufNonBlocking // For #61768; on Darwin notes are not async-signal-safe.  See sigNoteSetup in os_darwin.go.
250	}
251	data, tags, eof := log.read(readMode)
252	if len(data) == 0 && eof {
253		lock(&cpuprof.lock)
254		cpuprof.log = nil
255		unlock(&cpuprof.lock)
256	}
257	return data, tags, eof
258}
259