// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Memory statistics

package runtime

import (
	"internal/runtime/atomic"
	"unsafe"
)

type mstats struct {
	// Statistics about malloc heap.
	heapStats consistentHeapStats

	// Statistics about stacks.
	stacks_sys sysMemStat // only counts newosproc0 stack in mstats; differs from MemStats.StackSys

	// Statistics about allocation of low-level fixed-size structures.
	mspan_sys    sysMemStat
	mcache_sys   sysMemStat
	buckhash_sys sysMemStat // profiling bucket hash table

	// Statistics about GC overhead.
	gcMiscSys sysMemStat // updated atomically or during STW

	// Miscellaneous statistics.
	other_sys sysMemStat // updated atomically or during STW

	// Statistics about the garbage collector.

	// Protected by mheap or worldsema during GC.
	last_gc_unix    uint64 // last gc (in unix time)
	pause_total_ns  uint64
	pause_ns        [256]uint64 // circular buffer of recent gc pause lengths
	pause_end       [256]uint64 // circular buffer of recent gc end times (nanoseconds since 1970)
	numgc           uint32
	numforcedgc     uint32  // number of user-forced GCs
	gc_cpu_fraction float64 // fraction of CPU time used by GC

	last_gc_nanotime uint64 // last gc (monotonic time)
	lastHeapInUse    uint64 // heapInUse at mark termination of the previous GC

	enablegc bool
}

var memstats mstats

// A MemStats records statistics about the memory allocator.
type MemStats struct {
	// General statistics.

	// Alloc is bytes of allocated heap objects.
	//
	// This is the same as HeapAlloc (see below).
	Alloc uint64

	// TotalAlloc is cumulative bytes allocated for heap objects.
	//
	// TotalAlloc increases as heap objects are allocated, but
	// unlike Alloc and HeapAlloc, it does not decrease when
	// objects are freed.
	TotalAlloc uint64

	// Sys is the total bytes of memory obtained from the OS.
	//
	// Sys is the sum of the XSys fields below. Sys measures the
	// virtual address space reserved by the Go runtime for the
	// heap, stacks, and other internal data structures. It's
	// likely that not all of the virtual address space is backed
	// by physical memory at any given moment, though in general
	// it all was at some point.
	Sys uint64

	// Lookups is the number of pointer lookups performed by the
	// runtime.
	//
	// This is primarily useful for debugging runtime internals.
	Lookups uint64

	// Mallocs is the cumulative count of heap objects allocated.
	// The number of live objects is Mallocs - Frees.
	Mallocs uint64

	// Frees is the cumulative count of heap objects freed.
	Frees uint64

	// Heap memory statistics.
	//
	// Interpreting the heap statistics requires some knowledge of
	// how Go organizes memory. Go divides the virtual address
	// space of the heap into "spans", which are contiguous
	// regions of memory 8K or larger. A span may be in one of
	// three states:
	//
	// An "idle" span contains no objects or other data. The
	// physical memory backing an idle span can be released back
	// to the OS (but the virtual address space never is), or it
	// can be converted into an "in use" or "stack" span.
	//
	// An "in use" span contains at least one heap object and may
	// have free space available to allocate more heap objects.
	//
	// A "stack" span is used for goroutine stacks. Stack spans
	// are not considered part of the heap. A span can change
	// between heap and stack memory; it is never used for both
	// simultaneously.

	// HeapAlloc is bytes of allocated heap objects.
	//
	// "Allocated" heap objects include all reachable objects, as
	// well as unreachable objects that the garbage collector has
	// not yet freed. Specifically, HeapAlloc increases as heap
	// objects are allocated and decreases as the heap is swept
	// and unreachable objects are freed. Sweeping occurs
	// incrementally between GC cycles, so these two processes
	// occur simultaneously, and as a result HeapAlloc tends to
	// change smoothly (in contrast with the sawtooth that is
	// typical of stop-the-world garbage collectors).
	HeapAlloc uint64

	// HeapSys is bytes of heap memory obtained from the OS.
	//
	// HeapSys measures the amount of virtual address space
	// reserved for the heap. This includes virtual address space
	// that has been reserved but not yet used, which consumes no
	// physical memory, but tends to be small, as well as virtual
	// address space for which the physical memory has been
	// returned to the OS after it became unused (see HeapReleased
	// for a measure of the latter).
	//
	// HeapSys estimates the largest size the heap has had.
	HeapSys uint64

	// HeapIdle is bytes in idle (unused) spans.
	//
	// Idle spans have no objects in them. These spans could be
	// (and may already have been) returned to the OS, or they can
	// be reused for heap allocations, or they can be reused as
	// stack memory.
	//
	// HeapIdle minus HeapReleased estimates the amount of memory
	// that could be returned to the OS, but is being retained by
	// the runtime so it can grow the heap without requesting more
	// memory from the OS. If this difference is significantly
	// larger than the heap size, it indicates there was a recent
	// transient spike in live heap size.
	HeapIdle uint64

	// HeapInuse is bytes in in-use spans.
	//
	// In-use spans have at least one object in them. These spans
	// can only be used for other objects of roughly the same
	// size.
	//
	// HeapInuse minus HeapAlloc estimates the amount of memory
	// that has been dedicated to particular size classes, but is
	// not currently being used. This is an upper bound on
	// fragmentation, but in general this memory can be reused
	// efficiently.
	HeapInuse uint64

	// HeapReleased is bytes of physical memory returned to the OS.
	//
	// This counts heap memory from idle spans that was returned
	// to the OS and has not yet been reacquired for the heap.
	HeapReleased uint64

	// HeapObjects is the number of allocated heap objects.
	//
	// Like HeapAlloc, this increases as objects are allocated and
	// decreases as the heap is swept and unreachable objects are
	// freed.
	HeapObjects uint64

	// Stack memory statistics.
	//
	// Stacks are not considered part of the heap, but the runtime
	// can reuse a span of heap memory for stack memory, and
	// vice-versa.

	// StackInuse is bytes in stack spans.
	//
	// In-use stack spans have at least one stack in them. These
	// spans can only be used for other stacks of the same size.
	//
	// There is no StackIdle because unused stack spans are
	// returned to the heap (and hence counted toward HeapIdle).
	StackInuse uint64

	// StackSys is bytes of stack memory obtained from the OS.
	//
	// StackSys is StackInuse, plus any memory obtained directly
	// from the OS for OS thread stacks.
	//
	// In non-cgo programs this metric is currently equal to StackInuse
	// (but this should not be relied upon, and the value may change in
	// the future).
	//
	// In cgo programs this metric includes OS thread stacks allocated
	// directly from the OS. Currently, this only accounts for one stack in
	// the c-shared and c-archive build modes; other sources of stacks from
	// the OS (notably, any allocated by C code) are not currently measured.
	// Note this too may change in the future.
	StackSys uint64

	// Off-heap memory statistics.
	//
	// The following statistics measure runtime-internal
	// structures that are not allocated from heap memory (usually
	// because they are part of implementing the heap). Unlike
	// heap or stack memory, any memory allocated to these
	// structures is dedicated to these structures.
	//
	// These are primarily useful for debugging runtime memory
	// overheads.

	// MSpanInuse is bytes of allocated mspan structures.
	MSpanInuse uint64

	// MSpanSys is bytes of memory obtained from the OS for mspan
	// structures.
	MSpanSys uint64

	// MCacheInuse is bytes of allocated mcache structures.
	MCacheInuse uint64

	// MCacheSys is bytes of memory obtained from the OS for
	// mcache structures.
	MCacheSys uint64

	// BuckHashSys is bytes of memory in profiling bucket hash tables.
	BuckHashSys uint64

	// GCSys is bytes of memory in garbage collection metadata.
	GCSys uint64

	// OtherSys is bytes of memory in miscellaneous off-heap
	// runtime allocations.
	OtherSys uint64

	// Garbage collector statistics.

	// NextGC is the target heap size of the next GC cycle.
	//
	// The garbage collector's goal is to keep HeapAlloc ≤ NextGC.
	// At the end of each GC cycle, the target for the next cycle
	// is computed based on the amount of reachable data and the
	// value of GOGC.
	NextGC uint64

	// LastGC is the time the last garbage collection finished, as
	// nanoseconds since 1970 (the UNIX epoch).
	LastGC uint64

	// PauseTotalNs is the cumulative nanoseconds in GC
	// stop-the-world pauses since the program started.
	//
	// During a stop-the-world pause, all goroutines are paused
	// and only the garbage collector can run.
	PauseTotalNs uint64

	// PauseNs is a circular buffer of recent GC stop-the-world
	// pause times in nanoseconds.
	//
	// The most recent pause is at PauseNs[(NumGC+255)%256]. In
	// general, PauseNs[N%256] records the time paused in the most
	// recent N%256th GC cycle. There may be multiple pauses per
	// GC cycle; this is the sum of all pauses during a cycle.
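	//
	// For example, given a MemStats value m populated by
	// ReadMemStats, the duration of the most recent pause can be
	// read as (a sketch; the variable names are illustrative only):
	//
	//	last := m.PauseNs[(m.NumGC+255)%256]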
	PauseNs [256]uint64

	// PauseEnd is a circular buffer of recent GC pause end times,
	// as nanoseconds since 1970 (the UNIX epoch).
	//
	// This buffer is filled the same way as PauseNs. There may be
	// multiple pauses per GC cycle; this records the end of the
	// last pause in a cycle.
	PauseEnd [256]uint64

	// NumGC is the number of completed GC cycles.
	NumGC uint32

	// NumForcedGC is the number of GC cycles that were forced by
	// the application calling the GC function.
	NumForcedGC uint32

	// GCCPUFraction is the fraction of this program's available
	// CPU time used by the GC since the program started.
	//
	// GCCPUFraction is expressed as a number between 0 and 1,
	// where 0 means GC has consumed none of this program's CPU. A
	// program's available CPU time is defined as the integral of
	// GOMAXPROCS since the program started. That is, if
	// GOMAXPROCS is 2 and a program has been running for 10
	// seconds, its "available CPU" is 20 seconds. GCCPUFraction
	// does not include CPU time used for write barrier activity.
	//
	// This is the same as the fraction of CPU reported by
	// GODEBUG=gctrace=1.
	GCCPUFraction float64

	// EnableGC indicates that GC is enabled. It is always true,
	// even if GOGC=off.
	EnableGC bool

	// DebugGC is currently unused.
	DebugGC bool

	// BySize reports per-size class allocation statistics.
	//
	// BySize[N] gives statistics for allocations of size S where
	// BySize[N-1].Size < S ≤ BySize[N].Size.
	//
	// This does not report allocations larger than BySize[60].Size.
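	//
	// For example, the entry covering an allocation of size s can
	// be found with a linear scan (a sketch; m and s are
	// illustrative names):
	//
	//	for i := range m.BySize {
	//		if uint64(m.BySize[i].Size) >= s {
	//			// m.BySize[i] covers allocations of size s.
	//			break
	//		}
	//	}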
	BySize [61]struct {
		// Size is the maximum byte size of an object in this
		// size class.
		Size uint32

		// Mallocs is the cumulative count of heap objects
		// allocated in this size class. The cumulative bytes
		// of allocation is Size*Mallocs. The number of live
		// objects in this size class is Mallocs - Frees.
		Mallocs uint64

		// Frees is the cumulative count of heap objects freed
		// in this size class.
		Frees uint64
	}
}

func init() {
	if offset := unsafe.Offsetof(memstats.heapStats); offset%8 != 0 {
		println(offset)
		throw("memstats.heapStats not aligned to 8 bytes")
	}
	// Ensure the size of heapStatsDelta causes adjacent fields/slots (e.g.
	// [3]heapStatsDelta) to be 8-byte aligned.
	if size := unsafe.Sizeof(heapStatsDelta{}); size%8 != 0 {
		println(size)
		throw("heapStatsDelta not a multiple of 8 bytes in size")
	}
}

// ReadMemStats populates m with memory allocator statistics.
//
// The returned memory allocator statistics are up to date as of the
// call to ReadMemStats. This is in contrast with a heap profile,
// which is a snapshot as of the most recently completed garbage
// collection cycle.
func ReadMemStats(m *MemStats) {
	_ = m.Alloc // nil check test before we switch stacks, see issue 61158
	stw := stopTheWorld(stwReadMemStats)

	systemstack(func() {
		readmemstats_m(m)
	})

	startTheWorld(stw)
}
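
// A caller outside the runtime typically reads all statistics in one
// call, for example (a sketch; the fmt usage and variable names are
// illustrative, and since ReadMemStats stops the world it should be
// called sparingly):
//
//	var m runtime.MemStats
//	runtime.ReadMemStats(&m)
//	fmt.Printf("live heap: %d bytes, %d GC cycles\n", m.HeapAlloc, m.NumGC)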

// doubleCheckReadMemStats controls a double-check mode for ReadMemStats that
// ensures consistency between the values that ReadMemStats is using and the
// runtime-internal stats.
var doubleCheckReadMemStats = false

// readmemstats_m populates stats for internal runtime values.
//
// The world must be stopped.
func readmemstats_m(stats *MemStats) {
	assertWorldStopped()

	// Flush mcaches to mcentral before doing anything else.
	//
	// Flushing to the mcentral may in general cause stats to
	// change as mcentral data structures are manipulated.
	systemstack(flushallmcaches)

	// Calculate memory allocator stats.
	// During program execution we only count number of frees and amount of freed memory.
	// Current number of alive objects in the heap and amount of alive heap memory
	// are calculated by scanning all spans.
	// Total number of mallocs is calculated as number of frees plus number of alive objects.
	// Similarly, total amount of allocated memory is calculated as amount of freed memory
	// plus amount of alive heap memory.

	// Collect consistent stats, which are the source-of-truth in some cases.
	var consStats heapStatsDelta
	memstats.heapStats.unsafeRead(&consStats)

	// Collect large allocation stats.
	totalAlloc := consStats.largeAlloc
	nMalloc := consStats.largeAllocCount
	totalFree := consStats.largeFree
	nFree := consStats.largeFreeCount

	// Collect per-sizeclass stats.
	var bySize [_NumSizeClasses]struct {
		Size    uint32
		Mallocs uint64
		Frees   uint64
	}
	for i := range bySize {
		bySize[i].Size = uint32(class_to_size[i])

		// Malloc stats.
		a := consStats.smallAllocCount[i]
		totalAlloc += a * uint64(class_to_size[i])
		nMalloc += a
		bySize[i].Mallocs = a

		// Free stats.
		f := consStats.smallFreeCount[i]
		totalFree += f * uint64(class_to_size[i])
		nFree += f
		bySize[i].Frees = f
	}

	// Account for tiny allocations.
	// For historical reasons, MemStats includes tiny allocations
	// in both the total free and total alloc count. This double-counts
	// memory in some sense because their tiny allocation block is also
	// counted. Tracking the lifetime of individual tiny allocations is
	// currently not done because it would be too expensive.
	nFree += consStats.tinyAllocCount
	nMalloc += consStats.tinyAllocCount

	// Calculate derived stats.

	stackInUse := uint64(consStats.inStacks)
	gcWorkBufInUse := uint64(consStats.inWorkBufs)
	gcProgPtrScalarBitsInUse := uint64(consStats.inPtrScalarBits)

	totalMapped := gcController.heapInUse.load() + gcController.heapFree.load() + gcController.heapReleased.load() +
		memstats.stacks_sys.load() + memstats.mspan_sys.load() + memstats.mcache_sys.load() +
		memstats.buckhash_sys.load() + memstats.gcMiscSys.load() + memstats.other_sys.load() +
		stackInUse + gcWorkBufInUse + gcProgPtrScalarBitsInUse

	heapGoal := gcController.heapGoal()

	if doubleCheckReadMemStats {
		// Only check this if we're debugging. It would be bad to crash an application
		// just because the debugging stats are wrong. We mostly rely on tests to catch
		// these issues, and we enable the double check mode for tests.
		//
		// The world is stopped, so the consistent stats (after aggregation)
		// should be identical to some combination of memstats. In particular:
		//
		// * memstats.heapInUse == inHeap
		// * memstats.heapReleased == released
		// * memstats.heapInUse + memstats.heapFree == committed - inStacks - inWorkBufs - inPtrScalarBits
		// * memstats.totalAlloc == totalAlloc
		// * memstats.totalFree == totalFree
		//
		// Check if that's actually true.
		//
		// Prevent sysmon and the tracer from skewing the stats since they can
		// act without synchronizing with a STW. See #64401.
		lock(&sched.sysmonlock)
		lock(&trace.lock)
		if gcController.heapInUse.load() != uint64(consStats.inHeap) {
			print("runtime: heapInUse=", gcController.heapInUse.load(), "\n")
			print("runtime: consistent value=", consStats.inHeap, "\n")
			throw("heapInUse and consistent stats are not equal")
		}
		if gcController.heapReleased.load() != uint64(consStats.released) {
			print("runtime: heapReleased=", gcController.heapReleased.load(), "\n")
			print("runtime: consistent value=", consStats.released, "\n")
			throw("heapReleased and consistent stats are not equal")
		}
		heapRetained := gcController.heapInUse.load() + gcController.heapFree.load()
		consRetained := uint64(consStats.committed - consStats.inStacks - consStats.inWorkBufs - consStats.inPtrScalarBits)
		if heapRetained != consRetained {
			print("runtime: global value=", heapRetained, "\n")
			print("runtime: consistent value=", consRetained, "\n")
			throw("measures of the retained heap are not equal")
		}
		if gcController.totalAlloc.Load() != totalAlloc {
			print("runtime: totalAlloc=", gcController.totalAlloc.Load(), "\n")
			print("runtime: consistent value=", totalAlloc, "\n")
			throw("totalAlloc and consistent stats are not equal")
		}
		if gcController.totalFree.Load() != totalFree {
			print("runtime: totalFree=", gcController.totalFree.Load(), "\n")
			print("runtime: consistent value=", totalFree, "\n")
			throw("totalFree and consistent stats are not equal")
		}
		// Also check that mappedReady lines up with totalMapped - released.
		// This isn't really the same type of "make sure consistent stats line up" situation,
		// but this is an opportune time to check.
		if gcController.mappedReady.Load() != totalMapped-uint64(consStats.released) {
			print("runtime: mappedReady=", gcController.mappedReady.Load(), "\n")
			print("runtime: totalMapped=", totalMapped, "\n")
			print("runtime: released=", uint64(consStats.released), "\n")
			print("runtime: totalMapped-released=", totalMapped-uint64(consStats.released), "\n")
			throw("mappedReady and other memstats are not equal")
		}
		unlock(&trace.lock)
		unlock(&sched.sysmonlock)
	}

	// We've calculated all the values we need. Now, populate stats.

	stats.Alloc = totalAlloc - totalFree
	stats.TotalAlloc = totalAlloc
	stats.Sys = totalMapped
	stats.Mallocs = nMalloc
	stats.Frees = nFree
	stats.HeapAlloc = totalAlloc - totalFree
	stats.HeapSys = gcController.heapInUse.load() + gcController.heapFree.load() + gcController.heapReleased.load()
	// By definition, HeapIdle is memory that was mapped
	// for the heap but is not currently used to hold heap
	// objects. It also specifically is memory that can be
	// used for other purposes, like stacks, but this memory
	// is subtracted out of HeapSys before it makes that
	// transition. Put another way:
	//
	// HeapSys = bytes allocated from the OS for the heap - bytes ultimately used for non-heap purposes
	// HeapIdle = bytes allocated from the OS for the heap - bytes ultimately used for any purpose
	//
	// or
	//
	// HeapSys = sys - stacks_inuse - gcWorkBufInUse - gcProgPtrScalarBitsInUse
	// HeapIdle = sys - stacks_inuse - gcWorkBufInUse - gcProgPtrScalarBitsInUse - heapInUse
	//
	// => HeapIdle = HeapSys - heapInUse = heapFree + heapReleased
	stats.HeapIdle = gcController.heapFree.load() + gcController.heapReleased.load()
	stats.HeapInuse = gcController.heapInUse.load()
	stats.HeapReleased = gcController.heapReleased.load()
	stats.HeapObjects = nMalloc - nFree
	stats.StackInuse = stackInUse
	// memstats.stacks_sys is only memory mapped directly for OS stacks.
	// Add in heap-allocated stack memory for user consumption.
	stats.StackSys = stackInUse + memstats.stacks_sys.load()
	stats.MSpanInuse = uint64(mheap_.spanalloc.inuse)
	stats.MSpanSys = memstats.mspan_sys.load()
	stats.MCacheInuse = uint64(mheap_.cachealloc.inuse)
	stats.MCacheSys = memstats.mcache_sys.load()
	stats.BuckHashSys = memstats.buckhash_sys.load()
	// MemStats defines GCSys as an aggregate of all memory related
	// to the memory management system, but we track this memory
	// at a more granular level in the runtime.
	stats.GCSys = memstats.gcMiscSys.load() + gcWorkBufInUse + gcProgPtrScalarBitsInUse
	stats.OtherSys = memstats.other_sys.load()
	stats.NextGC = heapGoal
	stats.LastGC = memstats.last_gc_unix
	stats.PauseTotalNs = memstats.pause_total_ns
	stats.PauseNs = memstats.pause_ns
	stats.PauseEnd = memstats.pause_end
	stats.NumGC = memstats.numgc
	stats.NumForcedGC = memstats.numforcedgc
	stats.GCCPUFraction = memstats.gc_cpu_fraction
	stats.EnableGC = true

	// stats.BySize and bySize might not match in length.
	// That's OK, stats.BySize cannot change due to backwards
	// compatibility issues. copy will copy the minimum amount
	// of values between the two of them.
	copy(stats.BySize[:], bySize[:])
}

//go:linkname readGCStats runtime/debug.readGCStats
func readGCStats(pauses *[]uint64) {
	systemstack(func() {
		readGCStats_m(pauses)
	})
}

// readGCStats_m must be called on the system stack because it acquires the heap
// lock. See mheap for details.
//
//go:systemstack
func readGCStats_m(pauses *[]uint64) {
	p := *pauses
	// Calling code in runtime/debug should make the slice large enough.
	if cap(p) < len(memstats.pause_ns)+3 {
		throw("short slice passed to readGCStats")
	}

	// Pass back: pauses, pause ends, last gc (absolute time), number of gc, total pause ns.
	lock(&mheap_.lock)

	n := memstats.numgc
	if n > uint32(len(memstats.pause_ns)) {
		n = uint32(len(memstats.pause_ns))
	}

	// The pause buffer is circular. The most recent pause is at
	// pause_ns[(numgc-1)%len(pause_ns)], and then backward
	// from there to go back farther in time. We deliver the times
	// most recent first (in p[0]).
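	//
	// The layout of the returned slice, as built by the code
	// below, is:
	//
	//	p[0:n]   - pause durations, most recent first
	//	p[n:2n]  - matching pause end times
	//	p[2n]    - last GC end time (unix nanoseconds)
	//	p[2n+1]  - numgc
	//	p[2n+2]  - total pause time in nanoseconds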
	p = p[:cap(p)]
	for i := uint32(0); i < n; i++ {
		j := (memstats.numgc - 1 - i) % uint32(len(memstats.pause_ns))
		p[i] = memstats.pause_ns[j]
		p[n+i] = memstats.pause_end[j]
	}

	p[n+n] = memstats.last_gc_unix
	p[n+n+1] = uint64(memstats.numgc)
	p[n+n+2] = memstats.pause_total_ns
	unlock(&mheap_.lock)
	*pauses = p[:n+n+3]
}

// flushmcache flushes the mcache of allp[i].
//
// The world must be stopped.
//
//go:nowritebarrier
func flushmcache(i int) {
	assertWorldStopped()

	p := allp[i]
	c := p.mcache
	if c == nil {
		return
	}
	c.releaseAll()
	stackcache_clear(c)
}

// flushallmcaches flushes the mcaches of all Ps.
//
// The world must be stopped.
//
//go:nowritebarrier
func flushallmcaches() {
	assertWorldStopped()

	for i := 0; i < int(gomaxprocs); i++ {
		flushmcache(i)
	}
}

// sysMemStat represents a global system statistic that is managed atomically.
//
// This type must structurally be a uint64 so that mstats aligns with MemStats.
type sysMemStat uint64

// load atomically reads the value of the stat.
//
// Must be nosplit as it is called in runtime initialization, e.g. newosproc0.
//
//go:nosplit
func (s *sysMemStat) load() uint64 {
	return atomic.Load64((*uint64)(s))
}

// add atomically adds n to the stat.
//
// Must be nosplit as it is called in runtime initialization, e.g. newosproc0.
//
//go:nosplit
func (s *sysMemStat) add(n int64) {
	val := atomic.Xadd64((*uint64)(s), n)
	if (n > 0 && int64(val) < n) || (n < 0 && int64(val)+n < n) {
		print("runtime: val=", val, " n=", n, "\n")
		throw("sysMemStat overflow")
	}
}
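
// Runtime allocation routines account memory against one of these
// stats by taking a *sysMemStat and calling add on it, roughly (a
// sketch; sysAlloc's exact signature varies across Go versions):
//
//	v := sysAlloc(n, &memstats.mspan_sys)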

// heapStatsDelta contains deltas of various runtime memory statistics
// that need to be updated together in order for them to be kept
// consistent with one another.
type heapStatsDelta struct {
	// Memory stats.
	committed       int64 // byte delta of memory committed
	released        int64 // byte delta of released memory generated
	inHeap          int64 // byte delta of memory placed in the heap
	inStacks        int64 // byte delta of memory reserved for stacks
	inWorkBufs      int64 // byte delta of memory reserved for work bufs
	inPtrScalarBits int64 // byte delta of memory reserved for unrolled GC prog bits

	// Allocator stats.
	//
	// These are all uint64 because they're cumulative, and could quickly wrap
	// around otherwise.
	tinyAllocCount  uint64                  // number of tiny allocations
	largeAlloc      uint64                  // bytes allocated for large objects
	largeAllocCount uint64                  // number of large object allocations
	smallAllocCount [_NumSizeClasses]uint64 // number of allocs for small objects
	largeFree       uint64                  // bytes freed for large objects (>maxSmallSize)
	largeFreeCount  uint64                  // number of frees for large objects (>maxSmallSize)
	smallFreeCount  [_NumSizeClasses]uint64 // number of frees for small objects (<=maxSmallSize)

	// NOTE: This struct must be a multiple of 8 bytes in size because it
	// is stored in an array. If it's not, atomic accesses to the above
	// fields may be unaligned and fail on 32-bit platforms.
}

// merge adds in the deltas from b into a.
func (a *heapStatsDelta) merge(b *heapStatsDelta) {
	a.committed += b.committed
	a.released += b.released
	a.inHeap += b.inHeap
	a.inStacks += b.inStacks
	a.inWorkBufs += b.inWorkBufs
	a.inPtrScalarBits += b.inPtrScalarBits

	a.tinyAllocCount += b.tinyAllocCount
	a.largeAlloc += b.largeAlloc
	a.largeAllocCount += b.largeAllocCount
	for i := range b.smallAllocCount {
		a.smallAllocCount[i] += b.smallAllocCount[i]
	}
	a.largeFree += b.largeFree
	a.largeFreeCount += b.largeFreeCount
	for i := range b.smallFreeCount {
		a.smallFreeCount[i] += b.smallFreeCount[i]
	}
}

// consistentHeapStats represents a set of various memory statistics
// whose updates must be viewed completely to get a consistent
// state of the world.
//
// To write updates to memory stats use the acquire and release
// methods. To obtain a consistent global snapshot of these statistics,
// use read.
type consistentHeapStats struct {
	// stats is a ring buffer of heapStatsDelta values.
	// Writers always atomically update the delta at index gen.
	//
	// Readers operate by rotating gen (0 -> 1 -> 2 -> 0 -> ...)
	// and synchronizing with writers by observing each P's
	// statsSeq field. If the reader observes a P not writing,
	// it can be sure that it will pick up the new gen value the
	// next time it writes.
	//
	// The reader then takes responsibility by clearing space
	// in the ring buffer for the next reader to rotate gen to
	// that space (i.e. it merges in values from index (gen-2) mod 3
	// to index (gen-1) mod 3, then clears the former).
	//
	// Note that this means only one reader can be reading at a time.
	// There is no way for readers to synchronize.
	//
	// This process is why we need a ring buffer of size 3 instead
	// of 2: one is for the writers, one contains the most recent
	// data, and the last one is clear so writers can begin writing
	// to it the moment gen is updated.
	stats [3]heapStatsDelta

	// gen represents the current index into which writers
	// are writing, and can take on the value of 0, 1, or 2.
	gen atomic.Uint32

	// noPLock is intended to provide mutual exclusion for updating
	// stats when no P is available. It does not block other writers
	// with a P, only other writers without a P and the reader. Because
	// stats are usually updated when a P is available, contention on
	// this lock should be minimal.
	noPLock mutex
}

// acquire returns a heapStatsDelta to be updated. In effect,
// it acquires the shard for writing. release must be called
// as soon as the relevant deltas are updated.
//
// The returned heapStatsDelta must be updated atomically.
//
// The caller's P must not change between acquire and
// release. This also means that the caller should not
// acquire a P or release its P in between. A P also must
// not acquire a given consistentHeapStats if it hasn't
// yet released it.
//
// nosplit because a stack growth in this function could
// lead to a stack allocation that could reenter the
// function.
//
//go:nosplit
func (m *consistentHeapStats) acquire() *heapStatsDelta {
	if pp := getg().m.p.ptr(); pp != nil {
		seq := pp.statsSeq.Add(1)
		if seq%2 == 0 {
			// Should have been incremented to odd.
			print("runtime: seq=", seq, "\n")
			throw("bad sequence number")
		}
	} else {
		lock(&m.noPLock)
	}
	gen := m.gen.Load() % 3
	return &m.stats[gen]
}

// release indicates that the writer is done modifying
// the delta. The value returned by the corresponding
// acquire must no longer be accessed or modified after
// release is called.
//
// The caller's P must not change between acquire and
// release. This also means that the caller should not
// acquire a P or release its P in between.
//
// nosplit because a stack growth in this function could
// lead to a stack allocation that causes another acquire
// before this operation has completed.
//
//go:nosplit
func (m *consistentHeapStats) release() {
	if pp := getg().m.p.ptr(); pp != nil {
		seq := pp.statsSeq.Add(1)
		if seq%2 != 0 {
			// Should have been incremented to even.
			print("runtime: seq=", seq, "\n")
			throw("bad sequence number")
		}
	} else {
		unlock(&m.noPLock)
	}
}
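
// A writer updates stats between acquire and release, for example (a
// sketch of the allocator's pattern; the field updated here is
// illustrative):
//
//	stats := memstats.heapStats.acquire()
//	atomic.Xadd64(&stats.tinyAllocCount, 1)
//	memstats.heapStats.release()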

// unsafeRead aggregates the delta for this shard into out.
//
// Unsafe because it does so without any synchronization. The
// world must be stopped.
func (m *consistentHeapStats) unsafeRead(out *heapStatsDelta) {
	assertWorldStopped()

	for i := range m.stats {
		out.merge(&m.stats[i])
	}
}

// unsafeClear clears the shard.
//
// Unsafe because the world must be stopped and values should
// be donated elsewhere before clearing.
func (m *consistentHeapStats) unsafeClear() {
	assertWorldStopped()

	for i := range m.stats {
		m.stats[i] = heapStatsDelta{}
	}
}

// read takes a globally consistent snapshot of m
// and puts the aggregated value in out. Even though out is a
// heapStatsDelta, the resulting values should be complete and
// valid statistic values.
//
// Not safe to call concurrently. The world must be stopped
// or metricsSema must be held.
func (m *consistentHeapStats) read(out *heapStatsDelta) {
	// Getting preempted after this point is not safe because
	// we read allp. We need to make sure a STW can't happen
	// so it doesn't change out from under us.
	mp := acquirem()

	// Get the current generation. We can be confident that this
	// will not change since read is serialized and is the only
	// one that modifies currGen.
	currGen := m.gen.Load()
	prevGen := currGen - 1
	if currGen == 0 {
		prevGen = 2
	}
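
	// For example, if currGen is 2, the swap below moves writers to
	// generation 0; stats[1] (prevGen) is then merged into stats[2]
	// and cleared, leaving stats[2] as the complete snapshot.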

	// Prevent writers without a P from writing while we update gen.
	lock(&m.noPLock)

	// Rotate gen, effectively taking a snapshot of the state of
	// these statistics at the point of the exchange by moving
	// writers to the next set of deltas.
	//
	// This exchange is safe to do because we won't race
	// with anyone else trying to update this value.
	m.gen.Swap((currGen + 1) % 3)

	// Allow P-less writers to continue. They'll be writing to the
	// next generation now.
	unlock(&m.noPLock)

	for _, p := range allp {
		// Spin until there are no more writers.
		for p.statsSeq.Load()%2 != 0 {
		}
	}

	// At this point we've observed that each sequence
	// number is even, so any future writers will observe
	// the new gen value. That means it's safe to read from
	// the other deltas in the stats buffer.

	// Perform our responsibilities and free up
	// stats[prevGen] for the next time we want to take
	// a snapshot.
	m.stats[currGen].merge(&m.stats[prevGen])
	m.stats[prevGen] = heapStatsDelta{}

	// Finally, copy out the complete delta.
	*out = m.stats[currGen]

	releasem(mp)
}

type cpuStats struct {
	// All fields are CPU time in nanoseconds computed by comparing
	// calls of nanotime. This means they're all overestimates, because
	// they don't accurately compute on-CPU time (so some of the time
	// could be spent scheduled away by the OS).

	GCAssistTime    int64 // GC assists
	GCDedicatedTime int64 // GC dedicated mark workers + fractional mark workers
	GCIdleTime      int64 // GC idle mark workers
	GCPauseTime     int64 // GC pauses (all GOMAXPROCS, even if just 1 is running)
	GCTotalTime     int64

	ScavengeAssistTime int64 // scavenge assists
	ScavengeBgTime     int64 // background scavenger
	ScavengeTotalTime  int64

	IdleTime int64 // Time Ps spent in _Pidle.
	UserTime int64 // Time Ps spent in _Prunning or _Psyscall that's not any of the above.

	TotalTime int64 // GOMAXPROCS * (monotonic wall clock time elapsed)
}

// accumulateGCPauseTime adds dt*maxProcs to the GC CPU pause time stats. dt should be
// the actual time spent paused, for orthogonality. maxProcs should be GOMAXPROCS,
// not work.stwprocs, since this number must be comparable to a total time computed
// from GOMAXPROCS.
func (s *cpuStats) accumulateGCPauseTime(dt int64, maxProcs int32) {
	cpu := dt * int64(maxProcs)
	s.GCPauseTime += cpu
	s.GCTotalTime += cpu
}

// accumulate takes a cpuStats and adds in the current state of all GC CPU
// counters.
//
// gcMarkPhase indicates that we're in the mark phase and that certain counter
// values should be used.
func (s *cpuStats) accumulate(now int64, gcMarkPhase bool) {
	// N.B. Mark termination and sweep termination pauses are
	// accumulated in work.cpuStats at the end of their respective pauses.
	var (
		markAssistCpu     int64
		markDedicatedCpu  int64
		markFractionalCpu int64
		markIdleCpu       int64
	)
	if gcMarkPhase {
		// N.B. These stats may have stale values if the GC is not
		// currently in the mark phase.
		markAssistCpu = gcController.assistTime.Load()
		markDedicatedCpu = gcController.dedicatedMarkTime.Load()
		markFractionalCpu = gcController.fractionalMarkTime.Load()
		markIdleCpu = gcController.idleMarkTime.Load()
	}

	// The rest of the stats below are either derived from the above or
	// are reset on each mark termination.

	scavAssistCpu := scavenge.assistTime.Load()
	scavBgCpu := scavenge.backgroundTime.Load()

	// Update cumulative GC CPU stats.
	s.GCAssistTime += markAssistCpu
	s.GCDedicatedTime += markDedicatedCpu + markFractionalCpu
	s.GCIdleTime += markIdleCpu
	s.GCTotalTime += markAssistCpu + markDedicatedCpu + markFractionalCpu + markIdleCpu

	// Update cumulative scavenge CPU stats.
	s.ScavengeAssistTime += scavAssistCpu
	s.ScavengeBgTime += scavBgCpu
	s.ScavengeTotalTime += scavAssistCpu + scavBgCpu

	// Update total CPU.
	s.TotalTime = sched.totaltime + (now-sched.procresizetime)*int64(gomaxprocs)
	s.IdleTime += sched.idleTime.Load()

	// Compute userTime. We compute this indirectly as everything that's not the above.
	//
	// Since time spent in _Pgcstop is covered by gcPauseTime, and time spent in _Pidle
	// is covered by idleTime, what we're left with is time spent in _Prunning and _Psyscall,
	// the latter of which is fine because the P will either go idle or get used for something
	// else via sysmon. Meanwhile if we subtract GC time from whatever's left, we get non-GC
	// _Prunning time. Note that this still leaves time spent in sweeping and in the scheduler,
	// but that's fine. The overwhelming majority of this time will be actual user time.
	s.UserTime = s.TotalTime - (s.GCTotalTime + s.ScavengeTotalTime + s.IdleTime)
}
