// Copyright 2020 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package runtime

import (
	"internal/cpu"
	"internal/goarch"
	"internal/runtime/atomic"
	"unsafe"
)

// A spanSet is a set of *mspans.
//
// spanSet is safe for concurrent push and pop operations.
type spanSet struct {
	// A spanSet is a two-level data structure consisting of a
	// growable spine that points to fixed-sized blocks. The spine
	// can be accessed without locks, but adding a block or
	// growing it requires taking the spine lock.
	//
	// Because each mspan covers at least 8K of heap and takes at
	// most 8 bytes in the spanSet, the growth of the spine is
	// quite limited.
	//
	// The spine and all blocks are allocated off-heap, which
	// allows this to be used in the memory manager and avoids the
	// need for write barriers on all of these. spanSetBlocks are
	// managed in a pool, though never freed back to the operating
	// system. We never release spine memory because there could be
	// concurrent lock-free access and we're likely to reuse it
	// anyway. (In principle, we could do this during STW.)

	spineLock mutex
	spine     atomicSpanSetSpinePointer // *[N]atomic.Pointer[spanSetBlock]
	spineLen  atomic.Uintptr            // Spine array length
	spineCap  uintptr                   // Spine array cap, accessed under spineLock

	// index is the head and tail of the spanSet in a single field.
	// The head and the tail both represent an index into the logical
	// concatenation of all blocks, with the head always behind or
	// equal to the tail (head == tail means the set is empty). This
	// field is always accessed atomically.
	//
	// The head and the tail are only 32 bits wide, which means we
	// can only support up to 2^32 pushes before a reset. If every
	// span in the heap were stored in this set, and each span were
	// the minimum size (1 runtime page, 8 KiB), then roughly the
	// smallest heap which would be unrepresentable is 32 TiB in size.
	index atomicHeadTailIndex
}

const (
	spanSetBlockEntries = 512 // 4KB on 64-bit
	spanSetInitSpineCap = 256 // Enough for 1GB heap on 64-bit
)
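
// spanSetSizingSketch is an illustrative sketch, not used by the runtime
// (the function name is hypothetical). It spells out what the constants
// above imply on 64-bit: a block's spans array is 512*8 bytes = 4 KiB, and
// the initial spine of 256 blocks indexes 256*512 spans, which at the 8 KiB
// minimum span size covers roughly 1 GiB of heap before the spine must grow.
func spanSetSizingSketch() (blockSpansBytes, spansIndexed, heapBytesCovered uintptr) {
	blockSpansBytes = spanSetBlockEntries * goarch.PtrSize   // 512 * 8 = 4096 bytes
	spansIndexed = spanSetInitSpineCap * spanSetBlockEntries // 256 * 512 = 131072 spans
	heapBytesCovered = spansIndexed * (8 << 10)              // * 8 KiB per span, ~1 GiB
	return
}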

type spanSetBlock struct {
	// Free spanSetBlocks are managed via a lock-free stack.
	lfnode

	// popped is the number of pop operations that have occurred on
	// this block. This number is used to help determine when a block
	// may be safely recycled.
	popped atomic.Uint32

	// spans is the set of spans in this block.
	spans [spanSetBlockEntries]atomicMSpanPointer
}

// push adds span s to buffer b. push is safe to call concurrently
// with other push and pop operations.
func (b *spanSet) push(s *mspan) {
	// Obtain our slot.
	cursor := uintptr(b.index.incTail().tail() - 1)
	top, bottom := cursor/spanSetBlockEntries, cursor%spanSetBlockEntries

	// Do we need to add a block?
	spineLen := b.spineLen.Load()
	var block *spanSetBlock
retry:
	if top < spineLen {
		block = b.spine.Load().lookup(top).Load()
	} else {
		// Add a new block to the spine, potentially growing
		// the spine.
		lock(&b.spineLock)
		// spineLen cannot change until we release the lock,
		// but may have changed while we were waiting.
		spineLen = b.spineLen.Load()
		if top < spineLen {
			unlock(&b.spineLock)
			goto retry
		}

		spine := b.spine.Load()
		if spineLen == b.spineCap {
			// Grow the spine.
			newCap := b.spineCap * 2
			if newCap == 0 {
				newCap = spanSetInitSpineCap
			}
			newSpine := persistentalloc(newCap*goarch.PtrSize, cpu.CacheLineSize, &memstats.gcMiscSys)
			if b.spineCap != 0 {
				// Blocks are allocated off-heap, so
				// no write barriers.
				memmove(newSpine, spine.p, b.spineCap*goarch.PtrSize)
			}
			spine = spanSetSpinePointer{newSpine}

			// Spine is allocated off-heap, so no write barrier.
			b.spine.StoreNoWB(spine)
			b.spineCap = newCap
			// We can't immediately free the old spine
			// since a concurrent push with a lower index
			// could still be reading from it. We let it
			// leak because even a 1TB heap would waste
			// less than 2MB of memory on old spines. If
			// this is a problem, we could free old spines
			// during STW.
		}

		// Allocate a new block from the pool.
		block = spanSetBlockPool.alloc()

		// Add it to the spine.
		// Blocks are allocated off-heap, so no write barrier.
		spine.lookup(top).StoreNoWB(block)
		b.spineLen.Store(spineLen + 1)
		unlock(&b.spineLock)
	}

	// We have a block. Insert the span atomically, since there may be
	// concurrent readers via the block API.
	block.spans[bottom].StoreNoWB(s)
}
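
// pushSlotSketch is an illustrative sketch, not used by the runtime (the
// function name is hypothetical). It isolates how push turns the claimed
// cursor into a block coordinate: with 512 entries per block, cursor 1037
// lands in spine slot 2 (the third block) at entry 13, and a new block is
// only needed when that spine slot is at or beyond the current spineLen.
func pushSlotSketch(cursor, spineLen uintptr) (top, bottom uintptr, needNewBlock bool) {
	top = cursor / spanSetBlockEntries    // which block on the spine
	bottom = cursor % spanSetBlockEntries // which slot within that block
	return top, bottom, top >= spineLen
}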

// pop removes and returns a span from buffer b, or nil if b is empty.
// pop is safe to call concurrently with other pop and push operations.
func (b *spanSet) pop() *mspan {
	var head, tail uint32
claimLoop:
	for {
		headtail := b.index.load()
		head, tail = headtail.split()
		if head >= tail {
			// The buf is empty, as far as we can tell.
			return nil
		}
		// Check if the head position we want to claim is actually
		// backed by a block.
		spineLen := b.spineLen.Load()
		if spineLen <= uintptr(head)/spanSetBlockEntries {
			// We're racing with a spine growth and the allocation of
			// a new block (and maybe a new spine!), and trying to grab
			// the span at the index which is currently being pushed.
			// Instead of spinning, let's just notify the caller that
			// there's nothing currently here. Spinning on this is
			// almost definitely not worth it.
			return nil
		}
		// Try to claim the current head by CASing in an updated head.
		// This may fail transiently due to a push which modifies the
		// tail, so keep trying while the head isn't changing.
		want := head
		for want == head {
			if b.index.cas(headtail, makeHeadTailIndex(want+1, tail)) {
				break claimLoop
			}
			headtail = b.index.load()
			head, tail = headtail.split()
		}
		// We failed to claim the spot we were after and the head changed,
		// meaning a popper got ahead of us. Try again from the top because
		// the buf may not be empty.
	}
	top, bottom := head/spanSetBlockEntries, head%spanSetBlockEntries

	// We may be reading a stale spine pointer, but because the length
	// grows monotonically and we've already verified it, we'll definitely
	// be reading from a valid block.
	blockp := b.spine.Load().lookup(uintptr(top))

	// Given that the spine length is correct, we know we will never
	// see a nil block here, since the length is always updated after
	// the block is set.
	block := blockp.Load()
	s := block.spans[bottom].Load()
	for s == nil {
		// We raced with the span actually being set, but given that we
		// know a block for this span exists, the race window here is
		// extremely small. Try again.
		s = block.spans[bottom].Load()
	}
	// Clear the pointer. This isn't strictly necessary, but defensively
	// avoids accidentally re-using blocks which could lead to memory
	// corruption. This way, we'll get a nil pointer access instead.
	block.spans[bottom].StoreNoWB(nil)

	// Increase the popped count. If we are the last possible popper
	// in the block (note that bottom need not equal spanSetBlockEntries-1
	// due to races) then it's our responsibility to free the block.
	//
	// If we increment popped to spanSetBlockEntries, we can be sure that
	// we're the last popper for this block, and it's thus safe to free it.
	// Every other popper must have crossed this barrier (and thus finished
	// popping its corresponding mspan) by the time we get here. Because
	// we're the last popper, we also don't have to worry about concurrent
	// pushers (there can't be any). Note that we may not be the popper
	// which claimed the last slot in the block, we're just the last one
	// to finish popping.
	if block.popped.Add(1) == spanSetBlockEntries {
		// Clear the block's pointer.
		blockp.StoreNoWB(nil)

		// Return the block to the block pool.
		spanSetBlockPool.free(block)
	}
	return s
}
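
// claimHeadSketch is an illustrative sketch, not used by the runtime (the
// function name is hypothetical). It isolates the CAS claim pattern that
// pop uses above: read the combined index, bail out if the set looks empty,
// then try to advance the head while the head we observed stays unchanged;
// if the head moves, another popper won the race and we start over.
func claimHeadSketch(idx *atomicHeadTailIndex) (claimed uint32, ok bool) {
	for {
		ht := idx.load()
		head, tail := ht.split()
		if head >= tail {
			// Empty, as far as we can tell.
			return 0, false
		}
		want := head
		for want == head {
			// Only a moving tail (a concurrent push) can make this
			// CAS fail while the head stays the same.
			if idx.cas(ht, makeHeadTailIndex(want+1, tail)) {
				return want, true
			}
			ht = idx.load()
			head, tail = ht.split()
		}
		// The head changed: retry from the top, the set may still
		// be non-empty.
	}
}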

// reset resets a spanSet which is empty. It will also clean up
// any leftover blocks.
//
// Throws if the buf is not empty.
//
// reset may not be called concurrently with any other operations
// on the span set.
func (b *spanSet) reset() {
	head, tail := b.index.load().split()
	if head < tail {
		print("head = ", head, ", tail = ", tail, "\n")
		throw("attempt to clear non-empty span set")
	}
	top := head / spanSetBlockEntries
	if uintptr(top) < b.spineLen.Load() {
		// If the head catches up to the tail and the set is empty,
		// we may not clean up the block containing the head and tail
		// since it may be pushed into again. In order to avoid leaking
		// memory since we're going to reset the head and tail, clean
		// up such a block now, if it exists.
		blockp := b.spine.Load().lookup(uintptr(top))
		block := blockp.Load()
		if block != nil {
			// Check the popped value.
			if block.popped.Load() == 0 {
				// popped should never be zero here: a non-nil block
				// pointer means at least one span was pushed into this
				// block, and since the set is empty, it must also have
				// been popped.
				throw("span set block with unpopped elements found in reset")
			}
			if block.popped.Load() == spanSetBlockEntries {
				// popped should also never be equal to spanSetBlockEntries
				// because the last popper should have made the block pointer
				// in this slot nil.
				throw("fully empty unfreed span set block found in reset")
			}

			// Clear the pointer to the block.
			blockp.StoreNoWB(nil)

			// Return the block to the block pool.
			spanSetBlockPool.free(block)
		}
	}
	b.index.reset()
	b.spineLen.Store(0)
}

// atomicSpanSetSpinePointer is an atomically-accessed spanSetSpinePointer.
//
// It has the same semantics as atomic.UnsafePointer.
type atomicSpanSetSpinePointer struct {
	a atomic.UnsafePointer
}

// Load loads the spanSetSpinePointer and returns it.
//
// It has the same semantics as [atomic.UnsafePointer].
func (s *atomicSpanSetSpinePointer) Load() spanSetSpinePointer {
	return spanSetSpinePointer{s.a.Load()}
}

// StoreNoWB stores the spanSetSpinePointer without a write barrier.
//
// It has the same semantics as [atomic.UnsafePointer].
func (s *atomicSpanSetSpinePointer) StoreNoWB(p spanSetSpinePointer) {
	s.a.StoreNoWB(p.p)
}

// spanSetSpinePointer represents a pointer to a contiguous block of atomic.Pointer[spanSetBlock].
type spanSetSpinePointer struct {
	p unsafe.Pointer
}

// lookup returns &s[idx].
func (s spanSetSpinePointer) lookup(idx uintptr) *atomic.Pointer[spanSetBlock] {
	return (*atomic.Pointer[spanSetBlock])(add(s.p, goarch.PtrSize*idx))
}
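
// spineLookupSketch is an illustrative sketch, not used by the runtime (the
// function name is hypothetical). It shows how the two levels compose on the
// read side: index the spine to find the block, then index the block's spans
// array to find the span. Callers like pop only do this after checking
// spineLen, which is why they never observe a nil block.
func spineLookupSketch(s spanSetSpinePointer, cursor uintptr) *mspan {
	block := s.lookup(cursor / spanSetBlockEntries).Load()
	if block == nil {
		// The block for this slot hasn't been published yet.
		return nil
	}
	return block.spans[cursor%spanSetBlockEntries].Load()
}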

// spanSetBlockPool is a global pool of spanSetBlocks.
var spanSetBlockPool spanSetBlockAlloc

// spanSetBlockAlloc represents a concurrent pool of spanSetBlocks.
type spanSetBlockAlloc struct {
	stack lfstack
}

// alloc tries to grab a spanSetBlock out of the pool, and if that
// fails, allocates a new one with persistentalloc and returns it.
func (p *spanSetBlockAlloc) alloc() *spanSetBlock {
	if s := (*spanSetBlock)(p.stack.pop()); s != nil {
		return s
	}
	return (*spanSetBlock)(persistentalloc(unsafe.Sizeof(spanSetBlock{}), cpu.CacheLineSize, &memstats.gcMiscSys))
}

// free returns a spanSetBlock back to the pool.
func (p *spanSetBlockAlloc) free(block *spanSetBlock) {
	block.popped.Store(0)
	p.stack.push(&block.lfnode)
}
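
// spanSetBlockPoolSketch is an illustrative sketch, not used by the runtime
// (the function name is hypothetical). It shows the block lifecycle through
// the pool: alloc returns a recycled block from the lock-free stack or a
// freshly persistentalloc'd one, and free resets the popped counter before
// making the block available again.
func spanSetBlockPoolSketch() {
	block := spanSetBlockPool.alloc()
	// ... publish the block on a spine slot and fill block.spans ...
	spanSetBlockPool.free(block) // block.popped is reset to 0 here
}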

// headTailIndex combines a 32-bit head and a 32-bit tail
// of a queue into a single 64-bit value.
type headTailIndex uint64

// makeHeadTailIndex creates a headTailIndex value from a separate
// head and tail.
func makeHeadTailIndex(head, tail uint32) headTailIndex {
	return headTailIndex(uint64(head)<<32 | uint64(tail))
}

// head returns the head of a headTailIndex value.
func (h headTailIndex) head() uint32 {
	return uint32(h >> 32)
}

// tail returns the tail of a headTailIndex value.
func (h headTailIndex) tail() uint32 {
	return uint32(h)
}

// split splits the headTailIndex value into its parts.
func (h headTailIndex) split() (head uint32, tail uint32) {
	return h.head(), h.tail()
}
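
// headTailPackingSketch is an illustrative sketch, not used by the runtime
// (the function name is hypothetical). It shows the packing defined above:
// the head occupies the high 32 bits and the tail the low 32 bits, so a
// single 64-bit atomic can read or update both consistently.
func headTailPackingSketch() {
	ht := makeHeadTailIndex(2, 5) // uint64(2)<<32 | 5
	h, t := ht.split()            // h == 2, t == 5
	_, _ = h, t
}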

// atomicHeadTailIndex is an atomically-accessed headTailIndex.
type atomicHeadTailIndex struct {
	u atomic.Uint64
}

// load atomically reads a headTailIndex value.
func (h *atomicHeadTailIndex) load() headTailIndex {
	return headTailIndex(h.u.Load())
}

// cas atomically compares-and-swaps a headTailIndex value.
func (h *atomicHeadTailIndex) cas(old, new headTailIndex) bool {
	return h.u.CompareAndSwap(uint64(old), uint64(new))
}

// incHead atomically increments the head of a headTailIndex.
func (h *atomicHeadTailIndex) incHead() headTailIndex {
	return headTailIndex(h.u.Add(1 << 32))
}

// decHead atomically decrements the head of a headTailIndex.
func (h *atomicHeadTailIndex) decHead() headTailIndex {
	return headTailIndex(h.u.Add(-(1 << 32)))
}

// incTail atomically increments the tail of a headTailIndex.
func (h *atomicHeadTailIndex) incTail() headTailIndex {
	ht := headTailIndex(h.u.Add(1))
	// Check for overflow.
	if ht.tail() == 0 {
		print("runtime: head = ", ht.head(), ", tail = ", ht.tail(), "\n")
		throw("headTailIndex overflow")
	}
	return ht
}

// reset clears the headTailIndex to (0, 0).
func (h *atomicHeadTailIndex) reset() {
	h.u.Store(0)
}

// atomicMSpanPointer is an atomic.Pointer[mspan]. Can't use generics because it's NotInHeap.
type atomicMSpanPointer struct {
	p atomic.UnsafePointer
}

// Load returns the *mspan.
func (p *atomicMSpanPointer) Load() *mspan {
	return (*mspan)(p.p.Load())
}

// StoreNoWB stores an *mspan without a write barrier.
func (p *atomicMSpanPointer) StoreNoWB(s *mspan) {
	p.p.StoreNoWB(unsafe.Pointer(s))
}