// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

//go:build ignore

// Generate tables for small malloc size classes.
//
// See malloc.go for overview.
//
// The size classes are chosen so that rounding an allocation
// request up to the next size class wastes at most 12.5% (1.125x).
//
// Each size class has its own page count that gets allocated
// and chopped up when new objects of the size class are needed.
// That page count is chosen so that chopping up the run of
// pages into objects of the given size wastes at most 12.5% (1.125x)
// of the memory. It is not necessary that the cutoff here be
// the same as above.
//
// The two sources of waste multiply, so the worst possible case
// for the above constraints would be that allocations of some
// size might have a 26.6% (1.266x) overhead.
// In practice, only one of the wastes comes into play for a
// given size (sizes < 512 waste mainly on the round-up,
// sizes > 512 waste mainly on the page chopping).
// For really small sizes, alignment constraints force the
// overhead higher.
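//
// (Worked arithmetic for the combined bound above: a request can be
// rounded up by at most a factor of 1.125, and the span for its size
// class can itself waste at most a factor of 1.125, so the combined
// overhead is bounded by 1.125 * 1.125 = 1.265625, i.e. about 26.6%.)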

package main

import (
	"bytes"
	"flag"
	"fmt"
	"go/format"
	"io"
	"log"
	"math"
	"math/bits"
	"os"
)

// Generate sizeclasses.go

var stdout = flag.Bool("stdout", false, "write to stdout instead of sizeclasses.go")

func main() {
	flag.Parse()

	var b bytes.Buffer
	fmt.Fprintln(&b, "// Code generated by mksizeclasses.go; DO NOT EDIT.")
	fmt.Fprintln(&b, "//go:generate go run mksizeclasses.go")
	fmt.Fprintln(&b)
	fmt.Fprintln(&b, "package runtime")
	classes := makeClasses()

	printComment(&b, classes)

	printClasses(&b, classes)

	out, err := format.Source(b.Bytes())
	if err != nil {
		log.Fatal(err)
	}
	if *stdout {
		_, err = os.Stdout.Write(out)
	} else {
		err = os.WriteFile("sizeclasses.go", out, 0666)
	}
	if err != nil {
		log.Fatal(err)
	}
}

const (
	// Constants that we use and will transfer to the runtime.
	minHeapAlign = 8
	maxSmallSize = 32 << 10
	smallSizeDiv = 8
	smallSizeMax = 1024
	largeSizeDiv = 128
	pageShift    = 13

	// Derived constants.
	pageSize = 1 << pageShift
)

type class struct {
	size   int // max size
	npages int // number of pages
}

func powerOfTwo(x int) bool {
	return x != 0 && x&(x-1) == 0
}

func makeClasses() []class {
	var classes []class

	classes = append(classes, class{}) // class #0 is a dummy entry

	align := minHeapAlign
	for size := align; size <= maxSmallSize; size += align {
		if powerOfTwo(size) { // bump alignment once in a while
			if size >= 2048 {
				align = 256
			} else if size >= 128 {
				align = size / 8
			} else if size >= 32 {
				align = 16 // heap bitmaps assume 16 byte alignment for allocations >= 32 bytes.
			}
		}
		if !powerOfTwo(align) {
			panic("incorrect alignment")
		}

		// Make the allocnpages big enough that
		// the leftover is less than 1/8 of the total,
		// so wasted space is at most 12.5%.
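		// For example, for the 8448-byte size mentioned below: one page
		// leaves all 8192 bytes unused, so allocsize grows until, at 7
		// pages (57344 bytes), the span holds 6 objects (50688 bytes)
		// with 6656 bytes left over, and 6656 <= 57344/8 = 7168.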
		allocsize := pageSize
		for allocsize%size > allocsize/8 {
			allocsize += pageSize
		}
		npages := allocsize / pageSize

		// If the previous sizeclass chose the same
		// allocation size and fit the same number of
		// objects into the page, we might as well
		// use just this size instead of having two
		// different sizes.
		if len(classes) > 1 && npages == classes[len(classes)-1].npages && allocsize/size == allocsize/classes[len(classes)-1].size {
			classes[len(classes)-1].size = size
			continue
		}
		classes = append(classes, class{size: size, npages: npages})
	}

	// Increase object sizes if we can fit the same number of larger objects
	// into the same number of pages. For example, we choose size 8448 above
	// with 6 objects in 7 pages. But we may as well use object size 9472,
	// which is also 6 objects in 7 pages but +1024 bytes (+12.12%).
	// We need to preserve at least largeSizeDiv alignment, otherwise
	// sizeToClass won't work.
	for i := range classes {
		if i == 0 {
			continue
		}
		c := &classes[i]
		psize := c.npages * pageSize
		new_size := (psize / (psize / c.size)) &^ (largeSizeDiv - 1)
		if new_size > c.size {
			c.size = new_size
		}
	}

	if len(classes) != 68 {
		panic("number of size classes has changed")
	}

	for i := range classes {
		computeDivMagic(&classes[i])
	}

	return classes
}

// computeDivMagic checks that the division required to compute object
// index from span offset can be computed using 32-bit multiplication.
// n / c.size is implemented as (n * (^uint32(0)/uint32(c.size) + 1)) >> 32
// for all 0 <= n <= c.npages * pageSize.
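//
// For example, for the 48-byte class (one page per span, so n never
// exceeds 8192), the magic constant is ^uint32(0)/48 + 1 = 89478486,
// and, say, n = 1000 gives (1000*89478486)>>32 = 20 = 1000/48.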
func computeDivMagic(c *class) {
	// divisor
	d := c.size
	if d == 0 {
		return
	}

	// maximum input value for which the formula needs to work.
	max := c.npages * pageSize

	// As reported in [1], if n and d are unsigned N-bit integers, we
	// can compute n / d as ⌊n * c / 2^F⌋, where c is ⌈2^F / d⌉ and F is
	// computed with:
	//
	// 	Algorithm 2: Algorithm to select the number of fractional bits
	// 	and the scaled approximate reciprocal in the case of unsigned
	// 	integers.
	//
	// 	if d is a power of two then
	// 		Let F ← log₂(d) and c = 1.
	// 	else
	// 		Let F ← N + L where L is the smallest integer
	// 		such that d ≤ (2^(N+L) mod d) + 2^L.
	// 	end if
	//
	// [1] "Faster Remainder by Direct Computation: Applications to
	// Compilers and Software Libraries", Daniel Lemire, Owen Kaser,
	// Nathan Kurz, arXiv:1902.01961
	//
	// To minimize the risk of introducing errors, we implement the
	// algorithm exactly as stated, rather than trying to adapt it to
	// fit typical Go idioms.
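	//
	// For example, for d = 48 with max = 8192 (so N = 14), L = 5 is the
	// smallest value satisfying the condition (2^19 mod 48 + 2^5 = 64 ≥ 48),
	// giving F = 19, comfortably under the 32 bits used below.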
	N := bits.Len(uint(max))
	var F int
	if powerOfTwo(d) {
		F = int(math.Log2(float64(d)))
		if d != 1<<F {
			panic("imprecise log2")
		}
	} else {
		for L := 0; ; L++ {
			if d <= ((1<<(N+L))%d)+(1<<L) {
				F = N + L
				break
			}
		}
	}

	// As noted in the paper, F is the smallest number of fractional
	// bits required. We use 32 bits because that works for all size
	// classes and is fast on all CPU architectures that we support.
	if F > 32 {
		fmt.Printf("d=%d max=%d N=%d F=%d\n", c.size, max, N, F)
		panic("size class requires more than 32 bits of precision")
	}

	// Brute force double-check with the exact computation that will be
	// done by the runtime.
	m := ^uint32(0)/uint32(c.size) + 1
	for n := 0; n <= max; n++ {
		if uint32((uint64(n)*uint64(m))>>32) != uint32(n/c.size) {
			fmt.Printf("d=%d max=%d m=%d n=%d\n", d, max, m, n)
			panic("bad 32-bit multiply magic")
		}
	}
}

func printComment(w io.Writer, classes []class) {
	fmt.Fprintf(w, "// %-5s  %-9s  %-10s  %-7s  %-10s  %-9s  %-9s\n", "class", "bytes/obj", "bytes/span", "objects", "tail waste", "max waste", "min align")
	prevSize := 0
	var minAligns [pageShift + 1]int
	for i, c := range classes {
		if i == 0 {
			continue
		}
		spanSize := c.npages * pageSize
		objects := spanSize / c.size
		tailWaste := spanSize - c.size*(spanSize/c.size)
		maxWaste := float64((c.size-prevSize-1)*objects+tailWaste) / float64(spanSize)
		alignBits := bits.TrailingZeros(uint(c.size))
		if alignBits > pageShift {
			// object alignment is capped at page alignment
			alignBits = pageShift
		}
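		// Record, for each alignment 1<<i, the smallest class size at
		// which that alignment starts being guaranteed for this and all
		// larger classes: a class whose size is not a multiple of 1<<i
		// resets the entry, so only guarantees that persist through the
		// largest class survive. This feeds the "min obj size" table
		// printed below.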
		for i := range minAligns {
			if i > alignBits {
				minAligns[i] = 0
			} else if minAligns[i] == 0 {
				minAligns[i] = c.size
			}
		}
		prevSize = c.size
		fmt.Fprintf(w, "// %5d  %9d  %10d  %7d  %10d  %8.2f%%  %9d\n", i, c.size, spanSize, objects, tailWaste, 100*maxWaste, 1<<alignBits)
	}
	fmt.Fprintf(w, "\n")

	fmt.Fprintf(w, "// %-9s  %-4s  %-12s\n", "alignment", "bits", "min obj size")
	for bits, size := range minAligns {
		if size == 0 {
			break
		}
		if bits+1 < len(minAligns) && size == minAligns[bits+1] {
			continue
		}
		fmt.Fprintf(w, "// %9d  %4d  %12d\n", 1<<bits, bits, size)
	}
	fmt.Fprintf(w, "\n")
}

func maxObjsPerSpan(classes []class) int {
	most := 0
	for _, c := range classes[1:] {
		n := c.npages * pageSize / c.size
		most = max(most, n)
	}
	return most
}

func printClasses(w io.Writer, classes []class) {
	fmt.Fprintln(w, "const (")
	fmt.Fprintf(w, "minHeapAlign = %d\n", minHeapAlign)
	fmt.Fprintf(w, "_MaxSmallSize = %d\n", maxSmallSize)
	fmt.Fprintf(w, "smallSizeDiv = %d\n", smallSizeDiv)
	fmt.Fprintf(w, "smallSizeMax = %d\n", smallSizeMax)
	fmt.Fprintf(w, "largeSizeDiv = %d\n", largeSizeDiv)
	fmt.Fprintf(w, "_NumSizeClasses = %d\n", len(classes))
	fmt.Fprintf(w, "_PageShift = %d\n", pageShift)
	fmt.Fprintf(w, "maxObjsPerSpan = %d\n", maxObjsPerSpan(classes))
	fmt.Fprintln(w, ")")

	fmt.Fprint(w, "var class_to_size = [_NumSizeClasses]uint16 {")
	for _, c := range classes {
		fmt.Fprintf(w, "%d,", c.size)
	}
	fmt.Fprintln(w, "}")

	fmt.Fprint(w, "var class_to_allocnpages = [_NumSizeClasses]uint8 {")
	for _, c := range classes {
		fmt.Fprintf(w, "%d,", c.npages)
	}
	fmt.Fprintln(w, "}")

	fmt.Fprint(w, "var class_to_divmagic = [_NumSizeClasses]uint32 {")
	for _, c := range classes {
		if c.size == 0 {
			fmt.Fprintf(w, "0,")
			continue
		}
		fmt.Fprintf(w, "^uint32(0)/%d+1,", c.size)
	}
	fmt.Fprintln(w, "}")

	// map from size to size class, for small sizes.
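	// Entry i covers sizes in ((i-1)*smallSizeDiv, i*smallSizeDiv], so
	// the runtime is expected to index this table with the request size
	// rounded up to a multiple of smallSizeDiv, roughly
	// size_to_class8[(size+smallSizeDiv-1)/smallSizeDiv]; the large-size
	// table below works the same way with largeSizeDiv granularity.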
	sc := make([]int, smallSizeMax/smallSizeDiv+1)
	for i := range sc {
		size := i * smallSizeDiv
		for j, c := range classes {
			if c.size >= size {
				sc[i] = j
				break
			}
		}
	}
	fmt.Fprint(w, "var size_to_class8 = [smallSizeMax/smallSizeDiv+1]uint8 {")
	for _, v := range sc {
		fmt.Fprintf(w, "%d,", v)
	}
	fmt.Fprintln(w, "}")

	// map from size to size class, for large sizes.
	sc = make([]int, (maxSmallSize-smallSizeMax)/largeSizeDiv+1)
	for i := range sc {
		size := smallSizeMax + i*largeSizeDiv
		for j, c := range classes {
			if c.size >= size {
				sc[i] = j
				break
			}
		}
	}
	fmt.Fprint(w, "var size_to_class128 = [(_MaxSmallSize-smallSizeMax)/largeSizeDiv+1]uint8 {")
	for _, v := range sc {
		fmt.Fprintf(w, "%d,", v)
	}
	fmt.Fprintln(w, "}")
}