// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

//go:build ignore

// Generate tables for small malloc size classes.
//
// See malloc.go for overview.
//
// The size classes are chosen so that rounding an allocation
// request up to the next size class wastes at most 12.5% (1.125x).
//
// Each size class has its own page count that gets allocated
// and chopped up when new objects of the size class are needed.
// That page count is chosen so that chopping up the run of
// pages into objects of the given size wastes at most 12.5% (1.125x)
// of the memory. It is not necessary that the cutoff here be
// the same as above.
//
// The two sources of waste multiply, so the worst possible case
// for the above constraints would be that allocations of some
// size might have a 26.6% (1.266x) overhead.
// In practice, only one of the wastes comes into play for a
// given size (sizes < 512 waste mainly on the round-up,
// sizes > 512 waste mainly on the page chopping).
// For really small sizes, alignment constraints force the
// overhead higher.

package main

import (
	"bytes"
	"flag"
	"fmt"
	"go/format"
	"io"
	"log"
	"math"
	"math/bits"
	"os"
)

// Generate sizeclasses.go

var stdout = flag.Bool("stdout", false, "write to stdout instead of sizeclasses.go")

func main() {
	flag.Parse()

	var b bytes.Buffer
	fmt.Fprintln(&b, "// Code generated by mksizeclasses.go; DO NOT EDIT.")
	fmt.Fprintln(&b, "//go:generate go run mksizeclasses.go")
	fmt.Fprintln(&b)
	fmt.Fprintln(&b, "package runtime")
	classes := makeClasses()

	printComment(&b, classes)

	printClasses(&b, classes)

	out, err := format.Source(b.Bytes())
	if err != nil {
		log.Fatal(err)
	}
	if *stdout {
		_, err = os.Stdout.Write(out)
	} else {
		err = os.WriteFile("sizeclasses.go", out, 0666)
	}
	if err != nil {
		log.Fatal(err)
	}
}

const (
	// Constants that we use and will transfer to the runtime.
	minHeapAlign = 8
	maxSmallSize = 32 << 10
	smallSizeDiv = 8
	smallSizeMax = 1024
	largeSizeDiv = 128
	pageShift    = 13

	// Derived constants.
	pageSize = 1 << pageShift
)

type class struct {
	size   int // max size
	npages int // number of pages
}

func powerOfTwo(x int) bool {
	return x != 0 && x&(x-1) == 0
}

func makeClasses() []class {
	var classes []class

	classes = append(classes, class{}) // class #0 is a dummy entry

	align := minHeapAlign
	for size := align; size <= maxSmallSize; size += align {
		if powerOfTwo(size) { // bump alignment once in a while
			if size >= 2048 {
				align = 256
			} else if size >= 128 {
				align = size / 8
			} else if size >= 32 {
				align = 16 // heap bitmaps assume 16 byte alignment for allocations >= 32 bytes.
			}
		}
		if !powerOfTwo(align) {
			panic("incorrect alignment")
		}

		// Make the allocnpages big enough that
		// the leftover is less than 1/8 of the total,
		// so wasted space is at most 12.5%.
		allocsize := pageSize
		for allocsize%size > allocsize/8 {
			allocsize += pageSize
		}
		npages := allocsize / pageSize

		// If the previous sizeclass chose the same
		// allocation size and fit the same number of
		// objects into the page, we might as well
		// use just this size instead of having two
		// different sizes.
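		// For example, candidate sizes 832 and 896 both give 9 objects
		// in a single 8192-byte page, so the 832 class is widened to
		// 896 rather than kept as a separate class.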
		if len(classes) > 1 && npages == classes[len(classes)-1].npages && allocsize/size == allocsize/classes[len(classes)-1].size {
			classes[len(classes)-1].size = size
			continue
		}
		classes = append(classes, class{size: size, npages: npages})
	}

	// Increase object sizes if we can fit the same number of larger objects
	// into the same number of pages. For example, we choose size 8448 above
	// with 6 objects in 7 pages. But we could just as well use object size
	// 9472, which is also 6 objects in 7 pages but +1024 bytes (+12.12%).
	// We need to preserve at least largeSizeDiv alignment, otherwise
	// sizeToClass won't work.
	for i := range classes {
		if i == 0 {
			continue
		}
		c := &classes[i]
		psize := c.npages * pageSize
		new_size := (psize / (psize / c.size)) &^ (largeSizeDiv - 1)
		if new_size > c.size {
			c.size = new_size
		}
	}

	if len(classes) != 68 {
		panic("number of size classes has changed")
	}

	for i := range classes {
		computeDivMagic(&classes[i])
	}

	return classes
}

// computeDivMagic checks that the division required to compute object
// index from span offset can be computed using 32-bit multiplication.
// n / c.size is implemented as (n * (^uint32(0)/uint32(c.size) + 1)) >> 32
// for all 0 <= n <= c.npages * pageSize.
func computeDivMagic(c *class) {
	// divisor
	d := c.size
	if d == 0 {
		return
	}

	// maximum input value for which the formula needs to work.
	max := c.npages * pageSize

	// As reported in [1], if n and d are unsigned N-bit integers, we
	// can compute n / d as ⌊n * c / 2^F⌋, where c is ⌈2^F / d⌉ and F is
	// computed with:
	//
	// Algorithm 2: Algorithm to select the number of fractional bits
	// and the scaled approximate reciprocal in the case of unsigned
	// integers.
	//
	// if d is a power of two then
	//	Let F ← log₂(d) and c = 1.
	// else
	//	Let F ← N + L where L is the smallest integer
	//	such that d ≤ (2^(N+L) mod d) + 2^L.
	// end if
	//
	// [1] "Faster Remainder by Direct Computation: Applications to
	// Compilers and Software Libraries" Daniel Lemire, Owen Kaser,
	// Nathan Kurz arXiv:1902.01961
	//
	// To minimize the risk of introducing errors, we implement the
	// algorithm exactly as stated, rather than trying to adapt it to
	// fit typical Go idioms.
	N := bits.Len(uint(max))
	var F int
	if powerOfTwo(d) {
		F = int(math.Log2(float64(d)))
		if d != 1<<F {
			panic("imprecise log2")
		}
	} else {
		for L := 0; ; L++ {
			if d <= ((1<<(N+L))%d)+(1<<L) {
				F = N + L
				break
			}
		}
	}

	// Also, as noted in the paper, F is the smallest number of fractional
	// bits required. We use 32 bits, because it works for all size
	// classes and is fast on all CPU architectures that we support.
	if F > 32 {
		fmt.Printf("d=%d max=%d N=%d F=%d\n", c.size, max, N, F)
		panic("size class requires more than 32 bits of precision")
	}

	// Brute force double-check with the exact computation that will be
	// done by the runtime.
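	// For example, for d=48 (npages=1, so max=8192) the magic constant is
	// m = ^uint32(0)/48 + 1 = 89478486 = ⌈2^32/48⌉, and
	// uint32((uint64(n)*uint64(m))>>32) equals n/48 for every n in [0, 8192].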
	m := ^uint32(0)/uint32(c.size) + 1
	for n := 0; n <= max; n++ {
		if uint32((uint64(n)*uint64(m))>>32) != uint32(n/c.size) {
			fmt.Printf("d=%d max=%d m=%d n=%d\n", d, max, m, n)
			panic("bad 32-bit multiply magic")
		}
	}
}

func printComment(w io.Writer, classes []class) {
	fmt.Fprintf(w, "// %-5s %-9s %-10s %-7s %-10s %-9s %-9s\n", "class", "bytes/obj", "bytes/span", "objects", "tail waste", "max waste", "min align")
	prevSize := 0
	var minAligns [pageShift + 1]int
	for i, c := range classes {
		if i == 0 {
			continue
		}
		spanSize := c.npages * pageSize
		objects := spanSize / c.size
		tailWaste := spanSize - c.size*(spanSize/c.size)
		maxWaste := float64((c.size-prevSize-1)*objects+tailWaste) / float64(spanSize)
		alignBits := bits.TrailingZeros(uint(c.size))
		if alignBits > pageShift {
			// object alignment is capped at page alignment
			alignBits = pageShift
		}
		for i := range minAligns {
			if i > alignBits {
				minAligns[i] = 0
			} else if minAligns[i] == 0 {
				minAligns[i] = c.size
			}
		}
		prevSize = c.size
		fmt.Fprintf(w, "// %5d %9d %10d %7d %10d %8.2f%% %9d\n", i, c.size, spanSize, objects, tailWaste, 100*maxWaste, 1<<alignBits)
	}
	fmt.Fprintf(w, "\n")

	fmt.Fprintf(w, "// %-9s %-4s %-12s\n", "alignment", "bits", "min obj size")
	for bits, size := range minAligns {
		if size == 0 {
			break
		}
		if bits+1 < len(minAligns) && size == minAligns[bits+1] {
			continue
		}
		fmt.Fprintf(w, "// %9d %4d %12d\n", 1<<bits, bits, size)
	}
	fmt.Fprintf(w, "\n")
}

func maxObjsPerSpan(classes []class) int {
	most := 0
	for _, c := range classes[1:] {
		n := c.npages * pageSize / c.size
		most = max(most, n)
	}
	return most
}

func printClasses(w io.Writer, classes []class) {
	fmt.Fprintln(w, "const (")
	fmt.Fprintf(w, "minHeapAlign = %d\n", minHeapAlign)
	fmt.Fprintf(w, "_MaxSmallSize = %d\n", maxSmallSize)
	fmt.Fprintf(w, "smallSizeDiv = %d\n", smallSizeDiv)
	fmt.Fprintf(w, "smallSizeMax = %d\n", smallSizeMax)
	fmt.Fprintf(w, "largeSizeDiv = %d\n", largeSizeDiv)
	fmt.Fprintf(w, "_NumSizeClasses = %d\n", len(classes))
	fmt.Fprintf(w, "_PageShift = %d\n", pageShift)
	fmt.Fprintf(w, "maxObjsPerSpan = %d\n", maxObjsPerSpan(classes))
	fmt.Fprintln(w, ")")

	fmt.Fprint(w, "var class_to_size = [_NumSizeClasses]uint16 {")
	for _, c := range classes {
		fmt.Fprintf(w, "%d,", c.size)
	}
	fmt.Fprintln(w, "}")

	fmt.Fprint(w, "var class_to_allocnpages = [_NumSizeClasses]uint8 {")
	for _, c := range classes {
		fmt.Fprintf(w, "%d,", c.npages)
	}
	fmt.Fprintln(w, "}")

	fmt.Fprint(w, "var class_to_divmagic = [_NumSizeClasses]uint32 {")
	for _, c := range classes {
		if c.size == 0 {
			fmt.Fprintf(w, "0,")
			continue
		}
		fmt.Fprintf(w, "^uint32(0)/%d+1,", c.size)
	}
	fmt.Fprintln(w, "}")

	// map from size to size class, for small sizes.
	sc := make([]int, smallSizeMax/smallSizeDiv+1)
	for i := range sc {
		size := i * smallSizeDiv
		for j, c := range classes {
			if c.size >= size {
				sc[i] = j
				break
			}
		}
	}
	fmt.Fprint(w, "var size_to_class8 = [smallSizeMax/smallSizeDiv+1]uint8 {")
	for _, v := range sc {
		fmt.Fprintf(w, "%d,", v)
	}
	fmt.Fprintln(w, "}")

	// map from size to size class, for large sizes.
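	// size_to_class128[i] is the smallest class whose object size is at
	// least smallSizeMax+i*largeSizeDiv, so every size in
	// (smallSizeMax+(i-1)*largeSizeDiv, smallSizeMax+i*largeSizeDiv]
	// maps to it.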
	sc = make([]int, (maxSmallSize-smallSizeMax)/largeSizeDiv+1)
	for i := range sc {
		size := smallSizeMax + i*largeSizeDiv
		for j, c := range classes {
			if c.size >= size {
				sc[i] = j
				break
			}
		}
	}
	fmt.Fprint(w, "var size_to_class128 = [(_MaxSmallSize-smallSizeMax)/largeSizeDiv+1]uint8 {")
	for _, v := range sc {
		fmt.Fprintf(w, "%d,", v)
	}
	fmt.Fprintln(w, "}")
}
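
// A rough sketch of how the generated tables are meant to be consumed by the
// runtime (helper names such as divRoundUp and the exact call sites are
// assumptions here, not outputs of this generator):
//
//	// size (<= _MaxSmallSize) to size class:
//	if size <= smallSizeMax-8 {
//		sizeclass = size_to_class8[divRoundUp(size, smallSizeDiv)]
//	} else {
//		sizeclass = size_to_class128[divRoundUp(size-smallSizeMax, largeSizeDiv)]
//	}
//	// rounded-up allocation size and pages per span for that class:
//	size = uintptr(class_to_size[sizeclass])
//	npages = uintptr(class_to_allocnpages[sizeclass])
//	// object index within a span, using the 32-bit magic verified above:
//	objIndex = uintptr((uint64(byteOffset) * uint64(class_to_divmagic[sizeclass])) >> 32)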