// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Garbage collector: type and heap bitmaps.
//
// Stack, data, and bss bitmaps
//
// Stack frames and global variables in the data and bss sections are
// described by bitmaps with 1 bit per pointer-sized word. A "1" bit
// means the word is a live pointer to be visited by the GC (referred to
// as "pointer"). A "0" bit means the word should be ignored by GC
// (referred to as "scalar", though it could be a dead pointer value).
//
// Heap bitmaps
//
// The heap bitmap comprises 1 bit for each pointer-sized word in the heap,
// recording whether a pointer is stored in that word or not. This bitmap
// is stored at the end of a span for small objects and is unrolled at
// runtime from type metadata for all larger objects. Objects without
// pointers have neither a bitmap nor associated type metadata.
//
// Bits in all cases correspond to words in little-endian order.
//
// For small objects, if s is the mspan for the span starting at "start",
// then s.heapBits() returns a slice containing the bitmap for the whole span.
// That is, s.heapBits()[0] holds the goarch.PtrSize*8 bits for the first
// goarch.PtrSize*8 words from "start" through "start+63*ptrSize" in the span.
// On a related note, small objects are always small enough that their bitmap
// fits in goarch.PtrSize*8 bits, so writing out bitmap data takes two bitmap
// writes at most (because object boundaries don't generally lie on
// s.heapBits()[i] boundaries).
//
// For larger objects, if t is the type for the object starting at "start",
// within some span whose mspan is s, then the bitmap at t.GCData is "tiled"
// from "start" through "start+s.elemsize".
// Specifically, the first bit of t.GCData corresponds to the word at "start",
// the second to the word after "start", and so on up to t.PtrBytes. At t.PtrBytes,
// we skip to "start+t.Size_" and begin again from there. This process is
// repeated until we hit "start+s.elemsize".
// This tiling algorithm supports array data, since the type always refers to
// the element type of the array. Single objects are considered the same as
// single-element arrays.
// The tiling algorithm may scan data past the end of the compiler-recognized
// object, but any unused data within the allocation slot (i.e. within s.elemsize)
// is zeroed, so the GC just observes nil pointers.
// Note that this "tiled" bitmap isn't stored anywhere; it is generated on-the-fly.
//
// For objects without their own span, the type metadata is stored in the first
// word before the object at the beginning of the allocation slot. For objects
// with their own span, the type metadata is stored in the mspan.
//
// The bitmap for small unallocated objects in scannable spans is not maintained
// (can be junk).

package runtime

import (
	"internal/abi"
	"internal/goarch"
	"internal/runtime/atomic"
	"runtime/internal/sys"
	"unsafe"
)

const (
	// A malloc header is functionally a single type pointer, but
	// we need to use 8 here to ensure 8-byte alignment of allocations
	// on 32-bit platforms. It's wasteful, but a lot of code relies on
	// 8-byte alignment for 8-byte atomics.
	mallocHeaderSize = 8

	// The minimum object size that has a malloc header, exclusive.
	//
	// The size of this value controls overheads from the malloc header.
	// The minimum size is bound by writeHeapBitsSmall, which assumes that the
	// pointer bitmap for objects of a size smaller than this doesn't cross
	// more than one pointer-word boundary. This sets an upper-bound on this
	// value at the number of bits in a uintptr, multiplied by the pointer
	// size in bytes.
	//
	// We choose a value here that has a natural cutover point in terms of memory
	// overheads. This value just happens to be the maximum possible value this
	// can be.
	//
	// A span with heap bits in it will have 128 bytes of heap bits on 64-bit
	// platforms, and 256 bytes of heap bits on 32-bit platforms. The first size
	// class where malloc headers match this overhead for 64-bit platforms is
	// 512 bytes (8 KiB / 512 bytes * 8 bytes-per-header = 128 bytes of overhead).
	// On 32-bit platforms, this same point is the 256 byte size class
	// (8 KiB / 256 bytes * 8 bytes-per-header = 256 bytes of overhead).
	//
	// Guaranteed to be exactly at a size class boundary. The reason this value is
	// an exclusive minimum is subtle. Suppose we're allocating a 504-byte object
	// and it's rounded up to 512 bytes for the size class. If minSizeForMallocHeader
	// is 512 and an inclusive minimum, then comparing the two sizes against
	// minSizeForMallocHeader would produce different results. In other words, the
	// comparison would not be invariant to size-class rounding. Eschewing this
	// property means a more complex check or possibly storing additional state to
	// determine whether a span has malloc headers.
	minSizeForMallocHeader = goarch.PtrSize * ptrBits
)

// heapBitsInSpan returns true if the size of an object implies its ptr/scalar
// data is stored at the end of the span, and is accessible via span.heapBits.
//
// Note: this works for both rounded-up sizes (span.elemsize) and unrounded
// type sizes because minSizeForMallocHeader is guaranteed to be at a size
// class boundary.
//
//go:nosplit
func heapBitsInSpan(userSize uintptr) bool {
	// N.B. minSizeForMallocHeader is an exclusive minimum so that this function is
	// invariant under size-class rounding on its input.
	return userSize <= minSizeForMallocHeader
}

// typePointers is an iterator over the pointers in a heap object.
//
// Iteration through this type implements the tiling algorithm described at the
// top of this file.
type typePointers struct {
	// elem is the address of the current array element of type typ being iterated over.
	// Objects that are not arrays are treated as single-element arrays, in which case
	// this value does not change.
	elem uintptr

	// addr is the address the iterator is currently working from and describes
	// the address of the first word referenced by mask.
	addr uintptr

	// mask is a bitmask where each bit corresponds to pointer-words after addr.
	// Bit 0 is the pointer-word at addr, Bit 1 is the next word, and so on.
	// If a bit is 1, then there is a pointer at that word.
	// nextFast and next mask out bits in this mask as their pointers are processed.
	mask uintptr

	// typ is a pointer to the type information for the heap object's type.
	// This may be nil if the object is in a span where heapBitsInSpan(span.elemsize) is true.
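	// It is also nil in the empty typePointers returned for noscan spans and
	// for large objects whose largeType has not been set yet (delayed zeroing;
	// see typePointersOfUnchecked).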
	typ *_type
}

// typePointersOf returns an iterator over all heap pointers in the range [addr, addr+size).
//
// addr and addr+size must be in the range [span.base(), span.limit).
//
// Note: addr+size must be passed as the limit argument to the iterator's next method on
// each iteration. This slightly awkward API is to allow typePointers to be destructured
// by the compiler.
//
// nosplit because it is used during write barriers and must not be preempted.
//
//go:nosplit
func (span *mspan) typePointersOf(addr, size uintptr) typePointers {
	base := span.objBase(addr)
	tp := span.typePointersOfUnchecked(base)
	if base == addr && size == span.elemsize {
		return tp
	}
	return tp.fastForward(addr-tp.addr, addr+size)
}

// typePointersOfUnchecked is like typePointersOf, but assumes addr is the base
// of an allocation slot in a span (the start of the object if no header, the
// header otherwise). It returns an iterator that generates all pointers
// in the range [addr, addr+span.elemsize).
//
// nosplit because it is used during write barriers and must not be preempted.
//
//go:nosplit
func (span *mspan) typePointersOfUnchecked(addr uintptr) typePointers {
	const doubleCheck = false
	if doubleCheck && span.objBase(addr) != addr {
		print("runtime: addr=", addr, " base=", span.objBase(addr), "\n")
		throw("typePointersOfUnchecked consisting of non-base-address for object")
	}

	spc := span.spanclass
	if spc.noscan() {
		return typePointers{}
	}
	if heapBitsInSpan(span.elemsize) {
		// Handle header-less objects.
		return typePointers{elem: addr, addr: addr, mask: span.heapBitsSmallForAddr(addr)}
	}

	// All of these objects have a header.
	var typ *_type
	if spc.sizeclass() != 0 {
		// Pull the allocation header from the first word of the object.
		typ = *(**_type)(unsafe.Pointer(addr))
		addr += mallocHeaderSize
	} else {
		typ = span.largeType
		if typ == nil {
			// Allow a nil type here for delayed zeroing. See mallocgc.
			return typePointers{}
		}
	}
	gcdata := typ.GCData
	return typePointers{elem: addr, addr: addr, mask: readUintptr(gcdata), typ: typ}
}

// typePointersOfType is like typePointersOf, but assumes addr points to one or more
// contiguous instances of the provided type. The provided type must not be nil and
// it must not have its type metadata encoded as a gcprog.
//
// It returns an iterator that tiles typ.GCData starting from addr. It's the caller's
// responsibility to limit iteration.
//
// nosplit because its callers are nosplit and require all their callees to be nosplit.
//
//go:nosplit
func (span *mspan) typePointersOfType(typ *abi.Type, addr uintptr) typePointers {
	const doubleCheck = false
	if doubleCheck && (typ == nil || typ.Kind_&abi.KindGCProg != 0) {
		throw("bad type passed to typePointersOfType")
	}
	if span.spanclass.noscan() {
		return typePointers{}
	}
	// Since we have the type, pretend we have a header.
	gcdata := typ.GCData
	return typePointers{elem: addr, addr: addr, mask: readUintptr(gcdata), typ: typ}
}

// nextFast is the fast path of next. nextFast is written to be inlineable and,
// as the name implies, fast.
229// 230// Callers that are performance-critical should iterate using the following 231// pattern: 232// 233// for { 234// var addr uintptr 235// if tp, addr = tp.nextFast(); addr == 0 { 236// if tp, addr = tp.next(limit); addr == 0 { 237// break 238// } 239// } 240// // Use addr. 241// ... 242// } 243// 244// nosplit because it is used during write barriers and must not be preempted. 245// 246//go:nosplit 247func (tp typePointers) nextFast() (typePointers, uintptr) { 248 // TESTQ/JEQ 249 if tp.mask == 0 { 250 return tp, 0 251 } 252 // BSFQ 253 var i int 254 if goarch.PtrSize == 8 { 255 i = sys.TrailingZeros64(uint64(tp.mask)) 256 } else { 257 i = sys.TrailingZeros32(uint32(tp.mask)) 258 } 259 // BTCQ 260 tp.mask ^= uintptr(1) << (i & (ptrBits - 1)) 261 // LEAQ (XX)(XX*8) 262 return tp, tp.addr + uintptr(i)*goarch.PtrSize 263} 264 265// next advances the pointers iterator, returning the updated iterator and 266// the address of the next pointer. 267// 268// limit must be the same each time it is passed to next. 269// 270// nosplit because it is used during write barriers and must not be preempted. 271// 272//go:nosplit 273func (tp typePointers) next(limit uintptr) (typePointers, uintptr) { 274 for { 275 if tp.mask != 0 { 276 return tp.nextFast() 277 } 278 279 // Stop if we don't actually have type information. 280 if tp.typ == nil { 281 return typePointers{}, 0 282 } 283 284 // Advance to the next element if necessary. 285 if tp.addr+goarch.PtrSize*ptrBits >= tp.elem+tp.typ.PtrBytes { 286 tp.elem += tp.typ.Size_ 287 tp.addr = tp.elem 288 } else { 289 tp.addr += ptrBits * goarch.PtrSize 290 } 291 292 // Check if we've exceeded the limit with the last update. 293 if tp.addr >= limit { 294 return typePointers{}, 0 295 } 296 297 // Grab more bits and try again. 298 tp.mask = readUintptr(addb(tp.typ.GCData, (tp.addr-tp.elem)/goarch.PtrSize/8)) 299 if tp.addr+goarch.PtrSize*ptrBits > limit { 300 bits := (tp.addr + goarch.PtrSize*ptrBits - limit) / goarch.PtrSize 301 tp.mask &^= ((1 << (bits)) - 1) << (ptrBits - bits) 302 } 303 } 304} 305 306// fastForward moves the iterator forward by n bytes. n must be a multiple 307// of goarch.PtrSize. limit must be the same limit passed to next for this 308// iterator. 309// 310// nosplit because it is used during write barriers and must not be preempted. 311// 312//go:nosplit 313func (tp typePointers) fastForward(n, limit uintptr) typePointers { 314 // Basic bounds check. 315 target := tp.addr + n 316 if target >= limit { 317 return typePointers{} 318 } 319 if tp.typ == nil { 320 // Handle small objects. 321 // Clear any bits before the target address. 322 tp.mask &^= (1 << ((target - tp.addr) / goarch.PtrSize)) - 1 323 // Clear any bits past the limit. 324 if tp.addr+goarch.PtrSize*ptrBits > limit { 325 bits := (tp.addr + goarch.PtrSize*ptrBits - limit) / goarch.PtrSize 326 tp.mask &^= ((1 << (bits)) - 1) << (ptrBits - bits) 327 } 328 return tp 329 } 330 331 // Move up elem and addr. 332 // Offsets within an element are always at a ptrBits*goarch.PtrSize boundary. 333 if n >= tp.typ.Size_ { 334 // elem needs to be moved to the element containing 335 // tp.addr + n. 336 oldelem := tp.elem 337 tp.elem += (tp.addr - tp.elem + n) / tp.typ.Size_ * tp.typ.Size_ 338 tp.addr = tp.elem + alignDown(n-(tp.elem-oldelem), ptrBits*goarch.PtrSize) 339 } else { 340 tp.addr += alignDown(n, ptrBits*goarch.PtrSize) 341 } 342 343 if tp.addr-tp.elem >= tp.typ.PtrBytes { 344 // We're starting in the non-pointer area of an array. 345 // Move up to the next element. 
		tp.elem += tp.typ.Size_
		tp.addr = tp.elem
		tp.mask = readUintptr(tp.typ.GCData)

		// We may have exceeded the limit after this. Bail just like next does.
		if tp.addr >= limit {
			return typePointers{}
		}
	} else {
		// Grab the mask, but then clear any bits before the target address and any
		// bits over the limit.
		tp.mask = readUintptr(addb(tp.typ.GCData, (tp.addr-tp.elem)/goarch.PtrSize/8))
		tp.mask &^= (1 << ((target - tp.addr) / goarch.PtrSize)) - 1
	}
	if tp.addr+goarch.PtrSize*ptrBits > limit {
		bits := (tp.addr + goarch.PtrSize*ptrBits - limit) / goarch.PtrSize
		tp.mask &^= ((1 << (bits)) - 1) << (ptrBits - bits)
	}
	return tp
}

// objBase returns the base pointer for the object containing addr in span.
//
// Assumes that addr points into a valid part of span (span.base() <= addr < span.limit).
//
//go:nosplit
func (span *mspan) objBase(addr uintptr) uintptr {
	return span.base() + span.objIndex(addr)*span.elemsize
}

// bulkBarrierPreWrite executes a write barrier
// for every pointer slot in the memory range [src, src+size),
// using pointer/scalar information from [dst, dst+size).
// This executes the write barriers necessary before a memmove.
// src, dst, and size must be pointer-aligned.
// The range [dst, dst+size) must lie within a single object.
// It does not perform the actual writes.
//
// As a special case, src == 0 indicates that this is being used for a
// memclr. bulkBarrierPreWrite will pass 0 for the src of each write
// barrier.
//
// Callers should call bulkBarrierPreWrite immediately before
// calling memmove(dst, src, size). This function is marked nosplit
// to avoid being preempted; the GC must not stop the goroutine
// between the memmove and the execution of the barriers.
// The caller is also responsible for cgo pointer checks if this
// may be writing Go pointers into non-Go memory.
//
// Pointer data is not maintained for allocations containing
// no pointers at all; any caller of bulkBarrierPreWrite must first
// make sure the underlying allocation contains pointers, usually
// by checking typ.PtrBytes.
//
// The typ argument is the type of the space at src and dst (and the
// element type if src and dst refer to arrays) and it is optional.
// If typ is nil, the barrier will still behave as expected and typ
// is used purely as an optimization. However, it must be used with
// care.
//
// If typ is not nil, then src and dst must point to one or more values
// of type typ. The caller must ensure that the ranges [src, src+size)
// and [dst, dst+size) refer to one or more whole values of type typ
// (leaving off the pointerless tail of the space is OK). If this
// precondition is not followed, this function will fail to scan the
// right pointers.
//
// When in doubt, pass nil for typ. That is safe and will always work.
//
// Callers must perform cgo checks if goexperiment.CgoCheck2.
//
//go:nosplit
func bulkBarrierPreWrite(dst, src, size uintptr, typ *abi.Type) {
	if (dst|src|size)&(goarch.PtrSize-1) != 0 {
		throw("bulkBarrierPreWrite: unaligned arguments")
	}
	if !writeBarrier.enabled {
		return
	}
	s := spanOf(dst)
	if s == nil {
		// If dst is a global, use the data or BSS bitmaps to
		// execute write barriers.
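		// Each module's gcdatamask and gcbssmask is a 1-bit-per-pointer-word
		// bitmap (see progToPointerMask), which bulkBarrierBitmap walks directly.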
		for _, datap := range activeModules() {
			if datap.data <= dst && dst < datap.edata {
				bulkBarrierBitmap(dst, src, size, dst-datap.data, datap.gcdatamask.bytedata)
				return
			}
		}
		for _, datap := range activeModules() {
			if datap.bss <= dst && dst < datap.ebss {
				bulkBarrierBitmap(dst, src, size, dst-datap.bss, datap.gcbssmask.bytedata)
				return
			}
		}
		return
	} else if s.state.get() != mSpanInUse || dst < s.base() || s.limit <= dst {
		// dst was heap memory at some point, but isn't now.
		// It can't be a global. It must be either our stack,
		// or in the case of direct channel sends, it could be
		// another stack. Either way, no need for barriers.
		// This will also catch if dst is in a freed span,
		// though that should never happen.
		return
	}
	buf := &getg().m.p.ptr().wbBuf

	// Double-check that the bitmaps generated in the two possible paths match.
	const doubleCheck = false
	if doubleCheck {
		doubleCheckTypePointersOfType(s, typ, dst, size)
	}

	var tp typePointers
	if typ != nil && typ.Kind_&abi.KindGCProg == 0 {
		tp = s.typePointersOfType(typ, dst)
	} else {
		tp = s.typePointersOf(dst, size)
	}
	if src == 0 {
		for {
			var addr uintptr
			if tp, addr = tp.next(dst + size); addr == 0 {
				break
			}
			dstx := (*uintptr)(unsafe.Pointer(addr))
			p := buf.get1()
			p[0] = *dstx
		}
	} else {
		for {
			var addr uintptr
			if tp, addr = tp.next(dst + size); addr == 0 {
				break
			}
			dstx := (*uintptr)(unsafe.Pointer(addr))
			srcx := (*uintptr)(unsafe.Pointer(src + (addr - dst)))
			p := buf.get2()
			p[0] = *dstx
			p[1] = *srcx
		}
	}
}

// bulkBarrierPreWriteSrcOnly is like bulkBarrierPreWrite but
// does not execute write barriers for [dst, dst+size).
//
// In addition to the requirements of bulkBarrierPreWrite,
// callers need to ensure [dst, dst+size) is zeroed.
//
// This is used for special cases where e.g. dst was just
// created and zeroed with malloc.
//
// The type of the space can be provided purely as an optimization.
// See bulkBarrierPreWrite's comment for more details -- use this
// optimization with great care.
//
//go:nosplit
func bulkBarrierPreWriteSrcOnly(dst, src, size uintptr, typ *abi.Type) {
	if (dst|src|size)&(goarch.PtrSize-1) != 0 {
		throw("bulkBarrierPreWrite: unaligned arguments")
	}
	if !writeBarrier.enabled {
		return
	}
	buf := &getg().m.p.ptr().wbBuf
	s := spanOf(dst)

	// Double-check that the bitmaps generated in the two possible paths match.
	const doubleCheck = false
	if doubleCheck {
		doubleCheckTypePointersOfType(s, typ, dst, size)
	}

	var tp typePointers
	if typ != nil && typ.Kind_&abi.KindGCProg == 0 {
		tp = s.typePointersOfType(typ, dst)
	} else {
		tp = s.typePointersOf(dst, size)
	}
	for {
		var addr uintptr
		if tp, addr = tp.next(dst + size); addr == 0 {
			break
		}
		srcx := (*uintptr)(unsafe.Pointer(addr - dst + src))
		p := buf.get1()
		p[0] = *srcx
	}
}

// initHeapBits initializes the heap bitmap for a span.
//
// TODO(mknyszek): This should set the heap bits for single pointer
// allocations eagerly to avoid calling heapSetType at allocation time,
// just to write one bit.
func (s *mspan) initHeapBits(forceClear bool) {
	if (!s.spanclass.noscan() && heapBitsInSpan(s.elemsize)) || s.isUserArenaChunk {
		b := s.heapBits()
		clear(b)
	}
}

// heapBits returns the heap ptr/scalar bits stored at the end of the span for
// small object spans and heap arena spans.
//
// Note that the uintptr of each element means something different for small object
// spans and for heap arena spans. Small object spans are easy: they're never interpreted
// as anything but uintptr, so they're immune to differences in endianness. However, the
// heapBits for user arena spans is exposed through a dummy type descriptor, so the byte
// ordering needs to match the same byte ordering the compiler would emit. The compiler always
// emits the bitmap data in little endian byte ordering, so on big endian platforms these
// uintptrs will have their byte orders swapped from what they normally would be.
//
// heapBitsInSpan(span.elemsize) or span.isUserArenaChunk must be true.
//
//go:nosplit
func (span *mspan) heapBits() []uintptr {
	const doubleCheck = false

	if doubleCheck && !span.isUserArenaChunk {
		if span.spanclass.noscan() {
			throw("heapBits called for noscan")
		}
		if span.elemsize > minSizeForMallocHeader {
			throw("heapBits called for span class that should have a malloc header")
		}
	}
	// Find the bitmap at the end of the span.
	//
	// Nearly every span with heap bits is exactly one page in size. Arenas are the only exception.
	if span.npages == 1 {
		// This will be inlined and constant-folded down.
		return heapBitsSlice(span.base(), pageSize)
	}
	return heapBitsSlice(span.base(), span.npages*pageSize)
}

// Helper for constructing a slice for the span's heap bits.
//
//go:nosplit
func heapBitsSlice(spanBase, spanSize uintptr) []uintptr {
	bitmapSize := spanSize / goarch.PtrSize / 8
	elems := int(bitmapSize / goarch.PtrSize)
	var sl notInHeapSlice
	sl = notInHeapSlice{(*notInHeap)(unsafe.Pointer(spanBase + spanSize - bitmapSize)), elems, elems}
	return *(*[]uintptr)(unsafe.Pointer(&sl))
}

// heapBitsSmallForAddr loads the heap bits for the object stored at addr from span.heapBits.
//
// addr must be the base pointer of an object in the span. heapBitsInSpan(span.elemsize)
// must be true.
//
//go:nosplit
func (span *mspan) heapBitsSmallForAddr(addr uintptr) uintptr {
	spanSize := span.npages * pageSize
	bitmapSize := spanSize / goarch.PtrSize / 8
	hbits := (*byte)(unsafe.Pointer(span.base() + spanSize - bitmapSize))

	// These objects are always small enough that their bitmaps
	// fit in a single word, so just load the word or two we need.
	//
	// Mirrors mspan.writeHeapBitsSmall.
	//
	// We should be using heapBits(), but unfortunately it introduces
	// both bounds check panics and a throw, which causes us to exceed
	// the nosplit limit in quite a few cases.
	i := (addr - span.base()) / goarch.PtrSize / ptrBits
	j := (addr - span.base()) / goarch.PtrSize % ptrBits
	bits := span.elemsize / goarch.PtrSize
	word0 := (*uintptr)(unsafe.Pointer(addb(hbits, goarch.PtrSize*(i+0))))
	word1 := (*uintptr)(unsafe.Pointer(addb(hbits, goarch.PtrSize*(i+1))))

	var read uintptr
	if j+bits > ptrBits {
		// Two reads.
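		// For example, on a 64-bit platform with j = 60 and bits = 8:
		// bits0 = 4 and bits1 = 4, so the object's low 4 bitmap bits come
		// from the top 4 bits of *word0 and its high 4 bits come from the
		// bottom 4 bits of *word1.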
		bits0 := ptrBits - j
		bits1 := bits - bits0
		read = *word0 >> j
		read |= (*word1 & ((1 << bits1) - 1)) << bits0
	} else {
		// One read.
		read = (*word0 >> j) & ((1 << bits) - 1)
	}
	return read
}

// writeHeapBitsSmall writes the heap bits for small objects whose ptr/scalar data is
// stored as a bitmap at the end of the span.
//
// Assumes dataSize is <= ptrBits*goarch.PtrSize. x must be a pointer into the span.
// heapBitsInSpan(dataSize) must be true. dataSize must be >= typ.Size_.
//
//go:nosplit
func (span *mspan) writeHeapBitsSmall(x, dataSize uintptr, typ *_type) (scanSize uintptr) {
	// The objects here are always really small, so a single load is sufficient.
	src0 := readUintptr(typ.GCData)

	// Create repetitions of the bitmap if we have a small array.
	bits := span.elemsize / goarch.PtrSize
	scanSize = typ.PtrBytes
	src := src0
	switch typ.Size_ {
	case goarch.PtrSize:
		src = (1 << (dataSize / goarch.PtrSize)) - 1
	default:
		for i := typ.Size_; i < dataSize; i += typ.Size_ {
			src |= src0 << (i / goarch.PtrSize)
			scanSize += typ.Size_
		}
	}

	// Since we're never writing more than one uintptr's worth of bits, we're either going
	// to do one or two writes.
	dst := span.heapBits()
	o := (x - span.base()) / goarch.PtrSize
	i := o / ptrBits
	j := o % ptrBits
	if j+bits > ptrBits {
		// Two writes.
		bits0 := ptrBits - j
		bits1 := bits - bits0
		dst[i+0] = dst[i+0]&(^uintptr(0)>>bits0) | (src << j)
		dst[i+1] = dst[i+1]&^((1<<bits1)-1) | (src >> bits0)
	} else {
		// One write.
		dst[i] = (dst[i] &^ (((1 << bits) - 1) << j)) | (src << j)
	}

	const doubleCheck = false
	if doubleCheck {
		srcRead := span.heapBitsSmallForAddr(x)
		if srcRead != src {
			print("runtime: x=", hex(x), " i=", i, " j=", j, " bits=", bits, "\n")
			print("runtime: dataSize=", dataSize, " typ.Size_=", typ.Size_, " typ.PtrBytes=", typ.PtrBytes, "\n")
			print("runtime: src0=", hex(src0), " src=", hex(src), " srcRead=", hex(srcRead), "\n")
			throw("bad pointer bits written for small object")
		}
	}
	return
}

// heapSetType records that the new allocation [x, x+size)
// holds in [x, x+dataSize) one or more values of type typ.
// (The number of values is given by dataSize / typ.Size.)
// If dataSize < size, the fragment [x+dataSize, x+size) is
// recorded as non-pointer data.
// It is known that the type has pointers somewhere;
// malloc does not call heapSetType when there are no pointers.
//
// There can be read-write races between heapSetType and things
// that read the heap metadata like scanobject. However, since
// heapSetType is only used for objects that have not yet been
// made reachable, readers will ignore bits being modified by this
// function. This does mean this function cannot transiently modify
// shared memory that belongs to neighboring objects. Also, on weakly-ordered
// machines, callers must execute a store/store (publication) barrier
// between calling this function and making the object reachable.
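//
// There are two cases: if header is nil, the object's pointer bits are written
// directly into span.heapBits (via writeHeapBitsSmall); otherwise *header is
// set to a type whose GCData describes the object, unrolling a GC program into
// a temporary span first if necessary.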
func heapSetType(x, dataSize uintptr, typ *_type, header **_type, span *mspan) (scanSize uintptr) {
	const doubleCheck = false

	gctyp := typ
	if header == nil {
		if doubleCheck && (!heapBitsInSpan(dataSize) || !heapBitsInSpan(span.elemsize)) {
			throw("tried to write heap bits, but no heap bits in span")
		}
		// Handle the case where we have no malloc header.
		scanSize = span.writeHeapBitsSmall(x, dataSize, typ)
	} else {
		if typ.Kind_&abi.KindGCProg != 0 {
			// Allocate space to unroll the gcprog. This space will consist of
			// a dummy _type value and the unrolled gcprog. The dummy _type will
			// refer to the bitmap, and the mspan will refer to the dummy _type.
			if span.spanclass.sizeclass() != 0 {
				throw("GCProg for type that isn't large")
			}
			spaceNeeded := alignUp(unsafe.Sizeof(_type{}), goarch.PtrSize)
			heapBitsOff := spaceNeeded
			spaceNeeded += alignUp(typ.PtrBytes/goarch.PtrSize/8, goarch.PtrSize)
			npages := alignUp(spaceNeeded, pageSize) / pageSize
			var progSpan *mspan
			systemstack(func() {
				progSpan = mheap_.allocManual(npages, spanAllocPtrScalarBits)
				memclrNoHeapPointers(unsafe.Pointer(progSpan.base()), progSpan.npages*pageSize)
			})
			// Write a dummy _type in the new space.
			//
			// We only need to write size, PtrBytes, and GCData, since that's all
			// the GC cares about.
			gctyp = (*_type)(unsafe.Pointer(progSpan.base()))
			gctyp.Size_ = typ.Size_
			gctyp.PtrBytes = typ.PtrBytes
			gctyp.GCData = (*byte)(add(unsafe.Pointer(progSpan.base()), heapBitsOff))
			gctyp.TFlag = abi.TFlagUnrolledBitmap

			// Expand the GC program into space reserved at the end of the new span.
			runGCProg(addb(typ.GCData, 4), gctyp.GCData)
		}

		// Write out the header.
		*header = gctyp
		scanSize = span.elemsize
	}

	if doubleCheck {
		doubleCheckHeapPointers(x, dataSize, gctyp, header, span)

		// To exercise the less common path more often, generate
		// a random interior pointer and make sure iterating from
		// that point works correctly too.
		maxIterBytes := span.elemsize
		if header == nil {
			maxIterBytes = dataSize
		}
		off := alignUp(uintptr(cheaprand())%dataSize, goarch.PtrSize)
		size := dataSize - off
		if size == 0 {
			off -= goarch.PtrSize
			size += goarch.PtrSize
		}
		interior := x + off
		size -= alignDown(uintptr(cheaprand())%size, goarch.PtrSize)
		if size == 0 {
			size = goarch.PtrSize
		}
		// Round the size up to a multiple of the type's size.
		size = (size + gctyp.Size_ - 1) / gctyp.Size_ * gctyp.Size_
		if interior+size > x+maxIterBytes {
			size = x + maxIterBytes - interior
		}
		doubleCheckHeapPointersInterior(x, interior, size, dataSize, gctyp, header, span)
	}
	return
}

func doubleCheckHeapPointers(x, dataSize uintptr, typ *_type, header **_type, span *mspan) {
	// Check that scanning the full object works.
	tp := span.typePointersOfUnchecked(span.objBase(x))
	maxIterBytes := span.elemsize
	if header == nil {
		maxIterBytes = dataSize
	}
	bad := false
	for i := uintptr(0); i < maxIterBytes; i += goarch.PtrSize {
		// Compute the pointer bit we want at offset i.
		want := false
		if i < span.elemsize {
			off := i % typ.Size_
			if off < typ.PtrBytes {
				j := off / goarch.PtrSize
				want = *addb(typ.GCData, j/8)>>(j%8)&1 != 0
			}
		}
		if want {
			var addr uintptr
			tp, addr = tp.next(x + span.elemsize)
			if addr == 0 {
				println("runtime: found bad iterator")
			}
			if addr != x+i {
				print("runtime: addr=", hex(addr), " x+i=", hex(x+i), "\n")
				bad = true
			}
		}
	}
	if !bad {
		var addr uintptr
		tp, addr = tp.next(x + span.elemsize)
		if addr == 0 {
			return
		}
		println("runtime: extra pointer:", hex(addr))
	}
	print("runtime: hasHeader=", header != nil, " typ.Size_=", typ.Size_, " hasGCProg=", typ.Kind_&abi.KindGCProg != 0, "\n")
	print("runtime: x=", hex(x), " dataSize=", dataSize, " elemsize=", span.elemsize, "\n")
	print("runtime: typ=", unsafe.Pointer(typ), " typ.PtrBytes=", typ.PtrBytes, "\n")
	print("runtime: limit=", hex(x+span.elemsize), "\n")
	tp = span.typePointersOfUnchecked(x)
	dumpTypePointers(tp)
	for {
		var addr uintptr
		if tp, addr = tp.next(x + span.elemsize); addr == 0 {
			println("runtime: would've stopped here")
			dumpTypePointers(tp)
			break
		}
		print("runtime: addr=", hex(addr), "\n")
		dumpTypePointers(tp)
	}
	throw("heapSetType: pointer entry not correct")
}

func doubleCheckHeapPointersInterior(x, interior, size, dataSize uintptr, typ *_type, header **_type, span *mspan) {
	bad := false
	if interior < x {
		print("runtime: interior=", hex(interior), " x=", hex(x), "\n")
		throw("found bad interior pointer")
	}
	off := interior - x
	tp := span.typePointersOf(interior, size)
	for i := off; i < off+size; i += goarch.PtrSize {
		// Compute the pointer bit we want at offset i.
		want := false
		if i < span.elemsize {
			off := i % typ.Size_
			if off < typ.PtrBytes {
				j := off / goarch.PtrSize
				want = *addb(typ.GCData, j/8)>>(j%8)&1 != 0
			}
		}
		if want {
			var addr uintptr
			tp, addr = tp.next(interior + size)
			if addr == 0 {
				println("runtime: found bad iterator")
				bad = true
			}
			if addr != x+i {
				print("runtime: addr=", hex(addr), " x+i=", hex(x+i), "\n")
				bad = true
			}
		}
	}
	if !bad {
		var addr uintptr
		tp, addr = tp.next(interior + size)
		if addr == 0 {
			return
		}
		println("runtime: extra pointer:", hex(addr))
	}
	print("runtime: hasHeader=", header != nil, " typ.Size_=", typ.Size_, "\n")
	print("runtime: x=", hex(x), " dataSize=", dataSize, " elemsize=", span.elemsize, " interior=", hex(interior), " size=", size, "\n")
	print("runtime: limit=", hex(interior+size), "\n")
	tp = span.typePointersOf(interior, size)
	dumpTypePointers(tp)
	for {
		var addr uintptr
		if tp, addr = tp.next(interior + size); addr == 0 {
			println("runtime: would've stopped here")
			dumpTypePointers(tp)
			break
		}
		print("runtime: addr=", hex(addr), "\n")
		dumpTypePointers(tp)
	}

	print("runtime: want: ")
	for i := off; i < off+size; i += goarch.PtrSize {
		// Compute the pointer bit we want at offset i.
		want := false
		if i < dataSize {
			off := i % typ.Size_
			if off < typ.PtrBytes {
				j := off / goarch.PtrSize
				want = *addb(typ.GCData, j/8)>>(j%8)&1 != 0
			}
		}
		if want {
			print("1")
		} else {
			print("0")
		}
	}
	println()

	throw("heapSetType: pointer entry not correct")
}

//go:nosplit
func doubleCheckTypePointersOfType(s *mspan, typ *_type, addr, size uintptr) {
	if typ == nil || typ.Kind_&abi.KindGCProg != 0 {
		return
	}
	if typ.Kind_&abi.KindMask == abi.Interface {
		// Interfaces are unfortunately inconsistently handled
		// when it comes to the type pointer, so it's easy to
		// produce a lot of false positives here.
		return
	}
	tp0 := s.typePointersOfType(typ, addr)
	tp1 := s.typePointersOf(addr, size)
	failed := false
	for {
		var addr0, addr1 uintptr
		tp0, addr0 = tp0.next(addr + size)
		tp1, addr1 = tp1.next(addr + size)
		if addr0 != addr1 {
			failed = true
			break
		}
		if addr0 == 0 {
			break
		}
	}
	if failed {
		tp0 := s.typePointersOfType(typ, addr)
		tp1 := s.typePointersOf(addr, size)
		print("runtime: addr=", hex(addr), " size=", size, "\n")
		print("runtime: type=", toRType(typ).string(), "\n")
		dumpTypePointers(tp0)
		dumpTypePointers(tp1)
		for {
			var addr0, addr1 uintptr
			tp0, addr0 = tp0.next(addr + size)
			tp1, addr1 = tp1.next(addr + size)
			print("runtime: ", hex(addr0), " ", hex(addr1), "\n")
			if addr0 == 0 && addr1 == 0 {
				break
			}
		}
		throw("mismatch between typePointersOfType and typePointersOf")
	}
}

func dumpTypePointers(tp typePointers) {
	print("runtime: tp.elem=", hex(tp.elem), " tp.typ=", unsafe.Pointer(tp.typ), "\n")
	print("runtime: tp.addr=", hex(tp.addr), " tp.mask=")
	for i := uintptr(0); i < ptrBits; i++ {
		if tp.mask&(uintptr(1)<<i) != 0 {
			print("1")
		} else {
			print("0")
		}
	}
	println()
}

// addb returns the byte pointer p+n.
//
//go:nowritebarrier
//go:nosplit
func addb(p *byte, n uintptr) *byte {
	// Note: wrote out full expression instead of calling add(p, n)
	// to reduce the number of temporaries generated by the
	// compiler for this trivial expression during inlining.
	return (*byte)(unsafe.Pointer(uintptr(unsafe.Pointer(p)) + n))
}

// subtractb returns the byte pointer p-n.
//
//go:nowritebarrier
//go:nosplit
func subtractb(p *byte, n uintptr) *byte {
	// Note: wrote out full expression instead of calling add(p, -n)
	// to reduce the number of temporaries generated by the
	// compiler for this trivial expression during inlining.
	return (*byte)(unsafe.Pointer(uintptr(unsafe.Pointer(p)) - n))
}

// add1 returns the byte pointer p+1.
//
//go:nowritebarrier
//go:nosplit
func add1(p *byte) *byte {
	// Note: wrote out full expression instead of calling addb(p, 1)
	// to reduce the number of temporaries generated by the
	// compiler for this trivial expression during inlining.
	return (*byte)(unsafe.Pointer(uintptr(unsafe.Pointer(p)) + 1))
}

// subtract1 returns the byte pointer p-1.
//
// nosplit because it is used during write barriers and must not be preempted.
1011// 1012//go:nowritebarrier 1013//go:nosplit 1014func subtract1(p *byte) *byte { 1015 // Note: wrote out full expression instead of calling subtractb(p, 1) 1016 // to reduce the number of temporaries generated by the 1017 // compiler for this trivial expression during inlining. 1018 return (*byte)(unsafe.Pointer(uintptr(unsafe.Pointer(p)) - 1)) 1019} 1020 1021// markBits provides access to the mark bit for an object in the heap. 1022// bytep points to the byte holding the mark bit. 1023// mask is a byte with a single bit set that can be &ed with *bytep 1024// to see if the bit has been set. 1025// *m.byte&m.mask != 0 indicates the mark bit is set. 1026// index can be used along with span information to generate 1027// the address of the object in the heap. 1028// We maintain one set of mark bits for allocation and one for 1029// marking purposes. 1030type markBits struct { 1031 bytep *uint8 1032 mask uint8 1033 index uintptr 1034} 1035 1036//go:nosplit 1037func (s *mspan) allocBitsForIndex(allocBitIndex uintptr) markBits { 1038 bytep, mask := s.allocBits.bitp(allocBitIndex) 1039 return markBits{bytep, mask, allocBitIndex} 1040} 1041 1042// refillAllocCache takes 8 bytes s.allocBits starting at whichByte 1043// and negates them so that ctz (count trailing zeros) instructions 1044// can be used. It then places these 8 bytes into the cached 64 bit 1045// s.allocCache. 1046func (s *mspan) refillAllocCache(whichByte uint16) { 1047 bytes := (*[8]uint8)(unsafe.Pointer(s.allocBits.bytep(uintptr(whichByte)))) 1048 aCache := uint64(0) 1049 aCache |= uint64(bytes[0]) 1050 aCache |= uint64(bytes[1]) << (1 * 8) 1051 aCache |= uint64(bytes[2]) << (2 * 8) 1052 aCache |= uint64(bytes[3]) << (3 * 8) 1053 aCache |= uint64(bytes[4]) << (4 * 8) 1054 aCache |= uint64(bytes[5]) << (5 * 8) 1055 aCache |= uint64(bytes[6]) << (6 * 8) 1056 aCache |= uint64(bytes[7]) << (7 * 8) 1057 s.allocCache = ^aCache 1058} 1059 1060// nextFreeIndex returns the index of the next free object in s at 1061// or after s.freeindex. 1062// There are hardware instructions that can be used to make this 1063// faster if profiling warrants it. 1064func (s *mspan) nextFreeIndex() uint16 { 1065 sfreeindex := s.freeindex 1066 snelems := s.nelems 1067 if sfreeindex == snelems { 1068 return sfreeindex 1069 } 1070 if sfreeindex > snelems { 1071 throw("s.freeindex > s.nelems") 1072 } 1073 1074 aCache := s.allocCache 1075 1076 bitIndex := sys.TrailingZeros64(aCache) 1077 for bitIndex == 64 { 1078 // Move index to start of next cached bits. 1079 sfreeindex = (sfreeindex + 64) &^ (64 - 1) 1080 if sfreeindex >= snelems { 1081 s.freeindex = snelems 1082 return snelems 1083 } 1084 whichByte := sfreeindex / 8 1085 // Refill s.allocCache with the next 64 alloc bits. 1086 s.refillAllocCache(whichByte) 1087 aCache = s.allocCache 1088 bitIndex = sys.TrailingZeros64(aCache) 1089 // nothing available in cached bits 1090 // grab the next 8 bytes and try again. 1091 } 1092 result := sfreeindex + uint16(bitIndex) 1093 if result >= snelems { 1094 s.freeindex = snelems 1095 return snelems 1096 } 1097 1098 s.allocCache >>= uint(bitIndex + 1) 1099 sfreeindex = result + 1 1100 1101 if sfreeindex%64 == 0 && sfreeindex != snelems { 1102 // We just incremented s.freeindex so it isn't 0. 1103 // As each 1 in s.allocCache was encountered and used for allocation 1104 // it was shifted away. At this point s.allocCache contains all 0s. 1105 // Refill s.allocCache so that it corresponds 1106 // to the bits at s.allocBits starting at s.freeindex. 
		whichByte := sfreeindex / 8
		s.refillAllocCache(whichByte)
	}
	s.freeindex = sfreeindex
	return result
}

// isFree reports whether the index'th object in s is unallocated.
//
// The caller must ensure s.state is mSpanInUse, and there must have
// been no preemption points since ensuring this (which could allow a
// GC transition, which would allow the state to change).
func (s *mspan) isFree(index uintptr) bool {
	if index < uintptr(s.freeIndexForScan) {
		return false
	}
	bytep, mask := s.allocBits.bitp(index)
	return *bytep&mask == 0
}

// divideByElemSize returns n/s.elemsize.
// n must be within [0, s.npages*_PageSize),
// or may be exactly s.npages*_PageSize
// if s.elemsize is from sizeclasses.go.
//
// nosplit, because it is called by objIndex, which is nosplit
//
//go:nosplit
func (s *mspan) divideByElemSize(n uintptr) uintptr {
	const doubleCheck = false

	// See explanation in mksizeclasses.go's computeDivMagic.
	q := uintptr((uint64(n) * uint64(s.divMul)) >> 32)

	if doubleCheck && q != n/s.elemsize {
		println(n, "/", s.elemsize, "should be", n/s.elemsize, "but got", q)
		throw("bad magic division")
	}
	return q
}

// nosplit, because it is called by other nosplit code like findObject
//
//go:nosplit
func (s *mspan) objIndex(p uintptr) uintptr {
	return s.divideByElemSize(p - s.base())
}

func markBitsForAddr(p uintptr) markBits {
	s := spanOf(p)
	objIndex := s.objIndex(p)
	return s.markBitsForIndex(objIndex)
}

func (s *mspan) markBitsForIndex(objIndex uintptr) markBits {
	bytep, mask := s.gcmarkBits.bitp(objIndex)
	return markBits{bytep, mask, objIndex}
}

func (s *mspan) markBitsForBase() markBits {
	return markBits{&s.gcmarkBits.x, uint8(1), 0}
}

// isMarked reports whether mark bit m is set.
func (m markBits) isMarked() bool {
	return *m.bytep&m.mask != 0
}

// setMarked sets the marked bit in the markbits, atomically.
func (m markBits) setMarked() {
	// Might be racing with other updates, so use atomic update always.
	// We used to be clever here and use a non-atomic update in certain
	// cases, but it's not worth the risk.
	atomic.Or8(m.bytep, m.mask)
}

// setMarkedNonAtomic sets the marked bit in the markbits, non-atomically.
func (m markBits) setMarkedNonAtomic() {
	*m.bytep |= m.mask
}

// clearMarked clears the marked bit in the markbits, atomically.
func (m markBits) clearMarked() {
	// Might be racing with other updates, so use atomic update always.
	// We used to be clever here and use a non-atomic update in certain
	// cases, but it's not worth the risk.
	atomic.And8(m.bytep, ^m.mask)
}

// markBitsForSpan returns the markBits for the span base address base.
func markBitsForSpan(base uintptr) (mbits markBits) {
	mbits = markBitsForAddr(base)
	if mbits.mask != 1 {
		throw("markBitsForSpan: unaligned start")
	}
	return mbits
}

// advance advances the markBits to the next object in the span.
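//
// A typical loop over a span's mark bits (an illustrative sketch, not an
// exact runtime loop) looks like:
//
//	mbits := s.markBitsForBase()
//	for i := uintptr(0); i < uintptr(s.nelems); i++ {
//		if mbits.isMarked() {
//			// object i is marked
//		}
//		mbits.advance()
//	}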
func (m *markBits) advance() {
	if m.mask == 1<<7 {
		m.bytep = (*uint8)(unsafe.Pointer(uintptr(unsafe.Pointer(m.bytep)) + 1))
		m.mask = 1
	} else {
		m.mask = m.mask << 1
	}
	m.index++
}

// clobberdeadPtr is a special value that is used by the compiler to
// clobber dead stack slots, when -clobberdead flag is set.
const clobberdeadPtr = uintptr(0xdeaddead | 0xdeaddead<<((^uintptr(0)>>63)*32))

// badPointer throws bad pointer in heap panic.
func badPointer(s *mspan, p, refBase, refOff uintptr) {
	// Typically this indicates an incorrect use
	// of unsafe or cgo to store a bad pointer in
	// the Go heap. It may also indicate a runtime
	// bug.
	//
	// TODO(austin): We could be more aggressive
	// and detect pointers to unallocated objects
	// in allocated spans.
	printlock()
	print("runtime: pointer ", hex(p))
	if s != nil {
		state := s.state.get()
		if state != mSpanInUse {
			print(" to unallocated span")
		} else {
			print(" to unused region of span")
		}
		print(" span.base()=", hex(s.base()), " span.limit=", hex(s.limit), " span.state=", state)
	}
	print("\n")
	if refBase != 0 {
		print("runtime: found in object at *(", hex(refBase), "+", hex(refOff), ")\n")
		gcDumpObject("object", refBase, refOff)
	}
	getg().m.traceback = 2
	throw("found bad pointer in Go heap (incorrect use of unsafe or cgo?)")
}

// findObject returns the base address for the heap object containing
// the address p, the object's span, and the index of the object in s.
// If p does not point into a heap object, it returns base == 0.
//
// If p is an invalid heap pointer and debug.invalidptr != 0,
// findObject panics.
//
// refBase and refOff optionally give the base address of the object
// in which the pointer p was found and the byte offset at which it
// was found. These are used for error reporting.
//
// It is nosplit so it is safe for p to be a pointer to the current goroutine's stack.
// Since p is a uintptr, it would not be adjusted if the stack were to move.
//
// findObject should be an internal detail,
// but widely used packages access it using linkname.
// Notable members of the hall of shame include:
//   - github.com/bytedance/sonic
//
// Do not remove or change the type signature.
// See go.dev/issue/67401.
//
//go:linkname findObject
//go:nosplit
func findObject(p, refBase, refOff uintptr) (base uintptr, s *mspan, objIndex uintptr) {
	s = spanOf(p)
	// If s is nil, the virtual address has never been part of the heap.
	// This pointer may be to some mmap'd region, so we allow it.
	if s == nil {
		if (GOARCH == "amd64" || GOARCH == "arm64") && p == clobberdeadPtr && debug.invalidptr != 0 {
			// Crash if clobberdeadPtr is seen. Only on AMD64 and ARM64 for now,
			// as they are the only platforms where the compiler's clobberdead mode
			// is implemented. On these platforms clobberdeadPtr cannot be a valid address.
			badPointer(s, p, refBase, refOff)
		}
		return
	}
	// If p is a bad pointer, it may not be in s's bounds.
	//
	// Check s.state to synchronize with span initialization
	// before checking other fields. See also spanOfHeap.
	if state := s.state.get(); state != mSpanInUse || p < s.base() || p >= s.limit {
		// Pointers into stacks are also ok, the runtime manages these explicitly.
		if state == mSpanManual {
			return
		}
		// The following ensures that we are rigorous about what data
		// structures hold valid pointers.
		if debug.invalidptr != 0 {
			badPointer(s, p, refBase, refOff)
		}
		return
	}

	objIndex = s.objIndex(p)
	base = s.base() + objIndex*s.elemsize
	return
}

// reflect_verifyNotInHeapPtr reports whether converting the not-in-heap pointer into an unsafe.Pointer is ok.
//
//go:linkname reflect_verifyNotInHeapPtr reflect.verifyNotInHeapPtr
func reflect_verifyNotInHeapPtr(p uintptr) bool {
	// Conversion to a pointer is ok as long as findObject above does not call badPointer.
	// Since we're already promised that p doesn't point into the heap, just disallow heap
	// pointers and the special clobbered pointer.
	return spanOf(p) == nil && p != clobberdeadPtr
}

const ptrBits = 8 * goarch.PtrSize

// bulkBarrierBitmap executes write barriers for copying from [src,
// src+size) to [dst, dst+size) using a 1-bit pointer bitmap. src is
// assumed to start maskOffset bytes into the data covered by the
// bitmap in bits (which may not be a multiple of 8).
//
// This is used by bulkBarrierPreWrite for writes to data and BSS.
//
//go:nosplit
func bulkBarrierBitmap(dst, src, size, maskOffset uintptr, bits *uint8) {
	word := maskOffset / goarch.PtrSize
	bits = addb(bits, word/8)
	mask := uint8(1) << (word % 8)

	buf := &getg().m.p.ptr().wbBuf
	for i := uintptr(0); i < size; i += goarch.PtrSize {
		if mask == 0 {
			bits = addb(bits, 1)
			if *bits == 0 {
				// Skip 8 words.
				i += 7 * goarch.PtrSize
				continue
			}
			mask = 1
		}
		if *bits&mask != 0 {
			dstx := (*uintptr)(unsafe.Pointer(dst + i))
			if src == 0 {
				p := buf.get1()
				p[0] = *dstx
			} else {
				srcx := (*uintptr)(unsafe.Pointer(src + i))
				p := buf.get2()
				p[0] = *dstx
				p[1] = *srcx
			}
		}
		mask <<= 1
	}
}

// typeBitsBulkBarrier executes a write barrier for every
// pointer that would be copied from [src, src+size) to [dst,
// dst+size) by a memmove using the type bitmap to locate those
// pointer slots.
//
// The type typ must correspond exactly to [src, src+size) and [dst, dst+size).
// dst, src, and size must be pointer-aligned.
// The type typ must have a plain bitmap, not a GC program.
// The only use of this function is in channel sends, and the
// 64 kB channel element limit takes care of this for us.
//
// Must not be preempted because it typically runs right before memmove,
// and the GC must observe them as an atomic action.
//
// Callers must perform cgo checks if goexperiment.CgoCheck2.
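//
// An illustrative sketch of its use in a direct channel send (not the exact
// chan.go code; elemtype, dst, and src are placeholder names):
//
//	typeBitsBulkBarrier(elemtype, uintptr(dst), uintptr(src), elemtype.Size_)
//	memmove(dst, src, elemtype.Size_)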
1376// 1377//go:nosplit 1378func typeBitsBulkBarrier(typ *_type, dst, src, size uintptr) { 1379 if typ == nil { 1380 throw("runtime: typeBitsBulkBarrier without type") 1381 } 1382 if typ.Size_ != size { 1383 println("runtime: typeBitsBulkBarrier with type ", toRType(typ).string(), " of size ", typ.Size_, " but memory size", size) 1384 throw("runtime: invalid typeBitsBulkBarrier") 1385 } 1386 if typ.Kind_&abi.KindGCProg != 0 { 1387 println("runtime: typeBitsBulkBarrier with type ", toRType(typ).string(), " with GC prog") 1388 throw("runtime: invalid typeBitsBulkBarrier") 1389 } 1390 if !writeBarrier.enabled { 1391 return 1392 } 1393 ptrmask := typ.GCData 1394 buf := &getg().m.p.ptr().wbBuf 1395 var bits uint32 1396 for i := uintptr(0); i < typ.PtrBytes; i += goarch.PtrSize { 1397 if i&(goarch.PtrSize*8-1) == 0 { 1398 bits = uint32(*ptrmask) 1399 ptrmask = addb(ptrmask, 1) 1400 } else { 1401 bits = bits >> 1 1402 } 1403 if bits&1 != 0 { 1404 dstx := (*uintptr)(unsafe.Pointer(dst + i)) 1405 srcx := (*uintptr)(unsafe.Pointer(src + i)) 1406 p := buf.get2() 1407 p[0] = *dstx 1408 p[1] = *srcx 1409 } 1410 } 1411} 1412 1413// countAlloc returns the number of objects allocated in span s by 1414// scanning the mark bitmap. 1415func (s *mspan) countAlloc() int { 1416 count := 0 1417 bytes := divRoundUp(uintptr(s.nelems), 8) 1418 // Iterate over each 8-byte chunk and count allocations 1419 // with an intrinsic. Note that newMarkBits guarantees that 1420 // gcmarkBits will be 8-byte aligned, so we don't have to 1421 // worry about edge cases, irrelevant bits will simply be zero. 1422 for i := uintptr(0); i < bytes; i += 8 { 1423 // Extract 64 bits from the byte pointer and get a OnesCount. 1424 // Note that the unsafe cast here doesn't preserve endianness, 1425 // but that's OK. We only care about how many bits are 1, not 1426 // about the order we discover them in. 1427 mrkBits := *(*uint64)(unsafe.Pointer(s.gcmarkBits.bytep(i))) 1428 count += sys.OnesCount64(mrkBits) 1429 } 1430 return count 1431} 1432 1433// Read the bytes starting at the aligned pointer p into a uintptr. 1434// Read is little-endian. 1435func readUintptr(p *byte) uintptr { 1436 x := *(*uintptr)(unsafe.Pointer(p)) 1437 if goarch.BigEndian { 1438 if goarch.PtrSize == 8 { 1439 return uintptr(sys.Bswap64(uint64(x))) 1440 } 1441 return uintptr(sys.Bswap32(uint32(x))) 1442 } 1443 return x 1444} 1445 1446var debugPtrmask struct { 1447 lock mutex 1448 data *byte 1449} 1450 1451// progToPointerMask returns the 1-bit pointer mask output by the GC program prog. 1452// size the size of the region described by prog, in bytes. 1453// The resulting bitvector will have no more than size/goarch.PtrSize bits. 1454func progToPointerMask(prog *byte, size uintptr) bitvector { 1455 n := (size/goarch.PtrSize + 7) / 8 1456 x := (*[1 << 30]byte)(persistentalloc(n+1, 1, &memstats.buckhash_sys))[:n+1] 1457 x[len(x)-1] = 0xa1 // overflow check sentinel 1458 n = runGCProg(prog, &x[0]) 1459 if x[len(x)-1] != 0xa1 { 1460 throw("progToPointerMask: overflow") 1461 } 1462 return bitvector{int32(n), &x[0]} 1463} 1464 1465// Packed GC pointer bitmaps, aka GC programs. 1466// 1467// For large types containing arrays, the type information has a 1468// natural repetition that can be encoded to save space in the 1469// binary and in the memory representation of the type information. 
1470// 1471// The encoding is a simple Lempel-Ziv style bytecode machine 1472// with the following instructions: 1473// 1474// 00000000: stop 1475// 0nnnnnnn: emit n bits copied from the next (n+7)/8 bytes 1476// 10000000 n c: repeat the previous n bits c times; n, c are varints 1477// 1nnnnnnn c: repeat the previous n bits c times; c is a varint 1478 1479// runGCProg returns the number of 1-bit entries written to memory. 1480func runGCProg(prog, dst *byte) uintptr { 1481 dstStart := dst 1482 1483 // Bits waiting to be written to memory. 1484 var bits uintptr 1485 var nbits uintptr 1486 1487 p := prog 1488Run: 1489 for { 1490 // Flush accumulated full bytes. 1491 // The rest of the loop assumes that nbits <= 7. 1492 for ; nbits >= 8; nbits -= 8 { 1493 *dst = uint8(bits) 1494 dst = add1(dst) 1495 bits >>= 8 1496 } 1497 1498 // Process one instruction. 1499 inst := uintptr(*p) 1500 p = add1(p) 1501 n := inst & 0x7F 1502 if inst&0x80 == 0 { 1503 // Literal bits; n == 0 means end of program. 1504 if n == 0 { 1505 // Program is over. 1506 break Run 1507 } 1508 nbyte := n / 8 1509 for i := uintptr(0); i < nbyte; i++ { 1510 bits |= uintptr(*p) << nbits 1511 p = add1(p) 1512 *dst = uint8(bits) 1513 dst = add1(dst) 1514 bits >>= 8 1515 } 1516 if n %= 8; n > 0 { 1517 bits |= uintptr(*p) << nbits 1518 p = add1(p) 1519 nbits += n 1520 } 1521 continue Run 1522 } 1523 1524 // Repeat. If n == 0, it is encoded in a varint in the next bytes. 1525 if n == 0 { 1526 for off := uint(0); ; off += 7 { 1527 x := uintptr(*p) 1528 p = add1(p) 1529 n |= (x & 0x7F) << off 1530 if x&0x80 == 0 { 1531 break 1532 } 1533 } 1534 } 1535 1536 // Count is encoded in a varint in the next bytes. 1537 c := uintptr(0) 1538 for off := uint(0); ; off += 7 { 1539 x := uintptr(*p) 1540 p = add1(p) 1541 c |= (x & 0x7F) << off 1542 if x&0x80 == 0 { 1543 break 1544 } 1545 } 1546 c *= n // now total number of bits to copy 1547 1548 // If the number of bits being repeated is small, load them 1549 // into a register and use that register for the entire loop 1550 // instead of repeatedly reading from memory. 1551 // Handling fewer than 8 bits here makes the general loop simpler. 1552 // The cutoff is goarch.PtrSize*8 - 7 to guarantee that when we add 1553 // the pattern to a bit buffer holding at most 7 bits (a partial byte) 1554 // it will not overflow. 1555 src := dst 1556 const maxBits = goarch.PtrSize*8 - 7 1557 if n <= maxBits { 1558 // Start with bits in output buffer. 1559 pattern := bits 1560 npattern := nbits 1561 1562 // If we need more bits, fetch them from memory. 1563 src = subtract1(src) 1564 for npattern < n { 1565 pattern <<= 8 1566 pattern |= uintptr(*src) 1567 src = subtract1(src) 1568 npattern += 8 1569 } 1570 1571 // We started with the whole bit output buffer, 1572 // and then we loaded bits from whole bytes. 1573 // Either way, we might now have too many instead of too few. 1574 // Discard the extra. 1575 if npattern > n { 1576 pattern >>= npattern - n 1577 npattern = n 1578 } 1579 1580 // Replicate pattern to at most maxBits. 1581 if npattern == 1 { 1582 // One bit being repeated. 1583 // If the bit is 1, make the pattern all 1s. 1584 // If the bit is 0, the pattern is already all 0s, 1585 // but we can claim that the number of bits 1586 // in the word is equal to the number we need (c), 1587 // because right shift of bits will zero fill. 
				if pattern == 1 {
					pattern = 1<<maxBits - 1
					npattern = maxBits
				} else {
					npattern = c
				}
			} else {
				b := pattern
				nb := npattern
				if nb+nb <= maxBits {
					// Double pattern until the whole uintptr is filled.
					for nb <= goarch.PtrSize*8 {
						b |= b << nb
						nb += nb
					}
					// Trim away incomplete copy of original pattern in high bits.
					// TODO(rsc): Replace with table lookup or loop on systems without divide?
					nb = maxBits / npattern * npattern
					b &= 1<<nb - 1
					pattern = b
					npattern = nb
				}
			}

			// Add pattern to bit buffer and flush bit buffer, c/npattern times.
			// Since pattern contains >8 bits, there will be full bytes to flush
			// on each iteration.
			for ; c >= npattern; c -= npattern {
				bits |= pattern << nbits
				nbits += npattern
				for nbits >= 8 {
					*dst = uint8(bits)
					dst = add1(dst)
					bits >>= 8
					nbits -= 8
				}
			}

			// Add final fragment to bit buffer.
			if c > 0 {
				pattern &= 1<<c - 1
				bits |= pattern << nbits
				nbits += c
			}
			continue Run
		}

		// Repeat; n too large to fit in a register.
		// Since nbits <= 7, we know the first few bytes of repeated data
		// are already written to memory.
		off := n - nbits // n > nbits because n > maxBits and nbits <= 7
		// Leading src fragment.
		src = subtractb(src, (off+7)/8)
		if frag := off & 7; frag != 0 {
			bits |= uintptr(*src) >> (8 - frag) << nbits
			src = add1(src)
			nbits += frag
			c -= frag
		}
		// Main loop: load one byte, write another.
		// The bits are rotating through the bit buffer.
		for i := c / 8; i > 0; i-- {
			bits |= uintptr(*src) << nbits
			src = add1(src)
			*dst = uint8(bits)
			dst = add1(dst)
			bits >>= 8
		}
		// Final src fragment.
		if c %= 8; c > 0 {
			bits |= (uintptr(*src) & (1<<c - 1)) << nbits
			nbits += c
		}
	}

	// Write any final bits out, using full-byte writes, even for the final byte.
	totalBits := (uintptr(unsafe.Pointer(dst))-uintptr(unsafe.Pointer(dstStart)))*8 + nbits
	nbits += -nbits & 7
	for ; nbits > 0; nbits -= 8 {
		*dst = uint8(bits)
		dst = add1(dst)
		bits >>= 8
	}
	return totalBits
}

// materializeGCProg allocates space for the (1-bit) pointer bitmask
// for an object of size ptrdata. Then it fills that space with the
// pointer bitmask specified by the program prog.
// The bitmask starts at s.startAddr.
// The result must be deallocated with dematerializeGCProg.
func materializeGCProg(ptrdata uintptr, prog *byte) *mspan {
	// Each word of ptrdata needs one bit in the bitmap.
	bitmapBytes := divRoundUp(ptrdata, 8*goarch.PtrSize)
	// Compute the number of pages needed for bitmapBytes.
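	// (Illustrative arithmetic, assuming the usual 8 KiB runtime page size:
	// on a 64-bit platform, 1 MiB of ptrdata covers 128Ki pointer-sized
	// words, so the bitmap needs 16 KiB, i.e. two pages.)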
	pages := divRoundUp(bitmapBytes, pageSize)
	s := mheap_.allocManual(pages, spanAllocPtrScalarBits)
	runGCProg(addb(prog, 4), (*byte)(unsafe.Pointer(s.startAddr)))
	return s
}

func dematerializeGCProg(s *mspan) {
	mheap_.freeManual(s, spanAllocPtrScalarBits)
}

func dumpGCProg(p *byte) {
	nptr := 0
	for {
		x := *p
		p = add1(p)
		if x == 0 {
			print("\t", nptr, " end\n")
			break
		}
		if x&0x80 == 0 {
			print("\t", nptr, " lit ", x, ":")
			n := int(x+7) / 8
			for i := 0; i < n; i++ {
				print(" ", hex(*p))
				p = add1(p)
			}
			print("\n")
			nptr += int(x)
		} else {
			nbit := int(x &^ 0x80)
			if nbit == 0 {
				for nb := uint(0); ; nb += 7 {
					x := *p
					p = add1(p)
					nbit |= int(x&0x7f) << nb
					if x&0x80 == 0 {
						break
					}
				}
			}
			count := 0
			for nb := uint(0); ; nb += 7 {
				x := *p
				p = add1(p)
				count |= int(x&0x7f) << nb
				if x&0x80 == 0 {
					break
				}
			}
			print("\t", nptr, " repeat ", nbit, " × ", count, "\n")
			nptr += nbit * count
		}
	}
}

// Testing.

// reflect_gcbits returns the GC type info for x, for testing.
// The result is the bitmap entries (0 or 1), one entry per byte.
//
//go:linkname reflect_gcbits reflect.gcbits
func reflect_gcbits(x any) []byte {
	return getgcmask(x)
}

// Returns GC type info for the pointer stored in ep for testing.
// If ep points to the stack, only static live information will be returned
// (i.e. not for objects which are only dynamically live stack objects).
func getgcmask(ep any) (mask []byte) {
	e := *efaceOf(&ep)
	p := e.data
	t := e._type

	var et *_type
	if t.Kind_&abi.KindMask != abi.Pointer {
		throw("bad argument to getgcmask: expected type to be a pointer to the value type whose mask is being queried")
	}
	et = (*ptrtype)(unsafe.Pointer(t)).Elem

	// data or bss
	for _, datap := range activeModules() {
		// data
		if datap.data <= uintptr(p) && uintptr(p) < datap.edata {
			bitmap := datap.gcdatamask.bytedata
			n := et.Size_
			mask = make([]byte, n/goarch.PtrSize)
			for i := uintptr(0); i < n; i += goarch.PtrSize {
				off := (uintptr(p) + i - datap.data) / goarch.PtrSize
				mask[i/goarch.PtrSize] = (*addb(bitmap, off/8) >> (off % 8)) & 1
			}
			return
		}

		// bss
		if datap.bss <= uintptr(p) && uintptr(p) < datap.ebss {
			bitmap := datap.gcbssmask.bytedata
			n := et.Size_
			mask = make([]byte, n/goarch.PtrSize)
			for i := uintptr(0); i < n; i += goarch.PtrSize {
				off := (uintptr(p) + i - datap.bss) / goarch.PtrSize
				mask[i/goarch.PtrSize] = (*addb(bitmap, off/8) >> (off % 8)) & 1
			}
			return
		}
	}

	// heap
	if base, s, _ := findObject(uintptr(p), 0, 0); base != 0 {
		if s.spanclass.noscan() {
			return nil
		}
		limit := base + s.elemsize

		// Move the base up to the iterator's start, because
		// we want to hide evidence of a malloc header from the
		// caller.
		tp := s.typePointersOfUnchecked(base)
		base = tp.addr

		// Unroll the full bitmap the GC would actually observe.
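		// tp.next returns the updated iterator and the address of the next
		// pointer word strictly below limit; an address of 0 means there
		// are no more pointers to report.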
		maskFromHeap := make([]byte, (limit-base)/goarch.PtrSize)
		for {
			var addr uintptr
			if tp, addr = tp.next(limit); addr == 0 {
				break
			}
			maskFromHeap[(addr-base)/goarch.PtrSize] = 1
		}

		// Double-check that every part of the ptr/scalar region we're not
		// showing the caller is zeroed. This keeps us honest that
		// that information is actually irrelevant.
		for i := limit; i < s.elemsize; i++ {
			if *(*byte)(unsafe.Pointer(i)) != 0 {
				throw("found non-zeroed tail of allocation")
			}
		}

		// Callers (and a check we're about to run) expect this mask
		// to end at the last pointer.
		for len(maskFromHeap) > 0 && maskFromHeap[len(maskFromHeap)-1] == 0 {
			maskFromHeap = maskFromHeap[:len(maskFromHeap)-1]
		}

		if et.Kind_&abi.KindGCProg == 0 {
			// Unroll again, but this time from the type information.
			maskFromType := make([]byte, (limit-base)/goarch.PtrSize)
			tp = s.typePointersOfType(et, base)
			for {
				var addr uintptr
				if tp, addr = tp.next(limit); addr == 0 {
					break
				}
				maskFromType[(addr-base)/goarch.PtrSize] = 1
			}

			// Validate that the prefix of maskFromType is equal to
			// maskFromHeap. maskFromType may contain more pointers than
			// maskFromHeap produces because maskFromHeap may be able to
			// get exact type information for certain classes of objects.
			// With maskFromType, we're always just tiling the type bitmap
			// through to the elemsize.
			//
			// It's OK if maskFromType has pointers in elemsize that extend
			// past the actual populated space; we checked above that all
			// that space is zeroed, so the GC will just see nil pointers.
			differs := false
			for i := range maskFromHeap {
				if maskFromHeap[i] != maskFromType[i] {
					differs = true
					break
				}
			}

			if differs {
				print("runtime: heap mask=")
				for _, b := range maskFromHeap {
					print(b)
				}
				println()
				print("runtime: type mask=")
				for _, b := range maskFromType {
					print(b)
				}
				println()
				print("runtime: type=", toRType(et).string(), "\n")
				throw("found two different masks from two different methods")
			}
		}

		// Select the heap mask to return. We may not have a type mask.
		mask = maskFromHeap

		// Make sure we keep ep alive. We may have stopped referencing
		// ep's data pointer sometime before this point and it's possible
		// for that memory to get freed.
		KeepAlive(ep)
		return
	}

	// stack
	if gp := getg(); gp.m.curg.stack.lo <= uintptr(p) && uintptr(p) < gp.m.curg.stack.hi {
		found := false
		var u unwinder
		for u.initAt(gp.m.curg.sched.pc, gp.m.curg.sched.sp, 0, gp.m.curg, 0); u.valid(); u.next() {
			if u.frame.sp <= uintptr(p) && uintptr(p) < u.frame.varp {
				found = true
				break
			}
		}
		if found {
			locals, _, _ := u.frame.getStackMap(false)
			if locals.n == 0 {
				return
			}
			size := uintptr(locals.n) * goarch.PtrSize
			n := (*ptrtype)(unsafe.Pointer(t)).Elem.Size_
			mask = make([]byte, n/goarch.PtrSize)
			for i := uintptr(0); i < n; i += goarch.PtrSize {
				off := (uintptr(p) + i - u.frame.varp + size) / goarch.PtrSize
				mask[i/goarch.PtrSize] = locals.ptrbit(off)
			}
		}
		return
	}

	// otherwise, not something the GC knows about.
	// possibly read-only data, like malloc(0).
1910 // must not have pointers 1911 return 1912} 1913