1// Copyright 2013 The Go Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style 3// license that can be found in the LICENSE file. 4 5package runtime_test 6 7import ( 8 "crypto/rand" 9 "encoding/binary" 10 "fmt" 11 "internal/race" 12 "internal/testenv" 13 . "runtime" 14 "sync/atomic" 15 "testing" 16 "unsafe" 17) 18 19func TestMemmove(t *testing.T) { 20 if *flagQuick { 21 t.Skip("-quick") 22 } 23 t.Parallel() 24 size := 256 25 if testing.Short() { 26 size = 128 + 16 27 } 28 src := make([]byte, size) 29 dst := make([]byte, size) 30 for i := 0; i < size; i++ { 31 src[i] = byte(128 + (i & 127)) 32 } 33 for i := 0; i < size; i++ { 34 dst[i] = byte(i & 127) 35 } 36 for n := 0; n <= size; n++ { 37 for x := 0; x <= size-n; x++ { // offset in src 38 for y := 0; y <= size-n; y++ { // offset in dst 39 copy(dst[y:y+n], src[x:x+n]) 40 for i := 0; i < y; i++ { 41 if dst[i] != byte(i&127) { 42 t.Fatalf("prefix dst[%d] = %d", i, dst[i]) 43 } 44 } 45 for i := y; i < y+n; i++ { 46 if dst[i] != byte(128+((i-y+x)&127)) { 47 t.Fatalf("copied dst[%d] = %d", i, dst[i]) 48 } 49 dst[i] = byte(i & 127) // reset dst 50 } 51 for i := y + n; i < size; i++ { 52 if dst[i] != byte(i&127) { 53 t.Fatalf("suffix dst[%d] = %d", i, dst[i]) 54 } 55 } 56 } 57 } 58 } 59} 60 61func TestMemmoveAlias(t *testing.T) { 62 if *flagQuick { 63 t.Skip("-quick") 64 } 65 t.Parallel() 66 size := 256 67 if testing.Short() { 68 size = 128 + 16 69 } 70 buf := make([]byte, size) 71 for i := 0; i < size; i++ { 72 buf[i] = byte(i) 73 } 74 for n := 0; n <= size; n++ { 75 for x := 0; x <= size-n; x++ { // src offset 76 for y := 0; y <= size-n; y++ { // dst offset 77 copy(buf[y:y+n], buf[x:x+n]) 78 for i := 0; i < y; i++ { 79 if buf[i] != byte(i) { 80 t.Fatalf("prefix buf[%d] = %d", i, buf[i]) 81 } 82 } 83 for i := y; i < y+n; i++ { 84 if buf[i] != byte(i-y+x) { 85 t.Fatalf("copied buf[%d] = %d", i, buf[i]) 86 } 87 buf[i] = byte(i) // reset buf 88 } 89 for i := y + n; i < size; i++ { 90 if 
buf[i] != byte(i) { 91 t.Fatalf("suffix buf[%d] = %d", i, buf[i]) 92 } 93 } 94 } 95 } 96 } 97} 98 99func TestMemmoveLarge0x180000(t *testing.T) { 100 if testing.Short() && testenv.Builder() == "" { 101 t.Skip("-short") 102 } 103 104 t.Parallel() 105 if race.Enabled { 106 t.Skip("skipping large memmove test under race detector") 107 } 108 testSize(t, 0x180000) 109} 110 111func TestMemmoveOverlapLarge0x120000(t *testing.T) { 112 if testing.Short() && testenv.Builder() == "" { 113 t.Skip("-short") 114 } 115 116 t.Parallel() 117 if race.Enabled { 118 t.Skip("skipping large memmove test under race detector") 119 } 120 testOverlap(t, 0x120000) 121} 122 123func testSize(t *testing.T, size int) { 124 src := make([]byte, size) 125 dst := make([]byte, size) 126 _, _ = rand.Read(src) 127 _, _ = rand.Read(dst) 128 129 ref := make([]byte, size) 130 copyref(ref, dst) 131 132 for n := size - 50; n > 1; n >>= 1 { 133 for x := 0; x <= size-n; x = x*7 + 1 { // offset in src 134 for y := 0; y <= size-n; y = y*9 + 1 { // offset in dst 135 copy(dst[y:y+n], src[x:x+n]) 136 copyref(ref[y:y+n], src[x:x+n]) 137 p := cmpb(dst, ref) 138 if p >= 0 { 139 t.Fatalf("Copy failed, copying from src[%d:%d] to dst[%d:%d].\nOffset %d is different, %v != %v", x, x+n, y, y+n, p, dst[p], ref[p]) 140 } 141 } 142 } 143 } 144} 145 146func testOverlap(t *testing.T, size int) { 147 src := make([]byte, size) 148 test := make([]byte, size) 149 ref := make([]byte, size) 150 _, _ = rand.Read(src) 151 152 for n := size - 50; n > 1; n >>= 1 { 153 for x := 0; x <= size-n; x = x*7 + 1 { // offset in src 154 for y := 0; y <= size-n; y = y*9 + 1 { // offset in dst 155 // Reset input 156 copyref(test, src) 157 copyref(ref, src) 158 copy(test[y:y+n], test[x:x+n]) 159 if y <= x { 160 copyref(ref[y:y+n], ref[x:x+n]) 161 } else { 162 copybw(ref[y:y+n], ref[x:x+n]) 163 } 164 p := cmpb(test, ref) 165 if p >= 0 { 166 t.Fatalf("Copy failed, copying from src[%d:%d] to dst[%d:%d].\nOffset %d is different, %v != %v", x, x+n, y, 
y+n, p, test[p], ref[p]) 167 } 168 } 169 } 170 } 171 172} 173 174// Forward copy. 175func copyref(dst, src []byte) { 176 for i, v := range src { 177 dst[i] = v 178 } 179} 180 181// Backwards copy 182func copybw(dst, src []byte) { 183 if len(src) == 0 { 184 return 185 } 186 for i := len(src) - 1; i >= 0; i-- { 187 dst[i] = src[i] 188 } 189} 190 191// Returns offset of difference 192func matchLen(a, b []byte, max int) int { 193 a = a[:max] 194 b = b[:max] 195 for i, av := range a { 196 if b[i] != av { 197 return i 198 } 199 } 200 return max 201} 202 203func cmpb(a, b []byte) int { 204 l := matchLen(a, b, len(a)) 205 if l == len(a) { 206 return -1 207 } 208 return l 209} 210 211// Ensure that memmove writes pointers atomically, so the GC won't 212// observe a partially updated pointer. 213func TestMemmoveAtomicity(t *testing.T) { 214 if race.Enabled { 215 t.Skip("skip under the race detector -- this test is intentionally racy") 216 } 217 218 var x int 219 220 for _, backward := range []bool{true, false} { 221 for _, n := range []int{3, 4, 5, 6, 7, 8, 9, 10, 15, 25, 49} { 222 n := n 223 224 // test copying [N]*int. 225 sz := uintptr(n * PtrSize) 226 name := fmt.Sprint(sz) 227 if backward { 228 name += "-backward" 229 } else { 230 name += "-forward" 231 } 232 t.Run(name, func(t *testing.T) { 233 // Use overlapping src and dst to force forward/backward copy. 
234 var s [100]*int 235 src := s[n-1 : 2*n-1] 236 dst := s[:n] 237 if backward { 238 src, dst = dst, src 239 } 240 for i := range src { 241 src[i] = &x 242 } 243 clear(dst) 244 245 var ready atomic.Uint32 246 go func() { 247 sp := unsafe.Pointer(&src[0]) 248 dp := unsafe.Pointer(&dst[0]) 249 ready.Store(1) 250 for i := 0; i < 10000; i++ { 251 Memmove(dp, sp, sz) 252 MemclrNoHeapPointers(dp, sz) 253 } 254 ready.Store(2) 255 }() 256 257 for ready.Load() == 0 { 258 Gosched() 259 } 260 261 for ready.Load() != 2 { 262 for i := range dst { 263 p := dst[i] 264 if p != nil && p != &x { 265 t.Fatalf("got partially updated pointer %p at dst[%d], want either nil or %p", p, i, &x) 266 } 267 } 268 } 269 }) 270 } 271 } 272} 273 274func benchmarkSizes(b *testing.B, sizes []int, fn func(b *testing.B, n int)) { 275 for _, n := range sizes { 276 b.Run(fmt.Sprint(n), func(b *testing.B) { 277 b.SetBytes(int64(n)) 278 fn(b, n) 279 }) 280 } 281} 282 283var bufSizes = []int{ 284 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 285 32, 64, 128, 256, 512, 1024, 2048, 4096, 286} 287var bufSizesOverlap = []int{ 288 32, 64, 128, 256, 512, 1024, 2048, 4096, 289} 290 291func BenchmarkMemmove(b *testing.B) { 292 benchmarkSizes(b, bufSizes, func(b *testing.B, n int) { 293 x := make([]byte, n) 294 y := make([]byte, n) 295 for i := 0; i < b.N; i++ { 296 copy(x, y) 297 } 298 }) 299} 300 301func BenchmarkMemmoveOverlap(b *testing.B) { 302 benchmarkSizes(b, bufSizesOverlap, func(b *testing.B, n int) { 303 x := make([]byte, n+16) 304 for i := 0; i < b.N; i++ { 305 copy(x[16:n+16], x[:n]) 306 } 307 }) 308} 309 310func BenchmarkMemmoveUnalignedDst(b *testing.B) { 311 benchmarkSizes(b, bufSizes, func(b *testing.B, n int) { 312 x := make([]byte, n+1) 313 y := make([]byte, n) 314 for i := 0; i < b.N; i++ { 315 copy(x[1:], y) 316 } 317 }) 318} 319 320func BenchmarkMemmoveUnalignedDstOverlap(b *testing.B) { 321 benchmarkSizes(b, bufSizesOverlap, func(b *testing.B, n int) { 322 x := make([]byte, n+16) 
323 for i := 0; i < b.N; i++ { 324 copy(x[16:n+16], x[1:n+1]) 325 } 326 }) 327} 328 329func BenchmarkMemmoveUnalignedSrc(b *testing.B) { 330 benchmarkSizes(b, bufSizes, func(b *testing.B, n int) { 331 x := make([]byte, n) 332 y := make([]byte, n+1) 333 for i := 0; i < b.N; i++ { 334 copy(x, y[1:]) 335 } 336 }) 337} 338 339func BenchmarkMemmoveUnalignedSrcDst(b *testing.B) { 340 for _, n := range []int{16, 64, 256, 4096, 65536} { 341 buf := make([]byte, (n+8)*2) 342 x := buf[:len(buf)/2] 343 y := buf[len(buf)/2:] 344 for _, off := range []int{0, 1, 4, 7} { 345 b.Run(fmt.Sprint("f_", n, off), func(b *testing.B) { 346 b.SetBytes(int64(n)) 347 for i := 0; i < b.N; i++ { 348 copy(x[off:n+off], y[off:n+off]) 349 } 350 }) 351 352 b.Run(fmt.Sprint("b_", n, off), func(b *testing.B) { 353 b.SetBytes(int64(n)) 354 for i := 0; i < b.N; i++ { 355 copy(y[off:n+off], x[off:n+off]) 356 } 357 }) 358 } 359 } 360} 361 362func BenchmarkMemmoveUnalignedSrcOverlap(b *testing.B) { 363 benchmarkSizes(b, bufSizesOverlap, func(b *testing.B, n int) { 364 x := make([]byte, n+1) 365 for i := 0; i < b.N; i++ { 366 copy(x[1:n+1], x[:n]) 367 } 368 }) 369} 370 371func TestMemclr(t *testing.T) { 372 size := 512 373 if testing.Short() { 374 size = 128 + 16 375 } 376 mem := make([]byte, size) 377 for i := 0; i < size; i++ { 378 mem[i] = 0xee 379 } 380 for n := 0; n < size; n++ { 381 for x := 0; x <= size-n; x++ { // offset in mem 382 MemclrBytes(mem[x : x+n]) 383 for i := 0; i < x; i++ { 384 if mem[i] != 0xee { 385 t.Fatalf("overwrite prefix mem[%d] = %d", i, mem[i]) 386 } 387 } 388 for i := x; i < x+n; i++ { 389 if mem[i] != 0 { 390 t.Fatalf("failed clear mem[%d] = %d", i, mem[i]) 391 } 392 mem[i] = 0xee 393 } 394 for i := x + n; i < size; i++ { 395 if mem[i] != 0xee { 396 t.Fatalf("overwrite suffix mem[%d] = %d", i, mem[i]) 397 } 398 } 399 } 400 } 401} 402 403func BenchmarkMemclr(b *testing.B) { 404 for _, n := range []int{5, 16, 64, 256, 4096, 65536} { 405 x := make([]byte, n) 406 
b.Run(fmt.Sprint(n), func(b *testing.B) { 407 b.SetBytes(int64(n)) 408 for i := 0; i < b.N; i++ { 409 MemclrBytes(x) 410 } 411 }) 412 } 413 for _, m := range []int{1, 4, 8, 16, 64} { 414 x := make([]byte, m<<20) 415 b.Run(fmt.Sprint(m, "M"), func(b *testing.B) { 416 b.SetBytes(int64(m << 20)) 417 for i := 0; i < b.N; i++ { 418 MemclrBytes(x) 419 } 420 }) 421 } 422} 423 424func BenchmarkMemclrUnaligned(b *testing.B) { 425 for _, off := range []int{0, 1, 4, 7} { 426 for _, n := range []int{5, 16, 64, 256, 4096, 65536} { 427 x := make([]byte, n+off) 428 b.Run(fmt.Sprint(off, n), func(b *testing.B) { 429 b.SetBytes(int64(n)) 430 for i := 0; i < b.N; i++ { 431 MemclrBytes(x[off:]) 432 } 433 }) 434 } 435 } 436 437 for _, off := range []int{0, 1, 4, 7} { 438 for _, m := range []int{1, 4, 8, 16, 64} { 439 x := make([]byte, (m<<20)+off) 440 b.Run(fmt.Sprint(off, m, "M"), func(b *testing.B) { 441 b.SetBytes(int64(m << 20)) 442 for i := 0; i < b.N; i++ { 443 MemclrBytes(x[off:]) 444 } 445 }) 446 } 447 } 448} 449 450func BenchmarkGoMemclr(b *testing.B) { 451 benchmarkSizes(b, []int{5, 16, 64, 256}, func(b *testing.B, n int) { 452 x := make([]byte, n) 453 for i := 0; i < b.N; i++ { 454 clear(x) 455 } 456 }) 457} 458 459func BenchmarkMemclrRange(b *testing.B) { 460 type RunData struct { 461 data []int 462 } 463 464 benchSizes := []RunData{ 465 {[]int{1043, 1078, 1894, 1582, 1044, 1165, 1467, 1100, 1919, 1562, 1932, 1645, 466 1412, 1038, 1576, 1200, 1029, 1336, 1095, 1494, 1350, 1025, 1502, 1548, 1316, 1296, 467 1868, 1639, 1546, 1626, 1642, 1308, 1726, 1665, 1678, 1187, 1515, 1598, 1353, 1237, 468 1977, 1452, 2012, 1914, 1514, 1136, 1975, 1618, 1536, 1695, 1600, 1733, 1392, 1099, 469 1358, 1996, 1224, 1783, 1197, 1838, 1460, 1556, 1554, 2020}}, // 1kb-2kb 470 {[]int{3964, 5139, 6573, 7775, 6553, 2413, 3466, 5394, 2469, 7336, 7091, 6745, 471 4028, 5643, 6164, 3475, 4138, 6908, 7559, 3335, 5660, 4122, 3945, 2082, 7564, 6584, 472 5111, 2288, 6789, 2797, 4928, 7986, 5163, 5447, 
2999, 4968, 3174, 3202, 7908, 8137, 473 4735, 6161, 4646, 7592, 3083, 5329, 3687, 2754, 3599, 7231, 6455, 2549, 8063, 2189, 474 7121, 5048, 4277, 6626, 6306, 2815, 7473, 3963, 7549, 7255}}, // 2kb-8kb 475 {[]int{16304, 15936, 15760, 4736, 9136, 11184, 10160, 5952, 14560, 15744, 476 6624, 5872, 13088, 14656, 14192, 10304, 4112, 10384, 9344, 4496, 11392, 7024, 477 5200, 10064, 14784, 5808, 13504, 10480, 8512, 4896, 13264, 5600}}, // 4kb-16kb 478 {[]int{164576, 233136, 220224, 183280, 214112, 217248, 228560, 201728}}, // 128kb-256kb 479 } 480 481 for _, t := range benchSizes { 482 total := 0 483 minLen := 0 484 maxLen := 0 485 486 for _, clrLen := range t.data { 487 maxLen = max(maxLen, clrLen) 488 if clrLen < minLen || minLen == 0 { 489 minLen = clrLen 490 } 491 total += clrLen 492 } 493 buffer := make([]byte, maxLen) 494 495 text := "" 496 if minLen >= (1 << 20) { 497 text = fmt.Sprint(minLen>>20, "M ", (maxLen+(1<<20-1))>>20, "M") 498 } else if minLen >= (1 << 10) { 499 text = fmt.Sprint(minLen>>10, "K ", (maxLen+(1<<10-1))>>10, "K") 500 } else { 501 text = fmt.Sprint(minLen, " ", maxLen) 502 } 503 b.Run(text, func(b *testing.B) { 504 b.SetBytes(int64(total)) 505 for i := 0; i < b.N; i++ { 506 for _, clrLen := range t.data { 507 MemclrBytes(buffer[:clrLen]) 508 } 509 } 510 }) 511 } 512} 513 514func BenchmarkClearFat7(b *testing.B) { 515 p := new([7]byte) 516 Escape(p) 517 b.ResetTimer() 518 for i := 0; i < b.N; i++ { 519 *p = [7]byte{} 520 } 521} 522 523func BenchmarkClearFat8(b *testing.B) { 524 p := new([8 / 4]uint32) 525 Escape(p) 526 b.ResetTimer() 527 for i := 0; i < b.N; i++ { 528 *p = [8 / 4]uint32{} 529 } 530} 531 532func BenchmarkClearFat11(b *testing.B) { 533 p := new([11]byte) 534 Escape(p) 535 b.ResetTimer() 536 for i := 0; i < b.N; i++ { 537 *p = [11]byte{} 538 } 539} 540 541func BenchmarkClearFat12(b *testing.B) { 542 p := new([12 / 4]uint32) 543 Escape(p) 544 b.ResetTimer() 545 for i := 0; i < b.N; i++ { 546 *p = [12 / 4]uint32{} 547 } 548} 549 
// The BenchmarkClearFat* functions below each time a single statement
// shape: assigning the zero value of a fixed-size array through a pointer.
// The number in the name is the array size in bytes; sizes that are a
// multiple of 4 use uint32 elements, the rest use byte elements.
// NOTE(review): Escape is declared elsewhere in the package; presumably it
// forces p to be heap-allocated — confirm.

// BenchmarkClearFat13 times zeroing a [13]byte array through p.
func BenchmarkClearFat13(b *testing.B) {
	p := new([13]byte)
	Escape(p)
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		*p = [13]byte{}
	}
}

// BenchmarkClearFat14 times zeroing a [14]byte array through p.
func BenchmarkClearFat14(b *testing.B) {
	p := new([14]byte)
	Escape(p)
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		*p = [14]byte{}
	}
}

// BenchmarkClearFat15 times zeroing a [15]byte array through p.
func BenchmarkClearFat15(b *testing.B) {
	p := new([15]byte)
	Escape(p)
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		*p = [15]byte{}
	}
}

// BenchmarkClearFat16 times zeroing a 16-byte uint32 array through p.
func BenchmarkClearFat16(b *testing.B) {
	p := new([16 / 4]uint32)
	Escape(p)
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		*p = [16 / 4]uint32{}
	}
}

// BenchmarkClearFat24 times zeroing a 24-byte uint32 array through p.
func BenchmarkClearFat24(b *testing.B) {
	p := new([24 / 4]uint32)
	Escape(p)
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		*p = [24 / 4]uint32{}
	}
}

// BenchmarkClearFat32 times zeroing a 32-byte uint32 array through p.
func BenchmarkClearFat32(b *testing.B) {
	p := new([32 / 4]uint32)
	Escape(p)
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		*p = [32 / 4]uint32{}
	}
}

// BenchmarkClearFat40 times zeroing a 40-byte uint32 array through p.
func BenchmarkClearFat40(b *testing.B) {
	p := new([40 / 4]uint32)
	Escape(p)
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		*p = [40 / 4]uint32{}
	}
}

// BenchmarkClearFat48 times zeroing a 48-byte uint32 array through p.
func BenchmarkClearFat48(b *testing.B) {
	p := new([48 / 4]uint32)
	Escape(p)
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		*p = [48 / 4]uint32{}
	}
}

// BenchmarkClearFat56 times zeroing a 56-byte uint32 array through p.
func BenchmarkClearFat56(b *testing.B) {
	p := new([56 / 4]uint32)
	Escape(p)
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		*p = [56 / 4]uint32{}
	}
}

// BenchmarkClearFat64 times zeroing a 64-byte uint32 array through p.
func BenchmarkClearFat64(b *testing.B) {
	p := new([64 / 4]uint32)
	Escape(p)
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		*p = [64 / 4]uint32{}
	}
}

// BenchmarkClearFat72 times zeroing a 72-byte uint32 array through p.
func BenchmarkClearFat72(b *testing.B) {
	p := new([72 / 4]uint32)
	Escape(p)
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		*p = [72 / 4]uint32{}
	}
}

// BenchmarkClearFat128 times zeroing a 128-byte uint32 array through p.
func BenchmarkClearFat128(b *testing.B) {
	p := new([128 / 4]uint32)
	Escape(p)
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		*p = [128 / 4]uint32{}
	}
}

// BenchmarkClearFat256 times zeroing a 256-byte uint32 array through p.
func BenchmarkClearFat256(b *testing.B) {
	p := new([256 / 4]uint32)
	Escape(p)
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		*p = [256 / 4]uint32{}
	}
}

// BenchmarkClearFat512 times zeroing a 512-byte uint32 array through p.
func BenchmarkClearFat512(b *testing.B) {
	p := new([512 / 4]uint32)
	Escape(p)
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		*p = [512 / 4]uint32{}
	}
}

// BenchmarkClearFat1024 times zeroing a 1024-byte uint32 array through p.
func BenchmarkClearFat1024(b *testing.B) {
	p := new([1024 / 4]uint32)
	Escape(p)
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		*p = [1024 / 4]uint32{}
	}
}

// BenchmarkClearFat1032 times zeroing a 1032-byte uint32 array through p.
func BenchmarkClearFat1032(b *testing.B) {
	p := new([1032 / 4]uint32)
	Escape(p)
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		*p = [1032 / 4]uint32{}
	}
}

// BenchmarkClearFat1040 times zeroing a 1040-byte uint32 array through p.
func BenchmarkClearFat1040(b *testing.B) {
	p := new([1040 / 4]uint32)
	Escape(p)
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		*p = [1040 / 4]uint32{}
	}
}

// The BenchmarkCopyFat* functions below each time assigning a local
// fixed-size array x to the array that p points to. The number in the name
// is the array size in bytes; sizes that are a multiple of 4 use uint32
// elements, the rest use byte elements. x is never written, so it always
// holds its zero value.

// BenchmarkCopyFat7 times copying a [7]byte array into *p.
func BenchmarkCopyFat7(b *testing.B) {
	var x [7]byte
	p := new([7]byte)
	Escape(p)
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		*p = x
	}
}

// BenchmarkCopyFat8 times copying an 8-byte uint32 array into *p.
func BenchmarkCopyFat8(b *testing.B) {
	var x [8 / 4]uint32
	p := new([8 / 4]uint32)
	Escape(p)
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		*p = x
	}
}

// BenchmarkCopyFat11 times copying a [11]byte array into *p.
func BenchmarkCopyFat11(b *testing.B) {
	var x [11]byte
	p := new([11]byte)
	Escape(p)
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		*p = x
	}
}

// BenchmarkCopyFat12 times copying a 12-byte uint32 array into *p.
func BenchmarkCopyFat12(b *testing.B) {
	var x [12 / 4]uint32
	p := new([12 / 4]uint32)
	Escape(p)
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		*p = x
	}
}

// BenchmarkCopyFat13 times copying a [13]byte array into *p.
func BenchmarkCopyFat13(b *testing.B) {
	var x [13]byte
	p := new([13]byte)
	Escape(p)
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		*p = x
	}
}

// BenchmarkCopyFat14 times copying a [14]byte array into *p.
func BenchmarkCopyFat14(b *testing.B) {
	var x [14]byte
	p := new([14]byte)
	Escape(p)
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		*p = x
	}
}

// BenchmarkCopyFat15 times copying a [15]byte array into *p.
func BenchmarkCopyFat15(b *testing.B) {
	var x [15]byte
	p := new([15]byte)
	Escape(p)
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		*p = x
	}
}

// BenchmarkCopyFat16 times copying a 16-byte uint32 array into *p.
func BenchmarkCopyFat16(b *testing.B) {
	var x [16 / 4]uint32
	p := new([16 / 4]uint32)
	Escape(p)
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		*p = x
	}
}

// BenchmarkCopyFat24 times copying a 24-byte uint32 array into *p.
func BenchmarkCopyFat24(b *testing.B) {
	var x [24 / 4]uint32
	p := new([24 / 4]uint32)
	Escape(p)
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		*p = x
	}
}

// BenchmarkCopyFat32 times copying a 32-byte uint32 array into *p.
func BenchmarkCopyFat32(b *testing.B) {
	var x [32 / 4]uint32
	p := new([32 / 4]uint32)
	Escape(p)
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		*p = x
	}
}

// BenchmarkCopyFat64 times copying a 64-byte uint32 array into *p.
func BenchmarkCopyFat64(b *testing.B) {
	var x [64 / 4]uint32
	p := new([64 / 4]uint32)
	Escape(p)
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		*p = x
	}
}

// BenchmarkCopyFat72 times copying a 72-byte uint32 array into *p.
func BenchmarkCopyFat72(b *testing.B) {
	var x [72 / 4]uint32
	p := new([72 / 4]uint32)
	Escape(p)
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		*p = x
	}
}

// BenchmarkCopyFat128 times copying a 128-byte uint32 array into *p.
func BenchmarkCopyFat128(b *testing.B) {
	var x [128 / 4]uint32
	p := new([128 / 4]uint32)
	Escape(p)
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		*p = x
	}
}

// BenchmarkCopyFat256 times copying a 256-byte uint32 array into *p.
func BenchmarkCopyFat256(b *testing.B) {
	var x [256 / 4]uint32
	p := new([256 / 4]uint32)
	Escape(p)
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		*p = x
	}
}

// BenchmarkCopyFat512 times copying a 512-byte uint32 array into *p.
func BenchmarkCopyFat512(b *testing.B) {
	var x [512 / 4]uint32
	p := new([512 / 4]uint32)
	Escape(p)
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		*p = x
	}
}

// BenchmarkCopyFat520 times copying a 520-byte uint32 array into *p.
func BenchmarkCopyFat520(b *testing.B) {
	var x [520 / 4]uint32
	p := new([520 / 4]uint32)
	Escape(p)
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		*p = x
	}
}

// BenchmarkCopyFat1024 times copying a 1024-byte uint32 array into *p.
func BenchmarkCopyFat1024(b *testing.B) {
	var x [1024 / 4]uint32
	p := new([1024 / 4]uint32)
	Escape(p)
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		*p = x
	}
}

// BenchmarkCopyFat1032 times copying a 1032-byte uint32 array into *p.
func BenchmarkCopyFat1032(b *testing.B) {
	var x [1032 / 4]uint32
	p := new([1032 / 4]uint32)
	Escape(p)
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		*p = x
	}
}

// BenchmarkCopyFat1040 times copying a 1040-byte uint32 array into *p.
func BenchmarkCopyFat1040(b *testing.B) {
	var x [1040 / 4]uint32
	p := new([1040 / 4]uint32)
	Escape(p)
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		*p = x
	}
}

// BenchmarkIssue18740 ensures that memmove uses 4 and 8 byte load/store to move 4 and 8 bytes.
// It used to do 2 2-byte load/stores, which leads to a pipeline stall
// when we try to read the result with one 4-byte load.
//
// Each sub-benchmark walks the 4096-byte array g in steps of nbyte, copies
// nbyte bytes into buf, decodes buf as one little-endian integer with f and
// adds the result to sink (a package-level variable declared elsewhere).
func BenchmarkIssue18740(b *testing.B) {
	benchmarks := []struct {
		name  string
		nbyte int
		f     func([]byte) uint64
	}{
		{"2byte", 2, func(buf []byte) uint64 { return uint64(binary.LittleEndian.Uint16(buf)) }},
		{"4byte", 4, func(buf []byte) uint64 { return uint64(binary.LittleEndian.Uint32(buf)) }},
		{"8byte", 8, func(buf []byte) uint64 { return binary.LittleEndian.Uint64(buf) }},
	}

	var g [4096]byte
	for _, bm := range benchmarks {
		buf := make([]byte, bm.nbyte)
		b.Run(bm.name, func(b *testing.B) {
			for j := 0; j < b.N; j++ {
				for i := 0; i < 4096; i += bm.nbyte {
					// copy is bounded by len(buf), so exactly nbyte bytes move.
					copy(buf[:], g[i:])
					sink += bm.f(buf[:])
				}
			}
		})
	}
}

// memclrSink receives a slice of the cleared array at the end of every
// BenchmarkMemclrKnownSize* benchmark.
// NOTE(review): presumably this keeps the array observable so the clearing
// loop can't be discarded — confirm.
var memclrSink []int8

// The BenchmarkMemclrKnownSize* functions below each time a range loop
// that stores 0 into every element of a local int8 array whose length is a
// compile-time constant. The number in the name is the array length in
// bytes, which is also the value passed to b.SetBytes.

// BenchmarkMemclrKnownSize1 times zeroing a [1]int8 array with a range loop.
func BenchmarkMemclrKnownSize1(b *testing.B) {
	var x [1]int8

	b.SetBytes(1)
	for i := 0; i < b.N; i++ {
		for a := range x {
			x[a] = 0
		}
	}

	memclrSink = x[:]
}

// BenchmarkMemclrKnownSize2 times zeroing a [2]int8 array with a range loop.
func BenchmarkMemclrKnownSize2(b *testing.B) {
	var x [2]int8

	b.SetBytes(2)
	for i := 0; i < b.N; i++ {
		for a := range x {
			x[a] = 0
		}
	}

	memclrSink = x[:]
}

// BenchmarkMemclrKnownSize4 times zeroing a [4]int8 array with a range loop.
func BenchmarkMemclrKnownSize4(b *testing.B) {
	var x [4]int8

	b.SetBytes(4)
	for i := 0; i < b.N; i++ {
		for a := range x {
			x[a] = 0
		}
	}

	memclrSink = x[:]
}

// BenchmarkMemclrKnownSize8 times zeroing a [8]int8 array with a range loop.
func BenchmarkMemclrKnownSize8(b *testing.B) {
	var x [8]int8

	b.SetBytes(8)
	for i := 0; i < b.N; i++ {
		for a := range x {
			x[a] = 0
		}
	}

	memclrSink = x[:]
}

// BenchmarkMemclrKnownSize16 times zeroing a [16]int8 array with a range loop.
func BenchmarkMemclrKnownSize16(b *testing.B) {
	var x [16]int8

	b.SetBytes(16)
	for i := 0; i < b.N; i++ {
		for a := range x {
			x[a] = 0
		}
	}

	memclrSink = x[:]
}

// BenchmarkMemclrKnownSize32 times zeroing a [32]int8 array with a range loop.
func BenchmarkMemclrKnownSize32(b *testing.B) {
	var x [32]int8

	b.SetBytes(32)
	for i := 0; i < b.N; i++ {
		for a := range x {
			x[a] = 0
		}
	}

	memclrSink = x[:]
}

// BenchmarkMemclrKnownSize64 times zeroing a [64]int8 array with a range loop.
func BenchmarkMemclrKnownSize64(b *testing.B) {
	var x [64]int8

	b.SetBytes(64)
	for i := 0; i < b.N; i++ {
		for a := range x {
			x[a] = 0
		}
	}

	memclrSink = x[:]
}

// BenchmarkMemclrKnownSize112 times zeroing a [112]int8 array with a range loop.
func BenchmarkMemclrKnownSize112(b *testing.B) {
	var x [112]int8

	b.SetBytes(112)
	for i := 0; i < b.N; i++ {
		for a := range x {
			x[a] = 0
		}
	}

	memclrSink = x[:]
}

// BenchmarkMemclrKnownSize128 times zeroing a [128]int8 array with a range loop.
func BenchmarkMemclrKnownSize128(b *testing.B) {
	var x [128]int8

	b.SetBytes(128)
	for i := 0; i < b.N; i++ {
		for a := range x {
			x[a] = 0
		}
	}

	memclrSink = x[:]
}

// BenchmarkMemclrKnownSize192 times zeroing a [192]int8 array with a range loop.
func BenchmarkMemclrKnownSize192(b *testing.B) {
	var x [192]int8

	b.SetBytes(192)
	for i := 0; i < b.N; i++ {
		for a := range x {
			x[a] = 0
		}
	}

	memclrSink = x[:]
}

// BenchmarkMemclrKnownSize248 times zeroing a [248]int8 array with a range loop.
func BenchmarkMemclrKnownSize248(b *testing.B) {
	var x [248]int8

	b.SetBytes(248)
	for i := 0; i < b.N; i++ {
		for a := range x {
			x[a] = 0
		}
	}

	memclrSink = x[:]
}

// BenchmarkMemclrKnownSize256 times zeroing a [256]int8 array with a range loop.
func BenchmarkMemclrKnownSize256(b *testing.B) {
	var x [256]int8

	b.SetBytes(256)
	for i := 0; i < b.N; i++ {
		for a := range x {
			x[a] = 0
		}
	}

	memclrSink = x[:]
}

// BenchmarkMemclrKnownSize512 times zeroing a [512]int8 array with a range loop.
func BenchmarkMemclrKnownSize512(b *testing.B) {
	var x [512]int8

	b.SetBytes(512)
	for i := 0; i < b.N; i++ {
		for a := range x {
			x[a] = 0
		}
	}

	memclrSink = x[:]
}

// BenchmarkMemclrKnownSize1024 times zeroing a [1024]int8 array with a range loop.
func BenchmarkMemclrKnownSize1024(b *testing.B) {
	var x [1024]int8

	b.SetBytes(1024)
	for i := 0; i < b.N; i++ {
		for a := range x {
			x[a] = 0
		}
	}

	memclrSink = x[:]
}

// BenchmarkMemclrKnownSize4096 times zeroing a [4096]int8 array with a range loop.
func BenchmarkMemclrKnownSize4096(b *testing.B) {
	var x [4096]int8

	b.SetBytes(4096)
	for i := 0; i < b.N; i++ {
		for a := range x {
			x[a] = 0
		}
	}

	memclrSink = x[:]
}

// BenchmarkMemclrKnownSize512KiB times zeroing a [524288]int8 array
// (512 KiB) with a range loop.
func BenchmarkMemclrKnownSize512KiB(b *testing.B) {
	var x [524288]int8

	b.SetBytes(524288)
	for i := 0; i < b.N; i++ {
		for a := range x {
			x[a] = 0
		}
	}

	memclrSink = x[:]
}