1// Copyright 2013 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package runtime_test
6
7import (
8	"crypto/rand"
9	"encoding/binary"
10	"fmt"
11	"internal/race"
12	"internal/testenv"
13	. "runtime"
14	"sync/atomic"
15	"testing"
16	"unsafe"
17)
18
19func TestMemmove(t *testing.T) {
20	if *flagQuick {
21		t.Skip("-quick")
22	}
23	t.Parallel()
24	size := 256
25	if testing.Short() {
26		size = 128 + 16
27	}
28	src := make([]byte, size)
29	dst := make([]byte, size)
30	for i := 0; i < size; i++ {
31		src[i] = byte(128 + (i & 127))
32	}
33	for i := 0; i < size; i++ {
34		dst[i] = byte(i & 127)
35	}
36	for n := 0; n <= size; n++ {
37		for x := 0; x <= size-n; x++ { // offset in src
38			for y := 0; y <= size-n; y++ { // offset in dst
39				copy(dst[y:y+n], src[x:x+n])
40				for i := 0; i < y; i++ {
41					if dst[i] != byte(i&127) {
42						t.Fatalf("prefix dst[%d] = %d", i, dst[i])
43					}
44				}
45				for i := y; i < y+n; i++ {
46					if dst[i] != byte(128+((i-y+x)&127)) {
47						t.Fatalf("copied dst[%d] = %d", i, dst[i])
48					}
49					dst[i] = byte(i & 127) // reset dst
50				}
51				for i := y + n; i < size; i++ {
52					if dst[i] != byte(i&127) {
53						t.Fatalf("suffix dst[%d] = %d", i, dst[i])
54					}
55				}
56			}
57		}
58	}
59}
60
61func TestMemmoveAlias(t *testing.T) {
62	if *flagQuick {
63		t.Skip("-quick")
64	}
65	t.Parallel()
66	size := 256
67	if testing.Short() {
68		size = 128 + 16
69	}
70	buf := make([]byte, size)
71	for i := 0; i < size; i++ {
72		buf[i] = byte(i)
73	}
74	for n := 0; n <= size; n++ {
75		for x := 0; x <= size-n; x++ { // src offset
76			for y := 0; y <= size-n; y++ { // dst offset
77				copy(buf[y:y+n], buf[x:x+n])
78				for i := 0; i < y; i++ {
79					if buf[i] != byte(i) {
80						t.Fatalf("prefix buf[%d] = %d", i, buf[i])
81					}
82				}
83				for i := y; i < y+n; i++ {
84					if buf[i] != byte(i-y+x) {
85						t.Fatalf("copied buf[%d] = %d", i, buf[i])
86					}
87					buf[i] = byte(i) // reset buf
88				}
89				for i := y + n; i < size; i++ {
90					if buf[i] != byte(i) {
91						t.Fatalf("suffix buf[%d] = %d", i, buf[i])
92					}
93				}
94			}
95		}
96	}
97}
98
99func TestMemmoveLarge0x180000(t *testing.T) {
100	if testing.Short() && testenv.Builder() == "" {
101		t.Skip("-short")
102	}
103
104	t.Parallel()
105	if race.Enabled {
106		t.Skip("skipping large memmove test under race detector")
107	}
108	testSize(t, 0x180000)
109}
110
111func TestMemmoveOverlapLarge0x120000(t *testing.T) {
112	if testing.Short() && testenv.Builder() == "" {
113		t.Skip("-short")
114	}
115
116	t.Parallel()
117	if race.Enabled {
118		t.Skip("skipping large memmove test under race detector")
119	}
120	testOverlap(t, 0x120000)
121}
122
123func testSize(t *testing.T, size int) {
124	src := make([]byte, size)
125	dst := make([]byte, size)
126	_, _ = rand.Read(src)
127	_, _ = rand.Read(dst)
128
129	ref := make([]byte, size)
130	copyref(ref, dst)
131
132	for n := size - 50; n > 1; n >>= 1 {
133		for x := 0; x <= size-n; x = x*7 + 1 { // offset in src
134			for y := 0; y <= size-n; y = y*9 + 1 { // offset in dst
135				copy(dst[y:y+n], src[x:x+n])
136				copyref(ref[y:y+n], src[x:x+n])
137				p := cmpb(dst, ref)
138				if p >= 0 {
139					t.Fatalf("Copy failed, copying from src[%d:%d] to dst[%d:%d].\nOffset %d is different, %v != %v", x, x+n, y, y+n, p, dst[p], ref[p])
140				}
141			}
142		}
143	}
144}
145
146func testOverlap(t *testing.T, size int) {
147	src := make([]byte, size)
148	test := make([]byte, size)
149	ref := make([]byte, size)
150	_, _ = rand.Read(src)
151
152	for n := size - 50; n > 1; n >>= 1 {
153		for x := 0; x <= size-n; x = x*7 + 1 { // offset in src
154			for y := 0; y <= size-n; y = y*9 + 1 { // offset in dst
155				// Reset input
156				copyref(test, src)
157				copyref(ref, src)
158				copy(test[y:y+n], test[x:x+n])
159				if y <= x {
160					copyref(ref[y:y+n], ref[x:x+n])
161				} else {
162					copybw(ref[y:y+n], ref[x:x+n])
163				}
164				p := cmpb(test, ref)
165				if p >= 0 {
166					t.Fatalf("Copy failed, copying from src[%d:%d] to dst[%d:%d].\nOffset %d is different, %v != %v", x, x+n, y, y+n, p, test[p], ref[p])
167				}
168			}
169		}
170	}
171
172}
173
// copyref copies src into dst one byte at a time, from index 0 upwards.
// dst must be at least as long as src, otherwise the indexing panics.
func copyref(dst, src []byte) {
	for i := 0; i < len(src); i++ {
		dst[i] = src[i]
	}
}
180
// copybw copies src into dst one byte at a time, from the last index of src
// down to index 0. An empty src is a no-op. dst must be at least as long as
// src, otherwise the indexing panics.
func copybw(dst, src []byte) {
	for n := len(src); n > 0; n-- {
		dst[n-1] = src[n-1]
	}
}
190
// matchLen compares the first max bytes of a and b and returns the index of
// the first position where they differ, or max when all of them are equal.
// Both slices are resliced to max, so max must not exceed either capacity.
func matchLen(a, b []byte, max int) int {
	a, b = a[:max], b[:max]
	for i := 0; i < max; i++ {
		if a[i] != b[i] {
			return i
		}
	}
	return max
}
202
203func cmpb(a, b []byte) int {
204	l := matchLen(a, b, len(a))
205	if l == len(a) {
206		return -1
207	}
208	return l
209}
210
// Ensure that memmove writes pointers atomically, so the GC won't
// observe a partially updated pointer.
//
// For each direction and element count, a goroutine repeatedly moves a run of
// pointers over an overlapping destination and clears it again, while the
// test goroutine keeps reading the destination and fails if it ever sees a
// value other than nil or &x.
func TestMemmoveAtomicity(t *testing.T) {
	if race.Enabled {
		t.Skip("skip under the race detector -- this test is intentionally racy")
	}

	// x is the only pointee: every non-nil pointer stored by this test is &x.
	var x int

	for _, backward := range []bool{true, false} {
		for _, n := range []int{3, 4, 5, 6, 7, 8, 9, 10, 15, 25, 49} {
			// Per-iteration copy of n for the subtest closure below.
			n := n

			// test copying [N]*int.
			// NOTE(review): presumably PtrSize is the pointer size in bytes, making sz the byte length of n pointers — confirm.
			sz := uintptr(n * PtrSize)
			name := fmt.Sprint(sz)
			if backward {
				name += "-backward"
			} else {
				name += "-forward"
			}
			t.Run(name, func(t *testing.T) {
				// Use overlapping src and dst to force forward/backward copy.
				// src is s[n-1:2n-1] and dst is s[:n], so the two windows
				// share exactly one element (index n-1 of s); they are
				// swapped for the backward case.
				var s [100]*int
				src := s[n-1 : 2*n-1]
				dst := s[:n]
				if backward {
					src, dst = dst, src
				}
				for i := range src {
					src[i] = &x
				}
				// This also nils the one element that dst shares with src.
				clear(dst)

				// ready is 0 until the goroutine has taken its pointers,
				// 1 while it is copying, and 2 once it has finished.
				var ready atomic.Uint32
				go func() {
					sp := unsafe.Pointer(&src[0])
					dp := unsafe.Pointer(&dst[0])
					ready.Store(1)
					for i := 0; i < 10000; i++ {
						Memmove(dp, sp, sz)
						MemclrNoHeapPointers(dp, sz)
					}
					ready.Store(2)
				}()

				// Yield until the copying goroutine has started.
				for ready.Load() == 0 {
					Gosched()
				}

				// Read dst concurrently with the copying goroutine (without
				// synchronization, on purpose) until it reports completion.
				for ready.Load() != 2 {
					for i := range dst {
						p := dst[i]
						if p != nil && p != &x {
							t.Fatalf("got partially updated pointer %p at dst[%d], want either nil or %p", p, i, &x)
						}
					}
				}
			})
		}
	}
}
273
// benchmarkSizes runs fn as one sub-benchmark per entry of sizes. Each
// sub-benchmark is named with the decimal size, and that size is reported
// through SetBytes before fn is called with it.
func benchmarkSizes(b *testing.B, sizes []int, fn func(b *testing.B, n int)) {
	for i := range sizes {
		size := sizes[i]
		name := fmt.Sprint(size)
		b.Run(name, func(sub *testing.B) {
			sub.SetBytes(int64(size))
			fn(sub, size)
		})
	}
}
282
// Buffer lengths, in bytes, passed to benchmarkSizes by the memmove benchmarks.
var (
	// bufSizes lists every length from 0 through 16, then the powers of two
	// from 32 through 4096.
	bufSizes = []int{
		0, 1, 2, 3, 4, 5, 6, 7, 8,
		9, 10, 11, 12, 13, 14, 15, 16,
		32, 64, 128, 256,
		512, 1024, 2048, 4096,
	}

	// bufSizesOverlap lists only the powers of two from 32 through 4096.
	bufSizesOverlap = []int{
		32, 64, 128, 256,
		512, 1024, 2048, 4096,
	}
)
290
291func BenchmarkMemmove(b *testing.B) {
292	benchmarkSizes(b, bufSizes, func(b *testing.B, n int) {
293		x := make([]byte, n)
294		y := make([]byte, n)
295		for i := 0; i < b.N; i++ {
296			copy(x, y)
297		}
298	})
299}
300
301func BenchmarkMemmoveOverlap(b *testing.B) {
302	benchmarkSizes(b, bufSizesOverlap, func(b *testing.B, n int) {
303		x := make([]byte, n+16)
304		for i := 0; i < b.N; i++ {
305			copy(x[16:n+16], x[:n])
306		}
307	})
308}
309
310func BenchmarkMemmoveUnalignedDst(b *testing.B) {
311	benchmarkSizes(b, bufSizes, func(b *testing.B, n int) {
312		x := make([]byte, n+1)
313		y := make([]byte, n)
314		for i := 0; i < b.N; i++ {
315			copy(x[1:], y)
316		}
317	})
318}
319
320func BenchmarkMemmoveUnalignedDstOverlap(b *testing.B) {
321	benchmarkSizes(b, bufSizesOverlap, func(b *testing.B, n int) {
322		x := make([]byte, n+16)
323		for i := 0; i < b.N; i++ {
324			copy(x[16:n+16], x[1:n+1])
325		}
326	})
327}
328
329func BenchmarkMemmoveUnalignedSrc(b *testing.B) {
330	benchmarkSizes(b, bufSizes, func(b *testing.B, n int) {
331		x := make([]byte, n)
332		y := make([]byte, n+1)
333		for i := 0; i < b.N; i++ {
334			copy(x, y[1:])
335		}
336	})
337}
338
// BenchmarkMemmoveUnalignedSrcDst times copy between the two halves of one
// buffer, with source and destination windows shifted by the same offset.
// The "f_" sub-benchmarks copy from the upper half to the lower half and the
// "b_" sub-benchmarks copy from the lower half to the upper half.
func BenchmarkMemmoveUnalignedSrcDst(b *testing.B) {
	sizes := []int{16, 64, 256, 4096, 65536}
	offsets := []int{0, 1, 4, 7}

	for _, n := range sizes {
		// Each half has 8 spare bytes so that the largest offset still fits.
		half := n + 8
		buf := make([]byte, 2*half)
		lo, hi := buf[:half], buf[half:]

		for _, off := range offsets {
			b.Run(fmt.Sprint("f_", n, off), func(b *testing.B) {
				b.SetBytes(int64(n))
				for i := 0; i < b.N; i++ {
					copy(lo[off:n+off], hi[off:n+off])
				}
			})

			b.Run(fmt.Sprint("b_", n, off), func(b *testing.B) {
				b.SetBytes(int64(n))
				for i := 0; i < b.N; i++ {
					copy(hi[off:n+off], lo[off:n+off])
				}
			})
		}
	}
}
361
362func BenchmarkMemmoveUnalignedSrcOverlap(b *testing.B) {
363	benchmarkSizes(b, bufSizesOverlap, func(b *testing.B, n int) {
364		x := make([]byte, n+1)
365		for i := 0; i < b.N; i++ {
366			copy(x[1:n+1], x[:n])
367		}
368	})
369}
370
371func TestMemclr(t *testing.T) {
372	size := 512
373	if testing.Short() {
374		size = 128 + 16
375	}
376	mem := make([]byte, size)
377	for i := 0; i < size; i++ {
378		mem[i] = 0xee
379	}
380	for n := 0; n < size; n++ {
381		for x := 0; x <= size-n; x++ { // offset in mem
382			MemclrBytes(mem[x : x+n])
383			for i := 0; i < x; i++ {
384				if mem[i] != 0xee {
385					t.Fatalf("overwrite prefix mem[%d] = %d", i, mem[i])
386				}
387			}
388			for i := x; i < x+n; i++ {
389				if mem[i] != 0 {
390					t.Fatalf("failed clear mem[%d] = %d", i, mem[i])
391				}
392				mem[i] = 0xee
393			}
394			for i := x + n; i < size; i++ {
395				if mem[i] != 0xee {
396					t.Fatalf("overwrite suffix mem[%d] = %d", i, mem[i])
397				}
398			}
399		}
400	}
401}
402
403func BenchmarkMemclr(b *testing.B) {
404	for _, n := range []int{5, 16, 64, 256, 4096, 65536} {
405		x := make([]byte, n)
406		b.Run(fmt.Sprint(n), func(b *testing.B) {
407			b.SetBytes(int64(n))
408			for i := 0; i < b.N; i++ {
409				MemclrBytes(x)
410			}
411		})
412	}
413	for _, m := range []int{1, 4, 8, 16, 64} {
414		x := make([]byte, m<<20)
415		b.Run(fmt.Sprint(m, "M"), func(b *testing.B) {
416			b.SetBytes(int64(m << 20))
417			for i := 0; i < b.N; i++ {
418				MemclrBytes(x)
419			}
420		})
421	}
422}
423
424func BenchmarkMemclrUnaligned(b *testing.B) {
425	for _, off := range []int{0, 1, 4, 7} {
426		for _, n := range []int{5, 16, 64, 256, 4096, 65536} {
427			x := make([]byte, n+off)
428			b.Run(fmt.Sprint(off, n), func(b *testing.B) {
429				b.SetBytes(int64(n))
430				for i := 0; i < b.N; i++ {
431					MemclrBytes(x[off:])
432				}
433			})
434		}
435	}
436
437	for _, off := range []int{0, 1, 4, 7} {
438		for _, m := range []int{1, 4, 8, 16, 64} {
439			x := make([]byte, (m<<20)+off)
440			b.Run(fmt.Sprint(off, m, "M"), func(b *testing.B) {
441				b.SetBytes(int64(m << 20))
442				for i := 0; i < b.N; i++ {
443					MemclrBytes(x[off:])
444				}
445			})
446		}
447	}
448}
449
450func BenchmarkGoMemclr(b *testing.B) {
451	benchmarkSizes(b, []int{5, 16, 64, 256}, func(b *testing.B, n int) {
452		x := make([]byte, n)
453		for i := 0; i < b.N; i++ {
454			clear(x)
455		}
456	})
457}
458
459func BenchmarkMemclrRange(b *testing.B) {
460	type RunData struct {
461		data []int
462	}
463
464	benchSizes := []RunData{
465		{[]int{1043, 1078, 1894, 1582, 1044, 1165, 1467, 1100, 1919, 1562, 1932, 1645,
466			1412, 1038, 1576, 1200, 1029, 1336, 1095, 1494, 1350, 1025, 1502, 1548, 1316, 1296,
467			1868, 1639, 1546, 1626, 1642, 1308, 1726, 1665, 1678, 1187, 1515, 1598, 1353, 1237,
468			1977, 1452, 2012, 1914, 1514, 1136, 1975, 1618, 1536, 1695, 1600, 1733, 1392, 1099,
469			1358, 1996, 1224, 1783, 1197, 1838, 1460, 1556, 1554, 2020}}, // 1kb-2kb
470		{[]int{3964, 5139, 6573, 7775, 6553, 2413, 3466, 5394, 2469, 7336, 7091, 6745,
471			4028, 5643, 6164, 3475, 4138, 6908, 7559, 3335, 5660, 4122, 3945, 2082, 7564, 6584,
472			5111, 2288, 6789, 2797, 4928, 7986, 5163, 5447, 2999, 4968, 3174, 3202, 7908, 8137,
473			4735, 6161, 4646, 7592, 3083, 5329, 3687, 2754, 3599, 7231, 6455, 2549, 8063, 2189,
474			7121, 5048, 4277, 6626, 6306, 2815, 7473, 3963, 7549, 7255}}, // 2kb-8kb
475		{[]int{16304, 15936, 15760, 4736, 9136, 11184, 10160, 5952, 14560, 15744,
476			6624, 5872, 13088, 14656, 14192, 10304, 4112, 10384, 9344, 4496, 11392, 7024,
477			5200, 10064, 14784, 5808, 13504, 10480, 8512, 4896, 13264, 5600}}, // 4kb-16kb
478		{[]int{164576, 233136, 220224, 183280, 214112, 217248, 228560, 201728}}, // 128kb-256kb
479	}
480
481	for _, t := range benchSizes {
482		total := 0
483		minLen := 0
484		maxLen := 0
485
486		for _, clrLen := range t.data {
487			maxLen = max(maxLen, clrLen)
488			if clrLen < minLen || minLen == 0 {
489				minLen = clrLen
490			}
491			total += clrLen
492		}
493		buffer := make([]byte, maxLen)
494
495		text := ""
496		if minLen >= (1 << 20) {
497			text = fmt.Sprint(minLen>>20, "M ", (maxLen+(1<<20-1))>>20, "M")
498		} else if minLen >= (1 << 10) {
499			text = fmt.Sprint(minLen>>10, "K ", (maxLen+(1<<10-1))>>10, "K")
500		} else {
501			text = fmt.Sprint(minLen, " ", maxLen)
502		}
503		b.Run(text, func(b *testing.B) {
504			b.SetBytes(int64(total))
505			for i := 0; i < b.N; i++ {
506				for _, clrLen := range t.data {
507					MemclrBytes(buffer[:clrLen])
508				}
509			}
510		})
511	}
512}
513
514func BenchmarkClearFat7(b *testing.B) {
515	p := new([7]byte)
516	Escape(p)
517	b.ResetTimer()
518	for i := 0; i < b.N; i++ {
519		*p = [7]byte{}
520	}
521}
522
523func BenchmarkClearFat8(b *testing.B) {
524	p := new([8 / 4]uint32)
525	Escape(p)
526	b.ResetTimer()
527	for i := 0; i < b.N; i++ {
528		*p = [8 / 4]uint32{}
529	}
530}
531
532func BenchmarkClearFat11(b *testing.B) {
533	p := new([11]byte)
534	Escape(p)
535	b.ResetTimer()
536	for i := 0; i < b.N; i++ {
537		*p = [11]byte{}
538	}
539}
540
541func BenchmarkClearFat12(b *testing.B) {
542	p := new([12 / 4]uint32)
543	Escape(p)
544	b.ResetTimer()
545	for i := 0; i < b.N; i++ {
546		*p = [12 / 4]uint32{}
547	}
548}
549
550func BenchmarkClearFat13(b *testing.B) {
551	p := new([13]byte)
552	Escape(p)
553	b.ResetTimer()
554	for i := 0; i < b.N; i++ {
555		*p = [13]byte{}
556	}
557}
558
559func BenchmarkClearFat14(b *testing.B) {
560	p := new([14]byte)
561	Escape(p)
562	b.ResetTimer()
563	for i := 0; i < b.N; i++ {
564		*p = [14]byte{}
565	}
566}
567
568func BenchmarkClearFat15(b *testing.B) {
569	p := new([15]byte)
570	Escape(p)
571	b.ResetTimer()
572	for i := 0; i < b.N; i++ {
573		*p = [15]byte{}
574	}
575}
576
577func BenchmarkClearFat16(b *testing.B) {
578	p := new([16 / 4]uint32)
579	Escape(p)
580	b.ResetTimer()
581	for i := 0; i < b.N; i++ {
582		*p = [16 / 4]uint32{}
583	}
584}
585
586func BenchmarkClearFat24(b *testing.B) {
587	p := new([24 / 4]uint32)
588	Escape(p)
589	b.ResetTimer()
590	for i := 0; i < b.N; i++ {
591		*p = [24 / 4]uint32{}
592	}
593}
594
595func BenchmarkClearFat32(b *testing.B) {
596	p := new([32 / 4]uint32)
597	Escape(p)
598	b.ResetTimer()
599	for i := 0; i < b.N; i++ {
600		*p = [32 / 4]uint32{}
601	}
602}
603
604func BenchmarkClearFat40(b *testing.B) {
605	p := new([40 / 4]uint32)
606	Escape(p)
607	b.ResetTimer()
608	for i := 0; i < b.N; i++ {
609		*p = [40 / 4]uint32{}
610	}
611}
612
613func BenchmarkClearFat48(b *testing.B) {
614	p := new([48 / 4]uint32)
615	Escape(p)
616	b.ResetTimer()
617	for i := 0; i < b.N; i++ {
618		*p = [48 / 4]uint32{}
619	}
620}
621
622func BenchmarkClearFat56(b *testing.B) {
623	p := new([56 / 4]uint32)
624	Escape(p)
625	b.ResetTimer()
626	for i := 0; i < b.N; i++ {
627		*p = [56 / 4]uint32{}
628	}
629}
630
631func BenchmarkClearFat64(b *testing.B) {
632	p := new([64 / 4]uint32)
633	Escape(p)
634	b.ResetTimer()
635	for i := 0; i < b.N; i++ {
636		*p = [64 / 4]uint32{}
637	}
638}
639
640func BenchmarkClearFat72(b *testing.B) {
641	p := new([72 / 4]uint32)
642	Escape(p)
643	b.ResetTimer()
644	for i := 0; i < b.N; i++ {
645		*p = [72 / 4]uint32{}
646	}
647}
648
649func BenchmarkClearFat128(b *testing.B) {
650	p := new([128 / 4]uint32)
651	Escape(p)
652	b.ResetTimer()
653	for i := 0; i < b.N; i++ {
654		*p = [128 / 4]uint32{}
655	}
656}
657
658func BenchmarkClearFat256(b *testing.B) {
659	p := new([256 / 4]uint32)
660	Escape(p)
661	b.ResetTimer()
662	for i := 0; i < b.N; i++ {
663		*p = [256 / 4]uint32{}
664	}
665}
666
667func BenchmarkClearFat512(b *testing.B) {
668	p := new([512 / 4]uint32)
669	Escape(p)
670	b.ResetTimer()
671	for i := 0; i < b.N; i++ {
672		*p = [512 / 4]uint32{}
673	}
674}
675
676func BenchmarkClearFat1024(b *testing.B) {
677	p := new([1024 / 4]uint32)
678	Escape(p)
679	b.ResetTimer()
680	for i := 0; i < b.N; i++ {
681		*p = [1024 / 4]uint32{}
682	}
683}
684
685func BenchmarkClearFat1032(b *testing.B) {
686	p := new([1032 / 4]uint32)
687	Escape(p)
688	b.ResetTimer()
689	for i := 0; i < b.N; i++ {
690		*p = [1032 / 4]uint32{}
691	}
692}
693
694func BenchmarkClearFat1040(b *testing.B) {
695	p := new([1040 / 4]uint32)
696	Escape(p)
697	b.ResetTimer()
698	for i := 0; i < b.N; i++ {
699		*p = [1040 / 4]uint32{}
700	}
701}
702
703func BenchmarkCopyFat7(b *testing.B) {
704	var x [7]byte
705	p := new([7]byte)
706	Escape(p)
707	b.ResetTimer()
708	for i := 0; i < b.N; i++ {
709		*p = x
710	}
711}
712
713func BenchmarkCopyFat8(b *testing.B) {
714	var x [8 / 4]uint32
715	p := new([8 / 4]uint32)
716	Escape(p)
717	b.ResetTimer()
718	for i := 0; i < b.N; i++ {
719		*p = x
720	}
721}
722
723func BenchmarkCopyFat11(b *testing.B) {
724	var x [11]byte
725	p := new([11]byte)
726	Escape(p)
727	b.ResetTimer()
728	for i := 0; i < b.N; i++ {
729		*p = x
730	}
731}
732
733func BenchmarkCopyFat12(b *testing.B) {
734	var x [12 / 4]uint32
735	p := new([12 / 4]uint32)
736	Escape(p)
737	b.ResetTimer()
738	for i := 0; i < b.N; i++ {
739		*p = x
740	}
741}
742
743func BenchmarkCopyFat13(b *testing.B) {
744	var x [13]byte
745	p := new([13]byte)
746	Escape(p)
747	b.ResetTimer()
748	for i := 0; i < b.N; i++ {
749		*p = x
750	}
751}
752
753func BenchmarkCopyFat14(b *testing.B) {
754	var x [14]byte
755	p := new([14]byte)
756	Escape(p)
757	b.ResetTimer()
758	for i := 0; i < b.N; i++ {
759		*p = x
760	}
761}
762
763func BenchmarkCopyFat15(b *testing.B) {
764	var x [15]byte
765	p := new([15]byte)
766	Escape(p)
767	b.ResetTimer()
768	for i := 0; i < b.N; i++ {
769		*p = x
770	}
771}
772
773func BenchmarkCopyFat16(b *testing.B) {
774	var x [16 / 4]uint32
775	p := new([16 / 4]uint32)
776	Escape(p)
777	b.ResetTimer()
778	for i := 0; i < b.N; i++ {
779		*p = x
780	}
781}
782
783func BenchmarkCopyFat24(b *testing.B) {
784	var x [24 / 4]uint32
785	p := new([24 / 4]uint32)
786	Escape(p)
787	b.ResetTimer()
788	for i := 0; i < b.N; i++ {
789		*p = x
790	}
791}
792
793func BenchmarkCopyFat32(b *testing.B) {
794	var x [32 / 4]uint32
795	p := new([32 / 4]uint32)
796	Escape(p)
797	b.ResetTimer()
798	for i := 0; i < b.N; i++ {
799		*p = x
800	}
801}
802
803func BenchmarkCopyFat64(b *testing.B) {
804	var x [64 / 4]uint32
805	p := new([64 / 4]uint32)
806	Escape(p)
807	b.ResetTimer()
808	for i := 0; i < b.N; i++ {
809		*p = x
810	}
811}
812
813func BenchmarkCopyFat72(b *testing.B) {
814	var x [72 / 4]uint32
815	p := new([72 / 4]uint32)
816	Escape(p)
817	b.ResetTimer()
818	for i := 0; i < b.N; i++ {
819		*p = x
820	}
821}
822
823func BenchmarkCopyFat128(b *testing.B) {
824	var x [128 / 4]uint32
825	p := new([128 / 4]uint32)
826	Escape(p)
827	b.ResetTimer()
828	for i := 0; i < b.N; i++ {
829		*p = x
830	}
831}
832
833func BenchmarkCopyFat256(b *testing.B) {
834	var x [256 / 4]uint32
835	p := new([256 / 4]uint32)
836	Escape(p)
837	b.ResetTimer()
838	for i := 0; i < b.N; i++ {
839		*p = x
840	}
841}
842
843func BenchmarkCopyFat512(b *testing.B) {
844	var x [512 / 4]uint32
845	p := new([512 / 4]uint32)
846	Escape(p)
847	b.ResetTimer()
848	for i := 0; i < b.N; i++ {
849		*p = x
850	}
851}
852
853func BenchmarkCopyFat520(b *testing.B) {
854	var x [520 / 4]uint32
855	p := new([520 / 4]uint32)
856	Escape(p)
857	b.ResetTimer()
858	for i := 0; i < b.N; i++ {
859		*p = x
860	}
861}
862
863func BenchmarkCopyFat1024(b *testing.B) {
864	var x [1024 / 4]uint32
865	p := new([1024 / 4]uint32)
866	Escape(p)
867	b.ResetTimer()
868	for i := 0; i < b.N; i++ {
869		*p = x
870	}
871}
872
873func BenchmarkCopyFat1032(b *testing.B) {
874	var x [1032 / 4]uint32
875	p := new([1032 / 4]uint32)
876	Escape(p)
877	b.ResetTimer()
878	for i := 0; i < b.N; i++ {
879		*p = x
880	}
881}
882
883func BenchmarkCopyFat1040(b *testing.B) {
884	var x [1040 / 4]uint32
885	p := new([1040 / 4]uint32)
886	Escape(p)
887	b.ResetTimer()
888	for i := 0; i < b.N; i++ {
889		*p = x
890	}
891}
892
893// BenchmarkIssue18740 ensures that memmove uses 4 and 8 byte load/store to move 4 and 8 bytes.
894// It used to do 2 2-byte load/stores, which leads to a pipeline stall
895// when we try to read the result with one 4-byte load.
896func BenchmarkIssue18740(b *testing.B) {
897	benchmarks := []struct {
898		name  string
899		nbyte int
900		f     func([]byte) uint64
901	}{
902		{"2byte", 2, func(buf []byte) uint64 { return uint64(binary.LittleEndian.Uint16(buf)) }},
903		{"4byte", 4, func(buf []byte) uint64 { return uint64(binary.LittleEndian.Uint32(buf)) }},
904		{"8byte", 8, func(buf []byte) uint64 { return binary.LittleEndian.Uint64(buf) }},
905	}
906
907	var g [4096]byte
908	for _, bm := range benchmarks {
909		buf := make([]byte, bm.nbyte)
910		b.Run(bm.name, func(b *testing.B) {
911			for j := 0; j < b.N; j++ {
912				for i := 0; i < 4096; i += bm.nbyte {
913					copy(buf[:], g[i:])
914					sink += bm.f(buf[:])
915				}
916			}
917		})
918	}
919}
920
// memclrSink is assigned a slice of the local array at the end of each
// BenchmarkMemclrKnownSize benchmark.
// NOTE(review): presumably this keeps the array observably used so its clearing loop is not optimized away — confirm.
var memclrSink []int8
922
923func BenchmarkMemclrKnownSize1(b *testing.B) {
924	var x [1]int8
925
926	b.SetBytes(1)
927	for i := 0; i < b.N; i++ {
928		for a := range x {
929			x[a] = 0
930		}
931	}
932
933	memclrSink = x[:]
934}
935func BenchmarkMemclrKnownSize2(b *testing.B) {
936	var x [2]int8
937
938	b.SetBytes(2)
939	for i := 0; i < b.N; i++ {
940		for a := range x {
941			x[a] = 0
942		}
943	}
944
945	memclrSink = x[:]
946}
947func BenchmarkMemclrKnownSize4(b *testing.B) {
948	var x [4]int8
949
950	b.SetBytes(4)
951	for i := 0; i < b.N; i++ {
952		for a := range x {
953			x[a] = 0
954		}
955	}
956
957	memclrSink = x[:]
958}
959func BenchmarkMemclrKnownSize8(b *testing.B) {
960	var x [8]int8
961
962	b.SetBytes(8)
963	for i := 0; i < b.N; i++ {
964		for a := range x {
965			x[a] = 0
966		}
967	}
968
969	memclrSink = x[:]
970}
971func BenchmarkMemclrKnownSize16(b *testing.B) {
972	var x [16]int8
973
974	b.SetBytes(16)
975	for i := 0; i < b.N; i++ {
976		for a := range x {
977			x[a] = 0
978		}
979	}
980
981	memclrSink = x[:]
982}
983func BenchmarkMemclrKnownSize32(b *testing.B) {
984	var x [32]int8
985
986	b.SetBytes(32)
987	for i := 0; i < b.N; i++ {
988		for a := range x {
989			x[a] = 0
990		}
991	}
992
993	memclrSink = x[:]
994}
995func BenchmarkMemclrKnownSize64(b *testing.B) {
996	var x [64]int8
997
998	b.SetBytes(64)
999	for i := 0; i < b.N; i++ {
1000		for a := range x {
1001			x[a] = 0
1002		}
1003	}
1004
1005	memclrSink = x[:]
1006}
1007func BenchmarkMemclrKnownSize112(b *testing.B) {
1008	var x [112]int8
1009
1010	b.SetBytes(112)
1011	for i := 0; i < b.N; i++ {
1012		for a := range x {
1013			x[a] = 0
1014		}
1015	}
1016
1017	memclrSink = x[:]
1018}
1019
1020func BenchmarkMemclrKnownSize128(b *testing.B) {
1021	var x [128]int8
1022
1023	b.SetBytes(128)
1024	for i := 0; i < b.N; i++ {
1025		for a := range x {
1026			x[a] = 0
1027		}
1028	}
1029
1030	memclrSink = x[:]
1031}
1032
1033func BenchmarkMemclrKnownSize192(b *testing.B) {
1034	var x [192]int8
1035
1036	b.SetBytes(192)
1037	for i := 0; i < b.N; i++ {
1038		for a := range x {
1039			x[a] = 0
1040		}
1041	}
1042
1043	memclrSink = x[:]
1044}
1045
1046func BenchmarkMemclrKnownSize248(b *testing.B) {
1047	var x [248]int8
1048
1049	b.SetBytes(248)
1050	for i := 0; i < b.N; i++ {
1051		for a := range x {
1052			x[a] = 0
1053		}
1054	}
1055
1056	memclrSink = x[:]
1057}
1058
1059func BenchmarkMemclrKnownSize256(b *testing.B) {
1060	var x [256]int8
1061
1062	b.SetBytes(256)
1063	for i := 0; i < b.N; i++ {
1064		for a := range x {
1065			x[a] = 0
1066		}
1067	}
1068
1069	memclrSink = x[:]
1070}
1071func BenchmarkMemclrKnownSize512(b *testing.B) {
1072	var x [512]int8
1073
1074	b.SetBytes(512)
1075	for i := 0; i < b.N; i++ {
1076		for a := range x {
1077			x[a] = 0
1078		}
1079	}
1080
1081	memclrSink = x[:]
1082}
1083func BenchmarkMemclrKnownSize1024(b *testing.B) {
1084	var x [1024]int8
1085
1086	b.SetBytes(1024)
1087	for i := 0; i < b.N; i++ {
1088		for a := range x {
1089			x[a] = 0
1090		}
1091	}
1092
1093	memclrSink = x[:]
1094}
1095func BenchmarkMemclrKnownSize4096(b *testing.B) {
1096	var x [4096]int8
1097
1098	b.SetBytes(4096)
1099	for i := 0; i < b.N; i++ {
1100		for a := range x {
1101			x[a] = 0
1102		}
1103	}
1104
1105	memclrSink = x[:]
1106}
1107func BenchmarkMemclrKnownSize512KiB(b *testing.B) {
1108	var x [524288]int8
1109
1110	b.SetBytes(524288)
1111	for i := 0; i < b.N; i++ {
1112		for a := range x {
1113			x[a] = 0
1114		}
1115	}
1116
1117	memclrSink = x[:]
1118}
1119