1// Copyright 2018 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5#include "go_asm.h"
6#include "textflag.h"
7
// memequal(a, b unsafe.Pointer, size uintptr) bool
//
// Entry point for comparing size bytes at a with size bytes at b.
// Arguments arrive in registers: R0 = a, R1 = b, R2 = size.
// The boolean result is returned in R0.
TEXT runtime·memequal<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-25
	// Identical pointers always compare equal.
	CMP	R1, R0
	BEQ	same
	// So does a zero-length range.
	CBZ	R2, same
	// Otherwise hand R0/R1/R2 unchanged to the shared body, which returns
	// directly to our caller.
	B	memeqbody<>(SB)
same:
	MOVD	$1, R0
	RET
19
// memequal_varlen(a, b unsafe.Pointer) bool
//
// Like memequal, but the length is not an argument: it is read from the
// closure that R26 points at. R0 = a, R1 = b; the result is returned in R0.
TEXT runtime·memequal_varlen<ABIInternal>(SB),NOSPLIT,$0-17
	// Identical pointers: equal without even fetching the length.
	CMP	R1, R0
	BEQ	same
	// The compiler stores the size at offset 8 in the closure.
	MOVD	8(R26), R2
	// Nothing to compare when the size is zero.
	CBZ	R2, same
	// R0/R1/R2 now match what the shared body expects.
	B	memeqbody<>(SB)
same:
	MOVD	$1, R0
	RET
30
// memeqbody is the comparison routine that the memequal entry points
// branch to.
//
// On entry:
//   R0 = pointer a
//   R1 = pointer b
//   R2 = number of bytes to compare
// On return:
//   R0 = 1 if all bytes matched, 0 otherwise
//
// R3-R7, R9, V0-V11 and the condition flags are used as scratch, and
// R0/R1/R2 are advanced/reduced as data is consumed.
TEXT memeqbody<>(SB),NOSPLIT,$0
	// A single byte gets its own fast exit.
	CMP	$1, R2
	BEQ	single_byte
	// Anything shorter than 16 bytes is handled entirely by the tail code.
	CMP	$16, R2
	BLO	short_tail
	// R3 = length rounded down to a multiple of 64.
	BIC	$0x3f, R2, R3
	CBZ	R3, blocks16
	ADD	R3, R0, R6	// R6 = value of R0 once all 64-byte blocks are consumed
loop64:
	// Fetch 64 bytes from each side; both pointers post-increment.
	VLD1.P	(R0), [V0.D2, V1.D2, V2.D2, V3.D2]
	VLD1.P	(R1), [V4.D2, V5.D2, V6.D2, V7.D2]
	// Each 64-bit lane becomes all ones if equal, all zeros if not.
	VCMEQ	V4.D2, V0.D2, V8.D2
	VCMEQ	V5.D2, V1.D2, V9.D2
	VCMEQ	V6.D2, V2.D2, V10.D2
	VCMEQ	V7.D2, V3.D2, V11.D2
	// Fold the four results into V8; a zero lane means some block differed.
	VAND	V9.B16, V8.B16, V8.B16
	VAND	V11.B16, V10.B16, V10.B16
	VAND	V10.B16, V8.B16, V8.B16
	// The flags set here feed the BNE at the bottom of the loop; the
	// VMOV and CBZ instructions in between do not modify them.
	CMP	R6, R0
	VMOV	V8.D[0], R4
	VMOV	V8.D[1], R5
	CBZ	R4, mismatch
	CBZ	R5, mismatch
	BNE	loop64
	// Keep only the byte count the 64-byte loop did not cover.
	AND	$0x3f, R2, R2
	CBZ	R2, match
blocks16:
	// R3 = remaining length rounded down to a multiple of 16.
	BIC	$0xf, R2, R3
	CBZ	R3, short_tail
	ADD	R3, R0, R6	// R6 = value of R0 once all 16-byte blocks are consumed
loop16:
	// Compare two 64-bit words per iteration via XOR.
	LDP.P	16(R0), (R4, R5)
	LDP.P	16(R1), (R7, R9)
	EOR	R4, R7, R7
	CBNZ	R7, mismatch
	EOR	R5, R9, R9
	CBNZ	R9, mismatch
	CMP	R6, R0
	BNE	loop16
	// Keep only the byte count the 16-byte loop did not cover.
	AND	$0xf, R2, R2
	CBZ	R2, match
short_tail:
	// Fewer than 16 bytes remain. Bit 3 set means 8..15 bytes: compare the
	// first 8 and the last 8, which may overlap.
	TBZ	$3, R2, under8
	MOVD	(R0), R4
	MOVD	(R1), R5
	EOR	R4, R5, R5
	CBNZ	R5, mismatch
	SUB	$8, R2, R6	// R6 = offset of the final 8 bytes
	MOVD	(R0)(R6), R4
	MOVD	(R1)(R6), R5
	EOR	R4, R5, R5
	CBNZ	R5, mismatch
	B	match
under8:
	// Bit 2 set means 4..7 bytes: same approach with 32-bit loads.
	TBZ	$2, R2, under4
	MOVWU	(R0), R4
	MOVWU	(R1), R5
	EOR	R4, R5, R5
	CBNZ	R5, mismatch
	SUB	$4, R2, R6	// R6 = offset of the final 4 bytes
	MOVWU	(R0)(R6), R4
	MOVWU	(R1)(R6), R5
	EOR	R4, R5, R5
	CBNZ	R5, mismatch
	B	match
under4:
	// Bit 1 set means 2..3 bytes: compare a halfword and step both
	// pointers past it so a trailing byte can be checked below.
	TBZ	$1, R2, under2
	MOVHU.P	2(R0), R4
	MOVHU.P	2(R1), R5
	EOR	R4, R5, R5
	CBNZ	R5, mismatch
under2:
	// Bit 0 set means one last byte is still pending.
	TBZ	$0, R2, match
single_byte:
	MOVBU	(R0), R4
	MOVBU	(R1), R5
	EOR	R4, R5, R5
	CBNZ	R5, mismatch
match:
	MOVD	$1, R0
	RET
mismatch:
	MOVB	ZR, R0
	RET
125