1// Copyright 2018 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5#include "go_asm.h"
6#include "textflag.h"
7
// ·Index is the entry stub for searching with a/b passed at 0(FP) and 24(FP).
// The 56-byte argument frame (base+len at 0/8, base+len at 24/32, result at 48)
// is consistent with two slice headers and a word-sized result; the Go
// declaration itself is not visible here, so confirm against the package.
//
// It only marshals operands into the registers indexbody<> expects
// (R0/R1 = haystack pointer/length, R2/R3 = needle pointer/length,
// R9 = address of the result slot) and then branches there. indexbody<>
// writes the result through R9 and returns directly to our caller.
TEXT ·Index(SB),NOSPLIT,$0-56
	MOVD	$ret+48(FP), R9		// R9 = &ret (address, not value)
	MOVD	a_base+0(FP), R0	// R0 = haystack pointer
	MOVD	b_base+24(FP), R2	// R2 = needle pointer
	MOVD	a_len+8(FP), R1		// R1 = haystack length
	MOVD	b_len+32(FP), R3	// R3 = needle length
	B	indexbody<>(SB)
15
// ·IndexString is the entry stub for searching with a/b passed at 0(FP) and
// 16(FP). The 40-byte argument frame (base+len at 0/8, base+len at 16/24,
// result at 32) is consistent with two string headers and a word-sized
// result; the Go declaration is not visible here, so confirm against the
// package.
//
// Like ·Index, it loads R0/R1 = haystack pointer/length, R2/R3 = needle
// pointer/length and R9 = address of the result slot, then branches to
// indexbody<>, which stores the result through R9 and returns to our caller.
TEXT ·IndexString(SB),NOSPLIT,$0-40
	MOVD	$ret+32(FP), R9		// R9 = &ret (address, not value)
	MOVD	a_base+0(FP), R0	// R0 = haystack pointer
	MOVD	b_base+16(FP), R2	// R2 = needle pointer
	MOVD	a_len+8(FP), R1		// R1 = haystack length
	MOVD	b_len+24(FP), R3	// R3 = needle length
	B	indexbody<>(SB)
23
// indexbody<> locates the first occurrence of a short needle in a haystack.
//
// Register contract on entry:
//   R0: haystack pointer
//   R1: haystack length in bytes
//   R2: needle pointer
//   R3: needle length in bytes (callers must guarantee 2 <= len <= 32)
//   R9: address that receives the result
//
// Result: the byte offset of the first match, or -1 if there is none,
// stored as a 64-bit value at (R9).
//
// Overwrites R0, R3-R8, R10-R12 and the condition flags.
//
// Approach: the needle is read into registers once, before the scan, so the
// scan loop only touches the haystack. Each length class compares the
// candidate window in 2/4/8-byte pieces; where the length is not a multiple
// of the piece size, the final piece is anchored at the end of the needle
// and overlaps the previous one. Every load stays inside the needle or the
// current candidate window.
TEXT indexbody<>(SB),NOSPLIT,$0-56
	// R8 = haystack base + 1. Every scan loop post-increments R0 by one
	// while loading, so at a match R0 - R8 is the offset of that match.
	ADD	$1, R0, R8
	// R4 = haystack base + (haystack length - needle length): the last
	// address at which the needle can still start.
	SUB	R3, R1, R4
	ADD	R0, R4

	// Dispatch on the needle length.
	CMP	$8, R3
	BHI	sep_9_32
	TBZ	$3, R3, sep_2_7		// length <= 8 and bit 3 clear: 2..7
sep_8:
	MOVD	(R2), R5		// R5 = the whole 8-byte needle
scan_8:
	CMP	R4, R0
	BHI	miss			// R0 is past the last candidate start
	MOVD.P	1(R0), R6		// R6 = 8 window bytes; R0 advances by 1
	CMP	R5, R6
	BNE	scan_8
	B	hit
sep_2_7:
	TBZ	$2, R3, sep_2_3		// bit 2 clear: length is 2 or 3
	TBZ	$1, R3, sep_4_5		// bit 1 clear: length is 4 or 5
	TBZ	$0, R3, sep_6		// bit 0 clear: length is 6
sep_7:
	MOVWU	3(R2), R6		// R6 = needle bytes 3..6
	MOVWU	(R2), R5		// R5 = needle bytes 0..3 (byte 3 is in both)
scan_7:
	CMP	R4, R0
	BHI	miss
	MOVWU.P	1(R0), R3		// needle length no longer needed: R3 is scratch
	CMP	R5, R3
	BNE	scan_7
	MOVWU	2(R0), R3		// R0 already advanced, so this is window bytes 3..6
	CMP	R6, R3
	BNE	scan_7
	B	hit
sep_6:
	MOVHU	4(R2), R6		// R6 = needle bytes 4..5
	MOVWU	(R2), R5		// R5 = needle bytes 0..3
scan_6:
	CMP	R4, R0
	BHI	miss
	MOVWU.P	1(R0), R3
	CMP	R5, R3
	BNE	scan_6
	MOVHU	3(R0), R3		// window bytes 4..5
	CMP	R6, R3
	BNE	scan_6
	B	hit
sep_4_5:
	TBZ	$0, R3, sep_4		// bit 0 clear: length is 4
sep_5:
	MOVBU	4(R2), R7		// R7 = needle byte 4
	MOVWU	(R2), R5		// R5 = needle bytes 0..3
scan_5:
	CMP	R4, R0
	BHI	miss
	MOVWU.P	1(R0), R3
	CMP	R5, R3
	BNE	scan_5
	MOVBU	3(R0), R3		// window byte 4
	CMP	R7, R3
	BNE	scan_5
	B	hit
sep_4:
	MOVWU	(R2), R5		// R5 = the whole 4-byte needle
scan_4:
	CMP	R4, R0
	BHI	miss
	MOVWU.P	1(R0), R6
	CMP	R5, R6
	BNE	scan_4
	B	hit
sep_2_3:
	TBZ	$0, R3, sep_2		// bit 0 clear: length is 2
sep_3:
	MOVBU	2(R2), R7		// R7 = needle byte 2
	MOVHU	(R2), R6		// R6 = needle bytes 0..1
scan_3:
	CMP	R4, R0
	BHI	miss
	MOVHU.P	1(R0), R3
	CMP	R6, R3
	BNE	scan_3
	MOVBU	1(R0), R3		// window byte 2
	CMP	R7, R3
	BNE	scan_3
	B	hit
sep_2:
	MOVHU	(R2), R5		// R5 = the whole 2-byte needle
scan_2:
	CMP	R4, R0
	BHI	miss
	MOVHU.P	1(R0), R6
	CMP	R5, R6
	BNE	scan_2
	B	hit
sep_9_32:
	// R11 = len(needle) - 9. Once R0 has been post-incremented in a scan
	// loop, (R0)(R11) addresses the last 8 bytes of the candidate window.
	SUB	$9, R3, R11
	CMP	$16, R3
	BHI	sep_17_32
sep_9_16:
	MOVD	(R2), R5		// R5 = needle bytes 0..7
	SUB	$8, R3, R7		// R7 = offset of the needle's last 8 bytes
	MOVD	(R2)(R7), R6		// R6 = last 8 needle bytes (may overlap R5)
scan_9_16:
	CMP	R4, R0
	BHI	miss
	MOVD.P	1(R0), R7		// R7 is reused as the window scratch from here on
	CMP	R5, R7
	BNE	scan_9_16
	MOVD	(R0)(R11), R7		// last 8 bytes of the window
	CMP	R6, R7
	BNE	scan_9_16
	B	hit
sep_17_32:
	CMP	$24, R3
	BHI	sep_25_32
sep_17_24:
	LDP	(R2), (R5, R6)		// R5, R6 = needle bytes 0..15
	SUB	$8, R3, R10		// R10 = offset of the needle's last 8 bytes
	MOVD	(R2)(R10), R7		// R7 = last 8 needle bytes (may overlap R6)
scan_17_24:
	CMP	R4, R0
	BHI	miss
	MOVD.P	1(R0), R10		// R10 is reused as the window scratch from here on
	CMP	R5, R10
	BNE	scan_17_24
	MOVD	7(R0), R10		// window bytes 8..15 (R0 already advanced)
	CMP	R6, R10
	BNE	scan_17_24
	MOVD	(R0)(R11), R10		// last 8 bytes of the window
	CMP	R7, R10
	BNE	scan_17_24
	B	hit
sep_25_32:
	LDP	(R2), (R5, R6)		// R5, R6 = needle bytes 0..15
	MOVD	16(R2), R7		// R7 = needle bytes 16..23
	SUB	$8, R3, R12		// R12 = offset of the needle's last 8 bytes
	MOVD	(R2)(R12), R10		// R10 = last 8 needle bytes (may overlap R7)
scan_25_32:
	CMP	R4, R0
	BHI	miss
	MOVD.P	1(R0), R12		// R12 is reused as the window scratch from here on
	CMP	R5, R12
	BNE	scan_25_32
	MOVD	7(R0), R12		// window bytes 8..15
	CMP	R6, R12
	BNE	scan_25_32
	MOVD	15(R0), R12		// window bytes 16..23
	CMP	R7, R12
	BNE	scan_25_32
	MOVD	(R0)(R11), R12		// last 8 bytes of the window
	CMP	R10, R12
	BNE	scan_25_32
	B	hit
miss:
	// No candidate matched: report -1.
	MOVD	$-1, R0
	MOVD	R0, (R9)
	RET
hit:
	// R0 is one past the start of the match and R8 is one past the
	// haystack base, so their difference is the match offset.
	SUB	R8, R0
	MOVD	R0, (R9)
	RET
207