xref: /aosp_15_r20/external/cronet/third_party/boringssl/src/gen/bcm/aesv8-armv7-linux.S (revision 6777b5387eb2ff775bb5750e3f5d96f37fb7352b)
1// This file is generated from a similarly-named Perl script in the BoringSSL
2// source tree. Do not edit by hand.
3
4#include <openssl/asm_base.h>
5
6#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_ARM) && defined(__ELF__)
7#include <openssl/arm_arch.h>
8
9#if __ARM_MAX_ARCH__>=7
@ Assembler setup, followed by the constant table used by key expansion.
10.text
11.arch	armv7-a	@ don't confuse not-so-latest binutils with armv8 :-)
12.fpu	neon
13.code	32
14#undef	__thumb2__
15.align	5
@ .Lrcon is read through r3 by aes_hw_set_encrypt_key, 16 bytes per row:
@   row 0: 0x01 in every 32-bit lane, the starting round constant (q1)
@   row 1: byte-index mask for vtbl.8 (q2); in little-endian byte order the
@          indices are 13,14,15,12 per lane, so the lookup takes the last
@          word of the table register, rotates it by one byte and copies it
@          to every lane
@   row 2: 0x1b in every lane, loaded into q1 after the eight loop rounds
@          of the 128-bit path
16.Lrcon:
17.long	0x01,0x01,0x01,0x01
18.long	0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d	@ rotate-n-splat
19.long	0x1b,0x1b,0x1b,0x1b
20
21.text
22
@ aes_hw_set_encrypt_key: expand a user key into the AES round-key schedule.
@
@ Inputs, as read by the code below:
@   r0 = pointer to the key bytes; rejected when zero
@   r1 = key length in bits; 128, 192 and 256 are accepted
@   r2 = pointer to the output schedule; rejected when zero
@ Result in r0:
@    0  success
@   -1  r0 or r2 was zero
@   -2  r1 below 128, above 256, or not a multiple of 64
@ On success the round keys are stored upwards from r2 and the round count
@ (10, 12 or 14) is stored as a word at offset 240 from the original r2.
@ At .Ldone r2 points at that word and r12 holds the round count;
@ aes_hw_set_decrypt_key depends on both after calling .Lenc_key.
@ Clobbers r1, r3, r12, q0-q3, q8-q10 and the flags; r0 and r2 are advanced.
23.globl	aes_hw_set_encrypt_key
24.hidden	aes_hw_set_encrypt_key
25.type	aes_hw_set_encrypt_key,%function
26.align	5
27aes_hw_set_encrypt_key:
28.Lenc_key:
29	mov	r3,#-1		@ provisional result: zero pointer
30	cmp	r0,#0
31	beq	.Lenc_key_abort
32	cmp	r2,#0
33	beq	.Lenc_key_abort
34	mov	r3,#-2		@ provisional result: unsupported key length
35	cmp	r1,#128
36	blt	.Lenc_key_abort
37	cmp	r1,#256
38	bgt	.Lenc_key_abort
39	tst	r1,#0x3f		@ length must be a multiple of 64 bits
40	bne	.Lenc_key_abort
41
42	adr	r3,.Lrcon
43	cmp	r1,#192		@ flags stay live until the three-way branch below
44
45	veor	q0,q0,q0		@ q0 = 0 for the rest of the function
46	vld1.8	{q3},[r0]!		@ q3 = first 16 key bytes
47	mov	r1,#8		@ reuse r1 as the round-loop counter
48	vld1.32	{q1,q2},[r3]!		@ q1 = round constant, q2 = vtbl mask; r3 -> 0x1b row
49
50	blt	.Loop128
51	beq	.L192
52	b	.L256
53
54.align	4
@ 128-bit keys: eight looped rounds, then two unrolled rounds with the
@ round constant reloaded from the 0x1b row.
55.Loop128:
56	vtbl.8	d20,{q3},d4		@ q10 = last word of q3, rotated and splatted
57	vtbl.8	d21,{q3},d5
58	vext.8	q9,q0,q3,#12		@ q9 = q3 moved up one word, zero-filled from q0
59	vst1.32	{q3},[r2]!		@ emit the current round key
60.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0
61	subs	r1,r1,#1
62
63	veor	q3,q3,q9		@ these three XORs turn q3 into the running
64	vext.8	q9,q0,q9,#12		@ XOR of its own words
65	veor	q3,q3,q9
66	vext.8	q9,q0,q9,#12
67	veor	q10,q10,q1		@ fold in the round constant
68	veor	q3,q3,q9
69	vshl.u8	q1,q1,#1		@ double the round constant for the next round
70	veor	q3,q3,q10		@ q3 = next round key
71	bne	.Loop128
72
73	vld1.32	{q1},[r3]		@ q1 = 0x1b row of .Lrcon
74
75	vtbl.8	d20,{q3},d4
76	vtbl.8	d21,{q3},d5
77	vext.8	q9,q0,q3,#12
78	vst1.32	{q3},[r2]!
79.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0
80
81	veor	q3,q3,q9
82	vext.8	q9,q0,q9,#12
83	veor	q3,q3,q9
84	vext.8	q9,q0,q9,#12
85	veor	q10,q10,q1
86	veor	q3,q3,q9
87	vshl.u8	q1,q1,#1
88	veor	q3,q3,q10
89
90	vtbl.8	d20,{q3},d4
91	vtbl.8	d21,{q3},d5
92	vext.8	q9,q0,q3,#12
93	vst1.32	{q3},[r2]!
94.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0
95
96	veor	q3,q3,q9
97	vext.8	q9,q0,q9,#12
98	veor	q3,q3,q9
99	vext.8	q9,q0,q9,#12
100	veor	q10,q10,q1
101	veor	q3,q3,q9
102	veor	q3,q3,q10
103	vst1.32	{q3},[r2]		@ last round key, stored without writeback
104	add	r2,r2,#0x50		@ r2 = original r2 + 240
105
106	mov	r12,#10		@ round count for 128-bit keys
107	b	.Ldone
108
109.align	4
@ 192-bit keys: each of the eight iterations stores 24 bytes of schedule
@ (d16, then q3).
110.L192:
111	vld1.8	{d16},[r0]!		@ low half of q8 = key bytes 16..23
112	vmov.i8	q10,#8			@ borrow q10
113	vst1.32	{q3},[r2]!
114	vsub.i8	q2,q2,q10	@ adjust the mask
115
116.Loop192:
117	vtbl.8	d20,{q8},d4
118	vtbl.8	d21,{q8},d5
119	vext.8	q9,q0,q3,#12
120	vst1.32	{d16},[r2]!
121.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0
122	subs	r1,r1,#1
123
124	veor	q3,q3,q9
125	vext.8	q9,q0,q9,#12
126	veor	q3,q3,q9
127	vext.8	q9,q0,q9,#12
128	veor	q3,q3,q9
129
130	vdup.32	q9,d7[1]		@ splat the top word of q3
131	veor	q9,q9,q8
132	veor	q10,q10,q1
133	vext.8	q8,q0,q8,#12
134	vshl.u8	q1,q1,#1
135	veor	q8,q8,q9
136	veor	q3,q3,q10
137	veor	q8,q8,q10
138	vst1.32	{q3},[r2]!
139	bne	.Loop192
140
141	mov	r12,#12		@ round count for 192-bit keys
142	add	r2,r2,#0x20		@ r2 = original r2 + 240
143	b	.Ldone
144
145.align	4
@ 256-bit keys: seven iterations; the loop is left from its middle once the
@ last q3 has been stored, at which point r2 is already original r2 + 240.
146.L256:
147	vld1.8	{q8},[r0]		@ q8 = key bytes 16..31
148	mov	r1,#7
149	mov	r12,#14		@ round count for 256-bit keys
150	vst1.32	{q3},[r2]!
151
152.Loop256:
153	vtbl.8	d20,{q8},d4
154	vtbl.8	d21,{q8},d5
155	vext.8	q9,q0,q3,#12
156	vst1.32	{q8},[r2]!
157.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0
158	subs	r1,r1,#1		@ flags consumed by the beq below
159
160	veor	q3,q3,q9
161	vext.8	q9,q0,q9,#12
162	veor	q3,q3,q9
163	vext.8	q9,q0,q9,#12
164	veor	q10,q10,q1
165	veor	q3,q3,q9
166	vshl.u8	q1,q1,#1
167	veor	q3,q3,q10
168	vst1.32	{q3},[r2]!
169	beq	.Ldone
170
171	vdup.32	q10,d7[1]		@ splat the top word of q3: no rotation and no
172	vext.8	q9,q0,q8,#12		@ round constant in this half-step
173.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0
174
175	veor	q8,q8,q9
176	vext.8	q9,q0,q9,#12
177	veor	q8,q8,q9
178	vext.8	q9,q0,q9,#12
179	veor	q8,q8,q9
180
181	veor	q8,q8,q10
182	b	.Loop256
183
184.Ldone:
185	str	r12,[r2]		@ store the round count at schedule offset 240
186	mov	r3,#0
187
188.Lenc_key_abort:
189	mov	r0,r3			@ return value
190
191	bx	lr
192.size	aes_hw_set_encrypt_key,.-aes_hw_set_encrypt_key
193
@ aes_hw_set_decrypt_key: build the round-key schedule used for decryption.
@
@ Takes the same r0/r1/r2 inputs as aes_hw_set_encrypt_key and first runs
@ that expansion through .Lenc_key. A non-zero status from it is returned
@ unchanged. Otherwise the schedule is rewritten in place: the order of the
@ round keys is reversed and aesimc is applied to every key except the two
@ outermost ones. Returns 0 in r0 on success.
@ r4 and lr are saved on the stack because r4 is used as a stride and the
@ bl overwrites lr.
194.globl	aes_hw_set_decrypt_key
195.hidden	aes_hw_set_decrypt_key
196.type	aes_hw_set_decrypt_key,%function
197.align	5
198aes_hw_set_decrypt_key:
199	stmdb	sp!,{r4,lr}
200	bl	.Lenc_key
201
202	cmp	r0,#0
203	bne	.Ldec_key_abort
204
205	sub	r2,r2,#240		@ restore original r2
206	mov	r4,#-16		@ post-index stride for walking r0 downwards
207	add	r0,r2,r12,lsl#4	@ end of key schedule
208
209	vld1.32	{q0},[r2]		@ swap the first and last round keys as they are
210	vld1.32	{q1},[r0]
211	vst1.32	{q0},[r0],r4
212	vst1.32	{q1},[r2]!
213
@ Swap the remaining pairs from the outside in, applying aesimc to both.
214.Loop_imc:
215	vld1.32	{q0},[r2]
216	vld1.32	{q1},[r0]
217.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
218.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
219	vst1.32	{q0},[r0],r4
220	vst1.32	{q1},[r2]!
221	cmp	r0,r2
222	bhi	.Loop_imc		@ continue while the upper pointer is still above the lower
223
224	vld1.32	{q0},[r2]		@ middle round key: transformed, stored back through r0
225.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
226	vst1.32	{q0},[r0]
227
228	eor	r0,r0,r0		@ return value
229.Ldec_key_abort:
230	ldmia	sp!,{r4,pc}
231.size	aes_hw_set_decrypt_key,.-aes_hw_set_decrypt_key
@ aes_hw_encrypt: encrypt one 16-byte block.
@
@ Inputs, as read by the code below:
@   r0 = pointer to the 16 input bytes
@   r1 = pointer to the 16 output bytes
@   r2 = key schedule; the round count is read from offset 240
@ Clobbers r2, r3, q0-q2 and the flags.
232.globl	aes_hw_encrypt
233.hidden	aes_hw_encrypt
234.type	aes_hw_encrypt,%function
235.align	5
236aes_hw_encrypt:
	@ The next line is a macro that is not defined in this file.
237	AARCH64_VALID_CALL_TARGET
238	ldr	r3,[r2,#240]		@ r3 = round count
239	vld1.32	{q0},[r2]!		@ q0 = round key 0
240	vld1.8	{q2},[r0]		@ q2 = input block
241	sub	r3,r3,#2		@ the last two rounds are done after the loop
242	vld1.32	{q1},[r2]!		@ q1 = round key 1
243
@ Two rounds per iteration, alternating between q0 and q1 as the key.
244.Loop_enc:
245.byte	0x00,0x43,0xb0,0xf3	@ aese q2,q0
246.byte	0x84,0x43,0xb0,0xf3	@ aesmc q2,q2
247	vld1.32	{q0},[r2]!
248	subs	r3,r3,#2
249.byte	0x02,0x43,0xb0,0xf3	@ aese q2,q1
250.byte	0x84,0x43,0xb0,0xf3	@ aesmc q2,q2
251	vld1.32	{q1},[r2]!
252	bgt	.Loop_enc
253
254.byte	0x00,0x43,0xb0,0xf3	@ aese q2,q0
255.byte	0x84,0x43,0xb0,0xf3	@ aesmc q2,q2
256	vld1.32	{q0},[r2]		@ q0 = last round key
257.byte	0x02,0x43,0xb0,0xf3	@ aese q2,q1
258	veor	q2,q2,q0		@ final round: XOR with the last key, no aesmc
259
260	vst1.8	{q2},[r1]
261	bx	lr
262.size	aes_hw_encrypt,.-aes_hw_encrypt
@ aes_hw_decrypt: decrypt one 16-byte block.
@
@ Inputs, as read by the code below:
@   r0 = pointer to the 16 input bytes
@   r1 = pointer to the 16 output bytes
@   r2 = key schedule; the round count is read from offset 240
@        (presumably one prepared by aes_hw_set_decrypt_key - confirm at callers)
@ Clobbers r2, r3, q0-q2 and the flags.
263.globl	aes_hw_decrypt
264.hidden	aes_hw_decrypt
265.type	aes_hw_decrypt,%function
266.align	5
267aes_hw_decrypt:
	@ The next line is a macro that is not defined in this file.
268	AARCH64_VALID_CALL_TARGET
269	ldr	r3,[r2,#240]		@ r3 = round count
270	vld1.32	{q0},[r2]!		@ q0 = first key of the schedule
271	vld1.8	{q2},[r0]		@ q2 = input block
272	sub	r3,r3,#2		@ the last two rounds are done after the loop
273	vld1.32	{q1},[r2]!		@ q1 = second key of the schedule
274
@ Two rounds per iteration, alternating between q0 and q1 as the key.
275.Loop_dec:
276.byte	0x40,0x43,0xb0,0xf3	@ aesd q2,q0
277.byte	0xc4,0x43,0xb0,0xf3	@ aesimc q2,q2
278	vld1.32	{q0},[r2]!
279	subs	r3,r3,#2
280.byte	0x42,0x43,0xb0,0xf3	@ aesd q2,q1
281.byte	0xc4,0x43,0xb0,0xf3	@ aesimc q2,q2
282	vld1.32	{q1},[r2]!
283	bgt	.Loop_dec
284
285.byte	0x40,0x43,0xb0,0xf3	@ aesd q2,q0
286.byte	0xc4,0x43,0xb0,0xf3	@ aesimc q2,q2
287	vld1.32	{q0},[r2]		@ q0 = last key of the schedule
288.byte	0x42,0x43,0xb0,0xf3	@ aesd q2,q1
289	veor	q2,q2,q0		@ final round: XOR with the last key, no aesimc
290
291	vst1.8	{q2},[r1]
292	bx	lr
293.size	aes_hw_decrypt,.-aes_hw_decrypt
@ aes_hw_cbc_encrypt: CBC-mode encryption or decryption of whole blocks.
@
@ Inputs, as read by the code below:
@   r0      = input pointer
@   r1      = output pointer
@   r2      = length in bytes; the function returns without touching
@             anything when it is below 16, and the low four bits are ignored
@   r3      = key schedule; the round count is read from offset 240
@   [sp]    = pointer to the 16-byte IV (kept in r4); overwritten on exit
@             with the last ciphertext block
@   [sp,#4] = direction (r5): zero selects decryption, anything else
@             selects encryption
@ r4-r8, lr and d8-d15 are saved on entry and restored on exit; lr (r14) is
@ used as a plain scratch pointer in the encryption path.
@ Register roles shared by both paths:
@   q8,q9   = round keys 0 and 1 (reloaded as the loops advance)
@   q10-q15 = the six round keys before the last one
@   q7      = last round key
@   q6      = current chaining value (IV or previous ciphertext block)
294.globl	aes_hw_cbc_encrypt
295.hidden	aes_hw_cbc_encrypt
296.type	aes_hw_cbc_encrypt,%function
297.align	5
298aes_hw_cbc_encrypt:
299	mov	ip,sp
300	stmdb	sp!,{r4,r5,r6,r7,r8,lr}
301	vstmdb	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}            @ ABI specification says so
302	ldmia	ip,{r4,r5}		@ load remaining args
303	subs	r2,r2,#16
304	mov	r8,#16		@ r8 = post-index step for input loads
305	blo	.Lcbc_abort
306	moveq	r8,#0		@ exactly one block: do not step past it
307
308	cmp	r5,#0			@ en- or decrypting?
309	ldr	r5,[r3,#240]		@ r5 = round count (the flags above stay live)
310	and	r2,r2,#-16
311	vld1.8	{q6},[r4]		@ q6 = IV
312	vld1.8	{q0},[r0],r8		@ q0 = first input block
313
314	vld1.32	{q8,q9},[r3]		@ load key schedule...
315	sub	r5,r5,#6
316	add	r7,r3,r5,lsl#4	@ pointer to last 7 round keys
317	sub	r5,r5,#2		@ r5 = round count - 8
318	vld1.32	{q10,q11},[r7]!
319	vld1.32	{q12,q13},[r7]!
320	vld1.32	{q14,q15},[r7]!
321	vld1.32	{q7},[r7]
322
323	add	r7,r3,#32		@ r7 -> round key 2
324	mov	r6,r5
325	beq	.Lcbc_dec		@ taken when the direction argument was zero
326
327	cmp	r5,#2			@ equal for a round count of 10
328	veor	q0,q0,q6		@ first plaintext block XOR IV
329	veor	q5,q8,q7		@ q5 = round key 0 XOR last round key, see below
330	beq	.Lcbc_enc128
331
	@ Round counts other than 10: q2,q3 hold round keys 2 and 3, and
	@ r7/r6/r12/r14/r3 point at round keys 1/4/5/6/7 for in-loop reloads.
332	vld1.32	{q2,q3},[r7]
333	add	r7,r3,#16
334	add	r6,r3,#16*4
335	add	r12,r3,#16*5
336.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
337.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
338	add	r14,r3,#16*6
339	add	r3,r3,#16*7
340	b	.Lenter_cbc_enc
341
342.align	4
@ On loop-back q0 holds the previous block state before its final key XOR
@ and q8 holds the next plaintext XOR q5, so the aese below applies the
@ previous ciphertext, the new plaintext and round key 0 in one step.
343.Loop_cbc_enc:
344.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
345.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
346	vst1.8	{q6},[r1]!		@ store the previous ciphertext block
347.Lenter_cbc_enc:
348.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
349.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
350.byte	0x04,0x03,0xb0,0xf3	@ aese q0,q2
351.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
352	vld1.32	{q8},[r6]
353	cmp	r5,#4			@ equal for a round count of 12
354.byte	0x06,0x03,0xb0,0xf3	@ aese q0,q3
355.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
356	vld1.32	{q9},[r12]
357	beq	.Lcbc_enc192
358
359.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
360.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
361	vld1.32	{q8},[r14]
362.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
363.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
364	vld1.32	{q9},[r3]
365	nop
366
367.Lcbc_enc192:
368.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
369.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
370	subs	r2,r2,#16		@ flags consumed by moveq and the bhs below
371.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
372.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
373	moveq	r8,#0		@ next block is the last: stop stepping r0
374.byte	0x24,0x03,0xb0,0xf3	@ aese q0,q10
375.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
376.byte	0x26,0x03,0xb0,0xf3	@ aese q0,q11
377.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
378	vld1.8	{q8},[r0],r8		@ look-ahead load of the next plaintext block
379.byte	0x28,0x03,0xb0,0xf3	@ aese q0,q12
380.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
381	veor	q8,q8,q5
382.byte	0x2a,0x03,0xb0,0xf3	@ aese q0,q13
383.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
384	vld1.32	{q9},[r7]		@ re-pre-load rndkey[1]
385.byte	0x2c,0x03,0xb0,0xf3	@ aese q0,q14
386.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
387.byte	0x2e,0x03,0xb0,0xf3	@ aese q0,q15
388	veor	q6,q0,q7		@ q6 = ciphertext block
389	bhs	.Loop_cbc_enc
390
391	vst1.8	{q6},[r1]!
392	b	.Lcbc_done
393
394.align	5
@ Encryption with a round count of 10: all round keys stay in registers.
395.Lcbc_enc128:
396	vld1.32	{q2,q3},[r7]
397.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
398.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
399	b	.Lenter_cbc_enc128
400.Loop_cbc_enc128:
401.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
402.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
403	vst1.8	{q6},[r1]!		@ store the previous ciphertext block
404.Lenter_cbc_enc128:
405.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
406.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
407	subs	r2,r2,#16		@ flags consumed by moveq and the bhs below
408.byte	0x04,0x03,0xb0,0xf3	@ aese q0,q2
409.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
410	moveq	r8,#0
411.byte	0x06,0x03,0xb0,0xf3	@ aese q0,q3
412.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
413.byte	0x24,0x03,0xb0,0xf3	@ aese q0,q10
414.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
415.byte	0x26,0x03,0xb0,0xf3	@ aese q0,q11
416.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
417	vld1.8	{q8},[r0],r8		@ look-ahead load of the next plaintext block
418.byte	0x28,0x03,0xb0,0xf3	@ aese q0,q12
419.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
420.byte	0x2a,0x03,0xb0,0xf3	@ aese q0,q13
421.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
422.byte	0x2c,0x03,0xb0,0xf3	@ aese q0,q14
423.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
424	veor	q8,q8,q5
425.byte	0x2e,0x03,0xb0,0xf3	@ aese q0,q15
426	veor	q6,q0,q7		@ q6 = ciphertext block
427	bhs	.Loop_cbc_enc128
428
429	vst1.8	{q6},[r1]!
430	b	.Lcbc_done
431.align	5
@ Decryption: three blocks per iteration in q0, q1 and q10, with untouched
@ copies of the ciphertext kept in q2, q3 and q11 for chaining. One or two
@ blocks (at entry or left over) are handled by .Lcbc_dec_tail in q1/q10.
432.Lcbc_dec:
433	vld1.8	{q10},[r0]!
434	subs	r2,r2,#32		@ bias
435	add	r6,r5,#2		@ r6 = round count - 6, counts the looped rounds
436	vorr	q3,q0,q0
437	vorr	q1,q0,q0
438	vorr	q11,q10,q10
439	blo	.Lcbc_dec_tail		@ fewer than three blocks in total
440
441	vorr	q1,q10,q10
442	vld1.8	{q10},[r0]!
443	vorr	q2,q0,q0
444	vorr	q3,q1,q1
445	vorr	q11,q10,q10
446
447.Loop3x_cbc_dec:
448.byte	0x60,0x03,0xb0,0xf3	@ aesd q0,q8
449.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
450.byte	0x60,0x23,0xb0,0xf3	@ aesd q1,q8
451.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
452.byte	0x60,0x43,0xf0,0xf3	@ aesd q10,q8
453.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
454	vld1.32	{q8},[r7]!
455	subs	r6,r6,#2
456.byte	0x62,0x03,0xb0,0xf3	@ aesd q0,q9
457.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
458.byte	0x62,0x23,0xb0,0xf3	@ aesd q1,q9
459.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
460.byte	0x62,0x43,0xf0,0xf3	@ aesd q10,q9
461.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
462	vld1.32	{q9},[r7]!
463	bgt	.Loop3x_cbc_dec
464
465.byte	0x60,0x03,0xb0,0xf3	@ aesd q0,q8
466.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
467.byte	0x60,0x23,0xb0,0xf3	@ aesd q1,q8
468.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
469.byte	0x60,0x43,0xf0,0xf3	@ aesd q10,q8
470.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
471	veor	q4,q6,q7		@ chaining value XOR last round key, for block 0
472	subs	r2,r2,#0x30		@ flags consumed by movlo and the bhs at the loop end
473	veor	q5,q2,q7		@ same for block 1, from the saved ciphertext
474	movlo	r6,r2			@ r6 is zero at this point
475.byte	0x62,0x03,0xb0,0xf3	@ aesd q0,q9
476.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
477.byte	0x62,0x23,0xb0,0xf3	@ aesd q1,q9
478.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
479.byte	0x62,0x43,0xf0,0xf3	@ aesd q10,q9
480.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
481	veor	q9,q3,q7		@ same for block 2; q9 is free as a key from here
482	add	r0,r0,r6		@ r0 is adjusted in such way that
483					@ at exit from the loop q1-q10
484					@ are loaded with last "words"
485	vorr	q6,q11,q11		@ new chaining value = last ciphertext of this group
486	mov	r7,r3			@ rewind the round-key pointer
487.byte	0x68,0x03,0xb0,0xf3	@ aesd q0,q12
488.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
489.byte	0x68,0x23,0xb0,0xf3	@ aesd q1,q12
490.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
491.byte	0x68,0x43,0xf0,0xf3	@ aesd q10,q12
492.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
493	vld1.8	{q2},[r0]!		@ look-ahead loads of the next three blocks
494.byte	0x6a,0x03,0xb0,0xf3	@ aesd q0,q13
495.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
496.byte	0x6a,0x23,0xb0,0xf3	@ aesd q1,q13
497.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
498.byte	0x6a,0x43,0xf0,0xf3	@ aesd q10,q13
499.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
500	vld1.8	{q3},[r0]!
501.byte	0x6c,0x03,0xb0,0xf3	@ aesd q0,q14
502.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
503.byte	0x6c,0x23,0xb0,0xf3	@ aesd q1,q14
504.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
505.byte	0x6c,0x43,0xf0,0xf3	@ aesd q10,q14
506.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
507	vld1.8	{q11},[r0]!
508.byte	0x6e,0x03,0xb0,0xf3	@ aesd q0,q15
509.byte	0x6e,0x23,0xb0,0xf3	@ aesd q1,q15
510.byte	0x6e,0x43,0xf0,0xf3	@ aesd q10,q15
511	vld1.32	{q8},[r7]!	@ re-pre-load rndkey[0]
512	add	r6,r5,#2
513	veor	q4,q4,q0		@ plaintext blocks 0, 1 and 2
514	veor	q5,q5,q1
515	veor	q10,q10,q9
516	vld1.32	{q9},[r7]!	@ re-pre-load rndkey[1]
517	vst1.8	{q4},[r1]!
518	vorr	q0,q2,q2
519	vst1.8	{q5},[r1]!
520	vorr	q1,q3,q3
521	vst1.8	{q10},[r1]!
522	vorr	q10,q11,q11
523	bhs	.Loop3x_cbc_dec
524
525	cmn	r2,#0x30		@ r2 == -0x30 means no blocks are left
526	beq	.Lcbc_done
527	nop
528
@ One or two remaining blocks. Both q1 and q10 are always run through the
@ cipher; with a single block only q10 contributes to the output.
529.Lcbc_dec_tail:
530.byte	0x60,0x23,0xb0,0xf3	@ aesd q1,q8
531.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
532.byte	0x60,0x43,0xf0,0xf3	@ aesd q10,q8
533.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
534	vld1.32	{q8},[r7]!
535	subs	r6,r6,#2
536.byte	0x62,0x23,0xb0,0xf3	@ aesd q1,q9
537.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
538.byte	0x62,0x43,0xf0,0xf3	@ aesd q10,q9
539.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
540	vld1.32	{q9},[r7]!
541	bgt	.Lcbc_dec_tail
542
543.byte	0x60,0x23,0xb0,0xf3	@ aesd q1,q8
544.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
545.byte	0x60,0x43,0xf0,0xf3	@ aesd q10,q8
546.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
547.byte	0x62,0x23,0xb0,0xf3	@ aesd q1,q9
548.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
549.byte	0x62,0x43,0xf0,0xf3	@ aesd q10,q9
550.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
551.byte	0x68,0x23,0xb0,0xf3	@ aesd q1,q12
552.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
553.byte	0x68,0x43,0xf0,0xf3	@ aesd q10,q12
554.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
555	cmn	r2,#0x20		@ equal when exactly one block is left; used by the beq below
556.byte	0x6a,0x23,0xb0,0xf3	@ aesd q1,q13
557.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
558.byte	0x6a,0x43,0xf0,0xf3	@ aesd q10,q13
559.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
560	veor	q5,q6,q7		@ chaining value XOR last round key
561.byte	0x6c,0x23,0xb0,0xf3	@ aesd q1,q14
562.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
563.byte	0x6c,0x43,0xf0,0xf3	@ aesd q10,q14
564.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
565	veor	q9,q3,q7		@ saved ciphertext XOR last round key
566.byte	0x6e,0x23,0xb0,0xf3	@ aesd q1,q15
567.byte	0x6e,0x43,0xf0,0xf3	@ aesd q10,q15
568	beq	.Lcbc_dec_one
569	veor	q5,q5,q1
570	veor	q9,q9,q10
571	vorr	q6,q11,q11		@ chaining value to write back
572	vst1.8	{q5},[r1]!
573	vst1.8	{q9},[r1]!
574	b	.Lcbc_done
575
576.Lcbc_dec_one:
577	veor	q5,q5,q10
578	vorr	q6,q11,q11		@ chaining value to write back
579	vst1.8	{q5},[r1]!
580
581.Lcbc_done:
582	vst1.8	{q6},[r4]		@ write the chaining value back through the IV pointer
583.Lcbc_abort:
584	vldmia	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}
585	ldmia	sp!,{r4,r5,r6,r7,r8,pc}
586.size	aes_hw_cbc_encrypt,.-aes_hw_cbc_encrypt
@ aes_hw_ctr32_encrypt_blocks: CTR mode with a 32-bit counter in the last
@ word of the counter block.
@
@ Inputs, as read by the code below:
@   r0   = input pointer
@   r1   = output pointer
@   r2   = number of 16-byte blocks
@   r3   = key schedule; the round count is read from offset 240
@   [sp] = pointer to the 16-byte counter block (kept in r4); it is read
@          here and never written back
@ NOTE: a block count of zero is not filtered out; it falls into
@ .Lctr32_tail, which loads from r0 and stores to r1 regardless.
@ r4-r10, lr and d8-d15 are saved on entry and restored on exit.
@ Register roles:
@   r8          = running counter value, byte-reversed on load unless
@                 __ARMEB__ is defined
@   q0,q1,q10   = counter blocks being encrypted
@   q6          = staging copy of the counter block (see the errata note)
@   q8,q9       = round keys 0 and 1 (reloaded as the loops advance)
@   q12-q15     = the four round keys before the last one
@   q7          = last round key
587.globl	aes_hw_ctr32_encrypt_blocks
588.hidden	aes_hw_ctr32_encrypt_blocks
589.type	aes_hw_ctr32_encrypt_blocks,%function
590.align	5
591aes_hw_ctr32_encrypt_blocks:
592	mov	ip,sp
593	stmdb	sp!,{r4,r5,r6,r7,r8,r9,r10,lr}
594	vstmdb	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}            @ ABI specification says so
595	ldr	r4, [ip]		@ load remaining arg
596	ldr	r5,[r3,#240]		@ r5 = round count
597
598	ldr	r8, [r4, #12]		@ r8 = counter word of the counter block
599	vld1.32	{q0},[r4]		@ q0 = counter block
600
601	vld1.32	{q8,q9},[r3]		@ load key schedule...
602	sub	r5,r5,#4
603	mov	r12,#16		@ r12 = step between the two tail input loads
604	cmp	r2,#2			@ flags consumed by movlo and the bls below
605	add	r7,r3,r5,lsl#4	@ pointer to last 5 round keys
606	sub	r5,r5,#2		@ r5 = round count - 6
607	vld1.32	{q12,q13},[r7]!
608	vld1.32	{q14,q15},[r7]!
609	vld1.32	{q7},[r7]
610	add	r7,r3,#32		@ r7 -> round key 2
611	mov	r6,r5
612	movlo	r12,#0		@ fewer than two blocks: second tail load re-reads the first
613
614	@ ARM Cortex-A57 and Cortex-A72 cores running in 32-bit mode are
615	@ affected by silicon errata #1742098 [0] and #1655431 [1],
616	@ respectively, where the second instruction of an aese/aesmc
617	@ instruction pair may execute twice if an interrupt is taken right
618	@ after the first instruction consumes an input register of which a
619	@ single 32-bit lane has been updated the last time it was modified.
620	@
621	@ This function uses a counter in one 32-bit lane. The vmov.32 lines
622	@ could write to q1 and q10 directly, but that trips these bugs.
623	@ We write to q6 and copy to the final register as a workaround.
624	@
625	@ [0] ARM-EPM-049219 v23 Cortex-A57 MPCore Software Developers Errata Notice
626	@ [1] ARM-EPM-012079 v11.0 Cortex-A72 MPCore Software Developers Errata Notice
627#ifndef __ARMEB__
628	rev	r8, r8
629#endif
630	add	r10, r8, #1
631	vorr	q6,q0,q0
632	rev	r10, r10
633	vmov.32	d13[1],r10		@ d13[1] is the top word of q6
634	add	r8, r8, #2
635	vorr	q1,q6,q6		@ q1 = counter + 1
636	bls	.Lctr32_tail		@ two blocks or fewer
637	rev	r12, r8
638	vmov.32	d13[1],r12
639	sub	r2,r2,#3		@ bias
640	vorr	q10,q6,q6		@ q10 = counter + 2
641	b	.Loop3x_ctr32
642
643.align	4
@ Three counter blocks per iteration in q0, q1 and q10.
644.Loop3x_ctr32:
645.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
646.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
647.byte	0x20,0x23,0xb0,0xf3	@ aese q1,q8
648.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
649.byte	0x20,0x43,0xf0,0xf3	@ aese q10,q8
650.byte	0xa4,0x43,0xf0,0xf3	@ aesmc q10,q10
651	vld1.32	{q8},[r7]!
652	subs	r6,r6,#2
653.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
654.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
655.byte	0x22,0x23,0xb0,0xf3	@ aese q1,q9
656.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
657.byte	0x22,0x43,0xf0,0xf3	@ aese q10,q9
658.byte	0xa4,0x43,0xf0,0xf3	@ aesmc q10,q10
659	vld1.32	{q9},[r7]!
660	bgt	.Loop3x_ctr32
661
	@ From here the three states move to q4, q5 and q9, which frees
	@ q0, q1 and q10 to receive the next counter blocks.
662.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
663.byte	0x80,0x83,0xb0,0xf3	@ aesmc q4,q0
664.byte	0x20,0x23,0xb0,0xf3	@ aese q1,q8
665.byte	0x82,0xa3,0xb0,0xf3	@ aesmc q5,q1
666	vld1.8	{q2},[r0]!		@ q2, q3, q11 = the three input blocks
667	add	r9,r8,#1
668.byte	0x20,0x43,0xf0,0xf3	@ aese q10,q8
669.byte	0xa4,0x43,0xf0,0xf3	@ aesmc q10,q10
670	vld1.8	{q3},[r0]!
671	rev	r9,r9
672.byte	0x22,0x83,0xb0,0xf3	@ aese q4,q9
673.byte	0x88,0x83,0xb0,0xf3	@ aesmc q4,q4
674.byte	0x22,0xa3,0xb0,0xf3	@ aese q5,q9
675.byte	0x8a,0xa3,0xb0,0xf3	@ aesmc q5,q5
676	vld1.8	{q11},[r0]!
677	mov	r7,r3			@ rewind the round-key pointer
678.byte	0x22,0x43,0xf0,0xf3	@ aese q10,q9
679.byte	0xa4,0x23,0xf0,0xf3	@ aesmc q9,q10
680.byte	0x28,0x83,0xb0,0xf3	@ aese q4,q12
681.byte	0x88,0x83,0xb0,0xf3	@ aesmc q4,q4
682.byte	0x28,0xa3,0xb0,0xf3	@ aese q5,q12
683.byte	0x8a,0xa3,0xb0,0xf3	@ aesmc q5,q5
684	veor	q2,q2,q7		@ pre-XOR the input with the last round key
685	add	r10,r8,#2
686.byte	0x28,0x23,0xf0,0xf3	@ aese q9,q12
687.byte	0xa2,0x23,0xf0,0xf3	@ aesmc q9,q9
688	veor	q3,q3,q7
689	add	r8,r8,#3
690.byte	0x2a,0x83,0xb0,0xf3	@ aese q4,q13
691.byte	0x88,0x83,0xb0,0xf3	@ aesmc q4,q4
692.byte	0x2a,0xa3,0xb0,0xf3	@ aese q5,q13
693.byte	0x8a,0xa3,0xb0,0xf3	@ aesmc q5,q5
694	 @ Note the logic to update q0, q1, and q10 is written to work
695	 @ around a bug in ARM Cortex-A57 and Cortex-A72 cores running in
696	 @ 32-bit mode. See the comment above.
697	veor	q11,q11,q7
698	vmov.32	d13[1], r9
699.byte	0x2a,0x23,0xf0,0xf3	@ aese q9,q13
700.byte	0xa2,0x23,0xf0,0xf3	@ aesmc q9,q9
701	vorr	q0,q6,q6
702	rev	r10,r10
703.byte	0x2c,0x83,0xb0,0xf3	@ aese q4,q14
704.byte	0x88,0x83,0xb0,0xf3	@ aesmc q4,q4
705	vmov.32	d13[1], r10
706	rev	r12,r8
707.byte	0x2c,0xa3,0xb0,0xf3	@ aese q5,q14
708.byte	0x8a,0xa3,0xb0,0xf3	@ aesmc q5,q5
709	vorr	q1,q6,q6
710	vmov.32	d13[1], r12
711.byte	0x2c,0x23,0xf0,0xf3	@ aese q9,q14
712.byte	0xa2,0x23,0xf0,0xf3	@ aesmc q9,q9
713	vorr	q10,q6,q6
714	subs	r2,r2,#3		@ flags consumed by the bhs at the loop end
715.byte	0x2e,0x83,0xb0,0xf3	@ aese q4,q15
716.byte	0x2e,0xa3,0xb0,0xf3	@ aese q5,q15
717.byte	0x2e,0x23,0xf0,0xf3	@ aese q9,q15
718
719	veor	q2,q2,q4		@ output = (input XOR last key) XOR cipher state
720	vld1.32	{q8},[r7]!	@ re-pre-load rndkey[0]
721	vst1.8	{q2},[r1]!
722	veor	q3,q3,q5
723	mov	r6,r5
724	vst1.8	{q3},[r1]!
725	veor	q11,q11,q9
726	vld1.32	{q9},[r7]!	@ re-pre-load rndkey[1]
727	vst1.8	{q11},[r1]!
728	bhs	.Loop3x_ctr32
729
730	adds	r2,r2,#3		@ undo the bias: r2 = blocks still to do
731	beq	.Lctr32_done
732	cmp	r2,#1
733	mov	r12,#16
734	moveq	r12,#0		@ one block left: second tail load re-reads the first
735
@ One or two remaining blocks. Both q0 and q1 are always encrypted; the
@ second result is stored only when r2 is not 1.
736.Lctr32_tail:
737.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
738.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
739.byte	0x20,0x23,0xb0,0xf3	@ aese q1,q8
740.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
741	vld1.32	{q8},[r7]!
742	subs	r6,r6,#2
743.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
744.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
745.byte	0x22,0x23,0xb0,0xf3	@ aese q1,q9
746.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
747	vld1.32	{q9},[r7]!
748	bgt	.Lctr32_tail
749
750.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
751.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
752.byte	0x20,0x23,0xb0,0xf3	@ aese q1,q8
753.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
754.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
755.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
756.byte	0x22,0x23,0xb0,0xf3	@ aese q1,q9
757.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
758	vld1.8	{q2},[r0],r12
759.byte	0x28,0x03,0xb0,0xf3	@ aese q0,q12
760.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
761.byte	0x28,0x23,0xb0,0xf3	@ aese q1,q12
762.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
763	vld1.8	{q3},[r0]
764.byte	0x2a,0x03,0xb0,0xf3	@ aese q0,q13
765.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
766.byte	0x2a,0x23,0xb0,0xf3	@ aese q1,q13
767.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
768	veor	q2,q2,q7
769.byte	0x2c,0x03,0xb0,0xf3	@ aese q0,q14
770.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
771.byte	0x2c,0x23,0xb0,0xf3	@ aese q1,q14
772.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
773	veor	q3,q3,q7
774.byte	0x2e,0x03,0xb0,0xf3	@ aese q0,q15
775.byte	0x2e,0x23,0xb0,0xf3	@ aese q1,q15
776
777	cmp	r2,#1
778	veor	q2,q2,q0
779	veor	q3,q3,q1
780	vst1.8	{q2},[r1]!
781	beq	.Lctr32_done		@ exactly one block: skip the second store
782	vst1.8	{q3},[r1]
783
784.Lctr32_done:
785	vldmia	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}
786	ldmia	sp!,{r4,r5,r6,r7,r8,r9,r10,pc}
787.size	aes_hw_ctr32_encrypt_blocks,.-aes_hw_ctr32_encrypt_blocks
788#endif
789#endif  // !OPENSSL_NO_ASM && defined(OPENSSL_ARM) && defined(__ELF__)
790