// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.

#include <openssl/asm_base.h>

#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_ARM) && defined(__ELF__)
@ Copyright 2007-2016 The OpenSSL Project Authors. All Rights Reserved.
@
@ Licensed under the OpenSSL license (the "License").  You may not use
@ this file except in compliance with the License.  You can obtain a copy
@ in the file LICENSE in the source distribution or at
@ https://www.openssl.org/source/license.html


@ ====================================================================
@ Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
@ project. The module is, however, dual licensed under OpenSSL and
@ CRYPTOGAMS licenses depending on where you obtain it. For further
@ details see http://www.openssl.org/~appro/cryptogams/.
@
@ Permission to use under GPL terms is granted.
@ ====================================================================

@ SHA256 block procedure for ARMv4. May 2007.

@ Performance is ~2x better than gcc 3.4 generated code and in "absolute"
@ terms is ~2250 cycles per 64-byte block or ~35 cycles per byte
@ [on single-issue Xscale PXA250 core].

@ July 2010.
@
@ Rescheduling for dual-issue pipeline resulted in 22% improvement on
@ Cortex A8 core and ~20 cycles per processed byte.

@ February 2011.
@
@ Profiler-assisted and platform-specific optimization resulted in 16%
@ improvement on Cortex A8 core and ~15.4 cycles per processed byte.

@ September 2013.
@
@ Add NEON implementation. On Cortex A8 it was measured to process one
@ byte in 12.5 cycles or 23% faster than integer-only code. Snapdragon
@ S4 does it in 12.5 cycles too, but it's 50% faster than integer-only
@ code (meaning that the latter performs sub-optimally; nothing was done
@ about it).

@ May 2014.
@
@ Add ARMv8 code path performing at 2.0 cpb on Apple A7.

#ifndef __KERNEL__
# include <openssl/arm_arch.h>
#else
# define __ARM_ARCH __LINUX_ARM_ARCH__
# define __ARM_MAX_ARCH__ 7
#endif

@ Silence ARMv8 deprecated IT instruction warnings. This file is used by both
@ ARMv7 and ARMv8 processors. It does have ARMv8-only code, but those
@ instructions are manually-encoded. (See unsha256.)
.arch	armv7-a

.text
#if defined(__thumb2__)
.syntax	unified
.thumb
#else
.code	32
#endif

.type	K256,%object
.align	5
K256:
.word	0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
.word	0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
.word	0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
.word	0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
.word	0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
.word	0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
.word	0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
.word	0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
.word	0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
.word	0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
.word	0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
.word	0xd192e819,0xd6990624,0xf40e3585,0x106aa070
.word	0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
.word	0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
.word	0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
.word	0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
.size	K256,.-K256
.word	0				@ terminator
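@ For reference: K256 holds the 64 SHA-256 round constants of FIPS 180-4,
@ the first 32 bits of the fractional parts of the cube roots of the first
@ 64 primes. A C-style sketch of how they could be derived (illustrative
@ only, not part of this module):
@
@	#include <math.h>
@	#include <stdint.h>
@	uint32_t k256(unsigned prime) {	/* prime = 2, 3, 5, ... */
@		double x = cbrt((double)prime);
@		x -= (uint32_t)x;	/* keep the fractional part */
@		return (uint32_t)(x * 4294967296.0);	/* first 32 bits */
@	}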
.align	5

.globl	sha256_block_data_order_nohw
.hidden	sha256_block_data_order_nohw
.type	sha256_block_data_order_nohw,%function
sha256_block_data_order_nohw:
	add	r2,r1,r2,lsl#6	@ len to point at the end of inp
	stmdb	sp!,{r0,r1,r2,r4-r11,lr}
	ldmia	r0,{r4,r5,r6,r7,r8,r9,r10,r11}
	adr	r14,K256
	sub	sp,sp,#16*4		@ alloca(X[16])
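@ Each of the 64 unrolled rounds below computes, in C-style pseudocode
@ (a..h live in r4..r11, X[] is the 16-word window on the stack; sketch
@ for reference only):
@
@	T1 = h + Sigma1(e) + Ch(e,f,g) + K256[i] + X[i];
@	d += T1;
@	h  = T1 + Sigma0(a) + Maj(a,b,c);	/* Maj added next round */
@
@ with Sigma1(e) = ror(e,6)^ror(e,11)^ror(e,25),
@      Sigma0(a) = ror(a,2)^ror(a,13)^ror(a,22),
@      Ch(e,f,g) = ((f^g)&e)^g.
@ The three rotations are folded into one final ror, e.g.
@ Sigma1(e) = ror(e ^ ror(e,5) ^ ror(e,19), 6).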
.Loop:
# if __ARM_ARCH>=7
	ldr	r2,[r1],#4
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r5,r6		@ magic
	eor	r12,r12,r12
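@ The "magic" above seeds r3 with b^c. Each round then forms
@ Maj(a,b,c) = ((a^b)&(b^c))^b, reusing this round's a^b as the next
@ round's b^c, and defers adding the result into h until the following
@ round ("from the past") so the addition fills an otherwise idle
@ pipeline slot.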
#if __ARM_ARCH>=7
	@ ldr	r2,[r1],#4			@ 0
# if 0==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r8,r8,ror#5
	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r8,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 0
	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
	ldrb	r12,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r12,lsl#8
	ldrb	r12,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 0==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r8,r8,ror#5
	orr	r2,r2,r12,lsl#24
	eor	r0,r0,r8,ror#19	@ Sigma1(e)
#endif
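@ Note: on pre-ARMv7 cores the input may be unaligned, so each word is
@ assembled big-endian from four ldrb's; ARMv7+ uses a single word load
@ plus rev (skipped on big-endian builds).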
	ldr	r12,[r14],#4			@ *K256++
	add	r11,r11,r2			@ h+=X[i]
	str	r2,[sp,#0*4]
	eor	r2,r9,r10
	add	r11,r11,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r8
	add	r11,r11,r12			@ h+=K256[i]
	eor	r2,r2,r10			@ Ch(e,f,g)
	eor	r0,r4,r4,ror#11
	add	r11,r11,r2			@ h+=Ch(e,f,g)
#if 0==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 0<15
# if __ARM_ARCH>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r4,r5			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#2*4]		@ from future BODY_16_xx
	eor	r12,r4,r5			@ a^b, b^c in next round
	ldr	r1,[sp,#15*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r4,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r7,r7,r11			@ d+=h
	eor	r3,r3,r5			@ Maj(a,b,c)
	add	r11,r11,r0,ror#2	@ h+=Sigma0(a)
	@ add	r11,r11,r3			@ h+=Maj(a,b,c)
#if __ARM_ARCH>=7
	@ ldr	r2,[r1],#4			@ 1
# if 1==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r7,r7,ror#5
	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r7,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 1
	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
	ldrb	r3,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r3,lsl#8
	ldrb	r3,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 1==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r7,r7,ror#5
	orr	r2,r2,r3,lsl#24
	eor	r0,r0,r7,ror#19	@ Sigma1(e)
#endif
	ldr	r3,[r14],#4			@ *K256++
	add	r10,r10,r2			@ h+=X[i]
	str	r2,[sp,#1*4]
	eor	r2,r8,r9
	add	r10,r10,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r7
	add	r10,r10,r3			@ h+=K256[i]
	eor	r2,r2,r9			@ Ch(e,f,g)
	eor	r0,r11,r11,ror#11
	add	r10,r10,r2			@ h+=Ch(e,f,g)
#if 1==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 1<15
# if __ARM_ARCH>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r11,r4			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#3*4]		@ from future BODY_16_xx
	eor	r3,r11,r4			@ a^b, b^c in next round
	ldr	r1,[sp,#0*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r11,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r6,r6,r10			@ d+=h
	eor	r12,r12,r4			@ Maj(a,b,c)
	add	r10,r10,r0,ror#2	@ h+=Sigma0(a)
	@ add	r10,r10,r12			@ h+=Maj(a,b,c)
#if __ARM_ARCH>=7
	@ ldr	r2,[r1],#4			@ 2
# if 2==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r6,r6,ror#5
	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r6,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 2
	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
	ldrb	r12,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r12,lsl#8
	ldrb	r12,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 2==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r6,r6,ror#5
	orr	r2,r2,r12,lsl#24
	eor	r0,r0,r6,ror#19	@ Sigma1(e)
#endif
	ldr	r12,[r14],#4			@ *K256++
	add	r9,r9,r2			@ h+=X[i]
	str	r2,[sp,#2*4]
	eor	r2,r7,r8
	add	r9,r9,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r6
	add	r9,r9,r12			@ h+=K256[i]
	eor	r2,r2,r8			@ Ch(e,f,g)
	eor	r0,r10,r10,ror#11
	add	r9,r9,r2			@ h+=Ch(e,f,g)
#if 2==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 2<15
# if __ARM_ARCH>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r10,r11			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#4*4]		@ from future BODY_16_xx
	eor	r12,r10,r11			@ a^b, b^c in next round
	ldr	r1,[sp,#1*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r10,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r5,r5,r9			@ d+=h
	eor	r3,r3,r11			@ Maj(a,b,c)
	add	r9,r9,r0,ror#2	@ h+=Sigma0(a)
	@ add	r9,r9,r3			@ h+=Maj(a,b,c)
#if __ARM_ARCH>=7
	@ ldr	r2,[r1],#4			@ 3
# if 3==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r5,r5,ror#5
	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r5,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 3
	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
	ldrb	r3,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r3,lsl#8
	ldrb	r3,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 3==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r5,r5,ror#5
	orr	r2,r2,r3,lsl#24
	eor	r0,r0,r5,ror#19	@ Sigma1(e)
#endif
	ldr	r3,[r14],#4			@ *K256++
	add	r8,r8,r2			@ h+=X[i]
	str	r2,[sp,#3*4]
	eor	r2,r6,r7
	add	r8,r8,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r5
	add	r8,r8,r3			@ h+=K256[i]
	eor	r2,r2,r7			@ Ch(e,f,g)
	eor	r0,r9,r9,ror#11
	add	r8,r8,r2			@ h+=Ch(e,f,g)
#if 3==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 3<15
# if __ARM_ARCH>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r9,r10			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#5*4]		@ from future BODY_16_xx
	eor	r3,r9,r10			@ a^b, b^c in next round
	ldr	r1,[sp,#2*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r9,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r4,r4,r8			@ d+=h
	eor	r12,r12,r10			@ Maj(a,b,c)
	add	r8,r8,r0,ror#2	@ h+=Sigma0(a)
	@ add	r8,r8,r12			@ h+=Maj(a,b,c)
#if __ARM_ARCH>=7
	@ ldr	r2,[r1],#4			@ 4
# if 4==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r4,r4,ror#5
	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r4,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 4
	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
	ldrb	r12,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r12,lsl#8
	ldrb	r12,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 4==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r4,r4,ror#5
	orr	r2,r2,r12,lsl#24
	eor	r0,r0,r4,ror#19	@ Sigma1(e)
#endif
	ldr	r12,[r14],#4			@ *K256++
	add	r7,r7,r2			@ h+=X[i]
	str	r2,[sp,#4*4]
	eor	r2,r5,r6
	add	r7,r7,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r4
	add	r7,r7,r12			@ h+=K256[i]
	eor	r2,r2,r6			@ Ch(e,f,g)
	eor	r0,r8,r8,ror#11
	add	r7,r7,r2			@ h+=Ch(e,f,g)
#if 4==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 4<15
# if __ARM_ARCH>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r8,r9			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#6*4]		@ from future BODY_16_xx
	eor	r12,r8,r9			@ a^b, b^c in next round
	ldr	r1,[sp,#3*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r8,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r11,r11,r7			@ d+=h
	eor	r3,r3,r9			@ Maj(a,b,c)
	add	r7,r7,r0,ror#2	@ h+=Sigma0(a)
	@ add	r7,r7,r3			@ h+=Maj(a,b,c)
#if __ARM_ARCH>=7
	@ ldr	r2,[r1],#4			@ 5
# if 5==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r11,r11,ror#5
	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r11,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 5
	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
	ldrb	r3,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r3,lsl#8
	ldrb	r3,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 5==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r11,r11,ror#5
	orr	r2,r2,r3,lsl#24
	eor	r0,r0,r11,ror#19	@ Sigma1(e)
#endif
	ldr	r3,[r14],#4			@ *K256++
	add	r6,r6,r2			@ h+=X[i]
	str	r2,[sp,#5*4]
	eor	r2,r4,r5
	add	r6,r6,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r11
	add	r6,r6,r3			@ h+=K256[i]
	eor	r2,r2,r5			@ Ch(e,f,g)
	eor	r0,r7,r7,ror#11
	add	r6,r6,r2			@ h+=Ch(e,f,g)
#if 5==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 5<15
# if __ARM_ARCH>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r7,r8			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#7*4]		@ from future BODY_16_xx
	eor	r3,r7,r8			@ a^b, b^c in next round
	ldr	r1,[sp,#4*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r7,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r10,r10,r6			@ d+=h
	eor	r12,r12,r8			@ Maj(a,b,c)
	add	r6,r6,r0,ror#2	@ h+=Sigma0(a)
	@ add	r6,r6,r12			@ h+=Maj(a,b,c)
#if __ARM_ARCH>=7
	@ ldr	r2,[r1],#4			@ 6
# if 6==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r10,r10,ror#5
	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r10,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 6
	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
	ldrb	r12,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r12,lsl#8
	ldrb	r12,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 6==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r10,r10,ror#5
	orr	r2,r2,r12,lsl#24
	eor	r0,r0,r10,ror#19	@ Sigma1(e)
#endif
	ldr	r12,[r14],#4			@ *K256++
	add	r5,r5,r2			@ h+=X[i]
	str	r2,[sp,#6*4]
	eor	r2,r11,r4
	add	r5,r5,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r10
	add	r5,r5,r12			@ h+=K256[i]
	eor	r2,r2,r4			@ Ch(e,f,g)
	eor	r0,r6,r6,ror#11
	add	r5,r5,r2			@ h+=Ch(e,f,g)
#if 6==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 6<15
# if __ARM_ARCH>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r6,r7			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#8*4]		@ from future BODY_16_xx
	eor	r12,r6,r7			@ a^b, b^c in next round
	ldr	r1,[sp,#5*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r6,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r9,r9,r5			@ d+=h
	eor	r3,r3,r7			@ Maj(a,b,c)
	add	r5,r5,r0,ror#2	@ h+=Sigma0(a)
	@ add	r5,r5,r3			@ h+=Maj(a,b,c)
#if __ARM_ARCH>=7
	@ ldr	r2,[r1],#4			@ 7
# if 7==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r9,r9,ror#5
	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r9,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 7
	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
	ldrb	r3,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r3,lsl#8
	ldrb	r3,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 7==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r9,r9,ror#5
	orr	r2,r2,r3,lsl#24
	eor	r0,r0,r9,ror#19	@ Sigma1(e)
#endif
	ldr	r3,[r14],#4			@ *K256++
	add	r4,r4,r2			@ h+=X[i]
	str	r2,[sp,#7*4]
	eor	r2,r10,r11
	add	r4,r4,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r9
	add	r4,r4,r3			@ h+=K256[i]
	eor	r2,r2,r11			@ Ch(e,f,g)
	eor	r0,r5,r5,ror#11
	add	r4,r4,r2			@ h+=Ch(e,f,g)
#if 7==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 7<15
# if __ARM_ARCH>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r5,r6			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#9*4]		@ from future BODY_16_xx
	eor	r3,r5,r6			@ a^b, b^c in next round
	ldr	r1,[sp,#6*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r5,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r8,r8,r4			@ d+=h
	eor	r12,r12,r6			@ Maj(a,b,c)
	add	r4,r4,r0,ror#2	@ h+=Sigma0(a)
	@ add	r4,r4,r12			@ h+=Maj(a,b,c)
#if __ARM_ARCH>=7
	@ ldr	r2,[r1],#4			@ 8
# if 8==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r8,r8,ror#5
	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r8,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 8
	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
	ldrb	r12,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r12,lsl#8
	ldrb	r12,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 8==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r8,r8,ror#5
	orr	r2,r2,r12,lsl#24
	eor	r0,r0,r8,ror#19	@ Sigma1(e)
#endif
	ldr	r12,[r14],#4			@ *K256++
	add	r11,r11,r2			@ h+=X[i]
	str	r2,[sp,#8*4]
	eor	r2,r9,r10
	add	r11,r11,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r8
	add	r11,r11,r12			@ h+=K256[i]
	eor	r2,r2,r10			@ Ch(e,f,g)
	eor	r0,r4,r4,ror#11
	add	r11,r11,r2			@ h+=Ch(e,f,g)
#if 8==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 8<15
# if __ARM_ARCH>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r4,r5			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#10*4]		@ from future BODY_16_xx
	eor	r12,r4,r5			@ a^b, b^c in next round
	ldr	r1,[sp,#7*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r4,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r7,r7,r11			@ d+=h
	eor	r3,r3,r5			@ Maj(a,b,c)
	add	r11,r11,r0,ror#2	@ h+=Sigma0(a)
	@ add	r11,r11,r3			@ h+=Maj(a,b,c)
#if __ARM_ARCH>=7
	@ ldr	r2,[r1],#4			@ 9
# if 9==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r7,r7,ror#5
	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r7,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 9
	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
	ldrb	r3,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r3,lsl#8
	ldrb	r3,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 9==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r7,r7,ror#5
	orr	r2,r2,r3,lsl#24
	eor	r0,r0,r7,ror#19	@ Sigma1(e)
#endif
	ldr	r3,[r14],#4			@ *K256++
	add	r10,r10,r2			@ h+=X[i]
	str	r2,[sp,#9*4]
	eor	r2,r8,r9
	add	r10,r10,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r7
	add	r10,r10,r3			@ h+=K256[i]
	eor	r2,r2,r9			@ Ch(e,f,g)
	eor	r0,r11,r11,ror#11
	add	r10,r10,r2			@ h+=Ch(e,f,g)
#if 9==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 9<15
# if __ARM_ARCH>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r11,r4			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#11*4]		@ from future BODY_16_xx
	eor	r3,r11,r4			@ a^b, b^c in next round
	ldr	r1,[sp,#8*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r11,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r6,r6,r10			@ d+=h
	eor	r12,r12,r4			@ Maj(a,b,c)
	add	r10,r10,r0,ror#2	@ h+=Sigma0(a)
	@ add	r10,r10,r12			@ h+=Maj(a,b,c)
#if __ARM_ARCH>=7
	@ ldr	r2,[r1],#4			@ 10
# if 10==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r6,r6,ror#5
	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r6,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 10
	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
	ldrb	r12,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r12,lsl#8
	ldrb	r12,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 10==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r6,r6,ror#5
	orr	r2,r2,r12,lsl#24
	eor	r0,r0,r6,ror#19	@ Sigma1(e)
#endif
	ldr	r12,[r14],#4			@ *K256++
	add	r9,r9,r2			@ h+=X[i]
	str	r2,[sp,#10*4]
	eor	r2,r7,r8
	add	r9,r9,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r6
	add	r9,r9,r12			@ h+=K256[i]
	eor	r2,r2,r8			@ Ch(e,f,g)
	eor	r0,r10,r10,ror#11
	add	r9,r9,r2			@ h+=Ch(e,f,g)
#if 10==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 10<15
# if __ARM_ARCH>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r10,r11			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#12*4]		@ from future BODY_16_xx
	eor	r12,r10,r11			@ a^b, b^c in next round
	ldr	r1,[sp,#9*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r10,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r5,r5,r9			@ d+=h
	eor	r3,r3,r11			@ Maj(a,b,c)
	add	r9,r9,r0,ror#2	@ h+=Sigma0(a)
	@ add	r9,r9,r3			@ h+=Maj(a,b,c)
#if __ARM_ARCH>=7
	@ ldr	r2,[r1],#4			@ 11
# if 11==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r5,r5,ror#5
	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r5,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 11
	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
	ldrb	r3,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r3,lsl#8
	ldrb	r3,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 11==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r5,r5,ror#5
	orr	r2,r2,r3,lsl#24
	eor	r0,r0,r5,ror#19	@ Sigma1(e)
#endif
	ldr	r3,[r14],#4			@ *K256++
	add	r8,r8,r2			@ h+=X[i]
	str	r2,[sp,#11*4]
	eor	r2,r6,r7
	add	r8,r8,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r5
	add	r8,r8,r3			@ h+=K256[i]
	eor	r2,r2,r7			@ Ch(e,f,g)
	eor	r0,r9,r9,ror#11
	add	r8,r8,r2			@ h+=Ch(e,f,g)
#if 11==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 11<15
# if __ARM_ARCH>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r9,r10			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#13*4]		@ from future BODY_16_xx
	eor	r3,r9,r10			@ a^b, b^c in next round
	ldr	r1,[sp,#10*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r9,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r4,r4,r8			@ d+=h
	eor	r12,r12,r10			@ Maj(a,b,c)
	add	r8,r8,r0,ror#2	@ h+=Sigma0(a)
	@ add	r8,r8,r12			@ h+=Maj(a,b,c)
#if __ARM_ARCH>=7
	@ ldr	r2,[r1],#4			@ 12
# if 12==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r4,r4,ror#5
	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r4,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 12
	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
	ldrb	r12,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r12,lsl#8
	ldrb	r12,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 12==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r4,r4,ror#5
	orr	r2,r2,r12,lsl#24
	eor	r0,r0,r4,ror#19	@ Sigma1(e)
#endif
	ldr	r12,[r14],#4			@ *K256++
	add	r7,r7,r2			@ h+=X[i]
	str	r2,[sp,#12*4]
	eor	r2,r5,r6
	add	r7,r7,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r4
	add	r7,r7,r12			@ h+=K256[i]
	eor	r2,r2,r6			@ Ch(e,f,g)
	eor	r0,r8,r8,ror#11
	add	r7,r7,r2			@ h+=Ch(e,f,g)
#if 12==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 12<15
# if __ARM_ARCH>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r8,r9			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#14*4]		@ from future BODY_16_xx
	eor	r12,r8,r9			@ a^b, b^c in next round
	ldr	r1,[sp,#11*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r8,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r11,r11,r7			@ d+=h
	eor	r3,r3,r9			@ Maj(a,b,c)
	add	r7,r7,r0,ror#2	@ h+=Sigma0(a)
	@ add	r7,r7,r3			@ h+=Maj(a,b,c)
#if __ARM_ARCH>=7
	@ ldr	r2,[r1],#4			@ 13
# if 13==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r11,r11,ror#5
	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r11,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 13
	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
	ldrb	r3,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r3,lsl#8
	ldrb	r3,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 13==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r11,r11,ror#5
	orr	r2,r2,r3,lsl#24
	eor	r0,r0,r11,ror#19	@ Sigma1(e)
#endif
	ldr	r3,[r14],#4			@ *K256++
	add	r6,r6,r2			@ h+=X[i]
	str	r2,[sp,#13*4]
	eor	r2,r4,r5
	add	r6,r6,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r11
	add	r6,r6,r3			@ h+=K256[i]
	eor	r2,r2,r5			@ Ch(e,f,g)
	eor	r0,r7,r7,ror#11
	add	r6,r6,r2			@ h+=Ch(e,f,g)
#if 13==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 13<15
# if __ARM_ARCH>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r7,r8			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#15*4]		@ from future BODY_16_xx
	eor	r3,r7,r8			@ a^b, b^c in next round
	ldr	r1,[sp,#12*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r7,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r10,r10,r6			@ d+=h
	eor	r12,r12,r8			@ Maj(a,b,c)
	add	r6,r6,r0,ror#2	@ h+=Sigma0(a)
	@ add	r6,r6,r12			@ h+=Maj(a,b,c)
#if __ARM_ARCH>=7
	@ ldr	r2,[r1],#4			@ 14
# if 14==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r10,r10,ror#5
	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r10,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 14
	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
	ldrb	r12,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r12,lsl#8
	ldrb	r12,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 14==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r10,r10,ror#5
	orr	r2,r2,r12,lsl#24
	eor	r0,r0,r10,ror#19	@ Sigma1(e)
#endif
	ldr	r12,[r14],#4			@ *K256++
	add	r5,r5,r2			@ h+=X[i]
	str	r2,[sp,#14*4]
	eor	r2,r11,r4
	add	r5,r5,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r10
	add	r5,r5,r12			@ h+=K256[i]
	eor	r2,r2,r4			@ Ch(e,f,g)
	eor	r0,r6,r6,ror#11
	add	r5,r5,r2			@ h+=Ch(e,f,g)
#if 14==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 14<15
# if __ARM_ARCH>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r6,r7			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#0*4]		@ from future BODY_16_xx
	eor	r12,r6,r7			@ a^b, b^c in next round
	ldr	r1,[sp,#13*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r6,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r9,r9,r5			@ d+=h
	eor	r3,r3,r7			@ Maj(a,b,c)
	add	r5,r5,r0,ror#2	@ h+=Sigma0(a)
	@ add	r5,r5,r3			@ h+=Maj(a,b,c)
#if __ARM_ARCH>=7
	@ ldr	r2,[r1],#4			@ 15
# if 15==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r9,r9,ror#5
	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r9,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 15
	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
	ldrb	r3,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r3,lsl#8
	ldrb	r3,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 15==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r9,r9,ror#5
	orr	r2,r2,r3,lsl#24
	eor	r0,r0,r9,ror#19	@ Sigma1(e)
#endif
	ldr	r3,[r14],#4			@ *K256++
	add	r4,r4,r2			@ h+=X[i]
	str	r2,[sp,#15*4]
	eor	r2,r10,r11
	add	r4,r4,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r9
	add	r4,r4,r3			@ h+=K256[i]
	eor	r2,r2,r11			@ Ch(e,f,g)
	eor	r0,r5,r5,ror#11
	add	r4,r4,r2			@ h+=Ch(e,f,g)
#if 15==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 15<15
# if __ARM_ARCH>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r5,r6			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#1*4]		@ from future BODY_16_xx
	eor	r3,r5,r6			@ a^b, b^c in next round
	ldr	r1,[sp,#14*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r5,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r8,r8,r4			@ d+=h
	eor	r12,r12,r6			@ Maj(a,b,c)
	add	r4,r4,r0,ror#2	@ h+=Sigma0(a)
	@ add	r4,r4,r12			@ h+=Maj(a,b,c)
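@ Rounds 16..63 extend the message schedule in place, in the same
@ 16-word window on the stack. In C-style pseudocode (indices mod 16;
@ for reference only):
@
@	X[i] += sigma0(X[i+1]) + X[i+9] + sigma1(X[i+14]);
@
@ where sigma0(x) = ror(x,7)^ror(x,18)^(x>>3) and
@       sigma1(x) = ror(x,17)^ror(x,19)^(x>>10).
@ The block below unrolls 16 such rounds and is executed three times.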
.Lrounds_16_xx:
	@ ldr	r2,[sp,#1*4]		@ 16
	@ ldr	r1,[sp,#14*4]
	mov	r0,r2,ror#7
	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
	mov	r12,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r12,r12,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#0*4]
	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#9*4]

	add	r12,r12,r0
	eor	r0,r8,r8,ror#5	@ from BODY_00_15
	add	r2,r2,r12
	eor	r0,r0,r8,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r12,[r14],#4			@ *K256++
	add	r11,r11,r2			@ h+=X[i]
	str	r2,[sp,#0*4]
	eor	r2,r9,r10
	add	r11,r11,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r8
	add	r11,r11,r12			@ h+=K256[i]
	eor	r2,r2,r10			@ Ch(e,f,g)
	eor	r0,r4,r4,ror#11
	add	r11,r11,r2			@ h+=Ch(e,f,g)
#if 16==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 16<15
# if __ARM_ARCH>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r4,r5			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#2*4]		@ from future BODY_16_xx
	eor	r12,r4,r5			@ a^b, b^c in next round
	ldr	r1,[sp,#15*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r4,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r7,r7,r11			@ d+=h
	eor	r3,r3,r5			@ Maj(a,b,c)
	add	r11,r11,r0,ror#2	@ h+=Sigma0(a)
	@ add	r11,r11,r3			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#2*4]		@ 17
	@ ldr	r1,[sp,#15*4]
	mov	r0,r2,ror#7
	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
	mov	r3,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r3,r3,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#1*4]
	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#10*4]

	add	r3,r3,r0
	eor	r0,r7,r7,ror#5	@ from BODY_00_15
	add	r2,r2,r3
	eor	r0,r0,r7,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r3,[r14],#4			@ *K256++
	add	r10,r10,r2			@ h+=X[i]
	str	r2,[sp,#1*4]
	eor	r2,r8,r9
	add	r10,r10,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r7
	add	r10,r10,r3			@ h+=K256[i]
	eor	r2,r2,r9			@ Ch(e,f,g)
	eor	r0,r11,r11,ror#11
	add	r10,r10,r2			@ h+=Ch(e,f,g)
#if 17==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 17<15
# if __ARM_ARCH>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r11,r4			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#3*4]		@ from future BODY_16_xx
	eor	r3,r11,r4			@ a^b, b^c in next round
	ldr	r1,[sp,#0*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r11,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r6,r6,r10			@ d+=h
	eor	r12,r12,r4			@ Maj(a,b,c)
	add	r10,r10,r0,ror#2	@ h+=Sigma0(a)
	@ add	r10,r10,r12			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#3*4]		@ 18
	@ ldr	r1,[sp,#0*4]
	mov	r0,r2,ror#7
	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
	mov	r12,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r12,r12,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#2*4]
	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#11*4]

	add	r12,r12,r0
	eor	r0,r6,r6,ror#5	@ from BODY_00_15
	add	r2,r2,r12
	eor	r0,r0,r6,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r12,[r14],#4			@ *K256++
	add	r9,r9,r2			@ h+=X[i]
	str	r2,[sp,#2*4]
	eor	r2,r7,r8
	add	r9,r9,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r6
	add	r9,r9,r12			@ h+=K256[i]
	eor	r2,r2,r8			@ Ch(e,f,g)
	eor	r0,r10,r10,ror#11
	add	r9,r9,r2			@ h+=Ch(e,f,g)
#if 18==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 18<15
# if __ARM_ARCH>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r10,r11			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#4*4]		@ from future BODY_16_xx
	eor	r12,r10,r11			@ a^b, b^c in next round
	ldr	r1,[sp,#1*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r10,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r5,r5,r9			@ d+=h
	eor	r3,r3,r11			@ Maj(a,b,c)
	add	r9,r9,r0,ror#2	@ h+=Sigma0(a)
	@ add	r9,r9,r3			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#4*4]		@ 19
	@ ldr	r1,[sp,#1*4]
	mov	r0,r2,ror#7
	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
	mov	r3,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r3,r3,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#3*4]
	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#12*4]

	add	r3,r3,r0
	eor	r0,r5,r5,ror#5	@ from BODY_00_15
	add	r2,r2,r3
	eor	r0,r0,r5,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r3,[r14],#4			@ *K256++
	add	r8,r8,r2			@ h+=X[i]
	str	r2,[sp,#3*4]
	eor	r2,r6,r7
	add	r8,r8,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r5
	add	r8,r8,r3			@ h+=K256[i]
	eor	r2,r2,r7			@ Ch(e,f,g)
	eor	r0,r9,r9,ror#11
	add	r8,r8,r2			@ h+=Ch(e,f,g)
#if 19==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 19<15
# if __ARM_ARCH>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r9,r10			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#5*4]		@ from future BODY_16_xx
	eor	r3,r9,r10			@ a^b, b^c in next round
	ldr	r1,[sp,#2*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r9,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r4,r4,r8			@ d+=h
	eor	r12,r12,r10			@ Maj(a,b,c)
	add	r8,r8,r0,ror#2	@ h+=Sigma0(a)
	@ add	r8,r8,r12			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#5*4]		@ 20
	@ ldr	r1,[sp,#2*4]
	mov	r0,r2,ror#7
	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
	mov	r12,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r12,r12,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#4*4]
	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#13*4]

	add	r12,r12,r0
	eor	r0,r4,r4,ror#5	@ from BODY_00_15
	add	r2,r2,r12
	eor	r0,r0,r4,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r12,[r14],#4			@ *K256++
	add	r7,r7,r2			@ h+=X[i]
	str	r2,[sp,#4*4]
	eor	r2,r5,r6
	add	r7,r7,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r4
	add	r7,r7,r12			@ h+=K256[i]
	eor	r2,r2,r6			@ Ch(e,f,g)
	eor	r0,r8,r8,ror#11
	add	r7,r7,r2			@ h+=Ch(e,f,g)
#if 20==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 20<15
# if __ARM_ARCH>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r8,r9			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#6*4]		@ from future BODY_16_xx
	eor	r12,r8,r9			@ a^b, b^c in next round
	ldr	r1,[sp,#3*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r8,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r11,r11,r7			@ d+=h
	eor	r3,r3,r9			@ Maj(a,b,c)
	add	r7,r7,r0,ror#2	@ h+=Sigma0(a)
	@ add	r7,r7,r3			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#6*4]		@ 21
	@ ldr	r1,[sp,#3*4]
	mov	r0,r2,ror#7
	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
	mov	r3,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r3,r3,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#5*4]
	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#14*4]

	add	r3,r3,r0
	eor	r0,r11,r11,ror#5	@ from BODY_00_15
	add	r2,r2,r3
	eor	r0,r0,r11,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r3,[r14],#4			@ *K256++
	add	r6,r6,r2			@ h+=X[i]
	str	r2,[sp,#5*4]
	eor	r2,r4,r5
	add	r6,r6,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r11
	add	r6,r6,r3			@ h+=K256[i]
	eor	r2,r2,r5			@ Ch(e,f,g)
	eor	r0,r7,r7,ror#11
	add	r6,r6,r2			@ h+=Ch(e,f,g)
#if 21==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 21<15
# if __ARM_ARCH>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r7,r8			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#7*4]		@ from future BODY_16_xx
	eor	r3,r7,r8			@ a^b, b^c in next round
	ldr	r1,[sp,#4*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r7,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r10,r10,r6			@ d+=h
	eor	r12,r12,r8			@ Maj(a,b,c)
	add	r6,r6,r0,ror#2	@ h+=Sigma0(a)
	@ add	r6,r6,r12			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#7*4]		@ 22
	@ ldr	r1,[sp,#4*4]
	mov	r0,r2,ror#7
	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
	mov	r12,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r12,r12,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#6*4]
	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#15*4]

	add	r12,r12,r0
	eor	r0,r10,r10,ror#5	@ from BODY_00_15
	add	r2,r2,r12
	eor	r0,r0,r10,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r12,[r14],#4			@ *K256++
	add	r5,r5,r2			@ h+=X[i]
	str	r2,[sp,#6*4]
	eor	r2,r11,r4
	add	r5,r5,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r10
	add	r5,r5,r12			@ h+=K256[i]
	eor	r2,r2,r4			@ Ch(e,f,g)
	eor	r0,r6,r6,ror#11
	add	r5,r5,r2			@ h+=Ch(e,f,g)
#if 22==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 22<15
# if __ARM_ARCH>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r6,r7			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#8*4]		@ from future BODY_16_xx
	eor	r12,r6,r7			@ a^b, b^c in next round
	ldr	r1,[sp,#5*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r6,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r9,r9,r5			@ d+=h
	eor	r3,r3,r7			@ Maj(a,b,c)
	add	r5,r5,r0,ror#2	@ h+=Sigma0(a)
	@ add	r5,r5,r3			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#8*4]		@ 23
	@ ldr	r1,[sp,#5*4]
	mov	r0,r2,ror#7
	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
	mov	r3,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r3,r3,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#7*4]
	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#0*4]

	add	r3,r3,r0
	eor	r0,r9,r9,ror#5	@ from BODY_00_15
	add	r2,r2,r3
	eor	r0,r0,r9,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r3,[r14],#4			@ *K256++
	add	r4,r4,r2			@ h+=X[i]
	str	r2,[sp,#7*4]
	eor	r2,r10,r11
	add	r4,r4,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r9
	add	r4,r4,r3			@ h+=K256[i]
	eor	r2,r2,r11			@ Ch(e,f,g)
	eor	r0,r5,r5,ror#11
	add	r4,r4,r2			@ h+=Ch(e,f,g)
#if 23==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 23<15
# if __ARM_ARCH>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r5,r6			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#9*4]		@ from future BODY_16_xx
	eor	r3,r5,r6			@ a^b, b^c in next round
	ldr	r1,[sp,#6*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r5,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r8,r8,r4			@ d+=h
	eor	r12,r12,r6			@ Maj(a,b,c)
	add	r4,r4,r0,ror#2	@ h+=Sigma0(a)
	@ add	r4,r4,r12			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#9*4]		@ 24
	@ ldr	r1,[sp,#6*4]
	mov	r0,r2,ror#7
	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
	mov	r12,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r12,r12,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#8*4]
	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#1*4]

	add	r12,r12,r0
	eor	r0,r8,r8,ror#5	@ from BODY_00_15
	add	r2,r2,r12
	eor	r0,r0,r8,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r12,[r14],#4			@ *K256++
	add	r11,r11,r2			@ h+=X[i]
	str	r2,[sp,#8*4]
	eor	r2,r9,r10
	add	r11,r11,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r8
	add	r11,r11,r12			@ h+=K256[i]
	eor	r2,r2,r10			@ Ch(e,f,g)
	eor	r0,r4,r4,ror#11
	add	r11,r11,r2			@ h+=Ch(e,f,g)
#if 24==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 24<15
# if __ARM_ARCH>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r4,r5			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#10*4]		@ from future BODY_16_xx
	eor	r12,r4,r5			@ a^b, b^c in next round
	ldr	r1,[sp,#7*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r4,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r7,r7,r11			@ d+=h
	eor	r3,r3,r5			@ Maj(a,b,c)
	add	r11,r11,r0,ror#2	@ h+=Sigma0(a)
	@ add	r11,r11,r3			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#10*4]		@ 25
	@ ldr	r1,[sp,#7*4]
	mov	r0,r2,ror#7
	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
	mov	r3,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r3,r3,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#9*4]
	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#2*4]

	add	r3,r3,r0
	eor	r0,r7,r7,ror#5	@ from BODY_00_15
	add	r2,r2,r3
	eor	r0,r0,r7,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r3,[r14],#4			@ *K256++
	add	r10,r10,r2			@ h+=X[i]
	str	r2,[sp,#9*4]
	eor	r2,r8,r9
	add	r10,r10,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r7
	add	r10,r10,r3			@ h+=K256[i]
	eor	r2,r2,r9			@ Ch(e,f,g)
	eor	r0,r11,r11,ror#11
	add	r10,r10,r2			@ h+=Ch(e,f,g)
#if 25==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 25<15
# if __ARM_ARCH>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r11,r4			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#11*4]		@ from future BODY_16_xx
	eor	r3,r11,r4			@ a^b, b^c in next round
	ldr	r1,[sp,#8*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r11,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r6,r6,r10			@ d+=h
	eor	r12,r12,r4			@ Maj(a,b,c)
	add	r10,r10,r0,ror#2	@ h+=Sigma0(a)
	@ add	r10,r10,r12			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#11*4]		@ 26
	@ ldr	r1,[sp,#8*4]
	mov	r0,r2,ror#7
	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
	mov	r12,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r12,r12,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#10*4]
	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#3*4]

	add	r12,r12,r0
	eor	r0,r6,r6,ror#5	@ from BODY_00_15
	add	r2,r2,r12
	eor	r0,r0,r6,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r12,[r14],#4			@ *K256++
	add	r9,r9,r2			@ h+=X[i]
	str	r2,[sp,#10*4]
	eor	r2,r7,r8
	add	r9,r9,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r6
	add	r9,r9,r12			@ h+=K256[i]
	eor	r2,r2,r8			@ Ch(e,f,g)
	eor	r0,r10,r10,ror#11
	add	r9,r9,r2			@ h+=Ch(e,f,g)
#if 26==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 26<15
# if __ARM_ARCH>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r10,r11			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#12*4]		@ from future BODY_16_xx
	eor	r12,r10,r11			@ a^b, b^c in next round
	ldr	r1,[sp,#9*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r10,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r5,r5,r9			@ d+=h
	eor	r3,r3,r11			@ Maj(a,b,c)
	add	r9,r9,r0,ror#2	@ h+=Sigma0(a)
	@ add	r9,r9,r3			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#12*4]		@ 27
	@ ldr	r1,[sp,#9*4]
	mov	r0,r2,ror#7
	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
	mov	r3,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r3,r3,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#11*4]
	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#4*4]

	add	r3,r3,r0
	eor	r0,r5,r5,ror#5	@ from BODY_00_15
	add	r2,r2,r3
	eor	r0,r0,r5,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r3,[r14],#4			@ *K256++
	add	r8,r8,r2			@ h+=X[i]
	str	r2,[sp,#11*4]
	eor	r2,r6,r7
	add	r8,r8,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r5
	add	r8,r8,r3			@ h+=K256[i]
	eor	r2,r2,r7			@ Ch(e,f,g)
	eor	r0,r9,r9,ror#11
	add	r8,r8,r2			@ h+=Ch(e,f,g)
#if 27==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 27<15
# if __ARM_ARCH>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r9,r10			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#13*4]		@ from future BODY_16_xx
	eor	r3,r9,r10			@ a^b, b^c in next round
	ldr	r1,[sp,#10*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r9,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r4,r4,r8			@ d+=h
	eor	r12,r12,r10			@ Maj(a,b,c)
	add	r8,r8,r0,ror#2	@ h+=Sigma0(a)
	@ add	r8,r8,r12			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#13*4]		@ 28
	@ ldr	r1,[sp,#10*4]
	mov	r0,r2,ror#7
	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
	mov	r12,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r12,r12,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#12*4]
	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#5*4]

	add	r12,r12,r0
	eor	r0,r4,r4,ror#5	@ from BODY_00_15
	add	r2,r2,r12
	eor	r0,r0,r4,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r12,[r14],#4			@ *K256++
	add	r7,r7,r2			@ h+=X[i]
	str	r2,[sp,#12*4]
	eor	r2,r5,r6
	add	r7,r7,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r4
	add	r7,r7,r12			@ h+=K256[i]
	eor	r2,r2,r6			@ Ch(e,f,g)
	eor	r0,r8,r8,ror#11
	add	r7,r7,r2			@ h+=Ch(e,f,g)
#if 28==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 28<15
# if __ARM_ARCH>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r8,r9			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#14*4]		@ from future BODY_16_xx
	eor	r12,r8,r9			@ a^b, b^c in next round
	ldr	r1,[sp,#11*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r8,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r11,r11,r7			@ d+=h
	eor	r3,r3,r9			@ Maj(a,b,c)
	add	r7,r7,r0,ror#2	@ h+=Sigma0(a)
	@ add	r7,r7,r3			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#14*4]		@ 29
	@ ldr	r1,[sp,#11*4]
	mov	r0,r2,ror#7
	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
	mov	r3,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r3,r3,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#13*4]
	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#6*4]

	add	r3,r3,r0
	eor	r0,r11,r11,ror#5	@ from BODY_00_15
	add	r2,r2,r3
	eor	r0,r0,r11,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r3,[r14],#4			@ *K256++
	add	r6,r6,r2			@ h+=X[i]
	str	r2,[sp,#13*4]
	eor	r2,r4,r5
	add	r6,r6,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r11
	add	r6,r6,r3			@ h+=K256[i]
	eor	r2,r2,r5			@ Ch(e,f,g)
	eor	r0,r7,r7,ror#11
	add	r6,r6,r2			@ h+=Ch(e,f,g)
#if 29==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 29<15
# if __ARM_ARCH>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r7,r8			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#15*4]		@ from future BODY_16_xx
	eor	r3,r7,r8			@ a^b, b^c in next round
	ldr	r1,[sp,#12*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r7,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r10,r10,r6			@ d+=h
	eor	r12,r12,r8			@ Maj(a,b,c)
	add	r6,r6,r0,ror#2	@ h+=Sigma0(a)
	@ add	r6,r6,r12			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#15*4]		@ 30
	@ ldr	r1,[sp,#12*4]
	mov	r0,r2,ror#7
	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
	mov	r12,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r12,r12,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#14*4]
	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#7*4]

	add	r12,r12,r0
	eor	r0,r10,r10,ror#5	@ from BODY_00_15
	add	r2,r2,r12
	eor	r0,r0,r10,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r12,[r14],#4			@ *K256++
	add	r5,r5,r2			@ h+=X[i]
	str	r2,[sp,#14*4]
	eor	r2,r11,r4
	add	r5,r5,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r10
	add	r5,r5,r12			@ h+=K256[i]
	eor	r2,r2,r4			@ Ch(e,f,g)
	eor	r0,r6,r6,ror#11
	add	r5,r5,r2			@ h+=Ch(e,f,g)
#if 30==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 30<15
# if __ARM_ARCH>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r6,r7			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#0*4]		@ from future BODY_16_xx
	eor	r12,r6,r7			@ a^b, b^c in next round
	ldr	r1,[sp,#13*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r6,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r9,r9,r5			@ d+=h
	eor	r3,r3,r7			@ Maj(a,b,c)
	add	r5,r5,r0,ror#2	@ h+=Sigma0(a)
	@ add	r5,r5,r3			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#0*4]		@ 31
	@ ldr	r1,[sp,#13*4]
	mov	r0,r2,ror#7
	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
	mov	r3,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r3,r3,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#15*4]
	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#8*4]

	add	r3,r3,r0
	eor	r0,r9,r9,ror#5	@ from BODY_00_15
	add	r2,r2,r3
	eor	r0,r0,r9,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r3,[r14],#4			@ *K256++
	add	r4,r4,r2			@ h+=X[i]
	str	r2,[sp,#15*4]
	eor	r2,r10,r11
	add	r4,r4,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r9
	add	r4,r4,r3			@ h+=K256[i]
	eor	r2,r2,r11			@ Ch(e,f,g)
	eor	r0,r5,r5,ror#11
	add	r4,r4,r2			@ h+=Ch(e,f,g)
#if 31==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 31<15
# if __ARM_ARCH>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r5,r6			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#1*4]		@ from future BODY_16_xx
	eor	r3,r5,r6			@ a^b, b^c in next round
	ldr	r1,[sp,#14*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r5,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r8,r8,r4			@ d+=h
	eor	r12,r12,r6			@ Maj(a,b,c)
	add	r4,r4,r0,ror#2	@ h+=Sigma0(a)
	@ add	r4,r4,r12			@ h+=Maj(a,b,c)
#if __ARM_ARCH>=7
	ite	eq			@ Thumb2 thing, sanity check in ARM
#endif
	ldreq	r3,[sp,#16*4]		@ pull ctx
	bne	.Lrounds_16_xx
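@ No round counter is kept: the cmp above inspects the low byte of the
@ K256 word just loaded, and only the final constant, 0xc67178f2, ends
@ in 0xf2, so the branch falls through once the unrolled block has run
@ three times (rounds 16..63). What follows is the feed-forward: the
@ eight working registers are added back into the context at r3 before
@ the next 64-byte block.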

	add	r4,r4,r12		@ h+=Maj(a,b,c) from the past
	ldr	r0,[r3,#0]
	ldr	r2,[r3,#4]
	ldr	r12,[r3,#8]
	add	r4,r4,r0
	ldr	r0,[r3,#12]
	add	r5,r5,r2
	ldr	r2,[r3,#16]
	add	r6,r6,r12
	ldr	r12,[r3,#20]
	add	r7,r7,r0
	ldr	r0,[r3,#24]
	add	r8,r8,r2
	ldr	r2,[r3,#28]
	add	r9,r9,r12
	ldr	r1,[sp,#17*4]		@ pull inp
	ldr	r12,[sp,#18*4]		@ pull inp+len
	add	r10,r10,r0
	add	r11,r11,r2
	stmia	r3,{r4,r5,r6,r7,r8,r9,r10,r11}
	cmp	r1,r12
	sub	r14,r14,#256	@ rewind Ktbl
	bne	.Loop

	add	sp,sp,#19*4	@ destroy frame
#if __ARM_ARCH>=5
	ldmia	sp!,{r4,r5,r6,r7,r8,r9,r10,r11,pc}
#else
	ldmia	sp!,{r4,r5,r6,r7,r8,r9,r10,r11,lr}
	tst	lr,#1
	moveq	pc,lr			@ be binary compatible with V4, yet
.word	0xe12fff1e			@ interoperable with Thumb ISA:-)
#endif
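@ The pre-ARMv5 epilogue above pops into lr, returns with a plain mov
@ when bit 0 of lr is clear (an Arm-state caller), and otherwise falls
@ through to .word 0xe12fff1e, the hand-assembled encoding of "bx lr",
@ so the binary stays ARMv4-compatible while still interworking with
@ Thumb callers.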
.size	sha256_block_data_order_nohw,.-sha256_block_data_order_nohw
#if __ARM_MAX_ARCH__>=7
.arch	armv7-a
.fpu	neon

.LK256_shortcut_neon:
@ PC is 8 bytes ahead in Arm mode and 4 bytes ahead in Thumb mode.
#if defined(__thumb2__)
.word	K256-(.LK256_add_neon+4)
#else
.word	K256-(.LK256_add_neon+8)
#endif

.globl	sha256_block_data_order_neon
.hidden	sha256_block_data_order_neon
.type	sha256_block_data_order_neon,%function
.align	5
.skip	16
sha256_block_data_order_neon:
	stmdb	sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr}

	sub	r11,sp,#16*4+16

	@ K256 is just at the boundary of being easily referenced by an ADR from
	@ this function. In Arm mode, when building with __ARM_ARCH=6, it does
	@ not fit. By moving code around, we could make it fit, but this is too
	@ fragile. For simplicity, just load the offset from
	@ .LK256_shortcut_neon.
	@
	@ TODO(davidben): adrl would avoid a load, but clang-assembler does not
	@ support it. We might be able to emulate it with a macro, but Android's
	@ did not work when I tried it.
	@ https://android.googlesource.com/platform/ndk/+/refs/heads/master/docs/ClangMigration.md#arm
	ldr	r14,.LK256_shortcut_neon
.LK256_add_neon:
	add	r14,pc,r14
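@ Here pc reads as .LK256_add_neon+8 (Arm) or +4 (Thumb), exactly the
@ bias subtracted when .LK256_shortcut_neon was assembled, so in either
@ mode r14 ends up pointing at K256; e.g. in Arm mode:
@	r14 = (K256 - (.LK256_add_neon+8)) + (.LK256_add_neon+8) = K256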

	bic	r11,r11,#15		@ align for 128-bit stores
	mov	r12,sp
	mov	sp,r11			@ alloca
	add	r2,r1,r2,lsl#6	@ len to point at the end of inp

	vld1.8	{q0},[r1]!
	vld1.8	{q1},[r1]!
	vld1.8	{q2},[r1]!
	vld1.8	{q3},[r1]!
	vld1.32	{q8},[r14,:128]!
	vld1.32	{q9},[r14,:128]!
	vld1.32	{q10},[r14,:128]!
	vld1.32	{q11},[r14,:128]!
	vrev32.8	q0,q0		@ yes, even on
	str	r0,[sp,#64]
	vrev32.8	q1,q1		@ big-endian
	str	r1,[sp,#68]
	mov	r1,sp
	vrev32.8	q2,q2
	str	r2,[sp,#72]
	vrev32.8	q3,q3
	str	r12,[sp,#76]		@ save original sp
	vadd.i32	q8,q8,q0
	vadd.i32	q9,q9,q1
	vst1.32	{q8},[r1,:128]!
	vadd.i32	q10,q10,q2
	vst1.32	{q9},[r1,:128]!
	vadd.i32	q11,q11,q3
	vst1.32	{q10},[r1,:128]!
	vst1.32	{q11},[r1,:128]!

	ldmia	r0,{r4,r5,r6,r7,r8,r9,r10,r11}
	sub	r1,r1,#64
	ldr	r2,[sp,#0]
	eor	r12,r12,r12
	eor	r3,r5,r6
	b	.L_00_48

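@ .L_00_48 interleaves four scalar rounds with a NEON computation of the
@ next four schedule words: q0..q3 hold the 16-word window, vext.8
@ gathers the X[i+1..] and X[i+9..] inputs, and each rotate is built
@ from a vshr/vsli pair (e.g. ror(x,7) = (x>>7) with (x<<25) inserted),
@ so sigma0/sigma1 run on four lanes at once while the ALU digests the
@ previously scheduled words.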
.align	4
.L_00_48:
	vext.8	q8,q0,q1,#4
	add	r11,r11,r2
	eor	r2,r9,r10
	eor	r0,r8,r8,ror#5
	vext.8	q9,q2,q3,#4
	add	r4,r4,r12
	and	r2,r2,r8
	eor	r12,r0,r8,ror#19
	vshr.u32	q10,q8,#7
	eor	r0,r4,r4,ror#11
	eor	r2,r2,r10
	vadd.i32	q0,q0,q9
	add	r11,r11,r12,ror#6
	eor	r12,r4,r5
	vshr.u32	q9,q8,#3
	eor	r0,r0,r4,ror#20
	add	r11,r11,r2
	vsli.32	q10,q8,#25
	ldr	r2,[sp,#4]
	and	r3,r3,r12
	vshr.u32	q11,q8,#18
	add	r7,r7,r11
	add	r11,r11,r0,ror#2
	eor	r3,r3,r5
	veor	q9,q9,q10
	add	r10,r10,r2
	vsli.32	q11,q8,#14
	eor	r2,r8,r9
	eor	r0,r7,r7,ror#5
	vshr.u32	d24,d7,#17
	add	r11,r11,r3
	and	r2,r2,r7
	veor	q9,q9,q11
	eor	r3,r0,r7,ror#19
	eor	r0,r11,r11,ror#11
	vsli.32	d24,d7,#15
	eor	r2,r2,r9
	add	r10,r10,r3,ror#6
	vshr.u32	d25,d7,#10
	eor	r3,r11,r4
	eor	r0,r0,r11,ror#20
	vadd.i32	q0,q0,q9
	add	r10,r10,r2
	ldr	r2,[sp,#8]
	veor	d25,d25,d24
	and	r12,r12,r3
	add	r6,r6,r10
	vshr.u32	d24,d7,#19
	add	r10,r10,r0,ror#2
	eor	r12,r12,r4
	vsli.32	d24,d7,#13
	add	r9,r9,r2
	eor	r2,r7,r8
	veor	d25,d25,d24
	eor	r0,r6,r6,ror#5
	add	r10,r10,r12
	vadd.i32	d0,d0,d25
	and	r2,r2,r6
	eor	r12,r0,r6,ror#19
	vshr.u32	d24,d0,#17
	eor	r0,r10,r10,ror#11
	eor	r2,r2,r8
	vsli.32	d24,d0,#15
	add	r9,r9,r12,ror#6
	eor	r12,r10,r11
	vshr.u32	d25,d0,#10
	eor	r0,r0,r10,ror#20
	add	r9,r9,r2
	veor	d25,d25,d24
	ldr	r2,[sp,#12]
	and	r3,r3,r12
	vshr.u32	d24,d0,#19
	add	r5,r5,r9
	add	r9,r9,r0,ror#2
	eor	r3,r3,r11
	vld1.32	{q8},[r14,:128]!
	add	r8,r8,r2
	vsli.32	d24,d0,#13
	eor	r2,r6,r7
	eor	r0,r5,r5,ror#5
	veor	d25,d25,d24
	add	r9,r9,r3
	and	r2,r2,r5
	vadd.i32	d1,d1,d25
	eor	r3,r0,r5,ror#19
	eor	r0,r9,r9,ror#11
	vadd.i32	q8,q8,q0
	eor	r2,r2,r7
	add	r8,r8,r3,ror#6
	eor	r3,r9,r10
	eor	r0,r0,r9,ror#20
	add	r8,r8,r2
	ldr	r2,[sp,#16]
	and	r12,r12,r3
	add	r4,r4,r8
	vst1.32	{q8},[r1,:128]!
	add	r8,r8,r0,ror#2
	eor	r12,r12,r10
	vext.8	q8,q1,q2,#4
	add	r7,r7,r2
	eor	r2,r5,r6
	eor	r0,r4,r4,ror#5
	vext.8	q9,q3,q0,#4
	add	r8,r8,r12
	and	r2,r2,r4
	eor	r12,r0,r4,ror#19
	vshr.u32	q10,q8,#7
	eor	r0,r8,r8,ror#11
	eor	r2,r2,r6
	vadd.i32	q1,q1,q9
	add	r7,r7,r12,ror#6
	eor	r12,r8,r9
	vshr.u32	q9,q8,#3
	eor	r0,r0,r8,ror#20
	add	r7,r7,r2
	vsli.32	q10,q8,#25
	ldr	r2,[sp,#20]
	and	r3,r3,r12
	vshr.u32	q11,q8,#18
	add	r11,r11,r7
	add	r7,r7,r0,ror#2
	eor	r3,r3,r9
	veor	q9,q9,q10
	add	r6,r6,r2
	vsli.32	q11,q8,#14
	eor	r2,r4,r5
	eor	r0,r11,r11,ror#5
	vshr.u32	d24,d1,#17
	add	r7,r7,r3
	and	r2,r2,r11
	veor	q9,q9,q11
	eor	r3,r0,r11,ror#19
	eor	r0,r7,r7,ror#11
	vsli.32	d24,d1,#15
	eor	r2,r2,r5
	add	r6,r6,r3,ror#6
	vshr.u32	d25,d1,#10
	eor	r3,r7,r8
	eor	r0,r0,r7,ror#20
	vadd.i32	q1,q1,q9
	add	r6,r6,r2
	ldr	r2,[sp,#24]
	veor	d25,d25,d24
	and	r12,r12,r3
	add	r10,r10,r6
	vshr.u32	d24,d1,#19
	add	r6,r6,r0,ror#2
	eor	r12,r12,r8
	vsli.32	d24,d1,#13
	add	r5,r5,r2
	eor	r2,r11,r4
	veor	d25,d25,d24
	eor	r0,r10,r10,ror#5
	add	r6,r6,r12
	vadd.i32	d2,d2,d25
	and	r2,r2,r10
	eor	r12,r0,r10,ror#19
	vshr.u32	d24,d2,#17
	eor	r0,r6,r6,ror#11
	eor	r2,r2,r4
	vsli.32	d24,d2,#15
	add	r5,r5,r12,ror#6
	eor	r12,r6,r7
	vshr.u32	d25,d2,#10
	eor	r0,r0,r6,ror#20
	add	r5,r5,r2
	veor	d25,d25,d24
	ldr	r2,[sp,#28]
	and	r3,r3,r12
	vshr.u32	d24,d2,#19
	add	r9,r9,r5
	add	r5,r5,r0,ror#2
	eor	r3,r3,r7
	vld1.32	{q8},[r14,:128]!
	add	r4,r4,r2
	vsli.32	d24,d2,#13
	eor	r2,r10,r11
	eor	r0,r9,r9,ror#5
	veor	d25,d25,d24
	add	r5,r5,r3
	and	r2,r2,r9
	vadd.i32	d3,d3,d25
	eor	r3,r0,r9,ror#19
	eor	r0,r5,r5,ror#11
	vadd.i32	q8,q8,q1
	eor	r2,r2,r11
	add	r4,r4,r3,ror#6
	eor	r3,r5,r6
	eor	r0,r0,r5,ror#20
	add	r4,r4,r2
	ldr	r2,[sp,#32]
	and	r12,r12,r3
	add	r8,r8,r4
	vst1.32	{q8},[r1,:128]!
	add	r4,r4,r0,ror#2
	eor	r12,r12,r6
	vext.8	q8,q2,q3,#4
	add	r11,r11,r2
	eor	r2,r9,r10
	eor	r0,r8,r8,ror#5
	vext.8	q9,q0,q1,#4
	add	r4,r4,r12
	and	r2,r2,r8
	eor	r12,r0,r8,ror#19
	vshr.u32	q10,q8,#7
	eor	r0,r4,r4,ror#11
	eor	r2,r2,r10
	vadd.i32	q2,q2,q9
	add	r11,r11,r12,ror#6
	eor	r12,r4,r5
	vshr.u32	q9,q8,#3
	eor	r0,r0,r4,ror#20
	add	r11,r11,r2
	vsli.32	q10,q8,#25
	ldr	r2,[sp,#36]
	and	r3,r3,r12
	vshr.u32	q11,q8,#18
	add	r7,r7,r11
	add	r11,r11,r0,ror#2
	eor	r3,r3,r5
	veor	q9,q9,q10
	add	r10,r10,r2
	vsli.32	q11,q8,#14
	eor	r2,r8,r9
	eor	r0,r7,r7,ror#5
	vshr.u32	d24,d3,#17
	add	r11,r11,r3
	and	r2,r2,r7
	veor	q9,q9,q11
	eor	r3,r0,r7,ror#19
	eor	r0,r11,r11,ror#11
	vsli.32	d24,d3,#15
	eor	r2,r2,r9
	add	r10,r10,r3,ror#6
	vshr.u32	d25,d3,#10
	eor	r3,r11,r4
	eor	r0,r0,r11,ror#20
	vadd.i32	q2,q2,q9
	add	r10,r10,r2
	ldr	r2,[sp,#40]
	veor	d25,d25,d24
	and	r12,r12,r3
	add	r6,r6,r10
	vshr.u32	d24,d3,#19
	add	r10,r10,r0,ror#2
	eor	r12,r12,r4
	vsli.32	d24,d3,#13
	add	r9,r9,r2
	eor	r2,r7,r8
	veor	d25,d25,d24
	eor	r0,r6,r6,ror#5
	add	r10,r10,r12
	vadd.i32	d4,d4,d25
	and	r2,r2,r6
	eor	r12,r0,r6,ror#19
	vshr.u32	d24,d4,#17
	eor	r0,r10,r10,ror#11
	eor	r2,r2,r8
	vsli.32	d24,d4,#15
	add	r9,r9,r12,ror#6
	eor	r12,r10,r11
	vshr.u32	d25,d4,#10
	eor	r0,r0,r10,ror#20
	add	r9,r9,r2
	veor	d25,d25,d24
	ldr	r2,[sp,#44]
	and	r3,r3,r12
	vshr.u32	d24,d4,#19
	add	r5,r5,r9
	add	r9,r9,r0,ror#2
	eor	r3,r3,r11
	vld1.32	{q8},[r14,:128]!
	add	r8,r8,r2
	vsli.32	d24,d4,#13
	eor	r2,r6,r7
	eor	r0,r5,r5,ror#5
	veor	d25,d25,d24
	add	r9,r9,r3
	and	r2,r2,r5
	vadd.i32	d5,d5,d25
	eor	r3,r0,r5,ror#19
	eor	r0,r9,r9,ror#11
	vadd.i32	q8,q8,q2
	eor	r2,r2,r7
	add	r8,r8,r3,ror#6
	eor	r3,r9,r10
	eor	r0,r0,r9,ror#20
	add	r8,r8,r2
	ldr	r2,[sp,#48]
	and	r12,r12,r3
	add	r4,r4,r8
	vst1.32	{q8},[r1,:128]!
	add	r8,r8,r0,ror#2
	eor	r12,r12,r10
	vext.8	q8,q3,q0,#4
	add	r7,r7,r2
	eor	r2,r5,r6
	eor	r0,r4,r4,ror#5
	vext.8	q9,q1,q2,#4
	add	r8,r8,r12
	and	r2,r2,r4
	eor	r12,r0,r4,ror#19
	vshr.u32	q10,q8,#7
	eor	r0,r8,r8,ror#11
	eor	r2,r2,r6
	vadd.i32	q3,q3,q9
	add	r7,r7,r12,ror#6
	eor	r12,r8,r9
	vshr.u32	q9,q8,#3
	eor	r0,r0,r8,ror#20
	add	r7,r7,r2
	vsli.32	q10,q8,#25
	ldr	r2,[sp,#52]
	and	r3,r3,r12
	vshr.u32	q11,q8,#18
	add	r11,r11,r7
	add	r7,r7,r0,ror#2
	eor	r3,r3,r9
	veor	q9,q9,q10
	add	r6,r6,r2
	vsli.32	q11,q8,#14
	eor	r2,r4,r5
	eor	r0,r11,r11,ror#5
	vshr.u32	d24,d5,#17
	add	r7,r7,r3
	and	r2,r2,r11
	veor	q9,q9,q11
	eor	r3,r0,r11,ror#19
	eor	r0,r7,r7,ror#11
	vsli.32	d24,d5,#15
	eor	r2,r2,r5
	add	r6,r6,r3,ror#6
	vshr.u32	d25,d5,#10
	eor	r3,r7,r8
	eor	r0,r0,r7,ror#20
	vadd.i32	q3,q3,q9
	add	r6,r6,r2
	ldr	r2,[sp,#56]
	veor	d25,d25,d24
	and	r12,r12,r3
	add	r10,r10,r6
	vshr.u32	d24,d5,#19
	add	r6,r6,r0,ror#2
	eor	r12,r12,r8
	vsli.32	d24,d5,#13
	add	r5,r5,r2
	eor	r2,r11,r4
	veor	d25,d25,d24
	eor	r0,r10,r10,ror#5
	add	r6,r6,r12
	vadd.i32	d6,d6,d25
	and	r2,r2,r10
	eor	r12,r0,r10,ror#19
	vshr.u32	d24,d6,#17
	eor	r0,r6,r6,ror#11
	eor	r2,r2,r4
	vsli.32	d24,d6,#15
	add	r5,r5,r12,ror#6
	eor	r12,r6,r7
	vshr.u32	d25,d6,#10
	eor	r0,r0,r6,ror#20
	add	r5,r5,r2
	veor	d25,d25,d24
	ldr	r2,[sp,#60]
	and	r3,r3,r12
	vshr.u32	d24,d6,#19
	add	r9,r9,r5
	add	r5,r5,r0,ror#2
	eor	r3,r3,r7
	vld1.32	{q8},[r14,:128]!
	add	r4,r4,r2
	vsli.32	d24,d6,#13
	eor	r2,r10,r11
	eor	r0,r9,r9,ror#5
	veor	d25,d25,d24
	add	r5,r5,r3
	and	r2,r2,r9
	vadd.i32	d7,d7,d25
	eor	r3,r0,r9,ror#19
	eor	r0,r5,r5,ror#11
	vadd.i32	q8,q8,q3
	eor	r2,r2,r11
	add	r4,r4,r3,ror#6
	eor	r3,r5,r6
	eor	r0,r0,r5,ror#20
	add	r4,r4,r2
	ldr	r2,[r14]
	and	r12,r12,r3
	add	r8,r8,r4
	vst1.32	{q8},[r1,:128]!
	add	r4,r4,r0,ror#2
	eor	r12,r12,r6
	teq	r2,#0				@ check for K256 terminator
	ldr	r2,[sp,#0]
	sub	r1,r1,#64
	bne	.L_00_48

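@ Rounds 48-63 follow, consuming the W[i]+K[i] values already on the stack
@ while the next block is loaded, byte-reversed (vrev32.8), and its first
@ sixteen W[i]+K[i] are precomputed for the next pass through .L_00_48.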
	ldr	r1,[sp,#68]			@ saved input pointer
	ldr	r0,[sp,#72]			@ input limit
	sub	r14,r14,#256	@ rewind r14 to K256
	teq	r1,r0				@ was that the last block?
	it	eq
	subeq	r1,r1,#64		@ avoid SEGV: re-read the final block
	vld1.8	{q0},[r1]!		@ load next input block
	vld1.8	{q1},[r1]!
	vld1.8	{q2},[r1]!
	vld1.8	{q3},[r1]!
	it	ne
	strne	r1,[sp,#68]		@ save updated input pointer
	mov	r1,sp
	add	r11,r11,r2
	eor	r2,r9,r10
	eor	r0,r8,r8,ror#5
	add	r4,r4,r12
	vld1.32	{q8},[r14,:128]!
	and	r2,r2,r8
	eor	r12,r0,r8,ror#19
	eor	r0,r4,r4,ror#11
	eor	r2,r2,r10
	vrev32.8	q0,q0
	add	r11,r11,r12,ror#6
	eor	r12,r4,r5
	eor	r0,r0,r4,ror#20
	add	r11,r11,r2
	vadd.i32	q8,q8,q0
	ldr	r2,[sp,#4]
	and	r3,r3,r12
	add	r7,r7,r11
	add	r11,r11,r0,ror#2
	eor	r3,r3,r5
	add	r10,r10,r2
	eor	r2,r8,r9
	eor	r0,r7,r7,ror#5
	add	r11,r11,r3
	and	r2,r2,r7
	eor	r3,r0,r7,ror#19
	eor	r0,r11,r11,ror#11
	eor	r2,r2,r9
	add	r10,r10,r3,ror#6
	eor	r3,r11,r4
	eor	r0,r0,r11,ror#20
	add	r10,r10,r2
	ldr	r2,[sp,#8]
	and	r12,r12,r3
	add	r6,r6,r10
	add	r10,r10,r0,ror#2
	eor	r12,r12,r4
	add	r9,r9,r2
	eor	r2,r7,r8
	eor	r0,r6,r6,ror#5
	add	r10,r10,r12
	and	r2,r2,r6
	eor	r12,r0,r6,ror#19
	eor	r0,r10,r10,ror#11
	eor	r2,r2,r8
	add	r9,r9,r12,ror#6
	eor	r12,r10,r11
	eor	r0,r0,r10,ror#20
	add	r9,r9,r2
	ldr	r2,[sp,#12]
	and	r3,r3,r12
	add	r5,r5,r9
	add	r9,r9,r0,ror#2
	eor	r3,r3,r11
	add	r8,r8,r2
	eor	r2,r6,r7
	eor	r0,r5,r5,ror#5
	add	r9,r9,r3
	and	r2,r2,r5
	eor	r3,r0,r5,ror#19
	eor	r0,r9,r9,ror#11
	eor	r2,r2,r7
	add	r8,r8,r3,ror#6
	eor	r3,r9,r10
	eor	r0,r0,r9,ror#20
	add	r8,r8,r2
	ldr	r2,[sp,#16]
	and	r12,r12,r3
	add	r4,r4,r8
	add	r8,r8,r0,ror#2
	eor	r12,r12,r10
	vst1.32	{q8},[r1,:128]!
	add	r7,r7,r2
	eor	r2,r5,r6
	eor	r0,r4,r4,ror#5
	add	r8,r8,r12
	vld1.32	{q8},[r14,:128]!
	and	r2,r2,r4
	eor	r12,r0,r4,ror#19
	eor	r0,r8,r8,ror#11
	eor	r2,r2,r6
	vrev32.8	q1,q1
	add	r7,r7,r12,ror#6
	eor	r12,r8,r9
	eor	r0,r0,r8,ror#20
	add	r7,r7,r2
	vadd.i32	q8,q8,q1
	ldr	r2,[sp,#20]
	and	r3,r3,r12
	add	r11,r11,r7
	add	r7,r7,r0,ror#2
	eor	r3,r3,r9
	add	r6,r6,r2
	eor	r2,r4,r5
	eor	r0,r11,r11,ror#5
	add	r7,r7,r3
	and	r2,r2,r11
	eor	r3,r0,r11,ror#19
	eor	r0,r7,r7,ror#11
	eor	r2,r2,r5
	add	r6,r6,r3,ror#6
	eor	r3,r7,r8
	eor	r0,r0,r7,ror#20
	add	r6,r6,r2
	ldr	r2,[sp,#24]
	and	r12,r12,r3
	add	r10,r10,r6
	add	r6,r6,r0,ror#2
	eor	r12,r12,r8
	add	r5,r5,r2
	eor	r2,r11,r4
	eor	r0,r10,r10,ror#5
	add	r6,r6,r12
	and	r2,r2,r10
	eor	r12,r0,r10,ror#19
	eor	r0,r6,r6,ror#11
	eor	r2,r2,r4
	add	r5,r5,r12,ror#6
	eor	r12,r6,r7
	eor	r0,r0,r6,ror#20
	add	r5,r5,r2
	ldr	r2,[sp,#28]
	and	r3,r3,r12
	add	r9,r9,r5
	add	r5,r5,r0,ror#2
	eor	r3,r3,r7
	add	r4,r4,r2
	eor	r2,r10,r11
	eor	r0,r9,r9,ror#5
	add	r5,r5,r3
	and	r2,r2,r9
	eor	r3,r0,r9,ror#19
	eor	r0,r5,r5,ror#11
	eor	r2,r2,r11
	add	r4,r4,r3,ror#6
	eor	r3,r5,r6
	eor	r0,r0,r5,ror#20
	add	r4,r4,r2
	ldr	r2,[sp,#32]
	and	r12,r12,r3
	add	r8,r8,r4
	add	r4,r4,r0,ror#2
	eor	r12,r12,r6
	vst1.32	{q8},[r1,:128]!
	add	r11,r11,r2
	eor	r2,r9,r10
	eor	r0,r8,r8,ror#5
	add	r4,r4,r12
	vld1.32	{q8},[r14,:128]!
	and	r2,r2,r8
	eor	r12,r0,r8,ror#19
	eor	r0,r4,r4,ror#11
	eor	r2,r2,r10
	vrev32.8	q2,q2
	add	r11,r11,r12,ror#6
	eor	r12,r4,r5
	eor	r0,r0,r4,ror#20
	add	r11,r11,r2
	vadd.i32	q8,q8,q2
	ldr	r2,[sp,#36]
	and	r3,r3,r12
	add	r7,r7,r11
	add	r11,r11,r0,ror#2
	eor	r3,r3,r5
	add	r10,r10,r2
	eor	r2,r8,r9
	eor	r0,r7,r7,ror#5
	add	r11,r11,r3
	and	r2,r2,r7
	eor	r3,r0,r7,ror#19
	eor	r0,r11,r11,ror#11
	eor	r2,r2,r9
	add	r10,r10,r3,ror#6
	eor	r3,r11,r4
	eor	r0,r0,r11,ror#20
	add	r10,r10,r2
	ldr	r2,[sp,#40]
	and	r12,r12,r3
	add	r6,r6,r10
	add	r10,r10,r0,ror#2
	eor	r12,r12,r4
	add	r9,r9,r2
	eor	r2,r7,r8
	eor	r0,r6,r6,ror#5
	add	r10,r10,r12
	and	r2,r2,r6
	eor	r12,r0,r6,ror#19
	eor	r0,r10,r10,ror#11
	eor	r2,r2,r8
	add	r9,r9,r12,ror#6
	eor	r12,r10,r11
	eor	r0,r0,r10,ror#20
	add	r9,r9,r2
	ldr	r2,[sp,#44]
	and	r3,r3,r12
	add	r5,r5,r9
	add	r9,r9,r0,ror#2
	eor	r3,r3,r11
	add	r8,r8,r2
	eor	r2,r6,r7
	eor	r0,r5,r5,ror#5
	add	r9,r9,r3
	and	r2,r2,r5
	eor	r3,r0,r5,ror#19
	eor	r0,r9,r9,ror#11
	eor	r2,r2,r7
	add	r8,r8,r3,ror#6
	eor	r3,r9,r10
	eor	r0,r0,r9,ror#20
	add	r8,r8,r2
	ldr	r2,[sp,#48]
	and	r12,r12,r3
	add	r4,r4,r8
	add	r8,r8,r0,ror#2
	eor	r12,r12,r10
	vst1.32	{q8},[r1,:128]!
	add	r7,r7,r2
	eor	r2,r5,r6
	eor	r0,r4,r4,ror#5
	add	r8,r8,r12
	vld1.32	{q8},[r14,:128]!
	and	r2,r2,r4
	eor	r12,r0,r4,ror#19
	eor	r0,r8,r8,ror#11
	eor	r2,r2,r6
	vrev32.8	q3,q3
	add	r7,r7,r12,ror#6
	eor	r12,r8,r9
	eor	r0,r0,r8,ror#20
	add	r7,r7,r2
	vadd.i32	q8,q8,q3
	ldr	r2,[sp,#52]
	and	r3,r3,r12
	add	r11,r11,r7
	add	r7,r7,r0,ror#2
	eor	r3,r3,r9
	add	r6,r6,r2
	eor	r2,r4,r5
	eor	r0,r11,r11,ror#5
	add	r7,r7,r3
	and	r2,r2,r11
	eor	r3,r0,r11,ror#19
	eor	r0,r7,r7,ror#11
	eor	r2,r2,r5
	add	r6,r6,r3,ror#6
	eor	r3,r7,r8
	eor	r0,r0,r7,ror#20
	add	r6,r6,r2
	ldr	r2,[sp,#56]
	and	r12,r12,r3
	add	r10,r10,r6
	add	r6,r6,r0,ror#2
	eor	r12,r12,r8
	add	r5,r5,r2
	eor	r2,r11,r4
	eor	r0,r10,r10,ror#5
	add	r6,r6,r12
	and	r2,r2,r10
	eor	r12,r0,r10,ror#19
	eor	r0,r6,r6,ror#11
	eor	r2,r2,r4
	add	r5,r5,r12,ror#6
	eor	r12,r6,r7
	eor	r0,r0,r6,ror#20
	add	r5,r5,r2
	ldr	r2,[sp,#60]
	and	r3,r3,r12
	add	r9,r9,r5
	add	r5,r5,r0,ror#2
	eor	r3,r3,r7
	add	r4,r4,r2
	eor	r2,r10,r11
	eor	r0,r9,r9,ror#5
	add	r5,r5,r3
	and	r2,r2,r9
	eor	r3,r0,r9,ror#19
	eor	r0,r5,r5,ror#11
	eor	r2,r2,r11
	add	r4,r4,r3,ror#6
	eor	r3,r5,r6
	eor	r0,r0,r5,ror#20
	add	r4,r4,r2
	ldr	r2,[sp,#64]			@ saved ctx pointer
	and	r12,r12,r3
	add	r8,r8,r4
	add	r4,r4,r0,ror#2
	eor	r12,r12,r6
	vst1.32	{q8},[r1,:128]!
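@ Accumulate: load the previous hash state from *r2, add the working
@ registers into it, and store the updated state back.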
	ldr	r0,[r2,#0]
	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
	ldr	r12,[r2,#4]
	ldr	r3,[r2,#8]
	ldr	r1,[r2,#12]
	add	r4,r4,r0			@ accumulate
	ldr	r0,[r2,#16]
	add	r5,r5,r12
	ldr	r12,[r2,#20]
	add	r6,r6,r3
	ldr	r3,[r2,#24]
	add	r7,r7,r1
	ldr	r1,[r2,#28]
	add	r8,r8,r0
	str	r4,[r2],#4
	add	r9,r9,r12
	str	r5,[r2],#4
	add	r10,r10,r3
	str	r6,[r2],#4
	add	r11,r11,r1
	str	r7,[r2],#4
	stmia	r2,{r8,r9,r10,r11}

	ittte	ne
	movne	r1,sp				@ more input: reset X[] pointer
	ldrne	r2,[sp,#0]			@ reload X[0]+K[0]
	eorne	r12,r12,r12			@ clear deferred Maj
	ldreq	sp,[sp,#76]			@ restore original sp
	itt	ne
	eorne	r3,r5,r6			@ b^c for round 0
	bne	.L_00_48

	ldmia	sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,pc}
.size	sha256_block_data_order_neon,.-sha256_block_data_order_neon
#endif
#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)

# if defined(__thumb2__)
#  define INST(a,b,c,d)	.byte	c,d|0xc,a,b
# else
#  define INST(a,b,c,d)	.byte	a,b,c,d
# endif
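@ INST() takes the four little-endian bytes of an instruction's Arm-mode
@ encoding. Thumb-2 stores a 32-bit NEON encoding as two little-endian
@ halfwords, and the leading 0xf3 byte becomes 0xff, hence the byte swap
@ and the |0xc in the Thumb variant.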

.LK256_shortcut_hw:
@ PC is 8 bytes ahead in Arm mode and 4 bytes ahead in Thumb mode.
#if defined(__thumb2__)
.word	K256-(.LK256_add_hw+4)
#else
.word	K256-(.LK256_add_hw+8)
#endif

.globl	sha256_block_data_order_hw
.hidden	sha256_block_data_order_hw
.type	sha256_block_data_order_hw,%function
.align	5
sha256_block_data_order_hw:
	@ K256 is too far to reference from one ADR instruction in Thumb mode.
	@ In Arm mode, we could make it fit by aligning the ADR offset to a
	@ 64-byte boundary. For simplicity, just load the offset from
	@ .LK256_shortcut_hw.
	ldr	r3,.LK256_shortcut_hw
.LK256_add_hw:
	add	r3,pc,r3
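	@ In Arm mode, pc reads as .LK256_add_hw+8 here, so
	@ r3 = (.LK256_add_hw+8) + (K256 - (.LK256_add_hw+8)) = K256;
	@ the Thumb shortcut constant compensates for the +4 bias instead.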

	vld1.32	{q0,q1},[r0]
	add	r2,r1,r2,lsl#6	@ len to point at the end of inp
	b	.Loop_v8

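@ Each pass through .Loop_v8 hashes one 64-byte block: sixteen message words
@ are loaded and byte-reversed, then sixteen sha256h/sha256h2 groups of four
@ rounds each follow, with sha256su0/sha256su1 expanding the schedule for
@ the first twelve groups.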
.align	4
.Loop_v8:
	vld1.8	{q8,q9},[r1]!
	vld1.8	{q10,q11},[r1]!
	vld1.32	{q12},[r3]!
	vrev32.8	q8,q8
	vrev32.8	q9,q9
	vrev32.8	q10,q10
	vrev32.8	q11,q11
	vmov	q14,q0	@ offload
	vmov	q15,q1
	teq	r1,r2		@ last block? (flags survive the NEON ops until bne below)
	vld1.32	{q13},[r3]!
	vadd.i32	q12,q12,q8
	INST(0xe2,0x03,0xfa,0xf3)	@ sha256su0 q8,q9
	vmov	q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
	INST(0xe6,0x0c,0x64,0xf3)	@ sha256su1 q8,q10,q11
	vld1.32	{q12},[r3]!
	vadd.i32	q13,q13,q9
	INST(0xe4,0x23,0xfa,0xf3)	@ sha256su0 q9,q10
	vmov	q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
	INST(0xe0,0x2c,0x66,0xf3)	@ sha256su1 q9,q11,q8
	vld1.32	{q13},[r3]!
	vadd.i32	q12,q12,q10
	INST(0xe6,0x43,0xfa,0xf3)	@ sha256su0 q10,q11
	vmov	q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
	INST(0xe2,0x4c,0x60,0xf3)	@ sha256su1 q10,q8,q9
	vld1.32	{q12},[r3]!
	vadd.i32	q13,q13,q11
	INST(0xe0,0x63,0xfa,0xf3)	@ sha256su0 q11,q8
	vmov	q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
	INST(0xe4,0x6c,0x62,0xf3)	@ sha256su1 q11,q9,q10
	vld1.32	{q13},[r3]!
	vadd.i32	q12,q12,q8
	INST(0xe2,0x03,0xfa,0xf3)	@ sha256su0 q8,q9
	vmov	q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
	INST(0xe6,0x0c,0x64,0xf3)	@ sha256su1 q8,q10,q11
	vld1.32	{q12},[r3]!
	vadd.i32	q13,q13,q9
	INST(0xe4,0x23,0xfa,0xf3)	@ sha256su0 q9,q10
	vmov	q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
	INST(0xe0,0x2c,0x66,0xf3)	@ sha256su1 q9,q11,q8
	vld1.32	{q13},[r3]!
	vadd.i32	q12,q12,q10
	INST(0xe6,0x43,0xfa,0xf3)	@ sha256su0 q10,q11
	vmov	q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
	INST(0xe2,0x4c,0x60,0xf3)	@ sha256su1 q10,q8,q9
	vld1.32	{q12},[r3]!
	vadd.i32	q13,q13,q11
	INST(0xe0,0x63,0xfa,0xf3)	@ sha256su0 q11,q8
	vmov	q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
	INST(0xe4,0x6c,0x62,0xf3)	@ sha256su1 q11,q9,q10
	vld1.32	{q13},[r3]!
	vadd.i32	q12,q12,q8
	INST(0xe2,0x03,0xfa,0xf3)	@ sha256su0 q8,q9
	vmov	q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
	INST(0xe6,0x0c,0x64,0xf3)	@ sha256su1 q8,q10,q11
	vld1.32	{q12},[r3]!
	vadd.i32	q13,q13,q9
	INST(0xe4,0x23,0xfa,0xf3)	@ sha256su0 q9,q10
	vmov	q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
	INST(0xe0,0x2c,0x66,0xf3)	@ sha256su1 q9,q11,q8
	vld1.32	{q13},[r3]!
	vadd.i32	q12,q12,q10
	INST(0xe6,0x43,0xfa,0xf3)	@ sha256su0 q10,q11
	vmov	q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
	INST(0xe2,0x4c,0x60,0xf3)	@ sha256su1 q10,q8,q9
	vld1.32	{q12},[r3]!
	vadd.i32	q13,q13,q11
	INST(0xe0,0x63,0xfa,0xf3)	@ sha256su0 q11,q8
	vmov	q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
	INST(0xe4,0x6c,0x62,0xf3)	@ sha256su1 q11,q9,q10
	vld1.32	{q13},[r3]!
	vadd.i32	q12,q12,q8
	vmov	q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12

	vld1.32	{q12},[r3]!
	vadd.i32	q13,q13,q9
	vmov	q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13

	vld1.32	{q13},[r3]
	vadd.i32	q12,q12,q10
	sub	r3,r3,#256-16	@ rewind r3 to K256 (15 post-incremented loads advanced 240 bytes)
	vmov	q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12

	vadd.i32	q13,q13,q11
	vmov	q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13

	vadd.i32	q0,q0,q14
	vadd.i32	q1,q1,q15
	it	ne
	bne	.Loop_v8

	vst1.32	{q0,q1},[r0]

	bx	lr		@ return
.size	sha256_block_data_order_hw,.-sha256_block_data_order_hw
#endif
.byte	83,72,65,50,53,54,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,65,82,77,118,52,47,78,69,79,78,47,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0	@ "SHA256 block transform for ARMv4/NEON/ARMv8, CRYPTOGAMS by <appro@openssl.org>"
.align	2
#endif  // !OPENSSL_NO_ASM && defined(OPENSSL_ARM) && defined(__ELF__)
