// xref: /aosp_15_r20/external/cronet/third_party/boringssl/src/gen/bcm/aesni-x86_64-linux.S (revision 6777b5387eb2ff775bb5750e3f5d96f37fb7352b)
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
3
4#include <openssl/asm_base.h>
5
6#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64) && defined(__ELF__)
7.text
8.extern	OPENSSL_ia32cap_P
9.hidden OPENSSL_ia32cap_P
// aes_hw_encrypt: encrypt a single 16-byte block with the AES-NI instructions.
// Argument registers as used below (SysV AMD64 order):
//   %rdi = pointer to the input block (16 bytes are read)
//   %rsi = pointer to the output block (16 bytes are written)
//   %rdx = pointer to the key schedule; the 32-bit value at offset 240 is
//          used as the number of AESENC rounds run before the final AESENCLAST
// Modifies %eax, %rdx, %xmm0-%xmm2 and the flags.
// %xmm0-%xmm2 are zeroed before returning.
10.globl	aes_hw_encrypt
11.hidden aes_hw_encrypt
12.type	aes_hw_encrypt,@function
13.align	16
14aes_hw_encrypt:
15.cfi_startproc
16_CET_ENDBR
17#ifdef BORINGSSL_DISPATCH_TEST
// Test builds only: store 1 into the byte at BORINGSSL_function_hit+1.
18.extern	BORINGSSL_function_hit
19.hidden BORINGSSL_function_hit
20	movb	$1,BORINGSSL_function_hit+1(%rip)
21#endif
// xmm2 = input block, xmm0 = round key 0, xmm1 = round key 1.
22	movups	(%rdi),%xmm2
23	movl	240(%rdx),%eax
24	movups	(%rdx),%xmm0
25	movups	16(%rdx),%xmm1
26	leaq	32(%rdx),%rdx
// Initial whitening: block ^= round key 0.
27	xorps	%xmm0,%xmm2
28.Loop_enc1_1:
// .byte 102,15,56,220,209 encodes aesenc %xmm1,%xmm2.
29.byte	102,15,56,220,209
30	decl	%eax
// Fetch the next round key; movups/leaq leave the flags from decl intact.
31	movups	(%rdx),%xmm1
32	leaq	16(%rdx),%rdx
33	jnz	.Loop_enc1_1
// .byte 102,15,56,221,209 encodes aesenclast %xmm1,%xmm2 (final round).
34.byte	102,15,56,221,209
// Zero the round-key registers, store the result, then zero the data register.
35	pxor	%xmm0,%xmm0
36	pxor	%xmm1,%xmm1
37	movups	%xmm2,(%rsi)
38	pxor	%xmm2,%xmm2
39	ret
40.cfi_endproc
41.size	aes_hw_encrypt,.-aes_hw_encrypt
42
// aes_hw_decrypt: decrypt a single 16-byte block with the AES-NI instructions.
// Argument registers as used below (SysV AMD64 order):
//   %rdi = pointer to the input block (16 bytes are read)
//   %rsi = pointer to the output block (16 bytes are written)
//   %rdx = pointer to the key schedule; the 32-bit value at offset 240 is
//          used as the number of AESDEC rounds run before the final AESDECLAST
// Modifies %eax, %rdx, %xmm0-%xmm2 and the flags.
// %xmm0-%xmm2 are zeroed before returning.
// NOTE(review): the round keys are consumed in ascending address order, so
// the schedule is presumably a decryption schedule prepared by the caller.
43.globl	aes_hw_decrypt
44.hidden aes_hw_decrypt
45.type	aes_hw_decrypt,@function
46.align	16
47aes_hw_decrypt:
48.cfi_startproc
49_CET_ENDBR
// xmm2 = input block, xmm0 = round key 0, xmm1 = round key 1.
50	movups	(%rdi),%xmm2
51	movl	240(%rdx),%eax
52	movups	(%rdx),%xmm0
53	movups	16(%rdx),%xmm1
54	leaq	32(%rdx),%rdx
// Initial whitening: block ^= round key 0.
55	xorps	%xmm0,%xmm2
56.Loop_dec1_2:
// .byte 102,15,56,222,209 encodes aesdec %xmm1,%xmm2.
57.byte	102,15,56,222,209
58	decl	%eax
// Fetch the next round key; movups/leaq leave the flags from decl intact.
59	movups	(%rdx),%xmm1
60	leaq	16(%rdx),%rdx
61	jnz	.Loop_dec1_2
// .byte 102,15,56,223,209 encodes aesdeclast %xmm1,%xmm2 (final round).
62.byte	102,15,56,223,209
// Zero the round-key registers, store the result, then zero the data register.
63	pxor	%xmm0,%xmm0
64	pxor	%xmm1,%xmm1
65	movups	%xmm2,(%rsi)
66	pxor	%xmm2,%xmm2
67	ret
68.cfi_endproc
69.size	aes_hw_decrypt, .-aes_hw_decrypt
// _aesni_encrypt2: local helper (no .globl) that encrypts two blocks in parallel.
// Register contract as read from the code:
//   In:  %rcx = key schedule pointer, %eax = round count,
//        %xmm2,%xmm3 = input blocks
//   Out: %xmm2,%xmm3 = encrypted blocks
//   Modifies: %rax, %rcx, %xmm0, %xmm1, flags
// Round keys are loaded alternately into %xmm1 and %xmm0. %rcx is moved to
// 32+16*rounds bytes past the start of the schedule and %rax runs as a
// negative byte offset up towards zero, so the addq also acts as the loop test.
70.type	_aesni_encrypt2,@function
71.align	16
72_aesni_encrypt2:
73.cfi_startproc
74	movups	(%rcx),%xmm0
// eax = rounds * 16 (byte size of that many round keys).
75	shll	$4,%eax
76	movups	16(%rcx),%xmm1
// Whitening with round key 0.
77	xorps	%xmm0,%xmm2
78	xorps	%xmm0,%xmm3
79	movups	32(%rcx),%xmm0
80	leaq	32(%rcx,%rax,1),%rcx
81	negq	%rax
82	addq	$16,%rax
83
84.Lenc_loop2:
// aesenc %xmm1,%xmm2 / aesenc %xmm1,%xmm3
85.byte	102,15,56,220,209
86.byte	102,15,56,220,217
87	movups	(%rcx,%rax,1),%xmm1
88	addq	$32,%rax
// aesenc %xmm0,%xmm2 / aesenc %xmm0,%xmm3
89.byte	102,15,56,220,208
90.byte	102,15,56,220,216
// movups does not touch flags, so jnz still tests the addq result.
91	movups	-16(%rcx,%rax,1),%xmm0
92	jnz	.Lenc_loop2
93
// Last aesenc with %xmm1, then aesenclast with %xmm0 for both lanes.
94.byte	102,15,56,220,209
95.byte	102,15,56,220,217
96.byte	102,15,56,221,208
97.byte	102,15,56,221,216
98	ret
99.cfi_endproc
100.size	_aesni_encrypt2,.-_aesni_encrypt2
// _aesni_decrypt2: local helper (no .globl) that decrypts two blocks in parallel.
// Register contract as read from the code:
//   In:  %rcx = key schedule pointer, %eax = round count,
//        %xmm2,%xmm3 = input blocks
//   Out: %xmm2,%xmm3 = decrypted blocks
//   Modifies: %rax, %rcx, %xmm0, %xmm1, flags
// Same structure as the two-lane encrypt helper, using AESDEC/AESDECLAST:
// round keys alternate between %xmm1 and %xmm0 while %rax counts up to zero.
101.type	_aesni_decrypt2,@function
102.align	16
103_aesni_decrypt2:
104.cfi_startproc
105	movups	(%rcx),%xmm0
// eax = rounds * 16 (byte size of that many round keys).
106	shll	$4,%eax
107	movups	16(%rcx),%xmm1
// Whitening with round key 0.
108	xorps	%xmm0,%xmm2
109	xorps	%xmm0,%xmm3
110	movups	32(%rcx),%xmm0
111	leaq	32(%rcx,%rax,1),%rcx
112	negq	%rax
113	addq	$16,%rax
114
115.Ldec_loop2:
// aesdec %xmm1,%xmm2 / aesdec %xmm1,%xmm3
116.byte	102,15,56,222,209
117.byte	102,15,56,222,217
118	movups	(%rcx,%rax,1),%xmm1
119	addq	$32,%rax
// aesdec %xmm0,%xmm2 / aesdec %xmm0,%xmm3
120.byte	102,15,56,222,208
121.byte	102,15,56,222,216
// movups does not touch flags, so jnz still tests the addq result.
122	movups	-16(%rcx,%rax,1),%xmm0
123	jnz	.Ldec_loop2
124
// Last aesdec with %xmm1, then aesdeclast with %xmm0 for both lanes.
125.byte	102,15,56,222,209
126.byte	102,15,56,222,217
127.byte	102,15,56,223,208
128.byte	102,15,56,223,216
129	ret
130.cfi_endproc
131.size	_aesni_decrypt2,.-_aesni_decrypt2
// _aesni_encrypt3: local helper (no .globl) that encrypts three blocks in parallel.
// Register contract as read from the code:
//   In:  %rcx = key schedule pointer, %eax = round count,
//        %xmm2-%xmm4 = input blocks
//   Out: %xmm2-%xmm4 = encrypted blocks
//   Modifies: %rax, %rcx, %xmm0, %xmm1, flags
// Round keys alternate between %xmm1 and %xmm0; %rax is a negative byte
// offset from the end of the schedule that reaches zero on the last pass.
132.type	_aesni_encrypt3,@function
133.align	16
134_aesni_encrypt3:
135.cfi_startproc
136	movups	(%rcx),%xmm0
// eax = rounds * 16.
137	shll	$4,%eax
138	movups	16(%rcx),%xmm1
// Whitening with round key 0.
139	xorps	%xmm0,%xmm2
140	xorps	%xmm0,%xmm3
141	xorps	%xmm0,%xmm4
142	movups	32(%rcx),%xmm0
143	leaq	32(%rcx,%rax,1),%rcx
144	negq	%rax
145	addq	$16,%rax
146
147.Lenc_loop3:
// aesenc %xmm1 into %xmm2,%xmm3,%xmm4
148.byte	102,15,56,220,209
149.byte	102,15,56,220,217
150.byte	102,15,56,220,225
151	movups	(%rcx,%rax,1),%xmm1
152	addq	$32,%rax
// aesenc %xmm0 into %xmm2,%xmm3,%xmm4
153.byte	102,15,56,220,208
154.byte	102,15,56,220,216
155.byte	102,15,56,220,224
// Flags from addq survive the movups and drive the jnz.
156	movups	-16(%rcx,%rax,1),%xmm0
157	jnz	.Lenc_loop3
158
// Last aesenc with %xmm1, then aesenclast with %xmm0 for all three lanes.
159.byte	102,15,56,220,209
160.byte	102,15,56,220,217
161.byte	102,15,56,220,225
162.byte	102,15,56,221,208
163.byte	102,15,56,221,216
164.byte	102,15,56,221,224
165	ret
166.cfi_endproc
167.size	_aesni_encrypt3,.-_aesni_encrypt3
// _aesni_decrypt3: local helper (no .globl) that decrypts three blocks in parallel.
// Register contract as read from the code:
//   In:  %rcx = key schedule pointer, %eax = round count,
//        %xmm2-%xmm4 = input blocks
//   Out: %xmm2-%xmm4 = decrypted blocks
//   Modifies: %rax, %rcx, %xmm0, %xmm1, flags
// Round keys alternate between %xmm1 and %xmm0; %rax is a negative byte
// offset from the end of the schedule that reaches zero on the last pass.
168.type	_aesni_decrypt3,@function
169.align	16
170_aesni_decrypt3:
171.cfi_startproc
172	movups	(%rcx),%xmm0
// eax = rounds * 16.
173	shll	$4,%eax
174	movups	16(%rcx),%xmm1
// Whitening with round key 0.
175	xorps	%xmm0,%xmm2
176	xorps	%xmm0,%xmm3
177	xorps	%xmm0,%xmm4
178	movups	32(%rcx),%xmm0
179	leaq	32(%rcx,%rax,1),%rcx
180	negq	%rax
181	addq	$16,%rax
182
183.Ldec_loop3:
// aesdec %xmm1 into %xmm2,%xmm3,%xmm4
184.byte	102,15,56,222,209
185.byte	102,15,56,222,217
186.byte	102,15,56,222,225
187	movups	(%rcx,%rax,1),%xmm1
188	addq	$32,%rax
// aesdec %xmm0 into %xmm2,%xmm3,%xmm4
189.byte	102,15,56,222,208
190.byte	102,15,56,222,216
191.byte	102,15,56,222,224
// Flags from addq survive the movups and drive the jnz.
192	movups	-16(%rcx,%rax,1),%xmm0
193	jnz	.Ldec_loop3
194
// Last aesdec with %xmm1, then aesdeclast with %xmm0 for all three lanes.
195.byte	102,15,56,222,209
196.byte	102,15,56,222,217
197.byte	102,15,56,222,225
198.byte	102,15,56,223,208
199.byte	102,15,56,223,216
200.byte	102,15,56,223,224
201	ret
202.cfi_endproc
203.size	_aesni_decrypt3,.-_aesni_decrypt3
// _aesni_encrypt4: local helper (no .globl) that encrypts four blocks in parallel.
// Register contract as read from the code:
//   In:  %rcx = key schedule pointer, %eax = round count,
//        %xmm2-%xmm5 = input blocks
//   Out: %xmm2-%xmm5 = encrypted blocks
//   Modifies: %rax, %rcx, %xmm0, %xmm1, flags
// Round keys alternate between %xmm1 and %xmm0; %rax is a negative byte
// offset from the end of the schedule that reaches zero on the last pass.
204.type	_aesni_encrypt4,@function
205.align	16
206_aesni_encrypt4:
207.cfi_startproc
208	movups	(%rcx),%xmm0
// eax = rounds * 16.
209	shll	$4,%eax
210	movups	16(%rcx),%xmm1
// Whitening with round key 0.
211	xorps	%xmm0,%xmm2
212	xorps	%xmm0,%xmm3
213	xorps	%xmm0,%xmm4
214	xorps	%xmm0,%xmm5
215	movups	32(%rcx),%xmm0
216	leaq	32(%rcx,%rax,1),%rcx
217	negq	%rax
// 0x0f,0x1f,0x00 is a three-byte NOP (nopl (%rax)); it only pads the code.
218.byte	0x0f,0x1f,0x00
219	addq	$16,%rax
220
221.Lenc_loop4:
// aesenc %xmm1 into %xmm2..%xmm5
222.byte	102,15,56,220,209
223.byte	102,15,56,220,217
224.byte	102,15,56,220,225
225.byte	102,15,56,220,233
226	movups	(%rcx,%rax,1),%xmm1
227	addq	$32,%rax
// aesenc %xmm0 into %xmm2..%xmm5
228.byte	102,15,56,220,208
229.byte	102,15,56,220,216
230.byte	102,15,56,220,224
231.byte	102,15,56,220,232
// Flags from addq survive the movups and drive the jnz.
232	movups	-16(%rcx,%rax,1),%xmm0
233	jnz	.Lenc_loop4
234
// Last aesenc with %xmm1, then aesenclast with %xmm0 for all four lanes.
235.byte	102,15,56,220,209
236.byte	102,15,56,220,217
237.byte	102,15,56,220,225
238.byte	102,15,56,220,233
239.byte	102,15,56,221,208
240.byte	102,15,56,221,216
241.byte	102,15,56,221,224
242.byte	102,15,56,221,232
243	ret
244.cfi_endproc
245.size	_aesni_encrypt4,.-_aesni_encrypt4
// _aesni_decrypt4: local helper (no .globl) that decrypts four blocks in parallel.
// Register contract as read from the code:
//   In:  %rcx = key schedule pointer, %eax = round count,
//        %xmm2-%xmm5 = input blocks
//   Out: %xmm2-%xmm5 = decrypted blocks
//   Modifies: %rax, %rcx, %xmm0, %xmm1, flags
// Round keys alternate between %xmm1 and %xmm0; %rax is a negative byte
// offset from the end of the schedule that reaches zero on the last pass.
246.type	_aesni_decrypt4,@function
247.align	16
248_aesni_decrypt4:
249.cfi_startproc
250	movups	(%rcx),%xmm0
// eax = rounds * 16.
251	shll	$4,%eax
252	movups	16(%rcx),%xmm1
// Whitening with round key 0.
253	xorps	%xmm0,%xmm2
254	xorps	%xmm0,%xmm3
255	xorps	%xmm0,%xmm4
256	xorps	%xmm0,%xmm5
257	movups	32(%rcx),%xmm0
258	leaq	32(%rcx,%rax,1),%rcx
259	negq	%rax
// 0x0f,0x1f,0x00 is a three-byte NOP (nopl (%rax)); it only pads the code.
260.byte	0x0f,0x1f,0x00
261	addq	$16,%rax
262
263.Ldec_loop4:
// aesdec %xmm1 into %xmm2..%xmm5
264.byte	102,15,56,222,209
265.byte	102,15,56,222,217
266.byte	102,15,56,222,225
267.byte	102,15,56,222,233
268	movups	(%rcx,%rax,1),%xmm1
269	addq	$32,%rax
// aesdec %xmm0 into %xmm2..%xmm5
270.byte	102,15,56,222,208
271.byte	102,15,56,222,216
272.byte	102,15,56,222,224
273.byte	102,15,56,222,232
// Flags from addq survive the movups and drive the jnz.
274	movups	-16(%rcx,%rax,1),%xmm0
275	jnz	.Ldec_loop4
276
// Last aesdec with %xmm1, then aesdeclast with %xmm0 for all four lanes.
277.byte	102,15,56,222,209
278.byte	102,15,56,222,217
279.byte	102,15,56,222,225
280.byte	102,15,56,222,233
281.byte	102,15,56,223,208
282.byte	102,15,56,223,216
283.byte	102,15,56,223,224
284.byte	102,15,56,223,232
285	ret
286.cfi_endproc
287.size	_aesni_decrypt4,.-_aesni_decrypt4
// _aesni_encrypt6: local helper (no .globl) that encrypts six blocks in parallel.
// Register contract as read from the code:
//   In:  %rcx = key schedule pointer, %eax = round count,
//        %xmm2-%xmm7 = input blocks
//   Out: %xmm2-%xmm7 = encrypted blocks
//   Modifies: %rax, %rcx, %xmm0, %xmm1, flags
// The prologue interleaves the round-key-0 whitening of the later lanes with
// the first AESENC of %xmm2-%xmm4, then jumps into the middle of the loop
// (.Lenc_loop6_enter) so that %xmm5-%xmm7 receive that same first round.
288.type	_aesni_encrypt6,@function
289.align	16
290_aesni_encrypt6:
291.cfi_startproc
292	movups	(%rcx),%xmm0
// eax = rounds * 16.
293	shll	$4,%eax
294	movups	16(%rcx),%xmm1
295	xorps	%xmm0,%xmm2
296	pxor	%xmm0,%xmm3
297	pxor	%xmm0,%xmm4
// aesenc %xmm1,%xmm2
298.byte	102,15,56,220,209
299	leaq	32(%rcx,%rax,1),%rcx
300	negq	%rax
// aesenc %xmm1,%xmm3
301.byte	102,15,56,220,217
302	pxor	%xmm0,%xmm5
303	pxor	%xmm0,%xmm6
// aesenc %xmm1,%xmm4
304.byte	102,15,56,220,225
305	pxor	%xmm0,%xmm7
// With rax = -16*rounds this address is schedule+32, i.e. round key 2.
306	movups	(%rcx,%rax,1),%xmm0
307	addq	$16,%rax
308	jmp	.Lenc_loop6_enter
309.align	16
310.Lenc_loop6:
// aesenc %xmm1 into %xmm2..%xmm4
311.byte	102,15,56,220,209
312.byte	102,15,56,220,217
313.byte	102,15,56,220,225
314.Lenc_loop6_enter:
// aesenc %xmm1 into %xmm5..%xmm7
315.byte	102,15,56,220,233
316.byte	102,15,56,220,241
317.byte	102,15,56,220,249
318	movups	(%rcx,%rax,1),%xmm1
319	addq	$32,%rax
// aesenc %xmm0 into %xmm2..%xmm7
320.byte	102,15,56,220,208
321.byte	102,15,56,220,216
322.byte	102,15,56,220,224
323.byte	102,15,56,220,232
324.byte	102,15,56,220,240
325.byte	102,15,56,220,248
// Flags from addq survive the movups and drive the jnz.
326	movups	-16(%rcx,%rax,1),%xmm0
327	jnz	.Lenc_loop6
328
// Last aesenc with %xmm1, then aesenclast with %xmm0 for all six lanes.
329.byte	102,15,56,220,209
330.byte	102,15,56,220,217
331.byte	102,15,56,220,225
332.byte	102,15,56,220,233
333.byte	102,15,56,220,241
334.byte	102,15,56,220,249
335.byte	102,15,56,221,208
336.byte	102,15,56,221,216
337.byte	102,15,56,221,224
338.byte	102,15,56,221,232
339.byte	102,15,56,221,240
340.byte	102,15,56,221,248
341	ret
342.cfi_endproc
343.size	_aesni_encrypt6,.-_aesni_encrypt6
// _aesni_decrypt6: local helper (no .globl) that decrypts six blocks in parallel.
// Register contract as read from the code:
//   In:  %rcx = key schedule pointer, %eax = round count,
//        %xmm2-%xmm7 = input blocks
//   Out: %xmm2-%xmm7 = decrypted blocks
//   Modifies: %rax, %rcx, %xmm0, %xmm1, flags
// The prologue interleaves the round-key-0 whitening of the later lanes with
// the first AESDEC of %xmm2-%xmm4, then jumps into the middle of the loop
// (.Ldec_loop6_enter) so that %xmm5-%xmm7 receive that same first round.
344.type	_aesni_decrypt6,@function
345.align	16
346_aesni_decrypt6:
347.cfi_startproc
348	movups	(%rcx),%xmm0
// eax = rounds * 16.
349	shll	$4,%eax
350	movups	16(%rcx),%xmm1
351	xorps	%xmm0,%xmm2
352	pxor	%xmm0,%xmm3
353	pxor	%xmm0,%xmm4
// aesdec %xmm1,%xmm2
354.byte	102,15,56,222,209
355	leaq	32(%rcx,%rax,1),%rcx
356	negq	%rax
// aesdec %xmm1,%xmm3
357.byte	102,15,56,222,217
358	pxor	%xmm0,%xmm5
359	pxor	%xmm0,%xmm6
// aesdec %xmm1,%xmm4
360.byte	102,15,56,222,225
361	pxor	%xmm0,%xmm7
// With rax = -16*rounds this address is schedule+32, i.e. round key 2.
362	movups	(%rcx,%rax,1),%xmm0
363	addq	$16,%rax
364	jmp	.Ldec_loop6_enter
365.align	16
366.Ldec_loop6:
// aesdec %xmm1 into %xmm2..%xmm4
367.byte	102,15,56,222,209
368.byte	102,15,56,222,217
369.byte	102,15,56,222,225
370.Ldec_loop6_enter:
// aesdec %xmm1 into %xmm5..%xmm7
371.byte	102,15,56,222,233
372.byte	102,15,56,222,241
373.byte	102,15,56,222,249
374	movups	(%rcx,%rax,1),%xmm1
375	addq	$32,%rax
// aesdec %xmm0 into %xmm2..%xmm7
376.byte	102,15,56,222,208
377.byte	102,15,56,222,216
378.byte	102,15,56,222,224
379.byte	102,15,56,222,232
380.byte	102,15,56,222,240
381.byte	102,15,56,222,248
// Flags from addq survive the movups and drive the jnz.
382	movups	-16(%rcx,%rax,1),%xmm0
383	jnz	.Ldec_loop6
384
// Last aesdec with %xmm1, then aesdeclast with %xmm0 for all six lanes.
385.byte	102,15,56,222,209
386.byte	102,15,56,222,217
387.byte	102,15,56,222,225
388.byte	102,15,56,222,233
389.byte	102,15,56,222,241
390.byte	102,15,56,222,249
391.byte	102,15,56,223,208
392.byte	102,15,56,223,216
393.byte	102,15,56,223,224
394.byte	102,15,56,223,232
395.byte	102,15,56,223,240
396.byte	102,15,56,223,248
397	ret
398.cfi_endproc
399.size	_aesni_decrypt6,.-_aesni_decrypt6
// _aesni_encrypt8: local helper (no .globl) that encrypts eight blocks in parallel.
// Register contract as read from the code:
//   In:  %rcx = key schedule pointer, %eax = round count,
//        %xmm2-%xmm9 = input blocks
//   Out: %xmm2-%xmm9 = encrypted blocks
//   Modifies: %rax, %rcx, %xmm0, %xmm1, flags
// The prologue whitens all lanes with round key 0, applies the first AESENC
// to %xmm2 and %xmm3, and jumps to .Lenc_loop8_inner to finish that round on
// %xmm4-%xmm9. .Lenc_loop8_enter is a second entry point: the CTR tail code
// in this file calls it directly after doing its own first round.
// Encodings with the extra 68 (REX.R) prefix byte target %xmm8/%xmm9.
400.type	_aesni_encrypt8,@function
401.align	16
402_aesni_encrypt8:
403.cfi_startproc
404	movups	(%rcx),%xmm0
// eax = rounds * 16.
405	shll	$4,%eax
406	movups	16(%rcx),%xmm1
407	xorps	%xmm0,%xmm2
408	xorps	%xmm0,%xmm3
409	pxor	%xmm0,%xmm4
410	pxor	%xmm0,%xmm5
411	pxor	%xmm0,%xmm6
412	leaq	32(%rcx,%rax,1),%rcx
413	negq	%rax
// aesenc %xmm1,%xmm2
414.byte	102,15,56,220,209
415	pxor	%xmm0,%xmm7
416	pxor	%xmm0,%xmm8
// aesenc %xmm1,%xmm3
417.byte	102,15,56,220,217
418	pxor	%xmm0,%xmm9
// With rax = -16*rounds this address is schedule+32, i.e. round key 2.
419	movups	(%rcx,%rax,1),%xmm0
420	addq	$16,%rax
421	jmp	.Lenc_loop8_inner
422.align	16
423.Lenc_loop8:
// aesenc %xmm1 into %xmm2,%xmm3
424.byte	102,15,56,220,209
425.byte	102,15,56,220,217
426.Lenc_loop8_inner:
// aesenc %xmm1 into %xmm4..%xmm9
427.byte	102,15,56,220,225
428.byte	102,15,56,220,233
429.byte	102,15,56,220,241
430.byte	102,15,56,220,249
431.byte	102,68,15,56,220,193
432.byte	102,68,15,56,220,201
433.Lenc_loop8_enter:
434	movups	(%rcx,%rax,1),%xmm1
435	addq	$32,%rax
// aesenc %xmm0 into %xmm2..%xmm9
436.byte	102,15,56,220,208
437.byte	102,15,56,220,216
438.byte	102,15,56,220,224
439.byte	102,15,56,220,232
440.byte	102,15,56,220,240
441.byte	102,15,56,220,248
442.byte	102,68,15,56,220,192
443.byte	102,68,15,56,220,200
// Flags from addq survive the movups and drive the jnz.
444	movups	-16(%rcx,%rax,1),%xmm0
445	jnz	.Lenc_loop8
446
// Last aesenc with %xmm1, then aesenclast with %xmm0 for all eight lanes.
447.byte	102,15,56,220,209
448.byte	102,15,56,220,217
449.byte	102,15,56,220,225
450.byte	102,15,56,220,233
451.byte	102,15,56,220,241
452.byte	102,15,56,220,249
453.byte	102,68,15,56,220,193
454.byte	102,68,15,56,220,201
455.byte	102,15,56,221,208
456.byte	102,15,56,221,216
457.byte	102,15,56,221,224
458.byte	102,15,56,221,232
459.byte	102,15,56,221,240
460.byte	102,15,56,221,248
461.byte	102,68,15,56,221,192
462.byte	102,68,15,56,221,200
463	ret
464.cfi_endproc
465.size	_aesni_encrypt8,.-_aesni_encrypt8
// _aesni_decrypt8: local helper (no .globl) that decrypts eight blocks in parallel.
// Register contract as read from the code:
//   In:  %rcx = key schedule pointer, %eax = round count,
//        %xmm2-%xmm9 = input blocks
//   Out: %xmm2-%xmm9 = decrypted blocks
//   Modifies: %rax, %rcx, %xmm0, %xmm1, flags
// The prologue whitens all lanes with round key 0, applies the first AESDEC
// to %xmm2 and %xmm3, and jumps to .Ldec_loop8_inner to finish that round on
// %xmm4-%xmm9. Encodings with the extra 68 (REX.R) prefix byte target
// %xmm8/%xmm9.
466.type	_aesni_decrypt8,@function
467.align	16
468_aesni_decrypt8:
469.cfi_startproc
470	movups	(%rcx),%xmm0
// eax = rounds * 16.
471	shll	$4,%eax
472	movups	16(%rcx),%xmm1
473	xorps	%xmm0,%xmm2
474	xorps	%xmm0,%xmm3
475	pxor	%xmm0,%xmm4
476	pxor	%xmm0,%xmm5
477	pxor	%xmm0,%xmm6
478	leaq	32(%rcx,%rax,1),%rcx
479	negq	%rax
// aesdec %xmm1,%xmm2
480.byte	102,15,56,222,209
481	pxor	%xmm0,%xmm7
482	pxor	%xmm0,%xmm8
// aesdec %xmm1,%xmm3
483.byte	102,15,56,222,217
484	pxor	%xmm0,%xmm9
// With rax = -16*rounds this address is schedule+32, i.e. round key 2.
485	movups	(%rcx,%rax,1),%xmm0
486	addq	$16,%rax
487	jmp	.Ldec_loop8_inner
488.align	16
489.Ldec_loop8:
// aesdec %xmm1 into %xmm2,%xmm3
490.byte	102,15,56,222,209
491.byte	102,15,56,222,217
492.Ldec_loop8_inner:
// aesdec %xmm1 into %xmm4..%xmm9
493.byte	102,15,56,222,225
494.byte	102,15,56,222,233
495.byte	102,15,56,222,241
496.byte	102,15,56,222,249
497.byte	102,68,15,56,222,193
498.byte	102,68,15,56,222,201
499.Ldec_loop8_enter:
500	movups	(%rcx,%rax,1),%xmm1
501	addq	$32,%rax
// aesdec %xmm0 into %xmm2..%xmm9
502.byte	102,15,56,222,208
503.byte	102,15,56,222,216
504.byte	102,15,56,222,224
505.byte	102,15,56,222,232
506.byte	102,15,56,222,240
507.byte	102,15,56,222,248
508.byte	102,68,15,56,222,192
509.byte	102,68,15,56,222,200
// Flags from addq survive the movups and drive the jnz.
510	movups	-16(%rcx,%rax,1),%xmm0
511	jnz	.Ldec_loop8
512
// Last aesdec with %xmm1, then aesdeclast with %xmm0 for all eight lanes.
513.byte	102,15,56,222,209
514.byte	102,15,56,222,217
515.byte	102,15,56,222,225
516.byte	102,15,56,222,233
517.byte	102,15,56,222,241
518.byte	102,15,56,222,249
519.byte	102,68,15,56,222,193
520.byte	102,68,15,56,222,201
521.byte	102,15,56,223,208
522.byte	102,15,56,223,216
523.byte	102,15,56,223,224
524.byte	102,15,56,223,232
525.byte	102,15,56,223,240
526.byte	102,15,56,223,248
527.byte	102,68,15,56,223,192
528.byte	102,68,15,56,223,200
529	ret
530.cfi_endproc
531.size	_aesni_decrypt8,.-_aesni_decrypt8
// aes_hw_ecb_encrypt: ECB-mode encryption or decryption of whole 16-byte blocks.
// Argument registers as used below (SysV AMD64 order):
//   %rdi = input pointer
//   %rsi = output pointer
//   %rdx = length in bytes; it is rounded down to a multiple of 16 and the
//          function returns at once if the result is zero
//   %rcx = key schedule pointer (round count read from offset 240)
//   %r8d = direction flag: non-zero takes the encrypt path, zero the decrypt path
// Internal register use: %r11 keeps the key pointer and %r10d the round count
// so %rcx/%eax, which the lane helpers modify, can be reloaded before each call.
// While at least 0x80 bytes remain the data is processed eight blocks at a
// time; a tail of one to seven blocks is dispatched to the 1/2/3/4/6/8-lane
// code, padding five-block and seven-block tails with a zeroed extra lane.
// The decrypt paths zero each data register after storing it; the encrypt
// paths do not. %xmm0 and %xmm1 are zeroed at .Lecb_ret on every path.
532.globl	aes_hw_ecb_encrypt
533.hidden aes_hw_ecb_encrypt
534.type	aes_hw_ecb_encrypt,@function
535.align	16
536aes_hw_ecb_encrypt:
537.cfi_startproc
538_CET_ENDBR
// Drop any partial trailing block; nothing to do if no full block remains.
539	andq	$-16,%rdx
540	jz	.Lecb_ret
541
542	movl	240(%rcx),%eax
543	movups	(%rcx),%xmm0
544	movq	%rcx,%r11
545	movl	%eax,%r10d
546	testl	%r8d,%r8d
547	jz	.Lecb_decrypt
548
// ---- Encrypt path ----
549	cmpq	$0x80,%rdx
550	jb	.Lecb_enc_tail
551
// Preload the first eight input blocks and enter the loop at the call.
552	movdqu	(%rdi),%xmm2
553	movdqu	16(%rdi),%xmm3
554	movdqu	32(%rdi),%xmm4
555	movdqu	48(%rdi),%xmm5
556	movdqu	64(%rdi),%xmm6
557	movdqu	80(%rdi),%xmm7
558	movdqu	96(%rdi),%xmm8
559	movdqu	112(%rdi),%xmm9
560	leaq	128(%rdi),%rdi
561	subq	$0x80,%rdx
562	jmp	.Lecb_enc_loop8_enter
563.align	16
564.Lecb_enc_loop8:
// Store the previous eight results while loading the next eight inputs,
// and restore %rcx/%eax for the helper.
565	movups	%xmm2,(%rsi)
566	movq	%r11,%rcx
567	movdqu	(%rdi),%xmm2
568	movl	%r10d,%eax
569	movups	%xmm3,16(%rsi)
570	movdqu	16(%rdi),%xmm3
571	movups	%xmm4,32(%rsi)
572	movdqu	32(%rdi),%xmm4
573	movups	%xmm5,48(%rsi)
574	movdqu	48(%rdi),%xmm5
575	movups	%xmm6,64(%rsi)
576	movdqu	64(%rdi),%xmm6
577	movups	%xmm7,80(%rsi)
578	movdqu	80(%rdi),%xmm7
579	movups	%xmm8,96(%rsi)
580	movdqu	96(%rdi),%xmm8
581	movups	%xmm9,112(%rsi)
582	leaq	128(%rsi),%rsi
583	movdqu	112(%rdi),%xmm9
584	leaq	128(%rdi),%rdi
585.Lecb_enc_loop8_enter:
586
587	call	_aesni_encrypt8
588
// No borrow means at least eight more blocks are available.
589	subq	$0x80,%rdx
590	jnc	.Lecb_enc_loop8
591
// Flush the last eight results; restore %rcx/%eax for the tail code.
592	movups	%xmm2,(%rsi)
593	movq	%r11,%rcx
594	movups	%xmm3,16(%rsi)
595	movl	%r10d,%eax
596	movups	%xmm4,32(%rsi)
597	movups	%xmm5,48(%rsi)
598	movups	%xmm6,64(%rsi)
599	movups	%xmm7,80(%rsi)
600	movups	%xmm8,96(%rsi)
601	movups	%xmm9,112(%rsi)
602	leaq	128(%rsi),%rsi
// Undo the last subtraction: %rdx = remaining bytes (0..0x70).
603	addq	$0x80,%rdx
604	jz	.Lecb_ret
605
606.Lecb_enc_tail:
// Dispatch on the remaining length (0x10..0x70). The loads in between do
// not alter flags, so each jb/je pair shares one cmpq.
607	movups	(%rdi),%xmm2
608	cmpq	$0x20,%rdx
609	jb	.Lecb_enc_one
610	movups	16(%rdi),%xmm3
611	je	.Lecb_enc_two
612	movups	32(%rdi),%xmm4
613	cmpq	$0x40,%rdx
614	jb	.Lecb_enc_three
615	movups	48(%rdi),%xmm5
616	je	.Lecb_enc_four
617	movups	64(%rdi),%xmm6
618	cmpq	$0x60,%rdx
619	jb	.Lecb_enc_five
620	movups	80(%rdi),%xmm7
621	je	.Lecb_enc_six
// Seven blocks: run the eight-lane helper with a zeroed eighth lane and
// store only the first seven results.
622	movdqu	96(%rdi),%xmm8
623	xorps	%xmm9,%xmm9
624	call	_aesni_encrypt8
625	movups	%xmm2,(%rsi)
626	movups	%xmm3,16(%rsi)
627	movups	%xmm4,32(%rsi)
628	movups	%xmm5,48(%rsi)
629	movups	%xmm6,64(%rsi)
630	movups	%xmm7,80(%rsi)
631	movups	%xmm8,96(%rsi)
632	jmp	.Lecb_ret
633.align	16
634.Lecb_enc_one:
// One block: inline single-lane round loop (aesenc / aesenclast %xmm1,%xmm2).
635	movups	(%rcx),%xmm0
636	movups	16(%rcx),%xmm1
637	leaq	32(%rcx),%rcx
638	xorps	%xmm0,%xmm2
639.Loop_enc1_3:
640.byte	102,15,56,220,209
641	decl	%eax
642	movups	(%rcx),%xmm1
643	leaq	16(%rcx),%rcx
644	jnz	.Loop_enc1_3
645.byte	102,15,56,221,209
646	movups	%xmm2,(%rsi)
647	jmp	.Lecb_ret
648.align	16
649.Lecb_enc_two:
650	call	_aesni_encrypt2
651	movups	%xmm2,(%rsi)
652	movups	%xmm3,16(%rsi)
653	jmp	.Lecb_ret
654.align	16
655.Lecb_enc_three:
656	call	_aesni_encrypt3
657	movups	%xmm2,(%rsi)
658	movups	%xmm3,16(%rsi)
659	movups	%xmm4,32(%rsi)
660	jmp	.Lecb_ret
661.align	16
662.Lecb_enc_four:
663	call	_aesni_encrypt4
664	movups	%xmm2,(%rsi)
665	movups	%xmm3,16(%rsi)
666	movups	%xmm4,32(%rsi)
667	movups	%xmm5,48(%rsi)
668	jmp	.Lecb_ret
669.align	16
670.Lecb_enc_five:
// Five blocks: six-lane helper with a zeroed sixth lane; five results stored.
671	xorps	%xmm7,%xmm7
672	call	_aesni_encrypt6
673	movups	%xmm2,(%rsi)
674	movups	%xmm3,16(%rsi)
675	movups	%xmm4,32(%rsi)
676	movups	%xmm5,48(%rsi)
677	movups	%xmm6,64(%rsi)
678	jmp	.Lecb_ret
679.align	16
680.Lecb_enc_six:
681	call	_aesni_encrypt6
682	movups	%xmm2,(%rsi)
683	movups	%xmm3,16(%rsi)
684	movups	%xmm4,32(%rsi)
685	movups	%xmm5,48(%rsi)
686	movups	%xmm6,64(%rsi)
687	movups	%xmm7,80(%rsi)
688	jmp	.Lecb_ret
689
// ---- Decrypt path ----
690.align	16
691.Lecb_decrypt:
692	cmpq	$0x80,%rdx
693	jb	.Lecb_dec_tail
694
// Preload the first eight input blocks and enter the loop at the call.
695	movdqu	(%rdi),%xmm2
696	movdqu	16(%rdi),%xmm3
697	movdqu	32(%rdi),%xmm4
698	movdqu	48(%rdi),%xmm5
699	movdqu	64(%rdi),%xmm6
700	movdqu	80(%rdi),%xmm7
701	movdqu	96(%rdi),%xmm8
702	movdqu	112(%rdi),%xmm9
703	leaq	128(%rdi),%rdi
704	subq	$0x80,%rdx
705	jmp	.Lecb_dec_loop8_enter
706.align	16
707.Lecb_dec_loop8:
// Store the previous eight results while loading the next eight inputs,
// and restore %rcx/%eax for the helper.
708	movups	%xmm2,(%rsi)
709	movq	%r11,%rcx
710	movdqu	(%rdi),%xmm2
711	movl	%r10d,%eax
712	movups	%xmm3,16(%rsi)
713	movdqu	16(%rdi),%xmm3
714	movups	%xmm4,32(%rsi)
715	movdqu	32(%rdi),%xmm4
716	movups	%xmm5,48(%rsi)
717	movdqu	48(%rdi),%xmm5
718	movups	%xmm6,64(%rsi)
719	movdqu	64(%rdi),%xmm6
720	movups	%xmm7,80(%rsi)
721	movdqu	80(%rdi),%xmm7
722	movups	%xmm8,96(%rsi)
723	movdqu	96(%rdi),%xmm8
724	movups	%xmm9,112(%rsi)
725	leaq	128(%rsi),%rsi
726	movdqu	112(%rdi),%xmm9
727	leaq	128(%rdi),%rdi
728.Lecb_dec_loop8_enter:
729
730	call	_aesni_decrypt8
731
// Reload round key 0 into %xmm0 from the saved key pointer.
732	movups	(%r11),%xmm0
// No borrow means at least eight more blocks are available.
733	subq	$0x80,%rdx
734	jnc	.Lecb_dec_loop8
735
// Flush the last eight results, zeroing each data register once stored,
// and restore %rcx/%eax for the tail code.
736	movups	%xmm2,(%rsi)
737	pxor	%xmm2,%xmm2
738	movq	%r11,%rcx
739	movups	%xmm3,16(%rsi)
740	pxor	%xmm3,%xmm3
741	movl	%r10d,%eax
742	movups	%xmm4,32(%rsi)
743	pxor	%xmm4,%xmm4
744	movups	%xmm5,48(%rsi)
745	pxor	%xmm5,%xmm5
746	movups	%xmm6,64(%rsi)
747	pxor	%xmm6,%xmm6
748	movups	%xmm7,80(%rsi)
749	pxor	%xmm7,%xmm7
750	movups	%xmm8,96(%rsi)
751	pxor	%xmm8,%xmm8
752	movups	%xmm9,112(%rsi)
753	pxor	%xmm9,%xmm9
754	leaq	128(%rsi),%rsi
// Undo the last subtraction: %rdx = remaining bytes (0..0x70).
755	addq	$0x80,%rdx
756	jz	.Lecb_ret
757
758.Lecb_dec_tail:
// Dispatch on the remaining length (0x10..0x70). The loads in between do
// not alter flags, so each jb/je pair shares one cmpq.
759	movups	(%rdi),%xmm2
760	cmpq	$0x20,%rdx
761	jb	.Lecb_dec_one
762	movups	16(%rdi),%xmm3
763	je	.Lecb_dec_two
764	movups	32(%rdi),%xmm4
765	cmpq	$0x40,%rdx
766	jb	.Lecb_dec_three
767	movups	48(%rdi),%xmm5
768	je	.Lecb_dec_four
769	movups	64(%rdi),%xmm6
770	cmpq	$0x60,%rdx
771	jb	.Lecb_dec_five
772	movups	80(%rdi),%xmm7
773	je	.Lecb_dec_six
// Seven blocks: run the eight-lane helper with a zeroed eighth lane and
// store only the first seven results.
774	movups	96(%rdi),%xmm8
775	movups	(%rcx),%xmm0
776	xorps	%xmm9,%xmm9
777	call	_aesni_decrypt8
778	movups	%xmm2,(%rsi)
779	pxor	%xmm2,%xmm2
780	movups	%xmm3,16(%rsi)
781	pxor	%xmm3,%xmm3
782	movups	%xmm4,32(%rsi)
783	pxor	%xmm4,%xmm4
784	movups	%xmm5,48(%rsi)
785	pxor	%xmm5,%xmm5
786	movups	%xmm6,64(%rsi)
787	pxor	%xmm6,%xmm6
788	movups	%xmm7,80(%rsi)
789	pxor	%xmm7,%xmm7
790	movups	%xmm8,96(%rsi)
791	pxor	%xmm8,%xmm8
792	pxor	%xmm9,%xmm9
793	jmp	.Lecb_ret
794.align	16
795.Lecb_dec_one:
// One block: inline single-lane round loop (aesdec / aesdeclast %xmm1,%xmm2).
796	movups	(%rcx),%xmm0
797	movups	16(%rcx),%xmm1
798	leaq	32(%rcx),%rcx
799	xorps	%xmm0,%xmm2
800.Loop_dec1_4:
801.byte	102,15,56,222,209
802	decl	%eax
803	movups	(%rcx),%xmm1
804	leaq	16(%rcx),%rcx
805	jnz	.Loop_dec1_4
806.byte	102,15,56,223,209
807	movups	%xmm2,(%rsi)
808	pxor	%xmm2,%xmm2
809	jmp	.Lecb_ret
810.align	16
811.Lecb_dec_two:
812	call	_aesni_decrypt2
813	movups	%xmm2,(%rsi)
814	pxor	%xmm2,%xmm2
815	movups	%xmm3,16(%rsi)
816	pxor	%xmm3,%xmm3
817	jmp	.Lecb_ret
818.align	16
819.Lecb_dec_three:
820	call	_aesni_decrypt3
821	movups	%xmm2,(%rsi)
822	pxor	%xmm2,%xmm2
823	movups	%xmm3,16(%rsi)
824	pxor	%xmm3,%xmm3
825	movups	%xmm4,32(%rsi)
826	pxor	%xmm4,%xmm4
827	jmp	.Lecb_ret
828.align	16
829.Lecb_dec_four:
830	call	_aesni_decrypt4
831	movups	%xmm2,(%rsi)
832	pxor	%xmm2,%xmm2
833	movups	%xmm3,16(%rsi)
834	pxor	%xmm3,%xmm3
835	movups	%xmm4,32(%rsi)
836	pxor	%xmm4,%xmm4
837	movups	%xmm5,48(%rsi)
838	pxor	%xmm5,%xmm5
839	jmp	.Lecb_ret
840.align	16
841.Lecb_dec_five:
// Five blocks: six-lane helper with a zeroed sixth lane; five results stored.
842	xorps	%xmm7,%xmm7
843	call	_aesni_decrypt6
844	movups	%xmm2,(%rsi)
845	pxor	%xmm2,%xmm2
846	movups	%xmm3,16(%rsi)
847	pxor	%xmm3,%xmm3
848	movups	%xmm4,32(%rsi)
849	pxor	%xmm4,%xmm4
850	movups	%xmm5,48(%rsi)
851	pxor	%xmm5,%xmm5
852	movups	%xmm6,64(%rsi)
853	pxor	%xmm6,%xmm6
854	pxor	%xmm7,%xmm7
855	jmp	.Lecb_ret
856.align	16
857.Lecb_dec_six:
// Six blocks: falls through into .Lecb_ret after the stores.
858	call	_aesni_decrypt6
859	movups	%xmm2,(%rsi)
860	pxor	%xmm2,%xmm2
861	movups	%xmm3,16(%rsi)
862	pxor	%xmm3,%xmm3
863	movups	%xmm4,32(%rsi)
864	pxor	%xmm4,%xmm4
865	movups	%xmm5,48(%rsi)
866	pxor	%xmm5,%xmm5
867	movups	%xmm6,64(%rsi)
868	pxor	%xmm6,%xmm6
869	movups	%xmm7,80(%rsi)
870	pxor	%xmm7,%xmm7
871
872.Lecb_ret:
// Common exit: zero the two round-key registers.
873	xorps	%xmm0,%xmm0
874	pxor	%xmm1,%xmm1
875	ret
876.cfi_endproc
877.size	aes_hw_ecb_encrypt,.-aes_hw_ecb_encrypt
// aes_hw_ctr32_encrypt_blocks: CTR-mode keystream XOR over whole 16-byte
// blocks, incrementing only the last 32-bit word of the counter block
// (read as big-endian via bswap).
// Argument registers as used below (SysV AMD64 order):
//   %rdi = input pointer
//   %rsi = output pointer
//   %rdx = number of 16-byte blocks
//   %rcx = key schedule pointer (round count read from offset 240)
//   %r8  = pointer to the 16-byte initial counter block; it is only read,
//          never written back
// A count of exactly 1 takes a short path with no stack frame. Every other
// count takes .Lctr32_bulk, which builds a 16-byte-aligned 128-byte stack
// area holding eight counter blocks already XORed with round key 0.
// NOTE(review): a count of 0 is not special-cased. It takes the bulk path,
// falls through to .Lctr32_loop3, and that code stores one output block
// before testing the count - callers presumably never pass 0; confirm.
// Bulk-path register roles:
//   %r11 = caller %rsp (also the CFA register), %rbp saved at -8(%r11)
//   %ebp = last 32-bit word of round key 0, XORed into each byte-swapped
//          counter word so the stored blocks stay pre-whitened
//   %r8d = counter word in host byte order
//   %eax = round count, %xmm2-%xmm9 = the eight working lanes
// On the bulk exit %xmm0-%xmm15, %ebp and the stack scratch area are zeroed
// before %rbp and %rsp are restored.
878.globl	aes_hw_ctr32_encrypt_blocks
879.hidden aes_hw_ctr32_encrypt_blocks
880.type	aes_hw_ctr32_encrypt_blocks,@function
881.align	16
882aes_hw_ctr32_encrypt_blocks:
883.cfi_startproc
884_CET_ENDBR
885#ifdef BORINGSSL_DISPATCH_TEST
// Test builds only: store 1 into the byte at BORINGSSL_function_hit.
886	movb	$1,BORINGSSL_function_hit(%rip)
887#endif
888	cmpq	$1,%rdx
889	jne	.Lctr32_bulk
890
891
892
// ---- Single-block path: %xmm2 = counter block, %xmm3 = input block,
// %edx is reused as the round counter. ----
893	movups	(%r8),%xmm2
894	movups	(%rdi),%xmm3
895	movl	240(%rcx),%edx
896	movups	(%rcx),%xmm0
897	movups	16(%rcx),%xmm1
898	leaq	32(%rcx),%rcx
899	xorps	%xmm0,%xmm2
900.Loop_enc1_5:
// aesenc %xmm1,%xmm2
901.byte	102,15,56,220,209
902	decl	%edx
903	movups	(%rcx),%xmm1
904	leaq	16(%rcx),%rcx
905	jnz	.Loop_enc1_5
// aesenclast %xmm1,%xmm2
906.byte	102,15,56,221,209
907	pxor	%xmm0,%xmm0
908	pxor	%xmm1,%xmm1
// output = encrypted counter ^ input; registers are zeroed after use.
909	xorps	%xmm3,%xmm2
910	pxor	%xmm3,%xmm3
911	movups	%xmm2,(%rsi)
912	xorps	%xmm2,%xmm2
913	jmp	.Lctr32_epilogue
914
915.align	16
916.Lctr32_bulk:
// Frame: remember %rsp in %r11, save %rbp, reserve 128 bytes, align to 16.
917	leaq	(%rsp),%r11
918.cfi_def_cfa_register	%r11
919	pushq	%rbp
920.cfi_offset	%rbp,-16
921	subq	$128,%rsp
922	andq	$-16,%rsp
923
924
925
926
// %xmm2 = counter block ^ round key 0; this template is written to stack
// slots 0 and 64..112, and the per-block counter words are patched in below.
927	movdqu	(%r8),%xmm2
928	movdqu	(%rcx),%xmm0
929	movl	12(%r8),%r8d
930	pxor	%xmm0,%xmm2
931	movl	12(%rcx),%ebp
932	movdqa	%xmm2,0(%rsp)
933	bswapl	%r8d
934	movdqa	%xmm2,%xmm3
935	movdqa	%xmm2,%xmm4
936	movdqa	%xmm2,%xmm5
937	movdqa	%xmm2,64(%rsp)
938	movdqa	%xmm2,80(%rsp)
939	movdqa	%xmm2,96(%rsp)
// %rdx is borrowed as scratch below; keep the block count in %r10 meanwhile.
940	movq	%rdx,%r10
941	movdqa	%xmm2,112(%rsp)
942
// Counter words for blocks 1..7: (counter+i) byte-swapped, then ^ %ebp.
943	leaq	1(%r8),%rax
944	leaq	2(%r8),%rdx
945	bswapl	%eax
946	bswapl	%edx
947	xorl	%ebp,%eax
948	xorl	%ebp,%edx
// pinsrd $3,%eax,%xmm3
949.byte	102,15,58,34,216,3
950	leaq	3(%r8),%rax
951	movdqa	%xmm3,16(%rsp)
// pinsrd $3,%edx,%xmm4
952.byte	102,15,58,34,226,3
953	bswapl	%eax
// Block count back into %rdx; %r10 becomes scratch.
954	movq	%r10,%rdx
955	leaq	4(%r8),%r10
956	movdqa	%xmm4,32(%rsp)
957	xorl	%ebp,%eax
958	bswapl	%r10d
// pinsrd $3,%eax,%xmm5
959.byte	102,15,58,34,232,3
960	xorl	%ebp,%r10d
961	movdqa	%xmm5,48(%rsp)
962	leaq	5(%r8),%r9
963	movl	%r10d,64+12(%rsp)
964	bswapl	%r9d
965	leaq	6(%r8),%r10
966	movl	240(%rcx),%eax
967	xorl	%ebp,%r9d
968	bswapl	%r10d
969	movl	%r9d,80+12(%rsp)
970	xorl	%ebp,%r10d
971	leaq	7(%r8),%r9
972	movl	%r10d,96+12(%rsp)
973	bswapl	%r9d
974	xorl	%ebp,%r9d
975	movl	%r9d,112+12(%rsp)
976
// %xmm1 = round key 1; lanes 4 and 5 come back from their stack slots.
977	movups	16(%rcx),%xmm1
978
979	movdqa	64(%rsp),%xmm6
980	movdqa	80(%rsp),%xmm7
981
982	cmpq	$8,%rdx
983	jb	.Lctr32_tail
984
// Bias %rcx by 128 so round keys are addressed as N-128(%rcx) in the loop.
985	leaq	128(%rcx),%rcx
986	subq	$8,%rdx
987	jmp	.Lctr32_loop8
988
989.align	32
990.Lctr32_loop8:
// Eight-block iteration. The AES rounds (.byte aesenc encodings, round keys
// alternating between %xmm1 and %xmm0) are interleaved with computing the
// next eight counter words (counter+8 .. counter+15) into the stack slots.
991	addl	$8,%r8d
992	movdqa	96(%rsp),%xmm8
993.byte	102,15,56,220,209
994	movl	%r8d,%r9d
995	movdqa	112(%rsp),%xmm9
996.byte	102,15,56,220,217
997	bswapl	%r9d
998	movups	32-128(%rcx),%xmm0
999.byte	102,15,56,220,225
1000	xorl	%ebp,%r9d
1001	nop
1002.byte	102,15,56,220,233
1003	movl	%r9d,0+12(%rsp)
1004	leaq	1(%r8),%r9
1005.byte	102,15,56,220,241
1006.byte	102,15,56,220,249
1007.byte	102,68,15,56,220,193
1008.byte	102,68,15,56,220,201
1009	movups	48-128(%rcx),%xmm1
1010	bswapl	%r9d
1011.byte	102,15,56,220,208
1012.byte	102,15,56,220,216
1013	xorl	%ebp,%r9d
// 0x66,0x90 is a two-byte NOP; it only pads the instruction stream.
1014.byte	0x66,0x90
1015.byte	102,15,56,220,224
1016.byte	102,15,56,220,232
1017	movl	%r9d,16+12(%rsp)
1018	leaq	2(%r8),%r9
1019.byte	102,15,56,220,240
1020.byte	102,15,56,220,248
1021.byte	102,68,15,56,220,192
1022.byte	102,68,15,56,220,200
1023	movups	64-128(%rcx),%xmm0
1024	bswapl	%r9d
1025.byte	102,15,56,220,209
1026.byte	102,15,56,220,217
1027	xorl	%ebp,%r9d
1028.byte	0x66,0x90
1029.byte	102,15,56,220,225
1030.byte	102,15,56,220,233
1031	movl	%r9d,32+12(%rsp)
1032	leaq	3(%r8),%r9
1033.byte	102,15,56,220,241
1034.byte	102,15,56,220,249
1035.byte	102,68,15,56,220,193
1036.byte	102,68,15,56,220,201
1037	movups	80-128(%rcx),%xmm1
1038	bswapl	%r9d
1039.byte	102,15,56,220,208
1040.byte	102,15,56,220,216
1041	xorl	%ebp,%r9d
1042.byte	0x66,0x90
1043.byte	102,15,56,220,224
1044.byte	102,15,56,220,232
1045	movl	%r9d,48+12(%rsp)
1046	leaq	4(%r8),%r9
1047.byte	102,15,56,220,240
1048.byte	102,15,56,220,248
1049.byte	102,68,15,56,220,192
1050.byte	102,68,15,56,220,200
1051	movups	96-128(%rcx),%xmm0
1052	bswapl	%r9d
1053.byte	102,15,56,220,209
1054.byte	102,15,56,220,217
1055	xorl	%ebp,%r9d
1056.byte	0x66,0x90
1057.byte	102,15,56,220,225
1058.byte	102,15,56,220,233
1059	movl	%r9d,64+12(%rsp)
1060	leaq	5(%r8),%r9
1061.byte	102,15,56,220,241
1062.byte	102,15,56,220,249
1063.byte	102,68,15,56,220,193
1064.byte	102,68,15,56,220,201
1065	movups	112-128(%rcx),%xmm1
1066	bswapl	%r9d
1067.byte	102,15,56,220,208
1068.byte	102,15,56,220,216
1069	xorl	%ebp,%r9d
1070.byte	0x66,0x90
1071.byte	102,15,56,220,224
1072.byte	102,15,56,220,232
1073	movl	%r9d,80+12(%rsp)
1074	leaq	6(%r8),%r9
1075.byte	102,15,56,220,240
1076.byte	102,15,56,220,248
1077.byte	102,68,15,56,220,192
1078.byte	102,68,15,56,220,200
1079	movups	128-128(%rcx),%xmm0
1080	bswapl	%r9d
1081.byte	102,15,56,220,209
1082.byte	102,15,56,220,217
1083	xorl	%ebp,%r9d
1084.byte	0x66,0x90
1085.byte	102,15,56,220,225
1086.byte	102,15,56,220,233
1087	movl	%r9d,96+12(%rsp)
1088	leaq	7(%r8),%r9
1089.byte	102,15,56,220,241
1090.byte	102,15,56,220,249
1091.byte	102,68,15,56,220,193
1092.byte	102,68,15,56,220,201
1093	movups	144-128(%rcx),%xmm1
1094	bswapl	%r9d
1095.byte	102,15,56,220,208
1096.byte	102,15,56,220,216
1097.byte	102,15,56,220,224
1098	xorl	%ebp,%r9d
// Start fetching input: %xmm10 = input block 0.
1099	movdqu	0(%rdi),%xmm10
1100.byte	102,15,56,220,232
1101	movl	%r9d,112+12(%rsp)
// Key-size dispatch on the round count. The flags from this cmpl survive the
// AESENC encodings and movups loads, so the jb and the later je both use it:
// below 11 goes straight to the final round, equal to 11 runs two more
// rounds first, anything else runs four more.
1102	cmpl	$11,%eax
1103.byte	102,15,56,220,240
1104.byte	102,15,56,220,248
1105.byte	102,68,15,56,220,192
1106.byte	102,68,15,56,220,200
1107	movups	160-128(%rcx),%xmm0
1108
1109	jb	.Lctr32_enc_done
1110
1111.byte	102,15,56,220,209
1112.byte	102,15,56,220,217
1113.byte	102,15,56,220,225
1114.byte	102,15,56,220,233
1115.byte	102,15,56,220,241
1116.byte	102,15,56,220,249
1117.byte	102,68,15,56,220,193
1118.byte	102,68,15,56,220,201
1119	movups	176-128(%rcx),%xmm1
1120
1121.byte	102,15,56,220,208
1122.byte	102,15,56,220,216
1123.byte	102,15,56,220,224
1124.byte	102,15,56,220,232
1125.byte	102,15,56,220,240
1126.byte	102,15,56,220,248
1127.byte	102,68,15,56,220,192
1128.byte	102,68,15,56,220,200
1129	movups	192-128(%rcx),%xmm0
1130	je	.Lctr32_enc_done
1131
1132.byte	102,15,56,220,209
1133.byte	102,15,56,220,217
1134.byte	102,15,56,220,225
1135.byte	102,15,56,220,233
1136.byte	102,15,56,220,241
1137.byte	102,15,56,220,249
1138.byte	102,68,15,56,220,193
1139.byte	102,68,15,56,220,201
1140	movups	208-128(%rcx),%xmm1
1141
1142.byte	102,15,56,220,208
1143.byte	102,15,56,220,216
1144.byte	102,15,56,220,224
1145.byte	102,15,56,220,232
1146.byte	102,15,56,220,240
1147.byte	102,15,56,220,248
1148.byte	102,68,15,56,220,192
1149.byte	102,68,15,56,220,200
1150	movups	224-128(%rcx),%xmm0
1151	jmp	.Lctr32_enc_done
1152
1153.align	16
1154.Lctr32_enc_done:
// %xmm0 holds the last round key. Each input block is XORed with it and the
// result is used as the AESENCLAST key operand, so the final round and the
// keystream-XOR-input step happen in a single instruction per lane.
1155	movdqu	16(%rdi),%xmm11
1156	pxor	%xmm0,%xmm10
1157	movdqu	32(%rdi),%xmm12
1158	pxor	%xmm0,%xmm11
1159	movdqu	48(%rdi),%xmm13
1160	pxor	%xmm0,%xmm12
1161	movdqu	64(%rdi),%xmm14
1162	pxor	%xmm0,%xmm13
1163	movdqu	80(%rdi),%xmm15
1164	pxor	%xmm0,%xmm14
1165	prefetcht0	448(%rdi)
1166	prefetcht0	512(%rdi)
1167	pxor	%xmm0,%xmm15
// Penultimate round: aesenc %xmm1 into %xmm2..%xmm9.
1168.byte	102,15,56,220,209
1169.byte	102,15,56,220,217
1170.byte	102,15,56,220,225
1171.byte	102,15,56,220,233
1172.byte	102,15,56,220,241
1173.byte	102,15,56,220,249
1174.byte	102,68,15,56,220,193
1175.byte	102,68,15,56,220,201
1176	movdqu	96(%rdi),%xmm1
1177	leaq	128(%rdi),%rdi
1178
// aesenclast %xmm10..%xmm15 into %xmm2..%xmm7, then %xmm1 into %xmm8 and
// %xmm10 (reloaded with input block 7) into %xmm9. In between, the next
// iteration's counter blocks are reloaded from the stack.
1179.byte	102,65,15,56,221,210
1180	pxor	%xmm0,%xmm1
1181	movdqu	112-128(%rdi),%xmm10
1182.byte	102,65,15,56,221,219
1183	pxor	%xmm0,%xmm10
1184	movdqa	0(%rsp),%xmm11
1185.byte	102,65,15,56,221,228
1186.byte	102,65,15,56,221,237
1187	movdqa	16(%rsp),%xmm12
1188	movdqa	32(%rsp),%xmm13
1189.byte	102,65,15,56,221,246
1190.byte	102,65,15,56,221,255
1191	movdqa	48(%rsp),%xmm14
1192	movdqa	64(%rsp),%xmm15
1193.byte	102,68,15,56,221,193
1194	movdqa	80(%rsp),%xmm0
// %xmm1 = round key 1 again for the next iteration.
1195	movups	16-128(%rcx),%xmm1
1196.byte	102,69,15,56,221,202
1197
// Store eight output blocks and move the next counters into %xmm2..%xmm7.
1198	movups	%xmm2,(%rsi)
1199	movdqa	%xmm11,%xmm2
1200	movups	%xmm3,16(%rsi)
1201	movdqa	%xmm12,%xmm3
1202	movups	%xmm4,32(%rsi)
1203	movdqa	%xmm13,%xmm4
1204	movups	%xmm5,48(%rsi)
1205	movdqa	%xmm14,%xmm5
1206	movups	%xmm6,64(%rsi)
1207	movdqa	%xmm15,%xmm6
1208	movups	%xmm7,80(%rsi)
1209	movdqa	%xmm0,%xmm7
1210	movups	%xmm8,96(%rsi)
1211	movups	%xmm9,112(%rsi)
1212	leaq	128(%rsi),%rsi
1213
// No borrow means at least eight more blocks remain.
1214	subq	$8,%rdx
1215	jnc	.Lctr32_loop8
1216
// Undo the last subtraction (%rdx = 0..7 blocks left) and the %rcx bias.
1217	addq	$8,%rdx
1218	jz	.Lctr32_done
1219	leaq	-128(%rcx),%rcx
1220
1221.Lctr32_tail:
1222
1223
// Here %rcx = key schedule, %xmm1 = round key 1, and %xmm2..%xmm7 hold
// pre-whitened counter blocks. Fewer than 4 blocks -> .Lctr32_loop3,
// exactly 4 -> .Lctr32_loop4, otherwise (5..7) fall through.
1224	leaq	16(%rcx),%rcx
1225	cmpq	$4,%rdx
1226	jb	.Lctr32_loop3
1227	je	.Lctr32_loop4
1228
1229
// 5..7 blocks: do the first round on seven lanes here (%xmm9 is a zeroed
// dummy lane), set up %rcx/%rax as the eight-lane helper expects, and call
// into its loop at .Lenc_loop8_enter.
1230	shll	$4,%eax
1231	movdqa	96(%rsp),%xmm8
1232	pxor	%xmm9,%xmm9
1233
1234	movups	16(%rcx),%xmm0
1235.byte	102,15,56,220,209
1236.byte	102,15,56,220,217
1237	leaq	32-16(%rcx,%rax,1),%rcx
1238	negq	%rax
1239.byte	102,15,56,220,225
1240	addq	$16,%rax
1241	movups	(%rdi),%xmm10
1242.byte	102,15,56,220,233
1243.byte	102,15,56,220,241
1244	movups	16(%rdi),%xmm11
1245	movups	32(%rdi),%xmm12
1246.byte	102,15,56,220,249
1247.byte	102,68,15,56,220,193
1248
1249	call	.Lenc_loop8_enter
1250
// XOR keystream with input. Five blocks are always stored; the sixth and
// seventh depend on the cmpq $6 below (flags survive the movups/xorps).
1251	movdqu	48(%rdi),%xmm13
1252	pxor	%xmm10,%xmm2
1253	movdqu	64(%rdi),%xmm10
1254	pxor	%xmm11,%xmm3
1255	movdqu	%xmm2,(%rsi)
1256	pxor	%xmm12,%xmm4
1257	movdqu	%xmm3,16(%rsi)
1258	pxor	%xmm13,%xmm5
1259	movdqu	%xmm4,32(%rsi)
1260	pxor	%xmm10,%xmm6
1261	movdqu	%xmm5,48(%rsi)
1262	movdqu	%xmm6,64(%rsi)
1263	cmpq	$6,%rdx
1264	jb	.Lctr32_done
1265
1266	movups	80(%rdi),%xmm11
1267	xorps	%xmm11,%xmm7
1268	movups	%xmm7,80(%rsi)
1269	je	.Lctr32_done
1270
1271	movups	96(%rdi),%xmm12
1272	xorps	%xmm12,%xmm8
1273	movups	%xmm8,96(%rsi)
1274	jmp	.Lctr32_done
1275
1276.align	32
1277.Lctr32_loop4:
// Exactly four blocks: %eax rounds of aesenc %xmm1 on %xmm2..%xmm5, walking
// %rcx through the schedule, then aesenclast %xmm1.
1278.byte	102,15,56,220,209
1279	leaq	16(%rcx),%rcx
1280	decl	%eax
1281.byte	102,15,56,220,217
1282.byte	102,15,56,220,225
1283.byte	102,15,56,220,233
1284	movups	(%rcx),%xmm1
1285	jnz	.Lctr32_loop4
1286.byte	102,15,56,221,209
1287.byte	102,15,56,221,217
1288	movups	(%rdi),%xmm10
1289	movups	16(%rdi),%xmm11
1290.byte	102,15,56,221,225
1291.byte	102,15,56,221,233
1292	movups	32(%rdi),%xmm12
1293	movups	48(%rdi),%xmm13
1294
1295	xorps	%xmm10,%xmm2
1296	movups	%xmm2,(%rsi)
1297	xorps	%xmm11,%xmm3
1298	movups	%xmm3,16(%rsi)
1299	pxor	%xmm12,%xmm4
1300	movdqu	%xmm4,32(%rsi)
1301	pxor	%xmm13,%xmm5
1302	movdqu	%xmm5,48(%rsi)
1303	jmp	.Lctr32_done
1304
1305.align	32
1306.Lctr32_loop3:
// Fewer than four blocks: three lanes are always encrypted; how many
// results are used depends on %rdx below.
1307.byte	102,15,56,220,209
1308	leaq	16(%rcx),%rcx
1309	decl	%eax
1310.byte	102,15,56,220,217
1311.byte	102,15,56,220,225
1312	movups	(%rcx),%xmm1
1313	jnz	.Lctr32_loop3
1314.byte	102,15,56,221,209
1315.byte	102,15,56,221,217
1316.byte	102,15,56,221,225
1317
// The first block is stored unconditionally; the count is only tested after.
1318	movups	(%rdi),%xmm10
1319	xorps	%xmm10,%xmm2
1320	movups	%xmm2,(%rsi)
1321	cmpq	$2,%rdx
1322	jb	.Lctr32_done
1323
1324	movups	16(%rdi),%xmm11
1325	xorps	%xmm11,%xmm3
1326	movups	%xmm3,16(%rsi)
1327	je	.Lctr32_done
1328
1329	movups	32(%rdi),%xmm12
1330	xorps	%xmm12,%xmm4
1331	movups	%xmm4,32(%rsi)
1332
1333.Lctr32_done:
// Zero %xmm0-%xmm15, %ebp and the eight stack slots, then restore %rbp and
// %rsp from the frame anchor in %r11.
1334	xorps	%xmm0,%xmm0
1335	xorl	%ebp,%ebp
1336	pxor	%xmm1,%xmm1
1337	pxor	%xmm2,%xmm2
1338	pxor	%xmm3,%xmm3
1339	pxor	%xmm4,%xmm4
1340	pxor	%xmm5,%xmm5
1341	pxor	%xmm6,%xmm6
1342	pxor	%xmm7,%xmm7
1343	movaps	%xmm0,0(%rsp)
1344	pxor	%xmm8,%xmm8
1345	movaps	%xmm0,16(%rsp)
1346	pxor	%xmm9,%xmm9
1347	movaps	%xmm0,32(%rsp)
1348	pxor	%xmm10,%xmm10
1349	movaps	%xmm0,48(%rsp)
1350	pxor	%xmm11,%xmm11
1351	movaps	%xmm0,64(%rsp)
1352	pxor	%xmm12,%xmm12
1353	movaps	%xmm0,80(%rsp)
1354	pxor	%xmm13,%xmm13
1355	movaps	%xmm0,96(%rsp)
1356	pxor	%xmm14,%xmm14
1357	movaps	%xmm0,112(%rsp)
1358	pxor	%xmm15,%xmm15
1359	movq	-8(%r11),%rbp
1360.cfi_restore	%rbp
1361	leaq	(%r11),%rsp
1362.cfi_def_cfa_register	%rsp
1363.Lctr32_epilogue:
1364	ret
1365.cfi_endproc
1366.size	aes_hw_ctr32_encrypt_blocks,.-aes_hw_ctr32_encrypt_blocks
// aes_hw_cbc_encrypt: AES in CBC mode (encrypt or decrypt) using AES-NI.
//
// Register usage on entry, as read by the code below (SysV AMD64 argument order):
//   %rdi = input pointer          %rsi = output pointer
//   %rdx = length in bytes        %rcx = key schedule (count read from offset 240)
//   %r8  = 16-byte IV (loaded at entry, rewritten before returning)
//   %r9d = direction: non-zero takes the encrypt path, zero the decrypt path
//
// The .byte sequences are hand-encoded AES-NI instructions:
//   102,15,56,220,M = aesenc       102,15,56,221,M = aesenclast
//   102,15,56,222,M = aesdec       102,15,56,223,M = aesdeclast
//   M = 209 means "%xmm1,%xmm2"; a 65/68/69 byte after 102 is a REX prefix
//   that selects registers %xmm8-%xmm15.
// .long 0x9066A4F3 is "rep movsb" + 2-byte nop; .long 0x9066AAF3 is
// "rep stosb" + 2-byte nop.
1367.globl	aes_hw_cbc_encrypt
1368.hidden aes_hw_cbc_encrypt
1369.type	aes_hw_cbc_encrypt,@function
1370.align	16
1371aes_hw_cbc_encrypt:
1372.cfi_startproc
1373_CET_ENDBR
// Zero length: nothing to do.
1374	testq	%rdx,%rdx
1375	jz	.Lcbc_ret
1376
// %r10d keeps the loop count from offset 240 of the key, %r11 the key pointer,
// so %eax/%rcx can be reloaded after every block.
1377	movl	240(%rcx),%r10d
1378	movq	%rcx,%r11
1379	testl	%r9d,%r9d
1380	jz	.Lcbc_decrypt
1381
// ---- Encrypt path: %xmm2 carries the chaining value (initially the IV). ----
1382	movups	(%r8),%xmm2
1383	movl	%r10d,%eax
1384	cmpq	$16,%rdx
1385	jb	.Lcbc_enc_tail
// %rdx is kept biased by -16 so the borrow from "subq $16" ends the loop.
1386	subq	$16,%rdx
1387	jmp	.Lcbc_enc_loop
1388.align	16
1389.Lcbc_enc_loop:
1390	movups	(%rdi),%xmm3
1391	leaq	16(%rdi),%rdi
1392
// xmm2 = chaining value ^ plaintext ^ round key 0.
1393	movups	(%rcx),%xmm0
1394	movups	16(%rcx),%xmm1
1395	xorps	%xmm0,%xmm3
1396	leaq	32(%rcx),%rcx
1397	xorps	%xmm3,%xmm2
// %eax iterations of aesenc, then one aesenclast.
1398.Loop_enc1_6:
1399.byte	102,15,56,220,209
1400	decl	%eax
1401	movups	(%rcx),%xmm1
1402	leaq	16(%rcx),%rcx
1403	jnz	.Loop_enc1_6
1404.byte	102,15,56,221,209
// Reload count and key pointer, emit the ciphertext block, advance.
1405	movl	%r10d,%eax
1406	movq	%r11,%rcx
1407	movups	%xmm2,0(%rsi)
1408	leaq	16(%rsi),%rsi
1409	subq	$16,%rdx
1410	jnc	.Lcbc_enc_loop
// Undo the bias; a non-zero remainder is a partial final block.
1411	addq	$16,%rdx
1412	jnz	.Lcbc_enc_tail
// Done: scrub key material, write the last ciphertext block back as the IV.
1413	pxor	%xmm0,%xmm0
1414	pxor	%xmm1,%xmm1
1415	movups	%xmm2,(%r8)
1416	pxor	%xmm2,%xmm2
1417	pxor	%xmm3,%xmm3
1418	jmp	.Lcbc_ret
1419
// Partial block: copy the %rdx remaining input bytes to the output buffer
// (rep movsb after swapping %rdi/%rsi), zero-fill the other 16-%rdx bytes
// (rep stosb with %al = 0), then run the loop once more in place on that
// padded block with %rdx = 0.
1420.Lcbc_enc_tail:
1421	movq	%rdx,%rcx
1422	xchgq	%rdi,%rsi
1423.long	0x9066A4F3
1424	movl	$16,%ecx
1425	subq	%rdx,%rcx
1426	xorl	%eax,%eax
1427.long	0x9066AAF3
1428	leaq	-16(%rdi),%rdi
1429	movl	%r10d,%eax
1430	movq	%rdi,%rsi
1431	movq	%r11,%rcx
1432	xorq	%rdx,%rdx
1433	jmp	.Lcbc_enc_loop
1434
// ---- Decrypt path. Exactly one block (16 bytes) is handled without a frame. ----
1435.align	16
1436.Lcbc_decrypt:
1437	cmpq	$16,%rdx
1438	jne	.Lcbc_decrypt_bulk
1439
1440
1441
// xmm2 = ciphertext, xmm3 = IV, xmm4 = ciphertext copy (becomes the new IV).
1442	movdqu	(%rdi),%xmm2
1443	movdqu	(%r8),%xmm3
1444	movdqa	%xmm2,%xmm4
1445	movups	(%rcx),%xmm0
1446	movups	16(%rcx),%xmm1
1447	leaq	32(%rcx),%rcx
1448	xorps	%xmm0,%xmm2
1449.Loop_dec1_7:
1450.byte	102,15,56,222,209
1451	decl	%r10d
1452	movups	(%rcx),%xmm1
1453	leaq	16(%rcx),%rcx
1454	jnz	.Loop_dec1_7
1455.byte	102,15,56,223,209
1456	pxor	%xmm0,%xmm0
1457	pxor	%xmm1,%xmm1
1458	movdqu	%xmm4,(%r8)
1459	xorps	%xmm3,%xmm2
1460	pxor	%xmm3,%xmm3
1461	movups	%xmm2,(%rsi)
1462	pxor	%xmm2,%xmm2
1463	jmp	.Lcbc_ret
// Bulk decrypt: %r11 remembers the incoming %rsp (and is the CFA register),
// %rbp is saved just below it, and a 16-byte aligned scratch slot is reserved
// at (%rsp) for the partial-tail copy.
1464.align	16
1465.Lcbc_decrypt_bulk:
1466	leaq	(%rsp),%r11
1467.cfi_def_cfa_register	%r11
1468	pushq	%rbp
1469.cfi_offset	%rbp,-16
1470	subq	$16,%rsp
1471	andq	$-16,%rsp
1472	movq	%rcx,%rbp
// xmm10 = IV; %eax = loop count. 0x50 bytes (5 blocks) or fewer go to the tail.
1473	movups	(%r8),%xmm10
1474	movl	%r10d,%eax
1475	cmpq	$0x50,%rdx
1476	jbe	.Lcbc_dec_tail
1477
// Load six ciphertext blocks into xmm2-xmm7 and keep copies of the first five
// in xmm11-xmm15 (each is the chaining value for the following block).
1478	movups	(%rcx),%xmm0
1479	movdqu	0(%rdi),%xmm2
1480	movdqu	16(%rdi),%xmm3
1481	movdqa	%xmm2,%xmm11
1482	movdqu	32(%rdi),%xmm4
1483	movdqa	%xmm3,%xmm12
1484	movdqu	48(%rdi),%xmm5
1485	movdqa	%xmm4,%xmm13
1486	movdqu	64(%rdi),%xmm6
1487	movdqa	%xmm5,%xmm14
1488	movdqu	80(%rdi),%xmm7
1489	movdqa	%xmm6,%xmm15
1490	cmpq	$0x70,%rdx
1491	jbe	.Lcbc_dec_six_or_seven
1492
// Eight-block loop. %rdx is biased by -0x70 and %rcx by +112, which is why the
// round-key displacements below are written as "N-112(%rcx)".
1493	subq	$0x70,%rdx
1494	leaq	112(%rcx),%rcx
1495	jmp	.Lcbc_dec_loop8_enter
1496.align	16
1497.Lcbc_dec_loop8:
// Flush the eighth plaintext block of the previous iteration.
1498	movups	%xmm9,(%rsi)
1499	leaq	16(%rsi),%rsi
1500.Lcbc_dec_loop8_enter:
// Load blocks 7 and 8, XOR round key 0 into all eight states.
1501	movdqu	96(%rdi),%xmm8
1502	pxor	%xmm0,%xmm2
1503	movdqu	112(%rdi),%xmm9
1504	pxor	%xmm0,%xmm3
1505	movups	16-112(%rcx),%xmm1
1506	pxor	%xmm0,%xmm4
// The cmpq sets CF when %rdx < 0x70; the adcq/andq/addq further down turn that
// into %rbp = %rdi (CF set) or %rdi+128 (CF clear). The SSE/AES instructions in
// between do not modify the flags.
1507	movq	$-1,%rbp
1508	cmpq	$0x70,%rdx
1509	pxor	%xmm0,%xmm5
1510	pxor	%xmm0,%xmm6
1511	pxor	%xmm0,%xmm7
1512	pxor	%xmm0,%xmm8
1513
// From here on: one aesdec per state per round, alternating the round key
// register between xmm1 and xmm0 while the next key is loaded.
1514.byte	102,15,56,222,209
1515	pxor	%xmm0,%xmm9
1516	movups	32-112(%rcx),%xmm0
1517.byte	102,15,56,222,217
1518.byte	102,15,56,222,225
1519.byte	102,15,56,222,233
1520.byte	102,15,56,222,241
1521.byte	102,15,56,222,249
1522.byte	102,68,15,56,222,193
1523	adcq	$0,%rbp
1524	andq	$128,%rbp
1525.byte	102,68,15,56,222,201
1526	addq	%rdi,%rbp
1527	movups	48-112(%rcx),%xmm1
1528.byte	102,15,56,222,208
1529.byte	102,15,56,222,216
1530.byte	102,15,56,222,224
1531.byte	102,15,56,222,232
1532.byte	102,15,56,222,240
1533.byte	102,15,56,222,248
1534.byte	102,68,15,56,222,192
1535.byte	102,68,15,56,222,200
1536	movups	64-112(%rcx),%xmm0
1537	nop
1538.byte	102,15,56,222,209
1539.byte	102,15,56,222,217
1540.byte	102,15,56,222,225
1541.byte	102,15,56,222,233
1542.byte	102,15,56,222,241
1543.byte	102,15,56,222,249
1544.byte	102,68,15,56,222,193
1545.byte	102,68,15,56,222,201
1546	movups	80-112(%rcx),%xmm1
1547	nop
1548.byte	102,15,56,222,208
1549.byte	102,15,56,222,216
1550.byte	102,15,56,222,224
1551.byte	102,15,56,222,232
1552.byte	102,15,56,222,240
1553.byte	102,15,56,222,248
1554.byte	102,68,15,56,222,192
1555.byte	102,68,15,56,222,200
1556	movups	96-112(%rcx),%xmm0
1557	nop
1558.byte	102,15,56,222,209
1559.byte	102,15,56,222,217
1560.byte	102,15,56,222,225
1561.byte	102,15,56,222,233
1562.byte	102,15,56,222,241
1563.byte	102,15,56,222,249
1564.byte	102,68,15,56,222,193
1565.byte	102,68,15,56,222,201
1566	movups	112-112(%rcx),%xmm1
1567	nop
1568.byte	102,15,56,222,208
1569.byte	102,15,56,222,216
1570.byte	102,15,56,222,224
1571.byte	102,15,56,222,232
1572.byte	102,15,56,222,240
1573.byte	102,15,56,222,248
1574.byte	102,68,15,56,222,192
1575.byte	102,68,15,56,222,200
1576	movups	128-112(%rcx),%xmm0
1577	nop
1578.byte	102,15,56,222,209
1579.byte	102,15,56,222,217
1580.byte	102,15,56,222,225
1581.byte	102,15,56,222,233
1582.byte	102,15,56,222,241
1583.byte	102,15,56,222,249
1584.byte	102,68,15,56,222,193
1585.byte	102,68,15,56,222,201
1586	movups	144-112(%rcx),%xmm1
// Compare the loop count with 11; the result is consumed by the jb below
// (count < 11: skip the extra rounds) and the je further down (count == 11:
// skip only the last two). Flags survive the intervening AES/SSE instructions.
1587	cmpl	$11,%eax
1588.byte	102,15,56,222,208
1589.byte	102,15,56,222,216
1590.byte	102,15,56,222,224
1591.byte	102,15,56,222,232
1592.byte	102,15,56,222,240
1593.byte	102,15,56,222,248
1594.byte	102,68,15,56,222,192
1595.byte	102,68,15,56,222,200
1596	movups	160-112(%rcx),%xmm0
1597	jb	.Lcbc_dec_done
1598.byte	102,15,56,222,209
1599.byte	102,15,56,222,217
1600.byte	102,15,56,222,225
1601.byte	102,15,56,222,233
1602.byte	102,15,56,222,241
1603.byte	102,15,56,222,249
1604.byte	102,68,15,56,222,193
1605.byte	102,68,15,56,222,201
1606	movups	176-112(%rcx),%xmm1
1607	nop
1608.byte	102,15,56,222,208
1609.byte	102,15,56,222,216
1610.byte	102,15,56,222,224
1611.byte	102,15,56,222,232
1612.byte	102,15,56,222,240
1613.byte	102,15,56,222,248
1614.byte	102,68,15,56,222,192
1615.byte	102,68,15,56,222,200
1616	movups	192-112(%rcx),%xmm0
1617	je	.Lcbc_dec_done
1618.byte	102,15,56,222,209
1619.byte	102,15,56,222,217
1620.byte	102,15,56,222,225
1621.byte	102,15,56,222,233
1622.byte	102,15,56,222,241
1623.byte	102,15,56,222,249
1624.byte	102,68,15,56,222,193
1625.byte	102,68,15,56,222,201
1626	movups	208-112(%rcx),%xmm1
1627	nop
1628.byte	102,15,56,222,208
1629.byte	102,15,56,222,216
1630.byte	102,15,56,222,224
1631.byte	102,15,56,222,232
1632.byte	102,15,56,222,240
1633.byte	102,15,56,222,248
1634.byte	102,68,15,56,222,192
1635.byte	102,68,15,56,222,200
1636	movups	224-112(%rcx),%xmm0
1637	jmp	.Lcbc_dec_done
1638.align	16
1639.Lcbc_dec_done:
// Last aesdec round (key in xmm1). Meanwhile the final round key (xmm0) is
// XORed into the saved chaining values xmm10-xmm15, so that each aesdeclast
// below applies "last round key ^ previous ciphertext" in one step.
1640.byte	102,15,56,222,209
1641.byte	102,15,56,222,217
1642	pxor	%xmm0,%xmm10
1643	pxor	%xmm0,%xmm11
1644.byte	102,15,56,222,225
1645.byte	102,15,56,222,233
1646	pxor	%xmm0,%xmm12
1647	pxor	%xmm0,%xmm13
1648.byte	102,15,56,222,241
1649.byte	102,15,56,222,249
1650	pxor	%xmm0,%xmm14
1651	pxor	%xmm0,%xmm15
1652.byte	102,68,15,56,222,193
1653.byte	102,68,15,56,222,201
1654	movdqu	80(%rdi),%xmm1
1655
// aesdeclast %xmm10..%xmm15,%xmm2..%xmm7, then %xmm1,%xmm8 and %xmm10,%xmm9
// (xmm1/xmm10 are reloaded with ciphertext blocks 6/7 ^ last round key first).
// Interleaved: reload xmm11-xmm15 and xmm1 from 0..80(%rbp), and keep block 8
// (112(%rdi)) as the next chaining value in xmm10.
1656.byte	102,65,15,56,223,210
1657	movdqu	96(%rdi),%xmm10
1658	pxor	%xmm0,%xmm1
1659.byte	102,65,15,56,223,219
1660	pxor	%xmm0,%xmm10
1661	movdqu	112(%rdi),%xmm0
1662.byte	102,65,15,56,223,228
1663	leaq	128(%rdi),%rdi
1664	movdqu	0(%rbp),%xmm11
1665.byte	102,65,15,56,223,237
1666.byte	102,65,15,56,223,246
1667	movdqu	16(%rbp),%xmm12
1668	movdqu	32(%rbp),%xmm13
1669.byte	102,65,15,56,223,255
1670.byte	102,68,15,56,223,193
1671	movdqu	48(%rbp),%xmm14
1672	movdqu	64(%rbp),%xmm15
1673.byte	102,69,15,56,223,202
1674	movdqa	%xmm0,%xmm10
1675	movdqu	80(%rbp),%xmm1
1676	movups	-112(%rcx),%xmm0
1677
// Store seven plaintext blocks and move the blocks read via %rbp into
// xmm2-xmm7 as the next states; the eighth plaintext (xmm9) is stored at the
// top of the next iteration or just after the loop.
1678	movups	%xmm2,(%rsi)
1679	movdqa	%xmm11,%xmm2
1680	movups	%xmm3,16(%rsi)
1681	movdqa	%xmm12,%xmm3
1682	movups	%xmm4,32(%rsi)
1683	movdqa	%xmm13,%xmm4
1684	movups	%xmm5,48(%rsi)
1685	movdqa	%xmm14,%xmm5
1686	movups	%xmm6,64(%rsi)
1687	movdqa	%xmm15,%xmm6
1688	movups	%xmm7,80(%rsi)
1689	movdqa	%xmm1,%xmm7
1690	movups	%xmm8,96(%rsi)
1691	leaq	112(%rsi),%rsi
1692
1693	subq	$0x80,%rdx
1694	ja	.Lcbc_dec_loop8
1695
// Loop exit: xmm2 = pending eighth plaintext, undo the %rcx and %rdx biases.
// If nothing is left (signed <= 0) go straight to the collection code,
// otherwise store xmm9 and continue with the 1..7 block handlers.
1696	movaps	%xmm9,%xmm2
1697	leaq	-112(%rcx),%rcx
1698	addq	$0x70,%rdx
1699	jle	.Lcbc_dec_clear_tail_collected
1700	movups	%xmm9,(%rsi)
1701	leaq	16(%rsi),%rsi
1702	cmpq	$0x50,%rdx
1703	jbe	.Lcbc_dec_tail
1704
// xmm2 was overwritten above; xmm11 still holds the first pending ciphertext.
1705	movaps	%xmm11,%xmm2
1706.Lcbc_dec_six_or_seven:
1707	cmpq	$0x60,%rdx
1708	ja	.Lcbc_dec_seven
1709
// Six blocks. _aesni_decrypt6 is defined outside this chunk; presumably it
// decrypts xmm2-xmm7 in place with the key at %rcx and count in %eax - confirm.
// xmm8 preserves the sixth ciphertext block as the new chaining value.
1710	movaps	%xmm7,%xmm8
1711	call	_aesni_decrypt6
1712	pxor	%xmm10,%xmm2
1713	movaps	%xmm8,%xmm10
1714	pxor	%xmm11,%xmm3
1715	movdqu	%xmm2,(%rsi)
1716	pxor	%xmm12,%xmm4
1717	movdqu	%xmm3,16(%rsi)
1718	pxor	%xmm3,%xmm3
1719	pxor	%xmm13,%xmm5
1720	movdqu	%xmm4,32(%rsi)
1721	pxor	%xmm4,%xmm4
1722	pxor	%xmm14,%xmm6
1723	movdqu	%xmm5,48(%rsi)
1724	pxor	%xmm5,%xmm5
1725	pxor	%xmm15,%xmm7
1726	movdqu	%xmm6,64(%rsi)
1727	pxor	%xmm6,%xmm6
1728	leaq	80(%rsi),%rsi
1729	movdqa	%xmm7,%xmm2
1730	pxor	%xmm7,%xmm7
1731	jmp	.Lcbc_dec_tail_collected
1732
// Seven blocks: xmm8 = seventh ciphertext, xmm9 zeroed as the unused eighth
// state. _aesni_decrypt8 is defined outside this chunk. Ciphertext blocks 6
// and 7 are reloaded afterwards as chaining value (xmm9) and new IV (xmm10).
1733.align	16
1734.Lcbc_dec_seven:
1735	movups	96(%rdi),%xmm8
1736	xorps	%xmm9,%xmm9
1737	call	_aesni_decrypt8
1738	movups	80(%rdi),%xmm9
1739	pxor	%xmm10,%xmm2
1740	movups	96(%rdi),%xmm10
1741	pxor	%xmm11,%xmm3
1742	movdqu	%xmm2,(%rsi)
1743	pxor	%xmm12,%xmm4
1744	movdqu	%xmm3,16(%rsi)
1745	pxor	%xmm3,%xmm3
1746	pxor	%xmm13,%xmm5
1747	movdqu	%xmm4,32(%rsi)
1748	pxor	%xmm4,%xmm4
1749	pxor	%xmm14,%xmm6
1750	movdqu	%xmm5,48(%rsi)
1751	pxor	%xmm5,%xmm5
1752	pxor	%xmm15,%xmm7
1753	movdqu	%xmm6,64(%rsi)
1754	pxor	%xmm6,%xmm6
1755	pxor	%xmm9,%xmm8
1756	movdqu	%xmm7,80(%rsi)
1757	pxor	%xmm7,%xmm7
1758	leaq	96(%rsi),%rsi
1759	movdqa	%xmm8,%xmm2
1760	pxor	%xmm8,%xmm8
1761	pxor	%xmm9,%xmm9
1762	jmp	.Lcbc_dec_tail_collected
1763
// Tail dispatcher for at most 0x50 bytes: load blocks one at a time, keeping a
// ciphertext copy in xmm11.., and branch as soon as "subq $0x10,%rdx" reaches
// zero or borrows.
1764.Lcbc_dec_tail:
1765	movups	(%rdi),%xmm2
1766	subq	$0x10,%rdx
1767	jbe	.Lcbc_dec_one
1768
1769	movups	16(%rdi),%xmm3
1770	movaps	%xmm2,%xmm11
1771	subq	$0x10,%rdx
1772	jbe	.Lcbc_dec_two
1773
1774	movups	32(%rdi),%xmm4
1775	movaps	%xmm3,%xmm12
1776	subq	$0x10,%rdx
1777	jbe	.Lcbc_dec_three
1778
1779	movups	48(%rdi),%xmm5
1780	movaps	%xmm4,%xmm13
1781	subq	$0x10,%rdx
1782	jbe	.Lcbc_dec_four
1783
// Five blocks: run the six-block routine with xmm7 zeroed as a dummy state.
1784	movups	64(%rdi),%xmm6
1785	movaps	%xmm5,%xmm14
1786	movaps	%xmm6,%xmm15
1787	xorps	%xmm7,%xmm7
1788	call	_aesni_decrypt6
1789	pxor	%xmm10,%xmm2
1790	movaps	%xmm15,%xmm10
1791	pxor	%xmm11,%xmm3
1792	movdqu	%xmm2,(%rsi)
1793	pxor	%xmm12,%xmm4
1794	movdqu	%xmm3,16(%rsi)
1795	pxor	%xmm3,%xmm3
1796	pxor	%xmm13,%xmm5
1797	movdqu	%xmm4,32(%rsi)
1798	pxor	%xmm4,%xmm4
1799	pxor	%xmm14,%xmm6
1800	movdqu	%xmm5,48(%rsi)
1801	pxor	%xmm5,%xmm5
1802	leaq	64(%rsi),%rsi
1803	movdqa	%xmm6,%xmm2
1804	pxor	%xmm6,%xmm6
1805	pxor	%xmm7,%xmm7
1806	subq	$0x10,%rdx
1807	jmp	.Lcbc_dec_tail_collected
1808
// One block: inline single-block decrypt; xmm11 keeps the ciphertext, which
// becomes the new chaining value in xmm10.
1809.align	16
1810.Lcbc_dec_one:
1811	movaps	%xmm2,%xmm11
1812	movups	(%rcx),%xmm0
1813	movups	16(%rcx),%xmm1
1814	leaq	32(%rcx),%rcx
1815	xorps	%xmm0,%xmm2
1816.Loop_dec1_8:
1817.byte	102,15,56,222,209
1818	decl	%eax
1819	movups	(%rcx),%xmm1
1820	leaq	16(%rcx),%rcx
1821	jnz	.Loop_dec1_8
1822.byte	102,15,56,223,209
1823	xorps	%xmm10,%xmm2
1824	movaps	%xmm11,%xmm10
1825	jmp	.Lcbc_dec_tail_collected
// Two / three / four blocks: same pattern. Save the last ciphertext as the new
// chaining value, call the N-block decrypt helper (defined outside this chunk),
// XOR with IV / previous ciphertexts, store all but the last plaintext and
// leave the last one in xmm2 for .Lcbc_dec_tail_collected.
1826.align	16
1827.Lcbc_dec_two:
1828	movaps	%xmm3,%xmm12
1829	call	_aesni_decrypt2
1830	pxor	%xmm10,%xmm2
1831	movaps	%xmm12,%xmm10
1832	pxor	%xmm11,%xmm3
1833	movdqu	%xmm2,(%rsi)
1834	movdqa	%xmm3,%xmm2
1835	pxor	%xmm3,%xmm3
1836	leaq	16(%rsi),%rsi
1837	jmp	.Lcbc_dec_tail_collected
1838.align	16
1839.Lcbc_dec_three:
1840	movaps	%xmm4,%xmm13
1841	call	_aesni_decrypt3
1842	pxor	%xmm10,%xmm2
1843	movaps	%xmm13,%xmm10
1844	pxor	%xmm11,%xmm3
1845	movdqu	%xmm2,(%rsi)
1846	pxor	%xmm12,%xmm4
1847	movdqu	%xmm3,16(%rsi)
1848	pxor	%xmm3,%xmm3
1849	movdqa	%xmm4,%xmm2
1850	pxor	%xmm4,%xmm4
1851	leaq	32(%rsi),%rsi
1852	jmp	.Lcbc_dec_tail_collected
1853.align	16
1854.Lcbc_dec_four:
1855	movaps	%xmm5,%xmm14
1856	call	_aesni_decrypt4
1857	pxor	%xmm10,%xmm2
1858	movaps	%xmm14,%xmm10
1859	pxor	%xmm11,%xmm3
1860	movdqu	%xmm2,(%rsi)
1861	pxor	%xmm12,%xmm4
1862	movdqu	%xmm3,16(%rsi)
1863	pxor	%xmm3,%xmm3
1864	pxor	%xmm13,%xmm5
1865	movdqu	%xmm4,32(%rsi)
1866	pxor	%xmm4,%xmm4
1867	movdqa	%xmm5,%xmm2
1868	pxor	%xmm5,%xmm5
1869	leaq	48(%rsi),%rsi
1870	jmp	.Lcbc_dec_tail_collected
1871
// Entry used straight from the eight-block loop: wipe the plaintext still held
// in xmm3-xmm9 before falling into the common collection code.
1872.align	16
1873.Lcbc_dec_clear_tail_collected:
1874	pxor	%xmm3,%xmm3
1875	pxor	%xmm4,%xmm4
1876	pxor	%xmm5,%xmm5
1877	pxor	%xmm6,%xmm6
1878	pxor	%xmm7,%xmm7
1879	pxor	%xmm8,%xmm8
1880	pxor	%xmm9,%xmm9
// Common exit for the decrypt handlers: xmm10 = new IV, xmm2 = last plaintext
// block not yet stored. If %rdx has non-zero low four bits, take the partial
// path; otherwise store the whole block.
1881.Lcbc_dec_tail_collected:
1882	movups	%xmm10,(%r8)
1883	andq	$15,%rdx
1884	jnz	.Lcbc_dec_tail_partial
1885	movups	%xmm2,(%rsi)
1886	pxor	%xmm2,%xmm2
1887	jmp	.Lcbc_dec_ret
// Partial path: spill the block to the aligned stack slot, copy
// %rcx = 16 - (%rdx & 15) bytes from it to the output with rep movsb, then
// overwrite the slot with the (already zeroed) xmm2.
1888.align	16
1889.Lcbc_dec_tail_partial:
1890	movaps	%xmm2,(%rsp)
1891	pxor	%xmm2,%xmm2
1892	movq	$16,%rcx
1893	movq	%rsi,%rdi
1894	subq	%rdx,%rcx
1895	leaq	(%rsp),%rsi
1896.long	0x9066A4F3
1897	movdqa	%xmm2,(%rsp)
1898
// Bulk-decrypt epilogue: clear the key registers, restore %rbp from just below
// the saved stack pointer and restore %rsp from %r11.
1899.Lcbc_dec_ret:
1900	xorps	%xmm0,%xmm0
1901	pxor	%xmm1,%xmm1
1902	movq	-8(%r11),%rbp
1903.cfi_restore	%rbp
1904	leaq	(%r11),%rsp
1905.cfi_def_cfa_register	%rsp
1906.Lcbc_ret:
1907	ret
1908.cfi_endproc
1909.size	aes_hw_cbc_encrypt,.-aes_hw_cbc_encrypt
// aes_hw_set_decrypt_key: build a decryption key schedule.
//
// Calls __aesni_set_encrypt_key with the caller's arguments untouched
// (%rdi = user key, %esi = key size in bits, %rdx = output schedule) and, when
// that returns 0 in %eax, converts the schedule in place: the round keys are
// reversed in order and every key except the first and last is passed through
// aesimc. A non-zero %eax from the callee is returned unchanged.
//
// .byte 0x48,0x83,0xEC,0x08 is "subq $8,%rsp" (matched by the addq at the end).
// .byte 102,15,56,219,192 / ...,201 are "aesimc %xmm0,%xmm0" / "aesimc %xmm1,%xmm1".
1910.globl	aes_hw_set_decrypt_key
1911.hidden aes_hw_set_decrypt_key
1912.type	aes_hw_set_decrypt_key,@function
1913.align	16
1914aes_hw_set_decrypt_key:
1915.cfi_startproc
1916_CET_ENDBR
1917.byte	0x48,0x83,0xEC,0x08
1918.cfi_adjust_cfa_offset	8
1919	call	__aesni_set_encrypt_key
// On success the callee left its loop count (9, 11 or 13) in %esi, so
// %rdi = %rdx + 16 + 16*%esi addresses the last round key.
1920	shll	$4,%esi
1921	testl	%eax,%eax
1922	jnz	.Ldec_key_ret
1923	leaq	16(%rdx,%rsi,1),%rdi
1924
// Swap the first and last round keys as they are (no aesimc).
1925	movups	(%rdx),%xmm0
1926	movups	(%rdi),%xmm1
1927	movups	%xmm0,(%rdi)
1928	movups	%xmm1,(%rdx)
1929	leaq	16(%rdx),%rdx
1930	leaq	-16(%rdi),%rdi
1931
// Walk inwards from both ends, applying aesimc to each pair and swapping them,
// until the two pointers meet.
1932.Ldec_key_inverse:
1933	movups	(%rdx),%xmm0
1934	movups	(%rdi),%xmm1
1935.byte	102,15,56,219,192
1936.byte	102,15,56,219,201
1937	leaq	16(%rdx),%rdx
1938	leaq	-16(%rdi),%rdi
1939	movups	%xmm0,16(%rdi)
1940	movups	%xmm1,-16(%rdx)
1941	cmpq	%rdx,%rdi
1942	ja	.Ldec_key_inverse
1943
// Middle round key: aesimc in place, then scrub the temporaries.
1944	movups	(%rdx),%xmm0
1945.byte	102,15,56,219,192
1946	pxor	%xmm1,%xmm1
1947	movups	%xmm0,(%rdi)
1948	pxor	%xmm0,%xmm0
1949.Ldec_key_ret:
1950	addq	$8,%rsp
1951.cfi_adjust_cfa_offset	-8
1952	ret
1953.cfi_endproc
1954.LSEH_end_set_decrypt_key:
1955.size	aes_hw_set_decrypt_key,.-aes_hw_set_decrypt_key
// aes_hw_set_encrypt_key / __aesni_set_encrypt_key: expand a user key into an
// AES encryption key schedule. Both labels name the same entry point; the
// second one is the target of the call in aes_hw_set_decrypt_key.
//
// Register usage, as read by the code below:
//   %rdi = user key bytes, %esi = key size in bits (128, 192 or 256),
//   %rdx = output schedule. %rdx itself is never modified here.
// Result in %rax/%eax: 0 on success, -1 if %rdi or %rdx is NULL, -2 if %esi is
// not one of the three supported sizes.
// On success %esi holds 9, 11 or 13 and that value is also stored at byte
// offset 240 of the schedule (every path below ends with a store that lands
// on %rdx+240).
//
// Two implementations exist per key size: one built on aeskeygenassist plus
// the .Lkey_expansion_* helpers, and an "_alt" one built on pshufb+aesenclast.
// The choice depends on bits 0x10000800 of the second OPENSSL_ia32cap_P word:
// the alt path is taken when only 0x10000000 is set.
// NOTE(review): those bits are presumably AVX (bit 28) and XOP (bit 11) of
// CPUID.1:ECX as laid out by OPENSSL_ia32cap_P - confirm.
//
// Hand-encoded instructions used here:
//   0x48,0x83,0xEC,0x08      subq $8,%rsp
//   102,15,58,223,200,N      aeskeygenassist $N,%xmm0,%xmm1
//   102,15,58,223,202,N      aeskeygenassist $N,%xmm2,%xmm1
//   102,15,56,0,197 / ,213   pshufb %xmm5,%xmm0 / pshufb %xmm5,%xmm2
//   102,15,56,221,196        aesenclast %xmm4,%xmm0
//   102,15,56,221,212 / ,211 aesenclast %xmm4,%xmm2 / aesenclast %xmm3,%xmm2
1956.globl	aes_hw_set_encrypt_key
1957.hidden aes_hw_set_encrypt_key
1958.type	aes_hw_set_encrypt_key,@function
1959.align	16
1960aes_hw_set_encrypt_key:
1961__aesni_set_encrypt_key:
1962.cfi_startproc
1963_CET_ENDBR
1964#ifdef BORINGSSL_DISPATCH_TEST
1965	movb	$1,BORINGSSL_function_hit+3(%rip)
1966#endif
1967.byte	0x48,0x83,0xEC,0x08
1968.cfi_adjust_cfa_offset	8
// Preload the NULL-argument return code.
1969	movq	$-1,%rax
1970	testq	%rdi,%rdi
1971	jz	.Lenc_key_ret
1972	testq	%rdx,%rdx
1973	jz	.Lenc_key_ret
1974
// xmm0 = first 16 key bytes, xmm4 = 0 (scratch for the helpers),
// %r10d = masked capability bits, %rax = write cursor at schedule+16.
1975	movups	(%rdi),%xmm0
1976	xorps	%xmm4,%xmm4
1977	leaq	OPENSSL_ia32cap_P(%rip),%r10
1978	movl	4(%r10),%r10d
1979	andl	$268437504,%r10d
1980	leaq	16(%rdx),%rax
1981	cmpl	$256,%esi
1982	je	.L14rounds
1983	cmpl	$192,%esi
1984	je	.L12rounds
1985	cmpl	$128,%esi
1986	jne	.Lbad_keybits
1987
// ---- 128-bit key ----
1988.L10rounds:
1989	movl	$9,%esi
1990	cmpl	$268435456,%r10d
1991	je	.L10rounds_alt
1992
// Ten aeskeygenassist steps with immediates 1,2,4,...,128,27,54. The "_cold"
// entry skips the store because round key 0 was just written to (%rdx).
1993	movups	%xmm0,(%rdx)
1994.byte	102,15,58,223,200,1
1995	call	.Lkey_expansion_128_cold
1996.byte	102,15,58,223,200,2
1997	call	.Lkey_expansion_128
1998.byte	102,15,58,223,200,4
1999	call	.Lkey_expansion_128
2000.byte	102,15,58,223,200,8
2001	call	.Lkey_expansion_128
2002.byte	102,15,58,223,200,16
2003	call	.Lkey_expansion_128
2004.byte	102,15,58,223,200,32
2005	call	.Lkey_expansion_128
2006.byte	102,15,58,223,200,64
2007	call	.Lkey_expansion_128
2008.byte	102,15,58,223,200,128
2009	call	.Lkey_expansion_128
2010.byte	102,15,58,223,200,27
2011	call	.Lkey_expansion_128
2012.byte	102,15,58,223,200,54
2013	call	.Lkey_expansion_128
// %rax is now schedule+160: store the last round key, then the count at +240.
2014	movups	%xmm0,(%rax)
2015	movl	%esi,80(%rax)
2016	xorl	%eax,%eax
2017	jmp	.Lenc_key_ret
2018
// 128-bit alternative: xmm5 = byte-shuffle mask (.Lkey_rotate), xmm4 = round
// constant vector starting at 1, xmm2 = copy of the previous round key.
2019.align	16
2020.L10rounds_alt:
2021	movdqa	.Lkey_rotate(%rip),%xmm5
2022	movl	$8,%r10d
2023	movdqa	.Lkey_rcon1(%rip),%xmm4
2024	movdqa	%xmm0,%xmm2
2025	movdqu	%xmm0,(%rdx)
2026	jmp	.Loop_key128
2027
// Eight iterations: pshufb + aesenclast against the round constant, double the
// constant (pslld $1), then fold in the running XOR of the previous key's
// words built with three pslldq/pxor steps.
2028.align	16
2029.Loop_key128:
2030.byte	102,15,56,0,197
2031.byte	102,15,56,221,196
2032	pslld	$1,%xmm4
2033	leaq	16(%rax),%rax
2034
2035	movdqa	%xmm2,%xmm3
2036	pslldq	$4,%xmm2
2037	pxor	%xmm2,%xmm3
2038	pslldq	$4,%xmm2
2039	pxor	%xmm2,%xmm3
2040	pslldq	$4,%xmm2
2041	pxor	%xmm3,%xmm2
2042
2043	pxor	%xmm2,%xmm0
2044	movdqu	%xmm0,-16(%rax)
2045	movdqa	%xmm0,%xmm2
2046
2047	decl	%r10d
2048	jnz	.Loop_key128
2049
// Last two round keys use constants 0x1b and (after pslld) 0x36.
2050	movdqa	.Lkey_rcon1b(%rip),%xmm4
2051
2052.byte	102,15,56,0,197
2053.byte	102,15,56,221,196
2054	pslld	$1,%xmm4
2055
2056	movdqa	%xmm2,%xmm3
2057	pslldq	$4,%xmm2
2058	pxor	%xmm2,%xmm3
2059	pslldq	$4,%xmm2
2060	pxor	%xmm2,%xmm3
2061	pslldq	$4,%xmm2
2062	pxor	%xmm3,%xmm2
2063
2064	pxor	%xmm2,%xmm0
2065	movdqu	%xmm0,(%rax)
2066
2067	movdqa	%xmm0,%xmm2
2068.byte	102,15,56,0,197
2069.byte	102,15,56,221,196
2070
2071	movdqa	%xmm2,%xmm3
2072	pslldq	$4,%xmm2
2073	pxor	%xmm2,%xmm3
2074	pslldq	$4,%xmm2
2075	pxor	%xmm2,%xmm3
2076	pslldq	$4,%xmm2
2077	pxor	%xmm3,%xmm2
2078
2079	pxor	%xmm2,%xmm0
2080	movdqu	%xmm0,16(%rax)
2081
// %rax is schedule+144 here, so 96(%rax) is offset 240.
2082	movl	%esi,96(%rax)
2083	xorl	%eax,%eax
2084	jmp	.Lenc_key_ret
2085
// ---- 192-bit key: xmm0 = key bytes 0-15, low half of xmm2 = key bytes 16-23 ----
2086.align	16
2087.L12rounds:
2088	movq	16(%rdi),%xmm2
2089	movl	$11,%esi
2090	cmpl	$268435456,%r10d
2091	je	.L12rounds_alt
2092
// Eight aeskeygenassist steps alternating the 192a/192b helpers; 192b writes
// 32 bytes per call and 192a 16, so %rax ends at schedule+192.
2093	movups	%xmm0,(%rdx)
2094.byte	102,15,58,223,202,1
2095	call	.Lkey_expansion_192a_cold
2096.byte	102,15,58,223,202,2
2097	call	.Lkey_expansion_192b
2098.byte	102,15,58,223,202,4
2099	call	.Lkey_expansion_192a
2100.byte	102,15,58,223,202,8
2101	call	.Lkey_expansion_192b
2102.byte	102,15,58,223,202,16
2103	call	.Lkey_expansion_192a
2104.byte	102,15,58,223,202,32
2105	call	.Lkey_expansion_192b
2106.byte	102,15,58,223,202,64
2107	call	.Lkey_expansion_192a
2108.byte	102,15,58,223,202,128
2109	call	.Lkey_expansion_192b
2110	movups	%xmm0,(%rax)
2111	movl	%esi,48(%rax)
2112	xorq	%rax,%rax
2113	jmp	.Lenc_key_ret
2114
// 192-bit alternative: same pshufb+aesenclast idea with the .Lkey_rotate192
// mask; each of the eight iterations emits 24 bytes of schedule.
2115.align	16
2116.L12rounds_alt:
2117	movdqa	.Lkey_rotate192(%rip),%xmm5
2118	movdqa	.Lkey_rcon1(%rip),%xmm4
2119	movl	$8,%r10d
2120	movdqu	%xmm0,(%rdx)
2121	jmp	.Loop_key192
2122
2123.align	16
2124.Loop_key192:
2125	movq	%xmm2,0(%rax)
2126	movdqa	%xmm2,%xmm1
2127.byte	102,15,56,0,213
2128.byte	102,15,56,221,212
2129	pslld	$1,%xmm4
2130	leaq	24(%rax),%rax
2131
2132	movdqa	%xmm0,%xmm3
2133	pslldq	$4,%xmm0
2134	pxor	%xmm0,%xmm3
2135	pslldq	$4,%xmm0
2136	pxor	%xmm0,%xmm3
2137	pslldq	$4,%xmm0
2138	pxor	%xmm3,%xmm0
2139
2140	pshufd	$0xff,%xmm0,%xmm3
2141	pxor	%xmm1,%xmm3
2142	pslldq	$4,%xmm1
2143	pxor	%xmm1,%xmm3
2144
2145	pxor	%xmm2,%xmm0
2146	pxor	%xmm3,%xmm2
2147	movdqu	%xmm0,-16(%rax)
2148
2149	decl	%r10d
2150	jnz	.Loop_key192
2151
// %rax is schedule+208 here, so 32(%rax) is offset 240.
2152	movl	%esi,32(%rax)
2153	xorl	%eax,%eax
2154	jmp	.Lenc_key_ret
2155
// ---- 256-bit key: xmm0 = key bytes 0-15, xmm2 = key bytes 16-31 ----
2156.align	16
2157.L14rounds:
2158	movups	16(%rdi),%xmm2
2159	movl	$13,%esi
2160	leaq	16(%rax),%rax
2161	cmpl	$268435456,%r10d
2162	je	.L14rounds_alt
2163
// Thirteen aeskeygenassist steps alternating the 256a/256b helpers
// (256a derives the next xmm0 from xmm2's assist, 256b the next xmm2 from
// xmm0's assist).
2164	movups	%xmm0,(%rdx)
2165	movups	%xmm2,16(%rdx)
2166.byte	102,15,58,223,202,1
2167	call	.Lkey_expansion_256a_cold
2168.byte	102,15,58,223,200,1
2169	call	.Lkey_expansion_256b
2170.byte	102,15,58,223,202,2
2171	call	.Lkey_expansion_256a
2172.byte	102,15,58,223,200,2
2173	call	.Lkey_expansion_256b
2174.byte	102,15,58,223,202,4
2175	call	.Lkey_expansion_256a
2176.byte	102,15,58,223,200,4
2177	call	.Lkey_expansion_256b
2178.byte	102,15,58,223,202,8
2179	call	.Lkey_expansion_256a
2180.byte	102,15,58,223,200,8
2181	call	.Lkey_expansion_256b
2182.byte	102,15,58,223,202,16
2183	call	.Lkey_expansion_256a
2184.byte	102,15,58,223,200,16
2185	call	.Lkey_expansion_256b
2186.byte	102,15,58,223,202,32
2187	call	.Lkey_expansion_256a
2188.byte	102,15,58,223,200,32
2189	call	.Lkey_expansion_256b
2190.byte	102,15,58,223,202,64
2191	call	.Lkey_expansion_256a
// %rax is schedule+224: store the last round key, then the count at +240.
2192	movups	%xmm0,(%rax)
2193	movl	%esi,16(%rax)
2194	xorq	%rax,%rax
2195	jmp	.Lenc_key_ret
2196
// 256-bit alternative: seven iterations; each writes one round key derived
// with the round constant, and all but the last also write a second one
// derived via aesenclast against an all-zero xmm3.
2197.align	16
2198.L14rounds_alt:
2199	movdqa	.Lkey_rotate(%rip),%xmm5
2200	movdqa	.Lkey_rcon1(%rip),%xmm4
2201	movl	$7,%r10d
2202	movdqu	%xmm0,0(%rdx)
2203	movdqa	%xmm2,%xmm1
2204	movdqu	%xmm2,16(%rdx)
2205	jmp	.Loop_key256
2206
2207.align	16
2208.Loop_key256:
2209.byte	102,15,56,0,213
2210.byte	102,15,56,221,212
2211
2212	movdqa	%xmm0,%xmm3
2213	pslldq	$4,%xmm0
2214	pxor	%xmm0,%xmm3
2215	pslldq	$4,%xmm0
2216	pxor	%xmm0,%xmm3
2217	pslldq	$4,%xmm0
2218	pxor	%xmm3,%xmm0
2219	pslld	$1,%xmm4
2220
2221	pxor	%xmm2,%xmm0
2222	movdqu	%xmm0,(%rax)
2223
2224	decl	%r10d
2225	jz	.Ldone_key256
2226
2227	pshufd	$0xff,%xmm0,%xmm2
2228	pxor	%xmm3,%xmm3
2229.byte	102,15,56,221,211
2230
2231	movdqa	%xmm1,%xmm3
2232	pslldq	$4,%xmm1
2233	pxor	%xmm1,%xmm3
2234	pslldq	$4,%xmm1
2235	pxor	%xmm1,%xmm3
2236	pslldq	$4,%xmm1
2237	pxor	%xmm3,%xmm1
2238
2239	pxor	%xmm1,%xmm2
2240	movdqu	%xmm2,16(%rax)
2241	leaq	32(%rax),%rax
2242	movdqa	%xmm2,%xmm1
2243
2244	jmp	.Loop_key256
2245
// %rax is schedule+224 here, so 16(%rax) is offset 240.
2246.Ldone_key256:
2247	movl	%esi,16(%rax)
2248	xorl	%eax,%eax
2249	jmp	.Lenc_key_ret
2250
2251.align	16
2252.Lbad_keybits:
2253	movq	$-2,%rax
// Common exit: wipe every vector register that held key material, then undo
// the 8-byte stack adjustment made at entry.
2254.Lenc_key_ret:
2255	pxor	%xmm0,%xmm0
2256	pxor	%xmm1,%xmm1
2257	pxor	%xmm2,%xmm2
2258	pxor	%xmm3,%xmm3
2259	pxor	%xmm4,%xmm4
2260	pxor	%xmm5,%xmm5
2261	addq	$8,%rsp
2262.cfi_adjust_cfa_offset	-8
2263	ret
2264.cfi_endproc
2265.LSEH_end_set_encrypt_key:
2266
// .Lkey_expansion_128: one AES-128 key-schedule step (local helper reached via
// "call" from the .L10rounds path).
//   In:  xmm0 = previous round key, xmm1 = aeskeygenassist result for it,
//        xmm4 = scratch carried between calls, %rax = schedule write cursor.
//   Out: xmm0 = next round key.
// The main entry first stores the previous key at (%rax) and advances %rax by
// 16; the "_cold" entry skips that store.
2267.align	16
2268.Lkey_expansion_128:
2269	movups	%xmm0,(%rax)
2270	leaq	16(%rax),%rax
2271.Lkey_expansion_128_cold:
// Two shufps/xorps pairs accumulate the shifted words of the old key into
// xmm0; shufps $255 then broadcasts dword 3 of xmm1 for the final XOR.
2272	shufps	$16,%xmm0,%xmm4
2273	xorps	%xmm4,%xmm0
2274	shufps	$140,%xmm0,%xmm4
2275	xorps	%xmm4,%xmm0
2276	shufps	$255,%xmm1,%xmm1
2277	xorps	%xmm1,%xmm0
2278	ret
2279
// .Lkey_expansion_192a: AES-192 key-schedule step (local helper reached via
// "call" from the .L12rounds path).
//   In:  xmm0 / xmm2 = current key material, xmm1 = aeskeygenassist result,
//        xmm4 = scratch carried between calls, %rax = schedule write cursor.
//   Out: xmm0 and xmm2 updated; xmm5 = copy of the incoming xmm2, which
//        .Lkey_expansion_192b consumes on the following call.
// Entry points: the main label stores xmm0 at (%rax) and advances by 16;
// "_cold" skips the store; ".Lkey_expansion_192b_warm" is the shared tail that
// .Lkey_expansion_192b jumps into (it skips the xmm5 save as well).
2280.align	16
2281.Lkey_expansion_192a:
2282	movups	%xmm0,(%rax)
2283	leaq	16(%rax),%rax
2284.Lkey_expansion_192a_cold:
2285	movaps	%xmm2,%xmm5
2286.Lkey_expansion_192b_warm:
2287	shufps	$16,%xmm0,%xmm4
2288	movdqa	%xmm2,%xmm3
2289	xorps	%xmm4,%xmm0
2290	shufps	$140,%xmm0,%xmm4
2291	pslldq	$4,%xmm3
2292	xorps	%xmm4,%xmm0
// pshufd $85 broadcasts dword 1 of the aeskeygenassist result.
2293	pshufd	$85,%xmm1,%xmm1
2294	pxor	%xmm3,%xmm2
2295	pxor	%xmm1,%xmm0
// Broadcast dword 3 of the new xmm0 and fold it into xmm2.
2296	pshufd	$255,%xmm0,%xmm3
2297	pxor	%xmm3,%xmm2
2298	ret
2299
// .Lkey_expansion_192b: companion step to .Lkey_expansion_192a. Writes 32
// bytes of schedule at (%rax) assembled from xmm5, xmm0 and xmm2, advances
// %rax by 32, then tail-jumps into the shared expansion code
// (.Lkey_expansion_192b_warm), whose "ret" returns to the original caller.
2300.align	16
2301.Lkey_expansion_192b:
2302	movaps	%xmm0,%xmm3
// shufps $68: xmm5 = { low 64 bits of xmm5, low 64 bits of xmm0 }.
2303	shufps	$68,%xmm0,%xmm5
2304	movups	%xmm5,(%rax)
// shufps $78: xmm3 = { high 64 bits of xmm0 (copied above), low 64 bits of xmm2 }.
2305	shufps	$78,%xmm2,%xmm3
2306	movups	%xmm3,16(%rax)
2307	leaq	32(%rax),%rax
2308	jmp	.Lkey_expansion_192b_warm
2309
// .Lkey_expansion_256a: AES-256 key-schedule step producing the next xmm0
// (local helper reached via "call" from the .L14rounds path).
//   In:  xmm0 = key half to update, xmm2 = other half, xmm1 = aeskeygenassist
//        result, xmm4 = scratch carried between calls, %rax = write cursor.
//   Out: xmm0 = next round key.
// The main entry first stores xmm2 at (%rax) and advances %rax by 16; the
// "_cold" entry skips that store.
2310.align	16
2311.Lkey_expansion_256a:
2312	movups	%xmm2,(%rax)
2313	leaq	16(%rax),%rax
2314.Lkey_expansion_256a_cold:
2315	shufps	$16,%xmm0,%xmm4
2316	xorps	%xmm4,%xmm0
2317	shufps	$140,%xmm0,%xmm4
2318	xorps	%xmm4,%xmm0
// shufps $255 broadcasts dword 3 of the aeskeygenassist result.
2319	shufps	$255,%xmm1,%xmm1
2320	xorps	%xmm1,%xmm0
2321	ret
2322
// .Lkey_expansion_256b: AES-256 key-schedule step producing the next xmm2.
// Stores xmm0 at (%rax), advances %rax by 16, then updates xmm2 the same way
// .Lkey_expansion_256a updates xmm0, except that dword 2 of the
// aeskeygenassist result is broadcast (shufps $170) instead of dword 3.
2323.align	16
2324.Lkey_expansion_256b:
2325	movups	%xmm0,(%rax)
2326	leaq	16(%rax),%rax
2327
2328	shufps	$16,%xmm2,%xmm4
2329	xorps	%xmm4,%xmm2
2330	shufps	$140,%xmm2,%xmm4
2331	xorps	%xmm4,%xmm2
2332	shufps	$170,%xmm1,%xmm1
2333	xorps	%xmm1,%xmm2
2334	ret
// The symbol sizes are taken here so that they include the .Lkey_expansion_*
// helpers placed after .cfi_endproc.
2335.size	aes_hw_set_encrypt_key,.-aes_hw_set_encrypt_key
2336.size	__aesni_set_encrypt_key,.-__aesni_set_encrypt_key
// Read-only constant tables, 64-byte aligned. Only the .Lkey_* tables are
// referenced by code visible in this part of the file (the "_alt" key
// expansion paths); the users of the other tables lie outside this chunk.
2337.section	.rodata
2338.align	64
// Byte indices 15..0, i.e. a 16-byte reversal pattern.
2339.Lbswap_mask:
2340.byte	15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
2341.Lincrement32:
2342.long	6,6,6,0
2343.Lincrement64:
2344.long	1,0,0,0
2345.Lxts_magic:
2346.long	0x87,0,1,0
2347.Lincrement1:
2348.byte	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
// pshufb masks loaded into xmm5 by the alt key-expansion paths
// (.Lkey_rotate for 128/256-bit keys, .Lkey_rotate192 for 192-bit keys).
2349.Lkey_rotate:
2350.long	0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d
2351.Lkey_rotate192:
2352.long	0x04070605,0x04070605,0x04070605,0x04070605
// Round-constant vectors for the alt paths: 1 is the starting value (doubled
// with pslld each step); 0x1b is reloaded for the last two AES-128 round keys.
2353.Lkey_rcon1:
2354.long	1,1,1,1
2355.Lkey_rcon1b:
2356.long	0x1b,0x1b,0x1b,0x1b
2357
// NUL-terminated ASCII string:
// "AES for Intel AES-NI, CRYPTOGAMS by <appro@openssl.org>"
2358.byte	65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69,83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
2359.align	64
2360.text
2361#endif
2362