// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.

#include <ring-core/asm_base.h>

// Everything below is assembled only for x86-64 Apple (Mach-O) builds that
// have not disabled assembly. The guard is closed by the #endif on the last
// line of the file.
#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64) && defined(__APPLE__)
.text

// _aes_hw_encrypt: encrypt a single 16-byte block with AES-NI.
//
// In:   %rdi = source block (16 bytes, unaligned load)
//       %rsi = destination block (16 bytes, unaligned store)
//       %rdx = key schedule: 16-byte round keys starting at offset 0 and a
//              32-bit count at offset 240 giving the number of aesenc rounds
//              executed before the final aesenclast
// Out:  encrypted block stored at (%rsi)
// Clobbers: %eax, %rdx, flags. %xmm0-%xmm2 are zeroed before returning so no
//           key or data material is left in them.
.globl	_aes_hw_encrypt
.private_extern _aes_hw_encrypt

.p2align	4
_aes_hw_encrypt:

// _CET_ENDBR is a macro that is not defined in this file (presumably supplied
// by asm_base.h as the indirect-branch landing pad -- confirm).
_CET_ENDBR
#ifdef BORINGSSL_DISPATCH_TEST

	// Dispatch test hook: flag byte 1 of the hit table.
	movb	$1,_BORINGSSL_function_hit+1(%rip)
#endif
	movups	(%rdi),%xmm2		// xmm2 = input block
	movl	240(%rdx),%eax		// eax = number of aesenc rounds
	movups	(%rdx),%xmm0		// xmm0 = round key 0
	movups	16(%rdx),%xmm1		// xmm1 = round key 1
	leaq	32(%rdx),%rdx		// rdx -> round key 2
	xorps	%xmm0,%xmm2		// initial AddRoundKey
L$oop_enc1_1:
.byte	102,15,56,220,209		// aesenc %xmm1,%xmm2
	decl	%eax
	movups	(%rdx),%xmm1		// fetch next round key (flags untouched)
	leaq	16(%rdx),%rdx
	jnz	L$oop_enc1_1		// branch on the decl result
.byte	102,15,56,221,209		// aesenclast %xmm1,%xmm2
	pxor	%xmm0,%xmm0		// scrub round keys
	pxor	%xmm1,%xmm1
	movups	%xmm2,(%rsi)		// write the result
	pxor	%xmm2,%xmm2		// scrub the data register
	ret

// _aesni_encrypt2: run two blocks through the complete AES round sequence.
// File-local helper (no .globl); no call site for it is visible in this file.
//
// In:   %rcx = key schedule, %eax = round count (the value kept at key+240),
//       %xmm2, %xmm3 = input blocks
// Out:  %xmm2, %xmm3 = encrypted blocks
// Clobbers: %xmm0, %xmm1, flags. On return %rax is 0 and %rcx points at
//           key + 32 + 16*rounds.
// The loop consumes two round keys per iteration and stops when %rax reaches
// zero in steps of 32, so the round count has to be odd (the key setup in
// this file stores 9 or 13).
.p2align	4
_aesni_encrypt2:

	movups	(%rcx),%xmm0		// round key 0
	shll	$4,%eax			// rounds -> byte length of the round keys
	movups	16(%rcx),%xmm1		// round key 1
	xorps	%xmm0,%xmm2		// initial AddRoundKey on both blocks
	xorps	%xmm0,%xmm3
	movups	32(%rcx),%xmm0		// round key 2
	leaq	32(%rcx,%rax,1),%rcx	// rcx = one past the last round key
	negq	%rax
	addq	$16,%rax		// rax = negative offset of round key 3

L$enc_loop2:
.byte	102,15,56,220,209		// aesenc %xmm1,%xmm2
.byte	102,15,56,220,217		// aesenc %xmm1,%xmm3
	movups	(%rcx,%rax,1),%xmm1
	addq	$32,%rax
.byte	102,15,56,220,208		// aesenc %xmm0,%xmm2
.byte	102,15,56,220,216		// aesenc %xmm0,%xmm3
	movups	-16(%rcx,%rax,1),%xmm0	// does not alter the flags set by addq
	jnz	L$enc_loop2

.byte	102,15,56,220,209		// aesenc %xmm1,%xmm2  (last full round)
.byte	102,15,56,220,217		// aesenc %xmm1,%xmm3
.byte	102,15,56,221,208		// aesenclast %xmm0,%xmm2
.byte	102,15,56,221,216		// aesenclast %xmm0,%xmm3
	ret

// _aesni_encrypt3: run three blocks through the complete AES round sequence.
// File-local helper (no .globl); no call site for it is visible in this file.
//
// In:   %rcx = key schedule, %eax = round count (the value kept at key+240),
//       %xmm2-%xmm4 = input blocks
// Out:  %xmm2-%xmm4 = encrypted blocks
// Clobbers: %xmm0, %xmm1, flags. On return %rax is 0 and %rcx points at
//           key + 32 + 16*rounds.
// Two round keys are consumed per loop iteration, so the round count has to
// be odd for %rax to land exactly on zero.
.p2align	4
_aesni_encrypt3:

	movups	(%rcx),%xmm0		// round key 0
	shll	$4,%eax			// rounds -> byte length of the round keys
	movups	16(%rcx),%xmm1		// round key 1
	xorps	%xmm0,%xmm2		// initial AddRoundKey on all three blocks
	xorps	%xmm0,%xmm3
	xorps	%xmm0,%xmm4
	movups	32(%rcx),%xmm0		// round key 2
	leaq	32(%rcx,%rax,1),%rcx	// rcx = one past the last round key
	negq	%rax
	addq	$16,%rax		// rax = negative offset of round key 3

L$enc_loop3:
.byte	102,15,56,220,209		// aesenc %xmm1,%xmm2
.byte	102,15,56,220,217		// aesenc %xmm1,%xmm3
.byte	102,15,56,220,225		// aesenc %xmm1,%xmm4
	movups	(%rcx,%rax,1),%xmm1
	addq	$32,%rax
.byte	102,15,56,220,208		// aesenc %xmm0,%xmm2
.byte	102,15,56,220,216		// aesenc %xmm0,%xmm3
.byte	102,15,56,220,224		// aesenc %xmm0,%xmm4
	movups	-16(%rcx,%rax,1),%xmm0	// does not alter the flags set by addq
	jnz	L$enc_loop3

.byte	102,15,56,220,209		// aesenc %xmm1,%xmm2  (last full round)
.byte	102,15,56,220,217		// aesenc %xmm1,%xmm3
.byte	102,15,56,220,225		// aesenc %xmm1,%xmm4
.byte	102,15,56,221,208		// aesenclast %xmm0,%xmm2
.byte	102,15,56,221,216		// aesenclast %xmm0,%xmm3
.byte	102,15,56,221,224		// aesenclast %xmm0,%xmm4
	ret

// _aesni_encrypt4: run four blocks through the complete AES round sequence.
// File-local helper (no .globl); no call site for it is visible in this file.
//
// In:   %rcx = key schedule, %eax = round count (the value kept at key+240),
//       %xmm2-%xmm5 = input blocks
// Out:  %xmm2-%xmm5 = encrypted blocks
// Clobbers: %xmm0, %xmm1, flags. On return %rax is 0 and %rcx points at
//           key + 32 + 16*rounds.
// Two round keys are consumed per loop iteration, so the round count has to
// be odd for %rax to land exactly on zero.
.p2align	4
_aesni_encrypt4:

	movups	(%rcx),%xmm0		// round key 0
	shll	$4,%eax			// rounds -> byte length of the round keys
	movups	16(%rcx),%xmm1		// round key 1
	xorps	%xmm0,%xmm2		// initial AddRoundKey on all four blocks
	xorps	%xmm0,%xmm3
	xorps	%xmm0,%xmm4
	xorps	%xmm0,%xmm5
	movups	32(%rcx),%xmm0		// round key 2
	leaq	32(%rcx,%rax,1),%rcx	// rcx = one past the last round key
	negq	%rax
.byte	0x0f,0x1f,0x00			// 3-byte nop (nopl (%rax)), padding only
	addq	$16,%rax		// rax = negative offset of round key 3

L$enc_loop4:
.byte	102,15,56,220,209		// aesenc %xmm1,%xmm2
.byte	102,15,56,220,217		// aesenc %xmm1,%xmm3
.byte	102,15,56,220,225		// aesenc %xmm1,%xmm4
.byte	102,15,56,220,233		// aesenc %xmm1,%xmm5
	movups	(%rcx,%rax,1),%xmm1
	addq	$32,%rax
.byte	102,15,56,220,208		// aesenc %xmm0,%xmm2
.byte	102,15,56,220,216		// aesenc %xmm0,%xmm3
.byte	102,15,56,220,224		// aesenc %xmm0,%xmm4
.byte	102,15,56,220,232		// aesenc %xmm0,%xmm5
	movups	-16(%rcx,%rax,1),%xmm0	// does not alter the flags set by addq
	jnz	L$enc_loop4

.byte	102,15,56,220,209		// aesenc %xmm1,%xmm2  (last full round)
.byte	102,15,56,220,217		// aesenc %xmm1,%xmm3
.byte	102,15,56,220,225		// aesenc %xmm1,%xmm4
.byte	102,15,56,220,233		// aesenc %xmm1,%xmm5
.byte	102,15,56,221,208		// aesenclast %xmm0,%xmm2
.byte	102,15,56,221,216		// aesenclast %xmm0,%xmm3
.byte	102,15,56,221,224		// aesenclast %xmm0,%xmm4
.byte	102,15,56,221,232		// aesenclast %xmm0,%xmm5
	ret

// _aesni_encrypt6: run six blocks through the complete AES round sequence.
// File-local helper (no .globl).
//
// In:   %rcx = key schedule, %eax = round count (the value kept at key+240),
//       %xmm2-%xmm7 = input blocks
// Out:  %xmm2-%xmm7 = encrypted blocks
// Clobbers: %xmm0, %xmm1, flags. On return %rax is 0 and %rcx points at
//           key + 32 + 16*rounds.
//
// Second entry point: the CTR routine in this file calls L$enc_loop6
// directly after it has performed the first rounds itself. That entry
// expects %xmm1/%xmm0 to hold the next two round keys, %rcx to point one
// past the last round key and %rax to hold the (negative) offset of the
// round key that follows them.
.p2align	4
_aesni_encrypt6:

	movups	(%rcx),%xmm0		// round key 0
	shll	$4,%eax			// rounds -> byte length of the round keys
	movups	16(%rcx),%xmm1		// round key 1
	// Whitening with key 0 is interleaved with round 1 on the first blocks.
	xorps	%xmm0,%xmm2
	pxor	%xmm0,%xmm3
	pxor	%xmm0,%xmm4
.byte	102,15,56,220,209		// aesenc %xmm1,%xmm2
	leaq	32(%rcx,%rax,1),%rcx	// rcx = one past the last round key
	negq	%rax
.byte	102,15,56,220,217		// aesenc %xmm1,%xmm3
	pxor	%xmm0,%xmm5
	pxor	%xmm0,%xmm6
.byte	102,15,56,220,225		// aesenc %xmm1,%xmm4
	pxor	%xmm0,%xmm7
	movups	(%rcx,%rax,1),%xmm0	// round key 2
	addq	$16,%rax
	jmp	L$enc_loop6_enter	// blocks 0-2 already had round 1
.p2align	4
L$enc_loop6:
.byte	102,15,56,220,209		// aesenc %xmm1,%xmm2
.byte	102,15,56,220,217		// aesenc %xmm1,%xmm3
.byte	102,15,56,220,225		// aesenc %xmm1,%xmm4
L$enc_loop6_enter:
.byte	102,15,56,220,233		// aesenc %xmm1,%xmm5
.byte	102,15,56,220,241		// aesenc %xmm1,%xmm6
.byte	102,15,56,220,249		// aesenc %xmm1,%xmm7
	movups	(%rcx,%rax,1),%xmm1
	addq	$32,%rax
.byte	102,15,56,220,208		// aesenc %xmm0,%xmm2
.byte	102,15,56,220,216		// aesenc %xmm0,%xmm3
.byte	102,15,56,220,224		// aesenc %xmm0,%xmm4
.byte	102,15,56,220,232		// aesenc %xmm0,%xmm5
.byte	102,15,56,220,240		// aesenc %xmm0,%xmm6
.byte	102,15,56,220,248		// aesenc %xmm0,%xmm7
	movups	-16(%rcx,%rax,1),%xmm0	// does not alter the flags set by addq
	jnz	L$enc_loop6

.byte	102,15,56,220,209		// aesenc %xmm1,%xmm2  (last full round)
.byte	102,15,56,220,217		// aesenc %xmm1,%xmm3
.byte	102,15,56,220,225		// aesenc %xmm1,%xmm4
.byte	102,15,56,220,233		// aesenc %xmm1,%xmm5
.byte	102,15,56,220,241		// aesenc %xmm1,%xmm6
.byte	102,15,56,220,249		// aesenc %xmm1,%xmm7
.byte	102,15,56,221,208		// aesenclast %xmm0,%xmm2
.byte	102,15,56,221,216		// aesenclast %xmm0,%xmm3
.byte	102,15,56,221,224		// aesenclast %xmm0,%xmm4
.byte	102,15,56,221,232		// aesenclast %xmm0,%xmm5
.byte	102,15,56,221,240		// aesenclast %xmm0,%xmm6
.byte	102,15,56,221,248		// aesenclast %xmm0,%xmm7
	ret

// _aesni_encrypt8: run eight blocks through the complete AES round sequence.
// File-local helper (no .globl).
//
// In:   %rcx = key schedule, %eax = round count (the value kept at key+240),
//       %xmm2-%xmm9 = input blocks
// Out:  %xmm2-%xmm9 = encrypted blocks
// Clobbers: %xmm0, %xmm1, flags. On return %rax is 0 and %rcx points at
//           key + 32 + 16*rounds.
//
// Second entry point: the CTR tail in this file calls L$enc_loop8_enter
// directly after applying round 1 itself. That entry expects %xmm0 to hold
// round key 2, %rcx to point one past the last round key and %rax to hold
// the (negative) offset of round key 3.
.p2align	4
_aesni_encrypt8:

	movups	(%rcx),%xmm0		// round key 0
	shll	$4,%eax			// rounds -> byte length of the round keys
	movups	16(%rcx),%xmm1		// round key 1
	// Whitening with key 0 is interleaved with round 1 on the first blocks.
	xorps	%xmm0,%xmm2
	xorps	%xmm0,%xmm3
	pxor	%xmm0,%xmm4
	pxor	%xmm0,%xmm5
	pxor	%xmm0,%xmm6
	leaq	32(%rcx,%rax,1),%rcx	// rcx = one past the last round key
	negq	%rax
.byte	102,15,56,220,209		// aesenc %xmm1,%xmm2
	pxor	%xmm0,%xmm7
	pxor	%xmm0,%xmm8
.byte	102,15,56,220,217		// aesenc %xmm1,%xmm3
	pxor	%xmm0,%xmm9
	movups	(%rcx,%rax,1),%xmm0	// round key 2
	addq	$16,%rax
	jmp	L$enc_loop8_inner	// blocks 0-1 already had round 1
.p2align	4
L$enc_loop8:
.byte	102,15,56,220,209		// aesenc %xmm1,%xmm2
.byte	102,15,56,220,217		// aesenc %xmm1,%xmm3
L$enc_loop8_inner:
.byte	102,15,56,220,225		// aesenc %xmm1,%xmm4
.byte	102,15,56,220,233		// aesenc %xmm1,%xmm5
.byte	102,15,56,220,241		// aesenc %xmm1,%xmm6
.byte	102,15,56,220,249		// aesenc %xmm1,%xmm7
.byte	102,68,15,56,220,193		// aesenc %xmm1,%xmm8
.byte	102,68,15,56,220,201		// aesenc %xmm1,%xmm9
L$enc_loop8_enter:
	movups	(%rcx,%rax,1),%xmm1
	addq	$32,%rax
.byte	102,15,56,220,208		// aesenc %xmm0,%xmm2
.byte	102,15,56,220,216		// aesenc %xmm0,%xmm3
.byte	102,15,56,220,224		// aesenc %xmm0,%xmm4
.byte	102,15,56,220,232		// aesenc %xmm0,%xmm5
.byte	102,15,56,220,240		// aesenc %xmm0,%xmm6
.byte	102,15,56,220,248		// aesenc %xmm0,%xmm7
.byte	102,68,15,56,220,192		// aesenc %xmm0,%xmm8
.byte	102,68,15,56,220,200		// aesenc %xmm0,%xmm9
	movups	-16(%rcx,%rax,1),%xmm0	// does not alter the flags set by addq
	jnz	L$enc_loop8

.byte	102,15,56,220,209		// aesenc %xmm1,%xmm2  (last full round)
.byte	102,15,56,220,217		// aesenc %xmm1,%xmm3
.byte	102,15,56,220,225		// aesenc %xmm1,%xmm4
.byte	102,15,56,220,233		// aesenc %xmm1,%xmm5
.byte	102,15,56,220,241		// aesenc %xmm1,%xmm6
.byte	102,15,56,220,249		// aesenc %xmm1,%xmm7
.byte	102,68,15,56,220,193		// aesenc %xmm1,%xmm8
.byte	102,68,15,56,220,201		// aesenc %xmm1,%xmm9
.byte	102,15,56,221,208		// aesenclast %xmm0,%xmm2
.byte	102,15,56,221,216		// aesenclast %xmm0,%xmm3
.byte	102,15,56,221,224		// aesenclast %xmm0,%xmm4
.byte	102,15,56,221,232		// aesenclast %xmm0,%xmm5
.byte	102,15,56,221,240		// aesenclast %xmm0,%xmm6
.byte	102,15,56,221,248		// aesenclast %xmm0,%xmm7
.byte	102,68,15,56,221,192		// aesenclast %xmm0,%xmm8
.byte	102,68,15,56,221,200		// aesenclast %xmm0,%xmm9
	ret

// _aes_hw_ctr32_encrypt_blocks: AES-CTR over whole 16-byte blocks. The counter
// is the 32-bit big-endian word in bytes 12..15 of the counter block; only
// that word is incremented.
//
// In:   %rdi = input, %rsi = output, %rdx = number of 16-byte blocks,
//       %rcx = key schedule (round keys from offset 0, round count at 240),
//       %r8  = 16-byte initial counter block (only read, never written back)
// Out:  output block i = input block i XOR E(counter block with counter + i)
// Clobbers: %rax, %rcx, %rdx, %rdi, %rsi, %r8-%r11, flags, %xmm0-%xmm15.
//           %rbp is saved and restored on the bulk path. The vector registers
//           used and the stack scratch are zeroed before returning.
//
// Bulk-path frame: 128 bytes at a 16-byte aligned %rsp holding eight counter
// blocks, each already XORed with round key 0. Slot i lives at 16*i(%rsp) and
// its counter word at 16*i+12(%rsp).
//
// NOTE(review): with %rdx = 0 control reaches L$ctr32_loop3, which stores one
// block before it looks at the count. Callers presumably never pass 0.
.globl	_aes_hw_ctr32_encrypt_blocks
.private_extern _aes_hw_ctr32_encrypt_blocks

.p2align	4
_aes_hw_ctr32_encrypt_blocks:

// _CET_ENDBR is a macro that is not defined in this file (presumably supplied
// by asm_base.h as the indirect-branch landing pad -- confirm).
_CET_ENDBR
#ifdef BORINGSSL_DISPATCH_TEST
	// Dispatch test hook: flag byte 0 of the hit table.
	movb	$1,_BORINGSSL_function_hit(%rip)
#endif
	cmpq	$1,%rdx
	jne	L$ctr32_bulk

	// Exactly one block: encrypt the counter block in place and XOR it
	// with the input. No stack frame is needed on this path.

	movups	(%r8),%xmm2		// xmm2 = counter block
	movups	(%rdi),%xmm3		// xmm3 = input block
	movl	240(%rcx),%edx		// edx = number of aesenc rounds
	movups	(%rcx),%xmm0		// round key 0
	movups	16(%rcx),%xmm1		// round key 1
	leaq	32(%rcx),%rcx
	xorps	%xmm0,%xmm2
L$oop_enc1_2:
.byte	102,15,56,220,209		// aesenc %xmm1,%xmm2
	decl	%edx
	movups	(%rcx),%xmm1
	leaq	16(%rcx),%rcx
	jnz	L$oop_enc1_2
.byte	102,15,56,221,209		// aesenclast %xmm1,%xmm2
	pxor	%xmm0,%xmm0		// scrub round keys
	pxor	%xmm1,%xmm1
	xorps	%xmm3,%xmm2		// keystream XOR input
	pxor	%xmm3,%xmm3
	movups	%xmm2,(%rsi)
	xorps	%xmm2,%xmm2
	jmp	L$ctr32_epilogue

.p2align	4
L$ctr32_bulk:
	leaq	(%rsp),%r11		// r11 = entry rsp, used to unwind at the end

	pushq	%rbp			// saved copy sits at -8(%r11)

	subq	$128,%rsp		// eight 16-byte counter slots ...
	andq	$-16,%rsp		// ... aligned for movdqa/movaps

	// Build the eight whitened counter blocks. xmm2 = counter block XOR
	// round key 0; r8d = counter word in host byte order; ebp = last dword
	// of round key 0, so bswap(counter + i) XOR ebp is the whitened
	// counter dword of block i.

	movdqu	(%r8),%xmm2
	movdqu	(%rcx),%xmm0
	movl	12(%r8),%r8d
	pxor	%xmm0,%xmm2
	movl	12(%rcx),%ebp
	movdqa	%xmm2,0(%rsp)
	bswapl	%r8d
	movdqa	%xmm2,%xmm3
	movdqa	%xmm2,%xmm4
	movdqa	%xmm2,%xmm5
	movdqa	%xmm2,64(%rsp)
	movdqa	%xmm2,80(%rsp)
	movdqa	%xmm2,96(%rsp)
	movq	%rdx,%r10		// park the block count; rdx is scratch below
	movdqa	%xmm2,112(%rsp)

	leaq	1(%r8),%rax
	leaq	2(%r8),%rdx
	bswapl	%eax
	bswapl	%edx
	xorl	%ebp,%eax
	xorl	%ebp,%edx
.byte	102,15,58,34,216,3		// pinsrd $3,%eax,%xmm3  (counter + 1)
	leaq	3(%r8),%rax
	movdqa	%xmm3,16(%rsp)
.byte	102,15,58,34,226,3		// pinsrd $3,%edx,%xmm4  (counter + 2)
	bswapl	%eax
	movq	%r10,%rdx		// rdx = block count again
	leaq	4(%r8),%r10
	movdqa	%xmm4,32(%rsp)
	xorl	%ebp,%eax
	bswapl	%r10d
.byte	102,15,58,34,232,3		// pinsrd $3,%eax,%xmm5  (counter + 3)
	xorl	%ebp,%r10d
	movdqa	%xmm5,48(%rsp)
	leaq	5(%r8),%r9
	movl	%r10d,64+12(%rsp)	// slot 4 counter word
	bswapl	%r9d
	leaq	6(%r8),%r10
	movl	240(%rcx),%eax		// eax = number of aesenc rounds
	xorl	%ebp,%r9d
	bswapl	%r10d
	movl	%r9d,80+12(%rsp)	// slot 5 counter word
	xorl	%ebp,%r10d
	leaq	7(%r8),%r9
	movl	%r10d,96+12(%rsp)	// slot 6 counter word
	bswapl	%r9d
	leaq	_OPENSSL_ia32cap_P(%rip),%r10
	movl	4(%r10),%r10d		// second capability word
	xorl	%ebp,%r9d
	andl	$71303168,%r10d		// keep bits 22 and 26 (0x4400000)
	movl	%r9d,112+12(%rsp)	// slot 7 counter word

	movups	16(%rcx),%xmm1		// round key 1

	movdqa	64(%rsp),%xmm6
	movdqa	80(%rsp),%xmm7

	cmpq	$8,%rdx
	jb	L$ctr32_tail		// fewer than 8 blocks: tail code only

	subq	$6,%rdx
	// Only bit 22 set (0x400000) selects the 6-block loop that stores the
	// counters with movbe (presumably MOVBE present without XSAVE --
	// confirm against the capability vector definition).
	cmpl	$4194304,%r10d
	je	L$ctr32_6x

	leaq	128(%rcx),%rcx		// bias rcx so round keys are at N-128(%rcx)
	subq	$2,%rdx			// rdx = blocks - 8
	jmp	L$ctr32_loop8

.p2align	4
L$ctr32_6x:
	shll	$4,%eax
	movl	$48,%r10d
	bswapl	%ebp			// pre-swap so XOR can precede the movbe store
	leaq	32(%rcx,%rax,1),%rcx	// rcx = one past the last round key
	subq	%rax,%r10		// r10 = 48 - 16*rounds
	jmp	L$ctr32_loop6

	// 6-block loop: apply rounds 1 and 2 to the current counters while
	// writing the next six counter words to slots 0..5, then let
	// L$enc_loop6 finish the remaining rounds.
.p2align	4
L$ctr32_loop6:
	addl	$6,%r8d
	movups	-48(%rcx,%r10,1),%xmm0	// round key 2
.byte	102,15,56,220,209		// aesenc %xmm1,%xmm2
	movl	%r8d,%eax
	xorl	%ebp,%eax
.byte	102,15,56,220,217		// aesenc %xmm1,%xmm3
.byte	0x0f,0x38,0xf1,0x44,0x24,12	// movbe %eax,12(%rsp)
	leal	1(%r8),%eax
.byte	102,15,56,220,225		// aesenc %xmm1,%xmm4
	xorl	%ebp,%eax
.byte	0x0f,0x38,0xf1,0x44,0x24,28	// movbe %eax,28(%rsp)
.byte	102,15,56,220,233		// aesenc %xmm1,%xmm5
	leal	2(%r8),%eax
	xorl	%ebp,%eax
.byte	102,15,56,220,241		// aesenc %xmm1,%xmm6
.byte	0x0f,0x38,0xf1,0x44,0x24,44	// movbe %eax,44(%rsp)
	leal	3(%r8),%eax
.byte	102,15,56,220,249		// aesenc %xmm1,%xmm7
	movups	-32(%rcx,%r10,1),%xmm1	// round key 3
	xorl	%ebp,%eax

.byte	102,15,56,220,208		// aesenc %xmm0,%xmm2
.byte	0x0f,0x38,0xf1,0x44,0x24,60	// movbe %eax,60(%rsp)
	leal	4(%r8),%eax
.byte	102,15,56,220,216		// aesenc %xmm0,%xmm3
	xorl	%ebp,%eax
.byte	0x0f,0x38,0xf1,0x44,0x24,76	// movbe %eax,76(%rsp)
.byte	102,15,56,220,224		// aesenc %xmm0,%xmm4
	leal	5(%r8),%eax
	xorl	%ebp,%eax
.byte	102,15,56,220,232		// aesenc %xmm0,%xmm5
.byte	0x0f,0x38,0xf1,0x44,0x24,92	// movbe %eax,92(%rsp)
	movq	%r10,%rax		// loop offset expected by L$enc_loop6
.byte	102,15,56,220,240		// aesenc %xmm0,%xmm6
.byte	102,15,56,220,248		// aesenc %xmm0,%xmm7
	movups	-16(%rcx,%r10,1),%xmm0	// round key 4

	call	L$enc_loop6		// rounds 3..last on xmm2-xmm7

	// XOR keystream with six input blocks; reload the next counters.
	movdqu	(%rdi),%xmm8
	movdqu	16(%rdi),%xmm9
	movdqu	32(%rdi),%xmm10
	movdqu	48(%rdi),%xmm11
	movdqu	64(%rdi),%xmm12
	movdqu	80(%rdi),%xmm13
	leaq	96(%rdi),%rdi
	movups	-64(%rcx,%r10,1),%xmm1	// round key 1 for the next pass
	pxor	%xmm2,%xmm8
	movaps	0(%rsp),%xmm2
	pxor	%xmm3,%xmm9
	movaps	16(%rsp),%xmm3
	pxor	%xmm4,%xmm10
	movaps	32(%rsp),%xmm4
	pxor	%xmm5,%xmm11
	movaps	48(%rsp),%xmm5
	pxor	%xmm6,%xmm12
	movaps	64(%rsp),%xmm6
	pxor	%xmm7,%xmm13
	movaps	80(%rsp),%xmm7
	movdqu	%xmm8,(%rsi)
	movdqu	%xmm9,16(%rsi)
	movdqu	%xmm10,32(%rsi)
	movdqu	%xmm11,48(%rsi)
	movdqu	%xmm12,64(%rsi)
	movdqu	%xmm13,80(%rsi)
	leaq	96(%rsi),%rsi

	subq	$6,%rdx
	jnc	L$ctr32_loop6		// at least six more blocks were pending

	addq	$6,%rdx			// rdx = leftover blocks (0..5)
	jz	L$ctr32_done

	// Restore eax = round count and rcx = start of the key schedule for
	// the tail code.
	leal	-48(%r10),%eax
	leaq	-80(%rcx,%r10,1),%rcx
	negl	%eax
	shrl	$4,%eax
	jmp	L$ctr32_tail

	// 8-block loop: the rounds for the current eight counters are
	// interleaved with computing the next eight counter words
	// (bswap(r8 + i) XOR ebp) into slots 0..7. rcx is biased by +128.
.p2align	5
L$ctr32_loop8:
	addl	$8,%r8d
	movdqa	96(%rsp),%xmm8		// block 6 counter (before slot 6 is rewritten)
.byte	102,15,56,220,209		// aesenc %xmm1,%xmm2
	movl	%r8d,%r9d
	movdqa	112(%rsp),%xmm9		// block 7 counter
.byte	102,15,56,220,217		// aesenc %xmm1,%xmm3
	bswapl	%r9d
	movups	32-128(%rcx),%xmm0	// round key 2
.byte	102,15,56,220,225		// aesenc %xmm1,%xmm4
	xorl	%ebp,%r9d
	nop
.byte	102,15,56,220,233		// aesenc %xmm1,%xmm5
	movl	%r9d,0+12(%rsp)
	leaq	1(%r8),%r9
.byte	102,15,56,220,241		// aesenc %xmm1,%xmm6
.byte	102,15,56,220,249		// aesenc %xmm1,%xmm7
.byte	102,68,15,56,220,193		// aesenc %xmm1,%xmm8
.byte	102,68,15,56,220,201		// aesenc %xmm1,%xmm9
	movups	48-128(%rcx),%xmm1	// round key 3
	bswapl	%r9d
.byte	102,15,56,220,208		// aesenc %xmm0,%xmm2
.byte	102,15,56,220,216		// aesenc %xmm0,%xmm3
	xorl	%ebp,%r9d
.byte	0x66,0x90			// 2-byte nop, padding only
.byte	102,15,56,220,224		// aesenc %xmm0,%xmm4
.byte	102,15,56,220,232		// aesenc %xmm0,%xmm5
	movl	%r9d,16+12(%rsp)
	leaq	2(%r8),%r9
.byte	102,15,56,220,240		// aesenc %xmm0,%xmm6
.byte	102,15,56,220,248		// aesenc %xmm0,%xmm7
.byte	102,68,15,56,220,192		// aesenc %xmm0,%xmm8
.byte	102,68,15,56,220,200		// aesenc %xmm0,%xmm9
	movups	64-128(%rcx),%xmm0	// round key 4
	bswapl	%r9d
.byte	102,15,56,220,209		// aesenc %xmm1,%xmm2
.byte	102,15,56,220,217		// aesenc %xmm1,%xmm3
	xorl	%ebp,%r9d
.byte	0x66,0x90			// 2-byte nop, padding only
.byte	102,15,56,220,225		// aesenc %xmm1,%xmm4
.byte	102,15,56,220,233		// aesenc %xmm1,%xmm5
	movl	%r9d,32+12(%rsp)
	leaq	3(%r8),%r9
.byte	102,15,56,220,241		// aesenc %xmm1,%xmm6
.byte	102,15,56,220,249		// aesenc %xmm1,%xmm7
.byte	102,68,15,56,220,193		// aesenc %xmm1,%xmm8
.byte	102,68,15,56,220,201		// aesenc %xmm1,%xmm9
	movups	80-128(%rcx),%xmm1	// round key 5
	bswapl	%r9d
.byte	102,15,56,220,208		// aesenc %xmm0,%xmm2
.byte	102,15,56,220,216		// aesenc %xmm0,%xmm3
	xorl	%ebp,%r9d
.byte	0x66,0x90			// 2-byte nop, padding only
.byte	102,15,56,220,224		// aesenc %xmm0,%xmm4
.byte	102,15,56,220,232		// aesenc %xmm0,%xmm5
	movl	%r9d,48+12(%rsp)
	leaq	4(%r8),%r9
.byte	102,15,56,220,240		// aesenc %xmm0,%xmm6
.byte	102,15,56,220,248		// aesenc %xmm0,%xmm7
.byte	102,68,15,56,220,192		// aesenc %xmm0,%xmm8
.byte	102,68,15,56,220,200		// aesenc %xmm0,%xmm9
	movups	96-128(%rcx),%xmm0	// round key 6
	bswapl	%r9d
.byte	102,15,56,220,209		// aesenc %xmm1,%xmm2
.byte	102,15,56,220,217		// aesenc %xmm1,%xmm3
	xorl	%ebp,%r9d
.byte	0x66,0x90			// 2-byte nop, padding only
.byte	102,15,56,220,225		// aesenc %xmm1,%xmm4
.byte	102,15,56,220,233		// aesenc %xmm1,%xmm5
	movl	%r9d,64+12(%rsp)
	leaq	5(%r8),%r9
.byte	102,15,56,220,241		// aesenc %xmm1,%xmm6
.byte	102,15,56,220,249		// aesenc %xmm1,%xmm7
.byte	102,68,15,56,220,193		// aesenc %xmm1,%xmm8
.byte	102,68,15,56,220,201		// aesenc %xmm1,%xmm9
	movups	112-128(%rcx),%xmm1	// round key 7
	bswapl	%r9d
.byte	102,15,56,220,208		// aesenc %xmm0,%xmm2
.byte	102,15,56,220,216		// aesenc %xmm0,%xmm3
	xorl	%ebp,%r9d
.byte	0x66,0x90			// 2-byte nop, padding only
.byte	102,15,56,220,224		// aesenc %xmm0,%xmm4
.byte	102,15,56,220,232		// aesenc %xmm0,%xmm5
	movl	%r9d,80+12(%rsp)
	leaq	6(%r8),%r9
.byte	102,15,56,220,240		// aesenc %xmm0,%xmm6
.byte	102,15,56,220,248		// aesenc %xmm0,%xmm7
.byte	102,68,15,56,220,192		// aesenc %xmm0,%xmm8
.byte	102,68,15,56,220,200		// aesenc %xmm0,%xmm9
	movups	128-128(%rcx),%xmm0	// round key 8
	bswapl	%r9d
.byte	102,15,56,220,209		// aesenc %xmm1,%xmm2
.byte	102,15,56,220,217		// aesenc %xmm1,%xmm3
	xorl	%ebp,%r9d
.byte	0x66,0x90			// 2-byte nop, padding only
.byte	102,15,56,220,225		// aesenc %xmm1,%xmm4
.byte	102,15,56,220,233		// aesenc %xmm1,%xmm5
	movl	%r9d,96+12(%rsp)
	leaq	7(%r8),%r9
.byte	102,15,56,220,241		// aesenc %xmm1,%xmm6
.byte	102,15,56,220,249		// aesenc %xmm1,%xmm7
.byte	102,68,15,56,220,193		// aesenc %xmm1,%xmm8
.byte	102,68,15,56,220,201		// aesenc %xmm1,%xmm9
	movups	144-128(%rcx),%xmm1	// round key 9
	bswapl	%r9d
.byte	102,15,56,220,208		// aesenc %xmm0,%xmm2
.byte	102,15,56,220,216		// aesenc %xmm0,%xmm3
.byte	102,15,56,220,224		// aesenc %xmm0,%xmm4
	xorl	%ebp,%r9d
	movdqu	0(%rdi),%xmm10		// start fetching input block 0
.byte	102,15,56,220,232		// aesenc %xmm0,%xmm5
	movl	%r9d,112+12(%rsp)
	cmpl	$11,%eax		// flags consumed by the jb below
.byte	102,15,56,220,240		// aesenc %xmm0,%xmm6
.byte	102,15,56,220,248		// aesenc %xmm0,%xmm7
.byte	102,68,15,56,220,192		// aesenc %xmm0,%xmm8
.byte	102,68,15,56,220,200		// aesenc %xmm0,%xmm9
	movups	160-128(%rcx),%xmm0	// round key 10

	jb	L$ctr32_enc_done	// round count below 11: key 10 is the last key

	// Longer schedule: four more rounds (keys 9..12) before the common
	// finish, leaving key 13 in xmm1 and key 14 in xmm0.
.byte	102,15,56,220,209		// aesenc %xmm1,%xmm2
.byte	102,15,56,220,217		// aesenc %xmm1,%xmm3
.byte	102,15,56,220,225		// aesenc %xmm1,%xmm4
.byte	102,15,56,220,233		// aesenc %xmm1,%xmm5
.byte	102,15,56,220,241		// aesenc %xmm1,%xmm6
.byte	102,15,56,220,249		// aesenc %xmm1,%xmm7
.byte	102,68,15,56,220,193		// aesenc %xmm1,%xmm8
.byte	102,68,15,56,220,201		// aesenc %xmm1,%xmm9
	movups	176-128(%rcx),%xmm1	// round key 11

.byte	102,15,56,220,208		// aesenc %xmm0,%xmm2
.byte	102,15,56,220,216		// aesenc %xmm0,%xmm3
.byte	102,15,56,220,224		// aesenc %xmm0,%xmm4
.byte	102,15,56,220,232		// aesenc %xmm0,%xmm5
.byte	102,15,56,220,240		// aesenc %xmm0,%xmm6
.byte	102,15,56,220,248		// aesenc %xmm0,%xmm7
.byte	102,68,15,56,220,192		// aesenc %xmm0,%xmm8
.byte	102,68,15,56,220,200		// aesenc %xmm0,%xmm9
	movups	192-128(%rcx),%xmm0	// round key 12



.byte	102,15,56,220,209		// aesenc %xmm1,%xmm2
.byte	102,15,56,220,217		// aesenc %xmm1,%xmm3
.byte	102,15,56,220,225		// aesenc %xmm1,%xmm4
.byte	102,15,56,220,233		// aesenc %xmm1,%xmm5
.byte	102,15,56,220,241		// aesenc %xmm1,%xmm6
.byte	102,15,56,220,249		// aesenc %xmm1,%xmm7
.byte	102,68,15,56,220,193		// aesenc %xmm1,%xmm8
.byte	102,68,15,56,220,201		// aesenc %xmm1,%xmm9
	movups	208-128(%rcx),%xmm1	// round key 13

.byte	102,15,56,220,208		// aesenc %xmm0,%xmm2
.byte	102,15,56,220,216		// aesenc %xmm0,%xmm3
.byte	102,15,56,220,224		// aesenc %xmm0,%xmm4
.byte	102,15,56,220,232		// aesenc %xmm0,%xmm5
.byte	102,15,56,220,240		// aesenc %xmm0,%xmm6
.byte	102,15,56,220,248		// aesenc %xmm0,%xmm7
.byte	102,68,15,56,220,192		// aesenc %xmm0,%xmm8
.byte	102,68,15,56,220,200		// aesenc %xmm0,%xmm9
	movups	224-128(%rcx),%xmm0	// round key 14
	jmp	L$ctr32_enc_done

	// Common finish: xmm1 = second-to-last round key, xmm0 = last round
	// key. Each input block is XORed with the last round key and then used
	// as the aesenclast operand, so the final round and the keystream XOR
	// happen in one instruction.
.p2align	4
L$ctr32_enc_done:
	movdqu	16(%rdi),%xmm11
	pxor	%xmm0,%xmm10
	movdqu	32(%rdi),%xmm12
	pxor	%xmm0,%xmm11
	movdqu	48(%rdi),%xmm13
	pxor	%xmm0,%xmm12
	movdqu	64(%rdi),%xmm14
	pxor	%xmm0,%xmm13
	movdqu	80(%rdi),%xmm15
	pxor	%xmm0,%xmm14
	prefetcht0	448(%rdi)
	prefetcht0	512(%rdi)
	pxor	%xmm0,%xmm15
.byte	102,15,56,220,209		// aesenc %xmm1,%xmm2
.byte	102,15,56,220,217		// aesenc %xmm1,%xmm3
.byte	102,15,56,220,225		// aesenc %xmm1,%xmm4
.byte	102,15,56,220,233		// aesenc %xmm1,%xmm5
.byte	102,15,56,220,241		// aesenc %xmm1,%xmm6
.byte	102,15,56,220,249		// aesenc %xmm1,%xmm7
.byte	102,68,15,56,220,193		// aesenc %xmm1,%xmm8
.byte	102,68,15,56,220,201		// aesenc %xmm1,%xmm9
	movdqu	96(%rdi),%xmm1		// input block 6 (round keys no longer needed in xmm1)
	leaq	128(%rdi),%rdi

.byte	102,65,15,56,221,210		// aesenclast %xmm10,%xmm2
	pxor	%xmm0,%xmm1
	movdqu	112-128(%rdi),%xmm10	// input block 7
.byte	102,65,15,56,221,219		// aesenclast %xmm11,%xmm3
	pxor	%xmm0,%xmm10
	movdqa	0(%rsp),%xmm11		// start reloading the next counters
.byte	102,65,15,56,221,228		// aesenclast %xmm12,%xmm4
.byte	102,65,15,56,221,237		// aesenclast %xmm13,%xmm5
	movdqa	16(%rsp),%xmm12
	movdqa	32(%rsp),%xmm13
.byte	102,65,15,56,221,246		// aesenclast %xmm14,%xmm6
.byte	102,65,15,56,221,255		// aesenclast %xmm15,%xmm7
	movdqa	48(%rsp),%xmm14
	movdqa	64(%rsp),%xmm15
.byte	102,68,15,56,221,193		// aesenclast %xmm1,%xmm8
	movdqa	80(%rsp),%xmm0
	movups	16-128(%rcx),%xmm1	// round key 1 for the next pass
.byte	102,69,15,56,221,202		// aesenclast %xmm10,%xmm9

	// Store eight output blocks and move the next counters into place.
	movups	%xmm2,(%rsi)
	movdqa	%xmm11,%xmm2
	movups	%xmm3,16(%rsi)
	movdqa	%xmm12,%xmm3
	movups	%xmm4,32(%rsi)
	movdqa	%xmm13,%xmm4
	movups	%xmm5,48(%rsi)
	movdqa	%xmm14,%xmm5
	movups	%xmm6,64(%rsi)
	movdqa	%xmm15,%xmm6
	movups	%xmm7,80(%rsi)
	movdqa	%xmm0,%xmm7
	movups	%xmm8,96(%rsi)
	movups	%xmm9,112(%rsi)
	leaq	128(%rsi),%rsi

	subq	$8,%rdx
	jnc	L$ctr32_loop8		// at least eight more blocks were pending

	addq	$8,%rdx			// rdx = leftover blocks (0..7)
	jz	L$ctr32_done
	leaq	-128(%rcx),%rcx		// undo the +128 bias

	// Tail: 1..7 blocks left. Here rcx = start of the key schedule,
	// eax = round count, xmm1 = round key 1 and xmm2.. hold the counters.
L$ctr32_tail:


	leaq	16(%rcx),%rcx
	cmpq	$4,%rdx
	jb	L$ctr32_loop3		// 1..3 blocks
	je	L$ctr32_loop4		// exactly 4 blocks

	// 5..7 blocks: reuse the 8-block round loop. xmm9 is a zeroed dummy
	// lane whose result is never stored.
	shll	$4,%eax
	movdqa	96(%rsp),%xmm8		// block 6 counter
	pxor	%xmm9,%xmm9

	movups	16(%rcx),%xmm0		// round key 2
.byte	102,15,56,220,209		// aesenc %xmm1,%xmm2
.byte	102,15,56,220,217		// aesenc %xmm1,%xmm3
	leaq	32-16(%rcx,%rax,1),%rcx	// rcx = one past the last round key
	negq	%rax
.byte	102,15,56,220,225		// aesenc %xmm1,%xmm4
	addq	$16,%rax
	movups	(%rdi),%xmm10
.byte	102,15,56,220,233		// aesenc %xmm1,%xmm5
.byte	102,15,56,220,241		// aesenc %xmm1,%xmm6
	movups	16(%rdi),%xmm11
	movups	32(%rdi),%xmm12
.byte	102,15,56,220,249		// aesenc %xmm1,%xmm7
.byte	102,68,15,56,220,193		// aesenc %xmm1,%xmm8

	call	L$enc_loop8_enter	// rounds 2..last

	movdqu	48(%rdi),%xmm13
	pxor	%xmm10,%xmm2
	movdqu	64(%rdi),%xmm10
	pxor	%xmm11,%xmm3
	movdqu	%xmm2,(%rsi)
	pxor	%xmm12,%xmm4
	movdqu	%xmm3,16(%rsi)
	pxor	%xmm13,%xmm5
	movdqu	%xmm4,32(%rsi)
	pxor	%xmm10,%xmm6
	movdqu	%xmm5,48(%rsi)
	movdqu	%xmm6,64(%rsi)
	cmpq	$6,%rdx			// flags are reused by the je further down
	jb	L$ctr32_done		// exactly 5 blocks

	movups	80(%rdi),%xmm11
	xorps	%xmm11,%xmm7
	movups	%xmm7,80(%rsi)
	je	L$ctr32_done		// exactly 6 blocks (flags from the cmpq above)

	movups	96(%rdi),%xmm12
	xorps	%xmm12,%xmm8
	movups	%xmm8,96(%rsi)
	jmp	L$ctr32_done

	// Exactly four blocks: one round per iteration on xmm2-xmm5.
.p2align	5
L$ctr32_loop4:
.byte	102,15,56,220,209		// aesenc %xmm1,%xmm2
	leaq	16(%rcx),%rcx
	decl	%eax
.byte	102,15,56,220,217		// aesenc %xmm1,%xmm3
.byte	102,15,56,220,225		// aesenc %xmm1,%xmm4
.byte	102,15,56,220,233		// aesenc %xmm1,%xmm5
	movups	(%rcx),%xmm1
	jnz	L$ctr32_loop4		// branch on the decl result
.byte	102,15,56,221,209		// aesenclast %xmm1,%xmm2
.byte	102,15,56,221,217		// aesenclast %xmm1,%xmm3
	movups	(%rdi),%xmm10
	movups	16(%rdi),%xmm11
.byte	102,15,56,221,225		// aesenclast %xmm1,%xmm4
.byte	102,15,56,221,233		// aesenclast %xmm1,%xmm5
	movups	32(%rdi),%xmm12
	movups	48(%rdi),%xmm13

	xorps	%xmm10,%xmm2
	movups	%xmm2,(%rsi)
	xorps	%xmm11,%xmm3
	movups	%xmm3,16(%rsi)
	pxor	%xmm12,%xmm4
	movdqu	%xmm4,32(%rsi)
	pxor	%xmm13,%xmm5
	movdqu	%xmm5,48(%rsi)
	jmp	L$ctr32_done

	// One to three blocks: three lanes are always encrypted; only as many
	// as rdx asks for are read from the input and stored.
.p2align	5
L$ctr32_loop3:
.byte	102,15,56,220,209		// aesenc %xmm1,%xmm2
	leaq	16(%rcx),%rcx
	decl	%eax
.byte	102,15,56,220,217		// aesenc %xmm1,%xmm3
.byte	102,15,56,220,225		// aesenc %xmm1,%xmm4
	movups	(%rcx),%xmm1
	jnz	L$ctr32_loop3		// branch on the decl result
.byte	102,15,56,221,209		// aesenclast %xmm1,%xmm2
.byte	102,15,56,221,217		// aesenclast %xmm1,%xmm3
.byte	102,15,56,221,225		// aesenclast %xmm1,%xmm4

	movups	(%rdi),%xmm10
	xorps	%xmm10,%xmm2
	movups	%xmm2,(%rsi)
	cmpq	$2,%rdx			// flags are reused by the je further down
	jb	L$ctr32_done

	movups	16(%rdi),%xmm11
	xorps	%xmm11,%xmm3
	movups	%xmm3,16(%rsi)
	je	L$ctr32_done		// exactly 2 blocks (flags from the cmpq above)

	movups	32(%rdi),%xmm12
	xorps	%xmm12,%xmm4
	movups	%xmm4,32(%rsi)

	// Bulk exit: wipe the vector registers, ebp (held a round key word)
	// and the counter slots on the stack, then unwind the frame.
L$ctr32_done:
	xorps	%xmm0,%xmm0
	xorl	%ebp,%ebp
	pxor	%xmm1,%xmm1
	pxor	%xmm2,%xmm2
	pxor	%xmm3,%xmm3
	pxor	%xmm4,%xmm4
	pxor	%xmm5,%xmm5
	pxor	%xmm6,%xmm6
	pxor	%xmm7,%xmm7
	movaps	%xmm0,0(%rsp)
	pxor	%xmm8,%xmm8
	movaps	%xmm0,16(%rsp)
	pxor	%xmm9,%xmm9
	movaps	%xmm0,32(%rsp)
	pxor	%xmm10,%xmm10
	movaps	%xmm0,48(%rsp)
	pxor	%xmm11,%xmm11
	movaps	%xmm0,64(%rsp)
	pxor	%xmm12,%xmm12
	movaps	%xmm0,80(%rsp)
	pxor	%xmm13,%xmm13
	movaps	%xmm0,96(%rsp)
	pxor	%xmm14,%xmm14
	movaps	%xmm0,112(%rsp)
	pxor	%xmm15,%xmm15
	movq	-8(%r11),%rbp		// restore the rbp pushed at L$ctr32_bulk

	leaq	(%r11),%rsp		// restore the entry rsp

L$ctr32_epilogue:
	ret

// _aes_hw_set_encrypt_key: expand a 128- or 256-bit AES key into the
// encryption key schedule.
//
// In:   %rdi = user key bytes, %esi = key length in bits,
//       %rdx = destination key schedule
// Out:  %eax =  0 on success,
//              -1 if %rdi or %rdx is NULL,
//              -2 if %esi is neither 128 nor 256
//       On success the round keys are stored from offset 0 of the schedule
//       and the value 9 (128-bit) or 13 (256-bit) is stored at offset 240.
// Clobbers: %rax, %esi, %r10, flags; %xmm0-%xmm5 are zeroed before return.
//
// Two expansion strategies exist per key size. The second capability word
// of _OPENSSL_ia32cap_P is masked with 0x10000800 (bits 28 and 11); when
// only bit 28 is set the "_alt" path (pshufb + aesenclast) is taken,
// otherwise the aeskeygenassist path is used. (Presumably AVX without
// XOP -- confirm against the capability vector definition.)
.globl	_aes_hw_set_encrypt_key
.private_extern _aes_hw_set_encrypt_key

.p2align	4
_aes_hw_set_encrypt_key:
__aesni_set_encrypt_key:

// _CET_ENDBR is a macro that is not defined in this file (presumably supplied
// by asm_base.h as the indirect-branch landing pad -- confirm).
_CET_ENDBR
#ifdef BORINGSSL_DISPATCH_TEST
	// Dispatch test hook: flag byte 3 of the hit table.
	movb	$1,_BORINGSSL_function_hit+3(%rip)
#endif
.byte	0x48,0x83,0xEC,0x08		// subq $8,%rsp

	movq	$-1,%rax		// return value for a NULL argument
	testq	%rdi,%rdi
	jz	L$enc_key_ret
	testq	%rdx,%rdx
	jz	L$enc_key_ret

	movups	(%rdi),%xmm0		// first 16 key bytes = round key 0
	xorps	%xmm4,%xmm4		// scratch for the expansion helpers starts at zero
	leaq	_OPENSSL_ia32cap_P(%rip),%r10
	movl	4(%r10),%r10d
	andl	$268437504,%r10d	// keep bits 28 and 11 (0x10000800)
	leaq	16(%rdx),%rax		// rax = write cursor, starts at round key 1
	cmpl	$256,%esi
	je	L$14rounds

	cmpl	$128,%esi
	jne	L$bad_keybits

L$10rounds:
	movl	$9,%esi			// value stored at offset 240
	cmpl	$268435456,%r10d	// only bit 28 set (0x10000000)?
	je	L$10rounds_alt

	// aeskeygenassist path: one helper call per round key, with round
	// constants 1, 2, 4, ..., 0x80, 0x1b, 0x36.
	movups	%xmm0,(%rdx)
.byte	102,15,58,223,200,1		// aeskeygenassist $0x01,%xmm0,%xmm1
	call	L$key_expansion_128_cold
.byte	102,15,58,223,200,2		// aeskeygenassist $0x02,%xmm0,%xmm1
	call	L$key_expansion_128
.byte	102,15,58,223,200,4		// aeskeygenassist $0x04,%xmm0,%xmm1
	call	L$key_expansion_128
.byte	102,15,58,223,200,8		// aeskeygenassist $0x08,%xmm0,%xmm1
	call	L$key_expansion_128
.byte	102,15,58,223,200,16		// aeskeygenassist $0x10,%xmm0,%xmm1
	call	L$key_expansion_128
.byte	102,15,58,223,200,32		// aeskeygenassist $0x20,%xmm0,%xmm1
	call	L$key_expansion_128
.byte	102,15,58,223,200,64		// aeskeygenassist $0x40,%xmm0,%xmm1
	call	L$key_expansion_128
.byte	102,15,58,223,200,128		// aeskeygenassist $0x80,%xmm0,%xmm1
	call	L$key_expansion_128
.byte	102,15,58,223,200,27		// aeskeygenassist $0x1b,%xmm0,%xmm1
	call	L$key_expansion_128
.byte	102,15,58,223,200,54		// aeskeygenassist $0x36,%xmm0,%xmm1
	call	L$key_expansion_128
	movups	%xmm0,(%rax)		// round key 10 (rax = schedule + 160)
	movl	%esi,80(%rax)		// schedule + 240
	xorl	%eax,%eax
	jmp	L$enc_key_ret

	// Alternative 128-bit path: xmm5 = byte shuffle mask, xmm4 = round
	// constant (doubled with pslld each iteration), xmm2 = copy of the
	// previous round key used to build the shifted-XOR term.
.p2align	4
L$10rounds_alt:
	movdqa	L$key_rotate(%rip),%xmm5
	movl	$8,%r10d		// eight loop iterations: round keys 1..8
	movdqa	L$key_rcon1(%rip),%xmm4
	movdqa	%xmm0,%xmm2
	movdqu	%xmm0,(%rdx)
	jmp	L$oop_key128

.p2align	4
L$oop_key128:
.byte	102,15,56,0,197			// pshufb %xmm5,%xmm0
.byte	102,15,56,221,196		// aesenclast %xmm4,%xmm0
	pslld	$1,%xmm4		// next round constant
	leaq	16(%rax),%rax

	movdqa	%xmm2,%xmm3
	pslldq	$4,%xmm2
	pxor	%xmm2,%xmm3
	pslldq	$4,%xmm2
	pxor	%xmm2,%xmm3
	pslldq	$4,%xmm2
	pxor	%xmm3,%xmm2

	pxor	%xmm2,%xmm0
	movdqu	%xmm0,-16(%rax)
	movdqa	%xmm0,%xmm2

	decl	%r10d
	jnz	L$oop_key128

	// Round keys 9 and 10 use constants 0x1b and 0x36 (0x1b doubled).
	movdqa	L$key_rcon1b(%rip),%xmm4

.byte	102,15,56,0,197			// pshufb %xmm5,%xmm0
.byte	102,15,56,221,196		// aesenclast %xmm4,%xmm0
	pslld	$1,%xmm4

	movdqa	%xmm2,%xmm3
	pslldq	$4,%xmm2
	pxor	%xmm2,%xmm3
	pslldq	$4,%xmm2
	pxor	%xmm2,%xmm3
	pslldq	$4,%xmm2
	pxor	%xmm3,%xmm2

	pxor	%xmm2,%xmm0
	movdqu	%xmm0,(%rax)		// round key 9 (rax = schedule + 144)

	movdqa	%xmm0,%xmm2
.byte	102,15,56,0,197			// pshufb %xmm5,%xmm0
.byte	102,15,56,221,196		// aesenclast %xmm4,%xmm0

	movdqa	%xmm2,%xmm3
	pslldq	$4,%xmm2
	pxor	%xmm2,%xmm3
	pslldq	$4,%xmm2
	pxor	%xmm2,%xmm3
	pslldq	$4,%xmm2
	pxor	%xmm3,%xmm2

	pxor	%xmm2,%xmm0
	movdqu	%xmm0,16(%rax)		// round key 10

	movl	%esi,96(%rax)		// schedule + 240
	xorl	%eax,%eax
	jmp	L$enc_key_ret



.p2align	4
L$14rounds:
	movups	16(%rdi),%xmm2		// second 16 key bytes = round key 1
	movl	$13,%esi		// value stored at offset 240
	leaq	16(%rax),%rax		// write cursor starts at round key 2
	cmpl	$268435456,%r10d	// only bit 28 set (0x10000000)?
	je	L$14rounds_alt

	// aeskeygenassist path: the "a" helper derives an even round key
	// into xmm0, the "b" helper an odd round key into xmm2.
	movups	%xmm0,(%rdx)
	movups	%xmm2,16(%rdx)
.byte	102,15,58,223,202,1		// aeskeygenassist $0x01,%xmm2,%xmm1
	call	L$key_expansion_256a_cold
.byte	102,15,58,223,200,1		// aeskeygenassist $0x01,%xmm0,%xmm1
	call	L$key_expansion_256b
.byte	102,15,58,223,202,2		// aeskeygenassist $0x02,%xmm2,%xmm1
	call	L$key_expansion_256a
.byte	102,15,58,223,200,2		// aeskeygenassist $0x02,%xmm0,%xmm1
	call	L$key_expansion_256b
.byte	102,15,58,223,202,4		// aeskeygenassist $0x04,%xmm2,%xmm1
	call	L$key_expansion_256a
.byte	102,15,58,223,200,4		// aeskeygenassist $0x04,%xmm0,%xmm1
	call	L$key_expansion_256b
.byte	102,15,58,223,202,8		// aeskeygenassist $0x08,%xmm2,%xmm1
	call	L$key_expansion_256a
.byte	102,15,58,223,200,8		// aeskeygenassist $0x08,%xmm0,%xmm1
	call	L$key_expansion_256b
.byte	102,15,58,223,202,16		// aeskeygenassist $0x10,%xmm2,%xmm1
	call	L$key_expansion_256a
.byte	102,15,58,223,200,16		// aeskeygenassist $0x10,%xmm0,%xmm1
	call	L$key_expansion_256b
.byte	102,15,58,223,202,32		// aeskeygenassist $0x20,%xmm2,%xmm1
	call	L$key_expansion_256a
.byte	102,15,58,223,200,32		// aeskeygenassist $0x20,%xmm0,%xmm1
	call	L$key_expansion_256b
.byte	102,15,58,223,202,64		// aeskeygenassist $0x40,%xmm2,%xmm1
	call	L$key_expansion_256a
	movups	%xmm0,(%rax)		// round key 14 (rax = schedule + 224)
	movl	%esi,16(%rax)		// schedule + 240
	xorq	%rax,%rax
	jmp	L$enc_key_ret

	// Alternative 256-bit path: seven iterations, each producing one even
	// round key (with a round constant) and, except for the last, one odd
	// round key (aesenclast against a zero key, no constant).
.p2align	4
L$14rounds_alt:
	movdqa	L$key_rotate(%rip),%xmm5
	movdqa	L$key_rcon1(%rip),%xmm4
	movl	$7,%r10d
	movdqu	%xmm0,0(%rdx)
	movdqa	%xmm2,%xmm1
	movdqu	%xmm2,16(%rdx)
	jmp	L$oop_key256

.p2align	4
L$oop_key256:
.byte	102,15,56,0,213			// pshufb %xmm5,%xmm2
.byte	102,15,56,221,212		// aesenclast %xmm4,%xmm2

	movdqa	%xmm0,%xmm3
	pslldq	$4,%xmm0
	pxor	%xmm0,%xmm3
	pslldq	$4,%xmm0
	pxor	%xmm0,%xmm3
	pslldq	$4,%xmm0
	pxor	%xmm3,%xmm0
	pslld	$1,%xmm4		// next round constant

	pxor	%xmm2,%xmm0
	movdqu	%xmm0,(%rax)

	decl	%r10d
	jz	L$done_key256		// the seventh even key is the last round key

	pshufd	$0xff,%xmm0,%xmm2	// broadcast the top dword of the new key
	pxor	%xmm3,%xmm3
.byte	102,15,56,221,211		// aesenclast %xmm3,%xmm2  (xmm3 = 0)

	movdqa	%xmm1,%xmm3
	pslldq	$4,%xmm1
	pxor	%xmm1,%xmm3
	pslldq	$4,%xmm1
	pxor	%xmm1,%xmm3
	pslldq	$4,%xmm1
	pxor	%xmm3,%xmm1

	pxor	%xmm1,%xmm2
	movdqu	%xmm2,16(%rax)
	leaq	32(%rax),%rax
	movdqa	%xmm2,%xmm1

	jmp	L$oop_key256

L$done_key256:
	movl	%esi,16(%rax)		// schedule + 240
	xorl	%eax,%eax
	jmp	L$enc_key_ret

.p2align	4
L$bad_keybits:
	movq	$-2,%rax		// unsupported key length
	// Common exit: scrub every vector register that held key material.
L$enc_key_ret:
	pxor	%xmm0,%xmm0
	pxor	%xmm1,%xmm1
	pxor	%xmm2,%xmm2
	pxor	%xmm3,%xmm3
	pxor	%xmm4,%xmm4
	pxor	%xmm5,%xmm5
	addq	$8,%rsp			// undo the subq $8 emitted as bytes at entry

	ret

// Label after the final ret; nothing in this file references it.
L$SEH_end_set_encrypt_key:

// L$key_expansion_128 / L$key_expansion_128_cold: one step of the 128-bit
// key schedule, called from the aeskeygenassist path of the key setup.
//
// In:   %xmm0 = previous round key
//       %xmm1 = aeskeygenassist result for %xmm0 (its top dword is used)
//       %xmm4 = scratch carried between calls (zeroed by the caller before
//               the first call)
//       %rax  = write cursor into the key schedule
// Out:  %xmm0 = next round key
// The plain entry first stores the previous round key at (%rax) and advances
// %rax by 16; the _cold entry skips that store.
// Clobbers: %xmm1, %xmm4.
.p2align	4
L$key_expansion_128:
	movups	%xmm0,(%rax)
	leaq	16(%rax),%rax
L$key_expansion_128_cold:
	// Two shufps/xorps pairs fold shifted copies of the previous key
	// into xmm0.
	shufps	$16,%xmm0,%xmm4
	xorps	%xmm4,%xmm0
	shufps	$140,%xmm0,%xmm4
	xorps	%xmm4,%xmm0
	shufps	$255,%xmm1,%xmm1	// broadcast dword 3 of the keygenassist result
	xorps	%xmm1,%xmm0
	ret

// L$key_expansion_192a / _192a_cold / _192b_warm / L$key_expansion_192b:
// helpers for a 192-bit key schedule. No call site for any of these labels
// is visible in this file (the key setup above rejects every length other
// than 128 and 256). The two routines are kept together because
// L$key_expansion_192b finishes by jumping into L$key_expansion_192b_warm.
//
// Registers touched: %xmm0, %xmm2 (key state), %xmm1 (keygenassist result),
// %xmm3, %xmm4, %xmm5 (scratch), %rax (write cursor).
.p2align	4
L$key_expansion_192a:
	movups	%xmm0,(%rax)		// store the previous 16 bytes, advance cursor
	leaq	16(%rax),%rax
L$key_expansion_192a_cold:
	movaps	%xmm2,%xmm5		// keep a copy of xmm2 for the 192b store
L$key_expansion_192b_warm:
	shufps	$16,%xmm0,%xmm4
	movdqa	%xmm2,%xmm3
	xorps	%xmm4,%xmm0
	shufps	$140,%xmm0,%xmm4
	pslldq	$4,%xmm3
	xorps	%xmm4,%xmm0
	pshufd	$85,%xmm1,%xmm1		// broadcast dword 1 of the keygenassist result
	pxor	%xmm3,%xmm2
	pxor	%xmm1,%xmm0
	pshufd	$255,%xmm0,%xmm3	// broadcast dword 3 of the updated xmm0
	pxor	%xmm3,%xmm2
	ret

.p2align	4
L$key_expansion_192b:
	movaps	%xmm0,%xmm3
	shufps	$68,%xmm0,%xmm5		// combine saved xmm5 with the low half of xmm0
	movups	%xmm5,(%rax)
	shufps	$78,%xmm2,%xmm3		// combine the high half of xmm0 with xmm2
	movups	%xmm3,16(%rax)
	leaq	32(%rax),%rax		// two 16-byte stores were made
	jmp	L$key_expansion_192b_warm

// L$key_expansion_256a / L$key_expansion_256a_cold: derive the next
// even-numbered round key of the 256-bit schedule, called from the
// aeskeygenassist path of the key setup.
//
// In:   %xmm0 = round key two positions back (updated in place)
//       %xmm2 = most recent round key
//       %xmm1 = aeskeygenassist result for %xmm2 (its top dword is used)
//       %xmm4 = scratch carried between calls
//       %rax  = write cursor into the key schedule
// Out:  %xmm0 = next round key
// The plain entry first stores %xmm2 at (%rax) and advances %rax by 16; the
// _cold entry skips that store.
// Clobbers: %xmm1, %xmm4.
.p2align	4
L$key_expansion_256a:
	movups	%xmm2,(%rax)
	leaq	16(%rax),%rax
L$key_expansion_256a_cold:
	// Two shufps/xorps pairs fold shifted copies of xmm0 into itself.
	shufps	$16,%xmm0,%xmm4
	xorps	%xmm4,%xmm0
	shufps	$140,%xmm0,%xmm4
	xorps	%xmm4,%xmm0
	shufps	$255,%xmm1,%xmm1	// broadcast dword 3 of the keygenassist result
	xorps	%xmm1,%xmm0
	ret

// L$key_expansion_256b: derive the next odd-numbered round key of the
// 256-bit schedule, called from the aeskeygenassist path of the key setup.
//
// In:   %xmm2 = round key two positions back (updated in place)
//       %xmm0 = most recent round key (stored at (%rax) on entry)
//       %xmm1 = aeskeygenassist result for %xmm0 (dword 2 is used)
//       %xmm4 = scratch carried between calls
//       %rax  = write cursor into the key schedule (advanced by 16)
// Out:  %xmm2 = next round key
// Clobbers: %xmm1, %xmm4.
.p2align	4
L$key_expansion_256b:
	movups	%xmm0,(%rax)
	leaq	16(%rax),%rax

	// Two shufps/xorps pairs fold shifted copies of xmm2 into itself.
	shufps	$16,%xmm2,%xmm4
	xorps	%xmm4,%xmm2
	shufps	$140,%xmm2,%xmm4
	xorps	%xmm4,%xmm2
	shufps	$170,%xmm1,%xmm1	// broadcast dword 2 of the keygenassist result
	xorps	%xmm1,%xmm2
	ret


// Read-only constant pool, 64-byte aligned. Of these labels only
// L$key_rotate, L$key_rcon1 and L$key_rcon1b are referenced by the code in
// this file (the "_alt" key-expansion paths load them with movdqa, which
// needs the 16-byte alignment every entry has here). The other labels have
// no reference in this file.
.section	__DATA,__const
.p2align	6
L$bswap_mask:
.byte	15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
L$increment32:
.long	6,6,6,0
L$increment64:
.long	1,0,0,0
L$increment1:
.byte	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
// pshufb mask used by the alternative key expansion (same pattern in every
// dword).
L$key_rotate:
.long	0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d
L$key_rotate192:
.long	0x04070605,0x04070605,0x04070605,0x04070605
// Round constants: 1 is doubled with pslld for the first eight 128-bit
// rounds; 0x1b (and its double, 0x36) covers the last two.
L$key_rcon1:
.long	1,1,1,1
L$key_rcon1b:
.long	0x1b,0x1b,0x1b,0x1b

// NUL-terminated ASCII identification string:
// "AES for Intel AES-NI, CRYPTOGAMS by <appro@openssl.org>"
.byte	65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69,83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.p2align	6
.text
#endif