// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.

#include <openssl/asm_base.h>

#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64) && defined(__ELF__)
.text
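// gcm_init_clmul(Htable, H): precompute powers of the GHASH key H, together
// with their pre-XORed halves for Karatsuba multiplication, into the table
// at %rdi. %rsi points at H.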
.globl	gcm_init_clmul
.hidden gcm_init_clmul
.type	gcm_init_clmul,@function
.align	16
gcm_init_clmul:
.cfi_startproc

_CET_ENDBR
.L_init_clmul:
	movdqu	(%rsi),%xmm2
	pshufd	$78,%xmm2,%xmm2


	pshufd	$255,%xmm2,%xmm4
	movdqa	%xmm2,%xmm3
	psllq	$1,%xmm2
	pxor	%xmm5,%xmm5
	psrlq	$63,%xmm3
	pcmpgtd	%xmm4,%xmm5
	pslldq	$8,%xmm3
	por	%xmm3,%xmm2


	pand	.L0x1c2_polynomial(%rip),%xmm5
	pxor	%xmm5,%xmm2


	pshufd	$78,%xmm2,%xmm6
	movdqa	%xmm2,%xmm0
	pxor	%xmm2,%xmm6
	movdqa	%xmm0,%xmm1
	pshufd	$78,%xmm0,%xmm3
	pxor	%xmm0,%xmm3
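// The .byte runs below are hand-encoded pclmulqdq instructions
// (66 0F 3A 44 /r ib), presumably emitted as raw bytes so that assemblers
// which predate PCLMULQDQ can still build this file.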
.byte	102,15,58,68,194,0
.byte	102,15,58,68,202,17
.byte	102,15,58,68,222,0
	pxor	%xmm0,%xmm3
	pxor	%xmm1,%xmm3

	movdqa	%xmm3,%xmm4
	psrldq	$8,%xmm3
	pslldq	$8,%xmm4
	pxor	%xmm3,%xmm1
	pxor	%xmm4,%xmm0

	movdqa	%xmm0,%xmm4
	movdqa	%xmm0,%xmm3
	psllq	$5,%xmm0
	pxor	%xmm0,%xmm3
	psllq	$1,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm3
	pslldq	$8,%xmm0
	psrldq	$8,%xmm3
	pxor	%xmm4,%xmm0
	pxor	%xmm3,%xmm1


	movdqa	%xmm0,%xmm4
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm1
	pxor	%xmm0,%xmm4
	psrlq	$5,%xmm0
	pxor	%xmm4,%xmm0
	psrlq	$1,%xmm0
	pxor	%xmm1,%xmm0
	pshufd	$78,%xmm2,%xmm3
	pshufd	$78,%xmm0,%xmm4
	pxor	%xmm2,%xmm3
	movdqu	%xmm2,0(%rdi)
	pxor	%xmm0,%xmm4
	movdqu	%xmm0,16(%rdi)
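// .byte 102,15,58,15,227,8 is palignr $8,%xmm3,%xmm4, encoded raw.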
.byte	102,15,58,15,227,8
	movdqu	%xmm4,32(%rdi)
	movdqa	%xmm0,%xmm1
	pshufd	$78,%xmm0,%xmm3
	pxor	%xmm0,%xmm3
.byte	102,15,58,68,194,0
.byte	102,15,58,68,202,17
.byte	102,15,58,68,222,0
	pxor	%xmm0,%xmm3
	pxor	%xmm1,%xmm3

	movdqa	%xmm3,%xmm4
	psrldq	$8,%xmm3
	pslldq	$8,%xmm4
	pxor	%xmm3,%xmm1
	pxor	%xmm4,%xmm0

	movdqa	%xmm0,%xmm4
	movdqa	%xmm0,%xmm3
	psllq	$5,%xmm0
	pxor	%xmm0,%xmm3
	psllq	$1,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm3
	pslldq	$8,%xmm0
	psrldq	$8,%xmm3
	pxor	%xmm4,%xmm0
	pxor	%xmm3,%xmm1


	movdqa	%xmm0,%xmm4
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm1
	pxor	%xmm0,%xmm4
	psrlq	$5,%xmm0
	pxor	%xmm4,%xmm0
	psrlq	$1,%xmm0
	pxor	%xmm1,%xmm0
	movdqa	%xmm0,%xmm5
	movdqa	%xmm0,%xmm1
	pshufd	$78,%xmm0,%xmm3
	pxor	%xmm0,%xmm3
.byte	102,15,58,68,194,0
.byte	102,15,58,68,202,17
.byte	102,15,58,68,222,0
	pxor	%xmm0,%xmm3
	pxor	%xmm1,%xmm3

	movdqa	%xmm3,%xmm4
	psrldq	$8,%xmm3
	pslldq	$8,%xmm4
	pxor	%xmm3,%xmm1
	pxor	%xmm4,%xmm0

	movdqa	%xmm0,%xmm4
	movdqa	%xmm0,%xmm3
	psllq	$5,%xmm0
	pxor	%xmm0,%xmm3
	psllq	$1,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm3
	pslldq	$8,%xmm0
	psrldq	$8,%xmm3
	pxor	%xmm4,%xmm0
	pxor	%xmm3,%xmm1


	movdqa	%xmm0,%xmm4
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm1
	pxor	%xmm0,%xmm4
	psrlq	$5,%xmm0
	pxor	%xmm4,%xmm0
	psrlq	$1,%xmm0
	pxor	%xmm1,%xmm0
	pshufd	$78,%xmm5,%xmm3
	pshufd	$78,%xmm0,%xmm4
	pxor	%xmm5,%xmm3
	movdqu	%xmm5,48(%rdi)
	pxor	%xmm0,%xmm4
	movdqu	%xmm0,64(%rdi)
.byte	102,15,58,15,227,8
	movdqu	%xmm4,80(%rdi)
	ret
.cfi_endproc

.size	gcm_init_clmul,.-gcm_init_clmul
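// gcm_gmult_clmul: one GHASH step; multiplies the state block at %rdi by H
// (table at %rsi) and reduces modulo the GHASH polynomial.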
.globl	gcm_gmult_clmul
.hidden gcm_gmult_clmul
.type	gcm_gmult_clmul,@function
.align	16
gcm_gmult_clmul:
.cfi_startproc
_CET_ENDBR
.L_gmult_clmul:
	movdqu	(%rdi),%xmm0
	movdqa	.Lbswap_mask(%rip),%xmm5
	movdqu	(%rsi),%xmm2
	movdqu	32(%rsi),%xmm4
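// .byte 102,15,56,0,197 is pshufb %xmm5,%xmm0: byte-reverse the state with
// .Lbswap_mask, encoded raw.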
.byte	102,15,56,0,197
	movdqa	%xmm0,%xmm1
	pshufd	$78,%xmm0,%xmm3
	pxor	%xmm0,%xmm3
.byte	102,15,58,68,194,0
.byte	102,15,58,68,202,17
.byte	102,15,58,68,220,0
	pxor	%xmm0,%xmm3
	pxor	%xmm1,%xmm3

	movdqa	%xmm3,%xmm4
	psrldq	$8,%xmm3
	pslldq	$8,%xmm4
	pxor	%xmm3,%xmm1
	pxor	%xmm4,%xmm0

	movdqa	%xmm0,%xmm4
	movdqa	%xmm0,%xmm3
	psllq	$5,%xmm0
	pxor	%xmm0,%xmm3
	psllq	$1,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm3
	pslldq	$8,%xmm0
	psrldq	$8,%xmm3
	pxor	%xmm4,%xmm0
	pxor	%xmm3,%xmm1


	movdqa	%xmm0,%xmm4
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm1
	pxor	%xmm0,%xmm4
	psrlq	$5,%xmm0
	pxor	%xmm4,%xmm0
	psrlq	$1,%xmm0
	pxor	%xmm1,%xmm0
.byte	102,15,56,0,197
	movdqu	%xmm0,(%rdi)
	ret
.cfi_endproc
.size	gcm_gmult_clmul,.-gcm_gmult_clmul
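// gcm_ghash_clmul: hash %rcx bytes of input at %rdx into the state at %rdi,
// using the key table at %rsi; the main loop consumes four blocks (64 bytes)
// per iteration.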
.globl	gcm_ghash_clmul
.hidden gcm_ghash_clmul
.type	gcm_ghash_clmul,@function
.align	32
gcm_ghash_clmul:
.cfi_startproc

_CET_ENDBR
.L_ghash_clmul:
	movdqa	.Lbswap_mask(%rip),%xmm10

	movdqu	(%rdi),%xmm0
	movdqu	(%rsi),%xmm2
	movdqu	32(%rsi),%xmm7
.byte	102,65,15,56,0,194

	subq	$0x10,%rcx
	jz	.Lodd_tail

	movdqu	16(%rsi),%xmm6
	cmpq	$0x30,%rcx
	jb	.Lskip4x

	subq	$0x30,%rcx
	movq	$0xA040608020C0E000,%rax
	movdqu	48(%rsi),%xmm14
	movdqu	64(%rsi),%xmm15




	movdqu	48(%rdx),%xmm3
	movdqu	32(%rdx),%xmm11
.byte	102,65,15,56,0,218
.byte	102,69,15,56,0,218
	movdqa	%xmm3,%xmm5
	pshufd	$78,%xmm3,%xmm4
	pxor	%xmm3,%xmm4
.byte	102,15,58,68,218,0
.byte	102,15,58,68,234,17
.byte	102,15,58,68,231,0

	movdqa	%xmm11,%xmm13
	pshufd	$78,%xmm11,%xmm12
	pxor	%xmm11,%xmm12
.byte	102,68,15,58,68,222,0
.byte	102,68,15,58,68,238,17
.byte	102,68,15,58,68,231,16
	xorps	%xmm11,%xmm3
	xorps	%xmm13,%xmm5
	movups	80(%rsi),%xmm7
	xorps	%xmm12,%xmm4

	movdqu	16(%rdx),%xmm11
	movdqu	0(%rdx),%xmm8
.byte	102,69,15,56,0,218
.byte	102,69,15,56,0,194
	movdqa	%xmm11,%xmm13
	pshufd	$78,%xmm11,%xmm12
	pxor	%xmm8,%xmm0
	pxor	%xmm11,%xmm12
.byte	102,69,15,58,68,222,0
	movdqa	%xmm0,%xmm1
	pshufd	$78,%xmm0,%xmm8
	pxor	%xmm0,%xmm8
.byte	102,69,15,58,68,238,17
.byte	102,68,15,58,68,231,0
	xorps	%xmm11,%xmm3
	xorps	%xmm13,%xmm5

	leaq	64(%rdx),%rdx
	subq	$0x40,%rcx
	jc	.Ltail4x

	jmp	.Lmod4_loop
.align	32
.Lmod4_loop:
.byte	102,65,15,58,68,199,0
	xorps	%xmm12,%xmm4
	movdqu	48(%rdx),%xmm11
.byte	102,69,15,56,0,218
.byte	102,65,15,58,68,207,17
	xorps	%xmm3,%xmm0
	movdqu	32(%rdx),%xmm3
	movdqa	%xmm11,%xmm13
.byte	102,68,15,58,68,199,16
	pshufd	$78,%xmm11,%xmm12
	xorps	%xmm5,%xmm1
	pxor	%xmm11,%xmm12
.byte	102,65,15,56,0,218
	movups	32(%rsi),%xmm7
	xorps	%xmm4,%xmm8
.byte	102,68,15,58,68,218,0
	pshufd	$78,%xmm3,%xmm4

	pxor	%xmm0,%xmm8
	movdqa	%xmm3,%xmm5
	pxor	%xmm1,%xmm8
	pxor	%xmm3,%xmm4
	movdqa	%xmm8,%xmm9
.byte	102,68,15,58,68,234,17
	pslldq	$8,%xmm8
	psrldq	$8,%xmm9
	pxor	%xmm8,%xmm0
	movdqa	.L7_mask(%rip),%xmm8
	pxor	%xmm9,%xmm1
.byte	102,76,15,110,200

	pand	%xmm0,%xmm8
.byte	102,69,15,56,0,200
	pxor	%xmm0,%xmm9
.byte	102,68,15,58,68,231,0
	psllq	$57,%xmm9
	movdqa	%xmm9,%xmm8
	pslldq	$8,%xmm9
.byte	102,15,58,68,222,0
	psrldq	$8,%xmm8
	pxor	%xmm9,%xmm0
	pxor	%xmm8,%xmm1
	movdqu	0(%rdx),%xmm8

	movdqa	%xmm0,%xmm9
	psrlq	$1,%xmm0
.byte	102,15,58,68,238,17
	xorps	%xmm11,%xmm3
	movdqu	16(%rdx),%xmm11
.byte	102,69,15,56,0,218
.byte	102,15,58,68,231,16
	xorps	%xmm13,%xmm5
	movups	80(%rsi),%xmm7
.byte	102,69,15,56,0,194
	pxor	%xmm9,%xmm1
	pxor	%xmm0,%xmm9
	psrlq	$5,%xmm0

	movdqa	%xmm11,%xmm13
	pxor	%xmm12,%xmm4
	pshufd	$78,%xmm11,%xmm12
	pxor	%xmm9,%xmm0
	pxor	%xmm8,%xmm1
	pxor	%xmm11,%xmm12
.byte	102,69,15,58,68,222,0
	psrlq	$1,%xmm0
	pxor	%xmm1,%xmm0
	movdqa	%xmm0,%xmm1
.byte	102,69,15,58,68,238,17
	xorps	%xmm11,%xmm3
	pshufd	$78,%xmm0,%xmm8
	pxor	%xmm0,%xmm8

.byte	102,68,15,58,68,231,0
	xorps	%xmm13,%xmm5

	leaq	64(%rdx),%rdx
	subq	$0x40,%rcx
	jnc	.Lmod4_loop

.Ltail4x:
.byte	102,65,15,58,68,199,0
.byte	102,65,15,58,68,207,17
.byte	102,68,15,58,68,199,16
	xorps	%xmm12,%xmm4
	xorps	%xmm3,%xmm0
	xorps	%xmm5,%xmm1
	pxor	%xmm0,%xmm1
	pxor	%xmm4,%xmm8

	pxor	%xmm1,%xmm8
	pxor	%xmm0,%xmm1

	movdqa	%xmm8,%xmm9
	psrldq	$8,%xmm8
	pslldq	$8,%xmm9
	pxor	%xmm8,%xmm1
	pxor	%xmm9,%xmm0

	movdqa	%xmm0,%xmm4
	movdqa	%xmm0,%xmm3
	psllq	$5,%xmm0
	pxor	%xmm0,%xmm3
	psllq	$1,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm3
	pslldq	$8,%xmm0
	psrldq	$8,%xmm3
	pxor	%xmm4,%xmm0
	pxor	%xmm3,%xmm1


	movdqa	%xmm0,%xmm4
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm1
	pxor	%xmm0,%xmm4
	psrlq	$5,%xmm0
	pxor	%xmm4,%xmm0
	psrlq	$1,%xmm0
	pxor	%xmm1,%xmm0
	addq	$0x40,%rcx
	jz	.Ldone
	movdqu	32(%rsi),%xmm7
	subq	$0x10,%rcx
	jz	.Lodd_tail
.Lskip4x:





	movdqu	(%rdx),%xmm8
	movdqu	16(%rdx),%xmm3
.byte	102,69,15,56,0,194
.byte	102,65,15,56,0,218
	pxor	%xmm8,%xmm0

	movdqa	%xmm3,%xmm5
	pshufd	$78,%xmm3,%xmm4
	pxor	%xmm3,%xmm4
.byte	102,15,58,68,218,0
.byte	102,15,58,68,234,17
.byte	102,15,58,68,231,0

	leaq	32(%rdx),%rdx
	nop
	subq	$0x20,%rcx
	jbe	.Leven_tail
	nop
	jmp	.Lmod_loop

.align	32
.Lmod_loop:
	movdqa	%xmm0,%xmm1
	movdqa	%xmm4,%xmm8
	pshufd	$78,%xmm0,%xmm4
	pxor	%xmm0,%xmm4

.byte	102,15,58,68,198,0
.byte	102,15,58,68,206,17
.byte	102,15,58,68,231,16

	pxor	%xmm3,%xmm0
	pxor	%xmm5,%xmm1
	movdqu	(%rdx),%xmm9
	pxor	%xmm0,%xmm8
.byte	102,69,15,56,0,202
	movdqu	16(%rdx),%xmm3

	pxor	%xmm1,%xmm8
	pxor	%xmm9,%xmm1
	pxor	%xmm8,%xmm4
.byte	102,65,15,56,0,218
	movdqa	%xmm4,%xmm8
	psrldq	$8,%xmm8
	pslldq	$8,%xmm4
	pxor	%xmm8,%xmm1
	pxor	%xmm4,%xmm0

	movdqa	%xmm3,%xmm5

	movdqa	%xmm0,%xmm9
	movdqa	%xmm0,%xmm8
	psllq	$5,%xmm0
	pxor	%xmm0,%xmm8
.byte	102,15,58,68,218,0
	psllq	$1,%xmm0
	pxor	%xmm8,%xmm0
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm8
	pslldq	$8,%xmm0
	psrldq	$8,%xmm8
	pxor	%xmm9,%xmm0
	pshufd	$78,%xmm5,%xmm4
	pxor	%xmm8,%xmm1
	pxor	%xmm5,%xmm4

	movdqa	%xmm0,%xmm9
	psrlq	$1,%xmm0
.byte	102,15,58,68,234,17
	pxor	%xmm9,%xmm1
	pxor	%xmm0,%xmm9
	psrlq	$5,%xmm0
	pxor	%xmm9,%xmm0
	leaq	32(%rdx),%rdx
	psrlq	$1,%xmm0
.byte	102,15,58,68,231,0
	pxor	%xmm1,%xmm0

	subq	$0x20,%rcx
	ja	.Lmod_loop

.Leven_tail:
	movdqa	%xmm0,%xmm1
	movdqa	%xmm4,%xmm8
	pshufd	$78,%xmm0,%xmm4
	pxor	%xmm0,%xmm4

.byte	102,15,58,68,198,0
.byte	102,15,58,68,206,17
.byte	102,15,58,68,231,16

	pxor	%xmm3,%xmm0
	pxor	%xmm5,%xmm1
	pxor	%xmm0,%xmm8
	pxor	%xmm1,%xmm8
	pxor	%xmm8,%xmm4
	movdqa	%xmm4,%xmm8
	psrldq	$8,%xmm8
	pslldq	$8,%xmm4
	pxor	%xmm8,%xmm1
	pxor	%xmm4,%xmm0

	movdqa	%xmm0,%xmm4
	movdqa	%xmm0,%xmm3
	psllq	$5,%xmm0
	pxor	%xmm0,%xmm3
	psllq	$1,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm3
	pslldq	$8,%xmm0
	psrldq	$8,%xmm3
	pxor	%xmm4,%xmm0
	pxor	%xmm3,%xmm1


	movdqa	%xmm0,%xmm4
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm1
	pxor	%xmm0,%xmm4
	psrlq	$5,%xmm0
	pxor	%xmm4,%xmm0
	psrlq	$1,%xmm0
	pxor	%xmm1,%xmm0
	testq	%rcx,%rcx
	jnz	.Ldone

.Lodd_tail:
	movdqu	(%rdx),%xmm8
.byte	102,69,15,56,0,194
	pxor	%xmm8,%xmm0
	movdqa	%xmm0,%xmm1
	pshufd	$78,%xmm0,%xmm3
	pxor	%xmm0,%xmm3
.byte	102,15,58,68,194,0
.byte	102,15,58,68,202,17
.byte	102,15,58,68,223,0
	pxor	%xmm0,%xmm3
	pxor	%xmm1,%xmm3

	movdqa	%xmm3,%xmm4
	psrldq	$8,%xmm3
	pslldq	$8,%xmm4
	pxor	%xmm3,%xmm1
	pxor	%xmm4,%xmm0

	movdqa	%xmm0,%xmm4
	movdqa	%xmm0,%xmm3
	psllq	$5,%xmm0
	pxor	%xmm0,%xmm3
	psllq	$1,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm3
	pslldq	$8,%xmm0
	psrldq	$8,%xmm3
	pxor	%xmm4,%xmm0
	pxor	%xmm3,%xmm1


	movdqa	%xmm0,%xmm4
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm1
	pxor	%xmm0,%xmm4
	psrlq	$5,%xmm0
	pxor	%xmm4,%xmm0
	psrlq	$1,%xmm0
	pxor	%xmm1,%xmm0
.Ldone:
.byte	102,65,15,56,0,194
	movdqu	%xmm0,(%rdi)
	ret
.cfi_endproc

.size	gcm_ghash_clmul,.-gcm_ghash_clmul
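// gcm_init_avx: AVX variant of the table setup. The four-pass loop appears
// to precompute eight powers of H, interleaved with their pre-XORed halves.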
.globl	gcm_init_avx
.hidden gcm_init_avx
.type	gcm_init_avx,@function
.align	32
gcm_init_avx:
.cfi_startproc
_CET_ENDBR
	vzeroupper

	vmovdqu	(%rsi),%xmm2
	vpshufd	$78,%xmm2,%xmm2


	vpshufd	$255,%xmm2,%xmm4
	vpsrlq	$63,%xmm2,%xmm3
	vpsllq	$1,%xmm2,%xmm2
	vpxor	%xmm5,%xmm5,%xmm5
	vpcmpgtd	%xmm4,%xmm5,%xmm5
	vpslldq	$8,%xmm3,%xmm3
	vpor	%xmm3,%xmm2,%xmm2


	vpand	.L0x1c2_polynomial(%rip),%xmm5,%xmm5
	vpxor	%xmm5,%xmm2,%xmm2

	vpunpckhqdq	%xmm2,%xmm2,%xmm6
	vmovdqa	%xmm2,%xmm0
	vpxor	%xmm2,%xmm6,%xmm6
	movq	$4,%r10
	jmp	.Linit_start_avx
.align	32
.Linit_loop_avx:
	vpalignr	$8,%xmm3,%xmm4,%xmm5
	vmovdqu	%xmm5,-16(%rdi)
	vpunpckhqdq	%xmm0,%xmm0,%xmm3
	vpxor	%xmm0,%xmm3,%xmm3
	vpclmulqdq	$0x11,%xmm2,%xmm0,%xmm1
	vpclmulqdq	$0x00,%xmm2,%xmm0,%xmm0
	vpclmulqdq	$0x00,%xmm6,%xmm3,%xmm3
	vpxor	%xmm0,%xmm1,%xmm4
	vpxor	%xmm4,%xmm3,%xmm3

	vpslldq	$8,%xmm3,%xmm4
	vpsrldq	$8,%xmm3,%xmm3
	vpxor	%xmm4,%xmm0,%xmm0
	vpxor	%xmm3,%xmm1,%xmm1
	vpsllq	$57,%xmm0,%xmm3
	vpsllq	$62,%xmm0,%xmm4
	vpxor	%xmm3,%xmm4,%xmm4
	vpsllq	$63,%xmm0,%xmm3
	vpxor	%xmm3,%xmm4,%xmm4
	vpslldq	$8,%xmm4,%xmm3
	vpsrldq	$8,%xmm4,%xmm4
	vpxor	%xmm3,%xmm0,%xmm0
	vpxor	%xmm4,%xmm1,%xmm1

	vpsrlq	$1,%xmm0,%xmm4
	vpxor	%xmm0,%xmm1,%xmm1
	vpxor	%xmm4,%xmm0,%xmm0
	vpsrlq	$5,%xmm4,%xmm4
	vpxor	%xmm4,%xmm0,%xmm0
	vpsrlq	$1,%xmm0,%xmm0
	vpxor	%xmm1,%xmm0,%xmm0
.Linit_start_avx:
	vmovdqa	%xmm0,%xmm5
	vpunpckhqdq	%xmm0,%xmm0,%xmm3
	vpxor	%xmm0,%xmm3,%xmm3
	vpclmulqdq	$0x11,%xmm2,%xmm0,%xmm1
	vpclmulqdq	$0x00,%xmm2,%xmm0,%xmm0
	vpclmulqdq	$0x00,%xmm6,%xmm3,%xmm3
	vpxor	%xmm0,%xmm1,%xmm4
	vpxor	%xmm4,%xmm3,%xmm3

	vpslldq	$8,%xmm3,%xmm4
	vpsrldq	$8,%xmm3,%xmm3
	vpxor	%xmm4,%xmm0,%xmm0
	vpxor	%xmm3,%xmm1,%xmm1
	vpsllq	$57,%xmm0,%xmm3
	vpsllq	$62,%xmm0,%xmm4
	vpxor	%xmm3,%xmm4,%xmm4
	vpsllq	$63,%xmm0,%xmm3
	vpxor	%xmm3,%xmm4,%xmm4
	vpslldq	$8,%xmm4,%xmm3
	vpsrldq	$8,%xmm4,%xmm4
	vpxor	%xmm3,%xmm0,%xmm0
	vpxor	%xmm4,%xmm1,%xmm1

	vpsrlq	$1,%xmm0,%xmm4
	vpxor	%xmm0,%xmm1,%xmm1
	vpxor	%xmm4,%xmm0,%xmm0
	vpsrlq	$5,%xmm4,%xmm4
	vpxor	%xmm4,%xmm0,%xmm0
	vpsrlq	$1,%xmm0,%xmm0
	vpxor	%xmm1,%xmm0,%xmm0
	vpshufd	$78,%xmm5,%xmm3
	vpshufd	$78,%xmm0,%xmm4
	vpxor	%xmm5,%xmm3,%xmm3
	vmovdqu	%xmm5,0(%rdi)
	vpxor	%xmm0,%xmm4,%xmm4
	vmovdqu	%xmm0,16(%rdi)
	leaq	48(%rdi),%rdi
	subq	$1,%r10
	jnz	.Linit_loop_avx

	vpalignr	$8,%xmm4,%xmm3,%xmm5
	vmovdqu	%xmm5,-16(%rdi)

	vzeroupper
	ret

.cfi_endproc
.size	gcm_init_avx,.-gcm_init_avx
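// gcm_gmult_avx: no separate AVX single-block path; tail-call the CLMUL one.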
.globl	gcm_gmult_avx
.hidden gcm_gmult_avx
.type	gcm_gmult_avx,@function
.align	32
gcm_gmult_avx:
.cfi_startproc
_CET_ENDBR
	jmp	.L_gmult_clmul
.cfi_endproc
.size	gcm_gmult_avx,.-gcm_gmult_avx
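// gcm_ghash_avx: bulk GHASH. The main loop consumes eight blocks (128 bytes)
// per iteration against the eight precomputed powers of H; shorter inputs
// fall through to .Lshort_avx/.Ltail_avx.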
.globl	gcm_ghash_avx
.hidden gcm_ghash_avx
.type	gcm_ghash_avx,@function
.align	32
gcm_ghash_avx:
.cfi_startproc
_CET_ENDBR
	vzeroupper

	vmovdqu	(%rdi),%xmm10
	leaq	.L0x1c2_polynomial(%rip),%r10
	leaq	64(%rsi),%rsi
	vmovdqu	.Lbswap_mask(%rip),%xmm13
	vpshufb	%xmm13,%xmm10,%xmm10
	cmpq	$0x80,%rcx
	jb	.Lshort_avx
	subq	$0x80,%rcx

	vmovdqu	112(%rdx),%xmm14
	vmovdqu	0-64(%rsi),%xmm6
	vpshufb	%xmm13,%xmm14,%xmm14
	vmovdqu	32-64(%rsi),%xmm7

	vpunpckhqdq	%xmm14,%xmm14,%xmm9
	vmovdqu	96(%rdx),%xmm15
	vpclmulqdq	$0x00,%xmm6,%xmm14,%xmm0
	vpxor	%xmm14,%xmm9,%xmm9
	vpshufb	%xmm13,%xmm15,%xmm15
	vpclmulqdq	$0x11,%xmm6,%xmm14,%xmm1
	vmovdqu	16-64(%rsi),%xmm6
	vpunpckhqdq	%xmm15,%xmm15,%xmm8
	vmovdqu	80(%rdx),%xmm14
	vpclmulqdq	$0x00,%xmm7,%xmm9,%xmm2
	vpxor	%xmm15,%xmm8,%xmm8

	vpshufb	%xmm13,%xmm14,%xmm14
	vpclmulqdq	$0x00,%xmm6,%xmm15,%xmm3
	vpunpckhqdq	%xmm14,%xmm14,%xmm9
	vpclmulqdq	$0x11,%xmm6,%xmm15,%xmm4
	vmovdqu	48-64(%rsi),%xmm6
	vpxor	%xmm14,%xmm9,%xmm9
	vmovdqu	64(%rdx),%xmm15
	vpclmulqdq	$0x10,%xmm7,%xmm8,%xmm5
	vmovdqu	80-64(%rsi),%xmm7

	vpshufb	%xmm13,%xmm15,%xmm15
	vpxor	%xmm0,%xmm3,%xmm3
	vpclmulqdq	$0x00,%xmm6,%xmm14,%xmm0
	vpxor	%xmm1,%xmm4,%xmm4
	vpunpckhqdq	%xmm15,%xmm15,%xmm8
	vpclmulqdq	$0x11,%xmm6,%xmm14,%xmm1
	vmovdqu	64-64(%rsi),%xmm6
	vpxor	%xmm2,%xmm5,%xmm5
	vpclmulqdq	$0x00,%xmm7,%xmm9,%xmm2
	vpxor	%xmm15,%xmm8,%xmm8

	vmovdqu	48(%rdx),%xmm14
	vpxor	%xmm3,%xmm0,%xmm0
	vpclmulqdq	$0x00,%xmm6,%xmm15,%xmm3
	vpxor	%xmm4,%xmm1,%xmm1
	vpshufb	%xmm13,%xmm14,%xmm14
	vpclmulqdq	$0x11,%xmm6,%xmm15,%xmm4
	vmovdqu	96-64(%rsi),%xmm6
	vpxor	%xmm5,%xmm2,%xmm2
	vpunpckhqdq	%xmm14,%xmm14,%xmm9
	vpclmulqdq	$0x10,%xmm7,%xmm8,%xmm5
	vmovdqu	128-64(%rsi),%xmm7
	vpxor	%xmm14,%xmm9,%xmm9

	vmovdqu	32(%rdx),%xmm15
	vpxor	%xmm0,%xmm3,%xmm3
	vpclmulqdq	$0x00,%xmm6,%xmm14,%xmm0
	vpxor	%xmm1,%xmm4,%xmm4
	vpshufb	%xmm13,%xmm15,%xmm15
	vpclmulqdq	$0x11,%xmm6,%xmm14,%xmm1
	vmovdqu	112-64(%rsi),%xmm6
	vpxor	%xmm2,%xmm5,%xmm5
	vpunpckhqdq	%xmm15,%xmm15,%xmm8
	vpclmulqdq	$0x00,%xmm7,%xmm9,%xmm2
	vpxor	%xmm15,%xmm8,%xmm8

	vmovdqu	16(%rdx),%xmm14
	vpxor	%xmm3,%xmm0,%xmm0
	vpclmulqdq	$0x00,%xmm6,%xmm15,%xmm3
	vpxor	%xmm4,%xmm1,%xmm1
	vpshufb	%xmm13,%xmm14,%xmm14
	vpclmulqdq	$0x11,%xmm6,%xmm15,%xmm4
	vmovdqu	144-64(%rsi),%xmm6
	vpxor	%xmm5,%xmm2,%xmm2
	vpunpckhqdq	%xmm14,%xmm14,%xmm9
	vpclmulqdq	$0x10,%xmm7,%xmm8,%xmm5
	vmovdqu	176-64(%rsi),%xmm7
	vpxor	%xmm14,%xmm9,%xmm9

	vmovdqu	(%rdx),%xmm15
	vpxor	%xmm0,%xmm3,%xmm3
	vpclmulqdq	$0x00,%xmm6,%xmm14,%xmm0
	vpxor	%xmm1,%xmm4,%xmm4
	vpshufb	%xmm13,%xmm15,%xmm15
	vpclmulqdq	$0x11,%xmm6,%xmm14,%xmm1
	vmovdqu	160-64(%rsi),%xmm6
	vpxor	%xmm2,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm7,%xmm9,%xmm2

	leaq	128(%rdx),%rdx
	cmpq	$0x80,%rcx
	jb	.Ltail_avx

	vpxor	%xmm10,%xmm15,%xmm15
	subq	$0x80,%rcx
	jmp	.Loop8x_avx

.align	32
.Loop8x_avx:
	vpunpckhqdq	%xmm15,%xmm15,%xmm8
	vmovdqu	112(%rdx),%xmm14
	vpxor	%xmm0,%xmm3,%xmm3
	vpxor	%xmm15,%xmm8,%xmm8
	vpclmulqdq	$0x00,%xmm6,%xmm15,%xmm10
	vpshufb	%xmm13,%xmm14,%xmm14
	vpxor	%xmm1,%xmm4,%xmm4
	vpclmulqdq	$0x11,%xmm6,%xmm15,%xmm11
	vmovdqu	0-64(%rsi),%xmm6
	vpunpckhqdq	%xmm14,%xmm14,%xmm9
	vpxor	%xmm2,%xmm5,%xmm5
	vpclmulqdq	$0x00,%xmm7,%xmm8,%xmm12
	vmovdqu	32-64(%rsi),%xmm7
	vpxor	%xmm14,%xmm9,%xmm9

	vmovdqu	96(%rdx),%xmm15
	vpclmulqdq	$0x00,%xmm6,%xmm14,%xmm0
	vpxor	%xmm3,%xmm10,%xmm10
	vpshufb	%xmm13,%xmm15,%xmm15
	vpclmulqdq	$0x11,%xmm6,%xmm14,%xmm1
	vxorps	%xmm4,%xmm11,%xmm11
	vmovdqu	16-64(%rsi),%xmm6
	vpunpckhqdq	%xmm15,%xmm15,%xmm8
	vpclmulqdq	$0x00,%xmm7,%xmm9,%xmm2
	vpxor	%xmm5,%xmm12,%xmm12
	vxorps	%xmm15,%xmm8,%xmm8

	vmovdqu	80(%rdx),%xmm14
	vpxor	%xmm10,%xmm12,%xmm12
	vpclmulqdq	$0x00,%xmm6,%xmm15,%xmm3
	vpxor	%xmm11,%xmm12,%xmm12
	vpslldq	$8,%xmm12,%xmm9
	vpxor	%xmm0,%xmm3,%xmm3
	vpclmulqdq	$0x11,%xmm6,%xmm15,%xmm4
	vpsrldq	$8,%xmm12,%xmm12
	vpxor	%xmm9,%xmm10,%xmm10
	vmovdqu	48-64(%rsi),%xmm6
	vpshufb	%xmm13,%xmm14,%xmm14
	vxorps	%xmm12,%xmm11,%xmm11
	vpxor	%xmm1,%xmm4,%xmm4
	vpunpckhqdq	%xmm14,%xmm14,%xmm9
	vpclmulqdq	$0x10,%xmm7,%xmm8,%xmm5
	vmovdqu	80-64(%rsi),%xmm7
	vpxor	%xmm14,%xmm9,%xmm9
	vpxor	%xmm2,%xmm5,%xmm5

	vmovdqu	64(%rdx),%xmm15
	vpalignr	$8,%xmm10,%xmm10,%xmm12
	vpclmulqdq	$0x00,%xmm6,%xmm14,%xmm0
	vpshufb	%xmm13,%xmm15,%xmm15
	vpxor	%xmm3,%xmm0,%xmm0
	vpclmulqdq	$0x11,%xmm6,%xmm14,%xmm1
	vmovdqu	64-64(%rsi),%xmm6
	vpunpckhqdq	%xmm15,%xmm15,%xmm8
	vpxor	%xmm4,%xmm1,%xmm1
	vpclmulqdq	$0x00,%xmm7,%xmm9,%xmm2
	vxorps	%xmm15,%xmm8,%xmm8
	vpxor	%xmm5,%xmm2,%xmm2

	vmovdqu	48(%rdx),%xmm14
	vpclmulqdq	$0x10,(%r10),%xmm10,%xmm10
	vpclmulqdq	$0x00,%xmm6,%xmm15,%xmm3
	vpshufb	%xmm13,%xmm14,%xmm14
	vpxor	%xmm0,%xmm3,%xmm3
	vpclmulqdq	$0x11,%xmm6,%xmm15,%xmm4
	vmovdqu	96-64(%rsi),%xmm6
	vpunpckhqdq	%xmm14,%xmm14,%xmm9
	vpxor	%xmm1,%xmm4,%xmm4
	vpclmulqdq	$0x10,%xmm7,%xmm8,%xmm5
	vmovdqu	128-64(%rsi),%xmm7
	vpxor	%xmm14,%xmm9,%xmm9
	vpxor	%xmm2,%xmm5,%xmm5

	vmovdqu	32(%rdx),%xmm15
	vpclmulqdq	$0x00,%xmm6,%xmm14,%xmm0
	vpshufb	%xmm13,%xmm15,%xmm15
	vpxor	%xmm3,%xmm0,%xmm0
	vpclmulqdq	$0x11,%xmm6,%xmm14,%xmm1
	vmovdqu	112-64(%rsi),%xmm6
	vpunpckhqdq	%xmm15,%xmm15,%xmm8
	vpxor	%xmm4,%xmm1,%xmm1
	vpclmulqdq	$0x00,%xmm7,%xmm9,%xmm2
	vpxor	%xmm15,%xmm8,%xmm8
	vpxor	%xmm5,%xmm2,%xmm2
	vxorps	%xmm12,%xmm10,%xmm10

	vmovdqu	16(%rdx),%xmm14
	vpalignr	$8,%xmm10,%xmm10,%xmm12
	vpclmulqdq	$0x00,%xmm6,%xmm15,%xmm3
	vpshufb	%xmm13,%xmm14,%xmm14
	vpxor	%xmm0,%xmm3,%xmm3
	vpclmulqdq	$0x11,%xmm6,%xmm15,%xmm4
	vmovdqu	144-64(%rsi),%xmm6
	vpclmulqdq	$0x10,(%r10),%xmm10,%xmm10
	vxorps	%xmm11,%xmm12,%xmm12
	vpunpckhqdq	%xmm14,%xmm14,%xmm9
	vpxor	%xmm1,%xmm4,%xmm4
	vpclmulqdq	$0x10,%xmm7,%xmm8,%xmm5
	vmovdqu	176-64(%rsi),%xmm7
	vpxor	%xmm14,%xmm9,%xmm9
	vpxor	%xmm2,%xmm5,%xmm5

	vmovdqu	(%rdx),%xmm15
	vpclmulqdq	$0x00,%xmm6,%xmm14,%xmm0
	vpshufb	%xmm13,%xmm15,%xmm15
	vpclmulqdq	$0x11,%xmm6,%xmm14,%xmm1
	vmovdqu	160-64(%rsi),%xmm6
	vpxor	%xmm12,%xmm15,%xmm15
	vpclmulqdq	$0x10,%xmm7,%xmm9,%xmm2
	vpxor	%xmm10,%xmm15,%xmm15

	leaq	128(%rdx),%rdx
	subq	$0x80,%rcx
	jnc	.Loop8x_avx

	addq	$0x80,%rcx
	jmp	.Ltail_no_xor_avx

.align	32
.Lshort_avx:
	vmovdqu	-16(%rdx,%rcx,1),%xmm14
	leaq	(%rdx,%rcx,1),%rdx
	vmovdqu	0-64(%rsi),%xmm6
	vmovdqu	32-64(%rsi),%xmm7
	vpshufb	%xmm13,%xmm14,%xmm15

	vmovdqa	%xmm0,%xmm3
	vmovdqa	%xmm1,%xmm4
	vmovdqa	%xmm2,%xmm5
	subq	$0x10,%rcx
	jz	.Ltail_avx

	vpunpckhqdq	%xmm15,%xmm15,%xmm8
	vpxor	%xmm0,%xmm3,%xmm3
	vpclmulqdq	$0x00,%xmm6,%xmm15,%xmm0
	vpxor	%xmm15,%xmm8,%xmm8
	vmovdqu	-32(%rdx),%xmm14
	vpxor	%xmm1,%xmm4,%xmm4
	vpclmulqdq	$0x11,%xmm6,%xmm15,%xmm1
	vmovdqu	16-64(%rsi),%xmm6
	vpshufb	%xmm13,%xmm14,%xmm15
	vpxor	%xmm2,%xmm5,%xmm5
	vpclmulqdq	$0x00,%xmm7,%xmm8,%xmm2
	vpsrldq	$8,%xmm7,%xmm7
	subq	$0x10,%rcx
	jz	.Ltail_avx

	vpunpckhqdq	%xmm15,%xmm15,%xmm8
	vpxor	%xmm0,%xmm3,%xmm3
	vpclmulqdq	$0x00,%xmm6,%xmm15,%xmm0
	vpxor	%xmm15,%xmm8,%xmm8
	vmovdqu	-48(%rdx),%xmm14
	vpxor	%xmm1,%xmm4,%xmm4
	vpclmulqdq	$0x11,%xmm6,%xmm15,%xmm1
	vmovdqu	48-64(%rsi),%xmm6
	vpshufb	%xmm13,%xmm14,%xmm15
	vpxor	%xmm2,%xmm5,%xmm5
	vpclmulqdq	$0x00,%xmm7,%xmm8,%xmm2
	vmovdqu	80-64(%rsi),%xmm7
	subq	$0x10,%rcx
	jz	.Ltail_avx

	vpunpckhqdq	%xmm15,%xmm15,%xmm8
	vpxor	%xmm0,%xmm3,%xmm3
	vpclmulqdq	$0x00,%xmm6,%xmm15,%xmm0
	vpxor	%xmm15,%xmm8,%xmm8
	vmovdqu	-64(%rdx),%xmm14
	vpxor	%xmm1,%xmm4,%xmm4
	vpclmulqdq	$0x11,%xmm6,%xmm15,%xmm1
	vmovdqu	64-64(%rsi),%xmm6
	vpshufb	%xmm13,%xmm14,%xmm15
	vpxor	%xmm2,%xmm5,%xmm5
	vpclmulqdq	$0x00,%xmm7,%xmm8,%xmm2
	vpsrldq	$8,%xmm7,%xmm7
	subq	$0x10,%rcx
	jz	.Ltail_avx

	vpunpckhqdq	%xmm15,%xmm15,%xmm8
	vpxor	%xmm0,%xmm3,%xmm3
	vpclmulqdq	$0x00,%xmm6,%xmm15,%xmm0
	vpxor	%xmm15,%xmm8,%xmm8
	vmovdqu	-80(%rdx),%xmm14
	vpxor	%xmm1,%xmm4,%xmm4
	vpclmulqdq	$0x11,%xmm6,%xmm15,%xmm1
	vmovdqu	96-64(%rsi),%xmm6
	vpshufb	%xmm13,%xmm14,%xmm15
	vpxor	%xmm2,%xmm5,%xmm5
	vpclmulqdq	$0x00,%xmm7,%xmm8,%xmm2
	vmovdqu	128-64(%rsi),%xmm7
	subq	$0x10,%rcx
	jz	.Ltail_avx

	vpunpckhqdq	%xmm15,%xmm15,%xmm8
	vpxor	%xmm0,%xmm3,%xmm3
	vpclmulqdq	$0x00,%xmm6,%xmm15,%xmm0
	vpxor	%xmm15,%xmm8,%xmm8
	vmovdqu	-96(%rdx),%xmm14
	vpxor	%xmm1,%xmm4,%xmm4
	vpclmulqdq	$0x11,%xmm6,%xmm15,%xmm1
	vmovdqu	112-64(%rsi),%xmm6
	vpshufb	%xmm13,%xmm14,%xmm15
	vpxor	%xmm2,%xmm5,%xmm5
	vpclmulqdq	$0x00,%xmm7,%xmm8,%xmm2
	vpsrldq	$8,%xmm7,%xmm7
	subq	$0x10,%rcx
	jz	.Ltail_avx

	vpunpckhqdq	%xmm15,%xmm15,%xmm8
	vpxor	%xmm0,%xmm3,%xmm3
	vpclmulqdq	$0x00,%xmm6,%xmm15,%xmm0
	vpxor	%xmm15,%xmm8,%xmm8
	vmovdqu	-112(%rdx),%xmm14
	vpxor	%xmm1,%xmm4,%xmm4
	vpclmulqdq	$0x11,%xmm6,%xmm15,%xmm1
	vmovdqu	144-64(%rsi),%xmm6
	vpshufb	%xmm13,%xmm14,%xmm15
	vpxor	%xmm2,%xmm5,%xmm5
	vpclmulqdq	$0x00,%xmm7,%xmm8,%xmm2
	vmovq	184-64(%rsi),%xmm7
	subq	$0x10,%rcx
	jmp	.Ltail_avx

.align	32
.Ltail_avx:
	vpxor	%xmm10,%xmm15,%xmm15
.Ltail_no_xor_avx:
	vpunpckhqdq	%xmm15,%xmm15,%xmm8
	vpxor	%xmm0,%xmm3,%xmm3
	vpclmulqdq	$0x00,%xmm6,%xmm15,%xmm0
	vpxor	%xmm15,%xmm8,%xmm8
	vpxor	%xmm1,%xmm4,%xmm4
	vpclmulqdq	$0x11,%xmm6,%xmm15,%xmm1
	vpxor	%xmm2,%xmm5,%xmm5
	vpclmulqdq	$0x00,%xmm7,%xmm8,%xmm2

	vmovdqu	(%r10),%xmm12

	vpxor	%xmm0,%xmm3,%xmm10
	vpxor	%xmm1,%xmm4,%xmm11
	vpxor	%xmm2,%xmm5,%xmm5

	vpxor	%xmm10,%xmm5,%xmm5
	vpxor	%xmm11,%xmm5,%xmm5
	vpslldq	$8,%xmm5,%xmm9
	vpsrldq	$8,%xmm5,%xmm5
	vpxor	%xmm9,%xmm10,%xmm10
	vpxor	%xmm5,%xmm11,%xmm11

	vpclmulqdq	$0x10,%xmm12,%xmm10,%xmm9
	vpalignr	$8,%xmm10,%xmm10,%xmm10
	vpxor	%xmm9,%xmm10,%xmm10

	vpclmulqdq	$0x10,%xmm12,%xmm10,%xmm9
	vpalignr	$8,%xmm10,%xmm10,%xmm10
	vpxor	%xmm11,%xmm10,%xmm10
	vpxor	%xmm9,%xmm10,%xmm10

	cmpq	$0,%rcx
	jne	.Lshort_avx

	vpshufb	%xmm13,%xmm10,%xmm10
	vmovdqu	%xmm10,(%rdi)
	vzeroupper
	ret
.cfi_endproc

.size	gcm_ghash_avx,.-gcm_ghash_avx
.section	.rodata
.align	64
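// pshufb mask that byte-reverses a 128-bit block.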
.Lbswap_mask:
.byte	15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
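// The GHASH reduction constant: 0xc2 in the top byte is the bit-reflected
// encoding of the polynomial x^128 + x^7 + x^2 + x + 1.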
.L0x1c2_polynomial:
.byte	1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xc2
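// Mask of the low three bits in each 64-bit lane, used in .Lmod4_loop.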
.L7_mask:
.long	7,0,7,0
.align	64

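// ASCII for "GHASH for x86_64, CRYPTOGAMS by <appro@openssl.org>".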
.byte	71,72,65,83,72,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align	64
.text
#endif