1; This file is generated from a similarly-named Perl script in the BoringSSL
2; source tree. Do not edit by hand.
3
4%ifidn __OUTPUT_FORMAT__, win64
5default	rel
6%define XMMWORD
7%define YMMWORD
8%define ZMMWORD
9%define _CET_ENDBR
10
11%include "ring_core_generated/prefix_symbols_nasm.inc"
12section	.text code align=64
13
14EXTERN	OPENSSL_ia32cap_P
; ---------------------------------------------------------------------------
; gcm_init_clmul -- build a 96-byte table (six 16-byte entries) at [rcx] from
; the 16-byte value at [rdx], using PCLMULQDQ (emitted as raw DB bytes).
;
; ABI:  Microsoft x64 (this file is only assembled for win64).
; In:   rcx = destination table, written with movdqu at offsets 0..80
;       rdx = 16-byte source value, read with movdqu
; Out:  with V = the adjusted input computed below,
;         [rcx+ 0] = V            [rcx+48] = V*V*V
;         [rcx+16] = V*V          [rcx+64] = V*V*V*V
;         [rcx+32] = packed (hi xor lo) halves of the entries at +0 and +16
;         [rcx+80] = packed (hi xor lo) halves of the entries at +48 and +64
;       where "*" is the carry-less multiply followed by the shift/xor
;       reduction sequence coded below.
; Clobbers: xmm0-xmm5, flags. xmm6 is saved and restored.
; NOTE(review): presumably [rdx] is the GHASH hash key H and the table holds
; H..H^4 plus Karatsuba helper values -- confirm against the Perl source.
; ---------------------------------------------------------------------------
15global	gcm_init_clmul
16
17ALIGN	16
18gcm_init_clmul:
19
20$L$SEH_begin_gcm_init_clmul_1:
21_CET_ENDBR
22$L$_init_clmul:
; Prologue: reserve 24 bytes and spill xmm6 at [rsp]. The $L$SEH_prolog_*
; labels mark the offsets referenced by the unwind codes in .xdata.
23	sub	rsp,0x18
24$L$SEH_prolog_gcm_init_clmul_2:
25	movaps	XMMWORD[rsp],xmm6
26$L$SEH_prolog_gcm_init_clmul_3:
; Load the input and swap its two 64-bit halves (pshufd imm 78 = 0x4E).
27	movdqu	xmm2,XMMWORD[rdx]
28	pshufd	xmm2,xmm2,78
29
30
; Shift the 128-bit value left by one bit: psllq shifts each qword, and
; the bit shifted out of the low qword is carried into the high qword via
; psrlq 63 / pslldq 8 / por. xmm4 holds a broadcast of the top dword (taken
; before the shift); pcmpgtd against zero turns its sign bit into an
; all-ones / all-zeros mask in xmm5.
31	pshufd	xmm4,xmm2,255
32	movdqa	xmm3,xmm2
33	psllq	xmm2,1
34	pxor	xmm5,xmm5
35	psrlq	xmm3,63
36	pcmpgtd	xmm5,xmm4
37	pslldq	xmm3,8
38	por	xmm2,xmm3
39
40
; If the original top bit was set, xor in the $L$0x1c2_polynomial constant.
41	pand	xmm5,XMMWORD[$L$0x1c2_polynomial]
42	pxor	xmm2,xmm5
43
44
; xmm2 = V (kept for all multiplies), xmm6 = V.hi xor V.lo in both halves.
; First multiply: xmm0 = V * V.
45	pshufd	xmm6,xmm2,78
46	movdqa	xmm0,xmm2
47	pxor	xmm6,xmm2
48	movdqa	xmm1,xmm0
49	pshufd	xmm3,xmm0,78
50	pxor	xmm3,xmm0
; The three DB lines encode:
;   pclmulqdq xmm0,xmm2,0x00   (lo * lo)
;   pclmulqdq xmm1,xmm2,0x11   (hi * hi)
;   pclmulqdq xmm3,xmm6,0x00   ((hi^lo) * (hi^lo))
51DB	102,15,58,68,194,0
52DB	102,15,58,68,202,17
53DB	102,15,58,68,222,0
; Middle term = xmm3 ^ lo ^ hi, then split across the 256-bit product held
; in xmm1:xmm0.
54	pxor	xmm3,xmm0
55	pxor	xmm3,xmm1
56
57	movdqa	xmm4,xmm3
58	psrldq	xmm3,8
59	pslldq	xmm4,8
60	pxor	xmm1,xmm3
61	pxor	xmm0,xmm4
62
; Reduction, first part: left shifts by 5, 1 and 57 combined with xors.
63	movdqa	xmm4,xmm0
64	movdqa	xmm3,xmm0
65	psllq	xmm0,5
66	pxor	xmm3,xmm0
67	psllq	xmm0,1
68	pxor	xmm0,xmm3
69	psllq	xmm0,57
70	movdqa	xmm3,xmm0
71	pslldq	xmm0,8
72	psrldq	xmm3,8
73	pxor	xmm0,xmm4
74	pxor	xmm1,xmm3
75
76
; Reduction, second part: right shifts by 1, 5 and 1, folding xmm1 into
; xmm0. Afterwards xmm0 holds the 128-bit result.
77	movdqa	xmm4,xmm0
78	psrlq	xmm0,1
79	pxor	xmm1,xmm4
80	pxor	xmm4,xmm0
81	psrlq	xmm0,5
82	pxor	xmm0,xmm4
83	psrlq	xmm0,1
84	pxor	xmm0,xmm1
; Store V and V*V, plus one entry that packs their (hi xor lo) values.
; The DB line encodes: palignr xmm4,xmm3,8
85	pshufd	xmm3,xmm2,78
86	pshufd	xmm4,xmm0,78
87	pxor	xmm3,xmm2
88	movdqu	XMMWORD[rcx],xmm2
89	pxor	xmm4,xmm0
90	movdqu	XMMWORD[16+rcx],xmm0
91DB	102,15,58,15,227,8
92	movdqu	XMMWORD[32+rcx],xmm4
; Second multiply: xmm0 = xmm0 * V (same DB-encoded pclmulqdq triple and
; reduction as above).
93	movdqa	xmm1,xmm0
94	pshufd	xmm3,xmm0,78
95	pxor	xmm3,xmm0
96DB	102,15,58,68,194,0
97DB	102,15,58,68,202,17
98DB	102,15,58,68,222,0
99	pxor	xmm3,xmm0
100	pxor	xmm3,xmm1
101
102	movdqa	xmm4,xmm3
103	psrldq	xmm3,8
104	pslldq	xmm4,8
105	pxor	xmm1,xmm3
106	pxor	xmm0,xmm4
107
108	movdqa	xmm4,xmm0
109	movdqa	xmm3,xmm0
110	psllq	xmm0,5
111	pxor	xmm3,xmm0
112	psllq	xmm0,1
113	pxor	xmm0,xmm3
114	psllq	xmm0,57
115	movdqa	xmm3,xmm0
116	pslldq	xmm0,8
117	psrldq	xmm3,8
118	pxor	xmm0,xmm4
119	pxor	xmm1,xmm3
120
121
122	movdqa	xmm4,xmm0
123	psrlq	xmm0,1
124	pxor	xmm1,xmm4
125	pxor	xmm4,xmm0
126	psrlq	xmm0,5
127	pxor	xmm0,xmm4
128	psrlq	xmm0,1
129	pxor	xmm0,xmm1
; Keep the third power in xmm5, then multiply by V once more.
130	movdqa	xmm5,xmm0
131	movdqa	xmm1,xmm0
132	pshufd	xmm3,xmm0,78
133	pxor	xmm3,xmm0
134DB	102,15,58,68,194,0
135DB	102,15,58,68,202,17
136DB	102,15,58,68,222,0
137	pxor	xmm3,xmm0
138	pxor	xmm3,xmm1
139
140	movdqa	xmm4,xmm3
141	psrldq	xmm3,8
142	pslldq	xmm4,8
143	pxor	xmm1,xmm3
144	pxor	xmm0,xmm4
145
146	movdqa	xmm4,xmm0
147	movdqa	xmm3,xmm0
148	psllq	xmm0,5
149	pxor	xmm3,xmm0
150	psllq	xmm0,1
151	pxor	xmm0,xmm3
152	psllq	xmm0,57
153	movdqa	xmm3,xmm0
154	pslldq	xmm0,8
155	psrldq	xmm3,8
156	pxor	xmm0,xmm4
157	pxor	xmm1,xmm3
158
159
160	movdqa	xmm4,xmm0
161	psrlq	xmm0,1
162	pxor	xmm1,xmm4
163	pxor	xmm4,xmm0
164	psrlq	xmm0,5
165	pxor	xmm0,xmm4
166	psrlq	xmm0,1
167	pxor	xmm0,xmm1
; Store the third and fourth powers and their packed (hi xor lo) entry.
; The DB line encodes: palignr xmm4,xmm3,8
168	pshufd	xmm3,xmm5,78
169	pshufd	xmm4,xmm0,78
170	pxor	xmm3,xmm5
171	movdqu	XMMWORD[48+rcx],xmm5
172	pxor	xmm4,xmm0
173	movdqu	XMMWORD[64+rcx],xmm0
174DB	102,15,58,15,227,8
175	movdqu	XMMWORD[80+rcx],xmm4
; Epilogue: restore xmm6 and release the 24-byte frame.
176	movaps	xmm6,XMMWORD[rsp]
177	lea	rsp,[24+rsp]
178	ret
179
180$L$SEH_end_gcm_init_clmul_4:
181
; ---------------------------------------------------------------------------
; gcm_gmult_clmul -- multiply the 16-byte value at [rcx] by the table entry
; at [rdx] and write the reduced 16-byte result back to [rcx].
;
; ABI:  Microsoft x64.
; In:   rcx = 16-byte value, updated in place (movdqu load and store)
;       rdx = table; entries at +0 and +32 are read
; Clobbers: xmm0-xmm5 only. The stack pointer is not changed and no
;       nonvolatile register is written, and there is no .pdata/.xdata
;       record for this function.
; NOTE(review): presumably rcx is the GHASH accumulator Xi and rdx the table
; written by gcm_init_clmul -- confirm against the callers.
; ---------------------------------------------------------------------------
182global	gcm_gmult_clmul
183
184ALIGN	16
185gcm_gmult_clmul:
186
187_CET_ENDBR
188$L$_gmult_clmul:
189	movdqu	xmm0,XMMWORD[rcx]
190	movdqa	xmm5,XMMWORD[$L$bswap_mask]
191	movdqu	xmm2,XMMWORD[rdx]
192	movdqu	xmm4,XMMWORD[32+rdx]
; DB encodes: pshufb xmm0,xmm5   (reverse the 16 bytes of the input)
193DB	102,15,56,0,197
194	movdqa	xmm1,xmm0
195	pshufd	xmm3,xmm0,78
196	pxor	xmm3,xmm0
; The three DB lines encode:
;   pclmulqdq xmm0,xmm2,0x00   (lo * lo)
;   pclmulqdq xmm1,xmm2,0x11   (hi * hi)
;   pclmulqdq xmm3,xmm4,0x00   ((hi^lo) * low qword of the entry at +32)
197DB	102,15,58,68,194,0
198DB	102,15,58,68,202,17
199DB	102,15,58,68,220,0
; Middle term = xmm3 ^ lo ^ hi, split across the product in xmm1:xmm0.
200	pxor	xmm3,xmm0
201	pxor	xmm3,xmm1
202
203	movdqa	xmm4,xmm3
204	psrldq	xmm3,8
205	pslldq	xmm4,8
206	pxor	xmm1,xmm3
207	pxor	xmm0,xmm4
208
; Reduction, first part (left shifts by 5, 1, 57).
209	movdqa	xmm4,xmm0
210	movdqa	xmm3,xmm0
211	psllq	xmm0,5
212	pxor	xmm3,xmm0
213	psllq	xmm0,1
214	pxor	xmm0,xmm3
215	psllq	xmm0,57
216	movdqa	xmm3,xmm0
217	pslldq	xmm0,8
218	psrldq	xmm3,8
219	pxor	xmm0,xmm4
220	pxor	xmm1,xmm3
221
222
; Reduction, second part (right shifts by 1, 5, 1); result in xmm0.
223	movdqa	xmm4,xmm0
224	psrlq	xmm0,1
225	pxor	xmm1,xmm4
226	pxor	xmm4,xmm0
227	psrlq	xmm0,5
228	pxor	xmm0,xmm4
229	psrlq	xmm0,1
230	pxor	xmm0,xmm1
; DB encodes: pshufb xmm0,xmm5   (restore the original byte order)
231DB	102,15,56,0,197
232	movdqu	XMMWORD[rcx],xmm0
233	ret
234
235
; ---------------------------------------------------------------------------
; gcm_ghash_clmul -- fold a run of 16-byte input blocks into the 16-byte
; accumulator at [rcx] using PCLMULQDQ (emitted as raw DB bytes).
;
; ABI:  Microsoft x64.
; In:   rcx = 16-byte accumulator, read at entry and written at $L$done
;       rdx = table; entries at +0, +16, +32, +48, +64 and +80 are read
;       r8  = input pointer (read with movdqu, advanced as blocks are used)
;       r9  = input length in bytes
; Frame: 168 bytes; xmm6..xmm15 are spilled at [rsp+0]..[rsp+144] and
;       restored before returning.
; Clobbers: rax, r8, r9, xmm0-xmm5, flags.
; Paths: 4 blocks per iteration ($L$mod4_loop / $L$tail4x), 2 blocks per
;       iteration ($L$mod_loop / $L$even_tail), and a single trailing block
;       ($L$odd_tail).
; NOTE(review): there is no branch for r9 == 0 or for lengths that are not
; a multiple of 16 -- presumably callers guarantee a non-zero multiple of
; 16; confirm against the callers.
; ---------------------------------------------------------------------------
236global	gcm_ghash_clmul
237
238ALIGN	32
239gcm_ghash_clmul:
240
241$L$SEH_begin_gcm_ghash_clmul_1:
242_CET_ENDBR
243$L$_ghash_clmul:
; Prologue: rax = old rsp - 136, rsp = rax - 32 (168 bytes in total).
; The saves below are addressed through rax; the $L$SEH_prolog_* labels
; mark the offsets referenced by the unwind codes in .xdata.
244	lea	rax,[((-136))+rsp]
245	lea	rsp,[((-32))+rax]
246$L$SEH_prolog_gcm_ghash_clmul_2:
247	movaps	XMMWORD[(-32)+rax],xmm6
248$L$SEH_prolog_gcm_ghash_clmul_3:
249	movaps	XMMWORD[(-16)+rax],xmm7
250$L$SEH_prolog_gcm_ghash_clmul_4:
251	movaps	XMMWORD[rax],xmm8
252$L$SEH_prolog_gcm_ghash_clmul_5:
253	movaps	XMMWORD[16+rax],xmm9
254$L$SEH_prolog_gcm_ghash_clmul_6:
255	movaps	XMMWORD[32+rax],xmm10
256$L$SEH_prolog_gcm_ghash_clmul_7:
257	movaps	XMMWORD[48+rax],xmm11
258$L$SEH_prolog_gcm_ghash_clmul_8:
259	movaps	XMMWORD[64+rax],xmm12
260$L$SEH_prolog_gcm_ghash_clmul_9:
261	movaps	XMMWORD[80+rax],xmm13
262$L$SEH_prolog_gcm_ghash_clmul_10:
263	movaps	XMMWORD[96+rax],xmm14
264$L$SEH_prolog_gcm_ghash_clmul_11:
265	movaps	XMMWORD[112+rax],xmm15
266$L$SEH_prolog_gcm_ghash_clmul_12:
; xmm10 = byte-reversal mask for the whole function.
267	movdqa	xmm10,XMMWORD[$L$bswap_mask]
268
; xmm0 = accumulator, xmm2 = table[+0], xmm7 = table[+32].
; DB encodes: pshufb xmm0,xmm10
269	movdqu	xmm0,XMMWORD[rcx]
270	movdqu	xmm2,XMMWORD[rdx]
271	movdqu	xmm7,XMMWORD[32+rdx]
272DB	102,65,15,56,0,194
273
; Exactly one block (r9 == 16): go straight to the single-block tail.
274	sub	r9,0x10
275	jz	NEAR $L$odd_tail
276
; xmm6 = table[+16]. eax = second dword of OPENSSL_ia32cap_P (this
; overwrites the frame base kept in rax; the epilogue only uses rsp).
; With r9 already reduced by 16, "r9 < 0x30" means fewer than four blocks.
277	movdqu	xmm6,XMMWORD[16+rdx]
278	lea	rax,[OPENSSL_ia32cap_P]
279	mov	eax,DWORD[4+rax]
280	cmp	r9,0x30
281	jb	NEAR $L$skip4x
282
; 71303168 = 0x04400000 (bits 22 and 26); 4194304 = 0x00400000 (bit 22).
; The 4-block path is skipped when bit 22 is set and bit 26 is clear.
; NOTE(review): presumably these are the MOVBE and XSAVE feature bits,
; used to detect cores where the 4x path is not profitable -- confirm
; against the Perl source.
283	and	eax,71303168
284	cmp	eax,4194304
285	je	NEAR $L$skip4x
286
; ---- 4-blocks-per-iteration path ----
; rax = 8-byte constant, later moved into xmm9 and indexed with pshufb
; inside $L$mod4_loop. xmm14 = table[+48], xmm15 = table[+64].
287	sub	r9,0x30
288	mov	rax,0xA040608020C0E000
289	movdqu	xmm14,XMMWORD[48+rdx]
290	movdqu	xmm15,XMMWORD[64+rdx]
291
292
293
294
; Blocks at r8+48 and r8+32.
; DB lines encode: pshufb xmm3,xmm10 ; pshufb xmm11,xmm10
295	movdqu	xmm3,XMMWORD[48+r8]
296	movdqu	xmm11,XMMWORD[32+r8]
297DB	102,65,15,56,0,218
298DB	102,69,15,56,0,218
299	movdqa	xmm5,xmm3
300	pshufd	xmm4,xmm3,78
301	pxor	xmm4,xmm3
; DB lines encode:
;   pclmulqdq xmm3,xmm2,0x00 ; pclmulqdq xmm5,xmm2,0x11
;   pclmulqdq xmm4,xmm7,0x00
302DB	102,15,58,68,218,0
303DB	102,15,58,68,234,17
304DB	102,15,58,68,231,0
305
306	movdqa	xmm13,xmm11
307	pshufd	xmm12,xmm11,78
308	pxor	xmm12,xmm11
; DB lines encode:
;   pclmulqdq xmm11,xmm6,0x00 ; pclmulqdq xmm13,xmm6,0x11
;   pclmulqdq xmm12,xmm7,0x10
309DB	102,68,15,58,68,222,0
310DB	102,68,15,58,68,238,17
311DB	102,68,15,58,68,231,16
; Accumulate the partial products; xmm7 is reloaded with table[+80].
312	xorps	xmm3,xmm11
313	xorps	xmm5,xmm13
314	movups	xmm7,XMMWORD[80+rdx]
315	xorps	xmm4,xmm12
316
; Blocks at r8+16 and r8+0; the accumulator is xored into the block at r8.
; DB lines encode: pshufb xmm11,xmm10 ; pshufb xmm8,xmm10
317	movdqu	xmm11,XMMWORD[16+r8]
318	movdqu	xmm8,XMMWORD[r8]
319DB	102,69,15,56,0,218
320DB	102,69,15,56,0,194
321	movdqa	xmm13,xmm11
322	pshufd	xmm12,xmm11,78
323	pxor	xmm0,xmm8
324	pxor	xmm12,xmm11
; DB encodes: pclmulqdq xmm11,xmm14,0x00
325DB	102,69,15,58,68,222,0
326	movdqa	xmm1,xmm0
327	pshufd	xmm8,xmm0,78
328	pxor	xmm8,xmm0
; DB lines encode:
;   pclmulqdq xmm13,xmm14,0x11 ; pclmulqdq xmm12,xmm7,0x00
329DB	102,69,15,58,68,238,17
330DB	102,68,15,58,68,231,0
331	xorps	xmm3,xmm11
332	xorps	xmm5,xmm13
333
; Advance past the four blocks; a borrow means fewer than four remain.
334	lea	r8,[64+r8]
335	sub	r9,0x40
336	jc	NEAR $L$tail4x
337
338	jmp	NEAR $L$mod4_loop
339ALIGN	32
; Main 4x loop: finishes the multiply for the previous group (by xmm15 and
; xmm7), reduces it, and starts the products for the next four blocks.
; The instruction order is interleaved on purpose; do not reorder.
340$L$mod4_loop:
; DB encodes: pclmulqdq xmm0,xmm15,0x00
341DB	102,65,15,58,68,199,0
342	xorps	xmm4,xmm12
343	movdqu	xmm11,XMMWORD[48+r8]
; DB lines encode: pshufb xmm11,xmm10 ; pclmulqdq xmm1,xmm15,0x11
344DB	102,69,15,56,0,218
345DB	102,65,15,58,68,207,17
346	xorps	xmm0,xmm3
347	movdqu	xmm3,XMMWORD[32+r8]
348	movdqa	xmm13,xmm11
; DB encodes: pclmulqdq xmm8,xmm7,0x10
349DB	102,68,15,58,68,199,16
350	pshufd	xmm12,xmm11,78
351	xorps	xmm1,xmm5
352	pxor	xmm12,xmm11
; DB encodes: pshufb xmm3,xmm10
353DB	102,65,15,56,0,218
354	movups	xmm7,XMMWORD[32+rdx]
355	xorps	xmm8,xmm4
; DB encodes: pclmulqdq xmm11,xmm2,0x00
356DB	102,68,15,58,68,218,0
357	pshufd	xmm4,xmm3,78
358
359	pxor	xmm8,xmm0
360	movdqa	xmm5,xmm3
361	pxor	xmm8,xmm1
362	pxor	xmm4,xmm3
363	movdqa	xmm9,xmm8
; DB encodes: pclmulqdq xmm13,xmm2,0x11
364DB	102,68,15,58,68,234,17
365	pslldq	xmm8,8
366	psrldq	xmm9,8
367	pxor	xmm0,xmm8
368	movdqa	xmm8,XMMWORD[$L$7_mask]
369	pxor	xmm1,xmm9
; DB encodes: movq xmm9,rax   (the constant loaded before the loop)
370DB	102,76,15,110,200
371
372	pand	xmm8,xmm0
; DB encodes: pshufb xmm9,xmm8   (byte lookup indexed by xmm0 & 7_mask)
373DB	102,69,15,56,0,200
374	pxor	xmm9,xmm0
; DB encodes: pclmulqdq xmm12,xmm7,0x00
375DB	102,68,15,58,68,231,0
376	psllq	xmm9,57
377	movdqa	xmm8,xmm9
378	pslldq	xmm9,8
; DB encodes: pclmulqdq xmm3,xmm6,0x00
379DB	102,15,58,68,222,0
380	psrldq	xmm8,8
381	pxor	xmm0,xmm9
382	pxor	xmm1,xmm8
383	movdqu	xmm8,XMMWORD[r8]
384
385	movdqa	xmm9,xmm0
386	psrlq	xmm0,1
; DB encodes: pclmulqdq xmm5,xmm6,0x11
387DB	102,15,58,68,238,17
388	xorps	xmm3,xmm11
389	movdqu	xmm11,XMMWORD[16+r8]
; DB lines encode: pshufb xmm11,xmm10 ; pclmulqdq xmm4,xmm7,0x10
390DB	102,69,15,56,0,218
391DB	102,15,58,68,231,16
392	xorps	xmm5,xmm13
393	movups	xmm7,XMMWORD[80+rdx]
; DB encodes: pshufb xmm8,xmm10
394DB	102,69,15,56,0,194
395	pxor	xmm1,xmm9
396	pxor	xmm9,xmm0
397	psrlq	xmm0,5
398
399	movdqa	xmm13,xmm11
400	pxor	xmm4,xmm12
401	pshufd	xmm12,xmm11,78
402	pxor	xmm0,xmm9
403	pxor	xmm1,xmm8
404	pxor	xmm12,xmm11
; DB encodes: pclmulqdq xmm11,xmm14,0x00
405DB	102,69,15,58,68,222,0
406	psrlq	xmm0,1
407	pxor	xmm0,xmm1
408	movdqa	xmm1,xmm0
; DB encodes: pclmulqdq xmm13,xmm14,0x11
409DB	102,69,15,58,68,238,17
410	xorps	xmm3,xmm11
411	pshufd	xmm8,xmm0,78
412	pxor	xmm8,xmm0
413
; DB encodes: pclmulqdq xmm12,xmm7,0x00
414DB	102,68,15,58,68,231,0
415	xorps	xmm5,xmm13
416
417	lea	r8,[64+r8]
418	sub	r9,0x40
419	jnc	NEAR $L$mod4_loop
420
; Finish the last group of four: complete the multiply by xmm15 / xmm7,
; merge the partial sums, then run the full reduction.
421$L$tail4x:
; DB lines encode:
;   pclmulqdq xmm0,xmm15,0x00 ; pclmulqdq xmm1,xmm15,0x11
;   pclmulqdq xmm8,xmm7,0x10
422DB	102,65,15,58,68,199,0
423DB	102,65,15,58,68,207,17
424DB	102,68,15,58,68,199,16
425	xorps	xmm4,xmm12
426	xorps	xmm0,xmm3
427	xorps	xmm1,xmm5
428	pxor	xmm1,xmm0
429	pxor	xmm8,xmm4
430
431	pxor	xmm8,xmm1
432	pxor	xmm1,xmm0
433
434	movdqa	xmm9,xmm8
435	psrldq	xmm8,8
436	pslldq	xmm9,8
437	pxor	xmm1,xmm8
438	pxor	xmm0,xmm9
439
; Reduction, first part (left shifts by 5, 1, 57).
440	movdqa	xmm4,xmm0
441	movdqa	xmm3,xmm0
442	psllq	xmm0,5
443	pxor	xmm3,xmm0
444	psllq	xmm0,1
445	pxor	xmm0,xmm3
446	psllq	xmm0,57
447	movdqa	xmm3,xmm0
448	pslldq	xmm0,8
449	psrldq	xmm3,8
450	pxor	xmm0,xmm4
451	pxor	xmm1,xmm3
452
453
; Reduction, second part (right shifts by 1, 5, 1); result in xmm0.
454	movdqa	xmm4,xmm0
455	psrlq	xmm0,1
456	pxor	xmm1,xmm4
457	pxor	xmm4,xmm0
458	psrlq	xmm0,5
459	pxor	xmm0,xmm4
460	psrlq	xmm0,1
461	pxor	xmm0,xmm1
; Undo the last subtraction: r9 = bytes still unprocessed. Zero means done;
; exactly 16 means one trailing block; otherwise fall into the 2x path.
; xmm7 is reloaded with table[+32] for the remaining paths.
462	add	r9,0x40
463	jz	NEAR $L$done
464	movdqu	xmm7,XMMWORD[32+rdx]
465	sub	r9,0x10
466	jz	NEAR $L$odd_tail
; ---- 2-blocks-per-iteration path ----
467$L$skip4x:
468
469
470
471
472
; Load two blocks, xor the accumulator into the first, and start the
; products for the second (xmm3) against xmm2.
; DB lines encode: pshufb xmm8,xmm10 ; pshufb xmm3,xmm10
473	movdqu	xmm8,XMMWORD[r8]
474	movdqu	xmm3,XMMWORD[16+r8]
475DB	102,69,15,56,0,194
476DB	102,65,15,56,0,218
477	pxor	xmm0,xmm8
478
479	movdqa	xmm5,xmm3
480	pshufd	xmm4,xmm3,78
481	pxor	xmm4,xmm3
; DB lines encode:
;   pclmulqdq xmm3,xmm2,0x00 ; pclmulqdq xmm5,xmm2,0x11
;   pclmulqdq xmm4,xmm7,0x00
482DB	102,15,58,68,218,0
483DB	102,15,58,68,234,17
484DB	102,15,58,68,231,0
485
486	lea	r8,[32+r8]
487	nop
488	sub	r9,0x20
489	jbe	NEAR $L$even_tail
490	nop
491	jmp	NEAR $L$mod_loop
492
493ALIGN	32
; 2x loop: multiplies the accumulator by xmm6, merges the pending products
; for the second block, reduces, and starts the next pair.
494$L$mod_loop:
495	movdqa	xmm1,xmm0
496	movdqa	xmm8,xmm4
497	pshufd	xmm4,xmm0,78
498	pxor	xmm4,xmm0
499
; DB lines encode:
;   pclmulqdq xmm0,xmm6,0x00 ; pclmulqdq xmm1,xmm6,0x11
;   pclmulqdq xmm4,xmm7,0x10
500DB	102,15,58,68,198,0
501DB	102,15,58,68,206,17
502DB	102,15,58,68,231,16
503
504	pxor	xmm0,xmm3
505	pxor	xmm1,xmm5
506	movdqu	xmm9,XMMWORD[r8]
507	pxor	xmm8,xmm0
; DB encodes: pshufb xmm9,xmm10
508DB	102,69,15,56,0,202
509	movdqu	xmm3,XMMWORD[16+r8]
510
511	pxor	xmm8,xmm1
512	pxor	xmm1,xmm9
513	pxor	xmm4,xmm8
; DB encodes: pshufb xmm3,xmm10
514DB	102,65,15,56,0,218
515	movdqa	xmm8,xmm4
516	psrldq	xmm8,8
517	pslldq	xmm4,8
518	pxor	xmm1,xmm8
519	pxor	xmm0,xmm4
520
521	movdqa	xmm5,xmm3
522
523	movdqa	xmm9,xmm0
524	movdqa	xmm8,xmm0
525	psllq	xmm0,5
526	pxor	xmm8,xmm0
; DB encodes: pclmulqdq xmm3,xmm2,0x00
527DB	102,15,58,68,218,0
528	psllq	xmm0,1
529	pxor	xmm0,xmm8
530	psllq	xmm0,57
531	movdqa	xmm8,xmm0
532	pslldq	xmm0,8
533	psrldq	xmm8,8
534	pxor	xmm0,xmm9
535	pshufd	xmm4,xmm5,78
536	pxor	xmm1,xmm8
537	pxor	xmm4,xmm5
538
539	movdqa	xmm9,xmm0
540	psrlq	xmm0,1
; DB encodes: pclmulqdq xmm5,xmm2,0x11
541DB	102,15,58,68,234,17
542	pxor	xmm1,xmm9
543	pxor	xmm9,xmm0
544	psrlq	xmm0,5
545	pxor	xmm0,xmm9
546	lea	r8,[32+r8]
547	psrlq	xmm0,1
; DB encodes: pclmulqdq xmm4,xmm7,0x00
548DB	102,15,58,68,231,0
549	pxor	xmm0,xmm1
550
551	sub	r9,0x20
552	ja	NEAR $L$mod_loop
553
; Finish the last pair: same multiply/merge as the loop body, without
; loading further input, followed by the full reduction.
554$L$even_tail:
555	movdqa	xmm1,xmm0
556	movdqa	xmm8,xmm4
557	pshufd	xmm4,xmm0,78
558	pxor	xmm4,xmm0
559
; DB lines encode:
;   pclmulqdq xmm0,xmm6,0x00 ; pclmulqdq xmm1,xmm6,0x11
;   pclmulqdq xmm4,xmm7,0x10
560DB	102,15,58,68,198,0
561DB	102,15,58,68,206,17
562DB	102,15,58,68,231,16
563
564	pxor	xmm0,xmm3
565	pxor	xmm1,xmm5
566	pxor	xmm8,xmm0
567	pxor	xmm8,xmm1
568	pxor	xmm4,xmm8
569	movdqa	xmm8,xmm4
570	psrldq	xmm8,8
571	pslldq	xmm4,8
572	pxor	xmm1,xmm8
573	pxor	xmm0,xmm4
574
575	movdqa	xmm4,xmm0
576	movdqa	xmm3,xmm0
577	psllq	xmm0,5
578	pxor	xmm3,xmm0
579	psllq	xmm0,1
580	pxor	xmm0,xmm3
581	psllq	xmm0,57
582	movdqa	xmm3,xmm0
583	pslldq	xmm0,8
584	psrldq	xmm3,8
585	pxor	xmm0,xmm4
586	pxor	xmm1,xmm3
587
588
589	movdqa	xmm4,xmm0
590	psrlq	xmm0,1
591	pxor	xmm1,xmm4
592	pxor	xmm4,xmm0
593	psrlq	xmm0,5
594	pxor	xmm0,xmm4
595	psrlq	xmm0,1
596	pxor	xmm0,xmm1
; r9 == 0 here means exactly one block is still pending; any other value
; means all input has been consumed.
597	test	r9,r9
598	jnz	NEAR $L$done
599
; Single trailing block: xor it into the accumulator and multiply by xmm2
; (table[+0]), using the low qword of xmm7 for the middle term.
600$L$odd_tail:
601	movdqu	xmm8,XMMWORD[r8]
; DB encodes: pshufb xmm8,xmm10
602DB	102,69,15,56,0,194
603	pxor	xmm0,xmm8
604	movdqa	xmm1,xmm0
605	pshufd	xmm3,xmm0,78
606	pxor	xmm3,xmm0
; DB lines encode:
;   pclmulqdq xmm0,xmm2,0x00 ; pclmulqdq xmm1,xmm2,0x11
;   pclmulqdq xmm3,xmm7,0x00
607DB	102,15,58,68,194,0
608DB	102,15,58,68,202,17
609DB	102,15,58,68,223,0
610	pxor	xmm3,xmm0
611	pxor	xmm3,xmm1
612
613	movdqa	xmm4,xmm3
614	psrldq	xmm3,8
615	pslldq	xmm4,8
616	pxor	xmm1,xmm3
617	pxor	xmm0,xmm4
618
619	movdqa	xmm4,xmm0
620	movdqa	xmm3,xmm0
621	psllq	xmm0,5
622	pxor	xmm3,xmm0
623	psllq	xmm0,1
624	pxor	xmm0,xmm3
625	psllq	xmm0,57
626	movdqa	xmm3,xmm0
627	pslldq	xmm0,8
628	psrldq	xmm3,8
629	pxor	xmm0,xmm4
630	pxor	xmm1,xmm3
631
632
633	movdqa	xmm4,xmm0
634	psrlq	xmm0,1
635	pxor	xmm1,xmm4
636	pxor	xmm4,xmm0
637	psrlq	xmm0,5
638	pxor	xmm0,xmm4
639	psrlq	xmm0,1
640	pxor	xmm0,xmm1
; Byte-reverse the accumulator back, store it, restore xmm6..xmm15 and
; release the 168-byte frame.
641$L$done:
; DB encodes: pshufb xmm0,xmm10
642DB	102,65,15,56,0,194
643	movdqu	XMMWORD[rcx],xmm0
644	movaps	xmm6,XMMWORD[rsp]
645	movaps	xmm7,XMMWORD[16+rsp]
646	movaps	xmm8,XMMWORD[32+rsp]
647	movaps	xmm9,XMMWORD[48+rsp]
648	movaps	xmm10,XMMWORD[64+rsp]
649	movaps	xmm11,XMMWORD[80+rsp]
650	movaps	xmm12,XMMWORD[96+rsp]
651	movaps	xmm13,XMMWORD[112+rsp]
652	movaps	xmm14,XMMWORD[128+rsp]
653	movaps	xmm15,XMMWORD[144+rsp]
654	lea	rsp,[168+rsp]
655	ret
656
657$L$SEH_end_gcm_ghash_clmul_13:
658
; ---------------------------------------------------------------------------
; gcm_init_avx -- AVX version of the table setup. Builds a 192-byte table
; (twelve 16-byte entries) at [rcx] from the 16-byte value at [rdx].
;
; ABI:  Microsoft x64.
; In:   rcx = destination table (advanced by 48 per loop iteration)
;       rdx = 16-byte source value
; Out:  four groups of three entries. With V = the adjusted input and
;       P(k) = V multiplied k-1 times by V (carry-less, reduced), group i
;       (i = 0..3) holds P(2i+1), P(2i+2), and one entry packing the
;       (hi xor lo) values of those two.
; Clobbers: rcx, r10, xmm0-xmm5, flags. xmm6 is saved and restored.
; NOTE(review): presumably [rdx] is the GHASH hash key H and the table holds
; H..H^8 -- confirm against the Perl source.
; ---------------------------------------------------------------------------
659global	gcm_init_avx
660
661ALIGN	32
662gcm_init_avx:
663
664_CET_ENDBR
665$L$SEH_begin_gcm_init_avx_1:
; Prologue: reserve 24 bytes and spill xmm6 at [rsp].
666	sub	rsp,0x18
667$L$SEH_prolog_gcm_init_avx_2:
668	movaps	XMMWORD[rsp],xmm6
669$L$SEH_prolog_gcm_init_avx_3:
670	vzeroupper
671
; Load the input and swap its two 64-bit halves.
672	vmovdqu	xmm2,XMMWORD[rdx]
673	vpshufd	xmm2,xmm2,78
674
675
; Shift the 128-bit value left by one bit (carry between qwords via
; vpsrlq 63 / vpslldq 8). xmm5 becomes an all-ones mask when the top
; dword of the pre-shift value is negative.
676	vpshufd	xmm4,xmm2,255
677	vpsrlq	xmm3,xmm2,63
678	vpsllq	xmm2,xmm2,1
679	vpxor	xmm5,xmm5,xmm5
680	vpcmpgtd	xmm5,xmm5,xmm4
681	vpslldq	xmm3,xmm3,8
682	vpor	xmm2,xmm2,xmm3
683
684
; Conditionally xor in the $L$0x1c2_polynomial constant.
685	vpand	xmm5,xmm5,XMMWORD[$L$0x1c2_polynomial]
686	vpxor	xmm2,xmm2,xmm5
687
; xmm2 = V, xmm6 = V.hi xor V.lo, xmm0 = running power (starts at V).
; r10 counts the four loop iterations; the first one enters at
; $L$init_start_avx.
688	vpunpckhqdq	xmm6,xmm2,xmm2
689	vmovdqa	xmm0,xmm2
690	vpxor	xmm6,xmm6,xmm2
691	mov	r10,4
692	jmp	NEAR $L$init_start_avx
693ALIGN	32
694$L$init_loop_avx:
; Store the packed (hi xor lo) entry for the pair written by the previous
; iteration; rcx has already been advanced, hence the -16 offset.
695	vpalignr	xmm5,xmm4,xmm3,8
696	vmovdqu	XMMWORD[(-16)+rcx],xmm5
; xmm0 = xmm0 * V: three carry-less multiplies (hi*hi, lo*lo and the
; product of the two hi^lo values), then the middle term is split across
; xmm1:xmm0.
697	vpunpckhqdq	xmm3,xmm0,xmm0
698	vpxor	xmm3,xmm3,xmm0
699	vpclmulqdq	xmm1,xmm0,xmm2,0x11
700	vpclmulqdq	xmm0,xmm0,xmm2,0x00
701	vpclmulqdq	xmm3,xmm3,xmm6,0x00
702	vpxor	xmm4,xmm1,xmm0
703	vpxor	xmm3,xmm3,xmm4
704
705	vpslldq	xmm4,xmm3,8
706	vpsrldq	xmm3,xmm3,8
707	vpxor	xmm0,xmm0,xmm4
708	vpxor	xmm1,xmm1,xmm3
; Reduction, first part (left shifts by 57, 62, 63).
709	vpsllq	xmm3,xmm0,57
710	vpsllq	xmm4,xmm0,62
711	vpxor	xmm4,xmm4,xmm3
712	vpsllq	xmm3,xmm0,63
713	vpxor	xmm4,xmm4,xmm3
714	vpslldq	xmm3,xmm4,8
715	vpsrldq	xmm4,xmm4,8
716	vpxor	xmm0,xmm0,xmm3
717	vpxor	xmm1,xmm1,xmm4
718
; Reduction, second part (right shifts by 1, 5, 1); result in xmm0.
719	vpsrlq	xmm4,xmm0,1
720	vpxor	xmm1,xmm1,xmm0
721	vpxor	xmm0,xmm0,xmm4
722	vpsrlq	xmm4,xmm4,5
723	vpxor	xmm0,xmm0,xmm4
724	vpsrlq	xmm0,xmm0,1
725	vpxor	xmm0,xmm0,xmm1
726$L$init_start_avx:
; xmm5 = current (odd) power; multiply xmm0 by V again to get the next
; (even) power.
727	vmovdqa	xmm5,xmm0
728	vpunpckhqdq	xmm3,xmm0,xmm0
729	vpxor	xmm3,xmm3,xmm0
730	vpclmulqdq	xmm1,xmm0,xmm2,0x11
731	vpclmulqdq	xmm0,xmm0,xmm2,0x00
732	vpclmulqdq	xmm3,xmm3,xmm6,0x00
733	vpxor	xmm4,xmm1,xmm0
734	vpxor	xmm3,xmm3,xmm4
735
736	vpslldq	xmm4,xmm3,8
737	vpsrldq	xmm3,xmm3,8
738	vpxor	xmm0,xmm0,xmm4
739	vpxor	xmm1,xmm1,xmm3
740	vpsllq	xmm3,xmm0,57
741	vpsllq	xmm4,xmm0,62
742	vpxor	xmm4,xmm4,xmm3
743	vpsllq	xmm3,xmm0,63
744	vpxor	xmm4,xmm4,xmm3
745	vpslldq	xmm3,xmm4,8
746	vpsrldq	xmm4,xmm4,8
747	vpxor	xmm0,xmm0,xmm3
748	vpxor	xmm1,xmm1,xmm4
749
750	vpsrlq	xmm4,xmm0,1
751	vpxor	xmm1,xmm1,xmm0
752	vpxor	xmm0,xmm0,xmm4
753	vpsrlq	xmm4,xmm4,5
754	vpxor	xmm0,xmm0,xmm4
755	vpsrlq	xmm0,xmm0,1
756	vpxor	xmm0,xmm0,xmm1
; Store the pair (odd power, even power); xmm3 / xmm4 receive their
; (hi xor lo) values for the packed entry. Then advance rcx to the next
; group of three entries.
757	vpshufd	xmm3,xmm5,78
758	vpshufd	xmm4,xmm0,78
759	vpxor	xmm3,xmm3,xmm5
760	vmovdqu	XMMWORD[rcx],xmm5
761	vpxor	xmm4,xmm4,xmm0
762	vmovdqu	XMMWORD[16+rcx],xmm0
763	lea	rcx,[48+rcx]
764	sub	r10,1
765	jnz	NEAR $L$init_loop_avx
766
; Packed entry for the final pair. The source operands are in the opposite
; order from the vpalignr at the top of the loop, so the two halves of this
; last entry are stored swapped relative to the earlier packed entries.
767	vpalignr	xmm5,xmm3,xmm4,8
768	vmovdqu	XMMWORD[(-16)+rcx],xmm5
769
; Epilogue: restore xmm6 and release the 24-byte frame.
770	vzeroupper
771	movaps	xmm6,XMMWORD[rsp]
772	lea	rsp,[24+rsp]
773	ret
774$L$SEH_end_gcm_init_avx_4:
775
776
; ---------------------------------------------------------------------------
; gcm_ghash_avx -- AVX version of the block-hashing routine. Folds a run of
; 16-byte input blocks into the 16-byte accumulator at [rcx].
;
; ABI:  Microsoft x64.
; In:   rcx = 16-byte accumulator, read at entry and written before return
;       rdx = table; biased by +64 on entry, so an operand written as
;             (k-64)+rdx addresses byte offset k of the caller's table
;             (offsets 0..184 are read)
;       r8  = input pointer
;       r9  = input length in bytes
; Frame: 168 bytes; xmm6..xmm15 are spilled at [rsp+0]..[rsp+144] and
;       restored before returning.
; Clobbers: rax, rdx, r8, r9, r10, xmm0-xmm5, flags.
; Paths: 8 blocks per iteration while at least 128 bytes remain
;       ($L$oop8x_avx), otherwise 1..8 blocks via $L$short_avx; both finish
;       in $L$tail_avx, which reduces and loops back while r9 != 0.
; NOTE(review): presumably rdx is the table written by gcm_init_avx and r9
; is a non-zero multiple of 16 -- confirm against the callers.
; ---------------------------------------------------------------------------
777global	gcm_ghash_avx
778
779ALIGN	32
780gcm_ghash_avx:
781
782_CET_ENDBR
783$L$SEH_begin_gcm_ghash_avx_1:
; Prologue: rax = old rsp - 136, rsp = rax - 32 (168 bytes in total); the
; saves below are addressed through rax.
784	lea	rax,[((-136))+rsp]
785	lea	rsp,[((-32))+rax]
786$L$SEH_prolog_gcm_ghash_avx_2:
787	movaps	XMMWORD[(-32)+rax],xmm6
788$L$SEH_prolog_gcm_ghash_avx_3:
789	movaps	XMMWORD[(-16)+rax],xmm7
790$L$SEH_prolog_gcm_ghash_avx_4:
791	movaps	XMMWORD[rax],xmm8
792$L$SEH_prolog_gcm_ghash_avx_5:
793	movaps	XMMWORD[16+rax],xmm9
794$L$SEH_prolog_gcm_ghash_avx_6:
795	movaps	XMMWORD[32+rax],xmm10
796$L$SEH_prolog_gcm_ghash_avx_7:
797	movaps	XMMWORD[48+rax],xmm11
798$L$SEH_prolog_gcm_ghash_avx_8:
799	movaps	XMMWORD[64+rax],xmm12
800$L$SEH_prolog_gcm_ghash_avx_9:
801	movaps	XMMWORD[80+rax],xmm13
802$L$SEH_prolog_gcm_ghash_avx_10:
803	movaps	XMMWORD[96+rax],xmm14
804$L$SEH_prolog_gcm_ghash_avx_11:
805	movaps	XMMWORD[112+rax],xmm15
806$L$SEH_prolog_gcm_ghash_avx_12:
807	vzeroupper
808
; xmm10 = accumulator (byte-reversed), xmm13 = byte-reversal mask,
; r10 = address of the reduction constant, rdx biased by +64.
809	vmovdqu	xmm10,XMMWORD[rcx]
810	lea	r10,[$L$0x1c2_polynomial]
811	lea	rdx,[64+rdx]
812	vmovdqu	xmm13,XMMWORD[$L$bswap_mask]
813	vpshufb	xmm10,xmm10,xmm13
814	cmp	r9,0x80
815	jb	NEAR $L$short_avx
816	sub	r9,0x80
817
; ---- First group of 8 blocks ----
; Blocks are taken from the last (r8+112) to the first (r8+0), each paired
; with the next table entry. xmm0/xmm3 accumulate lo*lo products, xmm1/xmm4
; hi*hi products, xmm2/xmm5 the (hi^lo) products. xmm6 holds the current
; table entry, xmm7 the current packed (hi xor lo) entry.
818	vmovdqu	xmm14,XMMWORD[112+r8]
819	vmovdqu	xmm6,XMMWORD[((0-64))+rdx]
820	vpshufb	xmm14,xmm14,xmm13
821	vmovdqu	xmm7,XMMWORD[((32-64))+rdx]
822
823	vpunpckhqdq	xmm9,xmm14,xmm14
824	vmovdqu	xmm15,XMMWORD[96+r8]
825	vpclmulqdq	xmm0,xmm14,xmm6,0x00
826	vpxor	xmm9,xmm9,xmm14
827	vpshufb	xmm15,xmm15,xmm13
828	vpclmulqdq	xmm1,xmm14,xmm6,0x11
829	vmovdqu	xmm6,XMMWORD[((16-64))+rdx]
830	vpunpckhqdq	xmm8,xmm15,xmm15
831	vmovdqu	xmm14,XMMWORD[80+r8]
832	vpclmulqdq	xmm2,xmm9,xmm7,0x00
833	vpxor	xmm8,xmm8,xmm15
834
835	vpshufb	xmm14,xmm14,xmm13
836	vpclmulqdq	xmm3,xmm15,xmm6,0x00
837	vpunpckhqdq	xmm9,xmm14,xmm14
838	vpclmulqdq	xmm4,xmm15,xmm6,0x11
839	vmovdqu	xmm6,XMMWORD[((48-64))+rdx]
840	vpxor	xmm9,xmm9,xmm14
841	vmovdqu	xmm15,XMMWORD[64+r8]
842	vpclmulqdq	xmm5,xmm8,xmm7,0x10
843	vmovdqu	xmm7,XMMWORD[((80-64))+rdx]
844
845	vpshufb	xmm15,xmm15,xmm13
846	vpxor	xmm3,xmm3,xmm0
847	vpclmulqdq	xmm0,xmm14,xmm6,0x00
848	vpxor	xmm4,xmm4,xmm1
849	vpunpckhqdq	xmm8,xmm15,xmm15
850	vpclmulqdq	xmm1,xmm14,xmm6,0x11
851	vmovdqu	xmm6,XMMWORD[((64-64))+rdx]
852	vpxor	xmm5,xmm5,xmm2
853	vpclmulqdq	xmm2,xmm9,xmm7,0x00
854	vpxor	xmm8,xmm8,xmm15
855
856	vmovdqu	xmm14,XMMWORD[48+r8]
857	vpxor	xmm0,xmm0,xmm3
858	vpclmulqdq	xmm3,xmm15,xmm6,0x00
859	vpxor	xmm1,xmm1,xmm4
860	vpshufb	xmm14,xmm14,xmm13
861	vpclmulqdq	xmm4,xmm15,xmm6,0x11
862	vmovdqu	xmm6,XMMWORD[((96-64))+rdx]
863	vpxor	xmm2,xmm2,xmm5
864	vpunpckhqdq	xmm9,xmm14,xmm14
865	vpclmulqdq	xmm5,xmm8,xmm7,0x10
866	vmovdqu	xmm7,XMMWORD[((128-64))+rdx]
867	vpxor	xmm9,xmm9,xmm14
868
869	vmovdqu	xmm15,XMMWORD[32+r8]
870	vpxor	xmm3,xmm3,xmm0
871	vpclmulqdq	xmm0,xmm14,xmm6,0x00
872	vpxor	xmm4,xmm4,xmm1
873	vpshufb	xmm15,xmm15,xmm13
874	vpclmulqdq	xmm1,xmm14,xmm6,0x11
875	vmovdqu	xmm6,XMMWORD[((112-64))+rdx]
876	vpxor	xmm5,xmm5,xmm2
877	vpunpckhqdq	xmm8,xmm15,xmm15
878	vpclmulqdq	xmm2,xmm9,xmm7,0x00
879	vpxor	xmm8,xmm8,xmm15
880
881	vmovdqu	xmm14,XMMWORD[16+r8]
882	vpxor	xmm0,xmm0,xmm3
883	vpclmulqdq	xmm3,xmm15,xmm6,0x00
884	vpxor	xmm1,xmm1,xmm4
885	vpshufb	xmm14,xmm14,xmm13
886	vpclmulqdq	xmm4,xmm15,xmm6,0x11
887	vmovdqu	xmm6,XMMWORD[((144-64))+rdx]
888	vpxor	xmm2,xmm2,xmm5
889	vpunpckhqdq	xmm9,xmm14,xmm14
890	vpclmulqdq	xmm5,xmm8,xmm7,0x10
891	vmovdqu	xmm7,XMMWORD[((176-64))+rdx]
892	vpxor	xmm9,xmm9,xmm14
893
894	vmovdqu	xmm15,XMMWORD[r8]
895	vpxor	xmm3,xmm3,xmm0
896	vpclmulqdq	xmm0,xmm14,xmm6,0x00
897	vpxor	xmm4,xmm4,xmm1
898	vpshufb	xmm15,xmm15,xmm13
899	vpclmulqdq	xmm1,xmm14,xmm6,0x11
900	vmovdqu	xmm6,XMMWORD[((160-64))+rdx]
901	vpxor	xmm5,xmm5,xmm2
902	vpclmulqdq	xmm2,xmm9,xmm7,0x10
903
; Fewer than 128 bytes left: finish in the tail, which xors the
; accumulator into the first block (xmm15) itself.
904	lea	r8,[128+r8]
905	cmp	r9,0x80
906	jb	NEAR $L$tail_avx
907
; Otherwise xor the accumulator into the first block here and enter the
; steady-state loop.
908	vpxor	xmm15,xmm15,xmm10
909	sub	r9,0x80
910	jmp	NEAR $L$oop8x_avx
911
912ALIGN	32
; Steady-state loop: completes the multiply for the pending first block,
; reduces the previous group's sum using the constant at [r10], and
; computes the products for the next 8 blocks. The instruction order is
; interleaved on purpose; do not reorder.
913$L$oop8x_avx:
914	vpunpckhqdq	xmm8,xmm15,xmm15
915	vmovdqu	xmm14,XMMWORD[112+r8]
916	vpxor	xmm3,xmm3,xmm0
917	vpxor	xmm8,xmm8,xmm15
918	vpclmulqdq	xmm10,xmm15,xmm6,0x00
919	vpshufb	xmm14,xmm14,xmm13
920	vpxor	xmm4,xmm4,xmm1
921	vpclmulqdq	xmm11,xmm15,xmm6,0x11
922	vmovdqu	xmm6,XMMWORD[((0-64))+rdx]
923	vpunpckhqdq	xmm9,xmm14,xmm14
924	vpxor	xmm5,xmm5,xmm2
925	vpclmulqdq	xmm12,xmm8,xmm7,0x00
926	vmovdqu	xmm7,XMMWORD[((32-64))+rdx]
927	vpxor	xmm9,xmm9,xmm14
928
929	vmovdqu	xmm15,XMMWORD[96+r8]
930	vpclmulqdq	xmm0,xmm14,xmm6,0x00
931	vpxor	xmm10,xmm10,xmm3
932	vpshufb	xmm15,xmm15,xmm13
933	vpclmulqdq	xmm1,xmm14,xmm6,0x11
934	vxorps	xmm11,xmm11,xmm4
935	vmovdqu	xmm6,XMMWORD[((16-64))+rdx]
936	vpunpckhqdq	xmm8,xmm15,xmm15
937	vpclmulqdq	xmm2,xmm9,xmm7,0x00
938	vpxor	xmm12,xmm12,xmm5
939	vxorps	xmm8,xmm8,xmm15
940
; xmm12 = middle term of the previous group; split it across xmm11:xmm10.
941	vmovdqu	xmm14,XMMWORD[80+r8]
942	vpxor	xmm12,xmm12,xmm10
943	vpclmulqdq	xmm3,xmm15,xmm6,0x00
944	vpxor	xmm12,xmm12,xmm11
945	vpslldq	xmm9,xmm12,8
946	vpxor	xmm3,xmm3,xmm0
947	vpclmulqdq	xmm4,xmm15,xmm6,0x11
948	vpsrldq	xmm12,xmm12,8
949	vpxor	xmm10,xmm10,xmm9
950	vmovdqu	xmm6,XMMWORD[((48-64))+rdx]
951	vpshufb	xmm14,xmm14,xmm13
952	vxorps	xmm11,xmm11,xmm12
953	vpxor	xmm4,xmm4,xmm1
954	vpunpckhqdq	xmm9,xmm14,xmm14
955	vpclmulqdq	xmm5,xmm8,xmm7,0x10
956	vmovdqu	xmm7,XMMWORD[((80-64))+rdx]
957	vpxor	xmm9,xmm9,xmm14
958	vpxor	xmm5,xmm5,xmm2
959
; First reduction step: swap the halves of xmm10 (vpalignr by 8) and, a few
; instructions later, multiply xmm10 by the constant at [r10].
960	vmovdqu	xmm15,XMMWORD[64+r8]
961	vpalignr	xmm12,xmm10,xmm10,8
962	vpclmulqdq	xmm0,xmm14,xmm6,0x00
963	vpshufb	xmm15,xmm15,xmm13
964	vpxor	xmm0,xmm0,xmm3
965	vpclmulqdq	xmm1,xmm14,xmm6,0x11
966	vmovdqu	xmm6,XMMWORD[((64-64))+rdx]
967	vpunpckhqdq	xmm8,xmm15,xmm15
968	vpxor	xmm1,xmm1,xmm4
969	vpclmulqdq	xmm2,xmm9,xmm7,0x00
970	vxorps	xmm8,xmm8,xmm15
971	vpxor	xmm2,xmm2,xmm5
972
973	vmovdqu	xmm14,XMMWORD[48+r8]
974	vpclmulqdq	xmm10,xmm10,XMMWORD[r10],0x10
975	vpclmulqdq	xmm3,xmm15,xmm6,0x00
976	vpshufb	xmm14,xmm14,xmm13
977	vpxor	xmm3,xmm3,xmm0
978	vpclmulqdq	xmm4,xmm15,xmm6,0x11
979	vmovdqu	xmm6,XMMWORD[((96-64))+rdx]
980	vpunpckhqdq	xmm9,xmm14,xmm14
981	vpxor	xmm4,xmm4,xmm1
982	vpclmulqdq	xmm5,xmm8,xmm7,0x10
983	vmovdqu	xmm7,XMMWORD[((128-64))+rdx]
984	vpxor	xmm9,xmm9,xmm14
985	vpxor	xmm5,xmm5,xmm2
986
987	vmovdqu	xmm15,XMMWORD[32+r8]
988	vpclmulqdq	xmm0,xmm14,xmm6,0x00
989	vpshufb	xmm15,xmm15,xmm13
990	vpxor	xmm0,xmm0,xmm3
991	vpclmulqdq	xmm1,xmm14,xmm6,0x11
992	vmovdqu	xmm6,XMMWORD[((112-64))+rdx]
993	vpunpckhqdq	xmm8,xmm15,xmm15
994	vpxor	xmm1,xmm1,xmm4
995	vpclmulqdq	xmm2,xmm9,xmm7,0x00
996	vpxor	xmm8,xmm8,xmm15
997	vpxor	xmm2,xmm2,xmm5
998	vxorps	xmm10,xmm10,xmm12
999
; Second reduction step, same pattern as the first; the high part (xmm11)
; is folded in through xmm12.
1000	vmovdqu	xmm14,XMMWORD[16+r8]
1001	vpalignr	xmm12,xmm10,xmm10,8
1002	vpclmulqdq	xmm3,xmm15,xmm6,0x00
1003	vpshufb	xmm14,xmm14,xmm13
1004	vpxor	xmm3,xmm3,xmm0
1005	vpclmulqdq	xmm4,xmm15,xmm6,0x11
1006	vmovdqu	xmm6,XMMWORD[((144-64))+rdx]
1007	vpclmulqdq	xmm10,xmm10,XMMWORD[r10],0x10
1008	vxorps	xmm12,xmm12,xmm11
1009	vpunpckhqdq	xmm9,xmm14,xmm14
1010	vpxor	xmm4,xmm4,xmm1
1011	vpclmulqdq	xmm5,xmm8,xmm7,0x10
1012	vmovdqu	xmm7,XMMWORD[((176-64))+rdx]
1013	vpxor	xmm9,xmm9,xmm14
1014	vpxor	xmm5,xmm5,xmm2
1015
; Load the first block of this group and xor the freshly reduced
; accumulator (xmm12 ^ xmm10) into it.
1016	vmovdqu	xmm15,XMMWORD[r8]
1017	vpclmulqdq	xmm0,xmm14,xmm6,0x00
1018	vpshufb	xmm15,xmm15,xmm13
1019	vpclmulqdq	xmm1,xmm14,xmm6,0x11
1020	vmovdqu	xmm6,XMMWORD[((160-64))+rdx]
1021	vpxor	xmm15,xmm15,xmm12
1022	vpclmulqdq	xmm2,xmm9,xmm7,0x10
1023	vpxor	xmm15,xmm15,xmm10
1024
1025	lea	r8,[128+r8]
1026	sub	r9,0x80
1027	jnc	NEAR $L$oop8x_avx
1028
; Undo the last subtraction (r9 = bytes left, below 128). The accumulator
; is already folded into xmm15, so enter the tail after its xor.
1029	add	r9,0x80
1030	jmp	NEAR $L$tail_no_xor_avx
1031
1032ALIGN	32
; ---- Short path: between 1 and 8 blocks ----
; Blocks are taken from the end of the input backwards, each paired with
; the next table entry; r8 is moved to the end of the input.
1033$L$short_avx:
1034	vmovdqu	xmm14,XMMWORD[((-16))+r9*1+r8]
1035	lea	r8,[r9*1+r8]
1036	vmovdqu	xmm6,XMMWORD[((0-64))+rdx]
1037	vmovdqu	xmm7,XMMWORD[((32-64))+rdx]
1038	vpshufb	xmm15,xmm14,xmm13
1039
; Copy xmm0..xmm2 into xmm3..xmm5 so that the first "vpxor xmm3,xmm3,xmm0"
; (and the matching xmm4/xmm1, xmm5/xmm2 xors) yields zero, whatever the
; registers held before.
1040	vmovdqa	xmm3,xmm0
1041	vmovdqa	xmm4,xmm1
1042	vmovdqa	xmm5,xmm2
1043	sub	r9,0x10
1044	jz	NEAR $L$tail_avx
1045
1046	vpunpckhqdq	xmm8,xmm15,xmm15
1047	vpxor	xmm3,xmm3,xmm0
1048	vpclmulqdq	xmm0,xmm15,xmm6,0x00
1049	vpxor	xmm8,xmm8,xmm15
1050	vmovdqu	xmm14,XMMWORD[((-32))+r8]
1051	vpxor	xmm4,xmm4,xmm1
1052	vpclmulqdq	xmm1,xmm15,xmm6,0x11
1053	vmovdqu	xmm6,XMMWORD[((16-64))+rdx]
1054	vpshufb	xmm15,xmm14,xmm13
1055	vpxor	xmm5,xmm5,xmm2
1056	vpclmulqdq	xmm2,xmm8,xmm7,0x00
; Move the high qword of the packed entry down for the next block.
1057	vpsrldq	xmm7,xmm7,8
1058	sub	r9,0x10
1059	jz	NEAR $L$tail_avx
1060
1061	vpunpckhqdq	xmm8,xmm15,xmm15
1062	vpxor	xmm3,xmm3,xmm0
1063	vpclmulqdq	xmm0,xmm15,xmm6,0x00
1064	vpxor	xmm8,xmm8,xmm15
1065	vmovdqu	xmm14,XMMWORD[((-48))+r8]
1066	vpxor	xmm4,xmm4,xmm1
1067	vpclmulqdq	xmm1,xmm15,xmm6,0x11
1068	vmovdqu	xmm6,XMMWORD[((48-64))+rdx]
1069	vpshufb	xmm15,xmm14,xmm13
1070	vpxor	xmm5,xmm5,xmm2
1071	vpclmulqdq	xmm2,xmm8,xmm7,0x00
1072	vmovdqu	xmm7,XMMWORD[((80-64))+rdx]
1073	sub	r9,0x10
1074	jz	NEAR $L$tail_avx
1075
1076	vpunpckhqdq	xmm8,xmm15,xmm15
1077	vpxor	xmm3,xmm3,xmm0
1078	vpclmulqdq	xmm0,xmm15,xmm6,0x00
1079	vpxor	xmm8,xmm8,xmm15
1080	vmovdqu	xmm14,XMMWORD[((-64))+r8]
1081	vpxor	xmm4,xmm4,xmm1
1082	vpclmulqdq	xmm1,xmm15,xmm6,0x11
1083	vmovdqu	xmm6,XMMWORD[((64-64))+rdx]
1084	vpshufb	xmm15,xmm14,xmm13
1085	vpxor	xmm5,xmm5,xmm2
1086	vpclmulqdq	xmm2,xmm8,xmm7,0x00
1087	vpsrldq	xmm7,xmm7,8
1088	sub	r9,0x10
1089	jz	NEAR $L$tail_avx
1090
1091	vpunpckhqdq	xmm8,xmm15,xmm15
1092	vpxor	xmm3,xmm3,xmm0
1093	vpclmulqdq	xmm0,xmm15,xmm6,0x00
1094	vpxor	xmm8,xmm8,xmm15
1095	vmovdqu	xmm14,XMMWORD[((-80))+r8]
1096	vpxor	xmm4,xmm4,xmm1
1097	vpclmulqdq	xmm1,xmm15,xmm6,0x11
1098	vmovdqu	xmm6,XMMWORD[((96-64))+rdx]
1099	vpshufb	xmm15,xmm14,xmm13
1100	vpxor	xmm5,xmm5,xmm2
1101	vpclmulqdq	xmm2,xmm8,xmm7,0x00
1102	vmovdqu	xmm7,XMMWORD[((128-64))+rdx]
1103	sub	r9,0x10
1104	jz	NEAR $L$tail_avx
1105
1106	vpunpckhqdq	xmm8,xmm15,xmm15
1107	vpxor	xmm3,xmm3,xmm0
1108	vpclmulqdq	xmm0,xmm15,xmm6,0x00
1109	vpxor	xmm8,xmm8,xmm15
1110	vmovdqu	xmm14,XMMWORD[((-96))+r8]
1111	vpxor	xmm4,xmm4,xmm1
1112	vpclmulqdq	xmm1,xmm15,xmm6,0x11
1113	vmovdqu	xmm6,XMMWORD[((112-64))+rdx]
1114	vpshufb	xmm15,xmm14,xmm13
1115	vpxor	xmm5,xmm5,xmm2
1116	vpclmulqdq	xmm2,xmm8,xmm7,0x00
1117	vpsrldq	xmm7,xmm7,8
1118	sub	r9,0x10
1119	jz	NEAR $L$tail_avx
1120
1121	vpunpckhqdq	xmm8,xmm15,xmm15
1122	vpxor	xmm3,xmm3,xmm0
1123	vpclmulqdq	xmm0,xmm15,xmm6,0x00
1124	vpxor	xmm8,xmm8,xmm15
1125	vmovdqu	xmm14,XMMWORD[((-112))+r8]
1126	vpxor	xmm4,xmm4,xmm1
1127	vpclmulqdq	xmm1,xmm15,xmm6,0x11
1128	vmovdqu	xmm6,XMMWORD[((144-64))+rdx]
1129	vpshufb	xmm15,xmm14,xmm13
1130	vpxor	xmm5,xmm5,xmm2
1131	vpclmulqdq	xmm2,xmm8,xmm7,0x00
; Load only the high qword of the packed entry at table offset 176.
1132	vmovq	xmm7,QWORD[((184-64))+rdx]
1133	sub	r9,0x10
1134	jmp	NEAR $L$tail_avx
1135
1136ALIGN	32
; ---- Tail: fold in the accumulator, finish the last multiply, reduce ----
1137$L$tail_avx:
1138	vpxor	xmm15,xmm15,xmm10
1139$L$tail_no_xor_avx:
1140	vpunpckhqdq	xmm8,xmm15,xmm15
1141	vpxor	xmm3,xmm3,xmm0
1142	vpclmulqdq	xmm0,xmm15,xmm6,0x00
1143	vpxor	xmm8,xmm8,xmm15
1144	vpxor	xmm4,xmm4,xmm1
1145	vpclmulqdq	xmm1,xmm15,xmm6,0x11
1146	vpxor	xmm5,xmm5,xmm2
1147	vpclmulqdq	xmm2,xmm8,xmm7,0x00
1148
; xmm12 = reduction constant ($L$0x1c2_polynomial).
1149	vmovdqu	xmm12,XMMWORD[r10]
1150
; xmm10 = low sum, xmm11 = high sum, xmm5 = middle sum.
1151	vpxor	xmm10,xmm3,xmm0
1152	vpxor	xmm11,xmm4,xmm1
1153	vpxor	xmm5,xmm5,xmm2
1154
; Middle term = xmm5 ^ low ^ high, split across xmm11:xmm10.
1155	vpxor	xmm5,xmm5,xmm10
1156	vpxor	xmm5,xmm5,xmm11
1157	vpslldq	xmm9,xmm5,8
1158	vpsrldq	xmm5,xmm5,8
1159	vpxor	xmm10,xmm10,xmm9
1160	vpxor	xmm11,xmm11,xmm5
1161
; Two reduction steps: multiply by the constant, swap halves, xor.
1162	vpclmulqdq	xmm9,xmm10,xmm12,0x10
1163	vpalignr	xmm10,xmm10,xmm10,8
1164	vpxor	xmm10,xmm10,xmm9
1165
1166	vpclmulqdq	xmm9,xmm10,xmm12,0x10
1167	vpalignr	xmm10,xmm10,xmm10,8
1168	vpxor	xmm10,xmm10,xmm11
1169	vpxor	xmm10,xmm10,xmm9
1170
; More input left (only possible after the 8x loop): handle it in the
; short path with the updated accumulator in xmm10.
1171	cmp	r9,0
1172	jne	NEAR $L$short_avx
1173
; Byte-reverse the accumulator back, store it, restore xmm6..xmm15 and
; release the 168-byte frame.
1174	vpshufb	xmm10,xmm10,xmm13
1175	vmovdqu	XMMWORD[rcx],xmm10
1176	vzeroupper
1177	movaps	xmm6,XMMWORD[rsp]
1178	movaps	xmm7,XMMWORD[16+rsp]
1179	movaps	xmm8,XMMWORD[32+rsp]
1180	movaps	xmm9,XMMWORD[48+rsp]
1181	movaps	xmm10,XMMWORD[64+rsp]
1182	movaps	xmm11,XMMWORD[80+rsp]
1183	movaps	xmm12,XMMWORD[96+rsp]
1184	movaps	xmm13,XMMWORD[112+rsp]
1185	movaps	xmm14,XMMWORD[128+rsp]
1186	movaps	xmm15,XMMWORD[144+rsp]
1187	lea	rsp,[168+rsp]
1188	ret
1189
1190$L$SEH_end_gcm_ghash_avx_13:
1191
; Read-only constants shared by the routines in this file.
1192section	.rdata rdata align=8
1193ALIGN	64
; pshufb control mask that reverses the order of the 16 bytes in a register.
1194$L$bswap_mask:
1195	DB	15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
; 16-byte constant with byte 0 = 0x01 and byte 15 = 0xc2, all others zero.
; Used for the conditional xor in the init routines and as the multiplier
; in the AVX reduction steps.
1196$L$0x1c2_polynomial:
1197	DB	1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xc2
; Four dwords 7,0,7,0: and-mask that keeps the low three bits of each
; qword. Used in $L$mod4_loop to build the pshufb index.
1198$L$7_mask:
1199	DD	7,0,7,0
1200ALIGN	64
1201
; NUL-terminated ASCII string:
;   "GHASH for x86_64, CRYPTOGAMS by <appro@openssl.org>"
1202	DB	71,72,65,83,72,32,102,111,114,32,120,56,54,95,54,52
1203	DB	44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32
1204	DB	60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111
1205	DB	114,103,62,0
1206ALIGN	64
1207section	.text
1208
; .pdata: one three-dword record per function that has a stack frame --
; image-relative addresses of the function start, the function end, and
; its unwind-info record in .xdata. gcm_gmult_clmul has no record here.
1209section	.pdata rdata align=4
1210ALIGN	4
; gcm_init_clmul
1211	DD	$L$SEH_begin_gcm_init_clmul_1 wrt ..imagebase
1212	DD	$L$SEH_end_gcm_init_clmul_4 wrt ..imagebase
1213	DD	$L$SEH_info_gcm_init_clmul_0 wrt ..imagebase
1214
; gcm_ghash_clmul
1215	DD	$L$SEH_begin_gcm_ghash_clmul_1 wrt ..imagebase
1216	DD	$L$SEH_end_gcm_ghash_clmul_13 wrt ..imagebase
1217	DD	$L$SEH_info_gcm_ghash_clmul_0 wrt ..imagebase
1218
; gcm_init_avx
1219	DD	$L$SEH_begin_gcm_init_avx_1 wrt ..imagebase
1220	DD	$L$SEH_end_gcm_init_avx_4 wrt ..imagebase
1221	DD	$L$SEH_info_gcm_init_avx_0 wrt ..imagebase
1222
; gcm_ghash_avx
1223	DD	$L$SEH_begin_gcm_ghash_avx_1 wrt ..imagebase
1224	DD	$L$SEH_end_gcm_ghash_avx_13 wrt ..imagebase
1225	DD	$L$SEH_info_gcm_ghash_avx_0 wrt ..imagebase
1226
1227
; .xdata: Win64 UNWIND_INFO records referenced from .pdata.
;
; Record layout used below:
;   DB version/flags (1 = version 1, no flags)
;   DB size of prolog in bytes
;   DB number of unwind-code slots (2 bytes each)
;   DB frame register / offset (0 = none)
;   then the unwind codes, last prolog instruction first. Each code is
;     DB prolog offset, DB (info << 4) | operation
;   optionally followed by one extra operand slot (DW).
;
; Operations that appear here:
;   104,120,...,248 = 0x68..0xF8: SAVE_XMM128 of xmm6..xmm15; the following
;                     DW is the save offset from rsp divided by 16
;   34  = 0x22: ALLOC_SMALL, info 2 -> 2*8+8 = 24 bytes
;   1   = 0x01: ALLOC_LARGE, info 0 -> the following DW times 8 bytes
;
; UNWIND_INFO must start on a 4-byte boundary and its code array is padded
; to an even number of slots. Records with 3 slots therefore carry one
; unused DW 0 slot so the next record stays aligned.
1228section	.xdata rdata align=8
1229ALIGN	4
1230$L$SEH_info_gcm_init_clmul_0:
1231	DB	1
1232	DB	$L$SEH_prolog_gcm_init_clmul_3-$L$SEH_begin_gcm_init_clmul_1
1233	DB	3
1234	DB	0
; xmm6 saved at [rsp+0]
1235	DB	$L$SEH_prolog_gcm_init_clmul_3-$L$SEH_begin_gcm_init_clmul_1
1236	DB	104
1237	DW	0
; 24-byte stack allocation
1238	DB	$L$SEH_prolog_gcm_init_clmul_2-$L$SEH_begin_gcm_init_clmul_1
1239	DB	34
; Unused padding slot: 3 is odd, pad the code array to 4 slots.
1240	DW	0
1241$L$SEH_info_gcm_ghash_clmul_0:
1242	DB	1
1243	DB	$L$SEH_prolog_gcm_ghash_clmul_12-$L$SEH_begin_gcm_ghash_clmul_1
1244	DB	22
1245	DB	0
; xmm15 at [rsp+144] down to xmm6 at [rsp+0]
1246	DB	$L$SEH_prolog_gcm_ghash_clmul_12-$L$SEH_begin_gcm_ghash_clmul_1
1247	DB	248
1248	DW	9
1249	DB	$L$SEH_prolog_gcm_ghash_clmul_11-$L$SEH_begin_gcm_ghash_clmul_1
1250	DB	232
1251	DW	8
1252	DB	$L$SEH_prolog_gcm_ghash_clmul_10-$L$SEH_begin_gcm_ghash_clmul_1
1253	DB	216
1254	DW	7
1255	DB	$L$SEH_prolog_gcm_ghash_clmul_9-$L$SEH_begin_gcm_ghash_clmul_1
1256	DB	200
1257	DW	6
1258	DB	$L$SEH_prolog_gcm_ghash_clmul_8-$L$SEH_begin_gcm_ghash_clmul_1
1259	DB	184
1260	DW	5
1261	DB	$L$SEH_prolog_gcm_ghash_clmul_7-$L$SEH_begin_gcm_ghash_clmul_1
1262	DB	168
1263	DW	4
1264	DB	$L$SEH_prolog_gcm_ghash_clmul_6-$L$SEH_begin_gcm_ghash_clmul_1
1265	DB	152
1266	DW	3
1267	DB	$L$SEH_prolog_gcm_ghash_clmul_5-$L$SEH_begin_gcm_ghash_clmul_1
1268	DB	136
1269	DW	2
1270	DB	$L$SEH_prolog_gcm_ghash_clmul_4-$L$SEH_begin_gcm_ghash_clmul_1
1271	DB	120
1272	DW	1
1273	DB	$L$SEH_prolog_gcm_ghash_clmul_3-$L$SEH_begin_gcm_ghash_clmul_1
1274	DB	104
1275	DW	0
; 168-byte stack allocation (21 * 8)
1276	DB	$L$SEH_prolog_gcm_ghash_clmul_2-$L$SEH_begin_gcm_ghash_clmul_1
1277	DB	1
1278	DW	21
1279
1280$L$SEH_info_gcm_init_avx_0:
1281	DB	1
1282	DB	$L$SEH_prolog_gcm_init_avx_3-$L$SEH_begin_gcm_init_avx_1
1283	DB	3
1284	DB	0
; xmm6 saved at [rsp+0]
1285	DB	$L$SEH_prolog_gcm_init_avx_3-$L$SEH_begin_gcm_init_avx_1
1286	DB	104
1287	DW	0
; 24-byte stack allocation
1288	DB	$L$SEH_prolog_gcm_init_avx_2-$L$SEH_begin_gcm_init_avx_1
1289	DB	34
; Unused padding slot: 3 is odd, pad the code array to 4 slots.
1290	DW	0
1291$L$SEH_info_gcm_ghash_avx_0:
1292	DB	1
1293	DB	$L$SEH_prolog_gcm_ghash_avx_12-$L$SEH_begin_gcm_ghash_avx_1
1294	DB	22
1295	DB	0
; xmm15 at [rsp+144] down to xmm6 at [rsp+0]
1296	DB	$L$SEH_prolog_gcm_ghash_avx_12-$L$SEH_begin_gcm_ghash_avx_1
1297	DB	248
1298	DW	9
1299	DB	$L$SEH_prolog_gcm_ghash_avx_11-$L$SEH_begin_gcm_ghash_avx_1
1300	DB	232
1301	DW	8
1302	DB	$L$SEH_prolog_gcm_ghash_avx_10-$L$SEH_begin_gcm_ghash_avx_1
1303	DB	216
1304	DW	7
1305	DB	$L$SEH_prolog_gcm_ghash_avx_9-$L$SEH_begin_gcm_ghash_avx_1
1306	DB	200
1307	DW	6
1308	DB	$L$SEH_prolog_gcm_ghash_avx_8-$L$SEH_begin_gcm_ghash_avx_1
1309	DB	184
1310	DW	5
1311	DB	$L$SEH_prolog_gcm_ghash_avx_7-$L$SEH_begin_gcm_ghash_avx_1
1312	DB	168
1313	DW	4
1314	DB	$L$SEH_prolog_gcm_ghash_avx_6-$L$SEH_begin_gcm_ghash_avx_1
1315	DB	152
1316	DW	3
1317	DB	$L$SEH_prolog_gcm_ghash_avx_5-$L$SEH_begin_gcm_ghash_avx_1
1318	DB	136
1319	DW	2
1320	DB	$L$SEH_prolog_gcm_ghash_avx_4-$L$SEH_begin_gcm_ghash_avx_1
1321	DB	120
1322	DW	1
1323	DB	$L$SEH_prolog_gcm_ghash_avx_3-$L$SEH_begin_gcm_ghash_avx_1
1324	DB	104
1325	DW	0
; 168-byte stack allocation (21 * 8)
1326	DB	$L$SEH_prolog_gcm_ghash_avx_2-$L$SEH_begin_gcm_ghash_avx_1
1327	DB	1
1328	DW	21
; Any output format other than win64: none of the code above is assembled,
; and a single `ret` is emitted instead.
; NOTE(review): presumably the linked NASM bug concerns objects with no
; contents -- confirm against the bug report.
1329%else
1330; Work around https://bugzilla.nasm.us/show_bug.cgi?id=3392738
1331ret
1332%endif
1333