1; This file is generated from a similarly-named Perl script in the BoringSSL
2; source tree. Do not edit by hand.
3
; Assembler setup: include file, output-format gating and section selection.
; The included file is not visible here; by its name it presumably supplies
; symbol-prefixing definitions -- confirm.
4%include "ring_core_generated/prefix_symbols_nasm.inc"
; Everything from here to the matching %else is assembled only when the
; NASM output format is win32.
5%ifidn __OUTPUT_FORMAT__, win32
; Section selection keyed on the output format. Inside the enclosing
; win32-only region only the %elifidn win32 branch below can be selected.
6%ifidn __OUTPUT_FORMAT__,obj
7section	code	use32 class=code align=64
8%elifidn __OUTPUT_FORMAT__,win32
; Defines the absolute symbol @feat.00 with value 1.
; NOTE(review): presumably the marker the Microsoft linker reads for
; safe-exception-handler (SAFESEH) compatibility -- confirm in the NASM manual.
9$@feat.00 equ 1
10section	.text	code align=64
11%else
12section	.text	code
13%endif
; The extern declaration is commented out; _OPENSSL_ia32cap_P is declared
; with a `common` directive in the .bss segment near the end of this file.
14;extern	_OPENSSL_ia32cap_P
;-----------------------------------------------------------------------
; _bn_mul_mont -- Montgomery multiplication for 32-bit x86 using the MMX
; register file and PMULUDQ (purpose per the ASCII banner stored after the
; function: "Montgomery Multiplication for x86, ...").
;
; Stack arguments as this code reads them. Offsets are relative to esp
; after the four pushes, so the return address sits at [16+esp]:
;   [20+esp] arg0  pointer to result vector R (written only in the tail)
;   [24+esp] arg1  pointer to vector A
;   [28+esp] arg2  pointer to vector B
;   [32+esp] arg3  pointer to vector N
;   [36+esp] arg4  pointer; only the first dword it points to is used ("n0")
;   [40+esp] arg5  word count ("num")
; NOTE(review): presumably the C prototype is
;   bn_mul_mont(rp, ap, bp, np, n0, num), with N the modulus and
;   n0[0] = -N^-1 mod 2^32 -- confirm against the C declaration.
; NOTE(review): no lower bound on num is checked here. The first loop and
;   the inner loop are bottom-tested and the inner loop counts num-2 down
;   to zero, so by this reading num must be at least 3 -- confirm the
;   caller's contract.
;
; Returns: eax = 1.
; Preserved: ebp, ebx, esi, edi (pushed on entry, popped on exit) and esp.
; Clobbered: eax, ecx, edx, flags, mm0-mm7 (emms is executed before the tail).
;
; Scratch frame built below the caller's stack (offsets from the new esp):
;   [4+esp]  copy of arg0 (R)      [16+esp] copy of arg3 (N)
;   [8+esp]  copy of arg1 (A)      [20+esp] n0 value (dword loaded via arg4)
;   [12+esp] copy of arg2 (B)      [24+esp] stack pointer to restore on exit
;   [32+esp] onward: temporary vector T[0..num] (num+1 dwords)
;
; Notation used in the comments: i = outer index (edx), j = inner index
; (ecx), m = low dword of mm5, c0 = mm2 and c1 = mm3 (two 64-bit running
; sums whose upper halves carry into the next word), mask = mm7.
;-----------------------------------------------------------------------
15global	_bn_mul_mont
16align	16
17_bn_mul_mont:
18L$_bn_mul_mont_begin:
19	push	ebp	; save the four registers that are popped again before ret
20	push	ebx
21	push	esi
22	push	edi
23	xor	eax,eax	; eax = 0 (overwritten below before it is read)
24	mov	edi,DWORD [40+esp]	; edi = arg5 (num)
25	lea	esi,[20+esp]	; esi = address of the argument block (&arg0)
26	lea	edx,[24+esp]	; edx = address of the arg1 slot; used only for the placement math below
27	add	edi,2	; num+2
28	neg	edi
29	lea	ebp,[edi*4+esp-32]	; ebp = esp - 32 - 4*(num+2): tentative new stack top
30	neg	edi	; edi = num+2 again
; Placement of the new frame relative to edx.
; NOTE(review): presumably done to limit cache aliasing between the scratch
; frame and the caller's data -- confirm with the generator script.
31	mov	eax,ebp
32	sub	eax,edx
33	and	eax,2047
34	sub	ebp,eax	; now ebp == edx (mod 2048)
35	xor	edx,ebp
36	and	edx,2048
37	xor	edx,2048	; edx = 2048 if ebp and the old edx agree in bit 11, else 0
38	sub	ebp,edx	; afterwards the two addresses differ in bit 11
39	and	ebp,-64	; round ebp down to a 64-byte boundary
; Move esp down to ebp in 4096-byte steps, reading one dword at each step.
; NOTE(review): presumably so that every stack page is touched in order
; (guard-page probing) -- confirm.
40	mov	eax,esp
41	sub	eax,ebp
42	and	eax,-4096	; eax = (esp - ebp) rounded down to a multiple of 4096
43	mov	edx,esp	; edx = stack pointer to restore on exit (stored to [24+esp] below)
44	lea	esp,[eax*1+ebp]	; esp = ebp + eax, a whole number of 4096-byte steps above ebp
45	mov	eax,DWORD [esp]	; read a dword at the new esp
46	cmp	esp,ebp
47	ja	NEAR L$000page_walk	; unsigned: more steps needed while esp > ebp
48	jmp	NEAR L$001page_walk_done
49align	16
50L$000page_walk:
51	lea	esp,[esp-4096]	; step down by 4096 bytes
52	mov	eax,DWORD [esp]	; read a dword at this step
53	cmp	esp,ebp
54	ja	NEAR L$000page_walk	; loop until esp has reached ebp
55L$001page_walk_done:
; Copy the arguments from the caller's frame (via esi) into the new frame.
56	mov	eax,DWORD [esi]	; arg0 (R)
57	mov	ebx,DWORD [4+esi]	; arg1 (A)
58	mov	ecx,DWORD [8+esi]	; arg2 (B)
59	mov	ebp,DWORD [12+esi]	; arg3 (N)
60	mov	esi,DWORD [16+esi]	; arg4 (pointer)
61	mov	esi,DWORD [esi]	; esi = first dword behind arg4 (n0)
62	mov	DWORD [4+esp],eax
63	mov	DWORD [8+esp],ebx
64	mov	DWORD [12+esp],ecx
65	mov	DWORD [16+esp],ebp
66	mov	DWORD [20+esp],esi
67	lea	ebx,[edi-3]	; ebx = (num+2) - 3 = num-1
68	mov	DWORD [24+esp],edx	; remember the stack pointer saved before the frame was carved out
; bt copies bit 26 of the first dword of _OPENSSL_ia32cap_P into CF. Nothing
; below branches on it, and the xor instructions that follow overwrite the flags.
; NOTE(review): looks like the remnant of a CPU-feature check -- confirm
; with the generator script.
69	lea	eax,[_OPENSSL_ia32cap_P]
70	bt	DWORD [eax],26
71	mov	eax,-1
72	movd	mm7,eax	; mask = 0x00000000FFFFFFFF (movd zero-extends into the 64-bit register)
73	mov	esi,DWORD [8+esp]	; esi = A
74	mov	edi,DWORD [12+esp]	; edi = B
75	mov	ebp,DWORD [16+esp]	; ebp = N
76	xor	edx,edx	; i = 0
77	xor	ecx,ecx	; j = 0
; ---- First pass (i = 0): T = (A*B[0] + m*N) >> 32, word by word ----
78	movd	mm4,DWORD [edi]	; mm4 = B[0]
79	movd	mm5,DWORD [esi]	; mm5 = A[0]
80	movd	mm3,DWORD [ebp]	; mm3 = N[0]
81	pmuludq	mm5,mm4	; mm5 = A[0]*B[0] (64-bit product of the low dwords)
82	movq	mm2,mm5	; c0 = A[0]*B[0]
83	movq	mm0,mm5
84	pand	mm0,mm7	; mm0 = low dword of A[0]*B[0]
85	pmuludq	mm5,[20+esp]	; low dword of mm5 times n0; only the low dword (m) is used from here on
86	pmuludq	mm3,mm5	; c1 = N[0]*m
87	paddq	mm3,mm0	; c1 += low dword of A[0]*B[0]
88	movd	mm1,DWORD [4+ebp]	; preload N[1]
89	movd	mm0,DWORD [4+esi]	; preload A[1]
90	psrlq	mm2,32	; keep only the carry of c0
91	psrlq	mm3,32	; keep only the carry of c1 (its low dword is discarded)
92	inc	ecx	; j = 1
93align	16
94L$0021st:
; Loop invariant on entry: mm0 = A[j], mm1 = N[j], c0/c1 hold carries.
95	pmuludq	mm0,mm4	; A[j]*B[0]
96	pmuludq	mm1,mm5	; N[j]*m
97	paddq	mm2,mm0	; c0 += A[j]*B[0]
98	paddq	mm3,mm1	; c1 += N[j]*m
99	movq	mm0,mm2
100	pand	mm0,mm7	; low dword of c0
101	movd	mm1,DWORD [4+ecx*4+ebp]	; preload N[j+1]
102	paddq	mm3,mm0	; c1 += low dword of c0
103	movd	mm0,DWORD [4+ecx*4+esi]	; preload A[j+1]
104	psrlq	mm2,32	; c0 = carry
105	movd	DWORD [28+ecx*4+esp],mm3	; T[j-1] = low dword of c1
106	psrlq	mm3,32	; c1 = carry
107	lea	ecx,[1+ecx]	; j++
108	cmp	ecx,ebx
109	jl	NEAR L$0021st	; signed: repeat while j < num-1
; Last word of the first pass (j = num-1; mm0/mm1 were preloaded by the loop).
110	pmuludq	mm0,mm4
111	pmuludq	mm1,mm5
112	paddq	mm2,mm0
113	paddq	mm3,mm1
114	movq	mm0,mm2
115	pand	mm0,mm7
116	paddq	mm3,mm0
117	movd	DWORD [28+ecx*4+esp],mm3	; T[num-2]
118	psrlq	mm2,32
119	psrlq	mm3,32
120	paddq	mm3,mm2	; sum of both carries
121	movq	[32+ebx*4+esp],mm3	; 64-bit store: T[num-1] and T[num]
122	inc	edx	; i = 1
; ---- Remaining passes: T = (T + A*B[i] + m*N) >> 32 for i = 1 .. num-1 ----
123L$003outer:
124	xor	ecx,ecx	; j = 0
125	movd	mm4,DWORD [edx*4+edi]	; mm4 = B[i]
126	movd	mm5,DWORD [esi]	; mm5 = A[0]
127	movd	mm6,DWORD [32+esp]	; mm6 = T[0]
128	movd	mm3,DWORD [ebp]	; mm3 = N[0]
129	pmuludq	mm5,mm4	; A[0]*B[i]
130	paddq	mm5,mm6	; + T[0]
131	movq	mm0,mm5
132	movq	mm2,mm5	; c0 = A[0]*B[i] + T[0]
133	pand	mm0,mm7	; mm0 = low dword of c0
134	pmuludq	mm5,[20+esp]	; low dword of mm5 times n0; low dword of the result is m
135	pmuludq	mm3,mm5	; c1 = N[0]*m
136	paddq	mm3,mm0	; c1 += low dword of c0
137	movd	mm6,DWORD [36+esp]	; mm6 = T[1]
138	movd	mm1,DWORD [4+ebp]	; preload N[1]
139	movd	mm0,DWORD [4+esi]	; preload A[1]
140	psrlq	mm2,32	; c0 = carry
141	psrlq	mm3,32	; c1 = carry
142	paddq	mm2,mm6	; c0 += T[1]
143	inc	ecx	; j = 1
144	dec	ebx	; ebx = num-2: iteration counter for the inner loop
145L$004inner:
; Loop invariant on entry: mm0 = A[j], mm1 = N[j], c0 already includes T[j].
146	pmuludq	mm0,mm4	; A[j]*B[i]
147	pmuludq	mm1,mm5	; N[j]*m
148	paddq	mm2,mm0	; c0 += A[j]*B[i]
149	paddq	mm3,mm1	; c1 += N[j]*m
150	movq	mm0,mm2
151	movd	mm6,DWORD [36+ecx*4+esp]	; mm6 = T[j+1]
152	pand	mm0,mm7	; low dword of c0
153	movd	mm1,DWORD [4+ecx*4+ebp]	; preload N[j+1]
154	paddq	mm3,mm0	; c1 += low dword of c0
155	movd	mm0,DWORD [4+ecx*4+esi]	; preload A[j+1]
156	psrlq	mm2,32	; c0 = carry
157	movd	DWORD [28+ecx*4+esp],mm3	; T[j-1] = low dword of c1
158	psrlq	mm3,32	; c1 = carry
159	paddq	mm2,mm6	; c0 += T[j+1]
160	dec	ebx	; sets ZF for the jnz below
161	lea	ecx,[1+ecx]	; j++ (lea leaves the flags from dec intact)
162	jnz	NEAR L$004inner
163	mov	ebx,ecx	; restore ebx = num-1 (j has reached num-1 here)
; Last word of this pass (j = num-1).
164	pmuludq	mm0,mm4
165	pmuludq	mm1,mm5
166	paddq	mm2,mm0
167	paddq	mm3,mm1
168	movq	mm0,mm2
169	pand	mm0,mm7
170	paddq	mm3,mm0
171	movd	DWORD [28+ecx*4+esp],mm3	; T[num-2]
172	psrlq	mm2,32
173	psrlq	mm3,32
174	movd	mm6,DWORD [36+ebx*4+esp]	; mm6 = T[num]
175	paddq	mm3,mm2	; sum of both carries
176	paddq	mm3,mm6	; + T[num]
177	movq	[32+ebx*4+esp],mm3	; 64-bit store: T[num-1] and T[num]
178	lea	edx,[1+edx]	; i++
179	cmp	edx,ebx
180	jle	NEAR L$003outer	; signed: repeat while i <= num-1
181	emms	; leave MMX state before returning to the caller
182align	16
; ---- Tail: R = T - N, then select between T and T - N without branching ----
183L$005common_tail:
184	mov	ebp,DWORD [16+esp]	; ebp = N
185	mov	edi,DWORD [4+esp]	; edi = R
186	lea	esi,[32+esp]	; esi = &T[0]
187	mov	eax,DWORD [esi]	; eax = T[0]
188	mov	ecx,ebx	; ecx = num-1 (loop counter)
189	xor	edx,edx	; index = 0; also clears CF for the first sbb
190align	16
191L$006sub:
192	sbb	eax,DWORD [edx*4+ebp]	; T[k] - N[k] - borrow
193	mov	DWORD [edx*4+edi],eax	; R[k] = difference
194	dec	ecx	; dec leaves CF (the borrow) untouched; SF/OF feed the jge below
195	mov	eax,DWORD [4+edx*4+esi]	; eax = T[k+1]
196	lea	edx,[1+edx]	; k++ without touching flags
197	jge	NEAR L$006sub	; runs num times; ecx ends at -1
198	sbb	eax,0	; eax = T[num] - final borrow; used below as a select mask
				; (the selection is only meaningful if this is 0 or -1)
199	mov	edx,-1
200	xor	edx,eax	; edx = ~eax
201	jmp	NEAR L$007copy	; skip the alignment padding
202align	16
203L$007copy:
; For k = num-1 down to 0: R[k] = (T[k] & eax) | (R[k] & edx), i.e. keep
; T[k] where the mask eax is set and the subtracted value otherwise.
204	mov	esi,DWORD [32+ebx*4+esp]	; esi = T[k]
205	mov	ebp,DWORD [ebx*4+edi]	; ebp = R[k] (= T[k] - N[k] from the loop above)
206	mov	DWORD [32+ebx*4+esp],ecx	; overwrite T[k] with ecx (-1 after the sub loop)
207	and	esi,eax
208	and	ebp,edx
209	or	ebp,esi
210	mov	DWORD [ebx*4+edi],ebp
211	dec	ebx
212	jge	NEAR L$007copy
213	mov	esp,DWORD [24+esp]	; restore the stack pointer saved in the prologue
214	mov	eax,1	; return value
215	pop	edi
216	pop	esi
217	pop	ebx
218	pop	ebp
219	ret
; NUL-terminated ASCII banner emitted into the code section after the function:
; "Montgomery Multiplication for x86, CRYPTOGAMS by <appro@openssl.org>"
220db	77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105
221db	112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56
222db	54,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121
223db	32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46
224db	111,114,103,62,0
225segment	.bss
; Declares _OPENSSL_ia32cap_P as a 16-byte common symbol; the function above
; takes its address and bit-tests its first dword.
226common	_OPENSSL_ia32cap_P 16
; Any output format other than win32: the file contains only a lone `ret`.
227%else
228; Work around https://bugzilla.nasm.us/show_bug.cgi?id=3392738
229ret
230%endif
231