1// Copyright 2015 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5#include "textflag.h"
6#include "funcdata.h"
7
// bool Cas(int32 *val, int32 old, int32 new)
// Atomically:
//	if(*val == old){
//		*val = new;
//		return 1;
//	}else
//		return 0;
TEXT ·Cas(SB), NOSPLIT, $0-13
	MOVL	ptr+0(FP), BX
	MOVL	old+4(FP), AX	// CMPXCHG implicitly compares against AX
	MOVL	new+8(FP), CX
	LOCK
	CMPXCHGL	CX, 0(BX)	// if *BX == AX { *BX = CX; ZF=1 } else { AX = *BX; ZF=0 }
	SETEQ	ret+12(FP)	// result byte = ZF (1 on successful swap)
	RET
23
// Casint32 shares the 32-bit CAS implementation; same frame layout as Cas.
TEXT ·Casint32(SB), NOSPLIT, $0-13
	JMP	·Cas(SB)
26
// Casint64 shares the 64-bit CAS implementation; same frame layout as Cas64.
TEXT ·Casint64(SB), NOSPLIT, $0-21
	JMP	·Cas64(SB)
29
// Casuintptr = Cas: uintptr is 32 bits on this target.
TEXT ·Casuintptr(SB), NOSPLIT, $0-13
	JMP	·Cas(SB)
32
// CasRel = Cas: on x86 a LOCK'ed CAS is already a full barrier, so the
// release-ordered variant needs nothing extra.
TEXT ·CasRel(SB), NOSPLIT, $0-13
	JMP	·Cas(SB)
35
// Loaduintptr = Load: uintptr is 32 bits on this target.
TEXT ·Loaduintptr(SB), NOSPLIT, $0-8
	JMP	·Load(SB)
38
// Loaduint = Load: uint is 32 bits on this target.
TEXT ·Loaduint(SB), NOSPLIT, $0-8
	JMP	·Load(SB)
41
// Storeint32 shares the 32-bit store implementation.
TEXT ·Storeint32(SB), NOSPLIT, $0-8
	JMP	·Store(SB)
44
// Storeint64 shares the 64-bit store implementation; same frame layout as Store64.
TEXT ·Storeint64(SB), NOSPLIT, $0-12
	JMP	·Store64(SB)
47
// Storeuintptr = Store: uintptr is 32 bits on this target.
TEXT ·Storeuintptr(SB), NOSPLIT, $0-8
	JMP	·Store(SB)
50
// Xadduintptr = Xadd: uintptr is 32 bits on this target.
TEXT ·Xadduintptr(SB), NOSPLIT, $0-12
	JMP	·Xadd(SB)
53
// Loadint32 shares the 32-bit load implementation.
TEXT ·Loadint32(SB), NOSPLIT, $0-8
	JMP	·Load(SB)
56
// Loadint64 shares the 64-bit load implementation; same frame layout as Load64.
TEXT ·Loadint64(SB), NOSPLIT, $0-12
	JMP	·Load64(SB)
59
// Xaddint32 shares the 32-bit fetch-add implementation.
TEXT ·Xaddint32(SB), NOSPLIT, $0-12
	JMP	·Xadd(SB)
62
// Xaddint64 shares the 64-bit fetch-add implementation; same frame layout as Xadd64.
TEXT ·Xaddint64(SB), NOSPLIT, $0-20
	JMP	·Xadd64(SB)
65
// bool ·Cas64(uint64 *val, uint64 old, uint64 new)
// Atomically:
//	if(*val == old){
//		*val = new;
//		return 1;
//	} else {
//		return 0;
//	}
TEXT ·Cas64(SB), NOSPLIT, $0-21
	NO_LOCAL_POINTERS
	MOVL	ptr+0(FP), BP	// BP is scratch here; 386 keeps no frame pointer
	TESTL	$7, BP
	JZ	2(PC)	// aligned: skip the panic call
	CALL	·panicUnaligned(SB)	// 64-bit atomics require 8-byte alignment
	// CMPXCHG8B implicitly compares *ptr with DX:AX and, if equal, stores CX:BX.
	MOVL	old_lo+4(FP), AX
	MOVL	old_hi+8(FP), DX
	MOVL	new_lo+12(FP), BX
	MOVL	new_hi+16(FP), CX
	LOCK
	CMPXCHG8B	0(BP)
	SETEQ	ret+20(FP)	// ZF set iff the exchange happened
	RET
88
// bool Casp1(void **p, void *old, void *new)
// Atomically:
//	if(*p == old){
//		*p = new;
//		return 1;
//	}else
//		return 0;
// NOTE(review): identical body to Cas; stores a pointer with no GC write
// barrier — presumably callers handle barriers themselves; confirm.
TEXT ·Casp1(SB), NOSPLIT, $0-13
	MOVL	ptr+0(FP), BX
	MOVL	old+4(FP), AX	// CMPXCHG implicitly compares against AX
	MOVL	new+8(FP), CX
	LOCK
	CMPXCHGL	CX, 0(BX)	// if *BX == AX { *BX = CX; ZF=1 } else { ZF=0 }
	SETEQ	ret+12(FP)	// result byte = ZF
	RET
104
// uint32 Xadd(uint32 volatile *val, int32 delta)
// Atomically:
//	*val += delta;
//	return *val;
TEXT ·Xadd(SB), NOSPLIT, $0-12
	MOVL	ptr+0(FP), BX
	MOVL	delta+4(FP), AX
	MOVL	AX, CX	// save delta; XADD leaves the OLD value in AX
	LOCK
	XADDL	AX, 0(BX)	// *BX += AX; AX = old *BX
	ADDL	CX, AX	// new value = old + delta
	MOVL	AX, ret+8(FP)
	RET
118
// uint64 Xadd64(uint64 volatile *ptr, int64 delta)
// Atomically adds delta and returns the new value.
TEXT ·Xadd64(SB), NOSPLIT, $0-20
	NO_LOCAL_POINTERS
	// no XADDQ so use CMPXCHG8B loop
	MOVL	ptr+0(FP), BP	// BP is scratch here; 386 keeps no frame pointer
	TESTL	$7, BP
	JZ	2(PC)	// aligned: skip the panic call
	CALL	·panicUnaligned(SB)	// 64-bit atomics require 8-byte alignment
	// DI:SI = delta
	MOVL	delta_lo+4(FP), SI
	MOVL	delta_hi+8(FP), DI
	// DX:AX = *addr (initial guess for the CAS loop)
	MOVL	0(BP), AX
	MOVL	4(BP), DX
addloop:
	// CX:BX = DX:AX (*addr) + DI:SI (delta)
	MOVL	AX, BX
	MOVL	DX, CX
	ADDL	SI, BX
	ADCL	DI, CX	// carry into the high word

	// if *addr == DX:AX {
	//	*addr = CX:BX
	// } else {
	//	DX:AX = *addr
	// }
	// all in one instruction
	LOCK
	CMPXCHG8B	0(BP)

	JNZ	addloop	// lost a race: DX:AX now holds the fresh value; retry

	// success
	// return CX:BX (the value we just stored)
	MOVL	BX, ret_lo+12(FP)
	MOVL	CX, ret_hi+16(FP)
	RET
155
// uint32 Xchg(uint32 volatile *ptr, uint32 new)
// Atomically stores new and returns the previous value.
TEXT ·Xchg(SB), NOSPLIT, $0-12
	MOVL	ptr+0(FP), BX
	MOVL	new+4(FP), AX
	XCHGL	AX, 0(BX)	// XCHG with memory is implicitly LOCK'ed
	MOVL	AX, ret+8(FP)	// AX = old *BX
	RET
162
// Xchgint32 shares the 32-bit exchange implementation.
TEXT ·Xchgint32(SB), NOSPLIT, $0-12
	JMP	·Xchg(SB)
165
// Xchgint64 shares the 64-bit exchange implementation; same frame layout as Xchg64.
TEXT ·Xchgint64(SB), NOSPLIT, $0-20
	JMP	·Xchg64(SB)
168
// Xchguintptr = Xchg: uintptr is 32 bits on this target.
TEXT ·Xchguintptr(SB), NOSPLIT, $0-12
	JMP	·Xchg(SB)
171
// uint64 Xchg64(uint64 volatile *ptr, uint64 new)
// Atomically stores new and returns the previous value.
TEXT ·Xchg64(SB),NOSPLIT,$0-20
	NO_LOCAL_POINTERS
	// no XCHGQ so use CMPXCHG8B loop
	MOVL	ptr+0(FP), BP	// BP is scratch here; 386 keeps no frame pointer
	TESTL	$7, BP
	JZ	2(PC)	// aligned: skip the panic call
	CALL	·panicUnaligned(SB)	// 64-bit atomics require 8-byte alignment
	// CX:BX = new (value CMPXCHG8B stores on success)
	MOVL	new_lo+4(FP), BX
	MOVL	new_hi+8(FP), CX
	// DX:AX = *addr (initial guess for the CAS loop)
	MOVL	0(BP), AX
	MOVL	4(BP), DX
swaploop:
	// if *addr == DX:AX
	//	*addr = CX:BX
	// else
	//	DX:AX = *addr
	// all in one instruction
	LOCK
	CMPXCHG8B	0(BP)
	JNZ	swaploop	// lost a race: retry with the fresh DX:AX

	// success
	// return DX:AX (the value that was in *addr before our store)
	MOVL	AX, ret_lo+12(FP)
	MOVL	DX, ret_hi+16(FP)
	RET
200
// void StorepNoWB(void **ptr, void *val)
// Atomically stores a pointer WITHOUT a GC write barrier; callers are
// responsible for barrier correctness.
TEXT ·StorepNoWB(SB), NOSPLIT, $0-8
	MOVL	ptr+0(FP), BX
	MOVL	val+4(FP), AX
	XCHGL	AX, 0(BX)	// implicit LOCK makes this a sequentially-consistent store
	RET
206
// void Store(uint32 volatile *ptr, uint32 val)
TEXT ·Store(SB), NOSPLIT, $0-8
	MOVL	ptr+0(FP), BX
	MOVL	val+4(FP), AX
	XCHGL	AX, 0(BX)	// implicit LOCK makes this a sequentially-consistent store
	RET
212
// StoreRel = Store: the sequentially-consistent store is also a release store.
TEXT ·StoreRel(SB), NOSPLIT, $0-8
	JMP	·Store(SB)
215
// StoreReluintptr = Store: uintptr is 32 bits on this target.
TEXT ·StoreReluintptr(SB), NOSPLIT, $0-8
	JMP	·Store(SB)
218
// uint64 atomicload64(uint64 volatile* addr);
// An aligned 8-byte MMX load is a single atomic access.
TEXT ·Load64(SB), NOSPLIT, $0-12
	NO_LOCAL_POINTERS
	MOVL	ptr+0(FP), AX
	TESTL	$7, AX
	JZ	2(PC)	// aligned: skip the panic call
	CALL	·panicUnaligned(SB)	// 64-bit atomics require 8-byte alignment
	MOVQ	(AX), M0	// MOVQ/EMMS were introduced on the Pentium MMX
	MOVQ	M0, ret+4(FP)
	EMMS	// exit MMX state so the x87 FPU is usable again
	RET
230
// void ·Store64(uint64 volatile* addr, uint64 v);
// An aligned 8-byte MMX store is a single atomic access.
TEXT ·Store64(SB), NOSPLIT, $0-12
	NO_LOCAL_POINTERS
	MOVL	ptr+0(FP), AX
	TESTL	$7, AX
	JZ	2(PC)	// aligned: skip the panic call
	CALL	·panicUnaligned(SB)	// 64-bit atomics require 8-byte alignment
	// MOVQ and EMMS were introduced on the Pentium MMX.
	MOVQ	val+4(FP), M0
	MOVQ	M0, (AX)
	EMMS	// exit MMX state so the x87 FPU is usable again
	// This is essentially a no-op, but it provides required memory fencing.
	// It can be replaced with MFENCE, but MFENCE was introduced only on the Pentium4 (SSE2).
	XORL	AX, AX
	LOCK
	XADDL	AX, (SP)	// LOCK'ed add of 0 to our own stack slot = full barrier
	RET
248
// void	·Or8(byte volatile*, byte);
// Atomically: *ptr |= val (no return value).
TEXT ·Or8(SB), NOSPLIT, $0-5
	MOVL	ptr+0(FP), AX
	MOVB	val+4(FP), BX
	LOCK
	ORB	BX, (AX)
	RET
256
// void	·And8(byte volatile*, byte);
// Atomically: *ptr &= val (no return value).
TEXT ·And8(SB), NOSPLIT, $0-5
	MOVL	ptr+0(FP), AX
	MOVB	val+4(FP), BX
	LOCK
	ANDB	BX, (AX)
	RET
264
// void Store8(byte volatile *ptr, byte val)
TEXT ·Store8(SB), NOSPLIT, $0-5
	MOVL	ptr+0(FP), BX
	MOVB	val+4(FP), AX
	XCHGB	AX, 0(BX)	// implicit LOCK makes this a sequentially-consistent store
	RET
270
// func Or(addr *uint32, v uint32)
// Atomically: *addr |= v (no return value).
TEXT ·Or(SB), NOSPLIT, $0-8
	MOVL	ptr+0(FP), AX
	MOVL	val+4(FP), BX
	LOCK
	ORL	BX, (AX)
	RET
278
// func And(addr *uint32, v uint32)
// Atomically: *addr &= v (no return value).
TEXT ·And(SB), NOSPLIT, $0-8
	MOVL	ptr+0(FP), AX
	MOVL	val+4(FP), BX
	LOCK
	ANDL	BX, (AX)
	RET
286
// func And32(addr *uint32, v uint32) old uint32
// Atomically: *addr &= v; returns the value *addr held before the AND.
// x86 has no fetch-and-AND, so emulate it with a CMPXCHG loop.
TEXT ·And32(SB), NOSPLIT, $0-12
	MOVL	ptr+0(FP), BX
	MOVL	val+4(FP), CX
casloop:
	MOVL 	CX, DX
	MOVL	(BX), AX	// AX = current *addr (also CMPXCHG's expected value)
	ANDL	AX, DX	// DX = *addr & v, the desired new value
	LOCK
	CMPXCHGL	DX, (BX)	// install DX only if *addr still equals AX
	JNZ casloop	// someone raced us: reload and retry
	MOVL 	AX, ret+8(FP)	// AX = value observed before the successful swap
	RET
300
// func Or32(addr *uint32, v uint32) old uint32
// Atomically: *addr |= v; returns the value *addr held before the OR.
// x86 has no fetch-and-OR, so emulate it with a CMPXCHG loop.
TEXT ·Or32(SB), NOSPLIT, $0-12
	MOVL	ptr+0(FP), BX
	MOVL	val+4(FP), CX
casloop:
	MOVL 	CX, DX
	MOVL	(BX), AX	// AX = current *addr (also CMPXCHG's expected value)
	ORL	AX, DX	// DX = *addr | v, the desired new value
	LOCK
	CMPXCHGL	DX, (BX)	// install DX only if *addr still equals AX
	JNZ casloop	// someone raced us: reload and retry
	MOVL 	AX, ret+8(FP)	// AX = value observed before the successful swap
	RET
314
// func And64(addr *uint64, v uint64) old uint64
// Atomically: *addr &= v; returns the previous value, via a CMPXCHG8B loop.
// NOTE(review): unlike Cas64/Xadd64/Xchg64 there is no 8-byte-alignment
// check here — confirm callers guarantee alignment.
TEXT ·And64(SB), NOSPLIT, $0-20
	MOVL	ptr+0(FP), BP	// BP is scratch here; 386 keeps no frame pointer
	// DI:SI = v
	MOVL	val_lo+4(FP), SI
	MOVL	val_hi+8(FP), DI
	// DX:AX = *addr (initial guess for the CAS loop)
	MOVL	0(BP), AX
	MOVL	4(BP), DX
casloop:
	// CX:BX = DX:AX (*addr) & DI:SI (mask)
	MOVL	AX, BX
	MOVL	DX, CX
	ANDL	SI, BX
	ANDL	DI, CX
	LOCK
	CMPXCHG8B	0(BP)	// success: *addr = CX:BX; failure: DX:AX = fresh *addr
	JNZ casloop
	// DX:AX = value observed before the successful swap
	MOVL	AX, ret_lo+12(FP)
	MOVL	DX, ret_hi+16(FP)
	RET
336
337
// func Or64(addr *uint64, v uint64) old uint64
// Atomically: *addr |= v; returns the previous value, via a CMPXCHG8B loop.
// NOTE(review): unlike Cas64/Xadd64/Xchg64 there is no 8-byte-alignment
// check here — confirm callers guarantee alignment.
TEXT ·Or64(SB), NOSPLIT, $0-20
	MOVL	ptr+0(FP), BP	// BP is scratch here; 386 keeps no frame pointer
	// DI:SI = v
	MOVL	val_lo+4(FP), SI
	MOVL	val_hi+8(FP), DI
	// DX:AX = *addr (initial guess for the CAS loop)
	MOVL	0(BP), AX
	MOVL	4(BP), DX
casloop:
	// CX:BX = DX:AX (*addr) | DI:SI (mask)
	MOVL	AX, BX
	MOVL	DX, CX
	ORL	SI, BX
	ORL	DI, CX
	LOCK
	CMPXCHG8B	0(BP)	// success: *addr = CX:BX; failure: DX:AX = fresh *addr
	JNZ casloop
	// DX:AX = value observed before the successful swap
	MOVL	AX, ret_lo+12(FP)
	MOVL	DX, ret_hi+16(FP)
	RET
359
// func Anduintptr(addr *uintptr, v uintptr) old uintptr
// uintptr is 32 bits on this target, so reuse And32.
TEXT ·Anduintptr(SB), NOSPLIT, $0-12
	JMP	·And32(SB)
363
// func Oruintptr(addr *uintptr, v uintptr) old uintptr
// uintptr is 32 bits on this target, so reuse Or32.
TEXT ·Oruintptr(SB), NOSPLIT, $0-12
	JMP	·Or32(SB)
367