/*
 * Stack-less Just-In-Time compiler
 *
 * Copyright Zoltan Herczeg ([email protected]). All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without modification, are
 * permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice, this list of
 * conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice, this list
 * of conditions and the following disclaimer in the documentation and/or other materials
 * provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
 * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void)
{
	return "x86" SLJIT_CPUINFO;
}

/*
   32b register indexes:
   0 - EAX
   1 - ECX
   2 - EDX
   3 - EBX
   4 - ESP
   5 - EBP
   6 - ESI
   7 - EDI
*/

/*
   64b register indexes:
   0 - RAX
   1 - RCX
   2 - RDX
   3 - RBX
   4 - RSP
   5 - RBP
   6 - RSI
   7 - RDI
   8 - R8 - From now on REX prefix is required
   9 - R9
   10 - R10
   11 - R11
   12 - R12
   13 - R13
   14 - R14
   15 - R15
*/

#define TMP_REG1 (SLJIT_NUMBER_OF_REGISTERS + 2)
#define TMP_FREG (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1)

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)

static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 3] = {
	0, 0, 2, 1, 0, 0, 0, 0, 0, 0, 5, 7, 6, 4, 3
};

static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2] = {
	0, 1, 2, 3, 4, 5, 6, 7, 0
};

#define CHECK_EXTRA_REGS(p, w, do) \
	if (p >= SLJIT_R3 && p <= SLJIT_S3) { \
		w = (2 * SSIZE_OF(sw)) + ((p) - SLJIT_R3) * SSIZE_OF(sw); \
		p = SLJIT_MEM1(SLJIT_SP); \
		do; \
	}

#else /* SLJIT_CONFIG_X86_32 */

#define TMP_REG2 (SLJIT_NUMBER_OF_REGISTERS + 3)

/* Note: r12 & 0x7 == 0b100, which is decoded as "SIB byte present".
   Note: avoid using r12 and r13 for memory addressing;
   therefore r12 is better used as a higher saved register. */
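
/* For reference (standard x86-64 ModRM/SIB rules; illustration only,
   not emitted by this file):
     mov rax, [rbx]  ->  48 8b 03        ModRM only
     mov rax, [r12]  ->  49 8b 04 24     a SIB byte is mandatory, because
                                         rm == 0b100 selects the SIB form
     mov rax, [r13]  ->  49 8b 45 00     a zero disp8 is mandatory, because
                                         mod == 0 with rm == 0b101 would
                                         mean RIP-relative addressing
   so both registers cost an extra byte when used as a base register. */
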
#ifndef _WIN64
/* Args: rdi(=7), rsi(=6), rdx(=2), rcx(=1), r8, r9. Scratches: rax(=0), r10, r11 */
static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 4] = {
	0, 0, 6, 7, 1, 8, 11, 10, 12, 5, 13, 14, 15, 3, 4, 2, 9
};
/* low-map. reg_map & 0x7. */
static const sljit_u8 reg_lmap[SLJIT_NUMBER_OF_REGISTERS + 4] = {
	0, 0, 6, 7, 1, 0, 3, 2, 4, 5, 5, 6, 7, 3, 4, 2, 1
};
#else
/* Args: rcx(=1), rdx(=2), r8, r9. Scratches: rax(=0), r10, r11 */
static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 4] = {
	0, 0, 2, 8, 1, 11, 12, 5, 13, 14, 15, 7, 6, 3, 4, 9, 10
};
/* low-map. reg_map & 0x7. */
static const sljit_u8 reg_lmap[SLJIT_NUMBER_OF_REGISTERS + 4] = {
	0, 0, 2, 0, 1, 3, 4, 5, 5, 6, 7, 7, 6, 3, 4, 1, 2
};
#endif

/* Args: xmm0-xmm3 */
static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2] = {
	0, 0, 1, 2, 3, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 4
};
/* low-map. freg_map & 0x7. */
static const sljit_u8 freg_lmap[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2] = {
	0, 0, 1, 2, 3, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 4
};

#define REX_W 0x48
#define REX_R 0x44
#define REX_X 0x42
#define REX_B 0x41
#define REX 0x40

#ifndef _WIN64
#define HALFWORD_MAX 0x7fffffffl
#define HALFWORD_MIN -0x80000000l
#else
#define HALFWORD_MAX 0x7fffffffll
#define HALFWORD_MIN -0x80000000ll
#endif

#define IS_HALFWORD(x) ((x) <= HALFWORD_MAX && (x) >= HALFWORD_MIN)
#define NOT_HALFWORD(x) ((x) > HALFWORD_MAX || (x) < HALFWORD_MIN)

#define CHECK_EXTRA_REGS(p, w, do)

#endif /* SLJIT_CONFIG_X86_32 */

#define U8(v) ((sljit_u8)(v))

/* Size flags for emit_x86_instruction: */
#define EX86_BIN_INS ((sljit_uw)0x000010)
#define EX86_SHIFT_INS ((sljit_uw)0x000020)
#define EX86_BYTE_ARG ((sljit_uw)0x000040)
#define EX86_HALF_ARG ((sljit_uw)0x000080)
/* Size flags for both emit_x86_instruction and emit_vex_instruction: */
#define EX86_REX ((sljit_uw)0x000100)
#define EX86_NO_REXW ((sljit_uw)0x000200)
#define EX86_PREF_66 ((sljit_uw)0x000400)
#define EX86_PREF_F2 ((sljit_uw)0x000800)
#define EX86_PREF_F3 ((sljit_uw)0x001000)
#define EX86_SSE2_OP1 ((sljit_uw)0x002000)
#define EX86_SSE2_OP2 ((sljit_uw)0x004000)
#define EX86_SSE2 (EX86_SSE2_OP1 | EX86_SSE2_OP2)
#define EX86_VEX_EXT ((sljit_uw)0x008000)
/* Op flags for emit_vex_instruction: */
#define VEX_OP_0F38 ((sljit_uw)0x010000)
#define VEX_OP_0F3A ((sljit_uw)0x020000)
#define VEX_SSE2_OPV ((sljit_uw)0x040000)
#define VEX_AUTO_W ((sljit_uw)0x080000)
#define VEX_W ((sljit_uw)0x100000)
#define VEX_256 ((sljit_uw)0x200000)

#define EX86_SELECT_66(op) (((op) & SLJIT_32) ? 0 : EX86_PREF_66)
#define EX86_SELECT_F2_F3(op) (((op) & SLJIT_32) ? EX86_PREF_F3 : EX86_PREF_F2)
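
/* For reference: EX86_SELECT_66 and EX86_SELECT_F2_F3 pick the standard
   SSE prefixes. The F2 prefix selects the scalar double form of an
   opcode, F3 the scalar single form, and 66 the packed double form;
   e.g. for opcode 0x58, 0F 58 is ADDPS, 66 0F 58 is ADDPD,
   F2 0F 58 is ADDSD and F3 0F 58 is ADDSS. */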

/* --------------------------------------------------------------------- */
/*  Instruction forms                                                    */
/* --------------------------------------------------------------------- */

#define ADD (/* BINARY */ 0 << 3)
#define ADD_EAX_i32 0x05
#define ADD_r_rm 0x03
#define ADD_rm_r 0x01
#define ADDSD_x_xm 0x58
#define ADC (/* BINARY */ 2 << 3)
#define ADC_EAX_i32 0x15
#define ADC_r_rm 0x13
#define ADC_rm_r 0x11
#define AND (/* BINARY */ 4 << 3)
#define AND_EAX_i32 0x25
#define AND_r_rm 0x23
#define AND_rm_r 0x21
#define ANDPD_x_xm 0x54
#define BSR_r_rm (/* GROUP_0F */ 0xbd)
#define BSF_r_rm (/* GROUP_0F */ 0xbc)
#define BSWAP_r (/* GROUP_0F */ 0xc8)
#define CALL_i32 0xe8
#define CALL_rm (/* GROUP_FF */ 2 << 3)
#define CDQ 0x99
#define CMOVE_r_rm (/* GROUP_0F */ 0x44)
#define CMP (/* BINARY */ 7 << 3)
#define CMP_EAX_i32 0x3d
#define CMP_r_rm 0x3b
#define CMP_rm_r 0x39
#define CMPS_x_xm 0xc2
#define CMPXCHG_rm_r 0xb1
#define CMPXCHG_rm8_r 0xb0
#define CVTPD2PS_x_xm 0x5a
#define CVTPS2PD_x_xm 0x5a
#define CVTSI2SD_x_rm 0x2a
#define CVTTSD2SI_r_xm 0x2c
#define DIV (/* GROUP_F7 */ 6 << 3)
#define DIVSD_x_xm 0x5e
#define EXTRACTPS_x_xm 0x17
#define FLDS 0xd9
#define FLDL 0xdd
#define FSTPS 0xd9
#define FSTPD 0xdd
#define INSERTPS_x_xm 0x21
#define INT3 0xcc
#define IDIV (/* GROUP_F7 */ 7 << 3)
#define IMUL (/* GROUP_F7 */ 5 << 3)
#define IMUL_r_rm (/* GROUP_0F */ 0xaf)
#define IMUL_r_rm_i8 0x6b
#define IMUL_r_rm_i32 0x69
#define JL_i8 0x7c
#define JE_i8 0x74
#define JNC_i8 0x73
#define JNE_i8 0x75
#define JMP_i8 0xeb
#define JMP_i32 0xe9
#define JMP_rm (/* GROUP_FF */ 4 << 3)
#define LEA_r_m 0x8d
#define LOOP_i8 0xe2
#define LZCNT_r_rm (/* GROUP_F3 */ /* GROUP_0F */ 0xbd)
#define MOV_r_rm 0x8b
#define MOV_r_i32 0xb8
#define MOV_rm_r 0x89
#define MOV_rm_i32 0xc7
#define MOV_rm8_i8 0xc6
#define MOV_rm8_r8 0x88
#define MOVAPS_x_xm 0x28
#define MOVAPS_xm_x 0x29
#define MOVD_x_rm 0x6e
#define MOVD_rm_x 0x7e
#define MOVDDUP_x_xm 0x12
#define MOVDQA_x_xm 0x6f
#define MOVDQA_xm_x 0x7f
#define MOVHLPS_x_x 0x12
#define MOVHPD_m_x 0x17
#define MOVHPD_x_m 0x16
#define MOVLHPS_x_x 0x16
#define MOVLPD_m_x 0x13
#define MOVLPD_x_m 0x12
#define MOVMSKPS_r_x (/* GROUP_0F */ 0x50)
#define MOVQ_x_xm (/* GROUP_0F */ 0x7e)
#define MOVSD_x_xm 0x10
#define MOVSD_xm_x 0x11
#define MOVSHDUP_x_xm 0x16
#define MOVSXD_r_rm 0x63
#define MOVSX_r_rm8 (/* GROUP_0F */ 0xbe)
#define MOVSX_r_rm16 (/* GROUP_0F */ 0xbf)
#define MOVUPS_x_xm 0x10
#define MOVZX_r_rm8 (/* GROUP_0F */ 0xb6)
#define MOVZX_r_rm16 (/* GROUP_0F */ 0xb7)
#define MUL (/* GROUP_F7 */ 4 << 3)
#define MULSD_x_xm 0x59
#define NEG_rm (/* GROUP_F7 */ 3 << 3)
#define NOP 0x90
#define NOT_rm (/* GROUP_F7 */ 2 << 3)
#define OR (/* BINARY */ 1 << 3)
#define OR_r_rm 0x0b
#define OR_EAX_i32 0x0d
#define OR_rm_r 0x09
#define OR_rm8_r8 0x08
#define ORPD_x_xm 0x56
#define PACKSSWB_x_xm (/* GROUP_0F */ 0x63)
#define PAND_x_xm 0xdb
#define PCMPEQD_x_xm 0x76
#define PINSRB_x_rm_i8 0x20
#define PINSRW_x_rm_i8 0xc4
#define PINSRD_x_rm_i8 0x22
#define PEXTRB_rm_x_i8 0x14
#define PEXTRW_rm_x_i8 0x15
#define PEXTRD_rm_x_i8 0x16
#define PMOVMSKB_r_x (/* GROUP_0F */ 0xd7)
#define PMOVSXBD_x_xm 0x21
#define PMOVSXBQ_x_xm 0x22
#define PMOVSXBW_x_xm 0x20
#define PMOVSXDQ_x_xm 0x25
#define PMOVSXWD_x_xm 0x23
#define PMOVSXWQ_x_xm 0x24
#define PMOVZXBD_x_xm 0x31
#define PMOVZXBQ_x_xm 0x32
#define PMOVZXBW_x_xm 0x30
#define PMOVZXDQ_x_xm 0x35
#define PMOVZXWD_x_xm 0x33
#define PMOVZXWQ_x_xm 0x34
#define POP_r 0x58
#define POP_rm 0x8f
#define POPF 0x9d
#define POR_x_xm 0xeb
#define PREFETCH 0x18
#define PSHUFB_x_xm 0x00
#define PSHUFD_x_xm 0x70
#define PSHUFLW_x_xm 0x70
#define PSRLDQ_x 0x73
#define PSLLD_x_i8 0x72
#define PSLLQ_x_i8 0x73
#define PUSH_i32 0x68
#define PUSH_r 0x50
#define PUSH_rm (/* GROUP_FF */ 6 << 3)
#define PUSHF 0x9c
#define PXOR_x_xm 0xef
#define ROL (/* SHIFT */ 0 << 3)
#define ROR (/* SHIFT */ 1 << 3)
#define RET_near 0xc3
#define RET_i16 0xc2
#define SBB (/* BINARY */ 3 << 3)
#define SBB_EAX_i32 0x1d
#define SBB_r_rm 0x1b
#define SBB_rm_r 0x19
#define SAR (/* SHIFT */ 7 << 3)
#define SHL (/* SHIFT */ 4 << 3)
#define SHLD (/* GROUP_0F */ 0xa5)
#define SHRD (/* GROUP_0F */ 0xad)
#define SHR (/* SHIFT */ 5 << 3)
#define SHUFPS_x_xm 0xc6
#define SUB (/* BINARY */ 5 << 3)
#define SUB_EAX_i32 0x2d
#define SUB_r_rm 0x2b
#define SUB_rm_r 0x29
#define SUBSD_x_xm 0x5c
#define TEST_EAX_i32 0xa9
#define TEST_rm_r 0x85
#define TZCNT_r_rm (/* GROUP_F3 */ /* GROUP_0F */ 0xbc)
#define UCOMISD_x_xm 0x2e
#define UNPCKLPD_x_xm 0x14
#define UNPCKLPS_x_xm 0x14
#define VBROADCASTSD_x_xm 0x19
#define VBROADCASTSS_x_xm 0x18
#define VEXTRACTF128_x_ym 0x19
#define VEXTRACTI128_x_ym 0x39
#define VINSERTF128_y_y_xm 0x18
#define VINSERTI128_y_y_xm 0x38
#define VPBROADCASTB_x_xm 0x78
#define VPBROADCASTD_x_xm 0x58
#define VPBROADCASTQ_x_xm 0x59
#define VPBROADCASTW_x_xm 0x79
#define VPERMPD_y_ym 0x01
#define VPERMQ_y_ym 0x00
#define XCHG_EAX_r 0x90
#define XCHG_r_rm 0x87
#define XOR (/* BINARY */ 6 << 3)
#define XOR_EAX_i32 0x35
#define XOR_r_rm 0x33
#define XOR_rm_r 0x31
#define XORPD_x_xm 0x57

#define GROUP_0F 0x0f
#define GROUP_66 0x66
#define GROUP_F3 0xf3
#define GROUP_F7 0xf7
#define GROUP_FF 0xff
#define GROUP_BINARY_81 0x81
#define GROUP_BINARY_83 0x83
#define GROUP_SHIFT_1 0xd1
#define GROUP_SHIFT_N 0xc1
#define GROUP_SHIFT_CL 0xd3
#define GROUP_LOCK 0xf0

#define MOD_REG 0xc0
#define MOD_DISP8 0x40

#define INC_SIZE(s) (*inst++ = U8(s), compiler->size += (s))

#define PUSH_REG(r) (*inst++ = U8(PUSH_r + (r)))
#define POP_REG(r) (*inst++ = U8(POP_r + (r)))
#define RET() (*inst++ = RET_near)
#define RET_I16(n) (*inst++ = RET_i16, *inst++ = U8(n), *inst++ = 0)
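
/* Example byte sequences produced by the macros above (assuming a
   register index below 8, so no REX prefix is involved): PUSH_REG(5)
   emits 0x55 (push ebp/rbp), POP_REG(5) emits 0x5d, RET() emits 0xc3,
   and RET_I16(8) emits c2 08 00 (ret imm16 with a little-endian
   immediate whose high byte is always zero here). */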

#define SLJIT_INST_LABEL 255
#define SLJIT_INST_JUMP 254
#define SLJIT_INST_MOV_ADDR 253
#define SLJIT_INST_CONST 252

/* Multithreading does not affect these static variables, since they store
   built-in CPU features. Even if several threads detect the CPU features
   at the same time, they all compute and store the same values, so
   concurrent overwrites are harmless. */
#define CPU_FEATURE_DETECTED 0x001
#if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
#define CPU_FEATURE_SSE2 0x002
#endif
#define CPU_FEATURE_SSE41 0x004
#define CPU_FEATURE_LZCNT 0x008
#define CPU_FEATURE_TZCNT 0x010
#define CPU_FEATURE_CMOV 0x020
#define CPU_FEATURE_AVX 0x040
#define CPU_FEATURE_AVX2 0x080
#define CPU_FEATURE_OSXSAVE 0x100

static sljit_u32 cpu_feature_list = 0;

#ifdef _WIN32_WCE
#include <cmnintrin.h>
#elif defined(_MSC_VER) && _MSC_VER >= 1400
#include <intrin.h>
#endif

/******************************************************/
/*    Unaligned-store functions                       */
/******************************************************/

static SLJIT_INLINE void sljit_unaligned_store_s16(void *addr, sljit_s16 value)
{
	SLJIT_MEMCPY(addr, &value, sizeof(value));
}

static SLJIT_INLINE void sljit_unaligned_store_s32(void *addr, sljit_s32 value)
{
	SLJIT_MEMCPY(addr, &value, sizeof(value));
}

static SLJIT_INLINE void sljit_unaligned_store_sw(void *addr, sljit_sw value)
{
	SLJIT_MEMCPY(addr, &value, sizeof(value));
}

/******************************************************/
/*    Utility functions                               */
/******************************************************/

static void execute_cpu_id(sljit_u32 info[4])
{
#if defined(_MSC_VER) && _MSC_VER >= 1400

	__cpuidex((int*)info, (int)info[0], (int)info[2]);

#elif defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__SUNPRO_C) || defined(__TINYC__)

	/* AT&T syntax. */
	__asm__ (
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		"movl %0, %%esi\n"
		"movl (%%esi), %%eax\n"
		"movl 8(%%esi), %%ecx\n"
		"pushl %%ebx\n"
		"cpuid\n"
		"movl %%eax, (%%esi)\n"
		"movl %%ebx, 4(%%esi)\n"
		"popl %%ebx\n"
		"movl %%ecx, 8(%%esi)\n"
		"movl %%edx, 12(%%esi)\n"
#else /* !SLJIT_CONFIG_X86_32 */
		"movq %0, %%rsi\n"
		"movl (%%rsi), %%eax\n"
		"movl 8(%%rsi), %%ecx\n"
		"cpuid\n"
		"movl %%eax, (%%rsi)\n"
		"movl %%ebx, 4(%%rsi)\n"
		"movl %%ecx, 8(%%rsi)\n"
		"movl %%edx, 12(%%rsi)\n"
#endif /* SLJIT_CONFIG_X86_32 */
		:
		: "r" (info)
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		: "memory", "eax", "ecx", "edx", "esi"
#else /* !SLJIT_CONFIG_X86_32 */
		: "memory", "rax", "rbx", "rcx", "rdx", "rsi"
#endif /* SLJIT_CONFIG_X86_32 */
	);

#else /* _MSC_VER < 1400 */

	/* Intel syntax. */
	__asm {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		mov esi, info
		mov eax, [esi]
		mov ecx, [esi + 8]
		cpuid
		mov [esi], eax
		mov [esi + 4], ebx
		mov [esi + 8], ecx
		mov [esi + 12], edx
#else /* !SLJIT_CONFIG_X86_32 */
		mov rsi, info
		mov eax, [rsi]
		mov ecx, [rsi + 8]
		cpuid
		mov [rsi], eax
		mov [rsi + 4], ebx
		mov [rsi + 8], ecx
		mov [rsi + 12], edx
#endif /* SLJIT_CONFIG_X86_32 */
	}

#endif /* _MSC_VER && _MSC_VER >= 1400 */
}

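/* XCR0 is read below with XGETBV (ECX = 0); it reports which register
   states the OS saves and restores. Bit 1 covers the SSE (XMM) state and
   bit 2 - the 0x4 mask tested in get_cpu_features - the AVX (YMM) state,
   which must be OS-enabled before AVX instructions can be used. */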
static sljit_u32 execute_get_xcr0_low(void)
{
	sljit_u32 xcr0;

#if defined(_MSC_VER) && _MSC_VER >= 1400

	xcr0 = (sljit_u32)_xgetbv(0);

#elif defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__SUNPRO_C) || defined(__TINYC__)

	/* AT&T syntax. */
	__asm__ (
		"xorl %%ecx, %%ecx\n"
		"xgetbv\n"
		: "=a" (xcr0)
		:
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		: "ecx", "edx"
#else /* !SLJIT_CONFIG_X86_32 */
		: "rcx", "rdx"
#endif /* SLJIT_CONFIG_X86_32 */
	);

#else /* _MSC_VER < 1400 */

	/* Intel syntax. */
	__asm {
		mov ecx, 0
		xgetbv
		mov xcr0, eax
	}

#endif /* _MSC_VER && _MSC_VER >= 1400 */
	return xcr0;
}

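/* The magic constants tested below are documented CPUID feature bits:
   leaf 7 EBX bit 3 = BMI1 (which provides TZCNT) and bit 5 = AVX2;
   leaf 1 ECX bit 19 = SSE4.1, bit 27 = OSXSAVE, bit 28 = AVX, and
   EDX bit 15 = CMOV, bit 26 = SSE2; extended leaf 0x80000001
   ECX bit 5 = LZCNT. */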
static void get_cpu_features(void)
{
	sljit_u32 feature_list = CPU_FEATURE_DETECTED;
	sljit_u32 info[4] = {0};
	sljit_u32 max_id;

	execute_cpu_id(info);
	max_id = info[0];

	if (max_id >= 7) {
		info[0] = 7;
		info[2] = 0;
		execute_cpu_id(info);

		if (info[1] & 0x8)
			feature_list |= CPU_FEATURE_TZCNT;
		if (info[1] & 0x20)
			feature_list |= CPU_FEATURE_AVX2;
	}

	if (max_id >= 1) {
		info[0] = 1;
		execute_cpu_id(info);

		if (info[2] & 0x80000)
			feature_list |= CPU_FEATURE_SSE41;
		if (info[2] & 0x8000000)
			feature_list |= CPU_FEATURE_OSXSAVE;
		if (info[2] & 0x10000000)
			feature_list |= CPU_FEATURE_AVX;
#if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
		if (info[3] & 0x4000000)
			feature_list |= CPU_FEATURE_SSE2;
#endif
		if (info[3] & 0x8000)
			feature_list |= CPU_FEATURE_CMOV;
	}

	info[0] = 0x80000001;
	execute_cpu_id(info);

	if (info[2] & 0x20)
		feature_list |= CPU_FEATURE_LZCNT;

	if ((feature_list & CPU_FEATURE_OSXSAVE) && (execute_get_xcr0_low() & 0x4) == 0)
		feature_list &= ~(sljit_u32)(CPU_FEATURE_AVX | CPU_FEATURE_AVX2);

	cpu_feature_list = feature_list;
}

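/* The values returned below are the second byte of the long (0F 8x)
   rel32 form of each conditional jump. The short rel8 opcode is always
   0x10 less (e.g. 0F 84 je rel32 <-> 74 je rel8), which is why callers
   emit get_jump_code(type) - 0x10 when a short jump fits. */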
static sljit_u8 get_jump_code(sljit_uw type)
{
	switch (type) {
	case SLJIT_EQUAL:
	case SLJIT_ATOMIC_STORED:
	case SLJIT_F_EQUAL:
	case SLJIT_UNORDERED_OR_EQUAL:
		return 0x84 /* je */;

	case SLJIT_NOT_EQUAL:
	case SLJIT_ATOMIC_NOT_STORED:
	case SLJIT_F_NOT_EQUAL:
	case SLJIT_ORDERED_NOT_EQUAL:
		return 0x85 /* jne */;

	case SLJIT_LESS:
	case SLJIT_CARRY:
	case SLJIT_F_LESS:
	case SLJIT_UNORDERED_OR_LESS:
	case SLJIT_UNORDERED_OR_GREATER:
		return 0x82 /* jc */;

	case SLJIT_GREATER_EQUAL:
	case SLJIT_NOT_CARRY:
	case SLJIT_F_GREATER_EQUAL:
	case SLJIT_ORDERED_GREATER_EQUAL:
	case SLJIT_ORDERED_LESS_EQUAL:
		return 0x83 /* jae */;

	case SLJIT_GREATER:
	case SLJIT_F_GREATER:
	case SLJIT_ORDERED_LESS:
	case SLJIT_ORDERED_GREATER:
		return 0x87 /* jnbe */;

	case SLJIT_LESS_EQUAL:
	case SLJIT_F_LESS_EQUAL:
	case SLJIT_UNORDERED_OR_GREATER_EQUAL:
	case SLJIT_UNORDERED_OR_LESS_EQUAL:
		return 0x86 /* jbe */;

	case SLJIT_SIG_LESS:
		return 0x8c /* jl */;

	case SLJIT_SIG_GREATER_EQUAL:
		return 0x8d /* jnl */;

	case SLJIT_SIG_GREATER:
		return 0x8f /* jnle */;

	case SLJIT_SIG_LESS_EQUAL:
		return 0x8e /* jle */;

	case SLJIT_OVERFLOW:
		return 0x80 /* jo */;

	case SLJIT_NOT_OVERFLOW:
		return 0x81 /* jno */;

	case SLJIT_UNORDERED:
	case SLJIT_ORDERED_EQUAL: /* NaN. */
		return 0x8a /* jp */;

	case SLJIT_ORDERED:
	case SLJIT_UNORDERED_OR_NOT_EQUAL: /* Not NaN. */
		return 0x8b /* jpo */;
	}
	return 0;
}

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
static sljit_u8* detect_far_jump_type(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_sw executable_offset);
#else /* !SLJIT_CONFIG_X86_32 */
static sljit_u8* detect_far_jump_type(struct sljit_jump *jump, sljit_u8 *code_ptr);
static sljit_u8* generate_mov_addr_code(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_u8 *code, sljit_sw executable_offset);
#endif /* SLJIT_CONFIG_X86_32 */

static sljit_u8* detect_near_jump_type(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_u8 *code, sljit_sw executable_offset)
{
	sljit_uw type = jump->flags >> TYPE_SHIFT;
	sljit_s32 short_jump;
	sljit_uw label_addr;

	if (jump->flags & JUMP_ADDR)
		label_addr = jump->u.target - (sljit_uw)executable_offset;
	else
		label_addr = (sljit_uw)(code + jump->u.label->size);

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	if ((sljit_sw)(label_addr - (sljit_uw)(code_ptr + 6)) > HALFWORD_MAX || (sljit_sw)(label_addr - (sljit_uw)(code_ptr + 5)) < HALFWORD_MIN)
		return detect_far_jump_type(jump, code_ptr);
#endif /* SLJIT_CONFIG_X86_64 */

	short_jump = (sljit_sw)(label_addr - (sljit_uw)(code_ptr + 2)) >= -0x80 && (sljit_sw)(label_addr - (sljit_uw)(code_ptr + 2)) <= 0x7f;

	if (type == SLJIT_JUMP) {
		if (short_jump)
			*code_ptr++ = JMP_i8;
		else
			*code_ptr++ = JMP_i32;
	} else if (type > SLJIT_JUMP) {
		short_jump = 0;
		*code_ptr++ = CALL_i32;
	} else if (short_jump) {
		*code_ptr++ = U8(get_jump_code(type) - 0x10);
	} else {
		*code_ptr++ = GROUP_0F;
		*code_ptr++ = get_jump_code(type);
	}

	jump->addr = (sljit_uw)code_ptr;

	if (short_jump) {
		jump->flags |= PATCH_MB;
		code_ptr += sizeof(sljit_s8);
	} else {
		jump->flags |= PATCH_MW;
		code_ptr += sizeof(sljit_s32);
	}

	return code_ptr;
}

static void generate_jump_or_mov_addr(struct sljit_jump *jump, sljit_sw executable_offset)
{
	sljit_uw flags = jump->flags;
	sljit_uw addr = (flags & JUMP_ADDR) ? jump->u.target : jump->u.label->u.addr;
	sljit_uw jump_addr = jump->addr;
	SLJIT_UNUSED_ARG(executable_offset);

	if (SLJIT_UNLIKELY(flags & JUMP_MOV_ADDR)) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		sljit_unaligned_store_sw((void*)(jump_addr - sizeof(sljit_sw)), (sljit_sw)addr);
#else /* SLJIT_CONFIG_X86_32 */
		if (flags & PATCH_MD) {
			SLJIT_ASSERT(addr > HALFWORD_MAX);
			sljit_unaligned_store_sw((void*)(jump_addr - sizeof(sljit_sw)), (sljit_sw)addr);
			return;
		}

		if (flags & PATCH_MW) {
			addr -= (sljit_uw)SLJIT_ADD_EXEC_OFFSET((sljit_u8*)jump_addr, executable_offset);
			SLJIT_ASSERT((sljit_sw)addr <= HALFWORD_MAX && (sljit_sw)addr >= HALFWORD_MIN);
		} else {
			SLJIT_ASSERT(addr <= HALFWORD_MAX);
		}
		sljit_unaligned_store_s32((void*)(jump_addr - sizeof(sljit_s32)), (sljit_s32)addr);
#endif /* !SLJIT_CONFIG_X86_32 */
		return;
	}

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	if (SLJIT_UNLIKELY(flags & PATCH_MD)) {
		SLJIT_ASSERT(!(flags & JUMP_ADDR));
		sljit_unaligned_store_sw((void*)jump_addr, (sljit_sw)addr);
		return;
	}
#endif /* SLJIT_CONFIG_X86_64 */

	addr -= (sljit_uw)SLJIT_ADD_EXEC_OFFSET((sljit_u8*)jump_addr, executable_offset);

	if (flags & PATCH_MB) {
		addr -= sizeof(sljit_s8);
		SLJIT_ASSERT((sljit_sw)addr <= 0x7f && (sljit_sw)addr >= -0x80);
		*(sljit_u8*)jump_addr = U8(addr);
		return;
	} else if (flags & PATCH_MW) {
		addr -= sizeof(sljit_s32);
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		sljit_unaligned_store_sw((void*)jump_addr, (sljit_sw)addr);
#else /* !SLJIT_CONFIG_X86_32 */
		SLJIT_ASSERT((sljit_sw)addr <= HALFWORD_MAX && (sljit_sw)addr >= HALFWORD_MIN);
		sljit_unaligned_store_s32((void*)jump_addr, (sljit_s32)addr);
#endif /* SLJIT_CONFIG_X86_32 */
	}
}

static void reduce_code_size(struct sljit_compiler *compiler)
{
	struct sljit_label *label;
	struct sljit_jump *jump;
	sljit_uw next_label_size;
	sljit_uw next_jump_addr;
	sljit_uw next_min_addr;
	sljit_uw size_reduce = 0;
	sljit_sw diff;
	sljit_uw type;
#if (defined SLJIT_DEBUG && SLJIT_DEBUG)
	sljit_uw size_reduce_max;
#endif /* SLJIT_DEBUG */

	label = compiler->labels;
	jump = compiler->jumps;

	next_label_size = SLJIT_GET_NEXT_SIZE(label);
	next_jump_addr = SLJIT_GET_NEXT_ADDRESS(jump);

	while (1) {
		next_min_addr = next_label_size;
		if (next_jump_addr < next_min_addr)
			next_min_addr = next_jump_addr;

		if (next_min_addr == SLJIT_MAX_ADDRESS)
			break;

		if (next_min_addr == next_label_size) {
			label->size -= size_reduce;

			label = label->next;
			next_label_size = SLJIT_GET_NEXT_SIZE(label);
		}

		if (next_min_addr != next_jump_addr)
			continue;

		if (!(jump->flags & JUMP_MOV_ADDR)) {
#if (defined SLJIT_DEBUG && SLJIT_DEBUG)
			size_reduce_max = size_reduce + (((jump->flags >> TYPE_SHIFT) < SLJIT_JUMP) ? CJUMP_MAX_SIZE : JUMP_MAX_SIZE);
#endif /* SLJIT_DEBUG */

			if (!(jump->flags & SLJIT_REWRITABLE_JUMP)) {
				if (jump->flags & JUMP_ADDR) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
					if (jump->u.target <= 0xffffffffl)
						size_reduce += sizeof(sljit_s32);
#endif /* SLJIT_CONFIG_X86_64 */
				} else {
					/* Unit size: instruction. */
					diff = (sljit_sw)jump->u.label->size - (sljit_sw)(jump->addr - size_reduce);
					type = jump->flags >> TYPE_SHIFT;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
					if (type == SLJIT_JUMP) {
						if (diff <= 0x7f + 2 && diff >= -0x80 + 2)
							size_reduce += JUMP_MAX_SIZE - 2;
						else if (diff <= HALFWORD_MAX + 5 && diff >= HALFWORD_MIN + 5)
							size_reduce += JUMP_MAX_SIZE - 5;
					} else if (type < SLJIT_JUMP) {
						if (diff <= 0x7f + 2 && diff >= -0x80 + 2)
							size_reduce += CJUMP_MAX_SIZE - 2;
						else if (diff <= HALFWORD_MAX + 6 && diff >= HALFWORD_MIN + 6)
							size_reduce += CJUMP_MAX_SIZE - 6;
					} else {
						if (diff <= HALFWORD_MAX + 5 && diff >= HALFWORD_MIN + 5)
							size_reduce += JUMP_MAX_SIZE - 5;
					}
#else /* !SLJIT_CONFIG_X86_64 */
					if (type == SLJIT_JUMP) {
						if (diff <= 0x7f + 2 && diff >= -0x80 + 2)
							size_reduce += JUMP_MAX_SIZE - 2;
					} else if (type < SLJIT_JUMP) {
						if (diff <= 0x7f + 2 && diff >= -0x80 + 2)
							size_reduce += CJUMP_MAX_SIZE - 2;
					}
#endif /* SLJIT_CONFIG_X86_64 */
				}
			}

#if (defined SLJIT_DEBUG && SLJIT_DEBUG)
			jump->flags |= (size_reduce_max - size_reduce) << JUMP_SIZE_SHIFT;
#endif /* SLJIT_DEBUG */
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		} else {
#if (defined SLJIT_DEBUG && SLJIT_DEBUG)
			size_reduce_max = size_reduce + 10;
#endif /* SLJIT_DEBUG */

			if (!(jump->flags & JUMP_ADDR)) {
				diff = (sljit_sw)jump->u.label->size - (sljit_sw)(jump->addr - size_reduce - 3);

				if (diff <= HALFWORD_MAX && diff >= HALFWORD_MIN)
					size_reduce += 3;
			} else if (jump->u.target <= 0xffffffffl)
				size_reduce += (jump->flags & MOV_ADDR_HI) ? 4 : 5;

#if (defined SLJIT_DEBUG && SLJIT_DEBUG)
			jump->flags |= (size_reduce_max - size_reduce) << JUMP_SIZE_SHIFT;
#endif /* SLJIT_DEBUG */
#endif /* SLJIT_CONFIG_X86_64 */
		}

		jump = jump->next;
		next_jump_addr = SLJIT_GET_NEXT_ADDRESS(jump);
	}

	compiler->size -= size_reduce;
}

SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler, sljit_s32 options, void *exec_allocator_data)
{
	struct sljit_memory_fragment *buf;
	sljit_u8 *code;
	sljit_u8 *code_ptr;
	sljit_u8 *buf_ptr;
	sljit_u8 *buf_end;
	sljit_u8 len;
	sljit_sw executable_offset;
#if (defined SLJIT_DEBUG && SLJIT_DEBUG)
	sljit_uw addr;
#endif /* SLJIT_DEBUG */

	struct sljit_label *label;
	struct sljit_jump *jump;
	struct sljit_const *const_;

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_generate_code(compiler));

	reduce_code_size(compiler);

	/* Second code generation pass. */
	code = (sljit_u8*)allocate_executable_memory(compiler->size, options, exec_allocator_data, &executable_offset);
	PTR_FAIL_WITH_EXEC_IF(code);

	reverse_buf(compiler);
	buf = compiler->buf;

	code_ptr = code;
	label = compiler->labels;
	jump = compiler->jumps;
	const_ = compiler->consts;

	do {
		buf_ptr = buf->memory;
		buf_end = buf_ptr + buf->used_size;
		do {
			len = *buf_ptr++;
			SLJIT_ASSERT(len > 0);
			if (len < SLJIT_INST_CONST) {
				/* The code is already generated. */
				SLJIT_MEMCPY(code_ptr, buf_ptr, len);
				code_ptr += len;
				buf_ptr += len;
			} else {
				switch (len) {
				case SLJIT_INST_LABEL:
					label->u.addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
					label->size = (sljit_uw)(code_ptr - code);
					label = label->next;
					break;
				case SLJIT_INST_JUMP:
#if (defined SLJIT_DEBUG && SLJIT_DEBUG)
					addr = (sljit_uw)code_ptr;
#endif /* SLJIT_DEBUG */
					if (!(jump->flags & SLJIT_REWRITABLE_JUMP))
						code_ptr = detect_near_jump_type(jump, code_ptr, code, executable_offset);
					else {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
						code_ptr = detect_far_jump_type(jump, code_ptr, executable_offset);
#else /* !SLJIT_CONFIG_X86_32 */
						code_ptr = detect_far_jump_type(jump, code_ptr);
#endif /* SLJIT_CONFIG_X86_32 */
					}

					SLJIT_ASSERT((sljit_uw)code_ptr - addr <= ((jump->flags >> JUMP_SIZE_SHIFT) & 0x1f));
					jump = jump->next;
					break;
				case SLJIT_INST_MOV_ADDR:
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
					code_ptr = generate_mov_addr_code(jump, code_ptr, code, executable_offset);
#endif /* SLJIT_CONFIG_X86_64 */
					jump->addr = (sljit_uw)code_ptr;
					jump = jump->next;
					break;
				default:
					SLJIT_ASSERT(len == SLJIT_INST_CONST);
					const_->addr = ((sljit_uw)code_ptr) - sizeof(sljit_sw);
					const_ = const_->next;
					break;
				}
			}
		} while (buf_ptr < buf_end);

		SLJIT_ASSERT(buf_ptr == buf_end);
		buf = buf->next;
	} while (buf);

	SLJIT_ASSERT(!label);
	SLJIT_ASSERT(!jump);
	SLJIT_ASSERT(!const_);
	SLJIT_ASSERT(code_ptr <= code + compiler->size);

	jump = compiler->jumps;
	while (jump) {
		generate_jump_or_mov_addr(jump, executable_offset);
		jump = jump->next;
	}

	compiler->error = SLJIT_ERR_COMPILED;
	compiler->executable_offset = executable_offset;
	compiler->executable_size = (sljit_uw)(code_ptr - code);

	code = (sljit_u8*)SLJIT_ADD_EXEC_OFFSET(code, executable_offset);

	SLJIT_UPDATE_WX_FLAGS(code, (sljit_u8*)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset), 1);
	return (void*)code;
}

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type)
{
	switch (feature_type) {
	case SLJIT_HAS_FPU:
#ifdef SLJIT_IS_FPU_AVAILABLE
		return (SLJIT_IS_FPU_AVAILABLE) != 0;
#elif (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
		if (cpu_feature_list == 0)
			get_cpu_features();
		return (cpu_feature_list & CPU_FEATURE_SSE2) != 0;
#else /* SLJIT_DETECT_SSE2 */
		return 1;
#endif /* SLJIT_DETECT_SSE2 */

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	case SLJIT_HAS_VIRTUAL_REGISTERS:
		return 1;
#endif /* SLJIT_CONFIG_X86_32 */

	case SLJIT_HAS_CLZ:
		if (cpu_feature_list == 0)
			get_cpu_features();

		return (cpu_feature_list & CPU_FEATURE_LZCNT) ? 1 : 2;

	case SLJIT_HAS_CTZ:
		if (cpu_feature_list == 0)
			get_cpu_features();

		return (cpu_feature_list & CPU_FEATURE_TZCNT) ? 1 : 2;

	case SLJIT_HAS_CMOV:
		if (cpu_feature_list == 0)
			get_cpu_features();
		return (cpu_feature_list & CPU_FEATURE_CMOV) != 0;

	case SLJIT_HAS_REV:
	case SLJIT_HAS_ROT:
	case SLJIT_HAS_PREFETCH:
	case SLJIT_HAS_COPY_F32:
	case SLJIT_HAS_COPY_F64:
	case SLJIT_HAS_ATOMIC:
		return 1;

#if !(defined SLJIT_IS_FPU_AVAILABLE) || SLJIT_IS_FPU_AVAILABLE
	case SLJIT_HAS_AVX:
		if (cpu_feature_list == 0)
			get_cpu_features();
		return (cpu_feature_list & CPU_FEATURE_AVX) != 0;
	case SLJIT_HAS_AVX2:
		if (cpu_feature_list == 0)
			get_cpu_features();
		return (cpu_feature_list & CPU_FEATURE_AVX2) != 0;
	case SLJIT_HAS_SIMD:
		if (cpu_feature_list == 0)
			get_cpu_features();
		return (cpu_feature_list & CPU_FEATURE_SSE41) != 0;
#endif /* SLJIT_IS_FPU_AVAILABLE */
	default:
		return 0;
	}
}

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_cmp_info(sljit_s32 type)
{
	switch (type) {
	case SLJIT_ORDERED_EQUAL:
	case SLJIT_UNORDERED_OR_NOT_EQUAL:
		return 2;
	}

	return 0;
}

/* --------------------------------------------------------------------- */
/*  Operators                                                            */
/* --------------------------------------------------------------------- */

#define BINARY_OPCODE(opcode) (((opcode ## _EAX_i32) << 24) | ((opcode ## _r_rm) << 16) | ((opcode ## _rm_r) << 8) | (opcode))

#define BINARY_IMM32(op_imm, immw, arg, argw) \
	do { \
		inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, immw, arg, argw); \
		FAIL_IF(!inst); \
		*(inst + 1) |= (op_imm); \
	} while (0)
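
/* The immediate forms of the binary operations share the group opcodes
   0x81/0x83 and are distinguished by the opcode extension placed in the
   ModRM reg field; that is what or-ing (op_imm) into *(inst + 1) above
   selects (ADD is /0, OR /1, ADC /2, SBB /3, AND /4, SUB /5, XOR /6,
   CMP /7, matching the "n << 3" definitions earlier). */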

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)

#define BINARY_IMM(op_imm, op_mr, immw, arg, argw) \
	do { \
		if (IS_HALFWORD(immw) || compiler->mode32) { \
			BINARY_IMM32(op_imm, immw, arg, argw); \
		} \
		else { \
			FAIL_IF(emit_load_imm64(compiler, FAST_IS_REG(arg) ? TMP_REG2 : TMP_REG1, immw)); \
			inst = emit_x86_instruction(compiler, 1, FAST_IS_REG(arg) ? TMP_REG2 : TMP_REG1, 0, arg, argw); \
			FAIL_IF(!inst); \
			*inst = (op_mr); \
		} \
	} while (0)

#define BINARY_EAX_IMM(op_eax_imm, immw) \
	FAIL_IF(emit_do_imm32(compiler, (!compiler->mode32) ? REX_W : 0, (op_eax_imm), immw))

#else /* !SLJIT_CONFIG_X86_64 */

#define BINARY_IMM(op_imm, op_mr, immw, arg, argw) \
	BINARY_IMM32(op_imm, immw, arg, argw)

#define BINARY_EAX_IMM(op_eax_imm, immw) \
	FAIL_IF(emit_do_imm(compiler, (op_eax_imm), immw))

#endif /* SLJIT_CONFIG_X86_64 */

static sljit_s32 emit_byte(struct sljit_compiler *compiler, sljit_u8 byte)
{
	sljit_u8 *inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
	FAIL_IF(!inst);
	INC_SIZE(1);
	*inst = byte;
	return SLJIT_SUCCESS;
}

static sljit_s32 emit_mov(struct sljit_compiler *compiler,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw);

#define EMIT_MOV(compiler, dst, dstw, src, srcw) \
	FAIL_IF(emit_mov(compiler, dst, dstw, src, srcw));

static sljit_s32 emit_groupf(struct sljit_compiler *compiler,
	sljit_uw op,
	sljit_s32 dst, sljit_s32 src, sljit_sw srcw);

static sljit_s32 emit_groupf_ext(struct sljit_compiler *compiler,
	sljit_uw op,
	sljit_s32 dst, sljit_s32 src, sljit_sw srcw);

static SLJIT_INLINE sljit_s32 emit_sse2_store(struct sljit_compiler *compiler,
	sljit_s32 single, sljit_s32 dst, sljit_sw dstw, sljit_s32 src);

static SLJIT_INLINE sljit_s32 emit_sse2_load(struct sljit_compiler *compiler,
	sljit_s32 single, sljit_s32 dst, sljit_s32 src, sljit_sw srcw);

static sljit_s32 emit_cmp_binary(struct sljit_compiler *compiler,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w);

static sljit_s32 emit_cmov_generic(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 dst_reg,
	sljit_s32 src, sljit_sw srcw);

static SLJIT_INLINE sljit_s32 emit_endbranch(struct sljit_compiler *compiler)
{
#if (defined SLJIT_CONFIG_X86_CET && SLJIT_CONFIG_X86_CET)
	/* Emit endbr32/endbr64 when CET is enabled. */
	sljit_u8 *inst;
	inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
	FAIL_IF(!inst);
	INC_SIZE(4);
	inst[0] = GROUP_F3;
	inst[1] = GROUP_0F;
	inst[2] = 0x1e;
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	inst[3] = 0xfb;
#else /* !SLJIT_CONFIG_X86_32 */
	inst[3] = 0xfa;
#endif /* SLJIT_CONFIG_X86_32 */
#else /* !SLJIT_CONFIG_X86_CET */
	SLJIT_UNUSED_ARG(compiler);
#endif /* SLJIT_CONFIG_X86_CET */
	return SLJIT_SUCCESS;
}

#if (defined SLJIT_CONFIG_X86_CET && SLJIT_CONFIG_X86_CET) && defined (__SHSTK__)

static SLJIT_INLINE sljit_s32 emit_rdssp(struct sljit_compiler *compiler, sljit_s32 reg)
{
	sljit_u8 *inst;
	sljit_s32 size;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	size = 5;
#else
	size = 4;
#endif

	inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
	FAIL_IF(!inst);
	INC_SIZE(size);
	*inst++ = GROUP_F3;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	*inst++ = REX_W | (reg_map[reg] <= 7 ? 0 : REX_B);
#endif
	inst[0] = GROUP_0F;
	inst[1] = 0x1e;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	inst[2] = U8(MOD_REG | (0x1 << 3) | reg_lmap[reg]);
#else
	inst[2] = U8(MOD_REG | (0x1 << 3) | reg_map[reg]);
#endif
	return SLJIT_SUCCESS;
}

static SLJIT_INLINE sljit_s32 emit_incssp(struct sljit_compiler *compiler, sljit_s32 reg)
{
	sljit_u8 *inst;
	sljit_s32 size;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	size = 5;
#else
	size = 4;
#endif

	inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
	FAIL_IF(!inst);
	INC_SIZE(size);
	*inst++ = GROUP_F3;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	*inst++ = REX_W | (reg_map[reg] <= 7 ? 0 : REX_B);
#endif
	inst[0] = GROUP_0F;
	inst[1] = 0xae;
	inst[2] = (0x3 << 6) | (0x5 << 3) | (reg_map[reg] & 0x7);
	return SLJIT_SUCCESS;
}

#endif /* SLJIT_CONFIG_X86_CET && __SHSTK__ */

static SLJIT_INLINE sljit_s32 cpu_has_shadow_stack(void)
{
#if (defined SLJIT_CONFIG_X86_CET && SLJIT_CONFIG_X86_CET) && defined (__SHSTK__)
	return _get_ssp() != 0;
#else /* !SLJIT_CONFIG_X86_CET || !__SHSTK__ */
	return 0;
#endif /* SLJIT_CONFIG_X86_CET && __SHSTK__ */
}

static SLJIT_INLINE sljit_s32 adjust_shadow_stack(struct sljit_compiler *compiler,
	sljit_s32 src, sljit_sw srcw)
{
#if (defined SLJIT_CONFIG_X86_CET && SLJIT_CONFIG_X86_CET) && defined (__SHSTK__)
	sljit_u8 *inst, *jz_after_cmp_inst;
	sljit_uw size_jz_after_cmp_inst;

	sljit_uw size_before_rdssp_inst = compiler->size;

	/* Generate "RDSSP TMP_REG1". */
	FAIL_IF(emit_rdssp(compiler, TMP_REG1));

	/* Load return address on shadow stack into TMP_REG1. */
	EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(TMP_REG1), 0);

	/* Compare return address against TMP_REG1. */
	FAIL_IF(emit_cmp_binary(compiler, TMP_REG1, 0, src, srcw));

	/* Generate JZ to skip shadow stack adjustment when shadow
	   stack matches normal stack. */
	inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
	FAIL_IF(!inst);
	INC_SIZE(2);
	*inst++ = U8(get_jump_code(SLJIT_EQUAL) - 0x10);
	size_jz_after_cmp_inst = compiler->size;
	jz_after_cmp_inst = inst;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	/* REX_W is not necessary. */
	compiler->mode32 = 1;
#endif
	/* Load 1 into TMP_REG1. */
	EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, 1);

	/* Generate "INCSSP TMP_REG1". */
	FAIL_IF(emit_incssp(compiler, TMP_REG1));

	/* Jump back to "RDSSP TMP_REG1" to check shadow stack again. */
	inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
	FAIL_IF(!inst);
	INC_SIZE(2);
	inst[0] = JMP_i8;
	inst[1] = U8(size_before_rdssp_inst - compiler->size);

	*jz_after_cmp_inst = U8(compiler->size - size_jz_after_cmp_inst);
#else /* !SLJIT_CONFIG_X86_CET || !__SHSTK__ */
	SLJIT_UNUSED_ARG(compiler);
	SLJIT_UNUSED_ARG(src);
	SLJIT_UNUSED_ARG(srcw);
#endif /* SLJIT_CONFIG_X86_CET && __SHSTK__ */
	return SLJIT_SUCCESS;
}

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
#include "sljitNativeX86_32.c"
#else
#include "sljitNativeX86_64.c"
#endif

static sljit_s32 emit_mov(struct sljit_compiler *compiler,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_u8* inst;

	if (FAST_IS_REG(src)) {
		inst = emit_x86_instruction(compiler, 1, src, 0, dst, dstw);
		FAIL_IF(!inst);
		*inst = MOV_rm_r;
		return SLJIT_SUCCESS;
	}

	if (src == SLJIT_IMM) {
		if (FAST_IS_REG(dst)) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
			return emit_do_imm(compiler, MOV_r_i32 | reg_map[dst], srcw);
#else
			if (!compiler->mode32) {
				if (NOT_HALFWORD(srcw))
					return emit_load_imm64(compiler, dst, srcw);
			}
			else
				return emit_do_imm32(compiler, (reg_map[dst] >= 8) ? REX_B : 0, U8(MOV_r_i32 | reg_lmap[dst]), srcw);
#endif
		}
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		if (!compiler->mode32 && NOT_HALFWORD(srcw)) {
			/* Immediate to memory move. Only SLJIT_MOV operation copies
			   an immediate directly into memory so TMP_REG1 can be used. */
			FAIL_IF(emit_load_imm64(compiler, TMP_REG1, srcw));
			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
			FAIL_IF(!inst);
			*inst = MOV_rm_r;
			return SLJIT_SUCCESS;
		}
#endif
		inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, dstw);
		FAIL_IF(!inst);
		*inst = MOV_rm_i32;
		return SLJIT_SUCCESS;
	}
	if (FAST_IS_REG(dst)) {
		inst = emit_x86_instruction(compiler, 1, dst, 0, src, srcw);
		FAIL_IF(!inst);
		*inst = MOV_r_rm;
		return SLJIT_SUCCESS;
	}

	/* Memory to memory move. Only SLJIT_MOV operation copies
	   data from memory to memory so TMP_REG1 can be used. */
	inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src, srcw);
	FAIL_IF(!inst);
	*inst = MOV_r_rm;
	inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
	FAIL_IF(!inst);
	*inst = MOV_rm_r;
	return SLJIT_SUCCESS;
}

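/* Generic fallback when CMOVcc cannot be used: emit a two byte short
   jump with the negated condition over a plain MOV. Flipping the lowest
   bit of an sljit condition type yields its negation, so
   get_jump_code(type ^ 0x1) - 0x10 below is the rel8 opcode of the
   opposite branch, and its displacement is patched afterwards to the
   size of the emitted MOV. */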
static sljit_s32 emit_cmov_generic(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 dst_reg,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_u8* inst;
	sljit_uw size;

	SLJIT_ASSERT(type >= SLJIT_EQUAL && type <= SLJIT_ORDERED_LESS_EQUAL);

	inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
	FAIL_IF(!inst);
	INC_SIZE(2);
	inst[0] = U8(get_jump_code((sljit_uw)type ^ 0x1) - 0x10);

	size = compiler->size;
	EMIT_MOV(compiler, dst_reg, 0, src, srcw);

	inst[1] = U8(compiler->size - size);
	return SLJIT_SUCCESS;
}

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op)
{
	sljit_u8 *inst;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	sljit_uw size;
#endif

	CHECK_ERROR();
	CHECK(check_sljit_emit_op0(compiler, op));

	switch (GET_OPCODE(op)) {
	case SLJIT_BREAKPOINT:
		return emit_byte(compiler, INT3);
	case SLJIT_NOP:
		return emit_byte(compiler, NOP);
	case SLJIT_LMUL_UW:
	case SLJIT_LMUL_SW:
	case SLJIT_DIVMOD_UW:
	case SLJIT_DIVMOD_SW:
	case SLJIT_DIV_UW:
	case SLJIT_DIV_SW:
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
#ifdef _WIN64
		SLJIT_ASSERT(
			reg_map[SLJIT_R0] == 0
			&& reg_map[SLJIT_R1] == 2
			&& reg_map[TMP_REG1] > 7);
#else
		SLJIT_ASSERT(
			reg_map[SLJIT_R0] == 0
			&& reg_map[SLJIT_R1] < 7
			&& reg_map[TMP_REG1] == 2);
#endif
		compiler->mode32 = op & SLJIT_32;
#endif
		SLJIT_COMPILE_ASSERT((SLJIT_DIVMOD_UW & 0x2) == 0 && SLJIT_DIV_UW - 0x2 == SLJIT_DIVMOD_UW, bad_div_opcode_assignments);

		op = GET_OPCODE(op);
		if ((op | 0x2) == SLJIT_DIV_UW) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || defined(_WIN64)
			EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_R1, 0);
			inst = emit_x86_instruction(compiler, 1, SLJIT_R1, 0, SLJIT_R1, 0);
#else
			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, TMP_REG1, 0);
#endif
			FAIL_IF(!inst);
			*inst = XOR_r_rm;
		}

		if ((op | 0x2) == SLJIT_DIV_SW) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || defined(_WIN64)
			EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_R1, 0);
#endif

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
			FAIL_IF(emit_byte(compiler, CDQ));
#else
			if (!compiler->mode32) {
				inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
				FAIL_IF(!inst);
				INC_SIZE(2);
				inst[0] = REX_W;
				inst[1] = CDQ;
			} else
				FAIL_IF(emit_byte(compiler, CDQ));
#endif
		}

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
		FAIL_IF(!inst);
		INC_SIZE(2);
		inst[0] = GROUP_F7;
		inst[1] = MOD_REG | ((op >= SLJIT_DIVMOD_UW) ? reg_map[TMP_REG1] : reg_map[SLJIT_R1]);
#else /* !SLJIT_CONFIG_X86_32 */
#ifdef _WIN64
		size = (!compiler->mode32 || op >= SLJIT_DIVMOD_UW) ? 3 : 2;
#else /* !_WIN64 */
		size = (!compiler->mode32) ? 3 : 2;
#endif /* _WIN64 */
		inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
		FAIL_IF(!inst);
		INC_SIZE(size);
#ifdef _WIN64
		if (!compiler->mode32)
			*inst++ = REX_W | ((op >= SLJIT_DIVMOD_UW) ? REX_B : 0);
		else if (op >= SLJIT_DIVMOD_UW)
			*inst++ = REX_B;
		inst[0] = GROUP_F7;
		inst[1] = MOD_REG | ((op >= SLJIT_DIVMOD_UW) ? reg_lmap[TMP_REG1] : reg_lmap[SLJIT_R1]);
#else /* !_WIN64 */
		if (!compiler->mode32)
			*inst++ = REX_W;
		inst[0] = GROUP_F7;
		inst[1] = MOD_REG | reg_map[SLJIT_R1];
#endif /* _WIN64 */
#endif /* SLJIT_CONFIG_X86_32 */
		switch (op) {
		case SLJIT_LMUL_UW:
			inst[1] |= MUL;
			break;
		case SLJIT_LMUL_SW:
			inst[1] |= IMUL;
			break;
		case SLJIT_DIVMOD_UW:
		case SLJIT_DIV_UW:
			inst[1] |= DIV;
			break;
		case SLJIT_DIVMOD_SW:
		case SLJIT_DIV_SW:
			inst[1] |= IDIV;
			break;
		}
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) && !defined(_WIN64)
		if (op <= SLJIT_DIVMOD_SW)
			EMIT_MOV(compiler, SLJIT_R1, 0, TMP_REG1, 0);
#else
		if (op >= SLJIT_DIV_UW)
			EMIT_MOV(compiler, SLJIT_R1, 0, TMP_REG1, 0);
#endif
		break;
	case SLJIT_ENDBR:
		return emit_endbranch(compiler);
	case SLJIT_SKIP_FRAMES_BEFORE_RETURN:
		return skip_frames_before_return(compiler);
	}

	return SLJIT_SUCCESS;
}

static sljit_s32 emit_mov_byte(struct sljit_compiler *compiler, sljit_s32 sign,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_u8* inst;
	sljit_s32 dst_r;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = 0;
#endif

	if (src == SLJIT_IMM) {
		if (FAST_IS_REG(dst)) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
			return emit_do_imm(compiler, MOV_r_i32 | reg_map[dst], srcw);
#else
			inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, 0);
			FAIL_IF(!inst);
			*inst = MOV_rm_i32;
			return SLJIT_SUCCESS;
#endif
		}
		inst = emit_x86_instruction(compiler, 1 | EX86_BYTE_ARG | EX86_NO_REXW, SLJIT_IMM, srcw, dst, dstw);
		FAIL_IF(!inst);
		*inst = MOV_rm8_i8;
		return SLJIT_SUCCESS;
	}

	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;

	if ((dst & SLJIT_MEM) && FAST_IS_REG(src)) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		if (reg_map[src] >= 4) {
			SLJIT_ASSERT(dst_r == TMP_REG1);
			EMIT_MOV(compiler, TMP_REG1, 0, src, 0);
		} else
			dst_r = src;
#else
		dst_r = src;
#endif
	} else {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		if (FAST_IS_REG(src) && reg_map[src] >= 4) {
			/* Both src and dst are registers. */
			SLJIT_ASSERT(FAST_IS_REG(dst));

			if (src == dst && !sign) {
				inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, 0xff, dst, 0);
				FAIL_IF(!inst);
				*(inst + 1) |= AND;
				return SLJIT_SUCCESS;
			}

			EMIT_MOV(compiler, TMP_REG1, 0, src, 0);
			src = TMP_REG1;
			srcw = 0;
		}
#endif /* SLJIT_CONFIG_X86_32 */

		/* Here src is either a memory operand or a register with reg_map[src] < 4 on x86-32. */
		FAIL_IF(emit_groupf(compiler, sign ? MOVSX_r_rm8 : MOVZX_r_rm8, dst_r, src, srcw));
	}

	if (dst & SLJIT_MEM) {
		inst = emit_x86_instruction(compiler, 1 | EX86_REX | EX86_NO_REXW, dst_r, 0, dst, dstw);
		FAIL_IF(!inst);
		*inst = MOV_rm8_r8;
	}

	return SLJIT_SUCCESS;
}

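/* The prefetch hint is encoded in the ModRM reg field of 0F 18:
   /0 = prefetchnta, /1 = prefetcht0 (L1), /2 = prefetcht1 (L2) and
   /3 = prefetcht2 (L3), which is what the (n << 3) values or-ed into
   inst[2] below select. */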
static sljit_s32 emit_prefetch(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_u8* inst;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = 1;
#endif

	inst = emit_x86_instruction(compiler, 2, 0, 0, src, srcw);
	FAIL_IF(!inst);
	inst[0] = GROUP_0F;
	inst[1] = PREFETCH;

	if (op == SLJIT_PREFETCH_L1)
		inst[2] |= (1 << 3);
	else if (op == SLJIT_PREFETCH_L2)
		inst[2] |= (2 << 3);
	else if (op == SLJIT_PREFETCH_L3)
		inst[2] |= (3 << 3);

	return SLJIT_SUCCESS;
}

static sljit_s32 emit_mov_half(struct sljit_compiler *compiler, sljit_s32 sign,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_u8* inst;
	sljit_s32 dst_r;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = 0;
#endif

	if (src == SLJIT_IMM) {
		if (FAST_IS_REG(dst)) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
			return emit_do_imm(compiler, MOV_r_i32 | reg_map[dst], srcw);
#else
			inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, 0);
			FAIL_IF(!inst);
			*inst = MOV_rm_i32;
			return SLJIT_SUCCESS;
#endif
		}
		inst = emit_x86_instruction(compiler, 1 | EX86_HALF_ARG | EX86_NO_REXW | EX86_PREF_66, SLJIT_IMM, srcw, dst, dstw);
		FAIL_IF(!inst);
		*inst = MOV_rm_i32;
		return SLJIT_SUCCESS;
	}

	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;

	if ((dst & SLJIT_MEM) && FAST_IS_REG(src))
		dst_r = src;
	else
		FAIL_IF(emit_groupf(compiler, sign ? MOVSX_r_rm16 : MOVZX_r_rm16, dst_r, src, srcw));

	if (dst & SLJIT_MEM) {
		inst = emit_x86_instruction(compiler, 1 | EX86_NO_REXW | EX86_PREF_66, dst_r, 0, dst, dstw);
		FAIL_IF(!inst);
		*inst = MOV_rm_r;
	}

	return SLJIT_SUCCESS;
}

static sljit_s32 emit_unary(struct sljit_compiler *compiler, sljit_u8 opcode,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_u8* inst;

	if (dst == src && dstw == srcw) {
		/* Same input and output */
		inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
		FAIL_IF(!inst);
		inst[0] = GROUP_F7;
		inst[1] |= opcode;
		return SLJIT_SUCCESS;
	}

	if (FAST_IS_REG(dst)) {
		EMIT_MOV(compiler, dst, 0, src, srcw);
		inst = emit_x86_instruction(compiler, 1, 0, 0, dst, 0);
		FAIL_IF(!inst);
		inst[0] = GROUP_F7;
		inst[1] |= opcode;
		return SLJIT_SUCCESS;
	}

	EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
	inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0);
	FAIL_IF(!inst);
	inst[0] = GROUP_F7;
	inst[1] |= opcode;
	EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
	return SLJIT_SUCCESS;
}

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
static const sljit_sw emit_clz_arg = 32 + 31;
static const sljit_sw emit_ctz_arg = 32;
#endif

static sljit_s32 emit_clz_ctz(struct sljit_compiler *compiler, sljit_s32 is_clz,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_u8* inst;
	sljit_s32 dst_r;
	sljit_sw max;

	SLJIT_ASSERT(cpu_feature_list != 0);

	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;

	if (is_clz ? (cpu_feature_list & CPU_FEATURE_LZCNT) : (cpu_feature_list & CPU_FEATURE_TZCNT)) {
		FAIL_IF(emit_groupf(compiler, (is_clz ? LZCNT_r_rm : TZCNT_r_rm) | EX86_PREF_F3, dst_r, src, srcw));

		if (dst & SLJIT_MEM)
			EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
		return SLJIT_SUCCESS;
	}

	FAIL_IF(emit_groupf(compiler, is_clz ? BSR_r_rm : BSF_r_rm, dst_r, src, srcw));

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	max = is_clz ? (32 + 31) : 32;

	if (cpu_feature_list & CPU_FEATURE_CMOV) {
		if (dst_r != TMP_REG1) {
			EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, max);
			inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG1, 0);
		}
		else
			inst = emit_x86_instruction(compiler, 2, dst_r, 0, SLJIT_MEM0(), is_clz ? (sljit_sw)&emit_clz_arg : (sljit_sw)&emit_ctz_arg);

		FAIL_IF(!inst);
		inst[0] = GROUP_0F;
		inst[1] = CMOVE_r_rm;
	}
	else
		FAIL_IF(emit_cmov_generic(compiler, SLJIT_EQUAL, dst_r, SLJIT_IMM, max));

	if (is_clz) {
		inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, 31, dst_r, 0);
		FAIL_IF(!inst);
		*(inst + 1) |= XOR;
	}
#else
	if (is_clz)
		max = compiler->mode32 ? (32 + 31) : (64 + 63);
	else
		max = compiler->mode32 ? 32 : 64;

	if (cpu_feature_list & CPU_FEATURE_CMOV) {
		EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_IMM, max);
		FAIL_IF(emit_groupf(compiler, CMOVE_r_rm, dst_r, TMP_REG2, 0));
	} else
		FAIL_IF(emit_cmov_generic(compiler, SLJIT_EQUAL, dst_r, SLJIT_IMM, max));

	if (is_clz) {
		inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, max >> 1, dst_r, 0);
		FAIL_IF(!inst);
		*(inst + 1) |= XOR;
	}
#endif

	if (dst & SLJIT_MEM)
		EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
	return SLJIT_SUCCESS;
}

static sljit_s32 emit_bswap(struct sljit_compiler *compiler,
	sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_u8 *inst;
	sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
	sljit_uw size;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	sljit_u8 rex = 0;
#else /* !SLJIT_CONFIG_X86_64 */
	sljit_s32 dst_is_ereg = op & SLJIT_32;
#endif /* SLJIT_CONFIG_X86_64 */

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	if (op == SLJIT_REV_U32 || op == SLJIT_REV_S32)
		compiler->mode32 = 1;
#else /* !SLJIT_CONFIG_X86_64 */
	op &= ~SLJIT_32;
#endif /* SLJIT_CONFIG_X86_64 */

	if (src != dst_r) {
		/* Only the lower 16 bits are read for eregs. */
1758 if (op == SLJIT_REV_U16 || op == SLJIT_REV_S16)
1759 FAIL_IF(emit_mov_half(compiler, 0, dst_r, 0, src, srcw));
1760 else
1761 EMIT_MOV(compiler, dst_r, 0, src, srcw);
1762 }
1763
1764 size = 2;
1765 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1766 if (!compiler->mode32)
1767 rex = REX_W;
1768
1769 if (reg_map[dst_r] >= 8)
1770 rex |= REX_B;
1771
1772 if (rex != 0)
1773 size++;
1774 #endif /* SLJIT_CONFIG_X86_64 */
1775
1776 inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
1777 FAIL_IF(!inst);
1778 INC_SIZE(size);
1779
1780 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1781 if (rex != 0)
1782 *inst++ = rex;
1783
1784 inst[0] = GROUP_0F;
1785 inst[1] = BSWAP_r | reg_lmap[dst_r];
1786 #else /* !SLJIT_CONFIG_X86_64 */
1787 inst[0] = GROUP_0F;
1788 inst[1] = BSWAP_r | reg_map[dst_r];
1789 #endif /* SLJIT_CONFIG_X86_64 */
1790
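/* BSWAP reverses all four (or eight) bytes, so after a 16 bit reverse
   the two interesting bytes sit at the top of the register; shifting
   right by width - 16 brings them back, with SHR zero-extending for
   REV_U16 and SAR sign-extending for REV_S16. */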
1791 if (op == SLJIT_REV_U16 || op == SLJIT_REV_S16) {
1792 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1793 size = compiler->mode32 ? 16 : 48;
1794 #else /* !SLJIT_CONFIG_X86_64 */
1795 size = 16;
1796 #endif /* SLJIT_CONFIG_X86_64 */
1797
1798 inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, (sljit_sw)size, dst_r, 0);
1799 FAIL_IF(!inst);
1800 if (op == SLJIT_REV_U16)
1801 inst[1] |= SHR;
1802 else
1803 inst[1] |= SAR;
1804 }
1805
1806 if (dst & SLJIT_MEM) {
1807 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1808 if (dst_is_ereg)
1809 op = SLJIT_REV;
1810 #endif /* SLJIT_CONFIG_X86_32 */
1811 if (op == SLJIT_REV_U16 || op == SLJIT_REV_S16)
1812 return emit_mov_half(compiler, 0, dst, dstw, TMP_REG1, 0);
1813
1814 return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
1815 }
1816
1817 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1818 if (op == SLJIT_REV_S32) {
1819 compiler->mode32 = 0;
1820 inst = emit_x86_instruction(compiler, 1, dst, 0, dst, 0);
1821 FAIL_IF(!inst);
1822 *inst = MOVSXD_r_rm;
1823 }
1824 #endif /* SLJIT_CONFIG_X86_64 */
1825
1826 return SLJIT_SUCCESS;
1827 }
1828
1829 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compiler, sljit_s32 op,
1830 sljit_s32 dst, sljit_sw dstw,
1831 sljit_s32 src, sljit_sw srcw)
1832 {
1833 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1834 sljit_s32 dst_is_ereg = 0;
1835 #else /* !SLJIT_CONFIG_X86_32 */
1836 sljit_s32 op_flags = GET_ALL_FLAGS(op);
1837 #endif /* SLJIT_CONFIG_X86_32 */
1838
1839 CHECK_ERROR();
1840 CHECK(check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw));
1841 ADJUST_LOCAL_OFFSET(dst, dstw);
1842 ADJUST_LOCAL_OFFSET(src, srcw);
1843
1844 CHECK_EXTRA_REGS(dst, dstw, dst_is_ereg = 1);
1845 CHECK_EXTRA_REGS(src, srcw, (void)0);
1846 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1847 compiler->mode32 = op_flags & SLJIT_32;
1848 #endif /* SLJIT_CONFIG_X86_64 */
1849
1850 op = GET_OPCODE(op);
1851
1852 if (op >= SLJIT_MOV && op <= SLJIT_MOV_P) {
1853 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1854 compiler->mode32 = 0;
1855 #endif /* SLJIT_CONFIG_X86_64 */
1856
1857 if (FAST_IS_REG(src) && src == dst) {
1858 if (!TYPE_CAST_NEEDED(op))
1859 return SLJIT_SUCCESS;
1860 }
1861
1862 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1863 if (op_flags & SLJIT_32) {
1864 if (src & SLJIT_MEM) {
1865 if (op == SLJIT_MOV_S32)
1866 op = SLJIT_MOV_U32;
1867 }
1868 else if (src == SLJIT_IMM) {
1869 if (op == SLJIT_MOV_U32)
1870 op = SLJIT_MOV_S32;
1871 }
1872 }
1873 #endif /* SLJIT_CONFIG_X86_64 */
1874
1875 if (src == SLJIT_IMM) {
1876 switch (op) {
1877 case SLJIT_MOV_U8:
1878 srcw = (sljit_u8)srcw;
1879 break;
1880 case SLJIT_MOV_S8:
1881 srcw = (sljit_s8)srcw;
1882 break;
1883 case SLJIT_MOV_U16:
1884 srcw = (sljit_u16)srcw;
1885 break;
1886 case SLJIT_MOV_S16:
1887 srcw = (sljit_s16)srcw;
1888 break;
1889 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1890 case SLJIT_MOV_U32:
1891 srcw = (sljit_u32)srcw;
1892 break;
1893 case SLJIT_MOV_S32:
1894 srcw = (sljit_s32)srcw;
1895 break;
1896 #endif /* SLJIT_CONFIG_X86_64 */
1897 }
1898 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1899 if (SLJIT_UNLIKELY(dst_is_ereg))
1900 return emit_mov(compiler, dst, dstw, src, srcw);
1901 #endif /* SLJIT_CONFIG_X86_32 */
1902 }
1903
1904 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1905 if (SLJIT_UNLIKELY(dst_is_ereg) && (!(op == SLJIT_MOV || op == SLJIT_MOV_U32 || op == SLJIT_MOV_S32 || op == SLJIT_MOV_P) || (src & SLJIT_MEM))) {
1906 SLJIT_ASSERT(dst == SLJIT_MEM1(SLJIT_SP));
1907 dst = TMP_REG1;
1908 }
1909 #endif /* SLJIT_CONFIG_X86_32 */
1910
1911 switch (op) {
1912 case SLJIT_MOV:
1913 case SLJIT_MOV_P:
1914 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1915 case SLJIT_MOV_U32:
1916 case SLJIT_MOV_S32:
1917 case SLJIT_MOV32:
1918 #endif /* SLJIT_CONFIG_X86_32 */
1919 EMIT_MOV(compiler, dst, dstw, src, srcw);
1920 break;
1921 case SLJIT_MOV_U8:
1922 FAIL_IF(emit_mov_byte(compiler, 0, dst, dstw, src, srcw));
1923 break;
1924 case SLJIT_MOV_S8:
1925 FAIL_IF(emit_mov_byte(compiler, 1, dst, dstw, src, srcw));
1926 break;
1927 case SLJIT_MOV_U16:
1928 FAIL_IF(emit_mov_half(compiler, 0, dst, dstw, src, srcw));
1929 break;
1930 case SLJIT_MOV_S16:
1931 FAIL_IF(emit_mov_half(compiler, 1, dst, dstw, src, srcw));
1932 break;
1933 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1934 case SLJIT_MOV_U32:
1935 FAIL_IF(emit_mov_int(compiler, 0, dst, dstw, src, srcw));
1936 break;
1937 case SLJIT_MOV_S32:
1938 FAIL_IF(emit_mov_int(compiler, 1, dst, dstw, src, srcw));
1939 break;
1940 case SLJIT_MOV32:
1941 compiler->mode32 = 1;
1942 EMIT_MOV(compiler, dst, dstw, src, srcw);
1943 compiler->mode32 = 0;
1944 break;
1945 #endif /* SLJIT_CONFIG_X86_64 */
1946 }
1947
1948 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1949 if (SLJIT_UNLIKELY(dst_is_ereg) && dst == TMP_REG1)
1950 return emit_mov(compiler, SLJIT_MEM1(SLJIT_SP), dstw, TMP_REG1, 0);
1951 #endif /* SLJIT_CONFIG_X86_32 */
1952 return SLJIT_SUCCESS;
1953 }
1954
1955 switch (op) {
1956 case SLJIT_CLZ:
1957 case SLJIT_CTZ:
1958 return emit_clz_ctz(compiler, (op == SLJIT_CLZ), dst, dstw, src, srcw);
1959 case SLJIT_REV:
1960 case SLJIT_REV_U16:
1961 case SLJIT_REV_S16:
1962 case SLJIT_REV_U32:
1963 case SLJIT_REV_S32:
1964 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1965 if (dst_is_ereg)
1966 op |= SLJIT_32;
1967 #endif /* SLJIT_CONFIG_X86_32 */
1968 return emit_bswap(compiler, op, dst, dstw, src, srcw);
1969 }
1970
1971 return SLJIT_SUCCESS;
1972 }
1973
1974 static sljit_s32 emit_cum_binary(struct sljit_compiler *compiler,
1975 sljit_u32 op_types,
1976 sljit_s32 dst, sljit_sw dstw,
1977 sljit_s32 src1, sljit_sw src1w,
1978 sljit_s32 src2, sljit_sw src2w)
1979 {
1980 sljit_u8* inst;
1981 sljit_u8 op_eax_imm = U8(op_types >> 24);
1982 sljit_u8 op_rm = U8((op_types >> 16) & 0xff);
1983 sljit_u8 op_mr = U8((op_types >> 8) & 0xff);
1984 sljit_u8 op_imm = U8(op_types & 0xff);
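/* As the shifts above show, op_types packs the four encodings of one
   ALU operation (built by BINARY_OPCODE): the short EAX, imm32 form
   (e.g. 0x05 for ADD), the r, r/m form, the r/m, r form, and the
   sub-opcode of the 0x81/0x83 immediate group used by BINARY_IMM. */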
1985
1986 if (dst == src1 && dstw == src1w) {
1987 if (src2 == SLJIT_IMM) {
1988 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1989 if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
1990 #else
1991 if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128)) {
1992 #endif
1993 BINARY_EAX_IMM(op_eax_imm, src2w);
1994 }
1995 else {
1996 BINARY_IMM(op_imm, op_mr, src2w, dst, dstw);
1997 }
1998 }
1999 else if (FAST_IS_REG(dst)) {
2000 inst = emit_x86_instruction(compiler, 1, dst, dstw, src2, src2w);
2001 FAIL_IF(!inst);
2002 *inst = op_rm;
2003 }
2004 else if (FAST_IS_REG(src2)) {
2005 /* Special exception for sljit_emit_op_flags. */
2006 inst = emit_x86_instruction(compiler, 1, src2, src2w, dst, dstw);
2007 FAIL_IF(!inst);
2008 *inst = op_mr;
2009 }
2010 else {
2011 EMIT_MOV(compiler, TMP_REG1, 0, src2, src2w);
2012 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
2013 FAIL_IF(!inst);
2014 *inst = op_mr;
2015 }
2016 return SLJIT_SUCCESS;
2017 }
2018
2019 /* Only for cumulative (commutative) operations. */
2020 if (dst == src2 && dstw == src2w) {
2021 if (src1 == SLJIT_IMM) {
2022 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2023 if ((dst == SLJIT_R0) && (src1w > 127 || src1w < -128) && (compiler->mode32 || IS_HALFWORD(src1w))) {
2024 #else
2025 if ((dst == SLJIT_R0) && (src1w > 127 || src1w < -128)) {
2026 #endif
2027 BINARY_EAX_IMM(op_eax_imm, src1w);
2028 }
2029 else {
2030 BINARY_IMM(op_imm, op_mr, src1w, dst, dstw);
2031 }
2032 }
2033 else if (FAST_IS_REG(dst)) {
2034 inst = emit_x86_instruction(compiler, 1, dst, dstw, src1, src1w);
2035 FAIL_IF(!inst);
2036 *inst = op_rm;
2037 }
2038 else if (FAST_IS_REG(src1)) {
2039 inst = emit_x86_instruction(compiler, 1, src1, src1w, dst, dstw);
2040 FAIL_IF(!inst);
2041 *inst = op_mr;
2042 }
2043 else {
2044 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
2045 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
2046 FAIL_IF(!inst);
2047 *inst = op_mr;
2048 }
2049 return SLJIT_SUCCESS;
2050 }
2051
2052 /* General version. */
2053 if (FAST_IS_REG(dst)) {
2054 EMIT_MOV(compiler, dst, 0, src1, src1w);
2055 if (src2 == SLJIT_IMM) {
2056 BINARY_IMM(op_imm, op_mr, src2w, dst, 0);
2057 }
2058 else {
2059 inst = emit_x86_instruction(compiler, 1, dst, 0, src2, src2w);
2060 FAIL_IF(!inst);
2061 *inst = op_rm;
2062 }
2063 }
2064 else {
2065 /* This version requires fewer memory writes. */
2066 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
2067 if (src2 == SLJIT_IMM) {
2068 BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0);
2069 }
2070 else {
2071 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
2072 FAIL_IF(!inst);
2073 *inst = op_rm;
2074 }
2075 EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
2076 }
2077
2078 return SLJIT_SUCCESS;
2079 }
2080
2081 static sljit_s32 emit_non_cum_binary(struct sljit_compiler *compiler,
2082 sljit_u32 op_types,
2083 sljit_s32 dst, sljit_sw dstw,
2084 sljit_s32 src1, sljit_sw src1w,
2085 sljit_s32 src2, sljit_sw src2w)
2086 {
2087 sljit_u8* inst;
2088 sljit_u8 op_eax_imm = U8(op_types >> 24);
2089 sljit_u8 op_rm = U8((op_types >> 16) & 0xff);
2090 sljit_u8 op_mr = U8((op_types >> 8) & 0xff);
2091 sljit_u8 op_imm = U8(op_types & 0xff);
2092
2093 if (dst == src1 && dstw == src1w) {
2094 if (src2 == SLJIT_IMM) {
2095 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2096 if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
2097 #else
2098 if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128)) {
2099 #endif
2100 BINARY_EAX_IMM(op_eax_imm, src2w);
2101 }
2102 else {
2103 BINARY_IMM(op_imm, op_mr, src2w, dst, dstw);
2104 }
2105 }
2106 else if (FAST_IS_REG(dst)) {
2107 inst = emit_x86_instruction(compiler, 1, dst, dstw, src2, src2w);
2108 FAIL_IF(!inst);
2109 *inst = op_rm;
2110 }
2111 else if (FAST_IS_REG(src2)) {
2112 inst = emit_x86_instruction(compiler, 1, src2, src2w, dst, dstw);
2113 FAIL_IF(!inst);
2114 *inst = op_mr;
2115 }
2116 else {
2117 EMIT_MOV(compiler, TMP_REG1, 0, src2, src2w);
2118 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
2119 FAIL_IF(!inst);
2120 *inst = op_mr;
2121 }
2122 return SLJIT_SUCCESS;
2123 }
2124
2125 /* General version. */
2126 if (FAST_IS_REG(dst) && dst != src2) {
2127 EMIT_MOV(compiler, dst, 0, src1, src1w);
2128 if (src2 == SLJIT_IMM) {
2129 BINARY_IMM(op_imm, op_mr, src2w, dst, 0);
2130 }
2131 else {
2132 inst = emit_x86_instruction(compiler, 1, dst, 0, src2, src2w);
2133 FAIL_IF(!inst);
2134 *inst = op_rm;
2135 }
2136 }
2137 else {
2138 /* This version requires fewer memory writes. */
2139 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
2140 if (src2 == SLJIT_IMM) {
2141 BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0);
2142 }
2143 else {
2144 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
2145 FAIL_IF(!inst);
2146 *inst = op_rm;
2147 }
2148 EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
2149 }
2150
2151 return SLJIT_SUCCESS;
2152 }
2153
2154 static sljit_s32 emit_mul(struct sljit_compiler *compiler,
2155 sljit_s32 dst, sljit_sw dstw,
2156 sljit_s32 src1, sljit_sw src1w,
2157 sljit_s32 src2, sljit_sw src2w)
2158 {
2159 sljit_u8* inst;
2160 sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
2161
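/* Three IMUL encodings are used below: the two-operand 0F AF form when
   one source is already in the destination register, and the immediate
   forms IMUL r, r/m, imm8 / imm32 when a constant operand is involved. */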
2162 /* Register destination. */
2163 if (dst_r == src1 && src2 != SLJIT_IMM) {
2164 FAIL_IF(emit_groupf(compiler, IMUL_r_rm, dst_r, src2, src2w));
2165 } else if (dst_r == src2 && src1 != SLJIT_IMM) {
2166 FAIL_IF(emit_groupf(compiler, IMUL_r_rm, dst_r, src1, src1w));
2167 } else if (src1 == SLJIT_IMM) {
2168 if (src2 == SLJIT_IMM) {
2169 EMIT_MOV(compiler, dst_r, 0, SLJIT_IMM, src2w);
2170 src2 = dst_r;
2171 src2w = 0;
2172 }
2173
2174 if (src1w <= 127 && src1w >= -128) {
2175 inst = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
2176 FAIL_IF(!inst);
2177 *inst = IMUL_r_rm_i8;
2178
2179 FAIL_IF(emit_byte(compiler, U8(src1w)));
2180 }
2181 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
2182 else {
2183 inst = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
2184 FAIL_IF(!inst);
2185 *inst = IMUL_r_rm_i32;
2186 inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
2187 FAIL_IF(!inst);
2188 INC_SIZE(4);
2189 sljit_unaligned_store_sw(inst, src1w);
2190 }
2191 #else
2192 else if (IS_HALFWORD(src1w)) {
2193 inst = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
2194 FAIL_IF(!inst);
2195 *inst = IMUL_r_rm_i32;
2196 inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
2197 FAIL_IF(!inst);
2198 INC_SIZE(4);
2199 sljit_unaligned_store_s32(inst, (sljit_s32)src1w);
2200 }
2201 else {
2202 if (dst_r != src2)
2203 EMIT_MOV(compiler, dst_r, 0, src2, src2w);
2204 FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src1w));
2205 FAIL_IF(emit_groupf(compiler, IMUL_r_rm, dst_r, TMP_REG2, 0));
2206 }
2207 #endif
2208 }
2209 else if (src2 == SLJIT_IMM) {
2210 /* Note: src1 is NOT immediate. */
2211
2212 if (src2w <= 127 && src2w >= -128) {
2213 inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
2214 FAIL_IF(!inst);
2215 *inst = IMUL_r_rm_i8;
2216
2217 FAIL_IF(emit_byte(compiler, U8(src2w)));
2218 }
2219 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
2220 else {
2221 inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
2222 FAIL_IF(!inst);
2223 *inst = IMUL_r_rm_i32;
2224
2225 inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
2226 FAIL_IF(!inst);
2227 INC_SIZE(4);
2228 sljit_unaligned_store_sw(inst, src2w);
2229 }
2230 #else
2231 else if (IS_HALFWORD(src2w)) {
2232 inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
2233 FAIL_IF(!inst);
2234 *inst = IMUL_r_rm_i32;
2235
2236 inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
2237 FAIL_IF(!inst);
2238 INC_SIZE(4);
2239 sljit_unaligned_store_s32(inst, (sljit_s32)src2w);
2240 } else {
2241 if (dst_r != src1)
2242 EMIT_MOV(compiler, dst_r, 0, src1, src1w);
2243 FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src2w));
2244 FAIL_IF(emit_groupf(compiler, IMUL_r_rm, dst_r, TMP_REG2, 0));
2245 }
2246 #endif
2247 } else {
2248 /* Neither argument is immediate. */
2249 if (ADDRESSING_DEPENDS_ON(src2, dst_r))
2250 dst_r = TMP_REG1;
2251 EMIT_MOV(compiler, dst_r, 0, src1, src1w);
2252 FAIL_IF(emit_groupf(compiler, IMUL_r_rm, dst_r, src2, src2w));
2253 }
2254
2255 if (dst & SLJIT_MEM)
2256 EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
2257
2258 return SLJIT_SUCCESS;
2259 }
2260
2261 static sljit_s32 emit_lea_binary(struct sljit_compiler *compiler,
2262 sljit_s32 dst, sljit_sw dstw,
2263 sljit_s32 src1, sljit_sw src1w,
2264 sljit_s32 src2, sljit_sw src2w)
2265 {
2266 sljit_u8* inst;
2267 sljit_s32 dst_r, done = 0;
2268
2269 /* These cases are better handled the normal way (a single ADD when dst aliases a source). */
2270 if (dst == src1 && dstw == src1w)
2271 return SLJIT_ERR_UNSUPPORTED;
2272 if (dst == src2 && dstw == src2w)
2273 return SLJIT_ERR_UNSUPPORTED;
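/* LEA computes src1 + src2 in the address unit, which implements an
   add that leaves the status flags untouched; it is only attempted when
   the operands fit an addressing mode, and the aliasing cases above are
   rejected because a plain one-instruction ADD is shorter there. */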
2274
2275 dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
2276
2277 if (FAST_IS_REG(src1)) {
2278 if (FAST_IS_REG(src2)) {
2279 inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM2(src1, src2), 0);
2280 FAIL_IF(!inst);
2281 *inst = LEA_r_m;
2282 done = 1;
2283 }
2284 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2285 if (src2 == SLJIT_IMM && (compiler->mode32 || IS_HALFWORD(src2w))) {
2286 inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src1), (sljit_s32)src2w);
2287 #else
2288 if (src2 == SLJIT_IMM) {
2289 inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src1), src2w);
2290 #endif
2291 FAIL_IF(!inst);
2292 *inst = LEA_r_m;
2293 done = 1;
2294 }
2295 }
2296 else if (FAST_IS_REG(src2)) {
2297 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2298 if (src1 == SLJIT_IMM && (compiler->mode32 || IS_HALFWORD(src1w))) {
2299 inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src2), (sljit_s32)src1w);
2300 #else
2301 if (src1 == SLJIT_IMM) {
2302 inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src2), src1w);
2303 #endif
2304 FAIL_IF(!inst);
2305 *inst = LEA_r_m;
2306 done = 1;
2307 }
2308 }
2309
2310 if (done) {
2311 if (dst_r == TMP_REG1)
2312 return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
2313 return SLJIT_SUCCESS;
2314 }
2315 return SLJIT_ERR_UNSUPPORTED;
2316 }
2317
2318 static sljit_s32 emit_cmp_binary(struct sljit_compiler *compiler,
2319 sljit_s32 src1, sljit_sw src1w,
2320 sljit_s32 src2, sljit_sw src2w)
2321 {
2322 sljit_u8* inst;
2323
2324 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2325 if (src1 == SLJIT_R0 && src2 == SLJIT_IMM && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
2326 #else
2327 if (src1 == SLJIT_R0 && src2 == SLJIT_IMM && (src2w > 127 || src2w < -128)) {
2328 #endif
2329 BINARY_EAX_IMM(CMP_EAX_i32, src2w);
2330 return SLJIT_SUCCESS;
2331 }
2332
2333 if (FAST_IS_REG(src1)) {
2334 if (src2 == SLJIT_IMM) {
2335 BINARY_IMM(CMP, CMP_rm_r, src2w, src1, 0);
2336 }
2337 else {
2338 inst = emit_x86_instruction(compiler, 1, src1, 0, src2, src2w);
2339 FAIL_IF(!inst);
2340 *inst = CMP_r_rm;
2341 }
2342 return SLJIT_SUCCESS;
2343 }
2344
2345 if (FAST_IS_REG(src2) && src1 != SLJIT_IMM) {
2346 inst = emit_x86_instruction(compiler, 1, src2, 0, src1, src1w);
2347 FAIL_IF(!inst);
2348 *inst = CMP_rm_r;
2349 return SLJIT_SUCCESS;
2350 }
2351
2352 if (src2 == SLJIT_IMM) {
2353 if (src1 == SLJIT_IMM) {
2354 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
2355 src1 = TMP_REG1;
2356 src1w = 0;
2357 }
2358 BINARY_IMM(CMP, CMP_rm_r, src2w, src1, src1w);
2359 }
2360 else {
2361 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
2362 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
2363 FAIL_IF(!inst);
2364 *inst = CMP_r_rm;
2365 }
2366 return SLJIT_SUCCESS;
2367 }
2368
2369 static sljit_s32 emit_test_binary(struct sljit_compiler *compiler,
2370 sljit_s32 src1, sljit_sw src1w,
2371 sljit_s32 src2, sljit_sw src2w)
2372 {
2373 sljit_u8* inst;
2374
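/* Besides the TEST EAX, imm short form handled first, TEST exists as
   r/m, r (84/85) and as the F7 group with sub-opcode 0 for immediates;
   since the operation is commutative, both operand orders are tried
   before falling back to a move through TMP_REG1. */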
2375 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2376 if (src1 == SLJIT_R0 && src2 == SLJIT_IMM && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
2377 #else
2378 if (src1 == SLJIT_R0 && src2 == SLJIT_IMM && (src2w > 127 || src2w < -128)) {
2379 #endif
2380 BINARY_EAX_IMM(TEST_EAX_i32, src2w);
2381 return SLJIT_SUCCESS;
2382 }
2383
2384 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2385 if (src2 == SLJIT_R0 && src1 == SLJIT_IMM && (src1w > 127 || src1w < -128) && (compiler->mode32 || IS_HALFWORD(src1w))) {
2386 #else
2387 if (src2 == SLJIT_R0 && src1 == SLJIT_IMM && (src1w > 127 || src1w < -128)) {
2388 #endif
2389 BINARY_EAX_IMM(TEST_EAX_i32, src1w);
2390 return SLJIT_SUCCESS;
2391 }
2392
2393 if (src1 != SLJIT_IMM) {
2394 if (src2 == SLJIT_IMM) {
2395 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2396 if (IS_HALFWORD(src2w) || compiler->mode32) {
2397 inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, src1, src1w);
2398 FAIL_IF(!inst);
2399 *inst = GROUP_F7;
2400 } else {
2401 FAIL_IF(emit_load_imm64(compiler, FAST_IS_REG(src1) ? TMP_REG2 : TMP_REG1, src2w));
2402 inst = emit_x86_instruction(compiler, 1, FAST_IS_REG(src1) ? TMP_REG2 : TMP_REG1, 0, src1, src1w);
2403 FAIL_IF(!inst);
2404 *inst = TEST_rm_r;
2405 }
2406 #else
2407 inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, src1, src1w);
2408 FAIL_IF(!inst);
2409 *inst = GROUP_F7;
2410 #endif
2411 return SLJIT_SUCCESS;
2412 }
2413 else if (FAST_IS_REG(src1)) {
2414 inst = emit_x86_instruction(compiler, 1, src1, 0, src2, src2w);
2415 FAIL_IF(!inst);
2416 *inst = TEST_rm_r;
2417 return SLJIT_SUCCESS;
2418 }
2419 }
2420
2421 if (src2 != SLJIT_IMM) {
2422 if (src1 == SLJIT_IMM) {
2423 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2424 if (IS_HALFWORD(src1w) || compiler->mode32) {
2425 inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src1w, src2, src2w);
2426 FAIL_IF(!inst);
2427 *inst = GROUP_F7;
2428 }
2429 else {
2430 FAIL_IF(emit_load_imm64(compiler, TMP_REG1, src1w));
2431 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
2432 FAIL_IF(!inst);
2433 *inst = TEST_rm_r;
2434 }
2435 #else
2436 inst = emit_x86_instruction(compiler, 1, src1, src1w, src2, src2w);
2437 FAIL_IF(!inst);
2438 *inst = GROUP_F7;
2439 #endif
2440 return SLJIT_SUCCESS;
2441 }
2442 else if (FAST_IS_REG(src2)) {
2443 inst = emit_x86_instruction(compiler, 1, src2, 0, src1, src1w);
2444 FAIL_IF(!inst);
2445 *inst = TEST_rm_r;
2446 return SLJIT_SUCCESS;
2447 }
2448 }
2449
2450 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
2451 if (src2 == SLJIT_IMM) {
2452 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2453 if (IS_HALFWORD(src2w) || compiler->mode32) {
2454 inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, TMP_REG1, 0);
2455 FAIL_IF(!inst);
2456 *inst = GROUP_F7;
2457 }
2458 else {
2459 FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src2w));
2460 inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, TMP_REG1, 0);
2461 FAIL_IF(!inst);
2462 *inst = TEST_rm_r;
2463 }
2464 #else
2465 inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, TMP_REG1, 0);
2466 FAIL_IF(!inst);
2467 *inst = GROUP_F7;
2468 #endif
2469 }
2470 else {
2471 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
2472 FAIL_IF(!inst);
2473 *inst = TEST_rm_r;
2474 }
2475 return SLJIT_SUCCESS;
2476 }
2477
2478 static sljit_s32 emit_shift(struct sljit_compiler *compiler,
2479 sljit_u8 mode,
2480 sljit_s32 dst, sljit_sw dstw,
2481 sljit_s32 src1, sljit_sw src1w,
2482 sljit_s32 src2, sljit_sw src2w)
2483 {
2484 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2485 sljit_s32 mode32;
2486 #endif
2487 sljit_u8* inst;
2488
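/* Hardware shifts take the count either as an immediate or implicitly
   in CL (SLJIT_PREF_SHIFT_REG); the cases below shuffle the operands so
   that the count lands in ecx while its previous value is preserved. */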
2489 if (src2 == SLJIT_IMM || src2 == SLJIT_PREF_SHIFT_REG) {
2490 if (dst == src1 && dstw == src1w) {
2491 inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, dst, dstw);
2492 FAIL_IF(!inst);
2493 inst[1] |= mode;
2494 return SLJIT_SUCCESS;
2495 }
2496 if (dst == SLJIT_PREF_SHIFT_REG && src2 == SLJIT_PREF_SHIFT_REG) {
2497 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
2498 inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
2499 FAIL_IF(!inst);
2500 inst[1] |= mode;
2501 EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
2502 return SLJIT_SUCCESS;
2503 }
2504 if (FAST_IS_REG(dst)) {
2505 EMIT_MOV(compiler, dst, 0, src1, src1w);
2506 inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, dst, 0);
2507 FAIL_IF(!inst);
2508 inst[1] |= mode;
2509 return SLJIT_SUCCESS;
2510 }
2511
2512 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
2513 inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, TMP_REG1, 0);
2514 FAIL_IF(!inst);
2515 inst[1] |= mode;
2516 EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
2517 return SLJIT_SUCCESS;
2518 }
2519
2520 if (dst == SLJIT_PREF_SHIFT_REG) {
2521 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
2522 EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
2523 inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
2524 FAIL_IF(!inst);
2525 inst[1] |= mode;
2526 return emit_mov(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
2527 }
2528
2529 if (FAST_IS_REG(dst) && dst != src2 && dst != TMP_REG1 && !ADDRESSING_DEPENDS_ON(src2, dst)) {
2530 if (src1 != dst)
2531 EMIT_MOV(compiler, dst, 0, src1, src1w);
2532 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2533 mode32 = compiler->mode32;
2534 compiler->mode32 = 0;
2535 #endif
2536 EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_PREF_SHIFT_REG, 0);
2537 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2538 compiler->mode32 = mode32;
2539 #endif
2540 EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
2541 inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, dst, 0);
2542 FAIL_IF(!inst);
2543 inst[1] |= mode;
2544 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2545 compiler->mode32 = 0;
2546 #endif
2547 EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
2548 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2549 compiler->mode32 = mode32;
2550 #endif
2551 return SLJIT_SUCCESS;
2552 }
2553
2554 /* This case is complex since ecx itself may be used for
2555 addressing, which must be supported as well. */
2556 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
2557 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
2558 EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), 0, SLJIT_PREF_SHIFT_REG, 0);
2559 #else /* !SLJIT_CONFIG_X86_32 */
2560 mode32 = compiler->mode32;
2561 compiler->mode32 = 0;
2562 EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_PREF_SHIFT_REG, 0);
2563 compiler->mode32 = mode32;
2564 #endif /* SLJIT_CONFIG_X86_32 */
2565
2566 EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
2567 inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
2568 FAIL_IF(!inst);
2569 inst[1] |= mode;
2570
2571 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
2572 EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, SLJIT_MEM1(SLJIT_SP), 0);
2573 #else
2574 compiler->mode32 = 0;
2575 EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG2, 0);
2576 compiler->mode32 = mode32;
2577 #endif /* SLJIT_CONFIG_X86_32 */
2578
2579 if (dst != TMP_REG1)
2580 return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
2581
2582 return SLJIT_SUCCESS;
2583 }
2584
2585 static sljit_s32 emit_shift_with_flags(struct sljit_compiler *compiler,
2586 sljit_u8 mode, sljit_s32 set_flags,
2587 sljit_s32 dst, sljit_sw dstw,
2588 sljit_s32 src1, sljit_sw src1w,
2589 sljit_s32 src2, sljit_sw src2w)
2590 {
2591 /* The CPU does not set flags if the shift count is 0. */
2592 if (src2 == SLJIT_IMM) {
2593 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2594 src2w &= compiler->mode32 ? 0x1f : 0x3f;
2595 #else /* !SLJIT_CONFIG_X86_64 */
2596 src2w &= 0x1f;
2597 #endif /* SLJIT_CONFIG_X86_64 */
2598 if (src2w != 0)
2599 return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w);
2600
2601 if (!set_flags)
2602 return emit_mov(compiler, dst, dstw, src1, src1w);
2603 /* OR dst, src, 0: same result as a MOV, but it also sets the status flags. */
2604 return emit_cum_binary(compiler, BINARY_OPCODE(OR),
2605 dst, dstw, src1, src1w, SLJIT_IMM, 0);
2606 }
2607
2608 if (!set_flags)
2609 return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w);
2610
2611 if (!FAST_IS_REG(dst))
2612 FAIL_IF(emit_cmp_binary(compiler, src1, src1w, SLJIT_IMM, 0));
2613
2614 FAIL_IF(emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w));
2615
2616 if (FAST_IS_REG(dst))
2617 return emit_cmp_binary(compiler, dst, dstw, SLJIT_IMM, 0);
2618 return SLJIT_SUCCESS;
2619 }
2620
2621 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compiler, sljit_s32 op,
2622 sljit_s32 dst, sljit_sw dstw,
2623 sljit_s32 src1, sljit_sw src1w,
2624 sljit_s32 src2, sljit_sw src2w)
2625 {
2626 CHECK_ERROR();
2627 CHECK(check_sljit_emit_op2(compiler, op, 0, dst, dstw, src1, src1w, src2, src2w));
2628 ADJUST_LOCAL_OFFSET(dst, dstw);
2629 ADJUST_LOCAL_OFFSET(src1, src1w);
2630 ADJUST_LOCAL_OFFSET(src2, src2w);
2631
2632 CHECK_EXTRA_REGS(dst, dstw, (void)0);
2633 CHECK_EXTRA_REGS(src1, src1w, (void)0);
2634 CHECK_EXTRA_REGS(src2, src2w, (void)0);
2635 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2636 compiler->mode32 = op & SLJIT_32;
2637 #endif
2638
2639 switch (GET_OPCODE(op)) {
2640 case SLJIT_ADD:
2641 if (!HAS_FLAGS(op)) {
2642 if (emit_lea_binary(compiler, dst, dstw, src1, src1w, src2, src2w) != SLJIT_ERR_UNSUPPORTED)
2643 return compiler->error;
2644 }
2645 return emit_cum_binary(compiler, BINARY_OPCODE(ADD),
2646 dst, dstw, src1, src1w, src2, src2w);
2647 case SLJIT_ADDC:
2648 return emit_cum_binary(compiler, BINARY_OPCODE(ADC),
2649 dst, dstw, src1, src1w, src2, src2w);
2650 case SLJIT_SUB:
2651 if (src1 == SLJIT_IMM && src1w == 0)
2652 return emit_unary(compiler, NEG_rm, dst, dstw, src2, src2w);
2653
2654 if (!HAS_FLAGS(op)) {
2655 if (src2 == SLJIT_IMM && emit_lea_binary(compiler, dst, dstw, src1, src1w, SLJIT_IMM, -src2w) != SLJIT_ERR_UNSUPPORTED)
2656 return compiler->error;
2657 if (FAST_IS_REG(dst) && src2 == dst) {
2658 FAIL_IF(emit_non_cum_binary(compiler, BINARY_OPCODE(SUB), dst, 0, dst, 0, src1, src1w));
2659 return emit_unary(compiler, NEG_rm, dst, 0, dst, 0);
2660 }
2661 }
2662
2663 return emit_non_cum_binary(compiler, BINARY_OPCODE(SUB),
2664 dst, dstw, src1, src1w, src2, src2w);
2665 case SLJIT_SUBC:
2666 return emit_non_cum_binary(compiler, BINARY_OPCODE(SBB),
2667 dst, dstw, src1, src1w, src2, src2w);
2668 case SLJIT_MUL:
2669 return emit_mul(compiler, dst, dstw, src1, src1w, src2, src2w);
2670 case SLJIT_AND:
2671 return emit_cum_binary(compiler, BINARY_OPCODE(AND),
2672 dst, dstw, src1, src1w, src2, src2w);
2673 case SLJIT_OR:
2674 return emit_cum_binary(compiler, BINARY_OPCODE(OR),
2675 dst, dstw, src1, src1w, src2, src2w);
2676 case SLJIT_XOR:
2677 if (!HAS_FLAGS(op)) {
2678 if (src2 == SLJIT_IMM && src2w == -1)
2679 return emit_unary(compiler, NOT_rm, dst, dstw, src1, src1w);
2680 if (src1 == SLJIT_IMM && src1w == -1)
2681 return emit_unary(compiler, NOT_rm, dst, dstw, src2, src2w);
2682 }
2683
2684 return emit_cum_binary(compiler, BINARY_OPCODE(XOR),
2685 dst, dstw, src1, src1w, src2, src2w);
2686 case SLJIT_SHL:
2687 case SLJIT_MSHL:
2688 return emit_shift_with_flags(compiler, SHL, HAS_FLAGS(op),
2689 dst, dstw, src1, src1w, src2, src2w);
2690 case SLJIT_LSHR:
2691 case SLJIT_MLSHR:
2692 return emit_shift_with_flags(compiler, SHR, HAS_FLAGS(op),
2693 dst, dstw, src1, src1w, src2, src2w);
2694 case SLJIT_ASHR:
2695 case SLJIT_MASHR:
2696 return emit_shift_with_flags(compiler, SAR, HAS_FLAGS(op),
2697 dst, dstw, src1, src1w, src2, src2w);
2698 case SLJIT_ROTL:
2699 return emit_shift_with_flags(compiler, ROL, 0,
2700 dst, dstw, src1, src1w, src2, src2w);
2701 case SLJIT_ROTR:
2702 return emit_shift_with_flags(compiler, ROR, 0,
2703 dst, dstw, src1, src1w, src2, src2w);
2704 }
2705
2706 return SLJIT_SUCCESS;
2707 }
2708
2709 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2u(struct sljit_compiler *compiler, sljit_s32 op,
2710 sljit_s32 src1, sljit_sw src1w,
2711 sljit_s32 src2, sljit_sw src2w)
2712 {
2713 sljit_s32 opcode = GET_OPCODE(op);
2714
2715 CHECK_ERROR();
2716 CHECK(check_sljit_emit_op2(compiler, op, 1, 0, 0, src1, src1w, src2, src2w));
2717
2718 if (opcode != SLJIT_SUB && opcode != SLJIT_AND) {
2719 SLJIT_SKIP_CHECKS(compiler);
2720 return sljit_emit_op2(compiler, op, TMP_REG1, 0, src1, src1w, src2, src2w);
2721 }
2722
2723 ADJUST_LOCAL_OFFSET(src1, src1w);
2724 ADJUST_LOCAL_OFFSET(src2, src2w);
2725
2726 CHECK_EXTRA_REGS(src1, src1w, (void)0);
2727 CHECK_EXTRA_REGS(src2, src2w, (void)0);
2728 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2729 compiler->mode32 = op & SLJIT_32;
2730 #endif
2731
2732 if (opcode == SLJIT_SUB)
2733 return emit_cmp_binary(compiler, src1, src1w, src2, src2w);
2734
2735 return emit_test_binary(compiler, src1, src1w, src2, src2w);
2736 }
2737
2738 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2r(struct sljit_compiler *compiler, sljit_s32 op,
2739 sljit_s32 dst_reg,
2740 sljit_s32 src1, sljit_sw src1w,
2741 sljit_s32 src2, sljit_sw src2w)
2742 {
2743 sljit_u8* inst;
2744 sljit_sw dstw = 0;
2745
2746 CHECK_ERROR();
2747 CHECK(check_sljit_emit_op2r(compiler, op, dst_reg, src1, src1w, src2, src2w));
2748 ADJUST_LOCAL_OFFSET(src1, src1w);
2749 ADJUST_LOCAL_OFFSET(src2, src2w);
2750
2751 CHECK_EXTRA_REGS(dst_reg, dstw, (void)0);
2752 CHECK_EXTRA_REGS(src1, src1w, (void)0);
2753 CHECK_EXTRA_REGS(src2, src2w, (void)0);
2754 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2755 compiler->mode32 = op & SLJIT_32;
2756 #endif
2757
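/* MULADD is lowered as dst_reg += src1 * src2: the product is built in
   TMP_REG1 by emit_mul, then added with a plain ADD r/m, r. */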
2758 switch (GET_OPCODE(op)) {
2759 case SLJIT_MULADD:
2760 FAIL_IF(emit_mul(compiler, TMP_REG1, 0, src1, src1w, src2, src2w));
2761 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst_reg, dstw);
2762 FAIL_IF(!inst);
2763 *inst = ADD_rm_r;
2764 return SLJIT_SUCCESS;
2765 }
2766
2767 return SLJIT_SUCCESS;
2768 }
2769
2770 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_shift_into(struct sljit_compiler *compiler, sljit_s32 op,
2771 sljit_s32 dst_reg,
2772 sljit_s32 src1_reg,
2773 sljit_s32 src2_reg,
2774 sljit_s32 src3, sljit_sw src3w)
2775 {
2776 sljit_s32 is_rotate, is_left, move_src1;
2777 sljit_u8* inst;
2778 sljit_sw src1w = 0;
2779 sljit_sw dstw = 0;
2780 /* The whole register must be saved even for 32 bit operations. */
2781 sljit_u8 restore_ecx = 0;
2782 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
2783 sljit_sw src2w = 0;
2784 sljit_s32 restore_sp4 = 0;
2785 #endif /* SLJIT_CONFIG_X86_32 */
2786
2787 CHECK_ERROR();
2788 CHECK(check_sljit_emit_shift_into(compiler, op, dst_reg, src1_reg, src2_reg, src3, src3w));
2789 ADJUST_LOCAL_OFFSET(src3, src3w);
2790
2791 CHECK_EXTRA_REGS(dst_reg, dstw, (void)0);
2792 CHECK_EXTRA_REGS(src3, src3w, (void)0);
2793
2794 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2795 compiler->mode32 = op & SLJIT_32;
2796 #endif /* SLJIT_CONFIG_X86_64 */
2797
2798 if (src3 == SLJIT_IMM) {
2799 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
2800 src3w &= 0x1f;
2801 #else /* !SLJIT_CONFIG_X86_32 */
2802 src3w &= (op & SLJIT_32) ? 0x1f : 0x3f;
2803 #endif /* SLJIT_CONFIG_X86_32 */
2804
2805 if (src3w == 0)
2806 return SLJIT_SUCCESS;
2807 }
2808
2809 is_left = (GET_OPCODE(op) == SLJIT_SHL || GET_OPCODE(op) == SLJIT_MSHL);
2810
2811 is_rotate = (src1_reg == src2_reg);
2812 CHECK_EXTRA_REGS(src1_reg, src1w, (void)0);
2813 CHECK_EXTRA_REGS(src2_reg, src2w, (void)0);
2814
2815 if (is_rotate)
2816 return emit_shift(compiler, is_left ? ROL : ROR, dst_reg, dstw, src1_reg, src1w, src3, src3w);
2817
2818 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
2819 if (src2_reg & SLJIT_MEM) {
2820 EMIT_MOV(compiler, TMP_REG1, 0, src2_reg, src2w);
2821 src2_reg = TMP_REG1;
2822 }
2823 #endif /* SLJIT_CONFIG_X86_32 */
2824
2825 if (dst_reg == SLJIT_PREF_SHIFT_REG && src3 != SLJIT_IMM && (src3 != SLJIT_PREF_SHIFT_REG || src1_reg != SLJIT_PREF_SHIFT_REG)) {
2826 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2827 EMIT_MOV(compiler, TMP_REG1, 0, src1_reg, src1w);
2828 src1_reg = TMP_REG1;
2829 src1w = 0;
2830 #else /* !SLJIT_CONFIG_X86_64 */
2831 if (src2_reg != TMP_REG1) {
2832 EMIT_MOV(compiler, TMP_REG1, 0, src1_reg, src1w);
2833 src1_reg = TMP_REG1;
2834 src1w = 0;
2835 } else if ((src1_reg & SLJIT_MEM) || src1_reg == SLJIT_PREF_SHIFT_REG) {
2836 restore_sp4 = (src3 == SLJIT_R0) ? SLJIT_R1 : SLJIT_R0;
2837 EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), sizeof(sljit_s32), restore_sp4, 0);
2838 EMIT_MOV(compiler, restore_sp4, 0, src1_reg, src1w);
2839 src1_reg = restore_sp4;
2840 src1w = 0;
2841 } else {
2842 EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), sizeof(sljit_s32), src1_reg, 0);
2843 restore_sp4 = src1_reg;
2844 }
2845 #endif /* SLJIT_CONFIG_X86_64 */
2846
2847 if (src3 != SLJIT_PREF_SHIFT_REG)
2848 EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src3, src3w);
2849 } else {
2850 if (src2_reg == SLJIT_PREF_SHIFT_REG && src3 != SLJIT_IMM && src3 != SLJIT_PREF_SHIFT_REG) {
2851 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2852 compiler->mode32 = 0;
2853 #endif /* SLJIT_CONFIG_X86_64 */
2854 EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_PREF_SHIFT_REG, 0);
2855 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2856 compiler->mode32 = op & SLJIT_32;
2857 #endif /* SLJIT_CONFIG_X86_64 */
2858 src2_reg = TMP_REG1;
2859 restore_ecx = 1;
2860 }
2861
2862 move_src1 = 0;
2863 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2864 if (dst_reg != src1_reg) {
2865 if (dst_reg != src3) {
2866 EMIT_MOV(compiler, dst_reg, 0, src1_reg, src1w);
2867 src1_reg = dst_reg;
2868 src1w = 0;
2869 } else
2870 move_src1 = 1;
2871 }
2872 #else /* !SLJIT_CONFIG_X86_64 */
2873 if (dst_reg & SLJIT_MEM) {
2874 if (src2_reg != TMP_REG1) {
2875 EMIT_MOV(compiler, TMP_REG1, 0, src1_reg, src1w);
2876 src1_reg = TMP_REG1;
2877 src1w = 0;
2878 } else if ((src1_reg & SLJIT_MEM) || src1_reg == SLJIT_PREF_SHIFT_REG) {
2879 restore_sp4 = (src3 == SLJIT_R0) ? SLJIT_R1 : SLJIT_R0;
2880 EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), sizeof(sljit_s32), restore_sp4, 0);
2881 EMIT_MOV(compiler, restore_sp4, 0, src1_reg, src1w);
2882 src1_reg = restore_sp4;
2883 src1w = 0;
2884 } else {
2885 EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), sizeof(sljit_s32), src1_reg, 0);
2886 restore_sp4 = src1_reg;
2887 }
2888 } else if (dst_reg != src1_reg) {
2889 if (dst_reg != src3) {
2890 EMIT_MOV(compiler, dst_reg, 0, src1_reg, src1w);
2891 src1_reg = dst_reg;
2892 src1w = 0;
2893 } else
2894 move_src1 = 1;
2895 }
2896 #endif /* SLJIT_CONFIG_X86_64 */
2897
2898 if (src3 != SLJIT_IMM && src3 != SLJIT_PREF_SHIFT_REG) {
2899 if (!restore_ecx) {
2900 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2901 compiler->mode32 = 0;
2902 EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_PREF_SHIFT_REG, 0);
2903 compiler->mode32 = op & SLJIT_32;
2904 restore_ecx = 1;
2905 #else /* !SLJIT_CONFIG_X86_64 */
2906 if (src1_reg != TMP_REG1 && src2_reg != TMP_REG1) {
2907 EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_PREF_SHIFT_REG, 0);
2908 restore_ecx = 1;
2909 } else {
2910 EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), 0, SLJIT_PREF_SHIFT_REG, 0);
2911 restore_ecx = 2;
2912 }
2913 #endif /* SLJIT_CONFIG_X86_64 */
2914 }
2915 EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src3, src3w);
2916 }
2917
2918 if (move_src1) {
2919 EMIT_MOV(compiler, dst_reg, 0, src1_reg, src1w);
2920 src1_reg = dst_reg;
2921 src1w = 0;
2922 }
2923 }
2924
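/* The double-precision shifts SHLD/SHRD (0F A4..A5 / 0F AC..AD) shift
   src1_reg while filling the vacated bits from src2_reg; the imm8
   form's opcode is one less than the CL form, hence the "- 1" below. */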
2925 inst = emit_x86_instruction(compiler, 2, src2_reg, 0, src1_reg, src1w);
2926 FAIL_IF(!inst);
2927 inst[0] = GROUP_0F;
2928
2929 if (src3 == SLJIT_IMM) {
2930 inst[1] = U8((is_left ? SHLD : SHRD) - 1);
2931
2932 /* Immediate argument is added separately. */
2933 FAIL_IF(emit_byte(compiler, U8(src3w)));
2934 } else
2935 inst[1] = U8(is_left ? SHLD : SHRD);
2936
2937 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2938 if (restore_ecx) {
2939 compiler->mode32 = 0;
2940 EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
2941 }
2942
2943 if (src1_reg != dst_reg) {
2944 compiler->mode32 = op & SLJIT_32;
2945 return emit_mov(compiler, dst_reg, dstw, src1_reg, 0);
2946 }
2947 #else /* !SLJIT_CONFIG_X86_64 */
2948 if (restore_ecx)
2949 EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, restore_ecx == 1 ? TMP_REG1 : SLJIT_MEM1(SLJIT_SP), 0);
2950
2951 if (src1_reg != dst_reg)
2952 EMIT_MOV(compiler, dst_reg, dstw, src1_reg, 0);
2953
2954 if (restore_sp4)
2955 return emit_mov(compiler, restore_sp4, 0, SLJIT_MEM1(SLJIT_SP), sizeof(sljit_s32));
2956 #endif /* SLJIT_CONFIG_X86_64 */
2957
2958 return SLJIT_SUCCESS;
2959 }
2960
2961 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_src(struct sljit_compiler *compiler, sljit_s32 op,
2962 sljit_s32 src, sljit_sw srcw)
2963 {
2964 CHECK_ERROR();
2965 CHECK(check_sljit_emit_op_src(compiler, op, src, srcw));
2966 ADJUST_LOCAL_OFFSET(src, srcw);
2967
2968 CHECK_EXTRA_REGS(src, srcw, (void)0);
2969
2970 switch (op) {
2971 case SLJIT_FAST_RETURN:
2972 return emit_fast_return(compiler, src, srcw);
2973 case SLJIT_SKIP_FRAMES_BEFORE_FAST_RETURN:
2974 /* Don't adjust shadow stack if it isn't enabled. */
2975 if (!cpu_has_shadow_stack ())
2976 return SLJIT_SUCCESS;
2977 return adjust_shadow_stack(compiler, src, srcw);
2978 case SLJIT_PREFETCH_L1:
2979 case SLJIT_PREFETCH_L2:
2980 case SLJIT_PREFETCH_L3:
2981 case SLJIT_PREFETCH_ONCE:
2982 return emit_prefetch(compiler, op, src, srcw);
2983 }
2984
2985 return SLJIT_SUCCESS;
2986 }
2987
2988 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_dst(struct sljit_compiler *compiler, sljit_s32 op,
2989 sljit_s32 dst, sljit_sw dstw)
2990 {
2991 CHECK_ERROR();
2992 CHECK(check_sljit_emit_op_dst(compiler, op, dst, dstw));
2993 ADJUST_LOCAL_OFFSET(dst, dstw);
2994
2995 CHECK_EXTRA_REGS(dst, dstw, (void)0);
2996
2997 switch (op) {
2998 case SLJIT_FAST_ENTER:
2999 return emit_fast_enter(compiler, dst, dstw);
3000 case SLJIT_GET_RETURN_ADDRESS:
3001 return sljit_emit_get_return_address(compiler, dst, dstw);
3002 }
3003
3004 return SLJIT_SUCCESS;
3005 }
3006
3007 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 type, sljit_s32 reg)
3008 {
3009 CHECK_REG_INDEX(check_sljit_get_register_index(type, reg));
3010
3011 if (type == SLJIT_GP_REGISTER) {
3012 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
3013 if (reg >= SLJIT_R3 && reg <= SLJIT_R8)
3014 return -1;
3015 #endif /* SLJIT_CONFIG_X86_32 */
3016 return reg_map[reg];
3017 }
3018
3019 if (type != SLJIT_FLOAT_REGISTER && type != SLJIT_SIMD_REG_128 && type != SLJIT_SIMD_REG_256 && type != SLJIT_SIMD_REG_512)
3020 return -1;
3021
3022 return freg_map[reg];
3023 }
3024
3025 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler,
3026 void *instruction, sljit_u32 size)
3027 {
3028 sljit_u8 *inst;
3029
3030 CHECK_ERROR();
3031 CHECK(check_sljit_emit_op_custom(compiler, instruction, size));
3032
3033 inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
3034 FAIL_IF(!inst);
3035 INC_SIZE(size);
3036 SLJIT_MEMCPY(inst, instruction, size);
3037 return SLJIT_SUCCESS;
3038 }
3039
3040 /* --------------------------------------------------------------------- */
3041 /* Floating point operators */
3042 /* --------------------------------------------------------------------- */
3043
3044 /* 3 words of alignment padding + 4 constants of 16 bytes each. */
3045 static sljit_u32 sse2_data[3 + (4 * 4)];
3046 static sljit_u32 *sse2_buffer;
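/* Layout after the 16 byte alignment (inferred from the initializers
   below and their users): one aligned 16 byte slot per mask. Slot 0
   (index 0) is the f32 sign-bit mask used by NEG, slot 1 (index 4) the
   f32 abs mask, slot 2 (indexes 8-9) the f64 sign-bit mask, and slot 3
   (indexes 12-13) the f64 abs mask; they are consumed by the
   XORPD/ANDPD paths in sljit_emit_fop1 and sljit_emit_fop2r. */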
3047
3048 static void init_compiler(void)
3049 {
3050 get_cpu_features();
3051
3052 /* Align to 16 bytes. */
3053 sse2_buffer = (sljit_u32*)(((sljit_uw)sse2_data + 15) & ~(sljit_uw)0xf);
3054
3055 /* Single precision constants (each constant is 16 bytes long). */
3056 sse2_buffer[0] = 0x80000000;
3057 sse2_buffer[4] = 0x7fffffff;
3058 /* Double precision constants (each constant is 16 bytes long). */
3059 sse2_buffer[8] = 0;
3060 sse2_buffer[9] = 0x80000000;
3061 sse2_buffer[12] = 0xffffffff;
3062 sse2_buffer[13] = 0x7fffffff;
3063 }
3064
3065 static sljit_s32 emit_groupf(struct sljit_compiler *compiler,
3066 sljit_uw op,
3067 sljit_s32 dst, sljit_s32 src, sljit_sw srcw)
3068 {
3069 sljit_u8 *inst = emit_x86_instruction(compiler, 2 | (op & ~(sljit_uw)0xff), dst, 0, src, srcw);
3070 FAIL_IF(!inst);
3071 inst[0] = GROUP_0F;
3072 inst[1] = op & 0xff;
3073 return SLJIT_SUCCESS;
3074 }
3075
3076 static sljit_s32 emit_groupf_ext(struct sljit_compiler *compiler,
3077 sljit_uw op,
3078 sljit_s32 dst, sljit_s32 src, sljit_sw srcw)
3079 {
3080 sljit_u8 *inst;
3081
3082 SLJIT_ASSERT((op & EX86_SSE2) && ((op & VEX_OP_0F38) || (op & VEX_OP_0F3A)));
3083
3084 inst = emit_x86_instruction(compiler, 3 | (op & ~((sljit_uw)0xff | VEX_OP_0F38 | VEX_OP_0F3A)), dst, 0, src, srcw);
3085 FAIL_IF(!inst);
3086 inst[0] = GROUP_0F;
3087 inst[1] = U8((op & VEX_OP_0F38) ? 0x38 : 0x3A);
3088 inst[2] = op & 0xff;
3089 return SLJIT_SUCCESS;
3090 }
3091
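/* With the same 0F 10 / 0F 11 opcode, an F3 prefix selects the single
   precision MOVSS and an F2 prefix the double precision MOVSD; that is
   all the "single" flag changes in the two helpers below. */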
3092 static SLJIT_INLINE sljit_s32 emit_sse2_load(struct sljit_compiler *compiler,
3093 sljit_s32 single, sljit_s32 dst, sljit_s32 src, sljit_sw srcw)
3094 {
3095 return emit_groupf(compiler, MOVSD_x_xm | (single ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2, dst, src, srcw);
3096 }
3097
3098 static SLJIT_INLINE sljit_s32 emit_sse2_store(struct sljit_compiler *compiler,
3099 sljit_s32 single, sljit_s32 dst, sljit_sw dstw, sljit_s32 src)
3100 {
3101 return emit_groupf(compiler, MOVSD_xm_x | (single ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2, src, dst, dstw);
3102 }
3103
3104 static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_compiler *compiler, sljit_s32 op,
3105 sljit_s32 dst, sljit_sw dstw,
3106 sljit_s32 src, sljit_sw srcw)
3107 {
3108 sljit_s32 dst_r;
3109
3110 CHECK_EXTRA_REGS(dst, dstw, (void)0);
3111 dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
3112
3113 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
3114 if (GET_OPCODE(op) == SLJIT_CONV_SW_FROM_F64)
3115 compiler->mode32 = 0;
3116 #endif
3117
3118 FAIL_IF(emit_groupf(compiler, CVTTSD2SI_r_xm | EX86_SELECT_F2_F3(op) | EX86_SSE2_OP2, dst_r, src, srcw));
3119
3120 if (dst & SLJIT_MEM)
3121 return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
3122 return SLJIT_SUCCESS;
3123 }
3124
3125 static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_compiler *compiler, sljit_s32 op,
3126 sljit_s32 dst, sljit_sw dstw,
3127 sljit_s32 src, sljit_sw srcw)
3128 {
3129 sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG;
3130
3131 CHECK_EXTRA_REGS(src, srcw, (void)0);
3132
3133 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
3134 if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_SW)
3135 compiler->mode32 = 0;
3136 #endif
3137
3138 if (src == SLJIT_IMM) {
3139 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
3140 if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32)
3141 srcw = (sljit_s32)srcw;
3142 #endif
3143 EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
3144 src = TMP_REG1;
3145 srcw = 0;
3146 }
3147
3148 FAIL_IF(emit_groupf(compiler, CVTSI2SD_x_rm | EX86_SELECT_F2_F3(op) | EX86_SSE2_OP1, dst_r, src, srcw));
3149
3150 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
3151 compiler->mode32 = 1;
3152 #endif
3153 if (dst_r == TMP_FREG)
3154 return emit_sse2_store(compiler, op & SLJIT_32, dst, dstw, TMP_FREG);
3155 return SLJIT_SUCCESS;
3156 }
3157
3158 static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_s32 op,
3159 sljit_s32 src1, sljit_sw src1w,
3160 sljit_s32 src2, sljit_sw src2w)
3161 {
3162 switch (GET_FLAG_TYPE(op)) {
3163 case SLJIT_ORDERED_EQUAL:
3164 /* Also: SLJIT_UNORDERED_OR_NOT_EQUAL */
3165 FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, TMP_FREG, src1, src1w));
3166 FAIL_IF(emit_groupf(compiler, CMPS_x_xm | EX86_SELECT_F2_F3(op) | EX86_SSE2, TMP_FREG, src2, src2w));
3167
3168 /* EQ */
3169 FAIL_IF(emit_byte(compiler, 0));
3170
3171 src1 = TMP_FREG;
3172 src2 = TMP_FREG;
3173 src2w = 0;
3174 break;
3175
3176 case SLJIT_ORDERED_LESS:
3177 case SLJIT_UNORDERED_OR_GREATER:
3178 /* Also: SLJIT_UNORDERED_OR_GREATER_EQUAL, SLJIT_ORDERED_LESS_EQUAL */
3179 if (!FAST_IS_REG(src2)) {
3180 FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, TMP_FREG, src2, src2w));
3181 src2 = TMP_FREG;
3182 }
3183
3184 return emit_groupf(compiler, UCOMISD_x_xm | EX86_SELECT_66(op) | EX86_SSE2, src2, src1, src1w);
3185 }
3186
3187 if (!FAST_IS_REG(src1)) {
3188 FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, TMP_FREG, src1, src1w));
3189 src1 = TMP_FREG;
3190 }
3191
3192 return emit_groupf(compiler, UCOMISD_x_xm | EX86_SELECT_66(op) | EX86_SSE2, src1, src2, src2w);
3193 }
3194
3195 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op,
3196 sljit_s32 dst, sljit_sw dstw,
3197 sljit_s32 src, sljit_sw srcw)
3198 {
3199 sljit_s32 dst_r;
3200 sljit_u8 *inst;
3201
3202 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
3203 compiler->mode32 = 1;
3204 #endif
3205
3206 CHECK_ERROR();
3207 SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw);
3208
3209 if (GET_OPCODE(op) == SLJIT_MOV_F64) {
3210 if (FAST_IS_REG(dst))
3211 return emit_sse2_load(compiler, op & SLJIT_32, dst, src, srcw);
3212 if (FAST_IS_REG(src))
3213 return emit_sse2_store(compiler, op & SLJIT_32, dst, dstw, src);
3214 FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, TMP_FREG, src, srcw));
3215 return emit_sse2_store(compiler, op & SLJIT_32, dst, dstw, TMP_FREG);
3216 }
3217
3218 if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_F32) {
3219 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG;
3220 if (FAST_IS_REG(src)) {
3221 /* We overwrite the high bits of the source. From SLJIT's point of view,
3222 this is not an issue.
3223 Note: In SSE3, we could also use MOVDDUP and MOVSLDUP. */
3224 FAIL_IF(emit_groupf(compiler, UNPCKLPD_x_xm | ((op & SLJIT_32) ? EX86_PREF_66 : 0) | EX86_SSE2, src, src, 0));
3225 } else {
3226 FAIL_IF(emit_sse2_load(compiler, !(op & SLJIT_32), TMP_FREG, src, srcw));
3227 src = TMP_FREG;
3228 }
3229
3230 FAIL_IF(emit_groupf(compiler, CVTPD2PS_x_xm | ((op & SLJIT_32) ? EX86_PREF_66 : 0) | EX86_SSE2, dst_r, src, 0));
3231 if (dst_r == TMP_FREG)
3232 return emit_sse2_store(compiler, op & SLJIT_32, dst, dstw, TMP_FREG);
3233 return SLJIT_SUCCESS;
3234 }
3235
3236 if (FAST_IS_REG(dst)) {
3237 dst_r = (dst == src) ? TMP_FREG : dst;
3238
3239 if (src & SLJIT_MEM)
3240 FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, TMP_FREG, src, srcw));
3241
3242 FAIL_IF(emit_groupf(compiler, PCMPEQD_x_xm | EX86_PREF_66 | EX86_SSE2, dst_r, dst_r, 0));
3243
3244 inst = emit_x86_instruction(compiler, 2 | EX86_PREF_66 | EX86_SSE2_OP2, 0, 0, dst_r, 0);
FAIL_IF(!inst);
3245 inst[0] = GROUP_0F;
3246 /* Same as PSRLD_x / PSRLQ_x */
3247 inst[1] = (op & SLJIT_32) ? PSLLD_x_i8 : PSLLQ_x_i8;
3248
3249 if (GET_OPCODE(op) == SLJIT_ABS_F64) {
3250 inst[2] |= 2 << 3;
3251 FAIL_IF(emit_byte(compiler, 1));
3252 } else {
3253 inst[2] |= 6 << 3;
3254 FAIL_IF(emit_byte(compiler, ((op & SLJIT_32) ? 31 : 63)));
3255 }
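/* Mask construction: PCMPEQD x, x sets all bits, then the 0F 72/0F 73
   shift group (sub-opcode 2 = PSRL, 6 = PSLL, encoded in the ModRM reg
   field above) turns that into 0x7fff... for ABS (all ones shifted
   right by 1) or into the lone sign bit for NEG (all ones shifted left
   by width - 1). */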
3256
3257 if (dst_r != TMP_FREG)
3258 dst_r = (src & SLJIT_MEM) ? TMP_FREG : src;
3259 return emit_groupf(compiler, (GET_OPCODE(op) == SLJIT_NEG_F64 ? XORPD_x_xm : ANDPD_x_xm) | EX86_SSE2, dst, dst_r, 0);
3260 }
3261
3262 FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, TMP_FREG, src, srcw));
3263
3264 switch (GET_OPCODE(op)) {
3265 case SLJIT_NEG_F64:
3266 FAIL_IF(emit_groupf(compiler, XORPD_x_xm | EX86_SELECT_66(op) | EX86_SSE2, TMP_FREG, SLJIT_MEM0(), (sljit_sw)((op & SLJIT_32) ? sse2_buffer : sse2_buffer + 8)));
3267 break;
3268
3269 case SLJIT_ABS_F64:
3270 FAIL_IF(emit_groupf(compiler, ANDPD_x_xm | EX86_SELECT_66(op) | EX86_SSE2, TMP_FREG, SLJIT_MEM0(), (sljit_sw)((op & SLJIT_32) ? sse2_buffer + 4 : sse2_buffer + 12)));
3271 break;
3272 }
3273
3274 return emit_sse2_store(compiler, op & SLJIT_32, dst, dstw, TMP_FREG);
3275 }
3276
3277 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compiler, sljit_s32 op,
3278 sljit_s32 dst, sljit_sw dstw,
3279 sljit_s32 src1, sljit_sw src1w,
3280 sljit_s32 src2, sljit_sw src2w)
3281 {
3282 sljit_s32 dst_r;
3283
3284 CHECK_ERROR();
3285 CHECK(check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w));
3286 ADJUST_LOCAL_OFFSET(dst, dstw);
3287 ADJUST_LOCAL_OFFSET(src1, src1w);
3288 ADJUST_LOCAL_OFFSET(src2, src2w);
3289
3290 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
3291 compiler->mode32 = 1;
3292 #endif
3293
3294 if (FAST_IS_REG(dst)) {
3295 dst_r = dst;
3296 if (dst == src1)
3297 ; /* Do nothing here. */
3298 else if (dst == src2 && (GET_OPCODE(op) == SLJIT_ADD_F64 || GET_OPCODE(op) == SLJIT_MUL_F64)) {
3299 /* Swap arguments. */
3300 src2 = src1;
3301 src2w = src1w;
3302 } else if (dst != src2)
3303 FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, dst_r, src1, src1w));
3304 else {
3305 dst_r = TMP_FREG;
3306 FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, TMP_FREG, src1, src1w));
3307 }
3308 } else {
3309 dst_r = TMP_FREG;
3310 FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, TMP_FREG, src1, src1w));
3311 }
3312
3313 switch (GET_OPCODE(op)) {
3314 case SLJIT_ADD_F64:
3315 FAIL_IF(emit_groupf(compiler, ADDSD_x_xm | EX86_SELECT_F2_F3(op) | EX86_SSE2, dst_r, src2, src2w));
3316 break;
3317
3318 case SLJIT_SUB_F64:
3319 FAIL_IF(emit_groupf(compiler, SUBSD_x_xm | EX86_SELECT_F2_F3(op) | EX86_SSE2, dst_r, src2, src2w));
3320 break;
3321
3322 case SLJIT_MUL_F64:
3323 FAIL_IF(emit_groupf(compiler, MULSD_x_xm | EX86_SELECT_F2_F3(op) | EX86_SSE2, dst_r, src2, src2w));
3324 break;
3325
3326 case SLJIT_DIV_F64:
3327 FAIL_IF(emit_groupf(compiler, DIVSD_x_xm | EX86_SELECT_F2_F3(op) | EX86_SSE2, dst_r, src2, src2w));
3328 break;
3329 }
3330
3331 if (dst_r != dst)
3332 return emit_sse2_store(compiler, op & SLJIT_32, dst, dstw, TMP_FREG);
3333 return SLJIT_SUCCESS;
3334 }
3335
3336 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2r(struct sljit_compiler *compiler, sljit_s32 op,
3337 sljit_s32 dst_freg,
3338 sljit_s32 src1, sljit_sw src1w,
3339 sljit_s32 src2, sljit_sw src2w)
3340 {
3341 sljit_uw pref;
3342
3343 CHECK_ERROR();
3344 CHECK(check_sljit_emit_fop2r(compiler, op, dst_freg, src1, src1w, src2, src2w));
3345 ADJUST_LOCAL_OFFSET(src1, src1w);
3346 ADJUST_LOCAL_OFFSET(src2, src2w);
3347
3348 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
3349 compiler->mode32 = 1;
3350 #endif
3351
3352 if (dst_freg == src1) {
3353 FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, TMP_FREG, src2, src2w));
3354 pref = EX86_SELECT_66(op) | EX86_SSE2;
3355 FAIL_IF(emit_groupf(compiler, XORPD_x_xm | pref, TMP_FREG, src1, src1w));
3356 FAIL_IF(emit_groupf(compiler, ANDPD_x_xm | pref, TMP_FREG, SLJIT_MEM0(), (sljit_sw)((op & SLJIT_32) ? sse2_buffer : sse2_buffer + 8)));
3357 return emit_groupf(compiler, XORPD_x_xm | pref, dst_freg, TMP_FREG, 0);
3358 }
3359
3360 if (src1 & SLJIT_MEM) {
3361 FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, TMP_FREG, src1, src1w));
3362 src1 = TMP_FREG;
3363 src1w = 0;
3364 }
3365
3366 if (dst_freg != src2)
3367 FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, dst_freg, src2, src2w));
3368
3369 pref = EX86_SELECT_66(op) | EX86_SSE2;
3370 FAIL_IF(emit_groupf(compiler, XORPD_x_xm | pref, dst_freg, src1, src1w));
3371 FAIL_IF(emit_groupf(compiler, ANDPD_x_xm | pref, dst_freg, SLJIT_MEM0(), (sljit_sw)((op & SLJIT_32) ? sse2_buffer : sse2_buffer + 8)));
3372 return emit_groupf(compiler, XORPD_x_xm | pref, dst_freg, src1, src1w);
3373 }
3374
3375 /* --------------------------------------------------------------------- */
3376 /* Conditional instructions */
3377 /* --------------------------------------------------------------------- */
3378
3379 SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler)
3380 {
3381 sljit_u8 *inst;
3382 struct sljit_label *label;
3383
3384 CHECK_ERROR_PTR();
3385 CHECK_PTR(check_sljit_emit_label(compiler));
3386
3387 if (compiler->last_label && compiler->last_label->size == compiler->size)
3388 return compiler->last_label;
3389
3390 label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label));
3391 PTR_FAIL_IF(!label);
3392 set_label(label, compiler);
3393
3394 inst = (sljit_u8*)ensure_buf(compiler, 1);
3395 PTR_FAIL_IF(!inst);
3396 inst[0] = SLJIT_INST_LABEL;
3397
3398 return label;
3399 }
3400
3401 SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_s32 type)
3402 {
3403 sljit_u8 *inst;
3404 struct sljit_jump *jump;
3405
3406 CHECK_ERROR_PTR();
3407 CHECK_PTR(check_sljit_emit_jump(compiler, type));
3408
3409 jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
3410 PTR_FAIL_IF_NULL(jump);
3411 set_jump(jump, compiler, (sljit_u32)((type & SLJIT_REWRITABLE_JUMP) | ((type & 0xff) << TYPE_SHIFT)));
3412 type &= 0xff;
3413
3414 jump->addr = compiler->size;
3415 /* Worst case size. */
3416 compiler->size += (type >= SLJIT_JUMP) ? JUMP_MAX_SIZE : CJUMP_MAX_SIZE;
3417 inst = (sljit_u8*)ensure_buf(compiler, 1);
3418 PTR_FAIL_IF_NULL(inst);
3419
3420 inst[0] = SLJIT_INST_JUMP;
3421 return jump;
3422 }
3423
3424 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw)
3425 {
3426 sljit_u8 *inst;
3427 struct sljit_jump *jump;
3428
3429 CHECK_ERROR();
3430 CHECK(check_sljit_emit_ijump(compiler, type, src, srcw));
3431 ADJUST_LOCAL_OFFSET(src, srcw);
3432
3433 CHECK_EXTRA_REGS(src, srcw, (void)0);
3434
3435 if (src == SLJIT_IMM) {
3436 jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
3437 FAIL_IF_NULL(jump);
3438 set_jump(jump, compiler, (sljit_u32)(JUMP_ADDR | (type << TYPE_SHIFT)));
3439 jump->u.target = (sljit_uw)srcw;
3440
3441 jump->addr = compiler->size;
3442 /* Worst case size. */
3443 compiler->size += JUMP_MAX_SIZE;
3444 inst = (sljit_u8*)ensure_buf(compiler, 1);
3445 FAIL_IF_NULL(inst);
3446
3447 inst[0] = SLJIT_INST_JUMP;
3448 } else {
3449 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
3450 /* REX_W is not necessary (src is not immediate). */
3451 compiler->mode32 = 1;
3452 #endif
3453 inst = emit_x86_instruction(compiler, 1, 0, 0, src, srcw);
3454 FAIL_IF(!inst);
3455 inst[0] = GROUP_FF;
3456 inst[1] = U8(inst[1] | ((type >= SLJIT_FAST_CALL) ? CALL_rm : JMP_rm));
3457 }
3458 return SLJIT_SUCCESS;
3459 }
3460
3461 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op,
3462 sljit_s32 dst, sljit_sw dstw,
3463 sljit_s32 type)
3464 {
3465 sljit_u8 *inst;
3466 sljit_u8 cond_set;
3467 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
3468 sljit_s32 reg;
3469 #endif /* SLJIT_CONFIG_X86_64 */
3470 /* ADJUST_LOCAL_OFFSET and CHECK_EXTRA_REGS might overwrite these values. */
3471 sljit_s32 dst_save = dst;
3472 sljit_sw dstw_save = dstw;
3473
3474 CHECK_ERROR();
3475 CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, type));
3476
3477 ADJUST_LOCAL_OFFSET(dst, dstw);
3478 CHECK_EXTRA_REGS(dst, dstw, (void)0);
3479
3480 /* setcc = jcc + 0x10. */
3481 cond_set = U8(get_jump_code((sljit_uw)type) + 0x10);
3482
3483 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
3484 if (GET_OPCODE(op) == SLJIT_OR && !GET_ALL_FLAGS(op) && FAST_IS_REG(dst)) {
3485 inst = (sljit_u8*)ensure_buf(compiler, 1 + 4 + 3);
3486 FAIL_IF(!inst);
3487 INC_SIZE(4 + 3);
3488 /* Set low register to conditional flag. */
3489 inst[0] = (reg_map[TMP_REG1] <= 7) ? REX : REX_B;
3490 inst[1] = GROUP_0F;
3491 inst[2] = cond_set;
3492 inst[3] = MOD_REG | reg_lmap[TMP_REG1];
3493 inst[4] = U8(REX | (reg_map[TMP_REG1] <= 7 ? 0 : REX_R) | (reg_map[dst] <= 7 ? 0 : REX_B));
3494 inst[5] = OR_rm8_r8;
3495 inst[6] = U8(MOD_REG | (reg_lmap[TMP_REG1] << 3) | reg_lmap[dst]);
3496 return SLJIT_SUCCESS;
3497 }
3498
3499 reg = (GET_OPCODE(op) < SLJIT_ADD && FAST_IS_REG(dst)) ? dst : TMP_REG1;
3500
3501 inst = (sljit_u8*)ensure_buf(compiler, 1 + 4 + 4);
3502 FAIL_IF(!inst);
3503 INC_SIZE(4 + 4);
3504 /* Set low register to conditional flag. */
3505 inst[0] = (reg_map[reg] <= 7) ? REX : REX_B;
3506 inst[1] = GROUP_0F;
3507 inst[2] = cond_set;
3508 inst[3] = MOD_REG | reg_lmap[reg];
3509 inst[4] = REX_W | (reg_map[reg] <= 7 ? 0 : (REX_B | REX_R));
3510 /* The movzx instruction does not affect flags. */
3511 inst[5] = GROUP_0F;
3512 inst[6] = MOVZX_r_rm8;
3513 inst[7] = U8(MOD_REG | (reg_lmap[reg] << 3) | reg_lmap[reg]);
3514
3515 if (reg != TMP_REG1)
3516 return SLJIT_SUCCESS;
3517
3518 if (GET_OPCODE(op) < SLJIT_ADD) {
3519 compiler->mode32 = GET_OPCODE(op) != SLJIT_MOV;
3520 return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
3521 }
3522
3523 SLJIT_SKIP_CHECKS(compiler);
3524 return sljit_emit_op2(compiler, op, dst_save, dstw_save, dst_save, dstw_save, TMP_REG1, 0);
3525
3526 #else /* !SLJIT_CONFIG_X86_64 */
3527 SLJIT_ASSERT(reg_map[TMP_REG1] < 4);
3528
3529 /* setcc can only target byte-addressable registers (reg_map value below 4 on x86-32). */
3530 if (GET_OPCODE(op) < SLJIT_ADD && FAST_IS_REG(dst) && reg_map[dst] <= 4) {
3531 /* Low byte is accessible. */
3532 inst = (sljit_u8*)ensure_buf(compiler, 1 + 3 + 3);
3533 FAIL_IF(!inst);
3534 INC_SIZE(3 + 3);
3535 /* Set low byte to conditional flag. */
3536 inst[0] = GROUP_0F;
3537 inst[1] = cond_set;
3538 inst[2] = U8(MOD_REG | reg_map[dst]);
3539
3540 inst[3] = GROUP_0F;
3541 inst[4] = MOVZX_r_rm8;
3542 inst[5] = U8(MOD_REG | (reg_map[dst] << 3) | reg_map[dst]);
3543 return SLJIT_SUCCESS;
3544 }
3545
3546 if (GET_OPCODE(op) == SLJIT_OR && !GET_ALL_FLAGS(op) && FAST_IS_REG(dst) && reg_map[dst] <= 4) {
3547 inst = (sljit_u8*)ensure_buf(compiler, 1 + 3 + 2);
3548 FAIL_IF(!inst);
3549 INC_SIZE(3 + 2);
3550
3551 /* Set low byte to conditional flag. */
3552 inst[0] = GROUP_0F;
3553 inst[1] = cond_set;
3554 inst[2] = U8(MOD_REG | reg_map[TMP_REG1]);
3555
3556 inst[3] = OR_rm8_r8;
3557 inst[4] = U8(MOD_REG | (reg_map[TMP_REG1] << 3) | reg_map[dst]);
3558 return SLJIT_SUCCESS;
3559 }
3560
3561 inst = (sljit_u8*)ensure_buf(compiler, 1 + 3 + 3);
3562 FAIL_IF(!inst);
3563 INC_SIZE(3 + 3);
3564 /* Set low byte to conditional flag. */
3565 inst[0] = GROUP_0F;
3566 inst[1] = cond_set;
3567 inst[2] = U8(MOD_REG | reg_map[TMP_REG1]);
3568
3569 inst[3] = GROUP_0F;
3570 inst[4] = MOVZX_r_rm8;
3571 inst[5] = U8(MOD_REG | (reg_map[TMP_REG1] << 3) | reg_map[TMP_REG1]);
3572
3573 if (GET_OPCODE(op) < SLJIT_ADD)
3574 return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
3575
3576 SLJIT_SKIP_CHECKS(compiler);
3577 return sljit_emit_op2(compiler, op, dst_save, dstw_save, dst_save, dstw_save, TMP_REG1, 0);
3578 #endif /* SLJIT_CONFIG_X86_64 */
3579 }
3580
3581 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fselect(struct sljit_compiler *compiler, sljit_s32 type,
3582 sljit_s32 dst_freg,
3583 sljit_s32 src1, sljit_sw src1w,
3584 sljit_s32 src2_freg)
3585 {
3586 sljit_u8* inst;
3587 sljit_uw size;
3588
3589 CHECK_ERROR();
3590 CHECK(check_sljit_emit_fselect(compiler, type, dst_freg, src1, src1w, src2_freg));
3591
3592 ADJUST_LOCAL_OFFSET(src1, src1w);
3593
3594 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
3595 compiler->mode32 = 1;
3596 #endif /* SLJIT_CONFIG_X86_64 */
3597
3598 if (dst_freg != src2_freg) {
3599 if (dst_freg == src1) {
3600 src1 = src2_freg;
3601 src1w = 0;
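/* Operands are swapped, so invert the condition: jump codes come in even/odd pairs where bit 0 negates the condition. */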
3602 type ^= 0x1;
3603 } else
3604 FAIL_IF(emit_sse2_load(compiler, type & SLJIT_32, dst_freg, src2_freg, 0));
3605 }
3606
3607 inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
3608 FAIL_IF(!inst);
3609 INC_SIZE(2);
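/* Short jump with the inverted condition; its 8-bit offset (patched below) skips the conditional load. */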
3610 inst[0] = U8(get_jump_code((sljit_uw)(type & ~SLJIT_32) ^ 0x1) - 0x10);
3611
3612 size = compiler->size;
3613 FAIL_IF(emit_sse2_load(compiler, type & SLJIT_32, dst_freg, src1, src1w));
3614
3615 inst[1] = U8(compiler->size - size);
3616 return SLJIT_SUCCESS;
3617 }
3618
3619 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_mov(struct sljit_compiler *compiler, sljit_s32 type,
3620 sljit_s32 freg,
3621 sljit_s32 srcdst, sljit_sw srcdstw)
3622 {
3623 sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
3624 sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
3625 sljit_s32 alignment = SLJIT_SIMD_GET_ELEM2_SIZE(type);
3626 sljit_uw op;
3627
3628 CHECK_ERROR();
3629 CHECK(check_sljit_emit_simd_mov(compiler, type, freg, srcdst, srcdstw));
3630
3631 ADJUST_LOCAL_OFFSET(srcdst, srcdstw);
3632
3633 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
3634 compiler->mode32 = 1;
3635 #endif /* SLJIT_CONFIG_X86_64 */
3636
3637 switch (reg_size) {
3638 case 4:
3639 op = EX86_SSE2;
3640 break;
3641 case 5:
3642 if (!(cpu_feature_list & CPU_FEATURE_AVX2))
3643 return SLJIT_ERR_UNSUPPORTED;
3644 op = EX86_SSE2 | VEX_256;
3645 break;
3646 default:
3647 return SLJIT_ERR_UNSUPPORTED;
3648 }
3649
3650 if (!(srcdst & SLJIT_MEM))
3651 alignment = reg_size;
3652
3653 if (type & SLJIT_SIMD_FLOAT) {
3654 if (elem_size == 2 || elem_size == 3) {
3655 op |= alignment >= reg_size ? MOVAPS_x_xm : MOVUPS_x_xm;
3656
3657 if (elem_size == 3)
3658 op |= EX86_PREF_66;
3659
3660 if (type & SLJIT_SIMD_STORE)
3661 op += 1;
3662 } else
3663 return SLJIT_ERR_UNSUPPORTED;
3664 } else {
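/* Integer forms: the 66 prefix selects MOVDQA (aligned), F3 selects MOVDQU (unaligned). */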
3665 op |= ((type & SLJIT_SIMD_STORE) ? MOVDQA_xm_x : MOVDQA_x_xm)
3666 | (alignment >= reg_size ? EX86_PREF_66 : EX86_PREF_F3);
3667 }
3668
3669 if (type & SLJIT_SIMD_TEST)
3670 return SLJIT_SUCCESS;
3671
3672 if ((op & VEX_256) || ((cpu_feature_list & CPU_FEATURE_AVX) && (compiler->options & SLJIT_ENTER_USE_VEX)))
3673 return emit_vex_instruction(compiler, op, freg, 0, srcdst, srcdstw);
3674
3675 return emit_groupf(compiler, op, freg, srcdst, srcdstw);
3676 }
3677
3678 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_replicate(struct sljit_compiler *compiler, sljit_s32 type,
3679 sljit_s32 freg,
3680 sljit_s32 src, sljit_sw srcw)
3681 {
3682 sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
3683 sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
3684 sljit_s32 use_vex = (cpu_feature_list & CPU_FEATURE_AVX) && (compiler->options & SLJIT_ENTER_USE_VEX);
3685 sljit_u8 *inst;
3686 sljit_u8 opcode = 0;
3687 sljit_uw op;
3688
3689 CHECK_ERROR();
3690 CHECK(check_sljit_emit_simd_replicate(compiler, type, freg, src, srcw));
3691
3692 ADJUST_LOCAL_OFFSET(src, srcw);
3693
3694 if (!(type & SLJIT_SIMD_FLOAT)) {
3695 CHECK_EXTRA_REGS(src, srcw, (void)0);
3696 }
3697
3698 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
3699 if ((type & SLJIT_SIMD_FLOAT) ? (elem_size < 2 || elem_size > 3) : (elem_size > 2))
3700 return SLJIT_ERR_UNSUPPORTED;
3701 #else /* !SLJIT_CONFIG_X86_32 */
3702 compiler->mode32 = 1;
3703
3704 if (elem_size > 3 || ((type & SLJIT_SIMD_FLOAT) && elem_size < 2))
3705 return SLJIT_ERR_UNSUPPORTED;
3706 #endif /* SLJIT_CONFIG_X86_32 */
3707
3708 if (reg_size != 4 && (reg_size != 5 || !(cpu_feature_list & CPU_FEATURE_AVX2)))
3709 return SLJIT_ERR_UNSUPPORTED;
3710
3711 if (type & SLJIT_SIMD_TEST)
3712 return SLJIT_SUCCESS;
3713
3714 if (reg_size == 5)
3715 use_vex = 1;
3716
3717 if (use_vex && src != SLJIT_IMM) {
3718 op = 0;
3719
3720 switch (elem_size) {
3721 case 0:
3722 if (cpu_feature_list & CPU_FEATURE_AVX2)
3723 op = VPBROADCASTB_x_xm | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2;
3724 break;
3725 case 1:
3726 if (cpu_feature_list & CPU_FEATURE_AVX2)
3727 op = VPBROADCASTW_x_xm | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2;
3728 break;
3729 case 2:
3730 if (type & SLJIT_SIMD_FLOAT) {
3731 if ((cpu_feature_list & CPU_FEATURE_AVX2) || ((cpu_feature_list & CPU_FEATURE_AVX) && (src & SLJIT_MEM)))
3732 op = VBROADCASTSS_x_xm | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2;
3733 } else if (cpu_feature_list & CPU_FEATURE_AVX2)
3734 op = VPBROADCASTD_x_xm | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2;
3735 break;
3736 default:
3737 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
3738 if (!(type & SLJIT_SIMD_FLOAT)) {
3739 if (cpu_feature_list & CPU_FEATURE_AVX2)
3740 op = VPBROADCASTQ_x_xm | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2;
3741 break;
3742 }
3743 #endif /* SLJIT_CONFIG_X86_64 */
3744
3745 if (reg_size == 5)
3746 op = VBROADCASTSD_x_xm | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2;
3747 break;
3748 }
3749
3750 if (op != 0) {
3751 if (!(src & SLJIT_MEM) && !(type & SLJIT_SIMD_FLOAT)) {
3752 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
3753 if (elem_size >= 3)
3754 compiler->mode32 = 0;
3755 #endif /* SLJIT_CONFIG_X86_64 */
3756 FAIL_IF(emit_vex_instruction(compiler, MOVD_x_rm | VEX_AUTO_W | EX86_PREF_66 | EX86_SSE2_OP1, freg, 0, src, srcw));
3757 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
3758 compiler->mode32 = 1;
3759 #endif /* SLJIT_CONFIG_X86_64 */
3760 src = freg;
3761 srcw = 0;
3762 }
3763
3764 if (reg_size == 5)
3765 op |= VEX_256;
3766
3767 return emit_vex_instruction(compiler, op, freg, 0, src, srcw);
3768 }
3769 }
3770
3771 if (type & SLJIT_SIMD_FLOAT) {
3772 if (src == SLJIT_IMM) {
3773 if (use_vex)
3774 return emit_vex_instruction(compiler, XORPD_x_xm | (reg_size == 5 ? VEX_256 : 0) | (elem_size == 3 ? EX86_PREF_66 : 0) | EX86_SSE2 | VEX_SSE2_OPV, freg, freg, freg, 0);
3775
3776 return emit_groupf(compiler, XORPD_x_xm | (elem_size == 3 ? EX86_PREF_66 : 0) | EX86_SSE2, freg, freg, 0);
3777 }
3778
3779 SLJIT_ASSERT(reg_size == 4);
3780
3781 if (use_vex) {
3782 if (elem_size == 3)
3783 return emit_vex_instruction(compiler, MOVDDUP_x_xm | EX86_PREF_F2 | EX86_SSE2, freg, 0, src, srcw);
3784
3785 SLJIT_ASSERT(!(src & SLJIT_MEM));
3786 FAIL_IF(emit_vex_instruction(compiler, SHUFPS_x_xm | EX86_SSE2 | VEX_SSE2_OPV, freg, src, src, 0));
3787 return emit_byte(compiler, 0);
3788 }
3789
3790 if (elem_size == 2 && freg != src) {
3791 FAIL_IF(emit_sse2_load(compiler, 1, freg, src, srcw));
3792 src = freg;
3793 srcw = 0;
3794 }
3795
3796 op = (elem_size == 2 ? SHUFPS_x_xm : MOVDDUP_x_xm) | (elem_size == 2 ? 0 : EX86_PREF_F2) | EX86_SSE2;
3797 FAIL_IF(emit_groupf(compiler, op, freg, src, srcw));
3798
3799 if (elem_size == 2)
3800 return emit_byte(compiler, 0);
3801 return SLJIT_SUCCESS;
3802 }
3803
3804 if (src == SLJIT_IMM) {
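/* Repeat an 8/16-bit immediate across 32 bits so a single dword broadcast can be used. */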
3805 if (elem_size == 0) {
3806 srcw = (sljit_u8)srcw;
3807 srcw |= srcw << 8;
3808 srcw |= srcw << 16;
3809 elem_size = 2;
3810 } else if (elem_size == 1) {
3811 srcw = (sljit_u16)srcw;
3812 srcw |= srcw << 16;
3813 elem_size = 2;
3814 }
3815
3816 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
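/* Canonicalize an all-ones 32-bit immediate to -1 so the PCMPEQD shortcut below is taken. */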
3817 if (elem_size == 2 && (sljit_s32)srcw == -1)
3818 srcw = -1;
3819 #endif /* SLJIT_CONFIG_X86_64 */
3820
3821 if (srcw == 0 || srcw == -1) {
3822 if (use_vex)
3823 return emit_vex_instruction(compiler, (srcw == 0 ? PXOR_x_xm : PCMPEQD_x_xm) | (reg_size == 5 ? VEX_256 : 0) | EX86_PREF_66 | EX86_SSE2 | VEX_SSE2_OPV, freg, freg, freg, 0);
3824
3825 return emit_groupf(compiler, (srcw == 0 ? PXOR_x_xm : PCMPEQD_x_xm) | EX86_PREF_66 | EX86_SSE2, freg, freg, 0);
3826 }
3827
3828 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
3829 if (elem_size == 3)
3830 FAIL_IF(emit_load_imm64(compiler, TMP_REG1, srcw));
3831 else
3832 #endif /* SLJIT_CONFIG_X86_64 */
3833 EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, srcw);
3834
3835 src = TMP_REG1;
3836 srcw = 0;
3837
3838 }
3839
3840 op = 2;
3841 opcode = MOVD_x_rm;
3842
3843 switch (elem_size) {
3844 case 0:
3845 if (!FAST_IS_REG(src)) {
3846 opcode = 0x3a /* Prefix of PINSRB_x_rm_i8. */;
3847 op = 3;
3848 }
3849 break;
3850 case 1:
3851 if (!FAST_IS_REG(src))
3852 opcode = PINSRW_x_rm_i8;
3853 break;
3854 case 2:
3855 break;
3856 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
3857 case 3:
3858 /* MOVQ */
3859 compiler->mode32 = 0;
3860 break;
3861 #endif /* SLJIT_CONFIG_X86_64 */
3862 }
3863
3864 if (use_vex) {
3865 if (opcode != MOVD_x_rm) {
3866 op = (opcode == 0x3a) ? (PINSRB_x_rm_i8 | VEX_OP_0F3A) : opcode;
3867 FAIL_IF(emit_vex_instruction(compiler, op | EX86_PREF_66 | EX86_SSE2_OP1 | VEX_SSE2_OPV, freg, freg, src, srcw));
3868 } else
3869 FAIL_IF(emit_vex_instruction(compiler, MOVD_x_rm | VEX_AUTO_W | EX86_PREF_66 | EX86_SSE2_OP1, freg, 0, src, srcw));
3870 } else {
3871 inst = emit_x86_instruction(compiler, op | EX86_PREF_66 | EX86_SSE2_OP1, freg, 0, src, srcw);
3872 FAIL_IF(!inst);
3873 inst[0] = GROUP_0F;
3874 inst[1] = opcode;
3875
3876 if (op == 3) {
3877 SLJIT_ASSERT(opcode == 0x3a);
3878 inst[2] = PINSRB_x_rm_i8;
3879 }
3880 }
3881
3882 if (use_vex && elem_size >= 2) {
3883 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
3884 op = VPBROADCASTD_x_xm;
3885 #else /* !SLJIT_CONFIG_X86_32 */
3886 op = (elem_size == 3) ? VPBROADCASTQ_x_xm : VPBROADCASTD_x_xm;
3887 #endif /* SLJIT_CONFIG_X86_32 */
3888 return emit_vex_instruction(compiler, op | ((reg_size == 5) ? VEX_256 : 0) | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2, freg, 0, freg, 0);
3889 }
3890
3891 SLJIT_ASSERT(reg_size == 4);
3892
3893 if (opcode != MOVD_x_rm)
3894 FAIL_IF(emit_byte(compiler, 0));
3895
3896 switch (elem_size) {
3897 case 0:
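/* Zero TMP_FREG and use it as the PSHUFB mask: an all-zero mask replicates byte 0 into every lane. */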
3898 if (use_vex) {
3899 FAIL_IF(emit_vex_instruction(compiler, PXOR_x_xm | EX86_PREF_66 | EX86_SSE2 | VEX_SSE2_OPV, TMP_FREG, TMP_FREG, TMP_FREG, 0));
3900 return emit_vex_instruction(compiler, PSHUFB_x_xm | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2 | VEX_SSE2_OPV, freg, freg, TMP_FREG, 0);
3901 }
3902 FAIL_IF(emit_groupf(compiler, PXOR_x_xm | EX86_PREF_66 | EX86_SSE2, TMP_FREG, TMP_FREG, 0));
3903 return emit_groupf_ext(compiler, PSHUFB_x_xm | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2, freg, TMP_FREG, 0);
3904 case 1:
3905 if (use_vex)
3906 FAIL_IF(emit_vex_instruction(compiler, PSHUFLW_x_xm | EX86_PREF_F2 | EX86_SSE2, freg, 0, freg, 0));
3907 else
3908 FAIL_IF(emit_groupf(compiler, PSHUFLW_x_xm | EX86_PREF_F2 | EX86_SSE2, freg, freg, 0));
3909 FAIL_IF(emit_byte(compiler, 0));
3910 /* fallthrough */
3911 default:
3912 if (use_vex)
3913 FAIL_IF(emit_vex_instruction(compiler, PSHUFD_x_xm | EX86_PREF_66 | EX86_SSE2, freg, 0, freg, 0));
3914 else
3915 FAIL_IF(emit_groupf(compiler, PSHUFD_x_xm | EX86_PREF_66 | EX86_SSE2, freg, freg, 0));
3916 return emit_byte(compiler, 0);
3917 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
3918 case 3:
3919 compiler->mode32 = 1;
3920 if (use_vex)
3921 FAIL_IF(emit_vex_instruction(compiler, PSHUFD_x_xm | EX86_PREF_66 | EX86_SSE2, freg, 0, freg, 0));
3922 else
3923 FAIL_IF(emit_groupf(compiler, PSHUFD_x_xm | EX86_PREF_66 | EX86_SSE2, freg, freg, 0));
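/* 0x44 = 0b01000100 selects dwords 0,1,0,1, duplicating the low 64 bits. */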
3924 return emit_byte(compiler, 0x44);
3925 #endif /* SLJIT_CONFIG_X86_64 */
3926 }
3927 }
3928
3929 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_mov(struct sljit_compiler *compiler, sljit_s32 type,
3930 sljit_s32 freg, sljit_s32 lane_index,
3931 sljit_s32 srcdst, sljit_sw srcdstw)
3932 {
3933 sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
3934 sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
3935 sljit_s32 use_vex = (cpu_feature_list & CPU_FEATURE_AVX) && (compiler->options & SLJIT_ENTER_USE_VEX);
3936 sljit_u8 *inst;
3937 sljit_u8 opcode = 0;
3938 sljit_uw op;
3939 sljit_s32 freg_orig = freg;
3940 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
3941 sljit_s32 srcdst_is_ereg = 0;
3942 sljit_s32 srcdst_orig = 0;
3943 sljit_sw srcdstw_orig = 0;
3944 #endif /* SLJIT_CONFIG_X86_32 */
3945
3946 CHECK_ERROR();
3947 CHECK(check_sljit_emit_simd_lane_mov(compiler, type, freg, lane_index, srcdst, srcdstw));
3948
3949 ADJUST_LOCAL_OFFSET(srcdst, srcdstw);
3950
3951 if (reg_size == 5) {
3952 if (!(cpu_feature_list & CPU_FEATURE_AVX2))
3953 return SLJIT_ERR_UNSUPPORTED;
3954 use_vex = 1;
3955 } else if (reg_size != 4)
3956 return SLJIT_ERR_UNSUPPORTED;
3957
3958 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
3959 if ((type & SLJIT_SIMD_FLOAT) ? (elem_size < 2 || elem_size > 3) : elem_size > 2)
3960 return SLJIT_ERR_UNSUPPORTED;
3961 #else /* !SLJIT_CONFIG_X86_32 */
3962 if (elem_size > 3 || ((type & SLJIT_SIMD_FLOAT) && elem_size < 2))
3963 return SLJIT_ERR_UNSUPPORTED;
3964 #endif /* SLJIT_CONFIG_X86_32 */
3965
3966 if (type & SLJIT_SIMD_TEST)
3967 return SLJIT_SUCCESS;
3968
3969 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
3970 compiler->mode32 = 1;
3971 #else /* !SLJIT_CONFIG_X86_64 */
3972 if (!(type & SLJIT_SIMD_FLOAT)) {
3973 CHECK_EXTRA_REGS(srcdst, srcdstw, srcdst_is_ereg = 1);
3974
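/* Byte/word stores to a virtual register, and signed byte stores from a register without a low-byte form, are routed through TMP_REG1. */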
3975 if ((type & SLJIT_SIMD_STORE) && ((srcdst_is_ereg && elem_size < 2) || (elem_size == 0 && (type & SLJIT_SIMD_LANE_SIGNED) && FAST_IS_REG(srcdst) && reg_map[srcdst] >= 4))) {
3976 srcdst_orig = srcdst;
3977 srcdstw_orig = srcdstw;
3978 srcdst = TMP_REG1;
3979 srcdstw = 0;
3980 }
3981 }
3982 #endif /* SLJIT_CONFIG_X86_64 */
3983
3984 if (type & SLJIT_SIMD_LANE_ZERO) {
3985 if (lane_index == 0) {
3986 if (!(type & SLJIT_SIMD_FLOAT)) {
3987 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
3988 if (elem_size == 3) {
3989 compiler->mode32 = 0;
3990 elem_size = 2;
3991 }
3992 #endif /* SLJIT_CONFIG_X86_64 */
3993 if (srcdst == SLJIT_IMM) {
3994 if (elem_size == 0)
3995 srcdstw = (sljit_u8)srcdstw;
3996 else if (elem_size == 1)
3997 srcdstw = (sljit_u16)srcdstw;
3998
3999 EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, srcdstw);
4000 srcdst = TMP_REG1;
4001 srcdstw = 0;
4002 elem_size = 2;
4003 }
4004
4005 if (elem_size == 2) {
4006 if (use_vex)
4007 return emit_vex_instruction(compiler, MOVD_x_rm | VEX_AUTO_W | EX86_PREF_66 | EX86_SSE2_OP1, freg, 0, srcdst, srcdstw);
4008 return emit_groupf(compiler, MOVD_x_rm | EX86_PREF_66 | EX86_SSE2_OP1, freg, srcdst, srcdstw);
4009 }
4010 } else if (srcdst & SLJIT_MEM) {
4011 SLJIT_ASSERT(elem_size == 2 || elem_size == 3);
4012
4013 if (use_vex)
4014 return emit_vex_instruction(compiler, MOVSD_x_xm | (elem_size == 2 ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2, freg, 0, srcdst, srcdstw);
4015 return emit_groupf(compiler, MOVSD_x_xm | (elem_size == 2 ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2, freg, srcdst, srcdstw);
4016 } else if (elem_size == 3) {
4017 if (use_vex)
4018 return emit_vex_instruction(compiler, MOVQ_x_xm | EX86_PREF_F3 | EX86_SSE2, freg, 0, srcdst, 0);
4019 return emit_groupf(compiler, MOVQ_x_xm | EX86_PREF_F3 | EX86_SSE2, freg, srcdst, 0);
4020 } else if (use_vex) {
4021 FAIL_IF(emit_vex_instruction(compiler, XORPD_x_xm | EX86_SSE2 | VEX_SSE2_OPV, TMP_FREG, TMP_FREG, TMP_FREG, 0));
4022 return emit_vex_instruction(compiler, MOVSD_x_xm | EX86_PREF_F3 | EX86_SSE2 | VEX_SSE2_OPV, freg, TMP_FREG, srcdst, 0);
4023 }
4024 }
4025
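/* A lane in the upper 128 bits is prepared in (zeroed) TMP_FREG and merged back into freg at the end of the function. */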
4026 if (reg_size == 5 && lane_index >= (1 << (4 - elem_size))) {
4027 freg = TMP_FREG;
4028 lane_index -= (1 << (4 - elem_size));
4029 } else if ((type & SLJIT_SIMD_FLOAT) && freg == srcdst) {
4030 if (use_vex)
4031 FAIL_IF(emit_vex_instruction(compiler, MOVSD_x_xm | (elem_size == 2 ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2 | VEX_SSE2_OPV, TMP_FREG, TMP_FREG, srcdst, srcdstw));
4032 else
4033 FAIL_IF(emit_sse2_load(compiler, elem_size == 2, TMP_FREG, srcdst, srcdstw));
4034 srcdst = TMP_FREG;
4035 srcdstw = 0;
4036 }
4037
4038 op = ((!(type & SLJIT_SIMD_FLOAT) || elem_size != 2) ? EX86_PREF_66 : 0)
4039 | ((type & SLJIT_SIMD_FLOAT) ? XORPD_x_xm : PXOR_x_xm) | EX86_SSE2;
4040
4041 if (use_vex)
4042 FAIL_IF(emit_vex_instruction(compiler, op | (reg_size == 5 ? VEX_256 : 0) | VEX_SSE2_OPV, freg, freg, freg, 0));
4043 else
4044 FAIL_IF(emit_groupf(compiler, op, freg, freg, 0));
4045 } else if (reg_size == 5 && lane_index >= (1 << (4 - elem_size))) {
4046 FAIL_IF(emit_vex_instruction(compiler, ((type & SLJIT_SIMD_FLOAT) ? VEXTRACTF128_x_ym : VEXTRACTI128_x_ym) | VEX_256 | EX86_PREF_66 | VEX_OP_0F3A | EX86_SSE2, freg, 0, TMP_FREG, 0));
4047 FAIL_IF(emit_byte(compiler, 1));
4048
4049 freg = TMP_FREG;
4050 lane_index -= (1 << (4 - elem_size));
4051 }
4052
4053 if (type & SLJIT_SIMD_FLOAT) {
4054 if (elem_size == 3) {
4055 if (srcdst & SLJIT_MEM) {
4056 if (type & SLJIT_SIMD_STORE)
4057 op = lane_index == 0 ? MOVLPD_m_x : MOVHPD_m_x;
4058 else
4059 op = lane_index == 0 ? MOVLPD_x_m : MOVHPD_x_m;
4060
4061 /* VEX prefix clears upper bits of the target register. */
4062 if (use_vex && ((type & SLJIT_SIMD_STORE) || reg_size == 4 || freg == TMP_FREG))
4063 FAIL_IF(emit_vex_instruction(compiler, op | EX86_PREF_66 | EX86_SSE2
4064 | ((type & SLJIT_SIMD_STORE) ? 0 : VEX_SSE2_OPV), freg, (type & SLJIT_SIMD_STORE) ? 0 : freg, srcdst, srcdstw));
4065 else
4066 FAIL_IF(emit_groupf(compiler, op | EX86_PREF_66 | EX86_SSE2, freg, srcdst, srcdstw));
4067
4068 /* In case of store, freg is not TMP_FREG. */
4069 } else if (type & SLJIT_SIMD_STORE) {
4070 if (lane_index == 1) {
4071 if (use_vex)
4072 return emit_vex_instruction(compiler, MOVHLPS_x_x | EX86_SSE2 | VEX_SSE2_OPV, srcdst, srcdst, freg, 0);
4073 return emit_groupf(compiler, MOVHLPS_x_x | EX86_SSE2, srcdst, freg, 0);
4074 }
4075 if (use_vex)
4076 return emit_vex_instruction(compiler, MOVSD_x_xm | EX86_PREF_F2 | EX86_SSE2 | VEX_SSE2_OPV, srcdst, srcdst, freg, 0);
4077 return emit_sse2_load(compiler, 0, srcdst, freg, 0);
4078 } else if (use_vex && (reg_size == 4 || freg == TMP_FREG)) {
4079 if (lane_index == 1)
4080 FAIL_IF(emit_vex_instruction(compiler, MOVLHPS_x_x | EX86_SSE2 | VEX_SSE2_OPV, freg, freg, srcdst, 0));
4081 else
4082 FAIL_IF(emit_vex_instruction(compiler, MOVSD_x_xm | EX86_PREF_F2 | EX86_SSE2 | VEX_SSE2_OPV, freg, freg, srcdst, 0));
4083 } else {
4084 if (lane_index == 1)
4085 FAIL_IF(emit_groupf(compiler, MOVLHPS_x_x | EX86_SSE2, freg, srcdst, 0));
4086 else
4087 FAIL_IF(emit_sse2_load(compiler, 0, freg, srcdst, 0));
4088 }
4089 } else if (type & SLJIT_SIMD_STORE) {
4090 if (lane_index == 0) {
4091 if (use_vex)
4092 return emit_vex_instruction(compiler, ((srcdst & SLJIT_MEM) ? MOVSD_xm_x : MOVSD_x_xm) | EX86_PREF_F3 | EX86_SSE2
4093 | ((srcdst & SLJIT_MEM) ? 0 : VEX_SSE2_OPV), freg, ((srcdst & SLJIT_MEM) ? 0 : freg), srcdst, srcdstw);
4094 return emit_sse2_store(compiler, 1, srcdst, srcdstw, freg);
4095 }
4096
4097 if (srcdst & SLJIT_MEM) {
4098 if (use_vex)
4099 FAIL_IF(emit_vex_instruction(compiler, EXTRACTPS_x_xm | EX86_PREF_66 | VEX_OP_0F3A | EX86_SSE2, freg, 0, srcdst, srcdstw));
4100 else
4101 FAIL_IF(emit_groupf_ext(compiler, EXTRACTPS_x_xm | EX86_PREF_66 | VEX_OP_0F3A | EX86_SSE2, freg, srcdst, srcdstw));
4102 return emit_byte(compiler, U8(lane_index));
4103 }
4104
4105 if (use_vex) {
4106 FAIL_IF(emit_vex_instruction(compiler, SHUFPS_x_xm | EX86_SSE2 | VEX_SSE2_OPV, srcdst, freg, freg, 0));
4107 return emit_byte(compiler, U8(lane_index));
4108 }
4109
4110 if (srcdst == freg)
4111 op = SHUFPS_x_xm | EX86_SSE2;
4112 else {
4113 switch (lane_index) {
4114 case 1:
4115 op = MOVSHDUP_x_xm | EX86_PREF_F3 | EX86_SSE2;
4116 break;
4117 case 2:
4118 op = MOVHLPS_x_x | EX86_SSE2;
4119 break;
4120 default:
4121 SLJIT_ASSERT(lane_index == 3);
4122 op = PSHUFD_x_xm | EX86_PREF_66 | EX86_SSE2;
4123 break;
4124 }
4125 }
4126
4127 FAIL_IF(emit_groupf(compiler, op, srcdst, freg, 0));
4128
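/* Strip the prefix bits to recover the opcode byte; SHUFPS and PSHUFD are followed by a lane-selector immediate. */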
4129 op &= 0xff;
4130 if (op == SHUFPS_x_xm || op == PSHUFD_x_xm)
4131 return emit_byte(compiler, U8(lane_index));
4132
4133 return SLJIT_SUCCESS;
4134 } else {
4135 if (lane_index != 0 || (srcdst & SLJIT_MEM)) {
4136 FAIL_IF(emit_groupf_ext(compiler, INSERTPS_x_xm | EX86_PREF_66 | VEX_OP_0F3A | EX86_SSE2, freg, srcdst, srcdstw));
4137 FAIL_IF(emit_byte(compiler, U8(lane_index << 4)));
4138 } else
4139 FAIL_IF(emit_sse2_store(compiler, 1, freg, 0, srcdst));
4140 }
4141
4142 if (freg != TMP_FREG || (type & SLJIT_SIMD_STORE))
4143 return SLJIT_SUCCESS;
4144
4145 SLJIT_ASSERT(reg_size == 5);
4146
4147 if (type & SLJIT_SIMD_LANE_ZERO) {
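/* 0x4e = 0b01001110 swaps the 128-bit halves, moving the lane data to the upper half while the rest stays zero. */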
4148 FAIL_IF(emit_vex_instruction(compiler, VPERMPD_y_ym | VEX_256 | EX86_PREF_66 | VEX_OP_0F3A | VEX_W | EX86_SSE2, freg_orig, 0, TMP_FREG, 0));
4149 return emit_byte(compiler, 0x4e);
4150 }
4151
4152 FAIL_IF(emit_vex_instruction(compiler, VINSERTF128_y_y_xm | VEX_256 | EX86_PREF_66 | VEX_OP_0F3A | EX86_SSE2 | VEX_SSE2_OPV, freg_orig, freg_orig, TMP_FREG, 0));
4153 return emit_byte(compiler, 1);
4154 }
4155
4156 if (srcdst == SLJIT_IMM) {
4157 EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, srcdstw);
4158 srcdst = TMP_REG1;
4159 srcdstw = 0;
4160 }
4161
4162 op = 3;
4163
4164 switch (elem_size) {
4165 case 0:
4166 opcode = (type & SLJIT_SIMD_STORE) ? PEXTRB_rm_x_i8 : PINSRB_x_rm_i8;
4167 break;
4168 case 1:
4169 if (!(type & SLJIT_SIMD_STORE)) {
4170 op = 2;
4171 opcode = PINSRW_x_rm_i8;
4172 } else
4173 opcode = PEXTRW_rm_x_i8;
4174 break;
4175 case 2:
4176 opcode = (type & SLJIT_SIMD_STORE) ? PEXTRD_rm_x_i8 : PINSRD_x_rm_i8;
4177 break;
4178 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
4179 case 3:
4180 /* PINSRQ / PEXTRQ */
4181 opcode = (type & SLJIT_SIMD_STORE) ? PEXTRD_rm_x_i8 : PINSRD_x_rm_i8;
4182 compiler->mode32 = 0;
4183 break;
4184 #endif /* SLJIT_CONFIG_X86_64 */
4185 }
4186
4187 if (use_vex && (type & SLJIT_SIMD_STORE)) {
4188 op = opcode | ((op == 3) ? VEX_OP_0F3A : 0);
4189 FAIL_IF(emit_vex_instruction(compiler, op | EX86_PREF_66 | VEX_AUTO_W | EX86_SSE2_OP1 | VEX_SSE2_OPV, freg, 0, srcdst, srcdstw));
4190 } else {
4191 inst = emit_x86_instruction(compiler, op | EX86_PREF_66 | EX86_SSE2_OP1, freg, 0, srcdst, srcdstw);
4192 FAIL_IF(!inst);
4193 inst[0] = GROUP_0F;
4194
4195 if (op == 3) {
4196 inst[1] = 0x3a;
4197 inst[2] = opcode;
4198 } else
4199 inst[1] = opcode;
4200 }
4201
4202 FAIL_IF(emit_byte(compiler, U8(lane_index)));
4203
4204 if (!(type & SLJIT_SIMD_LANE_SIGNED) || (srcdst & SLJIT_MEM)) {
4205 if (freg == TMP_FREG && !(type & SLJIT_SIMD_STORE)) {
4206 SLJIT_ASSERT(reg_size == 5);
4207
4208 if (type & SLJIT_SIMD_LANE_ZERO) {
4209 FAIL_IF(emit_vex_instruction(compiler, VPERMQ_y_ym | VEX_256 | EX86_PREF_66 | VEX_OP_0F3A | VEX_W | EX86_SSE2, freg_orig, 0, TMP_FREG, 0));
4210 return emit_byte(compiler, 0x4e);
4211 }
4212
4213 FAIL_IF(emit_vex_instruction(compiler, VINSERTI128_y_y_xm | VEX_256 | EX86_PREF_66 | VEX_OP_0F3A | EX86_SSE2 | VEX_SSE2_OPV, freg_orig, freg_orig, TMP_FREG, 0));
4214 return emit_byte(compiler, 1);
4215 }
4216
4217 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
4218 if (srcdst_orig & SLJIT_MEM)
4219 return emit_mov(compiler, srcdst_orig, srcdstw_orig, TMP_REG1, 0);
4220 #endif /* SLJIT_CONFIG_X86_32 */
4221 return SLJIT_SUCCESS;
4222 }
4223
4224 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
4225 if (elem_size >= 3)
4226 return SLJIT_SUCCESS;
4227
4228 compiler->mode32 = (type & SLJIT_32);
4229
4230 op = 2;
4231
4232 if (elem_size == 0)
4233 op |= EX86_REX;
4234
4235 if (elem_size == 2) {
4236 if (type & SLJIT_32)
4237 return SLJIT_SUCCESS;
4238
4239 SLJIT_ASSERT(!(compiler->mode32));
4240 op = 1;
4241 }
4242
4243 inst = emit_x86_instruction(compiler, op, srcdst, 0, srcdst, 0);
4244 FAIL_IF(!inst);
4245
4246 if (op != 1) {
4247 inst[0] = GROUP_0F;
4248 inst[1] = U8((elem_size == 0) ? MOVSX_r_rm8 : MOVSX_r_rm16);
4249 } else
4250 inst[0] = MOVSXD_r_rm;
4251 #else /* !SLJIT_CONFIG_X86_64 */
4252 if (elem_size >= 2)
4253 return SLJIT_SUCCESS;
4254
4255 FAIL_IF(emit_groupf(compiler, (elem_size == 0) ? MOVSX_r_rm8 : MOVSX_r_rm16,
4256 (srcdst_orig != 0 && FAST_IS_REG(srcdst_orig)) ? srcdst_orig : srcdst, srcdst, 0));
4257
4258 if (srcdst_orig & SLJIT_MEM)
4259 return emit_mov(compiler, srcdst_orig, srcdstw_orig, TMP_REG1, 0);
4260 #endif /* SLJIT_CONFIG_X86_64 */
4261 return SLJIT_SUCCESS;
4262 }
4263
4264 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_replicate(struct sljit_compiler *compiler, sljit_s32 type,
4265 sljit_s32 freg,
4266 sljit_s32 src, sljit_s32 src_lane_index)
4267 {
4268 sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
4269 sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
4270 sljit_s32 use_vex = (cpu_feature_list & CPU_FEATURE_AVX) && (compiler->options & SLJIT_ENTER_USE_VEX);
4271 sljit_uw pref;
4272 sljit_u8 byte;
4273 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
4274 sljit_s32 opcode3 = TMP_REG1;
4275 #else /* !SLJIT_CONFIG_X86_32 */
4276 sljit_s32 opcode3 = SLJIT_S0;
4277 #endif /* SLJIT_CONFIG_X86_32 */
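/* PSRLDQ encodes its /3 opcode extension in the ModRM reg field, so opcode3 must name a register whose map value is 3 (asserted below). */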
4278
4279 CHECK_ERROR();
4280 CHECK(check_sljit_emit_simd_lane_replicate(compiler, type, freg, src, src_lane_index));
4281
4282 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
4283 compiler->mode32 = 1;
4284 #endif /* SLJIT_CONFIG_X86_64 */
4285 SLJIT_ASSERT(reg_map[opcode3] == 3);
4286
4287 if (reg_size == 5) {
4288 if (!(cpu_feature_list & CPU_FEATURE_AVX2))
4289 return SLJIT_ERR_UNSUPPORTED;
4290 use_vex = 1;
4291 } else if (reg_size != 4)
4292 return SLJIT_ERR_UNSUPPORTED;
4293
4294 if (type & SLJIT_SIMD_FLOAT) {
4295 pref = 0;
4296 byte = U8(src_lane_index);
4297
4298 if (elem_size == 3) {
4299 if (type & SLJIT_SIMD_TEST)
4300 return SLJIT_SUCCESS;
4301
4302 if (reg_size == 5) {
4303 if (src_lane_index == 0)
4304 return emit_vex_instruction(compiler, VBROADCASTSD_x_xm | VEX_256 | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2, freg, 0, src, 0);
4305
4306 FAIL_IF(emit_vex_instruction(compiler, VPERMPD_y_ym | VEX_256 | EX86_PREF_66 | VEX_OP_0F3A | VEX_W | EX86_SSE2, freg, 0, src, 0));
4307
4308 byte = U8(byte | (byte << 2));
4309 return emit_byte(compiler, U8(byte | (byte << 4)));
4310 }
4311
4312 if (src_lane_index == 0) {
4313 if (use_vex)
4314 return emit_vex_instruction(compiler, MOVDDUP_x_xm | EX86_PREF_F2 | EX86_SSE2, freg, 0, src, 0);
4315 return emit_groupf(compiler, MOVDDUP_x_xm | EX86_PREF_F2 | EX86_SSE2, freg, src, 0);
4316 }
4317
4318 /* Changes it to SHUFPD_x_xm. */
4319 pref = EX86_PREF_66;
4320 } else if (elem_size != 2)
4321 return SLJIT_ERR_UNSUPPORTED;
4322 else if (type & SLJIT_SIMD_TEST)
4323 return SLJIT_SUCCESS;
4324
4325 if (reg_size == 5) {
4326 SLJIT_ASSERT(elem_size == 2);
4327
4328 if (src_lane_index == 0)
4329 return emit_vex_instruction(compiler, VBROADCASTSS_x_xm | VEX_256 | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2, freg, 0, src, 0);
4330
4331 FAIL_IF(emit_vex_instruction(compiler, VPERMPD_y_ym | VEX_256 | EX86_PREF_66 | VEX_OP_0F3A | VEX_W | EX86_SSE2, freg, 0, src, 0));
4332
4333 byte = 0x44;
4334 if (src_lane_index >= 4) {
4335 byte = 0xee;
4336 src_lane_index -= 4;
4337 }
4338
4339 FAIL_IF(emit_byte(compiler, byte));
4340 FAIL_IF(emit_vex_instruction(compiler, SHUFPS_x_xm | VEX_256 | pref | EX86_SSE2 | VEX_SSE2_OPV, freg, freg, freg, 0));
4341 byte = U8(src_lane_index);
4342 } else if (use_vex) {
4343 FAIL_IF(emit_vex_instruction(compiler, SHUFPS_x_xm | pref | EX86_SSE2 | VEX_SSE2_OPV, freg, src, src, 0));
4344 } else {
4345 if (freg != src)
4346 FAIL_IF(emit_groupf(compiler, MOVAPS_x_xm | pref | EX86_SSE2, freg, src, 0));
4347
4348 FAIL_IF(emit_groupf(compiler, SHUFPS_x_xm | pref | EX86_SSE2, freg, freg, 0));
4349 }
4350
4351 if (elem_size == 2) {
4352 byte = U8(byte | (byte << 2));
4353 byte = U8(byte | (byte << 4));
4354 } else
4355 byte = U8(byte | (byte << 1));
4356
4357 return emit_byte(compiler, U8(byte));
4358 }
4359
4360 if (type & SLJIT_SIMD_TEST)
4361 return SLJIT_SUCCESS;
4362
4363 if (elem_size == 0) {
4364 if (reg_size == 5 && src_lane_index >= 16) {
4365 FAIL_IF(emit_vex_instruction(compiler, VPERMQ_y_ym | VEX_256 | EX86_PREF_66 | VEX_OP_0F3A | VEX_W | EX86_SSE2, freg, 0, src, 0));
4366 FAIL_IF(emit_byte(compiler, src_lane_index >= 24 ? 0xff : 0xaa));
4367 src_lane_index &= 0x7;
4368 src = freg;
4369 }
4370
4371 if (src_lane_index != 0 || (freg != src && (!(cpu_feature_list & CPU_FEATURE_AVX2) || !use_vex))) {
4372 pref = 0;
4373
4374 if ((src_lane_index & 0x3) == 0) {
4375 pref = EX86_PREF_66;
4376 byte = U8(src_lane_index >> 2);
4377 } else if (src_lane_index < 8 && (src_lane_index & 0x1) == 0) {
4378 pref = EX86_PREF_F2;
4379 byte = U8(src_lane_index >> 1);
4380 } else {
4381 if (!use_vex) {
4382 if (freg != src)
4383 FAIL_IF(emit_groupf(compiler, MOVDQA_x_xm | EX86_PREF_66 | EX86_SSE2, freg, src, 0));
4384
4385 FAIL_IF(emit_groupf(compiler, PSRLDQ_x | EX86_PREF_66 | EX86_SSE2_OP2, opcode3, freg, 0));
4386 } else
4387 FAIL_IF(emit_vex_instruction(compiler, PSRLDQ_x | EX86_PREF_66 | EX86_SSE2_OP2 | VEX_SSE2_OPV, opcode3, freg, src, 0));
4388
4389 FAIL_IF(emit_byte(compiler, U8(src_lane_index)));
4390 }
4391
4392 if (pref != 0) {
4393 if (use_vex)
4394 FAIL_IF(emit_vex_instruction(compiler, PSHUFLW_x_xm | pref | EX86_SSE2, freg, 0, src, 0));
4395 else
4396 FAIL_IF(emit_groupf(compiler, PSHUFLW_x_xm | pref | EX86_SSE2, freg, src, 0));
4397 FAIL_IF(emit_byte(compiler, byte));
4398 }
4399
4400 src = freg;
4401 }
4402
4403 if (use_vex && (cpu_feature_list & CPU_FEATURE_AVX2))
4404 return emit_vex_instruction(compiler, VPBROADCASTB_x_xm | (reg_size == 5 ? VEX_256 : 0) | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2, freg, 0, src, 0);
4405
4406 SLJIT_ASSERT(reg_size == 4);
4407 FAIL_IF(emit_groupf(compiler, PXOR_x_xm | EX86_PREF_66 | EX86_SSE2, TMP_FREG, TMP_FREG, 0));
4408 return emit_groupf_ext(compiler, PSHUFB_x_xm | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2, freg, TMP_FREG, 0);
4409 }
4410
4411 if ((cpu_feature_list & CPU_FEATURE_AVX2) && use_vex && src_lane_index == 0 && elem_size <= 3) {
4412 switch (elem_size) {
4413 case 1:
4414 pref = VPBROADCASTW_x_xm | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2;
4415 break;
4416 case 2:
4417 pref = VPBROADCASTD_x_xm | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2;
4418 break;
4419 default:
4420 pref = VPBROADCASTQ_x_xm | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2;
4421 break;
4422 }
4423
4424 if (reg_size == 5)
4425 pref |= VEX_256;
4426
4427 return emit_vex_instruction(compiler, pref, freg, 0, src, 0);
4428 }
4429
4430 if (reg_size == 5) {
4431 switch (elem_size) {
4432 case 1:
4433 byte = U8(src_lane_index & 0x3);
4434 src_lane_index >>= 2;
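/* The F2 prefix selects PSHUFLW (low 64 bits), F3 selects PSHUFHW (high 64 bits). */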
4435 pref = PSHUFLW_x_xm | VEX_256 | ((src_lane_index & 1) == 0 ? EX86_PREF_F2 : EX86_PREF_F3) | EX86_SSE2;
4436 break;
4437 case 2:
4438 byte = U8(src_lane_index & 0x3);
4439 src_lane_index >>= 1;
4440 pref = PSHUFD_x_xm | VEX_256 | EX86_PREF_66 | EX86_SSE2;
4441 break;
4442 case 3:
4443 pref = 0;
4444 break;
4445 default:
4446 FAIL_IF(emit_vex_instruction(compiler, VPERMQ_y_ym | VEX_256 | EX86_PREF_66 | VEX_OP_0F3A | VEX_W | EX86_SSE2, freg, 0, src, 0));
4447 return emit_byte(compiler, U8(src_lane_index == 0 ? 0x44 : 0xee));
4448 }
4449
4450 if (pref != 0) {
4451 FAIL_IF(emit_vex_instruction(compiler, pref, freg, 0, src, 0));
4452 byte = U8(byte | (byte << 2));
4453 FAIL_IF(emit_byte(compiler, U8(byte | (byte << 4))));
4454
4455 if (src_lane_index == 0)
4456 return emit_vex_instruction(compiler, VPBROADCASTQ_x_xm | VEX_256 | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2, freg, 0, freg, 0);
4457
4458 src = freg;
4459 }
4460
4461 FAIL_IF(emit_vex_instruction(compiler, VPERMQ_y_ym | VEX_256 | EX86_PREF_66 | VEX_OP_0F3A | VEX_W | EX86_SSE2, freg, 0, src, 0));
4462 byte = U8(src_lane_index);
4463 byte = U8(byte | (byte << 2));
4464 return emit_byte(compiler, U8(byte | (byte << 4)));
4465 }
4466
4467 switch (elem_size) {
4468 case 1:
4469 byte = U8(src_lane_index & 0x3);
4470 src_lane_index >>= 1;
4471 pref = (src_lane_index & 2) == 0 ? EX86_PREF_F2 : EX86_PREF_F3;
4472
4473 if (use_vex)
4474 FAIL_IF(emit_vex_instruction(compiler, PSHUFLW_x_xm | pref | EX86_SSE2, freg, 0, src, 0));
4475 else
4476 FAIL_IF(emit_groupf(compiler, PSHUFLW_x_xm | pref | EX86_SSE2, freg, src, 0));
4477 byte = U8(byte | (byte << 2));
4478 FAIL_IF(emit_byte(compiler, U8(byte | (byte << 4))));
4479
4480 if ((cpu_feature_list & CPU_FEATURE_AVX2) && use_vex && pref == EX86_PREF_F2)
4481 return emit_vex_instruction(compiler, VPBROADCASTD_x_xm | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2, freg, 0, freg, 0);
4482
4483 src = freg;
4484 /* fallthrough */
4485 case 2:
4486 byte = U8(src_lane_index);
4487 byte = U8(byte | (byte << 2));
4488 break;
4489 default:
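/* 64-bit lanes: select the dword pair (2*i, 2*i+1); the final byte duplicates the pair into the upper half of the immediate. */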
4490 byte = U8(src_lane_index << 1);
4491 byte = U8(byte | (byte << 2) | 0x4);
4492 break;
4493 }
4494
4495 if (use_vex)
4496 FAIL_IF(emit_vex_instruction(compiler, PSHUFD_x_xm | EX86_PREF_66 | EX86_SSE2, freg, 0, src, 0));
4497 else
4498 FAIL_IF(emit_groupf(compiler, PSHUFD_x_xm | EX86_PREF_66 | EX86_SSE2, freg, src, 0));
4499 return emit_byte(compiler, U8(byte | (byte << 4)));
4500 }
4501
4502 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_extend(struct sljit_compiler *compiler, sljit_s32 type,
4503 sljit_s32 freg,
4504 sljit_s32 src, sljit_sw srcw)
4505 {
4506 sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
4507 sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
4508 sljit_s32 elem2_size = SLJIT_SIMD_GET_ELEM2_SIZE(type);
4509 sljit_s32 use_vex = (cpu_feature_list & CPU_FEATURE_AVX) && (compiler->options & SLJIT_ENTER_USE_VEX);
4510 sljit_u8 opcode;
4511
4512 CHECK_ERROR();
4513 CHECK(check_sljit_emit_simd_extend(compiler, type, freg, src, srcw));
4514
4515 ADJUST_LOCAL_OFFSET(src, srcw);
4516
4517 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
4518 compiler->mode32 = 1;
4519 #endif /* SLJIT_CONFIG_X86_64 */
4520
4521 if (reg_size == 5) {
4522 if (!(cpu_feature_list & CPU_FEATURE_AVX2))
4523 return SLJIT_ERR_UNSUPPORTED;
4524 use_vex = 1;
4525 } else if (reg_size != 4)
4526 return SLJIT_ERR_UNSUPPORTED;
4527
4528 if (type & SLJIT_SIMD_FLOAT) {
4529 if (elem_size != 2 || elem2_size != 3)
4530 return SLJIT_ERR_UNSUPPORTED;
4531
4532 if (type & SLJIT_SIMD_TEST)
4533 return SLJIT_SUCCESS;
4534
4535 if (use_vex)
4536 return emit_vex_instruction(compiler, CVTPS2PD_x_xm | ((reg_size == 5) ? VEX_256 : 0) | EX86_SSE2, freg, 0, src, srcw);
4537 return emit_groupf(compiler, CVTPS2PD_x_xm | EX86_SSE2, freg, src, srcw);
4538 }
4539
4540 switch (elem_size) {
4541 case 0:
4542 if (elem2_size == 1)
4543 opcode = (type & SLJIT_SIMD_EXTEND_SIGNED) ? PMOVSXBW_x_xm : PMOVZXBW_x_xm;
4544 else if (elem2_size == 2)
4545 opcode = (type & SLJIT_SIMD_EXTEND_SIGNED) ? PMOVSXBD_x_xm : PMOVZXBD_x_xm;
4546 else if (elem2_size == 3)
4547 opcode = (type & SLJIT_SIMD_EXTEND_SIGNED) ? PMOVSXBQ_x_xm : PMOVZXBQ_x_xm;
4548 else
4549 return SLJIT_ERR_UNSUPPORTED;
4550 break;
4551 case 1:
4552 if (elem2_size == 2)
4553 opcode = (type & SLJIT_SIMD_EXTEND_SIGNED) ? PMOVSXWD_x_xm : PMOVZXWD_x_xm;
4554 else if (elem2_size == 3)
4555 opcode = (type & SLJIT_SIMD_EXTEND_SIGNED) ? PMOVSXWQ_x_xm : PMOVZXWQ_x_xm;
4556 else
4557 return SLJIT_ERR_UNSUPPORTED;
4558 break;
4559 case 2:
4560 if (elem2_size == 3)
4561 opcode = (type & SLJIT_SIMD_EXTEND_SIGNED) ? PMOVSXDQ_x_xm : PMOVZXDQ_x_xm;
4562 else
4563 return SLJIT_ERR_UNSUPPORTED;
4564 break;
4565 default:
4566 return SLJIT_ERR_UNSUPPORTED;
4567 }
4568
4569 if (type & SLJIT_SIMD_TEST)
4570 return SLJIT_SUCCESS;
4571
4572 if (use_vex)
4573 return emit_vex_instruction(compiler, opcode | ((reg_size == 5) ? VEX_256 : 0) | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2, freg, 0, src, srcw);
4574 return emit_groupf_ext(compiler, opcode | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2, freg, src, srcw);
4575 }
4576
4577 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_sign(struct sljit_compiler *compiler, sljit_s32 type,
4578 sljit_s32 freg,
4579 sljit_s32 dst, sljit_sw dstw)
4580 {
4581 sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
4582 sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
4583 sljit_s32 use_vex = (cpu_feature_list & CPU_FEATURE_AVX) && (compiler->options & SLJIT_ENTER_USE_VEX);
4584 sljit_s32 dst_r;
4585 sljit_uw op;
4586 sljit_u8 *inst;
4587
4588 CHECK_ERROR();
4589 CHECK(check_sljit_emit_simd_sign(compiler, type, freg, dst, dstw));
4590
4591 ADJUST_LOCAL_OFFSET(dst, dstw);
4592
4593 CHECK_EXTRA_REGS(dst, dstw, (void)0);
4594 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
4595 compiler->mode32 = 1;
4596 #endif /* SLJIT_CONFIG_X86_64 */
4597
4598 if (elem_size > 3 || ((type & SLJIT_SIMD_FLOAT) && elem_size < 2))
4599 return SLJIT_ERR_UNSUPPORTED;
4600
4601 if (reg_size == 4) {
4602 if (type & SLJIT_SIMD_TEST)
4603 return SLJIT_SUCCESS;
4604
4605 op = EX86_PREF_66 | EX86_SSE2_OP2;
4606
4607 switch (elem_size) {
4608 case 1:
4609 if (use_vex)
4610 FAIL_IF(emit_vex_instruction(compiler, PACKSSWB_x_xm | EX86_PREF_66 | EX86_SSE2 | VEX_SSE2_OPV, TMP_FREG, freg, freg, 0));
4611 else
4612 FAIL_IF(emit_groupf(compiler, PACKSSWB_x_xm | EX86_PREF_66 | EX86_SSE2, TMP_FREG, freg, 0));
4613 freg = TMP_FREG;
4614 break;
4615 case 2:
4616 op = EX86_SSE2_OP2;
4617 break;
4618 }
4619
4620 dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
4621 op |= (elem_size < 2) ? PMOVMSKB_r_x : MOVMSKPS_r_x;
4622
4623 if (use_vex)
4624 FAIL_IF(emit_vex_instruction(compiler, op, dst_r, 0, freg, 0));
4625 else
4626 FAIL_IF(emit_groupf(compiler, op, dst_r, freg, 0));
4627
4628 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
4629 compiler->mode32 = type & SLJIT_32;
4630 #endif /* SLJIT_CONFIG_X86_64 */
4631
4632 if (elem_size == 1) {
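/* PACKSSWB left the word sign bits in the upper byte of the mask; shift them into the low byte. */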
4633 inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 8, dst_r, 0);
4634 FAIL_IF(!inst);
4635 inst[1] |= SHR;
4636 }
4637
4638 if (dst_r == TMP_REG1)
4639 return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
4640
4641 return SLJIT_SUCCESS;
4642 }
4643
4644 if (reg_size != 5 || !(cpu_feature_list & CPU_FEATURE_AVX2))
4645 return SLJIT_ERR_UNSUPPORTED;
4646
4647 if (type & SLJIT_SIMD_TEST)
4648 return SLJIT_SUCCESS;
4649
4650 dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
4651
4652 if (elem_size == 1) {
4653 FAIL_IF(emit_vex_instruction(compiler, VEXTRACTI128_x_ym | VEX_256 | EX86_PREF_66 | VEX_OP_0F3A | EX86_SSE2, freg, 0, TMP_FREG, 0));
4654 FAIL_IF(emit_byte(compiler, 1));
4655 FAIL_IF(emit_vex_instruction(compiler, PACKSSWB_x_xm | VEX_256 | EX86_PREF_66 | EX86_SSE2 | VEX_SSE2_OPV, TMP_FREG, freg, TMP_FREG, 0));
4656 FAIL_IF(emit_groupf(compiler, PMOVMSKB_r_x | EX86_PREF_66 | EX86_SSE2_OP2, dst_r, TMP_FREG, 0));
4657 } else {
4658 op = MOVMSKPS_r_x | VEX_256 | EX86_SSE2_OP2;
4659
4660 if (elem_size == 0)
4661 op = PMOVMSKB_r_x | VEX_256 | EX86_PREF_66 | EX86_SSE2_OP2;
4662 else if (elem_size == 3)
4663 op |= EX86_PREF_66;
4664
4665 FAIL_IF(emit_vex_instruction(compiler, op, dst_r, 0, freg, 0));
4666 }
4667
4668 if (dst_r == TMP_REG1) {
4669 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
4670 compiler->mode32 = type & SLJIT_32;
4671 #endif /* SLJIT_CONFIG_X86_64 */
4672 return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
4673 }
4674
4675 return SLJIT_SUCCESS;
4676 }
4677
4678 static sljit_s32 emit_simd_mov(struct sljit_compiler *compiler, sljit_s32 type,
4679 sljit_s32 dst_freg, sljit_s32 src_freg)
4680 {
4681 sljit_uw op = ((type & SLJIT_SIMD_FLOAT) ? MOVAPS_x_xm : MOVDQA_x_xm) | EX86_SSE2;
4682
4683 SLJIT_ASSERT(SLJIT_SIMD_GET_REG_SIZE(type) == 4);
4684
4685 if (!(type & SLJIT_SIMD_FLOAT) || SLJIT_SIMD_GET_ELEM_SIZE(type) == 3)
4686 op |= EX86_PREF_66;
4687
4688 return emit_groupf(compiler, op, dst_freg, src_freg, 0);
4689 }
4690
4691 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_op2(struct sljit_compiler *compiler, sljit_s32 type,
4692 sljit_s32 dst_freg, sljit_s32 src1_freg, sljit_s32 src2_freg)
4693 {
4694 sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
4695 sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
4696 sljit_uw op = 0;
4697
4698 CHECK_ERROR();
4699 CHECK(check_sljit_emit_simd_op2(compiler, type, dst_freg, src1_freg, src2_freg));
4700
4701 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
4702 compiler->mode32 = 1;
4703 #endif /* SLJIT_CONFIG_X86_64 */
4704
4705 if (reg_size == 5) {
4706 if (!(cpu_feature_list & CPU_FEATURE_AVX2))
4707 return SLJIT_ERR_UNSUPPORTED;
4708 } else if (reg_size != 4)
4709 return SLJIT_ERR_UNSUPPORTED;
4710
4711 if ((type & SLJIT_SIMD_FLOAT) && (elem_size < 2 || elem_size > 3))
4712 return SLJIT_ERR_UNSUPPORTED;
4713
4714 switch (SLJIT_SIMD_GET_OPCODE(type)) {
4715 case SLJIT_SIMD_OP2_AND:
4716 op = (type & SLJIT_SIMD_FLOAT) ? ANDPD_x_xm : PAND_x_xm;
4717
4718 if (!(type & SLJIT_SIMD_FLOAT) || elem_size == 3)
4719 op |= EX86_PREF_66;
4720 break;
4721 case SLJIT_SIMD_OP2_OR:
4722 op = (type & SLJIT_SIMD_FLOAT) ? ORPD_x_xm : POR_x_xm;
4723
4724 if (!(type & SLJIT_SIMD_FLOAT) || elem_size == 3)
4725 op |= EX86_PREF_66;
4726 break;
4727 case SLJIT_SIMD_OP2_XOR:
4728 op = (type & SLJIT_SIMD_FLOAT) ? XORPD_x_xm : PXOR_x_xm;
4729
4730 if (!(type & SLJIT_SIMD_FLOAT) || elem_size == 3)
4731 op |= EX86_PREF_66;
4732 break;
4733 }
4734
4735 if (type & SLJIT_SIMD_TEST)
4736 return SLJIT_SUCCESS;
4737
4738 if (reg_size == 5 || ((cpu_feature_list & CPU_FEATURE_AVX) && (compiler->options & SLJIT_ENTER_USE_VEX))) {
4739 if (reg_size == 5)
4740 op |= VEX_256;
4741
4742 return emit_vex_instruction(compiler, op | EX86_SSE2 | VEX_SSE2_OPV, dst_freg, src1_freg, src2_freg, 0);
4743 }
4744
4745 if (dst_freg != src1_freg) {
4746 if (dst_freg == src2_freg)
4747 src2_freg = src1_freg;
4748 else
4749 FAIL_IF(emit_simd_mov(compiler, type, dst_freg, src1_freg));
4750 }
4751
4752 FAIL_IF(emit_groupf(compiler, op | EX86_SSE2, dst_freg, src2_freg, 0));
4753 return SLJIT_SUCCESS;
4754 }
4755
4756 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_load(struct sljit_compiler *compiler, sljit_s32 op,
4757 sljit_s32 dst_reg,
4758 sljit_s32 mem_reg)
4759 {
4760 CHECK_ERROR();
4761 CHECK(check_sljit_emit_atomic_load(compiler, op, dst_reg, mem_reg));
4762
4763 SLJIT_SKIP_CHECKS(compiler);
4764 return sljit_emit_op1(compiler, op, dst_reg, 0, SLJIT_MEM1(mem_reg), 0);
4765 }
4766
4767 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_store(struct sljit_compiler *compiler, sljit_s32 op,
4768 sljit_s32 src_reg,
4769 sljit_s32 mem_reg,
4770 sljit_s32 temp_reg)
4771 {
4772 sljit_uw pref;
4773 sljit_s32 free_reg = TMP_REG1;
4774 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
4775 sljit_sw srcw = 0;
4776 sljit_sw tempw = 0;
4777 #endif /* SLJIT_CONFIG_X86_32 */
4778
4779 CHECK_ERROR();
4780 CHECK(check_sljit_emit_atomic_store(compiler, op, src_reg, mem_reg, temp_reg));
4781 CHECK_EXTRA_REGS(src_reg, srcw, (void)0);
4782 CHECK_EXTRA_REGS(temp_reg, tempw, (void)0);
4783
4784 SLJIT_ASSERT(FAST_IS_REG(src_reg) || src_reg == SLJIT_MEM1(SLJIT_SP));
4785 SLJIT_ASSERT(FAST_IS_REG(temp_reg) || temp_reg == SLJIT_MEM1(SLJIT_SP));
4786
4787 op = GET_OPCODE(op);
4788 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
4789 if ((src_reg & SLJIT_MEM) || (op == SLJIT_MOV_U8 && reg_map[src_reg] >= 4)) {
4790 		/* Src is a virtual register or its low byte is not accessible. */
4791 SLJIT_ASSERT(src_reg != SLJIT_R1);
4792 free_reg = src_reg;
4793
4794 EMIT_MOV(compiler, TMP_REG1, 0, src_reg, srcw);
4795 src_reg = TMP_REG1;
4796
4797 if (mem_reg == src_reg)
4798 mem_reg = TMP_REG1;
4799 }
4800 #endif /* SLJIT_CONFIG_X86_32 */
4801
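/* CMPXCHG compares the accumulator with the memory operand, so the expected value (temp_reg) must live in R0; its previous contents are preserved in free_reg. */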
4802 if (temp_reg != SLJIT_R0) {
4803 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
4804 compiler->mode32 = 0;
4805
4806 EMIT_MOV(compiler, free_reg, 0, SLJIT_R0, 0);
4807 EMIT_MOV(compiler, SLJIT_R0, 0, temp_reg, 0);
4808
4809 if (src_reg == SLJIT_R0)
4810 src_reg = free_reg;
4811 if (mem_reg == SLJIT_R0)
4812 mem_reg = free_reg;
4813 #else /* !SLJIT_CONFIG_X86_64 */
4814 if (src_reg == TMP_REG1 && mem_reg == SLJIT_R0 && (free_reg & SLJIT_MEM)) {
4815 EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), 0, SLJIT_R1, 0);
4816 EMIT_MOV(compiler, SLJIT_R1, 0, SLJIT_R0, 0);
4817 EMIT_MOV(compiler, SLJIT_R0, 0, temp_reg, tempw);
4818
4819 mem_reg = SLJIT_R1;
4820 free_reg = SLJIT_R1;
4821 } else {
4822 EMIT_MOV(compiler, free_reg, 0, SLJIT_R0, 0);
4823 EMIT_MOV(compiler, SLJIT_R0, 0, temp_reg, tempw);
4824
4825 if (src_reg == SLJIT_R0)
4826 src_reg = free_reg;
4827 if (mem_reg == SLJIT_R0)
4828 mem_reg = free_reg;
4829 }
4830 #endif /* SLJIT_CONFIG_X86_64 */
4831 }
4832
4833 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
4834 compiler->mode32 = op != SLJIT_MOV && op != SLJIT_MOV_P;
4835 #endif /* SLJIT_CONFIG_X86_64 */
4836
4837 /* Lock prefix. */
4838 FAIL_IF(emit_byte(compiler, GROUP_LOCK));
4839
4840 pref = 0;
4841 if (op == SLJIT_MOV_U16)
4842 pref = EX86_HALF_ARG | EX86_PREF_66;
4843 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
4844 if (op == SLJIT_MOV_U8)
4845 pref = EX86_REX;
4846 #endif /* SLJIT_CONFIG_X86_64 */
4847
4848 FAIL_IF(emit_groupf(compiler, (op == SLJIT_MOV_U8 ? CMPXCHG_rm8_r : CMPXCHG_rm_r) | pref, src_reg, SLJIT_MEM1(mem_reg), 0));
4849
4850 if (temp_reg != SLJIT_R0) {
4851 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
4852 compiler->mode32 = 0;
4853 return emit_mov(compiler, SLJIT_R0, 0, TMP_REG1, 0);
4854 #else /* !SLJIT_CONFIG_X86_64 */
4855 EMIT_MOV(compiler, SLJIT_R0, 0, free_reg, 0);
4856 if (free_reg != TMP_REG1)
4857 return emit_mov(compiler, free_reg, 0, (free_reg == SLJIT_R1) ? SLJIT_MEM1(SLJIT_SP) : TMP_REG1, 0);
4858 #endif /* SLJIT_CONFIG_X86_64 */
4859 }
4860 return SLJIT_SUCCESS;
4861 }
4862
4863 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_local_base(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw offset)
4864 {
4865 CHECK_ERROR();
4866 CHECK(check_sljit_get_local_base(compiler, dst, dstw, offset));
4867 ADJUST_LOCAL_OFFSET(dst, dstw);
4868
4869 CHECK_EXTRA_REGS(dst, dstw, (void)0);
4870
4871 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
4872 compiler->mode32 = 0;
4873 #endif
4874
4875 ADJUST_LOCAL_OFFSET(SLJIT_MEM1(SLJIT_SP), offset);
4876
4877 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
4878 if (NOT_HALFWORD(offset)) {
4879 FAIL_IF(emit_load_imm64(compiler, TMP_REG1, offset));
4880 #if (defined SLJIT_DEBUG && SLJIT_DEBUG)
4881 SLJIT_ASSERT(emit_lea_binary(compiler, dst, dstw, SLJIT_SP, 0, TMP_REG1, 0) != SLJIT_ERR_UNSUPPORTED);
4882 return compiler->error;
4883 #else
4884 return emit_lea_binary(compiler, dst, dstw, SLJIT_SP, 0, TMP_REG1, 0);
4885 #endif
4886 }
4887 #endif
4888
4889 if (offset != 0)
4890 return emit_lea_binary(compiler, dst, dstw, SLJIT_SP, 0, SLJIT_IMM, offset);
4891 return emit_mov(compiler, dst, dstw, SLJIT_SP, 0);
4892 }
4893
4894 SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value)
4895 {
4896 sljit_u8 *inst;
4897 struct sljit_const *const_;
4898 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
4899 sljit_s32 reg;
4900 #endif
4901
4902 CHECK_ERROR_PTR();
4903 CHECK_PTR(check_sljit_emit_const(compiler, dst, dstw, init_value));
4904 ADJUST_LOCAL_OFFSET(dst, dstw);
4905
4906 CHECK_EXTRA_REGS(dst, dstw, (void)0);
4907
4908 const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const));
4909 PTR_FAIL_IF(!const_);
4910 set_const(const_, compiler);
4911
4912 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
4913 compiler->mode32 = 0;
4914 reg = FAST_IS_REG(dst) ? dst : TMP_REG1;
4915
4916 if (emit_load_imm64(compiler, reg, init_value))
4917 return NULL;
4918 #else
4919 if (emit_mov(compiler, dst, dstw, SLJIT_IMM, init_value))
4920 return NULL;
4921 #endif
4922
4923 inst = (sljit_u8*)ensure_buf(compiler, 1);
4924 PTR_FAIL_IF(!inst);
4925
4926 inst[0] = SLJIT_INST_CONST;
4927
4928 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
4929 if (dst & SLJIT_MEM)
4930 if (emit_mov(compiler, dst, dstw, TMP_REG1, 0))
4931 return NULL;
4932 #endif
4933
4934 return const_;
4935 }
4936
4937 SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_mov_addr(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw)
4938 {
4939 struct sljit_jump *jump;
4940 sljit_u8 *inst;
4941 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
4942 sljit_s32 reg;
4943 #endif /* SLJIT_CONFIG_X86_64 */
4944
4945 CHECK_ERROR_PTR();
4946 CHECK_PTR(check_sljit_emit_mov_addr(compiler, dst, dstw));
4947 ADJUST_LOCAL_OFFSET(dst, dstw);
4948
4949 CHECK_EXTRA_REGS(dst, dstw, (void)0);
4950
4951 jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
4952 PTR_FAIL_IF(!jump);
4953 set_mov_addr(jump, compiler, 0);
4954
4955 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
4956 compiler->mode32 = 0;
4957 reg = FAST_IS_REG(dst) ? dst : TMP_REG1;
4958
4959 PTR_FAIL_IF(emit_load_imm64(compiler, reg, 0));
4960 jump->addr = compiler->size;
4961
4962 if (reg_map[reg] >= 8)
4963 jump->flags |= MOV_ADDR_HI;
4964 #else /* !SLJIT_CONFIG_X86_64 */
4965 PTR_FAIL_IF(emit_mov(compiler, dst, dstw, SLJIT_IMM, 0));
4966 #endif /* SLJIT_CONFIG_X86_64 */
4967
4968 inst = (sljit_u8*)ensure_buf(compiler, 1);
4969 PTR_FAIL_IF(!inst);
4970
4971 inst[0] = SLJIT_INST_MOV_ADDR;
4972
4973 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
4974 if (dst & SLJIT_MEM)
4975 PTR_FAIL_IF(emit_mov(compiler, dst, dstw, TMP_REG1, 0));
4976 #endif /* SLJIT_CONFIG_X86_64 */
4977
4978 return jump;
4979 }
4980
4981 SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset)
4982 {
4983 SLJIT_UNUSED_ARG(executable_offset);
4984
4985 SLJIT_UPDATE_WX_FLAGS((void*)addr, (void*)(addr + sizeof(sljit_uw)), 0);
4986 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
4987 sljit_unaligned_store_sw((void*)addr, (sljit_sw)(new_target - (addr + 4) - (sljit_uw)executable_offset));
4988 #else
4989 sljit_unaligned_store_sw((void*)addr, (sljit_sw)new_target);
4990 #endif
4991 SLJIT_UPDATE_WX_FLAGS((void*)addr, (void*)(addr + sizeof(sljit_uw)), 1);
4992 }
4993
4994 SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset)
4995 {
4996 SLJIT_UNUSED_ARG(executable_offset);
4997
4998 SLJIT_UPDATE_WX_FLAGS((void*)addr, (void*)(addr + sizeof(sljit_sw)), 0);
4999 sljit_unaligned_store_sw((void*)addr, new_constant);
5000 SLJIT_UPDATE_WX_FLAGS((void*)addr, (void*)(addr + sizeof(sljit_sw)), 1);
5001 }
5002