xref: /aosp_15_r20/external/swiftshader/third_party/subzero/unittest/AssemblerX8632/XmmArith.cpp (revision 03ce13f70fcc45d86ee91b7ee4cab1936a95046e)
1 //===- subzero/unittest/AssemblerX8632/XmmArith.cpp -----------------------===//
2 //
3 //                        The Subzero Code Generator
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 #include "AssemblerX8632/TestUtil.h"
10 
11 namespace Ice {
12 namespace X8632 {
13 namespace Test {
14 namespace {
15 
TEST_F(AssemblerX8632Test,ArithSS)16 TEST_F(AssemblerX8632Test, ArithSS) {
17 #define TestArithSSXmmXmm(FloatSize, Src, Value0, Dst, Value1, Inst, Op)       \
18   do {                                                                         \
19     static_assert(FloatSize == 32 || FloatSize == 64,                          \
20                   "Invalid fp size " #FloatSize);                              \
21     static constexpr char TestString[] =                                       \
22         "(" #FloatSize ", " #Src ", " #Value0 ", " #Dst ", " #Value1           \
23         ", " #Inst ", " #Op ")";                                               \
24     static constexpr bool IsDouble = FloatSize == 64;                          \
25     using Type = std::conditional<IsDouble, double, float>::type;              \
26     const uint32_t T0 = allocateQword();                                       \
27     const Type V0 = Value0;                                                    \
28     const uint32_t T1 = allocateQword();                                       \
29     const Type V1 = Value1;                                                    \
30                                                                                \
31     __ movss(IceType_f##FloatSize, XmmRegister::Encoded_Reg_##Dst,             \
32              dwordAddress(T0));                                                \
33     __ movss(IceType_f##FloatSize, XmmRegister::Encoded_Reg_##Src,             \
34              dwordAddress(T1));                                                \
35     __ Inst(IceType_f##FloatSize, XmmRegister::Encoded_Reg_##Dst,              \
36             XmmRegister::Encoded_Reg_##Src);                                   \
37                                                                                \
38     AssembledTest test = assemble();                                           \
39     if (IsDouble) {                                                            \
40       test.setQwordTo(T0, static_cast<double>(V0));                            \
41       test.setQwordTo(T1, static_cast<double>(V1));                            \
42     } else {                                                                   \
43       test.setDwordTo(T0, static_cast<float>(V0));                             \
44       test.setDwordTo(T1, static_cast<float>(V1));                             \
45     }                                                                          \
46                                                                                \
47     test.run();                                                                \
48                                                                                \
49     ASSERT_DOUBLE_EQ(V0 Op V1, test.Dst<Type>()) << TestString;                \
50     reset();                                                                   \
51   } while (0)
52 
53 #define TestArithSSXmmAddr(FloatSize, Value0, Dst, Value1, Inst, Op)           \
54   do {                                                                         \
55     static_assert(FloatSize == 32 || FloatSize == 64,                          \
56                   "Invalid fp size " #FloatSize);                              \
57     static constexpr char TestString[] =                                       \
58         "(" #FloatSize ", Addr, " #Value0 ", " #Dst ", " #Value1 ", " #Inst    \
59         ", " #Op ")";                                                          \
60     static constexpr bool IsDouble = FloatSize == 64;                          \
61     using Type = std::conditional<IsDouble, double, float>::type;              \
62     const uint32_t T0 = allocateQword();                                       \
63     const Type V0 = Value0;                                                    \
64     const uint32_t T1 = allocateQword();                                       \
65     const Type V1 = Value1;                                                    \
66                                                                                \
67     __ movss(IceType_f##FloatSize, XmmRegister::Encoded_Reg_##Dst,             \
68              dwordAddress(T0));                                                \
69     __ Inst(IceType_f##FloatSize, XmmRegister::Encoded_Reg_##Dst,              \
70             dwordAddress(T1));                                                 \
71                                                                                \
72     AssembledTest test = assemble();                                           \
73     if (IsDouble) {                                                            \
74       test.setQwordTo(T0, static_cast<double>(V0));                            \
75       test.setQwordTo(T1, static_cast<double>(V1));                            \
76     } else {                                                                   \
77       test.setDwordTo(T0, static_cast<float>(V0));                             \
78       test.setDwordTo(T1, static_cast<float>(V1));                             \
79     }                                                                          \
80                                                                                \
81     test.run();                                                                \
82                                                                                \
83     ASSERT_DOUBLE_EQ(V0 Op V1, test.Dst<Type>()) << TestString;                \
84     reset();                                                                   \
85   } while (0)
86 
87 #define TestArithSS(FloatSize, Src, Dst0, Dst1)                                \
88   do {                                                                         \
89     TestArithSSXmmXmm(FloatSize, Src, 1.0, Dst0, 10.0, addss, +);              \
90     TestArithSSXmmAddr(FloatSize, 2.0, Dst1, 20.0, addss, +);                  \
91     TestArithSSXmmXmm(FloatSize, Src, 3.0, Dst0, 30.0, subss, -);              \
92     TestArithSSXmmAddr(FloatSize, 4.0, Dst1, 40.0, subss, -);                  \
93     TestArithSSXmmXmm(FloatSize, Src, 5.0, Dst0, 50.0, mulss, *);              \
94     TestArithSSXmmAddr(FloatSize, 6.0, Dst1, 60.0, mulss, *);                  \
95     TestArithSSXmmXmm(FloatSize, Src, 7.0, Dst0, 70.0, divss, /);              \
96     TestArithSSXmmAddr(FloatSize, 8.0, Dst1, 80.0, divss, /);                  \
97   } while (0)
98 
99   TestArithSS(32, xmm0, xmm1, xmm2);
100   TestArithSS(32, xmm1, xmm2, xmm3);
101   TestArithSS(32, xmm2, xmm3, xmm4);
102   TestArithSS(32, xmm3, xmm4, xmm5);
103   TestArithSS(32, xmm4, xmm5, xmm6);
104   TestArithSS(32, xmm5, xmm6, xmm7);
105   TestArithSS(32, xmm6, xmm7, xmm0);
106   TestArithSS(32, xmm7, xmm0, xmm1);
107 
108   TestArithSS(64, xmm0, xmm1, xmm2);
109   TestArithSS(64, xmm1, xmm2, xmm3);
110   TestArithSS(64, xmm2, xmm3, xmm4);
111   TestArithSS(64, xmm3, xmm4, xmm5);
112   TestArithSS(64, xmm4, xmm5, xmm6);
113   TestArithSS(64, xmm5, xmm6, xmm7);
114   TestArithSS(64, xmm6, xmm7, xmm0);
115   TestArithSS(64, xmm7, xmm0, xmm1);
116 
117 #undef TestArithSS
118 #undef TestArithSSXmmAddr
119 #undef TestArithSSXmmXmm
120 }
121 
TEST_F(AssemblerX8632Test,PArith)122 TEST_F(AssemblerX8632Test, PArith) {
123 #define TestPArithXmmXmm(Dst, Value0, Src, Value1, Inst, Op, Type, Size)       \
124   do {                                                                         \
125     static constexpr char TestString[] =                                       \
126         "(" #Dst ", " #Value0 ", " #Src ", " #Value1 ", " #Inst ", " #Op       \
127         ", " #Type ", " #Size ")";                                             \
128     const uint32_t T0 = allocateDqword();                                      \
129     const Dqword V0 Value0;                                                    \
130                                                                                \
131     const uint32_t T1 = allocateDqword();                                      \
132     const Dqword V1 Value1;                                                    \
133                                                                                \
134     __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0));               \
135     __ movups(XmmRegister::Encoded_Reg_##Src, dwordAddress(T1));               \
136     __ Inst(IceType_i##Size, XmmRegister::Encoded_Reg_##Dst,                   \
137             XmmRegister::Encoded_Reg_##Src);                                   \
138                                                                                \
139     AssembledTest test = assemble();                                           \
140     test.setDqwordTo(T0, V0);                                                  \
141     test.setDqwordTo(T1, V1);                                                  \
142     test.run();                                                                \
143                                                                                \
144     ASSERT_EQ(packedAs<Type##Size##_t>(V0) Op V1, test.Dst<Dqword>())          \
145         << TestString;                                                         \
146     reset();                                                                   \
147   } while (0)
148 
149 #define TestPArithXmmAddr(Dst, Value0, Value1, Inst, Op, Type, Size)           \
150   do {                                                                         \
151     static constexpr char TestString[] =                                       \
152         "(" #Dst ", " #Value0 ", Addr, " #Value1 ", " #Inst ", " #Op           \
153         ", " #Type ", " #Size ")";                                             \
154     const uint32_t T0 = allocateDqword();                                      \
155     const Dqword V0 Value0;                                                    \
156                                                                                \
157     const uint32_t T1 = allocateDqword();                                      \
158     const Dqword V1 Value1;                                                    \
159                                                                                \
160     __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0));               \
161     __ Inst(IceType_i##Size, XmmRegister::Encoded_Reg_##Dst,                   \
162             dwordAddress(T1));                                                 \
163                                                                                \
164     AssembledTest test = assemble();                                           \
165     test.setDqwordTo(T0, V0);                                                  \
166     test.setDqwordTo(T1, V1);                                                  \
167     test.run();                                                                \
168                                                                                \
169     ASSERT_EQ(packedAs<Type##Size##_t>(V0) Op V1, test.Dst<Dqword>())          \
170         << TestString;                                                         \
171     reset();                                                                   \
172   } while (0)
173 
174 #define TestPArithXmmImm(Dst, Value0, Imm, Inst, Op, Type, Size)               \
175   do {                                                                         \
176     static constexpr char TestString[] =                                       \
177         "(" #Dst ", " #Value0 ", " #Imm ", " #Inst ", " #Op ", " #Type         \
178         ", " #Size ")";                                                        \
179     const uint32_t T0 = allocateDqword();                                      \
180     const Dqword V0 Value0;                                                    \
181                                                                                \
182     __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0));               \
183     __ Inst(IceType_i##Size, XmmRegister::Encoded_Reg_##Dst, Immediate(Imm));  \
184                                                                                \
185     AssembledTest test = assemble();                                           \
186     test.setDqwordTo(T0, V0);                                                  \
187     test.run();                                                                \
188                                                                                \
189     ASSERT_EQ(packedAs<Type##Size##_t>(V0) Op Imm, test.Dst<Dqword>())         \
190         << TestString;                                                         \
191     reset();                                                                   \
192   } while (0)
193 
194 #define TestPAndnXmmXmm(Dst, Value0, Src, Value1, Type, Size)                  \
195   do {                                                                         \
196     static constexpr char TestString[] =                                       \
197         "(" #Dst ", " #Value0 ", " #Src ", " #Value1 ", pandn, " #Type         \
198         ", " #Size ")";                                                        \
199     const uint32_t T0 = allocateDqword();                                      \
200     const Dqword V0 Value0;                                                    \
201                                                                                \
202     const uint32_t T1 = allocateDqword();                                      \
203     const Dqword V1 Value1;                                                    \
204                                                                                \
205     __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0));               \
206     __ movups(XmmRegister::Encoded_Reg_##Src, dwordAddress(T1));               \
207     __ pandn(IceType_i##Size, XmmRegister::Encoded_Reg_##Dst,                  \
208              XmmRegister::Encoded_Reg_##Src);                                  \
209                                                                                \
210     AssembledTest test = assemble();                                           \
211     test.setDqwordTo(T0, V0);                                                  \
212     test.setDqwordTo(T1, V1);                                                  \
213     test.run();                                                                \
214                                                                                \
215     ASSERT_EQ(~(packedAs<Type##Size##_t>(V0)) & V1, test.Dst<Dqword>())        \
216         << TestString;                                                         \
217     reset();                                                                   \
218   } while (0)
219 
220 #define TestPAndnXmmAddr(Dst, Value0, Value1, Type, Size)                      \
221   do {                                                                         \
222     static constexpr char TestString[] =                                       \
223         "(" #Dst ", " #Value0 ", Addr, " #Value1 ", pandn, " #Type ", " #Size  \
224         ")";                                                                   \
225     const uint32_t T0 = allocateDqword();                                      \
226     const Dqword V0 Value0;                                                    \
227                                                                                \
228     const uint32_t T1 = allocateDqword();                                      \
229     const Dqword V1 Value1;                                                    \
230                                                                                \
231     __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0));               \
232     __ pandn(IceType_i##Size, XmmRegister::Encoded_Reg_##Dst,                  \
233              dwordAddress(T1));                                                \
234                                                                                \
235     AssembledTest test = assemble();                                           \
236     test.setDqwordTo(T0, V0);                                                  \
237     test.setDqwordTo(T1, V1);                                                  \
238     test.run();                                                                \
239                                                                                \
240     ASSERT_EQ((~packedAs<Type##Size##_t>(V0)) & V1, test.Dst<Dqword>())        \
241         << TestString;                                                         \
242     reset();                                                                   \
243   } while (0)
244 
245 #define TestPArithSize(Dst, Src, Size)                                         \
246   do {                                                                         \
247     static_assert(Size == 8 || Size == 16 || Size == 32, "Invalid size.");     \
248     if (Size != 8) {                                                           \
249       TestPArithXmmXmm(                                                        \
250           Dst,                                                                 \
251           (uint64_t(0x8040201008040201ull), uint64_t(0x8080404002020101ull)),  \
252           Src, (uint64_t(3u), uint64_t(0u)), psra, >>, int, Size);             \
253       TestPArithXmmAddr(                                                       \
254           Dst,                                                                 \
255           (uint64_t(0x8040201008040201ull), uint64_t(0x8080404002020101ull)),  \
256           (uint64_t(3u), uint64_t(0u)), psra, >>, int, Size);                  \
257       TestPArithXmmImm(                                                        \
258           Dst,                                                                 \
259           (uint64_t(0x8040201008040201ull), uint64_t(0x8080404002020101ull)),  \
260           3u, psra, >>, int, Size);                                            \
261       TestPArithXmmXmm(                                                        \
262           Dst,                                                                 \
263           (uint64_t(0x8040201008040201ull), uint64_t(0x8080404002020101ull)),  \
264           Src, (uint64_t(3u), uint64_t(0u)), psrl, >>, uint, Size);            \
265       TestPArithXmmAddr(                                                       \
266           Dst,                                                                 \
267           (uint64_t(0x8040201008040201ull), uint64_t(0x8080404002020101ull)),  \
268           (uint64_t(3u), uint64_t(0u)), psrl, >>, uint, Size);                 \
269       TestPArithXmmImm(                                                        \
270           Dst,                                                                 \
271           (uint64_t(0x8040201008040201ull), uint64_t(0x8080404002020101ull)),  \
272           3u, psrl, >>, uint, Size);                                           \
273       TestPArithXmmXmm(                                                        \
274           Dst,                                                                 \
275           (uint64_t(0x8040201008040201ull), uint64_t(0x8080404002020101ull)),  \
276           Src, (uint64_t(3u), uint64_t(0u)), psll, <<, uint, Size);            \
277       TestPArithXmmAddr(                                                       \
278           Dst,                                                                 \
279           (uint64_t(0x8040201008040201ull), uint64_t(0x8080404002020101ull)),  \
280           (uint64_t(3u), uint64_t(0u)), psll, <<, uint, Size);                 \
281       TestPArithXmmImm(                                                        \
282           Dst,                                                                 \
283           (uint64_t(0x8040201008040201ull), uint64_t(0x8080404002020101ull)),  \
284           3u, psll, <<, uint, Size);                                           \
285                                                                                \
286       TestPArithXmmXmm(                                                        \
287           Dst,                                                                 \
288           (uint64_t(0x8040201008040201ull), uint64_t(0x8080404002020101ull)),  \
289           Src,                                                                 \
290           (uint64_t(0xFFFFFFFF00000000ull), uint64_t(0x0123456789ABCDEull)),   \
291           pmull, *, int, Size);                                                \
292       TestPArithXmmAddr(                                                       \
293           Dst,                                                                 \
294           (uint64_t(0x8040201008040201ull), uint64_t(0x8080404002020101ull)),  \
295           (uint64_t(0xFFFFFFFF00000000ull), uint64_t(0x0123456789ABCDEull)),   \
296           pmull, *, int, Size);                                                \
297       if (Size != 16) {                                                        \
298         TestPArithXmmXmm(                                                      \
299             Dst,                                                               \
300             (uint64_t(0x8040201008040201ull),                                  \
301              uint64_t(0x8080404002020101ull)),                                 \
302             Src,                                                               \
303             (uint64_t(0xFFFFFFFF00000000ull), uint64_t(0x0123456789ABCDEull)), \
304             pmuludq, *, uint, Size);                                           \
305         TestPArithXmmAddr(                                                     \
306             Dst,                                                               \
307             (uint64_t(0x8040201008040201ull),                                  \
308              uint64_t(0x8080404002020101ull)),                                 \
309             (uint64_t(0xFFFFFFFF00000000ull), uint64_t(0x0123456789ABCDEull)), \
310             pmuludq, *, uint, Size);                                           \
311       }                                                                        \
312     }                                                                          \
313     TestPArithXmmXmm(                                                          \
314         Dst,                                                                   \
315         (uint64_t(0x8040201008040201ull), uint64_t(0x8080404002020101ull)),    \
316         Src,                                                                   \
317         (uint64_t(0xFFFFFFFF00000000ull), uint64_t(0x0123456789ABCDEull)),     \
318         padd, +, int, Size);                                                   \
319     TestPArithXmmAddr(                                                         \
320         Dst,                                                                   \
321         (uint64_t(0x8040201008040201ull), uint64_t(0x8080404002020101ull)),    \
322         (uint64_t(0xFFFFFFFF00000000ull), uint64_t(0x0123456789ABCDEull)),     \
323         padd, +, int, Size);                                                   \
324     TestPArithXmmXmm(                                                          \
325         Dst,                                                                   \
326         (uint64_t(0x8040201008040201ull), uint64_t(0x8080404002020101ull)),    \
327         Src,                                                                   \
328         (uint64_t(0xFFFFFFFF00000000ull), uint64_t(0x0123456789ABCDEull)),     \
329         psub, -, int, Size);                                                   \
330     TestPArithXmmAddr(                                                         \
331         Dst,                                                                   \
332         (uint64_t(0x8040201008040201ull), uint64_t(0x8080404002020101ull)),    \
333         (uint64_t(0xFFFFFFFF00000000ull), uint64_t(0x0123456789ABCDEull)),     \
334         psub, -, int, Size);                                                   \
335     TestPArithXmmXmm(                                                          \
336         Dst,                                                                   \
337         (uint64_t(0x8040201008040201ull), uint64_t(0x8080404002020101ull)),    \
338         Src,                                                                   \
339         (uint64_t(0xFFFFFFFF00000000ull), uint64_t(0x0123456789ABCDEull)),     \
340         pand, &, int, Size);                                                   \
341     TestPArithXmmAddr(                                                         \
342         Dst,                                                                   \
343         (uint64_t(0x8040201008040201ull), uint64_t(0x8080404002020101ull)),    \
344         (uint64_t(0xFFFFFFFF00000000ull), uint64_t(0x0123456789ABCDEull)),     \
345         pand, &, int, Size);                                                   \
346                                                                                \
347     TestPAndnXmmXmm(                                                           \
348         Dst,                                                                   \
349         (uint64_t(0x8040201008040201ull), uint64_t(0x8080404002020101ull)),    \
350         Src,                                                                   \
351         (uint64_t(0xFFFFFFFF00000000ull), uint64_t(0x0123456789ABCDEull)),     \
352         int, Size);                                                            \
353     TestPAndnXmmAddr(                                                          \
354         Dst,                                                                   \
355         (uint64_t(0x8040201008040201ull), uint64_t(0x8080404002020101ull)),    \
356         (uint64_t(0xFFFFFFFF00000000ull), uint64_t(0x0123456789ABCDEull)),     \
357         int, Size);                                                            \
358                                                                                \
359     TestPArithXmmXmm(                                                          \
360         Dst,                                                                   \
361         (uint64_t(0x8040201008040201ull), uint64_t(0x8080404002020101ull)),    \
362         Src,                                                                   \
363         (uint64_t(0xFFFFFFFF00000000ull), uint64_t(0x0123456789ABCDEull)),     \
364         por, |, int, Size);                                                    \
365     TestPArithXmmAddr(                                                         \
366         Dst,                                                                   \
367         (uint64_t(0x8040201008040201ull), uint64_t(0x8080404002020101ull)),    \
368         (uint64_t(0xFFFFFFFF00000000ull), uint64_t(0x0123456789ABCDEull)),     \
369         por, |, int, Size);                                                    \
370     TestPArithXmmXmm(                                                          \
371         Dst,                                                                   \
372         (uint64_t(0x8040201008040201ull), uint64_t(0x8080404002020101ull)),    \
373         Src,                                                                   \
374         (uint64_t(0xFFFFFFFF00000000ull), uint64_t(0x0123456789ABCDEull)),     \
375         pxor, ^, int, Size);                                                   \
376     TestPArithXmmAddr(                                                         \
377         Dst,                                                                   \
378         (uint64_t(0x8040201008040201ull), uint64_t(0x8080404002020101ull)),    \
379         (uint64_t(0xFFFFFFFF00000000ull), uint64_t(0x0123456789ABCDEull)),     \
380         pxor, ^, int, Size);                                                   \
381   } while (0)
382 
383 #define TestPArith(Src, Dst)                                                   \
384   do {                                                                         \
385     TestPArithSize(Src, Dst, 8);                                               \
386     TestPArithSize(Src, Dst, 16);                                              \
387     TestPArithSize(Src, Dst, 32);                                              \
388   } while (0)
389 
390   TestPArith(xmm0, xmm1);
391   TestPArith(xmm1, xmm2);
392   TestPArith(xmm2, xmm3);
393   TestPArith(xmm3, xmm4);
394   TestPArith(xmm4, xmm5);
395   TestPArith(xmm5, xmm6);
396   TestPArith(xmm6, xmm7);
397   TestPArith(xmm7, xmm0);
398 
399 #undef TestPArith
400 #undef TestPArithSize
401 #undef TestPAndnXmmAddr
402 #undef TestPAndnXmmXmm
403 #undef TestPArithXmmImm
404 #undef TestPArithXmmAddr
405 #undef TestPArithXmmXmm
406 }
407 
TEST_F(AssemblerX8632Test,ArithPS)408 TEST_F(AssemblerX8632Test, ArithPS) {
// Packed floating-point, register-source form taking an element type:
// Dst <- Value0, Src <- Value1, emit `Inst(IceType_f##FloatSize, Dst, Src)`,
// then expect Dst == packedAs<Type>(Value0) Op Value1. FloatSize is not part
// of the failure string.
#define TestArithPSXmmXmm(FloatSize, Dst, Value0, Src, Value1, Inst, Op, Type) \
  do {                                                                         \
    static constexpr char TestString[] =                                       \
        "(" #Dst ", " #Value0 ", " #Src ", " #Value1 ", " #Inst ", " #Op       \
        ", " #Type ")";                                                        \
    /* Slot0 backs the Dst value, Slot1 the Src value. */                      \
    const uint32_t Slot0 = allocateDqword();                                   \
    const uint32_t Slot1 = allocateDqword();                                   \
    const Dqword V0 Value0;                                                    \
    const Dqword V1 Value1;                                                    \
                                                                               \
    __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(Slot0));            \
    __ movups(XmmRegister::Encoded_Reg_##Src, dwordAddress(Slot1));            \
    __ Inst(IceType_f##FloatSize, XmmRegister::Encoded_Reg_##Dst,              \
            XmmRegister::Encoded_Reg_##Src);                                   \
                                                                               \
    AssembledTest test = assemble();                                           \
    test.setDqwordTo(Slot0, V0);                                               \
    test.setDqwordTo(Slot1, V1);                                               \
    test.run();                                                                \
                                                                               \
    ASSERT_EQ(packedAs<Type>(V0) Op V1, test.Dst<Dqword>()) << TestString;     \
                                                                               \
    reset();                                                                   \
  } while (0)
433 
434 #define TestArithPSXmmXmmUntyped(Dst, Value0, Src, Value1, Inst, Op, Type)     \
435   do {                                                                         \
436     static constexpr char TestString[] =                                       \
437         "(" #Dst ", " #Value0 ", " #Src ", " #Value1 ", " #Inst ", " #Op       \
438         ", " #Type ")";                                                        \
439     const uint32_t T0 = allocateDqword();                                      \
440     const Dqword V0 Value0;                                                    \
441     const uint32_t T1 = allocateDqword();                                      \
442     const Dqword V1 Value1;                                                    \
443                                                                                \
444     __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0));               \
445     __ movups(XmmRegister::Encoded_Reg_##Src, dwordAddress(T1));               \
446     __ Inst(XmmRegister::Encoded_Reg_##Dst, XmmRegister::Encoded_Reg_##Src);   \
447                                                                                \
448     AssembledTest test = assemble();                                           \
449     test.setDqwordTo(T0, V0);                                                  \
450     test.setDqwordTo(T1, V1);                                                  \
451     test.run();                                                                \
452                                                                                \
453     ASSERT_EQ(packedAs<Type>(V0) Op V1, test.Dst<Dqword>()) << TestString;     \
454                                                                                \
455     reset();                                                                   \
456   } while (0)
457 
458 #define TestArithPSXmmAddrUntyped(Dst, Value0, Value1, Inst, Op, Type)         \
459   do {                                                                         \
460     static constexpr char TestString[] =                                       \
461         "(" #Dst ", " #Value0 ", Addr, " #Value1 ", " #Inst ", " #Op           \
462         ", " #Type ")";                                                        \
463     const uint32_t T0 = allocateDqword();                                      \
464     const Dqword V0 Value0;                                                    \
465     const uint32_t T1 = allocateDqword();                                      \
466     const Dqword V1 Value1;                                                    \
467                                                                                \
468     __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0));               \
469     __ Inst(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T1));                 \
470                                                                                \
471     AssembledTest test = assemble();                                           \
472     test.setDqwordTo(T0, V0);                                                  \
473     test.setDqwordTo(T1, V1);                                                  \
474     test.run();                                                                \
475                                                                                \
476     ASSERT_EQ(packedAs<Type>(V0) Op V1, test.Dst<Dqword>()) << TestString;     \
477                                                                                \
478     reset();                                                                   \
479   } while (0)
480 
481 #define TestMinMaxPS(FloatSize, Dst, Value0, Src, Value1, Inst, Type)          \
482   do {                                                                         \
483     static constexpr char TestString[] =                                       \
484         "(" #Dst ", " #Value0 ", " #Src ", " #Value1 ", " #Inst ", " #Type     \
485         ")";                                                                   \
486     const uint32_t T0 = allocateDqword();                                      \
487     const Dqword V0 Value0;                                                    \
488     const uint32_t T1 = allocateDqword();                                      \
489     const Dqword V1 Value1;                                                    \
490                                                                                \
491     __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0));               \
492     __ movups(XmmRegister::Encoded_Reg_##Src, dwordAddress(T1));               \
493     __ Inst(IceType_f##FloatSize, XmmRegister::Encoded_Reg_##Dst,              \
494             XmmRegister::Encoded_Reg_##Src);                                   \
495                                                                                \
496     AssembledTest test = assemble();                                           \
497     test.setDqwordTo(T0, V0);                                                  \
498     test.setDqwordTo(T1, V1);                                                  \
499     test.run();                                                                \
500                                                                                \
501     ASSERT_EQ(packedAs<Type>(V0).Inst(V1), test.Dst<Dqword>()) << TestString;  \
502                                                                                \
503     reset();                                                                   \
504   } while (0)
505 
506 #define TestArithPSXmmAddr(FloatSize, Dst, Value0, Value1, Inst, Op, Type)     \
507   do {                                                                         \
508     static constexpr char TestString[] =                                       \
509         "(" #Dst ", " #Value0 ", Addr, " #Value1 ", " #Inst ", " #Op           \
510         ", " #Type ")";                                                        \
511     const uint32_t T0 = allocateDqword();                                      \
512     const Dqword V0 Value0;                                                    \
513     const uint32_t T1 = allocateDqword();                                      \
514     const Dqword V1 Value1;                                                    \
515                                                                                \
516     __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0));               \
517     __ Inst(IceType_f##FloatSize, XmmRegister::Encoded_Reg_##Dst,              \
518             dwordAddress(T1));                                                 \
519                                                                                \
520     AssembledTest test = assemble();                                           \
521     test.setDqwordTo(T0, V0);                                                  \
522     test.setDqwordTo(T1, V1);                                                  \
523     test.run();                                                                \
524                                                                                \
525     ASSERT_EQ(packedAs<Type>(V0) Op V1, test.Dst<Dqword>()) << TestString;     \
526                                                                                \
527     reset();                                                                   \
528   } while (0)
529 
530 #define TestArithPS(Dst, Src)                                                  \
531   do {                                                                         \
532     TestArithPSXmmXmm(32, Dst, (1.0, 100.0, -1000.0, 20.0), Src,               \
533                       (0.55, 0.43, 0.23, 1.21), addps, +, float);              \
534     TestArithPSXmmAddr(32, Dst, (1.0, 100.0, -1000.0, 20.0),                   \
535                        (0.55, 0.43, 0.23, 1.21), addps, +, float);             \
536     TestArithPSXmmXmm(32, Dst, (1.0, 100.0, -1000.0, 20.0), Src,               \
537                       (0.55, 0.43, 0.23, 1.21), subps, -, float);              \
538     TestArithPSXmmAddr(32, Dst, (1.0, 100.0, -1000.0, 20.0),                   \
539                        (0.55, 0.43, 0.23, 1.21), subps, -, float);             \
540     TestArithPSXmmXmm(32, Dst, (1.0, 100.0, -1000.0, 20.0), Src,               \
541                       (0.55, 0.43, 0.23, 1.21), mulps, *, float);              \
542     TestArithPSXmmAddr(32, Dst, (1.0, 100.0, -1000.0, 20.0),                   \
543                        (0.55, 0.43, 0.23, 1.21), mulps, *, float);             \
544     TestArithPSXmmXmm(32, Dst, (1.0, 100.0, -1000.0, 20.0), Src,               \
545                       (0.55, 0.43, 0.23, 1.21), divps, /, float);              \
546     TestArithPSXmmAddr(32, Dst, (1.0, 100.0, -1000.0, 20.0),                   \
547                        (0.55, 0.43, 0.23, 1.21), divps, /, float);             \
548     TestArithPSXmmXmm(32, Dst, (1.0, 100.0, -1000.0, 20.0), Src,               \
549                       (0.55, 0.43, 0.23, 1.21), andps, &, float);              \
550     TestArithPSXmmAddr(32, Dst, (1.0, 100.0, -1000.0, 20.0),                   \
551                        (0.55, 0.43, 0.23, 1.21), andps, &, float);             \
552     TestArithPSXmmXmm(64, Dst, (1.0, -1000.0), Src, (0.55, 1.21), andps, &,    \
553                       double);                                                 \
554     TestArithPSXmmAddr(64, Dst, (1.0, -1000.0), (0.55, 1.21), andps, &,        \
555                        double);                                                \
556     TestArithPSXmmXmm(32, Dst, (1.0, 100.0, -1000.0, 20.0), Src,               \
557                       (0.55, 0.43, 0.23, 1.21), orps, |, float);               \
558     TestArithPSXmmXmm(64, Dst, (1.0, -1000.0), Src, (0.55, 1.21), orps, |,     \
559                       double);                                                 \
560     TestMinMaxPS(32, Dst, (1.0, 100.0, -1000.0, 20.0), Src,                    \
561                  (0.55, 0.43, 0.23, 1.21), minps, float);                      \
562     TestMinMaxPS(32, Dst, (1.0, 100.0, -1000.0, 20.0), Src,                    \
563                  (0.55, 0.43, 0.23, 1.21), maxps, float);                      \
564     TestMinMaxPS(64, Dst, (1.0, -1000.0), Src, (0.55, 1.21), minps, double);   \
565     TestMinMaxPS(64, Dst, (1.0, -1000.0), Src, (0.55, 1.21), maxps, double);   \
566     TestArithPSXmmXmm(32, Dst, (1.0, 100.0, -1000.0, 20.0), Src,               \
567                       (0.55, 0.43, 0.23, 1.21), xorps, ^, float);              \
568     TestArithPSXmmAddr(32, Dst, (1.0, 100.0, -1000.0, 20.0),                   \
569                        (0.55, 0.43, 0.23, 1.21), xorps, ^, float);             \
570     TestArithPSXmmXmm(64, Dst, (1.0, -1000.0), Src, (0.55, 1.21), xorps, ^,    \
571                       double);                                                 \
572     TestArithPSXmmAddr(64, Dst, (1.0, -1000.0), (0.55, 1.21), xorps, ^,        \
573                        double);                                                \
574   } while (0)
575 
576 #if 0
577 
578 #endif
579 
580   TestArithPS(xmm0, xmm1);
581   TestArithPS(xmm1, xmm2);
582   TestArithPS(xmm2, xmm3);
583   TestArithPS(xmm3, xmm4);
584   TestArithPS(xmm4, xmm5);
585   TestArithPS(xmm5, xmm6);
586   TestArithPS(xmm6, xmm7);
587   TestArithPS(xmm7, xmm0);
588 
589 #undef TestArithPs
590 #undef TestMinMaxPS
591 #undef TestArithPSXmmXmmUntyped
592 #undef TestArithPSXmmAddr
593 #undef TestArithPSXmmXmm
594 }
595 
TEST_F(AssemblerX8632Test,Blending)596 TEST_F(AssemblerX8632Test, Blending) {
597   using f32 = float;
598   using i8 = uint8_t;
599 
600 #define TestBlendingXmmXmm(Dst, Value0, Src, Value1, M /*ask*/, Inst, Type)    \
601   do {                                                                         \
602     static constexpr char TestString[] =                                       \
603         "(" #Dst ", " #Value0 ", " #Src ", " #Value1 ", " #M ", " #Inst        \
604         ", " #Type ")";                                                        \
605     const uint32_t T0 = allocateDqword();                                      \
606     const Dqword V0 Value0;                                                    \
607     const uint32_t T1 = allocateDqword();                                      \
608     const Dqword V1 Value1;                                                    \
609     const uint32_t Mask = allocateDqword();                                    \
610     const Dqword MaskValue M;                                                  \
611                                                                                \
612     __ movups(XmmRegister::Encoded_Reg_xmm0, dwordAddress(Mask));              \
613     __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0));               \
614     __ movups(XmmRegister::Encoded_Reg_##Src, dwordAddress(T1));               \
615     __ Inst(IceType_##Type, XmmRegister::Encoded_Reg_##Dst,                    \
616             XmmRegister::Encoded_Reg_##Src);                                   \
617                                                                                \
618     AssembledTest test = assemble();                                           \
619     test.setDqwordTo(T0, V0);                                                  \
620     test.setDqwordTo(T1, V1);                                                  \
621     test.setDqwordTo(Mask, MaskValue);                                         \
622     test.run();                                                                \
623                                                                                \
624     ASSERT_EQ(packedAs<Type>(V0).blendWith(V1, MaskValue), test.Dst<Dqword>()) \
625         << TestString;                                                         \
626     reset();                                                                   \
627   } while (0)
628 
629 #define TestBlendingXmmAddr(Dst, Value0, Value1, M /*ask*/, Inst, Type)        \
630   do {                                                                         \
631     static constexpr char TestString[] =                                       \
632         "(" #Dst ", " #Value0 ", Addr, " #Value1 ", " #M ", " #Inst ", " #Type \
633         ")";                                                                   \
634     const uint32_t T0 = allocateDqword();                                      \
635     const Dqword V0 Value0;                                                    \
636     const uint32_t T1 = allocateDqword();                                      \
637     const Dqword V1 Value1;                                                    \
638     const uint32_t Mask = allocateDqword();                                    \
639     const Dqword MaskValue M;                                                  \
640                                                                                \
641     __ movups(XmmRegister::Encoded_Reg_xmm0, dwordAddress(Mask));              \
642     __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0));               \
643     __ Inst(IceType_##Type, XmmRegister::Encoded_Reg_##Dst, dwordAddress(T1)); \
644                                                                                \
645     AssembledTest test = assemble();                                           \
646     test.setDqwordTo(T0, V0);                                                  \
647     test.setDqwordTo(T1, V1);                                                  \
648     test.setDqwordTo(Mask, MaskValue);                                         \
649     test.run();                                                                \
650                                                                                \
651     ASSERT_EQ(packedAs<Type>(V0).blendWith(V1, MaskValue), test.Dst<Dqword>()) \
652         << TestString;                                                         \
653     reset();                                                                   \
654   } while (0)
655 
656 #define TestBlending(Src, Dst)                                                 \
657   do {                                                                         \
658     TestBlendingXmmXmm(                                                        \
659         Dst, (1.0, 2.0, 1.0, 2.0), Src, (-1.0, -2.0, -1.0, -2.0),              \
660         (uint64_t(0x8000000000000000ull), uint64_t(0x0000000080000000ull)),    \
661         blendvps, f32);                                                        \
662     TestBlendingXmmAddr(                                                       \
663         Dst, (1.0, 2.0, 1.0, 2.0), (-1.0, -2.0, -1.0, -2.0),                   \
664         (uint64_t(0x8000000000000000ull), uint64_t(0x0000000080000000ull)),    \
665         blendvps, f32);                                                        \
666     TestBlendingXmmXmm(                                                        \
667         Dst,                                                                   \
668         (uint64_t(0xFFFFFFFFFFFFFFFFull), uint64_t(0xBBBBBBBBBBBBBBBBull)),    \
669         Src,                                                                   \
670         (uint64_t(0xAAAAAAAAAAAAAAAAull), uint64_t(0xEEEEEEEEEEEEEEEEull)),    \
671         (uint64_t(0x8000000000000080ull), uint64_t(0x8080808000000000ull)),    \
672         pblendvb, i8);                                                         \
673     TestBlendingXmmAddr(                                                       \
674         Dst,                                                                   \
675         (uint64_t(0xFFFFFFFFFFFFFFFFull), uint64_t(0xBBBBBBBBBBBBBBBBull)),    \
676         (uint64_t(0xAAAAAAAAAAAAAAAAull), uint64_t(0xEEEEEEEEEEEEEEEEull)),    \
677         (uint64_t(0x8000000000000080ull), uint64_t(0x8080808000000000ull)),    \
678         pblendvb, i8);                                                         \
679   } while (0)
680 
681   /* xmm0 is taken. It is the implicit mask . */
682   TestBlending(xmm1, xmm2);
683   TestBlending(xmm2, xmm3);
684   TestBlending(xmm3, xmm4);
685   TestBlending(xmm4, xmm5);
686   TestBlending(xmm5, xmm6);
687   TestBlending(xmm6, xmm7);
688   TestBlending(xmm7, xmm1);
689 
690 #undef TestBlending
691 #undef TestBlendingXmmAddr
692 #undef TestBlendingXmmXmm
693 }
694 
TEST_F(AssemblerX8632Test,Cmpps)695 TEST_F(AssemblerX8632Test, Cmpps) {
696 #define TestCmppsXmmXmm(FloatSize, Dst, Value0, Src, Value1, C, Op, Type)      \
697   do {                                                                         \
698     static constexpr char TestString[] =                                       \
699         "(" #Src ", " #Dst ", " #C ", " #Op ")";                               \
700     const uint32_t T0 = allocateDqword();                                      \
701     const Dqword V0 Value0;                                                    \
702     const uint32_t T1 = allocateDqword();                                      \
703     const Dqword V1 Value1;                                                    \
704                                                                                \
705     __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0));               \
706     __ movups(XmmRegister::Encoded_Reg_##Src, dwordAddress(T1));               \
707     __ cmpps(IceType_f##FloatSize, XmmRegister::Encoded_Reg_##Dst,             \
708              XmmRegister::Encoded_Reg_##Src, Cond::Cmpps_##C);                 \
709                                                                                \
710     AssembledTest test = assemble();                                           \
711     test.setDqwordTo(T0, V0);                                                  \
712     test.setDqwordTo(T1, V1);                                                  \
713     test.run();                                                                \
714                                                                                \
715     ASSERT_EQ(packedAs<Type>(V0) Op V1, test.Dst<Dqword>()) << TestString;     \
716     ;                                                                          \
717     reset();                                                                   \
718   } while (0)
719 
720 #define TestCmppsXmmAddr(FloatSize, Dst, Value0, Value1, C, Op, Type)          \
721   do {                                                                         \
722     static constexpr char TestString[] = "(" #Dst ", Addr, " #C ", " #Op ")";  \
723     const uint32_t T0 = allocateDqword();                                      \
724     const Dqword V0 Value0;                                                    \
725     const uint32_t T1 = allocateDqword();                                      \
726     const Dqword V1 Value1;                                                    \
727                                                                                \
728     __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0));               \
729     __ cmpps(IceType_f##FloatSize, XmmRegister::Encoded_Reg_##Dst,             \
730              dwordAddress(T1), Cond::Cmpps_##C);                               \
731                                                                                \
732     AssembledTest test = assemble();                                           \
733     test.setDqwordTo(T0, V0);                                                  \
734     test.setDqwordTo(T1, V1);                                                  \
735     test.run();                                                                \
736                                                                                \
737     ASSERT_EQ(packedAs<Type>(V0) Op V1, test.Dst<Dqword>()) << TestString;     \
738     ;                                                                          \
739     reset();                                                                   \
740   } while (0)
741 
742 #define TestCmppsOrdUnordXmmXmm(FloatSize, Dst, Value0, Src, Value1, C, Type)  \
743   do {                                                                         \
744     static constexpr char TestString[] = "(" #Src ", " #Dst ", " #C ")";       \
745     const uint32_t T0 = allocateDqword();                                      \
746     const Dqword V0(1.0, 1.0, std::numeric_limits<float>::quiet_NaN(),         \
747                     std::numeric_limits<float>::quiet_NaN());                  \
748     const uint32_t T1 = allocateDqword();                                      \
749     const Dqword V1(1.0, std::numeric_limits<float>::quiet_NaN(), 1.0,         \
750                     std::numeric_limits<float>::quiet_NaN());                  \
751                                                                                \
752     __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0));               \
753     __ movups(XmmRegister::Encoded_Reg_##Src, dwordAddress(T1));               \
754     __ cmpps(IceType_f##FloatSize, XmmRegister::Encoded_Reg_##Dst,             \
755              XmmRegister::Encoded_Reg_##Src, Cond::Cmpps_##C);                 \
756                                                                                \
757     AssembledTest test = assemble();                                           \
758     test.setDqwordTo(T0, V0);                                                  \
759     test.setDqwordTo(T1, V1);                                                  \
760     test.run();                                                                \
761                                                                                \
762     ASSERT_EQ(packedAs<Type>(V0).C(V1), test.Dst<Dqword>()) << TestString;     \
763     ;                                                                          \
764     reset();                                                                   \
765   } while (0)
766 
767 #define TestCmppsOrdUnordXmmAddr(FloatSize, Dst, Value0, Value1, C, Type)      \
768   do {                                                                         \
769     static constexpr char TestString[] = "(" #Dst ", " #C ")";                 \
770     const uint32_t T0 = allocateDqword();                                      \
771     const Dqword V0(1.0, 1.0, std::numeric_limits<float>::quiet_NaN(),         \
772                     std::numeric_limits<float>::quiet_NaN());                  \
773     const uint32_t T1 = allocateDqword();                                      \
774     const Dqword V1(1.0, std::numeric_limits<float>::quiet_NaN(), 1.0,         \
775                     std::numeric_limits<float>::quiet_NaN());                  \
776                                                                                \
777     __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0));               \
778     __ cmpps(IceType_f##FloatSize, XmmRegister::Encoded_Reg_##Dst,             \
779              dwordAddress(T1), Cond::Cmpps_##C);                               \
780                                                                                \
781     AssembledTest test = assemble();                                           \
782     test.setDqwordTo(T0, V0);                                                  \
783     test.setDqwordTo(T1, V1);                                                  \
784     test.run();                                                                \
785                                                                                \
786     ASSERT_EQ(packedAs<Type>(V0).C(V1), test.Dst<Dqword>()) << TestString;     \
787     ;                                                                          \
788     reset();                                                                   \
789   } while (0)
790 
791 #define TestCmpps(FloatSize, Dst, Value0, Src, Value1, Type)                   \
792   do {                                                                         \
793     TestCmppsXmmXmm(FloatSize, Dst, Value0, Src, Value1, eq, ==, Type);        \
794     TestCmppsXmmAddr(FloatSize, Dst, Value0, Value1, eq, ==, Type);            \
795     TestCmppsXmmXmm(FloatSize, Dst, Value0, Src, Value1, eq, ==, Type);        \
796     TestCmppsXmmAddr(FloatSize, Dst, Value0, Value1, eq, ==, Type);            \
797     TestCmppsXmmXmm(FloatSize, Dst, Value0, Src, Value1, eq, ==, Type);        \
798     TestCmppsXmmAddr(FloatSize, Dst, Value0, Value1, eq, ==, Type);            \
799     TestCmppsOrdUnordXmmXmm(FloatSize, Dst, Value0, Src, Value1, unord, Type); \
800     TestCmppsOrdUnordXmmAddr(FloatSize, Dst, Value0, Value1, unord, Type);     \
801     TestCmppsXmmXmm(FloatSize, Dst, Value0, Src, Value1, eq, ==, Type);        \
802     TestCmppsXmmAddr(FloatSize, Dst, Value0, Value1, eq, ==, Type);            \
803     TestCmppsXmmXmm(FloatSize, Dst, Value0, Src, Value1, eq, ==, Type);        \
804     TestCmppsXmmAddr(FloatSize, Dst, Value0, Value1, eq, ==, Type);            \
805     TestCmppsXmmXmm(FloatSize, Dst, Value0, Src, Value1, eq, ==, Type);        \
806     TestCmppsXmmAddr(FloatSize, Dst, Value0, Value1, eq, ==, Type);            \
807     if (FloatSize == 32) {                                                     \
808       TestCmppsOrdUnordXmmXmm(32, Dst,                                         \
809                               (1.0, 1.0,                                       \
810                                std::numeric_limits<float>::quiet_NaN(),        \
811                                std::numeric_limits<float>::quiet_NaN()),       \
812                               Src,                                             \
813                               (1.0, std::numeric_limits<float>::quiet_NaN(),   \
814                                1.0, std::numeric_limits<float>::quiet_NaN()),  \
815                               unord, Type);                                    \
816       TestCmppsOrdUnordXmmAddr(32, Dst,                                        \
817                                (1.0, 1.0,                                      \
818                                 std::numeric_limits<float>::quiet_NaN(),       \
819                                 std::numeric_limits<float>::quiet_NaN()),      \
820                                (1.0, std::numeric_limits<float>::quiet_NaN(),  \
821                                 1.0, std::numeric_limits<float>::quiet_NaN()), \
822                                unord, Type);                                   \
823     } else {                                                                   \
824       TestCmppsOrdUnordXmmXmm(                                                 \
825           64, Dst, (1.0, std::numeric_limits<double>::quiet_NaN()), Src,       \
826           (std::numeric_limits<double>::quiet_NaN(),                           \
827            std::numeric_limits<double>::quiet_NaN()),                          \
828           unord, Type);                                                        \
829       TestCmppsOrdUnordXmmXmm(64, Dst, (1.0, 1.0), Src,                        \
830                               (1.0, std::numeric_limits<double>::quiet_NaN()), \
831                               unord, Type);                                    \
832       TestCmppsOrdUnordXmmAddr(                                                \
833           64, Dst, (1.0, std::numeric_limits<double>::quiet_NaN()),            \
834           (std::numeric_limits<double>::quiet_NaN(),                           \
835            std::numeric_limits<double>::quiet_NaN()),                          \
836           unord, Type);                                                        \
837       TestCmppsOrdUnordXmmAddr(                                                \
838           64, Dst, (1.0, 1.0),                                                 \
839           (1.0, std::numeric_limits<double>::quiet_NaN()), unord, Type);       \
840     }                                                                          \
841   } while (0)
842 
843 #define TestCmppsSize(FloatSize, Value0, Value1, Type)                         \
844   do {                                                                         \
845     TestCmpps(FloatSize, xmm0, Value0, xmm1, Value1, Type);                    \
846     TestCmpps(FloatSize, xmm1, Value0, xmm2, Value1, Type);                    \
847     TestCmpps(FloatSize, xmm2, Value0, xmm3, Value1, Type);                    \
848     TestCmpps(FloatSize, xmm3, Value0, xmm4, Value1, Type);                    \
849     TestCmpps(FloatSize, xmm4, Value0, xmm5, Value1, Type);                    \
850     TestCmpps(FloatSize, xmm5, Value0, xmm6, Value1, Type);                    \
851     TestCmpps(FloatSize, xmm6, Value0, xmm7, Value1, Type);                    \
852     TestCmpps(FloatSize, xmm7, Value0, xmm0, Value1, Type);                    \
853   } while (0)
854 
855   TestCmppsSize(32, (-1.0, 1.0, 3.14, 1024.5), (-1.0, 1.0, 3.14, 1024.5),
856                 float);
857   TestCmppsSize(64, (1.0, -1000.0), (0.55, 1.21), double);
858 
859 #undef TestCmpps
860 #undef TestCmppsOrdUnordXmmAddr
861 #undef TestCmppsOrdUnordXmmXmm
862 #undef TestCmppsXmmAddr
863 #undef TestCmppsXmmXmm
864 }
865 
TEST_F(AssemblerX8632Test,Sqrtps_Rsqrtps_Reciprocalps_Sqrtpd)866 TEST_F(AssemblerX8632Test, Sqrtps_Rsqrtps_Reciprocalps_Sqrtpd) {
867 #define TestImplSingle(Dst, Inst, Expect)                                      \
868   do {                                                                         \
869     static constexpr char TestString[] = "(" #Dst ", " #Inst ")";              \
870     const uint32_t T0 = allocateDqword();                                      \
871     const Dqword V0(1.0, 4.0, 20.0, 3.14);                                     \
872                                                                                \
873     __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0));               \
874     __ Inst(XmmRegister::Encoded_Reg_##Dst);                                   \
875                                                                                \
876     AssembledTest test = assemble();                                           \
877     test.setDqwordTo(T0, V0);                                                  \
878     test.run();                                                                \
879     ASSERT_EQ(Dqword Expect, test.Dst<Dqword>()) << TestString;                \
880     reset();                                                                   \
881   } while (0)
882 
883 #define TestImpl(Dst)                                                          \
884   do {                                                                         \
885     TestImplSingle(                                                            \
886         Dst, sqrtps,                                                           \
887         (uint64_t(0x400000003F800000ull), uint64_t(0x3FE2D10B408F1BBDull)));   \
888     TestImplSingle(                                                            \
889         Dst, rsqrtps,                                                          \
890         (uint64_t(0x3EFFF0003F7FF000ull), uint64_t(0x3F1078003E64F000ull)));   \
891     TestImplSingle(                                                            \
892         Dst, reciprocalps,                                                     \
893         (uint64_t(0x3E7FF0003F7FF000ull), uint64_t(0x3EA310003D4CC000ull)));   \
894                                                                                \
895     TestImplSingle(                                                            \
896         Dst, sqrtpd,                                                           \
897         (uint64_t(0x4036A09E9365F5F3ull), uint64_t(0x401C42FAE40282A8ull)));   \
898   } while (0)
899 
900   TestImpl(xmm0);
901   TestImpl(xmm1);
902   TestImpl(xmm2);
903   TestImpl(xmm3);
904   TestImpl(xmm4);
905   TestImpl(xmm5);
906   TestImpl(xmm6);
907   TestImpl(xmm7);
908 
909 #undef TestImpl
910 #undef TestImplSingle
911 }
912 
TEST_F(AssemblerX8632Test,Unpck)913 TEST_F(AssemblerX8632Test, Unpck) {
914   const Dqword V0(uint64_t(0xAAAAAAAABBBBBBBBull),
915                   uint64_t(0xCCCCCCCCDDDDDDDDull));
916   const Dqword V1(uint64_t(0xEEEEEEEEFFFFFFFFull),
917                   uint64_t(0x9999999988888888ull));
918 
919   const Dqword unpcklpsExpected(uint64_t(0xFFFFFFFFBBBBBBBBull),
920                                 uint64_t(0xEEEEEEEEAAAAAAAAull));
921   const Dqword unpcklpdExpected(uint64_t(0xAAAAAAAABBBBBBBBull),
922                                 uint64_t(0xEEEEEEEEFFFFFFFFull));
923   const Dqword unpckhpsExpected(uint64_t(0x88888888DDDDDDDDull),
924                                 uint64_t(0x99999999CCCCCCCCull));
925   const Dqword unpckhpdExpected(uint64_t(0xCCCCCCCCDDDDDDDDull),
926                                 uint64_t(0x9999999988888888ull));
927 
928 #define TestImplSingle(Dst, Src, Inst)                                         \
929   do {                                                                         \
930     static constexpr char TestString[] = "(" #Dst ", " #Src ", " #Inst ")";    \
931     const uint32_t T0 = allocateDqword();                                      \
932     const uint32_t T1 = allocateDqword();                                      \
933                                                                                \
934     __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0));               \
935     __ movups(XmmRegister::Encoded_Reg_##Src, dwordAddress(T1));               \
936     __ Inst(XmmRegister::Encoded_Reg_##Dst, XmmRegister::Encoded_Reg_##Src);   \
937                                                                                \
938     AssembledTest test = assemble();                                           \
939     test.setDqwordTo(T0, V0);                                                  \
940     test.setDqwordTo(T1, V1);                                                  \
941     test.run();                                                                \
942                                                                                \
943     ASSERT_EQ(Inst##Expected, test.Dst<Dqword>()) << TestString;               \
944     reset();                                                                   \
945   } while (0)
946 
947 #define TestImpl(Dst, Src)                                                     \
948   do {                                                                         \
949     TestImplSingle(Dst, Src, unpcklps);                                        \
950     TestImplSingle(Dst, Src, unpcklpd);                                        \
951     TestImplSingle(Dst, Src, unpckhps);                                        \
952     TestImplSingle(Dst, Src, unpckhpd);                                        \
953   } while (0)
954 
955   TestImpl(xmm0, xmm1);
956   TestImpl(xmm1, xmm2);
957   TestImpl(xmm2, xmm3);
958   TestImpl(xmm3, xmm4);
959   TestImpl(xmm4, xmm5);
960   TestImpl(xmm5, xmm6);
961   TestImpl(xmm6, xmm7);
962   TestImpl(xmm7, xmm0);
963 
964 #undef TestImpl
965 #undef TestImplSingle
966 }
967 
TEST_F(AssemblerX8632Test,Shufp)968 TEST_F(AssemblerX8632Test, Shufp) {
969   const Dqword V0(uint64_t(0x1111111122222222ull),
970                   uint64_t(0x5555555577777777ull));
971   const Dqword V1(uint64_t(0xAAAAAAAABBBBBBBBull),
972                   uint64_t(0xCCCCCCCCDDDDDDDDull));
973 
974   const uint8_t pshufdImm = 0x63;
975   const Dqword pshufdExpected(uint64_t(0xBBBBBBBBCCCCCCCCull),
976                               uint64_t(0xAAAAAAAADDDDDDDDull));
977 
978   const uint8_t shufpsImm = 0xf9;
979   const Dqword shufpsExpected(uint64_t(0x7777777711111111ull),
980                               uint64_t(0xCCCCCCCCCCCCCCCCull));
981 
982 #define TestImplSingleXmmXmm(Dst, Src, Inst)                                   \
983   do {                                                                         \
984     static constexpr char TestString[] = "(" #Dst ", " #Src ", " #Inst ")";    \
985     const uint32_t T0 = allocateDqword();                                      \
986     const uint32_t T1 = allocateDqword();                                      \
987                                                                                \
988     __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0));               \
989     __ movups(XmmRegister::Encoded_Reg_##Src, dwordAddress(T1));               \
990     __ Inst(IceType_f32, XmmRegister::Encoded_Reg_##Dst,                       \
991             XmmRegister::Encoded_Reg_##Src, Immediate(Inst##Imm));             \
992                                                                                \
993     AssembledTest test = assemble();                                           \
994     test.setDqwordTo(T0, V0);                                                  \
995     test.setDqwordTo(T1, V1);                                                  \
996     test.run();                                                                \
997                                                                                \
998     ASSERT_EQ(Inst##Expected, test.Dst<Dqword>()) << TestString;               \
999     reset();                                                                   \
1000   } while (0)
1001 
1002 #define TestImplSingleXmmAddr(Dst, Inst)                                       \
1003   do {                                                                         \
1004     static constexpr char TestString[] = "(" #Dst ", Addr, " #Inst ")";        \
1005     const uint32_t T0 = allocateDqword();                                      \
1006     const uint32_t T1 = allocateDqword();                                      \
1007                                                                                \
1008     __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0));               \
1009     __ Inst(IceType_f32, XmmRegister::Encoded_Reg_##Dst, dwordAddress(T1),     \
1010             Immediate(Inst##Imm));                                             \
1011                                                                                \
1012     AssembledTest test = assemble();                                           \
1013     test.setDqwordTo(T0, V0);                                                  \
1014     test.setDqwordTo(T1, V1);                                                  \
1015     test.run();                                                                \
1016                                                                                \
1017     ASSERT_EQ(Inst##Expected, test.Dst<Dqword>()) << TestString;               \
1018     reset();                                                                   \
1019   } while (0)
1020 
1021 #define TestImpl(Dst, Src)                                                     \
1022   do {                                                                         \
1023     TestImplSingleXmmXmm(Dst, Src, pshufd);                                    \
1024     TestImplSingleXmmAddr(Dst, pshufd);                                        \
1025     TestImplSingleXmmXmm(Dst, Src, shufps);                                    \
1026     TestImplSingleXmmAddr(Dst, shufps);                                        \
1027   } while (0)
1028 
1029   TestImpl(xmm0, xmm1);
1030   TestImpl(xmm1, xmm2);
1031   TestImpl(xmm2, xmm3);
1032   TestImpl(xmm3, xmm4);
1033   TestImpl(xmm4, xmm5);
1034   TestImpl(xmm5, xmm6);
1035   TestImpl(xmm6, xmm7);
1036   TestImpl(xmm7, xmm0);
1037 
1038 #undef TestImpl
1039 #undef TestImplSingleXmmAddr
1040 #undef TestImplSingleXmmXmm
1041 }
1042 
TEST_F(AssemblerX8632Test,Punpckl)1043 TEST_F(AssemblerX8632Test, Punpckl) {
1044   const Dqword V0_v4i32(uint64_t(0x1111111122222222ull),
1045                         uint64_t(0x5555555577777777ull));
1046   const Dqword V1_v4i32(uint64_t(0xAAAAAAAABBBBBBBBull),
1047                         uint64_t(0xCCCCCCCCDDDDDDDDull));
1048   const Dqword Expected_v4i32(uint64_t(0xBBBBBBBB22222222ull),
1049                               uint64_t(0xAAAAAAAA11111111ull));
1050 
1051   const Dqword V0_v8i16(uint64_t(0x1111222233334444ull),
1052                         uint64_t(0x5555666677778888ull));
1053   const Dqword V1_v8i16(uint64_t(0xAAAABBBBCCCCDDDDull),
1054                         uint64_t(0xEEEEFFFF00009999ull));
1055   const Dqword Expected_v8i16(uint64_t(0xCCCC3333DDDD4444ull),
1056                               uint64_t(0xAAAA1111BBBB2222ull));
1057 
1058   const Dqword V0_v16i8(uint64_t(0x1122334455667788ull),
1059                         uint64_t(0x99AABBCCDDEEFF00ull));
1060   const Dqword V1_v16i8(uint64_t(0xFFEEDDCCBBAA9900ull),
1061                         uint64_t(0xBAADF00DFEEDFACEull));
1062   const Dqword Expected_v16i8(uint64_t(0xBB55AA6699770088ull),
1063                               uint64_t(0xFF11EE22DD33CC44ull));
1064 
1065 #define TestImplXmmXmm(Dst, Src, Inst, Ty)                                     \
1066   do {                                                                         \
1067     static constexpr char TestString[] =                                       \
1068         "(" #Dst ", " #Src ", " #Inst ", " #Ty ")";                            \
1069     const uint32_t T0 = allocateDqword();                                      \
1070     const uint32_t T1 = allocateDqword();                                      \
1071                                                                                \
1072     __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0));               \
1073     __ movups(XmmRegister::Encoded_Reg_##Src, dwordAddress(T1));               \
1074     __ Inst(IceType_##Ty, XmmRegister::Encoded_Reg_##Dst,                      \
1075             XmmRegister::Encoded_Reg_##Src);                                   \
1076                                                                                \
1077     AssembledTest test = assemble();                                           \
1078     test.setDqwordTo(T0, V0_##Ty);                                             \
1079     test.setDqwordTo(T1, V1_##Ty);                                             \
1080     test.run();                                                                \
1081                                                                                \
1082     ASSERT_EQ(Expected_##Ty, test.Dst<Dqword>()) << TestString;                \
1083     reset();                                                                   \
1084   } while (0)
1085 
1086 #define TestImplXmmAddr(Dst, Inst, Ty)                                         \
1087   do {                                                                         \
1088     static constexpr char TestString[] =                                       \
1089         "(" #Dst ", Addr, " #Inst ", " #Ty ")";                                \
1090     const uint32_t T0 = allocateDqword();                                      \
1091     const uint32_t T1 = allocateDqword();                                      \
1092                                                                                \
1093     __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0));               \
1094     __ Inst(IceType_##Ty, XmmRegister::Encoded_Reg_##Dst, dwordAddress(T1));   \
1095                                                                                \
1096     AssembledTest test = assemble();                                           \
1097     test.setDqwordTo(T0, V0_##Ty);                                             \
1098     test.setDqwordTo(T1, V1_##Ty);                                             \
1099     test.run();                                                                \
1100                                                                                \
1101     ASSERT_EQ(Expected_##Ty, test.Dst<Dqword>()) << TestString;                \
1102     reset();                                                                   \
1103   } while (0)
1104 
1105 #define TestImpl(Dst, Src)                                                     \
1106   do {                                                                         \
1107     TestImplXmmXmm(Dst, Src, punpckl, v4i32);                                  \
1108     TestImplXmmAddr(Dst, punpckl, v4i32);                                      \
1109     TestImplXmmXmm(Dst, Src, punpckl, v8i16);                                  \
1110     TestImplXmmAddr(Dst, punpckl, v8i16);                                      \
1111     TestImplXmmXmm(Dst, Src, punpckl, v16i8);                                  \
1112     TestImplXmmAddr(Dst, punpckl, v16i8);                                      \
1113   } while (0)
1114 
1115   TestImpl(xmm0, xmm1);
1116   TestImpl(xmm1, xmm2);
1117   TestImpl(xmm2, xmm3);
1118   TestImpl(xmm3, xmm4);
1119   TestImpl(xmm4, xmm5);
1120   TestImpl(xmm5, xmm6);
1121   TestImpl(xmm6, xmm7);
1122   TestImpl(xmm7, xmm0);
1123 
1124 #undef TestImpl
1125 #undef TestImplXmmAddr
1126 #undef TestImplXmmXmm
1127 }
1128 
TEST_F(AssemblerX8632Test,Packss)1129 TEST_F(AssemblerX8632Test, Packss) {
1130   const Dqword V0_v4i32(uint64_t(0x0001000000001234ull),
1131                         uint64_t(0x7FFFFFFF80000000ull));
1132   const Dqword V1_v4i32(uint64_t(0xFFFFFFFEFFFFFFFFull),
1133                         uint64_t(0x0000800100007FFEull));
1134   const Dqword Expected_v4i32(uint64_t(0x7FFF80007FFF1234ull),
1135                               uint64_t(0x7FFF7FFEFFFEFFFFull));
1136 
1137   const Dqword V0_v8i16(uint64_t(0x0001000000120034ull),
1138                         uint64_t(0xFFFEFFFF7FFF8000ull));
1139   const Dqword V1_v8i16(uint64_t(0x00057FF80081007Eull),
1140                         uint64_t(0x0088007700660055ull));
1141   const Dqword Expected_v8i16(uint64_t(0xFEFF7F8001001234ull),
1142                               uint64_t(0x7F776655057F7F7Eull));
1143 
1144 #define TestImplXmmXmm(Dst, Src, Inst, Ty)                                     \
1145   do {                                                                         \
1146     static constexpr char TestString[] =                                       \
1147         "(" #Dst ", " #Src ", " #Inst ", " #Ty ")";                            \
1148     const uint32_t T0 = allocateDqword();                                      \
1149     const uint32_t T1 = allocateDqword();                                      \
1150                                                                                \
1151     __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0));               \
1152     __ movups(XmmRegister::Encoded_Reg_##Src, dwordAddress(T1));               \
1153     __ Inst(IceType_##Ty, XmmRegister::Encoded_Reg_##Dst,                      \
1154             XmmRegister::Encoded_Reg_##Src);                                   \
1155                                                                                \
1156     AssembledTest test = assemble();                                           \
1157     test.setDqwordTo(T0, V0_##Ty);                                             \
1158     test.setDqwordTo(T1, V1_##Ty);                                             \
1159     test.run();                                                                \
1160                                                                                \
1161     ASSERT_EQ(Expected_##Ty, test.Dst<Dqword>()) << TestString;                \
1162     reset();                                                                   \
1163   } while (0)
1164 
1165 #define TestImplXmmAddr(Dst, Inst, Ty)                                         \
1166   do {                                                                         \
1167     static constexpr char TestString[] =                                       \
1168         "(" #Dst ", Addr, " #Inst ", " #Ty ")";                                \
1169     const uint32_t T0 = allocateDqword();                                      \
1170     const uint32_t T1 = allocateDqword();                                      \
1171                                                                                \
1172     __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0));               \
1173     __ Inst(IceType_##Ty, XmmRegister::Encoded_Reg_##Dst, dwordAddress(T1));   \
1174                                                                                \
1175     AssembledTest test = assemble();                                           \
1176     test.setDqwordTo(T0, V0_##Ty);                                             \
1177     test.setDqwordTo(T1, V1_##Ty);                                             \
1178     test.run();                                                                \
1179                                                                                \
1180     ASSERT_EQ(Expected_##Ty, test.Dst<Dqword>()) << TestString;                \
1181     reset();                                                                   \
1182   } while (0)
1183 
1184 #define TestImpl(Dst, Src)                                                     \
1185   do {                                                                         \
1186     TestImplXmmXmm(Dst, Src, packss, v4i32);                                   \
1187     TestImplXmmAddr(Dst, packss, v4i32);                                       \
1188     TestImplXmmXmm(Dst, Src, packss, v8i16);                                   \
1189     TestImplXmmAddr(Dst, packss, v8i16);                                       \
1190   } while (0)
1191 
1192   TestImpl(xmm0, xmm1);
1193   TestImpl(xmm1, xmm2);
1194   TestImpl(xmm2, xmm3);
1195   TestImpl(xmm3, xmm4);
1196   TestImpl(xmm4, xmm5);
1197   TestImpl(xmm5, xmm6);
1198   TestImpl(xmm6, xmm7);
1199   TestImpl(xmm7, xmm0);
1200 
1201 #undef TestImpl
1202 #undef TestImplXmmAddr
1203 #undef TestImplXmmXmm
1204 }
1205 
TEST_F(AssemblerX8632Test,Packus)1206 TEST_F(AssemblerX8632Test, Packus) {
1207   const Dqword V0_v4i32(uint64_t(0x0001000000001234ull),
1208                         uint64_t(0x7FFFFFFF80000000ull));
1209   const Dqword V1_v4i32(uint64_t(0xFFFFFFFEFFFFFFFFull),
1210                         uint64_t(0x0000800100007FFEull));
1211   const Dqword Expected_v4i32(uint64_t(0xFFFF0000FFFF1234ull),
1212                               uint64_t(0x80017FFE00000000ull));
1213 
1214   const Dqword V0_v8i16(uint64_t(0x0001000000120034ull),
1215                         uint64_t(0xFFFEFFFF7FFF8000ull));
1216   const Dqword V1_v8i16(uint64_t(0x00057FF80081007Eull),
1217                         uint64_t(0x0088007700660055ull));
1218   const Dqword Expected_v8i16(uint64_t(0x0000FF0001001234ull),
1219                               uint64_t(0x8877665505FF817Eull));
1220 
1221 #define TestImplXmmXmm(Dst, Src, Inst, Ty)                                     \
1222   do {                                                                         \
1223     static constexpr char TestString[] =                                       \
1224         "(" #Dst ", " #Src ", " #Inst ", " #Ty ")";                            \
1225     const uint32_t T0 = allocateDqword();                                      \
1226     const uint32_t T1 = allocateDqword();                                      \
1227                                                                                \
1228     __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0));               \
1229     __ movups(XmmRegister::Encoded_Reg_##Src, dwordAddress(T1));               \
1230     __ Inst(IceType_##Ty, XmmRegister::Encoded_Reg_##Dst,                      \
1231             XmmRegister::Encoded_Reg_##Src);                                   \
1232                                                                                \
1233     AssembledTest test = assemble();                                           \
1234     test.setDqwordTo(T0, V0_##Ty);                                             \
1235     test.setDqwordTo(T1, V1_##Ty);                                             \
1236     test.run();                                                                \
1237                                                                                \
1238     ASSERT_EQ(Expected_##Ty, test.Dst<Dqword>()) << TestString;                \
1239     reset();                                                                   \
1240   } while (0)
1241 
1242 #define TestImplXmmAddr(Dst, Inst, Ty)                                         \
1243   do {                                                                         \
1244     static constexpr char TestString[] =                                       \
1245         "(" #Dst ", Addr, " #Inst ", " #Ty ")";                                \
1246     const uint32_t T0 = allocateDqword();                                      \
1247     const uint32_t T1 = allocateDqword();                                      \
1248                                                                                \
1249     __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0));               \
1250     __ Inst(IceType_##Ty, XmmRegister::Encoded_Reg_##Dst, dwordAddress(T1));   \
1251                                                                                \
1252     AssembledTest test = assemble();                                           \
1253     test.setDqwordTo(T0, V0_##Ty);                                             \
1254     test.setDqwordTo(T1, V1_##Ty);                                             \
1255     test.run();                                                                \
1256                                                                                \
1257     ASSERT_EQ(Expected_##Ty, test.Dst<Dqword>()) << TestString;                \
1258     reset();                                                                   \
1259   } while (0)
1260 
1261 #define TestImpl(Dst, Src)                                                     \
1262   do {                                                                         \
1263     TestImplXmmXmm(Dst, Src, packus, v4i32);                                   \
1264     TestImplXmmAddr(Dst, packus, v4i32);                                       \
1265     TestImplXmmXmm(Dst, Src, packus, v8i16);                                   \
1266     TestImplXmmAddr(Dst, packus, v8i16);                                       \
1267   } while (0)
1268 
1269   TestImpl(xmm0, xmm1);
1270   TestImpl(xmm1, xmm2);
1271   TestImpl(xmm2, xmm3);
1272   TestImpl(xmm3, xmm4);
1273   TestImpl(xmm4, xmm5);
1274   TestImpl(xmm5, xmm6);
1275   TestImpl(xmm6, xmm7);
1276   TestImpl(xmm7, xmm0);
1277 
1278 #undef TestImpl
1279 #undef TestImplXmmAddr
1280 #undef TestImplXmmXmm
1281 }
1282 
TEST_F(AssemblerX8632Test,Pshufb)1283 TEST_F(AssemblerX8632Test, Pshufb) {
1284   const Dqword V0(uint64_t(0x1122334455667788ull),
1285                   uint64_t(0x99aabbccddeeff32ull));
1286   const Dqword V1(uint64_t(0x0204050380060708ull),
1287                   uint64_t(0x010306080a8b0c0dull));
1288 
1289   const Dqword Expected(uint64_t(0x6644335500221132ull),
1290                         uint64_t(0x77552232ee00ccbbull));
1291 
1292 #define TestImplXmmXmm(Dst, Src, Inst)                                         \
1293   do {                                                                         \
1294     static constexpr char TestString[] = "(" #Dst ", " #Src ", " #Inst ")";    \
1295     const uint32_t T0 = allocateDqword();                                      \
1296     const uint32_t T1 = allocateDqword();                                      \
1297                                                                                \
1298     __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0));               \
1299     __ movups(XmmRegister::Encoded_Reg_##Src, dwordAddress(T1));               \
1300     __ Inst(IceType_void, XmmRegister::Encoded_Reg_##Dst,                      \
1301             XmmRegister::Encoded_Reg_##Src);                                   \
1302                                                                                \
1303     AssembledTest test = assemble();                                           \
1304     test.setDqwordTo(T0, V0);                                                  \
1305     test.setDqwordTo(T1, V1);                                                  \
1306     test.run();                                                                \
1307                                                                                \
1308     ASSERT_EQ(Expected, test.Dst<Dqword>()) << TestString;                     \
1309     reset();                                                                   \
1310   } while (0)
1311 
1312 #define TestImplXmmAddr(Dst, Inst)                                             \
1313   do {                                                                         \
1314     static constexpr char TestString[] = "(" #Dst ", Addr, " #Inst ")";        \
1315     const uint32_t T0 = allocateDqword();                                      \
1316     const uint32_t T1 = allocateDqword();                                      \
1317                                                                                \
1318     __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0));               \
1319     __ Inst(IceType_void, XmmRegister::Encoded_Reg_##Dst, dwordAddress(T1));   \
1320                                                                                \
1321     AssembledTest test = assemble();                                           \
1322     test.setDqwordTo(T0, V0);                                                  \
1323     test.setDqwordTo(T1, V1);                                                  \
1324     test.run();                                                                \
1325                                                                                \
1326     ASSERT_EQ(Expected, test.Dst<Dqword>()) << TestString;                     \
1327     reset();                                                                   \
1328   } while (0)
1329 
1330 #define TestImpl(Dst, Src)                                                     \
1331   do {                                                                         \
1332     TestImplXmmXmm(Dst, Src, pshufb);                                          \
1333     TestImplXmmAddr(Dst, pshufb);                                              \
1334   } while (0)
1335 
1336   TestImpl(xmm0, xmm1);
1337   TestImpl(xmm1, xmm2);
1338   TestImpl(xmm2, xmm3);
1339   TestImpl(xmm3, xmm4);
1340   TestImpl(xmm4, xmm5);
1341   TestImpl(xmm5, xmm6);
1342   TestImpl(xmm6, xmm7);
1343   TestImpl(xmm7, xmm0);
1344 
1345 #undef TestImpl
1346 #undef TestImplXmmAddr
1347 #undef TestImplXmmXmm
1348 }
1349 
TEST_F(AssemblerX8632Test,Cvt)1350 TEST_F(AssemblerX8632Test, Cvt) {
1351   const Dqword dq2ps32DstValue(-1.0f, -1.0f, -1.0f, -1.0f);
1352   const Dqword dq2ps32SrcValue(-5, 3, 100, 200);
1353   const Dqword dq2ps32Expected(-5.0f, 3.0f, 100.0, 200.0);
1354 
1355   const Dqword dq2ps64DstValue(0.0f, 0.0f, -1.0f, -1.0f);
1356   const Dqword dq2ps64SrcValue(-5, 3, 100, 200);
1357   const Dqword dq2ps64Expected(-5.0f, 3.0f, 100.0, 200.0);
1358 
1359   const Dqword tps2dq32DstValue(-1.0f, -1.0f, -1.0f, -1.0f);
1360   const Dqword tps2dq32SrcValue(-5.0f, 3.0f, 100.0, 200.0);
1361   const Dqword tps2dq32Expected(-5, 3, 100, 200);
1362 
1363   const Dqword tps2dq64DstValue(-1.0f, -1.0f, -1.0f, -1.0f);
1364   const Dqword tps2dq64SrcValue(-5.0f, 3.0f, 100.0, 200.0);
1365   const Dqword tps2dq64Expected(-5, 3, 100, 200);
1366 
1367   const Dqword si2ss32DstValue(-1.0f, -1.0f, -1.0f, -1.0f);
1368   const int32_t si2ss32SrcValue = 5;
1369   const Dqword si2ss32Expected(5.0f, -1.0f, -1.0f, -1.0f);
1370 
1371   const Dqword si2ss64DstValue(-1.0, -1.0);
1372   const int32_t si2ss64SrcValue = 5;
1373   const Dqword si2ss64Expected(5.0, -1.0);
1374 
1375   const int32_t tss2si32DstValue = 0xF00F0FF0;
1376   const Dqword tss2si32SrcValue(-5.0f, -1.0f, -1.0f, -1.0f);
1377   const int32_t tss2si32Expected = -5;
1378 
1379   const int32_t tss2si64DstValue = 0xF00F0FF0;
1380   const Dqword tss2si64SrcValue(-5.0, -1.0);
1381   const int32_t tss2si64Expected = -5;
1382 
1383   const Dqword float2float32DstValue(-1.0, -1.0);
1384   const Dqword float2float32SrcValue(-5.0, 3, 100, 200);
1385   const Dqword float2float32Expected(-5.0, -1.0);
1386 
1387   const Dqword float2float64DstValue(-1.0, -1.0, -1.0, -1.0);
1388   const Dqword float2float64SrcValue(-5.0, 3.0);
1389   const Dqword float2float64Expected(-5.0, -1.0, -1.0, -1.0);
1390 
1391 #define TestImplPXmmXmm(Dst, Src, Inst, Size)                                  \
1392   do {                                                                         \
1393     static constexpr char TestString[] =                                       \
1394         "(" #Dst ", " #Src ", cvt" #Inst ", f" #Size ")";                      \
1395     const uint32_t T0 = allocateDqword();                                      \
1396     const uint32_t T1 = allocateDqword();                                      \
1397                                                                                \
1398     __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0));               \
1399     __ movups(XmmRegister::Encoded_Reg_##Src, dwordAddress(T1));               \
1400     __ cvt##Inst(IceType_f##Size, XmmRegister::Encoded_Reg_##Dst,              \
1401                  XmmRegister::Encoded_Reg_##Src);                              \
1402                                                                                \
1403     AssembledTest test = assemble();                                           \
1404     test.setDqwordTo(T0, Inst##Size##DstValue);                                \
1405     test.setDqwordTo(T1, Inst##Size##SrcValue);                                \
1406     test.run();                                                                \
1407                                                                                \
1408     ASSERT_EQ(Inst##Size##Expected, test.Dst<Dqword>()) << TestString;         \
1409     reset();                                                                   \
1410   } while (0)
1411 
1412 #define TestImplSXmmReg(Dst, GPR, Inst, Size)                                  \
1413   do {                                                                         \
1414     static constexpr char TestString[] =                                       \
1415         "(" #Dst ", " #GPR ", cvt" #Inst ", f" #Size ")";                      \
1416     const uint32_t T0 = allocateDqword();                                      \
1417                                                                                \
1418     __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0));               \
1419     __ mov(IceType_i32, GPRRegister::Encoded_Reg_##GPR,                        \
1420            Immediate(Inst##Size##SrcValue));                                   \
1421     __ cvt##Inst(IceType_f##Size, XmmRegister::Encoded_Reg_##Dst, IceType_i32, \
1422                  GPRRegister::Encoded_Reg_##GPR);                              \
1423                                                                                \
1424     AssembledTest test = assemble();                                           \
1425     test.setDqwordTo(T0, Inst##Size##DstValue);                                \
1426     test.run();                                                                \
1427                                                                                \
1428     ASSERT_EQ(Inst##Size##Expected, test.Dst<Dqword>()) << TestString;         \
1429     reset();                                                                   \
1430   } while (0)
1431 
1432 #define TestImplSRegXmm(GPR, Src, Inst, Size)                                  \
1433   do {                                                                         \
1434     static constexpr char TestString[] =                                       \
1435         "(" #GPR ", " #Src ", cvt" #Inst ", f" #Size ")";                      \
1436     const uint32_t T0 = allocateDqword();                                      \
1437                                                                                \
1438     __ mov(IceType_i32, GPRRegister::Encoded_Reg_##GPR,                        \
1439            Immediate(Inst##Size##DstValue));                                   \
1440     __ movups(XmmRegister::Encoded_Reg_##Src, dwordAddress(T0));               \
1441     __ cvt##Inst(IceType_i32, GPRRegister::Encoded_Reg_##GPR, IceType_f##Size, \
1442                  XmmRegister::Encoded_Reg_##Src);                              \
1443                                                                                \
1444     AssembledTest test = assemble();                                           \
1445     test.setDqwordTo(T0, Inst##Size##SrcValue);                                \
1446     test.run();                                                                \
1447                                                                                \
1448     ASSERT_EQ(static_cast<uint32_t>(Inst##Size##Expected), test.GPR())         \
1449         << TestString;                                                         \
1450     reset();                                                                   \
1451   } while (0)
1452 
1453 #define TestImplPXmmAddr(Dst, Inst, Size)                                      \
1454   do {                                                                         \
1455     static constexpr char TestString[] =                                       \
1456         "(" #Dst ", Addr, cvt" #Inst ", f" #Size ")";                          \
1457     const uint32_t T0 = allocateDqword();                                      \
1458     const uint32_t T1 = allocateDqword();                                      \
1459                                                                                \
1460     __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0));               \
1461     __ cvt##Inst(IceType_f##Size, XmmRegister::Encoded_Reg_##Dst,              \
1462                  dwordAddress(T1));                                            \
1463                                                                                \
1464     AssembledTest test = assemble();                                           \
1465     test.setDqwordTo(T0, Inst##Size##DstValue);                                \
1466     test.setDqwordTo(T1, Inst##Size##SrcValue);                                \
1467     test.run();                                                                \
1468                                                                                \
1469     ASSERT_EQ(Inst##Size##Expected, test.Dst<Dqword>()) << TestString;         \
1470     reset();                                                                   \
1471   } while (0)
1472 
1473 #define TestImplSXmmAddr(Dst, Inst, Size)                                      \
1474   do {                                                                         \
1475     static constexpr char TestString[] =                                       \
1476         "(" #Dst ", Addr, cvt" #Inst ", f" #Size ")";                          \
1477     const uint32_t T0 = allocateDqword();                                      \
1478     const uint32_t T1 = allocateDword();                                       \
1479                                                                                \
1480     __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0));               \
1481     __ cvt##Inst(IceType_f##Size, XmmRegister::Encoded_Reg_##Dst, IceType_i32, \
1482                  dwordAddress(T1));                                            \
1483                                                                                \
1484     AssembledTest test = assemble();                                           \
1485     test.setDqwordTo(T0, Inst##Size##DstValue);                                \
1486     test.setDwordTo(T1, Inst##Size##SrcValue);                                 \
1487     test.run();                                                                \
1488                                                                                \
1489     ASSERT_EQ(Inst##Size##Expected, test.Dst<Dqword>()) << TestString;         \
1490     reset();                                                                   \
1491   } while (0)
1492 
1493 #define TestImplSRegAddr(GPR, Inst, Size)                                      \
1494   do {                                                                         \
1495     static constexpr char TestString[] =                                       \
1496         "(" #GPR ", Addr, cvt" #Inst ", f" #Size ")";                          \
1497     const uint32_t T0 = allocateDqword();                                      \
1498                                                                                \
1499     __ mov(IceType_i32, GPRRegister::Encoded_Reg_##GPR,                        \
1500            Immediate(Inst##Size##DstValue));                                   \
1501     __ cvt##Inst(IceType_i32, GPRRegister::Encoded_Reg_##GPR, IceType_f##Size, \
1502                  dwordAddress(T0));                                            \
1503                                                                                \
1504     AssembledTest test = assemble();                                           \
1505     test.setDqwordTo(T0, Inst##Size##SrcValue);                                \
1506     test.run();                                                                \
1507                                                                                \
1508     ASSERT_EQ(static_cast<uint32_t>(Inst##Size##Expected), test.GPR())         \
1509         << TestString;                                                         \
1510     reset();                                                                   \
1511   } while (0)
1512 
1513 #define TestImplSize(Dst, Src, GPR, Size)                                      \
1514   do {                                                                         \
1515     TestImplPXmmXmm(Dst, Src, dq2ps, Size);                                    \
1516     TestImplPXmmAddr(Src, dq2ps, Size);                                        \
1517     TestImplPXmmXmm(Dst, Src, tps2dq, Size);                                   \
1518     TestImplPXmmAddr(Src, tps2dq, Size);                                       \
1519     TestImplSXmmReg(Dst, GPR, si2ss, Size);                                    \
1520     TestImplSXmmAddr(Dst, si2ss, Size);                                        \
1521     TestImplSRegXmm(GPR, Src, tss2si, Size);                                   \
1522     TestImplSRegAddr(GPR, tss2si, Size);                                       \
1523     TestImplPXmmXmm(Dst, Src, float2float, Size);                              \
1524     TestImplPXmmAddr(Src, float2float, Size);                                  \
1525   } while (0)
1526 
1527 #define TestImpl(Dst, Src, GPR)                                                \
1528   do {                                                                         \
1529     TestImplSize(Dst, Src, GPR, 32);                                           \
1530     TestImplSize(Dst, Src, GPR, 64);                                           \
1531   } while (0)
1532 
1533   TestImpl(xmm0, xmm1, eax);
1534   TestImpl(xmm1, xmm2, ebx);
1535   TestImpl(xmm2, xmm3, ecx);
1536   TestImpl(xmm3, xmm4, edx);
1537   TestImpl(xmm4, xmm5, esi);
1538   TestImpl(xmm5, xmm6, edi);
1539   TestImpl(xmm6, xmm7, eax);
1540   TestImpl(xmm7, xmm0, ebx);
1541 
1542 #undef TestImpl
1543 #undef TestImplSize
1544 #undef TestImplSRegAddr
1545 #undef TestImplSXmmAddr
1546 #undef TestImplPXmmAddr
1547 #undef TestImplSRegXmm
1548 #undef TestImplSXmmReg
1549 #undef TestImplPXmmXmm
1550 }
1551 
TEST_F(AssemblerX8632Test,Ucomiss)1552 TEST_F(AssemblerX8632Test, Ucomiss) {
1553   static constexpr float qnan32 = std::numeric_limits<float>::quiet_NaN();
1554   static constexpr double qnan64 = std::numeric_limits<float>::quiet_NaN();
1555 
1556   Dqword test32DstValue(0.0, qnan32, qnan32, qnan32);
1557   Dqword test32SrcValue(0.0, qnan32, qnan32, qnan32);
1558 
1559   Dqword test64DstValue(0.0, qnan64);
1560   Dqword test64SrcValue(0.0, qnan64);
1561 
1562 #define TestImplXmmXmm(Dst, Value0, Src, Value1, Size, CompType, BParity,      \
1563                        BOther)                                                 \
1564   do {                                                                         \
1565     static constexpr char NearBranch = AssemblerX8632::kNearJump;              \
1566     static constexpr char TestString[] =                                       \
1567         "(" #Dst ", " #Value0 ", " #Src ", " #Value1 ", " #Size ", " #CompType \
1568         ", " #BParity ", " #BOther ")";                                        \
1569     const uint32_t T0 = allocateDqword();                                      \
1570     test##Size##DstValue.F##Size[0] = Value0;                                  \
1571     const uint32_t T1 = allocateDqword();                                      \
1572     test##Size##SrcValue.F##Size[0] = Value1;                                  \
1573     const uint32_t ImmIfTrue = 0xBEEF;                                         \
1574     const uint32_t ImmIfFalse = 0xC0FFE;                                       \
1575                                                                                \
1576     __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0));               \
1577     __ movups(XmmRegister::Encoded_Reg_##Src, dwordAddress(T1));               \
1578     __ mov(IceType_i32, GPRRegister::Encoded_Reg_eax, Immediate(ImmIfFalse));  \
1579     __ ucomiss(IceType_f##Size, XmmRegister::Encoded_Reg_##Dst,                \
1580                XmmRegister::Encoded_Reg_##Src);                                \
1581     Label Done;                                                                \
1582     __ j(Cond::Br_##BParity, &Done, NearBranch);                               \
1583     __ j(Cond::Br_##BOther, &Done, NearBranch);                                \
1584     __ mov(IceType_i32, GPRRegister::Encoded_Reg_eax, Immediate(ImmIfTrue));   \
1585     __ bind(&Done);                                                            \
1586                                                                                \
1587     AssembledTest test = assemble();                                           \
1588     test.setDqwordTo(T0, test##Size##DstValue);                                \
1589     test.setDqwordTo(T1, test##Size##SrcValue);                                \
1590     test.run();                                                                \
1591                                                                                \
1592     ASSERT_EQ(ImmIfTrue, test.eax()) << TestString;                            \
1593     reset();                                                                   \
1594   } while (0)
1595 
1596 #define TestImplXmmAddr(Dst, Value0, Value1, Size, CompType, BParity, BOther)  \
1597   do {                                                                         \
1598     static constexpr char NearBranch = AssemblerX8632::kNearJump;              \
1599     static constexpr char TestString[] =                                       \
1600         "(" #Dst ", " #Value0 ", Addr, " #Value1 ", " #Size ", " #CompType     \
1601         ", " #BParity ", " #BOther ")";                                        \
1602     const uint32_t T0 = allocateDqword();                                      \
1603     test##Size##DstValue.F##Size[0] = Value0;                                  \
1604     const uint32_t T1 = allocateDqword();                                      \
1605     test##Size##SrcValue.F##Size[0] = Value1;                                  \
1606     const uint32_t ImmIfTrue = 0xBEEF;                                         \
1607     const uint32_t ImmIfFalse = 0xC0FFE;                                       \
1608                                                                                \
1609     __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0));               \
1610     __ mov(IceType_i32, GPRRegister::Encoded_Reg_eax, Immediate(ImmIfFalse));  \
1611     __ ucomiss(IceType_f##Size, XmmRegister::Encoded_Reg_##Dst,                \
1612                dwordAddress(T1));                                              \
1613     Label Done;                                                                \
1614     __ j(Cond::Br_##BParity, &Done, NearBranch);                               \
1615     __ j(Cond::Br_##BOther, &Done, NearBranch);                                \
1616     __ mov(IceType_i32, GPRRegister::Encoded_Reg_eax, Immediate(ImmIfTrue));   \
1617     __ bind(&Done);                                                            \
1618                                                                                \
1619     AssembledTest test = assemble();                                           \
1620     test.setDqwordTo(T0, test##Size##DstValue);                                \
1621     test.setDqwordTo(T1, test##Size##SrcValue);                                \
1622     test.run();                                                                \
1623                                                                                \
1624     ASSERT_EQ(ImmIfTrue, test.eax()) << TestString;                            \
1625     reset();                                                                   \
1626   } while (0)
1627 
1628 #define TestImplCond(Dst, Value0, Src, Value1, Size, CompType, BParity,        \
1629                      BOther)                                                   \
1630   do {                                                                         \
1631     TestImplXmmXmm(Dst, Value0, Src, Value1, Size, CompType, BParity, BOther); \
1632     TestImplXmmAddr(Dst, Value0, Value1, Size, CompType, BParity, BOther);     \
1633   } while (0)
1634 
1635 #define TestImplSize(Dst, Src, Size)                                           \
1636   do {                                                                         \
1637     TestImplCond(Dst, 1.0, Src, 1.0, Size, isEq, p, ne);                       \
1638     TestImplCond(Dst, 1.0, Src, 2.0, Size, isNe, p, e);                        \
1639     TestImplCond(Dst, 1.0, Src, 2.0, Size, isLe, p, a);                        \
1640     TestImplCond(Dst, 1.0, Src, 1.0, Size, isLe, p, a);                        \
1641     TestImplCond(Dst, 1.0, Src, 2.0, Size, isLt, p, ae);                       \
1642     TestImplCond(Dst, 2.0, Src, 1.0, Size, isGe, p, b);                        \
1643     TestImplCond(Dst, 1.0, Src, 1.0, Size, isGe, p, b);                        \
1644     TestImplCond(Dst, 2.0, Src, 1.0, Size, isGt, p, be);                       \
1645     TestImplCond(Dst, qnan##Size, Src, 1.0, Size, isUnord, np, o);             \
1646     TestImplCond(Dst, 1.0, Src, qnan##Size, Size, isUnord, np, s);             \
1647     TestImplCond(Dst, qnan##Size, Src, qnan##Size, Size, isUnord, np, s);      \
1648   } while (0)
1649 
1650 #define TestImpl(Dst, Src)                                                     \
1651   do {                                                                         \
1652     TestImplSize(Dst, Src, 32);                                                \
1653     TestImplSize(Dst, Src, 64);                                                \
1654   } while (0)
1655 
1656   TestImpl(xmm0, xmm1);
1657   TestImpl(xmm1, xmm2);
1658   TestImpl(xmm2, xmm3);
1659   TestImpl(xmm3, xmm4);
1660   TestImpl(xmm4, xmm5);
1661   TestImpl(xmm5, xmm6);
1662   TestImpl(xmm6, xmm7);
1663   TestImpl(xmm7, xmm0);
1664 
1665 #undef TestImpl
1666 #undef TestImplSize
1667 #undef TestImplCond
1668 #undef TestImplXmmAddr
1669 #undef TestImplXmmXmm
1670 }
1671 
TEST_F(AssemblerX8632Test,Sqrtss)1672 TEST_F(AssemblerX8632Test, Sqrtss) {
1673   Dqword test32SrcValue(-100.0, -100.0, -100.0, -100.0);
1674   Dqword test32DstValue(-1.0, -1.0, -1.0, -1.0);
1675 
1676   Dqword test64SrcValue(-100.0, -100.0);
1677   Dqword test64DstValue(-1.0, -1.0);
1678 
1679 #define TestSqrtssXmmXmm(Dst, Src, Value1, Result, Size)                       \
1680   do {                                                                         \
1681     static constexpr char TestString[] =                                       \
1682         "(" #Dst ", " #Src ", " #Value1 ", " #Result ", " #Size ")";           \
1683     const uint32_t T0 = allocateDqword();                                      \
1684     test##Size##SrcValue.F##Size[0] = Value1;                                  \
1685     const uint32_t T1 = allocateDqword();                                      \
1686                                                                                \
1687     __ movups(XmmRegister::Encoded_Reg_##Src, dwordAddress(T0));               \
1688     __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T1));               \
1689     __ sqrt(IceType_f##Size, XmmRegister::Encoded_Reg_##Dst,                   \
1690             XmmRegister::Encoded_Reg_##Src);                                   \
1691                                                                                \
1692     AssembledTest test = assemble();                                           \
1693     test.setDqwordTo(T0, test##Size##SrcValue);                                \
1694     test.setDqwordTo(T1, test##Size##DstValue);                                \
1695     test.run();                                                                \
1696                                                                                \
1697     Dqword Expected = test##Size##DstValue;                                    \
1698     Expected.F##Size[0] = Result;                                              \
1699     ASSERT_EQ(Expected, test.Dst<Dqword>()) << TestString;                     \
1700     reset();                                                                   \
1701   } while (0)
1702 
1703 #define TestSqrtssXmmAddr(Dst, Value1, Result, Size)                           \
1704   do {                                                                         \
1705     static constexpr char TestString[] =                                       \
1706         "(" #Dst ", Addr, " #Value1 ", " #Result ", " #Size ")";               \
1707     const uint32_t T0 = allocateDqword();                                      \
1708     test##Size##SrcValue.F##Size[0] = Value1;                                  \
1709     const uint32_t T1 = allocateDqword();                                      \
1710                                                                                \
1711     __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T1));               \
1712     __ sqrt(IceType_f##Size, XmmRegister::Encoded_Reg_##Dst,                   \
1713             dwordAddress(T0));                                                 \
1714                                                                                \
1715     AssembledTest test = assemble();                                           \
1716     test.setDqwordTo(T0, test##Size##SrcValue);                                \
1717     test.setDqwordTo(T1, test##Size##DstValue);                                \
1718     test.run();                                                                \
1719                                                                                \
1720     Dqword Expected = test##Size##DstValue;                                    \
1721     Expected.F##Size[0] = Result;                                              \
1722     ASSERT_EQ(Expected, test.Dst<Dqword>()) << TestString;                     \
1723     reset();                                                                   \
1724   } while (0)
1725 
1726 #define TestSqrtssSize(Dst, Src, Size)                                         \
1727   do {                                                                         \
1728     TestSqrtssXmmXmm(Dst, Src, 4.0, 2.0, Size);                                \
1729     TestSqrtssXmmAddr(Dst, 4.0, 2.0, Size);                                    \
1730     TestSqrtssXmmXmm(Dst, Src, 9.0, 3.0, Size);                                \
1731     TestSqrtssXmmAddr(Dst, 9.0, 3.0, Size);                                    \
1732     TestSqrtssXmmXmm(Dst, Src, 100.0, 10.0, Size);                             \
1733     TestSqrtssXmmAddr(Dst, 100.0, 10.0, Size);                                 \
1734   } while (0)
1735 
1736 #define TestSqrtss(Dst, Src)                                                   \
1737   do {                                                                         \
1738     TestSqrtssSize(Dst, Src, 32);                                              \
1739     TestSqrtssSize(Dst, Src, 64);                                              \
1740   } while (0)
1741 
1742   TestSqrtss(xmm0, xmm1);
1743   TestSqrtss(xmm1, xmm2);
1744   TestSqrtss(xmm2, xmm3);
1745   TestSqrtss(xmm3, xmm4);
1746   TestSqrtss(xmm4, xmm5);
1747   TestSqrtss(xmm5, xmm6);
1748   TestSqrtss(xmm6, xmm7);
1749   TestSqrtss(xmm7, xmm0);
1750 
1751 #undef TestSqrtss
1752 #undef TestSqrtssSize
1753 #undef TestSqrtssXmmAddr
1754 #undef TestSqrtssXmmXmm
1755 }
1756 
TEST_F(AssemblerX8632Test,Insertps)1757 TEST_F(AssemblerX8632Test, Insertps) {
1758 #define TestInsertpsXmmXmmImm(Dst, Value0, Src, Value1, Imm, Expected)         \
1759   do {                                                                         \
1760     static constexpr char TestString[] =                                       \
1761         "(" #Dst ", " #Value0 ", " #Src ", " #Value1 ", " #Imm ", " #Expected  \
1762         ")";                                                                   \
1763     const uint32_t T0 = allocateDqword();                                      \
1764     const Dqword V0 Value0;                                                    \
1765     const uint32_t T1 = allocateDqword();                                      \
1766     const Dqword V1 Value1;                                                    \
1767                                                                                \
1768     __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0));               \
1769     __ movups(XmmRegister::Encoded_Reg_##Src, dwordAddress(T1));               \
1770     __ insertps(IceType_v4f32, XmmRegister::Encoded_Reg_##Dst,                 \
1771                 XmmRegister::Encoded_Reg_##Src, Immediate(Imm));               \
1772                                                                                \
1773     AssembledTest test = assemble();                                           \
1774     test.setDqwordTo(T0, V0);                                                  \
1775     test.setDqwordTo(T1, V1);                                                  \
1776     test.run();                                                                \
1777                                                                                \
1778     ASSERT_EQ(Dqword Expected, test.Dst<Dqword>()) << TestString;              \
1779     reset();                                                                   \
1780   } while (0)
1781 
1782 #define TestInsertpsXmmAddrImm(Dst, Value0, Value1, Imm, Expected)             \
1783   do {                                                                         \
1784     static constexpr char TestString[] =                                       \
1785         "(" #Dst ", " #Value0 ", Addr, " #Value1 ", " #Imm ", " #Expected ")"; \
1786     const uint32_t T0 = allocateDqword();                                      \
1787     const Dqword V0 Value0;                                                    \
1788     const uint32_t T1 = allocateDqword();                                      \
1789     const Dqword V1 Value1;                                                    \
1790                                                                                \
1791     __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0));               \
1792     __ insertps(IceType_v4f32, XmmRegister::Encoded_Reg_##Dst,                 \
1793                 dwordAddress(T1), Immediate(Imm));                             \
1794                                                                                \
1795     AssembledTest test = assemble();                                           \
1796     test.setDqwordTo(T0, V0);                                                  \
1797     test.setDqwordTo(T1, V1);                                                  \
1798     test.run();                                                                \
1799                                                                                \
1800     ASSERT_EQ(Dqword Expected, test.Dst<Dqword>()) << TestString;              \
1801     reset();                                                                   \
1802   } while (0)
1803 
1804 #define TestInsertps(Dst, Src)                                                 \
1805   do {                                                                         \
1806     TestInsertpsXmmXmmImm(                                                     \
1807         Dst, (uint64_t(-1), uint64_t(-1)), Src,                                \
1808         (uint64_t(0xAAAAAAAABBBBBBBBull), uint64_t(0xCCCCCCCCDDDDDDDDull)),    \
1809         0x99,                                                                  \
1810         (uint64_t(0xDDDDDDDD00000000ull), uint64_t(0x00000000FFFFFFFFull)));   \
1811     TestInsertpsXmmAddrImm(                                                    \
1812         Dst, (uint64_t(-1), uint64_t(-1)),                                     \
1813         (uint64_t(0xAAAAAAAABBBBBBBBull), uint64_t(0xCCCCCCCCDDDDDDDDull)),    \
1814         0x99,                                                                  \
1815         (uint64_t(0xBBBBBBBB00000000ull), uint64_t(0x00000000FFFFFFFFull)));   \
1816     TestInsertpsXmmXmmImm(                                                     \
1817         Dst, (uint64_t(-1), uint64_t(-1)), Src,                                \
1818         (uint64_t(0xAAAAAAAABBBBBBBBull), uint64_t(0xCCCCCCCCDDDDDDDDull)),    \
1819         0x9D,                                                                  \
1820         (uint64_t(0xDDDDDDDD00000000ull), uint64_t(0x0000000000000000ull)));   \
1821     TestInsertpsXmmAddrImm(                                                    \
1822         Dst, (uint64_t(-1), uint64_t(-1)),                                     \
1823         (uint64_t(0xAAAAAAAABBBBBBBBull), uint64_t(0xCCCCCCCCDDDDDDDDull)),    \
1824         0x9D,                                                                  \
1825         (uint64_t(0xBBBBBBBB00000000ull), uint64_t(0x0000000000000000ull)));   \
1826   } while (0)
1827 
1828   TestInsertps(xmm0, xmm1);
1829   TestInsertps(xmm1, xmm2);
1830   TestInsertps(xmm2, xmm3);
1831   TestInsertps(xmm3, xmm4);
1832   TestInsertps(xmm4, xmm5);
1833   TestInsertps(xmm5, xmm6);
1834   TestInsertps(xmm6, xmm7);
1835   TestInsertps(xmm7, xmm0);
1836 
1837 #undef TestInsertps
1838 #undef TestInsertpsXmmXmmAddr
1839 #undef TestInsertpsXmmXmmImm
1840 }
1841 
TEST_F(AssemblerX8632Test,Pinsr)1842 TEST_F(AssemblerX8632Test, Pinsr) {
1843   static constexpr uint8_t Mask32 = 0x03;
1844   static constexpr uint8_t Mask16 = 0x07;
1845   static constexpr uint8_t Mask8 = 0x0F;
1846 
1847 #define TestPinsrXmmGPRImm(Dst, Value0, GPR, Value1, Imm, Size)                \
1848   do {                                                                         \
1849     static constexpr char TestString[] =                                       \
1850         "(" #Dst ", " #Value0 ", " #GPR ", " #Value1 ", " #Imm ", " #Size ")"; \
1851     const uint32_t T0 = allocateDqword();                                      \
1852     const Dqword V0 Value0;                                                    \
1853                                                                                \
1854     __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0));               \
1855     __ mov(IceType_i32, GPRRegister::Encoded_Reg_##GPR, Immediate(Value1));    \
1856     __ pinsr(IceType_i##Size, XmmRegister::Encoded_Reg_##Dst,                  \
1857              GPRRegister::Encoded_Reg_##GPR, Immediate(Imm));                  \
1858                                                                                \
1859     AssembledTest test = assemble();                                           \
1860     test.setDqwordTo(T0, V0);                                                  \
1861     test.run();                                                                \
1862                                                                                \
1863     constexpr uint8_t sel = (Imm)&Mask##Size;                                  \
1864     Dqword Expected = V0;                                                      \
1865     Expected.U##Size[sel] = Value1;                                            \
1866     ASSERT_EQ(Expected, test.Dst<Dqword>()) << TestString;                     \
1867     reset();                                                                   \
1868   } while (0)
1869 
1870 #define TestPinsrXmmAddrImm(Dst, Value0, Value1, Imm, Size)                    \
1871   do {                                                                         \
1872     static constexpr char TestString[] =                                       \
1873         "(" #Dst ", " #Value0 ", Addr, " #Value1 ", " #Imm ", " #Size ")";     \
1874     const uint32_t T0 = allocateDqword();                                      \
1875     const Dqword V0 Value0;                                                    \
1876     const uint32_t T1 = allocateDword();                                       \
1877     const uint32_t V1 = Value1;                                                \
1878                                                                                \
1879     __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0));               \
1880     __ pinsr(IceType_i##Size, XmmRegister::Encoded_Reg_##Dst,                  \
1881              dwordAddress(T1), Immediate(Imm));                                \
1882                                                                                \
1883     AssembledTest test = assemble();                                           \
1884     test.setDqwordTo(T0, V0);                                                  \
1885     test.setDwordTo(T1, V1);                                                   \
1886     test.run();                                                                \
1887                                                                                \
1888     constexpr uint8_t sel = (Imm)&Mask##Size;                                  \
1889     Dqword Expected = V0;                                                      \
1890     Expected.U##Size[sel] = Value1;                                            \
1891     ASSERT_EQ(Expected, test.Dst<Dqword>()) << TestString;                     \
1892     reset();                                                                   \
1893   } while (0)
1894 
1895 #define TestPinsrSize(Dst, GPR, Value1, Imm, Size)                             \
1896   do {                                                                         \
1897     TestPinsrXmmGPRImm(                                                        \
1898         Dst,                                                                   \
1899         (uint64_t(0xAAAAAAAABBBBBBBBull), uint64_t(0xFFFFFFFFDDDDDDDDull)),    \
1900         GPR, Value1, Imm, Size);                                               \
1901     TestPinsrXmmAddrImm(                                                       \
1902         Dst,                                                                   \
1903         (uint64_t(0xAAAAAAAABBBBBBBBull), uint64_t(0xFFFFFFFFDDDDDDDDull)),    \
1904         Value1, Imm, Size);                                                    \
1905   } while (0)
1906 
1907 #define TestPinsr(Src, Dst)                                                    \
1908   do {                                                                         \
1909     TestPinsrSize(Src, Dst, 0xEE, 0x03, 8);                                    \
1910     TestPinsrSize(Src, Dst, 0xFFEE, 0x03, 16);                                 \
1911     TestPinsrSize(Src, Dst, 0xC0FFEE, 0x03, 32);                               \
1912   } while (0)
1913 
1914   TestPinsr(xmm0, eax);
1915   TestPinsr(xmm1, ebx);
1916   TestPinsr(xmm2, ecx);
1917   TestPinsr(xmm3, edx);
1918   TestPinsr(xmm4, esi);
1919   TestPinsr(xmm5, edi);
1920   TestPinsr(xmm6, eax);
1921   TestPinsr(xmm7, ebx);
1922 
1923 #undef TestPinsr
1924 #undef TestPinsrSize
1925 #undef TestPinsrXmmAddrImm
1926 #undef TestPinsrXmmGPRImm
1927 }
1928 
TEST_F(AssemblerX8632Test,Pextr)1929 TEST_F(AssemblerX8632Test, Pextr) {
1930   static constexpr uint8_t Mask32 = 0x03;
1931   static constexpr uint8_t Mask16 = 0x07;
1932   static constexpr uint8_t Mask8 = 0x0F;
1933 
1934 #define TestPextrGPRXmmImm(GPR, Src, Value1, Imm, Size)                        \
1935   do {                                                                         \
1936     static constexpr char TestString[] =                                       \
1937         "(" #GPR ", " #Src ", " #Value1 ", " #Imm ", " #Size ")";              \
1938     const uint32_t T0 = allocateDqword();                                      \
1939     const Dqword V0 Value1;                                                    \
1940                                                                                \
1941     __ movups(XmmRegister::Encoded_Reg_##Src, dwordAddress(T0));               \
1942     __ pextr(IceType_i##Size, GPRRegister::Encoded_Reg_##GPR,                  \
1943              XmmRegister::Encoded_Reg_##Src, Immediate(Imm));                  \
1944                                                                                \
1945     AssembledTest test = assemble();                                           \
1946     test.setDqwordTo(T0, V0);                                                  \
1947     test.run();                                                                \
1948                                                                                \
1949     constexpr uint8_t sel = (Imm)&Mask##Size;                                  \
1950     ASSERT_EQ(V0.U##Size[sel], test.GPR()) << TestString;                      \
1951     reset();                                                                   \
1952   } while (0)
1953 
1954 #define TestPextrSize(GPR, Src, Value1, Imm, Size)                             \
1955   do {                                                                         \
1956     TestPextrGPRXmmImm(                                                        \
1957         GPR, Src,                                                              \
1958         (uint64_t(0xAAAAAAAABBBBBBBBull), uint64_t(0xFFFFFFFFDDDDDDDDull)),    \
1959         Imm, Size);                                                            \
1960   } while (0)
1961 
1962 #define TestPextr(Src, Dst)                                                    \
1963   do {                                                                         \
1964     TestPextrSize(Src, Dst, 0xEE, 0x03, 8);                                    \
1965     TestPextrSize(Src, Dst, 0xFFEE, 0x03, 16);                                 \
1966     TestPextrSize(Src, Dst, 0xC0FFEE, 0x03, 32);                               \
1967   } while (0)
1968 
1969   TestPextr(eax, xmm0);
1970   TestPextr(ebx, xmm1);
1971   TestPextr(ecx, xmm2);
1972   TestPextr(edx, xmm3);
1973   TestPextr(esi, xmm4);
1974   TestPextr(edi, xmm5);
1975   TestPextr(eax, xmm6);
1976   TestPextr(ebx, xmm7);
1977 
1978 #undef TestPextr
1979 #undef TestPextrSize
1980 #undef TestPextrXmmGPRImm
1981 }
1982 
TEST_F(AssemblerX8632Test,Pcmpeq_Pcmpgt)1983 TEST_F(AssemblerX8632Test, Pcmpeq_Pcmpgt) {
1984 #define TestPcmpXmmXmm(Dst, Value0, Src, Value1, Size, Inst, Op)               \
1985   do {                                                                         \
1986     static constexpr char TestString[] =                                       \
1987         "(" #Dst ", " #Value0 ", " #Src ", " #Value1 ", " #Size ", " #Op ")";  \
1988     const uint32_t T0 = allocateDqword();                                      \
1989     const Dqword V0 Value0;                                                    \
1990     const uint32_t T1 = allocateDqword();                                      \
1991     const Dqword V1 Value1;                                                    \
1992                                                                                \
1993     __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0));               \
1994     __ movups(XmmRegister::Encoded_Reg_##Src, dwordAddress(T1));               \
1995     __ Inst(IceType_i##Size, XmmRegister::Encoded_Reg_##Dst,                   \
1996             XmmRegister::Encoded_Reg_##Src);                                   \
1997                                                                                \
1998     AssembledTest test = assemble();                                           \
1999     test.setDqwordTo(T0, V0);                                                  \
2000     test.setDqwordTo(T1, V1);                                                  \
2001     test.run();                                                                \
2002                                                                                \
2003     Dqword Expected(uint64_t(0), uint64_t(0));                                 \
2004     static constexpr uint8_t ArraySize =                                       \
2005         sizeof(Dqword) / sizeof(uint##Size##_t);                               \
2006     for (uint8_t i = 0; i < ArraySize; ++i) {                                  \
2007       Expected.I##Size[i] = (V1.I##Size[i] Op V0.I##Size[i]) ? -1 : 0;         \
2008     }                                                                          \
2009     ASSERT_EQ(Expected, test.Dst<Dqword>()) << TestString;                     \
2010     reset();                                                                   \
2011   } while (0)
2012 
2013 #define TestPcmpXmmAddr(Dst, Value0, Value1, Size, Inst, Op)                   \
2014   do {                                                                         \
2015     static constexpr char TestString[] =                                       \
2016         "(" #Dst ", " #Value0 ", Addr, " #Value1 ", " #Size ", " #Op ")";      \
2017     const uint32_t T0 = allocateDqword();                                      \
2018     const Dqword V0 Value0;                                                    \
2019     const uint32_t T1 = allocateDqword();                                      \
2020     const Dqword V1 Value1;                                                    \
2021                                                                                \
2022     __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0));               \
2023     __ Inst(IceType_i##Size, XmmRegister::Encoded_Reg_##Dst,                   \
2024             dwordAddress(T1));                                                 \
2025                                                                                \
2026     AssembledTest test = assemble();                                           \
2027     test.setDqwordTo(T0, V0);                                                  \
2028     test.setDqwordTo(T1, V1);                                                  \
2029     test.run();                                                                \
2030                                                                                \
2031     Dqword Expected(uint64_t(0), uint64_t(0));                                 \
2032     static constexpr uint8_t ArraySize =                                       \
2033         sizeof(Dqword) / sizeof(uint##Size##_t);                               \
2034     for (uint8_t i = 0; i < ArraySize; ++i) {                                  \
2035       Expected.I##Size[i] = (V1.I##Size[i] Op V0.I##Size[i]) ? -1 : 0;         \
2036     }                                                                          \
2037     ASSERT_EQ(Expected, test.Dst<Dqword>()) << TestString;                     \
2038     reset();                                                                   \
2039   } while (0)
2040 
2041 #define TestPcmpValues(Dst, Value0, Src, Value1, Size)                         \
2042   do {                                                                         \
2043     TestPcmpXmmXmm(Dst, Value0, Src, Value1, Size, pcmpeq, ==);                \
2044     TestPcmpXmmAddr(Dst, Value0, Value1, Size, pcmpeq, ==);                    \
2045     TestPcmpXmmXmm(Dst, Value0, Src, Value1, Size, pcmpgt, <);                 \
2046     TestPcmpXmmAddr(Dst, Value0, Value1, Size, pcmpgt, <);                     \
2047   } while (0)
2048 
2049 #define TestPcmpSize(Dst, Src, Size)                                           \
2050   do {                                                                         \
2051     TestPcmpValues(                                                            \
2052         Dst,                                                                   \
2053         (uint64_t(0x8888888888888888ull), uint64_t(0x0000000000000000ull)),    \
2054         Src,                                                                   \
2055         (uint64_t(0x0000008800008800ull), uint64_t(0xFFFFFFFFFFFFFFFFull)),    \
2056         Size);                                                                 \
2057     TestPcmpValues(                                                            \
2058         Dst,                                                                   \
2059         (uint64_t(0x123567ABAB55DE01ull), uint64_t(0x12345abcde12345Aull)),    \
2060         Src,                                                                   \
2061         (uint64_t(0x0000008800008800ull), uint64_t(0xAABBCCDD1234321Aull)),    \
2062         Size);                                                                 \
2063   } while (0)
2064 
2065 #define TestPcmp(Dst, Src)                                                     \
2066   do {                                                                         \
2067     TestPcmpSize(xmm0, xmm1, 8);                                               \
2068     TestPcmpSize(xmm0, xmm1, 16);                                              \
2069     TestPcmpSize(xmm0, xmm1, 32);                                              \
2070   } while (0)
2071 
2072   TestPcmp(xmm0, xmm1);
2073   TestPcmp(xmm1, xmm2);
2074   TestPcmp(xmm2, xmm3);
2075   TestPcmp(xmm3, xmm4);
2076   TestPcmp(xmm4, xmm5);
2077   TestPcmp(xmm5, xmm6);
2078   TestPcmp(xmm6, xmm7);
2079   TestPcmp(xmm7, xmm0);
2080 
2081 #undef TestPcmp
2082 #undef TestPcmpSize
2083 #undef TestPcmpValues
2084 #undef TestPcmpXmmAddr
2085 #undef TestPcmpXmmXmm
2086 }
2087 
TEST_F(AssemblerX8632Test,Roundsd)2088 TEST_F(AssemblerX8632Test, Roundsd) {
2089 #define TestRoundsdXmmXmm(Dst, Src, Mode, Input, RN)                           \
2090   do {                                                                         \
2091     static constexpr char TestString[] =                                       \
2092         "(" #Dst ", " #Src ", " #Mode ", " #Input ", " #RN ")";                \
2093     const uint32_t T0 = allocateDqword();                                      \
2094     const Dqword V0(-3.0, -3.0);                                               \
2095     const uint32_t T1 = allocateDqword();                                      \
2096     const Dqword V1(double(Input), -123.4);                                    \
2097                                                                                \
2098     __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0));               \
2099     __ movups(XmmRegister::Encoded_Reg_##Src, dwordAddress(T1));               \
2100     __ round(IceType_f64, XmmRegister::Encoded_Reg_##Dst,                      \
2101              XmmRegister::Encoded_Reg_##Src,                                   \
2102              Immediate(AssemblerX8632::k##Mode));                              \
2103                                                                                \
2104     AssembledTest test = assemble();                                           \
2105     test.setDqwordTo(T0, V0);                                                  \
2106     test.setDqwordTo(T1, V1);                                                  \
2107     test.run();                                                                \
2108                                                                                \
2109     const Dqword Expected(double(RN), -3.0);                                   \
2110     EXPECT_EQ(Expected, test.Dst<Dqword>()) << TestString;                     \
2111     reset();                                                                   \
2112   } while (0)
2113 
2114 #define TestRoundsd(Dst, Src)                                                  \
2115   do {                                                                         \
2116     TestRoundsdXmmXmm(Dst, Src, RoundToNearest, 5.51, 6);                      \
2117     TestRoundsdXmmXmm(Dst, Src, RoundToNearest, 5.49, 5);                      \
2118     TestRoundsdXmmXmm(Dst, Src, RoundDown, 5.51, 5);                           \
2119     TestRoundsdXmmXmm(Dst, Src, RoundUp, 5.49, 6);                             \
2120     TestRoundsdXmmXmm(Dst, Src, RoundToZero, 5.49, 5);                         \
2121     TestRoundsdXmmXmm(Dst, Src, RoundToZero, 5.51, 5);                         \
2122   } while (0)
2123 
2124   TestRoundsd(xmm0, xmm1);
2125   TestRoundsd(xmm1, xmm2);
2126   TestRoundsd(xmm2, xmm3);
2127   TestRoundsd(xmm3, xmm4);
2128   TestRoundsd(xmm4, xmm5);
2129   TestRoundsd(xmm5, xmm6);
2130   TestRoundsd(xmm6, xmm7);
2131   TestRoundsd(xmm7, xmm0);
2132 
2133 #undef TestRoundsd
2134 #undef TestRoundsdXmmXmm
2135 }
2136 
// Verifies set1ps: after the call, each 32-bit quarter of the XMM register is
// expected to hold Imm.
TEST_F(AssemblerX8632Test, Set1ps) {
  // Gpr is handed to set1ps alongside the XMM register; only the XMM
  // register's contents are checked afterwards.
#define TestImpl(Xmm, Gpr, Imm)                                                \
  do {                                                                         \
    const uint64_t Pair = (uint64_t(Imm) << 32) | uint32_t(Imm);               \
    const Dqword Expected(Pair, Pair);                                         \
                                                                               \
    __ set1ps(XmmRegister::Encoded_Reg_##Xmm, GPRRegister::Encoded_Reg_##Gpr,  \
              Immediate(Imm));                                                 \
                                                                               \
    AssembledTest test = assemble();                                           \
    test.run();                                                                \
                                                                               \
    ASSERT_EQ(Expected, test.Xmm<Dqword>())                                    \
        << "(" #Xmm ", " #Gpr ", " #Imm ")";                                   \
    reset();                                                                   \
  } while (0)

  TestImpl(xmm0, ebx, 1);
  TestImpl(xmm1, ecx, 2);
  TestImpl(xmm2, edx, 3);
  TestImpl(xmm3, esi, 4);
  TestImpl(xmm4, edi, 5);
  TestImpl(xmm5, eax, 6);
  TestImpl(xmm6, ebx, 7);
  TestImpl(xmm7, ecx, 8);

#undef TestImpl
}
2164 
2165 } // end of anonymous namespace
2166 } // end of namespace Test
2167 } // end of namespace X8632
2168 } // end of namespace Ice
2169