xref: /aosp_15_r20/external/swiftshader/third_party/subzero/unittest/AssemblerX8664/XmmArith.cpp (revision 03ce13f70fcc45d86ee91b7ee4cab1936a95046e)
1 //===- subzero/unittest/AssemblerX8664/XmmArith.cpp -----------------------===//
2 //
3 //                        The Subzero Code Generator
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 #include "AssemblerX8664/TestUtil.h"
10 
11 namespace Ice {
12 namespace X8664 {
13 namespace Test {
14 namespace {
15 
// ArithSS: checks the scalar floating-point emitters addss, subss, mulss and
// divss. Each instruction is tested for f32 and f64, with the second operand
// taken from a register and from memory, using every register xmm0..xmm15.
// The helper macros below are local to this test and are #undef'd at the end.
TEST_F(AssemblerX8664Test,ArithSS)16 TEST_F(AssemblerX8664Test, ArithSS) {
  // TestArithSSXmmXmm: register/register form.
  //   FloatSize - 32 or 64 (enforced by the static_assert); it selects
  //               IceType_f32/IceType_f64 and float/double as Type.
  //   Src, Dst  - register names; they are pasted into Encoded_Xmm_<reg>() and
  //               Dst is also pasted into the test.<Dst><Type>() accessor.
  //   Value0    - stored in slot T0 and loaded into Dst.
  //   Value1    - stored in slot T1 and loaded into Src.
  //   Inst, Op  - assembler method to emit and the C++ operator used to compute
  //               the expected result V0 Op V1.
  // The code is emitted and assembled first; the operand values are written
  // into the slots afterwards, just before test.run(). Both slots come from
  // allocateQword() whatever FloatSize is; only the setDwordTo/setQwordTo call
  // depends on IsDouble. The result is compared with ASSERT_DOUBLE_EQ for both
  // float and double. TestString is the stringified argument list and is
  // streamed into the assertion message.
  // reset() presumably returns the fixture to a clean state for the next
  // expansion -- confirm in TestUtil.h.
17 #define TestArithSSXmmXmm(FloatSize, Src, Value0, Dst, Value1, Inst, Op)       \
18   do {                                                                         \
19     static_assert(FloatSize == 32 || FloatSize == 64,                          \
20                   "Invalid fp size " #FloatSize);                              \
21     static constexpr char TestString[] =                                       \
22         "(" #FloatSize ", " #Src ", " #Value0 ", " #Dst ", " #Value1           \
23         ", " #Inst ", " #Op ")";                                               \
24     static constexpr bool IsDouble = FloatSize == 64;                          \
25     using Type = std::conditional<IsDouble, double, float>::type;              \
26     const uint32_t T0 = allocateQword();                                       \
27     const Type V0 = Value0;                                                    \
28     const uint32_t T1 = allocateQword();                                       \
29     const Type V1 = Value1;                                                    \
30                                                                                \
31     __ movss(IceType_f##FloatSize, Encoded_Xmm_##Dst(), dwordAddress(T0));     \
32     __ movss(IceType_f##FloatSize, Encoded_Xmm_##Src(), dwordAddress(T1));     \
33     __ Inst(IceType_f##FloatSize, Encoded_Xmm_##Dst(), Encoded_Xmm_##Src());   \
34                                                                                \
35     AssembledTest test = assemble();                                           \
36     if (IsDouble) {                                                            \
37       test.setQwordTo(T0, static_cast<double>(V0));                            \
38       test.setQwordTo(T1, static_cast<double>(V1));                            \
39     } else {                                                                   \
40       test.setDwordTo(T0, static_cast<float>(V0));                             \
41       test.setDwordTo(T1, static_cast<float>(V1));                             \
42     }                                                                          \
43                                                                                \
44     test.run();                                                                \
45                                                                                \
46     ASSERT_DOUBLE_EQ(V0 Op V1, test.Dst<Type>()) << TestString;                \
47     reset();                                                                   \
48   } while (0)
49 
  // TestArithSSXmmAddr: register/memory form. Same flow as TestArithSSXmmXmm,
  // but there is no Src register: Inst receives dwordAddress(T1) directly as
  // its second operand, and TestString records the literal "Addr" in the place
  // of the source register name.
50 #define TestArithSSXmmAddr(FloatSize, Value0, Dst, Value1, Inst, Op)           \
51   do {                                                                         \
52     static_assert(FloatSize == 32 || FloatSize == 64,                          \
53                   "Invalid fp size " #FloatSize);                              \
54     static constexpr char TestString[] =                                       \
55         "(" #FloatSize ", Addr, " #Value0 ", " #Dst ", " #Value1 ", " #Inst    \
56         ", " #Op ")";                                                          \
57     static constexpr bool IsDouble = FloatSize == 64;                          \
58     using Type = std::conditional<IsDouble, double, float>::type;              \
59     const uint32_t T0 = allocateQword();                                       \
60     const Type V0 = Value0;                                                    \
61     const uint32_t T1 = allocateQword();                                       \
62     const Type V1 = Value1;                                                    \
63                                                                                \
64     __ movss(IceType_f##FloatSize, Encoded_Xmm_##Dst(), dwordAddress(T0));     \
65     __ Inst(IceType_f##FloatSize, Encoded_Xmm_##Dst(), dwordAddress(T1));      \
66                                                                                \
67     AssembledTest test = assemble();                                           \
68     if (IsDouble) {                                                            \
69       test.setQwordTo(T0, static_cast<double>(V0));                            \
70       test.setQwordTo(T1, static_cast<double>(V1));                            \
71     } else {                                                                   \
72       test.setDwordTo(T0, static_cast<float>(V0));                             \
73       test.setDwordTo(T1, static_cast<float>(V1));                             \
74     }                                                                          \
75                                                                                \
76     test.run();                                                                \
77                                                                                \
78     ASSERT_DOUBLE_EQ(V0 Op V1, test.Dst<Type>()) << TestString;                \
79     reset();                                                                   \
80   } while (0)
81 
  // TestArithSS: for one FloatSize, runs add/sub/mul/div twice each -- the
  // register form writing to Dst0 and the memory form writing to Dst1. Every
  // case uses its own pair of constants (1/10, 2/20, ... 8/80).
82 #define TestArithSS(FloatSize, Src, Dst0, Dst1)                                \
83   do {                                                                         \
84     TestArithSSXmmXmm(FloatSize, Src, 1.0, Dst0, 10.0, addss, +);              \
85     TestArithSSXmmAddr(FloatSize, 2.0, Dst1, 20.0, addss, +);                  \
86     TestArithSSXmmXmm(FloatSize, Src, 3.0, Dst0, 30.0, subss, -);              \
87     TestArithSSXmmAddr(FloatSize, 4.0, Dst1, 40.0, subss, -);                  \
88     TestArithSSXmmXmm(FloatSize, Src, 5.0, Dst0, 50.0, mulss, *);              \
89     TestArithSSXmmAddr(FloatSize, 6.0, Dst1, 60.0, mulss, *);                  \
90     TestArithSSXmmXmm(FloatSize, Src, 7.0, Dst0, 70.0, divss, /);              \
91     TestArithSSXmmAddr(FloatSize, 8.0, Dst1, 80.0, divss, /);                  \
92   } while (0)
93 
  // TestImpl: runs the whole TestArithSS set for f32 and then for f64.
94 #define TestImpl(Src, Dst0, Dst1)                                              \
95   do {                                                                         \
96     TestArithSS(32, Src, Dst0, Dst1);                                          \
97     TestArithSS(64, Src, Dst0, Dst1);                                          \
98   } while (0)
99 
  // The register triples rotate through xmm0..xmm15 (wrapping around at the
  // end), so each register is used once as Src, once as Dst0 and once as Dst1.
100   TestImpl(xmm0, xmm1, xmm2);
101   TestImpl(xmm1, xmm2, xmm3);
102   TestImpl(xmm2, xmm3, xmm4);
103   TestImpl(xmm3, xmm4, xmm5);
104   TestImpl(xmm4, xmm5, xmm6);
105   TestImpl(xmm5, xmm6, xmm7);
106   TestImpl(xmm6, xmm7, xmm8);
107   TestImpl(xmm7, xmm8, xmm9);
108   TestImpl(xmm8, xmm9, xmm10);
109   TestImpl(xmm9, xmm10, xmm11);
110   TestImpl(xmm10, xmm11, xmm12);
111   TestImpl(xmm11, xmm12, xmm13);
112   TestImpl(xmm12, xmm13, xmm14);
113   TestImpl(xmm13, xmm14, xmm15);
114   TestImpl(xmm14, xmm15, xmm0);
115   TestImpl(xmm15, xmm0, xmm1);
116 
  // Drop the helper macros so their names do not leak past this test.
117 #undef TestImpl
118 #undef TestArithSS
119 #undef TestArithSSXmmAddr
120 #undef TestArithSSXmmXmm
121 }
122 
// PArith: checks the packed-integer emitters psra, psrl, psll, pmull, pmuludq,
// padd, psub, pand, pandn, por and pxor for element sizes 8, 16 and 32 (not
// every instruction is issued for every size -- see TestPArithSize), using
// every register xmm0..xmm15. The helper macros below are local to this test
// and are #undef'd at the end.
TEST_F(AssemblerX8664Test,PArith)123 TEST_F(AssemblerX8664Test, PArith) {
  // TestPArithXmmXmm: register/register form.
  //   Dst, Src        - register names, pasted into Encoded_Xmm_<reg>() and
  //                     (Dst only) into the test.<Dst><Dqword>() accessor.
  //   Value0, Value1  - parenthesised constructor argument lists: the text
  //                     `const Dqword V0 Value0;` expands to
  //                     `const Dqword V0(lo, hi);`. V0 is loaded into Dst from
  //                     slot T0, V1 into Src from slot T1, both with movups.
  //   Inst, Op        - assembler method to emit (called with
  //                     IceType_i<Size>) and the C++ operator used to compute
  //                     the expected value.
  //   Type, Size      - pasted together into <Type><Size>_t (e.g. int16_t),
  //                     the element type handed to packedAs<>() for V0.
  // The code is emitted and assembled first; the operands are written into the
  // slots just before test.run(). TestString is the stringified argument list
  // and is streamed into the assertion message.
124 #define TestPArithXmmXmm(Dst, Value0, Src, Value1, Inst, Op, Type, Size)       \
125   do {                                                                         \
126     static constexpr char TestString[] =                                       \
127         "(" #Dst ", " #Value0 ", " #Src ", " #Value1 ", " #Inst ", " #Op       \
128         ", " #Type ", " #Size ")";                                             \
129     const uint32_t T0 = allocateDqword();                                      \
130     const Dqword V0 Value0;                                                    \
131                                                                                \
132     const uint32_t T1 = allocateDqword();                                      \
133     const Dqword V1 Value1;                                                    \
134                                                                                \
135     __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0));                          \
136     __ movups(Encoded_Xmm_##Src(), dwordAddress(T1));                          \
137     __ Inst(IceType_i##Size, Encoded_Xmm_##Dst(), Encoded_Xmm_##Src());        \
138                                                                                \
139     AssembledTest test = assemble();                                           \
140     test.setDqwordTo(T0, V0);                                                  \
141     test.setDqwordTo(T1, V1);                                                  \
142     test.run();                                                                \
143                                                                                \
144     ASSERT_EQ(packedAs<Type##Size##_t>(V0) Op V1, test.Dst<Dqword>())          \
145         << TestString;                                                         \
146     reset();                                                                   \
147   } while (0)
148 
  // TestPArithXmmAddr: register/memory form. Same flow as TestPArithXmmXmm, but
  // Inst receives dwordAddress(T1) as its second operand instead of a source
  // register, and TestString records the literal "Addr" in that position.
149 #define TestPArithXmmAddr(Dst, Value0, Value1, Inst, Op, Type, Size)           \
150   do {                                                                         \
151     static constexpr char TestString[] =                                       \
152         "(" #Dst ", " #Value0 ", Addr, " #Value1 ", " #Inst ", " #Op           \
153         ", " #Type ", " #Size ")";                                             \
154     const uint32_t T0 = allocateDqword();                                      \
155     const Dqword V0 Value0;                                                    \
156                                                                                \
157     const uint32_t T1 = allocateDqword();                                      \
158     const Dqword V1 Value1;                                                    \
159                                                                                \
160     __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0));                          \
161     __ Inst(IceType_i##Size, Encoded_Xmm_##Dst(), dwordAddress(T1));           \
162                                                                                \
163     AssembledTest test = assemble();                                           \
164     test.setDqwordTo(T0, V0);                                                  \
165     test.setDqwordTo(T1, V1);                                                  \
166     test.run();                                                                \
167                                                                                \
168     ASSERT_EQ(packedAs<Type##Size##_t>(V0) Op V1, test.Dst<Dqword>())          \
169         << TestString;                                                         \
170     reset();                                                                   \
171   } while (0)
172 
  // TestPArithXmmImm: register/immediate form. Only one slot is allocated; the
  // second operand is Immediate(Imm) and the expected value is computed as
  // packedAs<...>(V0) Op Imm.
173 #define TestPArithXmmImm(Dst, Value0, Imm, Inst, Op, Type, Size)               \
174   do {                                                                         \
175     static constexpr char TestString[] =                                       \
176         "(" #Dst ", " #Value0 ", " #Imm ", " #Inst ", " #Op ", " #Type         \
177         ", " #Size ")";                                                        \
178     const uint32_t T0 = allocateDqword();                                      \
179     const Dqword V0 Value0;                                                    \
180                                                                                \
181     __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0));                          \
182     __ Inst(IceType_i##Size, Encoded_Xmm_##Dst(), Immediate(Imm));             \
183                                                                                \
184     AssembledTest test = assemble();                                           \
185     test.setDqwordTo(T0, V0);                                                  \
186     test.run();                                                                \
187                                                                                \
188     ASSERT_EQ(packedAs<Type##Size##_t>(V0) Op Imm, test.Dst<Dqword>())         \
189         << TestString;                                                         \
190     reset();                                                                   \
191   } while (0)
192 
  // TestPAndnXmmXmm: register/register pandn. It has its own macro because the
  // expected value is not of the form `V0 Op V1`: it is (~V0) & V1, i.e. the
  // destination operand is the one that gets complemented.
193 #define TestPAndnXmmXmm(Dst, Value0, Src, Value1, Type, Size)                  \
194   do {                                                                         \
195     static constexpr char TestString[] =                                       \
196         "(" #Dst ", " #Value0 ", " #Src ", " #Value1 ", pandn, " #Type         \
197         ", " #Size ")";                                                        \
198     const uint32_t T0 = allocateDqword();                                      \
199     const Dqword V0 Value0;                                                    \
200                                                                                \
201     const uint32_t T1 = allocateDqword();                                      \
202     const Dqword V1 Value1;                                                    \
203                                                                                \
204     __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0));                          \
205     __ movups(Encoded_Xmm_##Src(), dwordAddress(T1));                          \
206     __ pandn(IceType_i##Size, Encoded_Xmm_##Dst(), Encoded_Xmm_##Src());       \
207                                                                                \
208     AssembledTest test = assemble();                                           \
209     test.setDqwordTo(T0, V0);                                                  \
210     test.setDqwordTo(T1, V1);                                                  \
211     test.run();                                                                \
212                                                                                \
213     ASSERT_EQ(~(packedAs<Type##Size##_t>(V0)) & V1, test.Dst<Dqword>())        \
214         << TestString;                                                         \
215     reset();                                                                   \
216   } while (0)
217 
  // TestPAndnXmmAddr: register/memory pandn; same expected value, (~V0) & V1,
  // with the second operand read from dwordAddress(T1).
218 #define TestPAndnXmmAddr(Dst, Value0, Value1, Type, Size)                      \
219   do {                                                                         \
220     static constexpr char TestString[] =                                       \
221         "(" #Dst ", " #Value0 ", Addr, " #Value1 ", pandn, " #Type ", " #Size  \
222         ")";                                                                   \
223     const uint32_t T0 = allocateDqword();                                      \
224     const Dqword V0 Value0;                                                    \
225                                                                                \
226     const uint32_t T1 = allocateDqword();                                      \
227     const Dqword V1 Value1;                                                    \
228                                                                                \
229     __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0));                          \
230     __ pandn(IceType_i##Size, Encoded_Xmm_##Dst(), dwordAddress(T1));          \
231                                                                                \
232     AssembledTest test = assemble();                                           \
233     test.setDqwordTo(T0, V0);                                                  \
234     test.setDqwordTo(T1, V1);                                                  \
235     test.run();                                                                \
236                                                                                \
237     ASSERT_EQ((~packedAs<Type##Size##_t>(V0)) & V1, test.Dst<Dqword>())        \
238         << TestString;                                                         \
239     reset();                                                                   \
240   } while (0)
241 
  // TestPArithSize: the full instruction matrix for one element Size.
  //   - Size must be 8, 16 or 32 (static_assert).
  //   - The shifts (psra, psrl, psll) and pmull are only run when Size != 8.
  //     The shifts are tested in register, memory and immediate forms with a
  //     count of 3; psra uses a signed element type, psrl/psll unsigned.
  //   - pmuludq is nested under a further Size != 16 check, so it only runs
  //     for Size == 32.
  //   - padd, psub, pand, pandn, por and pxor run for every Size, each in
  //     register and memory forms.
  // The guards are ordinary `if` statements, so the guarded expansions are
  // still compiled for every Size; they are only skipped at run time.
  // NOTE(review): the second word of the right-hand operand,
  // 0x0123456789ABCDEull, has 15 hex digits -- possibly 0x0123456789ABCDEF was
  // intended. It is kept unchanged here; changing it would also change the
  // stringified TestString.
242 #define TestPArithSize(Dst, Src, Size)                                         \
243   do {                                                                         \
244     static_assert(Size == 8 || Size == 16 || Size == 32, "Invalid size.");     \
245     if (Size != 8) {                                                           \
246       TestPArithXmmXmm(                                                        \
247           Dst,                                                                 \
248           (uint64_t(0x8040201008040201ull), uint64_t(0x8080404002020101ull)),  \
249           Src, (uint64_t(3u), uint64_t(0u)), psra, >>, int, Size);             \
250       TestPArithXmmAddr(                                                       \
251           Dst,                                                                 \
252           (uint64_t(0x8040201008040201ull), uint64_t(0x8080404002020101ull)),  \
253           (uint64_t(3u), uint64_t(0u)), psra, >>, int, Size);                  \
254       TestPArithXmmImm(                                                        \
255           Dst,                                                                 \
256           (uint64_t(0x8040201008040201ull), uint64_t(0x8080404002020101ull)),  \
257           3u, psra, >>, int, Size);                                            \
258       TestPArithXmmXmm(                                                        \
259           Dst,                                                                 \
260           (uint64_t(0x8040201008040201ull), uint64_t(0x8080404002020101ull)),  \
261           Src, (uint64_t(3u), uint64_t(0u)), psrl, >>, uint, Size);            \
262       TestPArithXmmAddr(                                                       \
263           Dst,                                                                 \
264           (uint64_t(0x8040201008040201ull), uint64_t(0x8080404002020101ull)),  \
265           (uint64_t(3u), uint64_t(0u)), psrl, >>, uint, Size);                 \
266       TestPArithXmmImm(                                                        \
267           Dst,                                                                 \
268           (uint64_t(0x8040201008040201ull), uint64_t(0x8080404002020101ull)),  \
269           3u, psrl, >>, uint, Size);                                           \
270       TestPArithXmmXmm(                                                        \
271           Dst,                                                                 \
272           (uint64_t(0x8040201008040201ull), uint64_t(0x8080404002020101ull)),  \
273           Src, (uint64_t(3u), uint64_t(0u)), psll, <<, uint, Size);            \
274       TestPArithXmmAddr(                                                       \
275           Dst,                                                                 \
276           (uint64_t(0x8040201008040201ull), uint64_t(0x8080404002020101ull)),  \
277           (uint64_t(3u), uint64_t(0u)), psll, <<, uint, Size);                 \
278       TestPArithXmmImm(                                                        \
279           Dst,                                                                 \
280           (uint64_t(0x8040201008040201ull), uint64_t(0x8080404002020101ull)),  \
281           3u, psll, <<, uint, Size);                                           \
282                                                                                \
283       TestPArithXmmXmm(                                                        \
284           Dst,                                                                 \
285           (uint64_t(0x8040201008040201ull), uint64_t(0x8080404002020101ull)),  \
286           Src,                                                                 \
287           (uint64_t(0xFFFFFFFF00000000ull), uint64_t(0x0123456789ABCDEull)),   \
288           pmull, *, int, Size);                                                \
289       TestPArithXmmAddr(                                                       \
290           Dst,                                                                 \
291           (uint64_t(0x8040201008040201ull), uint64_t(0x8080404002020101ull)),  \
292           (uint64_t(0xFFFFFFFF00000000ull), uint64_t(0x0123456789ABCDEull)),   \
293           pmull, *, int, Size);                                                \
294       if (Size != 16) {                                                        \
295         TestPArithXmmXmm(                                                      \
296             Dst,                                                               \
297             (uint64_t(0x8040201008040201ull),                                  \
298              uint64_t(0x8080404002020101ull)),                                 \
299             Src,                                                               \
300             (uint64_t(0xFFFFFFFF00000000ull), uint64_t(0x0123456789ABCDEull)), \
301             pmuludq, *, uint, Size);                                           \
302         TestPArithXmmAddr(                                                     \
303             Dst,                                                               \
304             (uint64_t(0x8040201008040201ull),                                  \
305              uint64_t(0x8080404002020101ull)),                                 \
306             (uint64_t(0xFFFFFFFF00000000ull), uint64_t(0x0123456789ABCDEull)), \
307             pmuludq, *, uint, Size);                                           \
308       }                                                                        \
309     }                                                                          \
310     TestPArithXmmXmm(                                                          \
311         Dst,                                                                   \
312         (uint64_t(0x8040201008040201ull), uint64_t(0x8080404002020101ull)),    \
313         Src,                                                                   \
314         (uint64_t(0xFFFFFFFF00000000ull), uint64_t(0x0123456789ABCDEull)),     \
315         padd, +, int, Size);                                                   \
316     TestPArithXmmAddr(                                                         \
317         Dst,                                                                   \
318         (uint64_t(0x8040201008040201ull), uint64_t(0x8080404002020101ull)),    \
319         (uint64_t(0xFFFFFFFF00000000ull), uint64_t(0x0123456789ABCDEull)),     \
320         padd, +, int, Size);                                                   \
321     TestPArithXmmXmm(                                                          \
322         Dst,                                                                   \
323         (uint64_t(0x8040201008040201ull), uint64_t(0x8080404002020101ull)),    \
324         Src,                                                                   \
325         (uint64_t(0xFFFFFFFF00000000ull), uint64_t(0x0123456789ABCDEull)),     \
326         psub, -, int, Size);                                                   \
327     TestPArithXmmAddr(                                                         \
328         Dst,                                                                   \
329         (uint64_t(0x8040201008040201ull), uint64_t(0x8080404002020101ull)),    \
330         (uint64_t(0xFFFFFFFF00000000ull), uint64_t(0x0123456789ABCDEull)),     \
331         psub, -, int, Size);                                                   \
332     TestPArithXmmXmm(                                                          \
333         Dst,                                                                   \
334         (uint64_t(0x8040201008040201ull), uint64_t(0x8080404002020101ull)),    \
335         Src,                                                                   \
336         (uint64_t(0xFFFFFFFF00000000ull), uint64_t(0x0123456789ABCDEull)),     \
337         pand, &, int, Size);                                                   \
338     TestPArithXmmAddr(                                                         \
339         Dst,                                                                   \
340         (uint64_t(0x8040201008040201ull), uint64_t(0x8080404002020101ull)),    \
341         (uint64_t(0xFFFFFFFF00000000ull), uint64_t(0x0123456789ABCDEull)),     \
342         pand, &, int, Size);                                                   \
343                                                                                \
344     TestPAndnXmmXmm(                                                           \
345         Dst,                                                                   \
346         (uint64_t(0x8040201008040201ull), uint64_t(0x8080404002020101ull)),    \
347         Src,                                                                   \
348         (uint64_t(0xFFFFFFFF00000000ull), uint64_t(0x0123456789ABCDEull)),     \
349         int, Size);                                                            \
350     TestPAndnXmmAddr(                                                          \
351         Dst,                                                                   \
352         (uint64_t(0x8040201008040201ull), uint64_t(0x8080404002020101ull)),    \
353         (uint64_t(0xFFFFFFFF00000000ull), uint64_t(0x0123456789ABCDEull)),     \
354         int, Size);                                                            \
355                                                                                \
356     TestPArithXmmXmm(                                                          \
357         Dst,                                                                   \
358         (uint64_t(0x8040201008040201ull), uint64_t(0x8080404002020101ull)),    \
359         Src,                                                                   \
360         (uint64_t(0xFFFFFFFF00000000ull), uint64_t(0x0123456789ABCDEull)),     \
361         por, |, int, Size);                                                    \
362     TestPArithXmmAddr(                                                         \
363         Dst,                                                                   \
364         (uint64_t(0x8040201008040201ull), uint64_t(0x8080404002020101ull)),    \
365         (uint64_t(0xFFFFFFFF00000000ull), uint64_t(0x0123456789ABCDEull)),     \
366         por, |, int, Size);                                                    \
367     TestPArithXmmXmm(                                                          \
368         Dst,                                                                   \
369         (uint64_t(0x8040201008040201ull), uint64_t(0x8080404002020101ull)),    \
370         Src,                                                                   \
371         (uint64_t(0xFFFFFFFF00000000ull), uint64_t(0x0123456789ABCDEull)),     \
372         pxor, ^, int, Size);                                                   \
373     TestPArithXmmAddr(                                                         \
374         Dst,                                                                   \
375         (uint64_t(0x8040201008040201ull), uint64_t(0x8080404002020101ull)),    \
376         (uint64_t(0xFFFFFFFF00000000ull), uint64_t(0x0123456789ABCDEull)),     \
377         pxor, ^, int, Size);                                                   \
378   } while (0)
379 
  // TestPArith: runs TestPArithSize for 8-, 16- and 32-bit elements.
  // NOTE(review): the parameters here are named (Src, Dst) but are forwarded
  // positionally to TestPArithSize, whose parameters are (Dst, Src, Size). The
  // FIRST argument of TestPArith is therefore the register used as the
  // destination. Harmless, since every register ends up in both roles, but the
  // names are misleading.
380 #define TestPArith(Src, Dst)                                                   \
381   do {                                                                         \
382     TestPArithSize(Src, Dst, 8);                                               \
383     TestPArithSize(Src, Dst, 16);                                              \
384     TestPArithSize(Src, Dst, 32);                                              \
385   } while (0)
386 
  // Adjacent register pairs covering xmm0..xmm15, wrapping around at the end.
387   TestPArith(xmm0, xmm1);
388   TestPArith(xmm1, xmm2);
389   TestPArith(xmm2, xmm3);
390   TestPArith(xmm3, xmm4);
391   TestPArith(xmm4, xmm5);
392   TestPArith(xmm5, xmm6);
393   TestPArith(xmm6, xmm7);
394   TestPArith(xmm7, xmm8);
395   TestPArith(xmm8, xmm9);
396   TestPArith(xmm9, xmm10);
397   TestPArith(xmm10, xmm11);
398   TestPArith(xmm11, xmm12);
399   TestPArith(xmm12, xmm13);
400   TestPArith(xmm13, xmm14);
401   TestPArith(xmm14, xmm15);
402   TestPArith(xmm15, xmm0);
403 
  // Drop the helper macros so their names do not leak past this test.
404 #undef TestPArith
405 #undef TestPArithSize
406 #undef TestPAndnXmmAddr
407 #undef TestPAndnXmmXmm
408 #undef TestPArithXmmImm
409 #undef TestPArithXmmAddr
410 #undef TestPArithXmmXmm
411 }
412 
TEST_F(AssemblerX8664Test,ArithPS)413 TEST_F(AssemblerX8664Test, ArithPS) {
414 #define TestArithPSXmmXmm(FloatSize, Dst, Value0, Src, Value1, Inst, Op, Type) \
415   do {                                                                         \
416     static constexpr char TestString[] =                                       \
417         "(" #Dst ", " #Value0 ", " #Src ", " #Value1 ", " #Inst ", " #Op       \
418         ", " #Type ")";                                                        \
419     const uint32_t T0 = allocateDqword();                                      \
420     const Dqword V0 Value0;                                                    \
421     const uint32_t T1 = allocateDqword();                                      \
422     const Dqword V1 Value1;                                                    \
423                                                                                \
424     __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0));                          \
425     __ movups(Encoded_Xmm_##Src(), dwordAddress(T1));                          \
426     __ Inst(IceType_f32, Encoded_Xmm_##Dst(), Encoded_Xmm_##Src());            \
427                                                                                \
428     AssembledTest test = assemble();                                           \
429     test.setDqwordTo(T0, V0);                                                  \
430     test.setDqwordTo(T1, V1);                                                  \
431     test.run();                                                                \
432                                                                                \
433     ASSERT_EQ(packedAs<Type>(V0) Op V1, test.Dst<Dqword>()) << TestString;     \
434                                                                                \
435     reset();                                                                   \
436   } while (0)
437 
// Register-register check for a packed instruction whose assembler entry
// point takes no IceType argument: Dst is seeded with Value0, Src with Value1,
// `Inst(Dst, Src)` is emitted, and Dst is compared against
// `packedAs<Type>(Value0) Op Value1`.
#define TestArithPSXmmXmmUntyped(Dst, Value0, Src, Value1, Inst, Op, Type)     \
  do {                                                                         \
    static constexpr char TestString[] =                                       \
        "(" #Dst ", " #Value0 ", " #Src ", " #Value1 ", " #Inst ", " #Op       \
        ", " #Type ")";                                                        \
    /* Reserve both memory slots up front, then build the operand values. */   \
    const uint32_t Mem0 = allocateDqword();                                    \
    const uint32_t Mem1 = allocateDqword();                                    \
    const Dqword V0 Value0;                                                    \
    const Dqword V1 Value1;                                                    \
                                                                               \
    __ movups(Encoded_Xmm_##Dst(), dwordAddress(Mem0));                        \
    __ movups(Encoded_Xmm_##Src(), dwordAddress(Mem1));                        \
    __ Inst(Encoded_Xmm_##Dst(), Encoded_Xmm_##Src());                         \
                                                                               \
    /* Install the operand values on the assembled test, then execute it. */   \
    AssembledTest test = assemble();                                           \
    test.setDqwordTo(Mem0, V0);                                                \
    test.setDqwordTo(Mem1, V1);                                                \
    test.run();                                                                \
                                                                               \
    ASSERT_EQ(packedAs<Type>(V0) Op V1, test.Dst<Dqword>()) << TestString;     \
                                                                               \
    reset();                                                                   \
  } while (false)
461 
// Register-memory check for a packed instruction whose assembler entry point
// takes no IceType argument: Dst is seeded with Value0, the second operand is
// read from the memory slot holding Value1, and Dst is compared against
// `packedAs<Type>(Value0) Op Value1`.
#define TestArithPSXmmAddrUntyped(Dst, Value0, Value1, Inst, Op, Type)         \
  do {                                                                         \
    static constexpr char TestString[] =                                       \
        "(" #Dst ", " #Value0 ", Addr, " #Value1 ", " #Inst ", " #Op           \
        ", " #Type ")";                                                        \
    /* Reserve both memory slots up front, then build the operand values. */   \
    const uint32_t Mem0 = allocateDqword();                                    \
    const uint32_t Mem1 = allocateDqword();                                    \
    const Dqword V0 Value0;                                                    \
    const Dqword V1 Value1;                                                    \
                                                                               \
    __ movups(Encoded_Xmm_##Dst(), dwordAddress(Mem0));                        \
    __ Inst(Encoded_Xmm_##Dst(), dwordAddress(Mem1));                          \
                                                                               \
    /* Install the operand values on the assembled test, then execute it. */   \
    AssembledTest test = assemble();                                           \
    test.setDqwordTo(Mem0, V0);                                                \
    test.setDqwordTo(Mem1, V1);                                                \
    test.run();                                                                \
                                                                               \
    ASSERT_EQ(packedAs<Type>(V0) Op V1, test.Dst<Dqword>()) << TestString;     \
                                                                               \
    reset();                                                                   \
  } while (false)
484 
// Register-register check for packed min/max. Inst names both the assembler
// method that is emitted (with IceType_f<FloatSize>) and the Dqword helper
// used to compute the expected value: `packedAs<Type>(Value0).Inst(Value1)`.
#define TestMinMaxPS(FloatSize, Dst, Value0, Src, Value1, Inst, Type)          \
  do {                                                                         \
    static constexpr char TestString[] =                                       \
        "(" #Dst ", " #Value0 ", " #Src ", " #Value1 ", " #Inst ", " #Type     \
        ")";                                                                   \
    /* Reserve both memory slots up front, then build the operand values. */   \
    const uint32_t Mem0 = allocateDqword();                                    \
    const uint32_t Mem1 = allocateDqword();                                    \
    const Dqword V0 Value0;                                                    \
    const Dqword V1 Value1;                                                    \
                                                                               \
    __ movups(Encoded_Xmm_##Dst(), dwordAddress(Mem0));                        \
    __ movups(Encoded_Xmm_##Src(), dwordAddress(Mem1));                        \
    __ Inst(IceType_f##FloatSize, Encoded_Xmm_##Dst(), Encoded_Xmm_##Src());   \
                                                                               \
    /* Install the operand values on the assembled test, then execute it. */   \
    AssembledTest test = assemble();                                           \
    test.setDqwordTo(Mem0, V0);                                                \
    test.setDqwordTo(Mem1, V1);                                                \
    test.run();                                                                \
                                                                               \
    ASSERT_EQ(packedAs<Type>(V0).Inst(V1), test.Dst<Dqword>()) << TestString;  \
                                                                               \
    reset();                                                                   \
  } while (false)
508 
// Register-memory check for a typed packed instruction: Dst is seeded with
// Value0, `Inst(IceType_f<FloatSize>, Dst, [mem])` is emitted with the memory
// slot holding Value1, and Dst is compared against
// `packedAs<Type>(Value0) Op Value1`.
#define TestArithPSXmmAddr(FloatSize, Dst, Value0, Value1, Inst, Op, Type)     \
  do {                                                                         \
    static constexpr char TestString[] =                                       \
        "(" #Dst ", " #Value0 ", Addr, " #Value1 ", " #Inst ", " #Op           \
        ", " #Type ")";                                                        \
    /* Reserve both memory slots up front, then build the operand values. */   \
    const uint32_t Mem0 = allocateDqword();                                    \
    const uint32_t Mem1 = allocateDqword();                                    \
    const Dqword V0 Value0;                                                    \
    const Dqword V1 Value1;                                                    \
                                                                               \
    __ movups(Encoded_Xmm_##Dst(), dwordAddress(Mem0));                        \
    __ Inst(IceType_f##FloatSize, Encoded_Xmm_##Dst(), dwordAddress(Mem1));    \
                                                                               \
    /* Install the operand values on the assembled test, then execute it. */   \
    AssembledTest test = assemble();                                           \
    test.setDqwordTo(Mem0, V0);                                                \
    test.setDqwordTo(Mem1, V1);                                                \
    test.run();                                                                \
                                                                               \
    ASSERT_EQ(packedAs<Type>(V0) Op V1, test.Dst<Dqword>()) << TestString;     \
                                                                               \
    reset();                                                                   \
  } while (false)
531 
// Runs the whole packed-arithmetic table for one (Dst, Src) register pair.
// Every entry uses the same operand tuples; only the instruction, the C++
// operator used for the expected value, and the lane type vary.
#define TestArithPS(Dst, Src)                                                  \
  do {                                                                         \
    /* addps, subps, mulps, divps: f32 only, register and memory source. */    \
    TestArithPSXmmXmm(32, Dst, (1.0, 100.0, -1000.0, 20.0),                    \
                      Src, (0.55, 0.43, 0.23, 1.21), addps, +, float);         \
    TestArithPSXmmAddr(32, Dst, (1.0, 100.0, -1000.0, 20.0),                   \
                       (0.55, 0.43, 0.23, 1.21), addps, +, float);             \
    TestArithPSXmmXmm(32, Dst, (1.0, 100.0, -1000.0, 20.0),                    \
                      Src, (0.55, 0.43, 0.23, 1.21), subps, -, float);         \
    TestArithPSXmmAddr(32, Dst, (1.0, 100.0, -1000.0, 20.0),                   \
                       (0.55, 0.43, 0.23, 1.21), subps, -, float);             \
    TestArithPSXmmXmm(32, Dst, (1.0, 100.0, -1000.0, 20.0),                    \
                      Src, (0.55, 0.43, 0.23, 1.21), mulps, *, float);         \
    TestArithPSXmmAddr(32, Dst, (1.0, 100.0, -1000.0, 20.0),                   \
                       (0.55, 0.43, 0.23, 1.21), mulps, *, float);             \
    TestArithPSXmmXmm(32, Dst, (1.0, 100.0, -1000.0, 20.0),                    \
                      Src, (0.55, 0.43, 0.23, 1.21), divps, /, float);         \
    TestArithPSXmmAddr(32, Dst, (1.0, 100.0, -1000.0, 20.0),                   \
                       (0.55, 0.43, 0.23, 1.21), divps, /, float);             \
    /* andps: f32 and f64, register and memory source. */                      \
    TestArithPSXmmXmm(32, Dst, (1.0, 100.0, -1000.0, 20.0),                    \
                      Src, (0.55, 0.43, 0.23, 1.21), andps, &, float);         \
    TestArithPSXmmAddr(32, Dst, (1.0, 100.0, -1000.0, 20.0),                   \
                       (0.55, 0.43, 0.23, 1.21), andps, &, float);             \
    TestArithPSXmmXmm(64, Dst, (1.0, -1000.0), Src, (0.55, 1.21),              \
                      andps, &, double);                                       \
    TestArithPSXmmAddr(64, Dst, (1.0, -1000.0), (0.55, 1.21),                  \
                       andps, &, double);                                      \
    /* orps: f32 and f64, register source only. */                             \
    TestArithPSXmmXmm(32, Dst, (1.0, 100.0, -1000.0, 20.0),                    \
                      Src, (0.55, 0.43, 0.23, 1.21), orps, |, float);          \
    TestArithPSXmmXmm(64, Dst, (1.0, -1000.0), Src, (0.55, 1.21),              \
                      orps, |, double);                                        \
    /* minps, maxps: f32 and f64, register source only. */                     \
    TestMinMaxPS(32, Dst, (1.0, 100.0, -1000.0, 20.0),                         \
                 Src, (0.55, 0.43, 0.23, 1.21), minps, float);                 \
    TestMinMaxPS(32, Dst, (1.0, 100.0, -1000.0, 20.0),                         \
                 Src, (0.55, 0.43, 0.23, 1.21), maxps, float);                 \
    TestMinMaxPS(64, Dst, (1.0, -1000.0), Src, (0.55, 1.21), minps, double);   \
    TestMinMaxPS(64, Dst, (1.0, -1000.0), Src, (0.55, 1.21), maxps, double);   \
    /* xorps: f32 and f64, register and memory source. */                      \
    TestArithPSXmmXmm(32, Dst, (1.0, 100.0, -1000.0, 20.0),                    \
                      Src, (0.55, 0.43, 0.23, 1.21), xorps, ^, float);         \
    TestArithPSXmmAddr(32, Dst, (1.0, 100.0, -1000.0, 20.0),                   \
                       (0.55, 0.43, 0.23, 1.21), xorps, ^, float);             \
    TestArithPSXmmXmm(64, Dst, (1.0, -1000.0), Src, (0.55, 1.21),              \
                      xorps, ^, double);                                       \
    TestArithPSXmmAddr(64, Dst, (1.0, -1000.0), (0.55, 1.21),                  \
                       xorps, ^, double);                                      \
  } while (false)
577 
578   TestArithPS(xmm0, xmm1);
579   TestArithPS(xmm1, xmm2);
580   TestArithPS(xmm2, xmm3);
581   TestArithPS(xmm3, xmm4);
582   TestArithPS(xmm4, xmm5);
583   TestArithPS(xmm5, xmm6);
584   TestArithPS(xmm6, xmm7);
585   TestArithPS(xmm7, xmm8);
586   TestArithPS(xmm8, xmm9);
587   TestArithPS(xmm9, xmm10);
588   TestArithPS(xmm10, xmm11);
589   TestArithPS(xmm11, xmm12);
590   TestArithPS(xmm12, xmm13);
591   TestArithPS(xmm13, xmm14);
592   TestArithPS(xmm14, xmm15);
593   TestArithPS(xmm15, xmm0);
594 
595 #undef TestArithPs
596 #undef TestMinMaxPS
597 #undef TestArithPSXmmXmmUntyped
598 #undef TestArithPSXmmAddr
599 #undef TestArithPSXmmXmm
600 }
601 
TEST_F(AssemblerX8664Test,Blending)602 TEST_F(AssemblerX8664Test, Blending) {
603   using f32 = float;
604   using i8 = uint8_t;
605 
606 #define TestBlendingXmmXmm(Dst, Value0, Src, Value1, M /*ask*/, Inst, Type)    \
607   do {                                                                         \
608     static constexpr char TestString[] =                                       \
609         "(" #Dst ", " #Value0 ", " #Src ", " #Value1 ", " #M ", " #Inst        \
610         ", " #Type ")";                                                        \
611     const uint32_t T0 = allocateDqword();                                      \
612     const Dqword V0 Value0;                                                    \
613     const uint32_t T1 = allocateDqword();                                      \
614     const Dqword V1 Value1;                                                    \
615     const uint32_t Mask = allocateDqword();                                    \
616     const Dqword MaskValue M;                                                  \
617                                                                                \
618     __ movups(Encoded_Xmm_xmm0(), dwordAddress(Mask));                         \
619     __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0));                          \
620     __ movups(Encoded_Xmm_##Src(), dwordAddress(T1));                          \
621     __ Inst(IceType_##Type, Encoded_Xmm_##Dst(), Encoded_Xmm_##Src());         \
622                                                                                \
623     AssembledTest test = assemble();                                           \
624     test.setDqwordTo(T0, V0);                                                  \
625     test.setDqwordTo(T1, V1);                                                  \
626     test.setDqwordTo(Mask, MaskValue);                                         \
627     test.run();                                                                \
628                                                                                \
629     ASSERT_EQ(packedAs<Type>(V0).blendWith(V1, MaskValue), test.Dst<Dqword>()) \
630         << TestString;                                                         \
631     reset();                                                                   \
632   } while (0)
633 
634 #define TestBlendingXmmAddr(Dst, Value0, Value1, M /*ask*/, Inst, Type)        \
635   do {                                                                         \
636     static constexpr char TestString[] =                                       \
637         "(" #Dst ", " #Value0 ", Addr, " #Value1 ", " #M ", " #Inst ", " #Type \
638         ")";                                                                   \
639     const uint32_t T0 = allocateDqword();                                      \
640     const Dqword V0 Value0;                                                    \
641     const uint32_t T1 = allocateDqword();                                      \
642     const Dqword V1 Value1;                                                    \
643     const uint32_t Mask = allocateDqword();                                    \
644     const Dqword MaskValue M;                                                  \
645                                                                                \
646     __ movups(Encoded_Xmm_xmm0(), dwordAddress(Mask));                         \
647     __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0));                          \
648     __ Inst(IceType_##Type, Encoded_Xmm_##Dst(), dwordAddress(T1));            \
649                                                                                \
650     AssembledTest test = assemble();                                           \
651     test.setDqwordTo(T0, V0);                                                  \
652     test.setDqwordTo(T1, V1);                                                  \
653     test.setDqwordTo(Mask, MaskValue);                                         \
654     test.run();                                                                \
655                                                                                \
656     ASSERT_EQ(packedAs<Type>(V0).blendWith(V1, MaskValue), test.Dst<Dqword>()) \
657         << TestString;                                                         \
658     reset();                                                                   \
659   } while (0)
660 
661 #define TestBlending(Src, Dst)                                                 \
662   do {                                                                         \
663     TestBlendingXmmXmm(                                                        \
664         Dst, (1.0, 2.0, 1.0, 2.0), Src, (-1.0, -2.0, -1.0, -2.0),              \
665         (uint64_t(0x8000000000000000ull), uint64_t(0x0000000080000000ull)),    \
666         blendvps, f32);                                                        \
667     TestBlendingXmmAddr(                                                       \
668         Dst, (1.0, 2.0, 1.0, 2.0), (-1.0, -2.0, -1.0, -2.0),                   \
669         (uint64_t(0x8000000000000000ull), uint64_t(0x0000000080000000ull)),    \
670         blendvps, f32);                                                        \
671     TestBlendingXmmXmm(                                                        \
672         Dst,                                                                   \
673         (uint64_t(0xFFFFFFFFFFFFFFFFull), uint64_t(0xBBBBBBBBBBBBBBBBull)),    \
674         Src,                                                                   \
675         (uint64_t(0xAAAAAAAAAAAAAAAAull), uint64_t(0xEEEEEEEEEEEEEEEEull)),    \
676         (uint64_t(0x8000000000000080ull), uint64_t(0x8080808000000000ull)),    \
677         pblendvb, i8);                                                         \
678     TestBlendingXmmAddr(                                                       \
679         Dst,                                                                   \
680         (uint64_t(0xFFFFFFFFFFFFFFFFull), uint64_t(0xBBBBBBBBBBBBBBBBull)),    \
681         (uint64_t(0xAAAAAAAAAAAAAAAAull), uint64_t(0xEEEEEEEEEEEEEEEEull)),    \
682         (uint64_t(0x8000000000000080ull), uint64_t(0x8080808000000000ull)),    \
683         pblendvb, i8);                                                         \
684   } while (0)
685 
686   /* xmm0 is taken. It is the implicit mask . */
687   TestBlending(xmm1, xmm2);
688   TestBlending(xmm2, xmm3);
689   TestBlending(xmm3, xmm4);
690   TestBlending(xmm4, xmm5);
691   TestBlending(xmm5, xmm6);
692   TestBlending(xmm6, xmm7);
693   TestBlending(xmm7, xmm8);
694   TestBlending(xmm8, xmm9);
695   TestBlending(xmm9, xmm10);
696   TestBlending(xmm10, xmm11);
697   TestBlending(xmm11, xmm12);
698   TestBlending(xmm12, xmm13);
699   TestBlending(xmm13, xmm14);
700   TestBlending(xmm14, xmm15);
701   TestBlending(xmm15, xmm1);
702 
703 #undef TestBlending
704 #undef TestBlendingXmmAddr
705 #undef TestBlendingXmmXmm
706 }
707 
TEST_F(AssemblerX8664Test,Cmpps)708 TEST_F(AssemblerX8664Test, Cmpps) {
709 #define TestCmppsXmmXmm(FloatSize, Dst, Value0, Src, Value1, C, Op, Type)      \
710   do {                                                                         \
711     static constexpr char TestString[] =                                       \
712         "(" #Src ", " #Dst ", " #C ", " #Op ")";                               \
713     const uint32_t T0 = allocateDqword();                                      \
714     const Dqword V0 Value0;                                                    \
715     const uint32_t T1 = allocateDqword();                                      \
716     const Dqword V1 Value1;                                                    \
717                                                                                \
718     __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0));                          \
719     __ movups(Encoded_Xmm_##Src(), dwordAddress(T1));                          \
720     __ cmpps(IceType_f##FloatSize, Encoded_Xmm_##Dst(), Encoded_Xmm_##Src(),   \
721              Cond::Cmpps_##C);                                                 \
722                                                                                \
723     AssembledTest test = assemble();                                           \
724     test.setDqwordTo(T0, V0);                                                  \
725     test.setDqwordTo(T1, V1);                                                  \
726     test.run();                                                                \
727                                                                                \
728     ASSERT_EQ(packedAs<Type>(V0) Op V1, test.Dst<Dqword>()) << TestString;     \
729     ;                                                                          \
730     reset();                                                                   \
731   } while (0)
732 
733 #define TestCmppsXmmAddr(FloatSize, Dst, Value0, Value1, C, Op, Type)          \
734   do {                                                                         \
735     static constexpr char TestString[] = "(" #Dst ", Addr, " #C ", " #Op ")";  \
736     const uint32_t T0 = allocateDqword();                                      \
737     const Dqword V0 Value0;                                                    \
738     const uint32_t T1 = allocateDqword();                                      \
739     const Dqword V1 Value1;                                                    \
740                                                                                \
741     __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0));                          \
742     __ cmpps(IceType_f##FloatSize, Encoded_Xmm_##Dst(), dwordAddress(T1),      \
743              Cond::Cmpps_##C);                                                 \
744                                                                                \
745     AssembledTest test = assemble();                                           \
746     test.setDqwordTo(T0, V0);                                                  \
747     test.setDqwordTo(T1, V1);                                                  \
748     test.run();                                                                \
749                                                                                \
750     ASSERT_EQ(packedAs<Type>(V0) Op V1, test.Dst<Dqword>()) << TestString;     \
751     ;                                                                          \
752     reset();                                                                   \
753   } while (0)
754 
755 #define TestCmppsOrdUnordXmmXmm(FloatSize, Dst, Value0, Src, Value1, C, Type)  \
756   do {                                                                         \
757     static constexpr char TestString[] = "(" #Src ", " #Dst ", " #C ")";       \
758     const uint32_t T0 = allocateDqword();                                      \
759     const Dqword V0(1.0, 1.0, std::numeric_limits<float>::quiet_NaN(),         \
760                     std::numeric_limits<float>::quiet_NaN());                  \
761     const uint32_t T1 = allocateDqword();                                      \
762     const Dqword V1(1.0, std::numeric_limits<float>::quiet_NaN(), 1.0,         \
763                     std::numeric_limits<float>::quiet_NaN());                  \
764                                                                                \
765     __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0));                          \
766     __ movups(Encoded_Xmm_##Src(), dwordAddress(T1));                          \
767     __ cmpps(IceType_f##FloatSize, Encoded_Xmm_##Dst(), Encoded_Xmm_##Src(),   \
768              Cond::Cmpps_##C);                                                 \
769                                                                                \
770     AssembledTest test = assemble();                                           \
771     test.setDqwordTo(T0, V0);                                                  \
772     test.setDqwordTo(T1, V1);                                                  \
773     test.run();                                                                \
774                                                                                \
775     ASSERT_EQ(packedAs<Type>(V0).C(V1), test.Dst<Dqword>()) << TestString;     \
776     ;                                                                          \
777     reset();                                                                   \
778   } while (0)
779 
780 #define TestCmppsOrdUnordXmmAddr(FloatSize, Dst, Value0, Value1, C, Type)      \
781   do {                                                                         \
782     static constexpr char TestString[] = "(" #Dst ", " #C ")";                 \
783     const uint32_t T0 = allocateDqword();                                      \
784     const Dqword V0(1.0, 1.0, std::numeric_limits<float>::quiet_NaN(),         \
785                     std::numeric_limits<float>::quiet_NaN());                  \
786     const uint32_t T1 = allocateDqword();                                      \
787     const Dqword V1(1.0, std::numeric_limits<float>::quiet_NaN(), 1.0,         \
788                     std::numeric_limits<float>::quiet_NaN());                  \
789                                                                                \
790     __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0));                          \
791     __ cmpps(IceType_f##FloatSize, Encoded_Xmm_##Dst(), dwordAddress(T1),      \
792              Cond::Cmpps_##C);                                                 \
793                                                                                \
794     AssembledTest test = assemble();                                           \
795     test.setDqwordTo(T0, V0);                                                  \
796     test.setDqwordTo(T1, V1);                                                  \
797     test.run();                                                                \
798                                                                                \
799     ASSERT_EQ(packedAs<Type>(V0).C(V1), test.Dst<Dqword>()) << TestString;     \
800     ;                                                                          \
801     reset();                                                                   \
802   } while (0)
803 
804 #define TestCmpps(FloatSize, Dst, Value0, Src, Value1, Type)                   \
805   do {                                                                         \
806     TestCmppsXmmXmm(FloatSize, Dst, Value0, Src, Value1, eq, ==, Type);        \
807     TestCmppsXmmAddr(FloatSize, Dst, Value0, Value1, eq, ==, Type);            \
808     TestCmppsXmmXmm(FloatSize, Dst, Value0, Src, Value1, eq, ==, Type);        \
809     TestCmppsXmmAddr(FloatSize, Dst, Value0, Value1, eq, ==, Type);            \
810     TestCmppsXmmXmm(FloatSize, Dst, Value0, Src, Value1, eq, ==, Type);        \
811     TestCmppsXmmAddr(FloatSize, Dst, Value0, Value1, eq, ==, Type);            \
812     TestCmppsOrdUnordXmmXmm(FloatSize, Dst, Value0, Src, Value1, unord, Type); \
813     TestCmppsOrdUnordXmmAddr(FloatSize, Dst, Value0, Value1, unord, Type);     \
814     TestCmppsXmmXmm(FloatSize, Dst, Value0, Src, Value1, eq, ==, Type);        \
815     TestCmppsXmmAddr(FloatSize, Dst, Value0, Value1, eq, ==, Type);            \
816     TestCmppsXmmXmm(FloatSize, Dst, Value0, Src, Value1, eq, ==, Type);        \
817     TestCmppsXmmAddr(FloatSize, Dst, Value0, Value1, eq, ==, Type);            \
818     TestCmppsXmmXmm(FloatSize, Dst, Value0, Src, Value1, eq, ==, Type);        \
819     TestCmppsXmmAddr(FloatSize, Dst, Value0, Value1, eq, ==, Type);            \
820     if (FloatSize == 32) {                                                     \
821       TestCmppsOrdUnordXmmXmm(32, Dst,                                         \
822                               (1.0, 1.0,                                       \
823                                std::numeric_limits<float>::quiet_NaN(),        \
824                                std::numeric_limits<float>::quiet_NaN()),       \
825                               Src,                                             \
826                               (1.0, std::numeric_limits<float>::quiet_NaN(),   \
827                                1.0, std::numeric_limits<float>::quiet_NaN()),  \
828                               unord, Type);                                    \
829       TestCmppsOrdUnordXmmAddr(32, Dst,                                        \
830                                (1.0, 1.0,                                      \
831                                 std::numeric_limits<float>::quiet_NaN(),       \
832                                 std::numeric_limits<float>::quiet_NaN()),      \
833                                (1.0, std::numeric_limits<float>::quiet_NaN(),  \
834                                 1.0, std::numeric_limits<float>::quiet_NaN()), \
835                                unord, Type);                                   \
836     } else {                                                                   \
837       TestCmppsOrdUnordXmmXmm(                                                 \
838           64, Dst, (1.0, std::numeric_limits<double>::quiet_NaN()), Src,       \
839           (std::numeric_limits<double>::quiet_NaN(),                           \
840            std::numeric_limits<double>::quiet_NaN()),                          \
841           unord, Type);                                                        \
842       TestCmppsOrdUnordXmmXmm(64, Dst, (1.0, 1.0), Src,                        \
843                               (1.0, std::numeric_limits<double>::quiet_NaN()), \
844                               unord, Type);                                    \
845       TestCmppsOrdUnordXmmAddr(                                                \
846           64, Dst, (1.0, std::numeric_limits<double>::quiet_NaN()),            \
847           (std::numeric_limits<double>::quiet_NaN(),                           \
848            std::numeric_limits<double>::quiet_NaN()),                          \
849           unord, Type);                                                        \
850       TestCmppsOrdUnordXmmAddr(                                                \
851           64, Dst, (1.0, 1.0),                                                 \
852           (1.0, std::numeric_limits<double>::quiet_NaN()), unord, Type);       \
853     }                                                                          \
854   } while (0)
855 
856 #define TestCmppsSize(FloatSize, Value0, Value1, Type)                         \
857   do {                                                                         \
858     TestCmpps(FloatSize, xmm0, Value0, xmm1, Value1, Type);                    \
859     TestCmpps(FloatSize, xmm1, Value0, xmm2, Value1, Type);                    \
860     TestCmpps(FloatSize, xmm2, Value0, xmm3, Value1, Type);                    \
861     TestCmpps(FloatSize, xmm3, Value0, xmm4, Value1, Type);                    \
862     TestCmpps(FloatSize, xmm4, Value0, xmm5, Value1, Type);                    \
863     TestCmpps(FloatSize, xmm5, Value0, xmm6, Value1, Type);                    \
864     TestCmpps(FloatSize, xmm6, Value0, xmm7, Value1, Type);                    \
865     TestCmpps(FloatSize, xmm7, Value0, xmm8, Value1, Type);                    \
866     TestCmpps(FloatSize, xmm8, Value0, xmm9, Value1, Type);                    \
867     TestCmpps(FloatSize, xmm9, Value0, xmm10, Value1, Type);                   \
868     TestCmpps(FloatSize, xmm10, Value0, xmm11, Value1, Type);                  \
869     TestCmpps(FloatSize, xmm11, Value0, xmm12, Value1, Type);                  \
870     TestCmpps(FloatSize, xmm12, Value0, xmm13, Value1, Type);                  \
871     TestCmpps(FloatSize, xmm13, Value0, xmm14, Value1, Type);                  \
872     TestCmpps(FloatSize, xmm14, Value0, xmm15, Value1, Type);                  \
873     TestCmpps(FloatSize, xmm15, Value0, xmm0, Value1, Type);                   \
874   } while (0)
875 
876   TestCmppsSize(32, (-1.0, 1.0, 3.14, 1024.5), (-1.0, 1.0, 3.14, 1024.5),
877                 float);
878   TestCmppsSize(64, (1.0, -1000.0), (1.0, -1000.0), double);
879 
880 #undef TestCmpps
881 #undef TestCmppsOrdUnordXmmAddr
882 #undef TestCmppsOrdUnordXmmXmm
883 #undef TestCmppsXmmAddr
884 #undef TestCmppsXmmXmm
885 }
886 
TEST_F(AssemblerX8664Test,Sqrtps_Rsqrtps_Reciprocalps_Sqrtpd)887 TEST_F(AssemblerX8664Test, Sqrtps_Rsqrtps_Reciprocalps_Sqrtpd) {
888 #define TestImplSingle(Dst, Inst, Expect)                                      \
889   do {                                                                         \
890     static constexpr char TestString[] = "(" #Dst ", " #Inst ")";              \
891     const uint32_t T0 = allocateDqword();                                      \
892     const Dqword V0(1.0, 4.0, 20.0, 3.14);                                     \
893                                                                                \
894     __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0));                          \
895     __ Inst(Encoded_Xmm_##Dst());                                              \
896                                                                                \
897     AssembledTest test = assemble();                                           \
898     test.setDqwordTo(T0, V0);                                                  \
899     test.run();                                                                \
900     ASSERT_EQ(Dqword Expect, test.Dst<Dqword>()) << TestString;                \
901     reset();                                                                   \
902   } while (0)
903 
904 #define TestImpl(Dst)                                                          \
905   do {                                                                         \
906     TestImplSingle(                                                            \
907         Dst, sqrtps,                                                           \
908         (uint64_t(0x400000003F800000ull), uint64_t(0x3FE2D10B408F1BBDull)));   \
909     TestImplSingle(                                                            \
910         Dst, rsqrtps,                                                          \
911         (uint64_t(0x3EFFF0003F7FF000ull), uint64_t(0x3F1078003E64F000ull)));   \
912     TestImplSingle(                                                            \
913         Dst, reciprocalps,                                                     \
914         (uint64_t(0x3E7FF0003F7FF000ull), uint64_t(0x3EA310003D4CC000ull)));   \
915                                                                                \
916     TestImplSingle(                                                            \
917         Dst, sqrtpd,                                                           \
918         (uint64_t(0x4036A09E9365F5F3ull), uint64_t(0x401C42FAE40282A8ull)));   \
919   } while (0)
920 
921   TestImpl(xmm0);
922   TestImpl(xmm1);
923   TestImpl(xmm2);
924   TestImpl(xmm3);
925   TestImpl(xmm4);
926   TestImpl(xmm5);
927   TestImpl(xmm6);
928   TestImpl(xmm7);
929   TestImpl(xmm8);
930   TestImpl(xmm9);
931   TestImpl(xmm10);
932   TestImpl(xmm11);
933   TestImpl(xmm12);
934   TestImpl(xmm13);
935   TestImpl(xmm14);
936   TestImpl(xmm15);
937 
938 #undef TestImpl
939 #undef TestImplSingle
940 }
941 
TEST_F(AssemblerX8664Test,Unpck)942 TEST_F(AssemblerX8664Test, Unpck) {
943   const Dqword V0(uint64_t(0xAAAAAAAABBBBBBBBull),
944                   uint64_t(0xCCCCCCCCDDDDDDDDull));
945   const Dqword V1(uint64_t(0xEEEEEEEEFFFFFFFFull),
946                   uint64_t(0x9999999988888888ull));
947 
948   const Dqword unpcklpsExpected(uint64_t(0xFFFFFFFFBBBBBBBBull),
949                                 uint64_t(0xEEEEEEEEAAAAAAAAull));
950   const Dqword unpcklpdExpected(uint64_t(0xAAAAAAAABBBBBBBBull),
951                                 uint64_t(0xEEEEEEEEFFFFFFFFull));
952   const Dqword unpckhpsExpected(uint64_t(0x88888888DDDDDDDDull),
953                                 uint64_t(0x99999999CCCCCCCCull));
954   const Dqword unpckhpdExpected(uint64_t(0xCCCCCCCCDDDDDDDDull),
955                                 uint64_t(0x9999999988888888ull));
956 
957 #define TestImplSingle(Dst, Src, Inst)                                         \
958   do {                                                                         \
959     static constexpr char TestString[] = "(" #Dst ", " #Src ", " #Inst ")";    \
960     const uint32_t T0 = allocateDqword();                                      \
961     const uint32_t T1 = allocateDqword();                                      \
962                                                                                \
963     __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0));                          \
964     __ movups(Encoded_Xmm_##Src(), dwordAddress(T1));                          \
965     __ Inst(Encoded_Xmm_##Dst(), Encoded_Xmm_##Src());                         \
966                                                                                \
967     AssembledTest test = assemble();                                           \
968     test.setDqwordTo(T0, V0);                                                  \
969     test.setDqwordTo(T1, V1);                                                  \
970     test.run();                                                                \
971                                                                                \
972     ASSERT_EQ(Inst##Expected, test.Dst<Dqword>()) << TestString;               \
973     reset();                                                                   \
974   } while (0)
975 
976 #define TestImpl(Dst, Src)                                                     \
977   do {                                                                         \
978     TestImplSingle(Dst, Src, unpcklps);                                        \
979     TestImplSingle(Dst, Src, unpcklpd);                                        \
980     TestImplSingle(Dst, Src, unpckhps);                                        \
981     TestImplSingle(Dst, Src, unpckhpd);                                        \
982   } while (0)
983 
984   TestImpl(xmm0, xmm1);
985   TestImpl(xmm1, xmm2);
986   TestImpl(xmm2, xmm3);
987   TestImpl(xmm3, xmm4);
988   TestImpl(xmm4, xmm5);
989   TestImpl(xmm5, xmm6);
990   TestImpl(xmm6, xmm7);
991   TestImpl(xmm7, xmm8);
992   TestImpl(xmm8, xmm9);
993   TestImpl(xmm9, xmm10);
994   TestImpl(xmm10, xmm11);
995   TestImpl(xmm11, xmm12);
996   TestImpl(xmm12, xmm13);
997   TestImpl(xmm13, xmm14);
998   TestImpl(xmm14, xmm15);
999   TestImpl(xmm15, xmm0);
1000 
1001 #undef TestImpl
1002 #undef TestImplSingle
1003 }
1004 
TEST_F(AssemblerX8664Test,Shufp)1005 TEST_F(AssemblerX8664Test, Shufp) {
1006   const Dqword V0(uint64_t(0x1111111122222222ull),
1007                   uint64_t(0x5555555577777777ull));
1008   const Dqword V1(uint64_t(0xAAAAAAAABBBBBBBBull),
1009                   uint64_t(0xCCCCCCCCDDDDDDDDull));
1010 
1011   const uint8_t pshufdImm = 0x63;
1012   const Dqword pshufdExpected(uint64_t(0xBBBBBBBBCCCCCCCCull),
1013                               uint64_t(0xAAAAAAAADDDDDDDDull));
1014 
1015   const uint8_t shufpsImm = 0xf9;
1016   const Dqword shufpsExpected(uint64_t(0x7777777711111111ull),
1017                               uint64_t(0xCCCCCCCCCCCCCCCCull));
1018 
1019 #define TestImplSingleXmmXmm(Dst, Src, Inst)                                   \
1020   do {                                                                         \
1021     static constexpr char TestString[] = "(" #Dst ", " #Src ", " #Inst ")";    \
1022     const uint32_t T0 = allocateDqword();                                      \
1023     const uint32_t T1 = allocateDqword();                                      \
1024                                                                                \
1025     __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0));                          \
1026     __ movups(Encoded_Xmm_##Src(), dwordAddress(T1));                          \
1027     __ Inst(IceType_f32, Encoded_Xmm_##Dst(), Encoded_Xmm_##Src(),             \
1028             Immediate(Inst##Imm));                                             \
1029                                                                                \
1030     AssembledTest test = assemble();                                           \
1031     test.setDqwordTo(T0, V0);                                                  \
1032     test.setDqwordTo(T1, V1);                                                  \
1033     test.run();                                                                \
1034                                                                                \
1035     ASSERT_EQ(Inst##Expected, test.Dst<Dqword>()) << TestString;               \
1036     reset();                                                                   \
1037   } while (0)
1038 
1039 #define TestImplSingleXmmAddr(Dst, Inst)                                       \
1040   do {                                                                         \
1041     static constexpr char TestString[] = "(" #Dst ", Addr, " #Inst ")";        \
1042     const uint32_t T0 = allocateDqword();                                      \
1043     const uint32_t T1 = allocateDqword();                                      \
1044                                                                                \
1045     __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0));                          \
1046     __ Inst(IceType_f32, Encoded_Xmm_##Dst(), dwordAddress(T1),                \
1047             Immediate(Inst##Imm));                                             \
1048                                                                                \
1049     AssembledTest test = assemble();                                           \
1050     test.setDqwordTo(T0, V0);                                                  \
1051     test.setDqwordTo(T1, V1);                                                  \
1052     test.run();                                                                \
1053                                                                                \
1054     ASSERT_EQ(Inst##Expected, test.Dst<Dqword>()) << TestString;               \
1055     reset();                                                                   \
1056   } while (0)
1057 
1058 #define TestImplSingleXmmXmmUntyped(Dst, Src, Inst)                            \
1059   do {                                                                         \
1060     static constexpr char TestString[] =                                       \
1061         "(" #Dst ", " #Src ", " #Inst ", Untyped)";                            \
1062     const uint32_t T0 = allocateDqword();                                      \
1063     const uint32_t T1 = allocateDqword();                                      \
1064                                                                                \
1065     __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0));                          \
1066     __ movups(Encoded_Xmm_##Src(), dwordAddress(T1));                          \
1067     __ Inst(Encoded_Xmm_##Dst(), Encoded_Xmm_##Src(), Immediate(Inst##Imm));   \
1068                                                                                \
1069     AssembledTest test = assemble();                                           \
1070     test.setDqwordTo(T0, V0);                                                  \
1071     test.setDqwordTo(T1, V1);                                                  \
1072     test.run();                                                                \
1073                                                                                \
1074     ASSERT_EQ(Inst##UntypedExpected, test.Dst<Dqword>()) << TestString;        \
1075     reset();                                                                   \
1076   } while (0)
1077 
1078 #define TestImpl(Dst, Src)                                                     \
1079   do {                                                                         \
1080     TestImplSingleXmmXmm(Dst, Src, pshufd);                                    \
1081     TestImplSingleXmmAddr(Dst, pshufd);                                        \
1082     TestImplSingleXmmXmm(Dst, Src, shufps);                                    \
1083     TestImplSingleXmmAddr(Dst, shufps);                                        \
1084   } while (0)
1085 
1086   TestImpl(xmm0, xmm1);
1087   TestImpl(xmm1, xmm2);
1088   TestImpl(xmm2, xmm3);
1089   TestImpl(xmm3, xmm4);
1090   TestImpl(xmm4, xmm5);
1091   TestImpl(xmm5, xmm6);
1092   TestImpl(xmm6, xmm7);
1093   TestImpl(xmm7, xmm8);
1094   TestImpl(xmm8, xmm9);
1095   TestImpl(xmm9, xmm10);
1096   TestImpl(xmm10, xmm11);
1097   TestImpl(xmm11, xmm12);
1098   TestImpl(xmm12, xmm13);
1099   TestImpl(xmm13, xmm14);
1100   TestImpl(xmm14, xmm15);
1101   TestImpl(xmm15, xmm0);
1102 
1103 #undef TestImpl
1104 #undef TestImplSingleXmmXmmUntyped
1105 #undef TestImplSingleXmmAddr
1106 #undef TestImplSingleXmmXmm
1107 }
1108 
TEST_F(AssemblerX8664Test,Punpckl)1109 TEST_F(AssemblerX8664Test, Punpckl) {
1110   const Dqword V0_v4i32(uint64_t(0x1111111122222222ull),
1111                         uint64_t(0x5555555577777777ull));
1112   const Dqword V1_v4i32(uint64_t(0xAAAAAAAABBBBBBBBull),
1113                         uint64_t(0xCCCCCCCCDDDDDDDDull));
1114   const Dqword Expected_v4i32(uint64_t(0xBBBBBBBB22222222ull),
1115                               uint64_t(0xAAAAAAAA11111111ull));
1116 
1117   const Dqword V0_v8i16(uint64_t(0x1111222233334444ull),
1118                         uint64_t(0x5555666677778888ull));
1119   const Dqword V1_v8i16(uint64_t(0xAAAABBBBCCCCDDDDull),
1120                         uint64_t(0xEEEEFFFF00009999ull));
1121   const Dqword Expected_v8i16(uint64_t(0xCCCC3333DDDD4444ull),
1122                               uint64_t(0xAAAA1111BBBB2222ull));
1123 
1124   const Dqword V0_v16i8(uint64_t(0x1122334455667788ull),
1125                         uint64_t(0x99AABBCCDDEEFF00ull));
1126   const Dqword V1_v16i8(uint64_t(0xFFEEDDCCBBAA9900ull),
1127                         uint64_t(0xBAADF00DFEEDFACEull));
1128   const Dqword Expected_v16i8(uint64_t(0xBB55AA6699770088ull),
1129                               uint64_t(0xFF11EE22DD33CC44ull));
1130 
1131 #define TestImplXmmXmm(Dst, Src, Inst, Ty)                                     \
1132   do {                                                                         \
1133     static constexpr char TestString[] =                                       \
1134         "(" #Dst ", " #Src ", " #Inst ", " #Ty ")";                            \
1135     const uint32_t T0 = allocateDqword();                                      \
1136     const uint32_t T1 = allocateDqword();                                      \
1137                                                                                \
1138     __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0));               \
1139     __ movups(XmmRegister::Encoded_Reg_##Src, dwordAddress(T1));               \
1140     __ Inst(IceType_##Ty, XmmRegister::Encoded_Reg_##Dst,                      \
1141             XmmRegister::Encoded_Reg_##Src);                                   \
1142                                                                                \
1143     AssembledTest test = assemble();                                           \
1144     test.setDqwordTo(T0, V0_##Ty);                                             \
1145     test.setDqwordTo(T1, V1_##Ty);                                             \
1146     test.run();                                                                \
1147                                                                                \
1148     ASSERT_EQ(Expected_##Ty, test.Dst<Dqword>()) << TestString;                \
1149     reset();                                                                   \
1150   } while (0)
1151 
1152 #define TestImplXmmAddr(Dst, Inst, Ty)                                         \
1153   do {                                                                         \
1154     static constexpr char TestString[] =                                       \
1155         "(" #Dst ", Addr, " #Inst ", " #Ty ")";                                \
1156     const uint32_t T0 = allocateDqword();                                      \
1157     const uint32_t T1 = allocateDqword();                                      \
1158                                                                                \
1159     __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0));               \
1160     __ Inst(IceType_##Ty, XmmRegister::Encoded_Reg_##Dst, dwordAddress(T1));   \
1161                                                                                \
1162     AssembledTest test = assemble();                                           \
1163     test.setDqwordTo(T0, V0_##Ty);                                             \
1164     test.setDqwordTo(T1, V1_##Ty);                                             \
1165     test.run();                                                                \
1166                                                                                \
1167     ASSERT_EQ(Expected_##Ty, test.Dst<Dqword>()) << TestString;                \
1168     reset();                                                                   \
1169   } while (0)
1170 
1171 #define TestImpl(Dst, Src)                                                     \
1172   do {                                                                         \
1173     TestImplXmmXmm(Dst, Src, punpckl, v4i32);                                  \
1174     TestImplXmmAddr(Dst, punpckl, v4i32);                                      \
1175     TestImplXmmXmm(Dst, Src, punpckl, v8i16);                                  \
1176     TestImplXmmAddr(Dst, punpckl, v8i16);                                      \
1177     TestImplXmmXmm(Dst, Src, punpckl, v16i8);                                  \
1178     TestImplXmmAddr(Dst, punpckl, v16i8);                                      \
1179   } while (0)
1180 
1181   TestImpl(xmm0, xmm1);
1182   TestImpl(xmm1, xmm2);
1183   TestImpl(xmm2, xmm3);
1184   TestImpl(xmm3, xmm4);
1185   TestImpl(xmm4, xmm5);
1186   TestImpl(xmm5, xmm6);
1187   TestImpl(xmm6, xmm7);
1188   TestImpl(xmm7, xmm0);
1189 
1190 #undef TestImpl
1191 #undef TestImplXmmAddr
1192 #undef TestImplXmmXmm
1193 }
1194 
TEST_F(AssemblerX8664Test,Packss)1195 TEST_F(AssemblerX8664Test, Packss) {
1196   const Dqword V0_v4i32(uint64_t(0x0001000000001234ull),
1197                         uint64_t(0x7FFFFFFF80000000ull));
1198   const Dqword V1_v4i32(uint64_t(0xFFFFFFFEFFFFFFFFull),
1199                         uint64_t(0x0000800100007FFEull));
1200   const Dqword Expected_v4i32(uint64_t(0x7FFF80007FFF1234ull),
1201                               uint64_t(0x7FFF7FFEFFFEFFFFull));
1202 
1203   const Dqword V0_v8i16(uint64_t(0x0001000000120034ull),
1204                         uint64_t(0xFFFEFFFF7FFF8000ull));
1205   const Dqword V1_v8i16(uint64_t(0x00057FF80081007Eull),
1206                         uint64_t(0x0088007700660055ull));
1207   const Dqword Expected_v8i16(uint64_t(0xFEFF7F8001001234ull),
1208                               uint64_t(0x7F776655057F7F7Eull));
1209 
1210 #define TestImplXmmXmm(Dst, Src, Inst, Ty)                                     \
1211   do {                                                                         \
1212     static constexpr char TestString[] =                                       \
1213         "(" #Dst ", " #Src ", " #Inst ", " #Ty ")";                            \
1214     const uint32_t T0 = allocateDqword();                                      \
1215     const uint32_t T1 = allocateDqword();                                      \
1216                                                                                \
1217     __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0));               \
1218     __ movups(XmmRegister::Encoded_Reg_##Src, dwordAddress(T1));               \
1219     __ Inst(IceType_##Ty, XmmRegister::Encoded_Reg_##Dst,                      \
1220             XmmRegister::Encoded_Reg_##Src);                                   \
1221                                                                                \
1222     AssembledTest test = assemble();                                           \
1223     test.setDqwordTo(T0, V0_##Ty);                                             \
1224     test.setDqwordTo(T1, V1_##Ty);                                             \
1225     test.run();                                                                \
1226                                                                                \
1227     ASSERT_EQ(Expected_##Ty, test.Dst<Dqword>()) << TestString;                \
1228     reset();                                                                   \
1229   } while (0)
1230 
1231 #define TestImplXmmAddr(Dst, Inst, Ty)                                         \
1232   do {                                                                         \
1233     static constexpr char TestString[] =                                       \
1234         "(" #Dst ", Addr, " #Inst ", " #Ty ")";                                \
1235     const uint32_t T0 = allocateDqword();                                      \
1236     const uint32_t T1 = allocateDqword();                                      \
1237                                                                                \
1238     __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0));               \
1239     __ Inst(IceType_##Ty, XmmRegister::Encoded_Reg_##Dst, dwordAddress(T1));   \
1240                                                                                \
1241     AssembledTest test = assemble();                                           \
1242     test.setDqwordTo(T0, V0_##Ty);                                             \
1243     test.setDqwordTo(T1, V1_##Ty);                                             \
1244     test.run();                                                                \
1245                                                                                \
1246     ASSERT_EQ(Expected_##Ty, test.Dst<Dqword>()) << TestString;                \
1247     reset();                                                                   \
1248   } while (0)
1249 
1250 #define TestImpl(Dst, Src)                                                     \
1251   do {                                                                         \
1252     TestImplXmmXmm(Dst, Src, packss, v4i32);                                   \
1253     TestImplXmmAddr(Dst, packss, v4i32);                                       \
1254     TestImplXmmXmm(Dst, Src, packss, v8i16);                                   \
1255     TestImplXmmAddr(Dst, packss, v8i16);                                       \
1256   } while (0)
1257 
1258   TestImpl(xmm0, xmm1);
1259   TestImpl(xmm1, xmm2);
1260   TestImpl(xmm2, xmm3);
1261   TestImpl(xmm3, xmm4);
1262   TestImpl(xmm4, xmm5);
1263   TestImpl(xmm5, xmm6);
1264   TestImpl(xmm6, xmm7);
1265   TestImpl(xmm7, xmm0);
1266 
1267 #undef TestImpl
1268 #undef TestImplXmmAddr
1269 #undef TestImplXmmXmm
1270 }
1271 
TEST_F(AssemblerX8664Test,Packus)1272 TEST_F(AssemblerX8664Test, Packus) {
1273   const Dqword V0_v4i32(uint64_t(0x0001000000001234ull),
1274                         uint64_t(0x7FFFFFFF80000000ull));
1275   const Dqword V1_v4i32(uint64_t(0xFFFFFFFEFFFFFFFFull),
1276                         uint64_t(0x0000800100007FFEull));
1277   const Dqword Expected_v4i32(uint64_t(0xFFFF0000FFFF1234ull),
1278                               uint64_t(0x80017FFE00000000ull));
1279 
1280   const Dqword V0_v8i16(uint64_t(0x0001000000120034ull),
1281                         uint64_t(0xFFFEFFFF7FFF8000ull));
1282   const Dqword V1_v8i16(uint64_t(0x00057FF80081007Eull),
1283                         uint64_t(0x0088007700660055ull));
1284   const Dqword Expected_v8i16(uint64_t(0x0000FF0001001234ull),
1285                               uint64_t(0x8877665505FF817Eull));
1286 
1287 #define TestImplXmmXmm(Dst, Src, Inst, Ty)                                     \
1288   do {                                                                         \
1289     static constexpr char TestString[] =                                       \
1290         "(" #Dst ", " #Src ", " #Inst ", " #Ty ")";                            \
1291     const uint32_t T0 = allocateDqword();                                      \
1292     const uint32_t T1 = allocateDqword();                                      \
1293                                                                                \
1294     __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0));               \
1295     __ movups(XmmRegister::Encoded_Reg_##Src, dwordAddress(T1));               \
1296     __ Inst(IceType_##Ty, XmmRegister::Encoded_Reg_##Dst,                      \
1297             XmmRegister::Encoded_Reg_##Src);                                   \
1298                                                                                \
1299     AssembledTest test = assemble();                                           \
1300     test.setDqwordTo(T0, V0_##Ty);                                             \
1301     test.setDqwordTo(T1, V1_##Ty);                                             \
1302     test.run();                                                                \
1303                                                                                \
1304     ASSERT_EQ(Expected_##Ty, test.Dst<Dqword>()) << TestString;                \
1305     reset();                                                                   \
1306   } while (0)
1307 
1308 #define TestImplXmmAddr(Dst, Inst, Ty)                                         \
1309   do {                                                                         \
1310     static constexpr char TestString[] =                                       \
1311         "(" #Dst ", Addr, " #Inst ", " #Ty ")";                                \
1312     const uint32_t T0 = allocateDqword();                                      \
1313     const uint32_t T1 = allocateDqword();                                      \
1314                                                                                \
1315     __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0));               \
1316     __ Inst(IceType_##Ty, XmmRegister::Encoded_Reg_##Dst, dwordAddress(T1));   \
1317                                                                                \
1318     AssembledTest test = assemble();                                           \
1319     test.setDqwordTo(T0, V0_##Ty);                                             \
1320     test.setDqwordTo(T1, V1_##Ty);                                             \
1321     test.run();                                                                \
1322                                                                                \
1323     ASSERT_EQ(Expected_##Ty, test.Dst<Dqword>()) << TestString;                \
1324     reset();                                                                   \
1325   } while (0)
1326 
1327 #define TestImpl(Dst, Src)                                                     \
1328   do {                                                                         \
1329     TestImplXmmXmm(Dst, Src, packus, v4i32);                                   \
1330     TestImplXmmAddr(Dst, packus, v4i32);                                       \
1331     TestImplXmmXmm(Dst, Src, packus, v8i16);                                   \
1332     TestImplXmmAddr(Dst, packus, v8i16);                                       \
1333   } while (0)
1334 
1335   TestImpl(xmm0, xmm1);
1336   TestImpl(xmm1, xmm2);
1337   TestImpl(xmm2, xmm3);
1338   TestImpl(xmm3, xmm4);
1339   TestImpl(xmm4, xmm5);
1340   TestImpl(xmm5, xmm6);
1341   TestImpl(xmm6, xmm7);
1342   TestImpl(xmm7, xmm0);
1343 
1344 #undef TestImpl
1345 #undef TestImplXmmAddr
1346 #undef TestImplXmmXmm
1347 }
1348 
TEST_F(AssemblerX8664Test,Pshufb)1349 TEST_F(AssemblerX8664Test, Pshufb) {
1350   const Dqword V0(uint64_t(0x1122334455667788ull),
1351                   uint64_t(0x99aabbccddeeff32ull));
1352   const Dqword V1(uint64_t(0x0204050380060708ull),
1353                   uint64_t(0x010306080a8b0c0dull));
1354 
1355   const Dqword Expected(uint64_t(0x6644335500221132ull),
1356                         uint64_t(0x77552232ee00ccbbull));
1357 
1358 #define TestImplXmmXmm(Dst, Src, Inst)                                         \
1359   do {                                                                         \
1360     static constexpr char TestString[] = "(" #Dst ", " #Src ", " #Inst ")";    \
1361     const uint32_t T0 = allocateDqword();                                      \
1362     const uint32_t T1 = allocateDqword();                                      \
1363                                                                                \
1364     __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0));               \
1365     __ movups(XmmRegister::Encoded_Reg_##Src, dwordAddress(T1));               \
1366     __ Inst(IceType_void, XmmRegister::Encoded_Reg_##Dst,                      \
1367             XmmRegister::Encoded_Reg_##Src);                                   \
1368                                                                                \
1369     AssembledTest test = assemble();                                           \
1370     test.setDqwordTo(T0, V0);                                                  \
1371     test.setDqwordTo(T1, V1);                                                  \
1372     test.run();                                                                \
1373                                                                                \
1374     ASSERT_EQ(Expected, test.Dst<Dqword>()) << TestString;                     \
1375     reset();                                                                   \
1376   } while (0)
1377 
1378 #define TestImplXmmAddr(Dst, Inst)                                             \
1379   do {                                                                         \
1380     static constexpr char TestString[] = "(" #Dst ", Addr, " #Inst ")";        \
1381     const uint32_t T0 = allocateDqword();                                      \
1382     const uint32_t T1 = allocateDqword();                                      \
1383                                                                                \
1384     __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0));               \
1385     __ Inst(IceType_void, XmmRegister::Encoded_Reg_##Dst, dwordAddress(T1));   \
1386                                                                                \
1387     AssembledTest test = assemble();                                           \
1388     test.setDqwordTo(T0, V0);                                                  \
1389     test.setDqwordTo(T1, V1);                                                  \
1390     test.run();                                                                \
1391                                                                                \
1392     ASSERT_EQ(Expected, test.Dst<Dqword>()) << TestString;                     \
1393     reset();                                                                   \
1394   } while (0)
1395 
1396 #define TestImpl(Dst, Src)                                                     \
1397   do {                                                                         \
1398     TestImplXmmXmm(Dst, Src, pshufb);                                          \
1399     TestImplXmmAddr(Dst, pshufb);                                              \
1400   } while (0)
1401 
1402   TestImpl(xmm0, xmm1);
1403   TestImpl(xmm1, xmm2);
1404   TestImpl(xmm2, xmm3);
1405   TestImpl(xmm3, xmm4);
1406   TestImpl(xmm4, xmm5);
1407   TestImpl(xmm5, xmm6);
1408   TestImpl(xmm6, xmm7);
1409   TestImpl(xmm7, xmm8);
1410   TestImpl(xmm8, xmm9);
1411   TestImpl(xmm9, xmm10);
1412   TestImpl(xmm10, xmm11);
1413   TestImpl(xmm11, xmm12);
1414   TestImpl(xmm12, xmm13);
1415   TestImpl(xmm13, xmm14);
1416   TestImpl(xmm14, xmm15);
1417   TestImpl(xmm15, xmm0);
1418 
1419 #undef TestImpl
1420 #undef TestImplXmmAddr
1421 #undef TestImplXmmXmm
1422 }
1423 
TEST_F(AssemblerX8664Test,Cvt)1424 TEST_F(AssemblerX8664Test, Cvt) {
1425   const Dqword dq2ps32DstValue(-1.0f, -1.0f, -1.0f, -1.0f);
1426   const Dqword dq2ps32SrcValue(-5, 3, 100, 200);
1427   const Dqword dq2ps32Expected(-5.0f, 3.0f, 100.0, 200.0);
1428 
1429   const Dqword dq2ps64DstValue(0.0f, 0.0f, -1.0f, -1.0f);
1430   const Dqword dq2ps64SrcValue(-5, 3, 100, 200);
1431   const Dqword dq2ps64Expected(-5.0f, 3.0f, 100.0, 200.0);
1432 
1433   const Dqword tps2dq32DstValue(-1.0f, -1.0f, -1.0f, -1.0f);
1434   const Dqword tps2dq32SrcValue(-5.0f, 3.0f, 100.0, 200.0);
1435   const Dqword tps2dq32Expected(-5, 3, 100, 200);
1436 
1437   const Dqword tps2dq64DstValue(-1.0f, -1.0f, -1.0f, -1.0f);
1438   const Dqword tps2dq64SrcValue(-5.0f, 3.0f, 100.0, 200.0);
1439   const Dqword tps2dq64Expected(-5, 3, 100, 200);
1440 
1441   const Dqword si2ss32DstValue(-1.0f, -1.0f, -1.0f, -1.0f);
1442   const int32_t si2ss32SrcValue = 5;
1443   const Dqword si2ss32Expected(5.0f, -1.0f, -1.0f, -1.0f);
1444 
1445   const Dqword si2ss64DstValue(-1.0, -1.0);
1446   const int32_t si2ss64SrcValue = 5;
1447   const Dqword si2ss64Expected(5.0, -1.0);
1448 
1449   const int32_t tss2si32DstValue = 0xF00F0FF0;
1450   const Dqword tss2si32SrcValue(-5.0f, -1.0f, -1.0f, -1.0f);
1451   const int32_t tss2si32Expected = -5;
1452 
1453   const int32_t tss2si64DstValue = 0xF00F0FF0;
1454   const Dqword tss2si64SrcValue(-5.0, -1.0);
1455   const int32_t tss2si64Expected = -5;
1456 
1457   const Dqword float2float32DstValue(-1.0, -1.0);
1458   const Dqword float2float32SrcValue(-5.0, 3, 100, 200);
1459   const Dqword float2float32Expected(-5.0, -1.0);
1460 
1461   const Dqword float2float64DstValue(-1.0, -1.0, -1.0, -1.0);
1462   const Dqword float2float64SrcValue(-5.0, 3.0);
1463   const Dqword float2float64Expected(-5.0, -1.0, -1.0, -1.0);
1464 
1465 #define TestImplPXmmXmm(Dst, Src, Inst, Size)                                  \
1466   do {                                                                         \
1467     static constexpr char TestString[] =                                       \
1468         "(" #Dst ", " #Src ", cvt" #Inst ", f" #Size ")";                      \
1469     const uint32_t T0 = allocateDqword();                                      \
1470     const uint32_t T1 = allocateDqword();                                      \
1471                                                                                \
1472     __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0));                          \
1473     __ movups(Encoded_Xmm_##Src(), dwordAddress(T1));                          \
1474     __ cvt##Inst(IceType_f##Size, Encoded_Xmm_##Dst(), Encoded_Xmm_##Src());   \
1475                                                                                \
1476     AssembledTest test = assemble();                                           \
1477     test.setDqwordTo(T0, Inst##Size##DstValue);                                \
1478     test.setDqwordTo(T1, Inst##Size##SrcValue);                                \
1479     test.run();                                                                \
1480                                                                                \
1481     ASSERT_EQ(Inst##Size##Expected, test.Dst<Dqword>()) << TestString;         \
1482     reset();                                                                   \
1483   } while (0)
1484 
1485 #define TestImplSXmmReg(Dst, GPR, Inst, Size, IntType)                         \
1486   do {                                                                         \
1487     static constexpr char TestString[] =                                       \
1488         "(" #Dst ", " #GPR ", cvt" #Inst ", " #IntType ", f" #Size ")";        \
1489     const uint32_t T0 = allocateDqword();                                      \
1490                                                                                \
1491     __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0));                          \
1492     __ mov(IceType_i32, Encoded_GPR_##GPR(), Immediate(Inst##Size##SrcValue)); \
1493     __ cvt##Inst(IceType_f##Size, Encoded_Xmm_##Dst(), IntType,                \
1494                  Encoded_GPR_##GPR());                                         \
1495                                                                                \
1496     AssembledTest test = assemble();                                           \
1497     test.setDqwordTo(T0, Inst##Size##DstValue);                                \
1498     test.run();                                                                \
1499                                                                                \
1500     ASSERT_EQ(Inst##Size##Expected, test.Dst<Dqword>()) << TestString;         \
1501     reset();                                                                   \
1502   } while (0)
1503 
1504 #define TestImplSRegXmm(GPR, Src, Inst, IntSize, Size)                         \
1505   do {                                                                         \
1506     static constexpr char TestString[] =                                       \
1507         "(" #GPR ", " #Src ", cvt" #Inst ", " #IntSize ", f" #Size ")";        \
1508     const uint32_t T0 = allocateDqword();                                      \
1509                                                                                \
1510     __ mov(IceType_i32, Encoded_GPR_##GPR(), Immediate(Inst##Size##DstValue)); \
1511     __ movups(Encoded_Xmm_##Src(), dwordAddress(T0));                          \
1512     __ cvt##Inst(IceType_i##IntSize, Encoded_GPR_##GPR(), IceType_f##Size,     \
1513                  Encoded_Xmm_##Src());                                         \
1514                                                                                \
1515     AssembledTest test = assemble();                                           \
1516     test.setDqwordTo(T0, Inst##Size##SrcValue);                                \
1517     test.run();                                                                \
1518                                                                                \
1519     ASSERT_EQ(static_cast<uint##IntSize##_t>(Inst##Size##Expected),            \
1520               test.GPR())                                                      \
1521         << TestString;                                                         \
1522     reset();                                                                   \
1523   } while (0)
1524 
1525 #define TestImplPXmmAddr(Dst, Inst, Size)                                      \
1526   do {                                                                         \
1527     static constexpr char TestString[] =                                       \
1528         "(" #Dst ", Addr, cvt" #Inst ", f" #Size ")";                          \
1529     const uint32_t T0 = allocateDqword();                                      \
1530     const uint32_t T1 = allocateDqword();                                      \
1531                                                                                \
1532     __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0));                          \
1533     __ cvt##Inst(IceType_f##Size, Encoded_Xmm_##Dst(), dwordAddress(T1));      \
1534                                                                                \
1535     AssembledTest test = assemble();                                           \
1536     test.setDqwordTo(T0, Inst##Size##DstValue);                                \
1537     test.setDqwordTo(T1, Inst##Size##SrcValue);                                \
1538     test.run();                                                                \
1539                                                                                \
1540     ASSERT_EQ(Inst##Size##Expected, test.Dst<Dqword>()) << TestString;         \
1541     reset();                                                                   \
1542   } while (0)
1543 
1544 #define TestImplSXmmAddr(Dst, Inst, Size, IntType)                             \
1545   do {                                                                         \
1546     static constexpr char TestString[] =                                       \
1547         "(" #Dst ", Addr, cvt" #Inst ", f" #Size ", " #IntType ")";            \
1548     const uint32_t T0 = allocateDqword();                                      \
1549     const uint32_t T1 = allocateDword();                                       \
1550                                                                                \
1551     __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0));                          \
1552     __ cvt##Inst(IceType_f##Size, Encoded_Xmm_##Dst(), IntType,                \
1553                  dwordAddress(T1));                                            \
1554                                                                                \
1555     AssembledTest test = assemble();                                           \
1556     test.setDqwordTo(T0, Inst##Size##DstValue);                                \
1557     test.setDwordTo(T1, Inst##Size##SrcValue);                                 \
1558     test.run();                                                                \
1559                                                                                \
1560     ASSERT_EQ(Inst##Size##Expected, test.Dst<Dqword>()) << TestString;         \
1561     reset();                                                                   \
1562   } while (0)
1563 
1564 #define TestImplSRegAddr(GPR, Inst, IntSize, Size)                             \
1565   do {                                                                         \
1566     static constexpr char TestString[] =                                       \
1567         "(" #GPR ", Addr, cvt" #Inst ", f" #Size ", " #IntSize ")";            \
1568     const uint32_t T0 = allocateDqword();                                      \
1569                                                                                \
1570     __ mov(IceType_i32, Encoded_GPR_##GPR(), Immediate(Inst##Size##DstValue)); \
1571     __ cvt##Inst(IceType_i##IntSize, Encoded_GPR_##GPR(), IceType_f##Size,     \
1572                  dwordAddress(T0));                                            \
1573                                                                                \
1574     AssembledTest test = assemble();                                           \
1575     test.setDqwordTo(T0, Inst##Size##SrcValue);                                \
1576     test.run();                                                                \
1577                                                                                \
1578     ASSERT_EQ(static_cast<uint##IntSize##_t>(Inst##Size##Expected),            \
1579               test.GPR())                                                      \
1580         << TestString;                                                         \
1581     reset();                                                                   \
1582   } while (0)
1583 
1584 #define TestImplSize(Dst, Src, GPR, Size)                                      \
1585   do {                                                                         \
1586     TestImplPXmmXmm(Dst, Src, dq2ps, Size);                                    \
1587     TestImplPXmmAddr(Src, dq2ps, Size);                                        \
1588     TestImplPXmmXmm(Dst, Src, tps2dq, Size);                                   \
1589     TestImplPXmmAddr(Src, tps2dq, Size);                                       \
1590     TestImplSXmmReg(Dst, GPR, si2ss, Size, IceType_i32);                       \
1591     TestImplSXmmReg(Dst, GPR, si2ss, Size, IceType_i64);                       \
1592     TestImplSXmmAddr(Dst, si2ss, Size, IceType_i32);                           \
1593     TestImplSXmmAddr(Dst, si2ss, Size, IceType_i64);                           \
1594     TestImplSRegXmm(GPR, Src, tss2si, 32, Size);                               \
1595     TestImplSRegXmm(GPR, Src, tss2si, 64, Size);                               \
1596     TestImplSRegAddr(GPR, tss2si, 32, Size);                                   \
1597     TestImplSRegAddr(GPR, tss2si, 64, Size);                                   \
1598     TestImplPXmmXmm(Dst, Src, float2float, Size);                              \
1599     TestImplPXmmAddr(Src, float2float, Size);                                  \
1600   } while (0)
1601 
1602 #define TestImpl(Dst, Src, GPR)                                                \
1603   do {                                                                         \
1604     TestImplSize(Dst, Src, GPR, 32);                                           \
1605     TestImplSize(Dst, Src, GPR, 64);                                           \
1606   } while (0)
1607 
1608   TestImpl(xmm0, xmm1, r1);
1609   TestImpl(xmm1, xmm2, r2);
1610   TestImpl(xmm2, xmm3, r3);
1611   TestImpl(xmm3, xmm4, r4);
1612   TestImpl(xmm4, xmm5, r5);
1613   TestImpl(xmm5, xmm6, r6);
1614   TestImpl(xmm6, xmm7, r7);
1615   TestImpl(xmm7, xmm8, r8);
1616   TestImpl(xmm8, xmm9, r10);
1617   TestImpl(xmm9, xmm10, r11);
1618   TestImpl(xmm10, xmm11, r12);
1619   TestImpl(xmm11, xmm12, r13);
1620   TestImpl(xmm12, xmm13, r14);
1621   TestImpl(xmm13, xmm14, r15);
1622   TestImpl(xmm14, xmm15, r1);
1623   TestImpl(xmm15, xmm0, r2);
1624 
1625 #undef TestImpl
1626 #undef TestImplSize
1627 #undef TestImplSRegAddr
1628 #undef TestImplSXmmAddr
1629 #undef TestImplPXmmAddr
1630 #undef TestImplSRegXmm
1631 #undef TestImplSXmmReg
1632 #undef TestImplPXmmXmm
1633 }
1634 
TEST_F(AssemblerX8664Test,Ucomiss)1635 TEST_F(AssemblerX8664Test, Ucomiss) {
1636   static constexpr float qnan32 = std::numeric_limits<float>::quiet_NaN();
1637   static constexpr double qnan64 = std::numeric_limits<float>::quiet_NaN();
1638 
1639   Dqword test32DstValue(0.0, qnan32, qnan32, qnan32);
1640   Dqword test32SrcValue(0.0, qnan32, qnan32, qnan32);
1641 
1642   Dqword test64DstValue(0.0, qnan64);
1643   Dqword test64SrcValue(0.0, qnan64);
1644 
1645 #define TestImplXmmXmm(Dst, Value0, Src, Value1, Size, CompType, BParity,      \
1646                        BOther)                                                 \
1647   do {                                                                         \
1648     static constexpr char NearBranch = AssemblerX8664::kNearJump;              \
1649     static constexpr char TestString[] =                                       \
1650         "(" #Dst ", " #Value0 ", " #Src ", " #Value1 ", " #Size ", " #CompType \
1651         ", " #BParity ", " #BOther ")";                                        \
1652     const uint32_t T0 = allocateDqword();                                      \
1653     test##Size##DstValue.F##Size[0] = Value0;                                  \
1654     const uint32_t T1 = allocateDqword();                                      \
1655     test##Size##SrcValue.F##Size[0] = Value1;                                  \
1656     const uint32_t ImmIfTrue = 0xBEEF;                                         \
1657     const uint32_t ImmIfFalse = 0xC0FFE;                                       \
1658                                                                                \
1659     __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0));                          \
1660     __ movups(Encoded_Xmm_##Src(), dwordAddress(T1));                          \
1661     __ mov(IceType_i32, GPRRegister::Encoded_Reg_eax, Immediate(ImmIfFalse));  \
1662     __ ucomiss(IceType_f##Size, Encoded_Xmm_##Dst(), Encoded_Xmm_##Src());     \
1663     Label Done;                                                                \
1664     __ j(Cond::Br_##BParity, &Done, NearBranch);                               \
1665     __ j(Cond::Br_##BOther, &Done, NearBranch);                                \
1666     __ mov(IceType_i32, GPRRegister::Encoded_Reg_eax, Immediate(ImmIfTrue));   \
1667     __ bind(&Done);                                                            \
1668                                                                                \
1669     AssembledTest test = assemble();                                           \
1670     test.setDqwordTo(T0, test##Size##DstValue);                                \
1671     test.setDqwordTo(T1, test##Size##SrcValue);                                \
1672     test.run();                                                                \
1673                                                                                \
1674     ASSERT_EQ(ImmIfTrue, test.eax()) << TestString;                            \
1675     reset();                                                                   \
1676   } while (0)
1677 
1678 #define TestImplXmmAddr(Dst, Value0, Value1, Size, CompType, BParity, BOther)  \
1679   do {                                                                         \
1680     static constexpr char NearBranch = AssemblerX8664::kNearJump;              \
1681     static constexpr char TestString[] =                                       \
1682         "(" #Dst ", " #Value0 ", Addr, " #Value1 ", " #Size ", " #CompType     \
1683         ", " #BParity ", " #BOther ")";                                        \
1684     const uint32_t T0 = allocateDqword();                                      \
1685     test##Size##DstValue.F##Size[0] = Value0;                                  \
1686     const uint32_t T1 = allocateDqword();                                      \
1687     test##Size##SrcValue.F##Size[0] = Value1;                                  \
1688     const uint32_t ImmIfTrue = 0xBEEF;                                         \
1689     const uint32_t ImmIfFalse = 0xC0FFE;                                       \
1690                                                                                \
1691     __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0));                          \
1692     __ mov(IceType_i32, GPRRegister::Encoded_Reg_eax, Immediate(ImmIfFalse));  \
1693     __ ucomiss(IceType_f##Size, Encoded_Xmm_##Dst(), dwordAddress(T1));        \
1694     Label Done;                                                                \
1695     __ j(Cond::Br_##BParity, &Done, NearBranch);                               \
1696     __ j(Cond::Br_##BOther, &Done, NearBranch);                                \
1697     __ mov(IceType_i32, GPRRegister::Encoded_Reg_eax, Immediate(ImmIfTrue));   \
1698     __ bind(&Done);                                                            \
1699                                                                                \
1700     AssembledTest test = assemble();                                           \
1701     test.setDqwordTo(T0, test##Size##DstValue);                                \
1702     test.setDqwordTo(T1, test##Size##SrcValue);                                \
1703     test.run();                                                                \
1704                                                                                \
1705     ASSERT_EQ(ImmIfTrue, test.eax()) << TestString;                            \
1706     reset();                                                                   \
1707   } while (0)
1708 
1709 #define TestImplCond(Dst, Value0, Src, Value1, Size, CompType, BParity,        \
1710                      BOther)                                                   \
1711   do {                                                                         \
1712     TestImplXmmXmm(Dst, Value0, Src, Value1, Size, CompType, BParity, BOther); \
1713     TestImplXmmAddr(Dst, Value0, Value1, Size, CompType, BParity, BOther);     \
1714   } while (0)
1715 
1716 #define TestImplSize(Dst, Src, Size)                                           \
1717   do {                                                                         \
1718     TestImplCond(Dst, 1.0, Src, 1.0, Size, isEq, p, ne);                       \
1719     TestImplCond(Dst, 1.0, Src, 2.0, Size, isNe, p, e);                        \
1720     TestImplCond(Dst, 1.0, Src, 2.0, Size, isLe, p, a);                        \
1721     TestImplCond(Dst, 1.0, Src, 1.0, Size, isLe, p, a);                        \
1722     TestImplCond(Dst, 1.0, Src, 2.0, Size, isLt, p, ae);                       \
1723     TestImplCond(Dst, 2.0, Src, 1.0, Size, isGe, p, b);                        \
1724     TestImplCond(Dst, 1.0, Src, 1.0, Size, isGe, p, b);                        \
1725     TestImplCond(Dst, 2.0, Src, 1.0, Size, isGt, p, be);                       \
1726     TestImplCond(Dst, qnan##Size, Src, 1.0, Size, isUnord, np, o);             \
1727     TestImplCond(Dst, 1.0, Src, qnan##Size, Size, isUnord, np, s);             \
1728     TestImplCond(Dst, qnan##Size, Src, qnan##Size, Size, isUnord, np, s);      \
1729   } while (0)
1730 
1731 #define TestImpl(Dst, Src)                                                     \
1732   do {                                                                         \
1733     TestImplSize(Dst, Src, 32);                                                \
1734     TestImplSize(Dst, Src, 64);                                                \
1735   } while (0)
1736 
1737   TestImpl(xmm0, xmm1);
1738   TestImpl(xmm1, xmm2);
1739   TestImpl(xmm2, xmm3);
1740   TestImpl(xmm3, xmm4);
1741   TestImpl(xmm4, xmm5);
1742   TestImpl(xmm5, xmm6);
1743   TestImpl(xmm6, xmm7);
1744   TestImpl(xmm7, xmm8);
1745   TestImpl(xmm8, xmm9);
1746   TestImpl(xmm9, xmm10);
1747   TestImpl(xmm10, xmm11);
1748   TestImpl(xmm11, xmm12);
1749   TestImpl(xmm12, xmm13);
1750   TestImpl(xmm13, xmm14);
1751   TestImpl(xmm14, xmm15);
1752   TestImpl(xmm15, xmm0);
1753 
1754 #undef TestImpl
1755 #undef TestImplSize
1756 #undef TestImplCond
1757 #undef TestImplXmmAddr
1758 #undef TestImplXmmXmm
1759 }
1760 
TEST_F(AssemblerX8664Test,Sqrtss)1761 TEST_F(AssemblerX8664Test, Sqrtss) {
1762   Dqword test32SrcValue(-100.0, -100.0, -100.0, -100.0);
1763   Dqword test32DstValue(-1.0, -1.0, -1.0, -1.0);
1764 
1765   Dqword test64SrcValue(-100.0, -100.0);
1766   Dqword test64DstValue(-1.0, -1.0);
1767 
1768 #define TestSqrtssXmmXmm(Dst, Src, Value1, Result, Size)                       \
1769   do {                                                                         \
1770     static constexpr char TestString[] =                                       \
1771         "(" #Dst ", " #Src ", " #Value1 ", " #Result ", " #Size ")";           \
1772     const uint32_t T0 = allocateDqword();                                      \
1773     test##Size##SrcValue.F##Size[0] = Value1;                                  \
1774     const uint32_t T1 = allocateDqword();                                      \
1775                                                                                \
1776     __ movups(Encoded_Xmm_##Src(), dwordAddress(T0));                          \
1777     __ movups(Encoded_Xmm_##Dst(), dwordAddress(T1));                          \
1778     __ sqrt(IceType_f##Size, Encoded_Xmm_##Dst(), Encoded_Xmm_##Src());        \
1779                                                                                \
1780     AssembledTest test = assemble();                                           \
1781     test.setDqwordTo(T0, test##Size##SrcValue);                                \
1782     test.setDqwordTo(T1, test##Size##DstValue);                                \
1783     test.run();                                                                \
1784                                                                                \
1785     Dqword Expected = test##Size##DstValue;                                    \
1786     Expected.F##Size[0] = Result;                                              \
1787     ASSERT_EQ(Expected, test.Dst<Dqword>()) << TestString;                     \
1788     reset();                                                                   \
1789   } while (0)
1790 
1791 #define TestSqrtssXmmAddr(Dst, Value1, Result, Size)                           \
1792   do {                                                                         \
1793     static constexpr char TestString[] =                                       \
1794         "(" #Dst ", Addr, " #Value1 ", " #Result ", " #Size ")";               \
1795     const uint32_t T0 = allocateDqword();                                      \
1796     test##Size##SrcValue.F##Size[0] = Value1;                                  \
1797     const uint32_t T1 = allocateDqword();                                      \
1798                                                                                \
1799     __ movups(Encoded_Xmm_##Dst(), dwordAddress(T1));                          \
1800     __ sqrt(IceType_f##Size, Encoded_Xmm_##Dst(), dwordAddress(T0));           \
1801                                                                                \
1802     AssembledTest test = assemble();                                           \
1803     test.setDqwordTo(T0, test##Size##SrcValue);                                \
1804     test.setDqwordTo(T1, test##Size##DstValue);                                \
1805     test.run();                                                                \
1806                                                                                \
1807     Dqword Expected = test##Size##DstValue;                                    \
1808     Expected.F##Size[0] = Result;                                              \
1809     ASSERT_EQ(Expected, test.Dst<Dqword>()) << TestString;                     \
1810     reset();                                                                   \
1811   } while (0)
1812 
1813 #define TestSqrtssSize(Dst, Src, Size)                                         \
1814   do {                                                                         \
1815     TestSqrtssXmmXmm(Dst, Src, 4.0, 2.0, Size);                                \
1816     TestSqrtssXmmAddr(Dst, 4.0, 2.0, Size);                                    \
1817     TestSqrtssXmmXmm(Dst, Src, 9.0, 3.0, Size);                                \
1818     TestSqrtssXmmAddr(Dst, 9.0, 3.0, Size);                                    \
1819     TestSqrtssXmmXmm(Dst, Src, 100.0, 10.0, Size);                             \
1820     TestSqrtssXmmAddr(Dst, 100.0, 10.0, Size);                                 \
1821   } while (0)
1822 
1823 #define TestSqrtss(Dst, Src)                                                   \
1824   do {                                                                         \
1825     TestSqrtssSize(Dst, Src, 32);                                              \
1826     TestSqrtssSize(Dst, Src, 64);                                              \
1827   } while (0)
1828 
1829   TestSqrtss(xmm0, xmm1);
1830   TestSqrtss(xmm1, xmm2);
1831   TestSqrtss(xmm2, xmm3);
1832   TestSqrtss(xmm3, xmm4);
1833   TestSqrtss(xmm4, xmm5);
1834   TestSqrtss(xmm5, xmm6);
1835   TestSqrtss(xmm6, xmm7);
1836   TestSqrtss(xmm7, xmm8);
1837   TestSqrtss(xmm8, xmm9);
1838   TestSqrtss(xmm9, xmm10);
1839   TestSqrtss(xmm10, xmm11);
1840   TestSqrtss(xmm11, xmm12);
1841   TestSqrtss(xmm12, xmm13);
1842   TestSqrtss(xmm13, xmm14);
1843   TestSqrtss(xmm14, xmm15);
1844   TestSqrtss(xmm15, xmm0);
1845 
1846 #undef TestSqrtss
1847 #undef TestSqrtssSize
1848 #undef TestSqrtssXmmAddr
1849 #undef TestSqrtssXmmXmm
1850 }
1851 
TEST_F(AssemblerX8664Test,Insertps)1852 TEST_F(AssemblerX8664Test, Insertps) {
1853 #define TestInsertpsXmmXmmImm(Dst, Value0, Src, Value1, Imm, Expected)         \
1854   do {                                                                         \
1855     static constexpr char TestString[] =                                       \
1856         "(" #Dst ", " #Value0 ", " #Src ", " #Value1 ", " #Imm ", " #Expected  \
1857         ")";                                                                   \
1858     const uint32_t T0 = allocateDqword();                                      \
1859     const Dqword V0 Value0;                                                    \
1860     const uint32_t T1 = allocateDqword();                                      \
1861     const Dqword V1 Value1;                                                    \
1862                                                                                \
1863     __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0));                          \
1864     __ movups(Encoded_Xmm_##Src(), dwordAddress(T1));                          \
1865     __ insertps(IceType_v4f32, Encoded_Xmm_##Dst(), Encoded_Xmm_##Src(),       \
1866                 Immediate(Imm));                                               \
1867                                                                                \
1868     AssembledTest test = assemble();                                           \
1869     test.setDqwordTo(T0, V0);                                                  \
1870     test.setDqwordTo(T1, V1);                                                  \
1871     test.run();                                                                \
1872                                                                                \
1873     ASSERT_EQ(Dqword Expected, test.Dst<Dqword>()) << TestString;              \
1874     reset();                                                                   \
1875   } while (0)
1876 
1877 #define TestInsertpsXmmAddrImm(Dst, Value0, Value1, Imm, Expected)             \
1878   do {                                                                         \
1879     static constexpr char TestString[] =                                       \
1880         "(" #Dst ", " #Value0 ", Addr, " #Value1 ", " #Imm ", " #Expected ")"; \
1881     const uint32_t T0 = allocateDqword();                                      \
1882     const Dqword V0 Value0;                                                    \
1883     const uint32_t T1 = allocateDqword();                                      \
1884     const Dqword V1 Value1;                                                    \
1885                                                                                \
1886     __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0));                          \
1887     __ insertps(IceType_v4f32, Encoded_Xmm_##Dst(), dwordAddress(T1),          \
1888                 Immediate(Imm));                                               \
1889                                                                                \
1890     AssembledTest test = assemble();                                           \
1891     test.setDqwordTo(T0, V0);                                                  \
1892     test.setDqwordTo(T1, V1);                                                  \
1893     test.run();                                                                \
1894                                                                                \
1895     ASSERT_EQ(Dqword Expected, test.Dst<Dqword>()) << TestString;              \
1896     reset();                                                                   \
1897   } while (0)
1898 
1899 #define TestInsertps(Dst, Src)                                                 \
1900   do {                                                                         \
1901     TestInsertpsXmmXmmImm(                                                     \
1902         Dst, (uint64_t(-1), uint64_t(-1)), Src,                                \
1903         (uint64_t(0xAAAAAAAABBBBBBBBull), uint64_t(0xCCCCCCCCDDDDDDDDull)),    \
1904         0x99,                                                                  \
1905         (uint64_t(0xDDDDDDDD00000000ull), uint64_t(0x00000000FFFFFFFFull)));   \
1906     TestInsertpsXmmAddrImm(                                                    \
1907         Dst, (uint64_t(-1), uint64_t(-1)),                                     \
1908         (uint64_t(0xAAAAAAAABBBBBBBBull), uint64_t(0xCCCCCCCCDDDDDDDDull)),    \
1909         0x99,                                                                  \
1910         (uint64_t(0xBBBBBBBB00000000ull), uint64_t(0x00000000FFFFFFFFull)));   \
1911     TestInsertpsXmmXmmImm(                                                     \
1912         Dst, (uint64_t(-1), uint64_t(-1)), Src,                                \
1913         (uint64_t(0xAAAAAAAABBBBBBBBull), uint64_t(0xCCCCCCCCDDDDDDDDull)),    \
1914         0x9D,                                                                  \
1915         (uint64_t(0xDDDDDDDD00000000ull), uint64_t(0x0000000000000000ull)));   \
1916     TestInsertpsXmmAddrImm(                                                    \
1917         Dst, (uint64_t(-1), uint64_t(-1)),                                     \
1918         (uint64_t(0xAAAAAAAABBBBBBBBull), uint64_t(0xCCCCCCCCDDDDDDDDull)),    \
1919         0x9D,                                                                  \
1920         (uint64_t(0xBBBBBBBB00000000ull), uint64_t(0x0000000000000000ull)));   \
1921   } while (0)
1922 
1923   TestInsertps(xmm0, xmm1);
1924   TestInsertps(xmm1, xmm2);
1925   TestInsertps(xmm2, xmm3);
1926   TestInsertps(xmm3, xmm4);
1927   TestInsertps(xmm4, xmm5);
1928   TestInsertps(xmm5, xmm6);
1929   TestInsertps(xmm6, xmm7);
1930   TestInsertps(xmm7, xmm8);
1931   TestInsertps(xmm8, xmm9);
1932   TestInsertps(xmm9, xmm10);
1933   TestInsertps(xmm10, xmm11);
1934   TestInsertps(xmm11, xmm12);
1935   TestInsertps(xmm12, xmm13);
1936   TestInsertps(xmm13, xmm14);
1937   TestInsertps(xmm14, xmm15);
1938   TestInsertps(xmm15, xmm0);
1939 
1940 #undef TestInsertps
1941 #undef TestInsertpsXmmXmmAddr
1942 #undef TestInsertpsXmmXmmImm
1943 }
1944 
TEST_F(AssemblerX8664Test,Pinsr)1945 TEST_F(AssemblerX8664Test, Pinsr) {
1946   static constexpr uint8_t Mask32 = 0x03;
1947   static constexpr uint8_t Mask16 = 0x07;
1948   static constexpr uint8_t Mask8 = 0x0F;
1949 
1950 #define TestPinsrXmmGPRImm(Dst, Value0, GPR, Value1, Imm, Size)                \
1951   do {                                                                         \
1952     static constexpr char TestString[] =                                       \
1953         "(" #Dst ", " #Value0 ", " #GPR ", " #Value1 ", " #Imm ", " #Size ")"; \
1954     const uint32_t T0 = allocateDqword();                                      \
1955     const Dqword V0 Value0;                                                    \
1956                                                                                \
1957     __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0));                          \
1958     __ mov(IceType_i32, Encoded_GPR_##GPR(), Immediate(Value1));               \
1959     __ pinsr(IceType_i##Size, Encoded_Xmm_##Dst(), Encoded_GPR_##GPR(),        \
1960              Immediate(Imm));                                                  \
1961                                                                                \
1962     AssembledTest test = assemble();                                           \
1963     test.setDqwordTo(T0, V0);                                                  \
1964     test.run();                                                                \
1965                                                                                \
1966     constexpr uint8_t sel = (Imm)&Mask##Size;                                  \
1967     Dqword Expected = V0;                                                      \
1968     Expected.U##Size[sel] = Value1;                                            \
1969     ASSERT_EQ(Expected, test.Dst<Dqword>()) << TestString;                     \
1970     reset();                                                                   \
1971   } while (0)
1972 
1973 #define TestPinsrXmmAddrImm(Dst, Value0, Value1, Imm, Size)                    \
1974   do {                                                                         \
1975     static constexpr char TestString[] =                                       \
1976         "(" #Dst ", " #Value0 ", Addr, " #Value1 ", " #Imm ", " #Size ")";     \
1977     const uint32_t T0 = allocateDqword();                                      \
1978     const Dqword V0 Value0;                                                    \
1979     const uint32_t T1 = allocateDword();                                       \
1980     const uint32_t V1 = Value1;                                                \
1981                                                                                \
1982     __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0));                          \
1983     __ pinsr(IceType_i##Size, Encoded_Xmm_##Dst(), dwordAddress(T1),           \
1984              Immediate(Imm));                                                  \
1985                                                                                \
1986     AssembledTest test = assemble();                                           \
1987     test.setDqwordTo(T0, V0);                                                  \
1988     test.setDwordTo(T1, V1);                                                   \
1989     test.run();                                                                \
1990                                                                                \
1991     constexpr uint8_t sel = (Imm)&Mask##Size;                                  \
1992     Dqword Expected = V0;                                                      \
1993     Expected.U##Size[sel] = Value1;                                            \
1994     ASSERT_EQ(Expected, test.Dst<Dqword>()) << TestString;                     \
1995     reset();                                                                   \
1996   } while (0)
1997 
1998 #define TestPinsrSize(Dst, GPR, Value1, Imm, Size)                             \
1999   do {                                                                         \
2000     TestPinsrXmmGPRImm(                                                        \
2001         Dst,                                                                   \
2002         (uint64_t(0xAAAAAAAABBBBBBBBull), uint64_t(0xFFFFFFFFDDDDDDDDull)),    \
2003         GPR, Value1, Imm, Size);                                               \
2004     TestPinsrXmmAddrImm(                                                       \
2005         Dst,                                                                   \
2006         (uint64_t(0xAAAAAAAABBBBBBBBull), uint64_t(0xFFFFFFFFDDDDDDDDull)),    \
2007         Value1, Imm, Size);                                                    \
2008   } while (0)
2009 
2010 #define TestPinsr(Src, Dst)                                                    \
2011   do {                                                                         \
2012     TestPinsrSize(Src, Dst, 0xEE, 0x03, 8);                                    \
2013     TestPinsrSize(Src, Dst, 0xFFEE, 0x03, 16);                                 \
2014     TestPinsrSize(Src, Dst, 0xC0FFEE, 0x03, 32);                               \
2015   } while (0)
2016 
2017   TestPinsr(xmm0, r1);
2018   TestPinsr(xmm1, r2);
2019   TestPinsr(xmm2, r3);
2020   TestPinsr(xmm3, r4);
2021   TestPinsr(xmm4, r5);
2022   TestPinsr(xmm5, r6);
2023   TestPinsr(xmm6, r7);
2024   TestPinsr(xmm7, r8);
2025   TestPinsr(xmm8, r10);
2026   TestPinsr(xmm9, r11);
2027   TestPinsr(xmm10, r12);
2028   TestPinsr(xmm11, r13);
2029   TestPinsr(xmm12, r14);
2030   TestPinsr(xmm13, r15);
2031   TestPinsr(xmm14, r1);
2032   TestPinsr(xmm15, r2);
2033 
2034 #undef TestPinsr
2035 #undef TestPinsrSize
2036 #undef TestPinsrXmmAddrImm
2037 #undef TestPinsrXmmGPRImm
2038 }
2039 
TEST_F(AssemblerX8664Test,Pextr)2040 TEST_F(AssemblerX8664Test, Pextr) {
2041   static constexpr uint8_t Mask32 = 0x03;
2042   static constexpr uint8_t Mask16 = 0x07;
2043   static constexpr uint8_t Mask8 = 0x0F;
2044 
2045 #define TestPextrGPRXmmImm(GPR, Src, Value1, Imm, Size)                        \
2046   do {                                                                         \
2047     static constexpr char TestString[] =                                       \
2048         "(" #GPR ", " #Src ", " #Value1 ", " #Imm ", " #Size ")";              \
2049     const uint32_t T0 = allocateDqword();                                      \
2050     const Dqword V0 Value1;                                                    \
2051                                                                                \
2052     __ movups(Encoded_Xmm_##Src(), dwordAddress(T0));                          \
2053     __ pextr(IceType_i##Size, Encoded_GPR_##GPR(), Encoded_Xmm_##Src(),        \
2054              Immediate(Imm));                                                  \
2055                                                                                \
2056     AssembledTest test = assemble();                                           \
2057     test.setDqwordTo(T0, V0);                                                  \
2058     test.run();                                                                \
2059                                                                                \
2060     constexpr uint8_t sel = (Imm)&Mask##Size;                                  \
2061     ASSERT_EQ(V0.U##Size[sel], test.GPR()) << TestString;                      \
2062     reset();                                                                   \
2063   } while (0)
2064 
2065 #define TestPextrSize(GPR, Src, Value1, Imm, Size)                             \
2066   do {                                                                         \
2067     TestPextrGPRXmmImm(                                                        \
2068         GPR, Src,                                                              \
2069         (uint64_t(0xAAAAAAAABBBBBBBBull), uint64_t(0xFFFFFFFFDDDDDDDDull)),    \
2070         Imm, Size);                                                            \
2071   } while (0)
2072 
2073 #define TestPextr(Src, Dst)                                                    \
2074   do {                                                                         \
2075     TestPextrSize(Src, Dst, 0xEE, 0x03, 8);                                    \
2076     TestPextrSize(Src, Dst, 0xFFEE, 0x03, 16);                                 \
2077     TestPextrSize(Src, Dst, 0xC0FFEE, 0x03, 32);                               \
2078   } while (0)
2079 
2080   TestPextr(r1, xmm0);
2081   TestPextr(r2, xmm1);
2082   TestPextr(r3, xmm2);
2083   TestPextr(r4, xmm3);
2084   TestPextr(r5, xmm4);
2085   TestPextr(r6, xmm5);
2086   TestPextr(r7, xmm6);
2087   TestPextr(r8, xmm7);
2088   TestPextr(r10, xmm8);
2089   TestPextr(r11, xmm9);
2090   TestPextr(r12, xmm10);
2091   TestPextr(r13, xmm11);
2092   TestPextr(r14, xmm12);
2093   TestPextr(r15, xmm13);
2094   TestPextr(r1, xmm14);
2095   TestPextr(r2, xmm15);
2096 
2097 #undef TestPextr
2098 #undef TestPextrSize
2099 #undef TestPextrXmmGPRImm
2100 }
2101 
TEST_F(AssemblerX8664Test,Pcmpeq_Pcmpgt)2102 TEST_F(AssemblerX8664Test, Pcmpeq_Pcmpgt) {
2103 #define TestPcmpXmmXmm(Dst, Value0, Src, Value1, Size, Inst, Op)               \
2104   do {                                                                         \
2105     static constexpr char TestString[] =                                       \
2106         "(" #Dst ", " #Value0 ", " #Src ", " #Value1 ", " #Size ", " #Op ")";  \
2107     const uint32_t T0 = allocateDqword();                                      \
2108     const Dqword V0 Value0;                                                    \
2109     const uint32_t T1 = allocateDqword();                                      \
2110     const Dqword V1 Value1;                                                    \
2111                                                                                \
2112     __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0));                          \
2113     __ movups(Encoded_Xmm_##Src(), dwordAddress(T1));                          \
2114     __ Inst(IceType_i##Size, Encoded_Xmm_##Dst(), Encoded_Xmm_##Src());        \
2115                                                                                \
2116     AssembledTest test = assemble();                                           \
2117     test.setDqwordTo(T0, V0);                                                  \
2118     test.setDqwordTo(T1, V1);                                                  \
2119     test.run();                                                                \
2120                                                                                \
2121     Dqword Expected(uint64_t(0), uint64_t(0));                                 \
2122     static constexpr uint8_t ArraySize =                                       \
2123         sizeof(Dqword) / sizeof(uint##Size##_t);                               \
2124     for (uint8_t i = 0; i < ArraySize; ++i) {                                  \
2125       Expected.I##Size[i] = (V1.I##Size[i] Op V0.I##Size[i]) ? -1 : 0;         \
2126     }                                                                          \
2127     ASSERT_EQ(Expected, test.Dst<Dqword>()) << TestString;                     \
2128     reset();                                                                   \
2129   } while (0)
2130 
2131 #define TestPcmpXmmAddr(Dst, Value0, Value1, Size, Inst, Op)                   \
2132   do {                                                                         \
2133     static constexpr char TestString[] =                                       \
2134         "(" #Dst ", " #Value0 ", Addr, " #Value1 ", " #Size ", " #Op ")";      \
2135     const uint32_t T0 = allocateDqword();                                      \
2136     const Dqword V0 Value0;                                                    \
2137     const uint32_t T1 = allocateDqword();                                      \
2138     const Dqword V1 Value1;                                                    \
2139                                                                                \
2140     __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0));                          \
2141     __ Inst(IceType_i##Size, Encoded_Xmm_##Dst(), dwordAddress(T1));           \
2142                                                                                \
2143     AssembledTest test = assemble();                                           \
2144     test.setDqwordTo(T0, V0);                                                  \
2145     test.setDqwordTo(T1, V1);                                                  \
2146     test.run();                                                                \
2147                                                                                \
2148     Dqword Expected(uint64_t(0), uint64_t(0));                                 \
2149     static constexpr uint8_t ArraySize =                                       \
2150         sizeof(Dqword) / sizeof(uint##Size##_t);                               \
2151     for (uint8_t i = 0; i < ArraySize; ++i) {                                  \
2152       Expected.I##Size[i] = (V1.I##Size[i] Op V0.I##Size[i]) ? -1 : 0;         \
2153     }                                                                          \
2154     ASSERT_EQ(Expected, test.Dst<Dqword>()) << TestString;                     \
2155     reset();                                                                   \
2156   } while (0)
2157 
2158 #define TestPcmpValues(Dst, Value0, Src, Value1, Size)                         \
2159   do {                                                                         \
2160     TestPcmpXmmXmm(Dst, Value0, Src, Value1, Size, pcmpeq, ==);                \
2161     TestPcmpXmmAddr(Dst, Value0, Value1, Size, pcmpeq, ==);                    \
2162     TestPcmpXmmXmm(Dst, Value0, Src, Value1, Size, pcmpgt, <);                 \
2163     TestPcmpXmmAddr(Dst, Value0, Value1, Size, pcmpgt, <);                     \
2164   } while (0)
2165 
2166 #define TestPcmpSize(Dst, Src, Size)                                           \
2167   do {                                                                         \
2168     TestPcmpValues(                                                            \
2169         Dst,                                                                   \
2170         (uint64_t(0x8888888888888888ull), uint64_t(0x0000000000000000ull)),    \
2171         Src,                                                                   \
2172         (uint64_t(0x0000008800008800ull), uint64_t(0xFFFFFFFFFFFFFFFFull)),    \
2173         Size);                                                                 \
2174     TestPcmpValues(                                                            \
2175         Dst,                                                                   \
2176         (uint64_t(0x123567ABAB55DE01ull), uint64_t(0x12345abcde12345Aull)),    \
2177         Src,                                                                   \
2178         (uint64_t(0x0000008800008800ull), uint64_t(0xAABBCCDD1234321Aull)),    \
2179         Size);                                                                 \
2180   } while (0)
2181 
2182 #define TestPcmp(Dst, Src)                                                     \
2183   do {                                                                         \
2184     TestPcmpSize(xmm0, xmm1, 8);                                               \
2185     TestPcmpSize(xmm0, xmm1, 16);                                              \
2186     TestPcmpSize(xmm0, xmm1, 32);                                              \
2187   } while (0)
2188 
2189   TestPcmp(xmm0, xmm1);
2190   TestPcmp(xmm1, xmm2);
2191   TestPcmp(xmm2, xmm3);
2192   TestPcmp(xmm3, xmm4);
2193   TestPcmp(xmm4, xmm5);
2194   TestPcmp(xmm5, xmm6);
2195   TestPcmp(xmm6, xmm7);
2196   TestPcmp(xmm7, xmm8);
2197   TestPcmp(xmm8, xmm9);
2198   TestPcmp(xmm9, xmm10);
2199   TestPcmp(xmm10, xmm11);
2200   TestPcmp(xmm11, xmm12);
2201   TestPcmp(xmm12, xmm13);
2202   TestPcmp(xmm13, xmm14);
2203   TestPcmp(xmm14, xmm15);
2204   TestPcmp(xmm15, xmm0);
2205 
2206 #undef TestPcmp
2207 #undef TestPcmpSize
2208 #undef TestPcmpValues
2209 #undef TestPcmpXmmAddr
2210 #undef TestPcmpXmmXmm
2211 }
2212 
TEST_F(AssemblerX8664Test,Roundsd)2213 TEST_F(AssemblerX8664Test, Roundsd) {
2214 #define TestRoundsdXmmXmm(Dst, Src, Mode, Input, RN)                           \
2215   do {                                                                         \
2216     static constexpr char TestString[] =                                       \
2217         "(" #Dst ", " #Src ", " #Mode ", " #Input ", " #RN ")";                \
2218     const uint32_t T0 = allocateDqword();                                      \
2219     const Dqword V0(-3.0, -3.0);                                               \
2220     const uint32_t T1 = allocateDqword();                                      \
2221     const Dqword V1(double(Input), -123.4);                                    \
2222                                                                                \
2223     __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0));                          \
2224     __ movups(Encoded_Xmm_##Src(), dwordAddress(T1));                          \
2225     __ round(IceType_f64, Encoded_Xmm_##Dst(), Encoded_Xmm_##Src(),            \
2226              Immediate(AssemblerX8664::k##Mode));                              \
2227                                                                                \
2228     AssembledTest test = assemble();                                           \
2229     test.setDqwordTo(T0, V0);                                                  \
2230     test.setDqwordTo(T1, V1);                                                  \
2231     test.run();                                                                \
2232                                                                                \
2233     const Dqword Expected(double(RN), -3.0);                                   \
2234     EXPECT_EQ(Expected, test.Dst<Dqword>()) << TestString;                     \
2235     reset();                                                                   \
2236   } while (0)
2237 
2238 #define TestRoundsd(Dst, Src)                                                  \
2239   do {                                                                         \
2240     TestRoundsdXmmXmm(Dst, Src, RoundToNearest, 5.51, 6);                      \
2241     TestRoundsdXmmXmm(Dst, Src, RoundToNearest, 5.49, 5);                      \
2242     TestRoundsdXmmXmm(Dst, Src, RoundDown, 5.51, 5);                           \
2243     TestRoundsdXmmXmm(Dst, Src, RoundUp, 5.49, 6);                             \
2244     TestRoundsdXmmXmm(Dst, Src, RoundToZero, 5.49, 5);                         \
2245     TestRoundsdXmmXmm(Dst, Src, RoundToZero, 5.51, 5);                         \
2246   } while (0)
2247 
2248   TestRoundsd(xmm0, xmm1);
2249   TestRoundsd(xmm1, xmm2);
2250   TestRoundsd(xmm2, xmm3);
2251   TestRoundsd(xmm3, xmm4);
2252   TestRoundsd(xmm4, xmm5);
2253   TestRoundsd(xmm5, xmm6);
2254   TestRoundsd(xmm6, xmm7);
2255   TestRoundsd(xmm7, xmm8);
2256   TestRoundsd(xmm8, xmm9);
2257   TestRoundsd(xmm9, xmm10);
2258   TestRoundsd(xmm10, xmm11);
2259   TestRoundsd(xmm11, xmm12);
2260   TestRoundsd(xmm12, xmm13);
2261   TestRoundsd(xmm13, xmm14);
2262   TestRoundsd(xmm14, xmm15);
2263   TestRoundsd(xmm15, xmm0);
2264 
2265 #undef TestRoundsd
2266 #undef TestRoundsdXmmXmm
2267 }
2268 
// Exercises set1ps: after the call, every 32-bit element of the destination
// xmm register is expected to hold the immediate.
TEST_F(AssemblerX8664Test, Set1ps) {
// Emits set1ps(XmmDst, Gpr, Value) and compares XmmDst against a Dqword whose
// two 64-bit halves each contain Value in both 32-bit words.
// NOTE(review): Gpr is presumably a temporary the assembler uses to
// materialize the immediate -- confirm against AssemblerX8664::set1ps.
#define TestImpl(XmmDst, Gpr, Value)                                           \
  do {                                                                         \
    static constexpr char Label[] = "(" #XmmDst ", " #Gpr ", " #Value ")";     \
    __ set1ps(Encoded_Xmm_##XmmDst(), Encoded_GPR_##Gpr(), Immediate(Value));  \
                                                                               \
    AssembledTest test = assemble();                                           \
    test.run();                                                                \
                                                                               \
    const uint64_t Splat = (uint64_t(Value) << 32) | uint32_t(Value);          \
    const Dqword Expected(Splat, Splat);                                       \
    ASSERT_EQ(Expected, test.XmmDst<Dqword>()) << Label;                       \
    reset();                                                                   \
  } while (0)

  TestImpl(xmm0, r1, 1);
  TestImpl(xmm1, r2, 12);
  TestImpl(xmm2, r3, 22);
  TestImpl(xmm3, r4, 54);
  TestImpl(xmm4, r5, 80);
  TestImpl(xmm5, r6, 32);
  TestImpl(xmm6, r7, 55);
  TestImpl(xmm7, r8, 44);
  TestImpl(xmm8, r10, 10);
  TestImpl(xmm9, r11, 155);
  TestImpl(xmm10, r12, 165);
  TestImpl(xmm11, r13, 170);
  TestImpl(xmm12, r14, 200);
  TestImpl(xmm13, r15, 124);
  TestImpl(xmm14, r1, 101);
  TestImpl(xmm15, r2, 166);

#undef TestImpl
}
2303 
2304 } // end of anonymous namespace
2305 } // end of namespace Test
2306 } // end of namespace X8664
2307 } // end of namespace Ice
2308