//===- subzero/unittest/AssemblerX8664/XmmArith.cpp -----------------------===//
//
//                        The Subzero Code Generator
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
9 #include "AssemblerX8664/TestUtil.h"
10
11 namespace Ice {
12 namespace X8664 {
13 namespace Test {
14 namespace {
15
// Checks the scalar floating-point arithmetic emitters (addss, subss, mulss,
// divss) for f32 and f64, with the second operand given either as an xmm
// register or as a memory address.
TEST_F(AssemblerX8664Test, ArithSS) {
// Register/register form. Value0 is stored in slot T0 and loaded into Dst,
// Value1 is stored in slot T1 and loaded into Src (both with movss). After
// emitting `Inst Dst, Src` and running the code, Dst must equal the host
// result of `Value0 Op Value1`. FloatSize must be 32 or 64 and selects both
// the IceType_f* tag and the host type (float/double).
#define TestArithSSXmmXmm(FloatSize, Src, Value0, Dst, Value1, Inst, Op) \
  do { \
    static_assert(FloatSize == 32 || FloatSize == 64, \
                  "Invalid fp size " #FloatSize); \
    static constexpr char TestString[] = \
        "(" #FloatSize ", " #Src ", " #Value0 ", " #Dst ", " #Value1 \
        ", " #Inst ", " #Op ")"; \
    static constexpr bool IsDouble = FloatSize == 64; \
    using Type = std::conditional<IsDouble, double, float>::type; \
    const uint32_t T0 = allocateQword(); \
    const Type V0 = Value0; \
    const uint32_t T1 = allocateQword(); \
    const Type V1 = Value1; \
    \
    __ movss(IceType_f##FloatSize, Encoded_Xmm_##Dst(), dwordAddress(T0)); \
    __ movss(IceType_f##FloatSize, Encoded_Xmm_##Src(), dwordAddress(T1)); \
    __ Inst(IceType_f##FloatSize, Encoded_Xmm_##Dst(), Encoded_Xmm_##Src()); \
    \
    AssembledTest test = assemble(); \
    if (IsDouble) { \
      test.setQwordTo(T0, static_cast<double>(V0)); \
      test.setQwordTo(T1, static_cast<double>(V1)); \
    } else { \
      test.setDwordTo(T0, static_cast<float>(V0)); \
      test.setDwordTo(T1, static_cast<float>(V1)); \
    } \
    \
    test.run(); \
    \
    ASSERT_DOUBLE_EQ(V0 Op V1, test.Dst<Type>()) << TestString; \
    reset(); \
  } while (0)

// Register/memory form. Same as TestArithSSXmmXmm, except that the second
// operand of Inst is the memory slot T1 (holding Value1) instead of a
// register.
#define TestArithSSXmmAddr(FloatSize, Value0, Dst, Value1, Inst, Op) \
  do { \
    static_assert(FloatSize == 32 || FloatSize == 64, \
                  "Invalid fp size " #FloatSize); \
    static constexpr char TestString[] = \
        "(" #FloatSize ", Addr, " #Value0 ", " #Dst ", " #Value1 ", " #Inst \
        ", " #Op ")"; \
    static constexpr bool IsDouble = FloatSize == 64; \
    using Type = std::conditional<IsDouble, double, float>::type; \
    const uint32_t T0 = allocateQword(); \
    const Type V0 = Value0; \
    const uint32_t T1 = allocateQword(); \
    const Type V1 = Value1; \
    \
    __ movss(IceType_f##FloatSize, Encoded_Xmm_##Dst(), dwordAddress(T0)); \
    __ Inst(IceType_f##FloatSize, Encoded_Xmm_##Dst(), dwordAddress(T1)); \
    \
    AssembledTest test = assemble(); \
    if (IsDouble) { \
      test.setQwordTo(T0, static_cast<double>(V0)); \
      test.setQwordTo(T1, static_cast<double>(V1)); \
    } else { \
      test.setDwordTo(T0, static_cast<float>(V0)); \
      test.setDwordTo(T1, static_cast<float>(V1)); \
    } \
    \
    test.run(); \
    \
    ASSERT_DOUBLE_EQ(V0 Op V1, test.Dst<Type>()) << TestString; \
    reset(); \
  } while (0)

// Runs add/sub/mul/div in both operand forms for one float size. Dst0 is the
// destination for the register/register tests, Dst1 for the register/memory
// tests.
#define TestArithSS(FloatSize, Src, Dst0, Dst1) \
  do { \
    TestArithSSXmmXmm(FloatSize, Src, 1.0, Dst0, 10.0, addss, +); \
    TestArithSSXmmAddr(FloatSize, 2.0, Dst1, 20.0, addss, +); \
    TestArithSSXmmXmm(FloatSize, Src, 3.0, Dst0, 30.0, subss, -); \
    TestArithSSXmmAddr(FloatSize, 4.0, Dst1, 40.0, subss, -); \
    TestArithSSXmmXmm(FloatSize, Src, 5.0, Dst0, 50.0, mulss, *); \
    TestArithSSXmmAddr(FloatSize, 6.0, Dst1, 60.0, mulss, *); \
    TestArithSSXmmXmm(FloatSize, Src, 7.0, Dst0, 70.0, divss, /); \
    TestArithSSXmmAddr(FloatSize, 8.0, Dst1, 80.0, divss, /); \
  } while (0)

// Runs the whole matrix for both f32 and f64 on one register triple.
#define TestImpl(Src, Dst0, Dst1) \
  do { \
    TestArithSS(32, Src, Dst0, Dst1); \
    TestArithSS(64, Src, Dst0, Dst1); \
  } while (0)

  // Rotate through xmm0..xmm15 so each register appears in every role.
  TestImpl(xmm0, xmm1, xmm2);
  TestImpl(xmm1, xmm2, xmm3);
  TestImpl(xmm2, xmm3, xmm4);
  TestImpl(xmm3, xmm4, xmm5);
  TestImpl(xmm4, xmm5, xmm6);
  TestImpl(xmm5, xmm6, xmm7);
  TestImpl(xmm6, xmm7, xmm8);
  TestImpl(xmm7, xmm8, xmm9);
  TestImpl(xmm8, xmm9, xmm10);
  TestImpl(xmm9, xmm10, xmm11);
  TestImpl(xmm10, xmm11, xmm12);
  TestImpl(xmm11, xmm12, xmm13);
  TestImpl(xmm12, xmm13, xmm14);
  TestImpl(xmm13, xmm14, xmm15);
  TestImpl(xmm14, xmm15, xmm0);
  TestImpl(xmm15, xmm0, xmm1);

#undef TestImpl
#undef TestArithSS
#undef TestArithSSXmmAddr
#undef TestArithSSXmmXmm
}
122
TEST_F(AssemblerX8664Test,PArith)123 TEST_F(AssemblerX8664Test, PArith) {
124 #define TestPArithXmmXmm(Dst, Value0, Src, Value1, Inst, Op, Type, Size) \
125 do { \
126 static constexpr char TestString[] = \
127 "(" #Dst ", " #Value0 ", " #Src ", " #Value1 ", " #Inst ", " #Op \
128 ", " #Type ", " #Size ")"; \
129 const uint32_t T0 = allocateDqword(); \
130 const Dqword V0 Value0; \
131 \
132 const uint32_t T1 = allocateDqword(); \
133 const Dqword V1 Value1; \
134 \
135 __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0)); \
136 __ movups(Encoded_Xmm_##Src(), dwordAddress(T1)); \
137 __ Inst(IceType_i##Size, Encoded_Xmm_##Dst(), Encoded_Xmm_##Src()); \
138 \
139 AssembledTest test = assemble(); \
140 test.setDqwordTo(T0, V0); \
141 test.setDqwordTo(T1, V1); \
142 test.run(); \
143 \
144 ASSERT_EQ(packedAs<Type##Size##_t>(V0) Op V1, test.Dst<Dqword>()) \
145 << TestString; \
146 reset(); \
147 } while (0)
148
149 #define TestPArithXmmAddr(Dst, Value0, Value1, Inst, Op, Type, Size) \
150 do { \
151 static constexpr char TestString[] = \
152 "(" #Dst ", " #Value0 ", Addr, " #Value1 ", " #Inst ", " #Op \
153 ", " #Type ", " #Size ")"; \
154 const uint32_t T0 = allocateDqword(); \
155 const Dqword V0 Value0; \
156 \
157 const uint32_t T1 = allocateDqword(); \
158 const Dqword V1 Value1; \
159 \
160 __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0)); \
161 __ Inst(IceType_i##Size, Encoded_Xmm_##Dst(), dwordAddress(T1)); \
162 \
163 AssembledTest test = assemble(); \
164 test.setDqwordTo(T0, V0); \
165 test.setDqwordTo(T1, V1); \
166 test.run(); \
167 \
168 ASSERT_EQ(packedAs<Type##Size##_t>(V0) Op V1, test.Dst<Dqword>()) \
169 << TestString; \
170 reset(); \
171 } while (0)
172
173 #define TestPArithXmmImm(Dst, Value0, Imm, Inst, Op, Type, Size) \
174 do { \
175 static constexpr char TestString[] = \
176 "(" #Dst ", " #Value0 ", " #Imm ", " #Inst ", " #Op ", " #Type \
177 ", " #Size ")"; \
178 const uint32_t T0 = allocateDqword(); \
179 const Dqword V0 Value0; \
180 \
181 __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0)); \
182 __ Inst(IceType_i##Size, Encoded_Xmm_##Dst(), Immediate(Imm)); \
183 \
184 AssembledTest test = assemble(); \
185 test.setDqwordTo(T0, V0); \
186 test.run(); \
187 \
188 ASSERT_EQ(packedAs<Type##Size##_t>(V0) Op Imm, test.Dst<Dqword>()) \
189 << TestString; \
190 reset(); \
191 } while (0)
192
193 #define TestPAndnXmmXmm(Dst, Value0, Src, Value1, Type, Size) \
194 do { \
195 static constexpr char TestString[] = \
196 "(" #Dst ", " #Value0 ", " #Src ", " #Value1 ", pandn, " #Type \
197 ", " #Size ")"; \
198 const uint32_t T0 = allocateDqword(); \
199 const Dqword V0 Value0; \
200 \
201 const uint32_t T1 = allocateDqword(); \
202 const Dqword V1 Value1; \
203 \
204 __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0)); \
205 __ movups(Encoded_Xmm_##Src(), dwordAddress(T1)); \
206 __ pandn(IceType_i##Size, Encoded_Xmm_##Dst(), Encoded_Xmm_##Src()); \
207 \
208 AssembledTest test = assemble(); \
209 test.setDqwordTo(T0, V0); \
210 test.setDqwordTo(T1, V1); \
211 test.run(); \
212 \
213 ASSERT_EQ(~(packedAs<Type##Size##_t>(V0)) & V1, test.Dst<Dqword>()) \
214 << TestString; \
215 reset(); \
216 } while (0)
217
218 #define TestPAndnXmmAddr(Dst, Value0, Value1, Type, Size) \
219 do { \
220 static constexpr char TestString[] = \
221 "(" #Dst ", " #Value0 ", Addr, " #Value1 ", pandn, " #Type ", " #Size \
222 ")"; \
223 const uint32_t T0 = allocateDqword(); \
224 const Dqword V0 Value0; \
225 \
226 const uint32_t T1 = allocateDqword(); \
227 const Dqword V1 Value1; \
228 \
229 __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0)); \
230 __ pandn(IceType_i##Size, Encoded_Xmm_##Dst(), dwordAddress(T1)); \
231 \
232 AssembledTest test = assemble(); \
233 test.setDqwordTo(T0, V0); \
234 test.setDqwordTo(T1, V1); \
235 test.run(); \
236 \
237 ASSERT_EQ((~packedAs<Type##Size##_t>(V0)) & V1, test.Dst<Dqword>()) \
238 << TestString; \
239 reset(); \
240 } while (0)
241
242 #define TestPArithSize(Dst, Src, Size) \
243 do { \
244 static_assert(Size == 8 || Size == 16 || Size == 32, "Invalid size."); \
245 if (Size != 8) { \
246 TestPArithXmmXmm( \
247 Dst, \
248 (uint64_t(0x8040201008040201ull), uint64_t(0x8080404002020101ull)), \
249 Src, (uint64_t(3u), uint64_t(0u)), psra, >>, int, Size); \
250 TestPArithXmmAddr( \
251 Dst, \
252 (uint64_t(0x8040201008040201ull), uint64_t(0x8080404002020101ull)), \
253 (uint64_t(3u), uint64_t(0u)), psra, >>, int, Size); \
254 TestPArithXmmImm( \
255 Dst, \
256 (uint64_t(0x8040201008040201ull), uint64_t(0x8080404002020101ull)), \
257 3u, psra, >>, int, Size); \
258 TestPArithXmmXmm( \
259 Dst, \
260 (uint64_t(0x8040201008040201ull), uint64_t(0x8080404002020101ull)), \
261 Src, (uint64_t(3u), uint64_t(0u)), psrl, >>, uint, Size); \
262 TestPArithXmmAddr( \
263 Dst, \
264 (uint64_t(0x8040201008040201ull), uint64_t(0x8080404002020101ull)), \
265 (uint64_t(3u), uint64_t(0u)), psrl, >>, uint, Size); \
266 TestPArithXmmImm( \
267 Dst, \
268 (uint64_t(0x8040201008040201ull), uint64_t(0x8080404002020101ull)), \
269 3u, psrl, >>, uint, Size); \
270 TestPArithXmmXmm( \
271 Dst, \
272 (uint64_t(0x8040201008040201ull), uint64_t(0x8080404002020101ull)), \
273 Src, (uint64_t(3u), uint64_t(0u)), psll, <<, uint, Size); \
274 TestPArithXmmAddr( \
275 Dst, \
276 (uint64_t(0x8040201008040201ull), uint64_t(0x8080404002020101ull)), \
277 (uint64_t(3u), uint64_t(0u)), psll, <<, uint, Size); \
278 TestPArithXmmImm( \
279 Dst, \
280 (uint64_t(0x8040201008040201ull), uint64_t(0x8080404002020101ull)), \
281 3u, psll, <<, uint, Size); \
282 \
283 TestPArithXmmXmm( \
284 Dst, \
285 (uint64_t(0x8040201008040201ull), uint64_t(0x8080404002020101ull)), \
286 Src, \
287 (uint64_t(0xFFFFFFFF00000000ull), uint64_t(0x0123456789ABCDEull)), \
288 pmull, *, int, Size); \
289 TestPArithXmmAddr( \
290 Dst, \
291 (uint64_t(0x8040201008040201ull), uint64_t(0x8080404002020101ull)), \
292 (uint64_t(0xFFFFFFFF00000000ull), uint64_t(0x0123456789ABCDEull)), \
293 pmull, *, int, Size); \
294 if (Size != 16) { \
295 TestPArithXmmXmm( \
296 Dst, \
297 (uint64_t(0x8040201008040201ull), \
298 uint64_t(0x8080404002020101ull)), \
299 Src, \
300 (uint64_t(0xFFFFFFFF00000000ull), uint64_t(0x0123456789ABCDEull)), \
301 pmuludq, *, uint, Size); \
302 TestPArithXmmAddr( \
303 Dst, \
304 (uint64_t(0x8040201008040201ull), \
305 uint64_t(0x8080404002020101ull)), \
306 (uint64_t(0xFFFFFFFF00000000ull), uint64_t(0x0123456789ABCDEull)), \
307 pmuludq, *, uint, Size); \
308 } \
309 } \
310 TestPArithXmmXmm( \
311 Dst, \
312 (uint64_t(0x8040201008040201ull), uint64_t(0x8080404002020101ull)), \
313 Src, \
314 (uint64_t(0xFFFFFFFF00000000ull), uint64_t(0x0123456789ABCDEull)), \
315 padd, +, int, Size); \
316 TestPArithXmmAddr( \
317 Dst, \
318 (uint64_t(0x8040201008040201ull), uint64_t(0x8080404002020101ull)), \
319 (uint64_t(0xFFFFFFFF00000000ull), uint64_t(0x0123456789ABCDEull)), \
320 padd, +, int, Size); \
321 TestPArithXmmXmm( \
322 Dst, \
323 (uint64_t(0x8040201008040201ull), uint64_t(0x8080404002020101ull)), \
324 Src, \
325 (uint64_t(0xFFFFFFFF00000000ull), uint64_t(0x0123456789ABCDEull)), \
326 psub, -, int, Size); \
327 TestPArithXmmAddr( \
328 Dst, \
329 (uint64_t(0x8040201008040201ull), uint64_t(0x8080404002020101ull)), \
330 (uint64_t(0xFFFFFFFF00000000ull), uint64_t(0x0123456789ABCDEull)), \
331 psub, -, int, Size); \
332 TestPArithXmmXmm( \
333 Dst, \
334 (uint64_t(0x8040201008040201ull), uint64_t(0x8080404002020101ull)), \
335 Src, \
336 (uint64_t(0xFFFFFFFF00000000ull), uint64_t(0x0123456789ABCDEull)), \
337 pand, &, int, Size); \
338 TestPArithXmmAddr( \
339 Dst, \
340 (uint64_t(0x8040201008040201ull), uint64_t(0x8080404002020101ull)), \
341 (uint64_t(0xFFFFFFFF00000000ull), uint64_t(0x0123456789ABCDEull)), \
342 pand, &, int, Size); \
343 \
344 TestPAndnXmmXmm( \
345 Dst, \
346 (uint64_t(0x8040201008040201ull), uint64_t(0x8080404002020101ull)), \
347 Src, \
348 (uint64_t(0xFFFFFFFF00000000ull), uint64_t(0x0123456789ABCDEull)), \
349 int, Size); \
350 TestPAndnXmmAddr( \
351 Dst, \
352 (uint64_t(0x8040201008040201ull), uint64_t(0x8080404002020101ull)), \
353 (uint64_t(0xFFFFFFFF00000000ull), uint64_t(0x0123456789ABCDEull)), \
354 int, Size); \
355 \
356 TestPArithXmmXmm( \
357 Dst, \
358 (uint64_t(0x8040201008040201ull), uint64_t(0x8080404002020101ull)), \
359 Src, \
360 (uint64_t(0xFFFFFFFF00000000ull), uint64_t(0x0123456789ABCDEull)), \
361 por, |, int, Size); \
362 TestPArithXmmAddr( \
363 Dst, \
364 (uint64_t(0x8040201008040201ull), uint64_t(0x8080404002020101ull)), \
365 (uint64_t(0xFFFFFFFF00000000ull), uint64_t(0x0123456789ABCDEull)), \
366 por, |, int, Size); \
367 TestPArithXmmXmm( \
368 Dst, \
369 (uint64_t(0x8040201008040201ull), uint64_t(0x8080404002020101ull)), \
370 Src, \
371 (uint64_t(0xFFFFFFFF00000000ull), uint64_t(0x0123456789ABCDEull)), \
372 pxor, ^, int, Size); \
373 TestPArithXmmAddr( \
374 Dst, \
375 (uint64_t(0x8040201008040201ull), uint64_t(0x8080404002020101ull)), \
376 (uint64_t(0xFFFFFFFF00000000ull), uint64_t(0x0123456789ABCDEull)), \
377 pxor, ^, int, Size); \
378 } while (0)
379
380 #define TestPArith(Src, Dst) \
381 do { \
382 TestPArithSize(Src, Dst, 8); \
383 TestPArithSize(Src, Dst, 16); \
384 TestPArithSize(Src, Dst, 32); \
385 } while (0)
386
387 TestPArith(xmm0, xmm1);
388 TestPArith(xmm1, xmm2);
389 TestPArith(xmm2, xmm3);
390 TestPArith(xmm3, xmm4);
391 TestPArith(xmm4, xmm5);
392 TestPArith(xmm5, xmm6);
393 TestPArith(xmm6, xmm7);
394 TestPArith(xmm7, xmm8);
395 TestPArith(xmm8, xmm9);
396 TestPArith(xmm9, xmm10);
397 TestPArith(xmm10, xmm11);
398 TestPArith(xmm11, xmm12);
399 TestPArith(xmm12, xmm13);
400 TestPArith(xmm13, xmm14);
401 TestPArith(xmm14, xmm15);
402 TestPArith(xmm15, xmm0);
403
404 #undef TestPArith
405 #undef TestPArithSize
406 #undef TestPAndnXmmAddr
407 #undef TestPAndnXmmXmm
408 #undef TestPArithXmmImm
409 #undef TestPArithXmmAddr
410 #undef TestPArithXmmXmm
411 }
412
TEST_F(AssemblerX8664Test,ArithPS)413 TEST_F(AssemblerX8664Test, ArithPS) {
414 #define TestArithPSXmmXmm(FloatSize, Dst, Value0, Src, Value1, Inst, Op, Type) \
415 do { \
416 static constexpr char TestString[] = \
417 "(" #Dst ", " #Value0 ", " #Src ", " #Value1 ", " #Inst ", " #Op \
418 ", " #Type ")"; \
419 const uint32_t T0 = allocateDqword(); \
420 const Dqword V0 Value0; \
421 const uint32_t T1 = allocateDqword(); \
422 const Dqword V1 Value1; \
423 \
424 __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0)); \
425 __ movups(Encoded_Xmm_##Src(), dwordAddress(T1)); \
426 __ Inst(IceType_f32, Encoded_Xmm_##Dst(), Encoded_Xmm_##Src()); \
427 \
428 AssembledTest test = assemble(); \
429 test.setDqwordTo(T0, V0); \
430 test.setDqwordTo(T1, V1); \
431 test.run(); \
432 \
433 ASSERT_EQ(packedAs<Type>(V0) Op V1, test.Dst<Dqword>()) << TestString; \
434 \
435 reset(); \
436 } while (0)
437
438 #define TestArithPSXmmXmmUntyped(Dst, Value0, Src, Value1, Inst, Op, Type) \
439 do { \
440 static constexpr char TestString[] = \
441 "(" #Dst ", " #Value0 ", " #Src ", " #Value1 ", " #Inst ", " #Op \
442 ", " #Type ")"; \
443 const uint32_t T0 = allocateDqword(); \
444 const Dqword V0 Value0; \
445 const uint32_t T1 = allocateDqword(); \
446 const Dqword V1 Value1; \
447 \
448 __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0)); \
449 __ movups(Encoded_Xmm_##Src(), dwordAddress(T1)); \
450 __ Inst(Encoded_Xmm_##Dst(), Encoded_Xmm_##Src()); \
451 \
452 AssembledTest test = assemble(); \
453 test.setDqwordTo(T0, V0); \
454 test.setDqwordTo(T1, V1); \
455 test.run(); \
456 \
457 ASSERT_EQ(packedAs<Type>(V0) Op V1, test.Dst<Dqword>()) << TestString; \
458 \
459 reset(); \
460 } while (0)
461
462 #define TestArithPSXmmAddrUntyped(Dst, Value0, Value1, Inst, Op, Type) \
463 do { \
464 static constexpr char TestString[] = \
465 "(" #Dst ", " #Value0 ", Addr, " #Value1 ", " #Inst ", " #Op \
466 ", " #Type ")"; \
467 const uint32_t T0 = allocateDqword(); \
468 const Dqword V0 Value0; \
469 const uint32_t T1 = allocateDqword(); \
470 const Dqword V1 Value1; \
471 \
472 __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0)); \
473 __ Inst(Encoded_Xmm_##Dst(), dwordAddress(T1)); \
474 \
475 AssembledTest test = assemble(); \
476 test.setDqwordTo(T0, V0); \
477 test.setDqwordTo(T1, V1); \
478 test.run(); \
479 \
480 ASSERT_EQ(packedAs<Type>(V0) Op V1, test.Dst<Dqword>()) << TestString; \
481 \
482 reset(); \
483 } while (0)
484
485 #define TestMinMaxPS(FloatSize, Dst, Value0, Src, Value1, Inst, Type) \
486 do { \
487 static constexpr char TestString[] = \
488 "(" #Dst ", " #Value0 ", " #Src ", " #Value1 ", " #Inst ", " #Type \
489 ")"; \
490 const uint32_t T0 = allocateDqword(); \
491 const Dqword V0 Value0; \
492 const uint32_t T1 = allocateDqword(); \
493 const Dqword V1 Value1; \
494 \
495 __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0)); \
496 __ movups(Encoded_Xmm_##Src(), dwordAddress(T1)); \
497 __ Inst(IceType_f##FloatSize, Encoded_Xmm_##Dst(), Encoded_Xmm_##Src()); \
498 \
499 AssembledTest test = assemble(); \
500 test.setDqwordTo(T0, V0); \
501 test.setDqwordTo(T1, V1); \
502 test.run(); \
503 \
504 ASSERT_EQ(packedAs<Type>(V0).Inst(V1), test.Dst<Dqword>()) << TestString; \
505 \
506 reset(); \
507 } while (0)
508
509 #define TestArithPSXmmAddr(FloatSize, Dst, Value0, Value1, Inst, Op, Type) \
510 do { \
511 static constexpr char TestString[] = \
512 "(" #Dst ", " #Value0 ", Addr, " #Value1 ", " #Inst ", " #Op \
513 ", " #Type ")"; \
514 const uint32_t T0 = allocateDqword(); \
515 const Dqword V0 Value0; \
516 const uint32_t T1 = allocateDqword(); \
517 const Dqword V1 Value1; \
518 \
519 __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0)); \
520 __ Inst(IceType_f##FloatSize, Encoded_Xmm_##Dst(), dwordAddress(T1)); \
521 \
522 AssembledTest test = assemble(); \
523 test.setDqwordTo(T0, V0); \
524 test.setDqwordTo(T1, V1); \
525 test.run(); \
526 \
527 ASSERT_EQ(packedAs<Type>(V0) Op V1, test.Dst<Dqword>()) << TestString; \
528 \
529 reset(); \
530 } while (0)
531
532 #define TestArithPS(Dst, Src) \
533 do { \
534 TestArithPSXmmXmm(32, Dst, (1.0, 100.0, -1000.0, 20.0), Src, \
535 (0.55, 0.43, 0.23, 1.21), addps, +, float); \
536 TestArithPSXmmAddr(32, Dst, (1.0, 100.0, -1000.0, 20.0), \
537 (0.55, 0.43, 0.23, 1.21), addps, +, float); \
538 TestArithPSXmmXmm(32, Dst, (1.0, 100.0, -1000.0, 20.0), Src, \
539 (0.55, 0.43, 0.23, 1.21), subps, -, float); \
540 TestArithPSXmmAddr(32, Dst, (1.0, 100.0, -1000.0, 20.0), \
541 (0.55, 0.43, 0.23, 1.21), subps, -, float); \
542 TestArithPSXmmXmm(32, Dst, (1.0, 100.0, -1000.0, 20.0), Src, \
543 (0.55, 0.43, 0.23, 1.21), mulps, *, float); \
544 TestArithPSXmmAddr(32, Dst, (1.0, 100.0, -1000.0, 20.0), \
545 (0.55, 0.43, 0.23, 1.21), mulps, *, float); \
546 TestArithPSXmmXmm(32, Dst, (1.0, 100.0, -1000.0, 20.0), Src, \
547 (0.55, 0.43, 0.23, 1.21), divps, /, float); \
548 TestArithPSXmmAddr(32, Dst, (1.0, 100.0, -1000.0, 20.0), \
549 (0.55, 0.43, 0.23, 1.21), divps, /, float); \
550 TestArithPSXmmXmm(32, Dst, (1.0, 100.0, -1000.0, 20.0), Src, \
551 (0.55, 0.43, 0.23, 1.21), andps, &, float); \
552 TestArithPSXmmAddr(32, Dst, (1.0, 100.0, -1000.0, 20.0), \
553 (0.55, 0.43, 0.23, 1.21), andps, &, float); \
554 TestArithPSXmmXmm(64, Dst, (1.0, -1000.0), Src, (0.55, 1.21), andps, &, \
555 double); \
556 TestArithPSXmmAddr(64, Dst, (1.0, -1000.0), (0.55, 1.21), andps, &, \
557 double); \
558 TestArithPSXmmXmm(32, Dst, (1.0, 100.0, -1000.0, 20.0), Src, \
559 (0.55, 0.43, 0.23, 1.21), orps, |, float); \
560 TestArithPSXmmXmm(64, Dst, (1.0, -1000.0), Src, (0.55, 1.21), orps, |, \
561 double); \
562 TestMinMaxPS(32, Dst, (1.0, 100.0, -1000.0, 20.0), Src, \
563 (0.55, 0.43, 0.23, 1.21), minps, float); \
564 TestMinMaxPS(32, Dst, (1.0, 100.0, -1000.0, 20.0), Src, \
565 (0.55, 0.43, 0.23, 1.21), maxps, float); \
566 TestMinMaxPS(64, Dst, (1.0, -1000.0), Src, (0.55, 1.21), minps, double); \
567 TestMinMaxPS(64, Dst, (1.0, -1000.0), Src, (0.55, 1.21), maxps, double); \
568 TestArithPSXmmXmm(32, Dst, (1.0, 100.0, -1000.0, 20.0), Src, \
569 (0.55, 0.43, 0.23, 1.21), xorps, ^, float); \
570 TestArithPSXmmAddr(32, Dst, (1.0, 100.0, -1000.0, 20.0), \
571 (0.55, 0.43, 0.23, 1.21), xorps, ^, float); \
572 TestArithPSXmmXmm(64, Dst, (1.0, -1000.0), Src, (0.55, 1.21), xorps, ^, \
573 double); \
574 TestArithPSXmmAddr(64, Dst, (1.0, -1000.0), (0.55, 1.21), xorps, ^, \
575 double); \
576 } while (0)
577
578 TestArithPS(xmm0, xmm1);
579 TestArithPS(xmm1, xmm2);
580 TestArithPS(xmm2, xmm3);
581 TestArithPS(xmm3, xmm4);
582 TestArithPS(xmm4, xmm5);
583 TestArithPS(xmm5, xmm6);
584 TestArithPS(xmm6, xmm7);
585 TestArithPS(xmm7, xmm8);
586 TestArithPS(xmm8, xmm9);
587 TestArithPS(xmm9, xmm10);
588 TestArithPS(xmm10, xmm11);
589 TestArithPS(xmm11, xmm12);
590 TestArithPS(xmm12, xmm13);
591 TestArithPS(xmm13, xmm14);
592 TestArithPS(xmm14, xmm15);
593 TestArithPS(xmm15, xmm0);
594
595 #undef TestArithPs
596 #undef TestMinMaxPS
597 #undef TestArithPSXmmXmmUntyped
598 #undef TestArithPSXmmAddr
599 #undef TestArithPSXmmXmm
600 }
601
// Checks the variable-blend emitters (blendvps, pblendvb), which take their
// mask implicitly from xmm0, with the second operand given as an xmm register
// or a memory address.
TEST_F(AssemblerX8664Test, Blending) {
  // Aliases so that the macro's Type argument can be pasted into IceType_##Type
  // and also used as the host lane type in packedAs<Type>.
  using f32 = float;
  using i8 = uint8_t;

// Register/register form. Loads the mask M into xmm0, Value0 into Dst and
// Value1 into Src, emits `Inst Dst, Src`, and expects
// `packedAs<Type>(Value0).blendWith(Value1, M)` in Dst.
#define TestBlendingXmmXmm(Dst, Value0, Src, Value1, M /*ask*/, Inst, Type) \
  do { \
    static constexpr char TestString[] = \
        "(" #Dst ", " #Value0 ", " #Src ", " #Value1 ", " #M ", " #Inst \
        ", " #Type ")"; \
    const uint32_t T0 = allocateDqword(); \
    const Dqword V0 Value0; \
    const uint32_t T1 = allocateDqword(); \
    const Dqword V1 Value1; \
    const uint32_t Mask = allocateDqword(); \
    const Dqword MaskValue M; \
    \
    __ movups(Encoded_Xmm_xmm0(), dwordAddress(Mask)); \
    __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0)); \
    __ movups(Encoded_Xmm_##Src(), dwordAddress(T1)); \
    __ Inst(IceType_##Type, Encoded_Xmm_##Dst(), Encoded_Xmm_##Src()); \
    \
    AssembledTest test = assemble(); \
    test.setDqwordTo(T0, V0); \
    test.setDqwordTo(T1, V1); \
    test.setDqwordTo(Mask, MaskValue); \
    test.run(); \
    \
    ASSERT_EQ(packedAs<Type>(V0).blendWith(V1, MaskValue), test.Dst<Dqword>()) \
        << TestString; \
    reset(); \
  } while (0)

// Register/memory form: the second operand of Inst is the slot holding
// Value1.
#define TestBlendingXmmAddr(Dst, Value0, Value1, M /*ask*/, Inst, Type) \
  do { \
    static constexpr char TestString[] = \
        "(" #Dst ", " #Value0 ", Addr, " #Value1 ", " #M ", " #Inst ", " #Type \
        ")"; \
    const uint32_t T0 = allocateDqword(); \
    const Dqword V0 Value0; \
    const uint32_t T1 = allocateDqword(); \
    const Dqword V1 Value1; \
    const uint32_t Mask = allocateDqword(); \
    const Dqword MaskValue M; \
    \
    __ movups(Encoded_Xmm_xmm0(), dwordAddress(Mask)); \
    __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0)); \
    __ Inst(IceType_##Type, Encoded_Xmm_##Dst(), dwordAddress(T1)); \
    \
    AssembledTest test = assemble(); \
    test.setDqwordTo(T0, V0); \
    test.setDqwordTo(T1, V1); \
    test.setDqwordTo(Mask, MaskValue); \
    test.run(); \
    \
    ASSERT_EQ(packedAs<Type>(V0).blendWith(V1, MaskValue), test.Dst<Dqword>()) \
        << TestString; \
    reset(); \
  } while (0)

// Runs blendvps and pblendvb in both operand forms for one register pair.
#define TestBlending(Src, Dst) \
  do { \
    TestBlendingXmmXmm( \
        Dst, (1.0, 2.0, 1.0, 2.0), Src, (-1.0, -2.0, -1.0, -2.0), \
        (uint64_t(0x8000000000000000ull), uint64_t(0x0000000080000000ull)), \
        blendvps, f32); \
    TestBlendingXmmAddr( \
        Dst, (1.0, 2.0, 1.0, 2.0), (-1.0, -2.0, -1.0, -2.0), \
        (uint64_t(0x8000000000000000ull), uint64_t(0x0000000080000000ull)), \
        blendvps, f32); \
    TestBlendingXmmXmm( \
        Dst, \
        (uint64_t(0xFFFFFFFFFFFFFFFFull), uint64_t(0xBBBBBBBBBBBBBBBBull)), \
        Src, \
        (uint64_t(0xAAAAAAAAAAAAAAAAull), uint64_t(0xEEEEEEEEEEEEEEEEull)), \
        (uint64_t(0x8000000000000080ull), uint64_t(0x8080808000000000ull)), \
        pblendvb, i8); \
    TestBlendingXmmAddr( \
        Dst, \
        (uint64_t(0xFFFFFFFFFFFFFFFFull), uint64_t(0xBBBBBBBBBBBBBBBBull)), \
        (uint64_t(0xAAAAAAAAAAAAAAAAull), uint64_t(0xEEEEEEEEEEEEEEEEull)), \
        (uint64_t(0x8000000000000080ull), uint64_t(0x8080808000000000ull)), \
        pblendvb, i8); \
  } while (0)

  /* xmm0 is taken. It is the implicit mask. */
  TestBlending(xmm1, xmm2);
  TestBlending(xmm2, xmm3);
  TestBlending(xmm3, xmm4);
  TestBlending(xmm4, xmm5);
  TestBlending(xmm5, xmm6);
  TestBlending(xmm6, xmm7);
  TestBlending(xmm7, xmm8);
  TestBlending(xmm8, xmm9);
  TestBlending(xmm9, xmm10);
  TestBlending(xmm10, xmm11);
  TestBlending(xmm11, xmm12);
  TestBlending(xmm12, xmm13);
  TestBlending(xmm13, xmm14);
  TestBlending(xmm14, xmm15);
  TestBlending(xmm15, xmm1);

#undef TestBlending
#undef TestBlendingXmmAddr
#undef TestBlendingXmmXmm
}
707
// Checks the packed floating-point compare emitter (cmpps) for the `eq` and
// `unord` predicates, with the second operand given as an xmm register or a
// memory address.
TEST_F(AssemblerX8664Test, Cmpps) {
// Register/register form. Emits cmpps with predicate Cond::Cmpps_##C and
// expects `packedAs<Type>(Value0) Op Value1` in Dst.
#define TestCmppsXmmXmm(FloatSize, Dst, Value0, Src, Value1, C, Op, Type) \
  do { \
    static constexpr char TestString[] = \
        "(" #Src ", " #Dst ", " #C ", " #Op ")"; \
    const uint32_t T0 = allocateDqword(); \
    const Dqword V0 Value0; \
    const uint32_t T1 = allocateDqword(); \
    const Dqword V1 Value1; \
    \
    __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0)); \
    __ movups(Encoded_Xmm_##Src(), dwordAddress(T1)); \
    __ cmpps(IceType_f##FloatSize, Encoded_Xmm_##Dst(), Encoded_Xmm_##Src(), \
             Cond::Cmpps_##C); \
    \
    AssembledTest test = assemble(); \
    test.setDqwordTo(T0, V0); \
    test.setDqwordTo(T1, V1); \
    test.run(); \
    \
    ASSERT_EQ(packedAs<Type>(V0) Op V1, test.Dst<Dqword>()) << TestString; \
    reset(); \
  } while (0)

// Register/memory form of TestCmppsXmmXmm.
#define TestCmppsXmmAddr(FloatSize, Dst, Value0, Value1, C, Op, Type) \
  do { \
    static constexpr char TestString[] = "(" #Dst ", Addr, " #C ", " #Op ")"; \
    const uint32_t T0 = allocateDqword(); \
    const Dqword V0 Value0; \
    const uint32_t T1 = allocateDqword(); \
    const Dqword V1 Value1; \
    \
    __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0)); \
    __ cmpps(IceType_f##FloatSize, Encoded_Xmm_##Dst(), dwordAddress(T1), \
             Cond::Cmpps_##C); \
    \
    AssembledTest test = assemble(); \
    test.setDqwordTo(T0, V0); \
    test.setDqwordTo(T1, V1); \
    test.run(); \
    \
    ASSERT_EQ(packedAs<Type>(V0) Op V1, test.Dst<Dqword>()) << TestString; \
    reset(); \
  } while (0)

// Ordered/unordered predicates, register/register form. The expected value
// comes from the member function named C on the packed view.
// NOTE(review): the Value0 and Value1 arguments are not used; the operands
// are always the fixed float patterns (1, 1, NaN, NaN) and (1, NaN, 1, NaN),
// also when FloatSize is 64 -- confirm whether this is intentional.
#define TestCmppsOrdUnordXmmXmm(FloatSize, Dst, Value0, Src, Value1, C, Type) \
  do { \
    static constexpr char TestString[] = "(" #Src ", " #Dst ", " #C ")"; \
    const uint32_t T0 = allocateDqword(); \
    const Dqword V0(1.0, 1.0, std::numeric_limits<float>::quiet_NaN(), \
                    std::numeric_limits<float>::quiet_NaN()); \
    const uint32_t T1 = allocateDqword(); \
    const Dqword V1(1.0, std::numeric_limits<float>::quiet_NaN(), 1.0, \
                    std::numeric_limits<float>::quiet_NaN()); \
    \
    __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0)); \
    __ movups(Encoded_Xmm_##Src(), dwordAddress(T1)); \
    __ cmpps(IceType_f##FloatSize, Encoded_Xmm_##Dst(), Encoded_Xmm_##Src(), \
             Cond::Cmpps_##C); \
    \
    AssembledTest test = assemble(); \
    test.setDqwordTo(T0, V0); \
    test.setDqwordTo(T1, V1); \
    test.run(); \
    \
    ASSERT_EQ(packedAs<Type>(V0).C(V1), test.Dst<Dqword>()) << TestString; \
    reset(); \
  } while (0)

// Ordered/unordered predicates, register/memory form. Like the
// register/register variant, it does not use Value0/Value1.
#define TestCmppsOrdUnordXmmAddr(FloatSize, Dst, Value0, Value1, C, Type) \
  do { \
    static constexpr char TestString[] = "(" #Dst ", " #C ")"; \
    const uint32_t T0 = allocateDqword(); \
    const Dqword V0(1.0, 1.0, std::numeric_limits<float>::quiet_NaN(), \
                    std::numeric_limits<float>::quiet_NaN()); \
    const uint32_t T1 = allocateDqword(); \
    const Dqword V1(1.0, std::numeric_limits<float>::quiet_NaN(), 1.0, \
                    std::numeric_limits<float>::quiet_NaN()); \
    \
    __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0)); \
    __ cmpps(IceType_f##FloatSize, Encoded_Xmm_##Dst(), dwordAddress(T1), \
             Cond::Cmpps_##C); \
    \
    AssembledTest test = assemble(); \
    test.setDqwordTo(T0, V0); \
    test.setDqwordTo(T1, V1); \
    test.run(); \
    \
    ASSERT_EQ(packedAs<Type>(V0).C(V1), test.Dst<Dqword>()) << TestString; \
    reset(); \
  } while (0)

// Runs the predicate list for one (Dst, Src) pair and float size.
// NOTE(review): the `eq, ==` pair is repeated six times around the unord
// test; these look like placeholders for other predicates -- confirm before
// extending.
#define TestCmpps(FloatSize, Dst, Value0, Src, Value1, Type) \
  do { \
    TestCmppsXmmXmm(FloatSize, Dst, Value0, Src, Value1, eq, ==, Type); \
    TestCmppsXmmAddr(FloatSize, Dst, Value0, Value1, eq, ==, Type); \
    TestCmppsXmmXmm(FloatSize, Dst, Value0, Src, Value1, eq, ==, Type); \
    TestCmppsXmmAddr(FloatSize, Dst, Value0, Value1, eq, ==, Type); \
    TestCmppsXmmXmm(FloatSize, Dst, Value0, Src, Value1, eq, ==, Type); \
    TestCmppsXmmAddr(FloatSize, Dst, Value0, Value1, eq, ==, Type); \
    TestCmppsOrdUnordXmmXmm(FloatSize, Dst, Value0, Src, Value1, unord, Type); \
    TestCmppsOrdUnordXmmAddr(FloatSize, Dst, Value0, Value1, unord, Type); \
    TestCmppsXmmXmm(FloatSize, Dst, Value0, Src, Value1, eq, ==, Type); \
    TestCmppsXmmAddr(FloatSize, Dst, Value0, Value1, eq, ==, Type); \
    TestCmppsXmmXmm(FloatSize, Dst, Value0, Src, Value1, eq, ==, Type); \
    TestCmppsXmmAddr(FloatSize, Dst, Value0, Value1, eq, ==, Type); \
    TestCmppsXmmXmm(FloatSize, Dst, Value0, Src, Value1, eq, ==, Type); \
    TestCmppsXmmAddr(FloatSize, Dst, Value0, Value1, eq, ==, Type); \
    if (FloatSize == 32) { \
      TestCmppsOrdUnordXmmXmm(32, Dst, \
                              (1.0, 1.0, \
                               std::numeric_limits<float>::quiet_NaN(), \
                               std::numeric_limits<float>::quiet_NaN()), \
                              Src, \
                              (1.0, std::numeric_limits<float>::quiet_NaN(), \
                               1.0, std::numeric_limits<float>::quiet_NaN()), \
                              unord, Type); \
      TestCmppsOrdUnordXmmAddr(32, Dst, \
                               (1.0, 1.0, \
                                std::numeric_limits<float>::quiet_NaN(), \
                                std::numeric_limits<float>::quiet_NaN()), \
                               (1.0, std::numeric_limits<float>::quiet_NaN(), \
                                1.0, std::numeric_limits<float>::quiet_NaN()), \
                               unord, Type); \
    } else { \
      TestCmppsOrdUnordXmmXmm( \
          64, Dst, (1.0, std::numeric_limits<double>::quiet_NaN()), Src, \
          (std::numeric_limits<double>::quiet_NaN(), \
           std::numeric_limits<double>::quiet_NaN()), \
          unord, Type); \
      TestCmppsOrdUnordXmmXmm(64, Dst, (1.0, 1.0), Src, \
                              (1.0, std::numeric_limits<double>::quiet_NaN()), \
                              unord, Type); \
      TestCmppsOrdUnordXmmAddr( \
          64, Dst, (1.0, std::numeric_limits<double>::quiet_NaN()), \
          (std::numeric_limits<double>::quiet_NaN(), \
           std::numeric_limits<double>::quiet_NaN()), \
          unord, Type); \
      TestCmppsOrdUnordXmmAddr( \
          64, Dst, (1.0, 1.0), \
          (1.0, std::numeric_limits<double>::quiet_NaN()), unord, Type); \
    } \
  } while (0)

// Runs TestCmpps over every adjacent xmm register pair for one float size.
#define TestCmppsSize(FloatSize, Value0, Value1, Type) \
  do { \
    TestCmpps(FloatSize, xmm0, Value0, xmm1, Value1, Type); \
    TestCmpps(FloatSize, xmm1, Value0, xmm2, Value1, Type); \
    TestCmpps(FloatSize, xmm2, Value0, xmm3, Value1, Type); \
    TestCmpps(FloatSize, xmm3, Value0, xmm4, Value1, Type); \
    TestCmpps(FloatSize, xmm4, Value0, xmm5, Value1, Type); \
    TestCmpps(FloatSize, xmm5, Value0, xmm6, Value1, Type); \
    TestCmpps(FloatSize, xmm6, Value0, xmm7, Value1, Type); \
    TestCmpps(FloatSize, xmm7, Value0, xmm8, Value1, Type); \
    TestCmpps(FloatSize, xmm8, Value0, xmm9, Value1, Type); \
    TestCmpps(FloatSize, xmm9, Value0, xmm10, Value1, Type); \
    TestCmpps(FloatSize, xmm10, Value0, xmm11, Value1, Type); \
    TestCmpps(FloatSize, xmm11, Value0, xmm12, Value1, Type); \
    TestCmpps(FloatSize, xmm12, Value0, xmm13, Value1, Type); \
    TestCmpps(FloatSize, xmm13, Value0, xmm14, Value1, Type); \
    TestCmpps(FloatSize, xmm14, Value0, xmm15, Value1, Type); \
    TestCmpps(FloatSize, xmm15, Value0, xmm0, Value1, Type); \
  } while (0)

  TestCmppsSize(32, (-1.0, 1.0, 3.14, 1024.5), (-1.0, 1.0, 3.14, 1024.5),
                float);
  TestCmppsSize(64, (1.0, -1000.0), (1.0, -1000.0), double);

  // Undefine every macro defined above, including TestCmppsSize.
#undef TestCmppsSize
#undef TestCmpps
#undef TestCmppsOrdUnordXmmAddr
#undef TestCmppsOrdUnordXmmXmm
#undef TestCmppsXmmAddr
#undef TestCmppsXmmXmm
}
886
// Exercises the single-operand packed instructions sqrtps, rsqrtps,
// reciprocalps and sqrtpd on each of xmm0..xmm15. Every case loads the same
// 128-bit input into the register, applies the instruction in place, and
// compares the register with an expected Dqword via ASSERT_EQ.
TEST_F(AssemblerX8664Test, Sqrtps_Rsqrtps_Reciprocalps_Sqrtpd) {
// Dst:    XMM register token (e.g. xmm3); used both to pick the encoding and
//         to read the register back from the assembled test.
// Inst:   assembler method that takes the register as its only operand.
// Expect: parenthesised constructor arguments of the expected Dqword.
#define TestImplSingle(Dst, Inst, Expect) \
  do { \
    static constexpr char TestString[] = "(" #Dst ", " #Inst ")"; \
    const Dqword Input(1.0, 4.0, 20.0, 3.14); \
    const uint32_t InputAddr = allocateDqword(); \
    \
    __ movups(Encoded_Xmm_##Dst(), dwordAddress(InputAddr)); \
    __ Inst(Encoded_Xmm_##Dst()); \
    \
    AssembledTest test = assemble(); \
    test.setDqwordTo(InputAddr, Input); \
    test.run(); \
    ASSERT_EQ(Dqword Expect, test.Dst<Dqword>()) << TestString; \
    reset(); \
  } while (0)

// Runs all four instructions against one register.
#define TestImpl(Dst) \
  do { \
    TestImplSingle(Dst, sqrtps, \
                   (uint64_t(0x400000003F800000ull), \
                    uint64_t(0x3FE2D10B408F1BBDull))); \
    TestImplSingle(Dst, rsqrtps, \
                   (uint64_t(0x3EFFF0003F7FF000ull), \
                    uint64_t(0x3F1078003E64F000ull))); \
    TestImplSingle(Dst, reciprocalps, \
                   (uint64_t(0x3E7FF0003F7FF000ull), \
                    uint64_t(0x3EA310003D4CC000ull))); \
    \
    TestImplSingle(Dst, sqrtpd, \
                   (uint64_t(0x4036A09E9365F5F3ull), \
                    uint64_t(0x401C42FAE40282A8ull))); \
  } while (0)

  TestImpl(xmm0);
  TestImpl(xmm1);
  TestImpl(xmm2);
  TestImpl(xmm3);
  TestImpl(xmm4);
  TestImpl(xmm5);
  TestImpl(xmm6);
  TestImpl(xmm7);
  TestImpl(xmm8);
  TestImpl(xmm9);
  TestImpl(xmm10);
  TestImpl(xmm11);
  TestImpl(xmm12);
  TestImpl(xmm13);
  TestImpl(xmm14);
  TestImpl(xmm15);

#undef TestImpl
#undef TestImplSingle
}
941
// Checks unpcklps, unpcklpd, unpckhps and unpckhpd in their register-register
// form for every (xmmN, xmmN+1) pair, wrapping around at xmm15.
TEST_F(AssemblerX8664Test, Unpck) {
  // Values loaded into the destination and the source register respectively.
  const Dqword DstInput(uint64_t(0xAAAAAAAABBBBBBBBull),
                        uint64_t(0xCCCCCCCCDDDDDDDDull));
  const Dqword SrcInput(uint64_t(0xEEEEEEEEFFFFFFFFull),
                        uint64_t(0x9999999988888888ull));

  // Expected destination contents per instruction. The names must stay
  // <Inst>Expected because TestImplSingle selects them by token pasting.
  const Dqword unpcklpsExpected(uint64_t(0xFFFFFFFFBBBBBBBBull),
                                uint64_t(0xEEEEEEEEAAAAAAAAull));
  const Dqword unpcklpdExpected(uint64_t(0xAAAAAAAABBBBBBBBull),
                                uint64_t(0xEEEEEEEEFFFFFFFFull));
  const Dqword unpckhpsExpected(uint64_t(0x88888888DDDDDDDDull),
                                uint64_t(0x99999999CCCCCCCCull));
  const Dqword unpckhpdExpected(uint64_t(0xCCCCCCCCDDDDDDDDull),
                                uint64_t(0x9999999988888888ull));

// Loads DstInput/SrcInput into Dst/Src, executes Inst(Dst, Src) and compares
// Dst with <Inst>Expected.
#define TestImplSingle(Dst, Src, Inst) \
  do { \
    static constexpr char TestString[] = "(" #Dst ", " #Src ", " #Inst ")"; \
    const uint32_t DstAddr = allocateDqword(); \
    const uint32_t SrcAddr = allocateDqword(); \
    \
    __ movups(Encoded_Xmm_##Dst(), dwordAddress(DstAddr)); \
    __ movups(Encoded_Xmm_##Src(), dwordAddress(SrcAddr)); \
    __ Inst(Encoded_Xmm_##Dst(), Encoded_Xmm_##Src()); \
    \
    AssembledTest test = assemble(); \
    test.setDqwordTo(DstAddr, DstInput); \
    test.setDqwordTo(SrcAddr, SrcInput); \
    test.run(); \
    \
    ASSERT_EQ(Inst##Expected, test.Dst<Dqword>()) << TestString; \
    reset(); \
  } while (0)

// Runs all four unpack variants on one register pair.
#define TestImpl(Dst, Src) \
  do { \
    TestImplSingle(Dst, Src, unpcklps); \
    TestImplSingle(Dst, Src, unpcklpd); \
    TestImplSingle(Dst, Src, unpckhps); \
    TestImplSingle(Dst, Src, unpckhpd); \
  } while (0)

  TestImpl(xmm0, xmm1);
  TestImpl(xmm1, xmm2);
  TestImpl(xmm2, xmm3);
  TestImpl(xmm3, xmm4);
  TestImpl(xmm4, xmm5);
  TestImpl(xmm5, xmm6);
  TestImpl(xmm6, xmm7);
  TestImpl(xmm7, xmm8);
  TestImpl(xmm8, xmm9);
  TestImpl(xmm9, xmm10);
  TestImpl(xmm10, xmm11);
  TestImpl(xmm11, xmm12);
  TestImpl(xmm12, xmm13);
  TestImpl(xmm13, xmm14);
  TestImpl(xmm14, xmm15);
  TestImpl(xmm15, xmm0);

#undef TestImpl
#undef TestImplSingle
}
1004
// Checks pshufd and shufps with an immediate shuffle selector, in both the
// register-register and the register-memory form, for every (xmmN, xmmN+1)
// pair, wrapping around at xmm15.
TEST_F(AssemblerX8664Test, Shufp) {
  // Values loaded into the destination register and into the source
  // register / memory operand respectively.
  const Dqword V0(uint64_t(0x1111111122222222ull),
                  uint64_t(0x5555555577777777ull));
  const Dqword V1(uint64_t(0xAAAAAAAABBBBBBBBull),
                  uint64_t(0xCCCCCCCCDDDDDDDDull));

  // Per-instruction selector and expected destination. The names must stay
  // <Inst>Imm / <Inst>Expected because the macros select them by token
  // pasting.
  const uint8_t pshufdImm = 0x63;
  const Dqword pshufdExpected(uint64_t(0xBBBBBBBBCCCCCCCCull),
                              uint64_t(0xAAAAAAAADDDDDDDDull));

  const uint8_t shufpsImm = 0xf9;
  const Dqword shufpsExpected(uint64_t(0x7777777711111111ull),
                              uint64_t(0xCCCCCCCCCCCCCCCCull));

// Register-register form: loads V0 into Dst and V1 into Src, executes
// Inst(IceType_f32, Dst, Src, <Inst>Imm) and compares Dst with <Inst>Expected.
#define TestImplSingleXmmXmm(Dst, Src, Inst) \
  do { \
    static constexpr char TestString[] = "(" #Dst ", " #Src ", " #Inst ")"; \
    const uint32_t T0 = allocateDqword(); \
    const uint32_t T1 = allocateDqword(); \
    \
    __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0)); \
    __ movups(Encoded_Xmm_##Src(), dwordAddress(T1)); \
    __ Inst(IceType_f32, Encoded_Xmm_##Dst(), Encoded_Xmm_##Src(), \
            Immediate(Inst##Imm)); \
    \
    AssembledTest test = assemble(); \
    test.setDqwordTo(T0, V0); \
    test.setDqwordTo(T1, V1); \
    test.run(); \
    \
    ASSERT_EQ(Inst##Expected, test.Dst<Dqword>()) << TestString; \
    reset(); \
  } while (0)

// Register-memory form: same as above, but the second operand is read
// directly from the memory slot holding V1.
#define TestImplSingleXmmAddr(Dst, Inst) \
  do { \
    static constexpr char TestString[] = "(" #Dst ", Addr, " #Inst ")"; \
    const uint32_t T0 = allocateDqword(); \
    const uint32_t T1 = allocateDqword(); \
    \
    __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0)); \
    __ Inst(IceType_f32, Encoded_Xmm_##Dst(), dwordAddress(T1), \
            Immediate(Inst##Imm)); \
    \
    AssembledTest test = assemble(); \
    test.setDqwordTo(T0, V0); \
    test.setDqwordTo(T1, V1); \
    test.run(); \
    \
    ASSERT_EQ(Inst##Expected, test.Dst<Dqword>()) << TestString; \
    reset(); \
  } while (0)

// Runs both instructions in both operand forms on one register pair.
#define TestImpl(Dst, Src) \
  do { \
    TestImplSingleXmmXmm(Dst, Src, pshufd); \
    TestImplSingleXmmAddr(Dst, pshufd); \
    TestImplSingleXmmXmm(Dst, Src, shufps); \
    TestImplSingleXmmAddr(Dst, shufps); \
  } while (0)

  TestImpl(xmm0, xmm1);
  TestImpl(xmm1, xmm2);
  TestImpl(xmm2, xmm3);
  TestImpl(xmm3, xmm4);
  TestImpl(xmm4, xmm5);
  TestImpl(xmm5, xmm6);
  TestImpl(xmm6, xmm7);
  TestImpl(xmm7, xmm8);
  TestImpl(xmm8, xmm9);
  TestImpl(xmm9, xmm10);
  TestImpl(xmm10, xmm11);
  TestImpl(xmm11, xmm12);
  TestImpl(xmm12, xmm13);
  TestImpl(xmm13, xmm14);
  TestImpl(xmm14, xmm15);
  TestImpl(xmm15, xmm0);

#undef TestImpl
#undef TestImplSingleXmmAddr
#undef TestImplSingleXmmXmm
}
1108
// Checks punpckl for the v4i32, v8i16 and v16i8 element types, in both the
// register-register and the register-memory form. Only xmm0..xmm7 are
// exercised by this test.
TEST_F(AssemblerX8664Test, Punpckl) {
  // For each element type: destination input, source input, and the expected
  // destination (the low-half elements of the two inputs interleaved,
  // destination element first). The names must keep their _<Ty> suffix
  // because the macros select them by token pasting.
  const Dqword DstInput_v4i32(uint64_t(0x1111111122222222ull),
                              uint64_t(0x5555555577777777ull));
  const Dqword SrcInput_v4i32(uint64_t(0xAAAAAAAABBBBBBBBull),
                              uint64_t(0xCCCCCCCCDDDDDDDDull));
  const Dqword Expected_v4i32(uint64_t(0xBBBBBBBB22222222ull),
                              uint64_t(0xAAAAAAAA11111111ull));

  const Dqword DstInput_v8i16(uint64_t(0x1111222233334444ull),
                              uint64_t(0x5555666677778888ull));
  const Dqword SrcInput_v8i16(uint64_t(0xAAAABBBBCCCCDDDDull),
                              uint64_t(0xEEEEFFFF00009999ull));
  const Dqword Expected_v8i16(uint64_t(0xCCCC3333DDDD4444ull),
                              uint64_t(0xAAAA1111BBBB2222ull));

  const Dqword DstInput_v16i8(uint64_t(0x1122334455667788ull),
                              uint64_t(0x99AABBCCDDEEFF00ull));
  const Dqword SrcInput_v16i8(uint64_t(0xFFEEDDCCBBAA9900ull),
                              uint64_t(0xBAADF00DFEEDFACEull));
  const Dqword Expected_v16i8(uint64_t(0xBB55AA6699770088ull),
                              uint64_t(0xFF11EE22DD33CC44ull));

// Register-register form: Inst(IceType_<Ty>, Dst, Src).
#define TestImplXmmXmm(Dst, Src, Inst, Ty) \
  do { \
    static constexpr char TestString[] = \
        "(" #Dst ", " #Src ", " #Inst ", " #Ty ")"; \
    const uint32_t DstAddr = allocateDqword(); \
    const uint32_t SrcAddr = allocateDqword(); \
    \
    __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(DstAddr)); \
    __ movups(XmmRegister::Encoded_Reg_##Src, dwordAddress(SrcAddr)); \
    __ Inst(IceType_##Ty, XmmRegister::Encoded_Reg_##Dst, \
            XmmRegister::Encoded_Reg_##Src); \
    \
    AssembledTest test = assemble(); \
    test.setDqwordTo(DstAddr, DstInput_##Ty); \
    test.setDqwordTo(SrcAddr, SrcInput_##Ty); \
    test.run(); \
    \
    ASSERT_EQ(Expected_##Ty, test.Dst<Dqword>()) << TestString; \
    reset(); \
  } while (0)

// Register-memory form: the second operand is read from the memory slot
// holding the source input.
#define TestImplXmmAddr(Dst, Inst, Ty) \
  do { \
    static constexpr char TestString[] = \
        "(" #Dst ", Addr, " #Inst ", " #Ty ")"; \
    const uint32_t DstAddr = allocateDqword(); \
    const uint32_t SrcAddr = allocateDqword(); \
    \
    __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(DstAddr)); \
    __ Inst(IceType_##Ty, XmmRegister::Encoded_Reg_##Dst, \
            dwordAddress(SrcAddr)); \
    \
    AssembledTest test = assemble(); \
    test.setDqwordTo(DstAddr, DstInput_##Ty); \
    test.setDqwordTo(SrcAddr, SrcInput_##Ty); \
    test.run(); \
    \
    ASSERT_EQ(Expected_##Ty, test.Dst<Dqword>()) << TestString; \
    reset(); \
  } while (0)

// Runs every element type in both operand forms on one register pair.
#define TestImpl(Dst, Src) \
  do { \
    TestImplXmmXmm(Dst, Src, punpckl, v4i32); \
    TestImplXmmAddr(Dst, punpckl, v4i32); \
    TestImplXmmXmm(Dst, Src, punpckl, v8i16); \
    TestImplXmmAddr(Dst, punpckl, v8i16); \
    TestImplXmmXmm(Dst, Src, punpckl, v16i8); \
    TestImplXmmAddr(Dst, punpckl, v16i8); \
  } while (0)

  TestImpl(xmm0, xmm1);
  TestImpl(xmm1, xmm2);
  TestImpl(xmm2, xmm3);
  TestImpl(xmm3, xmm4);
  TestImpl(xmm4, xmm5);
  TestImpl(xmm5, xmm6);
  TestImpl(xmm6, xmm7);
  TestImpl(xmm7, xmm0);

#undef TestImpl
#undef TestImplXmmAddr
#undef TestImplXmmXmm
}
1194
// Checks packss for the v4i32 and v8i16 source element types, in both the
// register-register and the register-memory form. Only xmm0..xmm7 are
// exercised by this test.
TEST_F(AssemblerX8664Test, Packss) {
  // For each element type: destination input, source input, and the expected
  // destination (each input element narrowed to half its width with signed
  // saturation; destination elements fill the low half). The names must keep
  // their _<Ty> suffix because the macros select them by token pasting.
  const Dqword DstInput_v4i32(uint64_t(0x0001000000001234ull),
                              uint64_t(0x7FFFFFFF80000000ull));
  const Dqword SrcInput_v4i32(uint64_t(0xFFFFFFFEFFFFFFFFull),
                              uint64_t(0x0000800100007FFEull));
  const Dqword Expected_v4i32(uint64_t(0x7FFF80007FFF1234ull),
                              uint64_t(0x7FFF7FFEFFFEFFFFull));

  const Dqword DstInput_v8i16(uint64_t(0x0001000000120034ull),
                              uint64_t(0xFFFEFFFF7FFF8000ull));
  const Dqword SrcInput_v8i16(uint64_t(0x00057FF80081007Eull),
                              uint64_t(0x0088007700660055ull));
  const Dqword Expected_v8i16(uint64_t(0xFEFF7F8001001234ull),
                              uint64_t(0x7F776655057F7F7Eull));

// Register-register form: Inst(IceType_<Ty>, Dst, Src).
#define TestImplXmmXmm(Dst, Src, Inst, Ty) \
  do { \
    static constexpr char TestString[] = \
        "(" #Dst ", " #Src ", " #Inst ", " #Ty ")"; \
    const uint32_t DstAddr = allocateDqword(); \
    const uint32_t SrcAddr = allocateDqword(); \
    \
    __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(DstAddr)); \
    __ movups(XmmRegister::Encoded_Reg_##Src, dwordAddress(SrcAddr)); \
    __ Inst(IceType_##Ty, XmmRegister::Encoded_Reg_##Dst, \
            XmmRegister::Encoded_Reg_##Src); \
    \
    AssembledTest test = assemble(); \
    test.setDqwordTo(DstAddr, DstInput_##Ty); \
    test.setDqwordTo(SrcAddr, SrcInput_##Ty); \
    test.run(); \
    \
    ASSERT_EQ(Expected_##Ty, test.Dst<Dqword>()) << TestString; \
    reset(); \
  } while (0)

// Register-memory form: the second operand is read from the memory slot
// holding the source input.
#define TestImplXmmAddr(Dst, Inst, Ty) \
  do { \
    static constexpr char TestString[] = \
        "(" #Dst ", Addr, " #Inst ", " #Ty ")"; \
    const uint32_t DstAddr = allocateDqword(); \
    const uint32_t SrcAddr = allocateDqword(); \
    \
    __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(DstAddr)); \
    __ Inst(IceType_##Ty, XmmRegister::Encoded_Reg_##Dst, \
            dwordAddress(SrcAddr)); \
    \
    AssembledTest test = assemble(); \
    test.setDqwordTo(DstAddr, DstInput_##Ty); \
    test.setDqwordTo(SrcAddr, SrcInput_##Ty); \
    test.run(); \
    \
    ASSERT_EQ(Expected_##Ty, test.Dst<Dqword>()) << TestString; \
    reset(); \
  } while (0)

// Runs both element types in both operand forms on one register pair.
#define TestImpl(Dst, Src) \
  do { \
    TestImplXmmXmm(Dst, Src, packss, v4i32); \
    TestImplXmmAddr(Dst, packss, v4i32); \
    TestImplXmmXmm(Dst, Src, packss, v8i16); \
    TestImplXmmAddr(Dst, packss, v8i16); \
  } while (0)

  TestImpl(xmm0, xmm1);
  TestImpl(xmm1, xmm2);
  TestImpl(xmm2, xmm3);
  TestImpl(xmm3, xmm4);
  TestImpl(xmm4, xmm5);
  TestImpl(xmm5, xmm6);
  TestImpl(xmm6, xmm7);
  TestImpl(xmm7, xmm0);

#undef TestImpl
#undef TestImplXmmAddr
#undef TestImplXmmXmm
}
1271
// Checks packus for the v4i32 and v8i16 source element types, in both the
// register-register and the register-memory form. Only xmm0..xmm7 are
// exercised by this test.
TEST_F(AssemblerX8664Test, Packus) {
  // For each element type: destination input, source input, and the expected
  // destination (each signed input element narrowed to half its width with
  // unsigned saturation; destination elements fill the low half). The names
  // must keep their _<Ty> suffix because the macros select them by token
  // pasting.
  const Dqword DstInput_v4i32(uint64_t(0x0001000000001234ull),
                              uint64_t(0x7FFFFFFF80000000ull));
  const Dqword SrcInput_v4i32(uint64_t(0xFFFFFFFEFFFFFFFFull),
                              uint64_t(0x0000800100007FFEull));
  const Dqword Expected_v4i32(uint64_t(0xFFFF0000FFFF1234ull),
                              uint64_t(0x80017FFE00000000ull));

  const Dqword DstInput_v8i16(uint64_t(0x0001000000120034ull),
                              uint64_t(0xFFFEFFFF7FFF8000ull));
  const Dqword SrcInput_v8i16(uint64_t(0x00057FF80081007Eull),
                              uint64_t(0x0088007700660055ull));
  const Dqword Expected_v8i16(uint64_t(0x0000FF0001001234ull),
                              uint64_t(0x8877665505FF817Eull));

// Register-register form: Inst(IceType_<Ty>, Dst, Src).
#define TestImplXmmXmm(Dst, Src, Inst, Ty) \
  do { \
    static constexpr char TestString[] = \
        "(" #Dst ", " #Src ", " #Inst ", " #Ty ")"; \
    const uint32_t DstAddr = allocateDqword(); \
    const uint32_t SrcAddr = allocateDqword(); \
    \
    __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(DstAddr)); \
    __ movups(XmmRegister::Encoded_Reg_##Src, dwordAddress(SrcAddr)); \
    __ Inst(IceType_##Ty, XmmRegister::Encoded_Reg_##Dst, \
            XmmRegister::Encoded_Reg_##Src); \
    \
    AssembledTest test = assemble(); \
    test.setDqwordTo(DstAddr, DstInput_##Ty); \
    test.setDqwordTo(SrcAddr, SrcInput_##Ty); \
    test.run(); \
    \
    ASSERT_EQ(Expected_##Ty, test.Dst<Dqword>()) << TestString; \
    reset(); \
  } while (0)

// Register-memory form: the second operand is read from the memory slot
// holding the source input.
#define TestImplXmmAddr(Dst, Inst, Ty) \
  do { \
    static constexpr char TestString[] = \
        "(" #Dst ", Addr, " #Inst ", " #Ty ")"; \
    const uint32_t DstAddr = allocateDqword(); \
    const uint32_t SrcAddr = allocateDqword(); \
    \
    __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(DstAddr)); \
    __ Inst(IceType_##Ty, XmmRegister::Encoded_Reg_##Dst, \
            dwordAddress(SrcAddr)); \
    \
    AssembledTest test = assemble(); \
    test.setDqwordTo(DstAddr, DstInput_##Ty); \
    test.setDqwordTo(SrcAddr, SrcInput_##Ty); \
    test.run(); \
    \
    ASSERT_EQ(Expected_##Ty, test.Dst<Dqword>()) << TestString; \
    reset(); \
  } while (0)

// Runs both element types in both operand forms on one register pair.
#define TestImpl(Dst, Src) \
  do { \
    TestImplXmmXmm(Dst, Src, packus, v4i32); \
    TestImplXmmAddr(Dst, packus, v4i32); \
    TestImplXmmXmm(Dst, Src, packus, v8i16); \
    TestImplXmmAddr(Dst, packus, v8i16); \
  } while (0)

  TestImpl(xmm0, xmm1);
  TestImpl(xmm1, xmm2);
  TestImpl(xmm2, xmm3);
  TestImpl(xmm3, xmm4);
  TestImpl(xmm4, xmm5);
  TestImpl(xmm5, xmm6);
  TestImpl(xmm6, xmm7);
  TestImpl(xmm7, xmm0);

#undef TestImpl
#undef TestImplXmmAddr
#undef TestImplXmmXmm
}
1348
// Checks pshufb in the register-register and the register-memory form for
// every (xmmN, xmmN+1) pair, wrapping around at xmm15.
TEST_F(AssemblerX8664Test, Pshufb) {
  // Bytes to be shuffled (loaded into the destination register).
  const Dqword DataInput(uint64_t(0x1122334455667788ull),
                         uint64_t(0x99aabbccddeeff32ull));
  // Shuffle control (source register or memory operand). In the expected
  // value, control bytes with the top bit set (0x80, 0x8b) produce a zero
  // byte; the remaining ones select a byte of DataInput by index.
  const Dqword MaskInput(uint64_t(0x0204050380060708ull),
                         uint64_t(0x010306080a8b0c0dull));

  const Dqword Expected(uint64_t(0x6644335500221132ull),
                        uint64_t(0x77552232ee00ccbbull));

// Register-register form: Inst(IceType_void, Dst, Src).
#define TestImplXmmXmm(Dst, Src, Inst) \
  do { \
    static constexpr char TestString[] = "(" #Dst ", " #Src ", " #Inst ")"; \
    const uint32_t DataAddr = allocateDqword(); \
    const uint32_t MaskAddr = allocateDqword(); \
    \
    __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(DataAddr)); \
    __ movups(XmmRegister::Encoded_Reg_##Src, dwordAddress(MaskAddr)); \
    __ Inst(IceType_void, XmmRegister::Encoded_Reg_##Dst, \
            XmmRegister::Encoded_Reg_##Src); \
    \
    AssembledTest test = assemble(); \
    test.setDqwordTo(DataAddr, DataInput); \
    test.setDqwordTo(MaskAddr, MaskInput); \
    test.run(); \
    \
    ASSERT_EQ(Expected, test.Dst<Dqword>()) << TestString; \
    reset(); \
  } while (0)

// Register-memory form: the control operand is read from memory.
#define TestImplXmmAddr(Dst, Inst) \
  do { \
    static constexpr char TestString[] = "(" #Dst ", Addr, " #Inst ")"; \
    const uint32_t DataAddr = allocateDqword(); \
    const uint32_t MaskAddr = allocateDqword(); \
    \
    __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(DataAddr)); \
    __ Inst(IceType_void, XmmRegister::Encoded_Reg_##Dst, \
            dwordAddress(MaskAddr)); \
    \
    AssembledTest test = assemble(); \
    test.setDqwordTo(DataAddr, DataInput); \
    test.setDqwordTo(MaskAddr, MaskInput); \
    test.run(); \
    \
    ASSERT_EQ(Expected, test.Dst<Dqword>()) << TestString; \
    reset(); \
  } while (0)

// Runs both operand forms on one register pair.
#define TestImpl(Dst, Src) \
  do { \
    TestImplXmmXmm(Dst, Src, pshufb); \
    TestImplXmmAddr(Dst, pshufb); \
  } while (0)

  TestImpl(xmm0, xmm1);
  TestImpl(xmm1, xmm2);
  TestImpl(xmm2, xmm3);
  TestImpl(xmm3, xmm4);
  TestImpl(xmm4, xmm5);
  TestImpl(xmm5, xmm6);
  TestImpl(xmm6, xmm7);
  TestImpl(xmm7, xmm8);
  TestImpl(xmm8, xmm9);
  TestImpl(xmm9, xmm10);
  TestImpl(xmm10, xmm11);
  TestImpl(xmm11, xmm12);
  TestImpl(xmm12, xmm13);
  TestImpl(xmm13, xmm14);
  TestImpl(xmm14, xmm15);
  TestImpl(xmm15, xmm0);

#undef TestImpl
#undef TestImplXmmAddr
#undef TestImplXmmXmm
}
1423
TEST_F(AssemblerX8664Test,Cvt)1424 TEST_F(AssemblerX8664Test, Cvt) {
1425 const Dqword dq2ps32DstValue(-1.0f, -1.0f, -1.0f, -1.0f);
1426 const Dqword dq2ps32SrcValue(-5, 3, 100, 200);
1427 const Dqword dq2ps32Expected(-5.0f, 3.0f, 100.0, 200.0);
1428
1429 const Dqword dq2ps64DstValue(0.0f, 0.0f, -1.0f, -1.0f);
1430 const Dqword dq2ps64SrcValue(-5, 3, 100, 200);
1431 const Dqword dq2ps64Expected(-5.0f, 3.0f, 100.0, 200.0);
1432
1433 const Dqword tps2dq32DstValue(-1.0f, -1.0f, -1.0f, -1.0f);
1434 const Dqword tps2dq32SrcValue(-5.0f, 3.0f, 100.0, 200.0);
1435 const Dqword tps2dq32Expected(-5, 3, 100, 200);
1436
1437 const Dqword tps2dq64DstValue(-1.0f, -1.0f, -1.0f, -1.0f);
1438 const Dqword tps2dq64SrcValue(-5.0f, 3.0f, 100.0, 200.0);
1439 const Dqword tps2dq64Expected(-5, 3, 100, 200);
1440
1441 const Dqword si2ss32DstValue(-1.0f, -1.0f, -1.0f, -1.0f);
1442 const int32_t si2ss32SrcValue = 5;
1443 const Dqword si2ss32Expected(5.0f, -1.0f, -1.0f, -1.0f);
1444
1445 const Dqword si2ss64DstValue(-1.0, -1.0);
1446 const int32_t si2ss64SrcValue = 5;
1447 const Dqword si2ss64Expected(5.0, -1.0);
1448
1449 const int32_t tss2si32DstValue = 0xF00F0FF0;
1450 const Dqword tss2si32SrcValue(-5.0f, -1.0f, -1.0f, -1.0f);
1451 const int32_t tss2si32Expected = -5;
1452
1453 const int32_t tss2si64DstValue = 0xF00F0FF0;
1454 const Dqword tss2si64SrcValue(-5.0, -1.0);
1455 const int32_t tss2si64Expected = -5;
1456
1457 const Dqword float2float32DstValue(-1.0, -1.0);
1458 const Dqword float2float32SrcValue(-5.0, 3, 100, 200);
1459 const Dqword float2float32Expected(-5.0, -1.0);
1460
1461 const Dqword float2float64DstValue(-1.0, -1.0, -1.0, -1.0);
1462 const Dqword float2float64SrcValue(-5.0, 3.0);
1463 const Dqword float2float64Expected(-5.0, -1.0, -1.0, -1.0);
1464
1465 #define TestImplPXmmXmm(Dst, Src, Inst, Size) \
1466 do { \
1467 static constexpr char TestString[] = \
1468 "(" #Dst ", " #Src ", cvt" #Inst ", f" #Size ")"; \
1469 const uint32_t T0 = allocateDqword(); \
1470 const uint32_t T1 = allocateDqword(); \
1471 \
1472 __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0)); \
1473 __ movups(Encoded_Xmm_##Src(), dwordAddress(T1)); \
1474 __ cvt##Inst(IceType_f##Size, Encoded_Xmm_##Dst(), Encoded_Xmm_##Src()); \
1475 \
1476 AssembledTest test = assemble(); \
1477 test.setDqwordTo(T0, Inst##Size##DstValue); \
1478 test.setDqwordTo(T1, Inst##Size##SrcValue); \
1479 test.run(); \
1480 \
1481 ASSERT_EQ(Inst##Size##Expected, test.Dst<Dqword>()) << TestString; \
1482 reset(); \
1483 } while (0)
1484
1485 #define TestImplSXmmReg(Dst, GPR, Inst, Size, IntType) \
1486 do { \
1487 static constexpr char TestString[] = \
1488 "(" #Dst ", " #GPR ", cvt" #Inst ", " #IntType ", f" #Size ")"; \
1489 const uint32_t T0 = allocateDqword(); \
1490 \
1491 __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0)); \
1492 __ mov(IceType_i32, Encoded_GPR_##GPR(), Immediate(Inst##Size##SrcValue)); \
1493 __ cvt##Inst(IceType_f##Size, Encoded_Xmm_##Dst(), IntType, \
1494 Encoded_GPR_##GPR()); \
1495 \
1496 AssembledTest test = assemble(); \
1497 test.setDqwordTo(T0, Inst##Size##DstValue); \
1498 test.run(); \
1499 \
1500 ASSERT_EQ(Inst##Size##Expected, test.Dst<Dqword>()) << TestString; \
1501 reset(); \
1502 } while (0)
1503
1504 #define TestImplSRegXmm(GPR, Src, Inst, IntSize, Size) \
1505 do { \
1506 static constexpr char TestString[] = \
1507 "(" #GPR ", " #Src ", cvt" #Inst ", " #IntSize ", f" #Size ")"; \
1508 const uint32_t T0 = allocateDqword(); \
1509 \
1510 __ mov(IceType_i32, Encoded_GPR_##GPR(), Immediate(Inst##Size##DstValue)); \
1511 __ movups(Encoded_Xmm_##Src(), dwordAddress(T0)); \
1512 __ cvt##Inst(IceType_i##IntSize, Encoded_GPR_##GPR(), IceType_f##Size, \
1513 Encoded_Xmm_##Src()); \
1514 \
1515 AssembledTest test = assemble(); \
1516 test.setDqwordTo(T0, Inst##Size##SrcValue); \
1517 test.run(); \
1518 \
1519 ASSERT_EQ(static_cast<uint##IntSize##_t>(Inst##Size##Expected), \
1520 test.GPR()) \
1521 << TestString; \
1522 reset(); \
1523 } while (0)
1524
1525 #define TestImplPXmmAddr(Dst, Inst, Size) \
1526 do { \
1527 static constexpr char TestString[] = \
1528 "(" #Dst ", Addr, cvt" #Inst ", f" #Size ")"; \
1529 const uint32_t T0 = allocateDqword(); \
1530 const uint32_t T1 = allocateDqword(); \
1531 \
1532 __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0)); \
1533 __ cvt##Inst(IceType_f##Size, Encoded_Xmm_##Dst(), dwordAddress(T1)); \
1534 \
1535 AssembledTest test = assemble(); \
1536 test.setDqwordTo(T0, Inst##Size##DstValue); \
1537 test.setDqwordTo(T1, Inst##Size##SrcValue); \
1538 test.run(); \
1539 \
1540 ASSERT_EQ(Inst##Size##Expected, test.Dst<Dqword>()) << TestString; \
1541 reset(); \
1542 } while (0)
1543
1544 #define TestImplSXmmAddr(Dst, Inst, Size, IntType) \
1545 do { \
1546 static constexpr char TestString[] = \
1547 "(" #Dst ", Addr, cvt" #Inst ", f" #Size ", " #IntType ")"; \
1548 const uint32_t T0 = allocateDqword(); \
1549 const uint32_t T1 = allocateDword(); \
1550 \
1551 __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0)); \
1552 __ cvt##Inst(IceType_f##Size, Encoded_Xmm_##Dst(), IntType, \
1553 dwordAddress(T1)); \
1554 \
1555 AssembledTest test = assemble(); \
1556 test.setDqwordTo(T0, Inst##Size##DstValue); \
1557 test.setDwordTo(T1, Inst##Size##SrcValue); \
1558 test.run(); \
1559 \
1560 ASSERT_EQ(Inst##Size##Expected, test.Dst<Dqword>()) << TestString; \
1561 reset(); \
1562 } while (0)
1563
1564 #define TestImplSRegAddr(GPR, Inst, IntSize, Size) \
1565 do { \
1566 static constexpr char TestString[] = \
1567 "(" #GPR ", Addr, cvt" #Inst ", f" #Size ", " #IntSize ")"; \
1568 const uint32_t T0 = allocateDqword(); \
1569 \
1570 __ mov(IceType_i32, Encoded_GPR_##GPR(), Immediate(Inst##Size##DstValue)); \
1571 __ cvt##Inst(IceType_i##IntSize, Encoded_GPR_##GPR(), IceType_f##Size, \
1572 dwordAddress(T0)); \
1573 \
1574 AssembledTest test = assemble(); \
1575 test.setDqwordTo(T0, Inst##Size##SrcValue); \
1576 test.run(); \
1577 \
1578 ASSERT_EQ(static_cast<uint##IntSize##_t>(Inst##Size##Expected), \
1579 test.GPR()) \
1580 << TestString; \
1581 reset(); \
1582 } while (0)
1583
1584 #define TestImplSize(Dst, Src, GPR, Size) \
1585 do { \
1586 TestImplPXmmXmm(Dst, Src, dq2ps, Size); \
1587 TestImplPXmmAddr(Src, dq2ps, Size); \
1588 TestImplPXmmXmm(Dst, Src, tps2dq, Size); \
1589 TestImplPXmmAddr(Src, tps2dq, Size); \
1590 TestImplSXmmReg(Dst, GPR, si2ss, Size, IceType_i32); \
1591 TestImplSXmmReg(Dst, GPR, si2ss, Size, IceType_i64); \
1592 TestImplSXmmAddr(Dst, si2ss, Size, IceType_i32); \
1593 TestImplSXmmAddr(Dst, si2ss, Size, IceType_i64); \
1594 TestImplSRegXmm(GPR, Src, tss2si, 32, Size); \
1595 TestImplSRegXmm(GPR, Src, tss2si, 64, Size); \
1596 TestImplSRegAddr(GPR, tss2si, 32, Size); \
1597 TestImplSRegAddr(GPR, tss2si, 64, Size); \
1598 TestImplPXmmXmm(Dst, Src, float2float, Size); \
1599 TestImplPXmmAddr(Src, float2float, Size); \
1600 } while (0)
1601
1602 #define TestImpl(Dst, Src, GPR) \
1603 do { \
1604 TestImplSize(Dst, Src, GPR, 32); \
1605 TestImplSize(Dst, Src, GPR, 64); \
1606 } while (0)
1607
1608 TestImpl(xmm0, xmm1, r1);
1609 TestImpl(xmm1, xmm2, r2);
1610 TestImpl(xmm2, xmm3, r3);
1611 TestImpl(xmm3, xmm4, r4);
1612 TestImpl(xmm4, xmm5, r5);
1613 TestImpl(xmm5, xmm6, r6);
1614 TestImpl(xmm6, xmm7, r7);
1615 TestImpl(xmm7, xmm8, r8);
1616 TestImpl(xmm8, xmm9, r10);
1617 TestImpl(xmm9, xmm10, r11);
1618 TestImpl(xmm10, xmm11, r12);
1619 TestImpl(xmm11, xmm12, r13);
1620 TestImpl(xmm12, xmm13, r14);
1621 TestImpl(xmm13, xmm14, r15);
1622 TestImpl(xmm14, xmm15, r1);
1623 TestImpl(xmm15, xmm0, r2);
1624
1625 #undef TestImpl
1626 #undef TestImplSize
1627 #undef TestImplSRegAddr
1628 #undef TestImplSXmmAddr
1629 #undef TestImplPXmmAddr
1630 #undef TestImplSRegXmm
1631 #undef TestImplSXmmReg
1632 #undef TestImplPXmmXmm
1633 }
1634
TEST_F(AssemblerX8664Test,Ucomiss)1635 TEST_F(AssemblerX8664Test, Ucomiss) {
1636 static constexpr float qnan32 = std::numeric_limits<float>::quiet_NaN();
1637 static constexpr double qnan64 = std::numeric_limits<float>::quiet_NaN();
1638
1639 Dqword test32DstValue(0.0, qnan32, qnan32, qnan32);
1640 Dqword test32SrcValue(0.0, qnan32, qnan32, qnan32);
1641
1642 Dqword test64DstValue(0.0, qnan64);
1643 Dqword test64SrcValue(0.0, qnan64);
1644
1645 #define TestImplXmmXmm(Dst, Value0, Src, Value1, Size, CompType, BParity, \
1646 BOther) \
1647 do { \
1648 static constexpr char NearBranch = AssemblerX8664::kNearJump; \
1649 static constexpr char TestString[] = \
1650 "(" #Dst ", " #Value0 ", " #Src ", " #Value1 ", " #Size ", " #CompType \
1651 ", " #BParity ", " #BOther ")"; \
1652 const uint32_t T0 = allocateDqword(); \
1653 test##Size##DstValue.F##Size[0] = Value0; \
1654 const uint32_t T1 = allocateDqword(); \
1655 test##Size##SrcValue.F##Size[0] = Value1; \
1656 const uint32_t ImmIfTrue = 0xBEEF; \
1657 const uint32_t ImmIfFalse = 0xC0FFE; \
1658 \
1659 __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0)); \
1660 __ movups(Encoded_Xmm_##Src(), dwordAddress(T1)); \
1661 __ mov(IceType_i32, GPRRegister::Encoded_Reg_eax, Immediate(ImmIfFalse)); \
1662 __ ucomiss(IceType_f##Size, Encoded_Xmm_##Dst(), Encoded_Xmm_##Src()); \
1663 Label Done; \
1664 __ j(Cond::Br_##BParity, &Done, NearBranch); \
1665 __ j(Cond::Br_##BOther, &Done, NearBranch); \
1666 __ mov(IceType_i32, GPRRegister::Encoded_Reg_eax, Immediate(ImmIfTrue)); \
1667 __ bind(&Done); \
1668 \
1669 AssembledTest test = assemble(); \
1670 test.setDqwordTo(T0, test##Size##DstValue); \
1671 test.setDqwordTo(T1, test##Size##SrcValue); \
1672 test.run(); \
1673 \
1674 ASSERT_EQ(ImmIfTrue, test.eax()) << TestString; \
1675 reset(); \
1676 } while (0)
1677
1678 #define TestImplXmmAddr(Dst, Value0, Value1, Size, CompType, BParity, BOther) \
1679 do { \
1680 static constexpr char NearBranch = AssemblerX8664::kNearJump; \
1681 static constexpr char TestString[] = \
1682 "(" #Dst ", " #Value0 ", Addr, " #Value1 ", " #Size ", " #CompType \
1683 ", " #BParity ", " #BOther ")"; \
1684 const uint32_t T0 = allocateDqword(); \
1685 test##Size##DstValue.F##Size[0] = Value0; \
1686 const uint32_t T1 = allocateDqword(); \
1687 test##Size##SrcValue.F##Size[0] = Value1; \
1688 const uint32_t ImmIfTrue = 0xBEEF; \
1689 const uint32_t ImmIfFalse = 0xC0FFE; \
1690 \
1691 __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0)); \
1692 __ mov(IceType_i32, GPRRegister::Encoded_Reg_eax, Immediate(ImmIfFalse)); \
1693 __ ucomiss(IceType_f##Size, Encoded_Xmm_##Dst(), dwordAddress(T1)); \
1694 Label Done; \
1695 __ j(Cond::Br_##BParity, &Done, NearBranch); \
1696 __ j(Cond::Br_##BOther, &Done, NearBranch); \
1697 __ mov(IceType_i32, GPRRegister::Encoded_Reg_eax, Immediate(ImmIfTrue)); \
1698 __ bind(&Done); \
1699 \
1700 AssembledTest test = assemble(); \
1701 test.setDqwordTo(T0, test##Size##DstValue); \
1702 test.setDqwordTo(T1, test##Size##SrcValue); \
1703 test.run(); \
1704 \
1705 ASSERT_EQ(ImmIfTrue, test.eax()) << TestString; \
1706 reset(); \
1707 } while (0)
1708
1709 #define TestImplCond(Dst, Value0, Src, Value1, Size, CompType, BParity, \
1710 BOther) \
1711 do { \
1712 TestImplXmmXmm(Dst, Value0, Src, Value1, Size, CompType, BParity, BOther); \
1713 TestImplXmmAddr(Dst, Value0, Value1, Size, CompType, BParity, BOther); \
1714 } while (0)
1715
1716 #define TestImplSize(Dst, Src, Size) \
1717 do { \
1718 TestImplCond(Dst, 1.0, Src, 1.0, Size, isEq, p, ne); \
1719 TestImplCond(Dst, 1.0, Src, 2.0, Size, isNe, p, e); \
1720 TestImplCond(Dst, 1.0, Src, 2.0, Size, isLe, p, a); \
1721 TestImplCond(Dst, 1.0, Src, 1.0, Size, isLe, p, a); \
1722 TestImplCond(Dst, 1.0, Src, 2.0, Size, isLt, p, ae); \
1723 TestImplCond(Dst, 2.0, Src, 1.0, Size, isGe, p, b); \
1724 TestImplCond(Dst, 1.0, Src, 1.0, Size, isGe, p, b); \
1725 TestImplCond(Dst, 2.0, Src, 1.0, Size, isGt, p, be); \
1726 TestImplCond(Dst, qnan##Size, Src, 1.0, Size, isUnord, np, o); \
1727 TestImplCond(Dst, 1.0, Src, qnan##Size, Size, isUnord, np, s); \
1728 TestImplCond(Dst, qnan##Size, Src, qnan##Size, Size, isUnord, np, s); \
1729 } while (0)
1730
1731 #define TestImpl(Dst, Src) \
1732 do { \
1733 TestImplSize(Dst, Src, 32); \
1734 TestImplSize(Dst, Src, 64); \
1735 } while (0)
1736
1737 TestImpl(xmm0, xmm1);
1738 TestImpl(xmm1, xmm2);
1739 TestImpl(xmm2, xmm3);
1740 TestImpl(xmm3, xmm4);
1741 TestImpl(xmm4, xmm5);
1742 TestImpl(xmm5, xmm6);
1743 TestImpl(xmm6, xmm7);
1744 TestImpl(xmm7, xmm8);
1745 TestImpl(xmm8, xmm9);
1746 TestImpl(xmm9, xmm10);
1747 TestImpl(xmm10, xmm11);
1748 TestImpl(xmm11, xmm12);
1749 TestImpl(xmm12, xmm13);
1750 TestImpl(xmm13, xmm14);
1751 TestImpl(xmm14, xmm15);
1752 TestImpl(xmm15, xmm0);
1753
1754 #undef TestImpl
1755 #undef TestImplSize
1756 #undef TestImplCond
1757 #undef TestImplXmmAddr
1758 #undef TestImplXmmXmm
1759 }
1760
// Checks the scalar sqrt emitter for IceType_f32 and IceType_f64, with a
// register and a memory source, for every (xmmN, xmmN+1) pair (wrapping at
// xmm15). Only element 0 of the destination is expected to change; the other
// elements must keep the values loaded from test<Size>DstValue.
TEST_F(AssemblerX8664Test, Sqrtss) {
  // Operand images, selected by token pasting on Size. Element 0 of the
  // source image is overwritten by each case.
  Dqword test32SrcValue(-100.0, -100.0, -100.0, -100.0);
  Dqword test32DstValue(-1.0, -1.0, -1.0, -1.0);

  Dqword test64SrcValue(-100.0, -100.0);
  Dqword test64DstValue(-1.0, -1.0);

// Register source: sqrt(IceType_f<Size>, Dst, Src) with Src[0] == Value1;
// expects Dst[0] == Result.
#define TestSqrtssXmmXmm(Dst, Src, Value1, Result, Size) \
  do { \
    static constexpr char TestString[] = \
        "(" #Dst ", " #Src ", " #Value1 ", " #Result ", " #Size ")"; \
    const uint32_t SrcAddr = allocateDqword(); \
    const uint32_t DstAddr = allocateDqword(); \
    test##Size##SrcValue.F##Size[0] = Value1; \
    \
    __ movups(Encoded_Xmm_##Src(), dwordAddress(SrcAddr)); \
    __ movups(Encoded_Xmm_##Dst(), dwordAddress(DstAddr)); \
    __ sqrt(IceType_f##Size, Encoded_Xmm_##Dst(), Encoded_Xmm_##Src()); \
    \
    AssembledTest test = assemble(); \
    test.setDqwordTo(SrcAddr, test##Size##SrcValue); \
    test.setDqwordTo(DstAddr, test##Size##DstValue); \
    test.run(); \
    \
    Dqword Expected = test##Size##DstValue; \
    Expected.F##Size[0] = Result; \
    ASSERT_EQ(Expected, test.Dst<Dqword>()) << TestString; \
    reset(); \
  } while (0)

// Memory source: sqrt(IceType_f<Size>, Dst, [SrcAddr]).
#define TestSqrtssXmmAddr(Dst, Value1, Result, Size) \
  do { \
    static constexpr char TestString[] = \
        "(" #Dst ", Addr, " #Value1 ", " #Result ", " #Size ")"; \
    const uint32_t SrcAddr = allocateDqword(); \
    const uint32_t DstAddr = allocateDqword(); \
    test##Size##SrcValue.F##Size[0] = Value1; \
    \
    __ movups(Encoded_Xmm_##Dst(), dwordAddress(DstAddr)); \
    __ sqrt(IceType_f##Size, Encoded_Xmm_##Dst(), dwordAddress(SrcAddr)); \
    \
    AssembledTest test = assemble(); \
    test.setDqwordTo(SrcAddr, test##Size##SrcValue); \
    test.setDqwordTo(DstAddr, test##Size##DstValue); \
    test.run(); \
    \
    Dqword Expected = test##Size##DstValue; \
    Expected.F##Size[0] = Result; \
    ASSERT_EQ(Expected, test.Dst<Dqword>()) << TestString; \
    reset(); \
  } while (0)

// Input/result pairs for one float size, each in both operand forms.
#define TestSqrtssSize(Dst, Src, Size) \
  do { \
    TestSqrtssXmmXmm(Dst, Src, 4.0, 2.0, Size); \
    TestSqrtssXmmAddr(Dst, 4.0, 2.0, Size); \
    TestSqrtssXmmXmm(Dst, Src, 9.0, 3.0, Size); \
    TestSqrtssXmmAddr(Dst, 9.0, 3.0, Size); \
    TestSqrtssXmmXmm(Dst, Src, 100.0, 10.0, Size); \
    TestSqrtssXmmAddr(Dst, 100.0, 10.0, Size); \
  } while (0)

// Runs both float sizes on one register pair.
#define TestSqrtss(Dst, Src) \
  do { \
    TestSqrtssSize(Dst, Src, 32); \
    TestSqrtssSize(Dst, Src, 64); \
  } while (0)

  TestSqrtss(xmm0, xmm1);
  TestSqrtss(xmm1, xmm2);
  TestSqrtss(xmm2, xmm3);
  TestSqrtss(xmm3, xmm4);
  TestSqrtss(xmm4, xmm5);
  TestSqrtss(xmm5, xmm6);
  TestSqrtss(xmm6, xmm7);
  TestSqrtss(xmm7, xmm8);
  TestSqrtss(xmm8, xmm9);
  TestSqrtss(xmm9, xmm10);
  TestSqrtss(xmm10, xmm11);
  TestSqrtss(xmm11, xmm12);
  TestSqrtss(xmm12, xmm13);
  TestSqrtss(xmm13, xmm14);
  TestSqrtss(xmm14, xmm15);
  TestSqrtss(xmm15, xmm0);

#undef TestSqrtss
#undef TestSqrtssSize
#undef TestSqrtssXmmAddr
#undef TestSqrtssXmmXmm
}
1851
TEST_F(AssemblerX8664Test, Insertps) {
// Register/register form. Dst is preloaded with Value0 and Src with Value1
// (both written as parenthesized Dqword constructor argument lists), then
// insertps(Dst, Src, Imm) is executed and the full destination register is
// compared against Expected.
#define TestInsertpsXmmXmmImm(Dst, Value0, Src, Value1, Imm, Expected) \
  do { \
    static constexpr char TestString[] = \
        "(" #Dst ", " #Value0 ", " #Src ", " #Value1 ", " #Imm ", " #Expected \
        ")"; \
    const uint32_t T0 = allocateDqword(); \
    const Dqword V0 Value0; \
    const uint32_t T1 = allocateDqword(); \
    const Dqword V1 Value1; \
    \
    __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0)); \
    __ movups(Encoded_Xmm_##Src(), dwordAddress(T1)); \
    __ insertps(IceType_v4f32, Encoded_Xmm_##Dst(), Encoded_Xmm_##Src(), \
                Immediate(Imm)); \
    \
    AssembledTest test = assemble(); \
    test.setDqwordTo(T0, V0); \
    test.setDqwordTo(T1, V1); \
    test.run(); \
    \
    ASSERT_EQ(Dqword Expected, test.Dst<Dqword>()) << TestString; \
    reset(); \
  } while (0)

// Register/memory form: the source operand of insertps is the memory slot
// holding Value1 instead of a second xmm register.
#define TestInsertpsXmmAddrImm(Dst, Value0, Value1, Imm, Expected) \
  do { \
    static constexpr char TestString[] = \
        "(" #Dst ", " #Value0 ", Addr, " #Value1 ", " #Imm ", " #Expected ")"; \
    const uint32_t T0 = allocateDqword(); \
    const Dqword V0 Value0; \
    const uint32_t T1 = allocateDqword(); \
    const Dqword V1 Value1; \
    \
    __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0)); \
    __ insertps(IceType_v4f32, Encoded_Xmm_##Dst(), dwordAddress(T1), \
                Immediate(Imm)); \
    \
    AssembledTest test = assemble(); \
    test.setDqwordTo(T0, V0); \
    test.setDqwordTo(T1, V1); \
    test.run(); \
    \
    ASSERT_EQ(Dqword Expected, test.Dst<Dqword>()) << TestString; \
    reset(); \
  } while (0)

// Runs both operand forms with two immediates. Per the INSERTPS encoding,
// imm8[7:6] selects the source lane (register form only), imm8[5:4] selects
// the destination lane and imm8[3:0] is the zero mask:
//   0x99 -> source lane 2, destination lane 1, zero lanes 0 and 3.
//   0x9D -> source lane 2, destination lane 1, zero lanes 0, 2 and 3.
// The memory form always inserts the dword at the given address, which is why
// its expected value carries 0xBBBBBBBB instead of 0xDDDDDDDD.
#define TestInsertps(Dst, Src) \
  do { \
    TestInsertpsXmmXmmImm( \
        Dst, (uint64_t(-1), uint64_t(-1)), Src, \
        (uint64_t(0xAAAAAAAABBBBBBBBull), uint64_t(0xCCCCCCCCDDDDDDDDull)), \
        0x99, \
        (uint64_t(0xDDDDDDDD00000000ull), uint64_t(0x00000000FFFFFFFFull))); \
    TestInsertpsXmmAddrImm( \
        Dst, (uint64_t(-1), uint64_t(-1)), \
        (uint64_t(0xAAAAAAAABBBBBBBBull), uint64_t(0xCCCCCCCCDDDDDDDDull)), \
        0x99, \
        (uint64_t(0xBBBBBBBB00000000ull), uint64_t(0x00000000FFFFFFFFull))); \
    TestInsertpsXmmXmmImm( \
        Dst, (uint64_t(-1), uint64_t(-1)), Src, \
        (uint64_t(0xAAAAAAAABBBBBBBBull), uint64_t(0xCCCCCCCCDDDDDDDDull)), \
        0x9D, \
        (uint64_t(0xDDDDDDDD00000000ull), uint64_t(0x0000000000000000ull))); \
    TestInsertpsXmmAddrImm( \
        Dst, (uint64_t(-1), uint64_t(-1)), \
        (uint64_t(0xAAAAAAAABBBBBBBBull), uint64_t(0xCCCCCCCCDDDDDDDDull)), \
        0x9D, \
        (uint64_t(0xBBBBBBBB00000000ull), uint64_t(0x0000000000000000ull))); \
  } while (0)

  TestInsertps(xmm0, xmm1);
  TestInsertps(xmm1, xmm2);
  TestInsertps(xmm2, xmm3);
  TestInsertps(xmm3, xmm4);
  TestInsertps(xmm4, xmm5);
  TestInsertps(xmm5, xmm6);
  TestInsertps(xmm6, xmm7);
  TestInsertps(xmm7, xmm8);
  TestInsertps(xmm8, xmm9);
  TestInsertps(xmm9, xmm10);
  TestInsertps(xmm10, xmm11);
  TestInsertps(xmm11, xmm12);
  TestInsertps(xmm12, xmm13);
  TestInsertps(xmm13, xmm14);
  TestInsertps(xmm14, xmm15);
  TestInsertps(xmm15, xmm0);

  // Undefine exactly the helpers defined above so none of them leaks into the
  // tests that follow.
#undef TestInsertps
#undef TestInsertpsXmmAddrImm
#undef TestInsertpsXmmXmmImm
}
1944
TEST_F(AssemblerX8664Test, Pinsr) {
  // Masks applied to the immediate to obtain the index of the lane that the
  // test expects to be overwritten, one mask per element width.
  static constexpr uint8_t LaneMask32 = 0x03;
  static constexpr uint8_t LaneMask16 = 0x07;
  static constexpr uint8_t LaneMask8 = 0x0F;

// Register source: Value1 is materialized in GPR with a 32-bit mov, inserted
// into Dst (preloaded with Value0), and the result must equal Value0 with only
// the selected lane replaced.
#define TestPinsrXmmGPRImm(Dst, Value0, GPR, Value1, Imm, Size) \
  do { \
    static constexpr char TestString[] = \
        "(" #Dst ", " #Value0 ", " #GPR ", " #Value1 ", " #Imm ", " #Size ")"; \
    const uint32_t VecSlot = allocateDqword(); \
    const Dqword Initial Value0; \
    constexpr uint8_t Lane = (Imm) & LaneMask##Size; \
    \
    __ movups(Encoded_Xmm_##Dst(), dwordAddress(VecSlot)); \
    __ mov(IceType_i32, Encoded_GPR_##GPR(), Immediate(Value1)); \
    __ pinsr(IceType_i##Size, Encoded_Xmm_##Dst(), Encoded_GPR_##GPR(), \
             Immediate(Imm)); \
    \
    AssembledTest Assembled = assemble(); \
    Assembled.setDqwordTo(VecSlot, Initial); \
    Assembled.run(); \
    \
    Dqword Want = Initial; \
    Want.U##Size[Lane] = Value1; \
    ASSERT_EQ(Want, Assembled.Dst<Dqword>()) << TestString; \
    reset(); \
  } while (0)

// Memory source: same check, but the inserted scalar is read from a dword
// slot in the test's data area.
#define TestPinsrXmmAddrImm(Dst, Value0, Value1, Imm, Size) \
  do { \
    static constexpr char TestString[] = \
        "(" #Dst ", " #Value0 ", Addr, " #Value1 ", " #Imm ", " #Size ")"; \
    const uint32_t VecSlot = allocateDqword(); \
    const Dqword Initial Value0; \
    const uint32_t ScalarSlot = allocateDword(); \
    const uint32_t Scalar = Value1; \
    constexpr uint8_t Lane = (Imm) & LaneMask##Size; \
    \
    __ movups(Encoded_Xmm_##Dst(), dwordAddress(VecSlot)); \
    __ pinsr(IceType_i##Size, Encoded_Xmm_##Dst(), dwordAddress(ScalarSlot), \
             Immediate(Imm)); \
    \
    AssembledTest Assembled = assemble(); \
    Assembled.setDqwordTo(VecSlot, Initial); \
    Assembled.setDwordTo(ScalarSlot, Scalar); \
    Assembled.run(); \
    \
    Dqword Want = Initial; \
    Want.U##Size[Lane] = Value1; \
    ASSERT_EQ(Want, Assembled.Dst<Dqword>()) << TestString; \
    reset(); \
  } while (0)

// Runs both source forms against one fixed initial vector.
#define TestPinsrSize(Dst, GPR, Value1, Imm, Size) \
  do { \
    TestPinsrXmmGPRImm( \
        Dst, \
        (uint64_t(0xAAAAAAAABBBBBBBBull), uint64_t(0xFFFFFFFFDDDDDDDDull)), \
        GPR, Value1, Imm, Size); \
    TestPinsrXmmAddrImm( \
        Dst, \
        (uint64_t(0xAAAAAAAABBBBBBBBull), uint64_t(0xFFFFFFFFDDDDDDDDull)), \
        Value1, Imm, Size); \
  } while (0)

// Covers the 8-, 16- and 32-bit element widths for one (xmm, gpr) pair.
#define TestPinsr(Dst, GPR) \
  do { \
    TestPinsrSize(Dst, GPR, 0xEE, 0x03, 8); \
    TestPinsrSize(Dst, GPR, 0xFFEE, 0x03, 16); \
    TestPinsrSize(Dst, GPR, 0xC0FFEE, 0x03, 32); \
  } while (0)

  TestPinsr(xmm0, r1);
  TestPinsr(xmm1, r2);
  TestPinsr(xmm2, r3);
  TestPinsr(xmm3, r4);
  TestPinsr(xmm4, r5);
  TestPinsr(xmm5, r6);
  TestPinsr(xmm6, r7);
  TestPinsr(xmm7, r8);
  TestPinsr(xmm8, r10);
  TestPinsr(xmm9, r11);
  TestPinsr(xmm10, r12);
  TestPinsr(xmm11, r13);
  TestPinsr(xmm12, r14);
  TestPinsr(xmm13, r15);
  TestPinsr(xmm14, r1);
  TestPinsr(xmm15, r2);

#undef TestPinsr
#undef TestPinsrSize
#undef TestPinsrXmmAddrImm
#undef TestPinsrXmmGPRImm
}
2039
TEST_F(AssemblerX8664Test, Pextr) {
  // Masks applied to the immediate to obtain the index of the lane that the
  // test expects to be extracted, one mask per element width.
  static constexpr uint8_t Mask32 = 0x03;
  static constexpr uint8_t Mask16 = 0x07;
  static constexpr uint8_t Mask8 = 0x0F;

// Loads Src with Value1 (a parenthesized Dqword constructor argument list),
// extracts the lane chosen by Imm into GPR and checks that the register holds
// exactly that lane's value.
#define TestPextrGPRXmmImm(GPR, Src, Value1, Imm, Size) \
  do { \
    static constexpr char TestString[] = \
        "(" #GPR ", " #Src ", " #Value1 ", " #Imm ", " #Size ")"; \
    const uint32_t T0 = allocateDqword(); \
    const Dqword V0 Value1; \
    \
    __ movups(Encoded_Xmm_##Src(), dwordAddress(T0)); \
    __ pextr(IceType_i##Size, Encoded_GPR_##GPR(), Encoded_Xmm_##Src(), \
             Immediate(Imm)); \
    \
    AssembledTest test = assemble(); \
    test.setDqwordTo(T0, V0); \
    test.run(); \
    \
    constexpr uint8_t sel = (Imm)&Mask##Size; \
    ASSERT_EQ(V0.U##Size[sel], test.GPR()) << TestString; \
    reset(); \
  } while (0)

// Runs the extraction against one fixed source vector. The Value1 parameter is
// accepted for symmetry with the Pinsr helpers but is not used: the value
// compared against is read back from the source vector itself.
#define TestPextrSize(GPR, Src, Value1, Imm, Size) \
  do { \
    TestPextrGPRXmmImm( \
        GPR, Src, \
        (uint64_t(0xAAAAAAAABBBBBBBBull), uint64_t(0xFFFFFFFFDDDDDDDDull)), \
        Imm, Size); \
  } while (0)

// Covers the 8-, 16- and 32-bit element widths for one (gpr, xmm) pair.
#define TestPextr(GPR, Src) \
  do { \
    TestPextrSize(GPR, Src, 0xEE, 0x03, 8); \
    TestPextrSize(GPR, Src, 0xFFEE, 0x03, 16); \
    TestPextrSize(GPR, Src, 0xC0FFEE, 0x03, 32); \
  } while (0)

  TestPextr(r1, xmm0);
  TestPextr(r2, xmm1);
  TestPextr(r3, xmm2);
  TestPextr(r4, xmm3);
  TestPextr(r5, xmm4);
  TestPextr(r6, xmm5);
  TestPextr(r7, xmm6);
  TestPextr(r8, xmm7);
  TestPextr(r10, xmm8);
  TestPextr(r11, xmm9);
  TestPextr(r12, xmm10);
  TestPextr(r13, xmm11);
  TestPextr(r14, xmm12);
  TestPextr(r15, xmm13);
  TestPextr(r1, xmm14);
  TestPextr(r2, xmm15);

  // Undefine exactly the helpers defined above so none of them leaks into the
  // tests that follow.
#undef TestPextr
#undef TestPextrSize
#undef TestPextrGPRXmmImm
}
2101
TEST_F(AssemblerX8664Test, Pcmpeq_Pcmpgt) {
// Register/register form. Dst is preloaded with Value0 and Src with Value1,
// Inst(Dst, Src) is executed, and every lane of the result must be all-ones
// when `Value1-lane Op Value0-lane` holds (signed lanes) and zero otherwise.
#define TestPcmpXmmXmm(Dst, Value0, Src, Value1, Size, Inst, Op) \
  do { \
    static constexpr char TestString[] = \
        "(" #Dst ", " #Value0 ", " #Src ", " #Value1 ", " #Size ", " #Op ")"; \
    const uint32_t T0 = allocateDqword(); \
    const Dqword V0 Value0; \
    const uint32_t T1 = allocateDqword(); \
    const Dqword V1 Value1; \
    \
    __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0)); \
    __ movups(Encoded_Xmm_##Src(), dwordAddress(T1)); \
    __ Inst(IceType_i##Size, Encoded_Xmm_##Dst(), Encoded_Xmm_##Src()); \
    \
    AssembledTest test = assemble(); \
    test.setDqwordTo(T0, V0); \
    test.setDqwordTo(T1, V1); \
    test.run(); \
    \
    Dqword Expected(uint64_t(0), uint64_t(0)); \
    static constexpr uint8_t ArraySize = \
        sizeof(Dqword) / sizeof(uint##Size##_t); \
    for (uint8_t i = 0; i < ArraySize; ++i) { \
      Expected.I##Size[i] = (V1.I##Size[i] Op V0.I##Size[i]) ? -1 : 0; \
    } \
    ASSERT_EQ(Expected, test.Dst<Dqword>()) << TestString; \
    reset(); \
  } while (0)

// Register/memory form: the second operand of Inst is the memory slot holding
// Value1 instead of a second xmm register.
#define TestPcmpXmmAddr(Dst, Value0, Value1, Size, Inst, Op) \
  do { \
    static constexpr char TestString[] = \
        "(" #Dst ", " #Value0 ", Addr, " #Value1 ", " #Size ", " #Op ")"; \
    const uint32_t T0 = allocateDqword(); \
    const Dqword V0 Value0; \
    const uint32_t T1 = allocateDqword(); \
    const Dqword V1 Value1; \
    \
    __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0)); \
    __ Inst(IceType_i##Size, Encoded_Xmm_##Dst(), dwordAddress(T1)); \
    \
    AssembledTest test = assemble(); \
    test.setDqwordTo(T0, V0); \
    test.setDqwordTo(T1, V1); \
    test.run(); \
    \
    Dqword Expected(uint64_t(0), uint64_t(0)); \
    static constexpr uint8_t ArraySize = \
        sizeof(Dqword) / sizeof(uint##Size##_t); \
    for (uint8_t i = 0; i < ArraySize; ++i) { \
      Expected.I##Size[i] = (V1.I##Size[i] Op V0.I##Size[i]) ? -1 : 0; \
    } \
    ASSERT_EQ(Expected, test.Dst<Dqword>()) << TestString; \
    reset(); \
  } while (0)

// Runs pcmpeq and pcmpgt in both operand forms for one pair of inputs. The
// pcmpgt reference uses `<` because the expectation is written with the source
// lane on the left-hand side (source < destination).
#define TestPcmpValues(Dst, Value0, Src, Value1, Size) \
  do { \
    TestPcmpXmmXmm(Dst, Value0, Src, Value1, Size, pcmpeq, ==); \
    TestPcmpXmmAddr(Dst, Value0, Value1, Size, pcmpeq, ==); \
    TestPcmpXmmXmm(Dst, Value0, Src, Value1, Size, pcmpgt, <); \
    TestPcmpXmmAddr(Dst, Value0, Value1, Size, pcmpgt, <); \
  } while (0)

// Runs TestPcmpValues over two fixed input pairs for one element width.
#define TestPcmpSize(Dst, Src, Size) \
  do { \
    TestPcmpValues( \
        Dst, \
        (uint64_t(0x8888888888888888ull), uint64_t(0x0000000000000000ull)), \
        Src, \
        (uint64_t(0x0000008800008800ull), uint64_t(0xFFFFFFFFFFFFFFFFull)), \
        Size); \
    TestPcmpValues( \
        Dst, \
        (uint64_t(0x123567ABAB55DE01ull), uint64_t(0x12345abcde12345Aull)), \
        Src, \
        (uint64_t(0x0000008800008800ull), uint64_t(0xAABBCCDD1234321Aull)), \
        Size); \
  } while (0)

// Covers the 8-, 16- and 32-bit element widths for the given register pair.
// The registers are forwarded (rather than hard-coded to xmm0/xmm1) so that
// each invocation below really exercises a different encoding.
#define TestPcmp(Dst, Src) \
  do { \
    TestPcmpSize(Dst, Src, 8); \
    TestPcmpSize(Dst, Src, 16); \
    TestPcmpSize(Dst, Src, 32); \
  } while (0)

  TestPcmp(xmm0, xmm1);
  TestPcmp(xmm1, xmm2);
  TestPcmp(xmm2, xmm3);
  TestPcmp(xmm3, xmm4);
  TestPcmp(xmm4, xmm5);
  TestPcmp(xmm5, xmm6);
  TestPcmp(xmm6, xmm7);
  TestPcmp(xmm7, xmm8);
  TestPcmp(xmm8, xmm9);
  TestPcmp(xmm9, xmm10);
  TestPcmp(xmm10, xmm11);
  TestPcmp(xmm11, xmm12);
  TestPcmp(xmm12, xmm13);
  TestPcmp(xmm13, xmm14);
  TestPcmp(xmm14, xmm15);
  TestPcmp(xmm15, xmm0);

#undef TestPcmp
#undef TestPcmpSize
#undef TestPcmpValues
#undef TestPcmpXmmAddr
#undef TestPcmpXmmXmm
}
2212
TEST_F(AssemblerX8664Test, Roundsd) {
// Loads Dst with (-3.0, -3.0) and Src with (Input, -123.4), emits an f64 round
// of Src into Dst using the AssemblerX8664::k<Mode> rounding mode, and expects
// lane 0 to equal RN while lane 1 still holds Dst's original -3.0. A mismatch
// is reported with EXPECT_EQ, so the remaining cases still run.
#define TestRoundsdXmmXmm(Dst, Src, Mode, Input, RN) \
  do { \
    static constexpr char TestString[] = \
        "(" #Dst ", " #Src ", " #Mode ", " #Input ", " #RN ")"; \
    const uint32_t DstSlot = allocateDqword(); \
    const uint32_t SrcSlot = allocateDqword(); \
    const Dqword DstInit(-3.0, -3.0); \
    const Dqword SrcInit(double(Input), -123.4); \
    const Dqword Want(double(RN), -3.0); \
    \
    __ movups(Encoded_Xmm_##Dst(), dwordAddress(DstSlot)); \
    __ movups(Encoded_Xmm_##Src(), dwordAddress(SrcSlot)); \
    __ round(IceType_f64, Encoded_Xmm_##Dst(), Encoded_Xmm_##Src(), \
             Immediate(AssemblerX8664::k##Mode)); \
    \
    AssembledTest Assembled = assemble(); \
    Assembled.setDqwordTo(DstSlot, DstInit); \
    Assembled.setDqwordTo(SrcSlot, SrcInit); \
    Assembled.run(); \
    \
    EXPECT_EQ(Want, Assembled.Dst<Dqword>()) << TestString; \
    reset(); \
  } while (0)

// Exercises each rounding mode with inputs just above and/or below 5.5.
#define TestRoundsd(Dst, Src) \
  do { \
    TestRoundsdXmmXmm(Dst, Src, RoundToNearest, 5.51, 6); \
    TestRoundsdXmmXmm(Dst, Src, RoundToNearest, 5.49, 5); \
    TestRoundsdXmmXmm(Dst, Src, RoundDown, 5.51, 5); \
    TestRoundsdXmmXmm(Dst, Src, RoundUp, 5.49, 6); \
    TestRoundsdXmmXmm(Dst, Src, RoundToZero, 5.49, 5); \
    TestRoundsdXmmXmm(Dst, Src, RoundToZero, 5.51, 5); \
  } while (0)

  TestRoundsd(xmm0, xmm1);
  TestRoundsd(xmm1, xmm2);
  TestRoundsd(xmm2, xmm3);
  TestRoundsd(xmm3, xmm4);
  TestRoundsd(xmm4, xmm5);
  TestRoundsd(xmm5, xmm6);
  TestRoundsd(xmm6, xmm7);
  TestRoundsd(xmm7, xmm8);
  TestRoundsd(xmm8, xmm9);
  TestRoundsd(xmm9, xmm10);
  TestRoundsd(xmm10, xmm11);
  TestRoundsd(xmm11, xmm12);
  TestRoundsd(xmm12, xmm13);
  TestRoundsd(xmm13, xmm14);
  TestRoundsd(xmm14, xmm15);
  TestRoundsd(xmm15, xmm0);

#undef TestRoundsd
#undef TestRoundsdXmmXmm
}
2268
TEST_F(AssemblerX8664Test, Set1ps) {
// Emits set1ps(Dst, Scratch, Imm) and checks that all four 32-bit lanes of Dst
// end up holding Imm. Scratch is the general-purpose register handed to the
// assembler alongside the destination.
#define TestSet1ps(Dst, Scratch, Imm) \
  do { \
    __ set1ps(Encoded_Xmm_##Dst(), Encoded_GPR_##Scratch(), Immediate(Imm)); \
    \
    AssembledTest Assembled = assemble(); \
    Assembled.run(); \
    \
    const uint64_t LanePair = (uint64_t(Imm) << 32) | uint32_t(Imm); \
    const Dqword Want(LanePair, LanePair); \
    ASSERT_EQ(Want, Assembled.Dst<Dqword>()) \
        << "(" #Dst ", " #Scratch ", " #Imm ")"; \
    reset(); \
  } while (0)

  TestSet1ps(xmm0, r1, 1);
  TestSet1ps(xmm1, r2, 12);
  TestSet1ps(xmm2, r3, 22);
  TestSet1ps(xmm3, r4, 54);
  TestSet1ps(xmm4, r5, 80);
  TestSet1ps(xmm5, r6, 32);
  TestSet1ps(xmm6, r7, 55);
  TestSet1ps(xmm7, r8, 44);
  TestSet1ps(xmm8, r10, 10);
  TestSet1ps(xmm9, r11, 155);
  TestSet1ps(xmm10, r12, 165);
  TestSet1ps(xmm11, r13, 170);
  TestSet1ps(xmm12, r14, 200);
  TestSet1ps(xmm13, r15, 124);
  TestSet1ps(xmm14, r1, 101);
  TestSet1ps(xmm15, r2, 166);

#undef TestSet1ps
}
2303
2304 } // end of anonymous namespace
2305 } // end of namespace Test
2306 } // end of namespace X8664
2307 } // end of namespace Ice
2308