/*
 * Copyright (C) 2019 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16
#include "gtest/gtest.h"

#include <cstdint>
#include <initializer_list>
#include <limits>

#include "utility.h"
24
25 namespace {
26
TEST(Arm64InsnTest, UnsignedBitfieldMoveNoShift) {
  // UBFM with immr=0, imms=63 selects the full 64-bit field, so the source
  // value must be copied to the destination unchanged.
  uint64_t arg = 0x3952247371907021ULL;
  uint64_t res;

  asm("ubfm %0, %1, #0, #63" : "=r"(res) : "r"(arg));

  ASSERT_EQ(res, 0x3952247371907021ULL);
}
35
TEST(Arm64InsnTest, BitfieldLeftInsertion) {
  // BFM with immr=40, imms=15 inserts bits [15:0] of the source at bit
  // position 24 (= 64 - 40) of the destination, leaving other bits intact.
  // Note that %0 is both an input and an output ("0"(res)): BFM merges into
  // the existing destination value.
  uint64_t arg = 0x389522868478abcdULL;
  uint64_t res = 0x1101044682325271ULL;

  asm("bfm %0, %1, #40, #15" : "=r"(res) : "r"(arg), "0"(res));

  ASSERT_EQ(res, 0x110104abcd325271ULL);
}
44
TEST(Arm64InsnTest, BitfieldRightInsertion) {
  // BFM with immr=4, imms=39 (imms >= immr) extracts bits [39:4] of the
  // source and inserts them at bit 0 of the destination, preserving the
  // destination's upper bits.
  uint64_t arg = 0x3276561809377344ULL;
  uint64_t res = 0x1668039626579787ULL;

  asm("bfm %0, %1, #4, #39" : "=r"(res) : "r"(arg), "0"(res));

  ASSERT_EQ(res, 0x1668039180937734ULL);
}
53
TEST(Arm64InsnTest, MoveImmToFp32) {
  // The tests below verify that fmov works with various immediates.
  // Specifically, the instruction has an 8-bit immediate field consisting of
  // the following four subfields:
  //
  // - sign (one bit)
  // - upper exponent (one bit)
  // - lower exponent (two bits)
  // - mantisa (four bits)
  //
  // For example, we decompose imm8 = 0b01001111 into:
  //
  // - sign = 0 (positive)
  // - upper exponent = 1
  // - lower exponent = 00
  // - mantisa = 1111
  //
  // This immediate corresponds to 32-bit floating point value:
  //
  //   0 011111 00 1111 0000000000000000000
  //   | |      |  |    |
  //   | |      |  |    +- 19 zeros
  //   | |      |  +------ mantisa
  //   | |      +--------- lower exponent
  //   | +---------------- upper exponent (custom extended to 6 bits)
  //   +------------------ sign
  //
  // Thus we have:
  //
  //   1.11110000... * 2^(124-127) = 0.2421875
  //
  // where 1.11110000... is in binary.
  //
  // See VFPExpandImm in the ARM Architecture Manual for details.
  //
  // We enumerate all possible 8-bit immediate encodings of the form:
  //
  //   {0,1}{0,1}{00,11}{0000,1111}
  //
  // to verify that the decoder correctly splits the immediate into the
  // subfields and reconstructs the intended floating-point value.

  // imm8 = 0b00000000
  __uint128_t res1 = ASM_INSN_WRAP_FUNC_W_RES("fmov s0, #2.0e+00")();
  ASSERT_EQ(res1, MakeUInt128(0x40000000U, 0U));

  // imm8 = 0b00001111
  __uint128_t res2 = ASM_INSN_WRAP_FUNC_W_RES("fmov s0, #3.8750e+00")();
  ASSERT_EQ(res2, MakeUInt128(0x40780000U, 0U));

  // imm8 = 0b00110000
  __uint128_t res3 = ASM_INSN_WRAP_FUNC_W_RES("fmov s0, #1.60e+01")();
  ASSERT_EQ(res3, MakeUInt128(0x41800000U, 0U));

  // imm8 = 0b00111111
  __uint128_t res4 = ASM_INSN_WRAP_FUNC_W_RES("fmov s0, #3.10e+01")();
  ASSERT_EQ(res4, MakeUInt128(0x41f80000U, 0U));

  // imm8 = 0b01000000
  __uint128_t res5 = ASM_INSN_WRAP_FUNC_W_RES("fmov s0, #1.250e-01")();
  ASSERT_EQ(res5, MakeUInt128(0x3e000000U, 0U));

  // imm8 = 0b01001111
  __uint128_t res6 = ASM_INSN_WRAP_FUNC_W_RES("fmov s0, #2.4218750e-01")();
  ASSERT_EQ(res6, MakeUInt128(0x3e780000U, 0U));

  // imm8 = 0b01110000
  __uint128_t res7 = ASM_INSN_WRAP_FUNC_W_RES("fmov s0, #1.0e+00")();
  ASSERT_EQ(res7, MakeUInt128(0x3f800000U, 0U));

  // imm8 = 0b01111111
  __uint128_t res8 = ASM_INSN_WRAP_FUNC_W_RES("fmov s0, #1.93750e+00")();
  ASSERT_EQ(res8, MakeUInt128(0x3ff80000U, 0U));

  // imm8 = 0b10000000
  __uint128_t res9 = ASM_INSN_WRAP_FUNC_W_RES("fmov s0, #-2.0e+00")();
  ASSERT_EQ(res9, MakeUInt128(0xc0000000U, 0U));

  // imm8 = 0b10001111
  __uint128_t res10 = ASM_INSN_WRAP_FUNC_W_RES("fmov s0, #-3.8750e+00")();
  ASSERT_EQ(res10, MakeUInt128(0xc0780000U, 0U));

  // imm8 = 0b10110000
  __uint128_t res11 = ASM_INSN_WRAP_FUNC_W_RES("fmov s0, #-1.60e+01")();
  ASSERT_EQ(res11, MakeUInt128(0xc1800000U, 0U));

  // imm8 = 0b10111111
  __uint128_t res12 = ASM_INSN_WRAP_FUNC_W_RES("fmov s0, #-3.10e+01")();
  ASSERT_EQ(res12, MakeUInt128(0xc1f80000U, 0U));

  // imm8 = 0b11000000
  __uint128_t res13 = ASM_INSN_WRAP_FUNC_W_RES("fmov s0, #-1.250e-01")();
  ASSERT_EQ(res13, MakeUInt128(0xbe000000U, 0U));

  // imm8 = 0b11001111
  __uint128_t res14 = ASM_INSN_WRAP_FUNC_W_RES("fmov s0, #-2.4218750e-01")();
  ASSERT_EQ(res14, MakeUInt128(0xbe780000U, 0U));

  // imm8 = 0b11110000
  __uint128_t res15 = ASM_INSN_WRAP_FUNC_W_RES("fmov s0, #-1.0e+00")();
  ASSERT_EQ(res15, MakeUInt128(0xbf800000U, 0U));

  // imm8 = 0b11111111
  __uint128_t res16 = ASM_INSN_WRAP_FUNC_W_RES("fmov s0, #-1.93750e+00")();
  ASSERT_EQ(res16, MakeUInt128(0xbff80000U, 0U));
}
160
TEST(Arm64InsnTest, MoveImmToFp64) {
  // The tests below verify that fmov works with various immediates.
  // Specifically, the instruction has an 8-bit immediate field consisting of
  // the following four subfields:
  //
  // - sign (one bit)
  // - upper exponent (one bit)
  // - lower exponent (two bits)
  // - mantisa (four bits)
  //
  // For example, we decompose imm8 = 0b01001111 into:
  //
  // - sign = 0 (positive)
  // - upper exponent = 1
  // - lower exponent = 00
  // - mantisa = 1111
  //
  // This immediate corresponds to 64-bit floating point value:
  //
  //   0 011111111 00 1111 000000000000000000000000000000000000000000000000
  //   | |         |  |    |
  //   | |         |  |    +- 48 zeros
  //   | |         |  +------ mantisa
  //   | |         +--------- lower exponent
  //   | +------------------- upper exponent (custom extended to 9 bits)
  //   +--------------------- sign
  //
  // Thus we have:
  //
  //   1.11110000... * 2^(1020-1023) = 0.2421875
  //
  // where 1.11110000... is in binary.
  //
  // See VFPExpandImm in the ARM Architecture Manual for details.
  //
  // We enumerate all possible 8-bit immediate encodings of the form:
  //
  //   {0,1}{0,1}{00,11}{0000,1111}
  //
  // to verify that the decoder correctly splits the immediate into the
  // subfields and reconstructs the intended floating-point value.

  // imm8 = 0b00000000
  __uint128_t res1 = ASM_INSN_WRAP_FUNC_W_RES("fmov %d0, #2.0e+00")();
  ASSERT_EQ(res1, MakeUInt128(0x4000000000000000ULL, 0U));

  // imm8 = 0b00001111
  __uint128_t res2 = ASM_INSN_WRAP_FUNC_W_RES("fmov %d0, #3.8750e+00")();
  ASSERT_EQ(res2, MakeUInt128(0x400f000000000000ULL, 0U));

  // imm8 = 0b00110000
  __uint128_t res3 = ASM_INSN_WRAP_FUNC_W_RES("fmov %d0, #1.60e+01")();
  ASSERT_EQ(res3, MakeUInt128(0x4030000000000000ULL, 0U));

  // imm8 = 0b00111111
  __uint128_t res4 = ASM_INSN_WRAP_FUNC_W_RES("fmov %d0, #3.10e+01")();
  ASSERT_EQ(res4, MakeUInt128(0x403f000000000000ULL, 0U));

  // imm8 = 0b01000000
  __uint128_t res5 = ASM_INSN_WRAP_FUNC_W_RES("fmov %d0, #1.250e-01")();
  ASSERT_EQ(res5, MakeUInt128(0x3fc0000000000000ULL, 0U));

  // imm8 = 0b01001111
  __uint128_t res6 = ASM_INSN_WRAP_FUNC_W_RES("fmov %d0, #2.4218750e-01")();
  ASSERT_EQ(res6, MakeUInt128(0x3fcf000000000000ULL, 0U));

  // imm8 = 0b01110000
  __uint128_t res7 = ASM_INSN_WRAP_FUNC_W_RES("fmov %d0, #1.0e+00")();
  ASSERT_EQ(res7, MakeUInt128(0x3ff0000000000000ULL, 0U));

  // imm8 = 0b01111111
  __uint128_t res8 = ASM_INSN_WRAP_FUNC_W_RES("fmov %d0, #1.93750e+00")();
  ASSERT_EQ(res8, MakeUInt128(0x3fff000000000000ULL, 0U));

  // imm8 = 0b10000000
  __uint128_t res9 = ASM_INSN_WRAP_FUNC_W_RES("fmov %d0, #-2.0e+00")();
  ASSERT_EQ(res9, MakeUInt128(0xc000000000000000ULL, 0U));

  // imm8 = 0b10001111
  __uint128_t res10 = ASM_INSN_WRAP_FUNC_W_RES("fmov %d0, #-3.8750e+00")();
  ASSERT_EQ(res10, MakeUInt128(0xc00f000000000000ULL, 0U));

  // imm8 = 0b10110000
  __uint128_t res11 = ASM_INSN_WRAP_FUNC_W_RES("fmov %d0, #-1.60e+01")();
  ASSERT_EQ(res11, MakeUInt128(0xc030000000000000ULL, 0U));

  // imm8 = 0b10111111
  __uint128_t res12 = ASM_INSN_WRAP_FUNC_W_RES("fmov %d0, #-3.10e+01")();
  ASSERT_EQ(res12, MakeUInt128(0xc03f000000000000ULL, 0U));

  // imm8 = 0b11000000
  __uint128_t res13 = ASM_INSN_WRAP_FUNC_W_RES("fmov %d0, #-1.250e-01")();
  ASSERT_EQ(res13, MakeUInt128(0xbfc0000000000000ULL, 0U));

  // imm8 = 0b11001111
  __uint128_t res14 = ASM_INSN_WRAP_FUNC_W_RES("fmov %d0, #-2.4218750e-01")();
  ASSERT_EQ(res14, MakeUInt128(0xbfcf000000000000ULL, 0U));

  // imm8 = 0b11110000
  __uint128_t res15 = ASM_INSN_WRAP_FUNC_W_RES("fmov %d0, #-1.0e+00")();
  ASSERT_EQ(res15, MakeUInt128(0xbff0000000000000ULL, 0U));

  // imm8 = 0b11111111
  __uint128_t res16 = ASM_INSN_WRAP_FUNC_W_RES("fmov %d0, #-1.93750e+00")();
  ASSERT_EQ(res16, MakeUInt128(0xbfff000000000000ULL, 0U));
}
267
TEST(Arm64InsnTest, MoveImmToF32x4) {
  // The tests below verify that fmov works with various immediates.
  // Specifically, the instruction has an 8-bit immediate field consisting of
  // the following four subfields:
  //
  // - sign (one bit)
  // - upper exponent (one bit)
  // - lower exponent (two bits)
  // - mantisa (four bits)
  //
  // We enumerate all possible 8-bit immediate encodings of the form:
  //
  //   {0,1}{0,1}{00,11}{0000,1111}
  //
  // to verify that the decoder correctly splits the immediate into the
  // subfields and reconstructs the intended floating-point value.
  //
  // The vector form replicates the expanded 32-bit value into all four lanes.

  // imm8 = 0b00000000
  __uint128_t res1 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.4s, #2.0e+00")();
  ASSERT_EQ(res1, MakeUInt128(0x4000000040000000ULL, 0x4000000040000000ULL));

  // imm8 = 0b00001111
  __uint128_t res2 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.4s, #3.8750e+00")();
  ASSERT_EQ(res2, MakeUInt128(0x4078000040780000ULL, 0x4078000040780000ULL));

  // imm8 = 0b00110000
  __uint128_t res3 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.4s, #1.60e+01")();
  ASSERT_EQ(res3, MakeUInt128(0x4180000041800000ULL, 0x4180000041800000ULL));

  // imm8 = 0b00111111
  __uint128_t res4 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.4s, #3.10e+01")();
  ASSERT_EQ(res4, MakeUInt128(0x41f8000041f80000ULL, 0x41f8000041f80000ULL));

  // imm8 = 0b01000000
  __uint128_t res5 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.4s, #1.250e-01")();
  ASSERT_EQ(res5, MakeUInt128(0x3e0000003e000000ULL, 0x3e0000003e000000ULL));

  // imm8 = 0b01001111
  __uint128_t res6 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.4s, #2.4218750e-01")();
  ASSERT_EQ(res6, MakeUInt128(0x3e7800003e780000ULL, 0x3e7800003e780000ULL));

  // imm8 = 0b01110000
  __uint128_t res7 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.4s, #1.0e+00")();
  ASSERT_EQ(res7, MakeUInt128(0x3f8000003f800000ULL, 0x3f8000003f800000ULL));

  // imm8 = 0b01111111
  __uint128_t res8 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.4s, #1.93750e+00")();
  ASSERT_EQ(res8, MakeUInt128(0x3ff800003ff80000ULL, 0x3ff800003ff80000ULL));

  // imm8 = 0b10000000
  __uint128_t res9 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.4s, #-2.0e+00")();
  ASSERT_EQ(res9, MakeUInt128(0xc0000000c0000000ULL, 0xc0000000c0000000ULL));

  // imm8 = 0b10001111
  __uint128_t res10 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.4s, #-3.8750e+00")();
  ASSERT_EQ(res10, MakeUInt128(0xc0780000c0780000ULL, 0xc0780000c0780000ULL));

  // imm8 = 0b10110000
  __uint128_t res11 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.4s, #-1.60e+01")();
  ASSERT_EQ(res11, MakeUInt128(0xc1800000c1800000ULL, 0xc1800000c1800000ULL));

  // imm8 = 0b10111111
  __uint128_t res12 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.4s, #-3.10e+01")();
  ASSERT_EQ(res12, MakeUInt128(0xc1f80000c1f80000ULL, 0xc1f80000c1f80000ULL));

  // imm8 = 0b11000000
  __uint128_t res13 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.4s, #-1.250e-01")();
  ASSERT_EQ(res13, MakeUInt128(0xbe000000be000000ULL, 0xbe000000be000000ULL));

  // imm8 = 0b11001111
  __uint128_t res14 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.4s, #-2.4218750e-01")();
  ASSERT_EQ(res14, MakeUInt128(0xbe780000be780000ULL, 0xbe780000be780000ULL));

  // imm8 = 0b11110000
  __uint128_t res15 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.4s, #-1.0e+00")();
  ASSERT_EQ(res15, MakeUInt128(0xbf800000bf800000ULL, 0xbf800000bf800000ULL));

  // imm8 = 0b11111111
  __uint128_t res16 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.4s, #-1.93750e+00")();
  ASSERT_EQ(res16, MakeUInt128(0xbff80000bff80000ULL, 0xbff80000bff80000ULL));
}
349
TEST(Arm64InsnTest, MoveImmToF64x2) {
  // The tests below verify that fmov works with various immediates.
  // Specifically, the instruction has an 8-bit immediate field consisting of
  // the following four subfields:
  //
  // - sign (one bit)
  // - upper exponent (one bit)
  // - lower exponent (two bits)
  // - mantisa (four bits)
  //
  // We enumerate all possible 8-bit immediate encodings of the form:
  //
  //   {0,1}{0,1}{00,11}{0000,1111}
  //
  // to verify that the decoder correctly splits the immediate into the
  // subfields and reconstructs the intended floating-point value.
  //
  // The vector form replicates the expanded 64-bit value into both lanes.

  // imm8 = 0b00000000
  __uint128_t res1 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.2d, #2.0e+00")();
  ASSERT_EQ(res1, MakeUInt128(0x4000000000000000ULL, 0x4000000000000000ULL));

  // imm8 = 0b00001111
  __uint128_t res2 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.2d, #3.8750e+00")();
  ASSERT_EQ(res2, MakeUInt128(0x400f000000000000ULL, 0x400f000000000000ULL));

  // imm8 = 0b00110000
  __uint128_t res3 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.2d, #1.60e+01")();
  ASSERT_EQ(res3, MakeUInt128(0x4030000000000000ULL, 0x4030000000000000ULL));

  // imm8 = 0b00111111
  __uint128_t res4 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.2d, #3.10e+01")();
  ASSERT_EQ(res4, MakeUInt128(0x403f000000000000ULL, 0x403f000000000000ULL));

  // imm8 = 0b01000000
  __uint128_t res5 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.2d, #1.250e-01")();
  ASSERT_EQ(res5, MakeUInt128(0x3fc0000000000000ULL, 0x3fc0000000000000ULL));

  // imm8 = 0b01001111
  __uint128_t res6 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.2d, #2.4218750e-01")();
  ASSERT_EQ(res6, MakeUInt128(0x3fcf000000000000ULL, 0x3fcf000000000000ULL));

  // imm8 = 0b01110000
  __uint128_t res7 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.2d, #1.0e+00")();
  ASSERT_EQ(res7, MakeUInt128(0x3ff0000000000000ULL, 0x3ff0000000000000ULL));

  // imm8 = 0b01111111
  __uint128_t res8 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.2d, #1.93750e+00")();
  ASSERT_EQ(res8, MakeUInt128(0x3fff000000000000ULL, 0x3fff000000000000ULL));

  // imm8 = 0b10000000
  __uint128_t res9 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.2d, #-2.0e+00")();
  ASSERT_EQ(res9, MakeUInt128(0xc000000000000000ULL, 0xc000000000000000ULL));

  // imm8 = 0b10001111
  __uint128_t res10 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.2d, #-3.8750e+00")();
  ASSERT_EQ(res10, MakeUInt128(0xc00f000000000000ULL, 0xc00f000000000000ULL));

  // imm8 = 0b10110000
  __uint128_t res11 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.2d, #-1.60e+01")();
  ASSERT_EQ(res11, MakeUInt128(0xc030000000000000ULL, 0xc030000000000000ULL));

  // imm8 = 0b10111111
  __uint128_t res12 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.2d, #-3.10e+01")();
  ASSERT_EQ(res12, MakeUInt128(0xc03f000000000000ULL, 0xc03f000000000000ULL));

  // imm8 = 0b11000000
  __uint128_t res13 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.2d, #-1.250e-01")();
  ASSERT_EQ(res13, MakeUInt128(0xbfc0000000000000ULL, 0xbfc0000000000000ULL));

  // imm8 = 0b11001111
  __uint128_t res14 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.2d, #-2.4218750e-01")();
  ASSERT_EQ(res14, MakeUInt128(0xbfcf000000000000ULL, 0xbfcf000000000000ULL));

  // imm8 = 0b11110000
  __uint128_t res15 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.2d, #-1.0e+00")();
  ASSERT_EQ(res15, MakeUInt128(0xbff0000000000000ULL, 0xbff0000000000000ULL));

  // imm8 = 0b11111111
  __uint128_t res16 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.2d, #-1.93750e+00")();
  ASSERT_EQ(res16, MakeUInt128(0xbfff000000000000ULL, 0xbfff000000000000ULL));
}
431
TEST(Arm64InsnTest, MoveFpRegToReg) {
  // FMOV from a SIMD&FP register to a general-purpose register: reads the
  // selected element/low part without conversion.
  __uint128_t arg = MakeUInt128(0x1111aaaa2222bbbbULL, 0x3333cccc4444ddddULL);
  uint64_t res = 0xffffeeeeddddccccULL;

  // Move from high double.
  asm("fmov %0, %1.d[1]" : "=r"(res) : "w"(arg));
  ASSERT_EQ(res, 0x3333cccc4444ddddULL);

  // Move from low double.
  asm("fmov %0, %d1" : "=r"(res) : "w"(arg));
  ASSERT_EQ(res, 0x1111aaaa2222bbbbULL);

  // Move from single.
  asm("fmov %w0, %s1" : "=r"(res) : "w"(arg));
  ASSERT_EQ(res, 0x2222bbbbULL);
}
448
TEST(Arm64InsnTest, MoveRegToFpReg) {
  // FMOV from a general-purpose register into a SIMD&FP register.  The
  // element form ("%0.d[1]") merges into the existing vector, while the
  // scalar forms ("%d0"/"%s0") zero the remaining bits of the register.
  uint64_t arg = 0xffffeeeeddddccccULL;
  __uint128_t res = MakeUInt128(0x1111aaaa2222bbbbULL, 0x3333cccc4444ddddULL);

  // Move to high double.
  asm("fmov %0.d[1], %1" : "=w"(res) : "r"(arg), "0"(res));
  ASSERT_EQ(res, MakeUInt128(0x1111aaaa2222bbbbULL, 0xffffeeeeddddccccULL));

  // Move to low double.
  asm("fmov %d0, %1" : "=w"(res) : "r"(arg));
  ASSERT_EQ(res, MakeUInt128(0xffffeeeeddddccccULL, 0x0));

  // Move to single.
  asm("fmov %s0, %w1" : "=w"(res) : "r"(arg));
  ASSERT_EQ(res, MakeUInt128(0xddddccccULL, 0x0));
}
465
TEST(Arm64InsnTest, MoveFpRegToFpReg) {
  // Scalar FMOV between SIMD&FP registers: copies the low element and zeroes
  // the rest of the destination; the garbage in the upper/adjacent bits of
  // the source must not leak through.
  __uint128_t res;

  __uint128_t fp64_arg =
      MakeUInt128(0x402e9eb851eb851fULL, 0xdeadbeefaabbccddULL);  // 15.31 in double
  asm("fmov %d0, %d1" : "=w"(res) : "w"(fp64_arg));
  ASSERT_EQ(res, MakeUInt128(0x402e9eb851eb851fULL, 0ULL));

  __uint128_t fp32_arg =
      MakeUInt128(0xaabbccdd40e51eb8ULL, 0x0011223344556677ULL);  // 7.16 in float
  asm("fmov %s0, %s1" : "=w"(res) : "w"(fp32_arg));
  ASSERT_EQ(res, MakeUInt128(0x40e51eb8ULL, 0ULL));
}
479
TEST(Arm64InsnTest, InsertRegPartIntoSimd128) {
  // MOV (element from general) inserts the low part of a general-purpose
  // register into one vector element, leaving all other elements intact.
  uint64_t arg = 0xffffeeeeddddccccULL;
  __uint128_t res = MakeUInt128(0x1111aaaa2222bbbbULL, 0x3333cccc4444ddddULL);

  // Byte.
  asm("mov %0.b[3], %w1" : "=w"(res) : "r"(arg), "0"(res));
  ASSERT_EQ(res, MakeUInt128(0x1111aaaacc22bbbbULL, 0x3333cccc4444ddddULL));

  // Double word.
  asm("mov %0.d[1], %1" : "=w"(res) : "r"(arg), "0"(res));
  ASSERT_EQ(res, MakeUInt128(0x1111aaaacc22bbbbULL, 0xffffeeeeddddccccULL));
}
492
TEST(Arm64InsnTest, DuplicateRegIntoSimd128) {
  // DUP (general) broadcasts the low byte of the source register into all
  // sixteen byte lanes of the vector.
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_R_ARG("dup %0.16b, %w1")(0xabU);
  ASSERT_EQ(res, MakeUInt128(0xababababababababULL, 0xababababababababULL));
}
497
TEST(Arm64InsnTest, MoveSimd128ElemToRegSigned) {
  // SMOV extracts a vector element and sign-extends it to the destination
  // register width (64-bit for %0, 32-bit for %w0).
  uint64_t res = 0;
  __uint128_t arg = MakeUInt128(0x9796959493929190ULL, 0x9f9e9d9c9b9a99ULL);

  // Single word.
  asm("smov %0, %1.s[0]" : "=r"(res) : "w"(arg));
  ASSERT_EQ(res, 0xffffffff93929190ULL);

  asm("smov %0, %1.s[2]" : "=r"(res) : "w"(arg));
  ASSERT_EQ(res, 0xffffffff9c9b9a99ULL);

  // Half word.  The 32-bit destination is zero-extended to 64 bits, so only
  // the low word carries the sign extension.
  asm("smov %w0, %1.h[0]" : "=r"(res) : "w"(arg));
  ASSERT_EQ(res, 0x00000000ffff9190ULL);

  asm("smov %w0, %1.h[2]" : "=r"(res) : "w"(arg));
  ASSERT_EQ(res, 0x00000000ffff9594ULL);

  // Byte.
  asm("smov %w0, %1.b[0]" : "=r"(res) : "w"(arg));
  ASSERT_EQ(res, 0x00000000ffffff90ULL);

  asm("smov %w0, %1.b[2]" : "=r"(res) : "w"(arg));
  ASSERT_EQ(res, 0x00000000ffffff92ULL);
}
523
TEST(Arm64InsnTest, MoveSimd128ElemToRegUnsigned) {
  // UMOV extracts a vector element and zero-extends it into the destination
  // general-purpose register.
  uint64_t res = 0;
  __uint128_t arg = MakeUInt128(0xaaaabbbbcccceeeeULL, 0xffff000011112222ULL);

  // Double word.
  asm("umov %0, %1.d[0]" : "=r"(res) : "w"(arg));
  ASSERT_EQ(res, 0xaaaabbbbcccceeeeULL);

  asm("umov %0, %1.d[1]" : "=r"(res) : "w"(arg));
  ASSERT_EQ(res, 0xffff000011112222ULL);

  // Single word.
  asm("umov %w0, %1.s[0]" : "=r"(res) : "w"(arg));
  ASSERT_EQ(res, 0xcccceeeeULL);

  asm("umov %w0, %1.s[2]" : "=r"(res) : "w"(arg));
  ASSERT_EQ(res, 0x11112222ULL);

  // Half word.
  asm("umov %w0, %1.h[0]" : "=r"(res) : "w"(arg));
  ASSERT_EQ(res, 0xeeeeULL);

  asm("umov %w0, %1.h[2]" : "=r"(res) : "w"(arg));
  ASSERT_EQ(res, 0xbbbbULL);

  // Byte.
  asm("umov %w0, %1.b[0]" : "=r"(res) : "w"(arg));
  ASSERT_EQ(res, 0xeeULL);

  asm("umov %w0, %1.b[2]" : "=r"(res) : "w"(arg));
  ASSERT_EQ(res, 0xccULL);
}
556
TEST(Arm64InsnTest, SignedMultiplyAddLongElemI16x4) {
  // SMLAL (by element): widening signed multiply of the low four i16 lanes
  // of %1 by element h[1] of %2, accumulated into the i32 lanes of %0.
  __uint128_t arg1 = MakeUInt128(0x9463229563989898ULL, 0x9358211674562701ULL);
  __uint128_t arg2 = MakeUInt128(0x0218356462201349ULL, 0x6715188190973038ULL);
  __uint128_t arg3 = MakeUInt128(0x1198004973407239ULL, 0x6103685406643193ULL);
  __uint128_t res =
      ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("smlal %0.4s, %1.4h, %2.h[1]")(arg1, arg2, arg3);
  ASSERT_EQ(res, MakeUInt128(0x37c4a3494b9db539ULL, 0x37c3dab413a58e33ULL));
}
565
TEST(Arm64InsnTest, SignedMultiplyAddLongElemI16x4Upper) {
  // SMLAL2 (by element): same as SMLAL but sources the upper four i16 lanes
  // of %1.
  __uint128_t arg1 = MakeUInt128(0x9478221818528624ULL, 0x0851400666044332ULL);
  __uint128_t arg2 = MakeUInt128(0x5888569867054315ULL, 0x4706965747458550ULL);
  __uint128_t arg3 = MakeUInt128(0x3323233421073015ULL, 0x4594051655379068ULL);
  __uint128_t res =
      ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("smlal2 %0.4s, %1.8h, %2.h[1]")(arg1, arg2, arg3);
  ASSERT_EQ(res, MakeUInt128(0x5c30bd483c119e0fULL, 0x48ecc5ab6efb3a86ULL));
}
574
TEST(Arm64InsnTest, SignedMultiplyAddLongElemI16x4Upper2) {
  // SMLAL2 with the highest element index (h[7]) to exercise the full range
  // of the element-index encoding.
  __uint128_t arg1 = MakeUInt128(0x9968262824727064ULL, 0x1336222178923903ULL);
  __uint128_t arg2 = MakeUInt128(0x1760854289437339ULL, 0x3561889165125042ULL);
  __uint128_t arg3 = MakeUInt128(0x4404008952719837ULL, 0x8738648058472689ULL);
  __uint128_t res =
      ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("smlal2 %0.4s, %1.8h, %2.h[7]")(arg1, arg2, arg3);
  ASSERT_EQ(res, MakeUInt128(0x5d27e9db5e54d15aULL, 0x8b39d9f65f64ea0aULL));
}
583
TEST(Arm64InsnTest, SignedMultiplySubtractLongElemI16x4) {
  // SMLSL (by element): widening signed multiply of the low four i16 lanes,
  // subtracted from the i32 accumulator lanes.
  __uint128_t arg1 = MakeUInt128(0x9143447886360410ULL, 0x3182350736502778ULL);
  __uint128_t arg2 = MakeUInt128(0x5908975782727313ULL, 0x0504889398900992ULL);
  __uint128_t arg3 = MakeUInt128(0x3913503373250855ULL, 0x9826558670892426ULL);
  __uint128_t res =
      ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("smlsl %0.4s, %1.4h, %2.h[1]")(arg1, arg2, arg3);
  ASSERT_EQ(res, MakeUInt128(0xfd58202775231935ULL, 0x61d69fb0921db6b6ULL));
}
592
TEST(Arm64InsnTest, SignedMultiplySubtractLongElemI16x4Upper) {
  // SMLSL2 (by element): same as SMLSL but sources the upper four i16 lanes.
  __uint128_t arg1 = MakeUInt128(0x9320199199688285ULL, 0x1718395366913452ULL);
  __uint128_t arg2 = MakeUInt128(0x2244470804592396ULL, 0x6028171565515656ULL);
  __uint128_t arg3 = MakeUInt128(0x6611135982311225ULL, 0x0628905854914509ULL);
  __uint128_t res =
      ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("smlsl2 %0.4s, %1.8h, %2.h[1]")(arg1, arg2, arg3);
  ASSERT_EQ(res, MakeUInt128(0x645326f0814d99a3ULL, 0x05c4290053980b2eULL));
}
601
TEST(Arm64InsnTest, UnsignedMultiplyAddLongElemI16x4) {
  // UMLAL (by element): widening unsigned multiply of the low four u16 lanes
  // by element h[1], accumulated into the u32 lanes of %0.
  __uint128_t arg1 = MakeUInt128(0x9027601834840306ULL, 0x8113818551059797ULL);
  __uint128_t arg2 = MakeUInt128(0x0566400750942608ULL, 0x7885735796037324ULL);
  __uint128_t arg3 = MakeUInt128(0x5141467867036880ULL, 0x9880609716425849ULL);
  __uint128_t res =
      ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("umlal %0.4s, %1.4h, %2.h[1]")(arg1, arg2, arg3);
  ASSERT_EQ(res, MakeUInt128(0x61c8e2c867f707f8ULL, 0xc5dfe72334816629ULL));
}
610
TEST(Arm64InsnTest, UnsignedMultiplyAddLongElemI16x4Upper) {
  // UMLAL2 (by element): same as UMLAL but sources the upper four u16 lanes.
  __uint128_t arg1 = MakeUInt128(0x9454236828860613ULL, 0x4084148637767009ULL);
  __uint128_t arg2 = MakeUInt128(0x6120715124914043ULL, 0x0272538607648236ULL);
  __uint128_t arg3 = MakeUInt128(0x3414334623518975ULL, 0x7664521641376796ULL);
  __uint128_t res =
      ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("umlal2 %0.4s, %1.8h, %2.h[1]")(arg1, arg2, arg3);
  ASSERT_EQ(res, MakeUInt128(0x3c00351c3352428eULL, 0x7f9b6cda4425df7cULL));
}
619
TEST(Arm64InsnTest, UnsignedMultiplySubtractLongElemI16x4) {
  // UMLSL (by element): widening unsigned multiply of the low four u16
  // lanes, subtracted from the u32 accumulator lanes.
  __uint128_t arg1 = MakeUInt128(0x9128009282525619ULL, 0x0205263016391147ULL);
  __uint128_t arg2 = MakeUInt128(0x7247331485739107ULL, 0x7758744253876117ULL);
  __uint128_t arg3 = MakeUInt128(0x4657867116941477ULL, 0x6421441111263583ULL);
  __uint128_t res =
      ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("umlsl %0.4s, %1.4h, %2.h[1]")(arg1, arg2, arg3);
  ASSERT_EQ(res, MakeUInt128(0x0268619be9b26a3cULL, 0x1876471910da19edULL));
}
628
TEST(Arm64InsnTest, UnsignedMultiplySubtractLongElemI16x4Upper) {
  // UMLSL2 (by element): same as UMLSL but sources the upper four u16 lanes.
  __uint128_t arg1 = MakeUInt128(0x9420757136275167ULL, 0x4573189189456283ULL);
  __uint128_t arg2 = MakeUInt128(0x5257044133543758ULL, 0x5753426986994725ULL);
  __uint128_t arg3 = MakeUInt128(0x4703165661399199ULL, 0x9682628247270641ULL);
  __uint128_t res =
      ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("umlsl2 %0.4s, %1.8h, %2.h[1]")(arg1, arg2, arg3);
  ASSERT_EQ(res, MakeUInt128(0x2b7d4cb24d79259dULL, 0x8895afc6423a13adULL));
}
637
TEST(Arm64InsnTest, AsmConvertI32F32) {
  // SCVTF (general, 32-bit source): signed int32 -> float32.
  constexpr auto AsmConvertI32F32 = ASM_INSN_WRAP_FUNC_W_RES_R_ARG("scvtf %s0, %w1");
  ASSERT_EQ(AsmConvertI32F32(21), MakeUInt128(0x41a80000U, 0U));  // 21.0f
}
642
TEST(Arm64InsnTest, AsmConvertU32F32) {
  // UCVTF (general, 32-bit source): unsigned int32 -> float32.
  constexpr auto AsmConvertU32F32 = ASM_INSN_WRAP_FUNC_W_RES_R_ARG("ucvtf %s0, %w1");

  ASSERT_EQ(AsmConvertU32F32(29), MakeUInt128(0x41e80000U, 0U));  // 29.0f

  // Test that the topmost bit isn't treated as the sign.
  ASSERT_EQ(AsmConvertU32F32(1U << 31), MakeUInt128(0x4f000000U, 0U));  // 2^31 as float
}
651
TEST(Arm64InsnTest, AsmConvertU32F32FromSimdReg) {
  // UCVTF (scalar SIMD source): unsigned int32 in an FP register -> float32.
  constexpr auto AsmUcvtf = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("ucvtf %s0, %s1");

  ASSERT_EQ(AsmUcvtf(28), MakeUInt128(0x41e00000U, 0U));  // 28.0f

  // Test that the topmost bit isn't treated as the sign.
  ASSERT_EQ(AsmUcvtf(1U << 31), MakeUInt128(0x4f000000U, 0U));  // 2^31 as float
}
660
TEST(Arm64InsnTest, AsmConvertI32F64) {
  // SCVTF (general, 32-bit source): signed int32 -> float64.
  constexpr auto AsmConvertI32F64 = ASM_INSN_WRAP_FUNC_W_RES_R_ARG("scvtf %d0, %w1");
  ASSERT_EQ(AsmConvertI32F64(21), MakeUInt128(0x4035000000000000ULL, 0U));  // 21.0
}
665
TEST(Arm64InsnTest, AsmConvertU32F64) {
  // UCVTF (general, 32-bit source): unsigned int32 -> float64.
  constexpr auto AsmConvertU32F64 = ASM_INSN_WRAP_FUNC_W_RES_R_ARG("ucvtf %d0, %w1");

  ASSERT_EQ(AsmConvertU32F64(18), MakeUInt128(0x4032000000000000ULL, 0U));  // 18.0

  // Test that the topmost bit isn't treated as the sign.
  ASSERT_EQ(AsmConvertU32F64(1U << 31), MakeUInt128(0x41e0000000000000ULL, 0U));  // 2^31
}
674
TEST(Arm64InsnTest, AsmConvertI64F32) {
  // SCVTF (general, 64-bit source): signed int64 -> float32.
  constexpr auto AsmConvertI64F32 = ASM_INSN_WRAP_FUNC_W_RES_R_ARG("scvtf %s0, %x1");
  ASSERT_EQ(AsmConvertI64F32(11), MakeUInt128(0x41300000U, 0U));  // 11.0f
}
679
TEST(Arm64InsnTest, AsmConvertU64F32) {
  // UCVTF (general, 64-bit source): unsigned int64 -> float32.
  constexpr auto AsmConvertU64F32 = ASM_INSN_WRAP_FUNC_W_RES_R_ARG("ucvtf %s0, %x1");

  ASSERT_EQ(AsmConvertU64F32(3), MakeUInt128(0x40400000U, 0U));  // 3.0f

  // Test that the topmost bit isn't treated as the sign.
  ASSERT_EQ(AsmConvertU64F32(1ULL << 63), MakeUInt128(0x5f000000U, 0U));  // 2^63 as float
}
688
TEST(Arm64InsnTest, AsmConvertI64F64) {
  // SCVTF (general, 64-bit source): signed int64 -> float64.
  constexpr auto AsmConvertI64F64 = ASM_INSN_WRAP_FUNC_W_RES_R_ARG("scvtf %d0, %x1");
  ASSERT_EQ(AsmConvertI64F64(137), MakeUInt128(0x4061200000000000ULL, 0U));  // 137.0
}
693
TEST(Arm64InsnTest, AsmConvertI32F32FromSimdReg) {
  // SCVTF (scalar SIMD source): signed int32 in an FP register -> float32.
  constexpr auto AsmConvertI32F32 = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("scvtf %s0, %s1");
  ASSERT_EQ(AsmConvertI32F32(1109), MakeUInt128(0x448aa000ULL, 0U));  // 1109.0f
}
698
TEST(Arm64InsnTest, AsmConvertI64F64FromSimdReg) {
  // SCVTF (scalar SIMD source): signed int64 in an FP register -> float64.
  constexpr auto AsmConvertI64F64 = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("scvtf %d0, %d1");
  ASSERT_EQ(AsmConvertI64F64(123), MakeUInt128(0x405ec00000000000ULL, 0U));  // 123.0
}
703
TEST(Arm64InsnTest, AsmConvertI32x4F32x4) {
  // SCVTF (vector): converts each of the four signed i32 lanes to float32.
  constexpr auto AsmConvertI32F32 = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("scvtf %0.4s, %1.4s");
  __uint128_t arg = MakeUInt128(0x0000003500000014ULL, 0x0000005400000009ULL);  // {53, 20, 84, 9}
  ASSERT_EQ(AsmConvertI32F32(arg), MakeUInt128(0x4254000041a00000ULL, 0x42a8000041100000ULL));
}
709
TEST(Arm64InsnTest, AsmConvertI64x2F64x2) {
  // SCVTF (vector): converts both signed i64 lanes to float64; a negative
  // lane verifies the signed interpretation.
  constexpr auto AsmConvertI64F64 = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("scvtf %0.2d, %1.2d");
  __uint128_t arg = MakeUInt128(static_cast<int64_t>(-9), 17U);
  ASSERT_EQ(AsmConvertI64F64(arg), MakeUInt128(0xc022000000000000ULL, 0x4031000000000000ULL));
}
715
TEST(Arm64InsnTest, AsmConvertU32x4F32x4) {
  // UCVTF (vector): converts each unsigned i32 lane to float32; the lane
  // with the top bit set (0x80000000) checks unsigned interpretation.
  constexpr auto AsmConvertU32F32 = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("ucvtf %0.4s, %1.4s");
  __uint128_t arg = MakeUInt128(0x8000000000000019ULL, 0x0000005800000010ULL);
  ASSERT_EQ(AsmConvertU32F32(arg), MakeUInt128(0x4f00000041c80000ULL, 0x42b0000041800000ULL));
}
721
TEST(Arm64InsnTest, AsmConvertU64x2F64x2) {
  // UCVTF (vector): converts both unsigned i64 lanes to float64; the 2^63
  // lane checks unsigned interpretation of the top bit.
  constexpr auto AsmConvertU64F64 = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("ucvtf %0.2d, %1.2d");
  __uint128_t arg = MakeUInt128(1ULL << 63, 29U);
  ASSERT_EQ(AsmConvertU64F64(arg), MakeUInt128(0x43e0000000000000ULL, 0x403d000000000000ULL));
}
727
TEST(Arm64InsnTest, AsmConvertU64F64) {
  // UCVTF (general, 64-bit source): unsigned int64 -> float64.
  constexpr auto AsmConvertU64F64 = ASM_INSN_WRAP_FUNC_W_RES_R_ARG("ucvtf %d0, %x1");

  ASSERT_EQ(AsmConvertU64F64(49), MakeUInt128(0x4048800000000000ULL, 0U));  // 49.0

  // Test that the topmost bit isn't treated as the sign.
  ASSERT_EQ(AsmConvertU64F64(1ULL << 63), MakeUInt128(0x43e0000000000000ULL, 0U));  // 2^63
}
736
TEST(Arm64InsnTest, AsmConvertU64F64FromSimdReg) {
  // UCVTF (scalar SIMD source): unsigned int64 in an FP register -> float64.
  constexpr auto AsmUcvtf = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("ucvtf %d0, %d1");

  ASSERT_EQ(AsmUcvtf(47), MakeUInt128(0x4047800000000000ULL, 0U));  // 47.0

  // Test that the topmost bit isn't treated as the sign.
  ASSERT_EQ(AsmUcvtf(1ULL << 63), MakeUInt128(0x43e0000000000000ULL, 0U));  // 2^63
}
745
TEST(Arm64InsnTest, AsmConvertLiterals) {
  // Verify that the compiler encodes the floating-point literals used in the
  // conversion tests below exactly as expected.
  ASSERT_EQ(bit_cast<uint32_t>(-7.50f), 0xc0f00000U);
  ASSERT_EQ(bit_cast<uint32_t>(-6.75f), 0xc0d80000U);
  ASSERT_EQ(bit_cast<uint32_t>(-6.50f), 0xc0d00000U);
  ASSERT_EQ(bit_cast<uint32_t>(-6.25f), 0xc0c80000U);
  ASSERT_EQ(bit_cast<uint32_t>(6.25f), 0x40c80000U);
  ASSERT_EQ(bit_cast<uint32_t>(6.50f), 0x40d00000U);
  ASSERT_EQ(bit_cast<uint32_t>(6.75f), 0x40d80000U);
  ASSERT_EQ(bit_cast<uint32_t>(7.50f), 0x40f00000U);

  ASSERT_EQ(bit_cast<uint64_t>(-7.50), 0xc01e000000000000ULL);
  ASSERT_EQ(bit_cast<uint64_t>(-6.75), 0xc01b000000000000ULL);
  ASSERT_EQ(bit_cast<uint64_t>(-6.50), 0xc01a000000000000ULL);
  ASSERT_EQ(bit_cast<uint64_t>(-6.25), 0xc019000000000000ULL);
  ASSERT_EQ(bit_cast<uint64_t>(6.25), 0x4019000000000000ULL);
  ASSERT_EQ(bit_cast<uint64_t>(6.50), 0x401a000000000000ULL);
  ASSERT_EQ(bit_cast<uint64_t>(6.75), 0x401b000000000000ULL);
  ASSERT_EQ(bit_cast<uint64_t>(7.50), 0x401e000000000000ULL);
}
767
768 template <typename IntType, typename FuncType>
TestConvertF32ToInt(FuncType AsmFunc,std::initializer_list<int> expected)769 void TestConvertF32ToInt(FuncType AsmFunc, std::initializer_list<int> expected) {
770 // Note that bit_cast isn't a constexpr.
771 static const uint32_t kConvertF32ToIntInputs[] = {
772 bit_cast<uint32_t>(-7.50f),
773 bit_cast<uint32_t>(-6.75f),
774 bit_cast<uint32_t>(-6.50f),
775 bit_cast<uint32_t>(-6.25f),
776 bit_cast<uint32_t>(6.25f),
777 bit_cast<uint32_t>(6.50f),
778 bit_cast<uint32_t>(6.75f),
779 bit_cast<uint32_t>(7.50f),
780 };
781
782 const size_t kConvertF32ToIntInputsSize = sizeof(kConvertF32ToIntInputs) / sizeof(uint32_t);
783 ASSERT_EQ(kConvertF32ToIntInputsSize, expected.size());
784
785 auto expected_it = expected.begin();
786 for (size_t input_it = 0; input_it < kConvertF32ToIntInputsSize; input_it++) {
787 ASSERT_EQ(AsmFunc(kConvertF32ToIntInputs[input_it]), static_cast<IntType>(*expected_it++));
788 }
789 }
790
TEST(Arm64InsnTest, AsmConvertF32I32TieAway) {
  // fcvtas: F32 -> signed 32-bit, round to nearest, ties away from zero.
  TestConvertF32ToInt<uint32_t>(ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtas %w0, %s1"),
                                {-8, -7, -7, -6, 6U, 7U, 7U, 8U});
}
795
TEST(Arm64InsnTest, AsmConvertF32U32TieAway) {
  // fcvtau: F32 -> unsigned 32-bit, ties away from zero; negatives clamp to 0.
  TestConvertF32ToInt<uint32_t>(ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtau %w0, %s1"),
                                {0U, 0U, 0U, 0U, 6U, 7U, 7U, 8U});
}
800
TEST(Arm64InsnTest, AsmConvertF32I32NegInf) {
  // fcvtms: F32 -> signed 32-bit, round toward minus infinity (floor).
  TestConvertF32ToInt<uint32_t>(ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtms %w0, %s1"),
                                {-8, -7, -7, -7, 6U, 6U, 6U, 7U});
}
805
TEST(Arm64InsnTest, AsmConvertF32U32NegInf) {
  // fcvtmu: F32 -> unsigned 32-bit, round toward minus infinity; negatives clamp to 0.
  TestConvertF32ToInt<uint32_t>(ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtmu %w0, %s1"),
                                {0U, 0U, 0U, 0U, 6U, 6U, 6U, 7U});
}
810
TEST(Arm64InsnTest, AsmConvertF32I32TieEven) {
  // fcvtns: F32 -> signed 32-bit, round to nearest, ties to even.
  TestConvertF32ToInt<uint32_t>(ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtns %w0, %s1"),
                                {-8, -7, -6, -6, 6U, 6U, 7U, 8U});
}
815
TEST(Arm64InsnTest, AsmConvertF32U32TieEven) {
  // fcvtnu: F32 -> unsigned 32-bit, ties to even; negatives clamp to 0.
  TestConvertF32ToInt<uint32_t>(ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtnu %w0, %s1"),
                                {0U, 0U, 0U, 0U, 6U, 6U, 7U, 8U});
}
820
TEST(Arm64InsnTest, AsmConvertF32I32PosInf) {
  // fcvtps: F32 -> signed 32-bit, round toward plus infinity (ceiling).
  TestConvertF32ToInt<uint32_t>(ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtps %w0, %s1"),
                                {-7, -6, -6, -6, 7U, 7U, 7U, 8U});
}
825
TEST(Arm64InsnTest, AsmConvertF32U32PosInf) {
  // fcvtpu: F32 -> unsigned 32-bit, round toward plus infinity; negatives clamp to 0.
  TestConvertF32ToInt<uint32_t>(ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtpu %w0, %s1"),
                                {0U, 0U, 0U, 0U, 7U, 7U, 7U, 8U});
}
830
TEST(Arm64InsnTest, AsmConvertF32I32Truncate) {
  // fcvtzs: F32 -> signed 32-bit, round toward zero (truncate).
  TestConvertF32ToInt<uint32_t>(ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtzs %w0, %s1"),
                                {-7, -6, -6, -6, 6U, 6U, 6U, 7U});
}
835
TEST(Arm64InsnTest, AsmConvertF32U32Truncate) {
  // fcvtzu: F32 -> unsigned 32-bit, round toward zero; negatives clamp to 0.
  TestConvertF32ToInt<uint32_t>(ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtzu %w0, %s1"),
                                {0U, 0U, 0U, 0U, 6U, 6U, 6U, 7U});
}
840
TEST(Arm64InsnTest, AsmConvertF32I64TieAway) {
  // fcvtas: F32 -> signed 64-bit, round to nearest, ties away from zero.
  TestConvertF32ToInt<uint64_t>(ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtas %x0, %s1"),
                                {-8, -7, -7, -6, 6U, 7U, 7U, 8U});
}
845
TEST(Arm64InsnTest, AsmConvertF32U64TieAway) {
  // fcvtau: F32 -> unsigned 64-bit, ties away from zero; negatives clamp to 0.
  TestConvertF32ToInt<uint64_t>(ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtau %x0, %s1"),
                                {0U, 0U, 0U, 0U, 6U, 7U, 7U, 8U});
}
850
TEST(Arm64InsnTest, AsmConvertF32I64NegInf) {
  // fcvtms: F32 -> signed 64-bit, round toward minus infinity (floor).
  TestConvertF32ToInt<uint64_t>(ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtms %x0, %s1"),
                                {-8, -7, -7, -7, 6U, 6U, 6U, 7U});
}
855
TEST(Arm64InsnTest, AsmConvertF32U64NegInf) {
  // fcvtmu: F32 -> unsigned 64-bit, round toward minus infinity; negatives clamp to 0.
  TestConvertF32ToInt<uint64_t>(ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtmu %x0, %s1"),
                                {0U, 0U, 0U, 0U, 6U, 6U, 6U, 7U});
}
860
TEST(Arm64InsnTest, AsmConvertF32I64TieEven) {
  // fcvtns: F32 -> signed 64-bit, round to nearest, ties to even.
  TestConvertF32ToInt<uint64_t>(ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtns %x0, %s1"),
                                {-8, -7, -6, -6, 6U, 6U, 7U, 8U});
}
865
TEST(Arm64InsnTest, AsmConvertF32U64TieEven) {
  // fcvtnu: F32 -> unsigned 64-bit, ties to even; negatives clamp to 0.
  TestConvertF32ToInt<uint64_t>(ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtnu %x0, %s1"),
                                {0U, 0U, 0U, 0U, 6U, 6U, 7U, 8U});
}
870
TEST(Arm64InsnTest, AsmConvertF32I64PosInf) {
  // fcvtps: F32 -> signed 64-bit, round toward plus infinity (ceiling).
  TestConvertF32ToInt<uint64_t>(ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtps %x0, %s1"),
                                {-7, -6, -6, -6, 7U, 7U, 7U, 8U});
}
875
TEST(Arm64InsnTest, AsmConvertF32U64PosInf) {
  // fcvtpu: F32 -> unsigned 64-bit, round toward plus infinity; negatives clamp to 0.
  TestConvertF32ToInt<uint64_t>(ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtpu %x0, %s1"),
                                {0U, 0U, 0U, 0U, 7U, 7U, 7U, 8U});
}
880
TEST(Arm64InsnTest, AsmConvertF32I64Truncate) {
  // fcvtzs: F32 -> signed 64-bit, round toward zero (truncate).
  TestConvertF32ToInt<uint64_t>(ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtzs %x0, %s1"),
                                {-7, -6, -6, -6, 6U, 6U, 6U, 7U});
}
885
TEST(Arm64InsnTest, AsmConvertF32U64Truncate) {
  // fcvtzu: F32 -> unsigned 64-bit, round toward zero; negatives clamp to 0.
  TestConvertF32ToInt<uint64_t>(ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtzu %x0, %s1"),
                                {0U, 0U, 0U, 0U, 6U, 6U, 6U, 7U});
}
890
891 template <typename IntType, typename FuncType>
TestConvertF64ToInt(FuncType AsmFunc,std::initializer_list<int> expected)892 void TestConvertF64ToInt(FuncType AsmFunc, std::initializer_list<int> expected) {
893 // Note that bit_cast isn't a constexpr.
894 static const uint64_t kConvertF64ToIntInputs[] = {
895 bit_cast<uint64_t>(-7.50),
896 bit_cast<uint64_t>(-6.75),
897 bit_cast<uint64_t>(-6.50),
898 bit_cast<uint64_t>(-6.25),
899 bit_cast<uint64_t>(6.25),
900 bit_cast<uint64_t>(6.50),
901 bit_cast<uint64_t>(6.75),
902 bit_cast<uint64_t>(7.50),
903 };
904
905 const size_t kConvertF64ToIntInputsSize = sizeof(kConvertF64ToIntInputs) / sizeof(uint64_t);
906 ASSERT_EQ(kConvertF64ToIntInputsSize, expected.size());
907
908 auto expected_it = expected.begin();
909 for (size_t input_it = 0; input_it < kConvertF64ToIntInputsSize; input_it++) {
910 ASSERT_EQ(AsmFunc(kConvertF64ToIntInputs[input_it]), static_cast<IntType>(*expected_it++));
911 }
912 }
913
TEST(Arm64InsnTest, AsmConvertF64I32TieAway) {
  // fcvtas: F64 -> signed 32-bit, round to nearest, ties away from zero.
  TestConvertF64ToInt<uint32_t>(ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtas %w0, %d1"),
                                {-8, -7, -7, -6, 6U, 7U, 7U, 8U});
}
918
TEST(Arm64InsnTest, AsmConvertF64U32TieAway) {
  // fcvtau: F64 -> unsigned 32-bit, ties away from zero; negatives clamp to 0.
  TestConvertF64ToInt<uint32_t>(ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtau %w0, %d1"),
                                {0U, 0U, 0U, 0U, 6U, 7U, 7U, 8U});
}
923
TEST(Arm64InsnTest, AsmConvertF64I32NegInf) {
  // fcvtms: F64 -> signed 32-bit, round toward minus infinity (floor).
  TestConvertF64ToInt<uint32_t>(ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtms %w0, %d1"),
                                {-8, -7, -7, -7, 6U, 6U, 6U, 7U});
}
928
TEST(Arm64InsnTest, AsmConvertF64U32NegInf) {
  // fcvtmu: F64 -> unsigned 32-bit, round toward minus infinity; negatives clamp to 0.
  TestConvertF64ToInt<uint32_t>(ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtmu %w0, %d1"),
                                {0U, 0U, 0U, 0U, 6U, 6U, 6U, 7U});
}
933
TEST(Arm64InsnTest, AsmConvertF64I32TieEven) {
  // fcvtns: F64 -> signed 32-bit, round to nearest, ties to even.
  TestConvertF64ToInt<uint32_t>(ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtns %w0, %d1"),
                                {-8, -7, -6, -6, 6U, 6U, 7U, 8U});
}
938
TEST(Arm64InsnTest, AsmConvertF64U32TieEven) {
  // fcvtnu: F64 -> unsigned 32-bit, ties to even; negatives clamp to 0.
  TestConvertF64ToInt<uint32_t>(ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtnu %w0, %d1"),
                                {0U, 0U, 0U, 0U, 6U, 6U, 7U, 8U});
}
943
TEST(Arm64InsnTest, AsmConvertF64I32PosInf) {
  // fcvtps: F64 -> signed 32-bit, round toward plus infinity (ceiling).
  TestConvertF64ToInt<uint32_t>(ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtps %w0, %d1"),
                                {-7, -6, -6, -6, 7U, 7U, 7U, 8U});
}
948
TEST(Arm64InsnTest, AsmConvertF64U32PosInf) {
  // fcvtpu: F64 -> unsigned 32-bit, round toward plus infinity; negatives clamp to 0.
  TestConvertF64ToInt<uint32_t>(ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtpu %w0, %d1"),
                                {0U, 0U, 0U, 0U, 7U, 7U, 7U, 8U});
}
953
TEST(Arm64InsnTest, AsmConvertF64I32Truncate) {
  // fcvtzs: F64 -> signed 32-bit, round toward zero (truncate).
  TestConvertF64ToInt<uint32_t>(ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtzs %w0, %d1"),
                                {-7, -6, -6, -6, 6U, 6U, 6U, 7U});
}
958
TEST(Arm64InsnTest, AsmConvertF64U32Truncate) {
  // fcvtzu: F64 -> unsigned 32-bit, round toward zero; negatives clamp to 0.
  TestConvertF64ToInt<uint32_t>(ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtzu %w0, %d1"),
                                {0U, 0U, 0U, 0U, 6U, 6U, 6U, 7U});
}
963
TEST(Arm64InsnTest, AsmConvertF64I64TieAway) {
  // fcvtas: F64 -> signed 64-bit, round to nearest, ties away from zero.
  TestConvertF64ToInt<uint64_t>(ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtas %x0, %d1"),
                                {-8, -7, -7, -6, 6U, 7U, 7U, 8U});
}
968
TEST(Arm64InsnTest, AsmConvertF64U64TieAway) {
  // fcvtau: F64 -> unsigned 64-bit, ties away from zero; negatives clamp to 0.
  TestConvertF64ToInt<uint64_t>(ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtau %x0, %d1"),
                                {0U, 0U, 0U, 0U, 6U, 7U, 7U, 8U});
}
973
TEST(Arm64InsnTest, AsmConvertF64I64NegInf) {
  // fcvtms: F64 -> signed 64-bit, round toward minus infinity (floor).
  TestConvertF64ToInt<uint64_t>(ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtms %x0, %d1"),
                                {-8, -7, -7, -7, 6U, 6U, 6U, 7U});
}
978
TEST(Arm64InsnTest, AsmConvertF64U64NegInf) {
  // fcvtmu: F64 -> unsigned 64-bit, round toward minus infinity; negatives clamp to 0.
  TestConvertF64ToInt<uint64_t>(ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtmu %x0, %d1"),
                                {0U, 0U, 0U, 0U, 6U, 6U, 6U, 7U});
}
983
TEST(Arm64InsnTest, AsmConvertF64I64TieEven) {
  // fcvtns: F64 -> signed 64-bit, round to nearest, ties to even.
  TestConvertF64ToInt<uint64_t>(ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtns %x0, %d1"),
                                {-8, -7, -6, -6, 6U, 6U, 7U, 8U});
}
988
TEST(Arm64InsnTest, AsmConvertF64U64TieEven) {
  // fcvtnu: F64 -> unsigned 64-bit, ties to even; negatives clamp to 0.
  TestConvertF64ToInt<uint64_t>(ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtnu %x0, %d1"),
                                {0U, 0U, 0U, 0U, 6U, 6U, 7U, 8U});
}
993
TEST(Arm64InsnTest, AsmConvertF64I64PosInf) {
  // fcvtps: F64 -> signed 64-bit, round toward plus infinity (ceiling).
  TestConvertF64ToInt<uint64_t>(ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtps %x0, %d1"),
                                {-7, -6, -6, -6, 7U, 7U, 7U, 8U});
}
998
TEST(Arm64InsnTest, AsmConvertF64U64PosInf) {
  // fcvtpu: F64 -> unsigned 64-bit, round toward plus infinity; negatives clamp to 0.
  TestConvertF64ToInt<uint64_t>(ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtpu %x0, %d1"),
                                {0U, 0U, 0U, 0U, 7U, 7U, 7U, 8U});
}
1003
TEST(Arm64InsnTest, AsmConvertF64I64Truncate) {
  // fcvtzs: F64 -> signed 64-bit, round toward zero (truncate).
  TestConvertF64ToInt<uint64_t>(ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtzs %x0, %d1"),
                                {-7, -6, -6, -6, 6U, 6U, 6U, 7U});
}
1008
TEST(Arm64InsnTest, AsmConvertF64U64Truncate) {
  // fcvtzu: F64 -> unsigned 64-bit, round toward zero; negatives clamp to 0.
  TestConvertF64ToInt<uint64_t>(ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtzu %x0, %d1"),
                                {0U, 0U, 0U, 0U, 6U, 6U, 6U, 7U});
}
1013
TEST(Arm64InsnTest, AsmConvertF32I32ScalarTieAway) {
  // fcvtas (scalar SIMD form): ties away from zero, result stays in %s0.
  TestConvertF32ToInt<uint32_t>(ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtas %s0, %s1"),
                                {-8, -7, -7, -6, 6U, 7U, 7U, 8U});
}
1018
TEST(Arm64InsnTest, AsmConvertF32U32ScalarTieAway) {
  // fcvtau (scalar SIMD form): ties away from zero; negatives clamp to 0.
  TestConvertF32ToInt<uint32_t>(ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtau %s0, %s1"),
                                {0U, 0U, 0U, 0U, 6U, 7U, 7U, 8U});
}
1023
TEST(Arm64InsnTest, AsmConvertF32I32ScalarNegInf) {
  // fcvtms (scalar SIMD form): round toward minus infinity (floor).
  TestConvertF32ToInt<uint32_t>(ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtms %s0, %s1"),
                                {-8, -7, -7, -7, 6U, 6U, 6U, 7U});
}
1028
TEST(Arm64InsnTest, AsmConvertF32U32ScalarNegInf) {
  // fcvtmu (scalar SIMD form): round toward minus infinity; negatives clamp to 0.
  TestConvertF32ToInt<uint32_t>(ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtmu %s0, %s1"),
                                {0U, 0U, 0U, 0U, 6U, 6U, 6U, 7U});
}
1033
TEST(Arm64InsnTest, AsmConvertF32I32ScalarTieEven) {
  // fcvtns (scalar SIMD form): round to nearest, ties to even.
  TestConvertF32ToInt<uint32_t>(ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtns %s0, %s1"),
                                {-8, -7, -6, -6, 6U, 6U, 7U, 8U});
}
1038
TEST(Arm64InsnTest, AsmConvertF32U32ScalarTieEven) {
  // fcvtnu (scalar SIMD form): ties to even; negatives clamp to 0.
  TestConvertF32ToInt<uint32_t>(ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtnu %s0, %s1"),
                                {0U, 0U, 0U, 0U, 6U, 6U, 7U, 8U});
}
1043
TEST(Arm64InsnTest, AsmConvertF32I32ScalarPosInf) {
  // fcvtps (scalar SIMD form): round toward plus infinity (ceiling).
  TestConvertF32ToInt<uint32_t>(ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtps %s0, %s1"),
                                {-7, -6, -6, -6, 7U, 7U, 7U, 8U});
}
1048
TEST(Arm64InsnTest, AsmConvertF32U32ScalarPosInf) {
  // fcvtpu (scalar SIMD form): round toward plus infinity; negatives clamp to 0.
  TestConvertF32ToInt<uint32_t>(ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtpu %s0, %s1"),
                                {0U, 0U, 0U, 0U, 7U, 7U, 7U, 8U});
}
1053
TEST(Arm64InsnTest, AsmConvertF32I32ScalarTruncate) {
  // fcvtzs (scalar SIMD form): round toward zero (truncate).
  TestConvertF32ToInt<uint32_t>(ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtzs %s0, %s1"),
                                {-7, -6, -6, -6, 6U, 6U, 6U, 7U});
}
1058
TEST(Arm64InsnTest, AsmConvertF32U32ScalarTruncate) {
  // fcvtzu (scalar SIMD form): round toward zero; negatives clamp to 0.
  TestConvertF32ToInt<uint32_t>(ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtzu %s0, %s1"),
                                {0U, 0U, 0U, 0U, 6U, 6U, 6U, 7U});
}
1063
TEST(Arm64InsnTest, AsmConvertF64I64ScalarTieAway) {
  // fcvtas (scalar SIMD form, F64): ties away from zero, result stays in %d0.
  TestConvertF64ToInt<uint64_t>(ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtas %d0, %d1"),
                                {-8, -7, -7, -6, 6U, 7U, 7U, 8U});
}
1068
TEST(Arm64InsnTest, AsmConvertF64U64ScalarTieAway) {
  // fcvtau (scalar SIMD form, F64): ties away from zero; negatives clamp to 0.
  TestConvertF64ToInt<uint64_t>(ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtau %d0, %d1"),
                                {0U, 0U, 0U, 0U, 6U, 7U, 7U, 8U});
}
1073
TEST(Arm64InsnTest, AsmConvertF64I64ScalarNegInf) {
  // fcvtms (scalar SIMD form, F64): round toward minus infinity (floor).
  TestConvertF64ToInt<uint64_t>(ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtms %d0, %d1"),
                                {-8, -7, -7, -7, 6U, 6U, 6U, 7U});
}
1078
TEST(Arm64InsnTest, AsmConvertF64U64ScalarNegInf) {
  // fcvtmu (scalar SIMD form, F64): round toward minus infinity; negatives clamp to 0.
  TestConvertF64ToInt<uint64_t>(ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtmu %d0, %d1"),
                                {0U, 0U, 0U, 0U, 6U, 6U, 6U, 7U});
}
1083
TEST(Arm64InsnTest, AsmConvertF64I64ScalarTieEven) {
  // fcvtns (scalar SIMD form, F64): round to nearest, ties to even.
  TestConvertF64ToInt<uint64_t>(ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtns %d0, %d1"),
                                {-8, -7, -6, -6, 6U, 6U, 7U, 8U});
}
1088
TEST(Arm64InsnTest, AsmConvertF64U64ScalarTieEven) {
  // fcvtnu (scalar SIMD form, F64): ties to even; negatives clamp to 0.
  TestConvertF64ToInt<uint64_t>(ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtnu %d0, %d1"),
                                {0U, 0U, 0U, 0U, 6U, 6U, 7U, 8U});
}
1093
TEST(Arm64InsnTest, AsmConvertF64I64ScalarPosInf) {
  // fcvtps (scalar SIMD form, F64): round toward plus infinity (ceiling).
  TestConvertF64ToInt<uint64_t>(ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtps %d0, %d1"),
                                {-7, -6, -6, -6, 7U, 7U, 7U, 8U});
}
1098
TEST(Arm64InsnTest, AsmConvertF64U64ScalarPosInf) {
  // fcvtpu (scalar SIMD form, F64): round toward plus infinity; negatives clamp to 0.
  TestConvertF64ToInt<uint64_t>(ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtpu %d0, %d1"),
                                {0U, 0U, 0U, 0U, 7U, 7U, 7U, 8U});
}
1103
TEST(Arm64InsnTest, AsmConvertF64I64ScalarTruncate) {
  // fcvtzs (scalar SIMD form, F64): round toward zero (truncate).
  TestConvertF64ToInt<uint64_t>(ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtzs %d0, %d1"),
                                {-7, -6, -6, -6, 6U, 6U, 6U, 7U});
}
1108
TEST(Arm64InsnTest, AsmConvertF64U64ScalarTruncate) {
  // fcvtzu (scalar SIMD form, F64): round toward zero; negatives clamp to 0.
  TestConvertF64ToInt<uint64_t>(ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtzu %d0, %d1"),
                                {0U, 0U, 0U, 0U, 6U, 6U, 6U, 7U});
}
1113
TEST(Arm64InsnTest, AsmConvertF32I32x4TieAway) {
  // fcvtas (vector): four F32 lanes -> signed 32-bit, ties away from zero.
  constexpr auto AsmFcvtas = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtas %0.4s, %1.4s");
  ASSERT_EQ(AsmFcvtas(MakeF32x4(-7.50f, -6.75f, -6.50f, -6.25f)),
            MakeUInt128(0xfffffff9fffffff8ULL, 0xfffffffafffffff9ULL));
  ASSERT_EQ(AsmFcvtas(MakeF32x4(6.25f, 6.50f, 6.75f, 7.50f)),
            MakeUInt128(0x0000000700000006ULL, 0x0000000800000007ULL));
}
1121
TEST(Arm64InsnTest, AsmConvertF32U32x4TieAway) {
  // fcvtau (vector): unsigned result, ties away from zero; negatives clamp to 0.
  constexpr auto AsmFcvtau = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtau %0.4s, %1.4s");
  ASSERT_EQ(AsmFcvtau(MakeF32x4(-7.50f, -6.75f, -6.50f, -6.25f)),
            MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmFcvtau(MakeF32x4(6.25f, 6.50f, 6.75f, 7.50f)),
            MakeUInt128(0x0000000700000006ULL, 0x0000000800000007ULL));
}
1129
TEST(Arm64InsnTest, AsmConvertF32I32x4NegInf) {
  // fcvtms (vector): four F32 lanes -> signed 32-bit, round toward minus infinity.
  constexpr auto AsmFcvtms = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtms %0.4s, %1.4s");
  ASSERT_EQ(AsmFcvtms(MakeF32x4(-7.50f, -6.75f, -6.50f, -6.25f)),
            MakeUInt128(0xfffffff9fffffff8ULL, 0xfffffff9fffffff9ULL));
  ASSERT_EQ(AsmFcvtms(MakeF32x4(6.25f, 6.50f, 6.75f, 7.50f)),
            MakeUInt128(0x0000000600000006ULL, 0x0000000700000006ULL));
}
1137
TEST(Arm64InsnTest, AsmConvertF32U32x4NegInf) {
  // fcvtmu (vector): unsigned result, round toward minus infinity; negatives clamp to 0.
  constexpr auto AsmFcvtmu = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtmu %0.4s, %1.4s");
  ASSERT_EQ(AsmFcvtmu(MakeF32x4(-7.50f, -6.75f, -6.50f, -6.25f)),
            MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmFcvtmu(MakeF32x4(6.25f, 6.50f, 6.75f, 7.50f)),
            MakeUInt128(0x0000000600000006ULL, 0x0000000700000006ULL));
}
1145
TEST(Arm64InsnTest, AsmConvertF32I32x4TieEven) {
  // fcvtns (vector): four F32 lanes -> signed 32-bit, ties to even.
  constexpr auto AsmFcvtns = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtns %0.4s, %1.4s");
  ASSERT_EQ(AsmFcvtns(MakeF32x4(-7.50f, -6.75f, -6.50f, -6.25f)),
            MakeUInt128(0xfffffff9fffffff8ULL, 0xfffffffafffffffaULL));
  ASSERT_EQ(AsmFcvtns(MakeF32x4(6.25f, 6.50f, 6.75f, 7.50f)),
            MakeUInt128(0x0000000600000006ULL, 0x0000000800000007ULL));
}
1153
TEST(Arm64InsnTest, AsmConvertF32U32x4TieEven) {
  // fcvtnu (vector): unsigned result, ties to even; negatives clamp to 0.
  constexpr auto AsmFcvtnu = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtnu %0.4s, %1.4s");
  ASSERT_EQ(AsmFcvtnu(MakeF32x4(-7.50f, -6.75f, -6.50f, -6.25f)),
            MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmFcvtnu(MakeF32x4(6.25f, 6.50f, 6.75f, 7.50f)),
            MakeUInt128(0x0000000600000006ULL, 0x0000000800000007ULL));
}
1161
TEST(Arm64InsnTest, AsmConvertF32I32x4PosInf) {
  // fcvtps (vector): four F32 lanes -> signed 32-bit, round toward plus infinity.
  constexpr auto AsmFcvtps = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtps %0.4s, %1.4s");
  ASSERT_EQ(AsmFcvtps(MakeF32x4(-7.50f, -6.75f, -6.50f, -6.25f)),
            MakeUInt128(0xfffffffafffffff9ULL, 0xfffffffafffffffaULL));
  ASSERT_EQ(AsmFcvtps(MakeF32x4(6.25f, 6.50f, 6.75f, 7.50f)),
            MakeUInt128(0x0000000700000007ULL, 0x0000000800000007ULL));
}
1169
TEST(Arm64InsnTest, AsmConvertF32U32x4PosInf) {
  // fcvtpu (vector): unsigned result, round toward plus infinity; negatives clamp to 0.
  constexpr auto AsmFcvtpu = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtpu %0.4s, %1.4s");
  ASSERT_EQ(AsmFcvtpu(MakeF32x4(-7.50f, -6.75f, -6.50f, -6.25f)),
            MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmFcvtpu(MakeF32x4(6.25f, 6.50f, 6.75f, 7.50f)),
            MakeUInt128(0x0000000700000007ULL, 0x0000000800000007ULL));
}
1177
TEST(Arm64InsnTest, AsmConvertF32I32x4Truncate) {
  // fcvtzs (vector): four F32 lanes -> signed 32-bit, round toward zero.
  constexpr auto AsmFcvtzs = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtzs %0.4s, %1.4s");
  ASSERT_EQ(AsmFcvtzs(MakeF32x4(-7.50f, -6.75f, -6.50f, -6.25f)),
            MakeUInt128(0xfffffffafffffff9ULL, 0xfffffffafffffffaULL));
  ASSERT_EQ(AsmFcvtzs(MakeF32x4(6.25f, 6.50f, 6.75f, 7.50f)),
            MakeUInt128(0x0000000600000006ULL, 0x0000000700000006ULL));
}
1185
TEST(Arm64InsnTest, AsmConvertF32U32x4Truncate) {
  // fcvtzu (vector): unsigned result, round toward zero; negatives clamp to 0.
  constexpr auto AsmFcvtzu = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtzu %0.4s, %1.4s");
  ASSERT_EQ(AsmFcvtzu(MakeF32x4(-7.50f, -6.75f, -6.50f, -6.25f)),
            MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmFcvtzu(MakeF32x4(6.25f, 6.50f, 6.75f, 7.50f)),
            MakeUInt128(0x0000000600000006ULL, 0x0000000700000006ULL));
}
1193
TEST(Arm64InsnTest, AsmConvertF64I64x4TieAway) {
  // NOTE(review): the test name says "x4" but the instruction uses .2d (two lanes).
  // fcvtas (vector): F64 -> signed 64-bit, ties away from zero.
  constexpr auto AsmFcvtas = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtas %0.2d, %1.2d");
  ASSERT_EQ(AsmFcvtas(MakeF64x2(-7.50, -6.75)),
            MakeUInt128(0xfffffffffffffff8ULL, 0xfffffffffffffff9ULL));
  ASSERT_EQ(AsmFcvtas(MakeF64x2(-6.50, -6.25)),
            MakeUInt128(0xfffffffffffffff9ULL, 0xfffffffffffffffaULL));
  ASSERT_EQ(AsmFcvtas(MakeF64x2(6.25, 6.50)),
            MakeUInt128(0x0000000000000006ULL, 0x0000000000000007ULL));
  ASSERT_EQ(AsmFcvtas(MakeF64x2(6.75, 7.50)),
            MakeUInt128(0x0000000000000007ULL, 0x0000000000000008ULL));
}
1205
TEST(Arm64InsnTest, AsmConvertF64U64x4TieAway) {
  // NOTE(review): the test name says "x4" but the instruction uses .2d (two lanes).
  // fcvtau (vector): unsigned result, ties away from zero; negatives clamp to 0.
  constexpr auto AsmFcvtau = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtau %0.2d, %1.2d");
  ASSERT_EQ(AsmFcvtau(MakeF64x2(-7.50, -6.75)),
            MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmFcvtau(MakeF64x2(-6.50, -6.25)),
            MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmFcvtau(MakeF64x2(6.25, 6.50)),
            MakeUInt128(0x0000000000000006ULL, 0x0000000000000007ULL));
  ASSERT_EQ(AsmFcvtau(MakeF64x2(6.75, 7.50)),
            MakeUInt128(0x0000000000000007ULL, 0x0000000000000008ULL));
}
1217
TEST(Arm64InsnTest, AsmConvertF64I64x4NegInf) {
  // NOTE(review): the test name says "x4" but the instruction uses .2d (two lanes).
  // fcvtms (vector): F64 -> signed 64-bit, round toward minus infinity.
  constexpr auto AsmFcvtms = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtms %0.2d, %1.2d");
  ASSERT_EQ(AsmFcvtms(MakeF64x2(-7.50, -6.75)),
            MakeUInt128(0xfffffffffffffff8ULL, 0xfffffffffffffff9ULL));
  ASSERT_EQ(AsmFcvtms(MakeF64x2(-6.50, -6.25)),
            MakeUInt128(0xfffffffffffffff9ULL, 0xfffffffffffffff9ULL));
  ASSERT_EQ(AsmFcvtms(MakeF64x2(6.25, 6.50)),
            MakeUInt128(0x0000000000000006ULL, 0x0000000000000006ULL));
  ASSERT_EQ(AsmFcvtms(MakeF64x2(6.75, 7.50)),
            MakeUInt128(0x0000000000000006ULL, 0x0000000000000007ULL));
}
1229
TEST(Arm64InsnTest, AsmConvertF64U64x4NegInf) {
  // NOTE(review): the test name says "x4" but the instruction uses .2d (two lanes).
  // fcvtmu (vector): unsigned result, round toward minus infinity; negatives clamp to 0.
  constexpr auto AsmFcvtmu = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtmu %0.2d, %1.2d");
  ASSERT_EQ(AsmFcvtmu(MakeF64x2(-7.50, -6.75)),
            MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmFcvtmu(MakeF64x2(-6.50, -6.25)),
            MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmFcvtmu(MakeF64x2(6.25, 6.50)),
            MakeUInt128(0x0000000000000006ULL, 0x0000000000000006ULL));
  ASSERT_EQ(AsmFcvtmu(MakeF64x2(6.75, 7.50)),
            MakeUInt128(0x0000000000000006ULL, 0x0000000000000007ULL));
}
1241
TEST(Arm64InsnTest, AsmConvertF64I64x4TieEven) {
  // NOTE(review): the test name says "x4" but the instruction uses .2d (two lanes).
  // fcvtns (vector): F64 -> signed 64-bit, ties to even.
  constexpr auto AsmFcvtns = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtns %0.2d, %1.2d");
  ASSERT_EQ(AsmFcvtns(MakeF64x2(-7.50, -6.75)),
            MakeUInt128(0xfffffffffffffff8ULL, 0xfffffffffffffff9ULL));
  ASSERT_EQ(AsmFcvtns(MakeF64x2(-6.50, -6.25)),
            MakeUInt128(0xfffffffffffffffaULL, 0xfffffffffffffffaULL));
  ASSERT_EQ(AsmFcvtns(MakeF64x2(6.25, 6.50)),
            MakeUInt128(0x0000000000000006ULL, 0x0000000000000006ULL));
  ASSERT_EQ(AsmFcvtns(MakeF64x2(6.75, 7.50)),
            MakeUInt128(0x0000000000000007ULL, 0x0000000000000008ULL));
}
1253
TEST(Arm64InsnTest, AsmConvertF64U64x4TieEven) {
  // NOTE(review): the test name says "x4" but the instruction uses .2d (two lanes).
  // fcvtnu (vector): unsigned result, ties to even; negatives clamp to 0.
  constexpr auto AsmFcvtnu = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtnu %0.2d, %1.2d");
  ASSERT_EQ(AsmFcvtnu(MakeF64x2(-7.50, -6.75)),
            MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmFcvtnu(MakeF64x2(-6.50, -6.25)),
            MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmFcvtnu(MakeF64x2(6.25, 6.50)),
            MakeUInt128(0x0000000000000006ULL, 0x0000000000000006ULL));
  ASSERT_EQ(AsmFcvtnu(MakeF64x2(6.75, 7.50)),
            MakeUInt128(0x0000000000000007ULL, 0x0000000000000008ULL));
}
1265
TEST(Arm64InsnTest, AsmConvertF64I64x4PosInf) {
  // NOTE(review): the test name says "x4" but the instruction uses .2d (two lanes).
  // fcvtps (vector): F64 -> signed 64-bit, round toward plus infinity.
  constexpr auto AsmFcvtps = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtps %0.2d, %1.2d");
  ASSERT_EQ(AsmFcvtps(MakeF64x2(-7.50, -6.75)),
            MakeUInt128(0xfffffffffffffff9ULL, 0xfffffffffffffffaULL));
  ASSERT_EQ(AsmFcvtps(MakeF64x2(-6.50, -6.25)),
            MakeUInt128(0xfffffffffffffffaULL, 0xfffffffffffffffaULL));
  ASSERT_EQ(AsmFcvtps(MakeF64x2(6.25, 6.50)),
            MakeUInt128(0x0000000000000007ULL, 0x0000000000000007ULL));
  ASSERT_EQ(AsmFcvtps(MakeF64x2(6.75, 7.50)),
            MakeUInt128(0x0000000000000007ULL, 0x0000000000000008ULL));
}
1277
TEST(Arm64InsnTest, AsmConvertF64U64x4PosInf) {
  // NOTE(review): the test name says "x4" but the instruction uses .2d (two lanes).
  // fcvtpu (vector): unsigned result, round toward plus infinity; negatives clamp to 0.
  constexpr auto AsmFcvtpu = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtpu %0.2d, %1.2d");
  ASSERT_EQ(AsmFcvtpu(MakeF64x2(-7.50, -6.75)),
            MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmFcvtpu(MakeF64x2(-6.50, -6.25)),
            MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmFcvtpu(MakeF64x2(6.25, 6.50)),
            MakeUInt128(0x0000000000000007ULL, 0x0000000000000007ULL));
  ASSERT_EQ(AsmFcvtpu(MakeF64x2(6.75, 7.50)),
            MakeUInt128(0x0000000000000007ULL, 0x0000000000000008ULL));
}
1289
TEST(Arm64InsnTest, AsmConvertF64I64x4Truncate) {
  // NOTE(review): the test name says "x4" but the instruction uses .2d (two lanes).
  // fcvtzs (vector): F64 -> signed 64-bit, round toward zero.
  constexpr auto AsmFcvtzs = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtzs %0.2d, %1.2d");
  ASSERT_EQ(AsmFcvtzs(MakeF64x2(-7.50, -6.75)),
            MakeUInt128(0xfffffffffffffff9ULL, 0xfffffffffffffffaULL));
  ASSERT_EQ(AsmFcvtzs(MakeF64x2(-6.50, -6.25)),
            MakeUInt128(0xfffffffffffffffaULL, 0xfffffffffffffffaULL));
  ASSERT_EQ(AsmFcvtzs(MakeF64x2(6.25, 6.50)),
            MakeUInt128(0x0000000000000006ULL, 0x0000000000000006ULL));
  ASSERT_EQ(AsmFcvtzs(MakeF64x2(6.75, 7.50)),
            MakeUInt128(0x0000000000000006ULL, 0x0000000000000007ULL));
}
1301
TEST(Arm64InsnTest, AsmConvertF64U64x4Truncate) {
  // NOTE(review): the test name says "x4" but the instruction uses .2d (two lanes).
  // fcvtzu (vector): unsigned result, round toward zero; negatives clamp to 0.
  constexpr auto AsmFcvtzu = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtzu %0.2d, %1.2d");
  ASSERT_EQ(AsmFcvtzu(MakeF64x2(-7.50, -6.75)),
            MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmFcvtzu(MakeF64x2(-6.50, -6.25)),
            MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmFcvtzu(MakeF64x2(6.25, 6.50)),
            MakeUInt128(0x0000000000000006ULL, 0x0000000000000006ULL));
  ASSERT_EQ(AsmFcvtzu(MakeF64x2(6.75, 7.50)),
            MakeUInt128(0x0000000000000006ULL, 0x0000000000000007ULL));
}
1313
TEST(Arm64InsnTest, AsmConvertX32F32Scalar) {
  // scvtf (fixed-point): signed 32-bit GPR with 7 fractional bits -> F32.
  constexpr auto AsmScvtf = ASM_INSN_WRAP_FUNC_W_RES_R_ARG("scvtf %s0, %w1, #7");

  // 0x610 / 2^7 = 12.125f -> 0x41420000.
  ASSERT_EQ(AsmScvtf(0x610), MakeUInt128(0x41420000ULL, 0U));

  // INT32_MIN bit pattern / 2^7 = -16777216.0f -> 0xcb800000.
  ASSERT_EQ(AsmScvtf(1U << 31), MakeUInt128(0xcb800000ULL, 0U));
}
1321
TEST(Arm64InsnTest, AsmConvertX32F64Scalar) {
  // scvtf (fixed-point): signed 32-bit with 8 fractional bits -> float64.
  constexpr auto AsmConvertX32F64 = ASM_INSN_WRAP_FUNC_W_RES_R_ARG("scvtf %d0, %w1, #8");

  ASSERT_EQ(AsmConvertX32F64(0x487), MakeUInt128(0x40121c0000000000ULL, 0U));

  // Use an unsigned shift: `1 << 31` overflows a signed int, which is undefined
  // behavior before C++20. The register bit pattern (0x80000000, i.e. INT32_MIN
  // as seen by scvtf) is identical.
  ASSERT_EQ(AsmConvertX32F64(1U << 31), MakeUInt128(0xc160000000000000ULL, 0U));
}
1329
TEST(Arm64InsnTest, AsmConvertX32F32) {
  // scvtf (SIMD scalar, fixed-point): signed 32-bit with 7 fractional bits
  // -> float32, source taken from a vector register.
  constexpr auto AsmScvtf = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("scvtf %s0, %s1, #7");
  ASSERT_EQ(AsmScvtf(0x123), MakeUInt128(0x40118000ULL, 0U));
  ASSERT_EQ(AsmScvtf(1U << 31), MakeUInt128(0xcb800000ULL, 0U));
}
1337
TEST(Arm64InsnTest, AsmConvertX32x4F32x4) {
  // scvtf (vector, fixed-point): int32x4 with 11 fractional bits -> float32x4.
  constexpr auto AsmScvtf = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("scvtf %0.4s, %1.4s, #11");
  ASSERT_EQ(AsmScvtf(MakeUInt128(0x80000000ffff9852ULL, 0x0000110200001254ULL)),
            MakeUInt128(0xc9800000c14f5c00ULL, 0x400810004012a000ULL));
}
1343
TEST(Arm64InsnTest, AsmConvertUX32F32Scalar) {
  // ucvtf (fixed-point): unsigned 32-bit with 7 fractional bits -> float32.
  constexpr auto AsmUcvtf = ASM_INSN_WRAP_FUNC_W_RES_R_ARG("ucvtf %s0, %w1, #7");
  ASSERT_EQ(AsmUcvtf(0x857), MakeUInt128(0x41857000ULL, 0U));
  // 0x80000000 stays positive for the unsigned conversion.
  ASSERT_EQ(AsmUcvtf(1U << 31), MakeUInt128(0x4b800000ULL, 0U));
  // Default rounding is round-to-nearest, ties-to-even (FPRounding_TIEEVEN).
  ASSERT_EQ(AsmUcvtf(0x80000080), MakeUInt128(0x4b800000ULL, 0U));
  ASSERT_EQ(AsmUcvtf(0x800000c0), MakeUInt128(0x4b800001ULL, 0U));
  ASSERT_EQ(AsmUcvtf(0x80000140), MakeUInt128(0x4b800001ULL, 0U));
  ASSERT_EQ(AsmUcvtf(0x80000180), MakeUInt128(0x4b800002ULL, 0U));
}
1357
TEST(Arm64InsnTest, AsmConvertUX32F64Scalar) {
  // ucvtf (fixed-point): unsigned 32-bit with 8 fractional bits -> float64.
  constexpr auto AsmUcvtf = ASM_INSN_WRAP_FUNC_W_RES_R_ARG("ucvtf %d0, %w1, #8");
  ASSERT_EQ(AsmUcvtf(0x361), MakeUInt128(0x400b080000000000ULL, 0U));
  ASSERT_EQ(AsmUcvtf(1U << 31), MakeUInt128(0x4160000000000000ULL, 0U));
}
1365
TEST(Arm64InsnTest, AsmConvertUX32F32) {
  // ucvtf (SIMD scalar, fixed-point): unsigned 32-bit with 7 fractional bits
  // -> float32, source taken from a vector register.
  constexpr auto AsmUcvtf = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("ucvtf %s0, %s1, #7");
  ASSERT_EQ(AsmUcvtf(0x456), MakeUInt128(0x410ac000ULL, 0U));
  ASSERT_EQ(AsmUcvtf(1U << 31), MakeUInt128(0x4b800000ULL, 0U));
}
1373
TEST(Arm64InsnTest, AsmConvertUX32x4F32x4) {
  // ucvtf (vector, fixed-point): uint32x4 with 11 fractional bits -> float32x4.
  constexpr auto AsmUcvtf = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("ucvtf %0.4s, %1.4s, #11");
  ASSERT_EQ(AsmUcvtf(MakeUInt128(0x8000000000008023ULL, 0x0000201800001956ULL)),
            MakeUInt128(0x4980000041802300ULL, 0x40806000404ab000ULL));
}
1379
TEST(Arm64InsnTest, AsmConvertX64F32Scalar) {
  // scvtf (fixed-point): signed 64-bit with 10 fractional bits -> float32.
  constexpr auto AsmScvtf = ASM_INSN_WRAP_FUNC_W_RES_R_ARG("scvtf %s0, %x1, #10");
  ASSERT_EQ(AsmScvtf(0x2234), MakeUInt128(0x4108d000ULL, 0U));
}
1385
TEST(Arm64InsnTest, AsmConvertX64F64Scalar) {
  // scvtf (fixed-point): signed 64-bit with 10 fractional bits -> float64.
  constexpr auto AsmScvtf = ASM_INSN_WRAP_FUNC_W_RES_R_ARG("scvtf %d0, %x1, #10");
  ASSERT_EQ(AsmScvtf(0x1324), MakeUInt128(0x4013240000000000ULL, 0U));
}
1391
TEST(Arm64InsnTest, AsmConvertUX64F32Scalar) {
  // ucvtf (fixed-point): unsigned 64-bit with 10 fractional bits -> float32.
  constexpr auto AsmUcvtf = ASM_INSN_WRAP_FUNC_W_RES_R_ARG("ucvtf %s0, %x1, #10");
  ASSERT_EQ(AsmUcvtf(0x5763), MakeUInt128(0x41aec600ULL, 0U));
}
1397
TEST(Arm64InsnTest, AsmConvertUX64F64Scalar) {
  // ucvtf (fixed-point): unsigned 64-bit with 10 fractional bits -> float64.
  constexpr auto AsmUcvtf = ASM_INSN_WRAP_FUNC_W_RES_R_ARG("ucvtf %d0, %x1, #10");
  ASSERT_EQ(AsmUcvtf(0x2217), MakeUInt128(0x40210b8000000000ULL, 0U));
}
1403
TEST(Arm64InsnTest, AsmConvertX64F64) {
  // scvtf (SIMD scalar, fixed-point): signed 64-bit with 12 fractional bits
  // -> float64, source taken from a vector register.
  constexpr auto AsmScvtf = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("scvtf %d0, %d1, #12");
  ASSERT_EQ(AsmScvtf(0x723), MakeUInt128(0x3fdc8c0000000000ULL, 0U));
  // INT64_MIN scaled by 2^-12.
  ASSERT_EQ(AsmScvtf(1ULL << 63), MakeUInt128(0xc320000000000000ULL, 0U));
}
1411
TEST(Arm64InsnTest, AsmConvertUX64F64) {
  // ucvtf (SIMD scalar, fixed-point): unsigned 64-bit with 12 fractional bits
  // -> float64, source taken from a vector register.
  constexpr auto AsmUcvtf = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("ucvtf %d0, %d1, #12");
  ASSERT_EQ(AsmUcvtf(0x416), MakeUInt128(0x3fd0580000000000ULL, 0U));
  // 2^63 stays positive for the unsigned conversion.
  ASSERT_EQ(AsmUcvtf(1ULL << 63), MakeUInt128(0x4320000000000000ULL, 0U));
}
1419
TEST(Arm64InsnTest, AsmConvertUX64F64With64BitFraction) {
  // ucvtf with the maximum fractional-bit count (#64): every input bit is
  // fraction, so 2^63 maps to 0.5.
  constexpr auto AsmUcvtf = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("ucvtf %d0, %d1, #64");
  ASSERT_EQ(AsmUcvtf(1ULL << 63), MakeUInt128(0x3fe0'0000'0000'0000ULL, 0U));
}
1425
TEST(Arm64InsnTest, AsmConvertX64x2F64x2) {
  // scvtf (vector, fixed-point): int64x2 with 12 fractional bits -> float64x2.
  constexpr auto AsmScvtf = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("scvtf %0.2d, %1.2d, #12");
  ASSERT_EQ(AsmScvtf(MakeUInt128(1ULL << 63, 0x8086U)),
            MakeUInt128(0xc320000000000000ULL, 0x402010c000000000ULL));
}
1431
TEST(Arm64InsnTest, AsmConvertUX64x2F64x2) {
  // ucvtf (vector, fixed-point): uint64x2 with 12 fractional bits -> float64x2.
  constexpr auto AsmUcvtf = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("ucvtf %0.2d, %1.2d, #12");
  ASSERT_EQ(AsmUcvtf(MakeUInt128(1ULL << 63, 0x6809U)),
            MakeUInt128(0x4320000000000000ULL, 0x401a024000000000ULL));
}
1437
TEST(Arm64InsnTest, AsmConvertUX64x2F64x2With64BitFraction) {
  // Vector ucvtf with the maximum fractional-bit count (#64).
  constexpr auto AsmUcvtf = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("ucvtf %0.2d, %1.2d, #64");
  ASSERT_EQ(AsmUcvtf(MakeUInt128(0x7874'211c'b7aa'f597ULL, 0x2c0f'5504'd25e'f673ULL)),
            MakeUInt128(0x3fde'1d08'472d'eabdULL, 0x3fc6'07aa'8269'2f7bULL));
}
1444
TEST(Arm64InsnTest, AsmConvertF32X32Scalar) {
  // fcvtzs (fixed-point): float32 -> signed 32-bit with 16 fractional bits,
  // rounding toward zero.
  constexpr auto AsmConvertF32X32 = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtzs %w0, %s1, #16");
  uint32_t arg1 = 0x4091eb85U; // 4.56 in float
  ASSERT_EQ(AsmConvertF32X32(arg1), MakeUInt128(0x00048f5cU, 0U));

  uint32_t arg2 = 0xc0d80000U; // -6.75 in float
  ASSERT_EQ(AsmConvertF32X32(arg2), MakeUInt128(0xfff94000U, 0U));

  // The default NaN converts to zero.
  ASSERT_EQ(AsmConvertF32X32(kDefaultNaN32), MakeUInt128(bit_cast<uint32_t>(0.0f), 0U));
}
1455
TEST(Arm64InsnTest, AsmConvertF32UX32Scalar) {
  // fcvtzu (fixed-point): float32 -> unsigned 32-bit with 16 fractional bits,
  // rounding toward zero. This previously encoded fcvtzs, which made the test
  // a duplicate of AsmConvertF32X32Scalar and left the unsigned form untested;
  // the expectations below follow the unsigned semantics (cf.
  // AsmConvertF64UX32Scalar).
  constexpr auto AsmConvertF32UX32 = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtzu %w0, %s1, #16");
  uint32_t arg1 = 0x41223d71U; // 10.14 in float
  ASSERT_EQ(AsmConvertF32UX32(arg1), MakeUInt128(0x000a23d7U, 0U));

  // Negative input saturates to zero in the unsigned conversion.
  uint32_t arg2 = 0xc1540000U; // -13.25 in float
  ASSERT_EQ(AsmConvertF32UX32(arg2), MakeUInt128(0U, 0U));

  // The default NaN converts to zero.
  ASSERT_EQ(AsmConvertF32UX32(kDefaultNaN32), MakeUInt128(bit_cast<uint32_t>(0.0f), 0U));
}
1466
TEST(Arm64InsnTest, AsmConvertF32UX32With31FractionalBits) {
  // NOTE(review): despite the "UX32" name this encodes fcvtzs (signed). For
  // the positive input below the signed and unsigned results coincide, so the
  // assertion is valid either way — confirm whether fcvtzu was intended.
  constexpr auto AsmConvertF32UX32 = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtzs %w0, %s1, #31");
  // 0.25 * 2^31 = 0x20000000.
  uint32_t arg1 = bit_cast<uint32_t>(0.25f);
  ASSERT_EQ(AsmConvertF32UX32(arg1), MakeUInt128(0x20000000U, 0U));
}
1472
TEST(Arm64InsnTest, AsmConvertF64X32Scalar) {
  // fcvtzs (fixed-point): float64 -> signed 32-bit with 16 fractional bits,
  // rounding toward zero.
  constexpr auto AsmConvertF64X32 = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtzs %w0, %d1, #16");
  uint64_t arg1 = 0x401e8f5c28f5c28fULL; // 7.46 in double
  ASSERT_EQ(AsmConvertF64X32(arg1), MakeUInt128(0x0007a3d7U, 0U));

  uint64_t arg2 = 0xc040200000000000ULL; // -32.44 in double
  ASSERT_EQ(AsmConvertF64X32(arg2), MakeUInt128(0xffdfc000U, 0U));
}
1481
TEST(Arm64InsnTest, AsmConvertF32X64Scalar) {
  // fcvtzs (fixed-point): float32 -> signed 64-bit with 16 fractional bits,
  // rounding toward zero.
  constexpr auto AsmFcvtzs = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtzs %x0, %s1, #16");
  // The float32 bit pattern is zero-extended into the 64-bit argument.
  uint64_t arg1 = bit_cast<uint32_t>(7.50f);
  ASSERT_EQ(AsmFcvtzs(arg1), MakeUInt128(0x0000000000078000ULL, 0ULL));

  uint64_t arg2 = bit_cast<uint32_t>(-6.50f);
  ASSERT_EQ(AsmFcvtzs(arg2), MakeUInt128(0xfffffffffff98000ULL, 0ULL));
}
1490
TEST(Arm64InsnTest, AsmConvertF32UX64With63FractionalBits) {
  // NOTE(review): despite the "UX64" name this encodes fcvtzs (signed). For
  // the positive input below the signed and unsigned results coincide —
  // confirm whether fcvtzu was intended.
  constexpr auto AsmConvertF32UX64 = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtzs %x0, %s1, #63");
  // 0.25 * 2^63 = 0x2000000000000000.
  uint32_t arg1 = bit_cast<uint32_t>(0.25f);
  ASSERT_EQ(AsmConvertF32UX64(arg1), MakeUInt128(0x20000000'00000000ULL, 0U));
}
1496
TEST(Arm64InsnTest, AsmConvertF64X64Scalar) {
  // fcvtzs (fixed-point): float64 -> signed 64-bit with 16 fractional bits,
  // rounding toward zero.
  constexpr auto AsmFcvtzs = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtzs %x0, %d1, #16");
  uint64_t arg1 = bit_cast<uint64_t>(7.50);
  ASSERT_EQ(AsmFcvtzs(arg1), MakeUInt128(0x0000000000078000ULL, 0ULL));

  uint64_t arg2 = bit_cast<uint64_t>(-6.50);
  ASSERT_EQ(AsmFcvtzs(arg2), MakeUInt128(0xfffffffffff98000ULL, 0ULL));
}
1505
TEST(Arm64InsnTest, AsmConvertF32X32x4) {
  // fcvtzs (vector, fixed-point): float32x4 -> int32x4 with 2 fractional bits.
  constexpr auto AsmFcvtzs = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtzs %0.4s, %1.4s, #2");
  ASSERT_EQ(AsmFcvtzs(MakeF32x4(-5.5f, -0.0f, 0.0f, 6.5f)),
            MakeUInt128(0x00000000ffffffeaULL, 0x0000001a00000000ULL));
}
1511
TEST(Arm64InsnTest, AsmConvertF64UX32Scalar) {
  // fcvtzu (fixed-point): float64 -> unsigned 32-bit with 16 fractional bits,
  // rounding toward zero.
  constexpr auto AsmConvertF64UX32 = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtzu %w0, %d1, #16");
  uint64_t arg1 = 0x4020947ae147ae14ULL; // 8.29 in double
  ASSERT_EQ(AsmConvertF64UX32(arg1), MakeUInt128(0x00084a3dU, 0U));

  // Negative input saturates to zero in the unsigned conversion.
  uint64_t arg2 = 0xc023666666666666ULL; // -9.70 in double
  ASSERT_EQ(AsmConvertF64UX32(arg2), MakeUInt128(0U, 0U));
}
1520
TEST(Arm64InsnTest, AsmConvertF32UX64Scalar) {
  // fcvtzu (fixed-point): float32 -> unsigned 64-bit with 16 fractional bits,
  // rounding toward zero.
  constexpr auto AsmFcvtzu = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtzu %x0, %s1, #16");
  uint64_t arg1 = bit_cast<uint32_t>(7.50f);
  ASSERT_EQ(AsmFcvtzu(arg1), MakeUInt128(0x0000000000078000ULL, 0ULL));
  // Negative input saturates to zero in the unsigned conversion.
  uint64_t arg2 = bit_cast<uint32_t>(-6.50f);
  ASSERT_EQ(AsmFcvtzu(arg2), 0ULL);
}
1528
TEST(Arm64InsnTest, AsmConvertF64UX64Scalar) {
  // fcvtzu (fixed-point): float64 -> unsigned 64-bit with 16 fractional bits,
  // rounding toward zero.
  constexpr auto AsmFcvtzu = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtzu %x0, %d1, #16");
  uint64_t arg1 = bit_cast<uint64_t>(7.50);
  ASSERT_EQ(AsmFcvtzu(arg1), MakeUInt128(0x0000000000078000ULL, 0ULL));

  // Negative input saturates to zero in the unsigned conversion.
  uint64_t arg2 = bit_cast<uint64_t>(-6.50);
  ASSERT_EQ(AsmFcvtzu(arg2), MakeUInt128(0ULL, 0ULL));
}
1537
TEST(Arm64InsnTest, AsmConvertF64UX64ScalarWith64BitFraction) {
  // fcvtzu with the maximum fractional-bit count (#64): 0.625 * 2^64.
  constexpr auto AsmFcvtzu = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtzu %x0, %d1, #64");
  uint64_t arg = bit_cast<uint64_t>(0.625);
  ASSERT_EQ(AsmFcvtzu(arg), MakeUInt128(0xa000'0000'0000'0000ULL, 0ULL));
}
1543
TEST(Arm64InsnTest, AsmConvertF32UX32x4) {
  // fcvtzu (vector, fixed-point): float32x4 -> uint32x4 with 2 fractional
  // bits; negative inputs saturate to zero. (Local renamed from AsmFcvtzs —
  // the instruction under test is fcvtzu.)
  constexpr auto AsmFcvtzu = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtzu %0.4s, %1.4s, #2");
  ASSERT_EQ(AsmFcvtzu(MakeF32x4(-5.5f, -0.0f, 0.0f, 6.5f)),
            MakeUInt128(0x0000000000000000ULL, 0x0000001a00000000ULL));
}
1549
TEST(Arm64InsnTest, Fp32ConditionalSelect) {
  // fcsel (single precision): result = first operand if the condition holds,
  // otherwise the second operand.
  uint64_t int_arg1 = 3;
  uint64_t int_arg2 = 7;
  uint64_t fp_arg1 = 0xfedcba9876543210ULL;
  uint64_t fp_arg2 = 0x0123456789abcdefULL;
  __uint128_t res;

  // 3 == 7 is false, so "eq" selects the second operand (%s4): the low 32 bits
  // of fp_arg2. Upper bits of the destination are zeroed.
  asm("cmp %x1,%x2\n\t"
      "fcsel %s0, %s3, %s4, eq"
      : "=w"(res)
      : "r"(int_arg1), "r"(int_arg2), "w"(fp_arg1), "w"(fp_arg2));
  ASSERT_EQ(res, MakeUInt128(0x89abcdefULL, 0U));

  // 3 != 7 is true, so "ne" selects the first operand (%s3): the low 32 bits
  // of fp_arg1.
  asm("cmp %x1,%x2\n\t"
      "fcsel %s0, %s3, %s4, ne"
      : "=w"(res)
      : "r"(int_arg1), "r"(int_arg2), "w"(fp_arg1), "w"(fp_arg2));
  ASSERT_EQ(res, MakeUInt128(0x76543210ULL, 0U));
}
1569
TEST(Arm64InsnTest, Fp64ConditionalSelect) {
  // fcsel (double precision): result = first operand if the condition holds,
  // otherwise the second operand.
  uint64_t int_arg1 = 8;
  uint64_t int_arg2 = 3;
  uint64_t fp_arg1 = 0xfedcba9876543210ULL;
  uint64_t fp_arg2 = 0x0123456789abcdefULL;
  __uint128_t res;

  // 8 == 3 is false, so "eq" selects the second operand (%d4): fp_arg2.
  asm("cmp %x1,%x2\n\t"
      "fcsel %d0, %d3, %d4, eq"
      : "=w"(res)
      : "r"(int_arg1), "r"(int_arg2), "w"(fp_arg1), "w"(fp_arg2));
  ASSERT_EQ(res, MakeUInt128(0x0123456789abcdefULL, 0U));

  // 8 != 3 is true, so "ne" selects the first operand (%d3): fp_arg1.
  asm("cmp %x1,%x2\n\t"
      "fcsel %d0, %d3, %d4, ne"
      : "=w"(res)
      : "r"(int_arg1), "r"(int_arg2), "w"(fp_arg1), "w"(fp_arg2));
  ASSERT_EQ(res, MakeUInt128(0xfedcba9876543210ULL, 0U));
}
1589
TEST(Arm64InsnTest, RoundUpFp32) {
  // frintp (single precision): round to integral, toward +infinity.
  constexpr auto AsmFrintp = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("frintp %s0, %s1");
  // The lower 32-bit represents 2.7182817 in float; the upper bits are
  // deliberate garbage that must not affect the result.
  __uint128_t rounded = AsmFrintp(0xdeadbeef402df854ULL);
  ASSERT_EQ(rounded, MakeUInt128(0x40400000ULL, 0U)); // 3.0 in float
}
1596
TEST(Arm64InsnTest, RoundUpFp64) {
  // frintp (double precision): round to integral, toward +infinity.
  constexpr auto AsmFrintp = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("frintp %d0, %d1");
  // 2.7182817 in double.
  __uint128_t rounded = AsmFrintp(0x4005BF0A8B145769ULL);
  ASSERT_EQ(rounded, MakeUInt128(0x4008000000000000ULL, 0U)); // 3.0 in double
}
1603
TEST(Arm64InsnTest, RoundToIntNearestTiesAwayFp64) {
  // frinta (double precision): round to integral, to nearest with ties away
  // from zero.
  constexpr auto AsmFrinta = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("frinta %d0, %d1");

  // -7.50 -> -8.00 (ties away from zero as opposed to even)
  ASSERT_EQ(AsmFrinta(0xc01E000000000000ULL), MakeUInt128(0xc020000000000000ULL, 0U));

  // -6.75 -> -7.00
  ASSERT_EQ(AsmFrinta(0xc01B000000000000ULL), MakeUInt128(0xc01c000000000000ULL, 0U));

  // -6.50 -> -7.00 (ties away from zero as opposed to even)
  ASSERT_EQ(AsmFrinta(0xc01A000000000000ULL), MakeUInt128(0xc01c000000000000ULL, 0U));

  // -6.25 -> -6.00
  ASSERT_EQ(AsmFrinta(0xc019000000000000ULL), MakeUInt128(0xc018000000000000ULL, 0U));

  // 6.25 -> 6.00
  ASSERT_EQ(AsmFrinta(0x4019000000000000ULL), MakeUInt128(0x4018000000000000ULL, 0U));

  // 6.50 -> 7.00 (ties away from zero as opposed to even)
  ASSERT_EQ(AsmFrinta(0x401A000000000000ULL), MakeUInt128(0x401c000000000000ULL, 0U));

  // 6.75 -> 7.00
  ASSERT_EQ(AsmFrinta(0x401B000000000000ULL), MakeUInt128(0x401c000000000000ULL, 0U));

  // 7.50 -> 8.00 (ties away from zero as opposed to even)
  ASSERT_EQ(AsmFrinta(0x401E000000000000ULL), MakeUInt128(0x4020000000000000ULL, 0U));

  // -0.49999999999999994 -> -0.0 (should not "tie away" since -0.4999... != -0.5)
  ASSERT_EQ(AsmFrinta(0xBFDFFFFFFFFFFFFF), MakeUInt128(0x8000000000000000U, 0U));

  // A number too large to have fractional precision, should not change upon rounding with tie-away
  ASSERT_EQ(AsmFrinta(bit_cast<uint64_t>(0.5 / std::numeric_limits<double>::epsilon())),
            MakeUInt128(bit_cast<uint64_t>(0.5 / std::numeric_limits<double>::epsilon()), 0U));
  ASSERT_EQ(AsmFrinta(bit_cast<uint64_t>(-0.5 / std::numeric_limits<double>::epsilon())),
            MakeUInt128(bit_cast<uint64_t>(-0.5 / std::numeric_limits<double>::epsilon()), 0U));
  ASSERT_EQ(AsmFrinta(bit_cast<uint64_t>(0.75 / std::numeric_limits<double>::epsilon())),
            MakeUInt128(bit_cast<uint64_t>(0.75 / std::numeric_limits<double>::epsilon()), 0U));
  ASSERT_EQ(AsmFrinta(bit_cast<uint64_t>(-0.75 / std::numeric_limits<double>::epsilon())),
            MakeUInt128(bit_cast<uint64_t>(-0.75 / std::numeric_limits<double>::epsilon()), 0U));
  ASSERT_EQ(AsmFrinta(bit_cast<uint64_t>(1.0 / std::numeric_limits<double>::epsilon())),
            MakeUInt128(bit_cast<uint64_t>(1.0 / std::numeric_limits<double>::epsilon()), 0U));
  ASSERT_EQ(AsmFrinta(bit_cast<uint64_t>(-1.0 / std::numeric_limits<double>::epsilon())),
            MakeUInt128(bit_cast<uint64_t>(-1.0 / std::numeric_limits<double>::epsilon()), 0U));
  ASSERT_EQ(AsmFrinta(bit_cast<uint64_t>(2.0 / std::numeric_limits<double>::epsilon())),
            MakeUInt128(bit_cast<uint64_t>(2.0 / std::numeric_limits<double>::epsilon()), 0U));
  ASSERT_EQ(AsmFrinta(bit_cast<uint64_t>(-2.0 / std::numeric_limits<double>::epsilon())),
            MakeUInt128(bit_cast<uint64_t>(-2.0 / std::numeric_limits<double>::epsilon()), 0U));
  ASSERT_EQ(AsmFrinta(bit_cast<uint64_t>(1.0e100)), MakeUInt128(bit_cast<uint64_t>(1.0e100), 0U));
  ASSERT_EQ(AsmFrinta(bit_cast<uint64_t>(-1.0e100)), MakeUInt128(bit_cast<uint64_t>(-1.0e100), 0U));
}
1654
TEST(Arm64InsnTest, RoundToIntNearestTiesAwayFp32) {
  // frinta (single precision): round to integral, to nearest with ties away
  // from zero.
  constexpr auto AsmFrinta = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("frinta %s0, %s1");

  // -7.50 -> -8.00 (ties away from zero as opposed to even)
  ASSERT_EQ(AsmFrinta(0xc0f00000U), MakeUInt128(0xc1000000U, 0U));

  // -6.75 -> -7.00
  ASSERT_EQ(AsmFrinta(0xc0d80000U), MakeUInt128(0xc0e00000U, 0U));

  // -6.50 -> -7.00 (ties away from zero as opposed to even)
  ASSERT_EQ(AsmFrinta(0xc0d00000U), MakeUInt128(0xc0e00000U, 0U));

  // -6.25 -> -6.00
  ASSERT_EQ(AsmFrinta(0xc0c80000U), MakeUInt128(0xc0c00000U, 0U));

  // 6.25 -> 6.00
  ASSERT_EQ(AsmFrinta(0x40c80000U), MakeUInt128(0x40c00000U, 0U));

  // 6.50 -> 7.00 (ties away from zero as opposed to even)
  ASSERT_EQ(AsmFrinta(0x40d00000U), MakeUInt128(0x40e00000U, 0U));

  // 6.75 -> 7.00
  ASSERT_EQ(AsmFrinta(0x40d80000U), MakeUInt128(0x40e00000U, 0U));

  // 7.50 -> 8.00 (ties away from zero as opposed to even)
  ASSERT_EQ(AsmFrinta(0x40f00000U), MakeUInt128(0x41000000U, 0U));

  // -0.49999997019767761 -> -0.0 (should not "tie away" since -0.4999... != -0.5)
  ASSERT_EQ(AsmFrinta(0xbeffffff), MakeUInt128(0x80000000U, 0U));

  // A number too large to have fractional precision, should not change upon rounding with tie-away
  ASSERT_EQ(
      AsmFrinta(bit_cast<uint32_t>(float{0.5 / std::numeric_limits<float>::epsilon()})),
      MakeUInt128(bit_cast<uint32_t>(float{0.5 / std::numeric_limits<float>::epsilon()}), 0U));
  ASSERT_EQ(
      AsmFrinta(bit_cast<uint32_t>(float{-0.5 / std::numeric_limits<float>::epsilon()})),
      MakeUInt128(bit_cast<uint32_t>(float{-0.5 / std::numeric_limits<float>::epsilon()}), 0U));
  ASSERT_EQ(
      AsmFrinta(bit_cast<uint32_t>(float{0.75 / std::numeric_limits<float>::epsilon()})),
      MakeUInt128(bit_cast<uint32_t>(float{0.75 / std::numeric_limits<float>::epsilon()}), 0U));
  ASSERT_EQ(
      AsmFrinta(bit_cast<uint32_t>(float{-0.75 / std::numeric_limits<float>::epsilon()})),
      MakeUInt128(bit_cast<uint32_t>(float{-0.75 / std::numeric_limits<float>::epsilon()}), 0U));
  ASSERT_EQ(
      AsmFrinta(bit_cast<uint32_t>(float{1.0 / std::numeric_limits<float>::epsilon()})),
      MakeUInt128(bit_cast<uint32_t>(float{1.0 / std::numeric_limits<float>::epsilon()}), 0U));
  ASSERT_EQ(
      AsmFrinta(bit_cast<uint32_t>(float{-1.0 / std::numeric_limits<float>::epsilon()})),
      MakeUInt128(bit_cast<uint32_t>(float{-1.0 / std::numeric_limits<float>::epsilon()}), 0U));
  ASSERT_EQ(
      AsmFrinta(bit_cast<uint32_t>(float{2.0 / std::numeric_limits<float>::epsilon()})),
      MakeUInt128(bit_cast<uint32_t>(float{2.0 / std::numeric_limits<float>::epsilon()}), 0U));
  ASSERT_EQ(
      AsmFrinta(bit_cast<uint32_t>(float{-2.0 / std::numeric_limits<float>::epsilon()})),
      MakeUInt128(bit_cast<uint32_t>(float{-2.0 / std::numeric_limits<float>::epsilon()}), 0U));
  ASSERT_EQ(AsmFrinta(bit_cast<uint32_t>(1.0e38f)), MakeUInt128(bit_cast<uint32_t>(1.0e38f), 0U));
  ASSERT_EQ(AsmFrinta(bit_cast<uint32_t>(-1.0e38f)), MakeUInt128(bit_cast<uint32_t>(-1.0e38f), 0U));
}
1713
TEST(Arm64InsnTest, RoundToIntDownwardFp64) {
  // frintm (double precision): round to integral, toward -infinity (floor).
  constexpr auto AsmFrintm = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("frintm %d0, %d1");

  // 7.7 -> 7.00
  ASSERT_EQ(AsmFrintm(0x401ecccccccccccdULL), MakeUInt128(0x401c000000000000, 0U));

  // 7.1 -> 7.00
  ASSERT_EQ(AsmFrintm(0x401c666666666666ULL), MakeUInt128(0x401c000000000000, 0U));

  // -7.10 -> -8.00
  ASSERT_EQ(AsmFrintm(0xc01c666666666666ULL), MakeUInt128(0xc020000000000000, 0U));

  // -7.90 -> -8.00
  ASSERT_EQ(AsmFrintm(0xc01f99999999999aULL), MakeUInt128(0xc020000000000000, 0U));

  // 0 -> 0
  ASSERT_EQ(AsmFrintm(0x0000000000000000ULL), MakeUInt128(0x0000000000000000, 0U));

  // -0 -> -0
  ASSERT_EQ(AsmFrintm(0x8000000000000000ULL), MakeUInt128(0x8000000000000000, 0U));
}
1735
TEST(Arm64InsnTest, RoundToIntDownwardFp32) {
  // frintm (single precision): round to integral, toward -infinity (floor).
  constexpr auto AsmFrintm = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("frintm %s0, %s1");

  // 7.7 -> 7.00
  ASSERT_EQ(AsmFrintm(0x40f66666), 0x40e00000);

  // 7.1 -> 7.00
  ASSERT_EQ(AsmFrintm(0x40e33333), 0x40e00000);

  // -7.10 -> -8.00
  ASSERT_EQ(AsmFrintm(0xc0e33333), 0xc1000000);

  // -7.90 -> -8.00
  ASSERT_EQ(AsmFrintm(0xc0fccccd), 0xc1000000);

  // 0 -> 0
  ASSERT_EQ(AsmFrintm(0x00000000), 0x00000000);

  // -0 -> -0
  ASSERT_EQ(AsmFrintm(0x80000000), 0x80000000);
}
1757
TEST(Arm64InsnTest, RoundToIntNearestFp64) {
  // frintn (double precision): round to integral, to nearest with ties to even.
  constexpr auto AsmFrintn = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("frintn %d0, %d1");

  // 7.5 -> 8.00 (ties to even)
  ASSERT_EQ(AsmFrintn(0x401e000000000000ULL), MakeUInt128(0x4020000000000000, 0U));

  // 8.5 -> 8.00 (ties to even)
  ASSERT_EQ(AsmFrintn(0x4021000000000000), MakeUInt128(0x4020000000000000, 0U));

  // 7.10 -> 7.00
  ASSERT_EQ(AsmFrintn(0x401c666666666666), MakeUInt128(0x401c000000000000, 0U));

  // 7.90 -> 8.00
  ASSERT_EQ(AsmFrintn(0x401f99999999999a), MakeUInt128(0x4020000000000000, 0U));

  // -7.5 -> -8.00 (ties to even)
  ASSERT_EQ(AsmFrintn(0xc01e000000000000), MakeUInt128(0xc020000000000000, 0U));

  // -8.5 -> -8.00 (ties to even)
  ASSERT_EQ(AsmFrintn(0xc021000000000000), MakeUInt128(0xc020000000000000, 0U));

  // -7.10 -> -7.00
  ASSERT_EQ(AsmFrintn(0xc01c666666666666), MakeUInt128(0xc01c000000000000, 0U));

  // -7.90 -> -8.00
  ASSERT_EQ(AsmFrintn(0xc01f99999999999a), MakeUInt128(0xc020000000000000, 0U));

  // 0 -> 0
  ASSERT_EQ(AsmFrintn(0x0000000000000000ULL), MakeUInt128(0x0000000000000000, 0U));

  // -0 -> -0
  ASSERT_EQ(AsmFrintn(0x8000000000000000ULL), MakeUInt128(0x8000000000000000, 0U));
}
1791
TEST(Arm64InsnTest, RoundToIntToNearestFp32) {
  // frintn (single precision): round to integral, to nearest with ties to even.
  constexpr auto AsmFrintn = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("frintn %s0, %s1");

  // 7.5 -> 8.00 (ties to even)
  ASSERT_EQ(AsmFrintn(0x40f00000), 0x41000000);

  // 8.5 -> 8.00 (ties to even)
  ASSERT_EQ(AsmFrintn(0x41080000), 0x41000000);

  // 7.10 -> 7.00
  ASSERT_EQ(AsmFrintn(0x40e33333), 0x40e00000);

  // 7.90 -> 8.00
  ASSERT_EQ(AsmFrintn(0x40fccccd), 0x41000000);

  // -7.5 -> -8.00 (ties to even)
  ASSERT_EQ(AsmFrintn(0xc0f00000), 0xc1000000);

  // -8.5 -> -8.00 (ties to even)
  ASSERT_EQ(AsmFrintn(0xc1080000), 0xc1000000);

  // -7.10 -> -7.00
  ASSERT_EQ(AsmFrintn(0xc0e33333), 0xc0e00000);

  // -7.90 -> -8.00
  ASSERT_EQ(AsmFrintn(0xc0fccccd), 0xc1000000);

  // 0 -> 0
  ASSERT_EQ(AsmFrintn(0x00000000), 0x00000000);

  // -0 -> -0
  ASSERT_EQ(AsmFrintn(0x80000000), 0x80000000);
}
1825
TEST(Arm64InsnTest, RoundToIntTowardZeroFp64) {
  // frintz (double precision): round to integral, toward zero (truncate).
  constexpr auto AsmFrintz = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("frintz %d0, %d1");

  // 7.7 -> 7.00
  ASSERT_EQ(AsmFrintz(0x401ecccccccccccdULL), MakeUInt128(0x401c000000000000, 0U));

  // 7.1 -> 7.00
  ASSERT_EQ(AsmFrintz(0x401c666666666666ULL), MakeUInt128(0x401c000000000000, 0U));

  // -7.10 -> -7.00
  ASSERT_EQ(AsmFrintz(0xc01c666666666666ULL), MakeUInt128(0xc01c000000000000, 0U));

  // -7.90 -> -7.00
  ASSERT_EQ(AsmFrintz(0xc01f99999999999aULL), MakeUInt128(0xc01c000000000000, 0U));

  // 0 -> 0
  ASSERT_EQ(AsmFrintz(0x0000000000000000ULL), MakeUInt128(0x0000000000000000, 0U));

  // -0 -> -0
  ASSERT_EQ(AsmFrintz(0x8000000000000000ULL), MakeUInt128(0x8000000000000000, 0U));
}
1847
TEST(Arm64InsnTest, RoundToIntTowardZeroFp32) {
  // frintz (single precision): round to integral, toward zero (truncate).
  constexpr auto AsmFrintz = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("frintz %s0, %s1");

  // 7.7 -> 7.00
  ASSERT_EQ(AsmFrintz(0x40f66666), 0x40e00000);

  // 7.1 -> 7.00
  ASSERT_EQ(AsmFrintz(0x40e33333), 0x40e00000);

  // -7.10 -> -7.00
  ASSERT_EQ(AsmFrintz(0xc0e33333), 0xc0e00000);

  // -7.90 -> -7.00
  ASSERT_EQ(AsmFrintz(0xc0fccccd), 0xc0e00000);

  // 0 -> 0
  ASSERT_EQ(AsmFrintz(0x00000000), 0x00000000);

  // -0 -> -0
  ASSERT_EQ(AsmFrintz(0x80000000), 0x80000000);
}
1869
TEST(Arm64InsnTest, AsmConvertF32x4TieAway) {
  // frinta (vector): round each float32 lane to integral, ties away from zero.
  constexpr auto AsmFrinta = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("frinta %0.4s, %1.4s");
  ASSERT_EQ(AsmFrinta(MakeF32x4(-7.50f, -6.75f, -6.50f, -6.25f)),
            MakeF32x4(-8.00f, -7.00f, -7.00f, -6.00f));
  ASSERT_EQ(AsmFrinta(MakeF32x4(6.25f, 6.50f, 6.75f, 7.50f)),
            MakeF32x4(6.00f, 7.00f, 7.00f, 8.00f));
}
1877
TEST(Arm64InsnTest, AsmConvertF32x4NegInf) {
  // frintm (vector): round each float32 lane to integral, toward -infinity.
  constexpr auto AsmFrintm = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("frintm %0.4s, %1.4s");
  ASSERT_EQ(AsmFrintm(MakeF32x4(-7.50f, -6.75f, -6.50f, -6.25f)),
            MakeF32x4(-8.00f, -7.00f, -7.00f, -7.00f));
  ASSERT_EQ(AsmFrintm(MakeF32x4(6.25f, 6.50f, 6.75f, 7.50f)),
            MakeF32x4(6.00f, 6.00f, 6.00f, 7.00f));
}
1885
TEST(Arm64InsnTest, AsmConvertF32x4TieEven) {
  // frintn (vector): round each float32 lane to integral, ties to even.
  constexpr auto AsmFrintn = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("frintn %0.4s, %1.4s");
  ASSERT_EQ(AsmFrintn(MakeF32x4(-7.50f, -6.75f, -6.50f, -6.25f)),
            MakeF32x4(-8.00f, -7.00f, -6.00f, -6.00f));
  ASSERT_EQ(AsmFrintn(MakeF32x4(6.25f, 6.50f, 6.75f, 7.50f)),
            MakeF32x4(6.00f, 6.00f, 7.00f, 8.00f));
}
1893
TEST(Arm64InsnTest, AsmConvertF32x4PosInf) {
  // frintp (vector): round each float32 lane to integral, toward +infinity.
  constexpr auto AsmFrintp = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("frintp %0.4s, %1.4s");
  ASSERT_EQ(AsmFrintp(MakeF32x4(-7.50f, -6.75f, -6.50f, -6.25f)),
            MakeF32x4(-7.00f, -6.00f, -6.00f, -6.00f));
  ASSERT_EQ(AsmFrintp(MakeF32x4(6.25f, 6.50f, 6.75f, 7.50f)),
            MakeF32x4(7.00f, 7.00f, 7.00f, 8.00f));
}
1901
TEST(Arm64InsnTest, AsmConvertF32x4Truncate) {
  // frintz (vector): round each float32 lane to integral, toward zero.
  constexpr auto AsmFrintz = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("frintz %0.4s, %1.4s");
  ASSERT_EQ(AsmFrintz(MakeF32x4(-7.50f, -6.75f, -6.50f, -6.25f)),
            MakeF32x4(-7.00f, -6.00f, -6.00f, -6.00f));
  ASSERT_EQ(AsmFrintz(MakeF32x4(6.25f, 6.50f, 6.75f, 7.50f)),
            MakeF32x4(6.00f, 6.00f, 6.00f, 7.00f));
}
1909
TEST(Arm64InsnTest, AsmConvertF64x4TieAway) {
  // frinta (vector): round each float64 lane to integral, ties away from zero.
  constexpr auto AsmFrinta = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("frinta %0.2d, %1.2d");
  ASSERT_EQ(AsmFrinta(MakeF64x2(-7.50, -6.75)), MakeF64x2(-8.00, -7.00));
  ASSERT_EQ(AsmFrinta(MakeF64x2(-6.50, -6.25)), MakeF64x2(-7.00, -6.00));
  ASSERT_EQ(AsmFrinta(MakeF64x2(6.25, 6.50)), MakeF64x2(6.00, 7.00));
  ASSERT_EQ(AsmFrinta(MakeF64x2(6.75, 7.50)), MakeF64x2(7.00, 8.00));
}
1921
TEST(Arm64InsnTest, AsmConvertF64x4NegInf) {
  // frintm (vector): round each float64 lane to integral, toward -infinity.
  constexpr auto AsmFrintm = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("frintm %0.2d, %1.2d");
  ASSERT_EQ(AsmFrintm(MakeF64x2(-7.50, -6.75)), MakeF64x2(-8.00, -7.00));
  ASSERT_EQ(AsmFrintm(MakeF64x2(-6.50, -6.25)), MakeF64x2(-7.00, -7.00));
  ASSERT_EQ(AsmFrintm(MakeF64x2(6.25, 6.50)), MakeF64x2(6.00, 6.00));
  ASSERT_EQ(AsmFrintm(MakeF64x2(6.75, 7.50)), MakeF64x2(6.00, 7.00));
}
1933
TEST(Arm64InsnTest, AsmConvertF64x4TieEven) {
  // frintn (vector): round each float64 lane to integral, ties to even.
  constexpr auto AsmFrintn = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("frintn %0.2d, %1.2d");
  ASSERT_EQ(AsmFrintn(MakeF64x2(-7.50, -6.75)), MakeF64x2(-8.00, -7.00));
  ASSERT_EQ(AsmFrintn(MakeF64x2(-6.50, -6.25)), MakeF64x2(-6.00, -6.00));
  ASSERT_EQ(AsmFrintn(MakeF64x2(6.25, 6.50)), MakeF64x2(6.00, 6.00));
  ASSERT_EQ(AsmFrintn(MakeF64x2(6.75, 7.50)), MakeF64x2(7.00, 8.00));
}
1945
TEST(Arm64InsnTest, AsmConvertF64x4PosInf) {
  // frintp (vector): round each float64 lane to integral, toward +infinity.
  constexpr auto AsmFrintp = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("frintp %0.2d, %1.2d");
  ASSERT_EQ(AsmFrintp(MakeF64x2(-7.50, -6.75)), MakeF64x2(-7.00, -6.00));
  ASSERT_EQ(AsmFrintp(MakeF64x2(-6.50, -6.25)), MakeF64x2(-6.00, -6.00));
  ASSERT_EQ(AsmFrintp(MakeF64x2(6.25, 6.50)), MakeF64x2(7.00, 7.00));
  ASSERT_EQ(AsmFrintp(MakeF64x2(6.75, 7.50)), MakeF64x2(7.00, 8.00));
}
1957
TEST(Arm64InsnTest, AsmConvertF64x4Truncate) {
  // frintz (vector): round each float64 lane to integral, toward zero.
  constexpr auto AsmFrintz = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("frintz %0.2d, %1.2d");
  ASSERT_EQ(AsmFrintz(MakeF64x2(-7.50, -6.75)), MakeF64x2(-7.00, -6.00));
  ASSERT_EQ(AsmFrintz(MakeF64x2(-6.50, -6.25)), MakeF64x2(-6.00, -6.00));
  ASSERT_EQ(AsmFrintz(MakeF64x2(6.25, 6.50)), MakeF64x2(6.00, 6.00));
  ASSERT_EQ(AsmFrintz(MakeF64x2(6.75, 7.50)), MakeF64x2(6.00, 7.00));
}
1969
TEST(Arm64InsnTest, AsmRoundCurrentModeF32) {
  // FRINTI rounds to integral using the mode selected by FPCR.RMode,
  // which the wrapper installs from its second argument.
  constexpr auto AsmFrinti = ASM_INSN_WRAP_FUNC_W_RES_WC_ARG("frinti %s0, %s1");
  auto Round = [&](float x, auto mode) { return AsmFrinti(bit_cast<uint32_t>(x), mode); };
  auto F = [](float x) { return bit_cast<uint32_t>(x); };
  // Round to nearest, ties to even.
  ASSERT_EQ(Round(-7.50f, kFpcrRModeTieEven), F(-8.00f));
  ASSERT_EQ(Round(-6.75f, kFpcrRModeTieEven), F(-7.00f));
  ASSERT_EQ(Round(-6.50f, kFpcrRModeTieEven), F(-6.00f));
  ASSERT_EQ(Round(-6.25f, kFpcrRModeTieEven), F(-6.00f));
  ASSERT_EQ(Round(6.25f, kFpcrRModeTieEven), F(6.00f));
  ASSERT_EQ(Round(6.50f, kFpcrRModeTieEven), F(6.00f));
  ASSERT_EQ(Round(6.75f, kFpcrRModeTieEven), F(7.00f));
  ASSERT_EQ(Round(7.50f, kFpcrRModeTieEven), F(8.00f));
  // Round toward negative infinity.
  ASSERT_EQ(Round(-7.50f, kFpcrRModeNegInf), F(-8.00f));
  ASSERT_EQ(Round(-6.75f, kFpcrRModeNegInf), F(-7.00f));
  ASSERT_EQ(Round(-6.50f, kFpcrRModeNegInf), F(-7.00f));
  ASSERT_EQ(Round(-6.25f, kFpcrRModeNegInf), F(-7.00f));
  ASSERT_EQ(Round(6.25f, kFpcrRModeNegInf), F(6.00f));
  ASSERT_EQ(Round(6.50f, kFpcrRModeNegInf), F(6.00f));
  ASSERT_EQ(Round(6.75f, kFpcrRModeNegInf), F(6.00f));
  ASSERT_EQ(Round(7.50f, kFpcrRModeNegInf), F(7.00f));
  // Round toward positive infinity.
  ASSERT_EQ(Round(-7.50f, kFpcrRModePosInf), F(-7.00f));
  ASSERT_EQ(Round(-6.75f, kFpcrRModePosInf), F(-6.00f));
  ASSERT_EQ(Round(-6.50f, kFpcrRModePosInf), F(-6.00f));
  ASSERT_EQ(Round(-6.25f, kFpcrRModePosInf), F(-6.00f));
  ASSERT_EQ(Round(6.25f, kFpcrRModePosInf), F(7.00f));
  ASSERT_EQ(Round(6.50f, kFpcrRModePosInf), F(7.00f));
  ASSERT_EQ(Round(6.75f, kFpcrRModePosInf), F(7.00f));
  ASSERT_EQ(Round(7.50f, kFpcrRModePosInf), F(8.00f));
  // Round toward zero.
  ASSERT_EQ(Round(-7.50f, kFpcrRModeZero), F(-7.00f));
  ASSERT_EQ(Round(-6.75f, kFpcrRModeZero), F(-6.00f));
  ASSERT_EQ(Round(-6.50f, kFpcrRModeZero), F(-6.00f));
  ASSERT_EQ(Round(-6.25f, kFpcrRModeZero), F(-6.00f));
  ASSERT_EQ(Round(6.25f, kFpcrRModeZero), F(6.00f));
  ASSERT_EQ(Round(6.50f, kFpcrRModeZero), F(6.00f));
  ASSERT_EQ(Round(6.75f, kFpcrRModeZero), F(6.00f));
  ASSERT_EQ(Round(7.50f, kFpcrRModeZero), F(7.00f));
}
2005
TEST(Arm64InsnTest, AsmRoundCurrentModeF64) {
  // FRINTI (double) rounds to integral using the mode selected by FPCR.RMode,
  // which the wrapper installs from its second argument.
  constexpr auto AsmFrinti = ASM_INSN_WRAP_FUNC_W_RES_WC_ARG("frinti %d0, %d1");
  auto Round = [&](double x, auto mode) { return AsmFrinti(bit_cast<uint64_t>(x), mode); };
  auto D = [](double x) { return bit_cast<uint64_t>(x); };
  // Round to nearest, ties to even.
  ASSERT_EQ(Round(-7.50, kFpcrRModeTieEven), D(-8.00));
  ASSERT_EQ(Round(-6.75, kFpcrRModeTieEven), D(-7.00));
  ASSERT_EQ(Round(-6.50, kFpcrRModeTieEven), D(-6.00));
  ASSERT_EQ(Round(-6.25, kFpcrRModeTieEven), D(-6.00));
  ASSERT_EQ(Round(6.25, kFpcrRModeTieEven), D(6.00));
  ASSERT_EQ(Round(6.50, kFpcrRModeTieEven), D(6.00));
  ASSERT_EQ(Round(6.75, kFpcrRModeTieEven), D(7.00));
  ASSERT_EQ(Round(7.50, kFpcrRModeTieEven), D(8.00));
  // Round toward negative infinity.
  ASSERT_EQ(Round(-7.50, kFpcrRModeNegInf), D(-8.00));
  ASSERT_EQ(Round(-6.75, kFpcrRModeNegInf), D(-7.00));
  ASSERT_EQ(Round(-6.50, kFpcrRModeNegInf), D(-7.00));
  ASSERT_EQ(Round(-6.25, kFpcrRModeNegInf), D(-7.00));
  ASSERT_EQ(Round(6.25, kFpcrRModeNegInf), D(6.00));
  ASSERT_EQ(Round(6.50, kFpcrRModeNegInf), D(6.00));
  ASSERT_EQ(Round(6.75, kFpcrRModeNegInf), D(6.00));
  ASSERT_EQ(Round(7.50, kFpcrRModeNegInf), D(7.00));
  // Round toward positive infinity.
  ASSERT_EQ(Round(-7.50, kFpcrRModePosInf), D(-7.00));
  ASSERT_EQ(Round(-6.75, kFpcrRModePosInf), D(-6.00));
  ASSERT_EQ(Round(-6.50, kFpcrRModePosInf), D(-6.00));
  ASSERT_EQ(Round(-6.25, kFpcrRModePosInf), D(-6.00));
  ASSERT_EQ(Round(6.25, kFpcrRModePosInf), D(7.00));
  ASSERT_EQ(Round(6.50, kFpcrRModePosInf), D(7.00));
  ASSERT_EQ(Round(6.75, kFpcrRModePosInf), D(7.00));
  ASSERT_EQ(Round(7.50, kFpcrRModePosInf), D(8.00));
  // Round toward zero.
  ASSERT_EQ(Round(-7.50, kFpcrRModeZero), D(-7.00));
  ASSERT_EQ(Round(-6.75, kFpcrRModeZero), D(-6.00));
  ASSERT_EQ(Round(-6.50, kFpcrRModeZero), D(-6.00));
  ASSERT_EQ(Round(-6.25, kFpcrRModeZero), D(-6.00));
  ASSERT_EQ(Round(6.25, kFpcrRModeZero), D(6.00));
  ASSERT_EQ(Round(6.50, kFpcrRModeZero), D(6.00));
  ASSERT_EQ(Round(6.75, kFpcrRModeZero), D(6.00));
  ASSERT_EQ(Round(7.50, kFpcrRModeZero), D(7.00));
}
2041
TEST(Arm64InsnTest, AsmRoundCurrentModeF32x4) {
  // Vector FRINTI rounds all four float lanes using the FPCR-selected mode.
  constexpr auto AsmFrinti = ASM_INSN_WRAP_FUNC_W_RES_WC_ARG("frinti %0.4s, %1.4s");
  // The same two input vectors are exercised under every rounding mode.
  const __uint128_t kNegatives = MakeF32x4(-7.50f, -6.75f, -6.50f, -6.25f);
  const __uint128_t kPositives = MakeF32x4(6.25f, 6.50f, 6.75f, 7.50f);
  ASSERT_EQ(AsmFrinti(kNegatives, kFpcrRModeTieEven), MakeF32x4(-8.00f, -7.00f, -6.00f, -6.00f));
  ASSERT_EQ(AsmFrinti(kPositives, kFpcrRModeTieEven), MakeF32x4(6.00f, 6.00f, 7.00f, 8.00f));
  ASSERT_EQ(AsmFrinti(kNegatives, kFpcrRModeNegInf), MakeF32x4(-8.00f, -7.00f, -7.00f, -7.00f));
  ASSERT_EQ(AsmFrinti(kPositives, kFpcrRModeNegInf), MakeF32x4(6.00f, 6.00f, 6.00f, 7.00f));
  ASSERT_EQ(AsmFrinti(kNegatives, kFpcrRModePosInf), MakeF32x4(-7.00f, -6.00f, -6.00f, -6.00f));
  ASSERT_EQ(AsmFrinti(kPositives, kFpcrRModePosInf), MakeF32x4(7.00f, 7.00f, 7.00f, 8.00f));
  ASSERT_EQ(AsmFrinti(kNegatives, kFpcrRModeZero), MakeF32x4(-7.00f, -6.00f, -6.00f, -6.00f));
  ASSERT_EQ(AsmFrinti(kPositives, kFpcrRModeZero), MakeF32x4(6.00f, 6.00f, 6.00f, 7.00f));
}
2061
TEST(Arm64InsnTest, AsmRoundCurrentModeF64x2) {
  // Vector FRINTI rounds both double lanes using the FPCR-selected mode.
  constexpr auto AsmFrinti = ASM_INSN_WRAP_FUNC_W_RES_WC_ARG("frinti %0.2d, %1.2d");
  // The same four input vectors are exercised under every rounding mode.
  const __uint128_t kNegHi = MakeF64x2(-7.50, -6.75);
  const __uint128_t kNegLo = MakeF64x2(-6.50, -6.25);
  const __uint128_t kPosLo = MakeF64x2(6.25, 6.50);
  const __uint128_t kPosHi = MakeF64x2(6.75, 7.50);
  ASSERT_EQ(AsmFrinti(kNegHi, kFpcrRModeTieEven), MakeF64x2(-8.00, -7.00));
  ASSERT_EQ(AsmFrinti(kNegLo, kFpcrRModeTieEven), MakeF64x2(-6.00, -6.00));
  ASSERT_EQ(AsmFrinti(kPosLo, kFpcrRModeTieEven), MakeF64x2(6.00, 6.00));
  ASSERT_EQ(AsmFrinti(kPosHi, kFpcrRModeTieEven), MakeF64x2(7.00, 8.00));
  ASSERT_EQ(AsmFrinti(kNegHi, kFpcrRModeNegInf), MakeF64x2(-8.00, -7.00));
  ASSERT_EQ(AsmFrinti(kNegLo, kFpcrRModeNegInf), MakeF64x2(-7.00, -7.00));
  ASSERT_EQ(AsmFrinti(kPosLo, kFpcrRModeNegInf), MakeF64x2(6.00, 6.00));
  ASSERT_EQ(AsmFrinti(kPosHi, kFpcrRModeNegInf), MakeF64x2(6.00, 7.00));
  ASSERT_EQ(AsmFrinti(kNegHi, kFpcrRModePosInf), MakeF64x2(-7.00, -6.00));
  ASSERT_EQ(AsmFrinti(kNegLo, kFpcrRModePosInf), MakeF64x2(-6.00, -6.00));
  ASSERT_EQ(AsmFrinti(kPosLo, kFpcrRModePosInf), MakeF64x2(7.00, 7.00));
  ASSERT_EQ(AsmFrinti(kPosHi, kFpcrRModePosInf), MakeF64x2(7.00, 8.00));
  ASSERT_EQ(AsmFrinti(kNegHi, kFpcrRModeZero), MakeF64x2(-7.00, -6.00));
  ASSERT_EQ(AsmFrinti(kNegLo, kFpcrRModeZero), MakeF64x2(-6.00, -6.00));
  ASSERT_EQ(AsmFrinti(kPosLo, kFpcrRModeZero), MakeF64x2(6.00, 6.00));
  ASSERT_EQ(AsmFrinti(kPosHi, kFpcrRModeZero), MakeF64x2(6.00, 7.00));
}
2097
TEST(Arm64InsnTest, AsmRoundExactF32) {
  // FRINTX rounds exactly like FRINTI under the FPCR-selected mode (it
  // additionally raises Inexact, which this test does not observe).
  constexpr auto AsmFrintx = ASM_INSN_WRAP_FUNC_W_RES_WC_ARG("frintx %s0, %s1");
  auto Round = [&](float x, auto mode) { return AsmFrintx(bit_cast<uint32_t>(x), mode); };
  auto F = [](float x) { return bit_cast<uint32_t>(x); };
  // Round to nearest, ties to even.
  ASSERT_EQ(Round(-7.50f, kFpcrRModeTieEven), F(-8.00f));
  ASSERT_EQ(Round(-6.75f, kFpcrRModeTieEven), F(-7.00f));
  ASSERT_EQ(Round(-6.50f, kFpcrRModeTieEven), F(-6.00f));
  ASSERT_EQ(Round(-6.25f, kFpcrRModeTieEven), F(-6.00f));
  ASSERT_EQ(Round(6.25f, kFpcrRModeTieEven), F(6.00f));
  ASSERT_EQ(Round(6.50f, kFpcrRModeTieEven), F(6.00f));
  ASSERT_EQ(Round(6.75f, kFpcrRModeTieEven), F(7.00f));
  ASSERT_EQ(Round(7.50f, kFpcrRModeTieEven), F(8.00f));
  // Round toward negative infinity.
  ASSERT_EQ(Round(-7.50f, kFpcrRModeNegInf), F(-8.00f));
  ASSERT_EQ(Round(-6.75f, kFpcrRModeNegInf), F(-7.00f));
  ASSERT_EQ(Round(-6.50f, kFpcrRModeNegInf), F(-7.00f));
  ASSERT_EQ(Round(-6.25f, kFpcrRModeNegInf), F(-7.00f));
  ASSERT_EQ(Round(6.25f, kFpcrRModeNegInf), F(6.00f));
  ASSERT_EQ(Round(6.50f, kFpcrRModeNegInf), F(6.00f));
  ASSERT_EQ(Round(6.75f, kFpcrRModeNegInf), F(6.00f));
  ASSERT_EQ(Round(7.50f, kFpcrRModeNegInf), F(7.00f));
  // Round toward positive infinity.
  ASSERT_EQ(Round(-7.50f, kFpcrRModePosInf), F(-7.00f));
  ASSERT_EQ(Round(-6.75f, kFpcrRModePosInf), F(-6.00f));
  ASSERT_EQ(Round(-6.50f, kFpcrRModePosInf), F(-6.00f));
  ASSERT_EQ(Round(-6.25f, kFpcrRModePosInf), F(-6.00f));
  ASSERT_EQ(Round(6.25f, kFpcrRModePosInf), F(7.00f));
  ASSERT_EQ(Round(6.50f, kFpcrRModePosInf), F(7.00f));
  ASSERT_EQ(Round(6.75f, kFpcrRModePosInf), F(7.00f));
  ASSERT_EQ(Round(7.50f, kFpcrRModePosInf), F(8.00f));
  // Round toward zero.
  ASSERT_EQ(Round(-7.50f, kFpcrRModeZero), F(-7.00f));
  ASSERT_EQ(Round(-6.75f, kFpcrRModeZero), F(-6.00f));
  ASSERT_EQ(Round(-6.50f, kFpcrRModeZero), F(-6.00f));
  ASSERT_EQ(Round(-6.25f, kFpcrRModeZero), F(-6.00f));
  ASSERT_EQ(Round(6.25f, kFpcrRModeZero), F(6.00f));
  ASSERT_EQ(Round(6.50f, kFpcrRModeZero), F(6.00f));
  ASSERT_EQ(Round(6.75f, kFpcrRModeZero), F(6.00f));
  ASSERT_EQ(Round(7.50f, kFpcrRModeZero), F(7.00f));
}
2133
TEST(Arm64InsnTest, AsmRoundExactF64) {
  // FRINTX (double) rounds exactly like FRINTI under the FPCR-selected mode
  // (it additionally raises Inexact, which this test does not observe).
  constexpr auto AsmFrintx = ASM_INSN_WRAP_FUNC_W_RES_WC_ARG("frintx %d0, %d1");
  auto Round = [&](double x, auto mode) { return AsmFrintx(bit_cast<uint64_t>(x), mode); };
  auto D = [](double x) { return bit_cast<uint64_t>(x); };
  // Round to nearest, ties to even.
  ASSERT_EQ(Round(-7.50, kFpcrRModeTieEven), D(-8.00));
  ASSERT_EQ(Round(-6.75, kFpcrRModeTieEven), D(-7.00));
  ASSERT_EQ(Round(-6.50, kFpcrRModeTieEven), D(-6.00));
  ASSERT_EQ(Round(-6.25, kFpcrRModeTieEven), D(-6.00));
  ASSERT_EQ(Round(6.25, kFpcrRModeTieEven), D(6.00));
  ASSERT_EQ(Round(6.50, kFpcrRModeTieEven), D(6.00));
  ASSERT_EQ(Round(6.75, kFpcrRModeTieEven), D(7.00));
  ASSERT_EQ(Round(7.50, kFpcrRModeTieEven), D(8.00));
  // Round toward negative infinity.
  ASSERT_EQ(Round(-7.50, kFpcrRModeNegInf), D(-8.00));
  ASSERT_EQ(Round(-6.75, kFpcrRModeNegInf), D(-7.00));
  ASSERT_EQ(Round(-6.50, kFpcrRModeNegInf), D(-7.00));
  ASSERT_EQ(Round(-6.25, kFpcrRModeNegInf), D(-7.00));
  ASSERT_EQ(Round(6.25, kFpcrRModeNegInf), D(6.00));
  ASSERT_EQ(Round(6.50, kFpcrRModeNegInf), D(6.00));
  ASSERT_EQ(Round(6.75, kFpcrRModeNegInf), D(6.00));
  ASSERT_EQ(Round(7.50, kFpcrRModeNegInf), D(7.00));
  // Round toward positive infinity.
  ASSERT_EQ(Round(-7.50, kFpcrRModePosInf), D(-7.00));
  ASSERT_EQ(Round(-6.75, kFpcrRModePosInf), D(-6.00));
  ASSERT_EQ(Round(-6.50, kFpcrRModePosInf), D(-6.00));
  ASSERT_EQ(Round(-6.25, kFpcrRModePosInf), D(-6.00));
  ASSERT_EQ(Round(6.25, kFpcrRModePosInf), D(7.00));
  ASSERT_EQ(Round(6.50, kFpcrRModePosInf), D(7.00));
  ASSERT_EQ(Round(6.75, kFpcrRModePosInf), D(7.00));
  ASSERT_EQ(Round(7.50, kFpcrRModePosInf), D(8.00));
  // Round toward zero.
  ASSERT_EQ(Round(-7.50, kFpcrRModeZero), D(-7.00));
  ASSERT_EQ(Round(-6.75, kFpcrRModeZero), D(-6.00));
  ASSERT_EQ(Round(-6.50, kFpcrRModeZero), D(-6.00));
  ASSERT_EQ(Round(-6.25, kFpcrRModeZero), D(-6.00));
  ASSERT_EQ(Round(6.25, kFpcrRModeZero), D(6.00));
  ASSERT_EQ(Round(6.50, kFpcrRModeZero), D(6.00));
  ASSERT_EQ(Round(6.75, kFpcrRModeZero), D(6.00));
  ASSERT_EQ(Round(7.50, kFpcrRModeZero), D(7.00));
}
2169
TEST(Arm64InsnTest, AsmRoundExactF32x4) {
  // Vector FRINTX rounds all four float lanes using the FPCR-selected mode.
  constexpr auto AsmFrintx = ASM_INSN_WRAP_FUNC_W_RES_WC_ARG("frintx %0.4s, %1.4s");
  // The same two input vectors are exercised under every rounding mode.
  const __uint128_t kNegatives = MakeF32x4(-7.50f, -6.75f, -6.50f, -6.25f);
  const __uint128_t kPositives = MakeF32x4(6.25f, 6.50f, 6.75f, 7.50f);
  ASSERT_EQ(AsmFrintx(kNegatives, kFpcrRModeTieEven), MakeF32x4(-8.00f, -7.00f, -6.00f, -6.00f));
  ASSERT_EQ(AsmFrintx(kPositives, kFpcrRModeTieEven), MakeF32x4(6.00f, 6.00f, 7.00f, 8.00f));
  ASSERT_EQ(AsmFrintx(kNegatives, kFpcrRModeNegInf), MakeF32x4(-8.00f, -7.00f, -7.00f, -7.00f));
  ASSERT_EQ(AsmFrintx(kPositives, kFpcrRModeNegInf), MakeF32x4(6.00f, 6.00f, 6.00f, 7.00f));
  ASSERT_EQ(AsmFrintx(kNegatives, kFpcrRModePosInf), MakeF32x4(-7.00f, -6.00f, -6.00f, -6.00f));
  ASSERT_EQ(AsmFrintx(kPositives, kFpcrRModePosInf), MakeF32x4(7.00f, 7.00f, 7.00f, 8.00f));
  ASSERT_EQ(AsmFrintx(kNegatives, kFpcrRModeZero), MakeF32x4(-7.00f, -6.00f, -6.00f, -6.00f));
  ASSERT_EQ(AsmFrintx(kPositives, kFpcrRModeZero), MakeF32x4(6.00f, 6.00f, 6.00f, 7.00f));
}
2189
TEST(Arm64InsnTest, AsmRoundExactF64x2) {
  // Vector FRINTX rounds both double lanes using the FPCR-selected mode.
  constexpr auto AsmFrintx = ASM_INSN_WRAP_FUNC_W_RES_WC_ARG("frintx %0.2d, %1.2d");
  // The same four input vectors are exercised under every rounding mode.
  const __uint128_t kNegHi = MakeF64x2(-7.50, -6.75);
  const __uint128_t kNegLo = MakeF64x2(-6.50, -6.25);
  const __uint128_t kPosLo = MakeF64x2(6.25, 6.50);
  const __uint128_t kPosHi = MakeF64x2(6.75, 7.50);
  ASSERT_EQ(AsmFrintx(kNegHi, kFpcrRModeTieEven), MakeF64x2(-8.00, -7.00));
  ASSERT_EQ(AsmFrintx(kNegLo, kFpcrRModeTieEven), MakeF64x2(-6.00, -6.00));
  ASSERT_EQ(AsmFrintx(kPosLo, kFpcrRModeTieEven), MakeF64x2(6.00, 6.00));
  ASSERT_EQ(AsmFrintx(kPosHi, kFpcrRModeTieEven), MakeF64x2(7.00, 8.00));
  ASSERT_EQ(AsmFrintx(kNegHi, kFpcrRModeNegInf), MakeF64x2(-8.00, -7.00));
  ASSERT_EQ(AsmFrintx(kNegLo, kFpcrRModeNegInf), MakeF64x2(-7.00, -7.00));
  ASSERT_EQ(AsmFrintx(kPosLo, kFpcrRModeNegInf), MakeF64x2(6.00, 6.00));
  ASSERT_EQ(AsmFrintx(kPosHi, kFpcrRModeNegInf), MakeF64x2(6.00, 7.00));
  ASSERT_EQ(AsmFrintx(kNegHi, kFpcrRModePosInf), MakeF64x2(-7.00, -6.00));
  ASSERT_EQ(AsmFrintx(kNegLo, kFpcrRModePosInf), MakeF64x2(-6.00, -6.00));
  ASSERT_EQ(AsmFrintx(kPosLo, kFpcrRModePosInf), MakeF64x2(7.00, 7.00));
  ASSERT_EQ(AsmFrintx(kPosHi, kFpcrRModePosInf), MakeF64x2(7.00, 8.00));
  ASSERT_EQ(AsmFrintx(kNegHi, kFpcrRModeZero), MakeF64x2(-7.00, -6.00));
  ASSERT_EQ(AsmFrintx(kNegLo, kFpcrRModeZero), MakeF64x2(-6.00, -6.00));
  ASSERT_EQ(AsmFrintx(kPosLo, kFpcrRModeZero), MakeF64x2(6.00, 6.00));
  ASSERT_EQ(AsmFrintx(kPosHi, kFpcrRModeZero), MakeF64x2(6.00, 7.00));
}
2225
// Compares two single-precision values with "fcmp" and returns the resulting
// PSTATE flags, read back via "mrs %x0, nzcv" (N/Z/C/V occupy bits [31:28]).
// Arguments are raw IEEE-754 bit patterns placed in FP/SIMD registers by the
// "w" constraint; the result comes back in a general register ("=r").
uint64_t Fp32Compare(uint64_t arg1, uint64_t arg2) {
  uint64_t res;
  asm("fcmp %s1, %s2\n\t"
      "mrs %x0, nzcv"
      : "=r"(res)
      : "w"(arg1), "w"(arg2));
  return res;
}
2234
// Compares two double-precision values with "fcmp" and returns the resulting
// PSTATE flags, read back via "mrs %x0, nzcv" (N/Z/C/V occupy bits [31:28]).
// Arguments are raw IEEE-754 bit patterns placed in FP/SIMD registers by the
// "w" constraint; the result comes back in a general register ("=r").
uint64_t Fp64Compare(uint64_t arg1, uint64_t arg2) {
  uint64_t res;
  asm("fcmp %d1, %d2\n\t"
      "mrs %x0, nzcv"
      : "=r"(res)
      : "w"(arg1), "w"(arg2));
  return res;
}
2243
// Builds a PSTATE.NZCV register image from a 4-bit N/Z/C/V value by placing
// it into bits [31:28], the position "mrs %x0, nzcv" reports the flags in.
constexpr uint64_t MakeNZCV(uint64_t nzcv) {
  constexpr int kNzcvShift = 28;
  return nzcv << kNzcvShift;
}
2247
TEST(Arm64InsnTest, Fp32Compare) {
  // Equal operands set Z and C.
  ASSERT_EQ(Fp32Compare(0x40c9eb85ULL, 0x40c9eb85ULL), MakeNZCV(0b0110));  // 6.31 == 6.31
  // Less-than sets N.
  ASSERT_EQ(Fp32Compare(0x3f9d70a4ULL, 0x4015c28fULL), MakeNZCV(0b1000));  // 1.23 < 2.34
  // Greater-than sets C.
  ASSERT_EQ(Fp32Compare(0x40a80000ULL, 0x403c28f6ULL), MakeNZCV(0b0010));  // 5.25 > 2.94
  // An unordered comparison (NaN operand) sets C and V.
  ASSERT_EQ(Fp32Compare(0x7fc00000ULL, 0x3fea3d71ULL), MakeNZCV(0b0011));  // NaN vs 1.83
}
2261
TEST(Arm64InsnTest, Fp32CompareZero) {
  // The "#0.0" immediate form of fcmp; flags are read back from NZCV.
  constexpr auto Fp32CompareZero = ASM_INSN_WRAP_FUNC_R_RES_W_ARG(
      "fcmp %s1, #0.0\n\t"
      "mrs %x0, nzcv");
  // Equal sets Z and C.
  ASSERT_EQ(Fp32CompareZero(0x00000000ULL), MakeNZCV(0b0110));  // 0.00 == 0.00
  // Less-than sets N.
  ASSERT_EQ(Fp32CompareZero(0xc02ae148ULL), MakeNZCV(0b1000));  // -2.67 < 0.00
  // Greater-than sets C.
  ASSERT_EQ(Fp32CompareZero(0x3fc7ae14ULL), MakeNZCV(0b0010));  // 1.56 > 0.00
  // Unordered (NaN operand) sets C and V.
  ASSERT_EQ(Fp32CompareZero(0x7fa00000ULL), MakeNZCV(0b0011));  // NaN vs 0.00
}
2279
TEST(Arm64InsnTest, Fp64Compare) {
  // Equal operands set Z and C.
  ASSERT_EQ(Fp64Compare(0x4020d70a3d70a3d7ULL, 0x4020d70a3d70a3d7ULL), MakeNZCV(0b0110));  // 8.42 == 8.42
  // Less-than sets N.
  ASSERT_EQ(Fp64Compare(0x3fe0000000000000ULL, 0x3ff0000000000000ULL), MakeNZCV(0b1000));  // 0.50 < 1.00
  // Greater-than sets C.
  ASSERT_EQ(Fp64Compare(0x401d851eb851eb85ULL, 0x3ff8a3d70a3d70a4ULL), MakeNZCV(0b0010));  // 7.38 > 1.54
  // An unordered comparison (NaN operand) sets C and V.
  ASSERT_EQ(Fp64Compare(0x7ff8000000000000ULL, 0x3ff30a3d70a3d70aULL), MakeNZCV(0b0011));  // NaN vs 1.19
}
2293
TEST(Arm64InsnTest, Fp64CompareZero) {
  // The "#0.0" immediate form of fcmp; flags are read back from NZCV.
  constexpr auto Fp64CompareZero = ASM_INSN_WRAP_FUNC_R_RES_W_ARG(
      "fcmp %d1, #0.0\n\t"
      "mrs %x0, nzcv");
  // Equal sets Z and C.
  ASSERT_EQ(Fp64CompareZero(0x0000000000000000ULL), MakeNZCV(0b0110));  // 0.00 == 0.00
  // Less-than sets N.
  ASSERT_EQ(Fp64CompareZero(0xc01ceb851eb851ecULL), MakeNZCV(0b1000));  // -7.23 < 0.00
  // Greater-than sets C.
  ASSERT_EQ(Fp64CompareZero(0x40158f5c28f5c28fULL), MakeNZCV(0b0010));  // 5.39 > 0.00
  // Unordered (NaN operand) sets C and V.
  ASSERT_EQ(Fp64CompareZero(0x7ff4000000000000ULL), MakeNZCV(0b0011));  // NaN vs 0.00
}
2311
// Exercises "fccmp": the incoming nzcv is installed into PSTATE first; if the
// EQ condition then holds (Z set), fccmp compares arg1 with arg2 and sets the
// flags from that comparison, otherwise it writes the literal #15 (0b1111)
// into NZCV. Returns the resulting PSTATE.NZCV (flags in bits [31:28]).
// The "0" constraint ties the input nzcv to the same register as the output.
uint64_t Fp32CompareIfEqualOrSetAllFlags(float arg1, float arg2, uint64_t nzcv) {
  asm("msr nzcv, %x0\n\t"
      "fccmp %s2, %s3, #15, eq\n\t"
      "mrs %x0, nzcv\n\t"
      : "=r"(nzcv)
      : "0"(nzcv), "w"(arg1), "w"(arg2));
  return nzcv;
}
2320
TEST(Arm64InsnTest, Fp32ConditionalCompare) {
  constexpr float kQuietNaN = std::numeric_limits<float>::quiet_NaN();
  // With Z set the EQ condition holds, so fccmp performs the comparison.
  constexpr uint64_t kCondHolds = MakeNZCV(0b0100);
  ASSERT_EQ(Fp32CompareIfEqualOrSetAllFlags(1.0f, 1.0f, kCondHolds), MakeNZCV(0b0110));
  ASSERT_EQ(Fp32CompareIfEqualOrSetAllFlags(1.0f, 2.0f, kCondHolds), MakeNZCV(0b1000));
  ASSERT_EQ(Fp32CompareIfEqualOrSetAllFlags(2.0f, 1.0f, kCondHolds), MakeNZCV(0b0010));
  ASSERT_EQ(Fp32CompareIfEqualOrSetAllFlags(kQuietNaN, 1.0f, kCondHolds), MakeNZCV(0b0011));
  // With Z clear the condition fails; the alternative flags (#15) are set.
  constexpr uint64_t kCondFails = MakeNZCV(0b0000);
  ASSERT_EQ(Fp32CompareIfEqualOrSetAllFlags(1.0f, 1.0f, kCondFails), MakeNZCV(0b1111));
  ASSERT_EQ(Fp32CompareIfEqualOrSetAllFlags(1.0f, 2.0f, kCondFails), MakeNZCV(0b1111));
  ASSERT_EQ(Fp32CompareIfEqualOrSetAllFlags(2.0f, 1.0f, kCondFails), MakeNZCV(0b1111));
  ASSERT_EQ(Fp32CompareIfEqualOrSetAllFlags(kQuietNaN, 1.0f, kCondFails), MakeNZCV(0b1111));
}
2336
// Exercises "fccmp" (double precision): the incoming nzcv is installed into
// PSTATE first; if the EQ condition then holds (Z set), fccmp compares arg1
// with arg2 and sets the flags from that comparison, otherwise it writes the
// literal #15 (0b1111) into NZCV. Returns the resulting PSTATE.NZCV (flags in
// bits [31:28]). The "0" constraint ties input nzcv to the output register.
uint64_t Fp64CompareIfEqualOrSetAllFlags(double arg1, double arg2, uint64_t nzcv) {
  asm("msr nzcv, %x0\n\t"
      "fccmp %d2, %d3, #15, eq\n\t"
      "mrs %x0, nzcv\n\t"
      : "=r"(nzcv)
      : "0"(nzcv), "w"(arg1), "w"(arg2));
  return nzcv;
}
2345
TEST(Arm64InsnTest, Fp64ConditionalCompare) {
  // With Z set the EQ condition holds, so fccmp performs the comparison.
  constexpr uint64_t kEqual = MakeNZCV(0b0100);
  constexpr double kNan = std::numeric_limits<double>::quiet_NaN();
  ASSERT_EQ(Fp64CompareIfEqualOrSetAllFlags(1.0, 1.0, kEqual), MakeNZCV(0b0110));
  ASSERT_EQ(Fp64CompareIfEqualOrSetAllFlags(1.0, 2.0, kEqual), MakeNZCV(0b1000));
  ASSERT_EQ(Fp64CompareIfEqualOrSetAllFlags(2.0, 1.0, kEqual), MakeNZCV(0b0010));
  ASSERT_EQ(Fp64CompareIfEqualOrSetAllFlags(kNan, 1.0, kEqual), MakeNZCV(0b0011));
  // With Z clear the condition fails; the alternative flags (#15) are set.
  constexpr uint64_t kNotEqual = MakeNZCV(0b0000);
  ASSERT_EQ(Fp64CompareIfEqualOrSetAllFlags(1.0, 1.0, kNotEqual), MakeNZCV(0b1111));
  ASSERT_EQ(Fp64CompareIfEqualOrSetAllFlags(1.0, 2.0, kNotEqual), MakeNZCV(0b1111));
  ASSERT_EQ(Fp64CompareIfEqualOrSetAllFlags(2.0, 1.0, kNotEqual), MakeNZCV(0b1111));
  // Fixed: previously passed the float literal 1.0f to this double-precision
  // helper (harmless via implicit conversion, but inconsistent with every
  // other assert in this test and with the F32 variant's pattern).
  ASSERT_EQ(Fp64CompareIfEqualOrSetAllFlags(kNan, 1.0, kNotEqual), MakeNZCV(0b1111));
}
2361
TEST(Arm64InsnTest, ConvertFp32ToFp64) {
  // fcvt widens single precision to double precision.
  constexpr auto AsmFcvt = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvt %d0, %s1");
  ASSERT_EQ(AsmFcvt(uint64_t{0x40cd70a4ULL}),  // 6.42 in float
            MakeUInt128(0x4019ae1480000000ULL, 0U));
}
2367
TEST(Arm64InsnTest, ConvertFp64ToFp32) {
  // fcvt narrows double precision to single precision.
  constexpr auto AsmFcvt = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvt %s0, %d1");
  ASSERT_EQ(AsmFcvt(uint64_t{0x401a0a3d70a3d70aULL}),  // 6.51 in double
            MakeUInt128(0x40d051ecULL, 0U));
}
2373
TEST(Arm64InsnTest, ConvertFp32ToFp16) {
  // fcvt narrows single precision to half precision; every input here is
  // exactly representable in F16, so the conversion is exact.
  constexpr auto AsmFcvt = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvt %h0, %s1");
  const struct {
    float input;
    uint64_t half;  // expected F16 bit pattern
  } kCases[] = {
      {2.5f, 0x4100U},
      {4.5f, 0x4480U},
      {8.5f, 0x4840U},
      {16.5f, 0x4c20U},
  };
  for (const auto& c : kCases) {
    EXPECT_EQ(AsmFcvt(bit_cast<uint32_t>(c.input)), MakeUInt128(c.half, 0U));
  }
}
2381
TEST(Arm64InsnTest, ConvertFp16ToFp32) {
  // fcvt widens half precision to single precision; 0x4100 is 2.5 in F16.
  constexpr auto AsmFcvt = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvt %s0, %h1");
  ASSERT_EQ(AsmFcvt(uint64_t{0x4100U}), bit_cast<uint32_t>(2.5f));
}
2387
TEST(Arm64InsnTest, ConvertFp64ToFp16) {
  // fcvt narrows double precision to half precision; 2.5 is 0x4100 in F16.
  constexpr auto AsmFcvt = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvt %h0, %d1");
  ASSERT_EQ(AsmFcvt(bit_cast<uint64_t>(2.5)), MakeUInt128(0x4100U, 0U));
}
2393
TEST(Arm64InsnTest, ConvertFp16ToFp64) {
  // fcvt widens half precision to double precision; 0x4100 is 2.5 in F16.
  constexpr auto AsmFcvt = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvt %d0, %h1");
  ASSERT_EQ(AsmFcvt(uint64_t{0x4100U}), bit_cast<uint64_t>(2.5));
}
2399
TEST(Arm64InsnTest, ConvertToNarrowF64F32x2) {
  // FCVTN narrows two doubles into the lower two float lanes; the upper
  // half of the destination is zeroed.
  constexpr auto AsmFcvtn = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtn %0.2s, %1.2d");
  constexpr double kDblInf = std::numeric_limits<double>::infinity();
  constexpr float kFltInf = std::numeric_limits<float>::infinity();
  ASSERT_EQ(AsmFcvtn(MakeF64x2(2.0, 3.0)), MakeF32x4(2.0f, 3.0f, 0.0f, 0.0f));
  // Values that overflow float, and infinities, both narrow to +/-inf.
  ASSERT_EQ(AsmFcvtn(MakeF64x2(std::numeric_limits<double>::max(), kDblInf)),
            MakeF32x4(kFltInf, kFltInf, 0.0f, 0.0f));
  ASSERT_EQ(AsmFcvtn(MakeF64x2(std::numeric_limits<double>::lowest(), -kDblInf)),
            MakeF32x4(-kFltInf, -kFltInf, 0.0f, 0.0f));
}
2419
TEST(Arm64InsnTest, ConvertToNarrowF64F32x2Upper) {
  // FCVTN2 writes the narrowed pair into the upper two float lanes while
  // preserving the destination's lower half.
  constexpr auto AsmFcvtn = ASM_INSN_WRAP_FUNC_W_RES_W0_ARG("fcvtn2 %0.4s, %1.2d");
  ASSERT_EQ(AsmFcvtn(MakeF64x2(2.0, 3.0), MakeF32x4(4.0f, 5.0f, 6.0f, 7.0f)),
            MakeF32x4(4.0f, 5.0f, 2.0f, 3.0f));
}
2426
TEST(Arm64InsnTest, ConvertToNarrowRoundToOddF64F32) {
  // FCVTXN narrows with round-to-odd: finite values that overflow float
  // saturate to +/-FLT_MAX, while infinities remain infinite.
  constexpr auto AsmFcvtxn = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtxn %s0, %d1");
  constexpr double kDblInf = std::numeric_limits<double>::infinity();
  constexpr float kFltInf = std::numeric_limits<float>::infinity();
  ASSERT_EQ(AsmFcvtxn(bit_cast<uint64_t>(2.0)), bit_cast<uint32_t>(2.0f));
  ASSERT_EQ(AsmFcvtxn(bit_cast<uint64_t>(std::numeric_limits<double>::max())),
            bit_cast<uint32_t>(std::numeric_limits<float>::max()));
  ASSERT_EQ(AsmFcvtxn(bit_cast<uint64_t>(std::numeric_limits<double>::lowest())),
            bit_cast<uint32_t>(std::numeric_limits<float>::lowest()));
  ASSERT_EQ(AsmFcvtxn(bit_cast<uint64_t>(kDblInf)), bit_cast<uint32_t>(kFltInf));
  ASSERT_EQ(AsmFcvtxn(bit_cast<uint64_t>(-kDblInf)), bit_cast<uint32_t>(-kFltInf));
}
2442
TEST(Arm64InsnTest, ConvertToNarrowRoundToOddF64F32x2) {
  // Vector FCVTXN narrows both doubles into the lower two float lanes and
  // zeroes the upper half of the destination.
  constexpr auto AsmFcvtxn = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtxn %0.2s, %1.2d");
  ASSERT_EQ(AsmFcvtxn(MakeF64x2(2.0, 3.0)), MakeF32x4(2.0f, 3.0f, 0.0f, 0.0f));
}
2448
TEST(Arm64InsnTest, ConvertToNarrowRoundToOddF64F32x2Upper) {
  // FCVTXN2 writes the narrowed pair into the upper two float lanes while
  // preserving the destination's lower half.
  constexpr auto AsmFcvtxn = ASM_INSN_WRAP_FUNC_W_RES_W0_ARG("fcvtxn2 %0.4s, %1.2d");
  ASSERT_EQ(AsmFcvtxn(MakeF64x2(2.0, 3.0), MakeF32x4(4.0f, 5.0f, 6.0f, 7.0f)),
            MakeF32x4(4.0f, 5.0f, 2.0f, 3.0f));
}
2455
TEST(Arm64InsnTest, ConvertToWiderF32F64x2Lower) {
  // FCVTL widens the lower two float lanes to doubles; the upper two lanes
  // of the source are ignored.
  constexpr auto AsmFcvtl = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtl %0.2d, %1.2s");
  ASSERT_EQ(AsmFcvtl(MakeF32x4(2.0f, 3.0f, 4.0f, 5.0f)), MakeF64x2(2.0, 3.0));
}
2461
TEST(Arm64InsnTest, ConvertToWiderF32F64x2Upper) {
  // FCVTL2 widens the upper two float lanes to doubles; the lower two lanes
  // of the source are ignored.
  constexpr auto AsmFcvtl2 = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtl2 %0.2d, %1.4s");
  ASSERT_EQ(AsmFcvtl2(MakeF32x4(2.0f, 3.0f, 4.0f, 5.0f)), MakeF64x2(4.0, 5.0));
}
2467
TEST(Arm64InsnTest, ConvertToWiderF16F32x4Lower) {
  // FCVTL widens the four F16 values in the lower 64 bits to floats.
  constexpr auto AsmFcvtl = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtl %0.4s, %1.4h");
  // Packed F16 values (high to low): 16.5, 8.5, 4.5, 2.5.
  ASSERT_EQ(AsmFcvtl(MakeUInt128(0x4c20'4840'4480'4100ULL, 0)),
            MakeF32x4(2.5f, 4.5f, 8.5f, 16.5f));
}
2474
TEST(Arm64InsnTest, ConvertToWiderF16F32x4Upper) {
  // FCVTL2 widens the four F16 values in the upper 64 bits to floats.
  constexpr auto AsmFcvtl = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtl2 %0.4s, %1.8h");
  // Packed F16 values (high to low): 16.5, 8.5, 4.5, 2.5.
  ASSERT_EQ(AsmFcvtl(MakeUInt128(0, 0x4c20'4840'4480'4100ULL)),
            MakeF32x4(2.5f, 4.5f, 8.5f, 16.5f));
}
2481
TEST(Arm64InsnTest, ConvertToNarrowF32F16x4Lower) {
  // FCVTN packs four narrowed F16 values into the lower 64 bits of the
  // destination; the upper half is zeroed.
  constexpr auto AsmFcvtn = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtn %0.4h, %1.4s");
  ASSERT_EQ(AsmFcvtn(MakeF32x4(2.5f, 4.5f, 8.5f, 16.5f)),
            MakeUInt128(0x4c20'4840'4480'4100ULL, 0));
}
2488
TEST(Arm64InsnTest, ConvertToNarrowF32F16x4Upper) {
  // FCVTN2 packs four narrowed F16 values into the upper 64 bits while
  // preserving the destination's lower 64 bits.
  constexpr auto AsmFcvtn = ASM_INSN_WRAP_FUNC_W_RES_W0_ARG("fcvtn2 %0.8h, %1.4s");
  const __uint128_t narrowed = MakeF32x4(2.5f, 4.5f, 8.5f, 16.5f);
  const __uint128_t preserved = MakeF32x4(3.0f, 5.0f, 7.0f, 11.0f);
  ASSERT_EQ(AsmFcvtn(narrowed, preserved),
            MakeUInt128(static_cast<uint64_t>(preserved), 0x4c20'4840'4480'4100ULL));
}
2496
TEST(Arm64InsnTest, AbsF32) {
  // FABS on a single-precision scalar clears the sign bit.
  constexpr auto AsmFabs = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fabs %s0, %s1");
  uint32_t negative = 0xc1273333U;  // -10.45 in float
  ASSERT_EQ(AsmFabs(negative), MakeUInt128(0x41273333ULL, 0U));  // 10.45 in float
}
2502
TEST(Arm64InsnTest, AbsF64) {
  // FABS on a double-precision scalar clears the sign bit.
  constexpr auto AsmFabs = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fabs %d0, %d1");
  uint64_t negative = 0xc03de8f5c28f5c29ULL;  // -29.91 in double
  ASSERT_EQ(AsmFabs(negative), MakeUInt128(0x403de8f5c28f5c29ULL, 0U));  // 29.91 in double
}
2508
TEST(Arm64InsnTest, AbsF32x4) {
  // Vector FABS clears the sign bit of each of the four F32 lanes,
  // including the sign of negative zero.
  constexpr auto AsmFabs = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fabs %0.4s, %1.4s");
  __uint128_t input = MakeF32x4(-0.0f, 0.0f, 3.0f, -7.0f);
  ASSERT_EQ(AsmFabs(input), MakeF32x4(0.0f, 0.0f, 3.0f, 7.0f));
}
2514
TEST(Arm64InsnTest, AbsF64x2) {
  // Vector FABS clears the sign bit of each of the two F64 lanes.
  constexpr auto AsmFabs = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fabs %0.2d, %1.2d");
  __uint128_t input = MakeF64x2(-0.0, 3.0);
  ASSERT_EQ(AsmFabs(input), MakeF64x2(0.0, 3.0));
}
2520
TEST(Arm64InsnTest, AbdF32) {
  // FABD computes the absolute difference |a - b| of two F32 scalars.
  constexpr auto AsmFabd = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fabd %s0, %s1, %s2");
  uint32_t lhs = 0x4181851fU;  // 16.19 in float
  uint32_t rhs = 0x41211eb8U;  // 10.06 in float
  ASSERT_EQ(AsmFabd(lhs, rhs), MakeUInt128(0x40c3d70cULL, 0U));  // 6.12 in float
}
2527
TEST(Arm64InsnTest, AbdF64) {
  // FABD computes the absolute difference |a - b| of two F64 scalars.
  constexpr auto AsmFabd = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fabd %d0, %d1, %d2");
  uint64_t lhs = 0x403828f5c28f5c29U;  // 24.16 in double
  uint64_t rhs = 0x4027d70a3d70a3d7U;  // 11.92 in double
  ASSERT_EQ(AsmFabd(lhs, rhs), MakeUInt128(0x40287ae147ae147bULL, 0U));  // 12.24 in double
}
2535
TEST(Arm64InsnTest, AbdF32x4) {
  // Vector FABD computes the per-lane absolute difference of four F32 lanes.
  constexpr auto AsmFabd = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fabd %0.4s, %1.4s, %2.4s");
  __uint128_t lhs = MakeF32x4(1.0f, 5.0f, -3.0f, -2.0f);
  __uint128_t rhs = MakeF32x4(-1.0f, 2.0f, -5.0f, 3.0f);
  ASSERT_EQ(AsmFabd(lhs, rhs), MakeF32x4(2.0f, 3.0f, 2.0f, 5.0f));
}
2543
TEST(Arm64InsnTest, AbdF64x2) {
  // Vector FABD computes the per-lane absolute difference of two F64 lanes.
  constexpr auto AsmFabd = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fabd %0.2d, %1.2d, %2.2d");
  __uint128_t lhs = MakeF64x2(5.0, -2.0);
  __uint128_t rhs = MakeF64x2(4.0, 3.0);
  ASSERT_EQ(AsmFabd(lhs, rhs), MakeF64x2(1.0, 5.0));
}
2551
TEST(Arm64InsnTest, NegF32) {
  // FNEG flips the sign bit of an F32 scalar.
  constexpr auto AsmFneg = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fneg %s0, %s1");
  uint32_t input = 0x40eeb852U;  // 7.46 in float
  ASSERT_EQ(AsmFneg(input), MakeUInt128(0xc0eeb852ULL, 0U));  // -7.46 in float
}
2557
TEST(Arm64InsnTest, NegF64) {
  // FNEG flips the sign bit of an F64 scalar.
  constexpr auto AsmFneg = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fneg %d0, %d1");
  uint64_t input = 0x4054b28f5c28f5c3ULL;  // 82.79 in double
  ASSERT_EQ(AsmFneg(input), MakeUInt128(0xc054b28f5c28f5c3ULL, 0U));  // -82.79 in double
}
2563
TEST(Arm64InsnTest, NegF32x4) {
  // Vector FNEG flips the sign bit of every F32 lane, including zeros.
  constexpr auto AsmFneg = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fneg %0.4s, %1.4s");
  __uint128_t input = MakeF32x4(-0.0f, 0.0f, 1.0f, -3.0f);
  ASSERT_EQ(AsmFneg(input), MakeF32x4(0.0f, -0.0f, -1.0f, 3.0f));
}
2569
TEST(Arm64InsnTest, NegF64x2) {
  // Vector FNEG flips the sign bit of both F64 lanes.
  constexpr auto AsmFneg = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fneg %0.2d, %1.2d");
  __uint128_t input = MakeF64x2(0.0, 3.0);
  ASSERT_EQ(AsmFneg(input), MakeF64x2(-0.0, -3.0));
}
2575
TEST(Arm64InsnTest, SqrtF32) {
  // FSQRT on an F32 scalar.
  constexpr auto AsmFsqrt = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fsqrt %s0, %s1");
  uint32_t input = 0x41f3cac1U;  // 30.474 in float
  ASSERT_EQ(AsmFsqrt(input), MakeUInt128(0x40b0a683ULL, 0U));  // 5.5203261 in float
}
2581
TEST(Arm64InsnTest, SqrtF64) {
  // FSQRT on an F64 scalar.
  constexpr auto AsmFsqrt = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fsqrt %d0, %d1");
  uint64_t input = 0x403d466666666666ULL;  // 29.275 in double
  ASSERT_EQ(AsmFsqrt(input), MakeUInt128(0x4015a47e3392efb8ULL, 0U));  // 5.41... in double
}
2587
TEST(Arm64InsnTest, SqrtF32x4) {
  // Vector FSQRT over four F32 lanes; perfect squares keep the result exact.
  constexpr auto AsmSqrt = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fsqrt %0.4s, %1.4s");
  __uint128_t squares = MakeF32x4(0.0f, 1.0f, 4.0f, 9.0f);
  ASSERT_EQ(AsmSqrt(squares), MakeF32x4(0.0f, 1.0f, 2.0f, 3.0f));
}
2593
TEST(Arm64InsnTest, RecipEstimateF32) {
  // FRECPE produces a low-precision reciprocal estimate; the expected values
  // below are the architecturally-defined table results, not exact 1/x.
  constexpr auto AsmFrecpe = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("frecpe %s0, %s1");
  ASSERT_EQ(AsmFrecpe(bit_cast<uint32_t>(0.25f)), bit_cast<uint32_t>(3.9921875f));
  ASSERT_EQ(AsmFrecpe(bit_cast<uint32_t>(0.50f)), bit_cast<uint32_t>(1.99609375f));
  ASSERT_EQ(AsmFrecpe(bit_cast<uint32_t>(2.00f)), bit_cast<uint32_t>(0.4990234375f));
  ASSERT_EQ(AsmFrecpe(bit_cast<uint32_t>(4.00f)), bit_cast<uint32_t>(0.24951171875f));
}
2601
TEST(Arm64InsnTest, RecipEstimateF32x4) {
  // Vector FRECPE: per-lane reciprocal estimates matching the scalar test's values.
  constexpr auto AsmFrecpe = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("frecpe %0.4s, %1.4s");
  __uint128_t estimates = AsmFrecpe(MakeF32x4(0.25f, 0.50f, 2.00f, 4.00f));
  ASSERT_EQ(estimates, MakeF32x4(3.9921875f, 1.99609375f, 0.4990234375f, 0.24951171875f));
}
2607
TEST(Arm64InsnTest, RecipStepF32) {
  // FRECPS computes 2.0 - a*b, the Newton-Raphson reciprocal refinement step.
  constexpr auto AsmFrecps = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("frecps %s0, %s1, %s2");
  ASSERT_EQ(AsmFrecps(bit_cast<uint32_t>(1.50f), bit_cast<uint32_t>(0.50f)),
            bit_cast<uint32_t>(1.25f));
  ASSERT_EQ(AsmFrecps(bit_cast<uint32_t>(2.00f), bit_cast<uint32_t>(0.50f)),
            bit_cast<uint32_t>(1.00f));
  ASSERT_EQ(AsmFrecps(bit_cast<uint32_t>(3.00f), bit_cast<uint32_t>(0.25f)),
            bit_cast<uint32_t>(1.25f));
  ASSERT_EQ(AsmFrecps(bit_cast<uint32_t>(3.00f), bit_cast<uint32_t>(0.50f)),
            bit_cast<uint32_t>(0.50f));
}
2619
TEST(Arm64InsnTest, RecipStepF64) {
  // FRECPS computes 2.0 - a*b, the Newton-Raphson reciprocal refinement step.
  constexpr auto AsmFrecps = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("frecps %d0, %d1, %d2");
  ASSERT_EQ(AsmFrecps(bit_cast<uint64_t>(1.50), bit_cast<uint64_t>(0.50)),
            bit_cast<uint64_t>(1.25));
  ASSERT_EQ(AsmFrecps(bit_cast<uint64_t>(2.00), bit_cast<uint64_t>(0.50)),
            bit_cast<uint64_t>(1.00));
  ASSERT_EQ(AsmFrecps(bit_cast<uint64_t>(3.00), bit_cast<uint64_t>(0.25)),
            bit_cast<uint64_t>(1.25));
  ASSERT_EQ(AsmFrecps(bit_cast<uint64_t>(3.00), bit_cast<uint64_t>(0.50)),
            bit_cast<uint64_t>(0.50));
}
2631
TEST(Arm64InsnTest, RecipStepF32x4) {
  // Vector FRECPS: per-lane 2.0 - a*b, mirroring the scalar test's input pairs.
  constexpr auto AsmFrecps = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("frecps %0.4s, %1.4s, %2.4s");
  __uint128_t lhs = MakeF32x4(1.50f, 2.00f, 3.00f, 3.00f);
  __uint128_t rhs = MakeF32x4(0.50f, 0.50f, 0.25f, 0.50f);
  ASSERT_EQ(AsmFrecps(lhs, rhs), MakeF32x4(1.25f, 1.00f, 1.25f, 0.50f));
}
2639
TEST(Arm64InsnTest, RecipStepF64x2) {
  // Vector FRECPS over two F64 lanes: per-lane 2.0 - a*b.
  constexpr auto AsmFrecps = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("frecps %0.2d, %1.2d, %2.2d");
  __uint128_t lhs_a = MakeF64x2(1.50, 2.00);
  __uint128_t rhs_a = MakeF64x2(0.50, 0.50);
  ASSERT_EQ(AsmFrecps(lhs_a, rhs_a), MakeF64x2(1.25, 1.00));
  __uint128_t lhs_b = MakeF64x2(3.00, 3.00);
  __uint128_t rhs_b = MakeF64x2(0.25, 0.50);
  ASSERT_EQ(AsmFrecps(lhs_b, rhs_b), MakeF64x2(1.25, 0.50));
}
2649
TEST(Arm64InsnTest, RecipSqrtEstimateF32) {
  // FRSQRTE produces a low-precision 1/sqrt(x) estimate; the expected values
  // below are the architecturally-defined table results, not exact 1/sqrt(x).
  constexpr auto AsmFrsqrte = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("frsqrte %s0, %s1");
  ASSERT_EQ(AsmFrsqrte(bit_cast<uint32_t>(2.0f)), bit_cast<uint32_t>(0.705078125f));
  ASSERT_EQ(AsmFrsqrte(bit_cast<uint32_t>(3.0f)), bit_cast<uint32_t>(0.576171875f));
  ASSERT_EQ(AsmFrsqrte(bit_cast<uint32_t>(4.0f)), bit_cast<uint32_t>(0.4990234375f));
  ASSERT_EQ(AsmFrsqrte(bit_cast<uint32_t>(5.0f)), bit_cast<uint32_t>(0.4462890625f));
}
2657
TEST(Arm64InsnTest, RecipSqrtEstimateF32x4) {
  // Vector FRSQRTE: per-lane 1/sqrt estimates matching the scalar test's values.
  constexpr auto AsmFrsqrte = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("frsqrte %0.4s, %1.4s");
  __uint128_t estimates = AsmFrsqrte(MakeF32x4(2.0f, 3.0f, 4.0f, 5.0f));
  ASSERT_EQ(estimates, MakeF32x4(0.705078125f, 0.576171875f, 0.4990234375f, 0.4462890625f));
}
2664
TEST(Arm64InsnTest, RecipSqrtEstimateF64) {
  // FRSQRTE on F64 scalars; same estimate table values as the F32 variant.
  constexpr auto AsmFrsqrte = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("frsqrte %d0, %d1");
  ASSERT_EQ(AsmFrsqrte(bit_cast<uint64_t>(2.0)), bit_cast<uint64_t>(0.705078125));
  ASSERT_EQ(AsmFrsqrte(bit_cast<uint64_t>(3.0)), bit_cast<uint64_t>(0.576171875));
  ASSERT_EQ(AsmFrsqrte(bit_cast<uint64_t>(4.0)), bit_cast<uint64_t>(0.4990234375));
  ASSERT_EQ(AsmFrsqrte(bit_cast<uint64_t>(5.0)), bit_cast<uint64_t>(0.4462890625));
}
2672
TEST(Arm64InsnTest, RecipSqrtEstimateF64x2) {
  // Vector FRSQRTE over two F64 lanes.
  constexpr auto AsmFrsqrte = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("frsqrte %0.2d, %1.2d");
  __uint128_t estimates = AsmFrsqrte(MakeF64x2(2.0, 3.0));
  __uint128_t expected =
      MakeUInt128(bit_cast<uint64_t>(0.705078125), bit_cast<uint64_t>(0.576171875));
  ASSERT_EQ(estimates, expected);
}
2679
TEST(Arm64InsnTest, RecipSqrtStepF32) {
  // FRSQRTS computes (3.0 - a*b) / 2.0, the Newton-Raphson 1/sqrt refinement step.
  constexpr auto AsmFrsqrts = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("frsqrts %s0, %s1, %s2");
  ASSERT_EQ(AsmFrsqrts(bit_cast<uint32_t>(1.50f), bit_cast<uint32_t>(0.50f)),
            bit_cast<uint32_t>(1.125f));
  ASSERT_EQ(AsmFrsqrts(bit_cast<uint32_t>(2.00f), bit_cast<uint32_t>(0.50f)),
            bit_cast<uint32_t>(1.000f));
  ASSERT_EQ(AsmFrsqrts(bit_cast<uint32_t>(3.00f), bit_cast<uint32_t>(0.25f)),
            bit_cast<uint32_t>(1.125f));
  ASSERT_EQ(AsmFrsqrts(bit_cast<uint32_t>(3.00f), bit_cast<uint32_t>(0.50f)),
            bit_cast<uint32_t>(0.750f));
}
2691
TEST(Arm64InsnTest, RecipSqrtStepF64) {
  // FRSQRTS computes (3.0 - a*b) / 2.0, the Newton-Raphson 1/sqrt refinement step.
  constexpr auto AsmFrsqrts = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("frsqrts %d0, %d1, %d2");
  ASSERT_EQ(AsmFrsqrts(bit_cast<uint64_t>(1.50), bit_cast<uint64_t>(0.50)),
            bit_cast<uint64_t>(1.125));
  ASSERT_EQ(AsmFrsqrts(bit_cast<uint64_t>(2.00), bit_cast<uint64_t>(0.50)),
            bit_cast<uint64_t>(1.000));
  ASSERT_EQ(AsmFrsqrts(bit_cast<uint64_t>(3.00), bit_cast<uint64_t>(0.25)),
            bit_cast<uint64_t>(1.125));
  ASSERT_EQ(AsmFrsqrts(bit_cast<uint64_t>(3.00), bit_cast<uint64_t>(0.50)),
            bit_cast<uint64_t>(0.750));
}
2703
TEST(Arm64InsnTest, RecipSqrtStepF32x4) {
  // Vector FRSQRTS: per-lane (3.0 - a*b) / 2.0, mirroring the scalar test's pairs.
  constexpr auto AsmFrsqrts = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("frsqrts %0.4s, %1.4s, %2.4s");
  __uint128_t lhs = MakeF32x4(1.50f, 2.00f, 3.00f, 3.00f);
  __uint128_t rhs = MakeF32x4(0.50f, 0.50f, 0.25f, 0.50f);
  ASSERT_EQ(AsmFrsqrts(lhs, rhs), MakeF32x4(1.125f, 1.000f, 1.125f, 0.750f));
}
2711
TEST(Arm64InsnTest, RecipSqrtStepF64x2) {
  // Vector FRSQRTS over two F64 lanes: per-lane (3.0 - a*b) / 2.0.
  constexpr auto AsmFrsqrts = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("frsqrts %0.2d, %1.2d, %2.2d");
  __uint128_t lhs_a = MakeF64x2(1.50, 2.00);
  __uint128_t rhs_a = MakeF64x2(0.50, 0.50);
  ASSERT_EQ(AsmFrsqrts(lhs_a, rhs_a), MakeF64x2(1.125, 1.000));
  __uint128_t lhs_b = MakeF64x2(3.00, 3.00);
  __uint128_t rhs_b = MakeF64x2(0.25, 0.50);
  ASSERT_EQ(AsmFrsqrts(lhs_b, rhs_b), MakeF64x2(1.125, 0.750));
}
2721
TEST(Arm64InsnTest, AddFp32) {
  // FADD on F32 scalars.
  constexpr auto AsmFadd = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fadd %s0, %s1, %s2");
  uint64_t lhs = 0x40d5c28fULL;  // 6.68 in float
  uint64_t rhs = 0x409f5c29ULL;  // 4.98 in float
  ASSERT_EQ(AsmFadd(lhs, rhs), MakeUInt128(0x413a8f5cULL, 0U));  // 11.66 in float
}
2728
TEST(Arm64InsnTest, AddFp64) {
  // FADD on F64 scalars.
  constexpr auto AsmFadd = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fadd %d0, %d1, %d2");
  uint64_t lhs = 0x402099999999999aULL;  // 8.30 in double
  uint64_t rhs = 0x4010ae147ae147aeULL;  // 4.17 in double
  ASSERT_EQ(AsmFadd(lhs, rhs), MakeUInt128(0x4028f0a3d70a3d71ULL, 0U));  // 12.47 in double
}
2735
TEST(Arm64InsnTest, AddF32x4) {
  // Vector FADD: per-lane addition of four F32 lanes.
  constexpr auto AsmFadd = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fadd %0.4s, %1.4s, %2.4s");
  __uint128_t lhs = MakeF32x4(-3.0f, 2.0f, 7.0f, -0.0f);
  __uint128_t rhs = MakeF32x4(6.0f, 1.0f, -8.0f, 5.0f);
  ASSERT_EQ(AsmFadd(lhs, rhs), MakeF32x4(3.0f, 3.0f, -1.0f, 5.0f));
}
2742
TEST(Arm64InsnTest, AddF64x2) {
  // Vector FADD: per-lane addition of two F64 lanes.
  constexpr auto AsmFadd = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fadd %0.2d, %1.2d, %2.2d");
  __uint128_t lhs = MakeF64x2(3.0, 5.0);
  __uint128_t rhs = MakeF64x2(-4.0, 2.0);
  ASSERT_EQ(AsmFadd(lhs, rhs), MakeF64x2(-1.0, 7.0));
}
2749
TEST(Arm64InsnTest, AddPairwiseF32x2) {
  // Scalar FADDP sums the two F32 lanes of the source's low 64 bits;
  // the upper lanes (4.0, 8.0) are ignored.
  constexpr auto AsmFaddp = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("faddp %s0, %1.2s");
  __uint128_t input = MakeF32x4(1.0f, 2.0f, 4.0f, 8.0f);
  ASSERT_EQ(AsmFaddp(input), bit_cast<uint32_t>(3.0f));
}
2755
TEST(Arm64InsnTest, AddPairwiseF32x4) {
  // Vector FADDP sums adjacent lane pairs: first operand's pairs fill the low
  // half of the result, second operand's pairs fill the high half.
  constexpr auto AsmFaddp = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("faddp %0.4s, %1.4s, %2.4s");
  __uint128_t lhs = MakeF32x4(-3.0f, 2.0f, 7.0f, -0.0f);
  __uint128_t rhs = MakeF32x4(6.0f, 1.0f, -8.0f, 5.0f);
  ASSERT_EQ(AsmFaddp(lhs, rhs), MakeF32x4(-1.0f, 7.0f, 7.0f, -3.0f));
}
2762
TEST(Arm64InsnTest, SubFp32) {
  // FSUB on F32 scalars.
  constexpr auto AsmFsub = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fsub %s0, %s1, %s2");
  uint64_t lhs = 0x411f5c29ULL;  // 9.96 in float
  uint64_t rhs = 0x404851ecULL;  // 3.13 in float
  ASSERT_EQ(AsmFsub(lhs, rhs), MakeUInt128(0x40da8f5cULL, 0U));  // 6.83 in float
}
2769
TEST(Arm64InsnTest, SubFp64) {
  // FSUB on F64 scalars.
  constexpr auto AsmFsub = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fsub %d0, %d1, %d2");
  uint64_t lhs = 0x401ee147ae147ae1ULL;  // 7.72 in double
  uint64_t rhs = 0x4015666666666666ULL;  // 5.35 in double
  ASSERT_EQ(AsmFsub(lhs, rhs), MakeUInt128(0x4002f5c28f5c28f6ULL, 0U));  // 2.37 in double
}
2776
TEST(Arm64InsnTest, SubF32x4) {
  // Vector FSUB: per-lane subtraction of four F32 lanes.
  constexpr auto AsmFsub = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fsub %0.4s, %1.4s, %2.4s");
  __uint128_t lhs = MakeF32x4(-3.0f, 2.0f, 7.0f, -0.0f);
  __uint128_t rhs = MakeF32x4(6.0f, 1.0f, -8.0f, 5.0f);
  ASSERT_EQ(AsmFsub(lhs, rhs), MakeF32x4(-9.0f, 1.0f, 15.0f, -5.0f));
}
2783
TEST(Arm64InsnTest, SubF64x2) {
  // Vector FSUB: per-lane subtraction of two F64 lanes.
  constexpr auto AsmFsub = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fsub %0.2d, %1.2d, %2.2d");
  __uint128_t lhs = MakeF64x2(3.0, 5.0);
  __uint128_t rhs = MakeF64x2(-4.0, 2.0);
  ASSERT_EQ(AsmFsub(lhs, rhs), MakeF64x2(7.0, 3.0));
}
2790
TEST(Arm64InsnTest, MaxFp32) {
  // FMAX on F32 scalars: returns the larger operand; a NaN in either
  // operand propagates (here as the default NaN).
  constexpr auto AsmFmax = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fmax %s0, %s1, %s2");
  uint32_t fp_arg_two = bit_cast<uint32_t>(2.0f);
  uint32_t fp_arg_three = bit_cast<uint32_t>(3.0f);

  ASSERT_EQ(AsmFmax(fp_arg_two, fp_arg_three), MakeU32x4(fp_arg_three, 0, 0, 0));
  ASSERT_EQ(AsmFmax(kDefaultNaN32, fp_arg_three), kDefaultNaN32);
  ASSERT_EQ(AsmFmax(fp_arg_three, kDefaultNaN32), kDefaultNaN32);
}
2800
TEST(Arm64InsnTest, MaxFp64) {
  // FMAX on F64 scalars: returns the larger operand; a NaN in either
  // operand propagates (here as the default NaN).
  constexpr auto AsmFmax = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fmax %d0, %d1, %d2");
  uint64_t fp_arg_two = bit_cast<uint64_t>(2.0);
  uint64_t fp_arg_three = bit_cast<uint64_t>(3.0);

  ASSERT_EQ(AsmFmax(fp_arg_two, fp_arg_three), MakeUInt128(fp_arg_three, 0U));
  ASSERT_EQ(AsmFmax(kDefaultNaN64, fp_arg_three), kDefaultNaN64);
  ASSERT_EQ(AsmFmax(fp_arg_three, kDefaultNaN64), kDefaultNaN64);
}
2810
TEST(Arm64InsnTest, MaxF32x4) {
  // Vector FMAX: per-lane maximum. +0.0 is treated as larger than -0.0,
  // and a NaN in either lane makes that lane NaN.
  constexpr auto AsmFmax = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fmax %0.4s, %1.4s, %2.4s");
  __uint128_t arg1 = MakeF32x4(-0.0f, 2.0f, 3.0f, -4.0f);
  __uint128_t arg2 = MakeF32x4(0.0f, 1.0f, -3.0f, -3.0f);
  ASSERT_EQ(AsmFmax(arg1, arg2), MakeF32x4(0.0f, 2.0f, 3.0f, -3.0f));

  // Lanes 1 and 3 contain a NaN (in opposite operands); both must come out NaN.
  __uint128_t arg3 = MakeF32x4(-0.0f, bit_cast<float>(kDefaultNaN32), 3.0f, -4.0f);
  __uint128_t arg4 = MakeF32x4(0.0f, 1.0f, -3.0f, bit_cast<float>(kDefaultNaN32));
  ASSERT_EQ(AsmFmax(arg3, arg4),
            MakeF32x4(0.0f, bit_cast<float>(kDefaultNaN32), 3.0f, bit_cast<float>(kDefaultNaN32)));
}
2822
TEST(Arm64InsnTest, MaxF64x2) {
  // Vector FMAX over two F64 lanes: +0.0 beats -0.0, and a NaN in either
  // lane makes that lane NaN.
  constexpr auto AsmFmax = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fmax %0.2d, %1.2d, %2.2d");
  __uint128_t arg1 = MakeF64x2(-0.0, 3.0);
  __uint128_t arg2 = MakeF64x2(0.0, -3.0);
  ASSERT_EQ(AsmFmax(arg1, arg2), MakeF64x2(0.0, 3.0));

  // One NaN per lane, in opposite operands; both lanes must come out NaN.
  __uint128_t arg3 = MakeF64x2(bit_cast<double>(kDefaultNaN64), 3.0);
  __uint128_t arg4 = MakeF64x2(1.0, bit_cast<double>(kDefaultNaN64));
  ASSERT_EQ(AsmFmax(arg3, arg4),
            MakeF64x2(bit_cast<double>(kDefaultNaN64), bit_cast<double>(kDefaultNaN64)));
}
2834
TEST(Arm64InsnTest, MaxNumberFp32) {
  // FMAXNM on F32 scalars: returns the larger operand, and treats a quiet
  // NaN operand as "missing" so the numeric operand wins.
  constexpr auto AsmFmaxnm = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fmaxnm %s0, %s1, %s2");
  uint32_t fp_arg_two = bit_cast<uint32_t>(2.0f);
  uint32_t fp_arg_three = bit_cast<uint32_t>(3.0f);
  // Fixed: this was bit_cast<uint64_t>(-2.0) — a double bit pattern whose low 32
  // bits are 0x00000000 (+0.0f) — so the NaN asserts below were really testing
  // fmaxnm(+0.0f, NaN), not fmaxnm(-2.0f, NaN). Now matches MinNumberFp32.
  uint32_t fp_arg_minus_two = bit_cast<uint32_t>(-2.0f);

  ASSERT_EQ(AsmFmaxnm(fp_arg_two, fp_arg_three), MakeU32x4(fp_arg_three, 0, 0, 0));

  ASSERT_EQ(AsmFmaxnm(fp_arg_two, kQuietNaN32), MakeU32x4(fp_arg_two, 0, 0, 0));
  ASSERT_EQ(AsmFmaxnm(fp_arg_minus_two, kQuietNaN32), MakeU32x4(fp_arg_minus_two, 0, 0, 0));
  ASSERT_EQ(AsmFmaxnm(kQuietNaN32, fp_arg_two), MakeU32x4(fp_arg_two, 0, 0, 0));
  ASSERT_EQ(AsmFmaxnm(kQuietNaN32, fp_arg_minus_two), MakeU32x4(fp_arg_minus_two, 0, 0, 0));
}
2848
TEST(Arm64InsnTest, MaxNumberFp64) {
  // FMAXNM on F64 scalars: returns the larger operand, and treats a quiet
  // NaN operand as "missing" so the numeric operand wins.
  constexpr auto AsmFmaxnm = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fmaxnm %d0, %d1, %d2");
  uint64_t fp_arg_two = bit_cast<uint64_t>(2.0);
  uint64_t fp_arg_three = bit_cast<uint64_t>(3.0);
  uint64_t fp_arg_minus_two = bit_cast<uint64_t>(-2.0);

  ASSERT_EQ(AsmFmaxnm(fp_arg_two, fp_arg_three), MakeUInt128(fp_arg_three, 0U));

  // Quiet NaN in either position loses to the numeric operand.
  ASSERT_EQ(AsmFmaxnm(fp_arg_two, kQuietNaN64), MakeUInt128(fp_arg_two, 0U));
  ASSERT_EQ(AsmFmaxnm(fp_arg_minus_two, kQuietNaN64), MakeUInt128(fp_arg_minus_two, 0));
  ASSERT_EQ(AsmFmaxnm(kQuietNaN64, fp_arg_two), MakeUInt128(fp_arg_two, 0));
  ASSERT_EQ(AsmFmaxnm(kQuietNaN64, fp_arg_minus_two), MakeUInt128(fp_arg_minus_two, 0));
}
2862
TEST(Arm64InsnTest, MinNumberFp32) {
  // FMINNM on F32 scalars: returns the smaller operand, and treats a quiet
  // NaN operand as "missing" so the numeric operand wins.
  constexpr auto AsmFminnm = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fminnm %s0, %s1, %s2");
  uint32_t fp_arg_two = bit_cast<uint32_t>(2.0f);
  uint32_t fp_arg_three = bit_cast<uint32_t>(3.0f);
  uint32_t fp_arg_minus_two = bit_cast<uint32_t>(-2.0f);

  ASSERT_EQ(AsmFminnm(fp_arg_two, fp_arg_three), MakeU32x4(fp_arg_two, 0, 0, 0));

  // Quiet NaN in either position loses to the numeric operand.
  ASSERT_EQ(AsmFminnm(fp_arg_two, kQuietNaN32), MakeU32x4(fp_arg_two, 0, 0, 0));
  ASSERT_EQ(AsmFminnm(fp_arg_minus_two, kQuietNaN32), MakeU32x4(fp_arg_minus_two, 0, 0, 0));
  ASSERT_EQ(AsmFminnm(kQuietNaN32, fp_arg_two), MakeU32x4(fp_arg_two, 0, 0, 0));
  ASSERT_EQ(AsmFminnm(kQuietNaN32, fp_arg_minus_two), MakeU32x4(fp_arg_minus_two, 0, 0, 0));
}
2876
TEST(Arm64InsnTest, MinNumberFp64) {
  // FMINNM on F64 scalars: returns the smaller operand, and treats a quiet
  // NaN operand as "missing" so the numeric operand wins.
  constexpr auto AsmFminnm = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fminnm %d0, %d1, %d2");
  uint64_t fp_arg_two = bit_cast<uint64_t>(2.0);
  uint64_t fp_arg_three = bit_cast<uint64_t>(3.0);
  uint64_t fp_arg_minus_two = bit_cast<uint64_t>(-2.0);

  ASSERT_EQ(AsmFminnm(fp_arg_two, fp_arg_three), MakeUInt128(fp_arg_two, 0U));

  // Quiet NaN in either position loses to the numeric operand.
  ASSERT_EQ(AsmFminnm(fp_arg_two, kQuietNaN64), MakeUInt128(fp_arg_two, 0U));
  ASSERT_EQ(AsmFminnm(fp_arg_minus_two, kQuietNaN64), MakeUInt128(fp_arg_minus_two, 0));
  ASSERT_EQ(AsmFminnm(kQuietNaN64, fp_arg_two), MakeUInt128(fp_arg_two, 0));
  ASSERT_EQ(AsmFminnm(kQuietNaN64, fp_arg_minus_two), MakeUInt128(fp_arg_minus_two, 0));
}
2890
TEST(Arm64InsnTest, MaxNumberF32x4) {
  // Vector FMAXNM: per-lane maximum, with quiet NaN lanes treated as
  // "missing" so the numeric lane wins.
  constexpr auto AsmFmaxnm = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fmaxnm %0.4s, %1.4s, %2.4s");
  __uint128_t arg1 = MakeF32x4(-1.0f, 2.0f, 3.0f, -4.0f);
  __uint128_t arg2 = MakeF32x4(2.0f, 1.0f, -3.0f, -3.0f);
  ASSERT_EQ(AsmFmaxnm(arg1, arg2), MakeF32x4(2.0f, 2.0f, 3.0f, -3.0f));

  // Quiet NaN in either operand of a lane loses to that lane's numeric value.
  __uint128_t arg3 =
      MakeU32x4(bit_cast<uint32_t>(1.0f), bit_cast<uint32_t>(-1.0f), kQuietNaN32, kQuietNaN32);
  __uint128_t arg4 =
      MakeU32x4(kQuietNaN32, kQuietNaN32, bit_cast<uint32_t>(1.0f), bit_cast<uint32_t>(-1.0f));
  ASSERT_EQ(AsmFmaxnm(arg3, arg4), MakeF32x4(1.0f, -1.0f, 1.0f, -1.0f));

  // NOTE(review): the original built signaling-NaN operands (kSignalingNaN32_1)
  // here but never asserted on them; the dead locals were removed.
  // TODO: add an assertion covering fmaxnm's signaling-NaN propagation once the
  // expected result bit pattern is confirmed against hardware.
}
2908
TEST(Arm64InsnTest, MaxNumberF64x2) {
  // Vector FMAXNM over two F64 lanes: quiet NaN lanes lose to numeric lanes.
  constexpr auto AsmFmaxnm = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fmaxnm %0.2d, %1.2d, %2.2d");
  __uint128_t lhs = MakeF64x2(-1.0, -4.0);
  __uint128_t rhs = MakeF64x2(2.0, -3.0);
  ASSERT_EQ(AsmFmaxnm(lhs, rhs), MakeF64x2(2.0, -3.0));

  // One quiet NaN per lane, in opposite operands; the numeric value wins both.
  __uint128_t nan_lhs = MakeUInt128(bit_cast<uint64_t>(1.0), kQuietNaN64);
  __uint128_t nan_rhs = MakeUInt128(kQuietNaN64, bit_cast<uint64_t>(-1.0));
  ASSERT_EQ(AsmFmaxnm(nan_lhs, nan_rhs), MakeF64x2(1.0, -1.0));
}
2919
TEST(Arm64InsnTest, MinNumberF32x4) {
  // Vector FMINNM: per-lane minimum (-0.0 beats +0.0), with quiet NaN lanes
  // treated as "missing" so the numeric lane wins.
  constexpr auto AsmFminnm = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fminnm %0.4s, %1.4s, %2.4s");
  __uint128_t lhs = MakeF32x4(0.0f, 2.0f, 3.0f, -4.0f);
  __uint128_t rhs = MakeF32x4(-0.0f, 1.0f, -3.0f, -3.0f);
  ASSERT_EQ(AsmFminnm(lhs, rhs), MakeF32x4(-0.0f, 1.0f, -3.0f, -4.0f));

  // One quiet NaN per lane, in alternating operands; the numeric value wins.
  __uint128_t nan_lhs =
      MakeU32x4(bit_cast<uint32_t>(1.0f), bit_cast<uint32_t>(-1.0f), kQuietNaN32, kQuietNaN32);
  __uint128_t nan_rhs =
      MakeU32x4(kQuietNaN32, kQuietNaN32, bit_cast<uint32_t>(1.0f), bit_cast<uint32_t>(-1.0f));
  ASSERT_EQ(AsmFminnm(nan_lhs, nan_rhs), MakeF32x4(1.0f, -1.0f, 1.0f, -1.0f));
}
2933
TEST(Arm64InsnTest, MinNumberF64x2) {
  // Vector FMINNM over two F64 lanes: -0.0 beats +0.0, quiet NaN lanes lose.
  constexpr auto AsmFminnm = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fminnm %0.2d, %1.2d, %2.2d");
  __uint128_t lhs = MakeF64x2(0.0, 3.0);
  __uint128_t rhs = MakeF64x2(-0.0, -3.0);
  ASSERT_EQ(AsmFminnm(lhs, rhs), MakeF64x2(-0.0, -3.0));

  // One quiet NaN per lane, in opposite operands; the numeric value wins both.
  __uint128_t nan_lhs = MakeUInt128(bit_cast<uint64_t>(1.0), kQuietNaN64);
  __uint128_t nan_rhs = MakeUInt128(kQuietNaN64, bit_cast<uint64_t>(-1.0));
  ASSERT_EQ(AsmFminnm(nan_lhs, nan_rhs), MakeF64x2(1.0, -1.0));
}
2945
TEST(Arm64InsnTest, MinFp32) {
  // FMIN on F32 scalars: returns the smaller operand; a NaN in either
  // operand propagates (here as the default NaN).
  constexpr auto AsmFmin = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fmin %s0, %s1, %s2");
  uint32_t fp_arg_two = bit_cast<uint32_t>(2.0f);
  uint32_t fp_arg_three = bit_cast<uint32_t>(3.0f);

  ASSERT_EQ(AsmFmin(fp_arg_two, fp_arg_three), MakeU32x4(fp_arg_two, 0, 0, 0));
  ASSERT_EQ(AsmFmin(kDefaultNaN32, fp_arg_three), kDefaultNaN32);
  ASSERT_EQ(AsmFmin(fp_arg_three, kDefaultNaN32), kDefaultNaN32);
}
2955
TEST(Arm64InsnTest, MinFp64) {
  // FMIN on F64 scalars: returns the smaller operand; a NaN in either
  // operand propagates (here as the default NaN).
  constexpr auto AsmFmin = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fmin %d0, %d1, %d2");
  uint64_t fp_arg_two = bit_cast<uint64_t>(2.0);
  uint64_t fp_arg_three = bit_cast<uint64_t>(3.0);

  ASSERT_EQ(AsmFmin(fp_arg_two, fp_arg_three), MakeUInt128(fp_arg_two, 0U));
  ASSERT_EQ(AsmFmin(kDefaultNaN64, fp_arg_three), kDefaultNaN64);
  ASSERT_EQ(AsmFmin(fp_arg_three, kDefaultNaN64), kDefaultNaN64);
}
2965
TEST(Arm64InsnTest, MinF32x4) {
  // Vector FMIN: per-lane minimum. -0.0 is treated as smaller than +0.0,
  // and a NaN in either lane makes that lane NaN.
  constexpr auto AsmFmin = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fmin %0.4s, %1.4s, %2.4s");
  __uint128_t arg1 = MakeF32x4(0.0f, 2.0f, 3.0f, -4.0f);
  __uint128_t arg2 = MakeF32x4(-0.0f, 1.0f, -3.0f, -3.0f);
  ASSERT_EQ(AsmFmin(arg1, arg2), MakeF32x4(-0.0f, 1.0f, -3.0f, -4.0f));

  // Lanes 1 and 3 contain a NaN (in opposite operands); both must come out NaN.
  __uint128_t arg3 = MakeF32x4(-0.0f, bit_cast<float>(kDefaultNaN32), 3.0f, -4.0f);
  __uint128_t arg4 = MakeF32x4(0.0f, 1.0f, -3.0f, bit_cast<float>(kDefaultNaN32));
  ASSERT_EQ(
      AsmFmin(arg3, arg4),
      MakeF32x4(-0.0f, bit_cast<float>(kDefaultNaN32), -3.0f, bit_cast<float>(kDefaultNaN32)));
}
2978
TEST(Arm64InsnTest, MinF64x2) {
  // Vector FMIN over two F64 lanes: -0.0 beats +0.0, and a NaN in either
  // lane makes that lane NaN.
  constexpr auto AsmFmin = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fmin %0.2d, %1.2d, %2.2d");
  __uint128_t lhs = MakeF64x2(0.0, 3.0);
  __uint128_t rhs = MakeF64x2(-0.0, -3.0);
  ASSERT_EQ(AsmFmin(lhs, rhs), MakeF64x2(-0.0, -3.0));

  // One NaN per lane, in opposite operands; both lanes must come out NaN.
  __uint128_t nan_lhs = MakeF64x2(bit_cast<double>(kDefaultNaN64), 3.0);
  __uint128_t nan_rhs = MakeF64x2(1.0, bit_cast<double>(kDefaultNaN64));
  ASSERT_EQ(AsmFmin(nan_lhs, nan_rhs),
            MakeF64x2(bit_cast<double>(kDefaultNaN64), bit_cast<double>(kDefaultNaN64)));
}
2990
TEST(Arm64InsnTest, MaxPairwiseF32Scalar) {
  // Scalar FMAXP takes the maximum of the two F32 lanes in the source's low
  // 64 bits; the upper lanes are ignored. A NaN input lane propagates.
  constexpr auto AsmFmaxp = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fmaxp %s0, %1.2s");
  __uint128_t plain = MakeF32x4(-3.0f, 2.0f, 7.0f, -0.0f);
  ASSERT_EQ(AsmFmaxp(plain), bit_cast<uint32_t>(2.0f));

  __uint128_t with_nan = MakeF32x4(bit_cast<float>(kDefaultNaN32), 2.0f, 7.0f, -0.0f);
  ASSERT_EQ(AsmFmaxp(with_nan), kDefaultNaN32);
}
2999
TEST(Arm64InsnTest, MaxPairwiseF32x4) {
  // Vector FMAXP takes the maximum of adjacent lane pairs: first operand's
  // pairs fill the low half of the result, second operand's pairs the high half.
  constexpr auto AsmFmaxp = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fmaxp %0.4s, %1.4s, %2.4s");
  __uint128_t arg1 = MakeF32x4(-3.0f, 2.0f, 7.0f, -0.0f);
  __uint128_t arg2 = MakeF32x4(6.0f, 1.0f, -8.0f, 5.0f);
  ASSERT_EQ(AsmFmaxp(arg1, arg2), MakeF32x4(2.0f, 7.0f, 6.0f, 5.0f));

  // A NaN in a pair makes that pair's result NaN.
  __uint128_t arg3 =
      MakeF32x4(bit_cast<float>(kDefaultNaN32), 2.0f, 7.0f, bit_cast<float>(kDefaultNaN32));
  __uint128_t arg4 = MakeF32x4(6.0f, 1.0f, -8.0f, 5.0f);
  ASSERT_EQ(AsmFmaxp(arg3, arg4),
            MakeF32x4(bit_cast<float>(kDefaultNaN32), bit_cast<float>(kDefaultNaN32), 6.0f, 5.0f));
}
3012
TEST(Arm64InsnTest, MinPairwiseF32Scalar) {
  // Scalar FMINP takes the minimum of the two F32 lanes in the source's low
  // 64 bits; the upper lanes are ignored. A NaN input lane propagates.
  constexpr auto AsmFminp = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fminp %s0, %1.2s");
  __uint128_t plain = MakeF32x4(-3.0f, 2.0f, 7.0f, -0.0f);
  ASSERT_EQ(AsmFminp(plain), bit_cast<uint32_t>(-3.0f));

  __uint128_t with_nan = MakeF32x4(bit_cast<float>(kDefaultNaN32), 2.0f, 7.0f, -0.0f);
  ASSERT_EQ(AsmFminp(with_nan), kDefaultNaN32);
}
3021
TEST(Arm64InsnTest, MinPairwiseF32x4) {
  // Vector FMINP takes the minimum of adjacent lane pairs: first operand's
  // pairs fill the low half of the result, second operand's pairs the high half.
  constexpr auto AsmFminp = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fminp %0.4s, %1.4s, %2.4s");
  __uint128_t arg1 = MakeF32x4(-3.0f, 2.0f, 7.0f, -0.0f);
  __uint128_t arg2 = MakeF32x4(6.0f, 1.0f, -8.0f, 5.0f);
  ASSERT_EQ(AsmFminp(arg1, arg2), MakeF32x4(-3.0f, -0.0f, 1.0f, -8.0f));

  // A NaN in a pair makes that pair's result NaN.
  __uint128_t arg3 =
      MakeF32x4(bit_cast<float>(kDefaultNaN32), 2.0f, 7.0f, bit_cast<float>(kDefaultNaN32));
  __uint128_t arg4 = MakeF32x4(6.0f, 1.0f, -8.0f, 5.0f);
  ASSERT_EQ(AsmFminp(arg3, arg4),
            MakeF32x4(bit_cast<float>(kDefaultNaN32), bit_cast<float>(kDefaultNaN32), 1.0f, -8.0f));
}
3034
TEST(Arm64InsnTest, MaxPairwiseNumberF32Scalar) {
  // Scalar FMAXNMP: pairwise maximum of the low two F32 lanes, with a quiet
  // NaN lane treated as "missing" so the numeric lane wins.
  constexpr auto AsmFmaxnmp = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fmaxnmp %s0, %1.2s");
  __uint128_t plain = MakeF32x4(-3.0f, 2.0f, 7.0f, -0.0f);
  ASSERT_EQ(AsmFmaxnmp(plain), bit_cast<uint32_t>(2.0f));

  __uint128_t with_nan = MakeF32x4(bit_cast<float>(kQuietNaN32), 2.0f, 7.0f, -0.0f);
  ASSERT_EQ(AsmFmaxnmp(with_nan), bit_cast<uint32_t>(2.0f));
}
3043
TEST(Arm64InsnTest, MaxPairwiseNumberF32x4) {
  // Vector FMAXNMP: pairwise maximum of adjacent lanes; quiet NaN lanes are
  // treated as "missing" so the numeric lane of each pair wins.
  constexpr auto AsmFmaxnmp = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fmaxnmp %0.4s, %1.4s, %2.4s");
  __uint128_t lhs = MakeF32x4(-3.0f, 2.0f, 7.0f, -0.0f);
  __uint128_t rhs = MakeF32x4(6.0f, 1.0f, -8.0f, 5.0f);
  ASSERT_EQ(AsmFmaxnmp(lhs, rhs), MakeF32x4(2.0f, 7.0f, 6.0f, 5.0f));

  __uint128_t nan_lhs =
      MakeF32x4(bit_cast<float>(kQuietNaN32), 2.0f, 7.0f, bit_cast<float>(kQuietNaN32));
  __uint128_t nan_rhs = MakeF32x4(6.0f, 1.0f, -8.0f, 5.0f);
  ASSERT_EQ(AsmFmaxnmp(nan_lhs, nan_rhs), MakeF32x4(2.0f, 7.0f, 6.0f, 5.0f));
}
3055
TEST(Arm64InsnTest, MinPairwiseNumberF32Scalar) {
  // Scalar FMINNMP: pairwise minimum of the low two F32 lanes, with a quiet
  // NaN lane treated as "missing" so the numeric lane wins.
  constexpr auto AsmFminnmp = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fminnmp %s0, %1.2s");
  __uint128_t plain = MakeF32x4(-3.0f, 2.0f, 7.0f, -0.0f);
  ASSERT_EQ(AsmFminnmp(plain), bit_cast<uint32_t>(-3.0f));

  __uint128_t with_nan = MakeF32x4(bit_cast<float>(kQuietNaN32), 2.0f, 7.0f, -0.0f);
  ASSERT_EQ(AsmFminnmp(with_nan), bit_cast<uint32_t>(2.0f));
}
3064
TEST(Arm64InsnTest, MinPairwiseNumberF32x4) {
  // Vector FMINNMP: pairwise minimum of adjacent lanes; quiet NaN lanes are
  // treated as "missing" so the numeric lane of each pair wins.
  constexpr auto AsmFminnmp = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fminnmp %0.4s, %1.4s, %2.4s");
  __uint128_t lhs = MakeF32x4(-3.0f, 2.0f, 7.0f, -0.0f);
  __uint128_t rhs = MakeF32x4(6.0f, 1.0f, -8.0f, 5.0f);
  ASSERT_EQ(AsmFminnmp(lhs, rhs), MakeF32x4(-3.0f, -0.0f, 1.0f, -8.0f));

  __uint128_t nan_lhs =
      MakeF32x4(bit_cast<float>(kQuietNaN32), 2.0f, 7.0f, bit_cast<float>(kQuietNaN32));
  __uint128_t nan_rhs = MakeF32x4(6.0f, 1.0f, -8.0f, 5.0f);
  ASSERT_EQ(AsmFminnmp(nan_lhs, nan_rhs), MakeF32x4(2.0f, 7.0f, 1.0f, -8.0f));
}
3076
TEST(Arm64InsnTest, MaxAcrossF32x4) {
  // FMAXV reduces all four F32 lanes to their maximum; a NaN lane propagates.
  constexpr auto AsmFmaxv = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fmaxv %s0, %1.4s");
  __uint128_t plain = MakeF32x4(0.0f, 2.0f, 3.0f, -4.0f);
  ASSERT_EQ(AsmFmaxv(plain), bit_cast<uint32_t>(3.0f));

  __uint128_t with_nan = MakeF32x4(0.0f, 2.0f, bit_cast<float>(kDefaultNaN32), -4.0f);
  ASSERT_EQ(AsmFmaxv(with_nan), kDefaultNaN32);
}
3085
TEST(Arm64InsnTest, MinAcrossF32x4) {
  // FMINV reduces all four F32 lanes to their minimum; a NaN lane propagates.
  constexpr auto AsmFminv = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fminv %s0, %1.4s");
  __uint128_t plain = MakeF32x4(0.0f, 2.0f, 3.0f, -4.0f);
  ASSERT_EQ(AsmFminv(plain), bit_cast<uint32_t>(-4.0f));

  __uint128_t with_nan = MakeF32x4(0.0f, 2.0f, bit_cast<float>(kDefaultNaN32), -4.0f);
  ASSERT_EQ(AsmFminv(with_nan), kDefaultNaN32);
}
3094
TEST(Arm64InsnTest, MaxNumberAcrossF32x4) {
  // FMAXNMV reduces all four F32 lanes to their maximum, ignoring quiet NaN lanes.
  constexpr auto AsmFmaxnmv = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fmaxnmv %s0, %1.4s");
  __uint128_t plain = MakeF32x4(0.0f, 2.0f, 3.0f, -4.0f);
  ASSERT_EQ(AsmFmaxnmv(plain), bit_cast<uint32_t>(3.0f));

  __uint128_t with_nan = MakeF32x4(0.0f, bit_cast<float>(kQuietNaN32), 3.0f, -4.0f);
  ASSERT_EQ(AsmFmaxnmv(with_nan), bit_cast<uint32_t>(3.0f));
}
3103
TEST(Arm64InsnTest, MinNumberAcrossF32x4) {
  // fminnmv: horizontal minNum; a quiet-NaN lane is ignored in favor of the
  // numeric lanes.
  constexpr auto AsmFminnmv = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fminnmv %s0, %1.4s");
  ASSERT_EQ(AsmFminnmv(MakeF32x4(0.0f, 2.0f, 3.0f, -4.0f)), bit_cast<uint32_t>(-4.0f));
  ASSERT_EQ(AsmFminnmv(MakeF32x4(0.0f, bit_cast<float>(kQuietNaN32), 3.0f, -4.0f)),
            bit_cast<uint32_t>(-4.0f));
}
3112
TEST(Arm64InsnTest, MulFp32) {
  // fmul (scalar, single precision): 5.05f * 6.84f == 34.5420f.
  uint64_t lhs_bits = 0x40a1999aULL;  // 5.05f
  uint64_t rhs_bits = 0x40dae148ULL;  // 6.84f
  __uint128_t actual = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fmul %s0, %s1, %s2")(lhs_bits, rhs_bits);
  ASSERT_EQ(actual, MakeUInt128(0x420a2b03ULL, 0U));  // 34.5420f
}
3119
TEST(Arm64InsnTest, MulFp64) {
  // fmul (scalar, double precision): 9.21 * 8.39 == 77.2719.
  uint64_t lhs_bits = 0x40226b851eb851ecULL;  // 9.21
  uint64_t rhs_bits = 0x4020c7ae147ae148ULL;  // 8.39
  __uint128_t actual = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fmul %d0, %d1, %d2")(lhs_bits, rhs_bits);
  ASSERT_EQ(actual, MakeUInt128(0x40535166cf41f214ULL, 0U));  // 77.2719
}
3126
TEST(Arm64InsnTest, MulF32x4) {
  // fmul (vector, 4 x single): lane-wise product.
  constexpr auto AsmFmul = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fmul %0.4s, %1.4s, %2.4s");
  __uint128_t lhs = MakeF32x4(1.0f, -2.0f, 3.0f, -4.0f);
  __uint128_t rhs = MakeF32x4(-3.0f, -1.0f, 4.0f, 1.0f);
  ASSERT_EQ(AsmFmul(lhs, rhs), MakeF32x4(-3.0f, 2.0f, 12.0f, -4.0f));
}
3133
TEST(Arm64InsnTest, MulF64x2) {
  // fmul (vector, 2 x double): lane-wise product.
  constexpr auto AsmFmul = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fmul %0.2d, %1.2d, %2.2d");
  __uint128_t lhs = MakeF64x2(-4.0, 2.0);
  __uint128_t rhs = MakeF64x2(2.0, 3.0);
  ASSERT_EQ(AsmFmul(lhs, rhs), MakeF64x2(-8.0, 6.0));
}
3140
TEST(Arm64InsnTest, MulF32x4ByScalar) {
  // fmul (vector by element): every lane of %1 is scaled by lane 3 of %2 (9.0f).
  __uint128_t vec = MakeF32x4(2.0f, 3.0f, 4.0f, 5.0f);
  __uint128_t scalars = MakeF32x4(6.0f, 7.0f, 8.0f, 9.0f);
  __uint128_t actual = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fmul %0.4s, %1.4s, %2.s[3]")(vec, scalars);
  ASSERT_EQ(actual, MakeF32x4(18.0f, 27.0f, 36.0f, 45.0f));
}
3147
TEST(Arm64InsnTest, MulF64x2ByScalar) {
  // fmul (vector by element): both lanes of %1 scaled by lane 1 of %2 (4.0).
  __uint128_t vec = MakeF64x2(2.0, 3.0);
  __uint128_t scalars = MakeF64x2(5.0, 4.0);
  __uint128_t actual = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fmul %0.2d, %1.2d, %2.d[1]")(vec, scalars);
  ASSERT_EQ(actual, MakeF64x2(8.0, 12.0));
}
3154
TEST(Arm64InsnTest, MulF32IndexedElem) {
  // fmul (scalar by element): 2.0f * lane 2 of %2 (17.0f) == 34.0f.
  constexpr auto AsmFmul = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fmul %s0, %s1, %2.s[2]");
  __uint128_t scalar = MakeF32x4(2.0f, 3.0f, 5.0f, 7.0f);
  __uint128_t elems = MakeF32x4(11.0f, 13.0f, 17.0f, 19.0f);
  ASSERT_EQ(AsmFmul(scalar, elems), bit_cast<uint32_t>(34.0f));
}
3161
TEST(Arm64InsnTest, MulF64IndexedElem) {
  // fmul (scalar by element): 2.0 * lane 1 of %2 (4.0) == 8.0.
  constexpr auto AsmFmul = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fmul %d0, %d1, %2.d[1]");
  __uint128_t scalar = MakeF64x2(2.0, 3.0);
  __uint128_t elems = MakeF64x2(5.0, 4.0);
  ASSERT_EQ(AsmFmul(scalar, elems), bit_cast<uint64_t>(8.0));
}
3168
TEST(Arm64InsnTest, MulExtendedF32) {
  // fmulx (scalar): for finite operands behaves like fmul; 2.0f * 11.0f.
  constexpr auto AsmFmulx = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fmulx %s0, %s1, %s2");
  __uint128_t lhs = MakeF32x4(2.0f, 3.0f, 5.0f, 7.0f);
  __uint128_t rhs = MakeF32x4(11.0f, 13.0f, 17.0f, 19.0f);
  ASSERT_EQ(AsmFmulx(lhs, rhs), bit_cast<uint32_t>(22.0f));
}
3175
TEST(Arm64InsnTest, MulExtendedF32x4) {
  // fmulx (vector): lane-wise products of finite operands.
  constexpr auto AsmFmulx = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fmulx %0.4s, %1.4s, %2.4s");
  __uint128_t lhs = MakeF32x4(2.0f, 3.0f, 5.0f, 7.0f);
  __uint128_t rhs = MakeF32x4(11.0f, 13.0f, 17.0f, 19.0f);
  ASSERT_EQ(AsmFmulx(lhs, rhs), MakeF32x4(22.0f, 39.0f, 85.0f, 133.0f));
}
3182
TEST(Arm64InsnTest, MulExtendedF32IndexedElem) {
  // fmulx (scalar by element): 2.0f * lane 2 of %2 (17.0f) == 34.0f.
  constexpr auto AsmFmulx = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fmulx %s0, %s1, %2.s[2]");
  __uint128_t scalar = MakeF32x4(2.0f, 3.0f, 5.0f, 7.0f);
  __uint128_t elems = MakeF32x4(11.0f, 13.0f, 17.0f, 19.0f);
  ASSERT_EQ(AsmFmulx(scalar, elems), bit_cast<uint32_t>(34.0f));
}
3189
TEST(Arm64InsnTest, MulExtendedF64IndexedElem) {
  // fmulx (scalar by element): 2.0 * lane 1 of %2 (4.0) == 8.0.
  constexpr auto AsmFmulx = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fmulx %d0, %d1, %2.d[1]");
  __uint128_t scalar = MakeF64x2(2.0, 3.0);
  __uint128_t elems = MakeF64x2(5.0, 4.0);
  ASSERT_EQ(AsmFmulx(scalar, elems), bit_cast<uint64_t>(8.0));
}
3196
TEST(Arm64InsnTest, MulExtendedF32x4IndexedElem) {
  // fmulx (vector by element): each lane of %1 times lane 2 of %2 (17.0f).
  constexpr auto AsmFmulx = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fmulx %0.4s, %1.4s, %2.s[2]");
  __uint128_t vec = MakeF32x4(2.0f, 3.0f, 5.0f, 7.0f);
  __uint128_t elems = MakeF32x4(11.0f, 13.0f, 17.0f, 19.0f);
  ASSERT_EQ(AsmFmulx(vec, elems), MakeF32x4(34.0f, 51.0f, 85.0f, 119.0f));
}
3203
TEST(Arm64InsnTest, MulNegFp32) {
  // fnmul (scalar): negated product, 2.0f * 3.0f -> -6.0f.
  uint64_t lhs_bits = bit_cast<uint32_t>(2.0f);
  uint64_t rhs_bits = bit_cast<uint32_t>(3.0f);
  __uint128_t actual = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fnmul %s0, %s1, %s2")(lhs_bits, rhs_bits);
  ASSERT_EQ(actual, MakeUInt128(bit_cast<uint32_t>(-6.0f), 0U));
}
3210
TEST(Arm64InsnTest, MulNegFp64) {
  // fnmul (scalar): negated product, 2.0 * 3.0 -> -6.0.
  uint64_t lhs_bits = bit_cast<uint64_t>(2.0);
  uint64_t rhs_bits = bit_cast<uint64_t>(3.0);
  __uint128_t actual = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fnmul %d0, %d1, %d2")(lhs_bits, rhs_bits);
  ASSERT_EQ(actual, MakeUInt128(bit_cast<uint64_t>(-6.0), 0U));
}
3217
TEST(Arm64InsnTest, DivFp32) {
  constexpr auto AsmFdiv = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fdiv %s0, %s1, %s2");

  // Ordinary quotient: 6.07f / 2.41f == 2.5186722f.
  uint32_t dividend = 0x40c23d71U;
  uint32_t divisor = 0x401a3d71U;
  ASSERT_EQ(AsmFdiv(dividend, divisor), MakeUInt128(0x402131edULL, 0U));

  // FDIV must be able to produce a denormal under the default FPCR, i.e.
  // with the FZ (flush-to-zero) bit clear.
  uint32_t tiny = 0xa876eff9U;  // unbiased exponent -47
  uint32_t huge = 0xe7d86b60U;  // unbiased exponent 80
  ASSERT_EQ(AsmFdiv(tiny, huge), MakeUInt128(0x0049065cULL, 0U));  // denormal
}
3231
TEST(Arm64InsnTest, DivFp64) {
  // fdiv (scalar, double): 7.59 / 1.16 == 6.5431034482758620995923593.
  uint64_t dividend_bits = 0x401e5c28f5c28f5cULL;  // 7.59
  uint64_t divisor_bits = 0x3ff28f5c28f5c28fULL;   // 1.16
  __uint128_t actual =
      ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fdiv %d0, %d1, %d2")(dividend_bits, divisor_bits);
  ASSERT_EQ(actual, MakeUInt128(0x401a2c234f72c235ULL, 0U));
}
3238
TEST(Arm64InsnTest, DivFp32_FlagsWhenDivByZero) {
  uint64_t fpsr;
  // volatile stops the compiler from folding the division at compile time,
  // which would skip the runtime flag-setting we are testing.
  volatile float dividend = 123.0f;
  volatile float divisor = 0.0f;
  float res;
  // Clear FPSR, perform the div-by-zero, and read FPSR back inside a single
  // asm block so no compiler-inserted FP ops can disturb the cumulative flags.
  asm volatile(
      "msr fpsr, xzr\n\t"
      "fdiv %s1, %s2, %s3\n\t"
      "mrs %0, fpsr"
      : "=r"(fpsr), "=w"(res)
      : "w"(dividend), "w"(divisor));
  // DZC (divide-by-zero cumulative) must be raised.
  ASSERT_TRUE((fpsr & kFpsrDzcBit) == (kFpsrDzcBit));

  // Previous bug caused IOC to be set upon scalar div by zero.
  ASSERT_TRUE((fpsr & kFpsrIocBit) == 0);
}
3255
TEST(Arm64InsnTest, DivFp64_FlagsWhenDivByZero) {
  uint64_t fpsr;
  double res;
  // Clear FPSR, perform the div-by-zero, and read FPSR back inside a single
  // asm block so no compiler-inserted FP ops can disturb the cumulative flags.
  asm volatile(
      "msr fpsr, xzr\n\t"
      "fdiv %d1, %d2, %d3\n\t"
      "mrs %0, fpsr"
      : "=r"(fpsr), "=w"(res)
      : "w"(123.0), "w"(0.0));
  // DZC (divide-by-zero cumulative) must be raised.
  ASSERT_TRUE((fpsr & kFpsrDzcBit) == (kFpsrDzcBit));

  // Previous bug caused IOC to be set upon scalar div by zero.
  ASSERT_TRUE((fpsr & kFpsrIocBit) == 0);
}
3270
TEST(Arm64InsnTest, DivFp32x4) {
  constexpr auto AsmFdiv = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fdiv %0.4s, %1.4s, %2.4s");

  // Lane-wise quotients of ordinary values.
  __uint128_t dividends = MakeUInt128(0x41831eb842a0926fULL,
                                      0x4224a3d742c4051fULL);  // 16.39, 80.286, 41.16, 98.01
  __uint128_t divisors = MakeUInt128(0x4154cccd4235cccdULL,
                                     0x40fc7ae1c02851ecULL);  // 13.3, 45.45, 7.89, -2.63
  // Expected: 1.2323308, 1.7664686, 5.21673, -37.26616
  ASSERT_EQ(AsmFdiv(dividends, divisors),
            MakeUInt128(0x3f9dbd043fe21ba5ULL, 0x40a6ef74c215108cULL));

  // Vector fdiv must also produce a denormal under the default FPCR
  // (flush-to-zero disabled).
  __uint128_t tiny = MakeF32x4(1.0f, 1.0f, 1.0f, -0x1.eddff2p-47f);
  __uint128_t huge = MakeF32x4(1.0f, 1.0f, 1.0f, -0x1.b0d6c0p80f);
  ASSERT_EQ(AsmFdiv(tiny, huge), MakeF32x4(1.0f, 1.0f, 1.0f, 0x0.920cb8p-126f));
}
3289
TEST(Arm64InsnTest, DivFp64x2) {
  // fdiv (vector, 2 x double): lane-wise quotient.
  __uint128_t dividends = MakeUInt128(0x4018EB851EB851ECULL, 0x40504147AE147AE1ULL);  // 6.23, 65.02
  __uint128_t divisors = MakeUInt128(0xC01E28F5C28F5C29ULL, 0x4027D70A3D70A3D7ULL);  // -7.54, 11.92
  __uint128_t actual =
      ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fdiv %0.2d, %1.2d, %2.2d")(dividends, divisors);
  // Expected: -0.82625994695, 5.45469798658
  ASSERT_EQ(actual, MakeUInt128(0xbfea70b8b3449564ULL, 0x4015d19c59579fc9ULL));
}
3299
TEST(Arm64InsnTest, MulAddFp32) {
  // fmadd (scalar): fused multiply-add, result = %3 + %1 * %2.
  constexpr auto AsmFmadd = ASM_INSN_WRAP_FUNC_W_RES_WWW_ARG("fmadd %s0, %s1, %s2, %s3");

  // 5 + 2*3 == 11.
  ASSERT_EQ(AsmFmadd(bit_cast<uint32_t>(2.0f), bit_cast<uint32_t>(3.0f), bit_cast<uint32_t>(5.0f)),
            MakeF32x4(11.0f, 0, 0, 0));

  // -5 + 2.5*2 == 0 (positive zero, not -0).
  ASSERT_EQ(
      AsmFmadd(bit_cast<uint32_t>(2.5f), bit_cast<uint32_t>(2.0f), bit_cast<uint32_t>(-5.0f)),
      MakeF32x4(0, 0, 0, 0));

  // The remaining cases verify that fmadd is truly fused: the product is not
  // rounded before the addition.
  __uint128_t fused1 = AsmFmadd(bit_cast<uint32_t>(0x1.fffffep22f),
                                bit_cast<uint32_t>(0x1.000002p0f),
                                bit_cast<uint32_t>(-0x1.p23f));
  ASSERT_EQ(fused1, MakeF32x4(0x1.fffffcp-2f, 0, 0, 0));

  __uint128_t fused2 = AsmFmadd(bit_cast<uint32_t>(0x1.fffffep22f),
                                bit_cast<uint32_t>(0x1.000002p0f),
                                bit_cast<uint32_t>(-0x1.fffffep22f));
  ASSERT_EQ(fused2, MakeF32x4(0x1.fffffep-1f, 0, 0, 0));

  __uint128_t fused3 = AsmFmadd(bit_cast<uint32_t>(0x1.p23f),
                                bit_cast<uint32_t>(0x1.fffffep-1f),
                                bit_cast<uint32_t>(-0x1.000002p23f));
  ASSERT_EQ(fused3, MakeF32x4(-0x1.80p0f, 0, 0, 0));
}
3327
TEST(Arm64InsnTest, MulAddFp64) {
  // fmadd computes %3 + %1 * %2; fnmadd negates that result.
  uint64_t factor1 = 0x40323d70a3d70a3dULL;  // 18.24
  uint64_t factor2 = 0x40504147ae147ae1ULL;  // 65.02
  uint64_t addend = 0x4027d70a3d70a3d7ULL;   // 11.92
  __uint128_t fmadd_res =
      ASM_INSN_WRAP_FUNC_W_RES_WWW_ARG("fmadd %d0, %d1, %d2, %d3")(factor1, factor2, addend);
  ASSERT_EQ(fmadd_res, MakeUInt128(0x4092b78a0902de00ULL, 0U));  // 1197.8848
  __uint128_t fnmadd_res =
      ASM_INSN_WRAP_FUNC_W_RES_WWW_ARG("fnmadd %d0, %d1, %d2, %d3")(factor1, factor2, addend);
  ASSERT_EQ(fnmadd_res, MakeUInt128(0xc092b78a0902de00ULL, 0U));  // -1197.8848
}
3338
TEST(Arm64InsnTest, MulAddFp64Precision) {
  // The operands are chosen so that rounding the product before the addition
  // would give a different answer — verifies fmadd is fused.
  uint64_t factor1 = bit_cast<uint64_t>(0x1.0p1023);
  uint64_t factor2 = bit_cast<uint64_t>(0x1.0p-1);
  uint64_t addend = bit_cast<uint64_t>(0x1.fffffffffffffp1022);
  __uint128_t actual =
      ASM_INSN_WRAP_FUNC_W_RES_WWW_ARG("fmadd %d0, %d1, %d2, %d3")(factor1, factor2, addend);
  ASSERT_EQ(actual, bit_cast<uint64_t>(0x1.7ffffffffffff8p1023));
}
3346
TEST(Arm64InsnTest, NegMulAddFp32) {
  // fnmadd (scalar): result = -(%3 + %1 * %2).
  constexpr auto AsmFnmadd = ASM_INSN_WRAP_FUNC_W_RES_WWW_ARG("fnmadd %s0, %s1, %s2, %s3");

  // -(5 + 2*3) == -11.
  ASSERT_EQ(
      AsmFnmadd(bit_cast<uint32_t>(2.0f), bit_cast<uint32_t>(3.0f), bit_cast<uint32_t>(5.0f)),
      MakeF32x4(-11.0f, 0, 0, 0));

  // -(-5 + 2.5*2) == 0; verifies the result is +0, not -0.
  ASSERT_EQ(
      AsmFnmadd(bit_cast<uint32_t>(2.5f), bit_cast<uint32_t>(2.0f), bit_cast<uint32_t>(-5.0f)),
      MakeF32x4(0.0f, 0, 0, 0));

  // The remaining cases verify that fnmadd keeps full precision through the
  // fused multiply-add before negating.
  __uint128_t fused1 = AsmFnmadd(bit_cast<uint32_t>(0x1.fffffep22f),
                                 bit_cast<uint32_t>(0x1.000002p0f),
                                 bit_cast<uint32_t>(-0x1.p23f));
  ASSERT_EQ(fused1, MakeF32x4(-0x1.fffffcp-2f, 0, 0, 0));

  __uint128_t fused2 = AsmFnmadd(bit_cast<uint32_t>(0x1.fffffep22f),
                                 bit_cast<uint32_t>(0x1.000002p0f),
                                 bit_cast<uint32_t>(-0x1.fffffep22f));
  ASSERT_EQ(fused2, MakeF32x4(-0x1.fffffep-1f, 0, 0, 0));

  __uint128_t fused3 = AsmFnmadd(bit_cast<uint32_t>(0x1.p23f),
                                 bit_cast<uint32_t>(0x1.fffffep-1f),
                                 bit_cast<uint32_t>(-0x1.000002p23f));
  ASSERT_EQ(fused3, MakeF32x4(0x1.80p0f, 0, 0, 0));
}
3375
TEST(Arm64InsnTest, NegMulAddFp64) {
  // fnmadd (scalar, double): result = -(%3 + %1 * %2).
  constexpr auto AsmFnmadd = ASM_INSN_WRAP_FUNC_W_RES_WWW_ARG("fnmadd %d0, %d1, %d2, %d3");

  // -(5 + 2*3) == -11.
  ASSERT_EQ(
      AsmFnmadd(bit_cast<uint64_t>(2.0), bit_cast<uint64_t>(3.0), bit_cast<uint64_t>(5.0)),
      MakeF64x2(-11.0, 0));

  // -(-5 + 2.5*2) == 0; verifies +0 rather than -0.
  ASSERT_EQ(
      AsmFnmadd(bit_cast<uint64_t>(2.5), bit_cast<uint64_t>(2.0), bit_cast<uint64_t>(-5.0)),
      MakeF64x2(0.0, 0));
}
3388
TEST(Arm64InsnTest, NegMulSubFp64) {
  // fnmsub (scalar): result = -%3 + %1 * %2.
  constexpr auto AsmFnmsub = ASM_INSN_WRAP_FUNC_W_RES_WWW_ARG("fnmsub %d0, %d1, %d2, %d3");

  // -5 + (-2)*3 == -11.
  ASSERT_EQ(
      AsmFnmsub(bit_cast<uint64_t>(-2.0), bit_cast<uint64_t>(3.0), bit_cast<uint64_t>(5.0)),
      MakeF64x2(-11.0, 0));

  // -120.65 + 21.48*57.78 == 1120.4644.
  uint64_t factor1 = 0x40357ae147ae147bULL;  // 21.48
  uint64_t factor2 = 0x404ce3d70a3d70a4ULL;  // 57.78
  uint64_t subtrahend = 0x405e29999999999aULL;  // 120.65
  ASSERT_EQ(AsmFnmsub(factor1, factor2, subtrahend), MakeUInt128(0x409181db8bac710dULL, 0U));

  // -5 + 2.5*2 == 0; verifies +0 rather than -0.
  ASSERT_EQ(
      AsmFnmsub(bit_cast<uint64_t>(2.5), bit_cast<uint64_t>(2.0), bit_cast<uint64_t>(5.0)),
      MakeF64x2(0.0, 0));
}
3407
TEST(Arm64InsnTest, NegMulSubFp64Precision) {
  // Operands chosen so that rounding the intermediate product would change
  // the answer — verifies fnmsub is fused.
  constexpr auto AsmFnmsub = ASM_INSN_WRAP_FUNC_W_RES_WWW_ARG("fnmsub %d0, %d1, %d2, %d3");
  __uint128_t actual = AsmFnmsub(bit_cast<uint64_t>(0x1.0p1023),
                                 bit_cast<uint64_t>(0x1.0p-1),
                                 bit_cast<uint64_t>(-0x1.fffffffffffffp1022));
  ASSERT_EQ(actual, bit_cast<uint64_t>(0x1.7ffffffffffff8p1023));
}
3416
TEST(Arm64InsnTest, MulAddF32x4) {
  // fmla (vector): accumulator lanes += lhs * rhs, lane-wise.
  constexpr auto AsmFmla = ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("fmla %0.4s, %1.4s, %2.4s");
  __uint128_t lhs = MakeF32x4(1.0f, 2.0f, 4.0f, 3.0f);
  __uint128_t rhs = MakeF32x4(3.0f, 1.0f, 2.0f, 4.0f);
  __uint128_t acc = MakeF32x4(2.0f, 3.0f, 1.0f, 2.0f);
  ASSERT_EQ(AsmFmla(lhs, rhs, acc), MakeF32x4(5.0f, 5.0f, 9.0f, 14.0f));
}
3424
TEST(Arm64InsnTest, MulAddF32IndexedElem) {
  // fmla (scalar by element): acc + lhs * rhs[2] == 2 + 1*2 == 4.
  constexpr auto AsmFmla = ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("fmla %s0, %s1, %2.s[2]");
  __uint128_t lhs = MakeF32x4(1.0f, 2.0f, 4.0f, 3.0f);
  __uint128_t rhs = MakeF32x4(3.0f, 1.0f, 2.0f, 4.0f);
  __uint128_t acc = MakeF32x4(2.0f, 3.0f, 1.0f, 2.0f);
  ASSERT_EQ(AsmFmla(lhs, rhs, acc), bit_cast<uint32_t>(4.0f));
}
3433
TEST(Arm64InsnTest, MulAddF64IndexedElem) {
  // fmla (scalar by element): acc + lhs * rhs[1] == 6 + 2*5 == 16.
  constexpr auto AsmFmla = ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("fmla %d0, %d1, %2.d[1]");
  __uint128_t lhs = MakeF64x2(2.0, 3.0);
  __uint128_t rhs = MakeF64x2(4.0, 5.0);
  __uint128_t acc = MakeF64x2(6.0, 7.0);
  ASSERT_EQ(AsmFmla(lhs, rhs, acc), bit_cast<uint64_t>(16.0));
}
3442
TEST(Arm64InsnTest, MulAddF64x2) {
  // fmla (vector, 2 x double): accumulator lanes += lhs * rhs, lane-wise.
  constexpr auto AsmFmla = ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("fmla %0.2d, %1.2d, %2.2d");
  // Use double literals: MakeF64x2 packs 64-bit doubles. The float-typed
  // literals previously here relied on silent float->double promotion,
  // inconsistent with every other F64x2 test in this file.
  __uint128_t arg1 = MakeF64x2(1.0, 2.0);
  __uint128_t arg2 = MakeF64x2(3.0, 1.0);
  __uint128_t arg3 = MakeF64x2(2.0, 3.0);
  // Per lane: 2 + 1*3 == 5, 3 + 2*1 == 5.
  ASSERT_EQ(AsmFmla(arg1, arg2, arg3), MakeF64x2(5.0, 5.0));
}
3450
TEST(Arm64InsnTest, MulAddF32x4IndexedElem) {
  // fmla (vector by element): acc lanes += lhs lanes * rhs[2] (== 2.0f).
  constexpr auto AsmFmla = ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("fmla %0.4s, %1.4s, %2.s[2]");
  __uint128_t lhs = MakeF32x4(1.0f, 2.0f, 4.0f, 3.0f);
  __uint128_t rhs = MakeF32x4(3.0f, 1.0f, 2.0f, 4.0f);
  __uint128_t acc = MakeF32x4(2.0f, 3.0f, 1.0f, 2.0f);
  ASSERT_EQ(AsmFmla(lhs, rhs, acc), MakeF32x4(4.0f, 7.0f, 9.0f, 8.0f));
}
3458
TEST(Arm64InsnTest, MulSubFp32) {
  // fmsub computes %3 - %1 * %2; fnmsub computes -%3 + %1 * %2.
  uint32_t factor1 = bit_cast<uint32_t>(2.0f);
  uint32_t factor2 = bit_cast<uint32_t>(5.0f);
  uint32_t addend = bit_cast<uint32_t>(3.0f);
  __uint128_t fmsub_res =
      ASM_INSN_WRAP_FUNC_W_RES_WWW_ARG("fmsub %s0, %s1, %s2, %s3")(factor1, factor2, addend);
  ASSERT_EQ(fmsub_res, MakeUInt128(bit_cast<uint32_t>(-7.0f), 0U));
  __uint128_t fnmsub_res =
      ASM_INSN_WRAP_FUNC_W_RES_WWW_ARG("fnmsub %s0, %s1, %s2, %s3")(factor1, factor2, addend);
  ASSERT_EQ(fnmsub_res, MakeUInt128(bit_cast<uint32_t>(7.0f), 0U));
}
3469
TEST(Arm64InsnTest, MulSubFp64) {
  // fmsub (scalar): result = %3 - %1 * %2.
  constexpr auto AsmFmsub = ASM_INSN_WRAP_FUNC_W_RES_WWW_ARG("fmsub %d0, %d1, %d2, %d3");

  // 120.65 - 21.48*57.78 == -1120.4644.
  uint64_t factor1 = 0x40357ae147ae147bULL;  // 21.48
  uint64_t factor2 = 0x404ce3d70a3d70a4ull;  // 57.78
  uint64_t minuend = 0x405e29999999999aULL;  // 120.65
  ASSERT_EQ(AsmFmsub(factor1, factor2, minuend), MakeUInt128(0xc09181db8bac710dULL, 0U));

  // -5 - 2*3 == -11.
  ASSERT_EQ(
      AsmFmsub(bit_cast<uint64_t>(2.0), bit_cast<uint64_t>(3.0), bit_cast<uint64_t>(-5.0)),
      MakeF64x2(-11.0, 0));

  // 5 - 2.5*2 == 0; verifies +0 rather than -0.
  ASSERT_EQ(
      AsmFmsub(bit_cast<uint64_t>(2.5), bit_cast<uint64_t>(2.0), bit_cast<uint64_t>(5.0)),
      MakeF64x2(0.0, 0));
}
3489
TEST(Arm64InsnTest, MulSubFp64Precision) {
  // Operands chosen so that rounding the intermediate product would change
  // the answer — verifies fmsub is fused.
  constexpr auto AsmFmsub = ASM_INSN_WRAP_FUNC_W_RES_WWW_ARG("fmsub %d0, %d1, %d2, %d3");
  __uint128_t actual = AsmFmsub(bit_cast<uint64_t>(-0x1.0p1023),
                                bit_cast<uint64_t>(0x1.0p-1),
                                bit_cast<uint64_t>(0x1.fffffffffffffp1022));
  ASSERT_EQ(actual, bit_cast<uint64_t>(0x1.7ffffffffffff8p1023));
}
3497
TEST(Arm64InsnTest, MulSubF32x4) {
  // fmls (vector): accumulator lanes -= lhs * rhs, lane-wise.
  constexpr auto AsmFmls = ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("fmls %0.4s, %1.4s, %2.4s");
  __uint128_t lhs = MakeF32x4(1.0f, 2.0f, 4.0f, 3.0f);
  __uint128_t rhs = MakeF32x4(3.0f, 1.0f, 2.0f, 4.0f);
  __uint128_t acc = MakeF32x4(2.0f, 3.0f, 1.0f, 2.0f);
  ASSERT_EQ(AsmFmls(lhs, rhs, acc), MakeF32x4(-1.0f, 1.0f, -7.0f, -10.0f));
}
3505
TEST(Arm64InsnTest, MulSubF32IndexedElem) {
  // fmls (scalar by element): acc - lhs * rhs[2] == 8 - 2*2 == 4.
  constexpr auto AsmFmls = ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("fmls %s0, %s1, %2.s[2]");
  __uint128_t lhs = MakeF32x4(2.0f, 1.0f, 4.0f, 3.0f);
  __uint128_t rhs = MakeF32x4(4.0f, 3.0f, 2.0f, 1.0f);
  __uint128_t acc = MakeF32x4(8.0f, 3.0f, 1.0f, 2.0f);
  ASSERT_EQ(AsmFmls(lhs, rhs, acc), bit_cast<uint32_t>(4.0f));
}
3514
TEST(Arm64InsnTest, MulSubF32x4IndexedElem) {
  // fmls (vector by element): acc lanes -= lhs lanes * rhs[2] (== 2.0f).
  constexpr auto AsmFmls = ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("fmls %0.4s, %1.4s, %2.s[2]");
  __uint128_t lhs = MakeF32x4(1.0f, 2.0f, 4.0f, 3.0f);
  __uint128_t rhs = MakeF32x4(3.0f, 1.0f, 2.0f, 4.0f);
  __uint128_t acc = MakeF32x4(2.0f, 3.0f, 1.0f, 2.0f);
  ASSERT_EQ(AsmFmls(lhs, rhs, acc), MakeF32x4(0.0f, -1.0f, -7.0f, -4.0f));
}
3522
TEST(Arm64InsnTest, MulSubF64x2) {
  // fmls (vector, 2 x double): accumulator lanes -= lhs * rhs, lane-wise.
  constexpr auto AsmFmls = ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("fmls %0.2d, %1.2d, %2.2d");
  // Use double literals: MakeF64x2 packs 64-bit doubles. The float-typed
  // literals previously here relied on silent float->double promotion,
  // inconsistent with every other F64x2 test in this file.
  __uint128_t arg1 = MakeF64x2(1.0, 2.0);
  __uint128_t arg2 = MakeF64x2(3.0, 1.0);
  __uint128_t arg3 = MakeF64x2(2.0, 3.0);
  // Per lane: 2 - 1*3 == -1, 3 - 2*1 == 1.
  ASSERT_EQ(AsmFmls(arg1, arg2, arg3), MakeF64x2(-1.0, 1.0));
}
3530
TEST(Arm64InsnTest, MulSubF64IndexedElem) {
  // fmls (scalar by element): acc - lhs * rhs[1] == 6 - 2*1 == 4.
  constexpr auto AsmFmls = ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("fmls %d0, %d1, %2.d[1]");
  __uint128_t arg1 = MakeF64x2(2.0, 5.0);
  __uint128_t arg2 = MakeF64x2(4.0, 1.0);
  // Fixed stray float literal (was 7.0f) in a double vector; the value is
  // identical after promotion, but double literals match the file's style.
  __uint128_t arg3 = MakeF64x2(6.0, 7.0);
  ASSERT_EQ(AsmFmls(arg1, arg2, arg3), bit_cast<uint64_t>(4.0));
}
3539
TEST(Arm64InsnTest, CompareEqualF32) {
  // fcmeq (scalar): all-ones mask on equality, zero otherwise; any NaN
  // operand compares unequal.
  constexpr auto AsmFcmeq = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fcmeq %s0, %s1, %s2");
  uint32_t two = bit_cast<uint32_t>(2.0f);
  uint32_t six = bit_cast<uint32_t>(6.0f);
  ASSERT_EQ(AsmFcmeq(two, two), 0xffffffffULL);
  ASSERT_EQ(AsmFcmeq(two, six), 0x00000000ULL);
  ASSERT_EQ(AsmFcmeq(kDefaultNaN32, two), 0x00000000ULL);
  ASSERT_EQ(AsmFcmeq(two, kDefaultNaN32), 0x00000000ULL);
}
3549
TEST(Arm64InsnTest, CompareEqualF32x4) {
  // fcmeq (vector): per-lane equality mask; only lane 1 (2.0f == 2.0f) sets.
  constexpr auto AsmFcmeq = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fcmeq %0.4s, %1.4s, %2.4s");
  ASSERT_EQ(AsmFcmeq(MakeF32x4(-3.0f, 2.0f, 7.0f, -0.0f), MakeF32x4(6.0f, 2.0f, -8.0f, 5.0f)),
            MakeUInt128(0xffffffff00000000ULL, 0x0000000000000000ULL));
}
3557
TEST(Arm64InsnTest, CompareGreaterEqualF32) {
  // fcmge (scalar): mask on >=; any NaN operand yields false.
  constexpr auto AsmFcmge = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fcmge %s0, %s1, %s2");
  uint32_t two = bit_cast<uint32_t>(2.0f);
  uint32_t six = bit_cast<uint32_t>(6.0f);
  ASSERT_EQ(AsmFcmge(two, two), 0xffffffffULL);
  ASSERT_EQ(AsmFcmge(six, two), 0xffffffffULL);
  ASSERT_EQ(AsmFcmge(two, six), 0x00000000ULL);
  ASSERT_EQ(AsmFcmge(kDefaultNaN32, two), 0x00000000ULL);
  ASSERT_EQ(AsmFcmge(two, kDefaultNaN32), 0x00000000ULL);
}
3568
TEST(Arm64InsnTest, CompareGreaterEqualF32x4) {
  // fcmge (vector): per-lane >= mask; lanes 1 (2>=2) and 3 (7>=-8) set.
  constexpr auto AsmFcmge = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fcmge %0.4s, %1.4s, %2.4s");
  ASSERT_EQ(AsmFcmge(MakeF32x4(-3.0f, 2.0f, 7.0f, -0.0f), MakeF32x4(6.0f, 2.0f, -8.0f, 5.0f)),
            MakeUInt128(0xffffffff00000000ULL, 0x00000000ffffffffULL));
}
3576
TEST(Arm64InsnTest, CompareGreaterF32) {
  // fcmgt (scalar): mask on strict >; equality and NaN both yield false.
  constexpr auto AsmFcmgt = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fcmgt %s0, %s1, %s2");
  uint32_t two = bit_cast<uint32_t>(2.0f);
  uint32_t six = bit_cast<uint32_t>(6.0f);
  ASSERT_EQ(AsmFcmgt(six, two), 0xffffffffULL);
  ASSERT_EQ(AsmFcmgt(two, six), 0x00000000ULL);
  ASSERT_EQ(AsmFcmgt(two, two), 0x00000000ULL);
  ASSERT_EQ(AsmFcmgt(kDefaultNaN32, two), 0x00000000ULL);
  ASSERT_EQ(AsmFcmgt(two, kDefaultNaN32), 0x00000000ULL);
}
3587
TEST(Arm64InsnTest, CompareGreaterF32x4) {
  // fcmgt (vector): per-lane strict > mask; only lane 2 (7 > -8) sets.
  constexpr auto AsmFcmgt = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fcmgt %0.4s, %1.4s, %2.4s");
  ASSERT_EQ(AsmFcmgt(MakeF32x4(-3.0f, 2.0f, 7.0f, -0.0f), MakeF32x4(6.0f, 2.0f, -8.0f, 5.0f)),
            MakeUInt128(0x0000000000000000ULL, 0x00000000ffffffffULL));
}
3595
TEST(Arm64InsnTest, CompareEqualZeroF32) {
  // fcmeq against immediate #0 (scalar).
  constexpr auto AsmFcmeq = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcmeq %s0, %s1, #0");
  ASSERT_EQ(AsmFcmeq(bit_cast<uint32_t>(0.0f)), 0xffffffffULL);
  ASSERT_EQ(AsmFcmeq(bit_cast<uint32_t>(4.0f)), 0x00000000ULL);
}
3601
TEST(Arm64InsnTest, CompareEqualZeroF32x4) {
  // fcmeq against #0 (vector): only the 0.0f lane (lane 1) sets its mask.
  constexpr auto AsmFcmeq = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcmeq %0.4s, %1.4s, #0");
  ASSERT_EQ(AsmFcmeq(MakeF32x4(-3.0f, 0.0f, 7.0f, 1.0f)),
            MakeUInt128(0xffffffff00000000ULL, 0x0000000000000000ULL));
}
3608
TEST(Arm64InsnTest, CompareGreaterThanZeroF32) {
  // fcmgt against #0 (scalar): strictly positive values only.
  constexpr auto AsmFcmgt = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcmgt %s0, %s1, #0");
  ASSERT_EQ(AsmFcmgt(bit_cast<uint32_t>(1.0f)), 0xffffffffULL);
  ASSERT_EQ(AsmFcmgt(bit_cast<uint32_t>(0.0f)), 0x00000000ULL);
  ASSERT_EQ(AsmFcmgt(bit_cast<uint32_t>(-1.0f)), 0x00000000ULL);
}
3615
TEST(Arm64InsnTest, CompareGreaterThanZeroF32x4) {
  // fcmgt against #0 (vector): lanes 2 and 3 (7.0f, 1.0f) are positive.
  constexpr auto AsmFcmgt = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcmgt %0.4s, %1.4s, #0");
  ASSERT_EQ(AsmFcmgt(MakeF32x4(-3.0f, 0.0f, 7.0f, 1.0f)),
            MakeUInt128(0x0000000000000000ULL, 0xffffffffffffffffULL));
}
3622
TEST(Arm64InsnTest, CompareGreaterThanOrEqualZeroF32) {
  // fcmge against #0 (scalar): zero counts as >= 0.
  constexpr auto AsmFcmge = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcmge %s0, %s1, #0");
  ASSERT_EQ(AsmFcmge(bit_cast<uint32_t>(1.0f)), 0xffffffffULL);
  ASSERT_EQ(AsmFcmge(bit_cast<uint32_t>(0.0f)), 0xffffffffULL);
  ASSERT_EQ(AsmFcmge(bit_cast<uint32_t>(-1.0f)), 0x00000000ULL);
}
3629
TEST(Arm64InsnTest, CompareGreaterThanOrEqualZeroF32x4) {
  // fcmge against #0 (vector): every lane except -3.0f is >= 0.
  constexpr auto AsmFcmge = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcmge %0.4s, %1.4s, #0");
  ASSERT_EQ(AsmFcmge(MakeF32x4(-3.0f, 0.0f, 7.0f, 1.0f)),
            MakeUInt128(0xffffffff00000000ULL, 0xffffffffffffffffULL));
}
3636
TEST(Arm64InsnTest, CompareLessThanZeroF32) {
  // fcmlt against #0 (scalar): strictly negative values only.
  constexpr auto AsmFcmlt = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcmlt %s0, %s1, #0");
  ASSERT_EQ(AsmFcmlt(bit_cast<uint32_t>(-1.0f)), 0xffffffffULL);
  ASSERT_EQ(AsmFcmlt(bit_cast<uint32_t>(0.0f)), 0x00000000ULL);
  ASSERT_EQ(AsmFcmlt(bit_cast<uint32_t>(1.0f)), 0x00000000ULL);
}
3643
TEST(Arm64InsnTest, CompareLessThanZeroF32x4) {
  // fcmlt against #0 (vector): only lane 0 (-3.0f) is negative.
  constexpr auto AsmFcmlt = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcmlt %0.4s, %1.4s, #0");
  ASSERT_EQ(AsmFcmlt(MakeF32x4(-3.0f, 0.0f, 7.0f, 1.0f)),
            MakeUInt128(0x00000000ffffffffULL, 0x0000000000000000ULL));
}
3650
TEST(Arm64InsnTest, CompareLessThanOrEqualZeroF32) {
  // fcmle against #0 (scalar): zero counts as <= 0.
  constexpr auto AsmFcmle = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcmle %s0, %s1, #0");
  ASSERT_EQ(AsmFcmle(bit_cast<uint32_t>(-1.0f)), 0xffffffffULL);
  ASSERT_EQ(AsmFcmle(bit_cast<uint32_t>(0.0f)), 0xffffffffULL);
  ASSERT_EQ(AsmFcmle(bit_cast<uint32_t>(1.0f)), 0x00000000ULL);
}
3657
TEST(Arm64InsnTest, CompareLessThanOrEqualZeroF32x4) {
  // fcmle against #0 (vector): lanes 0 and 1 (-3.0f, 0.0f) are <= 0.
  constexpr auto AsmFcmle = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcmle %0.4s, %1.4s, #0");
  ASSERT_EQ(AsmFcmle(MakeF32x4(-3.0f, 0.0f, 7.0f, 1.0f)),
            MakeUInt128(0xffffffffffffffffULL, 0x0000000000000000ULL));
}
3664
TEST(Arm64InsnTest, AbsoluteCompareGreaterThanF32) {
  // facgt (scalar): mask on |%1| > |%2|.
  constexpr auto AsmFacgt = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("facgt %s0, %s1, %s2");
  ASSERT_EQ(AsmFacgt(bit_cast<uint32_t>(-3.0f), bit_cast<uint32_t>(1.0f)), 0xffffffffULL);  // 3 > 1
  ASSERT_EQ(AsmFacgt(bit_cast<uint32_t>(1.0f), bit_cast<uint32_t>(-1.0f)), 0x00000000ULL);  // 1 !> 1
  ASSERT_EQ(AsmFacgt(bit_cast<uint32_t>(3.0f), bit_cast<uint32_t>(-7.0f)), 0x00000000ULL);  // 3 !> 7
}
3671
TEST(Arm64InsnTest, AbsoluteCompareGreaterThanOrEqualF32) {
  // facge (scalar): mask on |%1| >= |%2|.
  constexpr auto AsmFacge = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("facge %s0, %s1, %s2");
  ASSERT_EQ(AsmFacge(bit_cast<uint32_t>(-3.0f), bit_cast<uint32_t>(1.0f)), 0xffffffffULL);  // 3 >= 1
  ASSERT_EQ(AsmFacge(bit_cast<uint32_t>(1.0f), bit_cast<uint32_t>(-1.0f)), 0xffffffffULL);  // 1 >= 1
  ASSERT_EQ(AsmFacge(bit_cast<uint32_t>(3.0f), bit_cast<uint32_t>(-7.0f)), 0x00000000ULL);  // 3 < 7
}
3678
TEST(Arm64InsnTest, AbsoluteCompareGreaterThanF32x4) {
  // facgt (vector): per-lane |lhs| > |rhs|; lanes 0 (3>1) and 3 (4>2) set.
  constexpr auto AsmFacgt = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("facgt %0.4s, %1.4s, %2.4s");
  ASSERT_EQ(AsmFacgt(MakeF32x4(-3.0f, 1.0f, 3.0f, 4.0f), MakeF32x4(1.0f, -1.0f, -7.0f, 2.0f)),
            MakeUInt128(0x00000000ffffffffULL, 0xffffffff00000000ULL));
}
3685
TEST(Arm64InsnTest, AbsoluteCompareGreaterThanEqualF32x4) {
  // facge (vector): per-lane |lhs| >= |rhs|; only lane 2 (3 >= 7) fails.
  constexpr auto AsmFacge = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("facge %0.4s, %1.4s, %2.4s");
  ASSERT_EQ(AsmFacge(MakeF32x4(-3.0f, 1.0f, 3.0f, 4.0f), MakeF32x4(1.0f, -1.0f, -7.0f, 2.0f)),
            MakeUInt128(0xffffffffffffffffULL, 0xffffffff00000000ULL));
}
3692
TEST(Arm64InsnTest, CompareEqualF64) {
  // fcmeq (scalar, double): mask on equality; any NaN operand yields false.
  constexpr auto AsmFcmeq = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fcmeq %d0, %d1, %d2");
  uint64_t two = bit_cast<uint64_t>(2.0);
  uint64_t six = bit_cast<uint64_t>(6.0);
  ASSERT_EQ(AsmFcmeq(two, two), 0xffffffffffffffffULL);
  ASSERT_EQ(AsmFcmeq(two, six), 0x0000000000000000ULL);
  ASSERT_EQ(AsmFcmeq(kDefaultNaN64, two), 0x0000000000000000ULL);
  ASSERT_EQ(AsmFcmeq(two, kDefaultNaN64), 0x0000000000000000ULL);
}
3702
TEST(Arm64InsnTest, CompareEqualF64x2) {
  // fcmeq (vector, 2 x double): per-lane equality mask.
  constexpr auto AsmFcmeq = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fcmeq %0.2d, %1.2d, %2.2d");
  // Only the upper lane (2.0 == 2.0) matches.
  ASSERT_EQ(AsmFcmeq(MakeF64x2(-3.0, 2.0), MakeF64x2(6.0, 2.0)),
            MakeUInt128(0x0000000000000000ULL, 0xffffffffffffffffULL));
  // No lane matches.
  ASSERT_EQ(AsmFcmeq(MakeF64x2(7.0, -0.0), MakeF64x2(-8.0, 5.0)),
            MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
}
3714
TEST(Arm64InsnTest, CompareGreaterEqualF64) {
  // fcmge (scalar, double): mask on >=; any NaN operand yields false.
  constexpr auto AsmFcmge = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fcmge %d0, %d1, %d2");
  uint64_t two = bit_cast<uint64_t>(2.0);
  uint64_t six = bit_cast<uint64_t>(6.0);
  ASSERT_EQ(AsmFcmge(two, two), 0xffffffffffffffffULL);
  ASSERT_EQ(AsmFcmge(six, two), 0xffffffffffffffffULL);
  ASSERT_EQ(AsmFcmge(two, six), 0x0000000000000000ULL);
  ASSERT_EQ(AsmFcmge(kDefaultNaN64, two), 0x0000000000000000ULL);
  ASSERT_EQ(AsmFcmge(two, kDefaultNaN64), 0x0000000000000000ULL);
}
3725
TEST(Arm64InsnTest, CompareGreaterEqualF64x2) {
  // fcmge (vector, 2 x double): per-lane >= mask.
  constexpr auto AsmFcmge = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fcmge %0.2d, %1.2d, %2.2d");
  // Upper lane: 2.0 >= 2.0 holds; lower lane: -3.0 >= 6.0 does not.
  ASSERT_EQ(AsmFcmge(MakeF64x2(-3.0, 2.0), MakeF64x2(6.0, 2.0)),
            MakeUInt128(0x0000000000000000ULL, 0xffffffffffffffffULL));
  // Lower lane: 7.0 >= -8.0 holds; upper lane: -0.0 >= 5.0 does not.
  ASSERT_EQ(AsmFcmge(MakeF64x2(7.0, -0.0), MakeF64x2(-8.0, 5.0)),
            MakeUInt128(0xffffffffffffffffULL, 0x0000000000000000ULL));
}
3737
TEST(Arm64InsnTest,CompareGreaterF64)3738 TEST(Arm64InsnTest, CompareGreaterF64) {
3739 constexpr auto AsmFcmgt = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fcmgt %d0, %d1, %d2");
3740 uint64_t two = bit_cast<uint64_t>(2.0);
3741 uint64_t six = bit_cast<uint64_t>(6.0);
3742 ASSERT_EQ(AsmFcmgt(two, six), 0x0000000000000000ULL);
3743 ASSERT_EQ(AsmFcmgt(two, two), 0x0000000000000000ULL);
3744 ASSERT_EQ(AsmFcmgt(six, two), 0xffffffffffffffffULL);
3745 ASSERT_EQ(AsmFcmgt(kDefaultNaN64, two), 0x0000000000000000ULL);
3746 ASSERT_EQ(AsmFcmgt(two, kDefaultNaN64), 0x0000000000000000ULL);
3747 }
3748
TEST(Arm64InsnTest,CompareGreaterF64x2)3749 TEST(Arm64InsnTest, CompareGreaterF64x2) {
3750 constexpr auto AsmFcmgt = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fcmgt %0.2d, %1.2d, %2.2d");
3751 __uint128_t arg1 = MakeF64x2(-3.0, 2.0);
3752 __uint128_t arg2 = MakeF64x2(6.0, 2.0);
3753 __uint128_t res = AsmFcmgt(arg1, arg2);
3754 ASSERT_EQ(res, MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
3755 arg1 = MakeF64x2(7.0, -0.0);
3756 arg2 = MakeF64x2(-8.0, 5.0);
3757 res = AsmFcmgt(arg1, arg2);
3758 ASSERT_EQ(res, MakeUInt128(0xffffffffffffffffULL, 0x0000000000000000ULL));
3759 }
3760
// Bitwise AND over the full 128-bit register.
TEST(Arm64InsnTest, AndInt8x16) {
  __uint128_t op1 = MakeUInt128(0x7781857780532171ULL, 0x2268066130019278ULL);
  __uint128_t op2 = MakeUInt128(0x0498862723279178ULL, 0x6085784383827967ULL);
  __uint128_t rd = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("and %0.16b, %1.16b, %2.16b")(op1, op2);
  ASSERT_EQ(rd, MakeUInt128(0x0480842700030170ULL, 0x2000004100001060ULL));
}

// 64-bit form: operates on the low half and zeroes the upper half.
TEST(Arm64InsnTest, AndInt8x8) {
  __uint128_t op1 = MakeUInt128(0x7781857780532171ULL, 0x2268066130019278ULL);
  __uint128_t op2 = MakeUInt128(0x0498862723279178ULL, 0x6085784383827967ULL);
  __uint128_t rd = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("and %0.8b, %1.8b, %2.8b")(op1, op2);
  ASSERT_EQ(rd, MakeUInt128(0x0480842700030170ULL, 0));
}

// Bitwise OR, 128-bit form.
TEST(Arm64InsnTest, OrInt8x16) {
  __uint128_t op1 = MakeUInt128(0x00ffaa5500112244ULL, 0x1248124812481248ULL);
  __uint128_t op2 = MakeUInt128(0x44221100ffaa5500ULL, 0x1122448811224488ULL);
  __uint128_t rd = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("orr %0.16b, %1.16b, %2.16b")(op1, op2);
  ASSERT_EQ(rd, MakeUInt128(0x44ffbb55ffbb7744ULL, 0x136a56c8136a56c8ULL));
}

// Bitwise OR, 64-bit form: upper half of the result is zero.
TEST(Arm64InsnTest, OrInt8x8) {
  __uint128_t op1 = MakeUInt128(0x00ffaa5500112244ULL, 0x1248124812481248ULL);
  __uint128_t op2 = MakeUInt128(0x44221100ffaa5500ULL, 0x1122448811224488ULL);
  __uint128_t rd = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("orr %0.8b, %1.8b, %2.8b")(op1, op2);
  ASSERT_EQ(rd, MakeUInt128(0x44ffbb55ffbb7744ULL, 0));
}

// Bitwise XOR, 128-bit form.
TEST(Arm64InsnTest, XorInt8x16) {
  __uint128_t op1 = MakeUInt128(0x1050792279689258ULL, 0x9235420199561121ULL);
  __uint128_t op2 = MakeUInt128(0x8239864565961163ULL, 0x5488623057745649ULL);
  __uint128_t rd = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("eor %0.16b, %1.16b, %2.16b")(op1, op2);
  ASSERT_EQ(rd, MakeUInt128(0x9269ff671cfe833bULL, 0xc6bd2031ce224768ULL));
}

// Bitwise XOR, 64-bit form.
TEST(Arm64InsnTest, XorInt8x8) {
  __uint128_t op1 = MakeUInt128(0x1050792279689258ULL, 0x9235420199561121ULL);
  __uint128_t op2 = MakeUInt128(0x8239864565961163ULL, 0x5488623057745649ULL);
  __uint128_t rd = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("eor %0.8b, %1.8b, %2.8b")(op1, op2);
  ASSERT_EQ(rd, MakeUInt128(0x9269ff671cfe833bULL, 0));
}

// bic (register): op1 AND NOT op2, 128-bit form.
TEST(Arm64InsnTest, AndNotInt8x16) {
  __uint128_t op1 = MakeUInt128(0x0313783875288658ULL, 0x7533208381420617ULL);
  __uint128_t op2 = MakeUInt128(0x2327917860857843ULL, 0x8382796797668145ULL);
  __uint128_t rd = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("bic %0.16b, %1.16b, %2.16b")(op1, op2);
  ASSERT_EQ(rd, MakeUInt128(0x0010680015288618ULL, 0x7431008000000612ULL));
}

// bic (register), 64-bit form.
TEST(Arm64InsnTest, AndNotInt8x8) {
  __uint128_t op1 = MakeUInt128(0x4861045432664821ULL, 0x2590360011330530ULL);
  __uint128_t op2 = MakeUInt128(0x5420199561121290ULL, 0x8572424541506959ULL);
  __uint128_t rd = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("bic %0.8b, %1.8b, %2.8b")(op1, op2);
  ASSERT_EQ(rd, MakeUInt128(0x0841044012644821ULL, 0x0000000000000000ULL));
}
3816
// bic (immediate, 4h): clears bits 0-1 in each 16-bit lane of the low half.
TEST(Arm64InsnTest, AndNotInt16x4Imm) {
  __uint128_t res = MakeUInt128(0x9690314950191085ULL, 0x7598442391986291ULL);

  asm("bic %0.4h, #0x3" : "=w"(res) : "0"(res));

  ASSERT_EQ(res, MakeUInt128(0x9690314850181084ULL, 0x0000000000000000ULL));
}

// bic (immediate, 4h) with the immediate shifted left by 8.
TEST(Arm64InsnTest, AndNotInt16x4ImmShiftedBy8) {
  __uint128_t res = MakeUInt128(0x8354056704038674ULL, 0x3513622224771589ULL);

  asm("bic %0.4h, #0xa8, lsl #8" : "=w"(res) : "0"(res));

  ASSERT_EQ(res, MakeUInt128(0x0354056704030674ULL, 0x0000000000000000ULL));
}

// bic (immediate, 2s) with shift #8: clears 0x0000d300 in each 32-bit lane.
TEST(Arm64InsnTest, AndNotInt32x2ImmShiftedBy8) {
  __uint128_t res = MakeUInt128(0x1842631298608099ULL, 0x8886874132604721ULL);

  asm("bic %0.2s, #0xd3, lsl #8" : "=w"(res) : "0"(res));

  ASSERT_EQ(res, MakeUInt128(0x1842201298600099ULL, 0x0000000000000000ULL));
}

// bic (immediate, 2s) with shift #16.
TEST(Arm64InsnTest, AndNotInt32x2ImmShiftedBy16) {
  __uint128_t res = MakeUInt128(0x2947867242292465ULL, 0x4366800980676928ULL);

  asm("bic %0.2s, #0x22, lsl #16" : "=w"(res) : "0"(res));

  ASSERT_EQ(res, MakeUInt128(0x2945867242092465ULL, 0x0000000000000000ULL));
}

// bic (immediate, 2s) with shift #24, the largest 32-bit shift position.
TEST(Arm64InsnTest, AndNotInt32x2ImmShiftedBy24) {
  __uint128_t res = MakeUInt128(0x0706977942236250ULL, 0x8221688957383798ULL);

  asm("bic %0.2s, #0x83, lsl #24" : "=w"(res) : "0"(res));

  ASSERT_EQ(res, MakeUInt128(0x0406977940236250ULL, 0x0000000000000000ULL));
}

// orr (immediate, 4h): sets bits 0 and 2 in each 16-bit lane of the low half.
TEST(Arm64InsnTest, OrInt16x4Imm) {
  __uint128_t res = MakeUInt128(0x0841284886269456ULL, 0x0424196528502221ULL);

  asm("orr %0.4h, #0x5" : "=w"(res) : "0"(res));

  ASSERT_EQ(res, MakeUInt128(0x0845284d86279457ULL, 0x0000000000000000ULL));
}
3864
// orn: op1 OR NOT op2, 128-bit form.
TEST(Arm64InsnTest, OrNotInt8x16) {
  __uint128_t op1 = MakeUInt128(0x5428584447952658ULL, 0x6782105114135473ULL);
  __uint128_t op2 = MakeUInt128(0x3558764024749647ULL, 0x3263914199272604ULL);
  __uint128_t rd = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("orn %0.16b, %1.16b, %2.16b")(op1, op2);
  ASSERT_EQ(rd, MakeUInt128(0xdeafd9ffdf9f6ff8ULL, 0xef9e7eff76dbddfbULL));
}

// orn, 64-bit form: upper half of the result is zero.
TEST(Arm64InsnTest, OrNotInt8x8) {
  __uint128_t op1 = MakeUInt128(0x3279178608578438ULL, 0x3827967976681454ULL);
  __uint128_t op2 = MakeUInt128(0x6838689427741559ULL, 0x9185592524595395ULL);
  __uint128_t rd = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("orn %0.8b, %1.8b, %2.8b")(op1, op2);
  ASSERT_EQ(rd, MakeUInt128(0xb7ff97efd8dfeebeULL, 0x0000000000000000ULL));
}

// bsl: bitwise select — the destination (op3) acts as the mask choosing
// between op1 (mask bit set) and op2 (mask bit clear).
TEST(Arm64InsnTest, BitwiseSelectInt8x8) {
  __uint128_t op1 = MakeUInt128(0x2000568127145263ULL, 0x5608277857713427ULL);
  __uint128_t op2 = MakeUInt128(0x0792279689258923ULL, 0x5420199561121290ULL);
  __uint128_t op3 = MakeUInt128(0x8372978049951059ULL, 0x7317328160963185ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("bsl %0.8b, %1.8b, %2.8b")(op1, op2, op3);
  ASSERT_EQ(res, MakeUInt128(0x0480369681349963ULL, 0x0000000000000000ULL));
}

// bit: insert bits from op1 into the destination where op2's bits are set.
TEST(Arm64InsnTest, BitwiseInsertIfTrueInt8x8) {
  __uint128_t op1 = MakeUInt128(0x3678925903600113ULL, 0x3053054882046652ULL);
  __uint128_t op2 = MakeUInt128(0x9326117931051185ULL, 0x4807446237996274ULL);
  __uint128_t op3 = MakeUInt128(0x6430860213949463ULL, 0x9522473719070217ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("bit %0.8b, %1.8b, %2.8b")(op1, op2, op3);
  ASSERT_EQ(res, MakeUInt128(0x7630965b03908563ULL, 0x0000000000000000ULL));
}

// bif: insert bits from op1 into the destination where op2's bits are clear.
TEST(Arm64InsnTest, BitwiseInsertIfFalseInt8x8) {
  __uint128_t op1 = MakeUInt128(0x7067982148086513ULL, 0x2823066470938446ULL);
  __uint128_t op2 = MakeUInt128(0x5964462294895493ULL, 0x0381964428810975ULL);
  __uint128_t op3 = MakeUInt128(0x0348610454326648ULL, 0x2133936072602491ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("bif %0.8b, %1.8b, %2.8b")(op1, op2, op3);
  ASSERT_EQ(res, MakeUInt128(0x2143d8015c006500ULL, 0x0000000000000000ULL));
}
3902
// sshr (scalar): arithmetic shift right sign-extends the input.
TEST(Arm64InsnTest, ArithmeticShiftRightInt64x1) {
  __uint128_t arg = MakeUInt128(0x9486015046652681ULL, 0x4398770516153170ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("sshr %d0, %d1, #39")(arg);
  ASSERT_EQ(res, MakeUInt128(0xffffffffff290c02ULL, 0x0000000000000000ULL));
}

// sshr by 64 (the maximum encodable shift): negative input collapses to -1.
TEST(Arm64InsnTest, ArithmeticShiftRightBy64Int64x1) {
  __uint128_t arg = MakeUInt128(0x9176042601763387ULL, 0x0454990176143641ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("sshr %d0, %d1, #64")(arg);
  ASSERT_EQ(res, MakeUInt128(0xffffffffffffffffULL, 0x0000000000000000ULL));
}

// sshr (vector, 2d): both lanes shifted independently.
TEST(Arm64InsnTest, ArithmeticShiftRightInt64x2) {
  __uint128_t arg = MakeUInt128(0x7501116498327856ULL, 0x3531614516845769ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("sshr %0.2d, %1.2d, #35")(arg);
  ASSERT_EQ(res, MakeUInt128(0x000000000ea0222cULL, 0x0000000006a62c28ULL));
}

// ssra: shifted arg1 is added to the destination (arg2).
TEST(Arm64InsnTest, ArithmeticShiftRightAccumulateInt64x1) {
  __uint128_t arg1 = MakeUInt128(0x9667179643468760ULL, 0x0770479995378833ULL);
  __uint128_t arg2 = MakeUInt128(0x2557176908196030ULL, 0x9201824018842705ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W0_ARG("ssra %d0, %d1, #40")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x2557176907afc747ULL, 0x0000000000000000ULL));
}

// ssra by 64: a negative arg1 contributes -1, so the result is arg2 - 1.
TEST(Arm64InsnTest, ArithmeticShiftRightBy64AccumulateInt64x1) {
  __uint128_t arg1 = MakeUInt128(0x9223343657791601ULL, 0x2809317940171859ULL);
  __uint128_t arg2 = MakeUInt128(0x3498025249906698ULL, 0x4233017350358044ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W0_ARG("ssra %d0, %d1, #64")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x3498025249906697ULL, 0x0000000000000000ULL));
}

// ssra (vector, 8h): per-lane signed shift + accumulate.
TEST(Arm64InsnTest, ArithmeticShiftRightAccumulateInt16x8) {
  __uint128_t arg1 = MakeUInt128(0x9276457931065792ULL, 0x2955249887275846ULL);
  __uint128_t arg2 = MakeUInt128(0x0101655256375678ULL, 0x5667227966198857ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W0_ARG("ssra %0.8h, %1.8h, #12")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x00fa6556563a567dULL, 0x5669227b6611885cULL));
}

// srsra: like ssra but the shifted value is rounded (half added before shift).
TEST(Arm64InsnTest, ArithmeticRoundingShiftRightAccumulateInt16x8) {
  __uint128_t arg1 = MakeUInt128(0x9894671543578468ULL, 0x7886144458123145ULL);
  __uint128_t arg2 = MakeUInt128(0x1412147805734551ULL, 0x0500801908699603ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W0_ARG("srsra %0.8h, %1.8h, #12")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x140c147e05774549ULL, 0x0508801a086f9606ULL));
}
3948
// ushr (scalar): logical shift right zero-fills from the top.
TEST(Arm64InsnTest, LogicalShiftRightInt64x1) {
  __uint128_t arg = MakeUInt128(0x9859771921805158ULL, 0x5321473926532515ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("ushr %d0, %d1, #33")(arg);
  ASSERT_EQ(res, MakeUInt128(0x000000004c2cbb8cULL, 0x0000000000000000ULL));
}

// ushr by 64: every bit is shifted out, so the result is zero.
TEST(Arm64InsnTest, LogicalShiftRightBy64Int64x1) {
  __uint128_t arg = MakeUInt128(0x9474696134360928ULL, 0x6148494178501718ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("ushr %d0, %d1, #64")(arg);
  ASSERT_EQ(res, MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
}

// ushr (vector, 2d): both lanes shifted independently.
TEST(Arm64InsnTest, LogicalShiftRightInt64x2) {
  __uint128_t op = MakeUInt128(0x3962657978771855ULL, 0x6084552965412665ULL);
  __uint128_t rd = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("ushr %0.2d, %1.2d, #33")(op);
  ASSERT_EQ(rd, MakeUInt128(0x000000001cb132bcULL, 0x0000000030422a94ULL));
}

// usra: unsigned shift of arg1 added to the destination (arg2).
TEST(Arm64InsnTest, LogicalShiftRightAccumulateInt64x1) {
  __uint128_t arg1 = MakeUInt128(0x9004112453790153ULL, 0x3296615697052237ULL);
  __uint128_t arg2 = MakeUInt128(0x0499939532215362ULL, 0x2748476603613677ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W0_ARG("usra %d0, %d1, #40")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x0499939532b15773ULL, 0x0000000000000000ULL));
}

// usra by 64: arg1 contributes zero, so the result is arg2 unchanged.
TEST(Arm64InsnTest, LogicalShiftRightBy64AccumulateInt64x1) {
  __uint128_t arg1 = MakeUInt128(0x9886592578662856ULL, 0x1249665523533829ULL);
  __uint128_t arg2 = MakeUInt128(0x3559152534784459ULL, 0x8183134112900199ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W0_ARG("usra %d0, %d1, #64")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x3559152534784459ULL, 0x0000000000000000ULL));
}

// usra (vector, 8h): per-lane unsigned shift + accumulate.
TEST(Arm64InsnTest, LogicalShiftRightAccumulateInt16x8) {
  __uint128_t arg1 = MakeUInt128(0x9984345225161050ULL, 0x7027056235266012ULL);
  __uint128_t arg2 = MakeUInt128(0x4628654036036745ULL, 0x3286510570658748ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W0_ARG("usra %0.8h, %1.8h, #12")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x4631654336056746ULL, 0x328d51057068874eULL));
}

// NOTE(review): the test name says "Logical" (unsigned) but the instruction is
// srsra (signed rounding shift right accumulate), duplicating the coverage of
// ArithmeticRoundingShiftRightAccumulateInt16x8 above with different data.
// The expected constants match srsra semantics, so the test is self-consistent;
// confirm whether ursra was intended here.
TEST(Arm64InsnTest, LogicalRoundingShiftRightAccumulateInt16x8) {
  __uint128_t arg1 = MakeUInt128(0x9843452251610507ULL, 0x0270562352660127ULL);
  __uint128_t arg2 = MakeUInt128(0x6286540360367453ULL, 0x2865105706587488ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W0_ARG("srsra %0.8h, %1.8h, #12")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x62805407603b7453ULL, 0x2865105c065d7488ULL));
}
3994
// srshr (scalar): signed shift right with rounding (adds half before shifting).
TEST(Arm64InsnTest, SignedRoundingShiftRightInt64x1) {
  __uint128_t arg = MakeUInt128(0x9323685785585581ULL, 0x9555604215625088ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("srshr %d0, %d1, #40")(arg);
  ASSERT_EQ(res, MakeUInt128(0xffffffffff932368ULL, 0x0000000000000000ULL));
}

// srshr (vector, 2d).
TEST(Arm64InsnTest, SignedRoundingShiftRightInt64x2) {
  __uint128_t arg = MakeUInt128(0x8714878398908107ULL, 0x4295309410605969ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("srshr %0.2d, %1.2d, #36")(arg);
  ASSERT_EQ(res, MakeUInt128(0xfffffffff8714878ULL, 0x0000000004295309ULL));
}

// srsra (scalar): rounded signed shift of arg1 accumulated into arg2.
TEST(Arm64InsnTest, SignedRoundingShiftRightAccumulateInt64x1) {
  __uint128_t arg1 = MakeUInt128(0x9946016520577405ULL, 0x2942305360178031ULL);
  __uint128_t arg2 = MakeUInt128(0x3960188013782542ULL, 0x1927094767337191ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W0_ARG("srsra %d0, %d1, #33")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x3960187fe01b25f5ULL, 0x0000000000000000ULL));
}

// urshr (scalar): unsigned shift right with rounding.
TEST(Arm64InsnTest, UnsignedRoundingShiftRightInt64x1) {
  __uint128_t arg = MakeUInt128(0x9713552208445285ULL, 0x2640081252027665ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("urshr %d0, %d1, #33")(arg);
  ASSERT_EQ(res, MakeUInt128(0x000000004b89aa91ULL, 0x0000000000000000ULL));
}

// urshr (vector, 2d).
TEST(Arm64InsnTest, UnsignedRoundingShiftRightInt64x2) {
  __uint128_t arg = MakeUInt128(0x6653398573888786ULL, 0x6147629443414010ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("urshr %0.2d, %1.2d, #34")(arg);
  ASSERT_EQ(res, MakeUInt128(0x000000001994ce61ULL, 0x000000001851d8a5ULL));
}

// ursra (scalar): rounded unsigned shift of arg1 accumulated into arg2.
TEST(Arm64InsnTest, UnsignedRoundingShiftRightAccumulateInt64x1) {
  __uint128_t arg1 = MakeUInt128(0x9616143204006381ULL, 0x3224658411111577ULL);
  __uint128_t arg2 = MakeUInt128(0x7184728147519983ULL, 0x5050478129771859ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W0_ARG("ursra %d0, %d1, #33")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x71847281925ca39cULL, 0x0000000000000000ULL));
}
4032
// shl (scalar): left shift, zero-filling from the bottom.
TEST(Arm64InsnTest, ShiftLeftInt64x1) {
  __uint128_t arg = MakeUInt128(0x3903594664691623ULL, 0x5396809201394578ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("shl %d0, %d1, #35")(arg);
  ASSERT_EQ(res, MakeUInt128(0x2348b11800000000ULL, 0x0000000000000000ULL));
}

// shl (vector, 2d).
TEST(Arm64InsnTest, ShiftLeftInt64x2) {
  __uint128_t arg = MakeUInt128(0x0750111649832785ULL, 0x6353161451684576ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("shl %0.2d, %1.2d, #37")(arg);
  ASSERT_EQ(res, MakeUInt128(0x3064f0a000000000ULL, 0x2d08aec000000000ULL));
}

// shl (vector, 8b): per-byte shift, bits do not cross lane boundaries.
TEST(Arm64InsnTest, ShiftLeftInt8x8) {
  __uint128_t arg = MakeUInt128(0x0402956047346131ULL, 0x1382638788975517ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("shl %0.8b, %1.8b, #6")(arg);
  ASSERT_EQ(res, MakeUInt128(0x00804000c0004040ULL, 0x0000000000000000ULL));
}

// sri: shift right and insert — the vacated top bits keep arg2's bits.
TEST(Arm64InsnTest, ShiftRightInsertInt64x1) {
  __uint128_t arg1 = MakeUInt128(0x9112232618794059ULL, 0x9415540632701319ULL);
  __uint128_t arg2 = MakeUInt128(0x1537675115830432ULL, 0x0849872092028092ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W0_ARG("sri %d0, %d1, #20")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x1537691122326187ULL, 0x0000000000000000ULL));
}

// sri (vector, 2d): per-lane shift-right-insert.
TEST(Arm64InsnTest, ShiftRightInsertInt64x2) {
  __uint128_t arg1 = MakeUInt128(0x7332335603484653ULL, 0x1873029302665964ULL);
  __uint128_t arg2 = MakeUInt128(0x5013718375428897ULL, 0x5579714499246540ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W0_ARG("sri %0.2d, %1.2d, #21")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x50137399919ab01aULL, 0x557970c398149813ULL));
}

// sli: shift left and insert — the vacated low bits keep arg2's bits.
TEST(Arm64InsnTest, ShiftLeftInsertInt64x1) {
  __uint128_t arg1 = MakeUInt128(0x3763526969344354ULL, 0x4004730671988689ULL);
  __uint128_t arg2 = MakeUInt128(0x6369498567302175ULL, 0x2313252926537589ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W0_ARG("sli %d0, %d1, #23")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x34b49a21aa302175ULL, 0x0000000000000000ULL));
}

// sli (vector, 2d): per-lane shift-left-insert.
TEST(Arm64InsnTest, ShiftLeftInsertInt64x2) {
  __uint128_t arg1 = MakeUInt128(0x3270206902872323ULL, 0x3005386216347988ULL);
  __uint128_t arg2 = MakeUInt128(0x5094695472004795ULL, 0x2311201504329322ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W0_ARG("sli %0.2d, %1.2d, #21")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x0d2050e464604795ULL, 0x0c42c68f31129322ULL));
}
4078
// shll: each byte of the low half is widened to 16 bits then shifted by #8.
TEST(Arm64InsnTest, ShiftLeftLongInt8x8) {
  __uint128_t arg = MakeUInt128(0x2650697620201995ULL, 0x5484126500053944ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("shll %0.8h, %1.8b, #8")(arg);
  ASSERT_EQ(res, MakeUInt128(0x2000200019009500ULL, 0x2600500069007600ULL));
}

// shll2: same, but consumes the upper eight bytes of the source.
TEST(Arm64InsnTest, ShiftLeftLongInt8x8Upper) {
  __uint128_t arg = MakeUInt128(0x9050429225978771ULL, 0x0667873840000616ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("shll2 %0.8h, %1.16b, #8")(arg);
  ASSERT_EQ(res, MakeUInt128(0x4000000006001600ULL, 0x0600670087003800ULL));
}

// sshll: sign-extends each 32-bit lane to 64 bits, then shifts left.
TEST(Arm64InsnTest, SignedShiftLeftLongInt32x2) {
  __uint128_t arg = MakeUInt128(0x9075407923424023ULL, 0x0092590070173196ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("sshll %0.2d, %1.2s, #9")(arg);
  ASSERT_EQ(res, MakeUInt128(0x0000004684804600ULL, 0xffffff20ea80f200ULL));
}

// sshll2: same, operating on the upper two 32-bit lanes.
TEST(Arm64InsnTest, SignedShiftLeftLongInt32x2Upper) {
  __uint128_t arg = MakeUInt128(0x9382432227188515ULL, 0x9740547021482897ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("sshll2 %0.2d, %1.4s, #9")(arg);
  ASSERT_EQ(res, MakeUInt128(0x0000004290512e00ULL, 0xffffff2e80a8e000ULL));
}

TEST(Arm64InsnTest, SignedShiftLeftLongInt32x2By0) {
  __uint128_t arg = MakeUInt128(0x9008777697763127ULL, 0x9572267265556259ULL);
  // SXTL is an alias for SSHLL for the shift count being zero.
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("sxtl %0.2d, %1.2s")(arg);
  ASSERT_EQ(res, MakeUInt128(0xffffffff97763127ULL, 0xffffffff90087776ULL));
}

// ushll: zero-extends each 32-bit lane to 64 bits, then shifts left.
TEST(Arm64InsnTest, ShiftLeftLongInt32x2) {
  __uint128_t arg = MakeUInt128(0x9094334676851422ULL, 0x1447737939375170ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("ushll %0.2d, %1.2s, #9")(arg);
  ASSERT_EQ(res, MakeUInt128(0x000000ed0a284400ULL, 0x0000012128668c00ULL));
}

// ushll2: same, operating on the upper two 32-bit lanes.
TEST(Arm64InsnTest, ShiftLeftLongInt32x2Upper) {
  __uint128_t arg = MakeUInt128(0x7096834080053559ULL, 0x8491754173818839ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("ushll2 %0.2d, %1.4s, #17")(arg);
  ASSERT_EQ(res, MakeUInt128(0x0000e70310720000ULL, 0x00010922ea820000ULL));
}

TEST(Arm64InsnTest, ShiftLeftLongInt32x2By0) {
  __uint128_t arg = MakeUInt128(0x9945681506526530ULL, 0x5371829412703369ULL);
  // UXTL is an alias for USHLL for the shift count being zero.
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("uxtl %0.2d, %1.2s")(arg);
  ASSERT_EQ(res, MakeUInt128(0x0000000006526530ULL, 0x0000000099456815ULL));
}
4128
// shrn: each 16-bit lane is shifted right then truncated to 8 bits;
// the narrowed result lands in the low half.
TEST(Arm64InsnTest, ShiftRightNarrowI16x8) {
  __uint128_t arg = MakeUInt128(0x9378541786109696ULL, 0x9202538865034577ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("shrn %0.8b, %1.8h, #2")(arg);
  ASSERT_EQ(res, MakeUInt128(0x80e2405dde0584a5ULL, 0x0000000000000000ULL));
}

// shrn2: the narrowed result is written to the upper half; the lower half
// of the destination (arg2) is preserved.
TEST(Arm64InsnTest, ShiftRightNarrowI16x8Upper) {
  __uint128_t arg1 = MakeUInt128(0x9779940012601642ULL, 0x2760926082349304ULL);
  __uint128_t arg2 = MakeUInt128(0x3879158299848645ULL, 0x9271734059225620ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W0_ARG("shrn2 %0.16b, %1.8h, #2")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x3879158299848645ULL, 0xd8988dc1de009890ULL));
}

// rshrn: like shrn but rounds before narrowing.
TEST(Arm64InsnTest, RoundingShiftRightNarrowI16x8) {
  __uint128_t arg = MakeUInt128(0x9303774688099929ULL, 0x6877582441047878ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("rshrn %0.8b, %1.8h, #2")(arg);
  ASSERT_EQ(res, MakeUInt128(0x1e09411ec1d2024aULL, 0x0000000000000000ULL));
}

// rshrn2: rounded narrow into the upper half, lower half preserved.
TEST(Arm64InsnTest, RoundingShiftRightNarrowI16x8Upper) {
  __uint128_t arg1 = MakeUInt128(0x9314507607167064ULL, 0x3556827437743965ULL);
  __uint128_t arg2 = MakeUInt128(0x2103098604092717ULL, 0x0909512808630902ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W0_ARG("rshrn2 %0.16b, %1.8h, #2")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x2103098604092717ULL, 0x569ddd59c51ec619ULL));
}
4154
// add (scalar, D): 64-bit add; the upper halves of the inputs are ignored
// and the upper half of the result is zero.
TEST(Arm64InsnTest, AddInt64x1) {
  __uint128_t arg1 = MakeUInt128(0x0080000000000003ULL, 0xdeadbeef01234567ULL);
  __uint128_t arg2 = MakeUInt128(0x0080000000000005ULL, 0x0123deadbeef4567ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("add %d0, %d1, %d2")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x0100000000000008ULL, 0x0ULL));
}

TEST(Arm64InsnTest, AddInt32x4) {
  // The "add" below adds two vectors, each with four 32-bit elements. We set the sign
  // bit for each element to verify that the carry does not affect any lane.
  __uint128_t op1 = MakeUInt128(0x8000000380000001ULL, 0x8000000780000005ULL);
  __uint128_t op2 = MakeUInt128(0x8000000480000002ULL, 0x8000000880000006ULL);
  __uint128_t rd = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("add %0.4s, %1.4s, %2.4s")(op1, op2);
  ASSERT_EQ(rd, MakeUInt128(0x0000000700000003ULL, 0x0000000f0000000bULL));
}

// add (vector, 2s): only the low half participates; result upper half is zero.
TEST(Arm64InsnTest, AddInt32x2) {
  __uint128_t op1 = MakeUInt128(0x8000000380000001ULL, 0x8000000780000005ULL);
  __uint128_t op2 = MakeUInt128(0x8000000480000002ULL, 0x8000000880000006ULL);
  __uint128_t rd = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("add %0.2s, %1.2s, %2.2s")(op1, op2);
  ASSERT_EQ(rd, MakeUInt128(0x0000000700000003ULL, 0));
}

// add (vector, 2d): two independent 64-bit lanes.
TEST(Arm64InsnTest, AddInt64x2) {
  __uint128_t op1 = MakeUInt128(0x8000000380000001ULL, 0x8000000780000005ULL);
  __uint128_t op2 = MakeUInt128(0x8000000480000002ULL, 0x8000000880000006ULL);
  __uint128_t rd = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("add %0.2d, %1.2d, %2.2d")(op1, op2);
  ASSERT_EQ(rd, MakeUInt128(0x0000000800000003ULL, 0x000000100000000bULL));
}
4184
// sub (scalar, D): 2 - 3 wraps to all ones; upper input halves are ignored.
TEST(Arm64InsnTest, SubInt64x1) {
  __uint128_t arg1 = MakeUInt128(0x0000000000000002ULL, 0x0011223344556677ULL);
  __uint128_t arg2 = MakeUInt128(0x0000000000000003ULL, 0x0123456789abcdefULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("sub %d0, %d1, %d2")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0xffffffffffffffffULL, 0x0ULL));
}

// sub (vector, 2d): two independent 64-bit lanes, each wrapping on borrow.
TEST(Arm64InsnTest, SubInt64x2) {
  constexpr auto AsmSub = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("sub %0.2d, %1.2d, %2.2d");
  __uint128_t arg1 = MakeUInt128(0x6873115956286388ULL, 0x2353787593751957ULL);
  __uint128_t arg2 = MakeUInt128(0x7818577805321712ULL, 0x2680661300192787ULL);
  __uint128_t res = AsmSub(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0xf05ab9e150f64c76ULL, 0xfcd31262935bf1d0ULL));
}
4199
// sub (vector, 4s): element-wise 32-bit subtraction; no borrow crosses lanes.
TEST(Arm64InsnTest, SubInt32x4) {
  __uint128_t op1 = MakeUInt128(0x0000000a00000005ULL, 0x0000000c00000c45ULL);
  __uint128_t op2 = MakeUInt128(0x0000000500000003ULL, 0x0000000200000c45ULL);
  __uint128_t rd = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("sub %0.4s, %1.4s, %2.4s")(op1, op2);
  // The expected high half was previously written as 0x00000000A00000000ULL —
  // a 17-hex-digit literal. The value is the same (0xa * 2^32), but the extra
  // digit is misleading in a file of exact 64-bit golden constants, so it is
  // now a proper 16-digit literal.
  ASSERT_EQ(rd, MakeUInt128(0x0000000500000002ULL, 0x0000000a00000000ULL));
}

// sub (vector, 2s): subtraction on the low half; the result's upper half is zero.
TEST(Arm64InsnTest, SubInt32x2) {
  __uint128_t op1 = MakeUInt128(0x0000000000000005ULL, 0x0000000000000c45ULL);
  __uint128_t op2 = MakeUInt128(0x0000000000000003ULL, 0x0000000000000c45ULL);
  __uint128_t rd = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("sub %0.2s, %1.2s, %2.2s")(op1, op2);
  // Was 0x00000000000000000ULL (17 digits); rewritten as a 16-digit zero.
  ASSERT_EQ(rd, MakeUInt128(0x0000000000000002ULL, 0x0000000000000000ULL));
}
4213
TEST(Arm64InsnTest, SubInt16x4) {
  // sub (vector, 4h): element-wise 16-bit subtraction on the low 64 bits.
  constexpr auto AsmSub = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("sub %0.4h, %1.4h, %2.4h");
  const __uint128_t minuend = MakeUInt128(0x8888777766665555ULL, 0);
  const __uint128_t subtrahend = MakeUInt128(0x1111222233334444ULL, 0);
  ASSERT_EQ(AsmSub(minuend, subtrahend), MakeUInt128(0x7777555533331111ULL, 0));
}
4220
TEST(Arm64InsnTest,MultiplyI8x8)4221 TEST(Arm64InsnTest, MultiplyI8x8) {
4222 __uint128_t arg1 = MakeUInt128(0x5261365549781893ULL, 0x1297848216829989ULL);
4223 __uint128_t arg2 = MakeUInt128(0x4542858444795265ULL, 0x8678210511413547ULL);
4224 __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("mul %0.8b, %1.8b, %2.8b")(arg1, arg2);
4225 ASSERT_EQ(res, MakeUInt128(0x1a020ed464b8b0ffULL, 0x0000000000000000ULL));
4226 }
4227
TEST(Arm64InsnTest,MultiplyAndAccumulateI8x8)4228 TEST(Arm64InsnTest, MultiplyAndAccumulateI8x8) {
4229 __uint128_t arg1 = MakeUInt128(0x5848406353422072ULL, 0x2258284886481584ULL);
4230 __uint128_t arg2 = MakeUInt128(0x7823986456596116ULL, 0x3548862305774564ULL);
4231 __uint128_t arg3 = MakeUInt128(0x8797108931456691ULL, 0x3686722874894056ULL);
4232 __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("mla %0.8b, %1.8b, %2.8b")(arg1, arg2, arg3);
4233 ASSERT_EQ(res, MakeUInt128(0xc76f10351337865dULL, 0x0000000000000000ULL));
4234 }
4235
TEST(Arm64InsnTest,MultiplyAndAccumulateI8x8IndexedElem)4236 TEST(Arm64InsnTest, MultiplyAndAccumulateI8x8IndexedElem) {
4237 __uint128_t arg1 = MakeUInt128(0x4143334547762416ULL, 0x8625189835694855ULL);
4238 __uint128_t arg2 = MakeUInt128(0x5346462080466842ULL, 0x5906949129331367ULL);
4239 __uint128_t arg3 = MakeUInt128(0x0355876402474964ULL, 0x7326391419927260ULL);
4240 __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("mla %0.4h, %1.4h, %2.h[0]")(arg1, arg2, arg3);
4241 ASSERT_EQ(res, MakeUInt128(0x0e9bc72e5eb38710ULL, 0x0000000000000000ULL));
4242 }
4243
// MLA (vector, by element): multiplies each 32-bit lane of arg1 by arg2.s[2]
// and accumulates into the matching lane of arg3 (the "0" tied operand).
// NOTE(review): the test name says I8x8, but the asm operates on 2x32-bit
// lanes — confirm whether the name or the template is intended.
TEST(Arm64InsnTest, MultiplyAndAccumulateI8x8IndexedElemPosition2) {
  __uint128_t arg1 = MakeUInt128(0x1431429809190659ULL, 0x2509372216964615ULL);
  __uint128_t arg2 = MakeUInt128(0x2686838689427741ULL, 0x5599185592524595ULL);
  __uint128_t arg3 = MakeUInt128(0x6099124608051243ULL, 0x8843904512441365ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("mla %0.2s, %1.2s, %2.s[2]")(arg1, arg2, arg3);
  // 64-bit vector form: the upper half of the result register is zeroed.
  ASSERT_EQ(res, MakeUInt128(0x6ce7ccbedccdc110ULL, 0x0000000000000000ULL));
}

// MLS (vector, by element): multiplies each 16-bit lane of arg1 by arg2.h[1]
// and subtracts the products from arg3.
// NOTE(review): name says I8x8 but the asm uses 4x16-bit lanes.
TEST(Arm64InsnTest, MultiplyAndSubtractI8x8IndexedElem) {
  __uint128_t arg1 = MakeUInt128(0x8297455570674983ULL, 0x8505494588586926ULL);
  __uint128_t arg2 = MakeUInt128(0x6549911988183479ULL, 0x7753566369807426ULL);
  __uint128_t arg3 = MakeUInt128(0x4524919217321721ULL, 0x4772350141441973ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("mls %0.4h, %1.4h, %2.h[1]")(arg1, arg2, arg3);
  ASSERT_EQ(res, MakeUInt128(0xcefce99ad58a9ad9ULL, 0x0000000000000000ULL));
}

// MLS (vector): lane-wise 8-bit multiply of arg1 and arg2, subtracted from
// the accumulator arg3.
TEST(Arm64InsnTest, MultiplyAndSubtractI8x8) {
  __uint128_t arg1 = MakeUInt128(0x0635342207222582ULL, 0x8488648158456028ULL);
  __uint128_t arg2 = MakeUInt128(0x9864565961163548ULL, 0x8623057745649803ULL);
  __uint128_t arg3 = MakeUInt128(0x1089314566913686ULL, 0x7228748940560101ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("mls %0.8b, %1.8b, %2.8b")(arg1, arg2, arg3);
  ASSERT_EQ(res, MakeUInt128(0x80d5b973bfa58df6ULL, 0x0000000000000000ULL));
}

// MUL (vector, by element): multiplies all four 32-bit lanes of arg1 by
// arg2.s[1]; full 128-bit result is checked.
TEST(Arm64InsnTest, MultiplyI32x4IndexedElem) {
  __uint128_t arg1 = MakeUInt128(0x143334547762416ULL, 0x8625189835694855ULL);
  __uint128_t arg2 = MakeUInt128(0x627232791786085ULL, 0x7843838279679766ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("mul %0.4s, %1.4s, %2.s[1]")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0xcec23e830d48815aULL, 0xd12b87288ae0a3f3ULL));
}
4274
// PMUL: polynomial (carry-less, GF(2)) multiply of 8-bit lanes; only the low
// 8 bits of each 16-bit product are kept.
TEST(Arm64InsnTest, PolynomialMultiplyU8x8) {
  __uint128_t arg1 = MakeUInt128(0x1862056476931257ULL, 0x0586356620185581ULL);
  __uint128_t arg2 = MakeUInt128(0x1668039626579787ULL, 0x7185560845529654ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("pmul %0.8b, %1.8b, %2.8b")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0xd0d00f18f4095e25ULL, 0x0000000000000000ULL));
}

// PMULL: widening polynomial multiply of the low eight 8-bit lanes into
// eight 16-bit results.
TEST(Arm64InsnTest, PolynomialMultiplyLongU8x8) {
  __uint128_t arg1 = MakeUInt128(0x1327656180937734ULL, 0x4403070746921120ULL);
  __uint128_t arg2 = MakeUInt128(0x9838952286847831ULL, 0x2355265821314495ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("pmull %0.8h, %1.8b, %2.8b")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x43004bcc17e805f4ULL, 0x082807a835210ce2ULL));
}

// PMULL2: same as above, but reads the upper eight 8-bit lanes of each
// source register.
TEST(Arm64InsnTest, PolynomialMultiplyLongU8x8Upper) {
  __uint128_t arg1 = MakeUInt128(0x4439658253375438ULL, 0x8569094113031509ULL);
  __uint128_t arg2 = MakeUInt128(0x1865619673378623ULL, 0x6256125216320862ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("pmull2 %0.8h, %1.16b, %2.16b")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x015a005600a80372ULL, 0x30ea1da6008214d2ULL));
}

// PMULL (1Q form): 64x64 -> 128-bit carry-less multiply of the low
// doublewords (the crypto-extension variant).
TEST(Arm64InsnTest, PolynomialMultiplyLongU64x2) {
  __uint128_t arg1 = MakeUInt128(0x1000100010001000ULL, 0xffffeeeeffffeeeeULL);
  __uint128_t arg2 = MakeUInt128(0x10001ULL, 0xffffeeeeffffeeeeULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("pmull %0.1q, %1.1d, %2.1d")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x1000ULL, 0x1000ULL));
}

// PMULL2 (1Q form): the same carry-less multiply applied to the upper
// doublewords; inputs mirror the previous test so the result matches.
TEST(Arm64InsnTest, PolynomialMultiplyLongU64x2Upper) {
  __uint128_t arg1 = MakeUInt128(0xffffeeeeffffeeeeULL, 0x1000100010001000ULL);
  __uint128_t arg2 = MakeUInt128(0xffffeeeeffffeeeeULL, 0x10001ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("pmull2 %0.1q, %1.2d, %2.2d")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x1000ULL, 0x1000ULL));
}
4309
// ADDP (vector): adds adjacent pairs of 8-bit lanes; the low half of the
// result comes from op1's pairs, the high half from op2's.
TEST(Arm64InsnTest, PairwiseAddInt8x16) {
  __uint128_t op1 = MakeUInt128(0x7766554433221100ULL, 0xffeeddccbbaa9988ULL);
  __uint128_t op2 = MakeUInt128(0x0706050403020100ULL, 0x0f0e0d0c0b0a0908ULL);
  __uint128_t rd = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("addp %0.16b, %1.16b, %2.16b")(op1, op2);
  ASSERT_EQ(rd, MakeUInt128(0xeda96521dd995511ULL, 0x1d1915110d090501ULL));
}

// ADDP (64-bit vector form): only the low doubleword of each source is used;
// the upper half of the destination is zeroed.
TEST(Arm64InsnTest, PairwiseAddInt8x8) {
  __uint128_t op1 = MakeUInt128(0x7766554433221100ULL, 0xffeeddccbbaa9988ULL);
  __uint128_t op2 = MakeUInt128(0x0706050403020100ULL, 0x0f0e0d0c0b0a0908ULL);
  __uint128_t rd = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("addp %0.8b, %1.8b, %2.8b")(op1, op2);
  ASSERT_EQ(rd, MakeUInt128(0x0d090501dd995511ULL, 0));
}

// ADDP (2D): result lanes are op1.lo + op1.hi and op2.lo + op2.hi.
TEST(Arm64InsnTest, PairwiseAddInt64x2) {
  __uint128_t op1 = MakeUInt128(1ULL, 2ULL);
  __uint128_t op2 = MakeUInt128(3ULL, 4ULL);
  __uint128_t rd = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("addp %0.2d, %1.2d, %2.2d")(op1, op2);
  ASSERT_EQ(rd, MakeUInt128(3ULL, 7ULL));
}
4330
// CMEQ (register): per-lane equality; equal 8-bit lanes become 0xff,
// differing lanes become 0x00.
TEST(Arm64InsnTest, CompareEqualInt8x16) {
  __uint128_t op1 = MakeUInt128(0x9375195778185778ULL, 0x0532171226806613ULL);
  __uint128_t op2 = MakeUInt128(0x9371595778815787ULL, 0x0352172126068613ULL);
  __uint128_t rd = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("cmeq %0.16b, %1.16b, %2.16b")(op1, op2);
  ASSERT_EQ(rd, MakeUInt128(0xff0000ffff00ff00ULL, 0x0000ff00ff0000ffULL));
}

// Same comparison on the 64-bit vector form; upper half of the result is
// zeroed.
TEST(Arm64InsnTest, CompareEqualInt8x8) {
  __uint128_t op1 = MakeUInt128(0x9375195778185778ULL, 0x0532171226806613ULL);
  __uint128_t op2 = MakeUInt128(0x9371595778815787ULL, 0x0352172126068613ULL);
  __uint128_t rd = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("cmeq %0.8b, %1.8b, %2.8b")(op1, op2);
  ASSERT_EQ(rd, MakeUInt128(0xff0000ffff00ff00ULL, 0));
}

// CMEQ on 16-bit lanes: matching halfwords produce 0xffff masks.
TEST(Arm64InsnTest, CompareEqualInt16x4) {
  __uint128_t op1 = MakeUInt128(0x4444333322221111ULL, 0);
  __uint128_t op2 = MakeUInt128(0x8888333300001111ULL, 0);
  __uint128_t rd = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("cmeq %0.4h, %1.4h, %2.4h")(op1, op2);
  ASSERT_EQ(rd, MakeUInt128(0x0000ffff0000ffffULL, 0));
}

// CMEQ (scalar D form): compares only the low 64 bits of the operands; the
// wrapper is reused for both an unequal and an equal pair.
TEST(Arm64InsnTest, CompareEqualInt64x1) {
  constexpr auto AsmCmeq = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("cmeq %d0, %d1, %d2");
  __uint128_t arg1 = MakeUInt128(0x8297455570674983ULL, 0x8505494588586926ULL);
  __uint128_t arg2 = MakeUInt128(0x0665499119881834ULL, 0x7977535663698074ULL);
  // arg3 matches arg1 in the low doubleword only — that is all CMEQ.D reads.
  __uint128_t arg3 = MakeUInt128(0x8297455570674983ULL, 0x1452491921732172ULL);
  ASSERT_EQ(AsmCmeq(arg1, arg2), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmCmeq(arg1, arg3), MakeUInt128(0xffffffffffffffffULL, 0x0000000000000000ULL));
}
4360
// CMEQ (zero, scalar D): low doubleword compared against zero.
TEST(Arm64InsnTest, CompareEqualZeroInt64x1) {
  constexpr auto AsmCmeq = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("cmeq %d0, %d1, #0");
  __uint128_t arg1 = MakeUInt128(0x6517166776672793ULL, 0x0354851542040238ULL);
  __uint128_t arg2 = MakeUInt128(0x0000000000000000ULL, 0x1746089232839170ULL);
  ASSERT_EQ(AsmCmeq(arg1), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmCmeq(arg2), MakeUInt128(0xffffffffffffffffULL, 0x0000000000000000ULL));
}

// CMEQ (zero, vector): each zero byte lane yields a 0xff mask.
TEST(Arm64InsnTest, CompareEqualZeroInt8x16) {
  __uint128_t op = MakeUInt128(0x0000555500332200ULL, 0x0000000077001100ULL);
  __uint128_t rd = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("cmeq %0.16b, %1.16b, #0")(op);
  ASSERT_EQ(rd, MakeUInt128(0xffff0000ff0000ffULL, 0xffffffff00ff00ffULL));
}

// 64-bit vector form of the zero compare; the upper half is zeroed.
TEST(Arm64InsnTest, CompareEqualZeroInt8x8) {
  __uint128_t op = MakeUInt128(0x001122330000aaaaULL, 0xdeadbeef0000cafeULL);
  __uint128_t rd = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("cmeq %0.8b, %1.8b, #0")(op);
  ASSERT_EQ(rd, MakeUInt128(0xff000000ffff0000ULL, 0));
}
4380
// CMGT (register, scalar D): signed greater-than on the low doubleword.
// Covers less-than, equal, and greater-than inputs.
TEST(Arm64InsnTest, CompareGreaterInt64x1) {
  constexpr auto AsmCmgt = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("cmgt %d0, %d1, %d2");
  __uint128_t arg1 = MakeUInt128(0x1976668559233565ULL, 0x4639138363185745ULL);
  __uint128_t arg2 = MakeUInt128(0x3474940784884423ULL, 0x7721751543342603ULL);
  // arg3 equals arg1 in the low doubleword, so "greater-than" is false.
  __uint128_t arg3 = MakeUInt128(0x1976668559233565ULL, 0x8183196376370761ULL);
  // arg4's low doubleword is negative as a signed value, so arg1 > arg4.
  __uint128_t arg4 = MakeUInt128(0x9243530136776310ULL, 0x8491351615642269ULL);
  ASSERT_EQ(AsmCmgt(arg1, arg2), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmCmgt(arg1, arg3), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmCmgt(arg1, arg4), MakeUInt128(0xffffffffffffffffULL, 0x0000000000000000ULL));
}

// CMGT (zero, scalar D): positive, zero, and negative low doublewords.
TEST(Arm64InsnTest, CompareGreaterZeroInt64x1) {
  constexpr auto AsmCmgt = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("cmgt %d0, %d1, #0");
  __uint128_t arg1 = MakeUInt128(0x6517166776672793ULL, 0x0354851542040238ULL);
  __uint128_t arg2 = MakeUInt128(0x0000000000000000ULL, 0x6174599705674507ULL);
  __uint128_t arg3 = MakeUInt128(0x9592057668278967ULL, 0x7644531840404185ULL);
  ASSERT_EQ(AsmCmgt(arg1), MakeUInt128(0xffffffffffffffffULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmCmgt(arg2), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmCmgt(arg3), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
}

// CMGT (zero, vector): each strictly-positive signed byte lane yields 0xff.
TEST(Arm64InsnTest, CompareGreaterThanZeroInt8x16) {
  __uint128_t op = MakeUInt128(0x807fff00017efe02ULL, 0xff7f80000102fe02ULL);
  __uint128_t rd = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("cmgt %0.16b, %1.16b, #0")(op);
  ASSERT_EQ(rd, MakeUInt128(0x00ff0000ffff00ffULL, 0x00ff0000ffff00ffULL));
}

// 64-bit vector form; the upper half of the result is zeroed.
TEST(Arm64InsnTest, CompareGreaterThanZeroInt8x8) {
  __uint128_t op = MakeUInt128(0x00ff7f80017efe00ULL, 0x0000cafedeadbeefULL);
  __uint128_t rd = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("cmgt %0.8b, %1.8b, #0")(op);
  ASSERT_EQ(rd, MakeUInt128(0x0000ff00ffff0000ULL, 0));
}
4413
// CMGT (register, vector): signed greater-than on eight 16-bit lanes; lanes
// where arg1 > arg2 (signed) are set to 0xffff, others to 0x0000.
TEST(Arm64InsnTest, CompareGreaterThanInt16x8) {
  __uint128_t arg1 = MakeUInt128(0x9789389001852956ULL, 0x9196780455448285ULL);
  // Added the ULL suffix missing from the second literal so both halves use
  // the same unsigned literal type, consistent with the rest of the file.
  __uint128_t arg2 = MakeUInt128(0x7269389081795897ULL, 0x5469399264218285ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("cmgt %0.8h, %1.8h, %2.8h")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x00000000ffff0000ULL, 0x0000ffff00000000ULL));
}
4420
// CMGT (register, vector): signed greater-than on 32-bit lanes. 0xffffffff
// lanes are negative, so only 0x00000000 > 0xffffffff holds.
TEST(Arm64InsnTest, CompareGreaterThanInt32x4) {
  __uint128_t arg1 = MakeUInt128(0x0000'0000'ffff'ffffULL, 0xffff'ffff'0000'0000ULL);
  __uint128_t arg2 = MakeUInt128(0xffff'ffff'0000'0000ULL, 0x0000'0000'ffff'ffffULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("cmgt %0.4s, %1.4s, %2.4s")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0xffff'ffff'0000'0000ULL, 0x0000'0000'ffff'ffffULL));
}
4427
// CMLT (zero, scalar D): signed less-than-zero on the low doubleword for
// positive, zero, and negative inputs.
TEST(Arm64InsnTest, CompareLessZeroInt64x1) {
  constexpr auto AsmCmlt = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("cmlt %d0, %d1, #0");
  __uint128_t arg1 = MakeUInt128(0x4784264567633881ULL, 0x8807565612168960ULL);
  __uint128_t arg2 = MakeUInt128(0x0000000000000000ULL, 0x8955999911209916ULL);
  __uint128_t arg3 = MakeUInt128(0x9364610175685060ULL, 0x1671453543158148ULL);
  ASSERT_EQ(AsmCmlt(arg1), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmCmlt(arg2), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmCmlt(arg3), MakeUInt128(0xffffffffffffffffULL, 0x0000000000000000ULL));
}

// CMLT (zero, vector): each byte lane with its sign bit set yields 0xff.
TEST(Arm64InsnTest, CompareLessThanZeroInt8x16) {
  __uint128_t op = MakeUInt128(0xff00017ffe020180ULL, 0x0001027e7ffeff80ULL);
  __uint128_t rd = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("cmlt %0.16b, %1.16b, #0")(op);
  ASSERT_EQ(rd, MakeUInt128(0xff000000ff0000ffULL, 0x0000000000ffffffULL));
}

// 64-bit vector form; the upper half of the result is zeroed.
TEST(Arm64InsnTest, CompareLessThanZeroInt8x8) {
  __uint128_t op = MakeUInt128(0x0002017e7fff8000ULL, 0x001100220000ffffULL);
  __uint128_t rd = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("cmlt %0.8b, %1.8b, #0")(op);
  ASSERT_EQ(rd, MakeUInt128(0x0000000000ffff00ULL, 0));
}
4449
// CMGE (register, scalar D): signed greater-or-equal on the low doubleword;
// covers less-than, equal, and greater-than (vs a negative value) cases.
TEST(Arm64InsnTest, CompareGreaterThanEqualInt64x1) {
  constexpr auto AsmCmge = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("cmge %d0, %d1, %d2");
  __uint128_t arg1 = MakeUInt128(0x1009391369138107ULL, 0x2581378135789400ULL);
  __uint128_t arg2 = MakeUInt128(0x5890939568814856ULL, 0x0263224393726562ULL);
  // arg3 equals arg1 in the low doubleword — "greater or equal" is true.
  __uint128_t arg3 = MakeUInt128(0x1009391369138107ULL, 0x5511995818319637ULL);
  // arg4's low doubleword is negative as a signed value.
  __uint128_t arg4 = MakeUInt128(0x9427141009391369ULL, 0x1381072581378135ULL);
  ASSERT_EQ(AsmCmge(arg1, arg2), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmCmge(arg1, arg3), MakeUInt128(0xffffffffffffffffULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmCmge(arg1, arg4), MakeUInt128(0xffffffffffffffffULL, 0x0000000000000000ULL));
}

// CMGE (zero, scalar D): positive, zero, and negative low doublewords.
TEST(Arm64InsnTest, CompareGreaterThanEqualZeroInt64x1) {
  constexpr auto AsmCmge = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("cmge %d0, %d1, #0");
  __uint128_t arg1 = MakeUInt128(0x5562116715468484ULL, 0x7780394475697980ULL);
  __uint128_t arg2 = MakeUInt128(0x0000000000000000ULL, 0x3548487562529875ULL);
  __uint128_t arg3 = MakeUInt128(0x9212366168902596ULL, 0x2730430679316531ULL);
  ASSERT_EQ(AsmCmge(arg1), MakeUInt128(0xffffffffffffffffULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmCmge(arg2), MakeUInt128(0xffffffffffffffffULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmCmge(arg3), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
}

// CMGE (zero, vector): byte lanes with a clear sign bit yield 0xff.
TEST(Arm64InsnTest, CompareGreaterThanEqualZeroInt8x16) {
  __uint128_t op = MakeUInt128(0x00ff01027ffe8002ULL, 0x80fffe7f7e020100ULL);
  __uint128_t rd = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("cmge %0.16b, %1.16b, #0")(op);
  ASSERT_EQ(rd, MakeUInt128(0xff00ffffff0000ffULL, 0x000000ffffffffffULL));
}

// 64-bit vector form; the upper half of the result is zeroed.
TEST(Arm64InsnTest, CompareGreaterThanEqualZeroInt8x8) {
  __uint128_t op = MakeUInt128(0x0001027f80feff00ULL, 0x0011223344556677ULL);
  __uint128_t rd = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("cmge %0.8b, %1.8b, #0")(op);
  ASSERT_EQ(rd, MakeUInt128(0xffffffff000000ffULL, 0));
}

// CMGE (register, vector): signed greater-or-equal on 16-bit lanes.
TEST(Arm64InsnTest, CompareGreaterEqualInt16x8) {
  __uint128_t arg1 = MakeUInt128(0x4391962838870543ULL, 0x6777432242768091ULL);
  __uint128_t arg2 = MakeUInt128(0x4391838548318875ULL, 0x0142432208995068ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("cmge %0.8h, %1.8h, %2.8h")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0xffffffff0000ffffULL, 0xffffffffffff0000ULL));
}
4489
// CMLE (zero, scalar D): signed less-or-equal-to-zero on the low doubleword
// for positive, zero, and negative inputs.
TEST(Arm64InsnTest, CompareLessThanEqualZeroInt64x1) {
  constexpr auto AsmCmle = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("cmle %d0, %d1, #0");
  __uint128_t arg1 = MakeUInt128(0x3643296406335728ULL, 0x1070788758164043ULL);
  __uint128_t arg2 = MakeUInt128(0x0000000000000000ULL, 0x5865720227637840ULL);
  __uint128_t arg3 = MakeUInt128(0x8694346828590066ULL, 0x6408063140777577ULL);
  ASSERT_EQ(AsmCmle(arg1), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmCmle(arg2), MakeUInt128(0xffffffffffffffffULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmCmle(arg3), MakeUInt128(0xffffffffffffffffULL, 0x0000000000000000ULL));
}

// CMLE (zero, vector): byte lanes that are zero or negative yield 0xff.
TEST(Arm64InsnTest, CompareLessThanEqualZeroInt8x16) {
  __uint128_t op = MakeUInt128(0x80fffe7f7e020100ULL, 0x00ff01027ffe8002ULL);
  __uint128_t rd = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("cmle %0.16b, %1.16b, #0")(op);
  ASSERT_EQ(rd, MakeUInt128(0xffffff00000000ffULL, 0xffff000000ffff00ULL));
}
4505
// CMHI (register, scalar D): unsigned greater-than on the low doubleword;
// covers greater, equal, and smaller cases.
TEST(Arm64InsnTest, CompareHigherInt64x1) {
  constexpr auto AsmCmhi = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("cmhi %d0, %d1, %d2");
  __uint128_t arg1 = MakeUInt128(0x1009391369138107ULL, 0x2581378135789400ULL);
  __uint128_t arg2 = MakeUInt128(0x0759167297007850ULL, 0x5807171863810549ULL);
  // arg3 equals arg1 in the low doubleword — strictly-higher is false.
  __uint128_t arg3 = MakeUInt128(0x1009391369138107ULL, 0x6026322439372656ULL);
  __uint128_t arg4 = MakeUInt128(0x9087839523245323ULL, 0x7896029841669225ULL);
  ASSERT_EQ(AsmCmhi(arg1, arg2), MakeUInt128(0xffffffffffffffffULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmCmhi(arg1, arg3), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmCmhi(arg1, arg4), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
}

// CMHI (register, vector): unsigned greater-than on 16-bit lanes.
TEST(Arm64InsnTest, CompareHigherInt16x8) {
  __uint128_t arg1 = MakeUInt128(0x6517166776672793ULL, 0x0354851542040238ULL);
  __uint128_t arg2 = MakeUInt128(0x2057166778967764ULL, 0x4531840442045540ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("cmhi %0.8h, %1.8h, %2.8h")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0xffff000000000000ULL, 0x0000ffff00000000ULL));
}

// CMHI on 32-bit lanes: unsigned compare, so 0xffffffff > 0x00000000 —
// the opposite outcome of the signed CMGT test with the same inputs.
TEST(Arm64InsnTest, CompareHigherInt32x4) {
  __uint128_t arg1 = MakeUInt128(0x0000'0000'ffff'ffffULL, 0xffff'ffff'0000'0000ULL);
  __uint128_t arg2 = MakeUInt128(0xffff'ffff'0000'0000ULL, 0x0000'0000'ffff'ffffULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("cmhi %0.4s, %1.4s, %2.4s")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x0000'0000'ffff'ffffULL, 0xffff'ffff'0000'0000ULL));
}

// CMHS (register, scalar D): unsigned greater-or-equal on the low
// doubleword; covers greater, equal, and smaller cases.
TEST(Arm64InsnTest, CompareHigherSameInt64x1) {
  constexpr auto AsmCmhs = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("cmhs %d0, %d1, %d2");
  __uint128_t arg1 = MakeUInt128(0x3529566139788848ULL, 0x6050978608595701ULL);
  __uint128_t arg2 = MakeUInt128(0x1769845875810446ULL, 0x6283998806006162ULL);
  // arg3 equals arg1 in the low doubleword — "higher or same" is true.
  __uint128_t arg3 = MakeUInt128(0x3529566139788848ULL, 0x9001852956919678ULL);
  __uint128_t arg4 = MakeUInt128(0x9628388705436777ULL, 0x4322427680913236ULL);
  ASSERT_EQ(AsmCmhs(arg1, arg2), MakeUInt128(0xffffffffffffffffULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmCmhs(arg1, arg3), MakeUInt128(0xffffffffffffffffULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmCmhs(arg1, arg4), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
}

// CMHS (register, vector): unsigned greater-or-equal on 16-bit lanes.
TEST(Arm64InsnTest, CompareHigherSameInt16x8) {
  __uint128_t arg1 = MakeUInt128(0x4599705674507183ULL, 0x3206503455664403ULL);
  __uint128_t arg2 = MakeUInt128(0x4264705633881880ULL, 0x3206612168960504ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("cmhs %0.8h, %1.8h, %2.8h")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0xffffffffffffffffULL, 0xffff00000000ffffULL));
}
4548
// CMLE (zero, 64-bit vector form): zero-or-negative byte lanes yield 0xff;
// the upper half of the result is zeroed.
TEST(Arm64InsnTest, CompareLessThanEqualZeroInt8x8) {
  __uint128_t op = MakeUInt128(0x00fffe807f020100ULL, 0x00aabbccddeeff00ULL);
  __uint128_t rd = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("cmle %0.8b, %1.8b, #0")(op);
  ASSERT_EQ(rd, MakeUInt128(0xffffffff000000ffULL, 0));
}

// CMTST (scalar D): sets all-ones when (arg1 & arg2) on the low doubleword
// is non-zero. arg2 is the bitwise complement pattern of arg1's low half,
// so the AND is zero; arg3 overlaps, so it is non-zero.
TEST(Arm64InsnTest, TestInt64x1) {
  constexpr auto AsmCmtst = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("cmtst %d0, %d1, %d2");
  __uint128_t arg1 = MakeUInt128(0xaaaaaaaa55555555ULL, 0x7698385483188750ULL);
  __uint128_t arg2 = MakeUInt128(0x55555555aaaaaaaaULL, 0x1429389089950685ULL);
  __uint128_t arg3 = MakeUInt128(0xaa00aa0055005500ULL, 0x4530765116803337ULL);
  ASSERT_EQ(AsmCmtst(arg1, arg2), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmCmtst(arg1, arg3), MakeUInt128(0xffffffffffffffffULL, 0x0000000000000000ULL));
}

// CMTST (vector): per-16-bit-lane "test bits" — 0xffff where the lane AND
// is non-zero.
TEST(Arm64InsnTest, TestInt16x8) {
  __uint128_t arg1 = MakeUInt128(0x5999911209916464ULL, 0x6441191856827700ULL);
  __uint128_t arg2 = MakeUInt128(0x6101756850601671ULL, 0x4535431581480105ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("cmtst %0.8h, %1.8h, %2.8h")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0xffffffff0000ffffULL, 0xffffffff0000ffffULL));
}
4570
// EXT: extracts 16 bytes starting at byte #8 of the op1:op2 concatenation —
// i.e. op1's high half followed by op2's low half.
TEST(Arm64InsnTest, ExtractVectorFromPair) {
  __uint128_t op1 = MakeUInt128(0x0011223344556677ULL, 0x8899aabbccddeeffULL);
  __uint128_t op2 = MakeUInt128(0x0001020304050607ULL, 0x08090a0b0c0d0e0fULL);
  __uint128_t rd = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("ext %0.16b, %1.16b, %2.16b, #8")(op1, op2);
  ASSERT_EQ(rd, MakeUInt128(0x8899aabbccddeeffULL, 0x0001020304050607ULL));
}

// EXT (64-bit form): 8-byte window starting at byte #3 of the concatenated
// low doublewords; the upper half of the result is zeroed.
TEST(Arm64InsnTest, ExtractVectorFromPairHalfWidth) {
  __uint128_t op1 = MakeUInt128(0x8138268683868942ULL, 0x7741559918559252ULL);
  __uint128_t op2 = MakeUInt128(0x3622262609912460ULL, 0x8051243884390451ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("ext %0.8b, %1.8b, %2.8b, #3")(op1, op2);
  ASSERT_EQ(res, MakeUInt128(0x9124608138268683ULL, 0x0000000000000000ULL));
}

// Same as above with the minimal non-zero shift (index #1).
TEST(Arm64InsnTest, ExtractVectorFromPairHalfWidthPosition1) {
  __uint128_t op1 = MakeUInt128(0x9471329621073404ULL, 0x3751895735961458ULL);
  __uint128_t op2 = MakeUInt128(0x9048010941214722ULL, 0x1317947647772622ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("ext %0.8b, %1.8b, %2.8b, #1")(op1, op2);
  ASSERT_EQ(res, MakeUInt128(0x2294713296210734ULL, 0x0000000000000000ULL));
}
4591
// LD1 (single register, 8B): loads 8 bytes from memory into one vector
// register.
TEST(Arm64InsnTest, Load1OneI8x8) {
  static constexpr uint64_t arg = 0x8867915896904956ULL;
  __uint128_t res;
  asm("ld1 {%0.8b}, [%1]" : "=w"(res) : "r"(&arg) : "memory");
  ASSERT_EQ(res, arg);
}

// LD1 (three registers, 8B): loads three consecutive doublewords.
// v0-v2 are hardcoded because the register list must be consecutive, so the
// loaded values are copied out with mov instructions.
TEST(Arm64InsnTest, Load1ThreeI8x8) {
  static constexpr uint64_t arg[3] = {
      0x3415354584283376ULL, 0x4378111988556318ULL, 0x7777925372011667ULL};
  __uint128_t res[3];
  asm("ld1 {v0.8b-v2.8b}, [%3]\n\t"
      "mov %0.16b, v0.16b\n\t"
      "mov %1.16b, v1.16b\n\t"
      "mov %2.16b, v2.16b"
      : "=w"(res[0]), "=w"(res[1]), "=w"(res[2])
      : "r"(arg)
      : "v0", "v1", "v2", "memory");
  ASSERT_EQ(res[0], static_cast<__uint128_t>(arg[0]));
  ASSERT_EQ(res[1], static_cast<__uint128_t>(arg[1]));
  ASSERT_EQ(res[2], static_cast<__uint128_t>(arg[2]));
}

// LD1 (four registers, 8B): same scheme as above with v0-v3.
TEST(Arm64InsnTest, Load1FourI8x8) {
  static constexpr uint64_t arg[4] = {
      0x9523688483099930ULL,
      0x2757419916463841ULL,
      0x4270779887088742ULL,
      0x2927705389122717ULL,
  };
  __uint128_t res[4];
  asm("ld1 {v0.8b-v3.8b}, [%4]\n\t"
      "mov %0.16b, v0.16b\n\t"
      "mov %1.16b, v1.16b\n\t"
      "mov %2.16b, v2.16b\n\t"
      "mov %3.16b, v3.16b"
      : "=w"(res[0]), "=w"(res[1]), "=w"(res[2]), "=w"(res[3])
      : "r"(arg)
      : "v0", "v1", "v2", "v3", "memory");
  ASSERT_EQ(res[0], static_cast<__uint128_t>(arg[0]));
  ASSERT_EQ(res[1], static_cast<__uint128_t>(arg[1]));
  ASSERT_EQ(res[2], static_cast<__uint128_t>(arg[2]));
  ASSERT_EQ(res[3], static_cast<__uint128_t>(arg[3]));
}
4636
// ST1 (single register, 16B): stores a full 128-bit register to memory.
TEST(Arm64InsnTest, Store1OneI8x16) {
  static constexpr __uint128_t arg = MakeUInt128(0x7642291583425006ULL, 0x7361245384916067ULL);
  __uint128_t res;
  asm("st1 {%0.16b}, [%1]" : : "w"(arg), "r"(&res) : "memory");
  ASSERT_EQ(res, arg);
}

// ST1 (three registers, 8B): values are first moved into the consecutive
// v0-v2 registers that the store's register list requires.
TEST(Arm64InsnTest, Store1ThreeI8x8) {
  static constexpr uint64_t arg[3] = {
      0x3086436111389069ULL, 0x4202790881431194ULL, 0x4879941715404210ULL};
  uint64_t res[3];
  asm("mov v0.16b, %0.16b\n\t"
      "mov v1.16b, %1.16b\n\t"
      "mov v2.16b, %2.16b\n\t"
      "st1 {v0.8b-v2.8b}, [%3]"
      :
      : "w"(arg[0]), "w"(arg[1]), "w"(arg[2]), "r"(res)
      : "v0", "v1", "v2", "memory");
  ASSERT_EQ(res[0], arg[0]);
  ASSERT_EQ(res[1], arg[1]);
  ASSERT_EQ(res[2], arg[2]);
}

// ST1 (four registers, 8B): same scheme as above with v0-v3.
TEST(Arm64InsnTest, Store1FourI8x8) {
  static constexpr uint64_t arg[4] = {
      0x8954750448339314ULL, 0x6896307633966572ULL, 0x2672704339321674ULL, 0x5421824557062524ULL};
  uint64_t res[4];
  asm("mov v0.16b, %0.16b\n\t"
      "mov v1.16b, %1.16b\n\t"
      "mov v2.16b, %2.16b\n\t"
      "mov v3.16b, %3.16b\n\t"
      "st1 {v0.8b-v3.8b}, [%4]"
      :
      : "w"(arg[0]), "w"(arg[1]), "w"(arg[2]), "w"(arg[3]), "r"(res)
      : "v0", "v1", "v2", "v3", "memory");
  ASSERT_EQ(res[0], arg[0]);
  ASSERT_EQ(res[1], arg[1]);
  ASSERT_EQ(res[2], arg[2]);
  ASSERT_EQ(res[3], arg[3]);
}
4677
// LD1 (two registers, post-indexed immediate): loads 32 bytes and checks
// both values and the advanced address.
TEST(Arm64InsnTest, Load1TwoPostIndex) {
  __uint128_t op0 = MakeUInt128(0x5499119881834797ULL, 0x0507922796892589ULL);
  __uint128_t op1 = MakeUInt128(0x0511854807446237ULL, 0x6691368672287489ULL);
  __uint128_t array[] = {
      op0,
      op1,
  };
  __uint128_t* addr = &array[0];
  __uint128_t res0 = 0;
  __uint128_t res1 = 0;

  // The "memory" below ensures that the array contents are up to date. Without it, the
  // compiler might decide to initialize the array after the asm statement.
  //
  // We hardcode SIMD registers v0 and v1 below because there is no other way to express
  // consecutive registers, which in turn requires the mov instructions to retrieve the
  // loaded values into res0 and res1.
  asm("ld1 {v0.16b, v1.16b}, [%2], #32\n\t"
      "mov %0.16b, v0.16b\n\t"
      "mov %1.16b, v1.16b"
      : "=w"(res0), "=w"(res1), "+r"(addr)
      :
      : "v0", "v1", "memory");

  ASSERT_EQ(res0, op0);
  ASSERT_EQ(res1, op1);
  // Post-index writeback must advance the pointer by the transfer size.
  ASSERT_EQ(addr, &array[2]);
}

// LD1 (one register, post-indexed by register): the pointer must advance by
// the register operand (32), not by the 16 bytes actually transferred.
TEST(Arm64InsnTest, Load1OnePostIndexReg) {
  static constexpr __uint128_t arg = MakeUInt128(0x4884761005564018ULL, 0x2423921926950620ULL);
  __uint128_t res_val;
  uint64_t res_addr;
  asm("ld1 {%0.16b}, [%1], %2"
      : "=w"(res_val), "=r"(res_addr)
      : "r"(static_cast<uint64_t>(32U)), "1"(&arg)
      : "memory");
  ASSERT_EQ(res_val, arg);
  ASSERT_EQ(res_addr, reinterpret_cast<uint64_t>(&arg) + 32);
}
4718
// LD1 (single structure, B): loads one byte into lane b[3]; all other lanes
// of the destination (seeded via the tied "0" constraint) are preserved.
TEST(Arm64InsnTest, LoadSingleInt8) {
  static constexpr __uint128_t reg_before =
      MakeUInt128(0x0011223344556677ULL, 0x8899aabbccddeeffULL);
  static constexpr __uint128_t mem_src = MakeUInt128(0x0102030405060708ULL, 0x090a0b0c0d0e0f10ULL);
  __uint128_t reg_after;
  asm("ld1 {%0.b}[3], [%1]" : "=w"(reg_after) : "r"(&mem_src), "0"(reg_before) : "memory");
  ASSERT_EQ(reg_after, MakeUInt128(0x00112233'08'556677ULL, 0x8899aabbccddeeffULL));
}

// LD1 (single structure, H): loads one halfword into lane h[2].
TEST(Arm64InsnTest, LoadSingleInt16) {
  static constexpr __uint128_t reg_before =
      MakeUInt128(0x0000111122223333ULL, 0x4444555566667777ULL);
  static constexpr __uint128_t mem_src = MakeUInt128(0x0102030405060708ULL, 0x090a0b0c0d0e0f10ULL);
  __uint128_t reg_after;
  asm("ld1 {%0.h}[2], [%1]" : "=w"(reg_after) : "r"(&mem_src), "0"(reg_before) : "memory");
  ASSERT_EQ(reg_after, MakeUInt128(0x0000'0708'22223333ULL, 0x4444555566667777ULL));
}

// LD1 (single structure, S): loads one word into lane s[1].
TEST(Arm64InsnTest, LoadSingleInt32) {
  static constexpr __uint128_t reg_before =
      MakeUInt128(0x0000000011111111ULL, 0x2222222233333333ULL);
  static constexpr __uint128_t mem_src = MakeUInt128(0x0102030405060708ULL, 0x090a0b0c0d0e0f10ULL);
  __uint128_t reg_after;
  asm("ld1 {%0.s}[1], [%1]" : "=w"(reg_after) : "r"(&mem_src), "0"(reg_before) : "memory");
  ASSERT_EQ(reg_after, MakeUInt128(0x0506070811111111ULL, 0x2222222233333333ULL));
}

// LD1 (single structure, D): loads one doubleword into lane d[1].
TEST(Arm64InsnTest, LoadSingleInt64) {
  static constexpr __uint128_t reg_before =
      MakeUInt128(0x0000000000000000ULL, 0x1111111111111111ULL);
  static constexpr __uint128_t mem_src = MakeUInt128(0x0102030405060708ULL, 0x090a0b0c0d0e0f10ULL);
  __uint128_t reg_after;
  asm("ld1 {%0.d}[1], [%1]" : "=w"(reg_after) : "r"(&mem_src), "0"(reg_before) : "memory");
  ASSERT_EQ(reg_after, MakeUInt128(0x0000000000000000ULL, 0x0102030405060708ULL));
}
4754
// ST1 (single structure, B): stores lane b[3] of arg over one byte of
// memory; the rest of mem_dest is untouched.
TEST(Arm64InsnTest, StoreSingleInt8) {
  static constexpr __uint128_t arg = MakeUInt128(0x0102030405060708ULL, 0x090a0b0c0d0e0f10ULL);
  __uint128_t mem_dest = MakeUInt128(0x0011223344556677ULL, 0x8899aabbccddeeffULL);
  asm("st1 {%1.b}[3], [%0]" : : "r"(&mem_dest), "w"(arg) : "memory");
  ASSERT_EQ(mem_dest, MakeUInt128(0x00112233445566'05ULL, 0x8899aabbccddeeffULL));
}

// ST1 (single structure, H): stores lane h[5] (from the upper half of arg).
TEST(Arm64InsnTest, StoreSingleInt16) {
  static constexpr __uint128_t arg = MakeUInt128(0x0102030405060708ULL, 0x090a0b0c0d0e0f10ULL);
  __uint128_t mem_dest = MakeUInt128(0x0000111122223333ULL, 0x4444555566667777ULL);
  asm("st1 {%1.h}[5], [%0]" : : "r"(&mem_dest), "w"(arg) : "memory");
  ASSERT_EQ(mem_dest, MakeUInt128(0x000011112222'0d0eULL, 0x4444555566667777ULL));
}
4768
// ST1 (single structure, S): stores lane s[2] of arg (0x0d0e0f10) over the
// low word of mem_dest; the rest of the destination is untouched.
TEST(Arm64InsnTest, StoreSingleInt32) {
  static constexpr __uint128_t arg = MakeUInt128(0x0102030405060708ULL, 0x090a0b0c0d0e0f10ULL);
  __uint128_t mem_dest = MakeUInt128(0x0000000011111111ULL, 0x2222222233333333ULL);
  asm("st1 {%1.s}[2], [%0]" : : "r"(&mem_dest), "w"(arg) : "memory");
  // Literal rewritten without the misplaced digit separator
  // (0x000000000'd0e0f10 -> 0x000000000d0e0f10); the value is unchanged.
  ASSERT_EQ(mem_dest, MakeUInt128(0x000000000d0e0f10ULL, 0x2222222233333333ULL));
}
4775
// ST1 (single structure, D): stores lane d[1] of arg over the low
// doubleword of mem_dest.
TEST(Arm64InsnTest, StoreSingleInt64) {
  static constexpr __uint128_t arg = MakeUInt128(0x0102030405060708ULL, 0x090a0b0c0d0e0f10ULL);
  __uint128_t mem_dest = MakeUInt128(0x0000000000000000ULL, 0x1111111111111111ULL);
  asm("st1 {%1.d}[1], [%0]" : : "r"(&mem_dest), "w"(arg) : "memory");
  ASSERT_EQ(mem_dest, MakeUInt128(0x090a0b0c0d0e0f10ULL, 0x1111111111111111ULL));
}
4782
// LD1 (single structure, post-indexed immediate): loads one byte into lane
// b[3] and advances the address by the transfer size (#1).
TEST(Arm64InsnTest, LoadSinglePostIndexImmInt8) {
  static constexpr __uint128_t arg1 = MakeUInt128(0x5494167594605487ULL, 0x1172359464291058ULL);
  static constexpr __uint128_t arg2 = MakeUInt128(0x5090995021495879ULL, 0x3112196135908315ULL);
  __uint128_t res;
  uint8_t* addr;
  asm("ld1 {%0.b}[3], [%1], #1" : "=w"(res), "=r"(addr) : "0"(arg1), "1"(&arg2) : "memory");
  ASSERT_EQ(res, MakeUInt128(0x5494167579605487ULL, 0x1172359464291058ULL));
  ASSERT_EQ(addr, reinterpret_cast<const uint8_t*>(&arg2) + 1);
}

// LD1 (single structure, post-indexed register): loads one halfword into
// lane h[7]; the pointer advances by the register operand (17), not by the
// 2 bytes transferred.
TEST(Arm64InsnTest, LoadSinglePostIndexRegInt16) {
  static constexpr __uint128_t arg1 = MakeUInt128(0x0080587824107493ULL, 0x5751488997891173ULL);
  static constexpr __uint128_t arg2 = MakeUInt128(0x9746129320351081ULL, 0x4327032514090304ULL);
  __uint128_t res;
  uint8_t* addr;
  asm("ld1 {%0.h}[7], [%1], %2"
      : "=w"(res), "=r"(addr)
      : "r"(static_cast<uint64_t>(17U)), "0"(arg1), "1"(&arg2)
      : "memory");
  ASSERT_EQ(res, MakeUInt128(0x0080587824107493ULL, 0x1081488997891173ULL));
  ASSERT_EQ(addr, reinterpret_cast<const uint8_t*>(&arg2) + 17);
}
4805
// STR (SIMD, post-indexed): checks that the full 128-bit value is stored and
// the base register is advanced by the immediate (#8).
TEST(Arm64InsnTest, StoreSimdPostIndex) {
  __uint128_t old_val = MakeUInt128(0x4939965143142980ULL, 0x9190659250937221ULL);
  __uint128_t new_val = MakeUInt128(0x5985261365549781ULL, 0x8931297848216829ULL);
  __uint128_t* addr = &old_val;

  // Verify that the interpreter accepts "str q0, [x0], #8" where the register numbers are
  // the same, when the data register is one of the SIMD registers.
  asm("mov x0, %0\n\t"
      "mov v0.2D, %1.2D\n\t"
      "str q0, [x0], #8\n\t"
      "mov %0, x0"
      : "+r"(addr)
      : "w"(new_val)
      : "v0", "x0", "memory");

  ASSERT_EQ(old_val, MakeUInt128(0x5985261365549781ULL, 0x8931297848216829ULL));
  ASSERT_EQ(reinterpret_cast<uintptr_t>(addr), reinterpret_cast<uintptr_t>(&old_val) + 8);
}

// STR XZR with pre-index writeback: pushes a zero onto the stack and reads
// it back.  NOTE(review): the name says PostIndex, but "[sp, #-16]!" is
// pre-index addressing — confirm the intended name.
TEST(Arm64InsnTest, StoreZeroPostIndex1) {
  uint64_t res;
  asm("str xzr, [sp, #-16]!\n\t"
      "ldr %0, [sp, #0]\n\t"
      "add sp, sp, #16"
      : "=r"(res));
  ASSERT_EQ(res, 0);
}

// STP of two Q registers with pre-index writeback onto the stack, read back
// with plain LDRs.  NOTE(review): same pre-index vs PostIndex naming as
// above.
TEST(Arm64InsnTest, StoreZeroPostIndex2) {
  __uint128_t arg1 = MakeUInt128(0x9415573293820485ULL, 0x4212350817391254ULL);
  __uint128_t arg2 = MakeUInt128(0x9749819308714396ULL, 0x6151329420459193ULL);
  __uint128_t res1;
  __uint128_t res2;
  asm("mov v30.16b, %2.16b\n\t"
      "mov v31.16b, %3.16b\n\t"
      "stp q30, q31, [sp, #-32]!\n\t"
      "ldr %q0, [sp, #0]\n\t"
      "ldr %q1, [sp, #16]\n\t"
      "add sp, sp, #32"
      : "=w"(res1), "=w"(res2)
      : "w"(arg1), "w"(arg2)
      : "v30", "v31");

  ASSERT_EQ(res1, arg1);
  ASSERT_EQ(res2, arg2);
}
4852
// Verifies LD2 (multiple structures): de-interleaves 16 bytes of memory into
// two 8-byte vectors — even-indexed bytes go to v0, odd-indexed bytes to v1.
TEST(Arm64InsnTest, Load2MultipleInt8x8) {
  static constexpr uint8_t mem[] = {0x02, 0x16, 0x91, 0x83, 0x37, 0x23, 0x68, 0x03,
                                    0x99, 0x02, 0x79, 0x31, 0x60, 0x64, 0x20, 0x43};
  __uint128_t res[2];
  asm("ld2 {v0.8b, v1.8b}, [%2]\n\t"
      "mov %0.16b, v0.16b\n\t"
      "mov %1.16b, v1.16b"
      : "=w"(res[0]), "=w"(res[1])
      : "r"(mem)
      : "v0", "v1", "memory");
  ASSERT_EQ(res[0], MakeUInt128(0x2060799968379102ULL, 0U));
  ASSERT_EQ(res[1], MakeUInt128(0x4364310203238316ULL, 0U));
}
4880
// Verifies LD3 (multiple structures), 8-bit x 8 lanes: de-interleaves 24 bytes
// of 3-element structures so bytes 0,3,6,... go to v7, 1,4,7,... to v8, etc.
TEST(Arm64InsnTest, Load3MultipleInt8x8) {
  static constexpr uint8_t mem[3 * 8] = {0x32, 0x87, 0x67, 0x03, 0x80, 0x92, 0x52, 0x16,
                                         0x79, 0x07, 0x57, 0x12, 0x04, 0x06, 0x12, 0x37,
                                         0x59, 0x63, 0x27, 0x68, 0x56, 0x74, 0x84, 0x50};
  __uint128_t res[3];
  asm("ld3 {v7.8b-v9.8b}, [%3]\n\t"
      "mov %0.16b, v7.16b\n\t"
      "mov %1.16b, v8.16b\n\t"
      "mov %2.16b, v9.16b"
      : "=w"(res[0]), "=w"(res[1]), "=w"(res[2])
      : "r"(mem)
      : "v7", "v8", "v9", "memory");
  ASSERT_EQ(res[0], MakeUInt128(0x7427370407520332ULL, 0U));
  ASSERT_EQ(res[1], MakeUInt128(0x8468590657168087ULL, 0U));
  ASSERT_EQ(res[2], MakeUInt128(0x5056631212799267ULL, 0U));
}
4897
// Verifies ST3 (multiple structures), 8-bit x 8 lanes: interleaves the three
// source registers element-wise into memory (inverse of Load3MultipleInt8x8).
TEST(Arm64InsnTest, Store3MultipleInt8x8) {
  static constexpr uint64_t arg[3] = {
      0x7427370407520332ULL, 0x8468590657168087ULL, 0x5056631212799267ULL};
  uint64_t res[3];
  asm("mov v0.16b, %0.16b\n\t"
      "mov v1.16b, %1.16b\n\t"
      "mov v2.16b, %2.16b\n\t"
      "st3 {v0.8b-v2.8b}, [%3]"
      :
      : "w"(arg[0]), "w"(arg[1]), "w"(arg[2]), "r"(res)
      : "v0", "v1", "v2", "memory");
  ASSERT_EQ(res[0], 0x1652928003678732ULL);
  ASSERT_EQ(res[1], 0x3712060412570779ULL);
  ASSERT_EQ(res[2], 0x5084745668276359ULL);
}
4913
// Verifies LD3 (multiple structures), 8-bit x 16 lanes: de-interleaves 48
// bytes of 3-element structures across three full quad registers.
TEST(Arm64InsnTest, Load3MultipleInt8x16) {
  static constexpr uint8_t mem[3 * 16] = {
      0x69, 0x20, 0x35, 0x65, 0x63, 0x38, 0x44, 0x96, 0x25, 0x32, 0x83, 0x38,
      0x52, 0x27, 0x99, 0x24, 0x59, 0x60, 0x97, 0x86, 0x59, 0x47, 0x23, 0x88,
      0x91, 0x29, 0x63, 0x62, 0x59, 0x54, 0x32, 0x73, 0x45, 0x44, 0x37, 0x16,
      0x33, 0x55, 0x77, 0x43, 0x29, 0x49, 0x99, 0x28, 0x81, 0x05, 0x57, 0x17};
  __uint128_t res[3];
  asm("ld3 {v7.16b-v9.16b}, [%3]\n\t"
      "mov %0.16b, v7.16b\n\t"
      "mov %1.16b, v8.16b\n\t"
      "mov %2.16b, v9.16b"
      : "=w"(res[0]), "=w"(res[1]), "=w"(res[2])
      : "r"(mem)
      : "v7", "v8", "v9", "memory");
  ASSERT_EQ(res[0], MakeUInt128(0x4797245232446569ULL, 0x599433344326291ULL));
  ASSERT_EQ(res[1], MakeUInt128(0x2386592783966320ULL, 0x5728295537735929ULL));
  ASSERT_EQ(res[2], MakeUInt128(0x8859609938253835ULL, 0x1781497716455463ULL));
}
4932
// Verifies ST3 (multiple structures), 8-bit x 16 lanes: interleaves three quad
// registers into 48 bytes of memory (inverse of Load3MultipleInt8x16).
TEST(Arm64InsnTest, Store3MultipleInt8x16) {
  static constexpr __uint128_t arg[3] = {MakeUInt128(0x4797245232446569ULL, 0x599433344326291ULL),
                                         MakeUInt128(0x2386592783966320ULL, 0x5728295537735929ULL),
                                         MakeUInt128(0x8859609938253835ULL, 0x1781497716455463ULL)};
  __uint128_t res[3];
  asm("mov v0.16b, %0.16b\n\t"
      "mov v1.16b, %1.16b\n\t"
      "mov v2.16b, %2.16b\n\t"
      "st3 {v0.16b-v2.16b}, [%3]"
      :
      : "w"(arg[0]), "w"(arg[1]), "w"(arg[2]), "r"(res)
      : "v0", "v1", "v2", "memory");
  ASSERT_EQ(res[0], MakeUInt128(0x9644386365352069ULL, 0x2499275238833225ULL));
  ASSERT_EQ(res[1], MakeUInt128(0x8823475986976059ULL, 0x7332545962632991ULL));
  ASSERT_EQ(res[2], MakeUInt128(0x4377553316374445ULL, 0x1757058128994929ULL));
}
4949
// Verifies LD3 (multiple structures), 16-bit x 4 lanes. The register list
// {v30.4h-v0.4h} deliberately wraps past v31 to v0 to exercise the modulo-32
// register numbering of consecutive-list operands.
TEST(Arm64InsnTest, Load3MultipleInt16x4) {
  static constexpr uint16_t mem[3 * 4] = {0x2069, 0x6535, 0x3863, 0x9644, 0x3225, 0x3883,
                                          0x2752, 0x2499, 0x6059, 0x8697, 0x4759, 0x8823};
  __uint128_t res[3];
  asm("ld3 {v30.4h-v0.4h}, [%3]\n\t"
      "mov %0.16b, v30.16b\n\t"
      "mov %1.16b, v31.16b\n\t"
      "mov %2.16b, v0.16b"
      : "=w"(res[0]), "=w"(res[1]), "=w"(res[2])
      : "r"(mem)
      : "v30", "v31", "v0", "memory");
  ASSERT_EQ(res[0], MakeUInt128(0x8697275296442069ULL, 0));
  ASSERT_EQ(res[1], MakeUInt128(0x4759249932256535ULL, 0));
  ASSERT_EQ(res[2], MakeUInt128(0x8823605938833863ULL, 0));
}
4975
// Verifies ST3 (multiple structures), 16-bit x 4 lanes: interleaves three
// 64-bit vectors halfword-wise into memory.
TEST(Arm64InsnTest, Store3MultipleInt16x4) {
  static constexpr uint64_t arg[3] = {
      0x8697275296442069ULL, 0x4759249932256535ULL, 0x8823605938833863ULL};
  uint64_t res[3];
  asm("mov v0.16b, %0.16b\n\t"
      "mov v1.16b, %1.16b\n\t"
      "mov v2.16b, %2.16b\n\t"
      "st3 {v0.4h-v2.4h}, [%3]"
      :
      : "w"(arg[0]), "w"(arg[1]), "w"(arg[2]), "r"(res)
      : "v0", "v1", "v2", "memory");
  ASSERT_EQ(res[0], 0x9644386365352069ULL);
  ASSERT_EQ(res[1], 0x2499275238833225ULL);
  ASSERT_EQ(res[2], 0x8823475986976059ULL);
}
4991
// Verifies LD3 (multiple structures), 16-bit x 8 lanes, again using the
// wrap-around register list v30..v0.
TEST(Arm64InsnTest, Load3MultipleInt16x8) {
  static constexpr uint16_t mem[3 * 8] = {0x2069, 0x6535, 0x3863, 0x9644, 0x3225, 0x3883,
                                          0x2752, 0x2499, 0x6059, 0x8697, 0x4759, 0x8823,
                                          0x2991, 0x6263, 0x5459, 0x7332, 0x4445, 0x1637,
                                          0x5533, 0x4377, 0x4929, 0x2899, 0x0581, 0x1757};
  __uint128_t res[3];
  asm("ld3 {v30.8h-v0.8h}, [%3]\n\t"
      "mov %0.16b, v30.16b\n\t"
      "mov %1.16b, v31.16b\n\t"
      "mov %2.16b, v0.16b"
      : "=w"(res[0]), "=w"(res[1]), "=w"(res[2])
      : "r"(mem)
      : "v30", "v31", "v0", "memory");
  ASSERT_EQ(res[0], MakeUInt128(0x8697275296442069ULL, 0x2899553373322991ULL));
  ASSERT_EQ(res[1], MakeUInt128(0x4759249932256535ULL, 0x581437744456263ULL));
  ASSERT_EQ(res[2], MakeUInt128(0x8823605938833863ULL, 0x1757492916375459ULL));
}
5009
// Verifies ST3 (multiple structures), 16-bit x 8 lanes: interleaves three quad
// registers halfword-wise into memory (inverse of Load3MultipleInt16x8).
TEST(Arm64InsnTest, Store3MultipleInt16x8) {
  static constexpr __uint128_t arg[3] = {MakeUInt128(0x8697275296442069ULL, 0x2899553373322991ULL),
                                         MakeUInt128(0x4759249932256535ULL, 0x581437744456263ULL),
                                         MakeUInt128(0x8823605938833863ULL, 0x1757492916375459ULL)};
  __uint128_t res[3];
  asm("mov v0.16b, %0.16b\n\t"
      "mov v1.16b, %1.16b\n\t"
      "mov v2.16b, %2.16b\n\t"
      "st3 {v0.8h-v2.8h}, [%3]"
      :
      : "w"(arg[0]), "w"(arg[1]), "w"(arg[2]), "r"(res)
      : "v0", "v1", "v2", "memory");
  ASSERT_EQ(res[0], MakeUInt128(0x9644386365352069ULL, 0x2499275238833225ULL));
  ASSERT_EQ(res[1], MakeUInt128(0x8823475986976059ULL, 0x7332545962632991ULL));
  ASSERT_EQ(res[2], MakeUInt128(0x4377553316374445ULL, 0x1757058128994929ULL));
}
5026
// Verifies LD3 (multiple structures), 32-bit x 2 lanes, with the wrap-around
// register list v30..v0.
TEST(Arm64InsnTest, Load3MultipleInt32x2) {
  static constexpr uint32_t mem[3 * 2] = {
      0x65352069, 0x96443863, 0x38833225, 0x24992752, 0x86976059, 0x88234759};
  __uint128_t res[3];
  asm("ld3 {v30.2s-v0.2s}, [%3]\n\t"
      "mov %0.16b, v30.16b\n\t"
      "mov %1.16b, v31.16b\n\t"
      "mov %2.16b, v0.16b"
      : "=w"(res[0]), "=w"(res[1]), "=w"(res[2])
      : "r"(mem)
      : "v30", "v31", "v0", "memory");
  ASSERT_EQ(res[0], MakeUInt128(0x2499275265352069ULL, 0));
  ASSERT_EQ(res[1], MakeUInt128(0x8697605996443863ULL, 0));
  ASSERT_EQ(res[2], MakeUInt128(0x8823475938833225ULL, 0));
}
5042
// Verifies ST3 (multiple structures), 32-bit x 2 lanes: interleaves three
// 64-bit vectors word-wise into memory.
TEST(Arm64InsnTest, Store3MultipleInt32x2) {
  static constexpr uint64_t arg[3] = {
      0x2499275265352069ULL, 0x8697605996443863ULL, 0x8823475938833225ULL};
  uint64_t res[3];
  asm("mov v0.16b, %0.16b\n\t"
      "mov v1.16b, %1.16b\n\t"
      "mov v2.16b, %2.16b\n\t"
      "st3 {v0.2s-v2.2s}, [%3]"
      :
      : "w"(arg[0]), "w"(arg[1]), "w"(arg[2]), "r"(res)
      : "v0", "v1", "v2", "memory");
  ASSERT_EQ(res[0], 0x9644386365352069ULL);
  ASSERT_EQ(res[1], 0x2499275238833225ULL);
  ASSERT_EQ(res[2], 0x8823475986976059ULL);
}
5058
// Verifies LD3 (multiple structures), 32-bit x 4 lanes, with the wrap-around
// register list v30..v0.
TEST(Arm64InsnTest, Load3MultipleInt32x4) {
  static constexpr uint32_t mem[3 * 4] = {0x65352069, 0x96443863, 0x38833225, 0x24992752,
                                          0x86976059, 0x88234759, 0x62632991, 0x73325459,
                                          0x16374445, 0x43775533, 0x28994929, 0x17570581};
  __uint128_t res[3];
  asm("ld3 {v30.4s-v0.4s}, [%3]\n\t"
      "mov %0.16b, v30.16b\n\t"
      "mov %1.16b, v31.16b\n\t"
      "mov %2.16b, v0.16b"
      : "=w"(res[0]), "=w"(res[1]), "=w"(res[2])
      : "r"(mem)
      : "v30", "v31", "v0", "memory");
  ASSERT_EQ(res[0], MakeUInt128(0x2499275265352069ULL, 0x4377553362632991ULL));
  ASSERT_EQ(res[1], MakeUInt128(0x8697605996443863ULL, 0x2899492973325459ULL));
  ASSERT_EQ(res[2], MakeUInt128(0x8823475938833225ULL, 0x1757058116374445ULL));
}
5084
// Verifies ST3 (multiple structures), 32-bit x 4 lanes: interleaves three quad
// registers word-wise into memory (inverse of Load3MultipleInt32x4).
TEST(Arm64InsnTest, Store3MultipleInt32x4) {
  static constexpr __uint128_t arg[3] = {MakeUInt128(0x2499275265352069ULL, 0x4377553362632991ULL),
                                         MakeUInt128(0x8697605996443863ULL, 0x2899492973325459ULL),
                                         MakeUInt128(0x8823475938833225ULL, 0x1757058116374445ULL)};
  __uint128_t res[3];
  asm("mov v0.16b, %0.16b\n\t"
      "mov v1.16b, %1.16b\n\t"
      "mov v2.16b, %2.16b\n\t"
      "st3 {v0.4s-v2.4s}, [%3]"
      :
      : "w"(arg[0]), "w"(arg[1]), "w"(arg[2]), "r"(res)
      : "v0", "v1", "v2", "memory");
  ASSERT_EQ(res[0], MakeUInt128(0x9644386365352069ULL, 0x2499275238833225ULL));
  ASSERT_EQ(res[1], MakeUInt128(0x8823475986976059ULL, 0x7332545962632991ULL));
  ASSERT_EQ(res[2], MakeUInt128(0x4377553316374445ULL, 0x1757058128994929ULL));
}
5101
// Verifies LD3 (multiple structures), 64-bit x 2 lanes, with the wrap-around
// register list v30..v0.
TEST(Arm64InsnTest, Load3MultipleInt64x2) {
  static constexpr uint64_t mem[3 * 2] = {0x9644386365352069,
                                          0x2499275238833225,
                                          0x8823475986976059,
                                          0x7332545962632991,
                                          0x4377553316374445,
                                          0x1757058128994929};
  __uint128_t res[3];
  asm("ld3 {v30.2d-v0.2d}, [%3]\n\t"
      "mov %0.16b, v30.16b\n\t"
      "mov %1.16b, v31.16b\n\t"
      "mov %2.16b, v0.16b"
      : "=w"(res[0]), "=w"(res[1]), "=w"(res[2])
      : "r"(mem)
      : "v30", "v31", "v0", "memory");
  ASSERT_EQ(res[0], MakeUInt128(0x9644386365352069ULL, 0x7332545962632991ULL));
  ASSERT_EQ(res[1], MakeUInt128(0x2499275238833225ULL, 0x4377553316374445ULL));
  ASSERT_EQ(res[2], MakeUInt128(0x8823475986976059ULL, 0x1757058128994929ULL));
}
5121
// Verifies ST3 (multiple structures), 64-bit x 2 lanes: interleaves three quad
// registers doubleword-wise into memory (inverse of Load3MultipleInt64x2).
TEST(Arm64InsnTest, Store3MultipleInt64x2) {
  static constexpr __uint128_t arg[3] = {MakeUInt128(0x9644386365352069ULL, 0x7332545962632991ULL),
                                         MakeUInt128(0x2499275238833225ULL, 0x4377553316374445ULL),
                                         MakeUInt128(0x8823475986976059ULL, 0x1757058128994929ULL)};
  __uint128_t res[3];
  asm("mov v0.16b, %0.16b\n\t"
      "mov v1.16b, %1.16b\n\t"
      "mov v2.16b, %2.16b\n\t"
      "st3 {v0.2d-v2.2d}, [%3]"
      :
      : "w"(arg[0]), "w"(arg[1]), "w"(arg[2]), "r"(res)
      : "v0", "v1", "v2", "memory");
  ASSERT_EQ(res[0], MakeUInt128(0x9644386365352069ULL, 0x2499275238833225ULL));
  ASSERT_EQ(res[1], MakeUInt128(0x8823475986976059ULL, 0x7332545962632991ULL));
  ASSERT_EQ(res[2], MakeUInt128(0x4377553316374445ULL, 0x1757058128994929ULL));
}
5138
// Verifies LD4 (multiple structures), 8-bit x 8 lanes: de-interleaves 32 bytes
// of 4-element structures into four 64-bit vectors.
TEST(Arm64InsnTest, Load4MultipleInt8x8) {
  static constexpr uint8_t mem[4 * 8] = {0x69, 0x20, 0x35, 0x65, 0x63, 0x38, 0x44, 0x96,
                                         0x25, 0x32, 0x83, 0x38, 0x52, 0x27, 0x99, 0x24,
                                         0x59, 0x60, 0x97, 0x86, 0x59, 0x47, 0x23, 0x88,
                                         0x91, 0x29, 0x63, 0x62, 0x59, 0x54, 0x32, 0x73};
  __uint128_t res[4];
  asm("ld4 {v7.8b-v10.8b}, [%4]\n\t"
      "mov %0.16b, v7.16b\n\t"
      "mov %1.16b, v8.16b\n\t"
      "mov %2.16b, v9.16b\n\t"
      "mov %3.16b, v10.16b"
      : "=w"(res[0]), "=w"(res[1]), "=w"(res[2]), "=w"(res[3])
      : "r"(mem)
      : "v7", "v8", "v9", "v10", "memory");
  ASSERT_EQ(res[0], MakeUInt128(0x5991595952256369ULL, 0));
  ASSERT_EQ(res[1], MakeUInt128(0x5429476027323820ULL, 0));
  ASSERT_EQ(res[2], MakeUInt128(0x3263239799834435ULL, 0));
  ASSERT_EQ(res[3], MakeUInt128(0x7362888624389665ULL, 0));
}
5158
// Verifies ST4 (multiple structures), 8-bit x 8 lanes: interleaves four
// 64-bit vectors byte-wise into memory (inverse of Load4MultipleInt8x8).
TEST(Arm64InsnTest, Store4MultipleInt8x8) {
  static constexpr uint64_t arg[4] = {
      0x5991595952256369ULL, 0x5429476027323820ULL, 0x3263239799834435ULL, 0x7362888624389665ULL};
  uint64_t res[4];
  asm("mov v7.16b, %0.16b\n\t"
      "mov v8.16b, %1.16b\n\t"
      "mov v9.16b, %2.16b\n\t"
      "mov v10.16b, %3.16b\n\t"
      "st4 {v7.8b-v10.8b}, [%4]"
      :
      : "w"(arg[0]), "w"(arg[1]), "w"(arg[2]), "w"(arg[3]), "r"(res)
      : "v7", "v8", "v9", "v10", "memory");
  ASSERT_EQ(res[0], 0x9644386365352069ULL);
  ASSERT_EQ(res[1], 0x2499275238833225ULL);
  ASSERT_EQ(res[2], 0x8823475986976059ULL);
  ASSERT_EQ(res[3], 0x7332545962632991ULL);
}
5176
// Verifies LD4 (multiple structures), 8-bit x 16 lanes: de-interleaves 64
// bytes of 4-element structures into four quad registers.
TEST(Arm64InsnTest, Load4MultipleInt8x16) {
  static constexpr uint8_t mem[4 * 16] = {
      0x69, 0x20, 0x35, 0x65, 0x63, 0x38, 0x44, 0x96, 0x25, 0x32, 0x83, 0x38, 0x52,
      0x27, 0x99, 0x24, 0x59, 0x60, 0x97, 0x86, 0x59, 0x47, 0x23, 0x88, 0x91, 0x29,
      0x63, 0x62, 0x59, 0x54, 0x32, 0x73, 0x45, 0x44, 0x37, 0x16, 0x33, 0x55, 0x77,
      0x43, 0x29, 0x49, 0x99, 0x28, 0x81, 0x05, 0x57, 0x17, 0x81, 0x98, 0x78, 0x50,
      0x68, 0x14, 0x62, 0x52, 0x32, 0x13, 0x47, 0x52, 0x37, 0x38, 0x11, 0x65};
  __uint128_t res[4];
  asm("ld4 {v7.16b-v10.16b}, [%4]\n\t"
      "mov %0.16b, v7.16b\n\t"
      "mov %1.16b, v8.16b\n\t"
      "mov %2.16b, v9.16b\n\t"
      "mov %3.16b, v10.16b"
      : "=w"(res[0]), "=w"(res[1]), "=w"(res[2]), "=w"(res[3])
      : "r"(mem)
      : "v7", "v8", "v9", "v10", "memory");
  ASSERT_EQ(res[0], MakeUInt128(0x5991595952256369ULL, 0x3732688181293345ULL));
  ASSERT_EQ(res[1], MakeUInt128(0x5429476027323820ULL, 0x3813149805495544ULL));
  ASSERT_EQ(res[2], MakeUInt128(0x3263239799834435ULL, 0x1147627857997737ULL));
  ASSERT_EQ(res[3], MakeUInt128(0x7362888624389665ULL, 0x6552525017284316ULL));
}
5198
// Verifies ST4 (multiple structures), 8-bit x 16 lanes: interleaves four quad
// registers byte-wise into memory (inverse of Load4MultipleInt8x16).
TEST(Arm64InsnTest, Store4MultipleInt8x16) {
  static constexpr __uint128_t arg[4] = {MakeUInt128(0x5991595952256369ULL, 0x3732688181293345ULL),
                                         MakeUInt128(0x5429476027323820ULL, 0x3813149805495544ULL),
                                         MakeUInt128(0x3263239799834435ULL, 0x1147627857997737ULL),
                                         MakeUInt128(0x7362888624389665ULL, 0x6552525017284316ULL)};
  __uint128_t res[4];
  asm("mov v7.16b, %0.16b\n\t"
      "mov v8.16b, %1.16b\n\t"
      "mov v9.16b, %2.16b\n\t"
      "mov v10.16b, %3.16b\n\t"
      "st4 {v7.16b-v10.16b}, [%4]"
      :
      : "w"(arg[0]), "w"(arg[1]), "w"(arg[2]), "w"(arg[3]), "r"(res)
      : "v7", "v8", "v9", "v10", "memory");
  ASSERT_EQ(res[0], MakeUInt128(0x9644386365352069ULL, 0x2499275238833225ULL));
  ASSERT_EQ(res[1], MakeUInt128(0x8823475986976059ULL, 0x7332545962632991ULL));
  ASSERT_EQ(res[2], MakeUInt128(0x4377553316374445ULL, 0x1757058128994929ULL));
  ASSERT_EQ(res[3], MakeUInt128(0x5262146850789881ULL, 0x6511383752471332ULL));
}
5218
// Verifies LD4 (multiple structures), 16-bit x 4 lanes. The register list
// {v30.4h-v1.4h} wraps past v31 to v0/v1 to exercise modulo-32 list encoding.
TEST(Arm64InsnTest, Load4MultipleInt16x4) {
  static constexpr uint16_t mem[4 * 4] = {0x2069, 0x6535, 0x3863, 0x9644, 0x3225, 0x3883,
                                          0x2752, 0x2499, 0x6059, 0x8697, 0x4759, 0x8823,
                                          0x2991, 0x6263, 0x5459, 0x7332};
  __uint128_t res[4];
  asm("ld4 {v30.4h-v1.4h}, [%4]\n\t"
      "mov %0.16b, v30.16b\n\t"
      "mov %1.16b, v31.16b\n\t"
      "mov %2.16b, v0.16b\n\t"
      "mov %3.16b, v1.16b"
      : "=w"(res[0]), "=w"(res[1]), "=w"(res[2]), "=w"(res[3])
      : "r"(mem)
      : "v30", "v31", "v0", "v1", "memory");
  ASSERT_EQ(res[0], MakeUInt128(0x2991605932252069ULL, 0));
  ASSERT_EQ(res[1], MakeUInt128(0x6263869738836535ULL, 0));
  ASSERT_EQ(res[2], MakeUInt128(0x5459475927523863ULL, 0));
  ASSERT_EQ(res[3], MakeUInt128(0x7332882324999644ULL, 0));
}
5250
// Verifies ST4 (multiple structures), 16-bit x 4 lanes, using the wrap-around
// register list v30..v1 (inverse of Load4MultipleInt16x4).
TEST(Arm64InsnTest, Store4MultipleInt16x4) {
  static constexpr uint64_t arg[4] = {
      0x2991605932252069ULL, 0x6263869738836535ULL, 0x5459475927523863ULL, 0x7332882324999644ULL};
  uint64_t res[4];
  asm("mov v30.16b, %0.16b\n\t"
      "mov v31.16b, %1.16b\n\t"
      "mov v0.16b, %2.16b\n\t"
      "mov v1.16b, %3.16b\n\t"
      "st4 {v30.4h-v1.4h}, [%4]"
      :
      : "w"(arg[0]), "w"(arg[1]), "w"(arg[2]), "w"(arg[3]), "r"(res)
      : "v30", "v31", "v0", "v1", "memory");
  ASSERT_EQ(res[0], 0x9644386365352069ULL);
  ASSERT_EQ(res[1], 0x2499275238833225ULL);
  ASSERT_EQ(res[2], 0x8823475986976059ULL);
  ASSERT_EQ(res[3], 0x7332545962632991ULL);
}
5268
// Verifies LD4 (multiple structures), 16-bit x 8 lanes, with the wrap-around
// register list v30..v1.
TEST(Arm64InsnTest, Load4MultipleInt16x8) {
  static constexpr uint16_t mem[4 * 8] = {
      0x2069, 0x6535, 0x3863, 0x9644, 0x3225, 0x3883, 0x2752, 0x2499, 0x6059, 0x8697, 0x4759,
      0x8823, 0x2991, 0x6263, 0x5459, 0x7332, 0x4445, 0x1637, 0x5533, 0x4377, 0x4929, 0x2899,
      0x0581, 0x1757, 0x9881, 0x5078, 0x1468, 0x5262, 0x1332, 0x5247, 0x3837, 0x6511};
  __uint128_t res[4];
  asm("ld4 {v30.8h-v1.8h}, [%4]\n\t"
      "mov %0.16b, v30.16b\n\t"
      "mov %1.16b, v31.16b\n\t"
      "mov %2.16b, v0.16b\n\t"
      "mov %3.16b, v1.16b"
      : "=w"(res[0]), "=w"(res[1]), "=w"(res[2]), "=w"(res[3])
      : "r"(mem)
      : "v30", "v31", "v0", "v1", "memory");
  ASSERT_EQ(res[0], MakeUInt128(0x2991605932252069ULL, 0x1332988149294445ULL));
  ASSERT_EQ(res[1], MakeUInt128(0x6263869738836535ULL, 0x5247507828991637ULL));
  ASSERT_EQ(res[2], MakeUInt128(0x5459475927523863ULL, 0x3837146805815533ULL));
  ASSERT_EQ(res[3], MakeUInt128(0x7332882324999644ULL, 0x6511526217574377ULL));
}
5288
// Verifies ST4 (multiple structures), 16-bit x 8 lanes, with the wrap-around
// register list v30..v1 (inverse of Load4MultipleInt16x8).
TEST(Arm64InsnTest, Store4MultipleInt16x8) {
  static constexpr __uint128_t arg[4] = {MakeUInt128(0x2991605932252069ULL, 0x1332988149294445ULL),
                                         MakeUInt128(0x6263869738836535ULL, 0x5247507828991637ULL),
                                         MakeUInt128(0x5459475927523863ULL, 0x3837146805815533ULL),
                                         MakeUInt128(0x7332882324999644ULL, 0x6511526217574377ULL)};
  __uint128_t res[4];
  asm("mov v30.16b, %0.16b\n\t"
      "mov v31.16b, %1.16b\n\t"
      "mov v0.16b, %2.16b\n\t"
      "mov v1.16b, %3.16b\n\t"
      "st4 {v30.8h-v1.8h}, [%4]"
      :
      : "w"(arg[0]), "w"(arg[1]), "w"(arg[2]), "w"(arg[3]), "r"(res)
      : "v30", "v31", "v0", "v1", "memory");
  ASSERT_EQ(res[0], MakeUInt128(0x9644386365352069ULL, 0x2499275238833225ULL));
  ASSERT_EQ(res[1], MakeUInt128(0x8823475986976059ULL, 0x7332545962632991ULL));
  ASSERT_EQ(res[2], MakeUInt128(0x4377553316374445ULL, 0x1757058128994929ULL));
  ASSERT_EQ(res[3], MakeUInt128(0x5262146850789881ULL, 0x6511383752471332ULL));
}
5308
// Verifies LD4 (multiple structures), 32-bit x 2 lanes, with the wrap-around
// register list v30..v1.
TEST(Arm64InsnTest, Load4MultipleInt32x2) {
  static constexpr uint32_t mem[4 * 2] = {0x65352069, 0x96443863, 0x38833225, 0x24992752,
                                          0x86976059, 0x88234759, 0x62632991, 0x73325459};
  __uint128_t res[4];
  asm("ld4 {v30.2s-v1.2s}, [%4]\n\t"
      "mov %0.16b, v30.16b\n\t"
      "mov %1.16b, v31.16b\n\t"
      "mov %2.16b, v0.16b\n\t"
      "mov %3.16b, v1.16b"
      : "=w"(res[0]), "=w"(res[1]), "=w"(res[2]), "=w"(res[3])
      : "r"(mem)
      : "v30", "v31", "v0", "v1", "memory");
  ASSERT_EQ(res[0], MakeUInt128(0x8697605965352069ULL, 0));
  ASSERT_EQ(res[1], MakeUInt128(0x8823475996443863ULL, 0));
  ASSERT_EQ(res[2], MakeUInt128(0x6263299138833225ULL, 0));
  ASSERT_EQ(res[3], MakeUInt128(0x7332545924992752ULL, 0));
}
5332
// Verifies ST4 (multiple structures), 32-bit x 2 lanes, with the wrap-around
// register list v30..v1 (inverse of Load4MultipleInt32x2).
TEST(Arm64InsnTest, Store4MultipleInt32x2) {
  static constexpr uint64_t arg[4] = {
      0x8697605965352069ULL, 0x8823475996443863ULL, 0x6263299138833225ULL, 0x7332545924992752ULL};
  uint64_t res[4];
  asm("mov v30.16b, %0.16b\n\t"
      "mov v31.16b, %1.16b\n\t"
      "mov v0.16b, %2.16b\n\t"
      "mov v1.16b, %3.16b\n\t"
      "st4 {v30.2s-v1.2s}, [%4]"
      :
      : "w"(arg[0]), "w"(arg[1]), "w"(arg[2]), "w"(arg[3]), "r"(res)
      : "v30", "v31", "v0", "v1", "memory");
  ASSERT_EQ(res[0], 0x9644386365352069ULL);
  ASSERT_EQ(res[1], 0x2499275238833225ULL);
  ASSERT_EQ(res[2], 0x8823475986976059ULL);
  ASSERT_EQ(res[3], 0x7332545962632991ULL);
}
5350
// Verifies LD4 (multiple structures), 32-bit x 4 lanes, with the wrap-around
// register list v30..v1.
TEST(Arm64InsnTest, Load4MultipleInt32x4) {
  static constexpr uint32_t mem[4 * 4] = {0x65352069, 0x96443863, 0x38833225, 0x24992752,
                                          0x86976059, 0x88234759, 0x62632991, 0x73325459,
                                          0x16374445, 0x43775533, 0x28994929, 0x17570581,
                                          0x50789881, 0x52621468, 0x52471332, 0x65113837};
  __uint128_t res[4];
  asm("ld4 {v30.4s-v1.4s}, [%4]\n\t"
      "mov %0.16b, v30.16b\n\t"
      "mov %1.16b, v31.16b\n\t"
      "mov %2.16b, v0.16b\n\t"
      "mov %3.16b, v1.16b"
      : "=w"(res[0]), "=w"(res[1]), "=w"(res[2]), "=w"(res[3])
      : "r"(mem)
      : "v30", "v31", "v0", "v1", "memory");
  ASSERT_EQ(res[0], MakeUInt128(0x8697605965352069ULL, 0x5078988116374445ULL));
  ASSERT_EQ(res[1], MakeUInt128(0x8823475996443863ULL, 0x5262146843775533ULL));
  ASSERT_EQ(res[2], MakeUInt128(0x6263299138833225ULL, 0x5247133228994929ULL));
  ASSERT_EQ(res[3], MakeUInt128(0x7332545924992752ULL, 0x6511383717570581ULL));
}
5382
// Verifies ST4 (multiple structures), 32-bit x 4 lanes, with the wrap-around
// register list v30..v1 (inverse of Load4MultipleInt32x4).
TEST(Arm64InsnTest, Store4MultipleInt32x4) {
  static constexpr __uint128_t arg[4] = {MakeUInt128(0x8697605965352069ULL, 0x5078988116374445ULL),
                                         MakeUInt128(0x8823475996443863ULL, 0x5262146843775533ULL),
                                         MakeUInt128(0x6263299138833225ULL, 0x5247133228994929ULL),
                                         MakeUInt128(0x7332545924992752ULL, 0x6511383717570581ULL)};
  __uint128_t res[4];
  asm("mov v30.16b, %0.16b\n\t"
      "mov v31.16b, %1.16b\n\t"
      "mov v0.16b, %2.16b\n\t"
      "mov v1.16b, %3.16b\n\t"
      "st4 {v30.4s-v1.4s}, [%4]"
      :
      : "w"(arg[0]), "w"(arg[1]), "w"(arg[2]), "w"(arg[3]), "r"(res)
      : "v30", "v31", "v0", "v1", "memory");
  ASSERT_EQ(res[0], MakeUInt128(0x9644386365352069ULL, 0x2499275238833225ULL));
  ASSERT_EQ(res[1], MakeUInt128(0x8823475986976059ULL, 0x7332545962632991ULL));
  ASSERT_EQ(res[2], MakeUInt128(0x4377553316374445ULL, 0x1757058128994929ULL));
  ASSERT_EQ(res[3], MakeUInt128(0x5262146850789881ULL, 0x6511383752471332ULL));
}
5402
// Verifies LD4 (multiple structures), 64-bit x 2 lanes, with the wrap-around
// register list v30..v1.
TEST(Arm64InsnTest, Load4MultipleInt64x2) {
  static constexpr uint64_t mem[4 * 2] = {0x9644386365352069,
                                          0x2499275238833225,
                                          0x8823475986976059,
                                          0x7332545962632991,
                                          0x4377553316374445,
                                          0x1757058128994929,
                                          0x5262146850789881,
                                          0x6511383752471332};
  __uint128_t res[4];
  asm("ld4 {v30.2d-v1.2d}, [%4]\n\t"
      "mov %0.16b, v30.16b\n\t"
      "mov %1.16b, v31.16b\n\t"
      "mov %2.16b, v0.16b\n\t"
      "mov %3.16b, v1.16b"
      : "=w"(res[0]), "=w"(res[1]), "=w"(res[2]), "=w"(res[3])
      : "r"(mem)
      : "v30", "v31", "v0", "v1", "memory");
  ASSERT_EQ(res[0], MakeUInt128(0x9644386365352069ULL, 0x4377553316374445ULL));
  ASSERT_EQ(res[1], MakeUInt128(0x2499275238833225ULL, 0x1757058128994929ULL));
  ASSERT_EQ(res[2], MakeUInt128(0x8823475986976059ULL, 0x5262146850789881ULL));
  ASSERT_EQ(res[3], MakeUInt128(0x7332545962632991ULL, 0x6511383752471332ULL));
}
5426
// Verifies ST4 (multiple structures), 64-bit x 2 lanes, with the wrap-around
// register list v30..v1 (inverse of Load4MultipleInt64x2).
TEST(Arm64InsnTest, Store4MultipleInt64x2) {
  static constexpr __uint128_t arg[4] = {MakeUInt128(0x9644386365352069ULL, 0x4377553316374445ULL),
                                         MakeUInt128(0x2499275238833225ULL, 0x1757058128994929ULL),
                                         MakeUInt128(0x8823475986976059ULL, 0x5262146850789881ULL),
                                         MakeUInt128(0x7332545962632991ULL, 0x6511383752471332ULL)};
  __uint128_t res[4];
  asm("mov v30.16b, %0.16b\n\t"
      "mov v31.16b, %1.16b\n\t"
      "mov v0.16b, %2.16b\n\t"
      "mov v1.16b, %3.16b\n\t"
      "st4 {v30.2d-v1.2d}, [%4]"
      :
      : "w"(arg[0]), "w"(arg[1]), "w"(arg[2]), "w"(arg[3]), "r"(res)
      : "v30", "v31", "v0", "v1", "memory");
  ASSERT_EQ(res[0], MakeUInt128(0x9644386365352069ULL, 0x2499275238833225ULL));
  ASSERT_EQ(res[1], MakeUInt128(0x8823475986976059ULL, 0x7332545962632991ULL));
  ASSERT_EQ(res[2], MakeUInt128(0x4377553316374445ULL, 0x1757058128994929ULL));
  ASSERT_EQ(res[3], MakeUInt128(0x5262146850789881ULL, 0x6511383752471332ULL));
}
5446
// Verifies LD1R: loads one byte and replicates it to all 8 lanes of the
// destination's lower 64 bits.
TEST(Arm64InsnTest, Load1ReplicateInt8x8) {
  static constexpr uint8_t mem = 0x81U;
  __uint128_t res;
  asm("ld1r {%0.8b}, [%1]" : "=w"(res) : "r"(&mem) : "memory");
  ASSERT_EQ(res, MakeUInt128(0x8181818181818181ULL, 0U));
}
5453
// Verifies LD2R: loads two consecutive halfwords and replicates the first to
// every lane of v6 and the second to every lane of v7.
TEST(Arm64InsnTest, Load2ReplicateInt16x8) {
  static constexpr uint16_t mem[] = {0x7904, 0x8715};
  __uint128_t res[2];
  asm("ld2r {v6.8h, v7.8h}, [%2]\n\t"
      "mov %0.16b, v6.16b\n\t"
      "mov %1.16b, v7.16b"
      : "=w"(res[0]), "=w"(res[1])
      : "r"(mem)
      : "v6", "v7", "memory");
  ASSERT_EQ(res[0], MakeUInt128(0x7904790479047904ULL, 0x7904790479047904ULL));
  ASSERT_EQ(res[1], MakeUInt128(0x8715871587158715ULL, 0x8715871587158715ULL));
}
5466
// Verifies LD3R: loads three consecutive words and broadcasts each across all
// four lanes of its destination; the register list wraps v30..v0.
TEST(Arm64InsnTest, Load3ReplicateInt32x4) {
  static constexpr uint32_t mem[] = {0x78713710U, 0x60510637U, 0x95558588U};
  __uint128_t res[3];
  asm("ld3r {v30.4s-v0.4s}, [%3]\n\t"
      "mov %0.16b, v30.16b\n\t"
      "mov %1.16b, v31.16b\n\t"
      "mov %2.16b, v0.16b"
      : "=w"(res[0]), "=w"(res[1]), "=w"(res[2])
      : "r"(mem)
      : "v30", "v31", "v0", "memory");
  ASSERT_EQ(res[0], MakeUInt128(0x7871371078713710ULL, 0x7871371078713710ULL));
  ASSERT_EQ(res[1], MakeUInt128(0x6051063760510637ULL, 0x6051063760510637ULL));
  ASSERT_EQ(res[2], MakeUInt128(0x9555858895558588ULL, 0x9555858895558588ULL));
}
5481
// Verifies LD4R: loads four consecutive doublewords and broadcasts each into
// both lanes of its destination; the register list wraps v29..v0.
TEST(Arm64InsnTest, Load4ReplicateInt64x2) {
  static constexpr uint64_t mem[] = {
      0x8150781468526213ULL, 0x3252473837651192ULL, 0x9901561091897779ULL, 0x2200870579339646ULL};
  __uint128_t res[4];
  asm("ld4r {v29.2d-v0.2d}, [%4]\n\t"
      "mov %0.16b, v29.16b\n\t"
      "mov %1.16b, v30.16b\n\t"
      "mov %2.16b, v31.16b\n\t"
      "mov %3.16b, v0.16b"
      : "=w"(res[0]), "=w"(res[1]), "=w"(res[2]), "=w"(res[3])
      : "r"(mem)
      : "v29", "v30", "v31", "v0", "memory");
  ASSERT_EQ(res[0], MakeUInt128(mem[0], mem[0]));
  ASSERT_EQ(res[1], MakeUInt128(mem[1], mem[1]));
  ASSERT_EQ(res[2], MakeUInt128(mem[2], mem[2]));
  ASSERT_EQ(res[3], MakeUInt128(mem[3], mem[3]));
}
5499
// Verifies LDNP (load pair, non-temporal hint) into two SIMD D registers.
// NOTE(review): "Temporarl" is a typo for "Temporal"; the name is kept so any
// --gtest_filter expressions referencing the existing test keep matching.
TEST(Arm64InsnTest, LoadPairNonTemporarlInt64) {
  static constexpr uint64_t mem[] = {0x3843601737474215ULL, 0x2476085152099016ULL};
  __uint128_t res[2];
  asm("ldnp %d0, %d1, [%2]" : "=w"(res[0]), "=w"(res[1]) : "r"(mem) : "memory");
  // Each D-register load zero-extends into the 128-bit destination.
  ASSERT_EQ(res[0], MakeUInt128(0x3843601737474215ULL, 0U));
  ASSERT_EQ(res[1], MakeUInt128(0x2476085152099016ULL, 0U));
}
5507
// MOVI (32-bit variant): the 8-bit immediate is placed in each word lane.
TEST(Arm64InsnTest, MoviVector2S) {
  const __uint128_t actual = ASM_INSN_WRAP_FUNC_W_RES("movi %0.2s, #0xe4")();
  ASSERT_EQ(actual, MakeUInt128(0x000000e4000000e4ULL, 0x0000000000000000ULL));
}
5512
// MOVI (64-bit variant): the immediate expands into both doubleword lanes.
TEST(Arm64InsnTest, MoviVector2D) {
  const __uint128_t actual = ASM_INSN_WRAP_FUNC_W_RES("movi %0.2d, #0xff")();
  ASSERT_EQ(actual, MakeUInt128(0x00000000000000ffULL, 0x00000000000000ffULL));
}
5517
// MOVI (8-bit variant): the immediate fills every byte of the lower 64 bits.
TEST(Arm64InsnTest, MoviVector8B) {
  const __uint128_t actual = ASM_INSN_WRAP_FUNC_W_RES("movi %0.8b, #0xda")();
  ASSERT_EQ(actual, MakeUInt128(0xdadadadadadadadaULL, 0x0000000000000000ULL));
}
5522
// MOVI (16-bit, LSL #8): the immediate is left-shifted into the high byte of
// each halfword lane.
TEST(Arm64InsnTest, MoviVector4HShiftBy8) {
  const __uint128_t actual = ASM_INSN_WRAP_FUNC_W_RES("movi %0.4h, #0xd1, lsl #8")();
  ASSERT_EQ(actual, MakeUInt128(0xd100d100d100d100ULL, 0x0000000000000000ULL));
}
5527
// MOVI (32-bit, MSL #16): shifting-ones mode fills the vacated low bits with
// ones instead of zeros.
TEST(Arm64InsnTest, MoviVector2SShiftBy16) {
  const __uint128_t actual = ASM_INSN_WRAP_FUNC_W_RES("movi %0.2s, #0x37, msl #16")();
  ASSERT_EQ(actual, MakeUInt128(0x0037ffff0037ffffULL, 0x0000000000000000ULL));
}
5532
// MVNI (16-bit variant): each halfword lane gets the bitwise inverse of the
// immediate (~0x00bc == 0xff43).
TEST(Arm64InsnTest, MvniVector4H) {
  const __uint128_t actual = ASM_INSN_WRAP_FUNC_W_RES("mvni %0.4h, #0xbc")();
  ASSERT_EQ(actual, MakeUInt128(0xff43ff43ff43ff43ULL, 0x0000000000000000ULL));
}
5537
// MVNI (32-bit, LSL #8): the shifted immediate is inverted into each word
// lane (~0x00002400 == 0xffffdbff).
TEST(Arm64InsnTest, MvniVector2SShiftBy8) {
  const __uint128_t actual = ASM_INSN_WRAP_FUNC_W_RES("mvni %0.2s, #0x24, lsl #8")();
  ASSERT_EQ(actual, MakeUInt128(0xffffdbffffffdbffULL, 0x0000000000000000ULL));
}
5542
// MVNI (32-bit, MSL #16): shifting-ones immediate, inverted per word lane
// (~0x0025ffff == 0xffda0000).
TEST(Arm64InsnTest, MvniVector2SShiftBy16) {
  const __uint128_t actual = ASM_INSN_WRAP_FUNC_W_RES("mvni %0.2s, #0x25, msl #16")();
  ASSERT_EQ(actual, MakeUInt128(0xffda0000ffda0000ULL, 0x0000000000000000ULL));
}
5547
// Verifies LDR (SIMD quad, register-offset addressing): loads the second
// array element via base pointer + 16-byte register offset.
TEST(Arm64InsnTest, LoadSimdRegPlusReg) {
  __uint128_t array[] = {
      MakeUInt128(0x6517980694113528ULL, 0x0131470130478164ULL),
      MakeUInt128(0x8672422924654366ULL, 0x8009806769282382ULL),
  };
  uint64_t offset = 16;
  __uint128_t rd;

  asm("ldr %q0, [%1, %2]" : "=w"(rd) : "r"(array), "r"(offset) : "memory");

  ASSERT_EQ(rd, MakeUInt128(0x8672422924654366ULL, 0x8009806769282382ULL));
}
5560
// XTN narrows each 16-bit lane to its low byte, packing the eight results
// into the lower 64 bits of the destination.
TEST(Arm64InsnTest, ExtractNarrowI16x8ToI8x8) {
  const __uint128_t operand = MakeUInt128(0x0123456789abcdefULL, 0x0011223344556677ULL);
  const __uint128_t actual = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("xtn %0.8b, %1.8h")(operand);
  ASSERT_EQ(actual, MakeUInt128(0x113355772367abefULL, 0x0ULL));
}
5566
// XTN narrows each 32-bit lane to its low halfword, packing the four results
// into the lower 64 bits of the destination.
TEST(Arm64InsnTest, ExtractNarrowI32x4ToI16x4) {
  const __uint128_t operand = MakeUInt128(0x0123456789abcdefULL, 0x0011223344556677ULL);
  const __uint128_t actual = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("xtn %0.4h, %1.4s")(operand);
  ASSERT_EQ(actual, MakeUInt128(0x223366774567cdefULL, 0x0ULL));
}
5572
// XTN narrows each 64-bit lane to its low word, packing the two results into
// the lower 64 bits of the destination.
TEST(Arm64InsnTest, ExtractNarrowI64x2ToI32x2) {
  const __uint128_t operand = MakeUInt128(0x0123456789abcdefULL, 0x0011223344556677ULL);
  const __uint128_t actual = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("xtn %0.2s, %1.2d")(operand);
  ASSERT_EQ(actual, MakeUInt128(0x4455667789abcdefULL, 0x0ULL));
}
5578
// XTN2 writes the narrowed result into the UPPER half of the destination
// while the lower half (seeded from the second argument) is preserved.
TEST(Arm64InsnTest, ExtractNarrow2Int16x8ToInt8x16) {
  const __uint128_t wide_src = MakeUInt128(0x1844396582533754ULL, 0x3885690941130315ULL);
  const __uint128_t dest_seed = MakeUInt128(0x6121865619673378ULL, 0x6236256125216320ULL);
  const __uint128_t actual =
      ASM_INSN_WRAP_FUNC_W_RES_W0_ARG("xtn2 %0.16b, %1.8h")(wide_src, dest_seed);
  ASSERT_EQ(actual, MakeUInt128(0x6121865619673378ULL, 0x8509131544655354ULL));
}
5585
// Verifies LDR (literal) into a SIMD register via an external assembly helper.
TEST(Arm64InsnTest, LoadLiteralSimd) {
  // We call an external assembly function to perform LDR literal because we
  // need to place the literal in .rodata. The literal placed in .text would
  // trigger a segfault.
  ASSERT_EQ(get_fp64_literal(), 0x0123456789abcdefULL);
}
5592
// ABS (scalar, 64-bit): |-3| == 3; the upper 64 bits of the input are ignored
// and the result is zero-extended.
TEST(Arm64InsnTest, AbsInt64x1) {
  const __uint128_t operand = MakeUInt128(0xfffffffffffffffdULL, 0xdeadbeef01234567ULL);
  const __uint128_t actual = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("abs %d0, %d1")(operand);
  ASSERT_EQ(actual, MakeUInt128(0x0000000000000003ULL, 0x0ULL));
}
5598
TEST(Arm64InsnTest,AbsInt8x8)5599 TEST(Arm64InsnTest, AbsInt8x8) {
5600 __uint128_t arg = MakeUInt128(0x0001027e7f8081ffULL, 0x0123456789abcdefULL);
5601 __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("abs %0.8b, %1.8b")(arg);
5602 ASSERT_EQ(res, MakeUInt128(0x0001027e7f807f01ULL, 0x0ULL));
5603 }
5604
// Sanity check that the highest SIMD register (v31) is addressable: fill it
// via MOVI and copy it out through an allocated output register.
TEST(Arm64InsnTest, UseV31) {
  __uint128_t res;

  asm("movi v31.2d, #0xffffffffffffffff\n\t"
      "mov %0.16b, v31.16b"
      : "=w"(res)
      :
      : "v31");

  ASSERT_EQ(res, MakeUInt128(~0ULL, ~0ULL));
}

// ADDHN: adds 16-bit lane pairs and keeps the high 8 bits of each sum; the
// narrowed result fills the lower half of the destination.
TEST(Arm64InsnTest, AddHighNarrowInt16x8) {
  __uint128_t arg1 = MakeUInt128(0x2296617119637792ULL, 0x1337575114959501ULL);
  __uint128_t arg2 = MakeUInt128(0x0941214722131794ULL, 0x7647772622414254ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("addhn %0.8b, %1.8h, %2.8h")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x89ce36d72b823b8fULL, 0x0ULL));
}

// ADDHN2 writes the narrowed high halves into the upper half of the
// destination, preserving the lower half (arg3).
TEST(Arm64InsnTest, AddHighNarrowUpperInt16x8) {
  __uint128_t arg1 = MakeUInt128(0x6561809377344403ULL, 0x0707469211201913ULL);
  __uint128_t arg2 = MakeUInt128(0x6095752706957220ULL, 0x9175671167229109ULL);
  __uint128_t arg3 = MakeUInt128(0x5797877185560845ULL, 0x5296541266540853ULL);
  __uint128_t res =
      ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("addhn2 %0.16b, %1.8h, %2.8h")(arg1, arg2, arg3);
  ASSERT_EQ(res, MakeUInt128(0x5797877185560845ULL, 0x98ad78aac5f57db6ULL));
}

// SUBHN: subtracts 16-bit lane pairs and keeps the high 8 bits of each
// difference.
TEST(Arm64InsnTest, SubHighNarrowInt16x8) {
  __uint128_t arg1 = MakeUInt128(0x4978189312978482ULL, 0x1682998948722658ULL);
  __uint128_t arg2 = MakeUInt128(0x1210835791513698ULL, 0x8209144421006751ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("subhn %0.8b, %1.8h, %2.8h")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x948527bf3795814dULL, 0x0ULL));
}

// SUBHN2: as SUBHN, but the result goes to the upper half of the destination.
TEST(Arm64InsnTest, SubHighNarrowUpperInt16x8) {
  __uint128_t arg1 = MakeUInt128(0x5324944166803962ULL, 0x6579787718556084ULL);
  __uint128_t arg2 = MakeUInt128(0x1066587969981635ULL, 0x7473638405257145ULL);
  __uint128_t arg3 = MakeUInt128(0x3142980919065925ULL, 0x0937221696461515ULL);
  __uint128_t res =
      ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("subhn2 %0.16b, %1.8h, %2.8h")(arg1, arg2, arg3);
  ASSERT_EQ(res, MakeUInt128(0x3142980919065925ULL, 0xf11413ef423bfc23ULL));
}

// RADDHN: like ADDHN but rounds (adds half an LSB) before taking the high
// half.
TEST(Arm64InsnTest, RoundingAddHighNarrowInt16x8) {
  __uint128_t arg1 = MakeUInt128(0x8039626579787718ULL, 0x5560845529654126ULL);
  __uint128_t arg2 = MakeUInt128(0x3440171274947042ULL, 0x0562230538994561ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("raddhn %0.8b, %1.8h, %2.8h")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x5ba76287b479eee7ULL, 0x0000000000000000ULL));
}

// RSUBHN: like SUBHN but with rounding.
TEST(Arm64InsnTest, RoundingSubHighNarrowInt16x8) {
  __uint128_t arg1 = MakeUInt128(0x3063432858785698ULL, 0x3052358089330657ULL);
  __uint128_t arg2 = MakeUInt128(0x0216471550979259ULL, 0x2309907965473761ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("rsubhn %0.8b, %1.8h, %2.8h")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x0da524cf2efc08c4ULL, 0x0000000000000000ULL));
}
5662
// ADDP (scalar): adds the two 64-bit lanes of the source into a single D
// register. NOTE(review): the test name says "Int8x2" but the instruction
// operates on 2x64-bit lanes (%d0, .2d) — name looks like a copy-paste
// leftover; behavior under test is the 64-bit pairwise add.
TEST(Arm64InsnTest, ScalarPairwiseAddInt8x2) {
  __uint128_t arg = MakeUInt128(0x6257591633303910ULL, 0x7225383742182140ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("addp %d0, %1.2d")(arg);
  ASSERT_EQ(res, MakeUInt128(0xd47c914d75485a50ULL, 0x0000000000000000ULL));
}

// ADDV: sum of all eight 8-bit lanes (modulo 256) into a B register.
TEST(Arm64InsnTest, AddAcrossInt8x8) {
  __uint128_t arg = MakeUInt128(0x0681216028764962ULL, 0x8674460477464915ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("addv %b0, %1.8b")(arg);
  ASSERT_EQ(res, MakeUInt128(0x51ULL, 0x0ULL));
}

// SADDLV: signed sum of all 16-bit lanes, widened into a 32-bit result.
TEST(Arm64InsnTest, SignedAddLongAcrossInt16x8) {
  __uint128_t arg = MakeUInt128(0x9699557377273756ULL, 0x6761552711392258ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("saddlv %s0, %1.8h")(arg);
  ASSERT_EQ(res, MakeUInt128(0x0000000000018aa2ULL, 0x0000000000000000ULL));
}

// UADDLV: unsigned sum of all 16-bit lanes, widened into a 32-bit result.
TEST(Arm64InsnTest, UnsignedAddLongAcrossInt16x8) {
  __uint128_t arg = MakeUInt128(0x7986396522961312ULL, 0x8017826797172898ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("uaddlv %s0, %1.8h")(arg);
  ASSERT_EQ(res, MakeUInt128(0x000000000002aac0ULL, 0x0000000000000000ULL));
}

// SMAXV: signed maximum across all 16-bit lanes.
TEST(Arm64InsnTest, SignedMaximumAcrossInt16x8) {
  __uint128_t arg = MakeUInt128(0x8482065967379473ULL, 0x1680864156456505ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("smaxv %h0, %1.8h")(arg);
  ASSERT_EQ(res, MakeUInt128(0x0000000000006737ULL, 0x0000000000000000ULL));
}

// SMINV: signed minimum across all 16-bit lanes (0x9699 is negative).
TEST(Arm64InsnTest, SignedMinimumAcrossInt16x8) {
  __uint128_t arg = MakeUInt128(0x6772530431825197ULL, 0x5791679296996504ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("sminv %h0, %1.8h")(arg);
  ASSERT_EQ(res, MakeUInt128(0x0000000000009699ULL, 0x0000000000000000ULL));
}

// UMAXV: unsigned maximum across all 16-bit lanes.
TEST(Arm64InsnTest, UnsignedMaximumAcrossInt16x8) {
  __uint128_t arg = MakeUInt128(0x6500378070466126ULL, 0x4706021457505793ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("umaxv %h0, %1.8h")(arg);
  ASSERT_EQ(res, MakeUInt128(0x0000000000007046ULL, 0x0000000000000000ULL));
}

// UMINV: unsigned minimum across all 16-bit lanes.
TEST(Arm64InsnTest, UnsignedMinimumAcrossInt16x8) {
  __uint128_t arg = MakeUInt128(0x5223572397395128ULL, 0x8181640597859142ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("uminv %h0, %1.8h")(arg);
  ASSERT_EQ(res, MakeUInt128(0x0000000000005128ULL, 0x0000000000000000ULL));
}

// CLZ per 8-bit lane.
TEST(Arm64InsnTest, CountLeadingZerosI8x8) {
  __uint128_t arg = MakeUInt128(0x1452635608277857ULL, 0x7134275778960917ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("clz %0.8b, %1.8b")(arg);
  ASSERT_EQ(res, MakeUInt128(0x0301010104020101ULL, 0x0000000000000000ULL));
}

// CLS: count of leading bits equal to the sign bit (excluding the sign bit
// itself), per 8-bit lane.
TEST(Arm64InsnTest, CountLeadingSignBitsI8x8) {
  __uint128_t arg = MakeUInt128(0x8925892354201995ULL, 0x6112129021960864ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("cls %0.8b, %1.8b")(arg);
  ASSERT_EQ(res, MakeUInt128(0x0001000100010200ULL, 0x0000000000000000ULL));
}

// CNT: population count per 8-bit lane.
TEST(Arm64InsnTest, Cnt) {
  __uint128_t arg = MakeUInt128(0x9835484875625298ULL, 0x7524238730775595ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("cnt %0.16b, %1.16b")(arg);
  ASSERT_EQ(res, MakeUInt128(0x0304020205030303ULL, 0x0502030402060404ULL));
}
5728
// MOV (scalar, element): extract byte lane 5 (0x34) into a B register; the
// rest of the destination is zeroed.
TEST(Arm64InsnTest, SimdScalarMove) {
  __uint128_t arg = MakeUInt128(0x1433345477624168ULL, 0x6251898356948556ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("mov %b0, %1.b[5]")(arg);
  ASSERT_EQ(res, MakeUInt128(0x0000000000000034ULL, 0x0000000000000000ULL));
}

// DUP (element): broadcast byte lane 5 (0x64) to all eight lanes.
TEST(Arm64InsnTest, SimdVectorElemDuplicate) {
  __uint128_t arg = MakeUInt128(0x3021647155097925ULL, 0x9230990796547376ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("dup %0.8b, %1.b[5]")(arg);
  ASSERT_EQ(res, MakeUInt128(0x6464646464646464ULL, 0x0000000000000000ULL));
}

// DUP (element): broadcast halfword lane 7 (0x7726, from the upper half of the
// source) to all four lanes.
TEST(Arm64InsnTest, SimdVectorElemDuplicateInt16AtIndex7) {
  __uint128_t arg = MakeUInt128(0x2582262052248940ULL, 0x7726719478268482ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("dup %0.4h, %1.h[7]")(arg);
  ASSERT_EQ(res, MakeUInt128(0x7726772677267726ULL, 0x0000000000000000ULL));
}

// MOV (element): insert word lane 1 of arg1 into word lane 2 of the
// destination (arg2 preloaded), leaving the other lanes intact.
TEST(Arm64InsnTest, SimdVectorElemInsert) {
  __uint128_t arg1 = MakeUInt128(0x7120844335732654ULL, 0x8938239119325974ULL);
  __uint128_t arg2 = MakeUInt128(0x7656180937734440ULL, 0x3070746921120191ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W0_ARG("mov %0.s[2], %1.s[1]")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x7656180937734440ULL, 0x3070746971208443ULL));
}

// NEG (scalar D): two's-complement negation. The second check exercises the
// INT64_MIN edge case, which negates to itself.
TEST(Arm64InsnTest, NegateInt64x1) {
  constexpr auto AsmNeg = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("neg %d0, %d1");
  __uint128_t arg1 = MakeUInt128(0x8389522868478312ULL, 0x3552658213144957ULL);
  ASSERT_EQ(AsmNeg(arg1), MakeUInt128(0x7c76add797b87ceeULL, 0x0000000000000000ULL));

  __uint128_t arg2 = MakeUInt128(1ULL << 63, 0U);
  ASSERT_EQ(AsmNeg(arg2), MakeUInt128(1ULL << 63, 0U));
}
5762
// NEG per 16-bit lane across the full 128-bit register.
TEST(Arm64InsnTest, NegateInt16x8) {
  __uint128_t arg = MakeUInt128(0x4411010446823252ULL, 0x7162010526522721ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("neg %0.8h, %1.8h")(arg);
  ASSERT_EQ(res, MakeUInt128(0xbbeffefcb97ecdaeULL, 0x8e9efefbd9aed8dfULL));
}

// NOT (bitwise invert) on the lower 64 bits; upper half zeroed.
TEST(Arm64InsnTest, NotI8x8) {
  __uint128_t arg = MakeUInt128(0x6205647693125705ULL, 0x8635662018558100ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("not %0.8b, %1.8b")(arg);
  ASSERT_EQ(res, MakeUInt128(0x9dfa9b896ceda8faULL, 0x0000000000000000ULL));
}

// RBIT: reverse the bit order within each byte.
TEST(Arm64InsnTest, RbitInt8x8) {
  __uint128_t arg = MakeUInt128(0x4713296210734043ULL, 0x7518957359614589ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("rbit %0.8b, %1.8b")(arg);
  ASSERT_EQ(res, MakeUInt128(0xe2c8944608ce02c2ULL, 0x0000000000000000ULL));
}

// REV16: swap the bytes within each 16-bit element.
TEST(Arm64InsnTest, Rev16Int8x16) {
  __uint128_t arg = MakeUInt128(0x9904801094121472ULL, 0x2131794764777262ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("rev16 %0.16b, %1.16b")(arg);
  ASSERT_EQ(res, MakeUInt128(0x0499108012947214ULL, 0x3121477977646272ULL));
}

// REV32: swap the halfwords within each 32-bit element.
TEST(Arm64InsnTest, Rev32Int16x8) {
  __uint128_t arg = MakeUInt128(0x8662237172159160ULL, 0x7716692547487389ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("rev32 %0.8h, %1.8h")(arg);
  ASSERT_EQ(res, MakeUInt128(0x2371866291607215ULL, 0x6925771673894748ULL));
}

// REV64: swap the words within each 64-bit element.
TEST(Arm64InsnTest, Rev64Int32x4) {
  __uint128_t arg = MakeUInt128(0x5306736096571209ULL, 0x1807638327166416ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("rev64 %0.4s, %1.4s")(arg);
  ASSERT_EQ(res, MakeUInt128(0x9657120953067360ULL, 0x2716641618076383ULL));
}

// TBL (single-register table, 8-byte result): each index byte of arg2 selects
// a byte of the arg1 table; the table bytes here equal their own index * 0x11,
// making the selection easy to eyeball.
TEST(Arm64InsnTest, TblInt8x8) {
  __uint128_t arg1 = MakeUInt128(0x7766554433221100ULL, 0xffeeddccbbaa9988ULL);
  __uint128_t arg2 = MakeUInt128(0x0104011509120605ULL, 0x0315080907091312ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("tbl %0.8b, {%1.16b}, %2.8b")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x1144110099006655ULL, 0x0000000000000000ULL));
}

// TBL (single-register table, 16-byte result).
TEST(Arm64InsnTest, TblInt8x16) {
  __uint128_t arg1 = MakeUInt128(0x7766554433221100ULL, 0xffeeddccbbaa9988ULL);
  __uint128_t arg2 = MakeUInt128(0x0905060808010408ULL, 0x0506000206030202ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("tbl %0.16b, {%1.16b}, %2.16b")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x9955668888114488ULL, 0x5566002266332222ULL));
}
5812
// TBL with a two-register table; out-of-range indices (>= 32) yield zero.
TEST(Arm64InsnTest, Tbl2Int8x16) {
  __uint128_t arg1 = MakeUInt128(0x7766554433221100ULL, 0xffeeddccbbaa9988ULL);
  __uint128_t arg2 = MakeUInt128(0x76655443322110ffULL, 0xfeeddccbbaa99887ULL);
  __uint128_t arg3 = MakeUInt128(0x0224052800020910ULL, 0x1807280319002203ULL);
  __uint128_t res;

  // Hardcode v31 and v0 so that the TBL instruction gets consecutive registers
  // (the register list wraps around from v31 to v0).
  asm("mov v31.16b, %1.16b\n\t"
      "mov v0.16b, %2.16b\n\t"
      "tbl %0.16b, {v31.16b, v0.16b}, %3.16b"
      : "=w"(res)
      : "w"(arg1), "w"(arg2), "w"(arg3)
      : "v31", "v0");

  ASSERT_EQ(res, MakeUInt128(0x22005500002299ffULL, 0x8777003398000033ULL));
}
5829
// TBL with a three-register table; out-of-range indices (>= 48) yield zero.
TEST(Arm64InsnTest, Tbl3Int8x16) {
  __uint128_t arg1 = MakeUInt128(0x7766554433221100ULL, 0xffeeddccbbaa9988ULL);
  __uint128_t arg2 = MakeUInt128(0x76655443322110ffULL, 0xfeeddccbbaa99887ULL);
  __uint128_t arg3 = MakeUInt128(0x7060504030201000ULL, 0xf0e0d0c0b0a09080ULL);
  __uint128_t arg4 = MakeUInt128(0x0718264039291035ULL, 0x3526190040211304ULL);
  __uint128_t res;

  // Hardcode v30, v31, and v0 so that the TBL instruction gets consecutive
  // registers (the register list wraps around from v31 to v0).
  //
  // The clobber list must name exactly the registers the asm writes (v30,
  // v31, v0); previously it listed v0/v1/v2, leaving v30 and v31 undeclared
  // and allowing the compiler to keep live values there across the asm.
  asm("mov v30.16b, %1.16b\n\t"
      "mov v31.16b, %2.16b\n\t"
      "mov v0.16b, %3.16b\n\t"
      "tbl %0.16b, {v30.16b-v0.16b}, %4.16b"
      : "=w"(res)
      : "w"(arg1), "w"(arg2), "w"(arg3), "w"(arg4)
      : "v30", "v31", "v0");

  ASSERT_EQ(res, MakeUInt128(0x778760000090ff00ULL, 0x0060980000103244ULL));
}
5848
// TBL with a four-register table; out-of-range indices (>= 64) yield zero.
TEST(Arm64InsnTest, Tbl4Int8x16) {
  __uint128_t arg1 = MakeUInt128(0x7766554433221100ULL, 0xffeeddccbbaa9988ULL);
  __uint128_t arg2 = MakeUInt128(0x76655443322110ffULL, 0xfeeddccbbaa99887ULL);
  __uint128_t arg3 = MakeUInt128(0x7060504030201000ULL, 0xf0e0d0c0b0a09080ULL);
  __uint128_t arg4 = MakeUInt128(0x7f6f5f4f3f2f1fffULL, 0xffefdfcfbfaf9f8fULL);
  __uint128_t arg5 = MakeUInt128(0x0718264039291035ULL, 0x3526190040211304ULL);
  __uint128_t res;

  // Hardcode v30, v31, v0, and v1 so that the TBL instruction gets consecutive
  // registers (the register list wraps around from v31 to v0).
  asm("mov v30.16b, %1.16b\n\t"
      "mov v31.16b, %2.16b\n\t"
      "mov v0.16b, %3.16b\n\t"
      "mov v1.16b, %4.16b\n\t"
      "tbl %0.16b, {v30.16b-v1.16b}, %5.16b"
      : "=w"(res)
      : "w"(arg1), "w"(arg2), "w"(arg3), "w"(arg4), "w"(arg5)
      : "v30", "v31", "v0", "v1");

  ASSERT_EQ(res, MakeUInt128(0x778760009f90ff5fULL, 0x5f60980000103244ULL));
}
5869
// TBX (single-register table): like TBL, but out-of-range indices leave the
// corresponding destination byte (from arg3) unchanged instead of zeroing it.
TEST(Arm64InsnTest, TbxInt8x16) {
  __uint128_t arg1 = MakeUInt128(0x7766554433221100ULL, 0xffeeddccbbaa9988ULL);
  __uint128_t arg2 = MakeUInt128(0x0915061808010408ULL, 0x0516000206031202ULL);
  __uint128_t arg3 = MakeUInt128(0x6668559233565463ULL, 0x9138363185745698ULL);
  __uint128_t res =
      ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("tbx %0.16b, {%1.16b}, %2.16b")(arg1, arg2, arg3);
  ASSERT_EQ(res, MakeUInt128(0x9968669288114488ULL, 0x5538002266335622ULL));
}

// TBX with a two-register table; `res` is preloaded since TBX keeps
// destination bytes for out-of-range indices.
TEST(Arm64InsnTest, Tbx2Int8x16) {
  __uint128_t arg1 = MakeUInt128(0x7766554433221100ULL, 0xffeeddccbbaa9988ULL);
  __uint128_t arg2 = MakeUInt128(0x76655443322110ffULL, 0xfeeddccbbaa99887ULL);
  __uint128_t arg3 = MakeUInt128(0x0224052800020910ULL, 0x1807280319002203ULL);
  __uint128_t res = MakeUInt128(0x7494078488442377ULL, 0x2175154334260306ULL);

  // Hardcode v0 and v1 so that the TBX instruction gets consecutive registers.
  asm("mov v0.16b, %1.16b\n\t"
      "mov v1.16b, %2.16b\n\t"
      "tbx %0.16b, {v0.16b, v1.16b}, %3.16b"
      : "=w"(res)
      : "w"(arg1), "w"(arg2), "w"(arg3), "0"(res)
      : "v0", "v1");

  ASSERT_EQ(res, MakeUInt128(0x22945584002299ffULL, 0x8777153398000333ULL));
}

// TBX with a three-register table.
TEST(Arm64InsnTest, Tbx3Int8x16) {
  __uint128_t arg1 = MakeUInt128(0x7766554433221100ULL, 0xffeeddccbbaa9988ULL);
  __uint128_t arg2 = MakeUInt128(0x76655443322110ffULL, 0xfeeddccbbaa99887ULL);
  __uint128_t arg3 = MakeUInt128(0x7060504030201000ULL, 0xf0e0d0c0b0a09080ULL);
  __uint128_t arg4 = MakeUInt128(0x0718264039291035ULL, 0x3526190040211304ULL);
  __uint128_t res = MakeUInt128(0x0136776310849135ULL, 0x1615642269847507ULL);

  // Hardcode v0, v1, and v2 so that the TBX instruction gets consecutive registers.
  asm("mov v0.16b, %1.16b\n\t"
      "mov v1.16b, %2.16b\n\t"
      "mov v2.16b, %3.16b\n\t"
      "tbx %0.16b, {v0.16b, v1.16b, v2.16b}, %4.16b"
      : "=w"(res)
      : "w"(arg1), "w"(arg2), "w"(arg3), "w"(arg4), "0"(res)
      : "v0", "v1", "v2");

  ASSERT_EQ(res, MakeUInt128(0x778760631090ff35ULL, 0x1660980069103244ULL));
}

// TBX with a four-register table.
TEST(Arm64InsnTest, Tbx4Int8x16) {
  __uint128_t arg1 = MakeUInt128(0x7766554433221100ULL, 0xffeeddccbbaa9988ULL);
  __uint128_t arg2 = MakeUInt128(0x76655443322110ffULL, 0xfeeddccbbaa99887ULL);
  __uint128_t arg3 = MakeUInt128(0x7060504030201000ULL, 0xf0e0d0c0b0a09080ULL);
  __uint128_t arg4 = MakeUInt128(0x7f6f5f4f3f2f1fffULL, 0xffefdfcfbfaf9f8fULL);
  __uint128_t arg5 = MakeUInt128(0x0718264039291035ULL, 0x3526190040211304ULL);
  __uint128_t res = MakeUInt128(0x5818319637637076ULL, 0x1799191920357958ULL);

  // Hardcode v0, v1, v2, and v3 so that the TBX instruction gets consecutive registers.
  asm("mov v0.16b, %1.16b\n\t"
      "mov v1.16b, %2.16b\n\t"
      "mov v2.16b, %3.16b\n\t"
      "mov v3.16b, %4.16b\n\t"
      "tbx %0.16b, {v0.16b-v3.16b}, %5.16b"
      : "=w"(res)
      : "w"(arg1), "w"(arg2), "w"(arg3), "w"(arg4), "w"(arg5), "0"(res)
      : "v0", "v1", "v2", "v3");

  ASSERT_EQ(res, MakeUInt128(0x778760969f90ff5fULL, 0x5f60980020103244ULL));
}
5935
// TRN1: interleave the even-numbered 8-bit lanes of the two sources.
TEST(Arm64InsnTest, Trn1Int8x8) {
  __uint128_t arg1 = MakeUInt128(0x2075916729700785ULL, 0x0580717186381054ULL);
  __uint128_t arg2 = MakeUInt128(0x2786099055690013ULL, 0x4137182368370991ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("trn1 %0.8b, %1.8b, %2.8b")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x8675906769701385ULL, 0x0000000000000000ULL));
}

// TRN2: interleave the odd-numbered 16-bit lanes of the two sources.
TEST(Arm64InsnTest, Trn2Int16x8) {
  __uint128_t arg1 = MakeUInt128(0x6685592335654639ULL, 0x1383631857456981ULL);
  __uint128_t arg2 = MakeUInt128(0x7494078488442377ULL, 0x2175154334260306ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("trn2 %0.8h, %1.8h, %2.8h")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x7494668588443565ULL, 0x2175138334265745ULL));
}

// UZP1: concatenate the even-numbered 8-bit lanes of both sources.
TEST(Arm64InsnTest, Uzp1Int8x8) {
  __uint128_t arg1 = MakeUInt128(0x4954893139394489ULL, 0x9216125525597701ULL);
  __uint128_t arg2 = MakeUInt128(0x2783467926101995ULL, 0x5852247172201777ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("uzp1 %0.8b, %1.8b, %2.8b")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x8379109554313989ULL, 0x0000000000000000ULL));
}

// UZP2: concatenate the odd-numbered 16-bit lanes of both sources.
TEST(Arm64InsnTest, Uzp2Int16x8) {
  __uint128_t arg1 = MakeUInt128(0x6745642390585850ULL, 0x2167190313952629ULL);
  __uint128_t arg2 = MakeUInt128(0x3620129476918749ULL, 0x7519101147231528ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("uzp2 %0.8h, %1.8h, %2.8h")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x2167139567459058ULL, 0x7519472336207691ULL));
}

// NOTE(review): the test name says "Zip2" but the instruction is "uzp2". For
// the 64x2 arrangement both pick {arg1[1], arg2[1]}, so the expected value is
// the same either way — confirm which mnemonic this test is meant to cover.
TEST(Arm64InsnTest, Zip2Int64x2) {
  __uint128_t arg1 = MakeUInt128(0x1494271410093913ULL, 0x6913810725813781ULL);
  __uint128_t arg2 = MakeUInt128(0x3578940055995001ULL, 0x8354251184172136ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("uzp2 %0.2d, %1.2d, %2.2d")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x6913810725813781ULL, 0x8354251184172136ULL));
}
5970
// ZIP1: interleave the lower-half 8-bit lanes of the two sources.
TEST(Arm64InsnTest, Zip1Int8x8) {
  __uint128_t arg1 = MakeUInt128(0x7499235630254947ULL, 0x8024901141952123ULL);
  __uint128_t arg2 = MakeUInt128(0x3331239480494707ULL, 0x9119153267343028ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("zip1 %0.8b, %1.8b, %2.8b")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x8030492547490747ULL, 0x0000000000000000ULL));
}

// ZIP1 on 64-bit lanes: {arg1[0], arg2[0]}.
TEST(Arm64InsnTest, Zip1Int64x2) {
  __uint128_t arg1 = MakeUInt128(0x9243530136776310ULL, 0x8491351615642269ULL);
  __uint128_t arg2 = MakeUInt128(0x0551199581831963ULL, 0x7637076179919192ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("zip1 %0.2d, %1.2d, %2.2d")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x9243530136776310ULL, 0x0551199581831963ULL));
}

// ZIP2: interleave the upper-half 16-bit lanes of the two sources.
TEST(Arm64InsnTest, Zip2Int16x8) {
  __uint128_t arg1 = MakeUInt128(0x5831832713142517ULL, 0x0296923488962766ULL);
  __uint128_t arg2 = MakeUInt128(0x2934595889706953ULL, 0x6534940603402166ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("zip2 %0.8h, %1.8h, %2.8h")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x0340889621662766ULL, 0x6534029694069234ULL));
}

// SMAX: per-lane signed maximum.
TEST(Arm64InsnTest, SignedMaxInt16x8) {
  __uint128_t arg1 = MakeUInt128(0x9901573466102371ULL, 0x2235478911292547ULL);
  __uint128_t arg2 = MakeUInt128(0x4922157650450812ULL, 0x0677173571202718ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("smax %0.8h, %1.8h, %2.8h")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x4922573466102371ULL, 0x2235478971202718ULL));
}

// SMIN: per-lane signed minimum.
TEST(Arm64InsnTest, SignedMinInt16x8) {
  __uint128_t arg1 = MakeUInt128(0x7820385653909910ULL, 0x4775941413215432ULL);
  __uint128_t arg2 = MakeUInt128(0x0084531214065935ULL, 0x8090412711359200ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("smin %0.8h, %1.8h, %2.8h")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x0084385614069910ULL, 0x8090941411359200ULL));
}

// SMAXP: signed maximum of adjacent lane pairs across both sources.
TEST(Arm64InsnTest, SignedMaxPairwiseInt16x8) {
  __uint128_t arg1 = MakeUInt128(0x6998469884770232ULL, 0x3823840055655517ULL);
  __uint128_t arg2 = MakeUInt128(0x3272867600724817ULL, 0x2987637569816335ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("smaxp %0.8h, %1.8h, %2.8h")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x3823556569980232ULL, 0x6375698132724817ULL));
}

// SMINP: signed minimum of adjacent lane pairs across both sources.
TEST(Arm64InsnTest, SignedMinPairwiseInt16x8) {
  __uint128_t arg1 = MakeUInt128(0x8865701568501691ULL, 0x8647488541679154ULL);
  __uint128_t arg2 = MakeUInt128(0x1821553559732353ULL, 0x0686043010675760ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("sminp %0.8h, %1.8h, %2.8h")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x8647915488651691ULL, 0x0430106718212353ULL));
}

// UMAX: per-lane unsigned maximum.
TEST(Arm64InsnTest, UnsignedMaxInt16x8) {
  __uint128_t arg1 = MakeUInt128(0x7639975974619383ULL, 0x5845749159880976ULL);
  __uint128_t arg2 = MakeUInt128(0x5928493695941434ULL, 0x0814685298150539ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("umax %0.8h, %1.8h, %2.8h")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x7639975995949383ULL, 0x5845749198150976ULL));
}

// UMIN: per-lane unsigned minimum.
TEST(Arm64InsnTest, UnsignedMinInt16x8) {
  __uint128_t arg1 = MakeUInt128(0x2888773717663748ULL, 0x6027660634960353ULL);
  __uint128_t arg2 = MakeUInt128(0x6983349515101986ULL, 0x4269887847171939ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("umin %0.8h, %1.8h, %2.8h")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x2888349515101986ULL, 0x4269660634960353ULL));
}

// UMAXP: unsigned maximum of adjacent lane pairs across both sources.
TEST(Arm64InsnTest, UnsignedMaxPairwiseInt16x8) {
  __uint128_t arg1 = MakeUInt128(0x1318583584066747ULL, 0x2370297149785084ULL);
  __uint128_t arg2 = MakeUInt128(0x4570249413983163ULL, 0x4332378975955680ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("umaxp %0.8h, %1.8h, %2.8h")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x2971508458358406ULL, 0x4332759545703163ULL));
}

// UMINP: unsigned minimum of adjacent lane pairs across both sources.
TEST(Arm64InsnTest, UnsignedMinPairwiseInt16x8) {
  __uint128_t arg1 = MakeUInt128(0x9538121791319145ULL, 0x1350099384631177ULL);
  __uint128_t arg2 = MakeUInt128(0x7769055481028850ULL, 0x2080858008781157ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("uminp %0.8h, %1.8h, %2.8h")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x0993117712179131ULL, 0x2080087805548102ULL));
}
6047
// SHADD: per-lane signed (a + b) >> 1 without intermediate overflow.
TEST(Arm64InsnTest, SignedHalvingAddInt16x8) {
  __uint128_t arg1 = MakeUInt128(0x1021944719713869ULL, 0x2560841624511239ULL);
  __uint128_t arg2 = MakeUInt128(0x8062011318454124ULL, 0x4782050110798760ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("shadd %0.8h, %1.8h, %2.8h")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0xc841caad18db3cc6ULL, 0x3671c48b1a65ccccULL));
}

// SHSUB: per-lane signed (a - b) >> 1.
TEST(Arm64InsnTest, SignedHalvingSubInt16x8) {
  __uint128_t arg1 = MakeUInt128(0x9041210873032402ULL, 0x0106853419472304ULL);
  __uint128_t arg2 = MakeUInt128(0x7666672174986986ULL, 0x8547076781205124ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("shsub %0.8h, %1.8h, %2.8h")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x8ceddcf3ff35dd3eULL, 0x3ddfbee64c13e8f0ULL));
}

// SRHADD: per-lane signed (a + b + 1) >> 1 (rounding halving add).
TEST(Arm64InsnTest, SignedRoundingHalvingAddInt16x8) {
  __uint128_t arg1 = MakeUInt128(0x5871487839890810ULL, 0x7429530941060596ULL);
  __uint128_t arg2 = MakeUInt128(0x9443158477539700ULL, 0x9439883949144323ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("srhadd %0.8h, %1.8h, %2.8h")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0xf65a2efe586ecf88ULL, 0x0431eda1450d245dULL));
}

// SABD: per-lane signed absolute difference |a - b|.
TEST(Arm64InsnTest, SignedAbsoluteDifferenceInt16x8) {
  __uint128_t arg1 = MakeUInt128(0x1349607501116498ULL, 0x3278563531614516ULL);
  __uint128_t arg2 = MakeUInt128(0x8457695687109002ULL, 0x9997698412632665ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("sabd %0.8h, %1.8h, %2.8h")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x8ef208e17a01d496ULL, 0x98e1134f1efe1eb1ULL));
}

// SABDL: signed absolute difference of the lower 16-bit lanes, widened to
// 32-bit results.
TEST(Arm64InsnTest, SignedAbsoluteDifferenceLongInt16x8) {
  __uint128_t arg1 = MakeUInt128(0x7419850973346267ULL, 0x9332107268687076ULL);
  __uint128_t arg2 = MakeUInt128(0x8062639919361965ULL, 0x0440995421676278ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("sabdl %0.4s, %1.4h, %2.4h")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x000059fe00004902ULL, 0x0000f3b70000de90ULL));
}

// SABDL2: same as SABDL but reads the upper 16-bit lanes of the sources.
TEST(Arm64InsnTest, SignedAbsoluteDifferenceLongUpperInt16x8) {
  __uint128_t arg1 = MakeUInt128(0x4980559610330799ULL, 0x4145347784574699ULL);
  __uint128_t arg2 = MakeUInt128(0x9921285999993996ULL, 0x1228161521931488ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("sabdl2 %0.4s, %1.8h, %2.8h")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x00009d3c00003211ULL, 0x00002f1d00001e62ULL));
}

// SABA: per-lane signed absolute difference accumulated into the destination.
TEST(Arm64InsnTest, SignedAbsoluteDifferenceAccumulateInt16x8) {
  // The lowest element tests the overflow.
  __uint128_t arg1 = MakeUInt128(0x8967'0031'9258'7fffULL, 0x9410'5105'3358'4384ULL);
  __uint128_t arg2 = MakeUInt128(0x6560'2339'1796'8000ULL, 0x6784'4763'7084'7497ULL);
  __uint128_t arg3 = MakeUInt128(0x8333'6555'7900'5555ULL, 0x1914'7319'8862'7135ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("saba %0.8h, %1.8h, %2.8h")(arg1, arg2, arg3);
  ASSERT_EQ(res, MakeUInt128(0x5f2c'885d'fe3e'5554ULL, 0xec88'7cbb'c58e'a248ULL));
}
6098
TEST(Arm64InsnTest,SignedAbsoluteDifferenceAccumulateInt32x4)6099 TEST(Arm64InsnTest, SignedAbsoluteDifferenceAccumulateInt32x4) {
6100 // The lowest element tests the overflow.
6101 __uint128_t arg1 = MakeUInt128(0x8967'0031'7fff'ffffULL, 0x9410'5105'3358'4384ULL);
6102 __uint128_t arg2 = MakeUInt128(0x6560'2339'8000'0000ULL, 0x6784'4763'7084'7497ULL);
6103 __uint128_t arg3 = MakeUInt128(0x8333'6555'aaaa'5555ULL, 0x1914'7319'8862'7135ULL);
6104 __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("saba %0.4s, %1.4s, %2.4s")(arg1, arg2, arg3);
6105 ASSERT_EQ(res, MakeUInt128(0x5f2c'885d'aaaa'5554ULL, 0xec88'6977'c58e'a248ULL));
6106 }
6107
TEST(Arm64InsnTest,SignedAbsoluteDifferenceAccumulateLongInt16x4)6108 TEST(Arm64InsnTest, SignedAbsoluteDifferenceAccumulateLongInt16x4) {
6109 __uint128_t arg1 = MakeUInt128(0x078464167452167ULL, 0x719048310967671ULL);
6110 __uint128_t arg2 = MakeUInt128(0x344349481926268ULL, 0x110739948250607ULL);
6111 __uint128_t arg3 = MakeUInt128(0x949507350316901ULL, 0x731852119552635ULL);
6112 __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("sabal %0.4s, %1.4h, %2.4h")(arg1, arg2, arg3);
6113 ASSERT_EQ(res, MakeUInt128(0x094a36265031aa02ULL, 0x073187ed195537e2ULL));
6114 }
6115
TEST(Arm64InsnTest,SignedAbsoluteDifferenceLongInt32x2)6116 TEST(Arm64InsnTest, SignedAbsoluteDifferenceLongInt32x2) {
6117 __uint128_t arg1 = MakeUInt128(0x000000007fffffffULL, 0x0000000000000000ULL);
6118 __uint128_t arg2 = MakeUInt128(0x0000000080000000ULL, 0x0000000000000000ULL);
6119 __uint128_t arg3 = MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL);
6120 __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("sabal %0.2d, %1.2s, %2.2s")(arg1, arg2, arg3);
6121 ASSERT_EQ(res, MakeUInt128(0x00000000ffffffffULL, 0x0000000000000000ULL));
6122 }
6123
TEST(Arm64InsnTest,SignedAbsoluteDifferenceAccumulateLongUpperInt16x8)6124 TEST(Arm64InsnTest, SignedAbsoluteDifferenceAccumulateLongUpperInt16x8) {
6125 __uint128_t arg1 = MakeUInt128(0x690943470482932ULL, 0x414041114654092ULL);
6126 __uint128_t arg2 = MakeUInt128(0x988344435159133ULL, 0x010773944111840ULL);
6127 __uint128_t arg3 = MakeUInt128(0x410768498106634ULL, 0x241048239358274ULL);
6128 __uint128_t res =
6129 ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("sabal2 %0.4s, %1.8h, %2.8h")(arg1, arg2, arg3);
6130 ASSERT_EQ(res, MakeUInt128(0x0410a63098108e86ULL, 0x024108863935f59cULL));
6131 }
6132
TEST(Arm64InsnTest,UnsignedHalvingAddInt16x8)6133 TEST(Arm64InsnTest, UnsignedHalvingAddInt16x8) {
6134 __uint128_t arg1 = MakeUInt128(0x4775379853799732ULL, 0x2344561227858432ULL);
6135 __uint128_t arg2 = MakeUInt128(0x9684664751333657ULL, 0x3692387201464723ULL);
6136 __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("uhadd %0.8h, %1.8h, %2.8h")(arg1, arg2);
6137 ASSERT_EQ(res, MakeUInt128(0x6efc4eef525666c4ULL, 0x2ceb4742146565aaULL));
6138 }
6139
TEST(Arm64InsnTest,UnsignedHalvingSubInt16x8)6140 TEST(Arm64InsnTest, UnsignedHalvingSubInt16x8) {
6141 __uint128_t arg1 = MakeUInt128(0x9926884349592876ULL, 0x1240075587569464ULL);
6142 __uint128_t arg2 = MakeUInt128(0x1370562514001179ULL, 0x7133166207153715ULL);
6143 __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("uhsub %0.8h, %1.8h, %2.8h")(arg1, arg2);
6144 ASSERT_EQ(res, MakeUInt128(0x42db190f1aac0b7eULL, 0xd086f87940202ea7ULL));
6145 }
6146
TEST(Arm64InsnTest, UnsignedRoundingHalvingAddInt16x8) {
  // URHADD: per-lane unsigned halving add with rounding — (a + b + 1) >> 1.
  __uint128_t op1 = MakeUInt128(0x5066533985738887ULL, 0x8661476294434140ULL);
  __uint128_t op2 = MakeUInt128(0x1049888993160051ULL, 0x2076781035886116ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("urhadd %0.8h, %1.8h, %2.8h")(op1, op2),
            MakeUInt128(0x30586de18c45446cULL, 0x536c5fb964e6512bULL));
}
6153
TEST(Arm64InsnTest, UnsignedAbsoluteDifferenceInt16x8) {
  // UABD: per-lane unsigned absolute difference, |a - b|.
  __uint128_t op1 = MakeUInt128(0x8574664607722834ULL, 0x1540311441529418ULL);
  __uint128_t op2 = MakeUInt128(0x8047825438761770ULL, 0x7904300015669867ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("uabd %0.8h, %1.8h, %2.8h")(op1, op2),
            MakeUInt128(0x052d1c0e310410c4ULL, 0x63c401142bec044fULL));
}
6160
TEST(Arm64InsnTest, UnsignedAbsoluteDifferenceLongInt16x8) {
  // UABDL: unsigned absolute difference of the lower 16-bit lanes, widened to
  // 32-bit result lanes.
  // NOTE(review): name says Int16x8 but only the low four lanes (.4h) are
  // used — the sibling UABAL test calls this Int16x4; consider aligning.
  __uint128_t op1 = MakeUInt128(0x1614585505839727ULL, 0x4209809097817293ULL);
  __uint128_t op2 = MakeUInt128(0x2393010676638682ULL, 0x4040111304024700ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("uabdl %0.4s, %1.4h, %2.4h")(op1, op2),
            MakeUInt128(0x000070e0000010a5ULL, 0x00000d7f0000574fULL));
}
6167
TEST(Arm64InsnTest, UnsignedAbsoluteDifferenceLongUpperInt16x8) {
  // UABDL2: unsigned absolute difference of the upper 16-bit lanes, widened
  // to 32-bit result lanes.
  __uint128_t op1 = MakeUInt128(0x0347999588867695ULL, 0x0161249722820403ULL);
  __uint128_t op2 = MakeUInt128(0x0399546327883069ULL, 0x5976249361510102ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("uabdl2 %0.4s, %1.8h, %2.8h")(op1, op2),
            MakeUInt128(0x00003ecf00000301ULL, 0x0000581500000004ULL));
}
6174
TEST(Arm64InsnTest, UnsignedAbsoluteDifferenceAccumulateInt16x8) {
  // UABA: per-lane unsigned absolute difference accumulated into the
  // destination's existing lanes.
  __uint128_t op1 = MakeUInt128(0x0857466460772283ULL, 0x4154031144152941ULL);
  __uint128_t op2 = MakeUInt128(0x8804782543876177ULL, 0x0790430001566986ULL);
  __uint128_t acc = MakeUInt128(0x7767957609099669ULL, 0x3607559496515273ULL);
  __uint128_t actual =
      ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("uaba %0.8h, %1.8h, %2.8h")(op1, op2, acc);
  ASSERT_EQ(actual, MakeUInt128(0xf714c73725f9d55dULL, 0x6fcb9583d91092b8ULL));
}
6182
TEST(Arm64InsnTest, UnsignedAbsoluteDifferenceAccumulateLongInt16x4) {
  // UABAL: unsigned absolute difference of the lower 16-bit lanes, widened
  // and accumulated into 32-bit destination lanes.
  __uint128_t op1 = MakeUInt128(0x8343417044157348ULL, 0x2481833301640566ULL);
  __uint128_t op2 = MakeUInt128(0x9596688667695634ULL, 0x9141632842641497ULL);
  __uint128_t acc = MakeUInt128(0x4533349999480002ULL, 0x6699875888159350ULL);
  __uint128_t actual =
      ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("uabal %0.4s, %1.4h, %2.4h")(op1, op2, acc);
  ASSERT_EQ(actual, MakeUInt128(0x453357ed99481d16ULL, 0x669999ab8815ba66ULL));
}
6190
TEST(Arm64InsnTest, UnsignedAbsoluteDifferenceAccumulateLongUpperInt16x8) {
  // UABAL2: unsigned absolute difference of the upper 16-bit lanes, widened
  // and accumulated into 32-bit destination lanes.
  __uint128_t op1 = MakeUInt128(0x998685541703188ULL, 0x778867592902607ULL);
  __uint128_t op2 = MakeUInt128(0x043212666179192ULL, 0x352093822787888ULL);
  __uint128_t acc = MakeUInt128(0x988633599116081ULL, 0x235355570464634ULL);
  __uint128_t actual =
      ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("uabal2 %0.4s, %1.8h, %2.8h")(op1, op2, acc);
  ASSERT_EQ(actual, MakeUInt128(0x0988d34d9911b302ULL, 0x0235397b7046c371ULL));
}
6199
TEST(Arm64InsnTest, SignedAddLongPairwiseInt8x16) {
  // SADDLP: adds adjacent pairs of signed bytes, widening to 16-bit lanes.
  __uint128_t src = MakeUInt128(0x6164411096256633ULL, 0x7305409219519675ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_W_ARG("saddlp %0.8h, %1.16b")(src),
            MakeUInt128(0x00c50051ffbb0099ULL, 0x0078ffd2006a000bULL));
}
6205
TEST(Arm64InsnTest, SignedAddAccumulateLongPairwiseInt8x16) {
  // SADALP: pairwise signed byte sums, widened and accumulated into the
  // destination's 16-bit lanes.
  __uint128_t src = MakeUInt128(0x1991646384142707ULL, 0x7988708874229277ULL);
  __uint128_t acc = MakeUInt128(0x7217826030500994ULL, 0x5108247835729056ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_W0_ARG("sadalp %0.8h, %1.16b")(src, acc),
            MakeUInt128(0x71c183272fe809c2ULL, 0x510924703608905fULL));
}
6212
TEST(Arm64InsnTest, SignedAddAccumulateLongPairwiseInt16x8) {
  // SADALP: pairwise signed halfword sums, widened and accumulated into the
  // destination's 32-bit lanes.
  __uint128_t src = MakeUInt128(0x1991646384142707ULL, 0x7988708874229277ULL);
  __uint128_t acc = MakeUInt128(0x7217826030500994ULL, 0x5108247835729056ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_W0_ARG("sadalp %0.4s, %1.8h")(src, acc),
            MakeUInt128(0x72180054304fb4afULL, 0x51090e88357296efULL));
}
6219
TEST(Arm64InsnTest, UnsignedAddLongPairwiseInt8x16) {
  // UADDLP: adds adjacent pairs of unsigned bytes, widening to 16-bit lanes.
  __uint128_t src = MakeUInt128(0x1483287348089574ULL, 0x7777527834422109ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_W_ARG("uaddlp %0.8h, %1.16b")(src),
            MakeUInt128(0x0097009b00500109ULL, 0x00ee00ca0076002aULL));
}
6225
TEST(Arm64InsnTest, UnsignedAddAccumulateLongPairwiseInt8x16) {
  // UADALP: pairwise unsigned byte sums, widened and accumulated into the
  // destination's 16-bit lanes.
  __uint128_t src = MakeUInt128(0x9348154691631162ULL, 0x4928873574718824ULL);
  __uint128_t acc = MakeUInt128(0x5207665738825139ULL, 0x6391635767231510ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_W0_ARG("uadalp %0.8h, %1.16b")(src, acc),
            MakeUInt128(0x52e266b2397651acULL, 0x64026413680815bcULL));
}
6232
TEST(Arm64InsnTest, SignedAddLong) {
  // SADDL: adds the lower signed 16-bit lanes, widening to 32-bit results.
  __uint128_t op1 = MakeUInt128(0x3478074585067606ULL, 0x3048229409653041ULL);
  __uint128_t op2 = MakeUInt128(0x1183066710818930ULL, 0x3110887172816751ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("saddl %0.4s, %1.4h, %2.4h")(op1, op2),
            MakeUInt128(0xffff9587ffffff36ULL, 0x000045fb00000dacULL));
}
6239
TEST(Arm64InsnTest, SignedAddLongUpper) {
  // SADDL2: adds the upper signed 16-bit lanes, widening to 32-bit results.
  __uint128_t op1 = MakeUInt128(0x3160683158679946ULL, 0x0165205774052942ULL);
  __uint128_t op2 = MakeUInt128(0x3053601780313357ULL, 0x2632670547903384ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("saddl2 %0.4s, %1.8h, %2.8h")(op1, op2),
            MakeUInt128(0x0000bb9500005cc6ULL, 0x000027970000875cULL));
}
6246
TEST(Arm64InsnTest, SignedSubLong) {
  // SSUBL: subtracts the lower signed 16-bit lanes, widening to 32-bit.
  __uint128_t op1 = MakeUInt128(0x8566746260879482ULL, 0x0186474876727272ULL);
  __uint128_t op2 = MakeUInt128(0x2206267646533809ULL, 0x9801966883680994ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("ssubl %0.4s, %1.4h, %2.4h")(op1, op2),
            MakeUInt128(0x00001a34ffff5c79ULL, 0xffff636000004decULL));
}
6253
TEST(Arm64InsnTest, SignedSubLongUpper) {
  // SSUBL2: subtracts the upper signed 16-bit lanes, widening to 32-bit.
  __uint128_t op1 = MakeUInt128(0x3011331753305329ULL, 0x8020166888174813ULL);
  __uint128_t op2 = MakeUInt128(0x4298868158557781ULL, 0x0343231753064784ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("ssubl2 %0.4s, %1.8h, %2.8h")(op1, op2),
            MakeUInt128(0xffff35110000008fULL, 0xffff7cddfffff351ULL));
}
6260
TEST(Arm64InsnTest, UnsignedAddLong) {
  // UADDL: adds the lower unsigned 16-bit lanes, widening to 32-bit results.
  __uint128_t op1 = MakeUInt128(0x3126059505777727ULL, 0x5424712416483128ULL);
  __uint128_t op2 = MakeUInt128(0x3298207236175057ULL, 0x4673870128209575ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("uaddl %0.4s, %1.4h, %2.4h")(op1, op2),
            MakeUInt128(0x00003b8e0000c77eULL, 0x000063be00002607ULL));
}
6267
TEST(Arm64InsnTest, UnsignedAddLongUpper) {
  // UADDL2: adds the upper unsigned 16-bit lanes, widening to 32-bit results.
  __uint128_t op1 = MakeUInt128(0x3384698499778726ULL, 0x7065551918544686ULL);
  __uint128_t op2 = MakeUInt128(0x9846947849573462ULL, 0x2606294219624557ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("uaddl2 %0.4s, %1.8h, %2.8h")(op1, op2),
            MakeUInt128(0x000031b600008bddULL, 0x0000966b00007e5bULL));
}
6274
TEST(Arm64InsnTest, UnsignedSubLong) {
  // USUBL: subtracts the lower unsigned 16-bit lanes, widening to 32-bit.
  __uint128_t op1 = MakeUInt128(0x4378111988556318ULL, 0x7777925372011667ULL);
  __uint128_t op2 = MakeUInt128(0x1853954183598443ULL, 0x8305203762819440ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("usubl %0.4s, %1.4h, %2.4h")(op1, op2),
            MakeUInt128(0x000004fcffffded5ULL, 0x00002b25ffff7bd8ULL));
}
6281
TEST(Arm64InsnTest, UnsignedSubLongUpper) {
  // USUBL2: subtracts the upper unsigned 16-bit lanes, widening to 32-bit.
  __uint128_t op1 = MakeUInt128(0x5228717440266638ULL, 0x9148817173086436ULL);
  __uint128_t op2 = MakeUInt128(0x1113890694202790ULL, 0x8814311944879941ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("usubl2 %0.4s, %1.8h, %2.8h")(op1, op2),
            MakeUInt128(0x00002e81ffffcaf5ULL, 0x0000093400005058ULL));
}
6288
TEST(Arm64InsnTest, SignedAddWide) {
  // SADDW: adds sign-extended lower 16-bit lanes of op2 to op1's 32-bit lanes.
  __uint128_t wide = MakeUInt128(0x7844598183134112ULL, 0x9001999205981352ULL);
  __uint128_t narrow = MakeUInt128(0x2051173365856407ULL, 0x8264849427644113ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("saddw %0.4s, %1.4s, %2.4h")(wide, narrow),
            MakeUInt128(0x7844bf068313a519ULL, 0x9001b9e305982a85ULL));
}
6295
TEST(Arm64InsnTest, SignedAddWideUpper) {
  // SADDW2: adds sign-extended upper 16-bit lanes of op2 to op1's 32-bit lanes.
  __uint128_t wide = MakeUInt128(0x3407092233436577ULL, 0x9160128093179401ULL);
  __uint128_t narrow = MakeUInt128(0x7185985999338492ULL, 0x3549564005709955ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("saddw2 %0.4s, %1.4s, %2.8h")(wide, narrow),
            MakeUInt128(0x34070e923342feccULL, 0x916047c99317ea41ULL));
}
6302
TEST(Arm64InsnTest, SignedSubWide) {
  // SSUBW: subtracts sign-extended lower 16-bit lanes of op2 from op1's
  // 32-bit lanes.
  __uint128_t wide = MakeUInt128(0x2302847007312065ULL, 0x8032626417116165ULL);
  __uint128_t narrow = MakeUInt128(0x9576132723515666ULL, 0x6253667271899853ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("ssubw %0.4s, %1.4s, %2.4h")(wide, narrow),
            MakeUInt128(0x2302611f0730c9ffULL, 0x8032ccee17114e3eULL));
}
6309
TEST(Arm64InsnTest, SignedSubWideUpper) {
  // SSUBW2: subtracts sign-extended upper 16-bit lanes of op2 from op1's
  // 32-bit lanes.
  __uint128_t wide = MakeUInt128(0x4510824783572905ULL, 0x6919885554678860ULL);
  __uint128_t narrow = MakeUInt128(0x7946280537122704ULL, 0x2466543192145281ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("ssubw2 %0.4s, %1.4s, %2.8h")(wide, narrow),
            MakeUInt128(0x4510f0338356d684ULL, 0x691963ef5467342fULL));
}
6316
TEST(Arm64InsnTest, UnsignedAddWide) {
  // UADDW: adds zero-extended lower 16-bit lanes of op2 to op1's 32-bit lanes.
  __uint128_t wide = MakeUInt128(0x5870785951298344ULL, 0x1729535195378855ULL);
  __uint128_t narrow = MakeUInt128(0x3457374260859029ULL, 0x0817651557803905ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("uaddw %0.4s, %1.4s, %2.4h")(wide, narrow),
            MakeUInt128(0x5870d8de512a136dULL, 0x172987a89537bf97ULL));
}
6323
TEST(Arm64InsnTest, UnsignedAddWideUpper) {
  // UADDW2: adds zero-extended upper 16-bit lanes of op2 to op1's 32-bit lanes.
  __uint128_t wide = MakeUInt128(0x7516493270950493ULL, 0x4639382432227188ULL);
  __uint128_t narrow = MakeUInt128(0x5159740547021482ULL, 0x8971117779237612ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("uaddw2 %0.4s, %1.4s, %2.8h")(wide, narrow),
            MakeUInt128(0x7516c25570957aa5ULL, 0x4639c195322282ffULL));
}
6330
TEST(Arm64InsnTest, UnsignedSubWide) {
  // USUBW: subtracts zero-extended lower 16-bit lanes of op2 from op1's
  // 32-bit lanes.
  __uint128_t wide = MakeUInt128(0x0625247972199786ULL, 0x6854279897799233ULL);
  __uint128_t narrow = MakeUInt128(0x9579057581890622ULL, 0x5254735822052364ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("usubw %0.4s, %1.4s, %2.4h")(wide, narrow),
            MakeUInt128(0x0624a2f072199164ULL, 0x6853921f97798cbeULL));
}
6337
TEST(Arm64InsnTest, UnsignedSubWideUpper) {
  // USUBW2: subtracts zero-extended upper 16-bit lanes of op2 from op1's
  // 32-bit lanes.
  __uint128_t wide = MakeUInt128(0x8242392192695062ULL, 0x0831838145469839ULL);
  __uint128_t narrow = MakeUInt128(0x2366461363989101ULL, 0x2102177095976704ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("usubw2 %0.4s, %1.4s, %2.8h")(wide, narrow),
            MakeUInt128(0x8241a38a9268e95eULL, 0x0831627f454680c9ULL));
}
6344
TEST(Arm64InsnTest, SignedMultiplyLongInt8x8) {
  // SMULL: multiplies the lower signed byte lanes, widening to 16-bit results.
  __uint128_t op1 = MakeUInt128(0x9191791552241718ULL, 0x9585361680594741ULL);
  __uint128_t op2 = MakeUInt128(0x2341933984202187ULL, 0x4564925644346239ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("smull %0.8h, %1.8b, %2.8b")(op1, op2),
            MakeUInt128(0xd848048002f7f4a8ULL, 0xf0d3e3d1cc7b04adULL));
}
6351
TEST(Arm64InsnTest, SignedMultiplyLongInt8x8Upper) {
  // SMULL2: multiplies the upper signed byte lanes, widening to 16-bit results.
  __uint128_t op1 = MakeUInt128(0x9314052976347574ULL, 0x8119356709110137ULL);
  __uint128_t op2 = MakeUInt128(0x7517210080315590ULL, 0x2485309066920376ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("smull2 %0.8h, %1.16b, %2.16b")(op1, op2),
            MakeUInt128(0x0396f8b20003195aULL, 0xee24f3fd09f0d2f0ULL));
}
6358
TEST(Arm64InsnTest, UnsignedMultiplyLongInt8x8) {
  // UMULL: multiplies the lower unsigned byte lanes, widening to 16-bit.
  __uint128_t op1 = MakeUInt128(0x9149055628425039ULL, 0x1275771028402799ULL);
  __uint128_t op2 = MakeUInt128(0x8066365825488926ULL, 0x4880254566101729ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("umull %0.8h, %1.8b, %2.8b")(op1, op2),
            MakeUInt128(0x05c812902ad00876ULL, 0x48801d16010e1d90ULL));
}
6365
TEST(Arm64InsnTest, UnsignedMultiplyLongInt8x8Upper) {
  // UMULL2: multiplies the upper unsigned byte lanes, widening to 16-bit.
  __uint128_t op1 = MakeUInt128(0x9709683408005355ULL, 0x9849175417381883ULL);
  __uint128_t op2 = MakeUInt128(0x9994469748676265ULL, 0x5165827658483588ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("umull2 %0.8h, %1.16b, %2.16b")(op1, op2),
            MakeUInt128(0x07e80fc004f84598ULL, 0x30181ccd0bae26b8ULL));
}
6372
TEST(Arm64InsnTest, SignedMultiplyLongInt8x8IndexedElem) {
  // SMULL (by element): multiplies the lower signed 16-bit lanes by lane 2 of
  // the second operand, widening to 32-bit results.
  // NOTE(review): test name says Int8x8 but the operands are 16-bit lanes
  // (.4h) — consider renaming to match sibling Int16x4 tests.
  __uint128_t op1 = MakeUInt128(0x9293459588970695ULL, 0x3653494060340216ULL);
  __uint128_t op2 = MakeUInt128(0x6544375589004563ULL, 0x2882250545255640ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("smull %0.4s, %1.4h, %2.h[2]")(op1, op2),
            MakeUInt128(0xe630cb23016c3279ULL, 0xe8593fcf0f0a1d79ULL));
}
6379
TEST(Arm64InsnTest, SignedMultiplyLongInt8x8IndexedElemUpper) {
  // SMULL2 (by element): multiplies the upper signed 16-bit lanes by lane 2
  // of the second operand, widening to 32-bit results.
  // NOTE(review): name says Int8x8 but operands are 16-bit lanes (.8h).
  __uint128_t op1 = MakeUInt128(0x9279068212073883ULL, 0x7781423356282360ULL);
  __uint128_t op2 = MakeUInt128(0x8963208068222468ULL, 0x0122482611771858ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("smull2 %0.4s, %1.8h, %2.h[2]")(op1, op2),
            MakeUInt128(0x0af01400047db000ULL, 0x0f2be08008677980ULL));
}
6386
TEST(Arm64InsnTest, UnsignedMultiplyLongInt8x8IndexedElem) {
  // UMULL (by element): multiplies the lower unsigned 16-bit lanes by lane 2
  // of the second operand, widening to 32-bit results.
  // NOTE(review): name says Int8x8 but operands are 16-bit lanes (.4h).
  __uint128_t op1 = MakeUInt128(0x9086996033027634ULL, 0x7870810817545011ULL);
  __uint128_t op2 = MakeUInt128(0x9307141223390866ULL, 0x3938339529425786ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("umull %0.4s, %1.4h, %2.h[2]")(op1, op2),
            MakeUInt128(0x03ffbe2409445fa8ULL, 0x0b54a16c0c0648c0ULL));
}
6393
TEST(Arm64InsnTest, UnsignedMultiplyLongInt8x8IndexedElem2) {
  // UMULL (by element): same as above but selects lane 4, which lives in the
  // upper half of the source register.
  // NOTE(review): name says Int8x8 but operands are 16-bit lanes (.4h).
  __uint128_t op1 = MakeUInt128(0x9132710495478599ULL, 0x1801969678353214ULL);
  __uint128_t op2 = MakeUInt128(0x6444118926063152ULL, 0x6618167443193550ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("umull %0.4s, %1.4h, %2.h[4]")(op1, op2),
            MakeUInt128(0x1f1659301bd26cd0ULL, 0x1e3cb9a017892540ULL));
}
6400
TEST(Arm64InsnTest, UnsignedMultiplyLongInt8x8IndexedElemUpper) {
  // UMULL2 (by element): multiplies the upper unsigned 16-bit lanes by lane 2
  // of the second operand, widening to 32-bit results.
  // NOTE(review): name says Int8x8 but operands are 16-bit lanes (.8h).
  __uint128_t op1 = MakeUInt128(0x9815793678976697ULL, 0x4220575059683440ULL);
  __uint128_t op2 = MakeUInt128(0x8697350201410206ULL, 0x7235850200724522ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("umull2 %0.4s, %1.8h, %2.h[2]")(op1, op2),
            MakeUInt128(0x12833ad00ad1a880ULL, 0x0db1244012143ea0ULL));
}
6407
TEST(Arm64InsnTest, SignedMultiplyAddLongInt8x8) {
  // SMLAL: widening signed multiply of the lower byte lanes, accumulated into
  // the destination's 16-bit lanes.
  __uint128_t op1 = MakeUInt128(0x9779940012601642ULL, 0x2760926082349304ULL);
  __uint128_t op2 = MakeUInt128(0x1180643829138347ULL, 0x3546797253992623ULL);
  __uint128_t acc = MakeUInt128(0x3879158299848645ULL, 0x9271734059225620ULL);
  __uint128_t actual =
      ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("smlal %0.8h, %1.8b, %2.8b")(op1, op2, acc);
  ASSERT_EQ(actual, MakeUInt128(0x3b5b1ca28ec69893ULL, 0x8b7836c02ef25620ULL));
}
6415
TEST(Arm64InsnTest, SignedMultiplyAddLongInt8x8Upper) {
  // SMLAL2: widening signed multiply of the upper byte lanes, accumulated
  // into the destination's 16-bit lanes.
  __uint128_t op1 = MakeUInt128(0x5514435021828702ULL, 0x6685610665003531ULL);
  __uint128_t op2 = MakeUInt128(0x0502163182060176ULL, 0x0921798468493686ULL);
  __uint128_t acc = MakeUInt128(0x3161293727951873ULL, 0x0789726373537171ULL);
  __uint128_t actual =
      ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("smlal2 %0.8h, %1.16b, %2.16b")(op1, op2, acc);
  ASSERT_EQ(actual, MakeUInt128(0x5a69293732c30119ULL, 0x0b1f6288a12c6e89ULL));
}
6424
TEST(Arm64InsnTest, SignedMultiplySubtractLongInt8x8) {
  // SMLSL: widening signed multiply of the lower byte lanes, subtracted from
  // the destination's 16-bit lanes.
  __uint128_t op1 = MakeUInt128(0x9662539339538092ULL, 0x2195591918188552ULL);
  __uint128_t op2 = MakeUInt128(0x6780621499231727ULL, 0x6316321833989693ULL);
  __uint128_t acc = MakeUInt128(0x8075616855911752ULL, 0x9984501320671293ULL);
  __uint128_t actual =
      ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("smlsl %0.8h, %1.8b, %2.8b")(op1, op2, acc);
  ASSERT_EQ(actual, MakeUInt128(0x9764560f61112814ULL, 0xc42a811300a11b17ULL));
}
6432
TEST(Arm64InsnTest, SignedMultiplySubtractLongInt8x8Upper) {
  // SMLSL2: widening signed multiply of the upper byte lanes, subtracted from
  // the destination's 16-bit lanes.
  __uint128_t op1 = MakeUInt128(0x9826903089111856ULL, 0x8798692947051352ULL);
  __uint128_t op2 = MakeUInt128(0x4816091743243015ULL, 0x3836847072928989ULL);
  __uint128_t acc = MakeUInt128(0x8284602223730145ULL, 0x2655679898627767ULL);
  __uint128_t actual =
      ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("smlsl2 %0.8h, %1.16b, %2.16b")(op1, op2, acc);
  ASSERT_EQ(actual, MakeUInt128(0x62e662482c482763ULL, 0x40cd7d88cb3e6577ULL));
}
6441
TEST(Arm64InsnTest, SignedMultiplyAddLongInt16x4) {
  // SMLAL: widening signed multiply of the lower 16-bit lanes, accumulated
  // into the destination's 32-bit lanes.
  __uint128_t op1 = MakeUInt128(0x9779940012601642ULL, 0x2760926082349304ULL);
  __uint128_t op2 = MakeUInt128(0x1180643829138347ULL, 0x3546797253992623ULL);
  __uint128_t acc = MakeUInt128(0x3879158299848645ULL, 0x9271734059225620ULL);
  __uint128_t actual =
      ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("smlal %0.4s, %1.4h, %2.4h")(op1, op2, acc);
  ASSERT_EQ(actual, MakeUInt128(0x3b6bd2a28eac7893ULL, 0x8b4c38c02edab620ULL));
}
6449
TEST(Arm64InsnTest, UnsignedMultiplyAddLongInt8x8) {
  // UMLAL: widening unsigned multiply of the lower byte lanes, accumulated
  // into the destination's 16-bit lanes.
  __uint128_t op1 = MakeUInt128(0x9696920253886503ULL, 0x4577183176686885ULL);
  __uint128_t op2 = MakeUInt128(0x9236814884752764ULL, 0x9846882194973972ULL);
  __uint128_t acc = MakeUInt128(0x9707737187188400ULL, 0x4143231276365048ULL);
  __uint128_t actual =
      ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("umlal %0.8h, %1.8b, %2.8b")(op1, op2, acc);
  ASSERT_EQ(actual, MakeUInt128(0xc1d3b199967b852cULL, 0x96cf42b6bfc850d8ULL));
}
6457
TEST(Arm64InsnTest, UnsignedMultiplyAddLongInt8x8Upper) {
  // UMLAL2: widening unsigned multiply of the upper byte lanes, accumulated
  // into the destination's 16-bit lanes.
  __uint128_t op1 = MakeUInt128(0x9055637695252326ULL, 0x5361442478023082ULL);
  __uint128_t op2 = MakeUInt128(0x6811831037735887ULL, 0x0892406130313364ULL);
  __uint128_t acc = MakeUInt128(0x7737101162821461ULL, 0x4661679404090518ULL);
  __uint128_t actual =
      ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("umlal2 %0.8h, %1.16b, %2.16b")(op1, op2, acc);
  ASSERT_EQ(actual, MakeUInt128(0x8db710736c124729ULL, 0x48f99ee6150912bcULL));
}
6466
TEST(Arm64InsnTest, UnsignedMultiplySubtractLongInt8x8) {
  // UMLSL: widening unsigned multiply of the lower byte lanes, subtracted
  // from the destination's 16-bit lanes.
  __uint128_t op1 = MakeUInt128(0x4577772457520386ULL, 0x5437542828256714ULL);
  __uint128_t op2 = MakeUInt128(0x1288583454443513ULL, 0x2562054464241011ULL);
  __uint128_t acc = MakeUInt128(0x0379554641905811ULL, 0x6862305964476958ULL);
  __uint128_t actual =
      ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("umlsl %0.8h, %1.8b, %2.8b")(op1, op2, acc);
  ASSERT_EQ(actual, MakeUInt128(0xe6ed3f7e40f14e1fULL, 0x6388f1213b5f6208ULL));
}
6474
TEST(Arm64InsnTest, UnsignedMultiplySubtractLongInt8x8Upper) {
  // UMLSL2: widening unsigned multiply of the upper byte lanes, subtracted
  // from the destination's 16-bit lanes.
  __uint128_t op1 = MakeUInt128(0x4739376564336319ULL, 0x7978680367187307ULL);
  __uint128_t op2 = MakeUInt128(0x9693924236321448ULL, 0x4503547763156702ULL);
  __uint128_t acc = MakeUInt128(0x5539006542311792ULL, 0x0153464977929066ULL);
  __uint128_t actual =
      ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("umlsl2 %0.8h, %1.16b, %2.16b")(op1, op2, acc);
  ASSERT_EQ(actual, MakeUInt128(0x2d64fe6d13ec1784ULL, 0xe0b644e155728f01ULL));
}
6483
TEST(Arm64InsnTest, SignedShiftLeftInt64x1) {
  // SSHL (scalar, 64-bit): shifts the first operand by the signed amount in
  // the second operand; negative amounts shift right (arithmetic).
  constexpr auto AsmSshl = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("sshl %d0, %d1, %d2");
  // Input has its top bit set, so right shifts fill with the sign bit.
  __uint128_t arg = MakeUInt128(0x9007497297363549ULL, 0x6453328886984406ULL);
  // Right shifts of 63 or more collapse a negative value to all ones.
  ASSERT_EQ(AsmSshl(arg, -65), MakeUInt128(0xffffffffffffffffULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmSshl(arg, -64), MakeUInt128(0xffffffffffffffffULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmSshl(arg, -63), MakeUInt128(0xffffffffffffffffULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmSshl(arg, -1), MakeUInt128(0xc803a4b94b9b1aa4ULL, 0x0000000000000000ULL));
  // Zero shift passes the value through unchanged.
  ASSERT_EQ(AsmSshl(arg, 0), MakeUInt128(0x9007497297363549ULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmSshl(arg, 1), MakeUInt128(0x200e92e52e6c6a92ULL, 0x0000000000000000ULL));
  // Left shifts discard high bits; 64 or more leaves zero.
  ASSERT_EQ(AsmSshl(arg, 63), MakeUInt128(0x8000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmSshl(arg, 64), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmSshl(arg, 65), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
}
6497
TEST(Arm64InsnTest, SignedRoundingShiftLeftInt64x1) {
  // SRSHL (scalar, 64-bit): like SSHL but right shifts round toward the
  // nearest value instead of truncating toward negative infinity.
  constexpr auto AsmSrshl = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("srshl %d0, %d1, %d2");
  __uint128_t arg = MakeUInt128(0x9276457931065792ULL, 0x2955249887275846ULL);
  // Rounding makes extreme right shifts differ from SSHL: -65/-64 round to 0,
  // while -63 still yields -1 for this negative input.
  ASSERT_EQ(AsmSrshl(arg, -65), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmSrshl(arg, -64), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmSrshl(arg, -63), MakeUInt128(0xffffffffffffffffULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmSrshl(arg, -1), MakeUInt128(0xc93b22bc98832bc9ULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmSrshl(arg, 0), MakeUInt128(0x9276457931065792ULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmSrshl(arg, 1), MakeUInt128(0x24ec8af2620caf24ULL, 0x0000000000000000ULL));
  // Left shifts are unaffected by rounding; 63+ discards the interesting bits.
  ASSERT_EQ(AsmSrshl(arg, 63), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmSrshl(arg, 64), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmSrshl(arg, 65), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
}
6511
TEST(Arm64InsnTest, UnsignedShiftLeftInt64x1) {
  // USHL (scalar, 64-bit): shifts by the signed amount in the second operand;
  // negative amounts shift right (logical, zero fill).
  constexpr auto AsmUshl = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("ushl %d0, %d1, %d2");
  __uint128_t arg = MakeUInt128(0x9138296682468185ULL, 0x7103188790652870ULL);
  // Logical right shifts of 64+ produce zero; -63 leaves just the top bit.
  ASSERT_EQ(AsmUshl(arg, -65), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmUshl(arg, -64), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmUshl(arg, -63), MakeUInt128(0x0000000000000001ULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmUshl(arg, -1), MakeUInt128(0x489c14b3412340c2ULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmUshl(arg, 0), MakeUInt128(0x9138296682468185ULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmUshl(arg, 1), MakeUInt128(0x227052cd048d030aULL, 0x0000000000000000ULL));
  // Left shifts discard high bits; 64 or more leaves zero.
  ASSERT_EQ(AsmUshl(arg, 63), MakeUInt128(0x8000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmUshl(arg, 64), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmUshl(arg, 65), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
}
6525
TEST(Arm64InsnTest, UnsignedRoundingShiftLeftInt64x1) {
  // URSHL (scalar, 64-bit): like USHL but right shifts round toward the
  // nearest value instead of truncating.
  constexpr auto AsmUrshl = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("urshl %0.8h, %1.8h, %2.8h" /* see below */);
  __uint128_t arg = MakeUInt128(0x9023452924407736ULL, 0x5949563051007421ULL);
  // Rounding makes -64 yield 1 (top bit rounds up) where USHL would give 0.
  ASSERT_EQ(AsmUrshl(arg, -65), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmUrshl(arg, -64), MakeUInt128(0x0000000000000001ULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmUrshl(arg, -63), MakeUInt128(0x0000000000000001ULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmUrshl(arg, -1), MakeUInt128(0x4811a29492203b9bULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmUrshl(arg, 0), MakeUInt128(0x9023452924407736ULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmUrshl(arg, 1), MakeUInt128(0x20468a524880ee6cULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmUrshl(arg, 63), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmUrshl(arg, 64), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmUrshl(arg, 65), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
}
6539
TEST(Arm64InsnTest, SignedShiftLeftInt16x8) {
  // SSHL (vector): each 16-bit lane of the first operand is shifted by the
  // signed per-lane count in the second; negative counts shift right
  // (arithmetic).
  constexpr auto Shift = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("sshl %0.8h, %1.8h, %2.8h");
  __uint128_t lanes = MakeUInt128(0x9999999999999999ULL, 0x9999999999999999ULL);
  __uint128_t counts = MakeUInt128(0x0010000f00020001ULL, 0xfffffff1fff0ffefULL);
  ASSERT_EQ(Shift(lanes, counts), MakeUInt128(0x0000800066643332ULL, 0xccccffffffffffffULL));
  // Zero shift counts leave every lane unchanged.
  ASSERT_EQ(Shift(lanes, 0), MakeUInt128(0x9999999999999999ULL, 0x9999999999999999ULL));
}
6547
TEST(Arm64InsnTest, SignedRoundingShiftLeftInt16x8) {
  // SRSHL (vector): per-lane signed shift where negative counts shift right
  // with rounding.
  constexpr auto Shift = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("srshl %0.8h, %1.8h, %2.8h");
  __uint128_t lanes = MakeUInt128(0x9999999999999999ULL, 0x9999999999999999ULL);
  __uint128_t counts = MakeUInt128(0x0010000f00020001ULL, 0xfffffff1fff0ffefULL);
  ASSERT_EQ(Shift(lanes, counts), MakeUInt128(0x0000800066643332ULL, 0xcccdffff00000000ULL));
  // Zero shift counts leave every lane unchanged.
  ASSERT_EQ(Shift(lanes, 0), MakeUInt128(0x9999999999999999ULL, 0x9999999999999999ULL));
}
6555
TEST(Arm64InsnTest, UnsignedShiftLeftInt16x8) {
  // USHL (vector): per-lane shift where negative counts shift right
  // (logical, zero fill).
  constexpr auto Shift = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("ushl %0.8h, %1.8h, %2.8h");
  __uint128_t lanes = MakeUInt128(0x9999999999999999ULL, 0x9999999999999999ULL);
  __uint128_t counts = MakeUInt128(0x0010000f00020001ULL, 0xfffffff1fff0ffefULL);
  ASSERT_EQ(Shift(lanes, counts), MakeUInt128(0x0000800066643332ULL, 0x4ccc000100000000ULL));
  // Zero shift counts leave every lane unchanged.
  ASSERT_EQ(Shift(lanes, 0), MakeUInt128(0x9999999999999999ULL, 0x9999999999999999ULL));
}
6563
TEST(Arm64InsnTest, UnsignedRoundingShiftLeftInt16x8) {
  // URSHL (vector): per-lane unsigned shift where negative counts shift right
  // with rounding.
  constexpr auto Shift = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("urshl %0.8h, %1.8h, %2.8h");
  __uint128_t lanes = MakeUInt128(0x9999999999999999ULL, 0x9999999999999999ULL);
  __uint128_t counts = MakeUInt128(0x0010000f00020001ULL, 0xfffffff1fff0ffefULL);
  ASSERT_EQ(Shift(lanes, counts), MakeUInt128(0x0000800066643332ULL, 0x4ccd000100010000ULL));
  // Zero shift counts leave every lane unchanged.
  ASSERT_EQ(Shift(lanes, 0), MakeUInt128(0x9999999999999999ULL, 0x9999999999999999ULL));
}
6571
TEST(Arm64InsnTest, UnsignedReciprocalSquareRootEstimateInt32x4) {
  // URSQRTE: per-lane unsigned reciprocal square root estimate.
  __uint128_t src = MakeUInt128(0x9641122821407533ULL, 0x0265510042410489ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_W_ARG("ursqrte %0.4s, %1.4s")(src),
            MakeUInt128(0xa7000000ffffffffULL, 0xfffffffffb800000ULL));
}
6577
TEST(Arm64InsnTest, UnsignedReciprocalEstimateInt32x4) {
  // URECPE: per-lane unsigned reciprocal estimate.
  __uint128_t src = MakeUInt128(0x9714864899468611ULL, 0x2476054286734367ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_W_ARG("urecpe %0.4s, %1.4s")(src),
            MakeUInt128(0xd8800000d6000000ULL, 0xfffffffff4000000ULL));
}
6583
IsQcBitSet(uint32_t fpsr)6584 bool IsQcBitSet(uint32_t fpsr) {
6585 return (fpsr & kFpsrQcBit) != 0;
6586 }
6587
TEST(Arm64InsnTest, SignedSaturatingAddInt64x1) {
  // SQADD (scalar, 64-bit): signed saturating add; saturation raises FPSR.QC.
  constexpr auto AsmSqadd = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqadd %d0, %d2, %d3");

  // Sum fits in int64: plain addition, QC stays clear.
  auto [sum1, fpsr1] = AsmSqadd(MakeUInt128(0x4342527753119724ULL, 0x7430873043619511ULL),
                                MakeUInt128(0x3961190800302558ULL, 0x7838764420608504ULL));
  ASSERT_EQ(sum1, MakeUInt128(0x7ca36b7f5341bc7cULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Sum overflows: result saturates to INT64_MAX and QC is set.
  auto [sum2, fpsr2] = AsmSqadd(MakeUInt128(0x2557185308919284ULL, 0x4038050710300647ULL),
                                MakeUInt128(0x7684786324319100ULL, 0x0223929785255372ULL));
  ASSERT_EQ(sum2, MakeUInt128(0x7fffffffffffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
6603
TEST(Arm64InsnTest, SignedSaturatingAddInt32x4) {
  // SQADD (vector): per-lane signed saturating add; any saturating lane
  // raises FPSR.QC.
  constexpr auto AsmSqadd = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqadd %0.4s, %2.4s, %3.4s");

  // No lane saturates: QC stays clear.
  auto [sum1, fpsr1] = AsmSqadd(MakeUInt128(0x9883554445602495ULL, 0x5666843660292219ULL),
                                MakeUInt128(0x5124830910605377ULL, 0x2019802183101032ULL));
  ASSERT_EQ(sum1, MakeUInt128(0xe9a7d84d55c0780cULL, 0x76800457e339324bULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Some lanes clamp to INT32_MIN/INT32_MAX and QC is set.
  auto [sum2, fpsr2] = AsmSqadd(MakeUInt128(0x9713308844617410ULL, 0x7959162511714864ULL),
                                MakeUInt128(0x8744686112476054ULL, 0x2867343670904667ULL));
  ASSERT_EQ(sum2, MakeUInt128(0x8000000056a8d464ULL, 0x7fffffff7fffffffULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
6619
TEST(Arm64InsnTest, UnsignedSaturatingAddInt8x1) {
  // UQADD (scalar, 8-bit): unsigned saturating add; saturation raises FPSR.QC.
  constexpr auto AsmUqadd = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("uqadd %b0, %b2, %b3");

  // Byte sum fits in uint8: QC stays clear.
  auto [sum1, fpsr1] = AsmUqadd(MakeUInt128(0x6017174229960273ULL, 0x5310276871944944ULL),
                                MakeUInt128(0x4917939785144631ULL, 0x5973144353518504ULL));
  ASSERT_EQ(sum1, MakeUInt128(0x00000000000000a4ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Byte sum overflows: result clamps to 0xff and QC is set.
  auto [sum2, fpsr2] = AsmUqadd(MakeUInt128(0x3306263695626490ULL, 0x9108276271159038ULL),
                                MakeUInt128(0x5699505124652999ULL, 0x6062855443838330ULL));
  ASSERT_EQ(sum2, MakeUInt128(0x00000000000000ffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
6635
// UQADD (scalar, D): unsigned saturating add of the low 64-bit elements.
TEST(Arm64InsnTest, UnsignedSaturatingAddInt64x1) {
  constexpr auto AsmUqadd = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("uqadd %d0, %d2, %d3");

  // Sum fits in 64 bits: exact result, QC clear.
  __uint128_t lhs1 = MakeUInt128(0x0606885137234627ULL, 0x0799732723313469ULL);
  __uint128_t rhs1 = MakeUInt128(0x3971456285542615ULL, 0x4676506324656766ULL);
  auto [sum1, flags1] = AsmUqadd(lhs1, rhs1);
  ASSERT_EQ(sum1, MakeUInt128(0x3f77cdb3bc776c3cULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(flags1));

  // Sum exceeds UINT64_MAX: clamps to all-ones and sets QC.
  __uint128_t lhs2 = MakeUInt128(0x9534957018600154ULL, 0x1262396228641389ULL);
  __uint128_t rhs2 = MakeUInt128(0x7796733329070567ULL, 0x3769621564981845ULL);
  auto [sum2, flags2] = AsmUqadd(lhs2, rhs2);
  ASSERT_EQ(sum2, MakeUInt128(0xffffffffffffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(flags2));
}
6651
// UQADD (vector, 4S): per-lane unsigned saturating add.
TEST(Arm64InsnTest, UnsignedSaturatingAddInt32x4) {
  constexpr auto AsmUqadd = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("uqadd %0.4s, %2.4s, %3.4s");

  // Every lane sum fits in 32 bits: QC clear.
  __uint128_t lhs1 = MakeUInt128(0x9737425700735921ULL, 0x0031541508936793ULL);
  __uint128_t rhs1 = MakeUInt128(0x0081699805365202ULL, 0x7600727749674584ULL);
  auto [sum1, flags1] = AsmUqadd(lhs1, rhs1);
  ASSERT_EQ(sum1, MakeUInt128(0x97b8abef05a9ab23ULL, 0x7631c68c51faad17ULL));
  ASSERT_FALSE(IsQcBitSet(flags1));

  // One lane overflows and clamps to UINT32_MAX; QC is set.
  __uint128_t lhs2 = MakeUInt128(0x9727856471983963ULL, 0x0878154322116691ULL);
  __uint128_t rhs2 = MakeUInt128(0x8654522268126887ULL, 0x2684459684424161ULL);
  auto [sum2, flags2] = AsmUqadd(lhs2, rhs2);
  ASSERT_EQ(sum2, MakeUInt128(0xffffffffd9aaa1eaULL, 0x2efc5ad9a653a7f2ULL));
  ASSERT_TRUE(IsQcBitSet(flags2));
}
6667
// SQSUB (scalar, S): signed saturating subtract on the low 32-bit elements.
// Covers no-saturation plus clamping at both INT32_MIN and INT32_MAX.
TEST(Arm64InsnTest, SignedSaturatingSubtractInt32x1) {
  constexpr auto AsmSqsub = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqsub %s0, %s2, %s3");

  // Difference representable in int32: exact result, QC clear.
  __uint128_t minuend1 = MakeUInt128(0x3178534870760322ULL, 0x1982970579751191ULL);
  __uint128_t subtrahend1 = MakeUInt128(0x4405109942358830ULL, 0x3454635349234982ULL);
  auto [diff1, flags1] = AsmSqsub(minuend1, subtrahend1);
  ASSERT_EQ(diff1, MakeUInt128(0x2e407af2ULL, 0U));
  ASSERT_FALSE(IsQcBitSet(flags1));

  // Underflows int32: clamps to INT32_MIN, QC set.
  __uint128_t minuend2 = MakeUInt128(0x1423696483086410ULL, 0x2592887457999322ULL);
  __uint128_t subtrahend2 = MakeUInt128(0x3749551912219519ULL, 0x0342445230753513ULL);
  auto [diff2, flags2] = AsmSqsub(minuend2, subtrahend2);
  ASSERT_EQ(diff2, MakeUInt128(0x80000000ULL, 0U));
  ASSERT_TRUE(IsQcBitSet(flags2));

  // Overflows int32: clamps to INT32_MAX, QC set.
  __uint128_t minuend3 = MakeUInt128(0x3083508879584152ULL, 0x1489912761065137ULL);
  __uint128_t subtrahend3 = MakeUInt128(0x4153943580721139ULL, 0x0328574918769094ULL);
  auto [diff3, flags3] = AsmSqsub(minuend3, subtrahend3);
  ASSERT_EQ(diff3, MakeUInt128(0x7fffffffULL, 0U));
  ASSERT_TRUE(IsQcBitSet(flags3));
}
6689
// SQSUB (scalar, D): signed saturating subtract of the low 64-bit elements.
TEST(Arm64InsnTest, SignedSaturatingSubtractInt64x1) {
  constexpr auto AsmSqsub = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqsub %d0, %d2, %d3");

  // In-range difference: exact result, QC clear.
  __uint128_t minuend1 = MakeUInt128(0x4416125223196943ULL, 0x4712064173754912ULL);
  __uint128_t subtrahend1 = MakeUInt128(0x1635700857369439ULL, 0x7305979709719726ULL);
  auto [diff1, flags1] = AsmSqsub(minuend1, subtrahend1);
  ASSERT_EQ(diff1, MakeUInt128(0x2de0a249cbe2d50aULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(flags1));

  // Positive overflow: clamps to INT64_MAX and sets QC.
  __uint128_t minuend2 = MakeUInt128(0x7862766490242516ULL, 0x1990277471090335ULL);
  __uint128_t subtrahend2 = MakeUInt128(0x9333093049483805ULL, 0x9785662884478744ULL);
  auto [diff2, flags2] = AsmSqsub(minuend2, subtrahend2);
  ASSERT_EQ(diff2, MakeUInt128(0x7fffffffffffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(flags2));
}
6705
// SQSUB (vector, 4S): per-lane signed saturating subtract.
TEST(Arm64InsnTest, SignedSaturatingSubtractInt32x4) {
  constexpr auto AsmSqsub = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqsub %0.4s, %2.4s, %3.4s");

  // No lane saturates: QC clear.
  __uint128_t minuend1 = MakeUInt128(0x4485680977569630ULL, 0x3129588719161129ULL);
  __uint128_t subtrahend1 = MakeUInt128(0x2946818849363386ULL, 0x4739274760122696ULL);
  auto [diff1, flags1] = AsmSqsub(minuend1, subtrahend1);
  ASSERT_EQ(diff1, MakeUInt128(0x1b3ee6812e2062aaULL, 0xe9f03140b903ea93ULL));
  ASSERT_FALSE(IsQcBitSet(flags1));

  // Lanes clamp at INT32_MIN and INT32_MAX respectively; QC set.
  __uint128_t minuend2 = MakeUInt128(0x9304127100727784ULL, 0x9301555038895360ULL);
  __uint128_t subtrahend2 = MakeUInt128(0x3382619293437970ULL, 0x8187432094991415ULL);
  auto [diff2, flags2] = AsmSqsub(minuend2, subtrahend2);
  ASSERT_EQ(diff2, MakeUInt128(0x800000006d2efe14ULL, 0x117a12307fffffffULL));
  ASSERT_TRUE(IsQcBitSet(flags2));
}
6721
// UQSUB (scalar, S): unsigned saturating subtract; a negative difference
// clamps to zero.
TEST(Arm64InsnTest, UnsignedSaturatingSubtractInt32x1) {
  constexpr auto AsmUqsub = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("uqsub %s0, %s2, %s3");

  // Minuend >= subtrahend: exact difference, QC clear.
  __uint128_t minuend1 = MakeUInt128(0x2548156091372812ULL, 0x8406333039373562ULL);
  __uint128_t subtrahend1 = MakeUInt128(0x4200160456645574ULL, 0x1458816605216660ULL);
  auto [diff1, flags1] = AsmUqsub(minuend1, subtrahend1);
  ASSERT_EQ(diff1, MakeUInt128(0x3ad2d29eULL, 0U));
  ASSERT_FALSE(IsQcBitSet(flags1));

  // Would go negative: clamps to zero and sets QC.
  __uint128_t minuend2 = MakeUInt128(0x1259960281839309ULL, 0x5487090590738613ULL);
  __uint128_t subtrahend2 = MakeUInt128(0x5191459181951029ULL, 0x7327875571049729ULL);
  auto [diff2, flags2] = AsmUqsub(minuend2, subtrahend2);
  ASSERT_EQ(diff2, MakeUInt128(0U, 0U));
  ASSERT_TRUE(IsQcBitSet(flags2));
}
6737
// UQSUB (scalar, D): unsigned saturating subtract of the low 64-bit elements.
TEST(Arm64InsnTest, UnsignedSaturatingSubtractInt64x1) {
  constexpr auto AsmUqsub = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("uqsub %d0, %d2, %d3");

  // Non-negative difference: exact result, QC clear.
  __uint128_t minuend1 = MakeUInt128(0x9691077542576474ULL, 0x8832534141213280ULL);
  __uint128_t subtrahend1 = MakeUInt128(0x0626717094009098ULL, 0x2235296579579978ULL);
  auto [diff1, flags1] = AsmUqsub(minuend1, subtrahend1);
  ASSERT_EQ(diff1, MakeUInt128(0x906a9604ae56d3dcULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(flags1));

  // Underflow: clamps to zero and sets QC.
  __uint128_t minuend2 = MakeUInt128(0x7752929106925043ULL, 0x2614469501098610ULL);
  __uint128_t subtrahend2 = MakeUInt128(0x8889991465855188ULL, 0x1873582528164302ULL);
  auto [diff2, flags2] = AsmUqsub(minuend2, subtrahend2);
  ASSERT_EQ(diff2, MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(flags2));
}
6753
// UQSUB (vector, 4S): per-lane unsigned saturating subtract.
TEST(Arm64InsnTest, UnsignedSaturatingSubtractInt32x4) {
  constexpr auto AsmUqsub = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("uqsub %0.4s, %2.4s, %3.4s");

  // Every lane stays non-negative: QC clear.
  __uint128_t minuend1 = MakeUInt128(0x6884962578665885ULL, 0x9991798675205545ULL);
  __uint128_t subtrahend1 = MakeUInt128(0x5809900455646117ULL, 0x8755249370124553ULL);
  auto [diff1, flags1] = AsmUqsub(minuend1, subtrahend1);
  ASSERT_EQ(diff1, MakeUInt128(0x107b06212301f76eULL, 0x123c54f3050e0ff2ULL));
  ASSERT_FALSE(IsQcBitSet(flags1));

  // One lane underflows and clamps to zero; QC is set.
  __uint128_t minuend2 = MakeUInt128(0x5032678340586301ULL, 0x9301932429963972ULL);
  __uint128_t subtrahend2 = MakeUInt128(0x0444517928812285ULL, 0x4478211953530898ULL);
  auto [diff2, flags2] = AsmUqsub(minuend2, subtrahend2);
  ASSERT_EQ(diff2, MakeUInt128(0x4bee160a17d7407cULL, 0x4e89720b00000000ULL));
  ASSERT_TRUE(IsQcBitSet(flags2));
}
6769
// SQABS (scalar, B): signed saturating absolute value of the low byte.
// abs(INT8_MIN) is unrepresentable and clamps to INT8_MAX with QC set.
TEST(Arm64InsnTest, SignedSaturatingAbsoluteInt8x1) {
  constexpr auto AsmSqabs = ASM_INSN_WRAP_FUNC_WQ_RES_W_ARG("sqabs %b0, %b2");

  // Low byte 0x81 (-127): abs is 0x7f without saturation.
  __uint128_t input1 = MakeUInt128(0x8918016855727981ULL, 0x5642185819119749ULL);
  auto [abs1, flags1] = AsmSqabs(input1);
  ASSERT_EQ(abs1, MakeUInt128(0x000000000000007fULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(flags1));

  // Low byte 0x80 (-128): saturates to 0x7f and sets QC.
  __uint128_t input2 = MakeUInt128(0x0000000000000080ULL, 0x6464607287574305ULL);
  auto [abs2, flags2] = AsmSqabs(input2);
  ASSERT_EQ(abs2, MakeUInt128(0x000000000000007fULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(flags2));
}
6783
// SQABS (scalar, D): signed saturating absolute value of the low 64 bits.
TEST(Arm64InsnTest, SignedSaturatingAbsoluteInt64x1) {
  constexpr auto AsmSqabs = ASM_INSN_WRAP_FUNC_WQ_RES_W_ARG("sqabs %d0, %d2");

  // Negative but not INT64_MIN: plain negation, QC clear.
  __uint128_t input1 = MakeUInt128(0x9717317281315179ULL, 0x3290443112181587ULL);
  auto [abs1, flags1] = AsmSqabs(input1);
  ASSERT_EQ(abs1, MakeUInt128(0x68e8ce8d7eceae87ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(flags1));

  // abs(INT64_MIN) is unrepresentable: clamps to INT64_MAX, QC set.
  __uint128_t input2 = MakeUInt128(0x8000000000000000ULL, 0x1001237687219447ULL);
  auto [abs2, flags2] = AsmSqabs(input2);
  ASSERT_EQ(abs2, MakeUInt128(0x7fffffffffffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(flags2));
}
6797
// SQABS (vector, 4S): per-lane signed saturating absolute value.
TEST(Arm64InsnTest, SignedSaturatingAbsoluteInt32x4) {
  constexpr auto AsmSqabs = ASM_INSN_WRAP_FUNC_WQ_RES_W_ARG("sqabs %0.4s, %2.4s");

  // No lane holds INT32_MIN: QC clear.
  __uint128_t input1 = MakeUInt128(0x9133820578492800ULL, 0x6982551957402018ULL);
  auto [abs1, flags1] = AsmSqabs(input1);
  ASSERT_EQ(abs1, MakeUInt128(0x6ecc7dfb78492800ULL, 0x6982551957402018ULL));
  ASSERT_FALSE(IsQcBitSet(flags1));

  // One lane is INT32_MIN: that lane clamps to INT32_MAX, QC set.
  __uint128_t input2 = MakeUInt128(0x1810564129725083ULL, 0x6070356880000000ULL);
  auto [abs2, flags2] = AsmSqabs(input2);
  ASSERT_EQ(abs2, MakeUInt128(0x1810564129725083ULL, 0x607035687fffffffULL));
  ASSERT_TRUE(IsQcBitSet(flags2));
}
6811
// SQNEG (scalar, S): signed saturating negation of the low 32 bits.
TEST(Arm64InsnTest, SignedSaturatingNegateInt32x1) {
  constexpr auto AsmSqneg = ASM_INSN_WRAP_FUNC_WQ_RES_W_ARG("sqneg %s0, %s2");

  // Representable negation: QC clear.
  __uint128_t input1 = MakeUInt128(0x6461582694563802ULL, 0x3950283712168644ULL);
  auto [neg1, flags1] = AsmSqneg(input1);
  ASSERT_EQ(neg1, MakeUInt128(0x000000006ba9c7feULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(flags1));

  // -INT32_MIN is unrepresentable: clamps to INT32_MAX and sets QC.
  __uint128_t input2 = MakeUInt128(0x6561785280000000ULL, 0x1277128269186886ULL);
  auto [neg2, flags2] = AsmSqneg(input2);
  ASSERT_EQ(neg2, MakeUInt128(0x000000007fffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(flags2));
}
6825
// SQNEG (scalar, D): signed saturating negation of the low 64 bits.
TEST(Arm64InsnTest, SignedSaturatingNegateInt64x1) {
  constexpr auto AsmSqneg = ASM_INSN_WRAP_FUNC_WQ_RES_W_ARG("sqneg %d0, %d2");

  // Representable negation: QC clear.
  __uint128_t input1 = MakeUInt128(0x9703600795698276ULL, 0x2639234410714658ULL);
  auto [neg1, flags1] = AsmSqneg(input1);
  ASSERT_EQ(neg1, MakeUInt128(0x68fc9ff86a967d8aULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(flags1));

  // -INT64_MIN is unrepresentable: clamps to INT64_MAX, QC set.
  __uint128_t input2 = MakeUInt128(0x8000000000000000ULL, 0x4052295369374997ULL);
  auto [neg2, flags2] = AsmSqneg(input2);
  ASSERT_EQ(neg2, MakeUInt128(0x7fffffffffffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(flags2));
}
6839
// SQNEG (vector, 4S): per-lane signed saturating negation.
TEST(Arm64InsnTest, SignedSaturatingNegateInt32x4) {
  constexpr auto AsmSqneg = ASM_INSN_WRAP_FUNC_WQ_RES_W_ARG("sqneg %0.4s, %2.4s");

  // No lane is INT32_MIN: QC clear.
  __uint128_t input1 = MakeUInt128(0x9172320202822291ULL, 0x4886959399729974ULL);
  auto [neg1, flags1] = AsmSqneg(input1);
  ASSERT_EQ(neg1, MakeUInt128(0x6e8dcdfefd7ddd6fULL, 0xb7796a6d668d668cULL));
  ASSERT_FALSE(IsQcBitSet(flags1));

  // One lane holds INT32_MIN: it clamps to INT32_MAX, QC set.
  __uint128_t input2 = MakeUInt128(0x2974711553718589ULL, 0x2423849380000000ULL);
  auto [neg2, flags2] = AsmSqneg(input2);
  ASSERT_EQ(neg2, MakeUInt128(0xd68b8eebac8e7a77ULL, 0xdbdc7b6d7fffffffULL));
  ASSERT_TRUE(IsQcBitSet(flags2));
}
6853
// SQSHL (scalar, S, immediate): signed saturating left shift by #20.
TEST(Arm64InsnTest, SignedSaturatingShiftLeftImmInt32x1) {
  constexpr auto AsmSqshl = ASM_INSN_WRAP_FUNC_WQ_RES_W_ARG("sqshl %s0, %s2, #20");

  // Shifted value still fits in int32: QC clear.
  __uint128_t input1 = MakeUInt128(0x9724611600000181ULL, 0x0003509892864120ULL);
  auto [shifted1, flags1] = AsmSqshl(input1);
  ASSERT_EQ(shifted1, MakeUInt128(0x0000000018100000ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(flags1));

  // Shift overflows int32: clamps to INT32_MAX and sets QC.
  __uint128_t input2 = MakeUInt128(0x4195163551108763ULL, 0x2042676129798265ULL);
  auto [shifted2, flags2] = AsmSqshl(input2);
  ASSERT_EQ(shifted2, MakeUInt128(0x000000007fffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(flags2));
}
6867
// SQSHL (scalar, D, immediate): signed saturating left shift by #28.
TEST(Arm64InsnTest, SignedSaturatingShiftLeftImmInt64x1) {
  constexpr auto AsmSqshl = ASM_INSN_WRAP_FUNC_WQ_RES_W_ARG("sqshl %d0, %d2, #28");

  // Result fits in int64: exact shift, QC clear.
  __uint128_t input1 = MakeUInt128(0x0000000774000539ULL, 0x2622760323659751ULL);
  auto [shifted1, flags1] = AsmSqshl(input1);
  ASSERT_EQ(shifted1, MakeUInt128(0x7740005390000000ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(flags1));

  // Negative value overflows on shift: clamps to INT64_MIN, QC set.
  __uint128_t input2 = MakeUInt128(0x9938714995449137ULL, 0x3020518436690767ULL);
  auto [shifted2, flags2] = AsmSqshl(input2);
  ASSERT_EQ(shifted2, MakeUInt128(0x8000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(flags2));
}
6881
// SQSHL (vector, 4S, immediate): per-lane signed saturating left shift by #12.
TEST(Arm64InsnTest, SignedSaturatingShiftLeftImmInt32x4) {
  constexpr auto AsmSqshl = ASM_INSN_WRAP_FUNC_WQ_RES_W_ARG("sqshl %0.4s, %2.4s, #12");

  // Every lane survives the shift unclamped: QC clear.
  __uint128_t input1 = MakeUInt128(0x0007256800042011ULL, 0x0000313500033555ULL);
  auto [shifted1, flags1] = AsmSqshl(input1);
  ASSERT_EQ(shifted1, MakeUInt128(0x7256800042011000ULL, 0x0313500033555000ULL));
  ASSERT_FALSE(IsQcBitSet(flags1));

  // Lanes clamp to INT32_MAX / INT32_MIN depending on sign; QC set.
  __uint128_t input2 = MakeUInt128(0x0944031900072034ULL, 0x8651010561049872ULL);
  auto [shifted2, flags2] = AsmSqshl(input2);
  ASSERT_EQ(shifted2, MakeUInt128(0x7fffffff72034000ULL, 0x800000007fffffffULL));
  ASSERT_TRUE(IsQcBitSet(flags2));
}
6895
// SQSHL (scalar, S, register): signed saturating shift where the shift count
// comes from a register; negative counts shift right.  Sweeps counts from
// far-negative (result 0) through zero up to counts that force saturation.
TEST(Arm64InsnTest, SignedSaturatingShiftLeftByRegisterImmInt32x1) {
  constexpr auto AsmSqshl = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqshl %s0, %s2, %s3");

  __uint128_t input = MakeUInt128(0x7480771811555330ULL, 0x9098870255052076ULL);

  const struct {
    int shift;
    __uint128_t expected;
    bool saturates;
  } kCases[] = {
      {-33, MakeUInt128(0U, 0U), false},
      {-32, MakeUInt128(0U, 0U), false},
      {-31, MakeUInt128(0U, 0U), false},
      {-1, MakeUInt128(0x08aaa998ULL, 0U), false},
      {0, MakeUInt128(0x11555330ULL, 0U), false},
      {1, MakeUInt128(0x22aaa660ULL, 0U), false},
      {31, MakeUInt128(0x7fffffffULL, 0U), true},
      {32, MakeUInt128(0x7fffffffULL, 0U), true},
      {33, MakeUInt128(0x7fffffffULL, 0U), true},
  };

  for (const auto& test_case : kCases) {
    auto [res, fpsr] = AsmSqshl(input, test_case.shift);
    ASSERT_EQ(res, test_case.expected) << "shift=" << test_case.shift;
    ASSERT_EQ(IsQcBitSet(fpsr), test_case.saturates) << "shift=" << test_case.shift;
  }
}
6939
// UQSHL (scalar, D, immediate): unsigned saturating left shift by #28.
TEST(Arm64InsnTest, UnsignedSaturatingShiftLeftImmInt64x1) {
  constexpr auto AsmUqshl = ASM_INSN_WRAP_FUNC_WQ_RES_W_ARG("uqshl %d0, %d2, #28");

  // Result fits in 64 bits: exact shift, QC clear.
  __uint128_t input1 = MakeUInt128(0x0000000961573564ULL, 0x8883443185280853ULL);
  auto [shifted1, flags1] = AsmUqshl(input1);
  ASSERT_EQ(shifted1, MakeUInt128(0x9615735640000000ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(flags1));

  // Shift overflows 64 bits: clamps to all-ones and sets QC.
  __uint128_t input2 = MakeUInt128(0x9759277344336553ULL, 0x8418834030351782ULL);
  auto [shifted2, flags2] = AsmUqshl(input2);
  ASSERT_EQ(shifted2, MakeUInt128(0xffffffffffffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(flags2));
}
6953
// UQSHL (vector, 4S, immediate): per-lane unsigned saturating shift by #12.
TEST(Arm64InsnTest, UnsignedSaturatingShiftLeftImmInt32x4) {
  constexpr auto AsmUqshl = ASM_INSN_WRAP_FUNC_WQ_RES_W_ARG("uqshl %0.4s, %2.4s, #12");

  // All lanes fit after the shift: QC clear.
  __uint128_t input1 = MakeUInt128(0x0000326300096218ULL, 0x0004565900066853ULL);
  auto [shifted1, flags1] = AsmUqshl(input1);
  ASSERT_EQ(shifted1, MakeUInt128(0x0326300096218000ULL, 0x4565900066853000ULL));
  ASSERT_FALSE(IsQcBitSet(flags1));

  // Some lanes overflow and clamp to UINT32_MAX; QC set.
  __uint128_t input2 = MakeUInt128(0x0009911314010804ULL, 0x0009732335449090ULL);
  auto [shifted2, flags2] = AsmUqshl(input2);
  ASSERT_EQ(shifted2, MakeUInt128(0x99113000ffffffffULL, 0x97323000ffffffffULL));
  ASSERT_TRUE(IsQcBitSet(flags2));
}
6967
// UQSHL (scalar, S, register): unsigned saturating shift by a register count;
// negative counts shift right.  Sweeps the same count range as the signed
// variant above, clamping to UINT32_MAX on overflow.
TEST(Arm64InsnTest, UnsignedSaturatingShiftLeftByRegisterImmInt32x1) {
  constexpr auto AsmUqshl = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("uqshl %s0, %s2, %s3");

  __uint128_t input = MakeUInt128(0x9714978507414585ULL, 0x3085781339156270ULL);

  const struct {
    int shift;
    __uint128_t expected;
    bool saturates;
  } kCases[] = {
      {-33, MakeUInt128(0U, 0U), false},
      {-32, MakeUInt128(0U, 0U), false},
      {-31, MakeUInt128(0U, 0U), false},
      {-1, MakeUInt128(0x03a0a2c2ULL, 0U), false},
      {0, MakeUInt128(0x07414585ULL, 0U), false},
      {1, MakeUInt128(0x0e828b0aULL, 0U), false},
      {31, MakeUInt128(0xffffffffULL, 0U), true},
      {32, MakeUInt128(0xffffffffULL, 0U), true},
      {33, MakeUInt128(0xffffffffULL, 0U), true},
  };

  for (const auto& test_case : kCases) {
    auto [res, fpsr] = AsmUqshl(input, test_case.shift);
    ASSERT_EQ(res, test_case.expected) << "shift=" << test_case.shift;
    ASSERT_EQ(IsQcBitSet(fpsr), test_case.saturates) << "shift=" << test_case.shift;
  }
}
7011
// SQSHL (vector, 8H, register): each lane is shifted by the signed count in
// the corresponding lane of the second operand (negative = right shift).
TEST(Arm64InsnTest, SignedSaturatingShiftLeftByRegisterImmInt16x8) {
  constexpr auto AsmSqshl = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqshl %0.8h, %2.8h, %3.8h");

  // Per-lane shift counts, mixing large negative and large positive values.
  __uint128_t shifts = MakeUInt128(0xffdfffe0ffe1ffffULL, 0x0001001f00200021ULL);

  // Shifting zero always yields zero and never saturates.
  __uint128_t zeros = 0U;
  auto [res1, flags1] = AsmSqshl(zeros, shifts);
  ASSERT_EQ(res1, MakeUInt128(0U, 0U));
  ASSERT_FALSE(IsQcBitSet(flags1));

  // Non-zero lanes: big left shifts clamp to INT16_MAX and set QC.
  __uint128_t values = MakeUInt128(0x3333333333333333ULL, 0x3333333333333333ULL);
  auto [res2, flags2] = AsmSqshl(values, shifts);
  ASSERT_EQ(res2, MakeUInt128(0x0000000000001999ULL, 0x66667fff7fff7fffULL));
  ASSERT_TRUE(IsQcBitSet(flags2));
}
7026
// UQSHL (vector, 8H, register): per-lane unsigned saturating shift, with the
// count taken from the matching lane of the second operand.
TEST(Arm64InsnTest, UnsignedSaturatingShiftLeftByRegisterImmInt16x8) {
  constexpr auto AsmUqshl = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("uqshl %0.8h, %2.8h, %3.8h");

  // Per-lane shift counts, mixing large negative and large positive values.
  __uint128_t shifts = MakeUInt128(0xffdfffe0ffe1ffffULL, 0x0001001f00200021ULL);

  // Shifting zero always yields zero and never saturates.
  __uint128_t zeros = 0U;
  auto [res1, flags1] = AsmUqshl(zeros, shifts);
  ASSERT_EQ(res1, MakeUInt128(0U, 0U));
  ASSERT_FALSE(IsQcBitSet(flags1));

  // Non-zero lanes: big left shifts clamp to UINT16_MAX and set QC.
  __uint128_t values = MakeUInt128(0x7777777777777777ULL, 0x7777777777777777ULL);
  auto [res2, flags2] = AsmUqshl(values, shifts);
  ASSERT_EQ(res2, MakeUInt128(0x0000000000003bbbULL, 0xeeeeffffffffffffULL));
  ASSERT_TRUE(IsQcBitSet(flags2));
}
7041
// SQXTN (vector, 2D -> 2S): narrows each signed 64-bit lane to 32 bits with
// signed saturation; QC is set when any lane had to be clamped.
TEST(Arm64InsnTest, SignedSaturatingExtractNarrowInt64x2ToInt32x2) {
  constexpr auto AsmSqxtn = ASM_INSN_WRAP_FUNC_WQ_RES_W_ARG("sqxtn %0.2s, %2.2d");

  // Both lanes out of int32 range: clamp to INT32_MAX / INT32_MIN, QC set.
  __uint128_t arg1 = MakeUInt128(0x0123456789abcdefULL, 0xfedcba9876543210ULL);
  auto [res1, fpsr1] = AsmSqxtn(arg1);
  ASSERT_EQ(res1, MakeUInt128(0x800000007fffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr1));

  // Both lanes fit in int32: values pass through unchanged, QC clear.
  // (Suffix fixed from LL to ULL for consistency with the file's convention.)
  __uint128_t arg2 = MakeUInt128(0x0000000001234567ULL, 0x000000007ecdba98ULL);
  auto [res2, fpsr2] = AsmSqxtn(arg2);
  ASSERT_EQ(res2, MakeUInt128(0x7ecdba9801234567ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr2));
}
7055
// SQXTN (scalar, D -> S): narrows the low 64-bit element to 32 bits with
// signed saturation.
TEST(Arm64InsnTest, SignedSaturatingExtractNarrowInt64x1ToInt32x1) {
  constexpr auto AsmSqxtn = ASM_INSN_WRAP_FUNC_WQ_RES_W_ARG("sqxtn %s0, %d2");

  // Value exceeds int32 range: clamps to INT32_MAX, QC set.
  __uint128_t wide1 = MakeUInt128(0x1234567812345678ULL, 0x0ULL);
  auto [narrow1, flags1] = AsmSqxtn(wide1);
  ASSERT_EQ(narrow1, MakeUInt128(0x000000007fffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(flags1));

  // Value fits in int32: passes through unchanged, QC clear.
  __uint128_t wide2 = MakeUInt128(0x0000000012345678ULL, 0x0ULL);
  auto [narrow2, flags2] = AsmSqxtn(wide2);
  ASSERT_EQ(narrow2, MakeUInt128(0x0000000012345678ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(flags2));
}
7069
// UQXTN (vector, 2D -> 2S): narrows each unsigned 64-bit lane to 32 bits with
// unsigned saturation; QC is set when any lane had to be clamped.
TEST(Arm64InsnTest, UnsignedSaturatingExtractNarrowInt64x2ToInt32x2) {
  constexpr auto AsmUqstn = ASM_INSN_WRAP_FUNC_WQ_RES_W_ARG("uqxtn %0.2s, %2.2d");

  // Both lanes exceed UINT32_MAX: clamp to all-ones, QC set.
  __uint128_t arg1 = MakeUInt128(0x0123456789abcdefULL, 0xfedcba9876543210ULL);
  auto [res1, fpsr1] = AsmUqstn(arg1);
  ASSERT_EQ(res1, MakeUInt128(0xffffffffffffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr1));

  // Both lanes fit in 32 bits: values pass through unchanged, QC clear.
  // (Suffix fixed from LL to ULL for consistency with the file's convention.)
  __uint128_t arg2 = MakeUInt128(0x0000000001234567ULL, 0x00000000fecdba98ULL);
  auto [res2, fpsr2] = AsmUqstn(arg2);
  ASSERT_EQ(res2, MakeUInt128(0xfecdba9801234567ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr2));
}
7083
// UQXTN (scalar, D -> S): narrows the low 64-bit element to 32 bits with
// unsigned saturation.
TEST(Arm64InsnTest, UnsignedSaturatingExtractNarrowInt64x1ToInt32x1) {
  constexpr auto AsmUqxtn = ASM_INSN_WRAP_FUNC_WQ_RES_W_ARG("uqxtn %s0, %d2");

  // Value exceeds UINT32_MAX: clamps to all-ones, QC set.
  __uint128_t wide1 = MakeUInt128(0x1234567812345678ULL, 0x0ULL);
  auto [narrow1, flags1] = AsmUqxtn(wide1);
  ASSERT_EQ(narrow1, MakeUInt128(0x00000000ffffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(flags1));

  // Value fits in 32 bits (top bit set is fine for unsigned): QC clear.
  __uint128_t wide2 = MakeUInt128(0x0000000087654321ULL, 0x0ULL);
  auto [narrow2, flags2] = AsmUqxtn(wide2);
  ASSERT_EQ(narrow2, MakeUInt128(0x0000000087654321ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(flags2));
}
7097
// SQXTN2 (vector, 2D -> upper 2S): narrows each signed 64-bit lane with
// signed saturation into the upper half of the destination, preserving the
// destination's lower half; QC is set when any lane had to be clamped.
TEST(Arm64InsnTest, SignedSaturatingExtractNarrow2Int64x2ToInt32x2) {
  constexpr auto AsmSqxtn2 = ASM_INSN_WRAP_FUNC_WQ_RES_W0_ARG("sqxtn2 %0.4s, %2.2d");

  // Both wide lanes out of int32 range: clamp to INT32_MAX / INT32_MIN in the
  // upper half; the lower half keeps the prior destination contents.
  __uint128_t arg1 = MakeUInt128(0x0123456789abcdefULL, 0xfedcba9876543210ULL);
  __uint128_t arg2 = MakeUInt128(0x6121865619673378ULL, 0x6236256125216320ULL);
  auto [res1, fpsr1] = AsmSqxtn2(arg1, arg2);
  ASSERT_EQ(res1, MakeUInt128(0x6121865619673378ULL, 0x800000007fffffffULL));
  ASSERT_TRUE(IsQcBitSet(fpsr1));

  // Both wide lanes fit in int32: exact narrowing, QC clear.
  // (Suffix fixed from LL to ULL for consistency with the file's convention.)
  __uint128_t arg3 = MakeUInt128(0x0000000001234567ULL, 0x000000007ecdba98ULL);
  __uint128_t arg4 = MakeUInt128(0x6121865619673378ULL, 0x6236256125216320ULL);
  auto [res2, fpsr2] = AsmSqxtn2(arg3, arg4);
  ASSERT_EQ(res2, MakeUInt128(0x6121865619673378ULL, 0x7ecdba9801234567ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr2));
}
7113
// UQXTN2 (vector, 2D -> upper 2S): narrows each unsigned 64-bit lane with
// unsigned saturation into the upper half of the destination, preserving the
// destination's lower half; QC is set when any lane had to be clamped.
TEST(Arm64InsnTest, UnsignedSaturatingExtractNarrow2Int64x2ToInt32x4) {
  constexpr auto AsmUqxtn2 = ASM_INSN_WRAP_FUNC_WQ_RES_W0_ARG("uqxtn2 %0.4s, %2.2d");

  // Both wide lanes exceed UINT32_MAX: upper half clamps to all-ones, QC set.
  __uint128_t arg1 = MakeUInt128(0x0123456789abcdefULL, 0xfedcba9876543210ULL);
  __uint128_t arg2 = MakeUInt128(0x6121865619673378ULL, 0x6236256125216320ULL);
  auto [res1, fpsr1] = AsmUqxtn2(arg1, arg2);
  ASSERT_EQ(res1, MakeUInt128(0x6121865619673378ULL, 0xffffffffffffffffULL));
  ASSERT_TRUE(IsQcBitSet(fpsr1));

  // Both wide lanes fit in 32 bits: exact narrowing, QC clear.
  // (Suffix fixed from LL to ULL for consistency with the file's convention.)
  __uint128_t arg3 = MakeUInt128(0x0000000001234567ULL, 0x00000000fecdba98ULL);
  __uint128_t arg4 = MakeUInt128(0x6121865619673378ULL, 0x6236256125216320ULL);
  auto [res2, fpsr2] = AsmUqxtn2(arg3, arg4);
  ASSERT_EQ(res2, MakeUInt128(0x6121865619673378ULL, 0xfecdba9801234567ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr2));
}
7129
// SQXTUN (vector, 2D -> 2S): narrows each signed 64-bit lane to an unsigned
// 32-bit value — negatives clamp to 0, values above UINT32_MAX clamp to
// all-ones; QC is set when any lane had to be clamped.
TEST(Arm64InsnTest, SignedSaturatingExtractUnsignedNarrowInt64x2ToInt32x2) {
  constexpr auto AsmSqxtun = ASM_INSN_WRAP_FUNC_WQ_RES_W_ARG("sqxtun %0.2s, %2.2d");

  // Second lane exceeds UINT32_MAX: it clamps to 0xffffffff, QC set.
  __uint128_t arg1 = MakeUInt128(0x0000000044332211ULL, 0x00000001aabbccddULL);
  auto [res1, fpsr1] = AsmSqxtun(arg1);
  ASSERT_EQ(res1, MakeUInt128(0xffffffff44332211ULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr1));

  // Both lanes fit in the unsigned 32-bit range: exact narrowing, QC clear.
  // (Suffix fixed from LL to ULL for consistency with the file's convention.)
  __uint128_t arg2 = MakeUInt128(0x0000000001234567ULL, 0x00000000fecdba98ULL);
  auto [res2, fpsr2] = AsmSqxtun(arg2);
  ASSERT_EQ(res2, MakeUInt128(0xfecdba9801234567ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr2));
}
7143
// SQXTUN (scalar, D -> S): narrows a signed 64-bit value to an unsigned
// 32-bit result with saturation.
TEST(Arm64InsnTest, SignedSaturatingExtractUnsignedNarrowInt64x1ToInt32x1) {
  constexpr auto AsmSqxtun = ASM_INSN_WRAP_FUNC_WQ_RES_W_ARG("sqxtun %s0, %d2");

  // Value exceeds UINT32_MAX: clamps to all-ones, QC set.
  __uint128_t wide1 = MakeUInt128(0x00000001ff332211ULL, 0x0ULL);
  auto [narrow1, flags1] = AsmSqxtun(wide1);
  ASSERT_EQ(narrow1, MakeUInt128(0x00000000ffffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(flags1));

  // Value fits in the unsigned 32-bit range: exact narrowing, QC clear.
  __uint128_t wide2 = MakeUInt128(0x00000000ff332211ULL, 0x0ULL);
  auto [narrow2, flags2] = AsmSqxtun(wide2);
  ASSERT_EQ(narrow2, MakeUInt128(0x00000000ff332211ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(flags2));
}
7157
// SQXTUN2 (vector, 2D -> upper 2S): narrows each signed 64-bit lane to an
// unsigned 32-bit value (negatives clamp to 0) into the upper half of the
// destination, preserving the destination's lower half; QC reports clamping.
TEST(Arm64InsnTest, SignedSaturatingExtractUnsignedNarrow2Int64x2ToInt32x4) {
  constexpr auto AsmSqxtun2 = ASM_INSN_WRAP_FUNC_WQ_RES_W0_ARG("sqxtun2 %0.4s, %2.2d");

  // Negative wide lane clamps to zero in the upper half; QC set.
  __uint128_t arg1 = MakeUInt128(0x0000000089abcdefULL, 0xfedcba9876543210ULL);
  __uint128_t arg2 = MakeUInt128(0x0123456789abcdefULL, 0xfedcba9876543210ULL);
  auto [res1, fpsr1] = AsmSqxtun2(arg1, arg2);
  ASSERT_EQ(res1, MakeUInt128(0x0123456789abcdefULL, 0x0000000089abcdefULL));
  ASSERT_TRUE(IsQcBitSet(fpsr1));

  // Both wide lanes in the unsigned 32-bit range: exact narrowing, QC clear.
  // (Suffix fixed from LL to ULL for consistency with the file's convention.)
  __uint128_t arg3 = MakeUInt128(0x0000000001234567ULL, 0x00000000fecdba98ULL);
  __uint128_t arg4 = MakeUInt128(0x0123456789abcdefULL, 0xfedcba9876543210ULL);
  auto [res2, fpsr2] = AsmSqxtun2(arg3, arg4);
  ASSERT_EQ(res2, MakeUInt128(0x0123456789abcdefULL, 0xfecdba9801234567ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr2));
}
7173
// SUQADD (scalar, S): signed saturating accumulate of an unsigned value;
// the result clamps to INT32_MAX on overflow.
TEST(Arm64InsnTest, SignedSaturatingAccumulateOfUnsignedValueInt32x1) {
  constexpr auto AsmSuqadd = ASM_INSN_WRAP_FUNC_WQ_RES_W0_ARG("suqadd %s0, %s2");

  // Sum fits in int32: exact result, QC clear.
  __uint128_t addend1 = MakeUInt128(0x9392023115638719ULL, 0x5080502467972579ULL);
  __uint128_t addend2 = MakeUInt128(0x2497605762625913ULL, 0x3285597263712112ULL);
  auto [acc1, flags1] = AsmSuqadd(addend1, addend2);
  ASSERT_EQ(acc1, MakeUInt128(0x0000000077c5e02cULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(flags1));

  // Sum exceeds INT32_MAX: clamps and sets QC.
  __uint128_t addend3 = MakeUInt128(0x9099791776687477ULL, 0x4481882870632315ULL);
  __uint128_t addend4 = MakeUInt128(0x5158650328981642ULL, 0x2828823274686610ULL);
  auto [acc2, flags2] = AsmSuqadd(addend3, addend4);
  ASSERT_EQ(acc2, MakeUInt128(0x000000007fffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(flags2));
}
7189
// SUQADD (vector, 4S): per-lane signed saturating accumulate of unsigned
// values; overflowing lanes clamp to INT32_MAX.
TEST(Arm64InsnTest, SignedSaturatingAccumulateOfUnsignedValueInt32x4) {
  constexpr auto AsmSuqadd = ASM_INSN_WRAP_FUNC_WQ_RES_W0_ARG("suqadd %0.4s, %2.4s");

  // No lane saturates: QC clear.
  __uint128_t addend1 = MakeUInt128(0x2590181000350989ULL, 0x2864120419516355ULL);
  __uint128_t addend2 = MakeUInt128(0x1108763204267612ULL, 0x9798265294258829ULL);
  auto [acc1, flags1] = AsmSuqadd(addend1, addend2);
  ASSERT_EQ(acc1, MakeUInt128(0x36988e42045b7f9bULL, 0xbffc3856ad76eb7eULL));
  ASSERT_FALSE(IsQcBitSet(flags1));

  // Some lanes clamp to INT32_MAX; QC set.
  __uint128_t addend3 = MakeUInt128(0x9082888934938376ULL, 0x4393992569006040ULL);
  __uint128_t addend4 = MakeUInt128(0x6731142209331219ULL, 0x5936202982972351ULL);
  auto [acc2, flags2] = AsmSuqadd(addend3, addend4);
  ASSERT_EQ(acc2, MakeUInt128(0x7fffffff3dc6958fULL, 0x7fffffffeb978391ULL));
  ASSERT_TRUE(IsQcBitSet(flags2));
}
7205
// USQADD (scalar, S): unsigned saturating accumulate of a signed value;
// the result clamps to 0 (negative addend dominates) or UINT32_MAX.
TEST(Arm64InsnTest, UnsignedSaturatingAccumulateOfSignedValueInt32x1) {
  constexpr auto AsmUsqadd = ASM_INSN_WRAP_FUNC_WQ_RES_W0_ARG("usqadd %s0, %s2");

  // Sum fits in 32 bits: exact result, QC clear.
  __uint128_t addend1 = MakeUInt128(0x9052523242348615ULL, 0x3152097693846104ULL);
  __uint128_t addend2 = MakeUInt128(0x2582849714963475ULL, 0x3418375620030149ULL);
  auto [acc1, flags1] = AsmUsqadd(addend1, addend2);
  ASSERT_EQ(acc1, MakeUInt128(0x0000000056caba8aULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(flags1));

  // Negative signed addend exceeds the accumulator: clamps to 0, QC set.
  __uint128_t addend3 = MakeUInt128(0x9887125387801719ULL, 0x6071816407812484ULL);
  __uint128_t addend4 = MakeUInt128(0x7847257912407824ULL, 0x5443616823452395ULL);
  auto [acc2, flags2] = AsmUsqadd(addend3, addend4);
  ASSERT_EQ(acc2, MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(flags2));

  // Sum exceeds UINT32_MAX: clamps to all-ones, QC set.
  __uint128_t addend5 = MakeUInt128(0x9708583970761645ULL, 0x8229630324424328ULL);
  __uint128_t addend6 = MakeUInt128(0x2377374595170285ULL, 0x6069806788952176ULL);
  auto [acc3, flags3] = AsmUsqadd(addend5, addend6);
  ASSERT_EQ(acc3, MakeUInt128(0x00000000ffffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(flags3));
}
7227
// USQADD (vector, 4S): per-lane unsigned saturating accumulate of signed
// values; lanes clamp to 0 or UINT32_MAX on overflow.
TEST(Arm64InsnTest, UnsignedSaturatingAccumulateOfSignedValueInt32x4) {
  constexpr auto AsmUsqadd = ASM_INSN_WRAP_FUNC_WQ_RES_W0_ARG("usqadd %0.4s, %2.4s");

  // No lane saturates: QC clear.
  __uint128_t addend1 = MakeUInt128(0x4129137074982305ULL, 0x7592909166293919ULL);
  __uint128_t addend2 = MakeUInt128(0x5014721157586067ULL, 0x2700925477180257ULL);
  auto [acc1, flags1] = AsmUsqadd(addend1, addend2);
  ASSERT_EQ(acc1, MakeUInt128(0x913d8581cbf0836cULL, 0x9c9322e5dd413b70ULL));
  ASSERT_FALSE(IsQcBitSet(flags1));

  // Lanes clamp at both ends (UINT32_MAX and 0); QC set.
  __uint128_t addend3 = MakeUInt128(0x7816422828823274ULL, 0x6866106592732197ULL);
  __uint128_t addend4 = MakeUInt128(0x9071623846421534ULL, 0x8985247621678905ULL);
  auto [acc2, flags2] = AsmUsqadd(addend3, addend4);
  ASSERT_EQ(acc2, MakeUInt128(0xffffffff6ec447a8ULL, 0xf1eb34db00000000ULL));
  ASSERT_TRUE(IsQcBitSet(flags2));
}
7243
// SQRSHL (scalar, S): signed saturating *rounding* shift by a register count;
// negative counts shift right with rounding.  Sweeps counts from far-negative
// through zero up to counts that force saturation at INT32_MAX.
TEST(Arm64InsnTest, SignedSaturatingRoundingShiftLeftInt32x1) {
  constexpr auto AsmSqrshl = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqrshl %s0, %s2, %s3");

  __uint128_t input = MakeUInt128(0x9736705435580445ULL, 0x8657202276378404ULL);

  const struct {
    int shift;
    __uint128_t expected;
    bool saturates;
  } kCases[] = {
      {-33, MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL), false},
      {-32, MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL), false},
      {-31, MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL), false},
      {-1, MakeUInt128(0x000000001aac0223ULL, 0x0000000000000000ULL), false},
      {0, MakeUInt128(0x0000000035580445ULL, 0x0000000000000000ULL), false},
      {1, MakeUInt128(0x000000006ab0088aULL, 0x0000000000000000ULL), false},
      {31, MakeUInt128(0x000000007fffffffULL, 0x0000000000000000ULL), true},
      {32, MakeUInt128(0x000000007fffffffULL, 0x0000000000000000ULL), true},
      {33, MakeUInt128(0x000000007fffffffULL, 0x0000000000000000ULL), true},
  };

  for (const auto& test_case : kCases) {
    auto [res, fpsr] = AsmSqrshl(input, test_case.shift);
    ASSERT_EQ(res, test_case.expected) << "shift=" << test_case.shift;
    ASSERT_EQ(IsQcBitSet(fpsr), test_case.saturates) << "shift=" << test_case.shift;
  }
}
7287
// Verifies SQRSHL (signed saturating rounding shift left), 8H vector form,
// with per-lane shift counts (positive = left, negative = rounding right).
TEST(Arm64InsnTest, SignedSaturatingRoundingShiftLeftInt16x8) {
  constexpr auto AsmSqrshl = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqrshl %0.8h, %2.8h, %3.8h");

  // Small operands: no lane saturates, QC stays clear.
  __uint128_t arg1 = MakeUInt128(0x0000000000000099ULL, 0x9999099999999999ULL);
  __uint128_t arg2 = MakeUInt128(0x00110010000f0001ULL, 0xfffffff1fff0ffefULL);
  auto [res1, fpsr1] = AsmSqrshl(arg1, arg2);
  ASSERT_EQ(res1, MakeUInt128(0x0000000000000132ULL, 0xcccd000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Larger per-lane shifts overflow several lanes (clamped to 0x7fff): QC set.
  __uint128_t arg3 = MakeUInt128(0x0099009900990099ULL, 0x0099009900990099ULL);
  auto [res2, fpsr2] = AsmSqrshl(arg3, arg2);
  ASSERT_EQ(res2, MakeUInt128(0x7fff7fff7fff0132ULL, 0x004d000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
7302
// Verifies UQRSHL (unsigned saturating rounding shift left), scalar 32-bit
// form, across the interesting shift-count ranges: large negative, -1/0/+1,
// and counts at or beyond the element width where the operand saturates to
// 0xffffffff and sets QC.
TEST(Arm64InsnTest, UnsignedSaturatingRoundingShiftLeftInt32x1) {
  constexpr auto AsmUqrshl = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("uqrshl %s0, %s2, %s3");

  __uint128_t arg = MakeUInt128(0x9984124848262367ULL, 0x3771467226061633ULL);

  // Negative counts act as rounding right shifts; -33 and -32 shift out every
  // bit, while -31 leaves only the rounded-up top bit.
  auto [res1, fpsr1] = AsmUqrshl(arg, -33);
  ASSERT_EQ(res1, MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  auto [res2, fpsr2] = AsmUqrshl(arg, -32);
  ASSERT_EQ(res2, MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr2));

  auto [res3, fpsr3] = AsmUqrshl(arg, -31);
  ASSERT_EQ(res3, MakeUInt128(0x0000000000000001ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr3));

  auto [res4, fpsr4] = AsmUqrshl(arg, -1);
  ASSERT_EQ(res4, MakeUInt128(0x00000000241311b4ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr4));

  // Zero shift passes the low 32-bit element through untouched.
  auto [res5, fpsr5] = AsmUqrshl(arg, 0);
  ASSERT_EQ(res5, MakeUInt128(0x0000000048262367ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr5));

  auto [res6, fpsr6] = AsmUqrshl(arg, 1);
  ASSERT_EQ(res6, MakeUInt128(0x00000000904c46ceULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr6));

  // Shifts of >= 31 overflow the unsigned 32-bit range: result saturates to
  // UINT32_MAX and the QC flag is set.
  auto [res7, fpsr7] = AsmUqrshl(arg, 31);
  ASSERT_EQ(res7, MakeUInt128(0x00000000ffffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr7));

  auto [res8, fpsr8] = AsmUqrshl(arg, 32);
  ASSERT_EQ(res8, MakeUInt128(0x00000000ffffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr8));

  auto [res9, fpsr9] = AsmUqrshl(arg, 33);
  ASSERT_EQ(res9, MakeUInt128(0x00000000ffffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr9));
}
7346
// Verifies UQRSHL (unsigned saturating rounding shift left), 8H vector form,
// with per-lane shift counts (positive = left, negative = rounding right).
TEST(Arm64InsnTest, UnsignedSaturatingRoundingShiftLeftInt16x8) {
  constexpr auto AsmUqrshl = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("uqrshl %0.8h, %2.8h, %3.8h");

  // No lane saturates: QC stays clear.
  __uint128_t arg1 = MakeUInt128(0x0000000000000099ULL, 0x9999099999999999ULL);
  __uint128_t arg2 = MakeUInt128(0x00110010000f0001ULL, 0xfffffff1fff0ffefULL);
  auto [res1, fpsr1] = AsmUqrshl(arg1, arg2);
  ASSERT_EQ(res1, MakeUInt128(0x0000000000000132ULL, 0x4ccd000000010000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Several lanes overflow the unsigned 16-bit range (clamped to 0xffff): QC set.
  __uint128_t arg3 = MakeUInt128(0x0099009900990099ULL, 0x0099009900990099ULL);
  auto [res2, fpsr2] = AsmUqrshl(arg3, arg2);
  ASSERT_EQ(res2, MakeUInt128(0xffffffffffff0132ULL, 0x004d000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
7361
// Verifies SQSHRN (signed saturating shift right narrow), scalar form
// narrowing a 16-bit element to 8 bits with a right shift of 4.
TEST(Arm64InsnTest, SignedSaturatingShiftRightNarrowInt16x1) {
  constexpr auto AsmSqshrn = ASM_INSN_WRAP_FUNC_WQ_RES_W_ARG("sqshrn %b0, %h2, #4");

  // Shifted value fits in a signed byte: no saturation.
  __uint128_t arg1 = MakeUInt128(0x888786614762f943ULL, 0x4140104988899316ULL);
  auto [res1, fpsr1] = AsmSqshrn(arg1);
  ASSERT_EQ(res1, MakeUInt128(0x94U, 0U));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Shifted value exceeds INT8_MAX: clamped to 0x7f and QC set.
  __uint128_t arg2 = MakeUInt128(0x0051207678103588ULL, 0x6116602029611936ULL);
  auto [res2, fpsr2] = AsmSqshrn(arg2);
  ASSERT_EQ(res2, MakeUInt128(0x7fU, 0U));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
7375
// Verifies SQSHRN (signed saturating shift right narrow), vector form
// narrowing eight 16-bit lanes to eight 8-bit lanes with a right shift of 4.
TEST(Arm64InsnTest, SignedSaturatingShiftRightNarrowInt16x8) {
  constexpr auto AsmSqshrn = ASM_INSN_WRAP_FUNC_WQ_RES_W_ARG("sqshrn %0.8b, %2.8h, #4");

  // All lanes fit in a signed byte after the shift: no saturation.
  __uint128_t arg1 = MakeUInt128(0x0625051604340253ULL, 0x0299028602670568ULL);
  auto [res1, fpsr1] = AsmSqshrn(arg1);
  ASSERT_EQ(res1, MakeUInt128(0x2928265662514325ULL, 0U));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Several lanes clamp to 0x7f/0x80: QC set.
  __uint128_t arg2 = MakeUInt128(0x2405806005642114ULL, 0x9386436864224724ULL);
  auto [res2, fpsr2] = AsmSqshrn(arg2);
  ASSERT_EQ(res2, MakeUInt128(0x807f7f7f7f80567fULL, 0U));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
7389
// Verifies SQSHRN2 (signed saturating shift right narrow writing the upper
// half of the destination); the destination's low 64 bits must be preserved.
TEST(Arm64InsnTest, SignedSaturatingShiftRightNarrowInt16x8Upper) {
  constexpr auto AsmSqshrn2 = ASM_INSN_WRAP_FUNC_WQ_RES_W0_ARG("sqshrn2 %0.16b, %2.8h, #4");

  // No saturation; low half of the result equals arg2's low half.
  __uint128_t arg1 = MakeUInt128(0x0367034704100536ULL, 0x0175064803000078ULL);
  __uint128_t arg2 = MakeUInt128(0x3494819262681110ULL, 0x7399482506073949ULL);
  auto [res1, fpsr1] = AsmSqshrn2(arg1, arg2);
  ASSERT_EQ(res1, MakeUInt128(0x3494819262681110ULL, 0x1764300736344153ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Several lanes clamp to 0x7f: QC set.
  __uint128_t arg3 = MakeUInt128(0x4641074501673719ULL, 0x0483109676711344ULL);
  auto [res2, fpsr2] = AsmSqshrn2(arg3, arg2);
  ASSERT_EQ(res2, MakeUInt128(0x3494819262681110ULL, 0x487f7f7f7f74167fULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
7404
// Verifies UQSHRN (unsigned saturating shift right narrow), scalar form
// narrowing a 16-bit element to 8 bits with a right shift of 4.
TEST(Arm64InsnTest, UnsignedSaturatingShiftRightNarrowInt16x1) {
  constexpr auto AsmUqshrn = ASM_INSN_WRAP_FUNC_WQ_RES_W_ARG("uqshrn %b0, %h2, #4");

  // Shifted value fits in a byte: no saturation.
  __uint128_t arg1 = MakeUInt128(0x6797172898220360ULL, 0x7028806908776866ULL);
  auto [res1, fpsr1] = AsmUqshrn(arg1);
  ASSERT_EQ(res1, MakeUInt128(0x36U, 0U));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Shifted value exceeds UINT8_MAX: clamped to 0xff and QC set.
  __uint128_t arg2 = MakeUInt128(0x0593252746378405ULL, 0x3976918480820410ULL);
  auto [res2, fpsr2] = AsmUqshrn(arg2);
  ASSERT_EQ(res2, MakeUInt128(0xffU, 0U));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
7418
// Verifies UQSHRN (unsigned saturating shift right narrow), vector form
// narrowing eight 16-bit lanes to eight 8-bit lanes with a right shift of 4.
TEST(Arm64InsnTest, UnsignedSaturatingShiftRightNarrowInt16x8) {
  constexpr auto AsmUqshrn = ASM_INSN_WRAP_FUNC_WQ_RES_W_ARG("uqshrn %0.8b, %2.8h, #4");

  // All lanes fit in a byte after the shift: no saturation.
  __uint128_t arg1 = MakeUInt128(0x0867067907600099ULL, 0x0693007509490515ULL);
  auto [res1, fpsr1] = AsmUqshrn(arg1);
  ASSERT_EQ(res1, MakeUInt128(0x6907945186677609ULL, 0U));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Several lanes clamp to 0xff: QC set.
  __uint128_t arg2 = MakeUInt128(0x2736049811890413ULL, 0x0433116627747123ULL);
  auto [res2, fpsr2] = AsmUqshrn(arg2);
  ASSERT_EQ(res2, MakeUInt128(0x43ffffffff49ff41ULL, 0U));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
7432
// Verifies UQSHRN2 (unsigned saturating shift right narrow writing the upper
// half of the destination); the destination's low 64 bits must be preserved.
// TODO: test name has a typo ("Unigned" -> "Unsigned"); kept as-is to avoid
// breaking existing --gtest_filter patterns.
TEST(Arm64InsnTest, UnignedSaturatingShiftRightNarrowInt16x8Upper) {
  constexpr auto AsmUqshrn2 = ASM_INSN_WRAP_FUNC_WQ_RES_W0_ARG("uqshrn2 %0.16b, %2.8h, #4");

  // No saturation; low half of the result equals arg2's low half.
  __uint128_t arg1 = MakeUInt128(0x0441018407410768ULL, 0x0981066307240048ULL);
  __uint128_t arg2 = MakeUInt128(0x2393582740194493ULL, 0x5665161088463125ULL);
  auto [res1, fpsr1] = AsmUqshrn2(arg1, arg2);
  ASSERT_EQ(res1, MakeUInt128(0x2393582740194493ULL, 0x9866720444187476ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Several lanes clamp to 0xff: QC set.
  __uint128_t arg3 = MakeUInt128(0x0785297709734684ULL, 0x3030614624180358ULL);
  auto [res2, fpsr2] = AsmUqshrn2(arg3, arg2);
  ASSERT_EQ(res2, MakeUInt128(0x2393582740194493ULL, 0xffffff3578ff97ffULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
7447
// Verifies SQRSHRN (signed saturating rounding shift right narrow), scalar
// form narrowing a 16-bit element to 8 bits with a rounding right shift of 4.
TEST(Arm64InsnTest, SignedSaturatingRoundingShiftRightNarrowInt16x1) {
  constexpr auto AsmSqrshrn = ASM_INSN_WRAP_FUNC_WQ_RES_W_ARG("sqrshrn %b0, %h2, #4");

  // Rounded result fits in a signed byte: no saturation.
  __uint128_t arg1 = MakeUInt128(0x9610330799410534ULL, 0x7784574699992128ULL);
  auto [res1, fpsr1] = AsmSqrshrn(arg1);
  ASSERT_EQ(res1, MakeUInt128(0x0000000000000053ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Positive overflow: clamped to INT8_MAX (0x7f), QC set.
  __uint128_t arg2 = MakeUInt128(0x5999993996122816ULL, 0x1521931488876938ULL);
  auto [res2, fpsr2] = AsmSqrshrn(arg2);
  ASSERT_EQ(res2, MakeUInt128(0x000000000000007fULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));

  // Negative overflow: clamped to INT8_MIN (0x80), QC set.
  __uint128_t arg3 = MakeUInt128(0x8022281083009986ULL, 0x0165494165426169ULL);
  auto [res3, fpsr3] = AsmSqrshrn(arg3);
  ASSERT_EQ(res3, MakeUInt128(0x0000000000000080ULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr3));
}
7466
// Verifies SQRSHRN (signed saturating rounding shift right narrow), vector
// form narrowing eight 16-bit lanes to 8-bit lanes with rounding shift of 4.
TEST(Arm64InsnTest, SignedSaturatingRoundingShiftRightNarrowInt16x8) {
  constexpr auto AsmSqrshrn = ASM_INSN_WRAP_FUNC_WQ_RES_W_ARG("sqrshrn %0.8b, %2.8h, #4");

  // All lanes fit after rounding: no saturation.
  __uint128_t arg1 = MakeUInt128(0x0666070401700260ULL, 0x0520059204930759ULL);
  auto [res1, fpsr1] = AsmSqrshrn(arg1);
  ASSERT_EQ(res1, MakeUInt128(0x5259497666701726ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Several lanes clamp to 0x7f/0x80: QC set.
  __uint128_t arg2 = MakeUInt128(0x4143408146852981ULL, 0x5053947178900451ULL);
  auto [res2, fpsr2] = AsmSqrshrn(arg2);
  ASSERT_EQ(res2, MakeUInt128(0x7f807f457f7f7f7fULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
7480
// Verifies SQRSHRN2 (signed saturating rounding shift right narrow writing
// the upper half of the destination); the low 64 bits must be preserved.
TEST(Arm64InsnTest, SignedSaturatingRoundingShiftRightNarrowInt16x8Upper) {
  constexpr auto AsmSqrshrn2 = ASM_INSN_WRAP_FUNC_WQ_RES_W0_ARG("sqrshrn2 %0.16b, %2.8h, #4");

  // No saturation; low half of the result equals arg2's low half.
  __uint128_t arg1 = MakeUInt128(0x0784017103960497ULL, 0x0707072501740336ULL);
  __uint128_t arg2 = MakeUInt128(0x5662725928440620ULL, 0x4302141137199227ULL);
  auto [res1, fpsr1] = AsmSqrshrn2(arg1, arg2);
  ASSERT_EQ(res1, MakeUInt128(0x5662725928440620ULL, 0x7072173378173949ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Several lanes clamp to 0x7f/0x80: QC set.
  __uint128_t arg3 = MakeUInt128(0x2066886512756882ULL, 0x6614973078865701ULL);
  __uint128_t arg4 = MakeUInt128(0x5685016918647488ULL, 0x5416791545965072ULL);
  auto [res2, fpsr2] = AsmSqrshrn2(arg3, arg4);
  ASSERT_EQ(res2, MakeUInt128(0x5685016918647488ULL, 0x7f807f7f7f807f7fULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
7496
// Verifies UQRSHRN (unsigned saturating rounding shift right narrow), scalar
// form narrowing a 16-bit element to 8 bits with a rounding right shift of 4.
TEST(Arm64InsnTest, UnsignedSaturatingRoundingShiftRightNarrowInt16x1) {
  constexpr auto AsmUqrshrn = ASM_INSN_WRAP_FUNC_WQ_RES_W_ARG("uqrshrn %b0, %h2, #4");

  // Rounded result fits in a byte: no saturation.
  __uint128_t arg1 = MakeUInt128(0x9614236585950920ULL, 0x9083073323356034ULL);
  auto [res1, fpsr1] = AsmUqrshrn(arg1);
  ASSERT_EQ(res1, MakeUInt128(0x0000000000000092ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Overflow: clamped to UINT8_MAX (0xff), QC set.
  __uint128_t arg2 = MakeUInt128(0x8465318730299026ULL, 0x6596450137183754ULL);
  auto [res2, fpsr2] = AsmUqrshrn(arg2);
  ASSERT_EQ(res2, MakeUInt128(0x00000000000000ffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
7510
// Verifies UQRSHRN (unsigned saturating rounding shift right narrow), vector
// form narrowing eight 16-bit lanes to 8-bit lanes with rounding shift of 4.
TEST(Arm64InsnTest, UnsignedSaturatingRoundingShiftRightNarrowInt16x8) {
  constexpr auto AsmUqrshrn = ASM_INSN_WRAP_FUNC_WQ_RES_W_ARG("uqrshrn %0.8b, %2.8h, #4");

  // All lanes fit after rounding: no saturation.
  __uint128_t arg1 = MakeUInt128(0x0301067603860240ULL, 0x0011030402470073ULL);
  auto [res1, fpsr1] = AsmUqrshrn(arg1);
  ASSERT_EQ(res1, MakeUInt128(0x0130240730673824ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Several lanes clamp to 0xff: QC set.
  __uint128_t arg2 = MakeUInt128(0x5085082872462713ULL, 0x4946368501815469ULL);
  auto [res2, fpsr2] = AsmUqrshrn(arg2);
  ASSERT_EQ(res2, MakeUInt128(0xffff18ffff83ffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
7524
// Verifies UQRSHRN2 (unsigned saturating rounding shift right narrow writing
// the upper half of the destination); the low 64 bits must be preserved.
TEST(Arm64InsnTest, UnsignedSaturatingRoundingShiftRightNarrowInt16x8Upper) {
  // Named with a "2" suffix like the other *2 (upper-half) wrappers
  // (AsmSqshrn2, AsmUqshrn2, AsmSqrshrn2, ...) for consistency.
  constexpr auto AsmUqrshrn2 = ASM_INSN_WRAP_FUNC_WQ_RES_W0_ARG("uqrshrn2 %0.16b, %2.8h, #4");

  // No saturation; low half of the result equals arg2's low half.
  __uint128_t arg1 = MakeUInt128(0x0388099005730661ULL, 0x0237022304780112ULL);
  __uint128_t arg2 = MakeUInt128(0x0392269110277722ULL, 0x6102544149221576ULL);
  auto [res1, fpsr1] = AsmUqrshrn2(arg1, arg2);
  ASSERT_EQ(res1, MakeUInt128(0x0392269110277722ULL, 0x2322481139995766ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Several lanes clamp to 0xff: QC set.
  __uint128_t arg3 = MakeUInt128(0x9254069617600504ULL, 0x7974928060721268ULL);
  __uint128_t arg4 = MakeUInt128(0x8414695726397884ULL, 0x2560084531214065ULL);
  auto [res2, fpsr2] = AsmUqrshrn2(arg3, arg4);
  ASSERT_EQ(res2, MakeUInt128(0x8414695726397884ULL, 0xffffffffff69ff50ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
7540
// Verifies SQSHRUN (signed saturating shift right unsigned narrow), scalar
// form: a signed 16-bit element is shifted right by 4 and saturated to the
// unsigned 8-bit range [0, 0xff].
TEST(Arm64InsnTest, SignedSaturatingShiftRightUnsignedNarrowInt16x1) {
  constexpr auto AsmSqshrun = ASM_INSN_WRAP_FUNC_WQ_RES_W_ARG("sqshrun %b0, %h2, #4");

  // Shifted value fits in an unsigned byte: no saturation.
  __uint128_t arg1 = MakeUInt128(0x9143611439920063ULL, 0x8005083214098760ULL);
  auto [res1, fpsr1] = AsmSqshrun(arg1);
  ASSERT_EQ(res1, MakeUInt128(0x06U, 0U));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Negative source element: clamped to 0, QC set.
  __uint128_t arg2 = MakeUInt128(0x3815174571259975ULL, 0x4953580239983146ULL);
  auto [res2, fpsr2] = AsmSqshrun(arg2);
  ASSERT_EQ(res2, MakeUInt128(0x00U, 0U));
  ASSERT_TRUE(IsQcBitSet(fpsr2));

  // Too-large positive element: clamped to 0xff, QC set.
  __uint128_t arg3 = MakeUInt128(0x4599309324851025ULL, 0x1682944672606661ULL);
  auto [res3, fpsr3] = AsmSqshrun(arg3);
  ASSERT_EQ(res3, MakeUInt128(0xffU, 0U));
  ASSERT_TRUE(IsQcBitSet(fpsr3));
}
7559
// Verifies SQSHRUN (signed saturating shift right unsigned narrow), vector
// form: eight signed 16-bit lanes shifted right by 4 and saturated to [0, 0xff].
TEST(Arm64InsnTest, SignedSaturatingShiftRightUnsignedNarrowInt16x8) {
  constexpr auto AsmSqshrun = ASM_INSN_WRAP_FUNC_WQ_RES_W_ARG("sqshrun %0.8b, %2.8h, #4");

  // All lanes fit: no saturation.
  __uint128_t arg1 = MakeUInt128(0x0911066408340874ULL, 0x0800074107250670ULL);
  auto [res1, fpsr1] = AsmSqshrun(arg1);
  ASSERT_EQ(res1, MakeUInt128(0x8074726791668387ULL, 0U));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Negative lanes clamp to 0x00, too-large positive lanes to 0xff: QC set.
  __uint128_t arg2 = MakeUInt128(0x4792258319129415ULL, 0x7390809143831384ULL);
  auto [res2, fpsr2] = AsmSqshrun(arg2);
  ASSERT_EQ(res2, MakeUInt128(0xff00ffffffffff00ULL, 0U));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
7573
// Verifies SQSHRUN2 (signed saturating shift right unsigned narrow writing
// the upper half of the destination); the low 64 bits must be preserved.
TEST(Arm64InsnTest, SignedSaturatingShiftRightUnsignedNarrowInt16x8Upper) {
  constexpr auto AsmSqshrun2 = ASM_INSN_WRAP_FUNC_WQ_RES_W0_ARG("sqshrun2 %0.16b, %2.8h, #4");

  // No saturation; low half of the result equals arg2's low half.
  __uint128_t arg1 = MakeUInt128(0x0625082101740415ULL, 0x0233074903960353ULL);
  __uint128_t arg2 = MakeUInt128(0x0136178653673760ULL, 0x6421667781377399ULL);
  auto [res1, fpsr1] = AsmSqshrun2(arg1, arg2);
  ASSERT_EQ(res1, MakeUInt128(0x0136178653673760ULL, 0x2374393562821741ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Lanes clamp to 0x00 (negative) or 0xff (too large): QC set.
  __uint128_t arg3 = MakeUInt128(0x4295810545651083ULL, 0x1046297282937584ULL);
  __uint128_t arg4 = MakeUInt128(0x1611625325625165ULL, 0x7249807849209989ULL);
  auto [res2, fpsr2] = AsmSqshrun2(arg3, arg4);
  ASSERT_EQ(res2, MakeUInt128(0x1611625325625165ULL, 0xffff00ffff00ffffULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
7589
// Verifies SQRSHRUN (signed saturating rounding shift right unsigned narrow),
// scalar form: signed 16-bit element, rounding right shift by 4, saturated to
// the unsigned 8-bit range.
TEST(Arm64InsnTest, SignedSaturatingRoundingShiftRightUnsignedNarrowInt16x1) {
  constexpr auto AsmSqrshrun = ASM_INSN_WRAP_FUNC_WQ_RES_W_ARG("sqrshrun %b0, %h2, #4");

  // Rounded value fits in an unsigned byte: no saturation.
  __uint128_t arg1 = MakeUInt128(0x5760186946490886ULL, 0x8154528562134698ULL);
  auto [res1, fpsr1] = AsmSqrshrun(arg1);
  ASSERT_EQ(res1, MakeUInt128(0x88ULL, 0U));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Negative source element: clamped to 0, QC set.
  __uint128_t arg2 = MakeUInt128(0x8355444560249556ULL, 0x6684366029221951ULL);
  auto [res2, fpsr2] = AsmSqrshrun(arg2);
  ASSERT_EQ(res2, MakeUInt128(0x00ULL, 0U));
  ASSERT_TRUE(IsQcBitSet(fpsr2));

  // Too-large positive element: clamped to 0xff, QC set.
  __uint128_t arg3 = MakeUInt128(0x2483091060537720ULL, 0x1980218310103270ULL);
  auto [res3, fpsr3] = AsmSqrshrun(arg3);
  ASSERT_EQ(res3, MakeUInt128(0xffULL, 0U));
  ASSERT_TRUE(IsQcBitSet(fpsr3));
}
7608
// Verifies SQRSHRUN (signed saturating rounding shift right unsigned narrow),
// vector form: eight signed 16-bit lanes, rounding shift by 4, saturated to
// [0, 0xff] per lane.
TEST(Arm64InsnTest, SignedSaturatingRoundingShiftRightUnsignedNarrowInt16x8) {
  constexpr auto AsmSqrshrun = ASM_INSN_WRAP_FUNC_WQ_RES_W_ARG("sqrshrun %0.8b, %2.8h, #4");

  // All lanes fit: no saturation.
  __uint128_t arg1 = MakeUInt128(0x0150069001490702ULL, 0x0673033808340550ULL);
  auto [res1, fpsr1] = AsmSqrshrun(arg1);
  ASSERT_EQ(res1, MakeUInt128(0x6734835515691570ULL, 0U));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Lanes clamp to 0x00 (negative) or 0xff (too large): QC set.
  __uint128_t arg2 = MakeUInt128(0x8363660178487710ULL, 0x6080980426924713ULL);
  auto [res2, fpsr2] = AsmSqrshrun(arg2);
  ASSERT_EQ(res2, MakeUInt128(0xff00ffff00ffffffULL, 0U));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
7622
// Verifies SQRSHRUN2 (signed saturating rounding shift right unsigned narrow
// writing the upper half of the destination); the low 64 bits are preserved.
TEST(Arm64InsnTest, SignedSaturatingRoundingShiftRightUnsignedNarrowInt16x8Upper) {
  constexpr auto AsmSqrshrun2 = ASM_INSN_WRAP_FUNC_WQ_RES_W0_ARG("sqrshrun2 %0.16b, %2.8h, #4");

  // No saturation; low half of the result equals arg2's low half.
  __uint128_t arg1 = MakeUInt128(0x0733049502080757ULL, 0x0651018705990498ULL);
  __uint128_t arg2 = MakeUInt128(0x5693795623875551ULL, 0x6175754380917805ULL);
  auto [res1, fpsr1] = AsmSqrshrun2(arg1, arg2);
  ASSERT_EQ(res1, MakeUInt128(0x5693795623875551ULL, 0x65185a4a73492175ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Lanes clamp to 0x00 (negative) or 0xff (too large): QC set.
  __uint128_t arg3 = MakeUInt128(0x1444671298615527ULL, 0x5982014514102756ULL);
  __uint128_t arg4 = MakeUInt128(0x0068929750246304ULL, 0x0173514891945763ULL);
  auto [res2, fpsr2] = AsmSqrshrun2(arg3, arg4);
  ASSERT_EQ(res2, MakeUInt128(0x0068929750246304ULL, 0xff14ffffffff00ffULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
7638
// Verifies SQSHLU (signed saturating shift left unsigned, immediate), scalar
// 32-bit form: a signed element is shifted left by 4 and saturated to the
// unsigned 32-bit range [0, 0xffffffff].
TEST(Arm64InsnTest, SignedSaturatingShiftLeftUnsignedImmInt32x1) {
  constexpr auto AsmSqshlu = ASM_INSN_WRAP_FUNC_WQ_RES_W_ARG("sqshlu %s0, %s2, #4");

  // Shifted value fits: no saturation.
  __uint128_t arg1 = MakeUInt128(0x9704033001862556ULL, 0x1473321177711744ULL);
  auto [res1, fpsr1] = AsmSqshlu(arg1);
  ASSERT_EQ(res1, MakeUInt128(0x18625560ULL, 0U));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Negative source element: clamped to 0, QC set.
  __uint128_t arg2 = MakeUInt128(0x3095760196946490ULL, 0x8868154528562134ULL);
  auto [res2, fpsr2] = AsmSqshlu(arg2);
  ASSERT_EQ(res2, MakeUInt128(0x00000000ULL, 0U));
  ASSERT_TRUE(IsQcBitSet(fpsr2));

  // Positive element overflows after the shift: clamped to 0xffffffff, QC set.
  __uint128_t arg3 = MakeUInt128(0x1335028160884035ULL, 0x1781452541964320ULL);
  auto [res3, fpsr3] = AsmSqshlu(arg3);
  ASSERT_EQ(res3, MakeUInt128(0xffffffffULL, 0U));
  ASSERT_TRUE(IsQcBitSet(fpsr3));
}
7657
// Verifies SQSHLU (signed saturating shift left unsigned, immediate), 4S
// vector form: each signed 32-bit lane is shifted left by 4 and saturated to
// the unsigned 32-bit range.
TEST(Arm64InsnTest, SignedSaturatingShiftLeftUnsignedImmInt32x4) {
  constexpr auto AsmSqshlu = ASM_INSN_WRAP_FUNC_WQ_RES_W_ARG("sqshlu %0.4s, %2.4s, #4");

  // All lanes fit: no saturation.
  __uint128_t arg1 = MakeUInt128(0x0865174507877133ULL, 0x0813875205980941ULL);
  auto [res1, fpsr1] = AsmSqshlu(arg1);
  ASSERT_EQ(res1, MakeUInt128(0x8651745078771330ULL, 0x8138752059809410ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Lanes clamp to 0xffffffff (overflow) or 0 (negative): QC set.
  __uint128_t arg2 = MakeUInt128(0x2174227300352296ULL, 0x0080891797050682ULL);
  auto [res2, fpsr2] = AsmSqshlu(arg2);
  ASSERT_EQ(res2, MakeUInt128(0xffffffff03522960ULL, 0x0808917000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
7671
// Verifies SQDMULL (signed saturating doubling multiply long), 2S -> 2D form
// operating on the lower two 32-bit lanes of each source.
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyLong32x2) {
  constexpr auto AsmSqdmull = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqdmull %0.2d, %2.2s, %3.2s");

  // Small positive operands: no saturation. Upper halves of the sources are
  // ignored by the non-"2" form. (NOTE(review): the upper-half constant below
  // has 15 hex digits, likely a typo for 0xfeed000400000002 — harmless here
  // since those bits are unused, but worth confirming.)
  __uint128_t arg1 = MakeUInt128(0x0000000200000004ULL, 0xfeed000300000010ULL);
  __uint128_t arg2 = MakeUInt128(0x0000000300000002ULL, 0xfeed00040000002ULL);
  auto [res1, fpsr1] = AsmSqdmull(arg1, arg2);
  ASSERT_EQ(res1, MakeUInt128(0x0000000000000010ULL, 0x000000000000000cULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // INT32_MIN * INT32_MIN doubled overflows: lane saturates to INT64_MAX, QC set.
  __uint128_t arg3 = MakeUInt128(0x8000000000000004ULL, 0xfeed000300000010ULL);
  __uint128_t arg4 = MakeUInt128(0x8000000000000002ULL, 0xfeed00040000002ULL);
  auto [res2, fpsr2] = AsmSqdmull(arg3, arg4);
  ASSERT_EQ(res2, MakeUInt128(0x0000000000000010ULL, 0x7fffffffffffffffULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
7687
// Verifies SQDMULL (signed saturating doubling multiply long), 4H -> 4S form
// operating on the lower four 16-bit lanes of each source.
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyLong16x4) {
  constexpr auto AsmSqdmull = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqdmull %0.4s, %2.4h, %3.4h");

  // No lane saturates.
  __uint128_t arg1 = MakeUInt128(0x0004000200f00004ULL, 0xfeedfeedfeedfeedULL);
  __uint128_t arg2 = MakeUInt128(0x0008000300800002ULL, 0xabcd0123ffff4567ULL);
  auto [res1, fpsr1] = AsmSqdmull(arg1, arg2);
  ASSERT_EQ(res1, MakeUInt128(0x0000f00000000010ULL, 0x000000400000000cULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // INT16_MIN * INT16_MIN doubled overflows: lane saturates to INT32_MAX, QC set.
  __uint128_t arg3 = MakeUInt128(0x8000000200f00004ULL, 0xfeedfeedfeedfeedULL);
  __uint128_t arg4 = MakeUInt128(0x8000000300800002ULL, 0xabcd0123ffff4567ULL);
  auto [res2, fpsr2] = AsmSqdmull(arg3, arg4);
  ASSERT_EQ(res2, MakeUInt128(0x0000f00000000010ULL, 0x7fffffff0000000cULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
7703
// Verifies SQDMULL2 (signed saturating doubling multiply long, upper half):
// multiplies the upper two 32-bit lanes of each 4S source into 2D results.
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyLongUpper32x2) {
  constexpr auto AsmSqdmull = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqdmull2 %0.2d, %2.4s, %3.4s");

  // No saturation. (NOTE(review): the upper-half constant below has 15 hex
  // digits — the expected values were computed against the zero-padded value,
  // so the test is self-consistent, but the literal looks like a typo.)
  __uint128_t arg1 = MakeUInt128(0x0000000200000004ULL, 0xfeed000300000010ULL);
  __uint128_t arg2 = MakeUInt128(0x0000000300000002ULL, 0xfeed00040000002ULL);
  auto [res1, fpsr1] = AsmSqdmull(arg1, arg2);
  ASSERT_EQ(res1, MakeUInt128(0x0000000800000040ULL, 0xffddc4ed7f98e000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // INT32_MIN * INT32_MIN in an upper lane saturates to INT64_MAX: QC set.
  __uint128_t arg3 = MakeUInt128(0x8000000000000004ULL, 0x8000000000000010ULL);
  __uint128_t arg4 = MakeUInt128(0x8000000000000002ULL, 0x8000000000000002ULL);
  auto [res2, fpsr2] = AsmSqdmull(arg3, arg4);
  ASSERT_EQ(res2, MakeUInt128(0x0000000000000040ULL, 0x7fffffffffffffffULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
7719
// Verifies SQDMULL2 (signed saturating doubling multiply long, upper half):
// multiplies the upper four 16-bit lanes of each 8H source into 4S results.
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyLongUpper16x4) {
  constexpr auto AsmSqdmull = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqdmull2 %0.4s, %2.8h, %3.8h");

  // No lane saturates.
  __uint128_t arg1 = MakeUInt128(0x0004000200f00004ULL, 0xfeedfeedfeedfeedULL);
  __uint128_t arg2 = MakeUInt128(0x0008000300800002ULL, 0xabcd0123ffff4567ULL);
  auto [res1, fpsr1] = AsmSqdmull(arg1, arg2);
  ASSERT_EQ(res1, MakeUInt128(0x00000226ff6ae4b6ULL, 0x00b4e592fffd8eceULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // INT16_MIN * INT16_MIN in an upper lane saturates to INT32_MAX: QC set.
  __uint128_t arg3 = MakeUInt128(0x8000000000000004ULL, 0x8000000000000010ULL);
  __uint128_t arg4 = MakeUInt128(0x8000000000000002ULL, 0x8000000000000002ULL);
  auto [res2, fpsr2] = AsmSqdmull(arg3, arg4);
  ASSERT_EQ(res2, MakeUInt128(0x0000000000000040ULL, 0x7fffffff00000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
7735
// Verifies SQDMULL (by element): multiplies the lower two 32-bit lanes of the
// first source by element s[1] of the second source, producing 2D results.
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyLong64x2IndexedElem) {
  constexpr auto AsmSqdmull = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqdmull %0.2d, %2.2s, %3.s[1]");

  // s[1] of arg2 is 2: each product is value * 2 * 2. No saturation.
  __uint128_t arg1 = MakeUInt128(0x0022002211223344ULL, 0x1122334400110011LL);
  __uint128_t arg2 = MakeUInt128(0x0000000200000000ULL, 0x000000000000000ULL);
  auto [res1, fpsr1] = AsmSqdmull(arg1, arg2);
  ASSERT_EQ(res1, MakeUInt128(0x000000004488cd10ULL, 0x0000000000880088ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // INT32_MIN * INT32_MIN doubled saturates to INT64_MAX: QC set.
  __uint128_t arg3 = MakeUInt128(0x0022002280000000ULL, 0x1122334400110011LL);
  __uint128_t arg4 = MakeUInt128(0x8000000000000000ULL, 0x000000000000000ULL);
  auto [res2, fpsr2] = AsmSqdmull(arg3, arg4);
  ASSERT_EQ(res2, MakeUInt128(0x7fffffffffffffffULL, 0xffddffde00000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
7751
// Verifies SQDMULL (by element): multiplies the lower four 16-bit lanes of
// the first source by element h[4] of the second source, producing 4S results.
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyLong32x4IndexedElem) {
  constexpr auto AsmSqdmull = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqdmull %0.4s, %2.4h, %3.h[4]");

  // h[4] of arg2 is 2: no saturation.
  __uint128_t arg1 = MakeUInt128(0x0022002211223344ULL, 0x1122334400110011LL);
  __uint128_t arg2 = MakeUInt128(0x000f000f000f000fULL, 0x000f000f000f0002ULL);
  auto [res1, fpsr1] = AsmSqdmull(arg1, arg2);
  ASSERT_EQ(res1, MakeUInt128(0x000044880000cd10ULL, 0x0000008800000088ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // INT16_MIN * INT16_MIN doubled saturates to INT32_MAX: QC set.
  __uint128_t arg3 = MakeUInt128(0x0022002280000000ULL, 0x1122334400118000ULL);
  __uint128_t arg4 = MakeUInt128(0x1111111122222222ULL, 0x1122334411228000ULL);
  auto [res2, fpsr2] = AsmSqdmull(arg3, arg4);
  ASSERT_EQ(res2, MakeUInt128(0x7fffffff00000000ULL, 0xffde0000ffde0000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
7767
// Verifies SQDMULL2 (by element): multiplies the upper two 32-bit lanes of
// the first source by element s[3] of the second source, producing 2D results.
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyLongUpper64x2IndexedElem) {
  constexpr auto AsmSqdmull = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqdmull2 %0.2d, %2.4s, %3.s[3]");

  // s[3] of arg2 is 2: no saturation.
  __uint128_t arg1 = MakeUInt128(0x0022002211223344ULL, 0x1122334400110011ULL);
  __uint128_t arg2 = MakeUInt128(0xffffffffffffffffULL, 0x00000002ffffffffULL);
  auto [res1, fpsr1] = AsmSqdmull(arg1, arg2);
  ASSERT_EQ(res1, MakeUInt128(0x0000000000440044ULL, 0x000000004488cd10ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // INT32_MIN * INT32_MIN doubled saturates to INT64_MAX: QC set.
  __uint128_t arg3 = MakeUInt128(0x80000000ffffffffULL, 0x1122334480000000ULL);
  __uint128_t arg4 = MakeUInt128(0x1122334411223344ULL, 0x80000000ffffffffULL);
  auto [res2, fpsr2] = AsmSqdmull(arg3, arg4);
  ASSERT_EQ(res2, MakeUInt128(0x7fffffffffffffffULL, 0xeeddccbc00000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
7783
// Verifies SQDMULL2 (by element): multiplies the upper four 16-bit lanes of
// the first source by element h[7] of the second source, producing 4S results.
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyLongUpper32x4IndexedElem) {
  constexpr auto AsmSqdmull = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqdmull2 %0.4s, %2.8h, %3.h[7]");

  // h[7] of arg2 is 2: no saturation.
  __uint128_t arg1 = MakeUInt128(0x0022002211223344ULL, 0x1122334400110011ULL);
  __uint128_t arg2 = MakeUInt128(0xffffffffffffffffULL, 0x0002ffffffffffffULL);
  auto [res1, fpsr1] = AsmSqdmull(arg1, arg2);
  ASSERT_EQ(res1, MakeUInt128(0x0000004400000044ULL, 0x000044880000cd10ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // INT16_MIN (0x8000) * INT16_MIN doubled saturates the lane to INT32_MAX.
  __uint128_t arg3 = MakeUInt128(0x80000000ffffffffULL, 0x112233448000ffffULL);
  __uint128_t arg4 = MakeUInt128(0x1122334411223344ULL, 0x8000ffffffffffffULL);
  auto [res2, fpsr2] = AsmSqdmull(arg3, arg4);
  ASSERT_EQ(res2, MakeUInt128(0x7fffffff00010000ULL, 0xeede0000ccbc0000ULL));
  // The saturating case must also set the QC flag, as asserted by every
  // sibling SQDMULL test; this assertion was previously missing.
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
7798
// Verifies SQDMULL, scalar 32 -> 64-bit form: multiplies the low 32-bit
// elements of both sources, doubles the product, and saturates to 64 bits.
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyLong64x1) {
  constexpr auto AsmSqdmull = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqdmull %d0, %s2, %s3");
  // 0x11112222 * 0x10000000 * 2 fits in 64 bits: no saturation.
  __uint128_t arg1 = MakeUInt128(0x0000000811112222ULL, 0x0000000700000006ULL);
  __uint128_t arg2 = MakeUInt128(0x0000000510000000ULL, 0x0000000300000002ULL);
  auto [res1, fpsr1] = AsmSqdmull(arg1, arg2);
  ASSERT_EQ(res1, MakeUInt128(0x0222244440000000ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // INT32_MIN * INT32_MIN doubled saturates to INT64_MAX: QC set.
  __uint128_t arg3 = MakeUInt128(0xaabbccdd80000000ULL, 0x1122334400110011ULL);
  __uint128_t arg4 = MakeUInt128(0xff11ff1180000000ULL, 0xffffffff11223344ULL);
  auto [res2, fpsr2] = AsmSqdmull(arg3, arg4);
  ASSERT_EQ(res2, MakeUInt128(0x7fffffffffffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
7813
// Verifies SQDMULL, scalar 16 -> 32-bit form: multiplies the low 16-bit
// elements of both sources, doubles the product, and saturates to 32 bits.
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyLong32x1) {
  constexpr auto AsmSqdmull = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqdmull %s0, %h2, %h3");
  // 0x2222 * 0x4444 * 2 fits in 32 bits: no saturation.
  __uint128_t arg1 = MakeUInt128(0x1111111811112222ULL, 0xf000000700080006ULL);
  __uint128_t arg2 = MakeUInt128(0x0000000510004444ULL, 0xf000000300080002ULL);
  auto [res1, fpsr1] = AsmSqdmull(arg1, arg2);
  ASSERT_EQ(res1, MakeUInt128(0x0000000012343210ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // INT16_MIN * INT16_MIN doubled saturates to INT32_MAX: QC set.
  __uint128_t arg3 = MakeUInt128(0xaabbccdd00008000ULL, 0x1122334400110011ULL);
  __uint128_t arg4 = MakeUInt128(0xff11ff1100008000ULL, 0xffffffff11223344ULL);
  auto [res2, fpsr2] = AsmSqdmull(arg3, arg4);
  ASSERT_EQ(res2, MakeUInt128(0x000000007fffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
7828
// Verifies SQDMULL, scalar 16 -> 32-bit by-element form: multiplies the low
// 16-bit element of the first source by element h[7] of the second.
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyLong32x1IndexedElem) {
  constexpr auto AsmSqdmull = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqdmull %s0, %h2, %3.h[7]");
  // h[7] of arg2 is 0x1111: no saturation.
  __uint128_t arg1 = MakeUInt128(0x0000000811112222ULL, 0x0000000700000006ULL);
  __uint128_t arg2 = MakeUInt128(0x0000000510000000ULL, 0x1111000300000002ULL);
  auto [res1, fpsr1] = AsmSqdmull(arg1, arg2);
  ASSERT_EQ(res1, MakeUInt128(0x00000000048d0c84ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // INT16_MIN * INT16_MIN (h[7] of arg4 is 0x8000) doubled saturates: QC set.
  __uint128_t arg3 = MakeUInt128(0xaabbccddaabb8000ULL, 0x1122334400110011ULL);
  __uint128_t arg4 = MakeUInt128(0xff11ff11ff000ff0ULL, 0x8000aabb11223344ULL);
  auto [res2, fpsr2] = AsmSqdmull(arg3, arg4);
  ASSERT_EQ(res2, MakeUInt128(0x000000007fffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
7843
// Verifies SQDMULL, scalar 32 -> 64-bit by-element form: multiplies the low
// 32-bit element of the first source by element s[3] of the second.
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyLong64x1IndexedElem) {
  constexpr auto AsmSqdmull = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqdmull %d0, %s2, %3.s[3]");
  // s[3] of arg2 is 3: no saturation.
  __uint128_t arg1 = MakeUInt128(0x0000000811112222ULL, 0x0000000700000006ULL);
  __uint128_t arg2 = MakeUInt128(0x0000000510000000ULL, 0x0000000300000002ULL);
  auto [res1, fpsr1] = AsmSqdmull(arg1, arg2);
  ASSERT_EQ(res1, MakeUInt128(0x000000006666ccccULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // INT32_MIN * INT32_MIN (s[3] of arg4 is 0x80000000) doubled saturates: QC set.
  __uint128_t arg3 = MakeUInt128(0xaabbccdd80000000ULL, 0x1122334400110011ULL);
  __uint128_t arg4 = MakeUInt128(0xff11ff11ff000ff0ULL, 0x8000000011223344ULL);
  auto [res2, fpsr2] = AsmSqdmull(arg3, arg4);
  ASSERT_EQ(res2, MakeUInt128(0x7fffffffffffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
7858
// Verifies SQDMLAL (signed saturating doubling multiply-add long), 2S -> 2D
// form: the doubled products are accumulated into the destination. Covers the
// two distinct saturation points (the multiply and the accumulate).
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyAddLong32x2) {
  constexpr auto AsmSqdmlal = ASM_INSN_WRAP_FUNC_WQ_RES_WW0_ARG("sqdmlal %0.2d, %2.2s, %3.2s");

  // No saturation.
  __uint128_t arg1 = MakeUInt128(0x1100110022002200ULL, 0x7654321076543210ULL);
  __uint128_t arg2 = MakeUInt128(0x0000000400000004ULL, 0x0123456701234567ULL);
  __uint128_t arg3 = MakeUInt128(0x0100010001000100ULL, 0x0400040004000400ULL);
  auto [res1, fpsr1] = AsmSqdmlal(arg1, arg2, arg3);
  ASSERT_EQ(res1, MakeUInt128(0x0100010111011100ULL, 0x040004008c008c00ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Saturates in the multiplication (INT32_MIN * INT32_MIN doubled).
  __uint128_t arg4 = MakeUInt128(0x8000000000000004ULL, 0xfeed000300000010ULL);
  __uint128_t arg5 = MakeUInt128(0x8000000000000002ULL, 0xfeed000400000020ULL);
  __uint128_t arg6 = MakeUInt128(0x0000080000000900ULL, 0x00000a000000b000ULL);
  auto [res2, fpsr2] = AsmSqdmlal(arg4, arg5, arg6);
  ASSERT_EQ(res2, MakeUInt128(0x0000080000000910ULL, 0x7fffffffffffffffULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));

  // Saturates in the addition (accumulator already at INT64_MAX).
  __uint128_t arg7 = MakeUInt128(0x1100110022002200ULL, 0x7654321076543210ULL);
  __uint128_t arg8 = MakeUInt128(0x0000000400000004ULL, 0x0123456701234567ULL);
  __uint128_t arg9 = MakeUInt128(0x7fffffffffffffffULL, 0x00000a000000b000ULL);
  auto [res3, fpsr3] = AsmSqdmlal(arg7, arg8, arg9);
  ASSERT_EQ(res3, MakeUInt128(0x7fffffffffffffffULL, 0x00000a0088013800ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr3));
}
7886
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyAddLong16x4) {
  // SQDMLAL (vector): four 16-bit lanes multiplied, doubled, widened to
  // 32 bits and accumulated.
  constexpr auto AsmInsn = ASM_INSN_WRAP_FUNC_WQ_RES_WW0_ARG("sqdmlal %0.4s, %2.4h, %3.4h");

  // Everything in range: QC stays clear.
  __uint128_t lhs = MakeUInt128(0x8000110022002200ULL, 0x7654321076543210ULL);
  __uint128_t rhs = MakeUInt128(0x0010001100000004ULL, 0x0123456701234567ULL);
  __uint128_t acc = MakeUInt128(0x0100010001000100ULL, 0x0400040004000400ULL);
  auto [sum, flags] = AsmInsn(lhs, rhs, acc);
  ASSERT_EQ(sum, MakeUInt128(0x0100010001011100ULL, 0x03f0040004024600ULL));
  ASSERT_FALSE(IsQcBitSet(flags));

  // INT16_MIN * INT16_MIN doubled overflows: the product saturates, QC set.
  __uint128_t lhs_mul = MakeUInt128(0x8000111111111111ULL, 0x1234123412341234ULL);
  __uint128_t rhs_mul = MakeUInt128(0x8000111111111111ULL, 0x1234123412341234ULL);
  __uint128_t acc_mul = MakeUInt128(0x0123456701234567ULL, 0x0123456701234567ULL);
  auto [sum_mul, flags_mul] = AsmInsn(lhs_mul, rhs_mul, acc_mul);
  ASSERT_EQ(sum_mul, MakeUInt128(0x0369cba90369cba9ULL, 0x7fffffff0369cba9ULL));
  ASSERT_TRUE(IsQcBitSet(flags_mul));

  // Accumulator lane already at INT32_MAX: the addition saturates, QC set.
  __uint128_t lhs_add = MakeUInt128(0x1100110022002200ULL, 0x7654321076543210ULL);
  __uint128_t rhs_add = MakeUInt128(0x0000000400010004ULL, 0x0123456701234567ULL);
  __uint128_t acc_max = MakeUInt128(0x7fffffff12345678ULL, 0x00000a000000b000ULL);
  auto [sum_add, flags_add] = AsmInsn(lhs_add, rhs_add, acc_max);
  ASSERT_EQ(sum_add, MakeUInt128(0x7fffffff12356678ULL, 0x00000a0000013800ULL));
  ASSERT_TRUE(IsQcBitSet(flags_add));
}
7914
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyAddLongUpper32x2) {
  // SQDMLAL2: same as SQDMLAL but sourcing the upper halves of the
  // 128-bit inputs (lanes 2-3 of each 4s operand).
  constexpr auto AsmInsn = ASM_INSN_WRAP_FUNC_WQ_RES_WW0_ARG("sqdmlal2 %0.2d, %2.4s, %3.4s");

  // Everything in range: QC stays clear.
  __uint128_t lhs = MakeUInt128(0x1100110022002200ULL, 0x7654321076543210ULL);
  __uint128_t rhs = MakeUInt128(0x0000000400000004ULL, 0x0123456701234567ULL);
  __uint128_t acc = MakeUInt128(0x0100010001000100ULL, 0x0400040004000400ULL);
  auto [sum, flags] = AsmInsn(lhs, rhs, acc);
  ASSERT_EQ(sum, MakeUInt128(0x020d44926c1ce9e0ULL, 0x050d47926f1cece0ULL));
  ASSERT_FALSE(IsQcBitSet(flags));

  // INT32_MIN * INT32_MIN in an upper lane: the product saturates, QC set.
  __uint128_t lhs_mul = MakeUInt128(0x1234567800000004ULL, 0x8000000001100010ULL);
  __uint128_t rhs_mul = MakeUInt128(0x1234567800000002ULL, 0x8000000001100020ULL);
  __uint128_t acc_mul = MakeUInt128(0x0000080000000900ULL, 0x00000a000000b000ULL);
  auto [sum_mul, flags_mul] = AsmInsn(lhs_mul, rhs_mul, acc_mul);
  ASSERT_EQ(sum_mul, MakeUInt128(0x00024a0066000d00ULL, 0x7fffffffffffffffULL));
  ASSERT_TRUE(IsQcBitSet(flags_mul));

  // Accumulator already at INT64_MAX: the addition saturates, QC set.
  __uint128_t lhs_add = MakeUInt128(0x1100110022002200ULL, 0x7654321076543210ULL);
  __uint128_t rhs_add = MakeUInt128(0x0000000400000004ULL, 0x0123456701234567ULL);
  __uint128_t acc_max = MakeUInt128(0x1234567812345678ULL, 0x7fffffffffffffffULL);
  auto [sum_add, flags_add] = AsmInsn(lhs_add, rhs_add, acc_max);
  ASSERT_EQ(sum_add, MakeUInt128(0x13419a0a7d513f58ULL, 0x7fffffffffffffffULL));
  ASSERT_TRUE(IsQcBitSet(flags_add));
}
7942
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyAddLongUpper16x4) {
  // SQDMLAL2 on 16-bit lanes: upper four lanes of each 8h operand,
  // widened to 32 bits and accumulated.
  constexpr auto AsmInsn = ASM_INSN_WRAP_FUNC_WQ_RES_WW0_ARG("sqdmlal2 %0.4s, %2.8h, %3.8h");

  // Everything in range: QC stays clear.
  __uint128_t lhs = MakeUInt128(0x8000110022002200ULL, 0x7654321076543210ULL);
  __uint128_t rhs = MakeUInt128(0x0010001100000004ULL, 0x0123456701234567ULL);
  __uint128_t acc = MakeUInt128(0x0100010001000100ULL, 0x0400040004000400ULL);
  auto [sum, flags] = AsmInsn(lhs, rhs, acc);
  ASSERT_EQ(sum, MakeUInt128(0x020d03f81c24e9e0ULL, 0x050d06f81f24ece0ULL));
  ASSERT_FALSE(IsQcBitSet(flags));

  // INT16_MIN * INT16_MIN in an upper lane: the product saturates, QC set.
  __uint128_t lhs_mul = MakeUInt128(0x1111111111111111ULL, 0x8000123412341234ULL);
  __uint128_t rhs_mul = MakeUInt128(0x1111111111111111ULL, 0x8000123412341234ULL);
  __uint128_t acc_mul = MakeUInt128(0x0123456701234567ULL, 0x0123456701234567ULL);
  auto [sum_mul, flags_mul] = AsmInsn(lhs_mul, rhs_mul, acc_mul);
  ASSERT_EQ(sum_mul, MakeUInt128(0x03b9fa8703b9fa87ULL, 0x7fffffff03b9fa87ULL));
  ASSERT_TRUE(IsQcBitSet(flags_mul));

  // Accumulator lane already at INT32_MAX: the addition saturates, QC set.
  __uint128_t lhs_add = MakeUInt128(0x1100110022002200ULL, 0x7654321076543210ULL);
  __uint128_t rhs_add = MakeUInt128(0x0000000400010004ULL, 0x0123456701234567ULL);
  __uint128_t acc_max = MakeUInt128(0x1234567812345678ULL, 0x7fffffff0000b000ULL);
  auto [sum_add, flags_add] = AsmInsn(lhs_add, rhs_add, acc_max);
  ASSERT_EQ(sum_add, MakeUInt128(0x134159702d593f58ULL, 0x7fffffff1b2598e0ULL));
  ASSERT_TRUE(IsQcBitSet(flags_add));
}
7970
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyAddLong64x1) {
  // SQDMLAL (scalar): one 32-bit element, doubled product accumulated into a
  // 64-bit destination; upper 64 bits of the result register read back as 0.
  constexpr auto AsmInsn = ASM_INSN_WRAP_FUNC_WQ_RES_WW0_ARG("sqdmlal %d0, %s2, %s3");

  // Everything in range: QC stays clear.
  __uint128_t lhs = MakeUInt128(0x1100110011223344ULL, 0x7654321076543210ULL);
  __uint128_t rhs = MakeUInt128(0x0000000020000000ULL, 0x0123456701234567ULL);
  __uint128_t acc = MakeUInt128(0x12345678000000FFULL, 0x0400040004000400ULL);
  auto [sum, flags] = AsmInsn(lhs, rhs, acc);
  ASSERT_EQ(sum, MakeUInt128(0x167ce349000000ffULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(flags));

  // INT32_MIN * INT32_MIN doubled overflows: the product saturates, QC set.
  __uint128_t lhs_mul = MakeUInt128(0x1122334480000000ULL, 0xfeed000300000010ULL);
  __uint128_t rhs_mul = MakeUInt128(0xaabbccdd80000000ULL, 0xfeed000400000020ULL);
  __uint128_t acc_mul = MakeUInt128(0x1122334411111111ULL, 0x00000a000000b000ULL);
  auto [sum_mul, flags_mul] = AsmInsn(lhs_mul, rhs_mul, acc_mul);
  ASSERT_EQ(sum_mul, MakeUInt128(0x7fffffffffffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(flags_mul));

  // Accumulator already at INT64_MAX: the addition saturates, QC set.
  __uint128_t lhs_add = MakeUInt128(0x1122334400111111ULL, 0x7654321076543210ULL);
  __uint128_t rhs_add = MakeUInt128(0xaabbccdd00222222ULL, 0x0123456701234567ULL);
  __uint128_t acc_max = MakeUInt128(0x7fffffffffffffffULL, 0x00000a000000b000ULL);
  auto [sum_add, flags_add] = AsmInsn(lhs_add, rhs_add, acc_max);
  ASSERT_EQ(sum_add, MakeUInt128(0x7fffffffffffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(flags_add));
}
7998
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyAddLong32x1) {
  // SQDMLAL (scalar): one 16-bit element, doubled product accumulated into a
  // 32-bit destination; the rest of the result register reads back as 0.
  constexpr auto AsmInsn = ASM_INSN_WRAP_FUNC_WQ_RES_WW0_ARG("sqdmlal %s0, %h2, %h3");

  // Everything in range: QC stays clear.
  __uint128_t lhs = MakeUInt128(0x1100110022002200ULL, 0x7654321076543210ULL);
  __uint128_t rhs = MakeUInt128(0x0000000000000004ULL, 0x0123456701234567ULL);
  __uint128_t acc = MakeUInt128(0x0100010001000100ULL, 0x0400040004000400ULL);
  auto [sum, flags] = AsmInsn(lhs, rhs, acc);
  ASSERT_EQ(sum, MakeUInt128(0x0000000001011100ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(flags));

  // INT16_MIN * INT16_MIN doubled overflows: the product saturates, QC set.
  __uint128_t lhs_mul = MakeUInt128(0x1122334411228000ULL, 0xfeed000300000010ULL);
  __uint128_t rhs_mul = MakeUInt128(0xaabbccddaabb8000ULL, 0xfeed000400000020ULL);
  __uint128_t acc_mul = MakeUInt128(0x1122334411111111ULL, 0x00000a000000b000ULL);
  auto [sum_mul, flags_mul] = AsmInsn(lhs_mul, rhs_mul, acc_mul);
  ASSERT_EQ(sum_mul, MakeUInt128(0x000000007fffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(flags_mul));

  // Accumulator already at INT32_MAX: the addition saturates, QC set.
  __uint128_t lhs_add = MakeUInt128(0x1122334411220123ULL, 0x7654321076543210ULL);
  __uint128_t rhs_add = MakeUInt128(0xaabbccddaabb0044ULL, 0x0123456701234567ULL);
  __uint128_t acc_max = MakeUInt128(0xaabbccdd7fffffffULL, 0x00000a000000b000ULL);
  auto [sum_add, flags_add] = AsmInsn(lhs_add, rhs_add, acc_max);
  ASSERT_EQ(sum_add, MakeUInt128(0x000000007fffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(flags_add));
}
8026
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyAddLong64x2IndexedElem) {
  // SQDMLAL (vector, by element): both 32-bit lanes of the first operand are
  // multiplied by lane 1 of the second, doubled, and accumulated as 64 bits.
  constexpr auto AsmInsn = ASM_INSN_WRAP_FUNC_WQ_RES_WW0_ARG("sqdmlal %0.2d, %2.2s, %3.s[1]");

  // Everything in range: QC stays clear.
  __uint128_t lhs = MakeUInt128(0x1100110022002200ULL, 0x7654321076543210ULL);
  __uint128_t rhs = MakeUInt128(0x0000000400000004ULL, 0x0123456701234567ULL);
  __uint128_t acc = MakeUInt128(0x0100010001000100ULL, 0x0400040004000400ULL);
  auto [sum, flags] = AsmInsn(lhs, rhs, acc);
  ASSERT_EQ(sum, MakeUInt128(0x0100010111011100ULL, 0x040004008c008c00ULL));
  ASSERT_FALSE(IsQcBitSet(flags));

  // INT32_MIN lane times INT32_MIN element: the product saturates, QC set.
  __uint128_t lhs_mul = MakeUInt128(0x8000000000000004ULL, 0xfeed000300000010ULL);
  __uint128_t rhs_mul = MakeUInt128(0x8000000000000002ULL, 0xfeed000400000020ULL);
  __uint128_t acc_mul = MakeUInt128(0x0000080000000900ULL, 0x00000a000000b000ULL);
  auto [sum_mul, flags_mul] = AsmInsn(lhs_mul, rhs_mul, acc_mul);
  ASSERT_EQ(sum_mul, MakeUInt128(0x000007fc00000900ULL, 0x7fffffffffffffffULL));
  ASSERT_TRUE(IsQcBitSet(flags_mul));

  // Accumulator already at INT64_MAX: the addition saturates, QC set.
  __uint128_t lhs_add = MakeUInt128(0x1100110022002200ULL, 0x7654321076543210ULL);
  __uint128_t rhs_add = MakeUInt128(0x0000000400000004ULL, 0x0123456701234567ULL);
  __uint128_t acc_max = MakeUInt128(0x7fffffffffffffffULL, 0x00000a000000b000ULL);
  auto [sum_add, flags_add] = AsmInsn(lhs_add, rhs_add, acc_max);
  ASSERT_EQ(sum_add, MakeUInt128(0x7fffffffffffffffULL, 0x00000a0088013800ULL));
  ASSERT_TRUE(IsQcBitSet(flags_add));
}
8054
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyAddLong32x4IndexedElem) {
  // SQDMLAL (vector, by element): four 16-bit lanes multiplied by lane 7 of
  // the second operand, doubled, and accumulated as 32-bit lanes.
  constexpr auto AsmInsn = ASM_INSN_WRAP_FUNC_WQ_RES_WW0_ARG("sqdmlal %0.4s, %2.4h, %3.h[7]");

  // Everything in range: QC stays clear.
  __uint128_t lhs = MakeUInt128(0x0102030405060708ULL, 0x7654321076543210ULL);
  __uint128_t rhs = MakeUInt128(0x1122334488776655ULL, 0x0123456701234567ULL);
  __uint128_t acc = MakeUInt128(0x0123456789abcdefULL, 0xfedcba9876543210ULL);
  auto [sum, flags] = AsmInsn(lhs, rhs, acc);
  ASSERT_EQ(sum, MakeUInt128(0x012eb10b89bbca1fULL, 0xfedf0524765b0d28ULL));
  ASSERT_FALSE(IsQcBitSet(flags));

  // INT16_MIN lane times INT16_MIN element: the product saturates, QC set.
  __uint128_t lhs_mul = MakeUInt128(0x80000123456789a4ULL, 0xfeed000300000010ULL);
  __uint128_t rhs_mul = MakeUInt128(0x0123456789abcdefULL, 0x8000fedcba123456ULL);
  __uint128_t acc_mul = MakeUInt128(0x0123456701234567ULL, 0x0123456701234567ULL);
  auto [sum_mul, flags_mul] = AsmInsn(lhs_mul, rhs_mul, acc_mul);
  ASSERT_EQ(sum_mul, MakeUInt128(0xbbbc4567777f4567ULL, 0x7fffffff00004567ULL));
  ASSERT_TRUE(IsQcBitSet(flags_mul));

  // Accumulator lane already at INT32_MAX: the addition saturates, QC set.
  __uint128_t lhs_add = MakeUInt128(0x1100110022002200ULL, 0x7654321076543210ULL);
  __uint128_t rhs_add = MakeUInt128(0x8888111122223333ULL, 0x01234567ffffeeeeULL);
  __uint128_t acc_max = MakeUInt128(0x7fffffffffffffffULL, 0x00000a000000b000ULL);
  auto [sum_add, flags_add] = AsmInsn(lhs_add, rhs_add, acc_max);
  ASSERT_EQ(sum_add, MakeUInt128(0x7fffffff004d4bffULL, 0x0026b00000275600ULL));
  ASSERT_TRUE(IsQcBitSet(flags_add));
}
8082
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyAddLongUpper64x2IndexedElem) {
  // SQDMLAL2 (by element): upper two 32-bit lanes of the first operand times
  // lane 3 of the second, doubled, accumulated as 64-bit lanes.
  constexpr auto AsmInsn = ASM_INSN_WRAP_FUNC_WQ_RES_WW0_ARG("sqdmlal2 %0.2d, %2.4s, %3.s[3]");

  // Everything in range: QC stays clear.
  __uint128_t lhs = MakeUInt128(0x1100110022002200ULL, 0x7654321076543210ULL);
  __uint128_t rhs = MakeUInt128(0x0000000400000004ULL, 0x0123456701234567ULL);
  __uint128_t acc = MakeUInt128(0x0100010001000100ULL, 0x0400040004000400ULL);
  auto [sum, flags] = AsmInsn(lhs, rhs, acc);
  ASSERT_EQ(sum, MakeUInt128(0x020d44926c1ce9e0ULL, 0x050d47926f1cece0ULL));
  ASSERT_FALSE(IsQcBitSet(flags));

  // INT32_MIN lane times INT32_MIN element: the product saturates, QC set.
  __uint128_t lhs_mul = MakeUInt128(0x0123456789abcdefULL, 0x1122334480000000ULL);
  __uint128_t rhs_mul = MakeUInt128(0x0123456789abcdefULL, 0x8000000011223344ULL);
  __uint128_t acc_mul = MakeUInt128(0x0101010102020202ULL, 0x0303030304040404ULL);
  auto [sum_mul, flags_mul] = AsmInsn(lhs_mul, rhs_mul, acc_mul);
  ASSERT_EQ(sum_mul, MakeUInt128(0x7fffffffffffffffULL, 0xf1e0cfbf04040404ULL));
  ASSERT_TRUE(IsQcBitSet(flags_mul));

  // Accumulator already at INT64_MAX: the addition saturates, QC set.
  __uint128_t lhs_add = MakeUInt128(0x1100110022002200ULL, 0x7654321076543210ULL);
  __uint128_t rhs_add = MakeUInt128(0x1122334444332211ULL, 0x0123456701234567ULL);
  __uint128_t acc_max = MakeUInt128(0x7fffffffffffffffULL, 0x00000a000000b000ULL);
  auto [sum_add, flags_add] = AsmInsn(lhs_add, rhs_add, acc_max);
  ASSERT_EQ(sum_add, MakeUInt128(0x7fffffffffffffffULL, 0x010d4d926b1d98e0ULL));
  ASSERT_TRUE(IsQcBitSet(flags_add));
}
8110
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyAddLongUpper32x4IndexedElem) {
  // SQDMLAL2 (by element): upper four 16-bit lanes of the first operand times
  // lane 7 of the second, doubled, accumulated as 32-bit lanes.
  constexpr auto AsmInsn = ASM_INSN_WRAP_FUNC_WQ_RES_WW0_ARG("sqdmlal2 %0.4s, %2.8h, %3.h[7]");

  // Everything in range: QC stays clear.
  __uint128_t lhs = MakeUInt128(0x0102030405060708ULL, 0x7654321076543210ULL);
  __uint128_t rhs = MakeUInt128(0x1122334488776655ULL, 0x0123456701234567ULL);
  __uint128_t acc = MakeUInt128(0x0123456789abcdefULL, 0xfedcba9876543210ULL);
  auto [sum, flags] = AsmInsn(lhs, rhs, acc);
  ASSERT_EQ(sum, MakeUInt128(0x0230485f8a1d9e4fULL, 0xffe9bd9076c60270ULL));
  ASSERT_FALSE(IsQcBitSet(flags));

  // INT16_MIN lane times INT16_MIN element: the product saturates, QC set.
  __uint128_t lhs_mul = MakeUInt128(0x0011223344556677ULL, 0xfeedfeedfeed8000ULL);
  __uint128_t rhs_mul = MakeUInt128(0x0123456789abcdefULL, 0x8000fedcba123456ULL);
  __uint128_t acc_mul = MakeUInt128(0x0123456701234567ULL, 0x0123456701234567ULL);
  auto [sum_mul, flags_mul] = AsmInsn(lhs_mul, rhs_mul, acc_mul);
  ASSERT_EQ(sum_mul, MakeUInt128(0x023645677fffffffULL, 0x0236456702364567ULL));
  ASSERT_TRUE(IsQcBitSet(flags_mul));

  // Accumulator lane already at INT32_MAX: the addition saturates, QC set.
  __uint128_t lhs_add = MakeUInt128(0x1100110022002200ULL, 0x7654321076543210ULL);
  __uint128_t rhs_add = MakeUInt128(0x8888111122223333ULL, 0x01234567ffffeeeeULL);
  __uint128_t acc_max = MakeUInt128(0x7fffffffffffffffULL, 0x00000a000000b000ULL);
  auto [sum_add, flags_add] = AsmInsn(lhs_add, rhs_add, acc_max);
  ASSERT_EQ(sum_add, MakeUInt128(0x7fffffff0071d05fULL, 0x010d0cf800728060ULL));
  ASSERT_TRUE(IsQcBitSet(flags_add));
}
8138
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyAddLong64x1IndexedElem) {
  // SQDMLAL (scalar, by element): 32-bit source times lane 3 of the second
  // operand, doubled, accumulated into a 64-bit destination.
  constexpr auto AsmInsn = ASM_INSN_WRAP_FUNC_WQ_RES_WW0_ARG("sqdmlal %d0, %s2, %3.s[3]");

  // Everything in range: QC stays clear.
  __uint128_t lhs = MakeUInt128(0x0102030405060708ULL, 0x7654321076543210ULL);
  __uint128_t rhs = MakeUInt128(0x1122334488776655ULL, 0x0123456701234567ULL);
  __uint128_t acc = MakeUInt128(0x0123456789abcdefULL, 0xfedcba9876543210ULL);
  auto [sum, flags] = AsmInsn(lhs, rhs, acc);
  ASSERT_EQ(sum, MakeUInt128(0x012eb3d4d07fc65fULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(flags));

  // INT32_MIN source times INT32_MIN element: the product saturates, QC set.
  __uint128_t lhs_mul = MakeUInt128(0x0011223380000000ULL, 0xfeedfeedfeed8000ULL);
  __uint128_t rhs_mul = MakeUInt128(0x0123456789abcdefULL, 0x80000000ba123456ULL);
  __uint128_t acc_mul = MakeUInt128(0x0123456701234567ULL, 0x0123456701234567ULL);
  auto [sum_mul, flags_mul] = AsmInsn(lhs_mul, rhs_mul, acc_mul);
  ASSERT_EQ(sum_mul, MakeUInt128(0x7fffffffffffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(flags_mul));

  // Accumulator already at INT64_MAX: the addition saturates, QC set.
  __uint128_t lhs_add = MakeUInt128(0x1100110022002200ULL, 0x7654321076543210ULL);
  __uint128_t rhs_add = MakeUInt128(0x8888111122223333ULL, 0x01234567ffffeeeeULL);
  __uint128_t acc_max = MakeUInt128(0x7fffffffffffffffULL, 0x00000a000000b000ULL);
  auto [sum_add, flags_add] = AsmInsn(lhs_add, rhs_add, acc_max);
  ASSERT_EQ(sum_add, MakeUInt128(0x7fffffffffffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(flags_add));
}
8166
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyAddLong32x1IndexedElem) {
  // SQDMLAL (scalar, by element): 16-bit source times lane 7 of the second
  // operand, doubled, accumulated into a 32-bit destination.
  constexpr auto AsmInsn = ASM_INSN_WRAP_FUNC_WQ_RES_WW0_ARG("sqdmlal %s0, %h2, %3.h[7]");

  // Everything in range: QC stays clear.
  __uint128_t lhs = MakeUInt128(0x0102030405060708ULL, 0x7654321076543210ULL);
  __uint128_t rhs = MakeUInt128(0x1122334488776655ULL, 0x0123456701234567ULL);
  __uint128_t acc = MakeUInt128(0x0123456789abcdefULL, 0xfedcba9876543210ULL);
  auto [sum, flags] = AsmInsn(lhs, rhs, acc);
  ASSERT_EQ(sum, MakeUInt128(0x0000000089bbca1fULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(flags));

  // INT16_MIN source times INT16_MIN element: the product saturates, QC set.
  __uint128_t lhs_mul = MakeUInt128(0x0011223344558000ULL, 0xfeedfeedfeed1234ULL);
  __uint128_t rhs_mul = MakeUInt128(0x0123456789abcdefULL, 0x8000fedcba123456ULL);
  __uint128_t acc_mul = MakeUInt128(0x0123456701234567ULL, 0x0123456701234567ULL);
  auto [sum_mul, flags_mul] = AsmInsn(lhs_mul, rhs_mul, acc_mul);
  ASSERT_EQ(sum_mul, MakeUInt128(0x000000007fffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(flags_mul));

  // Accumulator already at INT32_MAX: the addition saturates, QC set.
  __uint128_t lhs_add = MakeUInt128(0xaabbccddeeff2200ULL, 0x7654321076543210ULL);
  __uint128_t rhs_add = MakeUInt128(0x8888111122223333ULL, 0x0123aabbccddeeffULL);
  __uint128_t acc_max = MakeUInt128(0xaabbccdd7fffffffULL, 0x0011223344556677ULL);
  auto [sum_add, flags_add] = AsmInsn(lhs_add, rhs_add, acc_max);
  ASSERT_EQ(sum_add, MakeUInt128(0x000000007fffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(flags_add));
}
8194
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplySubtractLong32x2) {
  // SQDMLSL (vector): doubled widening products of two 32-bit lanes are
  // subtracted from the 64-bit accumulator lanes.
  constexpr auto AsmInsn = ASM_INSN_WRAP_FUNC_WQ_RES_WW0_ARG("sqdmlsl %0.2d, %2.2s, %3.2s");

  // Everything in range: QC stays clear.
  __uint128_t lhs = MakeUInt128(0x0000000080000001ULL, 0x7654321076543210ULL);
  __uint128_t rhs = MakeUInt128(0x0000000100000004ULL, 0x0123456701234567ULL);
  __uint128_t acc = MakeUInt128(0x0000100000000001ULL, 0x0400040004000400ULL);
  auto [diff, flags] = AsmInsn(lhs, rhs, acc);
  ASSERT_EQ(diff, MakeUInt128(0x00001003fffffff9ULL, 0x0400040004000400ULL));
  ASSERT_FALSE(IsQcBitSet(flags));

  // INT32_MIN * INT32_MIN doubled overflows: the product saturates, QC set.
  __uint128_t lhs_mul = MakeUInt128(0x8000000000000004ULL, 0xfeed000300000010ULL);
  __uint128_t rhs_mul = MakeUInt128(0x8000000000000002ULL, 0xfeed000400000020ULL);
  __uint128_t acc_mul = MakeUInt128(0x0000000000000900ULL, 0x00000a000000b000ULL);
  auto [diff_mul, flags_mul] = AsmInsn(lhs_mul, rhs_mul, acc_mul);
  ASSERT_EQ(diff_mul, MakeUInt128(0x00000000000008f0ULL, 0x80000a000000b001ULL));
  ASSERT_TRUE(IsQcBitSet(flags_mul));

  // Accumulator already at INT64_MIN: the subtraction saturates, QC set.
  __uint128_t lhs_sub = MakeUInt128(0x1100110022002200ULL, 0x7654321076543210ULL);
  __uint128_t rhs_sub = MakeUInt128(0x0000000400000004ULL, 0x0123456701234567ULL);
  __uint128_t acc_min = MakeUInt128(0x8000000000000000ULL, 0x00000a000000b000ULL);
  auto [diff_sub, flags_sub] = AsmInsn(lhs_sub, rhs_sub, acc_min);
  ASSERT_EQ(diff_sub, MakeUInt128(0x8000000000000000ULL, 0x000009ff78002800ULL));
  ASSERT_TRUE(IsQcBitSet(flags_sub));
}
8222
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplySubtractLong16x4) {
  // SQDMLSL (vector): doubled widening products of four 16-bit lanes are
  // subtracted from the 32-bit accumulator lanes.
  constexpr auto AsmInsn = ASM_INSN_WRAP_FUNC_WQ_RES_WW0_ARG("sqdmlsl %0.4s, %2.4h, %3.4h");

  // Everything in range: QC stays clear.
  __uint128_t lhs = MakeUInt128(0x8000110022002200ULL, 0x7654321076543210ULL);
  __uint128_t rhs = MakeUInt128(0x0010001100000004ULL, 0x0123456701234567ULL);
  __uint128_t acc = MakeUInt128(0x0100010001000100ULL, 0x0400040004000400ULL);
  auto [diff, flags] = AsmInsn(lhs, rhs, acc);
  ASSERT_EQ(diff, MakeUInt128(0x0100010000fef100ULL, 0x0410040003fdc200ULL));
  ASSERT_FALSE(IsQcBitSet(flags));

  // INT16_MIN * INT16_MIN doubled overflows: the product saturates, QC set.
  __uint128_t lhs_mul = MakeUInt128(0x8000111111111111ULL, 0x1234123412341234ULL);
  __uint128_t rhs_mul = MakeUInt128(0x8000111111111111ULL, 0x1234123412341234ULL);
  __uint128_t acc_mul = MakeUInt128(0x0123456701234567ULL, 0x0123456701234567ULL);
  auto [diff_mul, flags_mul] = AsmInsn(lhs_mul, rhs_mul, acc_mul);
  ASSERT_EQ(diff_mul, MakeUInt128(0xfedcbf25fedcbf25ULL, 0x81234568fedcbf25ULL));
  ASSERT_TRUE(IsQcBitSet(flags_mul));

  // Accumulator lane already at INT32_MIN: the subtraction saturates, QC set.
  __uint128_t lhs_sub = MakeUInt128(0x1100110022002200ULL, 0x7654321076543210ULL);
  __uint128_t rhs_sub = MakeUInt128(0x0000000400010004ULL, 0x0123456701234567ULL);
  __uint128_t acc_min = MakeUInt128(0x8000000012345678ULL, 0x00000a000000b000ULL);
  auto [diff_sub, flags_sub] = AsmInsn(lhs_sub, rhs_sub, acc_min);
  ASSERT_EQ(diff_sub, MakeUInt128(0x8000000012334678ULL, 0x00000a0000002800ULL));
  ASSERT_TRUE(IsQcBitSet(flags_sub));
}
8250
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplySubtractLongUpper32x2) {
  // SQDMLSL2: same as SQDMLSL but sourcing the upper two 32-bit lanes of each
  // 4s operand.
  constexpr auto AsmInsn = ASM_INSN_WRAP_FUNC_WQ_RES_WW0_ARG("sqdmlsl2 %0.2d, %2.4s, %3.4s");

  // Everything in range: QC stays clear.
  __uint128_t lhs = MakeUInt128(0x1100110022002200ULL, 0x7654321076543210ULL);
  __uint128_t rhs = MakeUInt128(0x0000000400000004ULL, 0x0123456701234567ULL);
  __uint128_t acc = MakeUInt128(0x0100010001000100ULL, 0x0400040004000400ULL);
  auto [diff, flags] = AsmInsn(lhs, rhs, acc);
  ASSERT_EQ(diff, MakeUInt128(0xfff2bd6d95e31820ULL, 0x02f2c06d98e31b20ULL));
  ASSERT_FALSE(IsQcBitSet(flags));

  // INT32_MIN * INT32_MIN in an upper lane: the product saturates, QC set.
  __uint128_t lhs_mul = MakeUInt128(0x1234567800000004ULL, 0x8000000001100010ULL);
  __uint128_t rhs_mul = MakeUInt128(0x1234567800000002ULL, 0x8000000001100020ULL);
  __uint128_t acc_mul = MakeUInt128(0x0000080000000900ULL, 0x00000a000000b000ULL);
  auto [diff_mul, flags_mul] = AsmInsn(lhs_mul, rhs_mul, acc_mul);
  ASSERT_EQ(diff_mul, MakeUInt128(0xfffdc5ff9a000500ULL, 0x80000a000000b001ULL));
  ASSERT_TRUE(IsQcBitSet(flags_mul));

  // Accumulator already at INT64_MIN: the subtraction saturates, QC set.
  __uint128_t lhs_sub = MakeUInt128(0x1100110022002200ULL, 0x7654321076543210ULL);
  __uint128_t rhs_sub = MakeUInt128(0x0000000400000004ULL, 0x0123456701234567ULL);
  __uint128_t acc_min = MakeUInt128(0x1234567812345678ULL, 0x8000000000000000ULL);
  auto [diff_sub, flags_sub] = AsmInsn(lhs_sub, rhs_sub, acc_min);
  ASSERT_EQ(diff_sub, MakeUInt128(0x112712e5a7176d98ULL, 0x8000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(flags_sub));
}
8278
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplySubtractLongUpper16x4) {
  // SQDMLSL2 on 16-bit lanes: upper four lanes of each 8h operand, doubled
  // widening products subtracted from 32-bit accumulator lanes.
  constexpr auto AsmInsn = ASM_INSN_WRAP_FUNC_WQ_RES_WW0_ARG("sqdmlsl2 %0.4s, %2.8h, %3.8h");

  // Everything in range: QC stays clear.
  __uint128_t lhs = MakeUInt128(0x8000110022002200ULL, 0x7654321076543210ULL);
  __uint128_t rhs = MakeUInt128(0x0010001100000004ULL, 0x0123456701234567ULL);
  __uint128_t acc = MakeUInt128(0x0100010001000100ULL, 0x0400040004000400ULL);
  auto [diff, flags] = AsmInsn(lhs, rhs, acc);
  ASSERT_EQ(diff, MakeUInt128(0xfff2fe08e5db1820ULL, 0x02f30108e8db1b20ULL));
  ASSERT_FALSE(IsQcBitSet(flags));

  // INT16_MIN * INT16_MIN in an upper lane: the product saturates, QC set.
  __uint128_t lhs_mul = MakeUInt128(0x1111111111111111ULL, 0x8000123412341234ULL);
  __uint128_t rhs_mul = MakeUInt128(0x1111111111111111ULL, 0x8000123412341234ULL);
  __uint128_t acc_mul = MakeUInt128(0x0123456701234567ULL, 0x0123456701234567ULL);
  auto [diff_mul, flags_mul] = AsmInsn(lhs_mul, rhs_mul, acc_mul);
  ASSERT_EQ(diff_mul, MakeUInt128(0xfe8c9047fe8c9047ULL, 0x81234568fe8c9047ULL));
  ASSERT_TRUE(IsQcBitSet(flags_mul));

  // Accumulator lane already at INT32_MIN: the subtraction saturates, QC set.
  __uint128_t lhs_sub = MakeUInt128(0x1100110022002200ULL, 0x7654321076543210ULL);
  __uint128_t rhs_sub = MakeUInt128(0x0000000400010004ULL, 0x0123456701234567ULL);
  __uint128_t acc_min = MakeUInt128(0x1234567812345678ULL, 0x800000000000b000ULL);
  auto [diff_sub, flags_sub] = AsmInsn(lhs_sub, rhs_sub, acc_min);
  ASSERT_EQ(diff_sub, MakeUInt128(0x11275380f70f6d98ULL, 0x80000000e4dbc720ULL));
  ASSERT_TRUE(IsQcBitSet(flags_sub));
}
8306
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplySubtractLong64x1) {
  // SQDMLSL (scalar): one 32-bit element, doubled product subtracted from a
  // 64-bit accumulator; upper 64 bits of the result register read back as 0.
  constexpr auto AsmInsn = ASM_INSN_WRAP_FUNC_WQ_RES_WW0_ARG("sqdmlsl %d0, %s2, %s3");

  // Everything in range: QC stays clear.
  __uint128_t lhs = MakeUInt128(0x1100110011223344ULL, 0x7654321076543210ULL);
  __uint128_t rhs = MakeUInt128(0x0000000020000000ULL, 0x0123456701234567ULL);
  __uint128_t acc = MakeUInt128(0x12345678000000FFULL, 0x0400040004000400ULL);
  auto [diff, flags] = AsmInsn(lhs, rhs, acc);
  ASSERT_EQ(diff, MakeUInt128(0x0debc9a7000000ffULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(flags));

  // INT32_MIN * INT32_MIN doubled overflows: the product saturates, QC set.
  __uint128_t lhs_mul = MakeUInt128(0x1122334480000000ULL, 0xfeed000300000010ULL);
  __uint128_t rhs_mul = MakeUInt128(0xaabbccdd80000000ULL, 0xfeed000400000020ULL);
  __uint128_t acc_mul = MakeUInt128(0x1122334411111111ULL, 0x00000a000000b000ULL);
  auto [diff_mul, flags_mul] = AsmInsn(lhs_mul, rhs_mul, acc_mul);
  ASSERT_EQ(diff_mul, MakeUInt128(0x9122334411111112ULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(flags_mul));

  // Accumulator already at INT64_MIN: the subtraction saturates, QC set.
  __uint128_t lhs_sub = MakeUInt128(0x1122334400111111ULL, 0x7654321076543210ULL);
  __uint128_t rhs_sub = MakeUInt128(0xaabbccdd00222222ULL, 0x0123456701234567ULL);
  __uint128_t acc_min = MakeUInt128(0x8000000000000000ULL, 0x00000a000000b000ULL);
  auto [diff_sub, flags_sub] = AsmInsn(lhs_sub, rhs_sub, acc_min);
  ASSERT_EQ(diff_sub, MakeUInt128(0x8000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(flags_sub));
}
8334
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplySubtractLong32x1) {
  // SQDMLSL (scalar): one 16-bit element, doubled product subtracted from a
  // 32-bit accumulator; the rest of the result register reads back as 0.
  constexpr auto AsmInsn = ASM_INSN_WRAP_FUNC_WQ_RES_WW0_ARG("sqdmlsl %s0, %h2, %h3");

  // Everything in range: QC stays clear.
  __uint128_t lhs = MakeUInt128(0x1100110022002200ULL, 0x7654321076543210ULL);
  __uint128_t rhs = MakeUInt128(0x0000000000000004ULL, 0x0123456701234567ULL);
  __uint128_t acc = MakeUInt128(0x0100010001000100ULL, 0x0400040004000400ULL);
  auto [diff, flags] = AsmInsn(lhs, rhs, acc);
  ASSERT_EQ(diff, MakeUInt128(0x0000000000fef100ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(flags));

  // INT16_MIN * INT16_MIN doubled overflows: the product saturates, QC set.
  __uint128_t lhs_mul = MakeUInt128(0x1122334411228000ULL, 0xfeed000300000010ULL);
  __uint128_t rhs_mul = MakeUInt128(0xaabbccddaabb8000ULL, 0xfeed000400000020ULL);
  __uint128_t acc_mul = MakeUInt128(0x1122334411111111ULL, 0x00000a000000b000ULL);
  auto [diff_mul, flags_mul] = AsmInsn(lhs_mul, rhs_mul, acc_mul);
  ASSERT_EQ(diff_mul, MakeUInt128(0x0000000091111112ULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(flags_mul));

  // Accumulator already at INT32_MIN: the subtraction saturates, QC set.
  __uint128_t lhs_sub = MakeUInt128(0x1122334411220123ULL, 0x7654321076543210ULL);
  __uint128_t rhs_sub = MakeUInt128(0xaabbccddaabb0044ULL, 0x0123456701234567ULL);
  __uint128_t acc_min = MakeUInt128(0xaabbccdd80000000ULL, 0x00000a000000b000ULL);
  auto [diff_sub, flags_sub] = AsmInsn(lhs_sub, rhs_sub, acc_min);
  ASSERT_EQ(diff_sub, MakeUInt128(0x0000000080000000ULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(flags_sub));
}
8362
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplySubtractLong64x2IndexedElem) {
  // SQDMLSL (vector, by element): both 32-bit lanes of the first operand are
  // multiplied by lane 1 of the second, doubled, then subtracted from the
  // 64-bit accumulator lanes.
  constexpr auto AsmInsn = ASM_INSN_WRAP_FUNC_WQ_RES_WW0_ARG("sqdmlsl %0.2d, %2.2s, %3.s[1]");

  // Everything in range: QC stays clear.
  __uint128_t lhs = MakeUInt128(0x1100110022002200ULL, 0x7654321076543210ULL);
  __uint128_t rhs = MakeUInt128(0x0000000400000004ULL, 0x0123456701234567ULL);
  __uint128_t acc = MakeUInt128(0x0100010001000100ULL, 0x0400040004000400ULL);
  auto [diff, flags] = AsmInsn(lhs, rhs, acc);
  ASSERT_EQ(diff, MakeUInt128(0x010000fef0fef100ULL, 0x040003ff7bff7c00ULL));
  ASSERT_FALSE(IsQcBitSet(flags));

  // INT32_MIN lane times INT32_MIN element: the product saturates, QC set.
  __uint128_t lhs_mul = MakeUInt128(0x8000000000000004ULL, 0xfeed000300000010ULL);
  __uint128_t rhs_mul = MakeUInt128(0x8000000000000002ULL, 0xfeed000400000020ULL);
  __uint128_t acc_mul = MakeUInt128(0x0000080000000900ULL, 0x00000a000000b000ULL);
  auto [diff_mul, flags_mul] = AsmInsn(lhs_mul, rhs_mul, acc_mul);
  ASSERT_EQ(diff_mul, MakeUInt128(0x0000080400000900ULL, 0x80000a000000b001ULL));
  ASSERT_TRUE(IsQcBitSet(flags_mul));

  // Accumulator already at INT64_MIN: the subtraction saturates, QC set.
  __uint128_t lhs_sub = MakeUInt128(0x1100110022002200ULL, 0x7654321076543210ULL);
  __uint128_t rhs_sub = MakeUInt128(0x0000000400000004ULL, 0x0123456701234567ULL);
  __uint128_t acc_min = MakeUInt128(0x8000000000000000ULL, 0x00000a000000b000ULL);
  auto [diff_sub, flags_sub] = AsmInsn(lhs_sub, rhs_sub, acc_min);
  ASSERT_EQ(diff_sub, MakeUInt128(0x8000000000000000ULL, 0x000009ff78002800ULL));
  ASSERT_TRUE(IsQcBitSet(flags_sub));
}
8390
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplySubtractLong32x4IndexedElem) {
  // SQDMLSL (vector, by element): four 16-bit lanes multiplied by lane 7 of
  // the second operand, doubled, then subtracted from the 32-bit accumulator
  // lanes.
  constexpr auto AsmInsn = ASM_INSN_WRAP_FUNC_WQ_RES_WW0_ARG("sqdmlsl %0.4s, %2.4h, %3.h[7]");

  // Everything in range: QC stays clear.
  __uint128_t lhs = MakeUInt128(0x0102030405060708ULL, 0x7654321076543210ULL);
  __uint128_t rhs = MakeUInt128(0x1122334488776655ULL, 0x0123456701234567ULL);
  __uint128_t acc = MakeUInt128(0x0123456789abcdefULL, 0xfedcba9876543210ULL);
  auto [diff, flags] = AsmInsn(lhs, rhs, acc);
  ASSERT_EQ(diff, MakeUInt128(0x0117d9c3899bd1bfULL, 0xfeda700c764d56f8ULL));
  ASSERT_FALSE(IsQcBitSet(flags));

  // INT16_MIN lane times INT16_MIN element: the product saturates, QC set.
  __uint128_t lhs_mul = MakeUInt128(0x80000123456789a4ULL, 0xfeed000300000010ULL);
  __uint128_t rhs_mul = MakeUInt128(0x0123456789abcdefULL, 0x8000fedcba123456ULL);
  __uint128_t acc_mul = MakeUInt128(0x0123456701234567ULL, 0x0123456701234567ULL);
  auto [diff_mul, flags_mul] = AsmInsn(lhs_mul, rhs_mul, acc_mul);
  ASSERT_EQ(diff_mul, MakeUInt128(0x468a45678ac74567ULL, 0x8123456802464567ULL));
  ASSERT_TRUE(IsQcBitSet(flags_mul));

  // Accumulator lane already at INT32_MIN: the subtraction saturates, QC set.
  __uint128_t lhs_sub = MakeUInt128(0x1100110022002200ULL, 0x7654321076543210ULL);
  __uint128_t rhs_sub = MakeUInt128(0x8888111122223333ULL, 0x01234567ffffeeeeULL);
  __uint128_t acc_min = MakeUInt128(0x8000000000000000ULL, 0x00000a000000b000ULL);
  auto [diff_sub, flags_sub] = AsmInsn(lhs_sub, rhs_sub, acc_min);
  ASSERT_EQ(diff_sub, MakeUInt128(0x80000000ffb2b400ULL, 0xffd96400ffda0a00ULL));
  ASSERT_TRUE(IsQcBitSet(flags_sub));
}
8418
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplySubtractLongUpper64x2IndexedElem) {
  // SQDMLSL2 (by element, upper 4S -> 2D): operates on the upper two 32-bit
  // lanes of %2, each multiplied by element 3 of %3, doubled, and subtracted
  // from the 64-bit accumulator lanes.  QC reports saturation in either step.
  constexpr auto AsmSqdmlsl = ASM_INSN_WRAP_FUNC_WQ_RES_WW0_ARG("sqdmlsl2 %0.2d, %2.4s, %3.s[3]");

  // No saturation.
  __uint128_t arg1 = MakeUInt128(0x1100110022002200ULL, 0x7654321076543210ULL);
  __uint128_t arg2 = MakeUInt128(0x0000000400000004ULL, 0x0123456701234567ULL);
  __uint128_t arg3 = MakeUInt128(0x0100010001000100ULL, 0x0400040004000400ULL);
  auto [res1, fpsr1] = AsmSqdmlsl(arg1, arg2, arg3);
  ASSERT_EQ(res1, MakeUInt128(0xfff2bd6d95e31820ULL, 0x02f2c06d98e31b20ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Saturates in the multiplication (0x80000000 * 0x80000000 doubled overflows int64).
  __uint128_t arg4 = MakeUInt128(0x0123456789abcdefULL, 0x1122334480000000ULL);
  __uint128_t arg5 = MakeUInt128(0x0123456789abcdefULL, 0x8000000011223344ULL);
  __uint128_t arg6 = MakeUInt128(0x0101010102020202ULL, 0x0303030304040404ULL);
  auto [res2, fpsr2] = AsmSqdmlsl(arg4, arg5, arg6);
  ASSERT_EQ(res2, MakeUInt128(0x8101010102020203ULL, 0x1425364704040404ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));

  // Saturates in the subtraction (accumulator lane is INT64_MIN).
  __uint128_t arg7 = MakeUInt128(0x1100110022002200ULL, 0x7654321076543210ULL);
  __uint128_t arg8 = MakeUInt128(0x1122334444332211ULL, 0x0123456701234567ULL);
  __uint128_t arg9 = MakeUInt128(0x8000000000000000ULL, 0x00000a000000b000ULL);
  auto [res3, fpsr3] = AsmSqdmlsl(arg7, arg8, arg9);
  ASSERT_EQ(res3, MakeUInt128(0x8000000000000000ULL, 0xfef2c66d94e3c720ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr3));
}
8446
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplySubtractLongUpper32x4IndexedElem) {
  // SQDMLSL2 (by element, upper 8H -> 4S): operates on the upper four 16-bit
  // lanes of %2, each multiplied by element 7 of %3, doubled, and subtracted
  // from the 32-bit accumulator lanes.  QC reports saturation in either step.
  constexpr auto AsmSqdmlsl = ASM_INSN_WRAP_FUNC_WQ_RES_WW0_ARG("sqdmlsl2 %0.4s, %2.8h, %3.h[7]");

  // No saturation.
  __uint128_t arg1 = MakeUInt128(0x0102030405060708ULL, 0x7654321076543210ULL);
  __uint128_t arg2 = MakeUInt128(0x1122334488776655ULL, 0x0123456701234567ULL);
  __uint128_t arg3 = MakeUInt128(0x0123456789abcdefULL, 0xfedcba9876543210ULL);
  auto [res1, fpsr1] = AsmSqdmlsl(arg1, arg2, arg3);
  ASSERT_EQ(res1, MakeUInt128(0x0016426f8939fd8fULL, 0xfdcfb7a075e261b0ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Saturates in the multiplication (0x8000 * 0x8000 doubled overflows int32).
  __uint128_t arg4 = MakeUInt128(0x0011223344556677ULL, 0xfeedfeedfeed8000ULL);
  __uint128_t arg5 = MakeUInt128(0x0123456789abcdefULL, 0x8000fedcba123456ULL);
  __uint128_t arg6 = MakeUInt128(0x0123456701234567ULL, 0x0123456701234567ULL);
  auto [res2, fpsr2] = AsmSqdmlsl(arg4, arg5, arg6);
  ASSERT_EQ(res2, MakeUInt128(0x0010456781234568ULL, 0x0010456700104567ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));

  // Saturates in the subtraction (accumulator lane is INT32_MIN).
  __uint128_t arg7 = MakeUInt128(0x1100110022002200ULL, 0x7654321076543210ULL);
  __uint128_t arg8 = MakeUInt128(0x8888111122223333ULL, 0x01234567ffffeeeeULL);
  __uint128_t arg9 = MakeUInt128(0x8000000000000000ULL, 0x00000a000000b000ULL);
  auto [res3, fpsr3] = AsmSqdmlsl(arg7, arg8, arg9);
  ASSERT_EQ(res3, MakeUInt128(0x80000000ff8e2fa0ULL, 0xfef30708ff8edfa0ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr3));
}
8474
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplySubtractLong64x1IndexedElem) {
  // SQDMLSL (scalar, by element, S -> D): the low 32-bit scalar of %2 is
  // multiplied by element 3 of %3, doubled, and subtracted from the 64-bit
  // scalar accumulator.  Only the low 64 bits of the result are defined;
  // the upper half is expected to be zero.
  constexpr auto AsmSqdmlsl = ASM_INSN_WRAP_FUNC_WQ_RES_WW0_ARG("sqdmlsl %d0, %s2, %3.s[3]");

  // No saturation.
  __uint128_t arg1 = MakeUInt128(0x0102030405060708ULL, 0x7654321076543210ULL);
  __uint128_t arg2 = MakeUInt128(0x1122334488776655ULL, 0x0123456701234567ULL);
  __uint128_t arg3 = MakeUInt128(0x0123456789abcdefULL, 0xfedcba9876543210ULL);
  auto [res1, fpsr1] = AsmSqdmlsl(arg1, arg2, arg3);
  ASSERT_EQ(res1, MakeUInt128(0x0117d6fa42d7d57fULL, 0x0ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Saturates in the multiplication (0x80000000 * 0x80000000 doubled overflows int64).
  __uint128_t arg4 = MakeUInt128(0x0011223380000000ULL, 0xfeedfeedfeed8000ULL);
  __uint128_t arg5 = MakeUInt128(0x0123456789abcdefULL, 0x80000000ba123456ULL);
  __uint128_t arg6 = MakeUInt128(0x0123456701234567ULL, 0x0123456701234567ULL);
  auto [res2, fpsr2] = AsmSqdmlsl(arg4, arg5, arg6);
  ASSERT_EQ(res2, MakeUInt128(0x8123456701234568ULL, 0x0ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));

  // Saturates in the subtraction (accumulator is INT64_MIN).
  __uint128_t arg7 = MakeUInt128(0x1100110022002200ULL, 0x7654321076543210ULL);
  __uint128_t arg8 = MakeUInt128(0x8888111122223333ULL, 0x01234567ffffeeeeULL);
  __uint128_t arg9 = MakeUInt128(0x8000000000000000ULL, 0x00000a000000b000ULL);
  auto [res3, fpsr3] = AsmSqdmlsl(arg7, arg8, arg9);
  ASSERT_EQ(res3, MakeUInt128(0x8000000000000000ULL, 0x0ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr3));
}
8502
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplySubtractLong32x1IndexedElem) {
  // SQDMLSL (scalar, by element, H -> S): the low 16-bit scalar of %2 is
  // multiplied by element 7 of %3, doubled, and subtracted from the 32-bit
  // scalar accumulator.  The result register is zero-extended to 128 bits.
  constexpr auto AsmSqdmlsl = ASM_INSN_WRAP_FUNC_WQ_RES_WW0_ARG("sqdmlsl %s0, %h2, %3.h[7]");

  // No saturation.
  __uint128_t arg1 = MakeUInt128(0x0102030405060708ULL, 0x7654321076543210ULL);
  __uint128_t arg2 = MakeUInt128(0x1122334488776655ULL, 0x0123456701234567ULL);
  __uint128_t arg3 = MakeUInt128(0x0123456789abcdefULL, 0xfedcba9876543210ULL);
  auto [res1, fpsr1] = AsmSqdmlsl(arg1, arg2, arg3);
  ASSERT_EQ(res1, MakeUInt128(0x00000000899bd1bfULL, 0x0ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Saturates in the multiplication (0x8000 * 0x8000 doubled overflows int32).
  __uint128_t arg4 = MakeUInt128(0x0011223344558000ULL, 0xfeedfeedfeed1234ULL);
  __uint128_t arg5 = MakeUInt128(0x0123456789abcdefULL, 0x8000fedcba123456ULL);
  __uint128_t arg6 = MakeUInt128(0x0123456701234567ULL, 0x0123456701234567ULL);
  auto [res2, fpsr2] = AsmSqdmlsl(arg4, arg5, arg6);
  ASSERT_EQ(res2, MakeUInt128(0x0000000081234568ULL, 0x0ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));

  // Saturates in the subtraction (accumulator is INT32_MIN).
  __uint128_t arg7 = MakeUInt128(0xaabbccddeeff2200ULL, 0x7654321076543210ULL);
  __uint128_t arg8 = MakeUInt128(0x8888111122223333ULL, 0x0123aabbccddeeffULL);
  __uint128_t arg9 = MakeUInt128(0xaabbccdd80000000ULL, 0x0011223344556677ULL);
  auto [res3, fpsr3] = AsmSqdmlsl(arg7, arg8, arg9);
  ASSERT_EQ(res3, MakeUInt128(0x0000000080000000ULL, 0x0ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr3));
}
8530
TEST(Arm64InsnTest, SignedSaturatingRoundingDoublingMultiplyHighHalf32x4) {
  // SQRDMULH (vector, 4S): per lane, the rounded high half of the doubled
  // signed product; QC is set when a lane saturates (0x80000000 squared).
  constexpr auto AsmSqrdmulh = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqrdmulh %0.4s, %2.4s, %3.4s");

  // No saturation.
  __uint128_t arg1 = MakeU32x4(0x20000001UL, 0x00000004UL, 0x7eed0003UL, 0x00000010UL);
  __uint128_t arg2 = MakeU32x4(0x00000008UL, 0x00000002UL, 0x7eed0004UL, 0x00000002UL);
  auto [res1, fpsr1] = AsmSqrdmulh(arg1, arg2);
  ASSERT_EQ(res1, MakeU32x4(0x2UL, 0x0UL, 0x7ddc4ed9UL, 0x0UL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Lane 0 saturates: INT32_MIN * INT32_MIN doubled clamps to INT32_MAX.
  __uint128_t arg3 = MakeU32x4(0x80000000UL, 0x00000004UL, 0xfeed0003UL, 0x00000010UL);
  __uint128_t arg4 = MakeU32x4(0x80000000UL, 0x00000002UL, 0xfeed0004UL, 0x00000002UL);
  auto [res2, fpsr2] = AsmSqrdmulh(arg3, arg4);
  ASSERT_EQ(res2, MakeU32x4(0x7fffffffUL, 0x0UL, 0x00024ed2UL, 0x0UL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
8546
TEST(Arm64InsnTest, SignedSaturatingRoundingDoublingMultiplyHighHalf32x2) {
  // SQRDMULH (vector, 2S): rounded high half of the doubled signed product on
  // the two low 32-bit lanes; the upper half of the result is cleared.
  constexpr auto AsmSqrdmulh = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqrdmulh %0.2s, %2.2s, %3.2s");

  // No saturation; upper (0xdeadc0de) lanes are ignored by the 2S form.
  __uint128_t arg1 = MakeU32x4(0x55555555UL, 0x00000004UL, 0xdeadc0deUL, 0xdeadc0deUL);
  __uint128_t arg2 = MakeU32x4(0x00000004UL, 0x00000002UL, 0xdeadc0deUL, 0xdeadc0deUL);
  auto [res1, fpsr1] = AsmSqrdmulh(arg1, arg2);
  ASSERT_EQ(res1, MakeU32x4(0x3UL, 0x0UL, 0x0UL, 0x0UL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Lane 0 saturates: INT32_MIN * INT32_MIN doubled clamps to INT32_MAX.
  __uint128_t arg3 = MakeU32x4(0x80000000UL, 0x00000004UL, 0xdeadc0deUL, 0xdeadc0deUL);
  __uint128_t arg4 = MakeU32x4(0x80000000UL, 0x00000002UL, 0xdeadc0deUL, 0xdeadc0deUL);
  auto [res2, fpsr2] = AsmSqrdmulh(arg3, arg4);
  ASSERT_EQ(res2, MakeU32x4(0x7fffffffUL, 0x0UL, 0x0UL, 0x0UL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
8562
TEST(Arm64InsnTest, SignedSaturatingRoundingDoublingMultiplyHighHalf16x8) {
  // SQRDMULH (vector, 8H): per 16-bit lane, rounded high half of the doubled
  // signed product; QC is set when a lane saturates (0x8000 squared).
  constexpr auto AsmSqrdmulh = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqrdmulh %0.8h, %2.8h, %3.8h");

  // No saturation.
  __uint128_t arg1 = MakeUInt128(0x200000017fff1111ULL, 0x7eed000300000010ULL);
  __uint128_t arg2 = MakeUInt128(0x0008000840000000ULL, 0x7eed000400000002ULL);
  auto [res1, fpsr1] = AsmSqrdmulh(arg1, arg2);
  ASSERT_EQ(res1, MakeUInt128(0x0002000040000000ULL, 0x7ddc000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Top lane saturates: INT16_MIN * INT16_MIN doubled clamps to INT16_MAX.
  __uint128_t arg3 = MakeUInt128(0x8000700040010000ULL, 0xfeed0003ffff0010ULL);
  __uint128_t arg4 = MakeUInt128(0x8000000100040000ULL, 0xfeed0004ffff0002ULL);
  auto [res2, fpsr2] = AsmSqrdmulh(arg3, arg4);
  ASSERT_EQ(res2, MakeUInt128(0x7fff000100020000ULL, 0x0002000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
8578
TEST(Arm64InsnTest, SignedSaturatingRoundingDoublingMultiplyHighHalf16x4) {
  // SQRDMULH (vector, 4H): rounded high half of the doubled signed product on
  // the four low 16-bit lanes; the upper half of the result is cleared.
  constexpr auto AsmSqrdmulh = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqrdmulh %0.4h, %2.4h, %3.4h");

  // No saturation; upper (0xdeadc0de) halves are ignored by the 4H form.
  __uint128_t arg1 = MakeUInt128(0x555500017fff1111ULL, 0xdeadc0dedeadc0deULL);
  __uint128_t arg2 = MakeUInt128(0x0004000840000000ULL, 0xdeadc0dedeadc0deULL);
  auto [res1, fpsr1] = AsmSqrdmulh(arg1, arg2);
  ASSERT_EQ(res1, MakeUInt128(0x0003000040000000ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Top lane saturates: INT16_MIN * INT16_MIN doubled clamps to INT16_MAX.
  __uint128_t arg3 = MakeUInt128(0x8000700040010000ULL, 0xdeadc0dedeadc0deULL);
  __uint128_t arg4 = MakeUInt128(0x8000000100040000ULL, 0xdeadc0dedeadc0deULL);
  auto [res2, fpsr2] = AsmSqrdmulh(arg3, arg4);
  ASSERT_EQ(res2, MakeUInt128(0x7fff000100020000ULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
8594
TEST(Arm64InsnTest, SignedSaturatingRoundingDoublingMultiplyHighHalf32x4IndexedElem) {
  // SQRDMULH (by element, 4S): every lane of %2 is multiplied by element 0 of
  // %3; rounded high half of the doubled product, saturating with QC.
  constexpr auto AsmSqrdmulh = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqrdmulh %0.4s, %2.4s, %3.s[0]");

  __uint128_t arg1 = MakeU32x4(0x20000001UL, 0x00000004UL, 0x7eed0003UL, 0x00000010UL);
  __uint128_t arg2 = MakeU32x4(0x00000008UL, 0xfeedfeedUL, 0xfeedfeedUL, 0xfeedfeedUL);
  auto [res1, fpsr1] = AsmSqrdmulh(arg1, arg2);
  // Without rounding, result should be 7 instead of 8.
  ASSERT_EQ(res1, MakeU32x4(0x2UL, 0x0UL, 0x8UL, 0x0UL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Lane 0 saturates: INT32_MIN * INT32_MIN doubled clamps to INT32_MAX.
  __uint128_t arg3 = MakeU32x4(0x80000000UL, 0x00000004UL, 0xfeed0003UL, 0x00000010UL);
  __uint128_t arg4 = MakeU32x4(0x80000000UL, 0xfeedfeedUL, 0xfeedfeedUL, 0xfeedfeedUL);
  auto [res2, fpsr2] = AsmSqrdmulh(arg3, arg4);
  ASSERT_EQ(res2, MakeU32x4(0x7fffffffUL, 0xfffffffcUL, 0x0112fffdUL, 0xfffffff0UL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
8611
TEST(Arm64InsnTest, SignedSaturatingRoundingDoublingMultiplyHighHalf32x2IndexedElem) {
  // SQRDMULH (by element, 2S): the two low lanes of %2 times element 0 of %3;
  // rounded high half of the doubled product, saturating with QC.
  constexpr auto AsmSqrdmulh = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqrdmulh %0.2s, %2.2s, %3.s[0]");

  // No saturation; upper (0xdeadc0de) lanes are ignored by the 2S form.
  __uint128_t arg1 = MakeU32x4(0x55555555UL, 0x00000004UL, 0xdeadc0deUL, 0xdeadc0deUL);
  __uint128_t arg2 = MakeU32x4(0x00000004UL, 0xdeadc0deUL, 0xdeadc0deUL, 0xdeadc0deUL);
  auto [res1, fpsr1] = AsmSqrdmulh(arg1, arg2);
  ASSERT_EQ(res1, MakeU32x4(0x3UL, 0x0UL, 0x0UL, 0x0UL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Lane 0 saturates: INT32_MIN * INT32_MIN doubled clamps to INT32_MAX.
  __uint128_t arg3 = MakeU32x4(0x80000000UL, 0x00000004UL, 0xdeadc0deUL, 0xdeadc0deUL);
  __uint128_t arg4 = MakeU32x4(0x80000000UL, 0xdeadc0deUL, 0xdeadc0deUL, 0xdeadc0deUL);
  auto [res2, fpsr2] = AsmSqrdmulh(arg3, arg4);
  ASSERT_EQ(res2, MakeU32x4(0x7fffffffUL, 0xfffffffcUL, 0x0UL, 0x0UL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
8627
TEST(Arm64InsnTest, SignedSaturatingRoundingDoublingMultiplyHighHalf16x8IndexedElem) {
  // SQRDMULH (by element, 8H): every 16-bit lane of %2 times element 7 of %3;
  // rounded high half of the doubled product, saturating with QC.
  constexpr auto AsmSqrdmulh = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqrdmulh %0.8h, %2.8h, %3.h[7]");

  // No saturation (multiplier element is 0x0008).
  __uint128_t arg1 = MakeUInt128(0x7fff800045670000ULL, 0xfe00780020004001ULL);
  __uint128_t arg2 = MakeUInt128(0xfeedfeedfeedfeedULL, 0x0008feedfeedfeedULL);
  auto [res1, fpsr1] = AsmSqrdmulh(arg1, arg2);
  ASSERT_EQ(res1, MakeUInt128(0x0008fff800040000ULL, 0x0000000800020004ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Multiplier element is 0x8000; the 0x8000 lane of %2 saturates to INT16_MAX.
  __uint128_t arg3 = MakeUInt128(0x7fff800045670000ULL, 0xfe00780020004001ULL);
  __uint128_t arg4 = MakeUInt128(0xfeedfeedfeedfeedULL, 0x8000feedfeedfeedULL);
  auto [res2, fpsr2] = AsmSqrdmulh(arg3, arg4);
  ASSERT_EQ(res2, MakeUInt128(0x80017fffba990000ULL, 0x02008800e000bfffULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
8643
TEST(Arm64InsnTest, SignedSaturatingRoundingDoublingMultiplyHighHalf16x4IndexedElem) {
  // SQRDMULH (by element, 4H): the four low 16-bit lanes of %2 times element 7
  // of %3; rounded high half of the doubled product, upper result half cleared.
  constexpr auto AsmSqrdmulh = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqrdmulh %0.4h, %2.4h, %3.h[7]");

  // No saturation (multiplier element is 0x0004).
  __uint128_t arg1 = MakeUInt128(0x7fff800055550000ULL, 0xdeadc0dedeadc0deULL);
  __uint128_t arg2 = MakeUInt128(0xdeadc0dedeadc0deULL, 0x0004c0dedeadc0deULL);
  auto [res1, fpsr1] = AsmSqrdmulh(arg1, arg2);
  ASSERT_EQ(res1, MakeUInt128(0x0004fffc00030000ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Multiplier element is 0x8000; the 0x8000 lane of %2 saturates to INT16_MAX.
  __uint128_t arg3 = MakeUInt128(0x7fff800045670000ULL, 0xdeadc0dedeadc0deULL);
  __uint128_t arg4 = MakeUInt128(0xdeadc0dedeadc0deULL, 0x8000c0dedeadc0deULL);
  auto [res2, fpsr2] = AsmSqrdmulh(arg3, arg4);
  ASSERT_EQ(res2, MakeUInt128(0x80017fffba990000ULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
8659
TEST(Arm64InsnTest, SignedSaturatingRoundingDoublingMultiplyHighHalf32x1) {
  // SQRDMULH (scalar, S): rounded high half of the doubled signed 32-bit
  // product; only the low 32 bits of the result register are written.
  constexpr auto AsmSqrdmulh = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqrdmulh %s0, %s2, %s3");

  __uint128_t arg1 = MakeU32x4(0x556789abUL, 0xfeedfeedUL, 0xfeedfeedUL, 0xfeedfeedUL);
  __uint128_t arg2 = MakeU32x4(0x00000004UL, 0xfeedfeedUL, 0xfeedfeedUL, 0xfeedfeedUL);
  auto [res1, fpsr1] = AsmSqrdmulh(arg1, arg2);
  // Without rounding, result should be 2 instead of 3.
  ASSERT_EQ(res1, MakeU32x4(0x3UL, 0x0UL, 0x0UL, 0x0UL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Saturates: INT32_MIN * INT32_MIN doubled clamps to INT32_MAX.
  __uint128_t arg3 = MakeU32x4(0x80000000UL, 0xfeedfeedUL, 0xfeedfeedUL, 0xfeedfeedUL);
  __uint128_t arg4 = MakeU32x4(0x80000000UL, 0xfeedfeedUL, 0xfeedfeedUL, 0xfeedfeedUL);
  auto [res2, fpsr2] = AsmSqrdmulh(arg3, arg4);
  ASSERT_EQ(res2, MakeU32x4(0x7fffffffUL, 0x0UL, 0x0UL, 0x0UL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
8676
TEST(Arm64InsnTest, SignedSaturatingRoundingDoublingMultiplyHighHalf16x1) {
  // SQRDMULH (scalar, H): rounded high half of the doubled signed 16-bit
  // product; only the low 16 bits of the result register are written.
  constexpr auto AsmSqrdmulh = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqrdmulh %h0, %h2, %h3");

  // No saturation; 0xfeed filler outside the low halfword is ignored.
  __uint128_t arg1 = MakeUInt128(0xfeedfeedfeed5567ULL, 0xfeedfeedfeedfeedULL);
  __uint128_t arg2 = MakeUInt128(0xfeedfeedfeed0004ULL, 0xfeedfeedfeedfeedULL);
  auto [res1, fpsr1] = AsmSqrdmulh(arg1, arg2);
  ASSERT_EQ(res1, MakeUInt128(0x0000000000000003ULL, 0x0ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Saturates: INT16_MIN * INT16_MIN doubled clamps to INT16_MAX.
  __uint128_t arg3 = MakeUInt128(0xfeedfeedfeed8000ULL, 0xfeedfeedfeedfeedULL);
  __uint128_t arg4 = MakeUInt128(0xfeedfeedfeed8000ULL, 0xfeedfeedfeedfeedULL);
  auto [res2, fpsr2] = AsmSqrdmulh(arg3, arg4);
  ASSERT_EQ(res2, MakeUInt128(0x0000000000007fffULL, 0x0ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
8692
TEST(Arm64InsnTest, SignedSaturatingRoundingDoublingMultiplyHighHalf32x1IndexedElem) {
  // SQRDMULH (scalar, by element, S): the 32-bit scalar in %2 times element 2
  // of %3; rounded high half of the doubled product, saturating with QC.
  constexpr auto AsmSqrdmulh = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqrdmulh %s0, %s2, %3.s[2]");

  __uint128_t arg1 = MakeU32x4(0x556789abUL, 0xfeedfeedUL, 0xfeedfeedUL, 0xfeedfeedUL);
  __uint128_t arg2 = MakeU32x4(0xfeedfeedUL, 0xfeedfeedUL, 0x00000004UL, 0xfeedfeedUL);
  auto [res1, fpsr1] = AsmSqrdmulh(arg1, arg2);
  // Without rounding, result should be 2 instead of 3.
  ASSERT_EQ(res1, MakeU32x4(0x3UL, 0x0UL, 0x0UL, 0x0UL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Saturates: INT32_MIN * INT32_MIN doubled clamps to INT32_MAX.
  __uint128_t arg3 = MakeU32x4(0x80000000UL, 0xfeedfeedUL, 0xfeedfeedUL, 0xfeedfeedUL);
  __uint128_t arg4 = MakeU32x4(0xfeedfeedUL, 0xfeedfeedUL, 0x80000000UL, 0xfeedfeedUL);
  auto [res2, fpsr2] = AsmSqrdmulh(arg3, arg4);
  ASSERT_EQ(res2, MakeU32x4(0x7fffffffUL, 0x0UL, 0x0UL, 0x0UL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
8709
TEST(Arm64InsnTest, SignedSaturatingRoundingDoublingMultiplyHighHalf16x1IndexedElem) {
  // SQRDMULH (scalar, by element, H): the 16-bit scalar in %2 times element 7
  // of %3; rounded high half of the doubled product, saturating with QC.
  constexpr auto AsmSqrdmulh = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqrdmulh %h0, %h2, %3.h[7]");

  __uint128_t arg1 = MakeUInt128(0xfeedfeedfeed5567ULL, 0xfeedfeedfeedfeedULL);
  __uint128_t arg2 = MakeUInt128(0xfeedfeedfeedfeedULL, 0x0004feedfeedfeedULL);
  auto [res1, fpsr1] = AsmSqrdmulh(arg1, arg2);
  // Without rounding, result should be 2 instead of 3.
  ASSERT_EQ(res1, MakeUInt128(0x0000000000000003ULL, 0x0ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Saturates: INT16_MIN * INT16_MIN doubled clamps to INT16_MAX.
  __uint128_t arg3 = MakeUInt128(0xfeedfeedfeed8000ULL, 0xfeedfeedfeedfeedULL);
  __uint128_t arg4 = MakeUInt128(0xfeedfeedfeedfeedULL, 0x8000feedfeedfeedULL);
  auto [res2, fpsr2] = AsmSqrdmulh(arg3, arg4);
  ASSERT_EQ(res2, MakeUInt128(0x0000000000007fffULL, 0x0ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
8726
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyHighHalf32x4) {
  // SQDMULH (vector, 4S): like SQRDMULH but truncating (no rounding constant);
  // compare the 0x7ddc4ed8 lane here with 0x7ddc4ed9 in the rounding test.
  constexpr auto AsmSqdmulh = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqdmulh %0.4s, %2.4s, %3.4s");

  // No saturation.
  __uint128_t arg1 = MakeU32x4(0x20000001UL, 0x00000004UL, 0x7eed0003UL, 0x00000010UL);
  __uint128_t arg2 = MakeU32x4(0x00000008UL, 0x00000002UL, 0x7eed0004UL, 0x00000002UL);
  auto [res1, fpsr1] = AsmSqdmulh(arg1, arg2);
  ASSERT_EQ(res1, MakeU32x4(0x2UL, 0x0UL, 0x7ddc4ed8UL, 0x0UL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Lane 0 saturates: INT32_MIN * INT32_MIN doubled clamps to INT32_MAX.
  __uint128_t arg3 = MakeU32x4(0x80000000UL, 0x00000004UL, 0xfeed0003UL, 0x00000010UL);
  __uint128_t arg4 = MakeU32x4(0x80000000UL, 0x00000002UL, 0xfeed0004UL, 0x00000002UL);
  auto [res2, fpsr2] = AsmSqdmulh(arg3, arg4);
  ASSERT_EQ(res2, MakeU32x4(0x7fffffffUL, 0x0UL, 0x00024ed1UL, 0x0UL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
8742
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyHighHalf32x2) {
  // SQDMULH (vector, 2S): truncating high half of the doubled signed product
  // on the two low 32-bit lanes; the upper half of the result is cleared.
  constexpr auto AsmSqdmulh = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqdmulh %0.2s, %2.2s, %3.2s");

  // No saturation; upper (0xdeadc0de) lanes are ignored by the 2S form.
  __uint128_t arg1 = MakeU32x4(0x55555555UL, 0x00000004UL, 0xdeadc0deUL, 0xdeadc0deUL);
  __uint128_t arg2 = MakeU32x4(0x00000004UL, 0x00000002UL, 0xdeadc0deUL, 0xdeadc0deUL);
  auto [res1, fpsr1] = AsmSqdmulh(arg1, arg2);
  ASSERT_EQ(res1, MakeU32x4(0x2UL, 0x0UL, 0x0UL, 0x0UL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Lane 0 saturates: INT32_MIN * INT32_MIN doubled clamps to INT32_MAX.
  __uint128_t arg3 = MakeU32x4(0x80000000UL, 0x00000004UL, 0xdeadc0deUL, 0xdeadc0deUL);
  __uint128_t arg4 = MakeU32x4(0x80000000UL, 0x00000002UL, 0xdeadc0deUL, 0xdeadc0deUL);
  auto [res2, fpsr2] = AsmSqdmulh(arg3, arg4);
  ASSERT_EQ(res2, MakeU32x4(0x7fffffffUL, 0x0UL, 0x0UL, 0x0UL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
8758
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyHighHalf16x8) {
  // SQDMULH (vector, 8H): truncating high half of the doubled signed product
  // per 16-bit lane; QC is set when a lane saturates (0x8000 squared).
  constexpr auto AsmSqdmulh = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqdmulh %0.8h, %2.8h, %3.8h");

  // No saturation.
  __uint128_t arg1 = MakeUInt128(0x200000017fff1111ULL, 0x7eed000300000010ULL);
  __uint128_t arg2 = MakeUInt128(0x0008000840000000ULL, 0x7eed000400000002ULL);
  auto [res1, fpsr1] = AsmSqdmulh(arg1, arg2);
  ASSERT_EQ(res1, MakeUInt128(0x000200003fff0000ULL, 0x7ddc000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Top lane saturates: INT16_MIN * INT16_MIN doubled clamps to INT16_MAX.
  __uint128_t arg3 = MakeUInt128(0x8000700040010000ULL, 0xfeed0003ffff0010ULL);
  __uint128_t arg4 = MakeUInt128(0x8000000100040000ULL, 0xfeed0004ffff0002ULL);
  auto [res2, fpsr2] = AsmSqdmulh(arg3, arg4);
  ASSERT_EQ(res2, MakeUInt128(0x7fff000000020000ULL, 0x0002000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
8774
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyHighHalf16x4) {
  // SQDMULH (vector, 4H): truncating high half of the doubled signed product
  // on the four low 16-bit lanes; the upper half of the result is cleared.
  constexpr auto AsmSqdmulh = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqdmulh %0.4h, %2.4h, %3.4h");

  // No saturation; upper (0xdeadc0de) halves are ignored by the 4H form.
  __uint128_t arg1 = MakeUInt128(0x555500017fff1111ULL, 0xdeadc0dedeadc0deULL);
  __uint128_t arg2 = MakeUInt128(0x0004000840000000ULL, 0xdeadc0dedeadc0deULL);
  auto [res1, fpsr1] = AsmSqdmulh(arg1, arg2);
  ASSERT_EQ(res1, MakeUInt128(0x000200003fff0000ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Top lane saturates: INT16_MIN * INT16_MIN doubled clamps to INT16_MAX.
  __uint128_t arg3 = MakeUInt128(0x8000700040010000ULL, 0xdeadc0dedeadc0deULL);
  __uint128_t arg4 = MakeUInt128(0x8000000100040000ULL, 0xdeadc0dedeadc0deULL);
  auto [res2, fpsr2] = AsmSqdmulh(arg3, arg4);
  ASSERT_EQ(res2, MakeUInt128(0x7fff000000020000ULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
8790
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyHighHalf32x4IndexedElem) {
  // SQDMULH (by element, 4S): every lane of %2 times element 0 of %3;
  // truncating high half of the doubled product, saturating with QC.
  constexpr auto AsmSqdmulh = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqdmulh %0.4s, %2.4s, %3.s[0]");

  // No saturation (compare 0x7 here with 0x8 in the rounding variant).
  __uint128_t arg1 = MakeU32x4(0x20000001UL, 0x00000004UL, 0x7eed0003UL, 0x00000010UL);
  __uint128_t arg2 = MakeU32x4(0x00000008UL, 0xfeedfeedUL, 0xfeedfeedUL, 0xfeedfeedUL);
  auto [res1, fpsr1] = AsmSqdmulh(arg1, arg2);
  ASSERT_EQ(res1, MakeU32x4(0x2UL, 0x0UL, 0x7UL, 0x0UL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Lane 0 saturates: INT32_MIN * INT32_MIN doubled clamps to INT32_MAX.
  __uint128_t arg3 = MakeU32x4(0x80000000UL, 0x00000004UL, 0xfeed0003UL, 0x00000010UL);
  __uint128_t arg4 = MakeU32x4(0x80000000UL, 0xfeedfeedUL, 0xfeedfeedUL, 0xfeedfeedUL);
  auto [res2, fpsr2] = AsmSqdmulh(arg3, arg4);
  ASSERT_EQ(res2, MakeU32x4(0x7fffffffUL, 0xfffffffcUL, 0x0112fffdUL, 0xfffffff0UL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
8806
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyHighHalf32x2IndexedElem) {
  // SQDMULH (by element, 2S): the two low lanes of %2 times element 0 of %3;
  // truncating high half of the doubled product, saturating with QC.
  constexpr auto AsmSqdmulh = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqdmulh %0.2s, %2.2s, %3.s[0]");

  // No saturation; upper (0xdeadc0de) lanes are ignored by the 2S form.
  __uint128_t arg1 = MakeU32x4(0x55555555UL, 0x00000004UL, 0xdeadc0deUL, 0xdeadc0deUL);
  __uint128_t arg2 = MakeU32x4(0x00000004UL, 0xdeadc0deUL, 0xdeadc0deUL, 0xdeadc0deUL);
  auto [res1, fpsr1] = AsmSqdmulh(arg1, arg2);
  ASSERT_EQ(res1, MakeU32x4(0x2UL, 0x0UL, 0x0UL, 0x0UL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Lane 0 saturates: INT32_MIN * INT32_MIN doubled clamps to INT32_MAX.
  __uint128_t arg3 = MakeU32x4(0x80000000UL, 0x00000004UL, 0xdeadc0deUL, 0xdeadc0deUL);
  __uint128_t arg4 = MakeU32x4(0x80000000UL, 0xdeadc0deUL, 0xdeadc0deUL, 0xdeadc0deUL);
  auto [res2, fpsr2] = AsmSqdmulh(arg3, arg4);
  ASSERT_EQ(res2, MakeU32x4(0x7fffffffUL, 0xfffffffcUL, 0x0UL, 0x0UL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
8822
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyHighHalf16x8IndexedElem) {
  // SQDMULH (by element, 8H): every 16-bit lane of %2 times element 7 of %3;
  // truncating high half of the doubled product, saturating with QC.
  constexpr auto AsmSqdmulh = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqdmulh %0.8h, %2.8h, %3.h[7]");

  // No saturation (multiplier element is 0x0008).
  __uint128_t arg1 = MakeUInt128(0x7fff800045670000ULL, 0xfe00780020004001ULL);
  __uint128_t arg2 = MakeUInt128(0xfeedfeedfeedfeedULL, 0x0008feedfeedfeedULL);
  auto [res1, fpsr1] = AsmSqdmulh(arg1, arg2);
  ASSERT_EQ(res1, MakeUInt128(0x0007fff800040000ULL, 0xffff000700020004ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Multiplier element is 0x8000; the 0x8000 lane of %2 saturates to INT16_MAX.
  __uint128_t arg3 = MakeUInt128(0x7fff800045670000ULL, 0xfe00780020004001ULL);
  __uint128_t arg4 = MakeUInt128(0xfeedfeedfeedfeedULL, 0x8000feedfeedfeedULL);
  auto [res2, fpsr2] = AsmSqdmulh(arg3, arg4);
  ASSERT_EQ(res2, MakeUInt128(0x80017fffba990000ULL, 0x02008800e000bfffULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
8838
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyHighHalf16x4IndexedElem) {
  // SQDMULH (by element, 4H): the four low 16-bit lanes of %2 times element 7
  // of %3; truncating high half of the doubled product, upper half cleared.
  constexpr auto AsmSqdmulh = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqdmulh %0.4h, %2.4h, %3.h[7]");

  // No saturation (multiplier element is 0x0004).
  __uint128_t arg1 = MakeUInt128(0x7fff800055550000ULL, 0xdeadc0dedeadc0deULL);
  __uint128_t arg2 = MakeUInt128(0xdeadc0dedeadc0deULL, 0x0004c0dedeadc0deULL);
  auto [res1, fpsr1] = AsmSqdmulh(arg1, arg2);
  ASSERT_EQ(res1, MakeUInt128(0x0003fffc00020000ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Multiplier element is 0x8000; the 0x8000 lane of %2 saturates to INT16_MAX.
  __uint128_t arg3 = MakeUInt128(0x7fff800045670000ULL, 0xdeadc0dedeadc0deULL);
  __uint128_t arg4 = MakeUInt128(0xdeadc0dedeadc0deULL, 0x8000c0dedeadc0deULL);
  auto [res2, fpsr2] = AsmSqdmulh(arg3, arg4);
  ASSERT_EQ(res2, MakeUInt128(0x80017fffba990000ULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
8854
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyHighHalf32x1) {
  // SQDMULH (scalar, S): truncating high half of the doubled signed 32-bit
  // product; only the low 32 bits of the result register are written.
  constexpr auto AsmSqdmulh = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqdmulh %s0, %s2, %s3");

  // No saturation; 0xfeedfeed filler lanes are ignored by the scalar form.
  __uint128_t arg1 = MakeU32x4(0x556789abUL, 0xfeedfeedUL, 0xfeedfeedUL, 0xfeedfeedUL);
  __uint128_t arg2 = MakeU32x4(0x00000004UL, 0xfeedfeedUL, 0xfeedfeedUL, 0xfeedfeedUL);
  auto [res1, fpsr1] = AsmSqdmulh(arg1, arg2);
  ASSERT_EQ(res1, MakeU32x4(0x2UL, 0x0UL, 0x0UL, 0x0UL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Saturates: INT32_MIN * INT32_MIN doubled clamps to INT32_MAX.
  __uint128_t arg3 = MakeU32x4(0x80000000UL, 0xfeedfeedUL, 0xfeedfeedUL, 0xfeedfeedUL);
  __uint128_t arg4 = MakeU32x4(0x80000000UL, 0xfeedfeedUL, 0xfeedfeedUL, 0xfeedfeedUL);
  auto [res2, fpsr2] = AsmSqdmulh(arg3, arg4);
  ASSERT_EQ(res2, MakeU32x4(0x7fffffffUL, 0x0UL, 0x0UL, 0x0UL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
8870
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyHighHalf16x1) {
  // SQDMULH (scalar, H): truncating high half of the doubled signed 16-bit
  // product; only the low 16 bits of the result register are written.
  constexpr auto AsmSqdmulh = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqdmulh %h0, %h2, %h3");

  // No saturation (compare 0x2 here with 0x3 in the rounding variant).
  __uint128_t arg1 = MakeUInt128(0xfeedfeedfeed5567ULL, 0xfeedfeedfeedfeedULL);
  __uint128_t arg2 = MakeUInt128(0xfeedfeedfeed0004ULL, 0xfeedfeedfeedfeedULL);
  auto [res1, fpsr1] = AsmSqdmulh(arg1, arg2);
  ASSERT_EQ(res1, MakeUInt128(0x0000000000000002ULL, 0x0ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Saturates: INT16_MIN * INT16_MIN doubled clamps to INT16_MAX.
  __uint128_t arg3 = MakeUInt128(0xfeedfeedfeed8000ULL, 0xfeedfeedfeedfeedULL);
  __uint128_t arg4 = MakeUInt128(0xfeedfeedfeed8000ULL, 0xfeedfeedfeedfeedULL);
  auto [res2, fpsr2] = AsmSqdmulh(arg3, arg4);
  ASSERT_EQ(res2, MakeUInt128(0x0000000000007fffULL, 0x0ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
8886
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyHighHalf32x1IndexedElem) {
  // SQDMULH (scalar, by element, S): the 32-bit scalar in %2 times element 2
  // of %3; truncating high half of the doubled product, saturating with QC.
  constexpr auto AsmSqdmulh = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqdmulh %s0, %s2, %3.s[2]");

  // No saturation.
  __uint128_t arg1 = MakeU32x4(0x556789abUL, 0xfeedfeedUL, 0xfeedfeedUL, 0xfeedfeedUL);
  __uint128_t arg2 = MakeU32x4(0xfeedfeedUL, 0xfeedfeedUL, 0x00000004UL, 0xfeedfeedUL);
  auto [res1, fpsr1] = AsmSqdmulh(arg1, arg2);
  ASSERT_EQ(res1, MakeU32x4(0x2UL, 0x0UL, 0x0UL, 0x0UL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Saturates: INT32_MIN * INT32_MIN doubled clamps to INT32_MAX.
  __uint128_t arg3 = MakeU32x4(0x80000000UL, 0xfeedfeedUL, 0xfeedfeedUL, 0xfeedfeedUL);
  __uint128_t arg4 = MakeU32x4(0xfeedfeedUL, 0xfeedfeedUL, 0x80000000UL, 0xfeedfeedUL);
  auto [res2, fpsr2] = AsmSqdmulh(arg3, arg4);
  ASSERT_EQ(res2, MakeU32x4(0x7fffffffUL, 0x0UL, 0x0UL, 0x0UL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
8902
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyHighHalf16x1IndexedElem) {
  // SQDMULH (scalar, by element, H): the 16-bit scalar in %2 times element 7
  // of %3; truncating high half of the doubled product, saturating with QC.
  constexpr auto AsmSqdmulh = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqdmulh %h0, %h2, %3.h[7]");

  // No saturation.
  __uint128_t arg1 = MakeUInt128(0xfeedfeedfeed5567ULL, 0xfeedfeedfeedfeedULL);
  __uint128_t arg2 = MakeUInt128(0xfeedfeedfeedfeedULL, 0x0004feedfeedfeedULL);
  auto [res1, fpsr1] = AsmSqdmulh(arg1, arg2);
  ASSERT_EQ(res1, MakeUInt128(0x0000000000000002ULL, 0x0ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Saturates: INT16_MIN * INT16_MIN doubled clamps to INT16_MAX.
  __uint128_t arg3 = MakeUInt128(0xfeedfeedfeed8000ULL, 0xfeedfeedfeedfeedULL);
  __uint128_t arg4 = MakeUInt128(0xfeedfeedfeedfeedULL, 0x8000feedfeedfeedULL);
  auto [res2, fpsr2] = AsmSqdmulh(arg3, arg4);
  ASSERT_EQ(res2, MakeUInt128(0x0000000000007fffULL, 0x0ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
8918
// Parameterized fixture: the parameter is an FPCR bit pattern to round-trip.
class FpcrBitSupport : public testing::TestWithParam<uint64_t> {};
8920
// Writes the parameter to FPCR via MSR, reads it back via MRS, and checks the
// round-trip preserves the bit pattern.
TEST_P(FpcrBitSupport, SupportsBit) {
  uint64_t fpcr1;
  asm("msr fpcr, %x1\n\t"
      "mrs %x0, fpcr"
      : "=r"(fpcr1)
      : "r"(static_cast<uint64_t>(GetParam())));
  ASSERT_EQ(fpcr1, GetParam()) << "Should be able to set then get FPCR bit: " << GetParam();
}
8929
// Note: The exception enablement flags (such as IOE) are not checked, because when tested on actual
// ARM64 device we find that the tests fail either because they cannot be written or are RAZ (read
// as zero).
// The trailing 0 parameter checks that an all-clear FPCR also round-trips.
INSTANTIATE_TEST_SUITE_P(Arm64InsnTest,
                         FpcrBitSupport,
                         testing::Values(kFpcrRModeTieEven,
                                         kFpcrRModeZero,
                                         kFpcrRModeNegInf,
                                         kFpcrRModePosInf,
                                         kFpcrFzBit,
                                         kFpcrDnBit,
                                         0));
8942
// Parameterized fixture: the parameter is an FPSR bit pattern to round-trip.
class FpsrBitSupport : public testing::TestWithParam<uint64_t> {};
8944
// Writes the parameter to FPSR via MSR, reads it back via MRS, and checks the
// round-trip preserves the bit pattern.
TEST_P(FpsrBitSupport, SupportsBit) {
  uint64_t fpsr1;
  asm("msr fpsr, %1\n\t"
      "mrs %0, fpsr"
      : "=r"(fpsr1)
      : "r"(static_cast<uint64_t>(GetParam())));
  ASSERT_EQ(fpsr1, GetParam()) << "Should be able to set then get FPSR bit";
}
8953
// Cumulative exception flags plus QC; each must be individually writable.
INSTANTIATE_TEST_SUITE_P(Arm64InsnTest,
                         FpsrBitSupport,
                         testing::Values(kFpsrIocBit,
                                         kFpsrDzcBit,
                                         kFpsrOfcBit,
                                         kFpsrUfcBit,
                                         kFpsrIxcBit,
                                         kFpsrIdcBit,
                                         kFpsrQcBit));
8963
TEST(Arm64InsnTest, UnsignedDivide64) {
  // UDIV on X registers: verifies full 64-bit operation and the architectural
  // zero result for division by zero.
  auto udiv64 = [](uint64_t dividend, uint64_t divisor) -> uint64_t {
    uint64_t quotient;
    asm("udiv %0, %1, %2" : "=r"(quotient) : "r"(dividend), "r"(divisor));
    return quotient;
  };

  ASSERT_EQ(udiv64(0x8'0000'0000ULL, 2ULL), 0x4'0000'0000ULL) << "Division should be 64-bit.";
  ASSERT_EQ(udiv64(123ULL, 0ULL), 0ULL) << "Div by 0 should result in 0.";
}
8973
TEST(Arm64InsnTest, SignedDivide64) {
  // SDIV (64-bit form): check sign handling for division by -1, including the
  // INT64_MIN overflow case, which wraps back to INT64_MIN.
  auto signed_div = [](int64_t dividend, int64_t divisor) {
    int64_t quotient;
    asm("sdiv %0, %1, %2" : "=r"(quotient) : "r"(dividend), "r"(divisor));
    return quotient;
  };
  ASSERT_EQ(signed_div(67802402LL, -1LL), -67802402LL)
      << "Division by -1 should flip sign if dividend is not numeric_limits::min.";
  ASSERT_EQ(signed_div(-531675317891LL, -1LL), 531675317891LL)
      << "Division by -1 should flip sign if dividend is not numeric_limits::min.";
  ASSERT_EQ(signed_div(std::numeric_limits<int64_t>::min(), -1LL),
            std::numeric_limits<int64_t>::min())
      << "Div of numeric_limits::min by -1 should result in numeric_limits::min.";
}
8987
TEST(Arm64InsnTest, AesEncode) {
  // AESE: single AES encryption round step on the 16-byte vector state.
  // The destination register is both input state (tied via "0") and output.
  __uint128_t state = MakeUInt128(0x1111'2222'3333'4444ULL, 0x5555'6666'7777'8888ULL);
  __uint128_t round_key = MakeUInt128(0xaaaa'bbbb'cccc'ddddULL, 0xeeee'ffff'0000'9999ULL);
  __uint128_t encoded;
  asm("aese %0.16b, %2.16b" : "=w"(encoded) : "0"(state), "w"(round_key));
  ASSERT_EQ(encoded, MakeUInt128(0x16ea'82ee'eaf5'eeeeULL, 0xf5ea'eeee'ea16'ee82ULL));
}
8995
TEST(Arm64InsnTest, AesMixColumns) {
  // AESMC: AES MixColumns transformation of the 16-byte vector state.
  __uint128_t state = MakeUInt128(0x1111'2222'3333'4444ULL, 0x5555'6666'7777'8888ULL);
  __uint128_t mixed = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("aesmc %0.16b, %1.16b")(state);
  ASSERT_EQ(mixed, MakeUInt128(0x77114422dd33aa44ULL, 0x3355006692776d88ULL));
}
9001
TEST(Arm64InsnTest, AesDecode) {
  // Check that it's opposite to AesEncode with extra XORs.
  // Start from AesEncode's expected output, undo the round with AESD, and
  // expect AesEncode's original input back (modulo the key XORs on each side).
  __uint128_t round_key = MakeUInt128(0xaaaa'bbbb'cccc'ddddULL, 0xeeee'ffff'0000'9999ULL);
  __uint128_t encoded = MakeUInt128(0x16ea'82ee'eaf5'eeeeULL, 0xf5ea'eeee'ea16'ee82ULL);
  encoded ^= round_key;
  __uint128_t decoded;
  asm("aesd %0.16b, %2.16b" : "=w"(decoded) : "0"(encoded), "w"(round_key));
  ASSERT_EQ(decoded ^ round_key, MakeUInt128(0x1111'2222'3333'4444ULL, 0x5555'6666'7777'8888ULL));
}
9011
TEST(Arm64InsnTest, AesInverseMixColumns) {
  // AESIMC: inverse MixColumns; feeding it AesMixColumns' expected output
  // must recover that test's original input.
  __uint128_t mixed = MakeUInt128(0x77114422dd33aa44ULL, 0x3355006692776d88ULL);
  __uint128_t unmixed = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("aesimc %0.16b, %1.16b")(mixed);
  ASSERT_EQ(unmixed, MakeUInt128(0x1111'2222'3333'4444ULL, 0x5555'6666'7777'8888ULL));
}
9017
9018 } // namespace
9019