1 /*
2  * Copyright (C) 2019 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "gtest/gtest.h"
18 
19 #include <cstdint>
20 #include <initializer_list>
21 #include <limits>
22 
23 #include "utility.h"
24 
25 namespace {
26 
TEST(Arm64InsnTest, UnsignedBitfieldMoveNoShift) {
  // With immr=0 and imms=63 the bitfield covers the whole register, so UBFM
  // degenerates into a plain 64-bit copy.
  const uint64_t input = 0x3952247371907021ULL;
  uint64_t output;

  asm("ubfm %0, %1, #0, #63" : "=r"(output) : "r"(input));

  ASSERT_EQ(output, 0x3952247371907021ULL);
}
35 
TEST(Arm64InsnTest, BitfieldLeftInsertion) {
  uint64_t arg = 0x389522868478abcdULL;
  uint64_t res = 0x1101044682325271ULL;

  // BFM with imms (15) < immr (40) behaves like BFI: bits [15:0] of the
  // source are inserted at bit position 64-40=24 of the destination, i.e.
  // destination bits [39:24] become 0xabcd while the rest are preserved.
  //
  // `res` is both read and written, so express the tie with the idiomatic
  // "+r" read-write constraint instead of the legacy "=r"/"0" matching pair.
  asm("bfm %0, %1, #40, #15" : "+r"(res) : "r"(arg));

  ASSERT_EQ(res, 0x110104abcd325271ULL);
}
44 
TEST(Arm64InsnTest, BitfieldRightInsertion) {
  uint64_t arg = 0x3276561809377344ULL;
  uint64_t res = 0x1668039626579787ULL;

  // BFM with imms (39) >= immr (4) behaves like BFXIL: bits [39:4] of the
  // source (36 bits, 0x180937734) replace bits [35:0] of the destination,
  // leaving the destination's upper 28 bits intact.
  //
  // `res` is both read and written, so express the tie with the idiomatic
  // "+r" read-write constraint instead of the legacy "=r"/"0" matching pair.
  asm("bfm %0, %1, #4, #39" : "+r"(res) : "r"(arg));

  ASSERT_EQ(res, 0x1668039180937734ULL);
}
53 
TEST(Arm64InsnTest, MoveImmToFp32) {
  // The tests below verify that fmov works with various immediates.
  // Specifically, the instruction has an 8-bit immediate field consisting of
  // the following four subfields:
  //
  // - sign (one bit)
  // - upper exponent (one bit)
  // - lower exponent (two bits)
  // - mantissa (four bits)
  //
  // For example, we decompose imm8 = 0b01001111 into:
  //
  // - sign = 0 (positive)
  // - upper exponent = 1
  // - lower exponent = 00
  // - mantissa = 1111
  //
  // This immediate corresponds to 32-bit floating point value:
  //
  // 0 011111 00 1111 0000000000000000000
  // | |      |  |    |
  // | |      |  |    +- 19 zeros
  // | |      |  +------ mantissa
  // | |      +--------- lower exponent
  // | +---------------- upper exponent (custom extended to 6 bits)
  // +------------------ sign
  //
  // Thus we have:
  //
  //   1.11110000... * 2^(124-127) = 0.2421875
  //
  // where 1.11110000... is in binary.
  //
  // See VFPExpandImm in the ARM Architecture Manual for details.
  //
  // We enumerate all possible 8-bit immediate encodings of the form:
  //
  //   {0,1}{0,1}{00,11}{0000,1111}
  //
  // to verify that the decoder correctly splits the immediate into the
  // subfields and reconstructs the intended floating-point value.

  // imm8 = 0b00000000
  __uint128_t res1 = ASM_INSN_WRAP_FUNC_W_RES("fmov s0, #2.0e+00")();
  ASSERT_EQ(res1, MakeUInt128(0x40000000U, 0U));

  // imm8 = 0b00001111
  __uint128_t res2 = ASM_INSN_WRAP_FUNC_W_RES("fmov s0, #3.8750e+00")();
  ASSERT_EQ(res2, MakeUInt128(0x40780000U, 0U));

  // imm8 = 0b00110000
  __uint128_t res3 = ASM_INSN_WRAP_FUNC_W_RES("fmov s0, #1.60e+01")();
  ASSERT_EQ(res3, MakeUInt128(0x41800000U, 0U));

  // imm8 = 0b00111111
  __uint128_t res4 = ASM_INSN_WRAP_FUNC_W_RES("fmov s0, #3.10e+01")();
  ASSERT_EQ(res4, MakeUInt128(0x41f80000U, 0U));

  // imm8 = 0b01000000
  __uint128_t res5 = ASM_INSN_WRAP_FUNC_W_RES("fmov s0, #1.250e-01")();
  ASSERT_EQ(res5, MakeUInt128(0x3e000000U, 0U));

  // imm8 = 0b01001111
  __uint128_t res6 = ASM_INSN_WRAP_FUNC_W_RES("fmov s0, #2.4218750e-01")();
  ASSERT_EQ(res6, MakeUInt128(0x3e780000U, 0U));

  // imm8 = 0b01110000
  __uint128_t res7 = ASM_INSN_WRAP_FUNC_W_RES("fmov s0, #1.0e+00")();
  ASSERT_EQ(res7, MakeUInt128(0x3f800000U, 0U));

  // imm8 = 0b01111111
  __uint128_t res8 = ASM_INSN_WRAP_FUNC_W_RES("fmov s0, #1.93750e+00")();
  ASSERT_EQ(res8, MakeUInt128(0x3ff80000U, 0U));

  // imm8 = 0b10000000
  __uint128_t res9 = ASM_INSN_WRAP_FUNC_W_RES("fmov s0, #-2.0e+00")();
  ASSERT_EQ(res9, MakeUInt128(0xc0000000U, 0U));

  // imm8 = 0b10001111
  __uint128_t res10 = ASM_INSN_WRAP_FUNC_W_RES("fmov s0, #-3.8750e+00")();
  ASSERT_EQ(res10, MakeUInt128(0xc0780000U, 0U));

  // imm8 = 0b10110000
  __uint128_t res11 = ASM_INSN_WRAP_FUNC_W_RES("fmov s0, #-1.60e+01")();
  ASSERT_EQ(res11, MakeUInt128(0xc1800000U, 0U));

  // imm8 = 0b10111111
  __uint128_t res12 = ASM_INSN_WRAP_FUNC_W_RES("fmov s0, #-3.10e+01")();
  ASSERT_EQ(res12, MakeUInt128(0xc1f80000U, 0U));

  // imm8 = 0b11000000
  __uint128_t res13 = ASM_INSN_WRAP_FUNC_W_RES("fmov s0, #-1.250e-01")();
  ASSERT_EQ(res13, MakeUInt128(0xbe000000U, 0U));

  // imm8 = 0b11001111
  __uint128_t res14 = ASM_INSN_WRAP_FUNC_W_RES("fmov s0, #-2.4218750e-01")();
  ASSERT_EQ(res14, MakeUInt128(0xbe780000U, 0U));

  // imm8 = 0b11110000
  __uint128_t res15 = ASM_INSN_WRAP_FUNC_W_RES("fmov s0, #-1.0e+00")();
  ASSERT_EQ(res15, MakeUInt128(0xbf800000U, 0U));

  // imm8 = 0b11111111
  __uint128_t res16 = ASM_INSN_WRAP_FUNC_W_RES("fmov s0, #-1.93750e+00")();
  ASSERT_EQ(res16, MakeUInt128(0xbff80000U, 0U));
}
160 
TEST(Arm64InsnTest, MoveImmToFp64) {
  // The tests below verify that fmov works with various immediates.
  // Specifically, the instruction has an 8-bit immediate field consisting of
  // the following four subfields:
  //
  // - sign (one bit)
  // - upper exponent (one bit)
  // - lower exponent (two bits)
  // - mantissa (four bits)
  //
  // For example, we decompose imm8 = 0b01001111 into:
  //
  // - sign = 0 (positive)
  // - upper exponent = 1
  // - lower exponent = 00
  // - mantissa = 1111
  //
  // This immediate corresponds to 64-bit floating point value:
  //
  // 0 011111111 00 1111 000000000000000000000000000000000000000000000000
  // | |         |  |    |
  // | |         |  |    +- 48 zeros
  // | |         |  +------ mantissa
  // | |         +--------- lower exponent
  // | +------------------- upper exponent (custom extended to 9 bits)
  // +--------------------- sign
  //
  // Thus we have:
  //
  //   1.11110000... * 2^(1020-1023) = 0.2421875
  //
  // where 1.11110000... is in binary.
  //
  // See VFPExpandImm in the ARM Architecture Manual for details.
  //
  // We enumerate all possible 8-bit immediate encodings of the form:
  //
  //   {0,1}{0,1}{00,11}{0000,1111}
  //
  // to verify that the decoder correctly splits the immediate into the
  // subfields and reconstructs the intended floating-point value.

  // imm8 = 0b00000000
  __uint128_t res1 = ASM_INSN_WRAP_FUNC_W_RES("fmov %d0, #2.0e+00")();
  ASSERT_EQ(res1, MakeUInt128(0x4000000000000000ULL, 0U));

  // imm8 = 0b00001111
  __uint128_t res2 = ASM_INSN_WRAP_FUNC_W_RES("fmov %d0, #3.8750e+00")();
  ASSERT_EQ(res2, MakeUInt128(0x400f000000000000ULL, 0U));

  // imm8 = 0b00110000
  __uint128_t res3 = ASM_INSN_WRAP_FUNC_W_RES("fmov %d0, #1.60e+01")();
  ASSERT_EQ(res3, MakeUInt128(0x4030000000000000ULL, 0U));

  // imm8 = 0b00111111
  __uint128_t res4 = ASM_INSN_WRAP_FUNC_W_RES("fmov %d0, #3.10e+01")();
  ASSERT_EQ(res4, MakeUInt128(0x403f000000000000ULL, 0U));

  // imm8 = 0b01000000
  __uint128_t res5 = ASM_INSN_WRAP_FUNC_W_RES("fmov %d0, #1.250e-01")();
  ASSERT_EQ(res5, MakeUInt128(0x3fc0000000000000ULL, 0U));

  // imm8 = 0b01001111
  __uint128_t res6 = ASM_INSN_WRAP_FUNC_W_RES("fmov %d0, #2.4218750e-01")();
  ASSERT_EQ(res6, MakeUInt128(0x3fcf000000000000ULL, 0U));

  // imm8 = 0b01110000
  __uint128_t res7 = ASM_INSN_WRAP_FUNC_W_RES("fmov %d0, #1.0e+00")();
  ASSERT_EQ(res7, MakeUInt128(0x3ff0000000000000ULL, 0U));

  // imm8 = 0b01111111
  __uint128_t res8 = ASM_INSN_WRAP_FUNC_W_RES("fmov %d0, #1.93750e+00")();
  ASSERT_EQ(res8, MakeUInt128(0x3fff000000000000ULL, 0U));

  // imm8 = 0b10000000
  __uint128_t res9 = ASM_INSN_WRAP_FUNC_W_RES("fmov %d0, #-2.0e+00")();
  ASSERT_EQ(res9, MakeUInt128(0xc000000000000000ULL, 0U));

  // imm8 = 0b10001111
  __uint128_t res10 = ASM_INSN_WRAP_FUNC_W_RES("fmov %d0, #-3.8750e+00")();
  ASSERT_EQ(res10, MakeUInt128(0xc00f000000000000ULL, 0U));

  // imm8 = 0b10110000
  __uint128_t res11 = ASM_INSN_WRAP_FUNC_W_RES("fmov %d0, #-1.60e+01")();
  ASSERT_EQ(res11, MakeUInt128(0xc030000000000000ULL, 0U));

  // imm8 = 0b10111111
  __uint128_t res12 = ASM_INSN_WRAP_FUNC_W_RES("fmov %d0, #-3.10e+01")();
  ASSERT_EQ(res12, MakeUInt128(0xc03f000000000000ULL, 0U));

  // imm8 = 0b11000000
  __uint128_t res13 = ASM_INSN_WRAP_FUNC_W_RES("fmov %d0, #-1.250e-01")();
  ASSERT_EQ(res13, MakeUInt128(0xbfc0000000000000ULL, 0U));

  // imm8 = 0b11001111
  __uint128_t res14 = ASM_INSN_WRAP_FUNC_W_RES("fmov %d0, #-2.4218750e-01")();
  ASSERT_EQ(res14, MakeUInt128(0xbfcf000000000000ULL, 0U));

  // imm8 = 0b11110000
  __uint128_t res15 = ASM_INSN_WRAP_FUNC_W_RES("fmov %d0, #-1.0e+00")();
  ASSERT_EQ(res15, MakeUInt128(0xbff0000000000000ULL, 0U));

  // imm8 = 0b11111111
  __uint128_t res16 = ASM_INSN_WRAP_FUNC_W_RES("fmov %d0, #-1.93750e+00")();
  ASSERT_EQ(res16, MakeUInt128(0xbfff000000000000ULL, 0U));
}
267 
TEST(Arm64InsnTest, MoveImmToF32x4) {
  // The tests below verify that fmov works with various immediates.
  // Specifically, the instruction has an 8-bit immediate field consisting of
  // the following four subfields:
  //
  // - sign (one bit)
  // - upper exponent (one bit)
  // - lower exponent (two bits)
  // - mantissa (four bits)
  //
  // We enumerate all possible 8-bit immediate encodings of the form:
  //
  //   {0,1}{0,1}{00,11}{0000,1111}
  //
  // to verify that the decoder correctly splits the immediate into the
  // subfields and reconstructs the intended floating-point value.
  //
  // The vector form replicates the expanded 32-bit value into all four lanes.

  // imm8 = 0b00000000
  __uint128_t res1 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.4s, #2.0e+00")();
  ASSERT_EQ(res1, MakeUInt128(0x4000000040000000ULL, 0x4000000040000000ULL));

  // imm8 = 0b00001111
  __uint128_t res2 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.4s, #3.8750e+00")();
  ASSERT_EQ(res2, MakeUInt128(0x4078000040780000ULL, 0x4078000040780000ULL));

  // imm8 = 0b00110000
  __uint128_t res3 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.4s, #1.60e+01")();
  ASSERT_EQ(res3, MakeUInt128(0x4180000041800000ULL, 0x4180000041800000ULL));

  // imm8 = 0b00111111
  __uint128_t res4 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.4s, #3.10e+01")();
  ASSERT_EQ(res4, MakeUInt128(0x41f8000041f80000ULL, 0x41f8000041f80000ULL));

  // imm8 = 0b01000000
  __uint128_t res5 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.4s, #1.250e-01")();
  ASSERT_EQ(res5, MakeUInt128(0x3e0000003e000000ULL, 0x3e0000003e000000ULL));

  // imm8 = 0b01001111
  __uint128_t res6 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.4s, #2.4218750e-01")();
  ASSERT_EQ(res6, MakeUInt128(0x3e7800003e780000ULL, 0x3e7800003e780000ULL));

  // imm8 = 0b01110000
  __uint128_t res7 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.4s, #1.0e+00")();
  ASSERT_EQ(res7, MakeUInt128(0x3f8000003f800000ULL, 0x3f8000003f800000ULL));

  // imm8 = 0b01111111
  __uint128_t res8 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.4s, #1.93750e+00")();
  ASSERT_EQ(res8, MakeUInt128(0x3ff800003ff80000ULL, 0x3ff800003ff80000ULL));

  // imm8 = 0b10000000
  __uint128_t res9 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.4s, #-2.0e+00")();
  ASSERT_EQ(res9, MakeUInt128(0xc0000000c0000000ULL, 0xc0000000c0000000ULL));

  // imm8 = 0b10001111
  __uint128_t res10 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.4s, #-3.8750e+00")();
  ASSERT_EQ(res10, MakeUInt128(0xc0780000c0780000ULL, 0xc0780000c0780000ULL));

  // imm8 = 0b10110000
  __uint128_t res11 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.4s, #-1.60e+01")();
  ASSERT_EQ(res11, MakeUInt128(0xc1800000c1800000ULL, 0xc1800000c1800000ULL));

  // imm8 = 0b10111111
  __uint128_t res12 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.4s, #-3.10e+01")();
  ASSERT_EQ(res12, MakeUInt128(0xc1f80000c1f80000ULL, 0xc1f80000c1f80000ULL));

  // imm8 = 0b11000000
  __uint128_t res13 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.4s, #-1.250e-01")();
  ASSERT_EQ(res13, MakeUInt128(0xbe000000be000000ULL, 0xbe000000be000000ULL));

  // imm8 = 0b11001111
  __uint128_t res14 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.4s, #-2.4218750e-01")();
  ASSERT_EQ(res14, MakeUInt128(0xbe780000be780000ULL, 0xbe780000be780000ULL));

  // imm8 = 0b11110000
  __uint128_t res15 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.4s, #-1.0e+00")();
  ASSERT_EQ(res15, MakeUInt128(0xbf800000bf800000ULL, 0xbf800000bf800000ULL));

  // imm8 = 0b11111111
  __uint128_t res16 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.4s, #-1.93750e+00")();
  ASSERT_EQ(res16, MakeUInt128(0xbff80000bff80000ULL, 0xbff80000bff80000ULL));
}
349 
TEST(Arm64InsnTest, MoveImmToF64x2) {
  // The tests below verify that fmov works with various immediates.
  // Specifically, the instruction has an 8-bit immediate field consisting of
  // the following four subfields:
  //
  // - sign (one bit)
  // - upper exponent (one bit)
  // - lower exponent (two bits)
  // - mantissa (four bits)
  //
  // We enumerate all possible 8-bit immediate encodings of the form:
  //
  //   {0,1}{0,1}{00,11}{0000,1111}
  //
  // to verify that the decoder correctly splits the immediate into the
  // subfields and reconstructs the intended floating-point value.
  //
  // The vector form replicates the expanded 64-bit value into both lanes.

  // imm8 = 0b00000000
  __uint128_t res1 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.2d, #2.0e+00")();
  ASSERT_EQ(res1, MakeUInt128(0x4000000000000000ULL, 0x4000000000000000ULL));

  // imm8 = 0b00001111
  __uint128_t res2 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.2d, #3.8750e+00")();
  ASSERT_EQ(res2, MakeUInt128(0x400f000000000000ULL, 0x400f000000000000ULL));

  // imm8 = 0b00110000
  __uint128_t res3 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.2d, #1.60e+01")();
  ASSERT_EQ(res3, MakeUInt128(0x4030000000000000ULL, 0x4030000000000000ULL));

  // imm8 = 0b00111111
  __uint128_t res4 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.2d, #3.10e+01")();
  ASSERT_EQ(res4, MakeUInt128(0x403f000000000000ULL, 0x403f000000000000ULL));

  // imm8 = 0b01000000
  __uint128_t res5 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.2d, #1.250e-01")();
  ASSERT_EQ(res5, MakeUInt128(0x3fc0000000000000ULL, 0x3fc0000000000000ULL));

  // imm8 = 0b01001111
  __uint128_t res6 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.2d, #2.4218750e-01")();
  ASSERT_EQ(res6, MakeUInt128(0x3fcf000000000000ULL, 0x3fcf000000000000ULL));

  // imm8 = 0b01110000
  __uint128_t res7 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.2d, #1.0e+00")();
  ASSERT_EQ(res7, MakeUInt128(0x3ff0000000000000ULL, 0x3ff0000000000000ULL));

  // imm8 = 0b01111111
  __uint128_t res8 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.2d, #1.93750e+00")();
  ASSERT_EQ(res8, MakeUInt128(0x3fff000000000000ULL, 0x3fff000000000000ULL));

  // imm8 = 0b10000000
  __uint128_t res9 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.2d, #-2.0e+00")();
  ASSERT_EQ(res9, MakeUInt128(0xc000000000000000ULL, 0xc000000000000000ULL));

  // imm8 = 0b10001111
  __uint128_t res10 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.2d, #-3.8750e+00")();
  ASSERT_EQ(res10, MakeUInt128(0xc00f000000000000ULL, 0xc00f000000000000ULL));

  // imm8 = 0b10110000
  __uint128_t res11 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.2d, #-1.60e+01")();
  ASSERT_EQ(res11, MakeUInt128(0xc030000000000000ULL, 0xc030000000000000ULL));

  // imm8 = 0b10111111
  __uint128_t res12 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.2d, #-3.10e+01")();
  ASSERT_EQ(res12, MakeUInt128(0xc03f000000000000ULL, 0xc03f000000000000ULL));

  // imm8 = 0b11000000
  __uint128_t res13 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.2d, #-1.250e-01")();
  ASSERT_EQ(res13, MakeUInt128(0xbfc0000000000000ULL, 0xbfc0000000000000ULL));

  // imm8 = 0b11001111
  __uint128_t res14 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.2d, #-2.4218750e-01")();
  ASSERT_EQ(res14, MakeUInt128(0xbfcf000000000000ULL, 0xbfcf000000000000ULL));

  // imm8 = 0b11110000
  __uint128_t res15 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.2d, #-1.0e+00")();
  ASSERT_EQ(res15, MakeUInt128(0xbff0000000000000ULL, 0xbff0000000000000ULL));

  // imm8 = 0b11111111
  __uint128_t res16 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.2d, #-1.93750e+00")();
  ASSERT_EQ(res16, MakeUInt128(0xbfff000000000000ULL, 0xbfff000000000000ULL));
}
431 
TEST(Arm64InsnTest, MoveFpRegToReg) {
  const __uint128_t vec = MakeUInt128(0x1111aaaa2222bbbbULL, 0x3333cccc4444ddddULL);
  uint64_t gpr = 0xffffeeeeddddccccULL;

  // fmov from the upper 64-bit element.
  asm("fmov %0, %1.d[1]" : "=r"(gpr) : "w"(vec));
  ASSERT_EQ(gpr, 0x3333cccc4444ddddULL);

  // fmov from the lower 64-bit element.
  asm("fmov %0, %d1" : "=r"(gpr) : "w"(vec));
  ASSERT_EQ(gpr, 0x1111aaaa2222bbbbULL);

  // fmov from the lowest 32-bit element.
  asm("fmov %w0, %s1" : "=r"(gpr) : "w"(vec));
  ASSERT_EQ(gpr, 0x2222bbbbULL);
}
448 
TEST(Arm64InsnTest, MoveRegToFpReg) {
  uint64_t arg = 0xffffeeeeddddccccULL;
  __uint128_t res = MakeUInt128(0x1111aaaa2222bbbbULL, 0x3333cccc4444ddddULL);

  // Move to high double.  The low half of the vector is preserved, so the
  // destination is a read-write operand; use the idiomatic "+w" constraint
  // instead of the legacy "=w"/"0" matching pair.
  asm("fmov %0.d[1], %1" : "+w"(res) : "r"(arg));
  ASSERT_EQ(res, MakeUInt128(0x1111aaaa2222bbbbULL, 0xffffeeeeddddccccULL));

  // Move to low double.  The rest of the vector is zeroed, so the
  // destination is write-only here.
  asm("fmov %d0, %1" : "=w"(res) : "r"(arg));
  ASSERT_EQ(res, MakeUInt128(0xffffeeeeddddccccULL, 0x0));

  // Move to single.  Again write-only; upper bits are zeroed.
  asm("fmov %s0, %w1" : "=w"(res) : "r"(arg));
  ASSERT_EQ(res, MakeUInt128(0xddddccccULL, 0x0));
}
465 
TEST(Arm64InsnTest, MoveFpRegToFpReg) {
  __uint128_t actual;

  // Scalar double copy: only the low 64 bits travel; the rest is zeroed.
  const __uint128_t double_bits =
      MakeUInt128(0x402e9eb851eb851fULL, 0xdeadbeefaabbccddULL);  // 15.31 in double
  asm("fmov %d0, %d1" : "=w"(actual) : "w"(double_bits));
  ASSERT_EQ(actual, MakeUInt128(0x402e9eb851eb851fULL, 0ULL));

  // Scalar single copy: only the low 32 bits travel; the rest is zeroed.
  const __uint128_t single_bits =
      MakeUInt128(0xaabbccdd40e51eb8ULL, 0x0011223344556677ULL);  // 7.16 in float
  asm("fmov %s0, %s1" : "=w"(actual) : "w"(single_bits));
  ASSERT_EQ(actual, MakeUInt128(0x40e51eb8ULL, 0ULL));
}
479 
TEST(Arm64InsnTest, InsertRegPartIntoSimd128) {
  uint64_t arg = 0xffffeeeeddddccccULL;
  __uint128_t res = MakeUInt128(0x1111aaaa2222bbbbULL, 0x3333cccc4444ddddULL);

  // Element inserts preserve the rest of the vector, so the destination is a
  // read-write operand; use the idiomatic "+w" constraint instead of the
  // legacy "=w"/"0" matching pair.

  // Byte: lane 3 receives the low byte of the source (0xcc).
  asm("mov %0.b[3], %w1" : "+w"(res) : "r"(arg));
  ASSERT_EQ(res, MakeUInt128(0x1111aaaacc22bbbbULL, 0x3333cccc4444ddddULL));

  // Double word: lane 1 receives the whole 64-bit source.
  asm("mov %0.d[1], %1" : "+w"(res) : "r"(arg));
  ASSERT_EQ(res, MakeUInt128(0x1111aaaacc22bbbbULL, 0xffffeeeeddddccccULL));
}
492 
TEST(Arm64InsnTest, DuplicateRegIntoSimd128) {
  // dup replicates the low byte of the general register into all 16 lanes.
  constexpr auto AsmDup = ASM_INSN_WRAP_FUNC_W_RES_R_ARG("dup %0.16b, %w1");
  ASSERT_EQ(AsmDup(0xabU), MakeUInt128(0xababababababababULL, 0xababababababababULL));
}
497 
TEST(Arm64InsnTest, MoveSimd128ElemToRegSigned) {
  const __uint128_t src = MakeUInt128(0x9796959493929190ULL, 0x9f9e9d9c9b9a99ULL);
  uint64_t dst = 0;

  // smov sign-extends the selected vector element into the general register.

  // 32-bit elements into a 64-bit register.
  asm("smov %0, %1.s[0]" : "=r"(dst) : "w"(src));
  ASSERT_EQ(dst, 0xffffffff93929190ULL);

  asm("smov %0, %1.s[2]" : "=r"(dst) : "w"(src));
  ASSERT_EQ(dst, 0xffffffff9c9b9a99ULL);

  // 16-bit elements into a 32-bit register (upper half of x stays zero).
  asm("smov %w0, %1.h[0]" : "=r"(dst) : "w"(src));
  ASSERT_EQ(dst, 0x00000000ffff9190ULL);

  asm("smov %w0, %1.h[2]" : "=r"(dst) : "w"(src));
  ASSERT_EQ(dst, 0x00000000ffff9594ULL);

  // 8-bit elements into a 32-bit register.
  asm("smov %w0, %1.b[0]" : "=r"(dst) : "w"(src));
  ASSERT_EQ(dst, 0x00000000ffffff90ULL);

  asm("smov %w0, %1.b[2]" : "=r"(dst) : "w"(src));
  ASSERT_EQ(dst, 0x00000000ffffff92ULL);
}
523 
TEST(Arm64InsnTest, MoveSimd128ElemToRegUnsigned) {
  const __uint128_t src = MakeUInt128(0xaaaabbbbcccceeeeULL, 0xffff000011112222ULL);
  uint64_t dst = 0;

  // umov zero-extends the selected vector element into the general register.

  // 64-bit elements.
  asm("umov %0, %1.d[0]" : "=r"(dst) : "w"(src));
  ASSERT_EQ(dst, 0xaaaabbbbcccceeeeULL);

  asm("umov %0, %1.d[1]" : "=r"(dst) : "w"(src));
  ASSERT_EQ(dst, 0xffff000011112222ULL);

  // 32-bit elements.
  asm("umov %w0, %1.s[0]" : "=r"(dst) : "w"(src));
  ASSERT_EQ(dst, 0xcccceeeeULL);

  asm("umov %w0, %1.s[2]" : "=r"(dst) : "w"(src));
  ASSERT_EQ(dst, 0x11112222ULL);

  // 16-bit elements.
  asm("umov %w0, %1.h[0]" : "=r"(dst) : "w"(src));
  ASSERT_EQ(dst, 0xeeeeULL);

  asm("umov %w0, %1.h[2]" : "=r"(dst) : "w"(src));
  ASSERT_EQ(dst, 0xbbbbULL);

  // 8-bit elements.
  asm("umov %w0, %1.b[0]" : "=r"(dst) : "w"(src));
  ASSERT_EQ(dst, 0xeeULL);

  asm("umov %w0, %1.b[2]" : "=r"(dst) : "w"(src));
  ASSERT_EQ(dst, 0xccULL);
}
556 
TEST(Arm64InsnTest, SignedMultiplyAddLongElemI16x4) {
  // smlal (by element): signed widening multiply-accumulate, 16-bit lanes
  // into 32-bit accumulators.
  const __uint128_t in1 = MakeUInt128(0x9463229563989898ULL, 0x9358211674562701ULL);
  const __uint128_t in2 = MakeUInt128(0x0218356462201349ULL, 0x6715188190973038ULL);
  const __uint128_t in3 = MakeUInt128(0x1198004973407239ULL, 0x6103685406643193ULL);
  __uint128_t actual =
      ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("smlal %0.4s, %1.4h, %2.h[1]")(in1, in2, in3);
  ASSERT_EQ(actual, MakeUInt128(0x37c4a3494b9db539ULL, 0x37c3dab413a58e33ULL));
}
565 
TEST(Arm64InsnTest, SignedMultiplyAddLongElemI16x4Upper) {
  // smlal2 (by element): same as smlal but sourcing the upper four 16-bit
  // lanes of the first operand.
  const __uint128_t in1 = MakeUInt128(0x9478221818528624ULL, 0x0851400666044332ULL);
  const __uint128_t in2 = MakeUInt128(0x5888569867054315ULL, 0x4706965747458550ULL);
  const __uint128_t in3 = MakeUInt128(0x3323233421073015ULL, 0x4594051655379068ULL);
  __uint128_t actual =
      ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("smlal2 %0.4s, %1.8h, %2.h[1]")(in1, in2, in3);
  ASSERT_EQ(actual, MakeUInt128(0x5c30bd483c119e0fULL, 0x48ecc5ab6efb3a86ULL));
}
574 
TEST(Arm64InsnTest, SignedMultiplyAddLongElemI16x4Upper2) {
  // smlal2 with the highest element index (h[7]) to exercise the full index
  // range of the by-element form.
  const __uint128_t in1 = MakeUInt128(0x9968262824727064ULL, 0x1336222178923903ULL);
  const __uint128_t in2 = MakeUInt128(0x1760854289437339ULL, 0x3561889165125042ULL);
  const __uint128_t in3 = MakeUInt128(0x4404008952719837ULL, 0x8738648058472689ULL);
  __uint128_t actual =
      ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("smlal2 %0.4s, %1.8h, %2.h[7]")(in1, in2, in3);
  ASSERT_EQ(actual, MakeUInt128(0x5d27e9db5e54d15aULL, 0x8b39d9f65f64ea0aULL));
}
583 
TEST(Arm64InsnTest, SignedMultiplySubtractLongElemI16x4) {
  // smlsl (by element): signed widening multiply-subtract, 16-bit lanes from
  // 32-bit accumulators.
  const __uint128_t in1 = MakeUInt128(0x9143447886360410ULL, 0x3182350736502778ULL);
  const __uint128_t in2 = MakeUInt128(0x5908975782727313ULL, 0x0504889398900992ULL);
  const __uint128_t in3 = MakeUInt128(0x3913503373250855ULL, 0x9826558670892426ULL);
  __uint128_t actual =
      ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("smlsl %0.4s, %1.4h, %2.h[1]")(in1, in2, in3);
  ASSERT_EQ(actual, MakeUInt128(0xfd58202775231935ULL, 0x61d69fb0921db6b6ULL));
}
592 
TEST(Arm64InsnTest, SignedMultiplySubtractLongElemI16x4Upper) {
  // smlsl2 (by element): same as smlsl but sourcing the upper four 16-bit
  // lanes of the first operand.
  const __uint128_t in1 = MakeUInt128(0x9320199199688285ULL, 0x1718395366913452ULL);
  const __uint128_t in2 = MakeUInt128(0x2244470804592396ULL, 0x6028171565515656ULL);
  const __uint128_t in3 = MakeUInt128(0x6611135982311225ULL, 0x0628905854914509ULL);
  __uint128_t actual =
      ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("smlsl2 %0.4s, %1.8h, %2.h[1]")(in1, in2, in3);
  ASSERT_EQ(actual, MakeUInt128(0x645326f0814d99a3ULL, 0x05c4290053980b2eULL));
}
601 
TEST(Arm64InsnTest, UnsignedMultiplyAddLongElemI16x4) {
  // umlal (by element): unsigned widening multiply-accumulate, 16-bit lanes
  // into 32-bit accumulators.
  const __uint128_t in1 = MakeUInt128(0x9027601834840306ULL, 0x8113818551059797ULL);
  const __uint128_t in2 = MakeUInt128(0x0566400750942608ULL, 0x7885735796037324ULL);
  const __uint128_t in3 = MakeUInt128(0x5141467867036880ULL, 0x9880609716425849ULL);
  __uint128_t actual =
      ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("umlal %0.4s, %1.4h, %2.h[1]")(in1, in2, in3);
  ASSERT_EQ(actual, MakeUInt128(0x61c8e2c867f707f8ULL, 0xc5dfe72334816629ULL));
}
610 
TEST(Arm64InsnTest, UnsignedMultiplyAddLongElemI16x4Upper) {
  // umlal2 (by element): same as umlal but sourcing the upper four 16-bit
  // lanes of the first operand.
  const __uint128_t in1 = MakeUInt128(0x9454236828860613ULL, 0x4084148637767009ULL);
  const __uint128_t in2 = MakeUInt128(0x6120715124914043ULL, 0x0272538607648236ULL);
  const __uint128_t in3 = MakeUInt128(0x3414334623518975ULL, 0x7664521641376796ULL);
  __uint128_t actual =
      ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("umlal2 %0.4s, %1.8h, %2.h[1]")(in1, in2, in3);
  ASSERT_EQ(actual, MakeUInt128(0x3c00351c3352428eULL, 0x7f9b6cda4425df7cULL));
}
619 
TEST(Arm64InsnTest, UnsignedMultiplySubtractLongElemI16x4) {
  // umlsl (by element): unsigned widening multiply-subtract, 16-bit lanes
  // from 32-bit accumulators.
  const __uint128_t in1 = MakeUInt128(0x9128009282525619ULL, 0x0205263016391147ULL);
  const __uint128_t in2 = MakeUInt128(0x7247331485739107ULL, 0x7758744253876117ULL);
  const __uint128_t in3 = MakeUInt128(0x4657867116941477ULL, 0x6421441111263583ULL);
  __uint128_t actual =
      ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("umlsl %0.4s, %1.4h, %2.h[1]")(in1, in2, in3);
  ASSERT_EQ(actual, MakeUInt128(0x0268619be9b26a3cULL, 0x1876471910da19edULL));
}
628 
TEST(Arm64InsnTest, UnsignedMultiplySubtractLongElemI16x4Upper) {
  // umlsl2 (by element): same as umlsl but sourcing the upper four 16-bit
  // lanes of the first operand.
  const __uint128_t in1 = MakeUInt128(0x9420757136275167ULL, 0x4573189189456283ULL);
  const __uint128_t in2 = MakeUInt128(0x5257044133543758ULL, 0x5753426986994725ULL);
  const __uint128_t in3 = MakeUInt128(0x4703165661399199ULL, 0x9682628247270641ULL);
  __uint128_t actual =
      ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("umlsl2 %0.4s, %1.8h, %2.h[1]")(in1, in2, in3);
  ASSERT_EQ(actual, MakeUInt128(0x2b7d4cb24d79259dULL, 0x8895afc6423a13adULL));
}
637 
TEST(Arm64InsnTest, AsmConvertI32F32) {
  // scvtf: signed 32-bit integer to single precision; 21 -> 21.0f (0x41a80000).
  __uint128_t actual = ASM_INSN_WRAP_FUNC_W_RES_R_ARG("scvtf %s0, %w1")(21);
  ASSERT_EQ(actual, MakeUInt128(0x41a80000U, 0U));
}
642 
TEST(Arm64InsnTest, AsmConvertU32F32) {
  // ucvtf: unsigned 32-bit integer to single precision.
  constexpr auto Ucvtf = ASM_INSN_WRAP_FUNC_W_RES_R_ARG("ucvtf %s0, %w1");

  ASSERT_EQ(Ucvtf(29), MakeUInt128(0x41e80000U, 0U));

  // 2^31 must convert as a large positive value, not as a negative one.
  ASSERT_EQ(Ucvtf(1U << 31), MakeUInt128(0x4f000000U, 0U));
}
651 
TEST(Arm64InsnTest, AsmConvertU32F32FromSimdReg) {
  // ucvtf (scalar SIMD form): unsigned 32-bit integer in an s-register to
  // single precision.
  constexpr auto UcvtfScalar = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("ucvtf %s0, %s1");

  ASSERT_EQ(UcvtfScalar(28), MakeUInt128(0x41e00000U, 0U));

  // 2^31 must convert as a large positive value, not as a negative one.
  ASSERT_EQ(UcvtfScalar(1U << 31), MakeUInt128(0x4f000000U, 0U));
}
660 
TEST(Arm64InsnTest, AsmConvertI32F64) {
  // scvtf: signed 32-bit integer to double precision; 21 -> 21.0
  // (0x4035000000000000).
  __uint128_t actual = ASM_INSN_WRAP_FUNC_W_RES_R_ARG("scvtf %d0, %w1")(21);
  ASSERT_EQ(actual, MakeUInt128(0x4035000000000000ULL, 0U));
}
665 
TEST(Arm64InsnTest, AsmConvertU32F64) {
  // ucvtf: unsigned 32-bit integer to double precision.
  constexpr auto Ucvtf = ASM_INSN_WRAP_FUNC_W_RES_R_ARG("ucvtf %d0, %w1");

  ASSERT_EQ(Ucvtf(18), MakeUInt128(0x4032000000000000ULL, 0U));

  // 2^31 must convert as a large positive value, not as a negative one.
  ASSERT_EQ(Ucvtf(1U << 31), MakeUInt128(0x41e0000000000000ULL, 0U));
}
674 
TEST(Arm64InsnTest, AsmConvertI64F32) {
  // scvtf: signed 64-bit integer to single precision; 11 -> 11.0f (0x41300000).
  __uint128_t actual = ASM_INSN_WRAP_FUNC_W_RES_R_ARG("scvtf %s0, %x1")(11);
  ASSERT_EQ(actual, MakeUInt128(0x41300000U, 0U));
}
679 
TEST(Arm64InsnTest, AsmConvertU64F32) {
  // ucvtf: unsigned 64-bit integer to single precision.
  constexpr auto Ucvtf = ASM_INSN_WRAP_FUNC_W_RES_R_ARG("ucvtf %s0, %x1");

  ASSERT_EQ(Ucvtf(3), MakeUInt128(0x40400000U, 0U));

  // 2^63 must convert as a large positive value, not as a negative one.
  ASSERT_EQ(Ucvtf(1ULL << 63), MakeUInt128(0x5f000000U, 0U));
}
688 
TEST(Arm64InsnTest, AsmConvertI64F64) {
  // scvtf: signed 64-bit integer to double precision; 137 -> 137.0
  // (0x4061200000000000).
  __uint128_t actual = ASM_INSN_WRAP_FUNC_W_RES_R_ARG("scvtf %d0, %x1")(137);
  ASSERT_EQ(actual, MakeUInt128(0x4061200000000000ULL, 0U));
}
693 
TEST(Arm64InsnTest, AsmConvertI32F32FromSimdReg) {
  // scvtf (scalar SIMD form): signed 32-bit integer in an s-register to
  // single precision; 1109 -> 1109.0f (0x448aa000).
  __uint128_t actual = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("scvtf %s0, %s1")(1109);
  ASSERT_EQ(actual, MakeUInt128(0x448aa000ULL, 0U));
}
698 
TEST(Arm64InsnTest, AsmConvertI64F64FromSimdReg) {
  // scvtf (scalar SIMD form): signed 64-bit integer in a d-register to
  // double precision; 123 -> 123.0 (0x405ec00000000000).
  __uint128_t actual = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("scvtf %d0, %d1")(123);
  ASSERT_EQ(actual, MakeUInt128(0x405ec00000000000ULL, 0U));
}
703 
TEST(Arm64InsnTest, AsmConvertI32x4F32x4) {
  // Vector scvtf over four 32-bit lanes: each signed integer lane becomes the
  // corresponding single-precision value.
  const __uint128_t input = MakeUInt128(0x0000003500000014ULL, 0x0000005400000009ULL);
  __uint128_t actual = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("scvtf %0.4s, %1.4s")(input);
  ASSERT_EQ(actual, MakeUInt128(0x4254000041a00000ULL, 0x42a8000041100000ULL));
}
709 
TEST(Arm64InsnTest, AsmConvertI64x2F64x2) {
  // Vector scvtf over two 64-bit lanes; the low lane is negative (-9) to
  // exercise signed conversion.
  const __uint128_t input = MakeUInt128(static_cast<int64_t>(-9), 17U);
  __uint128_t actual = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("scvtf %0.2d, %1.2d")(input);
  ASSERT_EQ(actual, MakeUInt128(0xc022000000000000ULL, 0x4031000000000000ULL));
}
715 
TEST(Arm64InsnTest, AsmConvertU32x4F32x4) {
  // Vector ucvtf over four 32-bit lanes; one lane is 2^31 to verify the
  // top bit is not treated as a sign.
  const __uint128_t input = MakeUInt128(0x8000000000000019ULL, 0x0000005800000010ULL);
  __uint128_t actual = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("ucvtf %0.4s, %1.4s")(input);
  ASSERT_EQ(actual, MakeUInt128(0x4f00000041c80000ULL, 0x42b0000041800000ULL));
}
721 
TEST(Arm64InsnTest, AsmConvertU64x2F64x2) {
  // Vector ucvtf over two 64-bit lanes; the low lane is 2^63 to verify the
  // top bit is not treated as a sign.
  const __uint128_t input = MakeUInt128(1ULL << 63, 29U);
  __uint128_t actual = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("ucvtf %0.2d, %1.2d")(input);
  ASSERT_EQ(actual, MakeUInt128(0x43e0000000000000ULL, 0x403d000000000000ULL));
}
727 
TEST(Arm64InsnTest,AsmConvertU64F64)728 TEST(Arm64InsnTest, AsmConvertU64F64) {
729   constexpr auto AsmConvertU64F64 = ASM_INSN_WRAP_FUNC_W_RES_R_ARG("ucvtf %d0, %x1");
730 
731   ASSERT_EQ(AsmConvertU64F64(49), MakeUInt128(0x4048800000000000ULL, 0U));
732 
733   // Test that the topmost bit isn't treated as the sign.
734   ASSERT_EQ(AsmConvertU64F64(1ULL << 63), MakeUInt128(0x43e0000000000000ULL, 0U));
735 }
736 
TEST(Arm64InsnTest,AsmConvertU64F64FromSimdReg)737 TEST(Arm64InsnTest, AsmConvertU64F64FromSimdReg) {
738   constexpr auto AsmUcvtf = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("ucvtf %d0, %d1");
739 
740   ASSERT_EQ(AsmUcvtf(47), MakeUInt128(0x4047800000000000ULL, 0U));
741 
742   // Test that the topmost bit isn't treated as the sign.
743   ASSERT_EQ(AsmUcvtf(1ULL << 63), MakeUInt128(0x43e0000000000000ULL, 0U));
744 }
745 
TEST(Arm64InsnTest, AsmConvertLiterals) {
  // Verify that the compiler encodes the floating-point literals used in the
  // conversion tests below exactly as expected.
  //
  // The values straddle the .25/.50/.75 fractional points so that the
  // float->int tests can tell apart all five rounding modes (ties-away,
  // ties-even, floor, ceiling, truncate) on both positive and negative input.
  ASSERT_EQ(bit_cast<uint32_t>(-7.50f), 0xc0f00000U);
  ASSERT_EQ(bit_cast<uint32_t>(-6.75f), 0xc0d80000U);
  ASSERT_EQ(bit_cast<uint32_t>(-6.50f), 0xc0d00000U);
  ASSERT_EQ(bit_cast<uint32_t>(-6.25f), 0xc0c80000U);
  ASSERT_EQ(bit_cast<uint32_t>(6.25f), 0x40c80000U);
  ASSERT_EQ(bit_cast<uint32_t>(6.50f), 0x40d00000U);
  ASSERT_EQ(bit_cast<uint32_t>(6.75f), 0x40d80000U);
  ASSERT_EQ(bit_cast<uint32_t>(7.50f), 0x40f00000U);

  ASSERT_EQ(bit_cast<uint64_t>(-7.50), 0xc01e000000000000ULL);
  ASSERT_EQ(bit_cast<uint64_t>(-6.75), 0xc01b000000000000ULL);
  ASSERT_EQ(bit_cast<uint64_t>(-6.50), 0xc01a000000000000ULL);
  ASSERT_EQ(bit_cast<uint64_t>(-6.25), 0xc019000000000000ULL);
  ASSERT_EQ(bit_cast<uint64_t>(6.25), 0x4019000000000000ULL);
  ASSERT_EQ(bit_cast<uint64_t>(6.50), 0x401a000000000000ULL);
  ASSERT_EQ(bit_cast<uint64_t>(6.75), 0x401b000000000000ULL);
  ASSERT_EQ(bit_cast<uint64_t>(7.50), 0x401e000000000000ULL);
}
767 
768 template <typename IntType, typename FuncType>
TestConvertF32ToInt(FuncType AsmFunc,std::initializer_list<int> expected)769 void TestConvertF32ToInt(FuncType AsmFunc, std::initializer_list<int> expected) {
770   // Note that bit_cast isn't a constexpr.
771   static const uint32_t kConvertF32ToIntInputs[] = {
772       bit_cast<uint32_t>(-7.50f),
773       bit_cast<uint32_t>(-6.75f),
774       bit_cast<uint32_t>(-6.50f),
775       bit_cast<uint32_t>(-6.25f),
776       bit_cast<uint32_t>(6.25f),
777       bit_cast<uint32_t>(6.50f),
778       bit_cast<uint32_t>(6.75f),
779       bit_cast<uint32_t>(7.50f),
780   };
781 
782   const size_t kConvertF32ToIntInputsSize = sizeof(kConvertF32ToIntInputs) / sizeof(uint32_t);
783   ASSERT_EQ(kConvertF32ToIntInputsSize, expected.size());
784 
785   auto expected_it = expected.begin();
786   for (size_t input_it = 0; input_it < kConvertF32ToIntInputsSize; input_it++) {
787     ASSERT_EQ(AsmFunc(kConvertF32ToIntInputs[input_it]), static_cast<IntType>(*expected_it++));
788   }
789 }
790 
// FCVT* tests: single-precision source, general-purpose-register destination.
// Each test feeds the eight inputs from TestConvertF32ToInt and pins the
// results for one rounding mode:
//   fcvtas/fcvtau - round to nearest, ties away from zero
//   fcvtms/fcvtmu - round toward minus infinity
//   fcvtns/fcvtnu - round to nearest, ties to even
//   fcvtps/fcvtpu - round toward plus infinity
//   fcvtzs/fcvtzu - round toward zero
// The unsigned variants expect 0 for all negative inputs.

// --- 32-bit destination (%w0) ---
TEST(Arm64InsnTest, AsmConvertF32I32TieAway) {
  constexpr auto AsmFcvtas = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtas %w0, %s1");
  TestConvertF32ToInt<uint32_t>(AsmFcvtas, {-8, -7, -7, -6, 6U, 7U, 7U, 8U});
}

TEST(Arm64InsnTest, AsmConvertF32U32TieAway) {
  constexpr auto AsmFcvtau = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtau %w0, %s1");
  TestConvertF32ToInt<uint32_t>(AsmFcvtau, {0U, 0U, 0U, 0U, 6U, 7U, 7U, 8U});
}

TEST(Arm64InsnTest, AsmConvertF32I32NegInf) {
  constexpr auto AsmFcvtms = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtms %w0, %s1");
  TestConvertF32ToInt<uint32_t>(AsmFcvtms, {-8, -7, -7, -7, 6U, 6U, 6U, 7U});
}

TEST(Arm64InsnTest, AsmConvertF32U32NegInf) {
  constexpr auto AsmFcvtmu = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtmu %w0, %s1");
  TestConvertF32ToInt<uint32_t>(AsmFcvtmu, {0U, 0U, 0U, 0U, 6U, 6U, 6U, 7U});
}

TEST(Arm64InsnTest, AsmConvertF32I32TieEven) {
  constexpr auto AsmFcvtns = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtns %w0, %s1");
  TestConvertF32ToInt<uint32_t>(AsmFcvtns, {-8, -7, -6, -6, 6U, 6U, 7U, 8U});
}

TEST(Arm64InsnTest, AsmConvertF32U32TieEven) {
  constexpr auto AsmFcvtnu = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtnu %w0, %s1");
  TestConvertF32ToInt<uint32_t>(AsmFcvtnu, {0U, 0U, 0U, 0U, 6U, 6U, 7U, 8U});
}

TEST(Arm64InsnTest, AsmConvertF32I32PosInf) {
  constexpr auto AsmFcvtps = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtps %w0, %s1");
  TestConvertF32ToInt<uint32_t>(AsmFcvtps, {-7, -6, -6, -6, 7U, 7U, 7U, 8U});
}

TEST(Arm64InsnTest, AsmConvertF32U32PosInf) {
  constexpr auto AsmFcvtpu = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtpu %w0, %s1");
  TestConvertF32ToInt<uint32_t>(AsmFcvtpu, {0U, 0U, 0U, 0U, 7U, 7U, 7U, 8U});
}

TEST(Arm64InsnTest, AsmConvertF32I32Truncate) {
  constexpr auto AsmFcvtzs = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtzs %w0, %s1");
  TestConvertF32ToInt<uint32_t>(AsmFcvtzs, {-7, -6, -6, -6, 6U, 6U, 6U, 7U});
}

TEST(Arm64InsnTest, AsmConvertF32U32Truncate) {
  constexpr auto AsmFcvtzu = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtzu %w0, %s1");
  TestConvertF32ToInt<uint32_t>(AsmFcvtzu, {0U, 0U, 0U, 0U, 6U, 6U, 6U, 7U});
}

// --- 64-bit destination (%x0) ---
TEST(Arm64InsnTest, AsmConvertF32I64TieAway) {
  constexpr auto AsmFcvtas = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtas %x0, %s1");
  TestConvertF32ToInt<uint64_t>(AsmFcvtas, {-8, -7, -7, -6, 6U, 7U, 7U, 8U});
}

TEST(Arm64InsnTest, AsmConvertF32U64TieAway) {
  constexpr auto AsmFcvtau = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtau %x0, %s1");
  TestConvertF32ToInt<uint64_t>(AsmFcvtau, {0U, 0U, 0U, 0U, 6U, 7U, 7U, 8U});
}

TEST(Arm64InsnTest, AsmConvertF32I64NegInf) {
  constexpr auto AsmFcvtms = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtms %x0, %s1");
  TestConvertF32ToInt<uint64_t>(AsmFcvtms, {-8, -7, -7, -7, 6U, 6U, 6U, 7U});
}

TEST(Arm64InsnTest, AsmConvertF32U64NegInf) {
  constexpr auto AsmFcvtmu = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtmu %x0, %s1");
  TestConvertF32ToInt<uint64_t>(AsmFcvtmu, {0U, 0U, 0U, 0U, 6U, 6U, 6U, 7U});
}

TEST(Arm64InsnTest, AsmConvertF32I64TieEven) {
  constexpr auto AsmFcvtns = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtns %x0, %s1");
  TestConvertF32ToInt<uint64_t>(AsmFcvtns, {-8, -7, -6, -6, 6U, 6U, 7U, 8U});
}

TEST(Arm64InsnTest, AsmConvertF32U64TieEven) {
  constexpr auto AsmFcvtnu = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtnu %x0, %s1");
  TestConvertF32ToInt<uint64_t>(AsmFcvtnu, {0U, 0U, 0U, 0U, 6U, 6U, 7U, 8U});
}

TEST(Arm64InsnTest, AsmConvertF32I64PosInf) {
  constexpr auto AsmFcvtps = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtps %x0, %s1");
  TestConvertF32ToInt<uint64_t>(AsmFcvtps, {-7, -6, -6, -6, 7U, 7U, 7U, 8U});
}

TEST(Arm64InsnTest, AsmConvertF32U64PosInf) {
  constexpr auto AsmFcvtpu = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtpu %x0, %s1");
  TestConvertF32ToInt<uint64_t>(AsmFcvtpu, {0U, 0U, 0U, 0U, 7U, 7U, 7U, 8U});
}

TEST(Arm64InsnTest, AsmConvertF32I64Truncate) {
  constexpr auto AsmFcvtzs = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtzs %x0, %s1");
  TestConvertF32ToInt<uint64_t>(AsmFcvtzs, {-7, -6, -6, -6, 6U, 6U, 6U, 7U});
}

TEST(Arm64InsnTest, AsmConvertF32U64Truncate) {
  constexpr auto AsmFcvtzu = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtzu %x0, %s1");
  TestConvertF32ToInt<uint64_t>(AsmFcvtzu, {0U, 0U, 0U, 0U, 6U, 6U, 6U, 7U});
}
890 
891 template <typename IntType, typename FuncType>
TestConvertF64ToInt(FuncType AsmFunc,std::initializer_list<int> expected)892 void TestConvertF64ToInt(FuncType AsmFunc, std::initializer_list<int> expected) {
893   // Note that bit_cast isn't a constexpr.
894   static const uint64_t kConvertF64ToIntInputs[] = {
895       bit_cast<uint64_t>(-7.50),
896       bit_cast<uint64_t>(-6.75),
897       bit_cast<uint64_t>(-6.50),
898       bit_cast<uint64_t>(-6.25),
899       bit_cast<uint64_t>(6.25),
900       bit_cast<uint64_t>(6.50),
901       bit_cast<uint64_t>(6.75),
902       bit_cast<uint64_t>(7.50),
903   };
904 
905   const size_t kConvertF64ToIntInputsSize = sizeof(kConvertF64ToIntInputs) / sizeof(uint64_t);
906   ASSERT_EQ(kConvertF64ToIntInputsSize, expected.size());
907 
908   auto expected_it = expected.begin();
909   for (size_t input_it = 0; input_it < kConvertF64ToIntInputsSize; input_it++) {
910     ASSERT_EQ(AsmFunc(kConvertF64ToIntInputs[input_it]), static_cast<IntType>(*expected_it++));
911   }
912 }
913 
// FCVT* tests: double-precision source, general-purpose-register destination.
// Same rounding-mode matrix as the F32 tests above (as/au = ties away,
// ms/mu = floor, ns/nu = ties to even, ps/pu = ceiling, zs/zu = truncate);
// unsigned variants expect 0 for all negative inputs.

// --- 32-bit destination (%w0) ---
TEST(Arm64InsnTest, AsmConvertF64I32TieAway) {
  constexpr auto AsmFcvtas = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtas %w0, %d1");
  TestConvertF64ToInt<uint32_t>(AsmFcvtas, {-8, -7, -7, -6, 6U, 7U, 7U, 8U});
}

TEST(Arm64InsnTest, AsmConvertF64U32TieAway) {
  constexpr auto AsmFcvtau = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtau %w0, %d1");
  TestConvertF64ToInt<uint32_t>(AsmFcvtau, {0U, 0U, 0U, 0U, 6U, 7U, 7U, 8U});
}

TEST(Arm64InsnTest, AsmConvertF64I32NegInf) {
  constexpr auto AsmFcvtms = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtms %w0, %d1");
  TestConvertF64ToInt<uint32_t>(AsmFcvtms, {-8, -7, -7, -7, 6U, 6U, 6U, 7U});
}

TEST(Arm64InsnTest, AsmConvertF64U32NegInf) {
  constexpr auto AsmFcvtmu = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtmu %w0, %d1");
  TestConvertF64ToInt<uint32_t>(AsmFcvtmu, {0U, 0U, 0U, 0U, 6U, 6U, 6U, 7U});
}

TEST(Arm64InsnTest, AsmConvertF64I32TieEven) {
  constexpr auto AsmFcvtns = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtns %w0, %d1");
  TestConvertF64ToInt<uint32_t>(AsmFcvtns, {-8, -7, -6, -6, 6U, 6U, 7U, 8U});
}

TEST(Arm64InsnTest, AsmConvertF64U32TieEven) {
  constexpr auto AsmFcvtnu = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtnu %w0, %d1");
  TestConvertF64ToInt<uint32_t>(AsmFcvtnu, {0U, 0U, 0U, 0U, 6U, 6U, 7U, 8U});
}

TEST(Arm64InsnTest, AsmConvertF64I32PosInf) {
  constexpr auto AsmFcvtps = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtps %w0, %d1");
  TestConvertF64ToInt<uint32_t>(AsmFcvtps, {-7, -6, -6, -6, 7U, 7U, 7U, 8U});
}

TEST(Arm64InsnTest, AsmConvertF64U32PosInf) {
  constexpr auto AsmFcvtpu = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtpu %w0, %d1");
  TestConvertF64ToInt<uint32_t>(AsmFcvtpu, {0U, 0U, 0U, 0U, 7U, 7U, 7U, 8U});
}

TEST(Arm64InsnTest, AsmConvertF64I32Truncate) {
  constexpr auto AsmFcvtzs = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtzs %w0, %d1");
  TestConvertF64ToInt<uint32_t>(AsmFcvtzs, {-7, -6, -6, -6, 6U, 6U, 6U, 7U});
}

TEST(Arm64InsnTest, AsmConvertF64U32Truncate) {
  constexpr auto AsmFcvtzu = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtzu %w0, %d1");
  TestConvertF64ToInt<uint32_t>(AsmFcvtzu, {0U, 0U, 0U, 0U, 6U, 6U, 6U, 7U});
}

// --- 64-bit destination (%x0) ---
TEST(Arm64InsnTest, AsmConvertF64I64TieAway) {
  constexpr auto AsmFcvtas = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtas %x0, %d1");
  TestConvertF64ToInt<uint64_t>(AsmFcvtas, {-8, -7, -7, -6, 6U, 7U, 7U, 8U});
}

TEST(Arm64InsnTest, AsmConvertF64U64TieAway) {
  constexpr auto AsmFcvtau = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtau %x0, %d1");
  TestConvertF64ToInt<uint64_t>(AsmFcvtau, {0U, 0U, 0U, 0U, 6U, 7U, 7U, 8U});
}

TEST(Arm64InsnTest, AsmConvertF64I64NegInf) {
  constexpr auto AsmFcvtms = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtms %x0, %d1");
  TestConvertF64ToInt<uint64_t>(AsmFcvtms, {-8, -7, -7, -7, 6U, 6U, 6U, 7U});
}

TEST(Arm64InsnTest, AsmConvertF64U64NegInf) {
  constexpr auto AsmFcvtmu = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtmu %x0, %d1");
  TestConvertF64ToInt<uint64_t>(AsmFcvtmu, {0U, 0U, 0U, 0U, 6U, 6U, 6U, 7U});
}

TEST(Arm64InsnTest, AsmConvertF64I64TieEven) {
  constexpr auto AsmFcvtns = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtns %x0, %d1");
  TestConvertF64ToInt<uint64_t>(AsmFcvtns, {-8, -7, -6, -6, 6U, 6U, 7U, 8U});
}

TEST(Arm64InsnTest, AsmConvertF64U64TieEven) {
  constexpr auto AsmFcvtnu = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtnu %x0, %d1");
  TestConvertF64ToInt<uint64_t>(AsmFcvtnu, {0U, 0U, 0U, 0U, 6U, 6U, 7U, 8U});
}

TEST(Arm64InsnTest, AsmConvertF64I64PosInf) {
  constexpr auto AsmFcvtps = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtps %x0, %d1");
  TestConvertF64ToInt<uint64_t>(AsmFcvtps, {-7, -6, -6, -6, 7U, 7U, 7U, 8U});
}

TEST(Arm64InsnTest, AsmConvertF64U64PosInf) {
  constexpr auto AsmFcvtpu = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtpu %x0, %d1");
  TestConvertF64ToInt<uint64_t>(AsmFcvtpu, {0U, 0U, 0U, 0U, 7U, 7U, 7U, 8U});
}

TEST(Arm64InsnTest, AsmConvertF64I64Truncate) {
  constexpr auto AsmFcvtzs = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtzs %x0, %d1");
  TestConvertF64ToInt<uint64_t>(AsmFcvtzs, {-7, -6, -6, -6, 6U, 6U, 6U, 7U});
}

TEST(Arm64InsnTest, AsmConvertF64U64Truncate) {
  constexpr auto AsmFcvtzu = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtzu %x0, %d1");
  TestConvertF64ToInt<uint64_t>(AsmFcvtzu, {0U, 0U, 0U, 0U, 6U, 6U, 6U, 7U});
}
1013 
// FCVT* "Scalar" tests: the destination is a SIMD scalar register (%s0/%d0)
// instead of a general-purpose register.  Expected values per rounding mode
// are identical to the GPR-destination tests above.

// --- float source, 32-bit SIMD scalar destination ---
TEST(Arm64InsnTest, AsmConvertF32I32ScalarTieAway) {
  constexpr auto AsmFcvtas = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtas %s0, %s1");
  TestConvertF32ToInt<uint32_t>(AsmFcvtas, {-8, -7, -7, -6, 6U, 7U, 7U, 8U});
}

TEST(Arm64InsnTest, AsmConvertF32U32ScalarTieAway) {
  constexpr auto AsmFcvtau = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtau %s0, %s1");
  TestConvertF32ToInt<uint32_t>(AsmFcvtau, {0U, 0U, 0U, 0U, 6U, 7U, 7U, 8U});
}

TEST(Arm64InsnTest, AsmConvertF32I32ScalarNegInf) {
  constexpr auto AsmFcvtms = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtms %s0, %s1");
  TestConvertF32ToInt<uint32_t>(AsmFcvtms, {-8, -7, -7, -7, 6U, 6U, 6U, 7U});
}

TEST(Arm64InsnTest, AsmConvertF32U32ScalarNegInf) {
  constexpr auto AsmFcvtmu = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtmu %s0, %s1");
  TestConvertF32ToInt<uint32_t>(AsmFcvtmu, {0U, 0U, 0U, 0U, 6U, 6U, 6U, 7U});
}

TEST(Arm64InsnTest, AsmConvertF32I32ScalarTieEven) {
  constexpr auto AsmFcvtns = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtns %s0, %s1");
  TestConvertF32ToInt<uint32_t>(AsmFcvtns, {-8, -7, -6, -6, 6U, 6U, 7U, 8U});
}

TEST(Arm64InsnTest, AsmConvertF32U32ScalarTieEven) {
  constexpr auto AsmFcvtnu = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtnu %s0, %s1");
  TestConvertF32ToInt<uint32_t>(AsmFcvtnu, {0U, 0U, 0U, 0U, 6U, 6U, 7U, 8U});
}

TEST(Arm64InsnTest, AsmConvertF32I32ScalarPosInf) {
  constexpr auto AsmFcvtps = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtps %s0, %s1");
  TestConvertF32ToInt<uint32_t>(AsmFcvtps, {-7, -6, -6, -6, 7U, 7U, 7U, 8U});
}

TEST(Arm64InsnTest, AsmConvertF32U32ScalarPosInf) {
  constexpr auto AsmFcvtpu = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtpu %s0, %s1");
  TestConvertF32ToInt<uint32_t>(AsmFcvtpu, {0U, 0U, 0U, 0U, 7U, 7U, 7U, 8U});
}

TEST(Arm64InsnTest, AsmConvertF32I32ScalarTruncate) {
  constexpr auto AsmFcvtzs = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtzs %s0, %s1");
  TestConvertF32ToInt<uint32_t>(AsmFcvtzs, {-7, -6, -6, -6, 6U, 6U, 6U, 7U});
}

TEST(Arm64InsnTest, AsmConvertF32U32ScalarTruncate) {
  constexpr auto AsmFcvtzu = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtzu %s0, %s1");
  TestConvertF32ToInt<uint32_t>(AsmFcvtzu, {0U, 0U, 0U, 0U, 6U, 6U, 6U, 7U});
}

// --- double source, 64-bit SIMD scalar destination ---
TEST(Arm64InsnTest, AsmConvertF64I64ScalarTieAway) {
  constexpr auto AsmFcvtas = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtas %d0, %d1");
  TestConvertF64ToInt<uint64_t>(AsmFcvtas, {-8, -7, -7, -6, 6U, 7U, 7U, 8U});
}

TEST(Arm64InsnTest, AsmConvertF64U64ScalarTieAway) {
  constexpr auto AsmFcvtau = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtau %d0, %d1");
  TestConvertF64ToInt<uint64_t>(AsmFcvtau, {0U, 0U, 0U, 0U, 6U, 7U, 7U, 8U});
}

TEST(Arm64InsnTest, AsmConvertF64I64ScalarNegInf) {
  constexpr auto AsmFcvtms = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtms %d0, %d1");
  TestConvertF64ToInt<uint64_t>(AsmFcvtms, {-8, -7, -7, -7, 6U, 6U, 6U, 7U});
}

TEST(Arm64InsnTest, AsmConvertF64U64ScalarNegInf) {
  constexpr auto AsmFcvtmu = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtmu %d0, %d1");
  TestConvertF64ToInt<uint64_t>(AsmFcvtmu, {0U, 0U, 0U, 0U, 6U, 6U, 6U, 7U});
}

TEST(Arm64InsnTest, AsmConvertF64I64ScalarTieEven) {
  constexpr auto AsmFcvtns = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtns %d0, %d1");
  TestConvertF64ToInt<uint64_t>(AsmFcvtns, {-8, -7, -6, -6, 6U, 6U, 7U, 8U});
}

TEST(Arm64InsnTest, AsmConvertF64U64ScalarTieEven) {
  constexpr auto AsmFcvtnu = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtnu %d0, %d1");
  TestConvertF64ToInt<uint64_t>(AsmFcvtnu, {0U, 0U, 0U, 0U, 6U, 6U, 7U, 8U});
}

TEST(Arm64InsnTest, AsmConvertF64I64ScalarPosInf) {
  constexpr auto AsmFcvtps = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtps %d0, %d1");
  TestConvertF64ToInt<uint64_t>(AsmFcvtps, {-7, -6, -6, -6, 7U, 7U, 7U, 8U});
}

TEST(Arm64InsnTest, AsmConvertF64U64ScalarPosInf) {
  constexpr auto AsmFcvtpu = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtpu %d0, %d1");
  TestConvertF64ToInt<uint64_t>(AsmFcvtpu, {0U, 0U, 0U, 0U, 7U, 7U, 7U, 8U});
}

TEST(Arm64InsnTest, AsmConvertF64I64ScalarTruncate) {
  constexpr auto AsmFcvtzs = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtzs %d0, %d1");
  TestConvertF64ToInt<uint64_t>(AsmFcvtzs, {-7, -6, -6, -6, 6U, 6U, 6U, 7U});
}

TEST(Arm64InsnTest, AsmConvertF64U64ScalarTruncate) {
  constexpr auto AsmFcvtzu = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtzu %d0, %d1");
  TestConvertF64ToInt<uint64_t>(AsmFcvtzu, {0U, 0U, 0U, 0U, 6U, 6U, 6U, 7U});
}
1113 
// FCVT* vector tests: four 32-bit float lanes converted per instruction.
// arg1 holds the four negative inputs, arg2 the four positive ones; expected
// lane values mirror the scalar tests (negatives appear as two's-complement
// 0xfffffffX lanes in the signed results, 0 in the unsigned results).
TEST(Arm64InsnTest, AsmConvertF32I32x4TieAway) {
  constexpr auto AsmFcvtas = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtas %0.4s, %1.4s");
  __uint128_t arg1 = MakeF32x4(-7.50f, -6.75f, -6.50f, -6.25f);
  ASSERT_EQ(AsmFcvtas(arg1), MakeUInt128(0xfffffff9fffffff8ULL, 0xfffffffafffffff9ULL));
  __uint128_t arg2 = MakeF32x4(6.25f, 6.50f, 6.75f, 7.50f);
  ASSERT_EQ(AsmFcvtas(arg2), MakeUInt128(0x0000000700000006ULL, 0x0000000800000007ULL));
}

TEST(Arm64InsnTest, AsmConvertF32U32x4TieAway) {
  constexpr auto AsmFcvtau = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtau %0.4s, %1.4s");
  __uint128_t arg1 = MakeF32x4(-7.50f, -6.75f, -6.50f, -6.25f);
  ASSERT_EQ(AsmFcvtau(arg1), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  __uint128_t arg2 = MakeF32x4(6.25f, 6.50f, 6.75f, 7.50f);
  ASSERT_EQ(AsmFcvtau(arg2), MakeUInt128(0x0000000700000006ULL, 0x0000000800000007ULL));
}

TEST(Arm64InsnTest, AsmConvertF32I32x4NegInf) {
  constexpr auto AsmFcvtms = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtms %0.4s, %1.4s");
  __uint128_t arg1 = MakeF32x4(-7.50f, -6.75f, -6.50f, -6.25f);
  ASSERT_EQ(AsmFcvtms(arg1), MakeUInt128(0xfffffff9fffffff8ULL, 0xfffffff9fffffff9ULL));
  __uint128_t arg2 = MakeF32x4(6.25f, 6.50f, 6.75f, 7.50f);
  ASSERT_EQ(AsmFcvtms(arg2), MakeUInt128(0x0000000600000006ULL, 0x0000000700000006ULL));
}

TEST(Arm64InsnTest, AsmConvertF32U32x4NegInf) {
  constexpr auto AsmFcvtmu = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtmu %0.4s, %1.4s");
  __uint128_t arg1 = MakeF32x4(-7.50f, -6.75f, -6.50f, -6.25f);
  ASSERT_EQ(AsmFcvtmu(arg1), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  __uint128_t arg2 = MakeF32x4(6.25f, 6.50f, 6.75f, 7.50f);
  ASSERT_EQ(AsmFcvtmu(arg2), MakeUInt128(0x0000000600000006ULL, 0x0000000700000006ULL));
}

TEST(Arm64InsnTest, AsmConvertF32I32x4TieEven) {
  constexpr auto AsmFcvtns = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtns %0.4s, %1.4s");
  __uint128_t arg1 = MakeF32x4(-7.50f, -6.75f, -6.50f, -6.25f);
  ASSERT_EQ(AsmFcvtns(arg1), MakeUInt128(0xfffffff9fffffff8ULL, 0xfffffffafffffffaULL));
  __uint128_t arg2 = MakeF32x4(6.25f, 6.50f, 6.75f, 7.50f);
  ASSERT_EQ(AsmFcvtns(arg2), MakeUInt128(0x0000000600000006ULL, 0x0000000800000007ULL));
}

TEST(Arm64InsnTest, AsmConvertF32U32x4TieEven) {
  constexpr auto AsmFcvtnu = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtnu %0.4s, %1.4s");
  __uint128_t arg1 = MakeF32x4(-7.50f, -6.75f, -6.50f, -6.25f);
  ASSERT_EQ(AsmFcvtnu(arg1), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  __uint128_t arg2 = MakeF32x4(6.25f, 6.50f, 6.75f, 7.50f);
  ASSERT_EQ(AsmFcvtnu(arg2), MakeUInt128(0x0000000600000006ULL, 0x0000000800000007ULL));
}

TEST(Arm64InsnTest, AsmConvertF32I32x4PosInf) {
  constexpr auto AsmFcvtps = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtps %0.4s, %1.4s");
  __uint128_t arg1 = MakeF32x4(-7.50f, -6.75f, -6.50f, -6.25f);
  ASSERT_EQ(AsmFcvtps(arg1), MakeUInt128(0xfffffffafffffff9ULL, 0xfffffffafffffffaULL));
  __uint128_t arg2 = MakeF32x4(6.25f, 6.50f, 6.75f, 7.50f);
  ASSERT_EQ(AsmFcvtps(arg2), MakeUInt128(0x0000000700000007ULL, 0x0000000800000007ULL));
}

TEST(Arm64InsnTest, AsmConvertF32U32x4PosInf) {
  constexpr auto AsmFcvtpu = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtpu %0.4s, %1.4s");
  __uint128_t arg1 = MakeF32x4(-7.50f, -6.75f, -6.50f, -6.25f);
  ASSERT_EQ(AsmFcvtpu(arg1), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  __uint128_t arg2 = MakeF32x4(6.25f, 6.50f, 6.75f, 7.50f);
  ASSERT_EQ(AsmFcvtpu(arg2), MakeUInt128(0x0000000700000007ULL, 0x0000000800000007ULL));
}

TEST(Arm64InsnTest, AsmConvertF32I32x4Truncate) {
  constexpr auto AsmFcvtzs = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtzs %0.4s, %1.4s");
  __uint128_t arg1 = MakeF32x4(-7.50f, -6.75f, -6.50f, -6.25f);
  ASSERT_EQ(AsmFcvtzs(arg1), MakeUInt128(0xfffffffafffffff9ULL, 0xfffffffafffffffaULL));
  __uint128_t arg2 = MakeF32x4(6.25f, 6.50f, 6.75f, 7.50f);
  ASSERT_EQ(AsmFcvtzs(arg2), MakeUInt128(0x0000000600000006ULL, 0x0000000700000006ULL));
}

TEST(Arm64InsnTest, AsmConvertF32U32x4Truncate) {
  constexpr auto AsmFcvtzu = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtzu %0.4s, %1.4s");
  __uint128_t arg1 = MakeF32x4(-7.50f, -6.75f, -6.50f, -6.25f);
  ASSERT_EQ(AsmFcvtzu(arg1), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  __uint128_t arg2 = MakeF32x4(6.25f, 6.50f, 6.75f, 7.50f);
  ASSERT_EQ(AsmFcvtzu(arg2), MakeUInt128(0x0000000600000006ULL, 0x0000000700000006ULL));
}
1193 
// FCVT* vector tests: two 64-bit double lanes converted per instruction.
// Four argument pairs cover the same eight inputs as the scalar tests.
//
// NOTE(review): these test names say "x4" but the instruction form is
// %0.2d/%1.2d (two lanes); "x2" would match the convention used by
// AsmConvertI64x2F64x2 above -- consider renaming (changes gtest filters).
TEST(Arm64InsnTest, AsmConvertF64I64x4TieAway) {
  constexpr auto AsmFcvtas = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtas %0.2d, %1.2d");
  __uint128_t arg1 = MakeF64x2(-7.50, -6.75);
  ASSERT_EQ(AsmFcvtas(arg1), MakeUInt128(0xfffffffffffffff8ULL, 0xfffffffffffffff9ULL));
  __uint128_t arg2 = MakeF64x2(-6.50, -6.25);
  ASSERT_EQ(AsmFcvtas(arg2), MakeUInt128(0xfffffffffffffff9ULL, 0xfffffffffffffffaULL));
  __uint128_t arg3 = MakeF64x2(6.25, 6.50);
  ASSERT_EQ(AsmFcvtas(arg3), MakeUInt128(0x0000000000000006ULL, 0x0000000000000007ULL));
  __uint128_t arg4 = MakeF64x2(6.75, 7.50);
  ASSERT_EQ(AsmFcvtas(arg4), MakeUInt128(0x0000000000000007ULL, 0x0000000000000008ULL));
}

TEST(Arm64InsnTest, AsmConvertF64U64x4TieAway) {
  constexpr auto AsmFcvtau = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtau %0.2d, %1.2d");
  __uint128_t arg1 = MakeF64x2(-7.50, -6.75);
  ASSERT_EQ(AsmFcvtau(arg1), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  __uint128_t arg2 = MakeF64x2(-6.50, -6.25);
  ASSERT_EQ(AsmFcvtau(arg2), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  __uint128_t arg3 = MakeF64x2(6.25, 6.50);
  ASSERT_EQ(AsmFcvtau(arg3), MakeUInt128(0x0000000000000006ULL, 0x0000000000000007ULL));
  __uint128_t arg4 = MakeF64x2(6.75, 7.50);
  ASSERT_EQ(AsmFcvtau(arg4), MakeUInt128(0x0000000000000007ULL, 0x0000000000000008ULL));
}

TEST(Arm64InsnTest, AsmConvertF64I64x4NegInf) {
  constexpr auto AsmFcvtms = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtms %0.2d, %1.2d");
  __uint128_t arg1 = MakeF64x2(-7.50, -6.75);
  ASSERT_EQ(AsmFcvtms(arg1), MakeUInt128(0xfffffffffffffff8ULL, 0xfffffffffffffff9ULL));
  __uint128_t arg2 = MakeF64x2(-6.50, -6.25);
  ASSERT_EQ(AsmFcvtms(arg2), MakeUInt128(0xfffffffffffffff9ULL, 0xfffffffffffffff9ULL));
  __uint128_t arg3 = MakeF64x2(6.25, 6.50);
  ASSERT_EQ(AsmFcvtms(arg3), MakeUInt128(0x0000000000000006ULL, 0x0000000000000006ULL));
  __uint128_t arg4 = MakeF64x2(6.75, 7.50);
  ASSERT_EQ(AsmFcvtms(arg4), MakeUInt128(0x0000000000000006ULL, 0x0000000000000007ULL));
}

TEST(Arm64InsnTest, AsmConvertF64U64x4NegInf) {
  constexpr auto AsmFcvtmu = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtmu %0.2d, %1.2d");
  __uint128_t arg1 = MakeF64x2(-7.50, -6.75);
  ASSERT_EQ(AsmFcvtmu(arg1), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  __uint128_t arg2 = MakeF64x2(-6.50, -6.25);
  ASSERT_EQ(AsmFcvtmu(arg2), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  __uint128_t arg3 = MakeF64x2(6.25, 6.50);
  ASSERT_EQ(AsmFcvtmu(arg3), MakeUInt128(0x0000000000000006ULL, 0x0000000000000006ULL));
  __uint128_t arg4 = MakeF64x2(6.75, 7.50);
  ASSERT_EQ(AsmFcvtmu(arg4), MakeUInt128(0x0000000000000006ULL, 0x0000000000000007ULL));
}

TEST(Arm64InsnTest, AsmConvertF64I64x4TieEven) {
  constexpr auto AsmFcvtns = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtns %0.2d, %1.2d");
  __uint128_t arg1 = MakeF64x2(-7.50, -6.75);
  ASSERT_EQ(AsmFcvtns(arg1), MakeUInt128(0xfffffffffffffff8ULL, 0xfffffffffffffff9ULL));
  __uint128_t arg2 = MakeF64x2(-6.50, -6.25);
  ASSERT_EQ(AsmFcvtns(arg2), MakeUInt128(0xfffffffffffffffaULL, 0xfffffffffffffffaULL));
  __uint128_t arg3 = MakeF64x2(6.25, 6.50);
  ASSERT_EQ(AsmFcvtns(arg3), MakeUInt128(0x0000000000000006ULL, 0x0000000000000006ULL));
  __uint128_t arg4 = MakeF64x2(6.75, 7.50);
  ASSERT_EQ(AsmFcvtns(arg4), MakeUInt128(0x0000000000000007ULL, 0x0000000000000008ULL));
}

TEST(Arm64InsnTest, AsmConvertF64U64x4TieEven) {
  constexpr auto AsmFcvtnu = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtnu %0.2d, %1.2d");
  __uint128_t arg1 = MakeF64x2(-7.50, -6.75);
  ASSERT_EQ(AsmFcvtnu(arg1), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  __uint128_t arg2 = MakeF64x2(-6.50, -6.25);
  ASSERT_EQ(AsmFcvtnu(arg2), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  __uint128_t arg3 = MakeF64x2(6.25, 6.50);
  ASSERT_EQ(AsmFcvtnu(arg3), MakeUInt128(0x0000000000000006ULL, 0x0000000000000006ULL));
  __uint128_t arg4 = MakeF64x2(6.75, 7.50);
  ASSERT_EQ(AsmFcvtnu(arg4), MakeUInt128(0x0000000000000007ULL, 0x0000000000000008ULL));
}

TEST(Arm64InsnTest, AsmConvertF64I64x4PosInf) {
  constexpr auto AsmFcvtps = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtps %0.2d, %1.2d");
  __uint128_t arg1 = MakeF64x2(-7.50, -6.75);
  ASSERT_EQ(AsmFcvtps(arg1), MakeUInt128(0xfffffffffffffff9ULL, 0xfffffffffffffffaULL));
  __uint128_t arg2 = MakeF64x2(-6.50, -6.25);
  ASSERT_EQ(AsmFcvtps(arg2), MakeUInt128(0xfffffffffffffffaULL, 0xfffffffffffffffaULL));
  __uint128_t arg3 = MakeF64x2(6.25, 6.50);
  ASSERT_EQ(AsmFcvtps(arg3), MakeUInt128(0x0000000000000007ULL, 0x0000000000000007ULL));
  __uint128_t arg4 = MakeF64x2(6.75, 7.50);
  ASSERT_EQ(AsmFcvtps(arg4), MakeUInt128(0x0000000000000007ULL, 0x0000000000000008ULL));
}

TEST(Arm64InsnTest, AsmConvertF64U64x4PosInf) {
  constexpr auto AsmFcvtpu = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtpu %0.2d, %1.2d");
  __uint128_t arg1 = MakeF64x2(-7.50, -6.75);
  ASSERT_EQ(AsmFcvtpu(arg1), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  __uint128_t arg2 = MakeF64x2(-6.50, -6.25);
  ASSERT_EQ(AsmFcvtpu(arg2), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  __uint128_t arg3 = MakeF64x2(6.25, 6.50);
  ASSERT_EQ(AsmFcvtpu(arg3), MakeUInt128(0x0000000000000007ULL, 0x0000000000000007ULL));
  __uint128_t arg4 = MakeF64x2(6.75, 7.50);
  ASSERT_EQ(AsmFcvtpu(arg4), MakeUInt128(0x0000000000000007ULL, 0x0000000000000008ULL));
}

TEST(Arm64InsnTest, AsmConvertF64I64x4Truncate) {
  constexpr auto AsmFcvtzs = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtzs %0.2d, %1.2d");
  __uint128_t arg1 = MakeF64x2(-7.50, -6.75);
  ASSERT_EQ(AsmFcvtzs(arg1), MakeUInt128(0xfffffffffffffff9ULL, 0xfffffffffffffffaULL));
  __uint128_t arg2 = MakeF64x2(-6.50, -6.25);
  ASSERT_EQ(AsmFcvtzs(arg2), MakeUInt128(0xfffffffffffffffaULL, 0xfffffffffffffffaULL));
  __uint128_t arg3 = MakeF64x2(6.25, 6.50);
  ASSERT_EQ(AsmFcvtzs(arg3), MakeUInt128(0x0000000000000006ULL, 0x0000000000000006ULL));
  __uint128_t arg4 = MakeF64x2(6.75, 7.50);
  ASSERT_EQ(AsmFcvtzs(arg4), MakeUInt128(0x0000000000000006ULL, 0x0000000000000007ULL));
}

TEST(Arm64InsnTest, AsmConvertF64U64x4Truncate) {
  constexpr auto AsmFcvtzu = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtzu %0.2d, %1.2d");
  __uint128_t arg1 = MakeF64x2(-7.50, -6.75);
  ASSERT_EQ(AsmFcvtzu(arg1), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  __uint128_t arg2 = MakeF64x2(-6.50, -6.25);
  ASSERT_EQ(AsmFcvtzu(arg2), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  __uint128_t arg3 = MakeF64x2(6.25, 6.50);
  ASSERT_EQ(AsmFcvtzu(arg3), MakeUInt128(0x0000000000000006ULL, 0x0000000000000006ULL));
  __uint128_t arg4 = MakeF64x2(6.75, 7.50);
  ASSERT_EQ(AsmFcvtzu(arg4), MakeUInt128(0x0000000000000006ULL, 0x0000000000000007ULL));
}
1313 
// SCVTF (scalar, fixed-point): int32 (general register) -> FP32 with 7
// fractional bits, i.e. result = (double)input / 2^7.
TEST(Arm64InsnTest, AsmConvertX32F32Scalar) {
  constexpr auto AsmConvertX32F32 = ASM_INSN_WRAP_FUNC_W_RES_R_ARG("scvtf %s0, %w1, #7");

  ASSERT_EQ(AsmConvertX32F32(0x610), MakeUInt128(0x41420000ULL, 0U));

  // 1U << 31 is interpreted as INT32_MIN by the signed conversion.
  ASSERT_EQ(AsmConvertX32F32(1U << 31), MakeUInt128(0xcb800000ULL, 0U));
}

// SCVTF (scalar, fixed-point): int32 -> FP64 with 8 fractional bits.
TEST(Arm64InsnTest, AsmConvertX32F64Scalar) {
  constexpr auto AsmConvertX32F64 = ASM_INSN_WRAP_FUNC_W_RES_R_ARG("scvtf %d0, %w1, #8");

  ASSERT_EQ(AsmConvertX32F64(0x487), MakeUInt128(0x40121c0000000000ULL, 0U));

  // Note: 1 << 31 (signed) yields INT_MIN here; only the low 32 bits reach
  // %w1, so this is equivalent to the 1U << 31 used by the sibling tests.
  ASSERT_EQ(AsmConvertX32F64(1 << 31), MakeUInt128(0xc160000000000000ULL, 0U));
}

// SCVTF (SIMD scalar, fixed-point): int32 in a vector register -> FP32
// with 7 fractional bits.
TEST(Arm64InsnTest, AsmConvertX32F32) {
  constexpr auto AsmConvertX32F32 = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("scvtf %s0, %s1, #7");

  ASSERT_EQ(AsmConvertX32F32(0x123), MakeUInt128(0x40118000ULL, 0U));

  ASSERT_EQ(AsmConvertX32F32(1U << 31), MakeUInt128(0xcb800000ULL, 0U));
}

// SCVTF (vector, 4S, fixed-point): four int32 lanes -> FP32 with 11
// fractional bits; mixes negative (sign bit set) and positive lanes.
TEST(Arm64InsnTest, AsmConvertX32x4F32x4) {
  constexpr auto AsmConvertX32F32 = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("scvtf %0.4s, %1.4s, #11");
  __uint128_t arg = MakeUInt128(0x80000000ffff9852ULL, 0x0000110200001254ULL);
  ASSERT_EQ(AsmConvertX32F32(arg), MakeUInt128(0xc9800000c14f5c00ULL, 0x400810004012a000ULL));
}

// UCVTF (scalar, fixed-point): uint32 (general register) -> FP32 with 7
// fractional bits.
TEST(Arm64InsnTest, AsmConvertUX32F32Scalar) {
  constexpr auto AsmConvertUX32F32 = ASM_INSN_WRAP_FUNC_W_RES_R_ARG("ucvtf %s0, %w1, #7");

  ASSERT_EQ(AsmConvertUX32F32(0x857), MakeUInt128(0x41857000ULL, 0U));

  // Unlike scvtf, 1U << 31 stays a large positive value.
  ASSERT_EQ(AsmConvertUX32F32(1U << 31), MakeUInt128(0x4b800000ULL, 0U));

  // Test the default rounding behavior (FPRounding_TIEEVEN).
  ASSERT_EQ(AsmConvertUX32F32(0x80000080), MakeUInt128(0x4b800000ULL, 0U));
  ASSERT_EQ(AsmConvertUX32F32(0x800000c0), MakeUInt128(0x4b800001ULL, 0U));
  ASSERT_EQ(AsmConvertUX32F32(0x80000140), MakeUInt128(0x4b800001ULL, 0U));
  ASSERT_EQ(AsmConvertUX32F32(0x80000180), MakeUInt128(0x4b800002ULL, 0U));
}

// UCVTF (scalar, fixed-point): uint32 -> FP64 with 8 fractional bits.
TEST(Arm64InsnTest, AsmConvertUX32F64Scalar) {
  constexpr auto AsmConvertUX32F64 = ASM_INSN_WRAP_FUNC_W_RES_R_ARG("ucvtf %d0, %w1, #8");

  ASSERT_EQ(AsmConvertUX32F64(0x361), MakeUInt128(0x400b080000000000ULL, 0U));

  ASSERT_EQ(AsmConvertUX32F64(1U << 31), MakeUInt128(0x4160000000000000ULL, 0U));
}

// UCVTF (SIMD scalar, fixed-point): uint32 in a vector register -> FP32
// with 7 fractional bits.
TEST(Arm64InsnTest, AsmConvertUX32F32) {
  constexpr auto AsmConvertUX32F32 = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("ucvtf %s0, %s1, #7");

  ASSERT_EQ(AsmConvertUX32F32(0x456), MakeUInt128(0x410ac000ULL, 0U));

  ASSERT_EQ(AsmConvertUX32F32(1U << 31), MakeUInt128(0x4b800000ULL, 0U));
}

// UCVTF (vector, 4S, fixed-point): four uint32 lanes -> FP32 with 11
// fractional bits.
TEST(Arm64InsnTest, AsmConvertUX32x4F32x4) {
  constexpr auto AsmConvertUX32F32 = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("ucvtf %0.4s, %1.4s, #11");
  __uint128_t arg = MakeUInt128(0x8000000000008023ULL, 0x0000201800001956ULL);
  ASSERT_EQ(AsmConvertUX32F32(arg), MakeUInt128(0x4980000041802300ULL, 0x40806000404ab000ULL));
}

// SCVTF (scalar, fixed-point): int64 (general register) -> FP32 with 10
// fractional bits.
TEST(Arm64InsnTest, AsmConvertX64F32Scalar) {
  constexpr auto AsmConvertX64F32 = ASM_INSN_WRAP_FUNC_W_RES_R_ARG("scvtf %s0, %x1, #10");

  ASSERT_EQ(AsmConvertX64F32(0x2234), MakeUInt128(0x4108d000ULL, 0U));
}

// SCVTF (scalar, fixed-point): int64 -> FP64 with 10 fractional bits.
TEST(Arm64InsnTest, AsmConvertX64F64Scalar) {
  constexpr auto AsmConvertX64F64 = ASM_INSN_WRAP_FUNC_W_RES_R_ARG("scvtf %d0, %x1, #10");

  ASSERT_EQ(AsmConvertX64F64(0x1324), MakeUInt128(0x4013240000000000ULL, 0U));
}

// UCVTF (scalar, fixed-point): uint64 (general register) -> FP32 with 10
// fractional bits.
TEST(Arm64InsnTest, AsmConvertUX64F32Scalar) {
  constexpr auto AsmConvertUX64F32 = ASM_INSN_WRAP_FUNC_W_RES_R_ARG("ucvtf %s0, %x1, #10");

  ASSERT_EQ(AsmConvertUX64F32(0x5763), MakeUInt128(0x41aec600ULL, 0U));
}

// UCVTF (scalar, fixed-point): uint64 -> FP64 with 10 fractional bits.
TEST(Arm64InsnTest, AsmConvertUX64F64Scalar) {
  constexpr auto AsmConvertUX64F64 = ASM_INSN_WRAP_FUNC_W_RES_R_ARG("ucvtf %d0, %x1, #10");

  ASSERT_EQ(AsmConvertUX64F64(0x2217), MakeUInt128(0x40210b8000000000ULL, 0U));
}

// SCVTF (SIMD scalar, fixed-point): int64 in a vector register -> FP64
// with 12 fractional bits.
TEST(Arm64InsnTest, AsmConvertX64F64) {
  constexpr auto AsmConvertX64F64 = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("scvtf %d0, %d1, #12");

  ASSERT_EQ(AsmConvertX64F64(0x723), MakeUInt128(0x3fdc8c0000000000ULL, 0U));

  // 1ULL << 63 is INT64_MIN for the signed conversion.
  ASSERT_EQ(AsmConvertX64F64(1ULL << 63), MakeUInt128(0xc320000000000000ULL, 0U));
}

// UCVTF (SIMD scalar, fixed-point): uint64 in a vector register -> FP64
// with 12 fractional bits.
TEST(Arm64InsnTest, AsmConvertUX64F64) {
  constexpr auto AsmConvertUX64F64 = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("ucvtf %d0, %d1, #12");

  ASSERT_EQ(AsmConvertUX64F64(0x416), MakeUInt128(0x3fd0580000000000ULL, 0U));

  ASSERT_EQ(AsmConvertUX64F64(1ULL << 63), MakeUInt128(0x4320000000000000ULL, 0U));
}

// UCVTF with the maximum fractional-bit count (#64): 2^63 / 2^64 = 0.5.
TEST(Arm64InsnTest, AsmConvertUX64F64With64BitFraction) {
  constexpr auto AsmConvertUX64F64 = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("ucvtf %d0, %d1, #64");

  ASSERT_EQ(AsmConvertUX64F64(1ULL << 63), MakeUInt128(0x3fe0'0000'0000'0000ULL, 0U));
}

// SCVTF (vector, 2D, fixed-point): two int64 lanes -> FP64 with 12
// fractional bits.
TEST(Arm64InsnTest, AsmConvertX64x2F64x2) {
  constexpr auto AsmConvertX64F64 = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("scvtf %0.2d, %1.2d, #12");
  __uint128_t arg = MakeUInt128(1ULL << 63, 0x8086U);
  ASSERT_EQ(AsmConvertX64F64(arg), MakeUInt128(0xc320000000000000ULL, 0x402010c000000000ULL));
}

// UCVTF (vector, 2D, fixed-point): two uint64 lanes -> FP64 with 12
// fractional bits.
TEST(Arm64InsnTest, AsmConvertUX64x2F64x2) {
  constexpr auto AsmConvertUX64F64 = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("ucvtf %0.2d, %1.2d, #12");
  __uint128_t arg = MakeUInt128(1ULL << 63, 0x6809U);
  ASSERT_EQ(AsmConvertUX64F64(arg), MakeUInt128(0x4320000000000000ULL, 0x401a024000000000ULL));
}

// UCVTF (vector, 2D) with the maximum fractional-bit count (#64); the
// 64-bit inputs are scaled into [0, 1) and rounded to double precision.
TEST(Arm64InsnTest, AsmConvertUX64x2F64x2With64BitFraction) {
  constexpr auto AsmConvertUX64F64 = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("ucvtf %0.2d, %1.2d, #64");
  __uint128_t arg = MakeUInt128(0x7874'211c'b7aa'f597ULL, 0x2c0f'5504'd25e'f673ULL);
  ASSERT_EQ(AsmConvertUX64F64(arg),
            MakeUInt128(0x3fde'1d08'472d'eabdULL, 0x3fc6'07aa'8269'2f7bULL));
}
1444 
// FCVTZS (scalar, fixed-point): FP32 -> int32 with 16 fractional bits,
// i.e. result = truncate(input * 2^16).
TEST(Arm64InsnTest, AsmConvertF32X32Scalar) {
  constexpr auto AsmConvertF32X32 = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtzs %w0, %s1, #16");
  uint32_t arg1 = 0x4091eb85U;  // 4.56 in float
  ASSERT_EQ(AsmConvertF32X32(arg1), MakeUInt128(0x00048f5cU, 0U));

  uint32_t arg2 = 0xc0d80000U;  // -6.75 in float
  ASSERT_EQ(AsmConvertF32X32(arg2), MakeUInt128(0xfff94000U, 0U));

  // NaN converts to 0.
  ASSERT_EQ(AsmConvertF32X32(kDefaultNaN32), MakeUInt128(bit_cast<uint32_t>(0.0f), 0U));
}

// NOTE(review): test name says UX32 (unsigned) but the instruction is
// fcvtzs (signed) and the expected values below match signed conversion —
// looks like a naming mismatch with the fcvtzu variant; confirm intent.
TEST(Arm64InsnTest, AsmConvertF32UX32Scalar) {
  constexpr auto AsmConvertF32UX32 = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtzs %w0, %s1, #16");
  uint32_t arg1 = 0x41223d71U;  // 10.14 in float
  ASSERT_EQ(AsmConvertF32UX32(arg1), MakeUInt128(0x000a23d7U, 0U));

  uint32_t arg2 = 0xc1540000U;  // -13.25 in float
  ASSERT_EQ(AsmConvertF32UX32(arg2), MakeUInt128(0xfff2c000U, 0U));

  // NaN converts to 0.
  ASSERT_EQ(AsmConvertF32UX32(kDefaultNaN32), MakeUInt128(bit_cast<uint32_t>(0.0f), 0U));
}

// FCVTZS with 31 fractional bits: 0.25 * 2^31 = 0x20000000.
// NOTE(review): name says UX32 but the instruction is fcvtzs (signed);
// confirm intent.
TEST(Arm64InsnTest, AsmConvertF32UX32With31FractionalBits) {
  constexpr auto AsmConvertF32UX32 = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtzs %w0, %s1, #31");
  uint32_t arg1 = bit_cast<uint32_t>(0.25f);
  ASSERT_EQ(AsmConvertF32UX32(arg1), MakeUInt128(0x20000000U, 0U));
}

// FCVTZS (scalar, fixed-point): FP64 -> int32 with 16 fractional bits.
TEST(Arm64InsnTest, AsmConvertF64X32Scalar) {
  constexpr auto AsmConvertF64X32 = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtzs %w0, %d1, #16");
  uint64_t arg1 = 0x401e8f5c28f5c28fULL;  // 7.46 in double
  ASSERT_EQ(AsmConvertF64X32(arg1), MakeUInt128(0x0007a3d7U, 0U));

  uint64_t arg2 = 0xc040200000000000ULL;  // -32.44 in double
  ASSERT_EQ(AsmConvertF64X32(arg2), MakeUInt128(0xffdfc000U, 0U));
}

// FCVTZS (scalar, fixed-point): FP32 -> int64 with 16 fractional bits.
TEST(Arm64InsnTest, AsmConvertF32X64Scalar) {
  constexpr auto AsmFcvtzs = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtzs %x0, %s1, #16");
  uint64_t arg1 = bit_cast<uint32_t>(7.50f);
  ASSERT_EQ(AsmFcvtzs(arg1), MakeUInt128(0x0000000000078000ULL, 0ULL));

  uint64_t arg2 = bit_cast<uint32_t>(-6.50f);
  ASSERT_EQ(AsmFcvtzs(arg2), MakeUInt128(0xfffffffffff98000ULL, 0ULL));
}

// FCVTZS with 63 fractional bits: 0.25 * 2^63 = 0x2000000000000000.
// NOTE(review): name says UX64 but the instruction is fcvtzs (signed);
// confirm intent.
TEST(Arm64InsnTest, AsmConvertF32UX64With63FractionalBits) {
  constexpr auto AsmConvertF32UX64 = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtzs %x0, %s1, #63");
  uint32_t arg1 = bit_cast<uint32_t>(0.25f);
  ASSERT_EQ(AsmConvertF32UX64(arg1), MakeUInt128(0x20000000'00000000ULL, 0U));
}

// FCVTZS (scalar, fixed-point): FP64 -> int64 with 16 fractional bits.
TEST(Arm64InsnTest, AsmConvertF64X64Scalar) {
  constexpr auto AsmFcvtzs = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtzs %x0, %d1, #16");
  uint64_t arg1 = bit_cast<uint64_t>(7.50);
  ASSERT_EQ(AsmFcvtzs(arg1), MakeUInt128(0x0000000000078000ULL, 0ULL));

  uint64_t arg2 = bit_cast<uint64_t>(-6.50);
  ASSERT_EQ(AsmFcvtzs(arg2), MakeUInt128(0xfffffffffff98000ULL, 0ULL));
}

// FCVTZS (vector, 4S, fixed-point): FP32 lanes -> int32 with 2 fractional
// bits; -5.5 * 4 = -22 = 0xffffffea, 6.5 * 4 = 26 = 0x1a.
TEST(Arm64InsnTest, AsmConvertF32X32x4) {
  constexpr auto AsmFcvtzs = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtzs %0.4s, %1.4s, #2");
  __uint128_t res = AsmFcvtzs(MakeF32x4(-5.5f, -0.0f, 0.0f, 6.5f));
  ASSERT_EQ(res, MakeUInt128(0x00000000ffffffeaULL, 0x0000001a00000000ULL));
}
1511 
// FCVTZU (scalar, fixed-point): FP64 -> uint32 with 16 fractional bits.
// Negative inputs clamp to 0 for the unsigned conversion.
TEST(Arm64InsnTest, AsmConvertF64UX32Scalar) {
  constexpr auto AsmConvertF64UX32 = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtzu %w0, %d1, #16");
  uint64_t arg1 = 0x4020947ae147ae14ULL;  // 8.29 in double
  ASSERT_EQ(AsmConvertF64UX32(arg1), MakeUInt128(0x00084a3dU, 0U));

  uint64_t arg2 = 0xc023666666666666ULL;  // -9.70 in double
  ASSERT_EQ(AsmConvertF64UX32(arg2), MakeUInt128(0U, 0U));
}

// FCVTZU (scalar, fixed-point): FP32 -> uint64 with 16 fractional bits.
TEST(Arm64InsnTest, AsmConvertF32UX64Scalar) {
  constexpr auto AsmFcvtzu = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtzu %x0, %s1, #16");
  uint64_t arg1 = bit_cast<uint32_t>(7.50f);
  ASSERT_EQ(AsmFcvtzu(arg1), MakeUInt128(0x0000000000078000ULL, 0ULL));
  uint64_t arg2 = bit_cast<uint32_t>(-6.50f);
  ASSERT_EQ(AsmFcvtzu(arg2), 0ULL);
}

// FCVTZU (scalar, fixed-point): FP64 -> uint64 with 16 fractional bits.
TEST(Arm64InsnTest, AsmConvertF64UX64Scalar) {
  constexpr auto AsmFcvtzu = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtzu %x0, %d1, #16");
  uint64_t arg1 = bit_cast<uint64_t>(7.50);
  ASSERT_EQ(AsmFcvtzu(arg1), MakeUInt128(0x0000000000078000ULL, 0ULL));

  uint64_t arg2 = bit_cast<uint64_t>(-6.50);
  ASSERT_EQ(AsmFcvtzu(arg2), MakeUInt128(0ULL, 0ULL));
}

// FCVTZU with the maximum fractional-bit count (#64):
// 0.625 * 2^64 = 0xa000000000000000.
TEST(Arm64InsnTest, AsmConvertF64UX64ScalarWith64BitFraction) {
  constexpr auto AsmFcvtzu = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtzu %x0, %d1, #64");
  uint64_t arg = bit_cast<uint64_t>(0.625);
  ASSERT_EQ(AsmFcvtzu(arg), MakeUInt128(0xa000'0000'0000'0000ULL, 0ULL));
}
1543 
// FCVTZU (vector, 4S, fixed-point): FP32 lanes -> uint32 with 2 fractional
// bits, rounding toward zero.  Negative lanes clamp to 0 for the unsigned
// conversion; 6.5 * 2^2 = 26 = 0x1a.
TEST(Arm64InsnTest, AsmConvertF32UX32x4) {
  // Local renamed from AsmFcvtzs: the instruction under test is fcvtzu
  // (unsigned), matching the sibling fcvtzu tests above.
  constexpr auto AsmFcvtzu = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtzu %0.4s, %1.4s, #2");
  __uint128_t res = AsmFcvtzu(MakeF32x4(-5.5f, -0.0f, 0.0f, 6.5f));
  ASSERT_EQ(res, MakeUInt128(0x0000000000000000ULL, 0x0000001a00000000ULL));
}
1549 
// FCSEL (single precision): selects the first FP operand when the condition
// holds, otherwise the second.  CMP 3, 7 clears Z, so "eq" picks fp_arg2's
// low word and "ne" picks fp_arg1's low word.
TEST(Arm64InsnTest, Fp32ConditionalSelect) {
  uint64_t int_arg1 = 3;
  uint64_t int_arg2 = 7;
  uint64_t fp_arg1 = 0xfedcba9876543210ULL;
  uint64_t fp_arg2 = 0x0123456789abcdefULL;
  __uint128_t res;

  asm("cmp %x1,%x2\n\t"
      "fcsel %s0, %s3, %s4, eq"
      : "=w"(res)
      : "r"(int_arg1), "r"(int_arg2), "w"(fp_arg1), "w"(fp_arg2));
  // 3 != 7, so the "eq" select yields the second operand (low 32 bits only).
  ASSERT_EQ(res, MakeUInt128(0x89abcdefULL, 0U));

  asm("cmp %x1,%x2\n\t"
      "fcsel %s0, %s3, %s4, ne"
      : "=w"(res)
      : "r"(int_arg1), "r"(int_arg2), "w"(fp_arg1), "w"(fp_arg2));
  ASSERT_EQ(res, MakeUInt128(0x76543210ULL, 0U));
}

// FCSEL (double precision): same as above but CMP 8, 3 clears Z, so "eq"
// picks the second 64-bit operand and "ne" picks the first.
TEST(Arm64InsnTest, Fp64ConditionalSelect) {
  uint64_t int_arg1 = 8;
  uint64_t int_arg2 = 3;
  uint64_t fp_arg1 = 0xfedcba9876543210ULL;
  uint64_t fp_arg2 = 0x0123456789abcdefULL;
  __uint128_t res;

  asm("cmp %x1,%x2\n\t"
      "fcsel %d0, %d3, %d4, eq"
      : "=w"(res)
      : "r"(int_arg1), "r"(int_arg2), "w"(fp_arg1), "w"(fp_arg2));
  ASSERT_EQ(res, MakeUInt128(0x0123456789abcdefULL, 0U));

  asm("cmp %x1,%x2\n\t"
      "fcsel %d0, %d3, %d4, ne"
      : "=w"(res)
      : "r"(int_arg1), "r"(int_arg2), "w"(fp_arg1), "w"(fp_arg2));
  ASSERT_EQ(res, MakeUInt128(0xfedcba9876543210ULL, 0U));
}
1589 
// FRINTP (single precision): rounds toward +infinity.  The high 32 bits of
// the input are garbage to verify that only the S-sized lane is read.
TEST(Arm64InsnTest, RoundUpFp32) {
  // The lower 32-bit represents 2.7182817 in float.
  uint64_t fp_arg = 0xdeadbeef402df854ULL;
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("frintp %s0, %s1")(fp_arg);
  ASSERT_EQ(res, MakeUInt128(0x40400000ULL, 0U));  // 3.0 in float
}

// FRINTP (double precision): rounds toward +infinity.
TEST(Arm64InsnTest, RoundUpFp64) {
  // 2.7182817 in double.
  uint64_t fp_arg = 0x4005BF0A8B145769ULL;
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("frintp %d0, %d1")(fp_arg);
  ASSERT_EQ(res, MakeUInt128(0x4008000000000000ULL, 0U));  // 3.0 in double
}
1603 
// FRINTA (double precision): round to nearest integral, ties away from zero.
TEST(Arm64InsnTest, RoundToIntNearestTiesAwayFp64) {
  constexpr auto AsmFrinta = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("frinta %d0, %d1");

  // -7.50 -> -8.00 (ties away from zero as opposed to even)
  ASSERT_EQ(AsmFrinta(0xc01E000000000000ULL), MakeUInt128(0xc020000000000000ULL, 0U));

  // -6.75 -> -7.00
  ASSERT_EQ(AsmFrinta(0xc01B000000000000ULL), MakeUInt128(0xc01c000000000000ULL, 0U));

  // -6.50 -> -7.00 (ties away from zero as opposed to even)
  ASSERT_EQ(AsmFrinta(0xc01A000000000000ULL), MakeUInt128(0xc01c000000000000ULL, 0U));

  // -6.25 -> -6.00
  ASSERT_EQ(AsmFrinta(0xc019000000000000ULL), MakeUInt128(0xc018000000000000ULL, 0U));

  // 6.25 -> 6.00
  ASSERT_EQ(AsmFrinta(0x4019000000000000ULL), MakeUInt128(0x4018000000000000ULL, 0U));

  // 6.50 -> 7.00 (ties away from zero as opposed to even)
  ASSERT_EQ(AsmFrinta(0x401A000000000000ULL), MakeUInt128(0x401c000000000000ULL, 0U));

  // 6.75 -> 7.00
  ASSERT_EQ(AsmFrinta(0x401B000000000000ULL), MakeUInt128(0x401c000000000000ULL, 0U));

  // 7.50 -> 8.00 (ties away from zero as opposed to even)
  ASSERT_EQ(AsmFrinta(0x401E000000000000ULL), MakeUInt128(0x4020000000000000ULL, 0U));

  // -0.49999999999999994 -> -0.0 (should not "tie away" since -0.4999... != -0.5)
  ASSERT_EQ(AsmFrinta(0xBFDFFFFFFFFFFFFF), MakeUInt128(0x8000000000000000U, 0U));

  // A number too large to have fractional precision, should not change upon rounding with tie-away
  ASSERT_EQ(AsmFrinta(bit_cast<uint64_t>(0.5 / std::numeric_limits<double>::epsilon())),
            MakeUInt128(bit_cast<uint64_t>(0.5 / std::numeric_limits<double>::epsilon()), 0U));
  ASSERT_EQ(AsmFrinta(bit_cast<uint64_t>(-0.5 / std::numeric_limits<double>::epsilon())),
            MakeUInt128(bit_cast<uint64_t>(-0.5 / std::numeric_limits<double>::epsilon()), 0U));
  ASSERT_EQ(AsmFrinta(bit_cast<uint64_t>(0.75 / std::numeric_limits<double>::epsilon())),
            MakeUInt128(bit_cast<uint64_t>(0.75 / std::numeric_limits<double>::epsilon()), 0U));
  ASSERT_EQ(AsmFrinta(bit_cast<uint64_t>(-0.75 / std::numeric_limits<double>::epsilon())),
            MakeUInt128(bit_cast<uint64_t>(-0.75 / std::numeric_limits<double>::epsilon()), 0U));
  ASSERT_EQ(AsmFrinta(bit_cast<uint64_t>(1.0 / std::numeric_limits<double>::epsilon())),
            MakeUInt128(bit_cast<uint64_t>(1.0 / std::numeric_limits<double>::epsilon()), 0U));
  ASSERT_EQ(AsmFrinta(bit_cast<uint64_t>(-1.0 / std::numeric_limits<double>::epsilon())),
            MakeUInt128(bit_cast<uint64_t>(-1.0 / std::numeric_limits<double>::epsilon()), 0U));
  ASSERT_EQ(AsmFrinta(bit_cast<uint64_t>(2.0 / std::numeric_limits<double>::epsilon())),
            MakeUInt128(bit_cast<uint64_t>(2.0 / std::numeric_limits<double>::epsilon()), 0U));
  ASSERT_EQ(AsmFrinta(bit_cast<uint64_t>(-2.0 / std::numeric_limits<double>::epsilon())),
            MakeUInt128(bit_cast<uint64_t>(-2.0 / std::numeric_limits<double>::epsilon()), 0U));
  ASSERT_EQ(AsmFrinta(bit_cast<uint64_t>(1.0e100)), MakeUInt128(bit_cast<uint64_t>(1.0e100), 0U));
  ASSERT_EQ(AsmFrinta(bit_cast<uint64_t>(-1.0e100)), MakeUInt128(bit_cast<uint64_t>(-1.0e100), 0U));
}
1654 
// FRINTA (single precision): round to nearest integral, ties away from zero.
TEST(Arm64InsnTest, RoundToIntNearestTiesAwayFp32) {
  constexpr auto AsmFrinta = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("frinta %s0, %s1");

  // -7.50 -> -8.00 (ties away from zero as opposed to even)
  ASSERT_EQ(AsmFrinta(0xc0f00000U), MakeUInt128(0xc1000000U, 0U));

  // -6.75 -> -7.00
  ASSERT_EQ(AsmFrinta(0xc0d80000U), MakeUInt128(0xc0e00000U, 0U));

  // -6.50 -> -7.00 (ties away from zero as opposed to even)
  ASSERT_EQ(AsmFrinta(0xc0d00000U), MakeUInt128(0xc0e00000U, 0U));

  // -6.25 -> -6.00
  ASSERT_EQ(AsmFrinta(0xc0c80000U), MakeUInt128(0xc0c00000U, 0U));

  // 6.25 -> 6.00
  ASSERT_EQ(AsmFrinta(0x40c80000U), MakeUInt128(0x40c00000U, 0U));

  // 6.50 -> 7.00 (ties away from zero as opposed to even)
  ASSERT_EQ(AsmFrinta(0x40d00000U), MakeUInt128(0x40e00000U, 0U));

  // 6.75 -> 7.00
  ASSERT_EQ(AsmFrinta(0x40d80000U), MakeUInt128(0x40e00000U, 0U));

  // 7.50 -> 8.00 (ties away from zero as opposed to even)
  ASSERT_EQ(AsmFrinta(0x40f00000U), MakeUInt128(0x41000000U, 0U));

  // -0.49999997019767761 -> -0.0 (should not "tie away" since -0.4999... != -0.5)
  ASSERT_EQ(AsmFrinta(0xbeffffff), MakeUInt128(0x80000000U, 0U));

  // A number too large to have fractional precision, should not change upon rounding with tie-away
  ASSERT_EQ(
      AsmFrinta(bit_cast<uint32_t>(float{0.5 / std::numeric_limits<float>::epsilon()})),
      MakeUInt128(bit_cast<uint32_t>(float{0.5 / std::numeric_limits<float>::epsilon()}), 0U));
  ASSERT_EQ(
      AsmFrinta(bit_cast<uint32_t>(float{-0.5 / std::numeric_limits<float>::epsilon()})),
      MakeUInt128(bit_cast<uint32_t>(float{-0.5 / std::numeric_limits<float>::epsilon()}), 0U));
  ASSERT_EQ(
      AsmFrinta(bit_cast<uint32_t>(float{0.75 / std::numeric_limits<float>::epsilon()})),
      MakeUInt128(bit_cast<uint32_t>(float{0.75 / std::numeric_limits<float>::epsilon()}), 0U));
  ASSERT_EQ(
      AsmFrinta(bit_cast<uint32_t>(float{-0.75 / std::numeric_limits<float>::epsilon()})),
      MakeUInt128(bit_cast<uint32_t>(float{-0.75 / std::numeric_limits<float>::epsilon()}), 0U));
  ASSERT_EQ(
      AsmFrinta(bit_cast<uint32_t>(float{1.0 / std::numeric_limits<float>::epsilon()})),
      MakeUInt128(bit_cast<uint32_t>(float{1.0 / std::numeric_limits<float>::epsilon()}), 0U));
  ASSERT_EQ(
      AsmFrinta(bit_cast<uint32_t>(float{-1.0 / std::numeric_limits<float>::epsilon()})),
      MakeUInt128(bit_cast<uint32_t>(float{-1.0 / std::numeric_limits<float>::epsilon()}), 0U));
  ASSERT_EQ(
      AsmFrinta(bit_cast<uint32_t>(float{2.0 / std::numeric_limits<float>::epsilon()})),
      MakeUInt128(bit_cast<uint32_t>(float{2.0 / std::numeric_limits<float>::epsilon()}), 0U));
  ASSERT_EQ(
      AsmFrinta(bit_cast<uint32_t>(float{-2.0 / std::numeric_limits<float>::epsilon()})),
      MakeUInt128(bit_cast<uint32_t>(float{-2.0 / std::numeric_limits<float>::epsilon()}), 0U));
  ASSERT_EQ(AsmFrinta(bit_cast<uint32_t>(1.0e38f)), MakeUInt128(bit_cast<uint32_t>(1.0e38f), 0U));
  ASSERT_EQ(AsmFrinta(bit_cast<uint32_t>(-1.0e38f)), MakeUInt128(bit_cast<uint32_t>(-1.0e38f), 0U));
}
1713 
// FRINTM (double precision): round to integral, toward -infinity (floor).
TEST(Arm64InsnTest, RoundToIntDownwardFp64) {
  constexpr auto AsmFrintm = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("frintm %d0, %d1");

  // 7.7 -> 7.00
  ASSERT_EQ(AsmFrintm(0x401ecccccccccccdULL), MakeUInt128(0x401c000000000000, 0U));

  // 7.1 -> 7.00
  ASSERT_EQ(AsmFrintm(0x401c666666666666ULL), MakeUInt128(0x401c000000000000, 0U));

  // -7.10 -> -8.00
  ASSERT_EQ(AsmFrintm(0xc01c666666666666ULL), MakeUInt128(0xc020000000000000, 0U));

  // -7.90 -> -8.00
  ASSERT_EQ(AsmFrintm(0xc01f99999999999aULL), MakeUInt128(0xc020000000000000, 0U));

  // 0 -> 0
  ASSERT_EQ(AsmFrintm(0x0000000000000000ULL), MakeUInt128(0x0000000000000000, 0U));

  // -0 -> -0
  ASSERT_EQ(AsmFrintm(0x8000000000000000ULL), MakeUInt128(0x8000000000000000, 0U));
}

// FRINTM (single precision): round to integral, toward -infinity (floor).
TEST(Arm64InsnTest, RoundToIntDownwardFp32) {
  constexpr auto AsmFrintm = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("frintm %s0, %s1");

  // 7.7 -> 7.00
  ASSERT_EQ(AsmFrintm(0x40f66666), 0x40e00000);

  // 7.1 -> 7.00
  ASSERT_EQ(AsmFrintm(0x40e33333), 0x40e00000);

  // -7.10 -> -8.00
  ASSERT_EQ(AsmFrintm(0xc0e33333), 0xc1000000);

  // -7.90 -> -8.00
  ASSERT_EQ(AsmFrintm(0xc0fccccd), 0xc1000000);

  // 0 -> 0
  ASSERT_EQ(AsmFrintm(0x00000000), 0x00000000);

  // -0 -> -0
  ASSERT_EQ(AsmFrintm(0x80000000), 0x80000000);
}
1757 
// FRINTN (double precision): round to nearest integral, ties to even.
TEST(Arm64InsnTest, RoundToIntNearestFp64) {
  constexpr auto AsmFrintn = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("frintn %d0, %d1");

  // 7.5 -> 8.00 (ties to even)
  ASSERT_EQ(AsmFrintn(0x401e000000000000ULL), MakeUInt128(0x4020000000000000, 0U));

  // 8.5 -> 8.00 (ties to even)
  ASSERT_EQ(AsmFrintn(0x4021000000000000), MakeUInt128(0x4020000000000000, 0U));

  // 7.10 -> 7.00
  ASSERT_EQ(AsmFrintn(0x401c666666666666), MakeUInt128(0x401c000000000000, 0U));

  // 7.90 -> 8.00
  ASSERT_EQ(AsmFrintn(0x401f99999999999a), MakeUInt128(0x4020000000000000, 0U));

  // -7.5 -> -8.00 (ties to even)
  ASSERT_EQ(AsmFrintn(0xc01e000000000000), MakeUInt128(0xc020000000000000, 0U));

  // -8.5 -> -8.00 (ties to even)
  ASSERT_EQ(AsmFrintn(0xc021000000000000), MakeUInt128(0xc020000000000000, 0U));

  // -7.10 -> -7.00
  ASSERT_EQ(AsmFrintn(0xc01c666666666666), MakeUInt128(0xc01c000000000000, 0U));

  // -7.90 -> -8.00
  ASSERT_EQ(AsmFrintn(0xc01f99999999999a), MakeUInt128(0xc020000000000000, 0U));

  // 0 -> 0
  ASSERT_EQ(AsmFrintn(0x0000000000000000ULL), MakeUInt128(0x0000000000000000, 0U));

  // -0 -> -0
  ASSERT_EQ(AsmFrintn(0x8000000000000000ULL), MakeUInt128(0x8000000000000000, 0U));
}
1791 
// FRINTN (single precision): round to nearest integral, ties to even.
TEST(Arm64InsnTest, RoundToIntToNearestFp32) {
  constexpr auto AsmFrintn = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("frintn %s0, %s1");

  // 7.5 -> 8.00 (ties to even)
  ASSERT_EQ(AsmFrintn(0x40f00000), 0x41000000);

  // 8.5 -> 8.00 (ties to even)
  ASSERT_EQ(AsmFrintn(0x41080000), 0x41000000);

  // 7.10 -> 7.00
  ASSERT_EQ(AsmFrintn(0x40e33333), 0x40e00000);

  // 7.90 -> 8.00
  ASSERT_EQ(AsmFrintn(0x40fccccd), 0x41000000);

  // -7.5 -> -8.00 (ties to even)
  ASSERT_EQ(AsmFrintn(0xc0f00000), 0xc1000000);

  // -8.5 -> -8.00 (ties to even)
  ASSERT_EQ(AsmFrintn(0xc1080000), 0xc1000000);

  // -7.10 -> -7.00
  ASSERT_EQ(AsmFrintn(0xc0e33333), 0xc0e00000);

  // -7.90 -> -8.00
  ASSERT_EQ(AsmFrintn(0xc0fccccd), 0xc1000000);

  // 0 -> 0
  ASSERT_EQ(AsmFrintn(0x00000000), 0x00000000);

  // -0 -> -0
  ASSERT_EQ(AsmFrintn(0x80000000), 0x80000000);
}

// FRINTZ (double precision): round to integral, toward zero (truncation).
TEST(Arm64InsnTest, RoundToIntTowardZeroFp64) {
  constexpr auto AsmFrintz = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("frintz %d0, %d1");

  // 7.7 -> 7.00
  ASSERT_EQ(AsmFrintz(0x401ecccccccccccdULL), MakeUInt128(0x401c000000000000, 0U));

  // 7.1 -> 7.00
  ASSERT_EQ(AsmFrintz(0x401c666666666666ULL), MakeUInt128(0x401c000000000000, 0U));

  // -7.10 -> -7.00
  ASSERT_EQ(AsmFrintz(0xc01c666666666666ULL), MakeUInt128(0xc01c000000000000, 0U));

  // -7.90 -> -7.00
  ASSERT_EQ(AsmFrintz(0xc01f99999999999aULL), MakeUInt128(0xc01c000000000000, 0U));

  // 0 -> 0
  ASSERT_EQ(AsmFrintz(0x0000000000000000ULL), MakeUInt128(0x0000000000000000, 0U));

  // -0 -> -0
  ASSERT_EQ(AsmFrintz(0x8000000000000000ULL), MakeUInt128(0x8000000000000000, 0U));
}

// FRINTZ (single precision): round to integral, toward zero (truncation).
TEST(Arm64InsnTest, RoundToIntTowardZeroFp32) {
  constexpr auto AsmFrintz = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("frintz %s0, %s1");

  // 7.7 -> 7.00
  ASSERT_EQ(AsmFrintz(0x40f66666), 0x40e00000);

  // 7.1 -> 7.00
  ASSERT_EQ(AsmFrintz(0x40e33333), 0x40e00000);

  // -7.10 -> -7.00
  ASSERT_EQ(AsmFrintz(0xc0e33333), 0xc0e00000);

  // -7.90 -> -7.00
  ASSERT_EQ(AsmFrintz(0xc0fccccd), 0xc0e00000);

  // 0 -> 0
  ASSERT_EQ(AsmFrintz(0x00000000), 0x00000000);

  // -0 -> -0
  ASSERT_EQ(AsmFrintz(0x80000000), 0x80000000);
}
1869 
// FRINTA (vector, 4S): round each FP32 lane to nearest, ties away from zero.
TEST(Arm64InsnTest, AsmConvertF32x4TieAway) {
  constexpr auto AsmFcvta = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("frinta %0.4s, %1.4s");
  __uint128_t arg1 = MakeF32x4(-7.50f, -6.75f, -6.50f, -6.25f);
  ASSERT_EQ(AsmFcvta(arg1), MakeF32x4(-8.00f, -7.00f, -7.00f, -6.00f));
  __uint128_t arg2 = MakeF32x4(6.25f, 6.50f, 6.75f, 7.50f);
  ASSERT_EQ(AsmFcvta(arg2), MakeF32x4(6.00f, 7.00f, 7.00f, 8.00f));
}

// FRINTM (vector, 4S): round each FP32 lane toward -infinity (floor).
TEST(Arm64InsnTest, AsmConvertF32x4NegInf) {
  constexpr auto AsmFcvtm = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("frintm %0.4s, %1.4s");
  __uint128_t arg1 = MakeF32x4(-7.50f, -6.75f, -6.50f, -6.25f);
  ASSERT_EQ(AsmFcvtm(arg1), MakeF32x4(-8.00f, -7.00f, -7.00f, -7.00f));
  __uint128_t arg2 = MakeF32x4(6.25f, 6.50f, 6.75f, 7.50f);
  ASSERT_EQ(AsmFcvtm(arg2), MakeF32x4(6.00f, 6.00f, 6.00f, 7.00f));
}

// FRINTN (vector, 4S): round each FP32 lane to nearest, ties to even.
TEST(Arm64InsnTest, AsmConvertF32x4TieEven) {
  constexpr auto AsmFcvtn = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("frintn %0.4s, %1.4s");
  __uint128_t arg1 = MakeF32x4(-7.50f, -6.75f, -6.50f, -6.25f);
  ASSERT_EQ(AsmFcvtn(arg1), MakeF32x4(-8.00f, -7.00f, -6.00f, -6.00f));
  __uint128_t arg2 = MakeF32x4(6.25f, 6.50f, 6.75f, 7.50f);
  ASSERT_EQ(AsmFcvtn(arg2), MakeF32x4(6.00f, 6.00f, 7.00f, 8.00f));
}

// FRINTP (vector, 4S): round each FP32 lane toward +infinity (ceiling).
TEST(Arm64InsnTest, AsmConvertF32x4PosInf) {
  constexpr auto AsmFcvtp = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("frintp %0.4s, %1.4s");
  __uint128_t arg1 = MakeF32x4(-7.50f, -6.75f, -6.50f, -6.25f);
  ASSERT_EQ(AsmFcvtp(arg1), MakeF32x4(-7.00f, -6.00f, -6.00f, -6.00f));
  __uint128_t arg2 = MakeF32x4(6.25f, 6.50f, 6.75f, 7.50f);
  ASSERT_EQ(AsmFcvtp(arg2), MakeF32x4(7.00f, 7.00f, 7.00f, 8.00f));
}

// FRINTZ (vector, 4S): round each FP32 lane toward zero (truncation).
TEST(Arm64InsnTest, AsmConvertF32x4Truncate) {
  constexpr auto AsmFcvtz = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("frintz %0.4s, %1.4s");
  __uint128_t arg1 = MakeF32x4(-7.50f, -6.75f, -6.50f, -6.25f);
  ASSERT_EQ(AsmFcvtz(arg1), MakeF32x4(-7.00f, -6.00f, -6.00f, -6.00f));
  __uint128_t arg2 = MakeF32x4(6.25f, 6.50f, 6.75f, 7.50f);
  ASSERT_EQ(AsmFcvtz(arg2), MakeF32x4(6.00f, 6.00f, 6.00f, 7.00f));
}
1909 
TEST(Arm64InsnTest,AsmConvertF64x4TieAway)1910 TEST(Arm64InsnTest, AsmConvertF64x4TieAway) {
1911   constexpr auto AsmFcvta = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("frinta %0.2d, %1.2d");
1912   __uint128_t arg1 = MakeF64x2(-7.50, -6.75);
1913   ASSERT_EQ(AsmFcvta(arg1), MakeF64x2(-8.00, -7.00));
1914   __uint128_t arg2 = MakeF64x2(-6.50, -6.25);
1915   ASSERT_EQ(AsmFcvta(arg2), MakeF64x2(-7.00, -6.00));
1916   __uint128_t arg3 = MakeF64x2(6.25, 6.50);
1917   ASSERT_EQ(AsmFcvta(arg3), MakeF64x2(6.00, 7.00));
1918   __uint128_t arg4 = MakeF64x2(6.75, 7.50);
1919   ASSERT_EQ(AsmFcvta(arg4), MakeF64x2(7.00, 8.00));
1920 }
1921 
// FRINTM (vector, 2D): round each double lane toward -infinity.
TEST(Arm64InsnTest, AsmConvertF64x4NegInf) {
  constexpr auto AsmFrintm = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("frintm %0.2d, %1.2d");
  ASSERT_EQ(AsmFrintm(MakeF64x2(-7.50, -6.75)), MakeF64x2(-8.00, -7.00));
  ASSERT_EQ(AsmFrintm(MakeF64x2(-6.50, -6.25)), MakeF64x2(-7.00, -7.00));
  ASSERT_EQ(AsmFrintm(MakeF64x2(6.25, 6.50)), MakeF64x2(6.00, 6.00));
  ASSERT_EQ(AsmFrintm(MakeF64x2(6.75, 7.50)), MakeF64x2(6.00, 7.00));
}
1933 
// FRINTN (vector, 2D): round each double lane to nearest, ties to even.
TEST(Arm64InsnTest, AsmConvertF64x4TieEven) {
  constexpr auto AsmFrintn = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("frintn %0.2d, %1.2d");
  ASSERT_EQ(AsmFrintn(MakeF64x2(-7.50, -6.75)), MakeF64x2(-8.00, -7.00));
  ASSERT_EQ(AsmFrintn(MakeF64x2(-6.50, -6.25)), MakeF64x2(-6.00, -6.00));
  ASSERT_EQ(AsmFrintn(MakeF64x2(6.25, 6.50)), MakeF64x2(6.00, 6.00));
  ASSERT_EQ(AsmFrintn(MakeF64x2(6.75, 7.50)), MakeF64x2(7.00, 8.00));
}
1945 
// FRINTP (vector, 2D): round each double lane toward +infinity.
TEST(Arm64InsnTest, AsmConvertF64x4PosInf) {
  constexpr auto AsmFrintp = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("frintp %0.2d, %1.2d");
  ASSERT_EQ(AsmFrintp(MakeF64x2(-7.50, -6.75)), MakeF64x2(-7.00, -6.00));
  ASSERT_EQ(AsmFrintp(MakeF64x2(-6.50, -6.25)), MakeF64x2(-6.00, -6.00));
  ASSERT_EQ(AsmFrintp(MakeF64x2(6.25, 6.50)), MakeF64x2(7.00, 7.00));
  ASSERT_EQ(AsmFrintp(MakeF64x2(6.75, 7.50)), MakeF64x2(7.00, 8.00));
}
1957 
// FRINTZ (vector, 2D): round each double lane toward zero (truncate).
TEST(Arm64InsnTest, AsmConvertF64x4Truncate) {
  constexpr auto AsmFrintz = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("frintz %0.2d, %1.2d");
  ASSERT_EQ(AsmFrintz(MakeF64x2(-7.50, -6.75)), MakeF64x2(-7.00, -6.00));
  ASSERT_EQ(AsmFrintz(MakeF64x2(-6.50, -6.25)), MakeF64x2(-6.00, -6.00));
  ASSERT_EQ(AsmFrintz(MakeF64x2(6.25, 6.50)), MakeF64x2(6.00, 6.00));
  ASSERT_EQ(AsmFrintz(MakeF64x2(6.75, 7.50)), MakeF64x2(6.00, 7.00));
}
1969 
// FRINTI (scalar, single precision): rounds to an integral value using the
// rounding mode currently selected in FPCR; the WC wrapper macro installs the
// second argument (kFpcrRMode*) into FPCR before executing the instruction.
TEST(Arm64InsnTest, AsmRoundCurrentModeF32) {
  constexpr auto AsmFrinti = ASM_INSN_WRAP_FUNC_W_RES_WC_ARG("frinti %s0, %s1");
  // Round to nearest, ties to even: x.50 goes to the even neighbor.
  ASSERT_EQ(AsmFrinti(bit_cast<uint32_t>(-7.50f), kFpcrRModeTieEven), bit_cast<uint32_t>(-8.00f));
  ASSERT_EQ(AsmFrinti(bit_cast<uint32_t>(-6.75f), kFpcrRModeTieEven), bit_cast<uint32_t>(-7.00f));
  ASSERT_EQ(AsmFrinti(bit_cast<uint32_t>(-6.50f), kFpcrRModeTieEven), bit_cast<uint32_t>(-6.00f));
  ASSERT_EQ(AsmFrinti(bit_cast<uint32_t>(-6.25f), kFpcrRModeTieEven), bit_cast<uint32_t>(-6.00f));
  ASSERT_EQ(AsmFrinti(bit_cast<uint32_t>(6.25f), kFpcrRModeTieEven), bit_cast<uint32_t>(6.00f));
  ASSERT_EQ(AsmFrinti(bit_cast<uint32_t>(6.50f), kFpcrRModeTieEven), bit_cast<uint32_t>(6.00f));
  ASSERT_EQ(AsmFrinti(bit_cast<uint32_t>(6.75f), kFpcrRModeTieEven), bit_cast<uint32_t>(7.00f));
  ASSERT_EQ(AsmFrinti(bit_cast<uint32_t>(7.50f), kFpcrRModeTieEven), bit_cast<uint32_t>(8.00f));
  // Round toward negative infinity (floor).
  ASSERT_EQ(AsmFrinti(bit_cast<uint32_t>(-7.50f), kFpcrRModeNegInf), bit_cast<uint32_t>(-8.00f));
  ASSERT_EQ(AsmFrinti(bit_cast<uint32_t>(-6.75f), kFpcrRModeNegInf), bit_cast<uint32_t>(-7.00f));
  ASSERT_EQ(AsmFrinti(bit_cast<uint32_t>(-6.50f), kFpcrRModeNegInf), bit_cast<uint32_t>(-7.00f));
  ASSERT_EQ(AsmFrinti(bit_cast<uint32_t>(-6.25f), kFpcrRModeNegInf), bit_cast<uint32_t>(-7.00f));
  ASSERT_EQ(AsmFrinti(bit_cast<uint32_t>(6.25f), kFpcrRModeNegInf), bit_cast<uint32_t>(6.00f));
  ASSERT_EQ(AsmFrinti(bit_cast<uint32_t>(6.50f), kFpcrRModeNegInf), bit_cast<uint32_t>(6.00f));
  ASSERT_EQ(AsmFrinti(bit_cast<uint32_t>(6.75f), kFpcrRModeNegInf), bit_cast<uint32_t>(6.00f));
  ASSERT_EQ(AsmFrinti(bit_cast<uint32_t>(7.50f), kFpcrRModeNegInf), bit_cast<uint32_t>(7.00f));
  // Round toward positive infinity (ceil).
  ASSERT_EQ(AsmFrinti(bit_cast<uint32_t>(-7.50f), kFpcrRModePosInf), bit_cast<uint32_t>(-7.00f));
  ASSERT_EQ(AsmFrinti(bit_cast<uint32_t>(-6.75f), kFpcrRModePosInf), bit_cast<uint32_t>(-6.00f));
  ASSERT_EQ(AsmFrinti(bit_cast<uint32_t>(-6.50f), kFpcrRModePosInf), bit_cast<uint32_t>(-6.00f));
  ASSERT_EQ(AsmFrinti(bit_cast<uint32_t>(-6.25f), kFpcrRModePosInf), bit_cast<uint32_t>(-6.00f));
  ASSERT_EQ(AsmFrinti(bit_cast<uint32_t>(6.25f), kFpcrRModePosInf), bit_cast<uint32_t>(7.00f));
  ASSERT_EQ(AsmFrinti(bit_cast<uint32_t>(6.50f), kFpcrRModePosInf), bit_cast<uint32_t>(7.00f));
  ASSERT_EQ(AsmFrinti(bit_cast<uint32_t>(6.75f), kFpcrRModePosInf), bit_cast<uint32_t>(7.00f));
  ASSERT_EQ(AsmFrinti(bit_cast<uint32_t>(7.50f), kFpcrRModePosInf), bit_cast<uint32_t>(8.00f));
  // Round toward zero (truncate).
  ASSERT_EQ(AsmFrinti(bit_cast<uint32_t>(-7.50f), kFpcrRModeZero), bit_cast<uint32_t>(-7.00f));
  ASSERT_EQ(AsmFrinti(bit_cast<uint32_t>(-6.75f), kFpcrRModeZero), bit_cast<uint32_t>(-6.00f));
  ASSERT_EQ(AsmFrinti(bit_cast<uint32_t>(-6.50f), kFpcrRModeZero), bit_cast<uint32_t>(-6.00f));
  ASSERT_EQ(AsmFrinti(bit_cast<uint32_t>(-6.25f), kFpcrRModeZero), bit_cast<uint32_t>(-6.00f));
  ASSERT_EQ(AsmFrinti(bit_cast<uint32_t>(6.25f), kFpcrRModeZero), bit_cast<uint32_t>(6.00f));
  ASSERT_EQ(AsmFrinti(bit_cast<uint32_t>(6.50f), kFpcrRModeZero), bit_cast<uint32_t>(6.00f));
  ASSERT_EQ(AsmFrinti(bit_cast<uint32_t>(6.75f), kFpcrRModeZero), bit_cast<uint32_t>(6.00f));
  ASSERT_EQ(AsmFrinti(bit_cast<uint32_t>(7.50f), kFpcrRModeZero), bit_cast<uint32_t>(7.00f));
}
2005 
// FRINTI (scalar, double precision): rounds to an integral value using the
// rounding mode currently selected in FPCR (installed by the WC wrapper from
// the second argument).
TEST(Arm64InsnTest, AsmRoundCurrentModeF64) {
  constexpr auto AsmFrinti = ASM_INSN_WRAP_FUNC_W_RES_WC_ARG("frinti %d0, %d1");
  // Round to nearest, ties to even.
  ASSERT_EQ(AsmFrinti(bit_cast<uint64_t>(-7.50), kFpcrRModeTieEven), bit_cast<uint64_t>(-8.00));
  ASSERT_EQ(AsmFrinti(bit_cast<uint64_t>(-6.75), kFpcrRModeTieEven), bit_cast<uint64_t>(-7.00));
  ASSERT_EQ(AsmFrinti(bit_cast<uint64_t>(-6.50), kFpcrRModeTieEven), bit_cast<uint64_t>(-6.00));
  ASSERT_EQ(AsmFrinti(bit_cast<uint64_t>(-6.25), kFpcrRModeTieEven), bit_cast<uint64_t>(-6.00));
  ASSERT_EQ(AsmFrinti(bit_cast<uint64_t>(6.25), kFpcrRModeTieEven), bit_cast<uint64_t>(6.00));
  ASSERT_EQ(AsmFrinti(bit_cast<uint64_t>(6.50), kFpcrRModeTieEven), bit_cast<uint64_t>(6.00));
  ASSERT_EQ(AsmFrinti(bit_cast<uint64_t>(6.75), kFpcrRModeTieEven), bit_cast<uint64_t>(7.00));
  ASSERT_EQ(AsmFrinti(bit_cast<uint64_t>(7.50), kFpcrRModeTieEven), bit_cast<uint64_t>(8.00));
  // Round toward negative infinity (floor).
  ASSERT_EQ(AsmFrinti(bit_cast<uint64_t>(-7.50), kFpcrRModeNegInf), bit_cast<uint64_t>(-8.00));
  ASSERT_EQ(AsmFrinti(bit_cast<uint64_t>(-6.75), kFpcrRModeNegInf), bit_cast<uint64_t>(-7.00));
  ASSERT_EQ(AsmFrinti(bit_cast<uint64_t>(-6.50), kFpcrRModeNegInf), bit_cast<uint64_t>(-7.00));
  ASSERT_EQ(AsmFrinti(bit_cast<uint64_t>(-6.25), kFpcrRModeNegInf), bit_cast<uint64_t>(-7.00));
  ASSERT_EQ(AsmFrinti(bit_cast<uint64_t>(6.25), kFpcrRModeNegInf), bit_cast<uint64_t>(6.00));
  ASSERT_EQ(AsmFrinti(bit_cast<uint64_t>(6.50), kFpcrRModeNegInf), bit_cast<uint64_t>(6.00));
  ASSERT_EQ(AsmFrinti(bit_cast<uint64_t>(6.75), kFpcrRModeNegInf), bit_cast<uint64_t>(6.00));
  ASSERT_EQ(AsmFrinti(bit_cast<uint64_t>(7.50), kFpcrRModeNegInf), bit_cast<uint64_t>(7.00));
  // Round toward positive infinity (ceil).
  ASSERT_EQ(AsmFrinti(bit_cast<uint64_t>(-7.50), kFpcrRModePosInf), bit_cast<uint64_t>(-7.00));
  ASSERT_EQ(AsmFrinti(bit_cast<uint64_t>(-6.75), kFpcrRModePosInf), bit_cast<uint64_t>(-6.00));
  ASSERT_EQ(AsmFrinti(bit_cast<uint64_t>(-6.50), kFpcrRModePosInf), bit_cast<uint64_t>(-6.00));
  ASSERT_EQ(AsmFrinti(bit_cast<uint64_t>(-6.25), kFpcrRModePosInf), bit_cast<uint64_t>(-6.00));
  ASSERT_EQ(AsmFrinti(bit_cast<uint64_t>(6.25), kFpcrRModePosInf), bit_cast<uint64_t>(7.00));
  ASSERT_EQ(AsmFrinti(bit_cast<uint64_t>(6.50), kFpcrRModePosInf), bit_cast<uint64_t>(7.00));
  ASSERT_EQ(AsmFrinti(bit_cast<uint64_t>(6.75), kFpcrRModePosInf), bit_cast<uint64_t>(7.00));
  ASSERT_EQ(AsmFrinti(bit_cast<uint64_t>(7.50), kFpcrRModePosInf), bit_cast<uint64_t>(8.00));
  // Round toward zero (truncate).
  ASSERT_EQ(AsmFrinti(bit_cast<uint64_t>(-7.50), kFpcrRModeZero), bit_cast<uint64_t>(-7.00));
  ASSERT_EQ(AsmFrinti(bit_cast<uint64_t>(-6.75), kFpcrRModeZero), bit_cast<uint64_t>(-6.00));
  ASSERT_EQ(AsmFrinti(bit_cast<uint64_t>(-6.50), kFpcrRModeZero), bit_cast<uint64_t>(-6.00));
  ASSERT_EQ(AsmFrinti(bit_cast<uint64_t>(-6.25), kFpcrRModeZero), bit_cast<uint64_t>(-6.00));
  ASSERT_EQ(AsmFrinti(bit_cast<uint64_t>(6.25), kFpcrRModeZero), bit_cast<uint64_t>(6.00));
  ASSERT_EQ(AsmFrinti(bit_cast<uint64_t>(6.50), kFpcrRModeZero), bit_cast<uint64_t>(6.00));
  ASSERT_EQ(AsmFrinti(bit_cast<uint64_t>(6.75), kFpcrRModeZero), bit_cast<uint64_t>(6.00));
  ASSERT_EQ(AsmFrinti(bit_cast<uint64_t>(7.50), kFpcrRModeZero), bit_cast<uint64_t>(7.00));
}
2041 
// FRINTI (vector, 4S): round each lane using the FPCR rounding mode supplied
// via the WC wrapper's second argument.
TEST(Arm64InsnTest, AsmRoundCurrentModeF32x4) {
  constexpr auto AsmFrinti = ASM_INSN_WRAP_FUNC_W_RES_WC_ARG("frinti %0.4s, %1.4s");
  const __uint128_t negatives = MakeF32x4(-7.50f, -6.75f, -6.50f, -6.25f);
  const __uint128_t positives = MakeF32x4(6.25f, 6.50f, 6.75f, 7.50f);
  ASSERT_EQ(AsmFrinti(negatives, kFpcrRModeTieEven), MakeF32x4(-8.00f, -7.00f, -6.00f, -6.00f));
  ASSERT_EQ(AsmFrinti(positives, kFpcrRModeTieEven), MakeF32x4(6.00f, 6.00f, 7.00f, 8.00f));
  ASSERT_EQ(AsmFrinti(negatives, kFpcrRModeNegInf), MakeF32x4(-8.00f, -7.00f, -7.00f, -7.00f));
  ASSERT_EQ(AsmFrinti(positives, kFpcrRModeNegInf), MakeF32x4(6.00f, 6.00f, 6.00f, 7.00f));
  ASSERT_EQ(AsmFrinti(negatives, kFpcrRModePosInf), MakeF32x4(-7.00f, -6.00f, -6.00f, -6.00f));
  ASSERT_EQ(AsmFrinti(positives, kFpcrRModePosInf), MakeF32x4(7.00f, 7.00f, 7.00f, 8.00f));
  ASSERT_EQ(AsmFrinti(negatives, kFpcrRModeZero), MakeF32x4(-7.00f, -6.00f, -6.00f, -6.00f));
  ASSERT_EQ(AsmFrinti(positives, kFpcrRModeZero), MakeF32x4(6.00f, 6.00f, 6.00f, 7.00f));
}
2061 
// FRINTI (vector, 2D): round each double lane using the FPCR rounding mode
// supplied via the WC wrapper's second argument.
TEST(Arm64InsnTest, AsmRoundCurrentModeF64x2) {
  constexpr auto AsmFrinti = ASM_INSN_WRAP_FUNC_W_RES_WC_ARG("frinti %0.2d, %1.2d");
  const __uint128_t in1 = MakeF64x2(-7.50, -6.75);
  const __uint128_t in2 = MakeF64x2(-6.50, -6.25);
  const __uint128_t in3 = MakeF64x2(6.25, 6.50);
  const __uint128_t in4 = MakeF64x2(6.75, 7.50);
  // Round to nearest, ties to even.
  ASSERT_EQ(AsmFrinti(in1, kFpcrRModeTieEven), MakeF64x2(-8.00, -7.00));
  ASSERT_EQ(AsmFrinti(in2, kFpcrRModeTieEven), MakeF64x2(-6.00, -6.00));
  ASSERT_EQ(AsmFrinti(in3, kFpcrRModeTieEven), MakeF64x2(6.00, 6.00));
  ASSERT_EQ(AsmFrinti(in4, kFpcrRModeTieEven), MakeF64x2(7.00, 8.00));
  // Round toward negative infinity.
  ASSERT_EQ(AsmFrinti(in1, kFpcrRModeNegInf), MakeF64x2(-8.00, -7.00));
  ASSERT_EQ(AsmFrinti(in2, kFpcrRModeNegInf), MakeF64x2(-7.00, -7.00));
  ASSERT_EQ(AsmFrinti(in3, kFpcrRModeNegInf), MakeF64x2(6.00, 6.00));
  ASSERT_EQ(AsmFrinti(in4, kFpcrRModeNegInf), MakeF64x2(6.00, 7.00));
  // Round toward positive infinity.
  ASSERT_EQ(AsmFrinti(in1, kFpcrRModePosInf), MakeF64x2(-7.00, -6.00));
  ASSERT_EQ(AsmFrinti(in2, kFpcrRModePosInf), MakeF64x2(-6.00, -6.00));
  ASSERT_EQ(AsmFrinti(in3, kFpcrRModePosInf), MakeF64x2(7.00, 7.00));
  ASSERT_EQ(AsmFrinti(in4, kFpcrRModePosInf), MakeF64x2(7.00, 8.00));
  // Round toward zero.
  ASSERT_EQ(AsmFrinti(in1, kFpcrRModeZero), MakeF64x2(-7.00, -6.00));
  ASSERT_EQ(AsmFrinti(in2, kFpcrRModeZero), MakeF64x2(-6.00, -6.00));
  ASSERT_EQ(AsmFrinti(in3, kFpcrRModeZero), MakeF64x2(6.00, 6.00));
  ASSERT_EQ(AsmFrinti(in4, kFpcrRModeZero), MakeF64x2(6.00, 7.00));
}
2097 
// FRINTX (scalar, single precision): rounds to integral using the FPCR
// rounding mode, like FRINTI; the rounded values asserted here match the
// FRINTI test above (FRINTX additionally signals Inexact, not checked here —
// TODO confirm against the emulator's flag handling).
TEST(Arm64InsnTest, AsmRoundExactF32) {
  constexpr auto AsmFrintx = ASM_INSN_WRAP_FUNC_W_RES_WC_ARG("frintx %s0, %s1");
  // Round to nearest, ties to even.
  ASSERT_EQ(AsmFrintx(bit_cast<uint32_t>(-7.50f), kFpcrRModeTieEven), bit_cast<uint32_t>(-8.00f));
  ASSERT_EQ(AsmFrintx(bit_cast<uint32_t>(-6.75f), kFpcrRModeTieEven), bit_cast<uint32_t>(-7.00f));
  ASSERT_EQ(AsmFrintx(bit_cast<uint32_t>(-6.50f), kFpcrRModeTieEven), bit_cast<uint32_t>(-6.00f));
  ASSERT_EQ(AsmFrintx(bit_cast<uint32_t>(-6.25f), kFpcrRModeTieEven), bit_cast<uint32_t>(-6.00f));
  ASSERT_EQ(AsmFrintx(bit_cast<uint32_t>(6.25f), kFpcrRModeTieEven), bit_cast<uint32_t>(6.00f));
  ASSERT_EQ(AsmFrintx(bit_cast<uint32_t>(6.50f), kFpcrRModeTieEven), bit_cast<uint32_t>(6.00f));
  ASSERT_EQ(AsmFrintx(bit_cast<uint32_t>(6.75f), kFpcrRModeTieEven), bit_cast<uint32_t>(7.00f));
  ASSERT_EQ(AsmFrintx(bit_cast<uint32_t>(7.50f), kFpcrRModeTieEven), bit_cast<uint32_t>(8.00f));
  // Round toward negative infinity (floor).
  ASSERT_EQ(AsmFrintx(bit_cast<uint32_t>(-7.50f), kFpcrRModeNegInf), bit_cast<uint32_t>(-8.00f));
  ASSERT_EQ(AsmFrintx(bit_cast<uint32_t>(-6.75f), kFpcrRModeNegInf), bit_cast<uint32_t>(-7.00f));
  ASSERT_EQ(AsmFrintx(bit_cast<uint32_t>(-6.50f), kFpcrRModeNegInf), bit_cast<uint32_t>(-7.00f));
  ASSERT_EQ(AsmFrintx(bit_cast<uint32_t>(-6.25f), kFpcrRModeNegInf), bit_cast<uint32_t>(-7.00f));
  ASSERT_EQ(AsmFrintx(bit_cast<uint32_t>(6.25f), kFpcrRModeNegInf), bit_cast<uint32_t>(6.00f));
  ASSERT_EQ(AsmFrintx(bit_cast<uint32_t>(6.50f), kFpcrRModeNegInf), bit_cast<uint32_t>(6.00f));
  ASSERT_EQ(AsmFrintx(bit_cast<uint32_t>(6.75f), kFpcrRModeNegInf), bit_cast<uint32_t>(6.00f));
  ASSERT_EQ(AsmFrintx(bit_cast<uint32_t>(7.50f), kFpcrRModeNegInf), bit_cast<uint32_t>(7.00f));
  // Round toward positive infinity (ceil).
  ASSERT_EQ(AsmFrintx(bit_cast<uint32_t>(-7.50f), kFpcrRModePosInf), bit_cast<uint32_t>(-7.00f));
  ASSERT_EQ(AsmFrintx(bit_cast<uint32_t>(-6.75f), kFpcrRModePosInf), bit_cast<uint32_t>(-6.00f));
  ASSERT_EQ(AsmFrintx(bit_cast<uint32_t>(-6.50f), kFpcrRModePosInf), bit_cast<uint32_t>(-6.00f));
  ASSERT_EQ(AsmFrintx(bit_cast<uint32_t>(-6.25f), kFpcrRModePosInf), bit_cast<uint32_t>(-6.00f));
  ASSERT_EQ(AsmFrintx(bit_cast<uint32_t>(6.25f), kFpcrRModePosInf), bit_cast<uint32_t>(7.00f));
  ASSERT_EQ(AsmFrintx(bit_cast<uint32_t>(6.50f), kFpcrRModePosInf), bit_cast<uint32_t>(7.00f));
  ASSERT_EQ(AsmFrintx(bit_cast<uint32_t>(6.75f), kFpcrRModePosInf), bit_cast<uint32_t>(7.00f));
  ASSERT_EQ(AsmFrintx(bit_cast<uint32_t>(7.50f), kFpcrRModePosInf), bit_cast<uint32_t>(8.00f));
  // Round toward zero (truncate).
  ASSERT_EQ(AsmFrintx(bit_cast<uint32_t>(-7.50f), kFpcrRModeZero), bit_cast<uint32_t>(-7.00f));
  ASSERT_EQ(AsmFrintx(bit_cast<uint32_t>(-6.75f), kFpcrRModeZero), bit_cast<uint32_t>(-6.00f));
  ASSERT_EQ(AsmFrintx(bit_cast<uint32_t>(-6.50f), kFpcrRModeZero), bit_cast<uint32_t>(-6.00f));
  ASSERT_EQ(AsmFrintx(bit_cast<uint32_t>(-6.25f), kFpcrRModeZero), bit_cast<uint32_t>(-6.00f));
  ASSERT_EQ(AsmFrintx(bit_cast<uint32_t>(6.25f), kFpcrRModeZero), bit_cast<uint32_t>(6.00f));
  ASSERT_EQ(AsmFrintx(bit_cast<uint32_t>(6.50f), kFpcrRModeZero), bit_cast<uint32_t>(6.00f));
  ASSERT_EQ(AsmFrintx(bit_cast<uint32_t>(6.75f), kFpcrRModeZero), bit_cast<uint32_t>(6.00f));
  ASSERT_EQ(AsmFrintx(bit_cast<uint32_t>(7.50f), kFpcrRModeZero), bit_cast<uint32_t>(7.00f));
}
2133 
// FRINTX (scalar, double precision): rounds to integral using the FPCR
// rounding mode; expected values mirror the FRINTI double-precision test.
TEST(Arm64InsnTest, AsmRoundExactF64) {
  constexpr auto AsmFrintx = ASM_INSN_WRAP_FUNC_W_RES_WC_ARG("frintx %d0, %d1");
  // Round to nearest, ties to even.
  ASSERT_EQ(AsmFrintx(bit_cast<uint64_t>(-7.50), kFpcrRModeTieEven), bit_cast<uint64_t>(-8.00));
  ASSERT_EQ(AsmFrintx(bit_cast<uint64_t>(-6.75), kFpcrRModeTieEven), bit_cast<uint64_t>(-7.00));
  ASSERT_EQ(AsmFrintx(bit_cast<uint64_t>(-6.50), kFpcrRModeTieEven), bit_cast<uint64_t>(-6.00));
  ASSERT_EQ(AsmFrintx(bit_cast<uint64_t>(-6.25), kFpcrRModeTieEven), bit_cast<uint64_t>(-6.00));
  ASSERT_EQ(AsmFrintx(bit_cast<uint64_t>(6.25), kFpcrRModeTieEven), bit_cast<uint64_t>(6.00));
  ASSERT_EQ(AsmFrintx(bit_cast<uint64_t>(6.50), kFpcrRModeTieEven), bit_cast<uint64_t>(6.00));
  ASSERT_EQ(AsmFrintx(bit_cast<uint64_t>(6.75), kFpcrRModeTieEven), bit_cast<uint64_t>(7.00));
  ASSERT_EQ(AsmFrintx(bit_cast<uint64_t>(7.50), kFpcrRModeTieEven), bit_cast<uint64_t>(8.00));
  // Round toward negative infinity (floor).
  ASSERT_EQ(AsmFrintx(bit_cast<uint64_t>(-7.50), kFpcrRModeNegInf), bit_cast<uint64_t>(-8.00));
  ASSERT_EQ(AsmFrintx(bit_cast<uint64_t>(-6.75), kFpcrRModeNegInf), bit_cast<uint64_t>(-7.00));
  ASSERT_EQ(AsmFrintx(bit_cast<uint64_t>(-6.50), kFpcrRModeNegInf), bit_cast<uint64_t>(-7.00));
  ASSERT_EQ(AsmFrintx(bit_cast<uint64_t>(-6.25), kFpcrRModeNegInf), bit_cast<uint64_t>(-7.00));
  ASSERT_EQ(AsmFrintx(bit_cast<uint64_t>(6.25), kFpcrRModeNegInf), bit_cast<uint64_t>(6.00));
  ASSERT_EQ(AsmFrintx(bit_cast<uint64_t>(6.50), kFpcrRModeNegInf), bit_cast<uint64_t>(6.00));
  ASSERT_EQ(AsmFrintx(bit_cast<uint64_t>(6.75), kFpcrRModeNegInf), bit_cast<uint64_t>(6.00));
  ASSERT_EQ(AsmFrintx(bit_cast<uint64_t>(7.50), kFpcrRModeNegInf), bit_cast<uint64_t>(7.00));
  // Round toward positive infinity (ceil).
  ASSERT_EQ(AsmFrintx(bit_cast<uint64_t>(-7.50), kFpcrRModePosInf), bit_cast<uint64_t>(-7.00));
  ASSERT_EQ(AsmFrintx(bit_cast<uint64_t>(-6.75), kFpcrRModePosInf), bit_cast<uint64_t>(-6.00));
  ASSERT_EQ(AsmFrintx(bit_cast<uint64_t>(-6.50), kFpcrRModePosInf), bit_cast<uint64_t>(-6.00));
  ASSERT_EQ(AsmFrintx(bit_cast<uint64_t>(-6.25), kFpcrRModePosInf), bit_cast<uint64_t>(-6.00));
  ASSERT_EQ(AsmFrintx(bit_cast<uint64_t>(6.25), kFpcrRModePosInf), bit_cast<uint64_t>(7.00));
  ASSERT_EQ(AsmFrintx(bit_cast<uint64_t>(6.50), kFpcrRModePosInf), bit_cast<uint64_t>(7.00));
  ASSERT_EQ(AsmFrintx(bit_cast<uint64_t>(6.75), kFpcrRModePosInf), bit_cast<uint64_t>(7.00));
  ASSERT_EQ(AsmFrintx(bit_cast<uint64_t>(7.50), kFpcrRModePosInf), bit_cast<uint64_t>(8.00));
  // Round toward zero (truncate).
  ASSERT_EQ(AsmFrintx(bit_cast<uint64_t>(-7.50), kFpcrRModeZero), bit_cast<uint64_t>(-7.00));
  ASSERT_EQ(AsmFrintx(bit_cast<uint64_t>(-6.75), kFpcrRModeZero), bit_cast<uint64_t>(-6.00));
  ASSERT_EQ(AsmFrintx(bit_cast<uint64_t>(-6.50), kFpcrRModeZero), bit_cast<uint64_t>(-6.00));
  ASSERT_EQ(AsmFrintx(bit_cast<uint64_t>(-6.25), kFpcrRModeZero), bit_cast<uint64_t>(-6.00));
  ASSERT_EQ(AsmFrintx(bit_cast<uint64_t>(6.25), kFpcrRModeZero), bit_cast<uint64_t>(6.00));
  ASSERT_EQ(AsmFrintx(bit_cast<uint64_t>(6.50), kFpcrRModeZero), bit_cast<uint64_t>(6.00));
  ASSERT_EQ(AsmFrintx(bit_cast<uint64_t>(6.75), kFpcrRModeZero), bit_cast<uint64_t>(6.00));
  ASSERT_EQ(AsmFrintx(bit_cast<uint64_t>(7.50), kFpcrRModeZero), bit_cast<uint64_t>(7.00));
}
2169 
// FRINTX (vector, 4S): round each lane using the FPCR rounding mode supplied
// via the WC wrapper's second argument.
TEST(Arm64InsnTest, AsmRoundExactF32x4) {
  constexpr auto AsmFrintx = ASM_INSN_WRAP_FUNC_W_RES_WC_ARG("frintx %0.4s, %1.4s");
  const __uint128_t negatives = MakeF32x4(-7.50f, -6.75f, -6.50f, -6.25f);
  const __uint128_t positives = MakeF32x4(6.25f, 6.50f, 6.75f, 7.50f);
  ASSERT_EQ(AsmFrintx(negatives, kFpcrRModeTieEven), MakeF32x4(-8.00f, -7.00f, -6.00f, -6.00f));
  ASSERT_EQ(AsmFrintx(positives, kFpcrRModeTieEven), MakeF32x4(6.00f, 6.00f, 7.00f, 8.00f));
  ASSERT_EQ(AsmFrintx(negatives, kFpcrRModeNegInf), MakeF32x4(-8.00f, -7.00f, -7.00f, -7.00f));
  ASSERT_EQ(AsmFrintx(positives, kFpcrRModeNegInf), MakeF32x4(6.00f, 6.00f, 6.00f, 7.00f));
  ASSERT_EQ(AsmFrintx(negatives, kFpcrRModePosInf), MakeF32x4(-7.00f, -6.00f, -6.00f, -6.00f));
  ASSERT_EQ(AsmFrintx(positives, kFpcrRModePosInf), MakeF32x4(7.00f, 7.00f, 7.00f, 8.00f));
  ASSERT_EQ(AsmFrintx(negatives, kFpcrRModeZero), MakeF32x4(-7.00f, -6.00f, -6.00f, -6.00f));
  ASSERT_EQ(AsmFrintx(positives, kFpcrRModeZero), MakeF32x4(6.00f, 6.00f, 6.00f, 7.00f));
}
2189 
// FRINTX (vector, 2D): round each double lane using the FPCR rounding mode
// supplied via the WC wrapper's second argument.
TEST(Arm64InsnTest, AsmRoundExactF64x2) {
  constexpr auto AsmFrintx = ASM_INSN_WRAP_FUNC_W_RES_WC_ARG("frintx %0.2d, %1.2d");
  const __uint128_t in1 = MakeF64x2(-7.50, -6.75);
  const __uint128_t in2 = MakeF64x2(-6.50, -6.25);
  const __uint128_t in3 = MakeF64x2(6.25, 6.50);
  const __uint128_t in4 = MakeF64x2(6.75, 7.50);
  // Round to nearest, ties to even.
  ASSERT_EQ(AsmFrintx(in1, kFpcrRModeTieEven), MakeF64x2(-8.00, -7.00));
  ASSERT_EQ(AsmFrintx(in2, kFpcrRModeTieEven), MakeF64x2(-6.00, -6.00));
  ASSERT_EQ(AsmFrintx(in3, kFpcrRModeTieEven), MakeF64x2(6.00, 6.00));
  ASSERT_EQ(AsmFrintx(in4, kFpcrRModeTieEven), MakeF64x2(7.00, 8.00));
  // Round toward negative infinity.
  ASSERT_EQ(AsmFrintx(in1, kFpcrRModeNegInf), MakeF64x2(-8.00, -7.00));
  ASSERT_EQ(AsmFrintx(in2, kFpcrRModeNegInf), MakeF64x2(-7.00, -7.00));
  ASSERT_EQ(AsmFrintx(in3, kFpcrRModeNegInf), MakeF64x2(6.00, 6.00));
  ASSERT_EQ(AsmFrintx(in4, kFpcrRModeNegInf), MakeF64x2(6.00, 7.00));
  // Round toward positive infinity.
  ASSERT_EQ(AsmFrintx(in1, kFpcrRModePosInf), MakeF64x2(-7.00, -6.00));
  ASSERT_EQ(AsmFrintx(in2, kFpcrRModePosInf), MakeF64x2(-6.00, -6.00));
  ASSERT_EQ(AsmFrintx(in3, kFpcrRModePosInf), MakeF64x2(7.00, 7.00));
  ASSERT_EQ(AsmFrintx(in4, kFpcrRModePosInf), MakeF64x2(7.00, 8.00));
  // Round toward zero.
  ASSERT_EQ(AsmFrintx(in1, kFpcrRModeZero), MakeF64x2(-7.00, -6.00));
  ASSERT_EQ(AsmFrintx(in2, kFpcrRModeZero), MakeF64x2(-6.00, -6.00));
  ASSERT_EQ(AsmFrintx(in3, kFpcrRModeZero), MakeF64x2(6.00, 6.00));
  ASSERT_EQ(AsmFrintx(in4, kFpcrRModeZero), MakeF64x2(6.00, 7.00));
}
2225 
// Runs FCMP on two single-precision values (passed as raw bit patterns in the
// low 32 bits) and returns the resulting PSTATE NZCV value, with the flags in
// bits [31:28] as read by MRS.
uint64_t Fp32Compare(uint64_t arg1, uint64_t arg2) {
  uint64_t res;
  asm("fcmp %s1, %s2\n\t"
      "mrs %x0, nzcv"
      : "=r"(res)
      : "w"(arg1), "w"(arg2));
  return res;
}
2234 
// Runs FCMP on two double-precision values (passed as raw bit patterns) and
// returns the resulting PSTATE NZCV value, with the flags in bits [31:28].
uint64_t Fp64Compare(uint64_t arg1, uint64_t arg2) {
  uint64_t res;
  asm("fcmp %d1, %d2\n\t"
      "mrs %x0, nzcv"
      : "=r"(res)
      : "w"(arg1), "w"(arg2));
  return res;
}
2243 
// Positions a 4-bit flag nibble (N,Z,C,V from most- to least-significant bit)
// where it appears in the NZCV system register, i.e. bits [31:28].
constexpr uint64_t MakeNZCV(uint64_t nzcv) {
  constexpr unsigned kNzcvShift = 28;  // NZCV flags occupy bits [31:28].
  return nzcv << kNzcvShift;
}
2247 
// FCMP flag encoding: unordered -> C|V (0b0011), equal -> Z|C (0b0110),
// less-than -> N (0b1000), greater-than -> C (0b0010).
TEST(Arm64InsnTest, Fp32Compare) {
  // NaN and 1.83
  ASSERT_EQ(Fp32Compare(0x7fc00000ULL, 0x3fea3d71ULL), MakeNZCV(0b0011));

  // 6.31 == 6.31
  ASSERT_EQ(Fp32Compare(0x40c9eb85ULL, 0x40c9eb85ULL), MakeNZCV(0b0110));

  // 1.23 < 2.34
  ASSERT_EQ(Fp32Compare(0x3f9d70a4ULL, 0x4015c28fULL), MakeNZCV(0b1000));

  // 5.25 > 2.94
  ASSERT_EQ(Fp32Compare(0x40a80000ULL, 0x403c28f6ULL), MakeNZCV(0b0010));
}
2261 
// FCMP with the #0.0 immediate form, single precision; returns NZCV.
// Flag encoding as in Fp32Compare: unordered 0b0011, equal 0b0110,
// less 0b1000, greater 0b0010.
TEST(Arm64InsnTest, Fp32CompareZero) {
  constexpr auto Fp32CompareZero = ASM_INSN_WRAP_FUNC_R_RES_W_ARG(
      "fcmp %s1, #0.0\n\t"
      "mrs %x0, nzcv");

  // NaN and 0.00 (0x7fa00000 is a signaling NaN bit pattern)
  ASSERT_EQ(Fp32CompareZero(0x7fa00000ULL), MakeNZCV(0b0011));

  // 0.00 == 0.00
  ASSERT_EQ(Fp32CompareZero(0x00000000ULL), MakeNZCV(0b0110));

  // -2.67 < 0.00
  ASSERT_EQ(Fp32CompareZero(0xc02ae148ULL), MakeNZCV(0b1000));

  // 1.56 > 0.00
  ASSERT_EQ(Fp32CompareZero(0x3fc7ae14ULL), MakeNZCV(0b0010));
}
2279 
// FCMP, double precision; flag encoding as in Fp32Compare: unordered 0b0011,
// equal 0b0110, less 0b1000, greater 0b0010.
TEST(Arm64InsnTest, Fp64Compare) {
  // NaN and 1.19
  ASSERT_EQ(Fp64Compare(0x7ff8000000000000ULL, 0x3ff30a3d70a3d70aULL), MakeNZCV(0b0011));

  // 8.42 == 8.42
  ASSERT_EQ(Fp64Compare(0x4020d70a3d70a3d7ULL, 0x4020d70a3d70a3d7ULL), MakeNZCV(0b0110));

  // 0.50 < 1.00
  ASSERT_EQ(Fp64Compare(0x3fe0000000000000ULL, 0x3ff0000000000000ULL), MakeNZCV(0b1000));

  // 7.38 > 1.54
  ASSERT_EQ(Fp64Compare(0x401d851eb851eb85ULL, 0x3ff8a3d70a3d70a4ULL), MakeNZCV(0b0010));
}
2293 
// FCMP with the #0.0 immediate form, double precision; returns NZCV.
// Flag encoding as in Fp64Compare: unordered 0b0011, equal 0b0110,
// less 0b1000, greater 0b0010.
TEST(Arm64InsnTest, Fp64CompareZero) {
  constexpr auto Fp64CompareZero = ASM_INSN_WRAP_FUNC_R_RES_W_ARG(
      "fcmp %d1, #0.0\n\t"
      "mrs %x0, nzcv");

  // NaN and 0.00 (0x7ff4... is a signaling NaN bit pattern)
  ASSERT_EQ(Fp64CompareZero(0x7ff4000000000000ULL), MakeNZCV(0b0011));

  // 0.00 == 0.00
  ASSERT_EQ(Fp64CompareZero(0x0000000000000000ULL), MakeNZCV(0b0110));

  // -7.23 < 0.00
  ASSERT_EQ(Fp64CompareZero(0xc01ceb851eb851ecULL), MakeNZCV(0b1000));

  // 5.39 > 0.00
  ASSERT_EQ(Fp64CompareZero(0x40158f5c28f5c28fULL), MakeNZCV(0b0010));
}
2311 
// Seeds PSTATE NZCV with the given value, then executes FCCMP with condition
// "eq" and alternate flags #15 (0b1111): if the condition holds (Z set in the
// seeded flags), arg1 and arg2 are compared as in FCMP; otherwise NZCV is set
// to 0b1111. Returns the final NZCV value.
uint64_t Fp32CompareIfEqualOrSetAllFlags(float arg1, float arg2, uint64_t nzcv) {
  asm("msr nzcv, %x0\n\t"
      "fccmp %s2, %s3, #15, eq\n\t"
      "mrs %x0, nzcv\n\t"
      : "=r"(nzcv)
      : "0"(nzcv), "w"(arg1), "w"(arg2));
  return nzcv;
}
2320 
// FCCMP, single precision: when the "eq" condition holds the comparison runs
// (flags: equal 0b0110, less 0b1000, greater 0b0010, unordered 0b0011);
// otherwise the alternate #15 flags (0b1111) are installed.
TEST(Arm64InsnTest, Fp32ConditionalCompare) {
  // Comparison is performed (Z set in the seeded flags satisfies "eq").
  constexpr uint64_t kEqual = MakeNZCV(0b0100);
  constexpr float kNan = std::numeric_limits<float>::quiet_NaN();
  ASSERT_EQ(Fp32CompareIfEqualOrSetAllFlags(1.0f, 1.0f, kEqual), MakeNZCV(0b0110));
  ASSERT_EQ(Fp32CompareIfEqualOrSetAllFlags(1.0f, 2.0f, kEqual), MakeNZCV(0b1000));
  ASSERT_EQ(Fp32CompareIfEqualOrSetAllFlags(2.0f, 1.0f, kEqual), MakeNZCV(0b0010));
  ASSERT_EQ(Fp32CompareIfEqualOrSetAllFlags(kNan, 1.0f, kEqual), MakeNZCV(0b0011));
  // Comparison is not performed; alt-nzcv is returned.
  constexpr uint64_t kNotEqual = MakeNZCV(0b0000);
  ASSERT_EQ(Fp32CompareIfEqualOrSetAllFlags(1.0f, 1.0f, kNotEqual), MakeNZCV(0b1111));
  ASSERT_EQ(Fp32CompareIfEqualOrSetAllFlags(1.0f, 2.0f, kNotEqual), MakeNZCV(0b1111));
  ASSERT_EQ(Fp32CompareIfEqualOrSetAllFlags(2.0f, 1.0f, kNotEqual), MakeNZCV(0b1111));
  ASSERT_EQ(Fp32CompareIfEqualOrSetAllFlags(kNan, 1.0f, kNotEqual), MakeNZCV(0b1111));
}
2336 
// Double-precision counterpart of Fp32CompareIfEqualOrSetAllFlags: seeds NZCV,
// runs FCCMP (condition "eq", alternate flags #15 = 0b1111), and returns the
// resulting NZCV value.
uint64_t Fp64CompareIfEqualOrSetAllFlags(double arg1, double arg2, uint64_t nzcv) {
  asm("msr nzcv, %x0\n\t"
      "fccmp %d2, %d3, #15, eq\n\t"
      "mrs %x0, nzcv\n\t"
      : "=r"(nzcv)
      : "0"(nzcv), "w"(arg1), "w"(arg2));
  return nzcv;
}
2345 
// FCCMP, double precision: when the "eq" condition holds the comparison runs
// (flags: equal 0b0110, less 0b1000, greater 0b0010, unordered 0b0011);
// otherwise the alternate #15 flags (0b1111) are installed.
TEST(Arm64InsnTest, Fp64ConditionalCompare) {
  // Comparison is performed (Z set in the seeded flags satisfies "eq").
  constexpr uint64_t kEqual = MakeNZCV(0b0100);
  constexpr double kNan = std::numeric_limits<double>::quiet_NaN();
  ASSERT_EQ(Fp64CompareIfEqualOrSetAllFlags(1.0, 1.0, kEqual), MakeNZCV(0b0110));
  ASSERT_EQ(Fp64CompareIfEqualOrSetAllFlags(1.0, 2.0, kEqual), MakeNZCV(0b1000));
  ASSERT_EQ(Fp64CompareIfEqualOrSetAllFlags(2.0, 1.0, kEqual), MakeNZCV(0b0010));
  ASSERT_EQ(Fp64CompareIfEqualOrSetAllFlags(kNan, 1.0, kEqual), MakeNZCV(0b0011));
  // Comparison is not performed; alt-nzcv is returned.
  constexpr uint64_t kNotEqual = MakeNZCV(0b0000);
  ASSERT_EQ(Fp64CompareIfEqualOrSetAllFlags(1.0, 1.0, kNotEqual), MakeNZCV(0b1111));
  ASSERT_EQ(Fp64CompareIfEqualOrSetAllFlags(1.0, 2.0, kNotEqual), MakeNZCV(0b1111));
  ASSERT_EQ(Fp64CompareIfEqualOrSetAllFlags(2.0, 1.0, kNotEqual), MakeNZCV(0b1111));
  // Was "1.0f": pass a double literal to the double-precision helper for
  // consistency with the rest of this test (behavior unchanged, 1.0f == 1.0).
  ASSERT_EQ(Fp64CompareIfEqualOrSetAllFlags(kNan, 1.0, kNotEqual), MakeNZCV(0b1111));
}
2361 
// FCVT Dd, Sn: widen a single-precision value to double precision.
TEST(Arm64InsnTest, ConvertFp32ToFp64) {
  // 0x40cd70a4 is 6.42f; widening is exact, so the extra mantissa bits are zero.
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvt %d0, %s1")(uint64_t{0x40cd70a4});
  ASSERT_EQ(res, MakeUInt128(0x4019ae1480000000ULL, 0U));
}
2367 
// FCVT Sd, Dn: narrow a double-precision value to single precision.
TEST(Arm64InsnTest, ConvertFp64ToFp32) {
  // 0x401a0a3d70a3d70a is 6.51; narrowing rounds to the nearest float.
  __uint128_t res =
      ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvt %s0, %d1")(uint64_t{0x401a0a3d70a3d70aULL});
  ASSERT_EQ(res, MakeUInt128(0x40d051ecULL, 0U));
}
2373 
// FCVT Hd, Sn: narrow single precision to half precision; the expected values
// are the IEEE binary16 encodings of the inputs.
TEST(Arm64InsnTest, ConvertFp32ToFp16) {
  constexpr auto NarrowToHalf = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvt %h0, %s1");
  EXPECT_EQ(NarrowToHalf(bit_cast<uint32_t>(2.5f)), MakeUInt128(0x4100U, 0U));
  EXPECT_EQ(NarrowToHalf(bit_cast<uint32_t>(4.5f)), MakeUInt128(0x4480U, 0U));
  EXPECT_EQ(NarrowToHalf(bit_cast<uint32_t>(8.5f)), MakeUInt128(0x4840U, 0U));
  EXPECT_EQ(NarrowToHalf(bit_cast<uint32_t>(16.5f)), MakeUInt128(0x4c20U, 0U));
}
2381 
// FCVT Sd, Hn: widen half precision to single precision.
TEST(Arm64InsnTest, ConvertFp16ToFp32) {
  // 0x4100 is 2.5 in IEEE binary16.
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvt %s0, %h1")(uint64_t{0x4100});
  ASSERT_EQ(res, bit_cast<uint32_t>(2.5f));
}
2387 
// FCVT Hd, Dn: narrow double precision to half precision (2.5 -> 0x4100).
TEST(Arm64InsnTest, ConvertFp64ToFp16) {
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvt %h0, %d1")(bit_cast<uint64_t>(2.5));
  ASSERT_EQ(res, MakeUInt128(0x4100U, 0U));
}
2393 
// FCVT Dd, Hn: widen half precision to double precision (0x4100 -> 2.5).
TEST(Arm64InsnTest, ConvertFp16ToFp64) {
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvt %d0, %h1")(uint64_t{0x4100});
  ASSERT_EQ(res, bit_cast<uint64_t>(2.5));
}
2399 
// FCVTN (vector): narrow two doubles into the low two float lanes; the upper
// half of the destination register is zeroed.
TEST(Arm64InsnTest, ConvertToNarrowF64F32x2) {
  constexpr auto AsmFcvtn = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtn %0.2s, %1.2d");
  constexpr double kInf64 = std::numeric_limits<double>::infinity();
  constexpr float kInf32 = std::numeric_limits<float>::infinity();
  ASSERT_EQ(AsmFcvtn(MakeF64x2(2.0, 3.0)), MakeF32x4(2.0f, 3.0f, 0.0f, 0.0f));
  // Values too large for float, as well as infinities, narrow to +/-inf.
  ASSERT_EQ(AsmFcvtn(MakeF64x2(std::numeric_limits<double>::max(), kInf64)),
            MakeF32x4(kInf32, kInf32, 0.0f, 0.0f));
  ASSERT_EQ(AsmFcvtn(MakeF64x2(std::numeric_limits<double>::lowest(), -kInf64)),
            MakeF32x4(-kInf32, -kInf32, 0.0f, 0.0f));
}
2419 
TEST(Arm64InsnTest,ConvertToNarrowF64F32x2Upper)2420 TEST(Arm64InsnTest, ConvertToNarrowF64F32x2Upper) {
2421   constexpr auto AsmFcvtn = ASM_INSN_WRAP_FUNC_W_RES_W0_ARG("fcvtn2 %0.4s, %1.2d");
2422   __uint128_t arg1 = MakeF64x2(2.0, 3.0);
2423   __uint128_t arg2 = MakeF32x4(4.0f, 5.0f, 6.0f, 7.0f);
2424   ASSERT_EQ(AsmFcvtn(arg1, arg2), MakeF32x4(4.0f, 5.0f, 2.0f, 3.0f));
2425 }
2426 
TEST(Arm64InsnTest, ConvertToNarrowRoundToOddF64F32) {
  constexpr auto AsmFcvtxn = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtxn %s0, %d1");
  constexpr double kDoubleInf = std::numeric_limits<double>::infinity();
  constexpr float kFloatInf = std::numeric_limits<float>::infinity();
  // Exactly representable value converts without rounding.
  ASSERT_EQ(AsmFcvtxn(bit_cast<uint64_t>(2.0)), bit_cast<uint32_t>(2.0f));
  // Finite out-of-range values saturate to the largest finite FP32 magnitude.
  ASSERT_EQ(AsmFcvtxn(bit_cast<uint64_t>(std::numeric_limits<double>::max())),
            bit_cast<uint32_t>(std::numeric_limits<float>::max()));
  ASSERT_EQ(AsmFcvtxn(bit_cast<uint64_t>(std::numeric_limits<double>::lowest())),
            bit_cast<uint32_t>(std::numeric_limits<float>::lowest()));
  // Infinities pass through with their sign preserved.
  ASSERT_EQ(AsmFcvtxn(bit_cast<uint64_t>(kDoubleInf)), bit_cast<uint32_t>(kFloatInf));
  ASSERT_EQ(AsmFcvtxn(bit_cast<uint64_t>(-kDoubleInf)), bit_cast<uint32_t>(-kFloatInf));
}

TEST(Arm64InsnTest, ConvertToNarrowRoundToOddF64F32x2) {
  constexpr auto AsmFcvtxn = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtxn %0.2s, %1.2d");
  // The vector form zeroes the upper half of the destination.
  ASSERT_EQ(AsmFcvtxn(MakeF64x2(2.0, 3.0)), MakeF32x4(2.0f, 3.0f, 0.0f, 0.0f));
}

TEST(Arm64InsnTest, ConvertToNarrowRoundToOddF64F32x2Upper) {
  constexpr auto AsmFcvtxn2 = ASM_INSN_WRAP_FUNC_W_RES_W0_ARG("fcvtxn2 %0.4s, %1.2d");
  // FCVTXN2 writes into the upper half; the lower half is preserved.
  __uint128_t doubles = MakeF64x2(2.0, 3.0);
  __uint128_t preserved_lower = MakeF32x4(4.0f, 5.0f, 6.0f, 7.0f);
  ASSERT_EQ(AsmFcvtxn2(doubles, preserved_lower), MakeF32x4(4.0f, 5.0f, 2.0f, 3.0f));
}
2455 
TEST(Arm64InsnTest, ConvertToWiderF32F64x2Lower) {
  // FCVTL widens the two low FP32 lanes to FP64.
  constexpr auto AsmFcvtl = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtl %0.2d, %1.2s");
  ASSERT_EQ(AsmFcvtl(MakeF32x4(2.0f, 3.0f, 4.0f, 5.0f)), MakeF64x2(2.0, 3.0));
}

TEST(Arm64InsnTest, ConvertToWiderF32F64x2Upper) {
  // FCVTL2 widens the two high FP32 lanes to FP64.
  constexpr auto AsmFcvtl2 = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtl2 %0.2d, %1.4s");
  ASSERT_EQ(AsmFcvtl2(MakeF32x4(2.0f, 3.0f, 4.0f, 5.0f)), MakeF64x2(4.0, 5.0));
}

TEST(Arm64InsnTest, ConvertToWiderF16F32x4Lower) {
  // FCVTL widens the four low FP16 lanes to FP32.
  constexpr auto AsmFcvtl = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtl %0.4s, %1.4h");
  // FP16 encodings of 2.5, 4.5, 8.5, 16.5 packed into the lower half.
  __uint128_t half_precision_lanes = MakeUInt128(0x4c20'4840'4480'4100ULL, 0);
  ASSERT_EQ(AsmFcvtl(half_precision_lanes), MakeF32x4(2.5f, 4.5f, 8.5f, 16.5f));
}

TEST(Arm64InsnTest, ConvertToWiderF16F32x4Upper) {
  // FCVTL2 widens the four high FP16 lanes to FP32.
  constexpr auto AsmFcvtl2 = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtl2 %0.4s, %1.8h");
  // FP16 encodings of 2.5, 4.5, 8.5, 16.5 packed into the upper half.
  __uint128_t half_precision_lanes = MakeUInt128(0, 0x4c20'4840'4480'4100ULL);
  ASSERT_EQ(AsmFcvtl2(half_precision_lanes), MakeF32x4(2.5f, 4.5f, 8.5f, 16.5f));
}
2481 
TEST(Arm64InsnTest, ConvertToNarrowF32F16x4Lower) {
  // FCVTN narrows four FP32 lanes to FP16, writing the lower half.
  constexpr auto AsmFcvtn = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtn %0.4h, %1.4s");
  __uint128_t singles = MakeF32x4(2.5f, 4.5f, 8.5f, 16.5f);
  // Expect the FP16 encodings packed into the lower 64 bits, upper half zero.
  ASSERT_EQ(AsmFcvtn(singles), MakeUInt128(0x4c20'4840'4480'4100ULL, 0));
}

TEST(Arm64InsnTest, ConvertToNarrowF32F16x4Upper) {
  // FCVTN2 narrows four FP32 lanes into the upper half, keeping the lower half.
  constexpr auto AsmFcvtn2 = ASM_INSN_WRAP_FUNC_W_RES_W0_ARG("fcvtn2 %0.8h, %1.4s");
  __uint128_t singles = MakeF32x4(2.5f, 4.5f, 8.5f, 16.5f);
  __uint128_t preserved_lower = MakeF32x4(3.0f, 5.0f, 7.0f, 11.0f);
  ASSERT_EQ(AsmFcvtn2(singles, preserved_lower),
            MakeUInt128(uint64_t(preserved_lower), 0x4c20'4840'4480'4100ULL));
}
2496 
TEST(Arm64InsnTest, AbsF32) {
  // Scalar FABS clears the sign bit of a single-precision value.
  uint32_t negative_input = 0xc1273333U;  // -10.45 in float
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fabs %s0, %s1")(negative_input);
  ASSERT_EQ(res, MakeUInt128(0x41273333ULL, 0U));  // 10.45 in float
}

TEST(Arm64InsnTest, AbsF64) {
  // Scalar FABS clears the sign bit of a double-precision value.
  uint64_t negative_input = 0xc03de8f5c28f5c29ULL;  // -29.91 in double
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fabs %d0, %d1")(negative_input);
  ASSERT_EQ(res, MakeUInt128(0x403de8f5c28f5c29ULL, 0U));  // 29.91 in double
}

TEST(Arm64InsnTest, AbsF32x4) {
  // Vector FABS, four single-precision lanes; -0.0 maps to +0.0.
  constexpr auto AsmFabs = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fabs %0.4s, %1.4s");
  ASSERT_EQ(AsmFabs(MakeF32x4(-0.0f, 0.0f, 3.0f, -7.0f)),
            MakeF32x4(0.0f, 0.0f, 3.0f, 7.0f));
}

TEST(Arm64InsnTest, AbsF64x2) {
  // Vector FABS, two double-precision lanes.
  constexpr auto AsmFabs = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fabs %0.2d, %1.2d");
  ASSERT_EQ(AsmFabs(MakeF64x2(-0.0, 3.0)), MakeF64x2(0.0, 3.0));
}
2520 
TEST(Arm64InsnTest, AbdF32) {
  // FABD: absolute difference of two single-precision scalars.
  constexpr auto AsmFabd = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fabd %s0, %s1, %s2");
  uint32_t minuend = 0x4181851fU;     // 16.19 in float
  uint32_t subtrahend = 0x41211eb8U;  // 10.06 in float
  ASSERT_EQ(AsmFabd(minuend, subtrahend), MakeUInt128(0x40c3d70cULL, 0U));  // 6.12 in float
}

TEST(Arm64InsnTest, AbdF64) {
  // FABD: absolute difference of two double-precision scalars.
  constexpr auto AsmFabd = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fabd %d0, %d1, %d2");
  uint64_t minuend = 0x403828f5c28f5c29U;     // 24.16 in double
  uint64_t subtrahend = 0x4027d70a3d70a3d7U;  // 11.92 in double
  ASSERT_EQ(AsmFabd(minuend, subtrahend),
            MakeUInt128(0x40287ae147ae147bULL, 0U));  // 12.24 in double
}

TEST(Arm64InsnTest, AbdF32x4) {
  // FABD over four single-precision lanes; sign of the difference is dropped.
  constexpr auto AsmFabd = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fabd %0.4s, %1.4s, %2.4s");
  __uint128_t lhs = MakeF32x4(1.0f, 5.0f, -3.0f, -2.0f);
  __uint128_t rhs = MakeF32x4(-1.0f, 2.0f, -5.0f, 3.0f);
  ASSERT_EQ(AsmFabd(lhs, rhs), MakeF32x4(2.0f, 3.0f, 2.0f, 5.0f));
}

TEST(Arm64InsnTest, AbdF64x2) {
  // FABD over two double-precision lanes.
  constexpr auto AsmFabd = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fabd %0.2d, %1.2d, %2.2d");
  __uint128_t lhs = MakeF64x2(5.0, -2.0);
  __uint128_t rhs = MakeF64x2(4.0, 3.0);
  ASSERT_EQ(AsmFabd(lhs, rhs), MakeF64x2(1.0, 5.0));
}
2551 
TEST(Arm64InsnTest, NegF32) {
  // Scalar FNEG flips the sign bit of a single-precision value.
  uint32_t positive_input = 0x40eeb852U;  // 7.46 in float
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fneg %s0, %s1")(positive_input);
  ASSERT_EQ(res, MakeUInt128(0xc0eeb852ULL, 0U));  // -7.46 in float
}

TEST(Arm64InsnTest, NegF64) {
  // Scalar FNEG flips the sign bit of a double-precision value.
  uint64_t positive_input = 0x4054b28f5c28f5c3ULL;  // 82.79 in double
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fneg %d0, %d1")(positive_input);
  ASSERT_EQ(res, MakeUInt128(0xc054b28f5c28f5c3ULL, 0U));  // -82.79 in double
}

TEST(Arm64InsnTest, NegF32x4) {
  // Vector FNEG; note both zero signs are flipped, not normalized.
  constexpr auto AsmFneg = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fneg %0.4s, %1.4s");
  ASSERT_EQ(AsmFneg(MakeF32x4(-0.0f, 0.0f, 1.0f, -3.0f)),
            MakeF32x4(0.0f, -0.0f, -1.0f, 3.0f));
}

TEST(Arm64InsnTest, NegF64x2) {
  // Vector FNEG over two double-precision lanes.
  constexpr auto AsmFneg = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fneg %0.2d, %1.2d");
  ASSERT_EQ(AsmFneg(MakeF64x2(0.0, 3.0)), MakeF64x2(-0.0, -3.0));
}
2575 
TEST(Arm64InsnTest, SqrtF32) {
  // Scalar FSQRT, single precision.
  uint32_t radicand = 0x41f3cac1U;  // 30.474 in float
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fsqrt %s0, %s1")(radicand);
  ASSERT_EQ(res, MakeUInt128(0x40b0a683ULL, 0U));  // 5.5203261 in float
}

TEST(Arm64InsnTest, SqrtF64) {
  // Scalar FSQRT, double precision.
  uint64_t radicand = 0x403d466666666666ULL;  // 29.275 in double
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fsqrt %d0, %d1")(radicand);
  ASSERT_EQ(res, MakeUInt128(0x4015a47e3392efb8ULL, 0U));  // 5.41... in double
}

TEST(Arm64InsnTest, SqrtF32x4) {
  // Vector FSQRT over perfect squares, so each lane result is exact.
  constexpr auto AsmSqrt = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fsqrt %0.4s, %1.4s");
  ASSERT_EQ(AsmSqrt(MakeF32x4(0.0f, 1.0f, 4.0f, 9.0f)), MakeF32x4(0.0f, 1.0f, 2.0f, 3.0f));
}
2593 
TEST(Arm64InsnTest, RecipEstimateF32) {
  // FRECPE produces a low-precision reciprocal estimate; the expected values
  // below are the architecturally defined estimates, not exact reciprocals.
  constexpr auto AsmFrecpe = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("frecpe %s0, %s1");
  ASSERT_EQ(AsmFrecpe(bit_cast<uint32_t>(0.25f)), bit_cast<uint32_t>(3.9921875f));
  ASSERT_EQ(AsmFrecpe(bit_cast<uint32_t>(0.50f)), bit_cast<uint32_t>(1.99609375f));
  ASSERT_EQ(AsmFrecpe(bit_cast<uint32_t>(2.00f)), bit_cast<uint32_t>(0.4990234375f));
  ASSERT_EQ(AsmFrecpe(bit_cast<uint32_t>(4.00f)), bit_cast<uint32_t>(0.24951171875f));
}

TEST(Arm64InsnTest, RecipEstimateF32x4) {
  // Vector FRECPE: same estimates as the scalar form, one per lane.
  constexpr auto AsmFrecpe = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("frecpe %0.4s, %1.4s");
  ASSERT_EQ(AsmFrecpe(MakeF32x4(0.25f, 0.50f, 2.00f, 4.00f)),
            MakeF32x4(3.9921875f, 1.99609375f, 0.4990234375f, 0.24951171875f));
}
2607 
TEST(Arm64InsnTest, RecipStepF32) {
  // FRECPS computes 2.0 - a*b (the Newton-Raphson reciprocal refinement step);
  // all values below are exact: 2 - 1.5*0.5 = 1.25, 2 - 2*0.5 = 1, etc.
  constexpr auto AsmFrecps = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("frecps %s0, %s1, %s2");
  ASSERT_EQ(AsmFrecps(bit_cast<uint32_t>(1.50f), bit_cast<uint32_t>(0.50f)),
            bit_cast<uint32_t>(1.25f));
  ASSERT_EQ(AsmFrecps(bit_cast<uint32_t>(2.00f), bit_cast<uint32_t>(0.50f)),
            bit_cast<uint32_t>(1.00f));
  ASSERT_EQ(AsmFrecps(bit_cast<uint32_t>(3.00f), bit_cast<uint32_t>(0.25f)),
            bit_cast<uint32_t>(1.25f));
  ASSERT_EQ(AsmFrecps(bit_cast<uint32_t>(3.00f), bit_cast<uint32_t>(0.50f)),
            bit_cast<uint32_t>(0.50f));
}

TEST(Arm64InsnTest, RecipStepF64) {
  // Double-precision FRECPS with the same operand pairs as the F32 test.
  constexpr auto AsmFrecps = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("frecps %d0, %d1, %d2");
  ASSERT_EQ(AsmFrecps(bit_cast<uint64_t>(1.50), bit_cast<uint64_t>(0.50)),
            bit_cast<uint64_t>(1.25));
  ASSERT_EQ(AsmFrecps(bit_cast<uint64_t>(2.00), bit_cast<uint64_t>(0.50)),
            bit_cast<uint64_t>(1.00));
  ASSERT_EQ(AsmFrecps(bit_cast<uint64_t>(3.00), bit_cast<uint64_t>(0.25)),
            bit_cast<uint64_t>(1.25));
  ASSERT_EQ(AsmFrecps(bit_cast<uint64_t>(3.00), bit_cast<uint64_t>(0.50)),
            bit_cast<uint64_t>(0.50));
}

TEST(Arm64InsnTest, RecipStepF32x4) {
  // Vector FRECPS: the four scalar cases above packed into one register pair.
  constexpr auto AsmFrecps = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("frecps %0.4s, %1.4s, %2.4s");
  __uint128_t lhs = MakeF32x4(1.50f, 2.00f, 3.00f, 3.00f);
  __uint128_t rhs = MakeF32x4(0.50f, 0.50f, 0.25f, 0.50f);
  ASSERT_EQ(AsmFrecps(lhs, rhs), MakeF32x4(1.25f, 1.00f, 1.25f, 0.50f));
}

TEST(Arm64InsnTest, RecipStepF64x2) {
  // Vector FRECPS over two double-precision lanes, two operand pairs per call.
  constexpr auto AsmFrecps = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("frecps %0.2d, %1.2d, %2.2d");
  ASSERT_EQ(AsmFrecps(MakeF64x2(1.50, 2.00), MakeF64x2(0.50, 0.50)), MakeF64x2(1.25, 1.00));
  ASSERT_EQ(AsmFrecps(MakeF64x2(3.00, 3.00), MakeF64x2(0.25, 0.50)), MakeF64x2(1.25, 0.50));
}
2649 
TEST(Arm64InsnTest, RecipSqrtEstimateF32) {
  // FRSQRTE yields the architectural low-precision 1/sqrt(x) estimate.
  constexpr auto AsmFrsqrte = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("frsqrte %s0, %s1");
  ASSERT_EQ(AsmFrsqrte(bit_cast<uint32_t>(2.0f)), bit_cast<uint32_t>(0.705078125f));
  ASSERT_EQ(AsmFrsqrte(bit_cast<uint32_t>(3.0f)), bit_cast<uint32_t>(0.576171875f));
  ASSERT_EQ(AsmFrsqrte(bit_cast<uint32_t>(4.0f)), bit_cast<uint32_t>(0.4990234375f));
  ASSERT_EQ(AsmFrsqrte(bit_cast<uint32_t>(5.0f)), bit_cast<uint32_t>(0.4462890625f));
}

TEST(Arm64InsnTest, RecipSqrtEstimateF32x4) {
  // Vector FRSQRTE: same estimates as the scalar form, one per lane.
  constexpr auto AsmFrsqrte = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("frsqrte %0.4s, %1.4s");
  ASSERT_EQ(AsmFrsqrte(MakeF32x4(2.0f, 3.0f, 4.0f, 5.0f)),
            MakeF32x4(0.705078125f, 0.576171875f, 0.4990234375f, 0.4462890625f));
}

TEST(Arm64InsnTest, RecipSqrtEstimateF64) {
  // Double-precision FRSQRTE produces the same estimate values.
  constexpr auto AsmFrsqrte = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("frsqrte %d0, %d1");
  ASSERT_EQ(AsmFrsqrte(bit_cast<uint64_t>(2.0)), bit_cast<uint64_t>(0.705078125));
  ASSERT_EQ(AsmFrsqrte(bit_cast<uint64_t>(3.0)), bit_cast<uint64_t>(0.576171875));
  ASSERT_EQ(AsmFrsqrte(bit_cast<uint64_t>(4.0)), bit_cast<uint64_t>(0.4990234375));
  ASSERT_EQ(AsmFrsqrte(bit_cast<uint64_t>(5.0)), bit_cast<uint64_t>(0.4462890625));
}

TEST(Arm64InsnTest, RecipSqrtEstimateF64x2) {
  // Vector FRSQRTE over two double-precision lanes.
  constexpr auto AsmFrsqrte = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("frsqrte %0.2d, %1.2d");
  ASSERT_EQ(AsmFrsqrte(MakeF64x2(2.0, 3.0)),
            MakeUInt128(bit_cast<uint64_t>(0.705078125), bit_cast<uint64_t>(0.576171875)));
}
2679 
TEST(Arm64InsnTest, RecipSqrtStepF32) {
  // FRSQRTS computes (3.0 - a*b) / 2.0 (the Newton-Raphson 1/sqrt refinement
  // step); all expected values below are exact.
  constexpr auto AsmFrsqrts = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("frsqrts %s0, %s1, %s2");
  ASSERT_EQ(AsmFrsqrts(bit_cast<uint32_t>(1.50f), bit_cast<uint32_t>(0.50f)),
            bit_cast<uint32_t>(1.125f));
  ASSERT_EQ(AsmFrsqrts(bit_cast<uint32_t>(2.00f), bit_cast<uint32_t>(0.50f)),
            bit_cast<uint32_t>(1.000f));
  ASSERT_EQ(AsmFrsqrts(bit_cast<uint32_t>(3.00f), bit_cast<uint32_t>(0.25f)),
            bit_cast<uint32_t>(1.125f));
  ASSERT_EQ(AsmFrsqrts(bit_cast<uint32_t>(3.00f), bit_cast<uint32_t>(0.50f)),
            bit_cast<uint32_t>(0.750f));
}

TEST(Arm64InsnTest, RecipSqrtStepF64) {
  // Double-precision FRSQRTS with the same operand pairs as the F32 test.
  constexpr auto AsmFrsqrts = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("frsqrts %d0, %d1, %d2");
  ASSERT_EQ(AsmFrsqrts(bit_cast<uint64_t>(1.50), bit_cast<uint64_t>(0.50)),
            bit_cast<uint64_t>(1.125));
  ASSERT_EQ(AsmFrsqrts(bit_cast<uint64_t>(2.00), bit_cast<uint64_t>(0.50)),
            bit_cast<uint64_t>(1.000));
  ASSERT_EQ(AsmFrsqrts(bit_cast<uint64_t>(3.00), bit_cast<uint64_t>(0.25)),
            bit_cast<uint64_t>(1.125));
  ASSERT_EQ(AsmFrsqrts(bit_cast<uint64_t>(3.00), bit_cast<uint64_t>(0.50)),
            bit_cast<uint64_t>(0.750));
}

TEST(Arm64InsnTest, RecipSqrtStepF32x4) {
  // Vector FRSQRTS: the four scalar cases above packed into one register pair.
  constexpr auto AsmFrsqrts = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("frsqrts %0.4s, %1.4s, %2.4s");
  __uint128_t lhs = MakeF32x4(1.50f, 2.00f, 3.00f, 3.00f);
  __uint128_t rhs = MakeF32x4(0.50f, 0.50f, 0.25f, 0.50f);
  ASSERT_EQ(AsmFrsqrts(lhs, rhs), MakeF32x4(1.125f, 1.000f, 1.125f, 0.750f));
}

TEST(Arm64InsnTest, RecipSqrtStepF64x2) {
  // Vector FRSQRTS over two double-precision lanes, two operand pairs per call.
  constexpr auto AsmFrsqrts = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("frsqrts %0.2d, %1.2d, %2.2d");
  ASSERT_EQ(AsmFrsqrts(MakeF64x2(1.50, 2.00), MakeF64x2(0.50, 0.50)), MakeF64x2(1.125, 1.000));
  ASSERT_EQ(AsmFrsqrts(MakeF64x2(3.00, 3.00), MakeF64x2(0.25, 0.50)), MakeF64x2(1.125, 0.750));
}
2721 
TEST(Arm64InsnTest, AddFp32) {
  // Scalar FADD, single precision.
  uint64_t augend = 0x40d5c28fULL;  // 6.68 in float
  uint64_t addend = 0x409f5c29ULL;  // 4.98 in float
  __uint128_t rd = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fadd %s0, %s1, %s2")(augend, addend);
  ASSERT_EQ(rd, MakeUInt128(0x413a8f5cULL, 0U));  // 11.66 in float
}

TEST(Arm64InsnTest, AddFp64) {
  // Scalar FADD, double precision.
  uint64_t augend = 0x402099999999999aULL;  // 8.30 in double
  uint64_t addend = 0x4010ae147ae147aeULL;  // 4.17 in double
  __uint128_t rd = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fadd %d0, %d1, %d2")(augend, addend);
  ASSERT_EQ(rd, MakeUInt128(0x4028f0a3d70a3d71ULL, 0U));  // 12.47 in double
}

TEST(Arm64InsnTest, AddF32x4) {
  // Vector FADD over four single-precision lanes.
  constexpr auto AsmFadd = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fadd %0.4s, %1.4s, %2.4s");
  ASSERT_EQ(AsmFadd(MakeF32x4(-3.0f, 2.0f, 7.0f, -0.0f), MakeF32x4(6.0f, 1.0f, -8.0f, 5.0f)),
            MakeF32x4(3.0f, 3.0f, -1.0f, 5.0f));
}

TEST(Arm64InsnTest, AddF64x2) {
  // Vector FADD over two double-precision lanes.
  constexpr auto AsmFadd = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fadd %0.2d, %1.2d, %2.2d");
  ASSERT_EQ(AsmFadd(MakeF64x2(3.0, 5.0), MakeF64x2(-4.0, 2.0)), MakeF64x2(-1.0, 7.0));
}
2749 
TEST(Arm64InsnTest, AddPairwiseF32x2) {
  // Scalar-output FADDP sums the two low lanes; upper lanes are ignored.
  constexpr auto AsmFaddp = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("faddp %s0, %1.2s");
  ASSERT_EQ(AsmFaddp(MakeF32x4(1.0f, 2.0f, 4.0f, 8.0f)), bit_cast<uint32_t>(3.0f));
}

TEST(Arm64InsnTest, AddPairwiseF32x4) {
  // Vector FADDP sums adjacent pairs: first operand's pairs fill the low
  // half of the result, second operand's pairs fill the high half.
  constexpr auto AsmFaddp = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("faddp %0.4s, %1.4s, %2.4s");
  __uint128_t lhs = MakeF32x4(-3.0f, 2.0f, 7.0f, -0.0f);
  __uint128_t rhs = MakeF32x4(6.0f, 1.0f, -8.0f, 5.0f);
  ASSERT_EQ(AsmFaddp(lhs, rhs), MakeF32x4(-1.0f, 7.0f, 7.0f, -3.0f));
}
2762 
TEST(Arm64InsnTest, SubFp32) {
  // Scalar FSUB, single precision.
  uint64_t minuend = 0x411f5c29ULL;     // 9.96 in float
  uint64_t subtrahend = 0x404851ecULL;  // 3.13 in float
  __uint128_t rd = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fsub %s0, %s1, %s2")(minuend, subtrahend);
  ASSERT_EQ(rd, MakeUInt128(0x40da8f5cULL, 0U));  // 6.83 in float
}

TEST(Arm64InsnTest, SubFp64) {
  // Scalar FSUB, double precision.
  uint64_t minuend = 0x401ee147ae147ae1ULL;     // 7.72 in double
  uint64_t subtrahend = 0x4015666666666666ULL;  // 5.35 in double
  __uint128_t rd = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fsub %d0, %d1, %d2")(minuend, subtrahend);
  ASSERT_EQ(rd, MakeUInt128(0x4002f5c28f5c28f6ULL, 0U));  // 2.37 in double
}

TEST(Arm64InsnTest, SubF32x4) {
  // Vector FSUB over four single-precision lanes.
  constexpr auto AsmFsub = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fsub %0.4s, %1.4s, %2.4s");
  ASSERT_EQ(AsmFsub(MakeF32x4(-3.0f, 2.0f, 7.0f, -0.0f), MakeF32x4(6.0f, 1.0f, -8.0f, 5.0f)),
            MakeF32x4(-9.0f, 1.0f, 15.0f, -5.0f));
}

TEST(Arm64InsnTest, SubF64x2) {
  // Vector FSUB over two double-precision lanes.
  constexpr auto AsmFsub = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fsub %0.2d, %1.2d, %2.2d");
  ASSERT_EQ(AsmFsub(MakeF64x2(3.0, 5.0), MakeF64x2(-4.0, 2.0)), MakeF64x2(7.0, 3.0));
}
2790 
TEST(Arm64InsnTest, MaxFp32) {
  // Scalar FMAX: any NaN operand yields the default NaN.
  constexpr auto AsmFmax = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fmax %s0, %s1, %s2");
  uint32_t two_bits = bit_cast<uint32_t>(2.0f);
  uint32_t three_bits = bit_cast<uint32_t>(3.0f);

  ASSERT_EQ(AsmFmax(two_bits, three_bits), MakeU32x4(three_bits, 0, 0, 0));
  ASSERT_EQ(AsmFmax(kDefaultNaN32, three_bits), kDefaultNaN32);
  ASSERT_EQ(AsmFmax(three_bits, kDefaultNaN32), kDefaultNaN32);
}

TEST(Arm64InsnTest, MaxFp64) {
  // Scalar FMAX, double precision; NaN propagates from either operand.
  constexpr auto AsmFmax = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fmax %d0, %d1, %d2");
  uint64_t two_bits = bit_cast<uint64_t>(2.0);
  uint64_t three_bits = bit_cast<uint64_t>(3.0);

  ASSERT_EQ(AsmFmax(two_bits, three_bits), MakeUInt128(three_bits, 0U));
  ASSERT_EQ(AsmFmax(kDefaultNaN64, three_bits), kDefaultNaN64);
  ASSERT_EQ(AsmFmax(three_bits, kDefaultNaN64), kDefaultNaN64);
}

TEST(Arm64InsnTest, MaxF32x4) {
  constexpr auto AsmFmax = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fmax %0.4s, %1.4s, %2.4s");
  // +0.0 is treated as greater than -0.0.
  ASSERT_EQ(AsmFmax(MakeF32x4(-0.0f, 2.0f, 3.0f, -4.0f), MakeF32x4(0.0f, 1.0f, -3.0f, -3.0f)),
            MakeF32x4(0.0f, 2.0f, 3.0f, -3.0f));

  // Lanes with a NaN on either side produce the default NaN.
  __uint128_t nan_lhs = MakeF32x4(-0.0f, bit_cast<float>(kDefaultNaN32), 3.0f, -4.0f);
  __uint128_t nan_rhs = MakeF32x4(0.0f, 1.0f, -3.0f, bit_cast<float>(kDefaultNaN32));
  ASSERT_EQ(AsmFmax(nan_lhs, nan_rhs),
            MakeF32x4(0.0f, bit_cast<float>(kDefaultNaN32), 3.0f, bit_cast<float>(kDefaultNaN32)));
}

TEST(Arm64InsnTest, MaxF64x2) {
  constexpr auto AsmFmax = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fmax %0.2d, %1.2d, %2.2d");
  // +0.0 is treated as greater than -0.0.
  ASSERT_EQ(AsmFmax(MakeF64x2(-0.0, 3.0), MakeF64x2(0.0, -3.0)), MakeF64x2(0.0, 3.0));

  // Lanes with a NaN on either side produce the default NaN.
  __uint128_t nan_lhs = MakeF64x2(bit_cast<double>(kDefaultNaN64), 3.0);
  __uint128_t nan_rhs = MakeF64x2(1.0, bit_cast<double>(kDefaultNaN64));
  ASSERT_EQ(AsmFmax(nan_lhs, nan_rhs),
            MakeF64x2(bit_cast<double>(kDefaultNaN64), bit_cast<double>(kDefaultNaN64)));
}
2834 
TEST(Arm64InsnTest, MaxNumberFp32) {
  // FMAXNM (scalar, single precision): when exactly one operand is a quiet
  // NaN, the numeric operand wins regardless of its sign or position.
  constexpr auto AsmFmaxnm = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fmaxnm %s0, %s1, %s2");
  uint32_t fp_arg_two = bit_cast<uint32_t>(2.0f);
  uint32_t fp_arg_three = bit_cast<uint32_t>(3.0f);
  // Fix: this was `uint64_t ... = bit_cast<uint64_t>(-2.0)`, whose low 32 bits
  // are 0x00000000 (+0.0f), so the NaN cases below silently exercised 0.0f
  // instead of -2.0f. Use the FP32 encoding, as MinNumberFp32 does.
  uint32_t fp_arg_minus_two = bit_cast<uint32_t>(-2.0f);

  ASSERT_EQ(AsmFmaxnm(fp_arg_two, fp_arg_three), MakeU32x4(fp_arg_three, 0, 0, 0));

  // Quiet NaN loses against any number, in either operand position.
  ASSERT_EQ(AsmFmaxnm(fp_arg_two, kQuietNaN32), MakeU32x4(fp_arg_two, 0, 0, 0));
  ASSERT_EQ(AsmFmaxnm(fp_arg_minus_two, kQuietNaN32), MakeU32x4(fp_arg_minus_two, 0, 0, 0));
  ASSERT_EQ(AsmFmaxnm(kQuietNaN32, fp_arg_two), MakeU32x4(fp_arg_two, 0, 0, 0));
  ASSERT_EQ(AsmFmaxnm(kQuietNaN32, fp_arg_minus_two), MakeU32x4(fp_arg_minus_two, 0, 0, 0));
}
2848 
TEST(Arm64InsnTest, MaxNumberFp64) {
  // FMAXNM (scalar, double precision): a single quiet-NaN operand loses
  // against any number, in either position.
  constexpr auto AsmFmaxnm = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fmaxnm %d0, %d1, %d2");
  uint64_t two_bits = bit_cast<uint64_t>(2.0);
  uint64_t three_bits = bit_cast<uint64_t>(3.0);
  uint64_t minus_two_bits = bit_cast<uint64_t>(-2.0);

  ASSERT_EQ(AsmFmaxnm(two_bits, three_bits), MakeUInt128(three_bits, 0U));

  ASSERT_EQ(AsmFmaxnm(two_bits, kQuietNaN64), MakeUInt128(two_bits, 0U));
  ASSERT_EQ(AsmFmaxnm(minus_two_bits, kQuietNaN64), MakeUInt128(minus_two_bits, 0));
  ASSERT_EQ(AsmFmaxnm(kQuietNaN64, two_bits), MakeUInt128(two_bits, 0));
  ASSERT_EQ(AsmFmaxnm(kQuietNaN64, minus_two_bits), MakeUInt128(minus_two_bits, 0));
}
2862 
TEST(Arm64InsnTest, MinNumberFp32) {
  // FMINNM (scalar, single precision): a single quiet-NaN operand loses
  // against any number, in either position.
  constexpr auto AsmFminnm = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fminnm %s0, %s1, %s2");
  uint32_t two_bits = bit_cast<uint32_t>(2.0f);
  uint32_t three_bits = bit_cast<uint32_t>(3.0f);
  uint32_t minus_two_bits = bit_cast<uint32_t>(-2.0f);

  ASSERT_EQ(AsmFminnm(two_bits, three_bits), MakeU32x4(two_bits, 0, 0, 0));

  ASSERT_EQ(AsmFminnm(two_bits, kQuietNaN32), MakeU32x4(two_bits, 0, 0, 0));
  ASSERT_EQ(AsmFminnm(minus_two_bits, kQuietNaN32), MakeU32x4(minus_two_bits, 0, 0, 0));
  ASSERT_EQ(AsmFminnm(kQuietNaN32, two_bits), MakeU32x4(two_bits, 0, 0, 0));
  ASSERT_EQ(AsmFminnm(kQuietNaN32, minus_two_bits), MakeU32x4(minus_two_bits, 0, 0, 0));
}

TEST(Arm64InsnTest, MinNumberFp64) {
  // FMINNM (scalar, double precision): same quiet-NaN behavior as above.
  constexpr auto AsmFminnm = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fminnm %d0, %d1, %d2");
  uint64_t two_bits = bit_cast<uint64_t>(2.0);
  uint64_t three_bits = bit_cast<uint64_t>(3.0);
  uint64_t minus_two_bits = bit_cast<uint64_t>(-2.0);

  ASSERT_EQ(AsmFminnm(two_bits, three_bits), MakeUInt128(two_bits, 0U));

  ASSERT_EQ(AsmFminnm(two_bits, kQuietNaN64), MakeUInt128(two_bits, 0U));
  ASSERT_EQ(AsmFminnm(minus_two_bits, kQuietNaN64), MakeUInt128(minus_two_bits, 0));
  ASSERT_EQ(AsmFminnm(kQuietNaN64, two_bits), MakeUInt128(two_bits, 0));
  ASSERT_EQ(AsmFminnm(kQuietNaN64, minus_two_bits), MakeUInt128(minus_two_bits, 0));
}
2890 
TEST(Arm64InsnTest, MaxNumberF32x4) {
  // Vector FMAXNM: per-lane "max number" with quiet NaNs losing to numbers.
  constexpr auto AsmFmaxnm = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fmaxnm %0.4s, %1.4s, %2.4s");
  __uint128_t arg1 = MakeF32x4(-1.0f, 2.0f, 3.0f, -4.0f);
  __uint128_t arg2 = MakeF32x4(2.0f, 1.0f, -3.0f, -3.0f);
  ASSERT_EQ(AsmFmaxnm(arg1, arg2), MakeF32x4(2.0f, 2.0f, 3.0f, -3.0f));

  // A quiet NaN in either operand of a lane loses to the numeric lane value.
  __uint128_t arg3 =
      MakeU32x4(bit_cast<uint32_t>(1.0f), bit_cast<uint32_t>(-1.0f), kQuietNaN32, kQuietNaN32);
  __uint128_t arg4 =
      MakeU32x4(kQuietNaN32, kQuietNaN32, bit_cast<uint32_t>(1.0f), bit_cast<uint32_t>(-1.0f));
  ASSERT_EQ(AsmFmaxnm(arg3, arg4), MakeF32x4(1.0f, -1.0f, 1.0f, -1.0f));

  // Fix: the original also built two signaling-NaN inputs (kSignalingNaN32_1
  // mixed with numbers) but never passed them to the instruction and asserted
  // nothing, so they were dead code. Removed here.
  // TODO(review): add signaling-NaN coverage once the expected FMAXNM result
  // (quieted input NaN vs. default NaN, FPCR-dependent) is pinned down.
}
2908 
TEST(Arm64InsnTest, MaxNumberF64x2) {
  // Vector FMAXNM over two double-precision lanes.
  constexpr auto AsmFmaxnm = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fmaxnm %0.2d, %1.2d, %2.2d");
  ASSERT_EQ(AsmFmaxnm(MakeF64x2(-1.0, -4.0), MakeF64x2(2.0, -3.0)), MakeF64x2(2.0, -3.0));

  // A quiet NaN in either lane loses to the numeric value in that lane.
  __uint128_t nan_lhs = MakeUInt128(bit_cast<uint64_t>(1.0), kQuietNaN64);
  __uint128_t nan_rhs = MakeUInt128(kQuietNaN64, bit_cast<uint64_t>(-1.0));
  ASSERT_EQ(AsmFmaxnm(nan_lhs, nan_rhs), MakeF64x2(1.0, -1.0));
}

TEST(Arm64InsnTest, MinNumberF32x4) {
  // Vector FMINNM over four single-precision lanes; -0.0 < +0.0.
  constexpr auto AsmFminnm = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fminnm %0.4s, %1.4s, %2.4s");
  ASSERT_EQ(AsmFminnm(MakeF32x4(0.0f, 2.0f, 3.0f, -4.0f), MakeF32x4(-0.0f, 1.0f, -3.0f, -3.0f)),
            MakeF32x4(-0.0f, 1.0f, -3.0f, -4.0f));

  // A quiet NaN in either lane loses to the numeric value in that lane.
  __uint128_t nan_lhs =
      MakeU32x4(bit_cast<uint32_t>(1.0f), bit_cast<uint32_t>(-1.0f), kQuietNaN32, kQuietNaN32);
  __uint128_t nan_rhs =
      MakeU32x4(kQuietNaN32, kQuietNaN32, bit_cast<uint32_t>(1.0f), bit_cast<uint32_t>(-1.0f));
  ASSERT_EQ(AsmFminnm(nan_lhs, nan_rhs), MakeF32x4(1.0f, -1.0f, 1.0f, -1.0f));
}

TEST(Arm64InsnTest, MinNumberF64x2) {
  // Vector FMINNM over two double-precision lanes; -0.0 < +0.0.
  constexpr auto AsmFminnm = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fminnm %0.2d, %1.2d, %2.2d");
  ASSERT_EQ(AsmFminnm(MakeF64x2(0.0, 3.0), MakeF64x2(-0.0, -3.0)), MakeF64x2(-0.0, -3.0));

  // A quiet NaN in either lane loses to the numeric value in that lane.
  __uint128_t nan_lhs = MakeUInt128(bit_cast<uint64_t>(1.0), kQuietNaN64);
  __uint128_t nan_rhs = MakeUInt128(kQuietNaN64, bit_cast<uint64_t>(-1.0));
  ASSERT_EQ(AsmFminnm(nan_lhs, nan_rhs), MakeF64x2(1.0, -1.0));
}
2945 
TEST(Arm64InsnTest, MinFp32) {
  // Scalar FMIN: any NaN operand yields the default NaN.
  constexpr auto AsmFmin = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fmin %s0, %s1, %s2");
  uint32_t two_bits = bit_cast<uint32_t>(2.0f);
  uint32_t three_bits = bit_cast<uint32_t>(3.0f);

  ASSERT_EQ(AsmFmin(two_bits, three_bits), MakeU32x4(two_bits, 0, 0, 0));
  ASSERT_EQ(AsmFmin(kDefaultNaN32, three_bits), kDefaultNaN32);
  ASSERT_EQ(AsmFmin(three_bits, kDefaultNaN32), kDefaultNaN32);
}

TEST(Arm64InsnTest, MinFp64) {
  // Scalar FMIN, double precision; NaN propagates from either operand.
  constexpr auto AsmFmin = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fmin %d0, %d1, %d2");
  uint64_t two_bits = bit_cast<uint64_t>(2.0);
  uint64_t three_bits = bit_cast<uint64_t>(3.0);

  ASSERT_EQ(AsmFmin(two_bits, three_bits), MakeUInt128(two_bits, 0U));
  ASSERT_EQ(AsmFmin(kDefaultNaN64, three_bits), kDefaultNaN64);
  ASSERT_EQ(AsmFmin(three_bits, kDefaultNaN64), kDefaultNaN64);
}

TEST(Arm64InsnTest, MinF32x4) {
  constexpr auto AsmFmin = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fmin %0.4s, %1.4s, %2.4s");
  // -0.0 is treated as less than +0.0.
  ASSERT_EQ(AsmFmin(MakeF32x4(0.0f, 2.0f, 3.0f, -4.0f), MakeF32x4(-0.0f, 1.0f, -3.0f, -3.0f)),
            MakeF32x4(-0.0f, 1.0f, -3.0f, -4.0f));

  // Lanes with a NaN on either side produce the default NaN.
  __uint128_t nan_lhs = MakeF32x4(-0.0f, bit_cast<float>(kDefaultNaN32), 3.0f, -4.0f);
  __uint128_t nan_rhs = MakeF32x4(0.0f, 1.0f, -3.0f, bit_cast<float>(kDefaultNaN32));
  ASSERT_EQ(
      AsmFmin(nan_lhs, nan_rhs),
      MakeF32x4(-0.0f, bit_cast<float>(kDefaultNaN32), -3.0f, bit_cast<float>(kDefaultNaN32)));
}

TEST(Arm64InsnTest, MinF64x2) {
  constexpr auto AsmFmin = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fmin %0.2d, %1.2d, %2.2d");
  // -0.0 is treated as less than +0.0.
  ASSERT_EQ(AsmFmin(MakeF64x2(0.0, 3.0), MakeF64x2(-0.0, -3.0)), MakeF64x2(-0.0, -3.0));

  // Lanes with a NaN on either side produce the default NaN.
  __uint128_t nan_lhs = MakeF64x2(bit_cast<double>(kDefaultNaN64), 3.0);
  __uint128_t nan_rhs = MakeF64x2(1.0, bit_cast<double>(kDefaultNaN64));
  ASSERT_EQ(AsmFmin(nan_lhs, nan_rhs),
            MakeF64x2(bit_cast<double>(kDefaultNaN64), bit_cast<double>(kDefaultNaN64)));
}
2990 
TEST(Arm64InsnTest, MaxPairwiseF32Scalar) {
  // Scalar-output FMAXP reduces the two low lanes; upper lanes are ignored.
  constexpr auto AsmFmaxp = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fmaxp %s0, %1.2s");
  ASSERT_EQ(AsmFmaxp(MakeF32x4(-3.0f, 2.0f, 7.0f, -0.0f)), bit_cast<uint32_t>(2.0f));

  // A NaN in the reduced pair produces the default NaN.
  ASSERT_EQ(AsmFmaxp(MakeF32x4(bit_cast<float>(kDefaultNaN32), 2.0f, 7.0f, -0.0f)),
            kDefaultNaN32);
}

TEST(Arm64InsnTest, MaxPairwiseF32x4) {
  // Vector FMAXP takes the max of adjacent pairs: first operand's pairs fill
  // the low half of the result, second operand's pairs fill the high half.
  constexpr auto AsmFmaxp = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fmaxp %0.4s, %1.4s, %2.4s");
  ASSERT_EQ(AsmFmaxp(MakeF32x4(-3.0f, 2.0f, 7.0f, -0.0f), MakeF32x4(6.0f, 1.0f, -8.0f, 5.0f)),
            MakeF32x4(2.0f, 7.0f, 6.0f, 5.0f));

  // A NaN in a pair makes that output lane the default NaN.
  __uint128_t nan_lhs =
      MakeF32x4(bit_cast<float>(kDefaultNaN32), 2.0f, 7.0f, bit_cast<float>(kDefaultNaN32));
  __uint128_t plain_rhs = MakeF32x4(6.0f, 1.0f, -8.0f, 5.0f);
  ASSERT_EQ(AsmFmaxp(nan_lhs, plain_rhs),
            MakeF32x4(bit_cast<float>(kDefaultNaN32), bit_cast<float>(kDefaultNaN32), 6.0f, 5.0f));
}
3012 
TEST(Arm64InsnTest,MinPairwiseF32Scalar)3013 TEST(Arm64InsnTest, MinPairwiseF32Scalar) {
3014   constexpr auto AsmFminp = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fminp %s0, %1.2s");
3015   __uint128_t arg1 = MakeF32x4(-3.0f, 2.0f, 7.0f, -0.0f);
3016   ASSERT_EQ(AsmFminp(arg1), bit_cast<uint32_t>(-3.0f));
3017 
3018   __uint128_t arg2 = MakeF32x4(bit_cast<float>(kDefaultNaN32), 2.0f, 7.0f, -0.0f);
3019   ASSERT_EQ(AsmFminp(arg2), kDefaultNaN32);
3020 }
3021 
TEST(Arm64InsnTest,MinPairwiseF32x4)3022 TEST(Arm64InsnTest, MinPairwiseF32x4) {
3023   constexpr auto AsmFminp = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fminp %0.4s, %1.4s, %2.4s");
3024   __uint128_t arg1 = MakeF32x4(-3.0f, 2.0f, 7.0f, -0.0f);
3025   __uint128_t arg2 = MakeF32x4(6.0f, 1.0f, -8.0f, 5.0f);
3026   ASSERT_EQ(AsmFminp(arg1, arg2), MakeF32x4(-3.0f, -0.0f, 1.0f, -8.0f));
3027 
3028   __uint128_t arg3 =
3029       MakeF32x4(bit_cast<float>(kDefaultNaN32), 2.0f, 7.0f, bit_cast<float>(kDefaultNaN32));
3030   __uint128_t arg4 = MakeF32x4(6.0f, 1.0f, -8.0f, 5.0f);
3031   ASSERT_EQ(AsmFminp(arg3, arg4),
3032             MakeF32x4(bit_cast<float>(kDefaultNaN32), bit_cast<float>(kDefaultNaN32), 1.0f, -8.0f));
3033 }
3034 
// Verifies FMAXNMP (scalar): like FMAXP but "number" semantics — a quiet NaN
// operand is ignored and the numeric operand wins (note kQuietNaN32 here,
// not kDefaultNaN32 as in the non-NM tests above).
TEST(Arm64InsnTest,MaxPairwiseNumberF32Scalar)3035 TEST(Arm64InsnTest, MaxPairwiseNumberF32Scalar) {
3036   constexpr auto AsmFmaxnmp = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fmaxnmp %s0, %1.2s");
3037   __uint128_t arg1 = MakeF32x4(-3.0f, 2.0f, 7.0f, -0.0f);
3038   ASSERT_EQ(AsmFmaxnmp(arg1), bit_cast<uint32_t>(2.0f));
3039 
3040   __uint128_t arg2 = MakeF32x4(bit_cast<float>(kQuietNaN32), 2.0f, 7.0f, -0.0f);
3041   ASSERT_EQ(AsmFmaxnmp(arg2), bit_cast<uint32_t>(2.0f));
3042 }
3043 
// FMAXNMP (vector): pairwise "number" maximum; NaN-containing pairs still
// yield the numeric member of the pair.
TEST(Arm64InsnTest,MaxPairwiseNumberF32x4)3044 TEST(Arm64InsnTest, MaxPairwiseNumberF32x4) {
3045   constexpr auto AsmFmaxnmp = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fmaxnmp %0.4s, %1.4s, %2.4s");
3046   __uint128_t arg1 = MakeF32x4(-3.0f, 2.0f, 7.0f, -0.0f);
3047   __uint128_t arg2 = MakeF32x4(6.0f, 1.0f, -8.0f, 5.0f);
3048   ASSERT_EQ(AsmFmaxnmp(arg1, arg2), MakeF32x4(2.0f, 7.0f, 6.0f, 5.0f));
3049 
3050   __uint128_t arg3 =
3051       MakeF32x4(bit_cast<float>(kQuietNaN32), 2.0f, 7.0f, bit_cast<float>(kQuietNaN32));
3052   __uint128_t arg4 = MakeF32x4(6.0f, 1.0f, -8.0f, 5.0f);
3053   ASSERT_EQ(AsmFmaxnmp(arg3, arg4), MakeF32x4(2.0f, 7.0f, 6.0f, 5.0f));
3054 }
3055 
// FMINNMP (scalar): pairwise "number" minimum of the two low lanes.
TEST(Arm64InsnTest,MinPairwiseNumberF32Scalar)3056 TEST(Arm64InsnTest, MinPairwiseNumberF32Scalar) {
3057   constexpr auto AsmFminnmp = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fminnmp %s0, %1.2s");
3058   __uint128_t arg1 = MakeF32x4(-3.0f, 2.0f, 7.0f, -0.0f);
3059   ASSERT_EQ(AsmFminnmp(arg1), bit_cast<uint32_t>(-3.0f));
3060 
3061   __uint128_t arg2 = MakeF32x4(bit_cast<float>(kQuietNaN32), 2.0f, 7.0f, -0.0f);
3062   ASSERT_EQ(AsmFminnmp(arg2), bit_cast<uint32_t>(2.0f));
3063 }
3064 
// FMINNMP (vector): pairwise "number" minimum over concatenated operands.
TEST(Arm64InsnTest,MinPairwiseNumberF32x4)3065 TEST(Arm64InsnTest, MinPairwiseNumberF32x4) {
3066   constexpr auto AsmFminnmp = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fminnmp %0.4s, %1.4s, %2.4s");
3067   __uint128_t arg1 = MakeF32x4(-3.0f, 2.0f, 7.0f, -0.0f);
3068   __uint128_t arg2 = MakeF32x4(6.0f, 1.0f, -8.0f, 5.0f);
3069   ASSERT_EQ(AsmFminnmp(arg1, arg2), MakeF32x4(-3.0f, -0.0f, 1.0f, -8.0f));
3070 
3071   __uint128_t arg3 =
3072       MakeF32x4(bit_cast<float>(kQuietNaN32), 2.0f, 7.0f, bit_cast<float>(kQuietNaN32));
3073   __uint128_t arg4 = MakeF32x4(6.0f, 1.0f, -8.0f, 5.0f);
3074   ASSERT_EQ(AsmFminnmp(arg3, arg4), MakeF32x4(2.0f, 7.0f, 1.0f, -8.0f));
3075 }
3076 
// Verifies FMAXV: horizontal maximum across all four lanes; a default NaN in
// any lane makes the whole reduction NaN.
TEST(Arm64InsnTest,MaxAcrossF32x4)3077 TEST(Arm64InsnTest, MaxAcrossF32x4) {
3078   constexpr auto AsmFmaxv = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fmaxv %s0, %1.4s");
3079   __uint128_t arg1 = MakeF32x4(0.0f, 2.0f, 3.0f, -4.0f);
3080   ASSERT_EQ(AsmFmaxv(arg1), bit_cast<uint32_t>(3.0f));
3081 
3082   __uint128_t arg2 = MakeF32x4(0.0f, 2.0f, bit_cast<float>(kDefaultNaN32), -4.0f);
3083   ASSERT_EQ(AsmFmaxv(arg2), kDefaultNaN32);
3084 }
3085 
// FMINV: horizontal minimum across all four lanes, NaN-propagating.
TEST(Arm64InsnTest,MinAcrossF32x4)3086 TEST(Arm64InsnTest, MinAcrossF32x4) {
3087   constexpr auto AsmFminv = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fminv %s0, %1.4s");
3088   __uint128_t arg1 = MakeF32x4(0.0f, 2.0f, 3.0f, -4.0f);
3089   ASSERT_EQ(AsmFminv(arg1), bit_cast<uint32_t>(-4.0f));
3090 
3091   __uint128_t arg2 = MakeF32x4(0.0f, 2.0f, bit_cast<float>(kDefaultNaN32), -4.0f);
3092   ASSERT_EQ(AsmFminv(arg2), kDefaultNaN32);
3093 }
3094 
// FMAXNMV: horizontal "number" maximum — a quiet NaN lane is ignored.
TEST(Arm64InsnTest,MaxNumberAcrossF32x4)3095 TEST(Arm64InsnTest, MaxNumberAcrossF32x4) {
3096   constexpr auto AsmFmaxnmv = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fmaxnmv %s0, %1.4s");
3097   __uint128_t arg1 = MakeF32x4(0.0f, 2.0f, 3.0f, -4.0f);
3098   ASSERT_EQ(AsmFmaxnmv(arg1), bit_cast<uint32_t>(3.0f));
3099 
3100   __uint128_t arg2 = MakeF32x4(0.0f, bit_cast<float>(kQuietNaN32), 3.0f, -4.0f);
3101   ASSERT_EQ(AsmFmaxnmv(arg2), bit_cast<uint32_t>(3.0f));
3102 }
3103 
// FMINNMV: horizontal "number" minimum — a quiet NaN lane is ignored.
TEST(Arm64InsnTest,MinNumberAcrossF32x4)3104 TEST(Arm64InsnTest, MinNumberAcrossF32x4) {
3105   constexpr auto AsmFminnmv = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fminnmv %s0, %1.4s");
3106   __uint128_t arg1 = MakeF32x4(0.0f, 2.0f, 3.0f, -4.0f);
3107   ASSERT_EQ(AsmFminnmv(arg1), bit_cast<uint32_t>(-4.0f));
3108 
3109   __uint128_t arg2 = MakeF32x4(0.0f, bit_cast<float>(kQuietNaN32), 3.0f, -4.0f);
3110   ASSERT_EQ(AsmFminnmv(arg2), bit_cast<uint32_t>(-4.0f));
3111 }
3112 
// Verifies scalar FMUL (single precision) against a precomputed bit pattern.
TEST(Arm64InsnTest,MulFp32)3113 TEST(Arm64InsnTest, MulFp32) {
3114   uint64_t fp_arg1 = 0x40a1999aULL;  // 5.05 in float
3115   uint64_t fp_arg2 = 0x40dae148ULL;  // 6.84 in float
3116   __uint128_t rd = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fmul %s0, %s1, %s2")(fp_arg1, fp_arg2);
3117   ASSERT_EQ(rd, MakeUInt128(0x420a2b03ULL, 0U));  // 34.5420 in float
3118 }
3119 
// Verifies scalar FMUL (double precision) against a precomputed bit pattern.
TEST(Arm64InsnTest,MulFp64)3120 TEST(Arm64InsnTest, MulFp64) {
3121   uint64_t fp_arg1 = 0x40226b851eb851ecULL;  // 9.21 in double
3122   uint64_t fp_arg2 = 0x4020c7ae147ae148ULL;  // 8.39 in double
3123   __uint128_t rd = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fmul %d0, %d1, %d2")(fp_arg1, fp_arg2);
3124   ASSERT_EQ(rd, MakeUInt128(0x40535166cf41f214ULL, 0U));  // 77.2719 in double
3125 }
3126 
// FMUL (vector, 4S): element-wise product.
TEST(Arm64InsnTest,MulF32x4)3127 TEST(Arm64InsnTest, MulF32x4) {
3128   constexpr auto AsmFmul = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fmul %0.4s, %1.4s, %2.4s");
3129   __uint128_t arg1 = MakeF32x4(1.0f, -2.0f, 3.0f, -4.0f);
3130   __uint128_t arg2 = MakeF32x4(-3.0f, -1.0f, 4.0f, 1.0f);
3131   ASSERT_EQ(AsmFmul(arg1, arg2), MakeF32x4(-3.0f, 2.0f, 12.0f, -4.0f));
3132 }
3133 
// FMUL (vector, 2D): element-wise product.
TEST(Arm64InsnTest,MulF64x2)3134 TEST(Arm64InsnTest, MulF64x2) {
3135   constexpr auto AsmFmul = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fmul %0.2d, %1.2d, %2.2d");
3136   __uint128_t arg1 = MakeF64x2(-4.0, 2.0);
3137   __uint128_t arg2 = MakeF64x2(2.0, 3.0);
3138   ASSERT_EQ(AsmFmul(arg1, arg2), MakeF64x2(-8.0, 6.0));
3139 }
3140 
// FMUL (vector, by element): every lane of arg1 times arg2 lane 3 (9.0).
TEST(Arm64InsnTest,MulF32x4ByScalar)3141 TEST(Arm64InsnTest, MulF32x4ByScalar) {
3142   __uint128_t arg1 = MakeF32x4(2.0f, 3.0f, 4.0f, 5.0f);
3143   __uint128_t arg2 = MakeF32x4(6.0f, 7.0f, 8.0f, 9.0f);
3144   __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fmul %0.4s, %1.4s, %2.s[3]")(arg1, arg2);
3145   ASSERT_EQ(res, MakeF32x4(18.0f, 27.0f, 36.0f, 45.0f));
3146 }
3147 
// FMUL (vector, by element, 2D): both lanes of arg1 times arg2 lane 1 (4.0).
TEST(Arm64InsnTest,MulF64x2ByScalar)3148 TEST(Arm64InsnTest, MulF64x2ByScalar) {
3149   __uint128_t arg1 = MakeF64x2(2.0, 3.0);
3150   __uint128_t arg2 = MakeF64x2(5.0, 4.0);
3151   __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fmul %0.2d, %1.2d, %2.d[1]")(arg1, arg2);
3152   ASSERT_EQ(res, MakeF64x2(8.0, 12.0));
3153 }
3154 
// FMUL (scalar, by element): arg1 lane 0 (2.0) times arg2 lane 2 (17.0).
TEST(Arm64InsnTest,MulF32IndexedElem)3155 TEST(Arm64InsnTest, MulF32IndexedElem) {
3156   constexpr auto AsmFmul = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fmul %s0, %s1, %2.s[2]");
3157   __uint128_t arg1 = MakeF32x4(2.0f, 3.0f, 5.0f, 7.0f);
3158   __uint128_t arg2 = MakeF32x4(11.0f, 13.0f, 17.0f, 19.0f);
3159   ASSERT_EQ(AsmFmul(arg1, arg2), bit_cast<uint32_t>(34.0f));
3160 }
3161 
// FMUL (scalar, by element, double): arg1 lane 0 (2.0) times arg2 lane 1 (4.0).
TEST(Arm64InsnTest,MulF64IndexedElem)3162 TEST(Arm64InsnTest, MulF64IndexedElem) {
3163   constexpr auto AsmFmul = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fmul %d0, %d1, %2.d[1]");
3164   __uint128_t arg1 = MakeF64x2(2.0, 3.0);
3165   __uint128_t arg2 = MakeF64x2(5.0, 4.0);
3166   ASSERT_EQ(AsmFmul(arg1, arg2), bit_cast<uint64_t>(8.0));
3167 }
3168 
// Verifies FMULX (scalar): for these finite operands it behaves like FMUL
// (FMULX differs from FMUL only for 0 * infinity, not exercised here).
TEST(Arm64InsnTest,MulExtendedF32)3169 TEST(Arm64InsnTest, MulExtendedF32) {
3170   constexpr auto AsmFmulx = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fmulx %s0, %s1, %s2");
3171   __uint128_t arg1 = MakeF32x4(2.0f, 3.0f, 5.0f, 7.0f);
3172   __uint128_t arg2 = MakeF32x4(11.0f, 13.0f, 17.0f, 19.0f);
3173   ASSERT_EQ(AsmFmulx(arg1, arg2), bit_cast<uint32_t>(22.0f));
3174 }
3175 
// FMULX (vector, 4S): element-wise product of finite operands.
TEST(Arm64InsnTest,MulExtendedF32x4)3176 TEST(Arm64InsnTest, MulExtendedF32x4) {
3177   constexpr auto AsmFmulx = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fmulx %0.4s, %1.4s, %2.4s");
3178   __uint128_t arg1 = MakeF32x4(2.0f, 3.0f, 5.0f, 7.0f);
3179   __uint128_t arg2 = MakeF32x4(11.0f, 13.0f, 17.0f, 19.0f);
3180   ASSERT_EQ(AsmFmulx(arg1, arg2), MakeF32x4(22.0f, 39.0f, 85.0f, 133.0f));
3181 }
3182 
// FMULX (scalar, by element): arg1 lane 0 (2.0) times arg2 lane 2 (17.0).
TEST(Arm64InsnTest,MulExtendedF32IndexedElem)3183 TEST(Arm64InsnTest, MulExtendedF32IndexedElem) {
3184   constexpr auto AsmFmulx = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fmulx %s0, %s1, %2.s[2]");
3185   __uint128_t arg1 = MakeF32x4(2.0f, 3.0f, 5.0f, 7.0f);
3186   __uint128_t arg2 = MakeF32x4(11.0f, 13.0f, 17.0f, 19.0f);
3187   ASSERT_EQ(AsmFmulx(arg1, arg2), bit_cast<uint32_t>(34.0f));
3188 }
3189 
// FMULX (scalar, by element, double): arg1 lane 0 (2.0) times arg2 lane 1 (4.0).
TEST(Arm64InsnTest,MulExtendedF64IndexedElem)3190 TEST(Arm64InsnTest, MulExtendedF64IndexedElem) {
3191   constexpr auto AsmFmulx = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fmulx %d0, %d1, %2.d[1]");
3192   __uint128_t arg1 = MakeF64x2(2.0, 3.0);
3193   __uint128_t arg2 = MakeF64x2(5.0, 4.0);
3194   ASSERT_EQ(AsmFmulx(arg1, arg2), bit_cast<uint64_t>(8.0));
3195 }
3196 
// FMULX (vector, by element): all lanes of arg1 times arg2 lane 2 (17.0).
TEST(Arm64InsnTest,MulExtendedF32x4IndexedElem)3197 TEST(Arm64InsnTest, MulExtendedF32x4IndexedElem) {
3198   constexpr auto AsmFmulx = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fmulx %0.4s, %1.4s, %2.s[2]");
3199   __uint128_t arg1 = MakeF32x4(2.0f, 3.0f, 5.0f, 7.0f);
3200   __uint128_t arg2 = MakeF32x4(11.0f, 13.0f, 17.0f, 19.0f);
3201   ASSERT_EQ(AsmFmulx(arg1, arg2), MakeF32x4(34.0f, 51.0f, 85.0f, 119.0f));
3202 }
3203 
// Verifies FNMUL (single precision): negated product, 2 * 3 -> -6.
TEST(Arm64InsnTest,MulNegFp32)3204 TEST(Arm64InsnTest, MulNegFp32) {
3205   uint64_t fp_arg1 = bit_cast<uint32_t>(2.0f);
3206   uint64_t fp_arg2 = bit_cast<uint32_t>(3.0f);
3207   __uint128_t rd = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fnmul %s0, %s1, %s2")(fp_arg1, fp_arg2);
3208   ASSERT_EQ(rd, MakeUInt128(bit_cast<uint32_t>(-6.0f), 0U));
3209 }
3210 
// Verifies FNMUL (double precision): negated product, 2 * 3 -> -6.
TEST(Arm64InsnTest,MulNegFp64)3211 TEST(Arm64InsnTest, MulNegFp64) {
3212   uint64_t fp_arg1 = bit_cast<uint64_t>(2.0);
3213   uint64_t fp_arg2 = bit_cast<uint64_t>(3.0);
3214   __uint128_t rd = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fnmul %d0, %d1, %d2")(fp_arg1, fp_arg2);
3215   ASSERT_EQ(rd, MakeUInt128(bit_cast<uint64_t>(-6.0), 0U));
3216 }
3217 
// Verifies scalar FDIV (single precision), including production of a
// denormal result when flush-to-zero is disabled.
TEST(Arm64InsnTest,DivFp32)3218 TEST(Arm64InsnTest, DivFp32) {
3219   constexpr auto AsmFdiv = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fdiv %s0, %s1, %s2");
3220 
3221   uint32_t arg1 = 0x40c23d71U;                                     // 6.07 in float
3222   uint32_t arg2 = 0x401a3d71U;                                     // 2.41 in float
3223   ASSERT_EQ(AsmFdiv(arg1, arg2), MakeUInt128(0x402131edULL, 0U));  // 2.5186722 in float
3224 
3225   // Make sure that FDIV can produce a denormal result under the default FPCR,
3226   // where the FZ bit (flush-to-zero) is off.
3227   uint32_t arg3 = 0xa876eff9U;  // exponent (without offset) = -47
3228   uint32_t arg4 = 0xe7d86b60U;  // exponent (without offset) = 80
3229   ASSERT_EQ(AsmFdiv(arg3, arg4), MakeUInt128(0x0049065cULL, 0U));  // denormal
3230 }
3231 
// Verifies scalar FDIV (double precision) against a precomputed bit pattern.
TEST(Arm64InsnTest,DivFp64)3232 TEST(Arm64InsnTest, DivFp64) {
3233   uint64_t fp_arg1 = 0x401e5c28f5c28f5cULL;  // 7.59 in double
3234   uint64_t fp_arg2 = 0x3ff28f5c28f5c28fULL;  // 1.16 in double
3235   __uint128_t rd = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fdiv %d0, %d1, %d2")(fp_arg1, fp_arg2);
3236   ASSERT_EQ(rd, MakeUInt128(0x401a2c234f72c235ULL, 0U));  // 6.5431034482758620995923593 in double
3237 }
3238 
// Checks the FPSR side effects of a float divide-by-zero: DZC must be set
// and IOC must remain clear (regression test; see comment below).
TEST(Arm64InsnTest,DivFp32_FlagsWhenDivByZero)3239 TEST(Arm64InsnTest, DivFp32_FlagsWhenDivByZero) {
3240   uint64_t fpsr;
   // volatile prevents the compiler from folding the division at compile time.
3241   volatile float dividend = 123.0f;
3242   volatile float divisor = 0.0f;
3243   float res;
3244   asm volatile(
3245       "msr fpsr, xzr\n\t"
3246       "fdiv %s1, %s2, %s3\n\t"
3247       "mrs %0, fpsr"
3248       : "=r"(fpsr), "=w"(res)
3249       : "w"(dividend), "w"(divisor));
3250   ASSERT_TRUE((fpsr & kFpsrDzcBit) == (kFpsrDzcBit));
3251 
3252   // Previous bug caused IOC to be set upon scalar div by zero.
3253   ASSERT_TRUE((fpsr & kFpsrIocBit) == 0);
3254 }
3255 
// Same FPSR divide-by-zero check for the double-precision form.
TEST(Arm64InsnTest,DivFp64_FlagsWhenDivByZero)3256 TEST(Arm64InsnTest, DivFp64_FlagsWhenDivByZero) {
3257   uint64_t fpsr;
3258   double res;
3259   asm volatile(
3260       "msr fpsr, xzr\n\t"
3261       "fdiv %d1, %d2, %d3\n\t"
3262       "mrs %0, fpsr"
3263       : "=r"(fpsr), "=w"(res)
3264       : "w"(123.0), "w"(0.0));
3265   ASSERT_TRUE((fpsr & kFpsrDzcBit) == (kFpsrDzcBit));
3266 
3267   // Previous bug caused IOC to be set upon scalar div by zero.
3268   ASSERT_TRUE((fpsr & kFpsrIocBit) == 0);
3269 }
3270 
// FDIV (vector, 4S): element-wise quotient, plus a denormal-result lane
// under the default (non-flush-to-zero) FPCR.
TEST(Arm64InsnTest,DivFp32x4)3271 TEST(Arm64InsnTest, DivFp32x4) {
3272   constexpr auto AsmFdiv = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fdiv %0.4s, %1.4s, %2.4s");
3273 
3274   // 16.39, 80.286, 41.16, 98.01
3275   __uint128_t arg1 = MakeUInt128(0x41831eb842a0926fULL, 0x4224a3d742c4051fULL);
3276   // 13.3, 45.45, 7.89, -2.63
3277   __uint128_t arg2 = MakeUInt128(0x4154cccd4235cccdULL, 0x40fc7ae1c02851ecULL);
3278   __uint128_t res1 = AsmFdiv(arg1, arg2);
3279   // 1.2323308, 1.7664686, 5.21673, -37.26616
3280   ASSERT_EQ(res1, MakeUInt128(0x3f9dbd043fe21ba5ULL, 0x40a6ef74c215108cULL));
3281 
3282   // Verify that fdiv produces a denormal result under the default FPCR.
3283   __uint128_t arg3 = MakeF32x4(1.0f, 1.0f, 1.0f, -0x1.eddff2p-47f);
3284   __uint128_t arg4 = MakeF32x4(1.0f, 1.0f, 1.0f, -0x1.b0d6c0p80f);
3285   __uint128_t res2 = AsmFdiv(arg3, arg4);
3286   __uint128_t expected2 = MakeF32x4(1.0f, 1.0f, 1.0f, 0x0.920cb8p-126f);
3287   ASSERT_EQ(res2, expected2);
3288 }
3289 
// FDIV (vector, 2D): element-wise quotient against precomputed bit patterns.
TEST(Arm64InsnTest,DivFp64x2)3290 TEST(Arm64InsnTest, DivFp64x2) {
3291   // 6.23, 65.02
3292   __uint128_t arg1 = MakeUInt128(0x4018EB851EB851ECULL, 0x40504147AE147AE1ULL);
3293   // -7.54, 11.92
3294   __uint128_t arg2 = MakeUInt128(0xC01E28F5C28F5C29ULL, 0x4027D70A3D70A3D7ULL);
3295   __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fdiv %0.2d, %1.2d, %2.2d")(arg1, arg2);
3296   // -0.82625994695, 5.45469798658
3297   ASSERT_EQ(res, MakeUInt128(0xbfea70b8b3449564ULL, 0x4015d19c59579fc9ULL));
3298 }
3299 
// Verifies scalar FMADD (single precision): res = arg3 + arg1 * arg2, as a
// single fused operation (the precision cases below would round differently
// if the multiply and add were performed separately).
TEST(Arm64InsnTest,MulAddFp32)3300 TEST(Arm64InsnTest, MulAddFp32) {
3301   constexpr auto AsmFmadd = ASM_INSN_WRAP_FUNC_W_RES_WWW_ARG("fmadd %s0, %s1, %s2, %s3");
3302 
3303   __uint128_t res1 =
3304       AsmFmadd(bit_cast<uint32_t>(2.0f), bit_cast<uint32_t>(3.0f), bit_cast<uint32_t>(5.0f));
3305   ASSERT_EQ(res1, MakeF32x4(11.0f, 0, 0, 0));
3306 
3307   __uint128_t res2 =
3308       AsmFmadd(bit_cast<uint32_t>(2.5f), bit_cast<uint32_t>(2.0f), bit_cast<uint32_t>(-5.0f));
3309   ASSERT_EQ(res2, MakeF32x4(0, 0, 0, 0));
3310 
3311   // These tests verify that fmadd does not lose precision while doing the mult + add.
3312   __uint128_t res3 = AsmFmadd(bit_cast<uint32_t>(0x1.fffffep22f),
3313                               bit_cast<uint32_t>(0x1.000002p0f),
3314                               bit_cast<uint32_t>(-0x1.p23f));
3315   ASSERT_EQ(res3, MakeF32x4(0x1.fffffcp-2f, 0, 0, 0));
3316 
3317   __uint128_t res4 = AsmFmadd(bit_cast<uint32_t>(0x1.fffffep22f),
3318                               bit_cast<uint32_t>(0x1.000002p0f),
3319                               bit_cast<uint32_t>(-0x1.fffffep22f));
3320   ASSERT_EQ(res4, MakeF32x4(0x1.fffffep-1f, 0, 0, 0));
3321 
3322   __uint128_t res5 = AsmFmadd(bit_cast<uint32_t>(0x1.p23f),
3323                               bit_cast<uint32_t>(0x1.fffffep-1f),
3324                               bit_cast<uint32_t>(-0x1.000002p23f));
3325   ASSERT_EQ(res5, MakeF32x4(-0x1.80p0f, 0, 0, 0));
3326 }
3327 
// Verifies scalar FMADD and FNMADD (double precision) against precomputed
// bit patterns; FNMADD yields the negation of the FMADD result here.
TEST(Arm64InsnTest,MulAddFp64)3328 TEST(Arm64InsnTest, MulAddFp64) {
3329   uint64_t arg1 = 0x40323d70a3d70a3dULL;  // 18.24
3330   uint64_t arg2 = 0x40504147ae147ae1ULL;  // 65.02
3331   uint64_t arg3 = 0x4027d70a3d70a3d7ULL;  // 11.92
3332   __uint128_t res1 = ASM_INSN_WRAP_FUNC_W_RES_WWW_ARG("fmadd %d0, %d1, %d2, %d3")(arg1, arg2, arg3);
3333   ASSERT_EQ(res1, MakeUInt128(0x4092b78a0902de00ULL, 0U));  // 1197.8848
3334   __uint128_t res2 =
3335       ASM_INSN_WRAP_FUNC_W_RES_WWW_ARG("fnmadd %d0, %d1, %d2, %d3")(arg1, arg2, arg3);
3336   ASSERT_EQ(res2, MakeUInt128(0xc092b78a0902de00ULL, 0U));  // -1197.8848
3337 }
3338 
// Double-precision fused-multiply-add precision check: the result is only
// representable if the intermediate product is not rounded.
TEST(Arm64InsnTest,MulAddFp64Precision)3339 TEST(Arm64InsnTest, MulAddFp64Precision) {
3340   uint64_t arg1 = bit_cast<uint64_t>(0x1.0p1023);
3341   uint64_t arg2 = bit_cast<uint64_t>(0x1.0p-1);
3342   uint64_t arg3 = bit_cast<uint64_t>(0x1.fffffffffffffp1022);
3343   __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WWW_ARG("fmadd %d0, %d1, %d2, %d3")(arg1, arg2, arg3);
3344   ASSERT_EQ(res, bit_cast<uint64_t>(0x1.7ffffffffffff8p1023));
3345 }
3346 
// Verifies scalar FNMADD (single precision): the negation of the fused
// arg3 + arg1 * arg2, with exact-zero and precision edge cases.
TEST(Arm64InsnTest,NegMulAddFp32)3347 TEST(Arm64InsnTest, NegMulAddFp32) {
3348   constexpr auto AsmFnmadd = ASM_INSN_WRAP_FUNC_W_RES_WWW_ARG("fnmadd %s0, %s1, %s2, %s3");
3349 
3350   __uint128_t res1 =
3351       AsmFnmadd(bit_cast<uint32_t>(2.0f), bit_cast<uint32_t>(3.0f), bit_cast<uint32_t>(5.0f));
3352   ASSERT_EQ(res1, MakeF32x4(-11.0f, 0, 0, 0));
3353 
3354   // No -0 (proper negation)
3355   __uint128_t res2 =
3356       AsmFnmadd(bit_cast<uint32_t>(2.5f), bit_cast<uint32_t>(2.0f), bit_cast<uint32_t>(-5.0f));
3357   ASSERT_EQ(res2, MakeF32x4(0.0f, 0, 0, 0));
3358 
3359   // These tests verify that fmadd does not lose precision while doing the mult + add.
3360   __uint128_t res3 = AsmFnmadd(bit_cast<uint32_t>(0x1.fffffep22f),
3361                                bit_cast<uint32_t>(0x1.000002p0f),
3362                                bit_cast<uint32_t>(-0x1.p23f));
3363   ASSERT_EQ(res3, MakeF32x4(-0x1.fffffcp-2f, 0, 0, 0));
3364 
3365   __uint128_t res4 = AsmFnmadd(bit_cast<uint32_t>(0x1.fffffep22f),
3366                                bit_cast<uint32_t>(0x1.000002p0f),
3367                                bit_cast<uint32_t>(-0x1.fffffep22f));
3368   ASSERT_EQ(res4, MakeF32x4(-0x1.fffffep-1f, 0, 0, 0));
3369 
3370   __uint128_t res5 = AsmFnmadd(bit_cast<uint32_t>(0x1.p23f),
3371                                bit_cast<uint32_t>(0x1.fffffep-1f),
3372                                bit_cast<uint32_t>(-0x1.000002p23f));
3373   ASSERT_EQ(res5, MakeF32x4(0x1.80p0f, 0, 0, 0));
3374 }
3375 
// Verifies scalar FNMADD (double precision), including the exact-zero case
// where the negation must not produce -0.
TEST(Arm64InsnTest,NegMulAddFp64)3376 TEST(Arm64InsnTest, NegMulAddFp64) {
3377   constexpr auto AsmFnmadd = ASM_INSN_WRAP_FUNC_W_RES_WWW_ARG("fnmadd %d0, %d1, %d2, %d3");
3378 
3379   __uint128_t res1 =
3380       AsmFnmadd(bit_cast<uint64_t>(2.0), bit_cast<uint64_t>(3.0), bit_cast<uint64_t>(5.0));
3381   ASSERT_EQ(res1, MakeF64x2(-11.0, 0));
3382 
3383   // Proper negation (no -0 in this case)
3384   __uint128_t res2 =
3385       AsmFnmadd(bit_cast<uint64_t>(2.5), bit_cast<uint64_t>(2.0), bit_cast<uint64_t>(-5.0));
3386   ASSERT_EQ(res2, MakeF64x2(0.0, 0));
3387 }
3388 
// Verifies scalar FNMSUB (double precision): res = arg1 * arg2 - arg3
// (equivalently, the negation of the fused arg3 - arg1 * arg2), including
// the exact-zero case where the result must not be -0.
TEST(Arm64InsnTest,NegMulSubFp64)3389 TEST(Arm64InsnTest, NegMulSubFp64) {
3390   constexpr auto AsmFnmsub = ASM_INSN_WRAP_FUNC_W_RES_WWW_ARG("fnmsub %d0, %d1, %d2, %d3");
3391 
3392   __uint128_t res1 =
3393       AsmFnmsub(bit_cast<uint64_t>(-2.0), bit_cast<uint64_t>(3.0), bit_cast<uint64_t>(5.0));
3394   ASSERT_EQ(res1, MakeF64x2(-11.0, 0));
3395 
3396   uint64_t arg1 = 0x40357ae147ae147bULL;  // 21.48
3397   uint64_t arg2 = 0x404ce3d70a3d70a4ULL;  // 57.78
3398   uint64_t arg3 = 0x405e29999999999aULL;  // 120.65
3399   __uint128_t res2 = AsmFnmsub(arg1, arg2, arg3);
3400   ASSERT_EQ(res2, MakeUInt128(0x409181db8bac710dULL, 0U));  // 1120.4644
3401 
3402   // Assert no -0 in this case
3403   __uint128_t res3 =
3404       AsmFnmsub(bit_cast<uint64_t>(2.5), bit_cast<uint64_t>(2.0), bit_cast<uint64_t>(5.0));
3405   ASSERT_EQ(res3, MakeF64x2(0.0, 0));
3406 }
3407 
// FNMSUB precision check: the expected value is only reachable if the
// multiply-subtract is performed fused (no intermediate rounding).
TEST(Arm64InsnTest,NegMulSubFp64Precision)3408 TEST(Arm64InsnTest, NegMulSubFp64Precision) {
3409   constexpr auto AsmFnmsub = ASM_INSN_WRAP_FUNC_W_RES_WWW_ARG("fnmsub %d0, %d1, %d2, %d3");
3410 
3411   __uint128_t res = AsmFnmsub(bit_cast<uint64_t>(0x1.0p1023),
3412                               bit_cast<uint64_t>(0x1.0p-1),
3413                               bit_cast<uint64_t>(-0x1.fffffffffffffp1022));
3414   ASSERT_EQ(res, bit_cast<uint64_t>(0x1.7ffffffffffff8p1023));
3415 }
3416 
// Verifies FMLA (vector, 4S): the destination accumulates, i.e.
// result = arg3 + arg1 * arg2 element-wise (arg3 is tied to the output
// register by the *_WW0_ARG wrapper).
TEST(Arm64InsnTest,MulAddF32x4)3417 TEST(Arm64InsnTest, MulAddF32x4) {
3418   constexpr auto AsmFmla = ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("fmla %0.4s, %1.4s, %2.4s");
3419   __uint128_t arg1 = MakeF32x4(1.0f, 2.0f, 4.0f, 3.0f);
3420   __uint128_t arg2 = MakeF32x4(3.0f, 1.0f, 2.0f, 4.0f);
3421   __uint128_t arg3 = MakeF32x4(2.0f, 3.0f, 1.0f, 2.0f);
3422   ASSERT_EQ(AsmFmla(arg1, arg2, arg3), MakeF32x4(5.0f, 5.0f, 9.0f, 14.0f));
3423 }
3424 
// FMLA (scalar, by element): accumulator lane 0 plus arg1 lane 0 times
// arg2 lane 2.
TEST(Arm64InsnTest,MulAddF32IndexedElem)3425 TEST(Arm64InsnTest, MulAddF32IndexedElem) {
3426   constexpr auto AsmFmla = ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("fmla %s0, %s1, %2.s[2]");
3427   __uint128_t arg1 = MakeF32x4(1.0f, 2.0f, 4.0f, 3.0f);
3428   __uint128_t arg2 = MakeF32x4(3.0f, 1.0f, 2.0f, 4.0f);
3429   __uint128_t arg3 = MakeF32x4(2.0f, 3.0f, 1.0f, 2.0f);
3430   // 2 + (1 * 2)
3431   ASSERT_EQ(AsmFmla(arg1, arg2, arg3), bit_cast<uint32_t>(4.0f));
3432 }
3433 
// FMLA (scalar, by element, double): accumulator plus arg1 lane 0 times
// arg2 lane 1.
TEST(Arm64InsnTest,MulAddF64IndexedElem)3434 TEST(Arm64InsnTest, MulAddF64IndexedElem) {
3435   constexpr auto AsmFmla = ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("fmla %d0, %d1, %2.d[1]");
3436   __uint128_t arg1 = MakeF64x2(2.0, 3.0);
3437   __uint128_t arg2 = MakeF64x2(4.0, 5.0);
3438   __uint128_t arg3 = MakeF64x2(6.0, 7.0);
3439   // 6 + (2 * 5)
3440   ASSERT_EQ(AsmFmla(arg1, arg2, arg3), bit_cast<uint64_t>(16.0));
3441 }
3442 
// Verifies FMLA (vector, 2D): result = arg3 + arg1 * arg2 element-wise.
TEST(Arm64InsnTest,MulAddF64x2)3443 TEST(Arm64InsnTest, MulAddF64x2) {
3444   constexpr auto AsmFmla = ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("fmla %0.2d, %1.2d, %2.2d");
   // Use double literals for the 64-bit lanes (the previous float literals
   // converted exactly, but were inconsistent with the other F64x2 tests).
3445   __uint128_t arg1 = MakeF64x2(1.0, 2.0);
3446   __uint128_t arg2 = MakeF64x2(3.0, 1.0);
3447   __uint128_t arg3 = MakeF64x2(2.0, 3.0);
   // {2 + 1*3, 3 + 2*1} = {5, 5}
3448   ASSERT_EQ(AsmFmla(arg1, arg2, arg3), MakeF64x2(5.0, 5.0));
3449 }
3450 
// FMLA (vector, by element): every lane accumulates arg1[i] * arg2 lane 2.
TEST(Arm64InsnTest,MulAddF32x4IndexedElem)3451 TEST(Arm64InsnTest, MulAddF32x4IndexedElem) {
3452   constexpr auto AsmFmla = ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("fmla %0.4s, %1.4s, %2.s[2]");
3453   __uint128_t arg1 = MakeF32x4(1.0f, 2.0f, 4.0f, 3.0f);
3454   __uint128_t arg2 = MakeF32x4(3.0f, 1.0f, 2.0f, 4.0f);
3455   __uint128_t arg3 = MakeF32x4(2.0f, 3.0f, 1.0f, 2.0f);
   // arg3 + arg1 * 2.0 = {4, 7, 9, 8}
3456   ASSERT_EQ(AsmFmla(arg1, arg2, arg3), MakeF32x4(4.0f, 7.0f, 9.0f, 8.0f));
3457 }
3458 
// Verifies scalar FMSUB (res = arg3 - arg1 * arg2) and FNMSUB
// (res = arg1 * arg2 - arg3) in single precision.
TEST(Arm64InsnTest,MulSubFp32)3459 TEST(Arm64InsnTest, MulSubFp32) {
3460   uint32_t arg1 = bit_cast<uint32_t>(2.0f);
3461   uint32_t arg2 = bit_cast<uint32_t>(5.0f);
3462   uint32_t arg3 = bit_cast<uint32_t>(3.0f);
3463   __uint128_t res1 = ASM_INSN_WRAP_FUNC_W_RES_WWW_ARG("fmsub %s0, %s1, %s2, %s3")(arg1, arg2, arg3);
3464   ASSERT_EQ(res1, MakeUInt128(bit_cast<uint32_t>(-7.0f), 0U));
3465   __uint128_t res2 =
3466       ASM_INSN_WRAP_FUNC_W_RES_WWW_ARG("fnmsub %s0, %s1, %s2, %s3")(arg1, arg2, arg3);
3467   ASSERT_EQ(res2, MakeUInt128(bit_cast<uint32_t>(7.0f), 0U));
3468 }
3469 
// Verifies scalar FMSUB (double precision): res = arg3 - arg1 * arg2,
// including the exact-zero case where the result must not be -0.
TEST(Arm64InsnTest,MulSubFp64)3470 TEST(Arm64InsnTest, MulSubFp64) {
3471   constexpr auto AsmFmsub = ASM_INSN_WRAP_FUNC_W_RES_WWW_ARG("fmsub %d0, %d1, %d2, %d3");
3472 
3473   uint64_t arg1 = 0x40357ae147ae147bULL;  // 21.48
3474   uint64_t arg2 = 0x404ce3d70a3d70a4ULL;  // 57.78
3475   uint64_t arg3 = 0x405e29999999999aULL;  // 120.65
3476   __uint128_t res1 = AsmFmsub(arg1, arg2, arg3);
3477   ASSERT_EQ(res1, MakeUInt128(0xc09181db8bac710dULL, 0U));  // -1120.4644
3478 
3479   // Basic case
3480   __uint128_t res3 =
3481       AsmFmsub(bit_cast<uint64_t>(2.0), bit_cast<uint64_t>(3.0), bit_cast<uint64_t>(-5.0));
3482   ASSERT_EQ(res3, MakeF64x2(-11.0, 0));
3483 
3484   // No -0 in this case (proper negation order)
3485   __uint128_t res4 =
3486       AsmFmsub(bit_cast<uint64_t>(2.5), bit_cast<uint64_t>(2.0), bit_cast<uint64_t>(5.0));
3487   ASSERT_EQ(res4, MakeF64x2(0.0, 0));
3488 }
3489 
// FMSUB precision check: the expected value is only reachable if the
// multiply-subtract is performed fused (no intermediate rounding).
TEST(Arm64InsnTest,MulSubFp64Precision)3490 TEST(Arm64InsnTest, MulSubFp64Precision) {
3491   constexpr auto AsmFmsub = ASM_INSN_WRAP_FUNC_W_RES_WWW_ARG("fmsub %d0, %d1, %d2, %d3");
3492   __uint128_t res5 = AsmFmsub(bit_cast<uint64_t>(-0x1.0p1023),
3493                               bit_cast<uint64_t>(0x1.0p-1),
3494                               bit_cast<uint64_t>(0x1.fffffffffffffp1022));
3495   ASSERT_EQ(res5, bit_cast<uint64_t>(0x1.7ffffffffffff8p1023));
3496 }
3497 
// Verifies FMLS (vector, 4S): the destination accumulates negatively, i.e.
// result = arg3 - arg1 * arg2 element-wise (arg3 tied via *_WW0_ARG).
TEST(Arm64InsnTest,MulSubF32x4)3498 TEST(Arm64InsnTest, MulSubF32x4) {
3499   constexpr auto AsmFmls = ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("fmls %0.4s, %1.4s, %2.4s");
3500   __uint128_t arg1 = MakeF32x4(1.0f, 2.0f, 4.0f, 3.0f);
3501   __uint128_t arg2 = MakeF32x4(3.0f, 1.0f, 2.0f, 4.0f);
3502   __uint128_t arg3 = MakeF32x4(2.0f, 3.0f, 1.0f, 2.0f);
3503   ASSERT_EQ(AsmFmls(arg1, arg2, arg3), MakeF32x4(-1.0f, 1.0f, -7.0f, -10.0f));
3504 }
3505 
// FMLS (scalar, by element): accumulator minus arg1 lane 0 times arg2 lane 2.
TEST(Arm64InsnTest,MulSubF32IndexedElem)3506 TEST(Arm64InsnTest, MulSubF32IndexedElem) {
3507   constexpr auto AsmFmls = ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("fmls %s0, %s1, %2.s[2]");
3508   __uint128_t arg1 = MakeF32x4(2.0f, 1.0f, 4.0f, 3.0f);
3509   __uint128_t arg2 = MakeF32x4(4.0f, 3.0f, 2.0f, 1.0f);
3510   __uint128_t arg3 = MakeF32x4(8.0f, 3.0f, 1.0f, 2.0f);
3511   // 8 - (2 * 2)
3512   ASSERT_EQ(AsmFmls(arg1, arg2, arg3), bit_cast<uint32_t>(4.0f));
3513 }
3514 
// FMLS (vector, by element): every lane subtracts arg1[i] * arg2 lane 2.
TEST(Arm64InsnTest,MulSubF32x4IndexedElem)3515 TEST(Arm64InsnTest, MulSubF32x4IndexedElem) {
3516   constexpr auto AsmFmls = ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("fmls %0.4s, %1.4s, %2.s[2]");
3517   __uint128_t arg1 = MakeF32x4(1.0f, 2.0f, 4.0f, 3.0f);
3518   __uint128_t arg2 = MakeF32x4(3.0f, 1.0f, 2.0f, 4.0f);
3519   __uint128_t arg3 = MakeF32x4(2.0f, 3.0f, 1.0f, 2.0f);
   // arg3 - arg1 * 2.0 = {0, -1, -7, -4}
3520   ASSERT_EQ(AsmFmls(arg1, arg2, arg3), MakeF32x4(0.0f, -1.0f, -7.0f, -4.0f));
3521 }
3522 
// Verifies FMLS (vector, 2D): result = arg3 - arg1 * arg2 element-wise.
TEST(Arm64InsnTest,MulSubF64x2)3523 TEST(Arm64InsnTest, MulSubF64x2) {
3524   constexpr auto AsmFmls = ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("fmls %0.2d, %1.2d, %2.2d");
   // Use double literals for the 64-bit lanes (the previous float literals
   // converted exactly, but were inconsistent with the other F64x2 tests).
3525   __uint128_t arg1 = MakeF64x2(1.0, 2.0);
3526   __uint128_t arg2 = MakeF64x2(3.0, 1.0);
3527   __uint128_t arg3 = MakeF64x2(2.0, 3.0);
   // {2 - 1*3, 3 - 2*1} = {-1, 1}
3528   ASSERT_EQ(AsmFmls(arg1, arg2, arg3), MakeF64x2(-1.0, 1.0));
3529 }
3530 
// FMLS (scalar, by element, double): accumulator minus arg1 lane 0 times
// arg2 lane 1.
TEST(Arm64InsnTest,MulSubF64IndexedElem)3531 TEST(Arm64InsnTest, MulSubF64IndexedElem) {
3532   constexpr auto AsmFmls = ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("fmls %d0, %d1, %2.d[1]");
3533   __uint128_t arg1 = MakeF64x2(2.0, 5.0);
3534   __uint128_t arg2 = MakeF64x2(4.0, 1.0);
   // Second lane was written as the float literal 7.0f; use a double literal
   // for consistency (identical value after conversion).
3535   __uint128_t arg3 = MakeF64x2(6.0, 7.0);
3536   // 6 - (2 * 1)
3537   ASSERT_EQ(AsmFmls(arg1, arg2, arg3), bit_cast<uint64_t>(4.0));
3538 }
3539 
TEST(Arm64InsnTest,CompareEqualF32)3540 TEST(Arm64InsnTest, CompareEqualF32) {
3541   constexpr auto AsmFcmeq = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fcmeq %s0, %s1, %s2");
3542   uint32_t two = bit_cast<uint32_t>(2.0f);
3543   uint32_t six = bit_cast<uint32_t>(6.0f);
3544   ASSERT_EQ(AsmFcmeq(two, six), 0x00000000ULL);
3545   ASSERT_EQ(AsmFcmeq(two, two), 0xffffffffULL);
3546   ASSERT_EQ(AsmFcmeq(kDefaultNaN32, two), 0x00000000ULL);
3547   ASSERT_EQ(AsmFcmeq(two, kDefaultNaN32), 0x00000000ULL);
3548 }
3549 
TEST(Arm64InsnTest,CompareEqualF32x4)3550 TEST(Arm64InsnTest, CompareEqualF32x4) {
3551   constexpr auto AsmFcmeq = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fcmeq %0.4s, %1.4s, %2.4s");
3552   __uint128_t arg1 = MakeF32x4(-3.0f, 2.0f, 7.0f, -0.0f);
3553   __uint128_t arg2 = MakeF32x4(6.0f, 2.0f, -8.0f, 5.0f);
3554   __uint128_t res = AsmFcmeq(arg1, arg2);
3555   ASSERT_EQ(res, MakeUInt128(0xffffffff00000000ULL, 0x0000000000000000ULL));
3556 }
3557 
TEST(Arm64InsnTest,CompareGreaterEqualF32)3558 TEST(Arm64InsnTest, CompareGreaterEqualF32) {
3559   constexpr auto AsmFcmge = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fcmge %s0, %s1, %s2");
3560   uint32_t two = bit_cast<uint32_t>(2.0f);
3561   uint32_t six = bit_cast<uint32_t>(6.0f);
3562   ASSERT_EQ(AsmFcmge(two, six), 0x00000000ULL);
3563   ASSERT_EQ(AsmFcmge(two, two), 0xffffffffULL);
3564   ASSERT_EQ(AsmFcmge(six, two), 0xffffffffULL);
3565   ASSERT_EQ(AsmFcmge(kDefaultNaN32, two), 0x00000000ULL);
3566   ASSERT_EQ(AsmFcmge(two, kDefaultNaN32), 0x00000000ULL);
3567 }
3568 
TEST(Arm64InsnTest,CompareGreaterEqualF32x4)3569 TEST(Arm64InsnTest, CompareGreaterEqualF32x4) {
3570   constexpr auto AsmFcmge = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fcmge %0.4s, %1.4s, %2.4s");
3571   __uint128_t arg1 = MakeF32x4(-3.0f, 2.0f, 7.0f, -0.0f);
3572   __uint128_t arg2 = MakeF32x4(6.0f, 2.0f, -8.0f, 5.0f);
3573   __uint128_t res = AsmFcmge(arg1, arg2);
3574   ASSERT_EQ(res, MakeUInt128(0xffffffff00000000ULL, 0x00000000ffffffffULL));
3575 }
3576 
TEST(Arm64InsnTest,CompareGreaterF32)3577 TEST(Arm64InsnTest, CompareGreaterF32) {
3578   constexpr auto AsmFcmgt = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fcmgt %s0, %s1, %s2");
3579   uint32_t two = bit_cast<uint32_t>(2.0f);
3580   uint32_t six = bit_cast<uint32_t>(6.0f);
3581   ASSERT_EQ(AsmFcmgt(two, six), 0x00000000ULL);
3582   ASSERT_EQ(AsmFcmgt(two, two), 0x00000000ULL);
3583   ASSERT_EQ(AsmFcmgt(six, two), 0xffffffffULL);
3584   ASSERT_EQ(AsmFcmgt(kDefaultNaN32, two), 0x00000000ULL);
3585   ASSERT_EQ(AsmFcmgt(two, kDefaultNaN32), 0x00000000ULL);
3586 }
3587 
TEST(Arm64InsnTest,CompareGreaterF32x4)3588 TEST(Arm64InsnTest, CompareGreaterF32x4) {
3589   constexpr auto AsmFcmgt = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fcmgt %0.4s, %1.4s, %2.4s");
3590   __uint128_t arg1 = MakeF32x4(-3.0f, 2.0f, 7.0f, -0.0f);
3591   __uint128_t arg2 = MakeF32x4(6.0f, 2.0f, -8.0f, 5.0f);
3592   __uint128_t res = AsmFcmgt(arg1, arg2);
3593   ASSERT_EQ(res, MakeUInt128(0x0000000000000000ULL, 0x00000000ffffffffULL));
3594 }
3595 
TEST(Arm64InsnTest,CompareEqualZeroF32)3596 TEST(Arm64InsnTest, CompareEqualZeroF32) {
3597   constexpr auto AsmFcmeq = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcmeq %s0, %s1, #0");
3598   ASSERT_EQ(AsmFcmeq(bit_cast<uint32_t>(0.0f)), 0xffffffffULL);
3599   ASSERT_EQ(AsmFcmeq(bit_cast<uint32_t>(4.0f)), 0x00000000ULL);
3600 }
3601 
TEST(Arm64InsnTest,CompareEqualZeroF32x4)3602 TEST(Arm64InsnTest, CompareEqualZeroF32x4) {
3603   constexpr auto AsmFcmeq = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcmeq %0.4s, %1.4s, #0");
3604   __uint128_t arg = MakeF32x4(-3.0f, 0.0f, 7.0f, 1.0f);
3605   __uint128_t res = AsmFcmeq(arg);
3606   ASSERT_EQ(res, MakeUInt128(0xffffffff00000000ULL, 0x0000000000000000ULL));
3607 }
3608 
TEST(Arm64InsnTest,CompareGreaterThanZeroF32)3609 TEST(Arm64InsnTest, CompareGreaterThanZeroF32) {
3610   constexpr auto AsmFcmgt = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcmgt %s0, %s1, #0");
3611   ASSERT_EQ(AsmFcmgt(bit_cast<uint32_t>(-1.0f)), 0x00000000ULL);
3612   ASSERT_EQ(AsmFcmgt(bit_cast<uint32_t>(0.0f)), 0x00000000ULL);
3613   ASSERT_EQ(AsmFcmgt(bit_cast<uint32_t>(1.0f)), 0xffffffffULL);
3614 }
3615 
TEST(Arm64InsnTest, CompareGreaterThanZeroF32x4) {
  // FCMGT (vector, zero form): lanewise > 0.0f; lanes 2 and 3 (7.0f, 1.0f) are true.
  constexpr auto AsmFcmgt = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcmgt %0.4s, %1.4s, #0");
  const __uint128_t input = MakeF32x4(-3.0f, 0.0f, 7.0f, 1.0f);
  ASSERT_EQ(AsmFcmgt(input), MakeUInt128(0x0000000000000000ULL, 0xffffffffffffffffULL));
}
3622 
TEST(Arm64InsnTest, CompareGreaterThanOrEqualZeroF32) {
  // FCMGE (scalar, zero form): all-ones mask for zero or positive operands.
  constexpr auto AsmFcmge = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcmge %s0, %s1, #0");
  const uint32_t minus_one = bit_cast<uint32_t>(-1.0f);
  const uint32_t zero = bit_cast<uint32_t>(0.0f);
  const uint32_t one = bit_cast<uint32_t>(1.0f);
  ASSERT_EQ(AsmFcmge(minus_one), 0x00000000ULL);
  ASSERT_EQ(AsmFcmge(zero), 0xffffffffULL);
  ASSERT_EQ(AsmFcmge(one), 0xffffffffULL);
}
3629 
TEST(Arm64InsnTest, CompareGreaterThanOrEqualZeroF32x4) {
  // FCMGE (vector, zero form): lanewise >= 0.0f; only lane 0 (-3.0f) is false.
  constexpr auto AsmFcmge = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcmge %0.4s, %1.4s, #0");
  const __uint128_t input = MakeF32x4(-3.0f, 0.0f, 7.0f, 1.0f);
  ASSERT_EQ(AsmFcmge(input), MakeUInt128(0xffffffff00000000ULL, 0xffffffffffffffffULL));
}
3636 
TEST(Arm64InsnTest, CompareLessThanZeroF32) {
  // FCMLT (scalar, zero form): all-ones mask only for a strictly negative operand.
  constexpr auto AsmFcmlt = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcmlt %s0, %s1, #0");
  const uint32_t minus_one = bit_cast<uint32_t>(-1.0f);
  const uint32_t zero = bit_cast<uint32_t>(0.0f);
  const uint32_t one = bit_cast<uint32_t>(1.0f);
  ASSERT_EQ(AsmFcmlt(minus_one), 0xffffffffULL);
  ASSERT_EQ(AsmFcmlt(zero), 0x00000000ULL);
  ASSERT_EQ(AsmFcmlt(one), 0x00000000ULL);
}
3643 
TEST(Arm64InsnTest, CompareLessThanZeroF32x4) {
  // FCMLT (vector, zero form): lanewise < 0.0f; only lane 0 (-3.0f) is true.
  constexpr auto AsmFcmlt = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcmlt %0.4s, %1.4s, #0");
  const __uint128_t input = MakeF32x4(-3.0f, 0.0f, 7.0f, 1.0f);
  ASSERT_EQ(AsmFcmlt(input), MakeUInt128(0x00000000ffffffffULL, 0x0000000000000000ULL));
}
3650 
TEST(Arm64InsnTest, CompareLessThanOrEqualZeroF32) {
  // FCMLE (scalar, zero form): all-ones mask for zero or negative operands.
  constexpr auto AsmFcmle = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcmle %s0, %s1, #0");
  const uint32_t minus_one = bit_cast<uint32_t>(-1.0f);
  const uint32_t zero = bit_cast<uint32_t>(0.0f);
  const uint32_t one = bit_cast<uint32_t>(1.0f);
  ASSERT_EQ(AsmFcmle(minus_one), 0xffffffffULL);
  ASSERT_EQ(AsmFcmle(zero), 0xffffffffULL);
  ASSERT_EQ(AsmFcmle(one), 0x00000000ULL);
}
3657 
TEST(Arm64InsnTest, CompareLessThanOrEqualZeroF32x4) {
  // FCMLE (vector, zero form): lanewise <= 0.0f; lanes 0 and 1 (-3.0f, 0.0f) are true.
  constexpr auto AsmFcmle = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcmle %0.4s, %1.4s, #0");
  const __uint128_t input = MakeF32x4(-3.0f, 0.0f, 7.0f, 1.0f);
  ASSERT_EQ(AsmFcmle(input), MakeUInt128(0xffffffffffffffffULL, 0x0000000000000000ULL));
}
3664 
TEST(Arm64InsnTest, AbsoluteCompareGreaterThanF32) {
  // FACGT (scalar): compares magnitudes, |op1| > |op2|.
  constexpr auto AsmFacgt = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("facgt %s0, %s1, %s2");
  // |-3| > |1| is true.
  ASSERT_EQ(AsmFacgt(bit_cast<uint32_t>(-3.0f), bit_cast<uint32_t>(1.0f)), 0xffffffffULL);
  // |1| > |-1| is false (equal magnitudes).
  ASSERT_EQ(AsmFacgt(bit_cast<uint32_t>(1.0f), bit_cast<uint32_t>(-1.0f)), 0x00000000ULL);
  // |3| > |-7| is false.
  ASSERT_EQ(AsmFacgt(bit_cast<uint32_t>(3.0f), bit_cast<uint32_t>(-7.0f)), 0x00000000ULL);
}
3671 
TEST(Arm64InsnTest, AbsoluteCompareGreaterThanOrEqualF32) {
  // FACGE (scalar): compares magnitudes, |op1| >= |op2|.
  constexpr auto AsmFacge = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("facge %s0, %s1, %s2");
  // |-3| >= |1| is true.
  ASSERT_EQ(AsmFacge(bit_cast<uint32_t>(-3.0f), bit_cast<uint32_t>(1.0f)), 0xffffffffULL);
  // |1| >= |-1| is true (equal magnitudes).
  ASSERT_EQ(AsmFacge(bit_cast<uint32_t>(1.0f), bit_cast<uint32_t>(-1.0f)), 0xffffffffULL);
  // |3| >= |-7| is false.
  ASSERT_EQ(AsmFacge(bit_cast<uint32_t>(3.0f), bit_cast<uint32_t>(-7.0f)), 0x00000000ULL);
}
3678 
TEST(Arm64InsnTest, AbsoluteCompareGreaterThanF32x4) {
  // FACGT (vector, 4s): lanewise |lhs| > |rhs|.
  constexpr auto AsmFacgt = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("facgt %0.4s, %1.4s, %2.4s");
  const __uint128_t lhs = MakeF32x4(-3.0f, 1.0f, 3.0f, 4.0f);
  const __uint128_t rhs = MakeF32x4(1.0f, -1.0f, -7.0f, 2.0f);
  // True lanes: 0 (3 > 1) and 3 (4 > 2); false: 1 (1 > 1) and 2 (3 > 7).
  ASSERT_EQ(AsmFacgt(lhs, rhs), MakeUInt128(0x00000000ffffffffULL, 0xffffffff00000000ULL));
}
3685 
TEST(Arm64InsnTest, AbsoluteCompareGreaterThanEqualF32x4) {
  // FACGE (vector, 4s): lanewise |lhs| >= |rhs|.
  constexpr auto AsmFacge = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("facge %0.4s, %1.4s, %2.4s");
  const __uint128_t lhs = MakeF32x4(-3.0f, 1.0f, 3.0f, 4.0f);
  const __uint128_t rhs = MakeF32x4(1.0f, -1.0f, -7.0f, 2.0f);
  // Only lane 2 (3 >= 7) is false; lane 1 passes because the magnitudes tie.
  ASSERT_EQ(AsmFacge(lhs, rhs), MakeUInt128(0xffffffffffffffffULL, 0xffffffff00000000ULL));
}
3692 
TEST(Arm64InsnTest, CompareEqualF64) {
  // FCMEQ (scalar, double): equality yields a 64-bit all-ones mask;
  // comparisons involving NaN are false on either side.
  constexpr auto AsmFcmeq = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fcmeq %d0, %d1, %d2");
  const uint64_t two = bit_cast<uint64_t>(2.0);
  const uint64_t six = bit_cast<uint64_t>(6.0);
  ASSERT_EQ(AsmFcmeq(two, six), 0x0000000000000000ULL);
  ASSERT_EQ(AsmFcmeq(two, two), 0xffffffffffffffffULL);
  ASSERT_EQ(AsmFcmeq(kDefaultNaN64, two), 0x0000000000000000ULL);
  ASSERT_EQ(AsmFcmeq(two, kDefaultNaN64), 0x0000000000000000ULL);
}
3702 
TEST(Arm64InsnTest, CompareEqualF64x2) {
  // FCMEQ (vector, 2d): lanewise equality over pairs of doubles.
  constexpr auto AsmFcmeq = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fcmeq %0.2d, %1.2d, %2.2d");
  // Lane 1 (2.0 == 2.0) is the only equal pair.
  ASSERT_EQ(AsmFcmeq(MakeF64x2(-3.0, 2.0), MakeF64x2(6.0, 2.0)),
            MakeUInt128(0x0000000000000000ULL, 0xffffffffffffffffULL));
  // No lane equal; -0.0 vs 5.0 differ too.
  ASSERT_EQ(AsmFcmeq(MakeF64x2(7.0, -0.0), MakeF64x2(-8.0, 5.0)),
            MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
}
3714 
TEST(Arm64InsnTest, CompareGreaterEqualF64) {
  // FCMGE (scalar, double): >= yields a 64-bit all-ones mask; NaN operands compare false.
  constexpr auto AsmFcmge = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fcmge %d0, %d1, %d2");
  const uint64_t two = bit_cast<uint64_t>(2.0);
  const uint64_t six = bit_cast<uint64_t>(6.0);
  ASSERT_EQ(AsmFcmge(two, six), 0x0000000000000000ULL);
  ASSERT_EQ(AsmFcmge(two, two), 0xffffffffffffffffULL);
  ASSERT_EQ(AsmFcmge(six, two), 0xffffffffffffffffULL);
  ASSERT_EQ(AsmFcmge(kDefaultNaN64, two), 0x0000000000000000ULL);
  ASSERT_EQ(AsmFcmge(two, kDefaultNaN64), 0x0000000000000000ULL);
}
3725 
TEST(Arm64InsnTest, CompareGreaterEqualF64x2) {
  // FCMGE (vector, 2d): lanewise >= over pairs of doubles.
  constexpr auto AsmFcmge = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fcmge %0.2d, %1.2d, %2.2d");
  // -3.0 >= 6.0 is false; 2.0 >= 2.0 is true.
  ASSERT_EQ(AsmFcmge(MakeF64x2(-3.0, 2.0), MakeF64x2(6.0, 2.0)),
            MakeUInt128(0x0000000000000000ULL, 0xffffffffffffffffULL));
  // 7.0 >= -8.0 is true; -0.0 >= 5.0 is false.
  ASSERT_EQ(AsmFcmge(MakeF64x2(7.0, -0.0), MakeF64x2(-8.0, 5.0)),
            MakeUInt128(0xffffffffffffffffULL, 0x0000000000000000ULL));
}
3737 
TEST(Arm64InsnTest, CompareGreaterF64) {
  // FCMGT (scalar, double): strict > yields a 64-bit all-ones mask; NaN operands compare false.
  constexpr auto AsmFcmgt = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fcmgt %d0, %d1, %d2");
  const uint64_t two = bit_cast<uint64_t>(2.0);
  const uint64_t six = bit_cast<uint64_t>(6.0);
  ASSERT_EQ(AsmFcmgt(two, six), 0x0000000000000000ULL);
  ASSERT_EQ(AsmFcmgt(two, two), 0x0000000000000000ULL);
  ASSERT_EQ(AsmFcmgt(six, two), 0xffffffffffffffffULL);
  ASSERT_EQ(AsmFcmgt(kDefaultNaN64, two), 0x0000000000000000ULL);
  ASSERT_EQ(AsmFcmgt(two, kDefaultNaN64), 0x0000000000000000ULL);
}
3748 
TEST(Arm64InsnTest, CompareGreaterF64x2) {
  // FCMGT (vector, 2d): lanewise strict > over pairs of doubles.
  constexpr auto AsmFcmgt = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fcmgt %0.2d, %1.2d, %2.2d");
  // Neither -3.0 > 6.0 nor 2.0 > 2.0 holds.
  ASSERT_EQ(AsmFcmgt(MakeF64x2(-3.0, 2.0), MakeF64x2(6.0, 2.0)),
            MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  // 7.0 > -8.0 is true; -0.0 > 5.0 is false.
  ASSERT_EQ(AsmFcmgt(MakeF64x2(7.0, -0.0), MakeF64x2(-8.0, 5.0)),
            MakeUInt128(0xffffffffffffffffULL, 0x0000000000000000ULL));
}
3760 
TEST(Arm64InsnTest, AndInt8x16) {
  // AND (vector, 16b): bitwise conjunction across the full 128 bits.
  const __uint128_t lhs = MakeUInt128(0x7781857780532171ULL, 0x2268066130019278ULL);
  const __uint128_t rhs = MakeUInt128(0x0498862723279178ULL, 0x6085784383827967ULL);
  const __uint128_t actual = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("and %0.16b, %1.16b, %2.16b")(lhs, rhs);
  ASSERT_EQ(actual, MakeUInt128(0x0480842700030170ULL, 0x2000004100001060ULL));
}
3767 
TEST(Arm64InsnTest, AndInt8x8) {
  // AND (vector, 8b): bitwise conjunction on the low 64 bits; upper half is zeroed.
  const __uint128_t lhs = MakeUInt128(0x7781857780532171ULL, 0x2268066130019278ULL);
  const __uint128_t rhs = MakeUInt128(0x0498862723279178ULL, 0x6085784383827967ULL);
  const __uint128_t actual = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("and %0.8b, %1.8b, %2.8b")(lhs, rhs);
  ASSERT_EQ(actual, MakeUInt128(0x0480842700030170ULL, 0));
}
3774 
TEST(Arm64InsnTest, OrInt8x16) {
  // ORR (vector, 16b): bitwise disjunction across the full 128 bits.
  const __uint128_t lhs = MakeUInt128(0x00ffaa5500112244ULL, 0x1248124812481248ULL);
  const __uint128_t rhs = MakeUInt128(0x44221100ffaa5500ULL, 0x1122448811224488ULL);
  const __uint128_t actual = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("orr %0.16b, %1.16b, %2.16b")(lhs, rhs);
  ASSERT_EQ(actual, MakeUInt128(0x44ffbb55ffbb7744ULL, 0x136a56c8136a56c8ULL));
}
3781 
TEST(Arm64InsnTest, OrInt8x8) {
  // ORR (vector, 8b): bitwise disjunction on the low 64 bits; upper half is zeroed.
  const __uint128_t lhs = MakeUInt128(0x00ffaa5500112244ULL, 0x1248124812481248ULL);
  const __uint128_t rhs = MakeUInt128(0x44221100ffaa5500ULL, 0x1122448811224488ULL);
  const __uint128_t actual = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("orr %0.8b, %1.8b, %2.8b")(lhs, rhs);
  ASSERT_EQ(actual, MakeUInt128(0x44ffbb55ffbb7744ULL, 0));
}
3788 
TEST(Arm64InsnTest, XorInt8x16) {
  // EOR (vector, 16b): bitwise exclusive-or across the full 128 bits.
  const __uint128_t lhs = MakeUInt128(0x1050792279689258ULL, 0x9235420199561121ULL);
  const __uint128_t rhs = MakeUInt128(0x8239864565961163ULL, 0x5488623057745649ULL);
  const __uint128_t actual = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("eor %0.16b, %1.16b, %2.16b")(lhs, rhs);
  ASSERT_EQ(actual, MakeUInt128(0x9269ff671cfe833bULL, 0xc6bd2031ce224768ULL));
}
3795 
TEST(Arm64InsnTest, XorInt8x8) {
  // EOR (vector, 8b): bitwise exclusive-or on the low 64 bits; upper half is zeroed.
  const __uint128_t lhs = MakeUInt128(0x1050792279689258ULL, 0x9235420199561121ULL);
  const __uint128_t rhs = MakeUInt128(0x8239864565961163ULL, 0x5488623057745649ULL);
  const __uint128_t actual = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("eor %0.8b, %1.8b, %2.8b")(lhs, rhs);
  ASSERT_EQ(actual, MakeUInt128(0x9269ff671cfe833bULL, 0));
}
3802 
TEST(Arm64InsnTest, AndNotInt8x16) {
  // BIC (vector, register, 16b): lhs AND NOT rhs across the full 128 bits.
  const __uint128_t lhs = MakeUInt128(0x0313783875288658ULL, 0x7533208381420617ULL);
  const __uint128_t rhs = MakeUInt128(0x2327917860857843ULL, 0x8382796797668145ULL);
  const __uint128_t actual = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("bic %0.16b, %1.16b, %2.16b")(lhs, rhs);
  ASSERT_EQ(actual, MakeUInt128(0x0010680015288618ULL, 0x7431008000000612ULL));
}
3809 
TEST(Arm64InsnTest, AndNotInt8x8) {
  // BIC (vector, register, 8b): lhs AND NOT rhs on the low 64 bits; upper half is zeroed.
  const __uint128_t lhs = MakeUInt128(0x4861045432664821ULL, 0x2590360011330530ULL);
  const __uint128_t rhs = MakeUInt128(0x5420199561121290ULL, 0x8572424541506959ULL);
  const __uint128_t actual = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("bic %0.8b, %1.8b, %2.8b")(lhs, rhs);
  ASSERT_EQ(actual, MakeUInt128(0x0841044012644821ULL, 0x0000000000000000ULL));
}
3816 
TEST(Arm64InsnTest, AndNotInt16x4Imm) {
  // BIC (vector, immediate, 4h): clears the immediate's bits (0x0003) in each
  // 16-bit lane of the low 64 bits; the 64-bit write zeroes the upper half.
  __uint128_t res = MakeUInt128(0x9690314950191085ULL, 0x7598442391986291ULL);

  // The "0"(res) constraint ties input and output to the same register,
  // since BIC (immediate) modifies its destination in place.
  asm("bic %0.4h, #0x3" : "=w"(res) : "0"(res));

  ASSERT_EQ(res, MakeUInt128(0x9690314850181084ULL, 0x0000000000000000ULL));
}
3824 
TEST(Arm64InsnTest, AndNotInt16x4ImmShiftedBy8) {
  // BIC (vector, immediate, 4h) with LSL #8: clears 0xa800 in each 16-bit lane
  // of the low 64 bits; the 64-bit write zeroes the upper half.
  __uint128_t res = MakeUInt128(0x8354056704038674ULL, 0x3513622224771589ULL);

  // Tied operand: BIC (immediate) reads and writes the same vector register.
  asm("bic %0.4h, #0xa8, lsl #8" : "=w"(res) : "0"(res));

  ASSERT_EQ(res, MakeUInt128(0x0354056704030674ULL, 0x0000000000000000ULL));
}
3832 
TEST(Arm64InsnTest, AndNotInt32x2ImmShiftedBy8) {
  // BIC (vector, immediate, 2s) with LSL #8: clears 0x0000d300 in each 32-bit
  // lane of the low 64 bits; the 64-bit write zeroes the upper half.
  __uint128_t res = MakeUInt128(0x1842631298608099ULL, 0x8886874132604721ULL);

  // Tied operand: BIC (immediate) reads and writes the same vector register.
  asm("bic %0.2s, #0xd3, lsl #8" : "=w"(res) : "0"(res));

  ASSERT_EQ(res, MakeUInt128(0x1842201298600099ULL, 0x0000000000000000ULL));
}
3840 
TEST(Arm64InsnTest, AndNotInt32x2ImmShiftedBy16) {
  // BIC (vector, immediate, 2s) with LSL #16: clears 0x00220000 in each 32-bit
  // lane of the low 64 bits; the 64-bit write zeroes the upper half.
  __uint128_t res = MakeUInt128(0x2947867242292465ULL, 0x4366800980676928ULL);

  // Tied operand: BIC (immediate) reads and writes the same vector register.
  asm("bic %0.2s, #0x22, lsl #16" : "=w"(res) : "0"(res));

  ASSERT_EQ(res, MakeUInt128(0x2945867242092465ULL, 0x0000000000000000ULL));
}
3848 
TEST(Arm64InsnTest, AndNotInt32x2ImmShiftedBy24) {
  // BIC (vector, immediate, 2s) with LSL #24: clears 0x83000000 in each 32-bit
  // lane of the low 64 bits; the 64-bit write zeroes the upper half.
  __uint128_t res = MakeUInt128(0x0706977942236250ULL, 0x8221688957383798ULL);

  // Tied operand: BIC (immediate) reads and writes the same vector register.
  asm("bic %0.2s, #0x83, lsl #24" : "=w"(res) : "0"(res));

  ASSERT_EQ(res, MakeUInt128(0x0406977940236250ULL, 0x0000000000000000ULL));
}
3856 
TEST(Arm64InsnTest, OrInt16x4Imm) {
  // ORR (vector, immediate, 4h): sets the immediate's bits (0x0005) in each
  // 16-bit lane of the low 64 bits; the 64-bit write zeroes the upper half.
  __uint128_t res = MakeUInt128(0x0841284886269456ULL, 0x0424196528502221ULL);

  // Tied operand: ORR (immediate) reads and writes the same vector register.
  asm("orr %0.4h, #0x5" : "=w"(res) : "0"(res));

  ASSERT_EQ(res, MakeUInt128(0x0845284d86279457ULL, 0x0000000000000000ULL));
}
3864 
TEST(Arm64InsnTest, OrNotInt8x16) {
  // ORN (vector, 16b): lhs OR NOT rhs across the full 128 bits.
  const __uint128_t lhs = MakeUInt128(0x5428584447952658ULL, 0x6782105114135473ULL);
  const __uint128_t rhs = MakeUInt128(0x3558764024749647ULL, 0x3263914199272604ULL);
  const __uint128_t actual = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("orn %0.16b, %1.16b, %2.16b")(lhs, rhs);
  ASSERT_EQ(actual, MakeUInt128(0xdeafd9ffdf9f6ff8ULL, 0xef9e7eff76dbddfbULL));
}
3871 
TEST(Arm64InsnTest, OrNotInt8x8) {
  // ORN (vector, 8b): lhs OR NOT rhs on the low 64 bits; upper half is zeroed.
  const __uint128_t lhs = MakeUInt128(0x3279178608578438ULL, 0x3827967976681454ULL);
  const __uint128_t rhs = MakeUInt128(0x6838689427741559ULL, 0x9185592524595395ULL);
  const __uint128_t actual = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("orn %0.8b, %1.8b, %2.8b")(lhs, rhs);
  ASSERT_EQ(actual, MakeUInt128(0xb7ff97efd8dfeebeULL, 0x0000000000000000ULL));
}
3878 
TEST(Arm64InsnTest, BitwiseSelectInt8x8) {
  // BSL (8b): the tied destination (third argument) acts as the selector —
  // result bits come from the first source where the mask is 1 and from the
  // second source where it is 0; the 64-bit form zeroes the upper half.
  const __uint128_t if_set = MakeUInt128(0x2000568127145263ULL, 0x5608277857713427ULL);
  const __uint128_t if_clear = MakeUInt128(0x0792279689258923ULL, 0x5420199561121290ULL);
  const __uint128_t selector = MakeUInt128(0x8372978049951059ULL, 0x7317328160963185ULL);
  const __uint128_t actual =
      ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("bsl %0.8b, %1.8b, %2.8b")(if_set, if_clear, selector);
  ASSERT_EQ(actual, MakeUInt128(0x0480369681349963ULL, 0x0000000000000000ULL));
}
3886 
TEST(Arm64InsnTest, BitwiseInsertIfTrueInt8x8) {
  // BIT (8b): inserts bits of the first source into the tied destination
  // (third argument) where the second source has ones; 64-bit form zeroes
  // the upper half.
  const __uint128_t src = MakeUInt128(0x3678925903600113ULL, 0x3053054882046652ULL);
  const __uint128_t mask = MakeUInt128(0x9326117931051185ULL, 0x4807446237996274ULL);
  const __uint128_t dest = MakeUInt128(0x6430860213949463ULL, 0x9522473719070217ULL);
  const __uint128_t actual =
      ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("bit %0.8b, %1.8b, %2.8b")(src, mask, dest);
  ASSERT_EQ(actual, MakeUInt128(0x7630965b03908563ULL, 0x0000000000000000ULL));
}
3894 
TEST(Arm64InsnTest, BitwiseInsertIfFalseInt8x8) {
  // BIF (8b): inserts bits of the first source into the tied destination
  // (third argument) where the second source has zeros; 64-bit form zeroes
  // the upper half.
  const __uint128_t src = MakeUInt128(0x7067982148086513ULL, 0x2823066470938446ULL);
  const __uint128_t mask = MakeUInt128(0x5964462294895493ULL, 0x0381964428810975ULL);
  const __uint128_t dest = MakeUInt128(0x0348610454326648ULL, 0x2133936072602491ULL);
  const __uint128_t actual =
      ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("bif %0.8b, %1.8b, %2.8b")(src, mask, dest);
  ASSERT_EQ(actual, MakeUInt128(0x2143d8015c006500ULL, 0x0000000000000000ULL));
}
3902 
TEST(Arm64InsnTest, ArithmeticShiftRightInt64x1) {
  // SSHR (scalar, D-form): arithmetic right shift sign-extends the top bits.
  const __uint128_t input = MakeUInt128(0x9486015046652681ULL, 0x4398770516153170ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_W_ARG("sshr %d0, %d1, #39")(input),
            MakeUInt128(0xffffffffff290c02ULL, 0x0000000000000000ULL));
}
3908 
TEST(Arm64InsnTest, ArithmeticShiftRightBy64Int64x1) {
  // SSHR by the full element width (#64) fills the result with the sign bit.
  const __uint128_t input = MakeUInt128(0x9176042601763387ULL, 0x0454990176143641ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_W_ARG("sshr %d0, %d1, #64")(input),
            MakeUInt128(0xffffffffffffffffULL, 0x0000000000000000ULL));
}
3914 
TEST(Arm64InsnTest, ArithmeticShiftRightInt64x2) {
  // SSHR (vector, 2d): each 64-bit lane is arithmetically shifted right.
  const __uint128_t input = MakeUInt128(0x7501116498327856ULL, 0x3531614516845769ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_W_ARG("sshr %0.2d, %1.2d, #35")(input),
            MakeUInt128(0x000000000ea0222cULL, 0x0000000006a62c28ULL));
}
3920 
TEST(Arm64InsnTest, ArithmeticShiftRightAccumulateInt64x1) {
  // SSRA (scalar): arithmetic right shift of the source, added into the tied destination.
  const __uint128_t shift_input = MakeUInt128(0x9667179643468760ULL, 0x0770479995378833ULL);
  const __uint128_t accumulator = MakeUInt128(0x2557176908196030ULL, 0x9201824018842705ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_W0_ARG("ssra %d0, %d1, #40")(shift_input, accumulator),
            MakeUInt128(0x2557176907afc747ULL, 0x0000000000000000ULL));
}
3927 
TEST(Arm64InsnTest, ArithmeticShiftRightBy64AccumulateInt64x1) {
  // SSRA by #64: the shifted value is the sign fill (all-ones here), i.e. adds -1.
  const __uint128_t shift_input = MakeUInt128(0x9223343657791601ULL, 0x2809317940171859ULL);
  const __uint128_t accumulator = MakeUInt128(0x3498025249906698ULL, 0x4233017350358044ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_W0_ARG("ssra %d0, %d1, #64")(shift_input, accumulator),
            MakeUInt128(0x3498025249906697ULL, 0x0000000000000000ULL));
}
3934 
TEST(Arm64InsnTest, ArithmeticShiftRightAccumulateInt16x8) {
  // SSRA (vector, 8h): per-lane arithmetic right shift, accumulated into the destination.
  const __uint128_t shift_input = MakeUInt128(0x9276457931065792ULL, 0x2955249887275846ULL);
  const __uint128_t accumulator = MakeUInt128(0x0101655256375678ULL, 0x5667227966198857ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_W0_ARG("ssra %0.8h, %1.8h, #12")(shift_input, accumulator),
            MakeUInt128(0x00fa6556563a567dULL, 0x5669227b6611885cULL));
}
3941 
TEST(Arm64InsnTest, ArithmeticRoundingShiftRightAccumulateInt16x8) {
  // SRSRA (vector, 8h): rounding arithmetic right shift, accumulated into the destination.
  const __uint128_t shift_input = MakeUInt128(0x9894671543578468ULL, 0x7886144458123145ULL);
  const __uint128_t accumulator = MakeUInt128(0x1412147805734551ULL, 0x0500801908699603ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_W0_ARG("srsra %0.8h, %1.8h, #12")(shift_input, accumulator),
            MakeUInt128(0x140c147e05774549ULL, 0x0508801a086f9606ULL));
}
3948 
TEST(Arm64InsnTest, LogicalShiftRightInt64x1) {
  // USHR (scalar, D-form): logical right shift zero-fills the top bits.
  const __uint128_t input = MakeUInt128(0x9859771921805158ULL, 0x5321473926532515ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_W_ARG("ushr %d0, %d1, #33")(input),
            MakeUInt128(0x000000004c2cbb8cULL, 0x0000000000000000ULL));
}
3954 
TEST(Arm64InsnTest, LogicalShiftRightBy64Int64x1) {
  // USHR by the full element width (#64) produces zero.
  const __uint128_t input = MakeUInt128(0x9474696134360928ULL, 0x6148494178501718ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_W_ARG("ushr %d0, %d1, #64")(input),
            MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
}
3960 
TEST(Arm64InsnTest, LogicalShiftRightInt64x2) {
  // USHR (vector, 2d): each 64-bit lane is logically shifted right.
  const __uint128_t input = MakeUInt128(0x3962657978771855ULL, 0x6084552965412665ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_W_ARG("ushr %0.2d, %1.2d, #33")(input),
            MakeUInt128(0x000000001cb132bcULL, 0x0000000030422a94ULL));
}
3966 
TEST(Arm64InsnTest, LogicalShiftRightAccumulateInt64x1) {
  // USRA (scalar): logical right shift of the source, added into the tied destination.
  const __uint128_t shift_input = MakeUInt128(0x9004112453790153ULL, 0x3296615697052237ULL);
  const __uint128_t accumulator = MakeUInt128(0x0499939532215362ULL, 0x2748476603613677ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_W0_ARG("usra %d0, %d1, #40")(shift_input, accumulator),
            MakeUInt128(0x0499939532b15773ULL, 0x0000000000000000ULL));
}
3973 
TEST(Arm64InsnTest, LogicalShiftRightBy64AccumulateInt64x1) {
  // USRA by #64: the shifted value is zero, so the destination is unchanged.
  const __uint128_t shift_input = MakeUInt128(0x9886592578662856ULL, 0x1249665523533829ULL);
  const __uint128_t accumulator = MakeUInt128(0x3559152534784459ULL, 0x8183134112900199ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_W0_ARG("usra %d0, %d1, #64")(shift_input, accumulator),
            MakeUInt128(0x3559152534784459ULL, 0x0000000000000000ULL));
}
3980 
TEST(Arm64InsnTest, LogicalShiftRightAccumulateInt16x8) {
  // USRA (vector, 8h): per-lane logical right shift, accumulated into the destination.
  const __uint128_t shift_input = MakeUInt128(0x9984345225161050ULL, 0x7027056235266012ULL);
  const __uint128_t accumulator = MakeUInt128(0x4628654036036745ULL, 0x3286510570658748ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_W0_ARG("usra %0.8h, %1.8h, #12")(shift_input, accumulator),
            MakeUInt128(0x4631654336056746ULL, 0x328d51057068874eULL));
}
3987 
TEST(Arm64InsnTest, LogicalRoundingShiftRightAccumulateInt16x8) {
  // SRSRA (vector, 8h): rounding arithmetic right shift accumulate, second data set.
  const __uint128_t shift_input = MakeUInt128(0x9843452251610507ULL, 0x0270562352660127ULL);
  const __uint128_t accumulator = MakeUInt128(0x6286540360367453ULL, 0x2865105706587488ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_W0_ARG("srsra %0.8h, %1.8h, #12")(shift_input, accumulator),
            MakeUInt128(0x62805407603b7453ULL, 0x2865105c065d7488ULL));
}
3994 
TEST(Arm64InsnTest, SignedRoundingShiftRightInt64x1) {
  // SRSHR (scalar): arithmetic right shift with round-to-nearest.
  const __uint128_t input = MakeUInt128(0x9323685785585581ULL, 0x9555604215625088ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_W_ARG("srshr %d0, %d1, #40")(input),
            MakeUInt128(0xffffffffff932368ULL, 0x0000000000000000ULL));
}
4000 
TEST(Arm64InsnTest, SignedRoundingShiftRightInt64x2) {
  // SRSHR (vector, 2d): per-lane rounding arithmetic right shift.
  const __uint128_t input = MakeUInt128(0x8714878398908107ULL, 0x4295309410605969ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_W_ARG("srshr %0.2d, %1.2d, #36")(input),
            MakeUInt128(0xfffffffff8714878ULL, 0x0000000004295309ULL));
}
4006 
TEST(Arm64InsnTest, SignedRoundingShiftRightAccumulateInt64x1) {
  // SRSRA (scalar): rounding arithmetic right shift, accumulated into the destination.
  const __uint128_t shift_input = MakeUInt128(0x9946016520577405ULL, 0x2942305360178031ULL);
  const __uint128_t accumulator = MakeUInt128(0x3960188013782542ULL, 0x1927094767337191ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_W0_ARG("srsra %d0, %d1, #33")(shift_input, accumulator),
            MakeUInt128(0x3960187fe01b25f5ULL, 0x0000000000000000ULL));
}
4013 
TEST(Arm64InsnTest, UnsignedRoundingShiftRightInt64x1) {
  // URSHR (scalar): logical right shift with round-to-nearest.
  const __uint128_t input = MakeUInt128(0x9713552208445285ULL, 0x2640081252027665ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_W_ARG("urshr %d0, %d1, #33")(input),
            MakeUInt128(0x000000004b89aa91ULL, 0x0000000000000000ULL));
}
4019 
TEST(Arm64InsnTest, UnsignedRoundingShiftRightInt64x2) {
  // URSHR (vector, 2d): per-lane rounding logical right shift.
  const __uint128_t input = MakeUInt128(0x6653398573888786ULL, 0x6147629443414010ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_W_ARG("urshr %0.2d, %1.2d, #34")(input),
            MakeUInt128(0x000000001994ce61ULL, 0x000000001851d8a5ULL));
}
4025 
TEST(Arm64InsnTest, UnsignedRoundingShiftRightAccumulateInt64x1) {
  // URSRA (scalar): rounding logical right shift, accumulated into the destination.
  const __uint128_t shift_input = MakeUInt128(0x9616143204006381ULL, 0x3224658411111577ULL);
  const __uint128_t accumulator = MakeUInt128(0x7184728147519983ULL, 0x5050478129771859ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_W0_ARG("ursra %d0, %d1, #33")(shift_input, accumulator),
            MakeUInt128(0x71847281925ca39cULL, 0x0000000000000000ULL));
}
4032 
TEST(Arm64InsnTest, ShiftLeftInt64x1) {
  // SHL (scalar, D-form): left shift; bits shifted out of the element are discarded.
  const __uint128_t input = MakeUInt128(0x3903594664691623ULL, 0x5396809201394578ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_W_ARG("shl %d0, %d1, #35")(input),
            MakeUInt128(0x2348b11800000000ULL, 0x0000000000000000ULL));
}
4038 
TEST(Arm64InsnTest, ShiftLeftInt64x2) {
  // SHL (vector, 2d): each 64-bit lane is shifted left independently.
  const __uint128_t input = MakeUInt128(0x0750111649832785ULL, 0x6353161451684576ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_W_ARG("shl %0.2d, %1.2d, #37")(input),
            MakeUInt128(0x3064f0a000000000ULL, 0x2d08aec000000000ULL));
}
4044 
TEST(Arm64InsnTest, ShiftLeftInt8x8) {
  // SHL (vector, 8b): each byte lane shifts left; the 64-bit form zeroes the upper half.
  const __uint128_t input = MakeUInt128(0x0402956047346131ULL, 0x1382638788975517ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_W_ARG("shl %0.8b, %1.8b, #6")(input),
            MakeUInt128(0x00804000c0004040ULL, 0x0000000000000000ULL));
}
4050 
TEST(Arm64InsnTest, ShiftRightInsertInt64x1) {
  // SRI (scalar): shifts the source right and inserts it into the tied
  // destination, keeping the destination's top shift-count bits.
  const __uint128_t src = MakeUInt128(0x9112232618794059ULL, 0x9415540632701319ULL);
  const __uint128_t dest = MakeUInt128(0x1537675115830432ULL, 0x0849872092028092ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_W0_ARG("sri %d0, %d1, #20")(src, dest),
            MakeUInt128(0x1537691122326187ULL, 0x0000000000000000ULL));
}
4057 
TEST(Arm64InsnTest, ShiftRightInsertInt64x2) {
  // SRI (vector, 2d): per-lane shift-right-insert into the tied destination.
  const __uint128_t src = MakeUInt128(0x7332335603484653ULL, 0x1873029302665964ULL);
  const __uint128_t dest = MakeUInt128(0x5013718375428897ULL, 0x5579714499246540ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_W0_ARG("sri %0.2d, %1.2d, #21")(src, dest),
            MakeUInt128(0x50137399919ab01aULL, 0x557970c398149813ULL));
}
4064 
TEST(Arm64InsnTest, ShiftLeftInsertInt64x1) {
  // SLI (scalar): shifts the source left and inserts it into the tied
  // destination, keeping the destination's low shift-count bits.
  const __uint128_t src = MakeUInt128(0x3763526969344354ULL, 0x4004730671988689ULL);
  const __uint128_t dest = MakeUInt128(0x6369498567302175ULL, 0x2313252926537589ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_W0_ARG("sli %d0, %d1, #23")(src, dest),
            MakeUInt128(0x34b49a21aa302175ULL, 0x0000000000000000ULL));
}
4071 
TEST(Arm64InsnTest, ShiftLeftInsertInt64x2) {
  // SLI (vector, 2d): per-lane shift-left-insert into the tied destination.
  const __uint128_t src = MakeUInt128(0x3270206902872323ULL, 0x3005386216347988ULL);
  const __uint128_t dest = MakeUInt128(0x5094695472004795ULL, 0x2311201504329322ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_W0_ARG("sli %0.2d, %1.2d, #21")(src, dest),
            MakeUInt128(0x0d2050e464604795ULL, 0x0c42c68f31129322ULL));
}
4078 
TEST(Arm64InsnTest, ShiftLeftLongInt8x8) {
  // SHLL: widens the low eight bytes to 16-bit lanes, shifting each left by the lane width.
  const __uint128_t input = MakeUInt128(0x2650697620201995ULL, 0x5484126500053944ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_W_ARG("shll %0.8h, %1.8b, #8")(input),
            MakeUInt128(0x2000200019009500ULL, 0x2600500069007600ULL));
}
4084 
TEST(Arm64InsnTest, ShiftLeftLongInt8x8Upper) {
  // SHLL2: same as SHLL but consumes the upper eight bytes of the source.
  const __uint128_t input = MakeUInt128(0x9050429225978771ULL, 0x0667873840000616ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_W_ARG("shll2 %0.8h, %1.16b, #8")(input),
            MakeUInt128(0x4000000006001600ULL, 0x0600670087003800ULL));
}
4090 
TEST(Arm64InsnTest, SignedShiftLeftLongInt32x2) {
  // SSHLL: sign-extends the low two 32-bit lanes to 64 bits, then shifts left.
  const __uint128_t input = MakeUInt128(0x9075407923424023ULL, 0x0092590070173196ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_W_ARG("sshll %0.2d, %1.2s, #9")(input),
            MakeUInt128(0x0000004684804600ULL, 0xffffff20ea80f200ULL));
}
4096 
TEST(Arm64InsnTest, SignedShiftLeftLongInt32x2Upper) {
  // SSHLL2: same as SSHLL but consumes the upper two 32-bit lanes.
  const __uint128_t input = MakeUInt128(0x9382432227188515ULL, 0x9740547021482897ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_W_ARG("sshll2 %0.2d, %1.4s, #9")(input),
            MakeUInt128(0x0000004290512e00ULL, 0xffffff2e80a8e000ULL));
}
4102 
TEST(Arm64InsnTest, SignedShiftLeftLongInt32x2By0) {
  // SXTL is an alias for SSHLL with a zero shift: pure sign extension of the
  // low two 32-bit lanes to 64 bits.
  const __uint128_t input = MakeUInt128(0x9008777697763127ULL, 0x9572267265556259ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_W_ARG("sxtl %0.2d, %1.2s")(input),
            MakeUInt128(0xffffffff97763127ULL, 0xffffffff90087776ULL));
}
4109 
TEST(Arm64InsnTest, ShiftLeftLongInt32x2) {
  // USHLL: zero-extends the low two 32-bit lanes to 64 bits, then shifts left.
  const __uint128_t input = MakeUInt128(0x9094334676851422ULL, 0x1447737939375170ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_W_ARG("ushll %0.2d, %1.2s, #9")(input),
            MakeUInt128(0x000000ed0a284400ULL, 0x0000012128668c00ULL));
}
4115 
TEST(Arm64InsnTest, ShiftLeftLongInt32x2Upper) {
  // USHLL2: same as USHLL but consumes the upper two 32-bit lanes.
  const __uint128_t input = MakeUInt128(0x7096834080053559ULL, 0x8491754173818839ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_W_ARG("ushll2 %0.2d, %1.4s, #17")(input),
            MakeUInt128(0x0000e70310720000ULL, 0x00010922ea820000ULL));
}
4121 
TEST(Arm64InsnTest, ShiftLeftLongInt32x2By0) {
  // UXTL is an alias for USHLL with a zero shift: pure zero extension of the
  // low two 32-bit lanes to 64 bits.
  const __uint128_t input = MakeUInt128(0x9945681506526530ULL, 0x5371829412703369ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_W_ARG("uxtl %0.2d, %1.2s")(input),
            MakeUInt128(0x0000000006526530ULL, 0x0000000099456815ULL));
}
4128 
TEST(Arm64InsnTest, ShiftRightNarrowI16x8) {
  // SHRN: shifts each 16-bit lane right and narrows to bytes in the low half.
  const __uint128_t input = MakeUInt128(0x9378541786109696ULL, 0x9202538865034577ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_W_ARG("shrn %0.8b, %1.8h, #2")(input),
            MakeUInt128(0x80e2405dde0584a5ULL, 0x0000000000000000ULL));
}
4134 
TEST(Arm64InsnTest, ShiftRightNarrowI16x8Upper) {
  // SHRN2: narrows into the upper half while preserving the destination's lower half.
  const __uint128_t wide_src = MakeUInt128(0x9779940012601642ULL, 0x2760926082349304ULL);
  const __uint128_t dest = MakeUInt128(0x3879158299848645ULL, 0x9271734059225620ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_W0_ARG("shrn2 %0.16b, %1.8h, #2")(wide_src, dest),
            MakeUInt128(0x3879158299848645ULL, 0xd8988dc1de009890ULL));
}
4141 
TEST(Arm64InsnTest, RoundingShiftRightNarrowI16x8) {
  // RSHRN: rounding shift right on each 16-bit lane, narrowed to bytes in the low half.
  const __uint128_t input = MakeUInt128(0x9303774688099929ULL, 0x6877582441047878ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_W_ARG("rshrn %0.8b, %1.8h, #2")(input),
            MakeUInt128(0x1e09411ec1d2024aULL, 0x0000000000000000ULL));
}
4147 
TEST(Arm64InsnTest, RoundingShiftRightNarrowI16x8Upper) {
  // RSHRN2: rounding narrow into the upper half, preserving the destination's lower half.
  const __uint128_t wide_src = MakeUInt128(0x9314507607167064ULL, 0x3556827437743965ULL);
  const __uint128_t dest = MakeUInt128(0x2103098604092717ULL, 0x0909512808630902ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_W0_ARG("rshrn2 %0.16b, %1.8h, #2")(wide_src, dest),
            MakeUInt128(0x2103098604092717ULL, 0x569ddd59c51ec619ULL));
}
4154 
TEST(Arm64InsnTest, AddInt64x1) {
  // Scalar ADD on D registers: only the low 64 bits participate and the
  // upper half of the result must come out zero.
  constexpr auto AsmAdd = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("add %d0, %d1, %d2");
  __uint128_t lhs = MakeUInt128(0x0080000000000003ULL, 0xdeadbeef01234567ULL);
  __uint128_t rhs = MakeUInt128(0x0080000000000005ULL, 0x0123deadbeef4567ULL);
  ASSERT_EQ(AsmAdd(lhs, rhs), MakeUInt128(0x0100000000000008ULL, 0x0ULL));
}
4161 
TEST(Arm64InsnTest, AddInt32x4) {
  // Vector ADD over four 32-bit lanes.  Every lane has its sign bit set so
  // that a carry, if (incorrectly) propagated, would leak into the next lane.
  constexpr auto AsmAdd = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("add %0.4s, %1.4s, %2.4s");
  __uint128_t lhs = MakeUInt128(0x8000000380000001ULL, 0x8000000780000005ULL);
  __uint128_t rhs = MakeUInt128(0x8000000480000002ULL, 0x8000000880000006ULL);
  ASSERT_EQ(AsmAdd(lhs, rhs), MakeUInt128(0x0000000700000003ULL, 0x0000000f0000000bULL));
}
4170 
TEST(Arm64InsnTest, AddInt32x2) {
  // 64-bit-wide form: only the two low 32-bit lanes are added; the upper
  // half of the destination is cleared.
  constexpr auto AsmAdd = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("add %0.2s, %1.2s, %2.2s");
  __uint128_t lhs = MakeUInt128(0x8000000380000001ULL, 0x8000000780000005ULL);
  __uint128_t rhs = MakeUInt128(0x8000000480000002ULL, 0x8000000880000006ULL);
  ASSERT_EQ(AsmAdd(lhs, rhs), MakeUInt128(0x0000000700000003ULL, 0));
}
4177 
TEST(Arm64InsnTest, AddInt64x2) {
  // Vector ADD over two independent 64-bit lanes.
  constexpr auto AsmAdd = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("add %0.2d, %1.2d, %2.2d");
  __uint128_t lhs = MakeUInt128(0x8000000380000001ULL, 0x8000000780000005ULL);
  __uint128_t rhs = MakeUInt128(0x8000000480000002ULL, 0x8000000880000006ULL);
  ASSERT_EQ(AsmAdd(lhs, rhs), MakeUInt128(0x0000000800000003ULL, 0x000000100000000bULL));
}
4184 
TEST(Arm64InsnTest, SubInt64x1) {
  // Scalar SUB on D registers: 2 - 3 wraps to all-ones in the low 64 bits;
  // the upper half of the result must be zero.
  constexpr auto AsmSub = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("sub %d0, %d1, %d2");
  __uint128_t minuend = MakeUInt128(0x0000000000000002ULL, 0x0011223344556677ULL);
  __uint128_t subtrahend = MakeUInt128(0x0000000000000003ULL, 0x0123456789abcdefULL);
  ASSERT_EQ(AsmSub(minuend, subtrahend), MakeUInt128(0xffffffffffffffffULL, 0x0ULL));
}
4191 
TEST(Arm64InsnTest, SubInt64x2) {
  // Vector SUB over two 64-bit lanes; each lane borrows independently.
  __uint128_t minuend = MakeUInt128(0x6873115956286388ULL, 0x2353787593751957ULL);
  __uint128_t subtrahend = MakeUInt128(0x7818577805321712ULL, 0x2680661300192787ULL);
  __uint128_t diff =
      ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("sub %0.2d, %1.2d, %2.2d")(minuend, subtrahend);
  ASSERT_EQ(diff, MakeUInt128(0xf05ab9e150f64c76ULL, 0xfcd31262935bf1d0ULL));
}
4199 
TEST(Arm64InsnTest, SubInt32x4) {
  // Vector SUB over four 32-bit lanes.
  __uint128_t op1 = MakeUInt128(0x0000000A00000005ULL, 0x0000000C00000C45ULL);
  __uint128_t op2 = MakeUInt128(0x0000000500000003ULL, 0x0000000200000C45ULL);
  __uint128_t rd = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("sub %0.4s, %1.4s, %2.4s")(op1, op2);
  // The second expected literal originally had 17 hex digits (a stray leading
  // zero); it is rewritten as a proper 16-digit 64-bit constant (same value).
  ASSERT_EQ(rd, MakeUInt128(0x0000000500000002ULL, 0x0000000A00000000ULL));
}
4206 
TEST(Arm64InsnTest, SubInt32x2) {
  // 64-bit-wide SUB: only the two low 32-bit lanes are subtracted; the upper
  // half of the destination is cleared.
  __uint128_t op1 = MakeUInt128(0x0000000000000005ULL, 0x0000000000000C45ULL);
  __uint128_t op2 = MakeUInt128(0x0000000000000003ULL, 0x0000000000000C45ULL);
  __uint128_t rd = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("sub %0.2s, %1.2s, %2.2s")(op1, op2);
  // The second expected literal originally had 17 hex digits; rewritten as a
  // proper 16-digit 64-bit zero constant (same value).
  ASSERT_EQ(rd, MakeUInt128(0x0000000000000002ULL, 0x0000000000000000ULL));
}
4213 
TEST(Arm64InsnTest, SubInt16x4) {
  // Vector SUB over the four low 16-bit lanes.
  constexpr auto AsmSub = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("sub %0.4h, %1.4h, %2.4h");
  __uint128_t minuend = MakeUInt128(0x8888777766665555ULL, 0);
  __uint128_t subtrahend = MakeUInt128(0x1111222233334444ULL, 0);
  ASSERT_EQ(AsmSub(minuend, subtrahend), MakeUInt128(0x7777555533331111ULL, 0));
}
4220 
TEST(Arm64InsnTest, MultiplyI8x8) {
  // MUL over the eight low 8-bit lanes; each product is truncated to 8 bits.
  constexpr auto AsmMul = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("mul %0.8b, %1.8b, %2.8b");
  __uint128_t lhs = MakeUInt128(0x5261365549781893ULL, 0x1297848216829989ULL);
  __uint128_t rhs = MakeUInt128(0x4542858444795265ULL, 0x8678210511413547ULL);
  ASSERT_EQ(AsmMul(lhs, rhs), MakeUInt128(0x1a020ed464b8b0ffULL, 0x0000000000000000ULL));
}
4227 
TEST(Arm64InsnTest, MultiplyAndAccumulateI8x8) {
  // MLA: lane-wise multiply the first two operands and add the products to
  // the destination register's preloaded contents (third argument).
  constexpr auto AsmMla = ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("mla %0.8b, %1.8b, %2.8b");
  __uint128_t lhs = MakeUInt128(0x5848406353422072ULL, 0x2258284886481584ULL);
  __uint128_t rhs = MakeUInt128(0x7823986456596116ULL, 0x3548862305774564ULL);
  __uint128_t acc = MakeUInt128(0x8797108931456691ULL, 0x3686722874894056ULL);
  ASSERT_EQ(AsmMla(lhs, rhs, acc), MakeUInt128(0xc76f10351337865dULL, 0x0000000000000000ULL));
}
4235 
TEST(Arm64InsnTest, MultiplyAndAccumulateI8x8IndexedElem) {
  // Indexed MLA: every 16-bit lane of the first operand is multiplied by
  // element 0 of the second operand, then accumulated.
  constexpr auto AsmMla = ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("mla %0.4h, %1.4h, %2.h[0]");
  __uint128_t lhs = MakeUInt128(0x4143334547762416ULL, 0x8625189835694855ULL);
  __uint128_t rhs = MakeUInt128(0x5346462080466842ULL, 0x5906949129331367ULL);
  __uint128_t acc = MakeUInt128(0x0355876402474964ULL, 0x7326391419927260ULL);
  ASSERT_EQ(AsmMla(lhs, rhs, acc), MakeUInt128(0x0e9bc72e5eb38710ULL, 0x0000000000000000ULL));
}
4243 
TEST(Arm64InsnTest, MultiplyAndAccumulateI8x8IndexedElemPosition2) {
  // Indexed MLA using element 2 of the second operand (a lane in the upper
  // half of the source register).
  constexpr auto AsmMla = ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("mla %0.2s, %1.2s, %2.s[2]");
  __uint128_t lhs = MakeUInt128(0x1431429809190659ULL, 0x2509372216964615ULL);
  __uint128_t rhs = MakeUInt128(0x2686838689427741ULL, 0x5599185592524595ULL);
  __uint128_t acc = MakeUInt128(0x6099124608051243ULL, 0x8843904512441365ULL);
  ASSERT_EQ(AsmMla(lhs, rhs, acc), MakeUInt128(0x6ce7ccbedccdc110ULL, 0x0000000000000000ULL));
}
4251 
TEST(Arm64InsnTest, MultiplyAndSubtractI8x8IndexedElem) {
  // Indexed MLS: multiply every 16-bit lane by element 1 of the second
  // operand and subtract the products from the preloaded destination.
  constexpr auto AsmMls = ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("mls %0.4h, %1.4h, %2.h[1]");
  __uint128_t lhs = MakeUInt128(0x8297455570674983ULL, 0x8505494588586926ULL);
  __uint128_t rhs = MakeUInt128(0x6549911988183479ULL, 0x7753566369807426ULL);
  __uint128_t acc = MakeUInt128(0x4524919217321721ULL, 0x4772350141441973ULL);
  ASSERT_EQ(AsmMls(lhs, rhs, acc), MakeUInt128(0xcefce99ad58a9ad9ULL, 0x0000000000000000ULL));
}
4259 
TEST(Arm64InsnTest, MultiplyAndSubtractI8x8) {
  // MLS: lane-wise multiply the first two operands and subtract the products
  // from the destination register's preloaded contents.
  constexpr auto AsmMls = ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("mls %0.8b, %1.8b, %2.8b");
  __uint128_t lhs = MakeUInt128(0x0635342207222582ULL, 0x8488648158456028ULL);
  __uint128_t rhs = MakeUInt128(0x9864565961163548ULL, 0x8623057745649803ULL);
  __uint128_t acc = MakeUInt128(0x1089314566913686ULL, 0x7228748940560101ULL);
  ASSERT_EQ(AsmMls(lhs, rhs, acc), MakeUInt128(0x80d5b973bfa58df6ULL, 0x0000000000000000ULL));
}
4267 
TEST(Arm64InsnTest, MultiplyI32x4IndexedElem) {
  // Indexed MUL: every 32-bit lane of the first operand is multiplied by
  // element 1 of the second operand.
  constexpr auto AsmMul = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("mul %0.4s, %1.4s, %2.s[1]");
  __uint128_t lhs = MakeUInt128(0x143334547762416ULL, 0x8625189835694855ULL);
  __uint128_t rhs = MakeUInt128(0x627232791786085ULL, 0x7843838279679766ULL);
  ASSERT_EQ(AsmMul(lhs, rhs), MakeUInt128(0xcec23e830d48815aULL, 0xd12b87288ae0a3f3ULL));
}
4274 
TEST(Arm64InsnTest, PolynomialMultiplyU8x8) {
  // PMUL: carry-less (polynomial) multiply of each pair of 8-bit lanes,
  // keeping the low 8 bits of each product.
  constexpr auto AsmPmul = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("pmul %0.8b, %1.8b, %2.8b");
  __uint128_t lhs = MakeUInt128(0x1862056476931257ULL, 0x0586356620185581ULL);
  __uint128_t rhs = MakeUInt128(0x1668039626579787ULL, 0x7185560845529654ULL);
  ASSERT_EQ(AsmPmul(lhs, rhs), MakeUInt128(0xd0d00f18f4095e25ULL, 0x0000000000000000ULL));
}
4281 
TEST(Arm64InsnTest, PolynomialMultiplyLongU8x8) {
  // PMULL: carry-less multiply of the eight low 8-bit lanes, widening each
  // product to 16 bits.
  constexpr auto AsmPmull = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("pmull %0.8h, %1.8b, %2.8b");
  __uint128_t lhs = MakeUInt128(0x1327656180937734ULL, 0x4403070746921120ULL);
  __uint128_t rhs = MakeUInt128(0x9838952286847831ULL, 0x2355265821314495ULL);
  ASSERT_EQ(AsmPmull(lhs, rhs), MakeUInt128(0x43004bcc17e805f4ULL, 0x082807a835210ce2ULL));
}
4288 
TEST(Arm64InsnTest, PolynomialMultiplyLongU8x8Upper) {
  // PMULL2 performs the widening carry-less multiply on the upper eight
  // 8-bit lanes of the sources.
  constexpr auto AsmPmull2 = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("pmull2 %0.8h, %1.16b, %2.16b");
  __uint128_t lhs = MakeUInt128(0x4439658253375438ULL, 0x8569094113031509ULL);
  __uint128_t rhs = MakeUInt128(0x1865619673378623ULL, 0x6256125216320862ULL);
  ASSERT_EQ(AsmPmull2(lhs, rhs), MakeUInt128(0x015a005600a80372ULL, 0x30ea1da6008214d2ULL));
}
4295 
TEST(Arm64InsnTest, PolynomialMultiplyLongU64x2) {
  // PMULL .1q: carry-less multiply of the two low 64-bit lanes into a full
  // 128-bit product.
  constexpr auto AsmPmull = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("pmull %0.1q, %1.1d, %2.1d");
  __uint128_t lhs = MakeUInt128(0x1000100010001000ULL, 0xffffeeeeffffeeeeULL);
  __uint128_t rhs = MakeUInt128(0x10001ULL, 0xffffeeeeffffeeeeULL);
  ASSERT_EQ(AsmPmull(lhs, rhs), MakeUInt128(0x1000ULL, 0x1000ULL));
}
4302 
TEST(Arm64InsnTest, PolynomialMultiplyLongU64x2Upper) {
  // PMULL2 .1q: carry-less multiply of the two upper 64-bit lanes into a
  // full 128-bit product (inputs mirror the non-Upper test, halves swapped).
  constexpr auto AsmPmull2 = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("pmull2 %0.1q, %1.2d, %2.2d");
  __uint128_t lhs = MakeUInt128(0xffffeeeeffffeeeeULL, 0x1000100010001000ULL);
  __uint128_t rhs = MakeUInt128(0xffffeeeeffffeeeeULL, 0x10001ULL);
  ASSERT_EQ(AsmPmull2(lhs, rhs), MakeUInt128(0x1000ULL, 0x1000ULL));
}
4309 
TEST(Arm64InsnTest, PairwiseAddInt8x16) {
  // ADDP adds adjacent 8-bit lane pairs; results from the first source fill
  // the low half of the destination, from the second source the high half.
  constexpr auto AsmAddp = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("addp %0.16b, %1.16b, %2.16b");
  __uint128_t lhs = MakeUInt128(0x7766554433221100ULL, 0xffeeddccbbaa9988ULL);
  __uint128_t rhs = MakeUInt128(0x0706050403020100ULL, 0x0f0e0d0c0b0a0908ULL);
  ASSERT_EQ(AsmAddp(lhs, rhs), MakeUInt128(0xeda96521dd995511ULL, 0x1d1915110d090501ULL));
}
4316 
TEST(Arm64InsnTest, PairwiseAddInt8x8) {
  // 64-bit-wide ADDP: pairwise sums of the low 8 bytes of both sources land
  // in the low half; the upper half of the destination is cleared.
  constexpr auto AsmAddp = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("addp %0.8b, %1.8b, %2.8b");
  __uint128_t lhs = MakeUInt128(0x7766554433221100ULL, 0xffeeddccbbaa9988ULL);
  __uint128_t rhs = MakeUInt128(0x0706050403020100ULL, 0x0f0e0d0c0b0a0908ULL);
  ASSERT_EQ(AsmAddp(lhs, rhs), MakeUInt128(0x0d090501dd995511ULL, 0));
}
4323 
TEST(Arm64InsnTest, PairwiseAddInt64x2) {
  // ADDP .2d sums the two lanes of each source: 1+2 = 3 and 3+4 = 7.
  constexpr auto AsmAddp = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("addp %0.2d, %1.2d, %2.2d");
  __uint128_t lhs = MakeUInt128(1ULL, 2ULL);
  __uint128_t rhs = MakeUInt128(3ULL, 4ULL);
  ASSERT_EQ(AsmAddp(lhs, rhs), MakeUInt128(3ULL, 7ULL));
}
4330 
TEST(Arm64InsnTest, CompareEqualInt8x16) {
  // CMEQ sets a lane to all-ones where the 8-bit lanes match, zero elsewhere.
  constexpr auto AsmCmeq = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("cmeq %0.16b, %1.16b, %2.16b");
  __uint128_t lhs = MakeUInt128(0x9375195778185778ULL, 0x0532171226806613ULL);
  __uint128_t rhs = MakeUInt128(0x9371595778815787ULL, 0x0352172126068613ULL);
  ASSERT_EQ(AsmCmeq(lhs, rhs), MakeUInt128(0xff0000ffff00ff00ULL, 0x0000ff00ff0000ffULL));
}
4337 
TEST(Arm64InsnTest, CompareEqualInt8x8) {
  // 64-bit-wide CMEQ: only the low 8 lanes are compared; upper half cleared.
  constexpr auto AsmCmeq = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("cmeq %0.8b, %1.8b, %2.8b");
  __uint128_t lhs = MakeUInt128(0x9375195778185778ULL, 0x0532171226806613ULL);
  __uint128_t rhs = MakeUInt128(0x9371595778815787ULL, 0x0352172126068613ULL);
  ASSERT_EQ(AsmCmeq(lhs, rhs), MakeUInt128(0xff0000ffff00ff00ULL, 0));
}
4344 
TEST(Arm64InsnTest, CompareEqualInt16x4) {
  // CMEQ over four 16-bit lanes: matching lanes become 0xffff.
  constexpr auto AsmCmeq = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("cmeq %0.4h, %1.4h, %2.4h");
  __uint128_t lhs = MakeUInt128(0x4444333322221111ULL, 0);
  __uint128_t rhs = MakeUInt128(0x8888333300001111ULL, 0);
  ASSERT_EQ(AsmCmeq(lhs, rhs), MakeUInt128(0x0000ffff0000ffffULL, 0));
}
4351 
TEST(Arm64InsnTest, CompareEqualInt64x1) {
  // Scalar CMEQ compares only the low 64 bits: all-ones on match, zero
  // otherwise; the upper half of the result is always zero.
  constexpr auto CmeqD = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("cmeq %d0, %d1, %d2");
  __uint128_t base = MakeUInt128(0x8297455570674983ULL, 0x8505494588586926ULL);
  __uint128_t different = MakeUInt128(0x0665499119881834ULL, 0x7977535663698074ULL);
  // Same low half as base; the differing high halves must be ignored.
  __uint128_t same_low = MakeUInt128(0x8297455570674983ULL, 0x1452491921732172ULL);
  ASSERT_EQ(CmeqD(base, different), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_EQ(CmeqD(base, same_low), MakeUInt128(0xffffffffffffffffULL, 0x0000000000000000ULL));
}
4360 
TEST(Arm64InsnTest, CompareEqualZeroInt64x1) {
  // Scalar CMEQ-with-zero: all-ones iff the low 64 bits are zero; the high
  // half of the input must be ignored.
  constexpr auto CmeqZero = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("cmeq %d0, %d1, #0");
  __uint128_t nonzero_low = MakeUInt128(0x6517166776672793ULL, 0x0354851542040238ULL);
  __uint128_t zero_low = MakeUInt128(0x0000000000000000ULL, 0x1746089232839170ULL);
  ASSERT_EQ(CmeqZero(nonzero_low), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_EQ(CmeqZero(zero_low), MakeUInt128(0xffffffffffffffffULL, 0x0000000000000000ULL));
}
4368 
TEST(Arm64InsnTest, CompareEqualZeroInt8x16) {
  // CMEQ-with-zero: every zero byte lane becomes 0xff.
  constexpr auto CmeqZero = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("cmeq %0.16b, %1.16b, #0");
  __uint128_t input = MakeUInt128(0x0000555500332200ULL, 0x0000000077001100ULL);
  ASSERT_EQ(CmeqZero(input), MakeUInt128(0xffff0000ff0000ffULL, 0xffffffff00ff00ffULL));
}
4374 
TEST(Arm64InsnTest, CompareEqualZeroInt8x8) {
  // 64-bit-wide CMEQ-with-zero: only the low 8 lanes are tested.
  constexpr auto CmeqZero = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("cmeq %0.8b, %1.8b, #0");
  __uint128_t input = MakeUInt128(0x001122330000aaaaULL, 0xdeadbeef0000cafeULL);
  ASSERT_EQ(CmeqZero(input), MakeUInt128(0xff000000ffff0000ULL, 0));
}
4380 
TEST(Arm64InsnTest, CompareGreaterInt64x1) {
  // Scalar signed greater-than on the low 64 bits.
  constexpr auto CmgtD = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("cmgt %d0, %d1, %d2");
  __uint128_t base = MakeUInt128(0x1976668559233565ULL, 0x4639138363185745ULL);
  __uint128_t larger = MakeUInt128(0x3474940784884423ULL, 0x7721751543342603ULL);
  __uint128_t equal_low = MakeUInt128(0x1976668559233565ULL, 0x8183196376370761ULL);
  __uint128_t negative = MakeUInt128(0x9243530136776310ULL, 0x8491351615642269ULL);
  ASSERT_EQ(CmgtD(base, larger), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_EQ(CmgtD(base, equal_low), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_EQ(CmgtD(base, negative), MakeUInt128(0xffffffffffffffffULL, 0x0000000000000000ULL));
}
4391 
TEST(Arm64InsnTest, CompareGreaterZeroInt64x1) {
  // Scalar signed compare against zero: positive, zero, and negative low
  // halves (high halves vary and must be ignored).
  constexpr auto CmgtZero = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("cmgt %d0, %d1, #0");
  __uint128_t positive = MakeUInt128(0x6517166776672793ULL, 0x0354851542040238ULL);
  __uint128_t zero = MakeUInt128(0x0000000000000000ULL, 0x6174599705674507ULL);
  __uint128_t negative = MakeUInt128(0x9592057668278967ULL, 0x7644531840404185ULL);
  ASSERT_EQ(CmgtZero(positive), MakeUInt128(0xffffffffffffffffULL, 0x0000000000000000ULL));
  ASSERT_EQ(CmgtZero(zero), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_EQ(CmgtZero(negative), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
}
4401 
TEST(Arm64InsnTest, CompareGreaterThanZeroInt8x16) {
  // Signed per-byte compare against zero: only strictly positive lanes set.
  constexpr auto CmgtZero = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("cmgt %0.16b, %1.16b, #0");
  __uint128_t input = MakeUInt128(0x807fff00017efe02ULL, 0xff7f80000102fe02ULL);
  ASSERT_EQ(CmgtZero(input), MakeUInt128(0x00ff0000ffff00ffULL, 0x00ff0000ffff00ffULL));
}
4407 
TEST(Arm64InsnTest, CompareGreaterThanZeroInt8x8) {
  // 64-bit-wide form: only the low 8 byte lanes are tested; upper half cleared.
  constexpr auto CmgtZero = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("cmgt %0.8b, %1.8b, #0");
  __uint128_t input = MakeUInt128(0x00ff7f80017efe00ULL, 0x0000cafedeadbeefULL);
  ASSERT_EQ(CmgtZero(input), MakeUInt128(0x0000ff00ffff0000ULL, 0));
}
4413 
TEST(Arm64InsnTest, CompareGreaterThanInt16x8) {
  // Signed per-16-bit-lane greater-than compare.
  __uint128_t arg1 = MakeUInt128(0x9789389001852956ULL, 0x9196780455448285ULL);
  // The second constant originally lacked the ULL suffix used by every other
  // literal in this file; value unchanged.
  __uint128_t arg2 = MakeUInt128(0x7269389081795897ULL, 0x5469399264218285ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("cmgt %0.8h, %1.8h, %2.8h")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x00000000ffff0000ULL, 0x0000ffff00000000ULL));
}
4420 
TEST(Arm64InsnTest, CompareGreaterThanInt32x4) {
  // Signed per-lane compare: 0xffffffff lanes are -1, so they are never
  // greater than zero lanes while zero lanes beat them.
  constexpr auto AsmCmgt = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("cmgt %0.4s, %1.4s, %2.4s");
  __uint128_t lhs = MakeUInt128(0x0000'0000'ffff'ffffULL, 0xffff'ffff'0000'0000ULL);
  __uint128_t rhs = MakeUInt128(0xffff'ffff'0000'0000ULL, 0x0000'0000'ffff'ffffULL);
  ASSERT_EQ(AsmCmgt(lhs, rhs), MakeUInt128(0xffff'ffff'0000'0000ULL, 0x0000'0000'ffff'ffffULL));
}
4427 
TEST(Arm64InsnTest, CompareLessZeroInt64x1) {
  // Scalar signed less-than-zero: only a negative low half sets the result.
  constexpr auto CmltZero = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("cmlt %d0, %d1, #0");
  __uint128_t positive = MakeUInt128(0x4784264567633881ULL, 0x8807565612168960ULL);
  __uint128_t zero = MakeUInt128(0x0000000000000000ULL, 0x8955999911209916ULL);
  __uint128_t negative = MakeUInt128(0x9364610175685060ULL, 0x1671453543158148ULL);
  ASSERT_EQ(CmltZero(positive), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_EQ(CmltZero(zero), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_EQ(CmltZero(negative), MakeUInt128(0xffffffffffffffffULL, 0x0000000000000000ULL));
}
4437 
TEST(Arm64InsnTest, CompareLessThanZeroInt8x16) {
  // Signed per-byte less-than-zero: lanes with the sign bit set become 0xff.
  constexpr auto CmltZero = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("cmlt %0.16b, %1.16b, #0");
  __uint128_t input = MakeUInt128(0xff00017ffe020180ULL, 0x0001027e7ffeff80ULL);
  ASSERT_EQ(CmltZero(input), MakeUInt128(0xff000000ff0000ffULL, 0x0000000000ffffffULL));
}
4443 
TEST(Arm64InsnTest, CompareLessThanZeroInt8x8) {
  // 64-bit-wide form: only the low 8 byte lanes are tested; upper half cleared.
  constexpr auto CmltZero = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("cmlt %0.8b, %1.8b, #0");
  __uint128_t input = MakeUInt128(0x0002017e7fff8000ULL, 0x001100220000ffffULL);
  ASSERT_EQ(CmltZero(input), MakeUInt128(0x0000000000ffff00ULL, 0));
}
4449 
TEST(Arm64InsnTest, CompareGreaterThanEqualInt64x1) {
  // Scalar signed greater-or-equal on the low 64 bits.
  constexpr auto CmgeD = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("cmge %d0, %d1, %d2");
  __uint128_t base = MakeUInt128(0x1009391369138107ULL, 0x2581378135789400ULL);
  __uint128_t larger = MakeUInt128(0x5890939568814856ULL, 0x0263224393726562ULL);
  __uint128_t equal_low = MakeUInt128(0x1009391369138107ULL, 0x5511995818319637ULL);
  __uint128_t negative = MakeUInt128(0x9427141009391369ULL, 0x1381072581378135ULL);
  ASSERT_EQ(CmgeD(base, larger), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_EQ(CmgeD(base, equal_low), MakeUInt128(0xffffffffffffffffULL, 0x0000000000000000ULL));
  ASSERT_EQ(CmgeD(base, negative), MakeUInt128(0xffffffffffffffffULL, 0x0000000000000000ULL));
}
4460 
TEST(Arm64InsnTest, CompareGreaterThanEqualZeroInt64x1) {
  // Scalar signed compare against zero: positive and zero low halves pass,
  // a negative one fails.
  constexpr auto CmgeZero = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("cmge %d0, %d1, #0");
  __uint128_t positive = MakeUInt128(0x5562116715468484ULL, 0x7780394475697980ULL);
  __uint128_t zero = MakeUInt128(0x0000000000000000ULL, 0x3548487562529875ULL);
  __uint128_t negative = MakeUInt128(0x9212366168902596ULL, 0x2730430679316531ULL);
  ASSERT_EQ(CmgeZero(positive), MakeUInt128(0xffffffffffffffffULL, 0x0000000000000000ULL));
  ASSERT_EQ(CmgeZero(zero), MakeUInt128(0xffffffffffffffffULL, 0x0000000000000000ULL));
  ASSERT_EQ(CmgeZero(negative), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
}
4470 
TEST(Arm64InsnTest, CompareGreaterThanEqualZeroInt8x16) {
  // Signed per-byte greater-or-equal zero: non-negative lanes become 0xff.
  constexpr auto CmgeZero = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("cmge %0.16b, %1.16b, #0");
  __uint128_t input = MakeUInt128(0x00ff01027ffe8002ULL, 0x80fffe7f7e020100ULL);
  ASSERT_EQ(CmgeZero(input), MakeUInt128(0xff00ffffff0000ffULL, 0x000000ffffffffffULL));
}
4476 
TEST(Arm64InsnTest, CompareGreaterThanEqualZeroInt8x8) {
  // 64-bit-wide form: only the low 8 byte lanes are tested; upper half cleared.
  constexpr auto CmgeZero = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("cmge %0.8b, %1.8b, #0");
  __uint128_t input = MakeUInt128(0x0001027f80feff00ULL, 0x0011223344556677ULL);
  ASSERT_EQ(CmgeZero(input), MakeUInt128(0xffffffff000000ffULL, 0));
}
4482 
TEST(Arm64InsnTest, CompareGreaterEqualInt16x8) {
  // Signed per-16-bit-lane greater-or-equal compare.
  constexpr auto AsmCmge = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("cmge %0.8h, %1.8h, %2.8h");
  __uint128_t lhs = MakeUInt128(0x4391962838870543ULL, 0x6777432242768091ULL);
  __uint128_t rhs = MakeUInt128(0x4391838548318875ULL, 0x0142432208995068ULL);
  ASSERT_EQ(AsmCmge(lhs, rhs), MakeUInt128(0xffffffff0000ffffULL, 0xffffffffffff0000ULL));
}
4489 
TEST(Arm64InsnTest, CompareLessThanEqualZeroInt64x1) {
  // Scalar signed less-or-equal zero: zero and negative low halves pass.
  constexpr auto CmleZero = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("cmle %d0, %d1, #0");
  __uint128_t positive = MakeUInt128(0x3643296406335728ULL, 0x1070788758164043ULL);
  __uint128_t zero = MakeUInt128(0x0000000000000000ULL, 0x5865720227637840ULL);
  __uint128_t negative = MakeUInt128(0x8694346828590066ULL, 0x6408063140777577ULL);
  ASSERT_EQ(CmleZero(positive), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_EQ(CmleZero(zero), MakeUInt128(0xffffffffffffffffULL, 0x0000000000000000ULL));
  ASSERT_EQ(CmleZero(negative), MakeUInt128(0xffffffffffffffffULL, 0x0000000000000000ULL));
}
4499 
TEST(Arm64InsnTest, CompareLessThanEqualZeroInt8x16) {
  // Signed per-byte less-or-equal zero: zero and negative lanes become 0xff.
  constexpr auto CmleZero = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("cmle %0.16b, %1.16b, #0");
  __uint128_t input = MakeUInt128(0x80fffe7f7e020100ULL, 0x00ff01027ffe8002ULL);
  ASSERT_EQ(CmleZero(input), MakeUInt128(0xffffff00000000ffULL, 0xffff000000ffff00ULL));
}
4505 
TEST(Arm64InsnTest, CompareHigherInt64x1) {
  // Scalar CMHI: unsigned greater-than on the low 64 bits.
  constexpr auto CmhiD = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("cmhi %d0, %d1, %d2");
  __uint128_t base = MakeUInt128(0x1009391369138107ULL, 0x2581378135789400ULL);
  __uint128_t smaller = MakeUInt128(0x0759167297007850ULL, 0x5807171863810549ULL);
  __uint128_t equal_low = MakeUInt128(0x1009391369138107ULL, 0x6026322439372656ULL);
  __uint128_t larger = MakeUInt128(0x9087839523245323ULL, 0x7896029841669225ULL);
  ASSERT_EQ(CmhiD(base, smaller), MakeUInt128(0xffffffffffffffffULL, 0x0000000000000000ULL));
  ASSERT_EQ(CmhiD(base, equal_low), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_EQ(CmhiD(base, larger), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
}
4516 
TEST(Arm64InsnTest, CompareHigherInt16x8) {
  // CMHI: unsigned per-16-bit-lane greater-than compare.
  constexpr auto AsmCmhi = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("cmhi %0.8h, %1.8h, %2.8h");
  __uint128_t lhs = MakeUInt128(0x6517166776672793ULL, 0x0354851542040238ULL);
  __uint128_t rhs = MakeUInt128(0x2057166778967764ULL, 0x4531840442045540ULL);
  ASSERT_EQ(AsmCmhi(lhs, rhs), MakeUInt128(0xffff000000000000ULL, 0x0000ffff00000000ULL));
}
4523 
TEST(Arm64InsnTest, CompareHigherInt32x4) {
  // Unsigned per-lane compare: 0xffffffff is the largest value, so lanes of
  // all-ones beat zero lanes (the opposite of the signed CMGT test).
  constexpr auto AsmCmhi = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("cmhi %0.4s, %1.4s, %2.4s");
  __uint128_t lhs = MakeUInt128(0x0000'0000'ffff'ffffULL, 0xffff'ffff'0000'0000ULL);
  __uint128_t rhs = MakeUInt128(0xffff'ffff'0000'0000ULL, 0x0000'0000'ffff'ffffULL);
  ASSERT_EQ(AsmCmhi(lhs, rhs), MakeUInt128(0x0000'0000'ffff'ffffULL, 0xffff'ffff'0000'0000ULL));
}
4530 
TEST(Arm64InsnTest, CompareHigherSameInt64x1) {
  // Scalar CMHS: unsigned greater-or-equal on the low 64 bits.
  constexpr auto CmhsD = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("cmhs %d0, %d1, %d2");
  __uint128_t base = MakeUInt128(0x3529566139788848ULL, 0x6050978608595701ULL);
  __uint128_t smaller = MakeUInt128(0x1769845875810446ULL, 0x6283998806006162ULL);
  __uint128_t equal_low = MakeUInt128(0x3529566139788848ULL, 0x9001852956919678ULL);
  __uint128_t larger = MakeUInt128(0x9628388705436777ULL, 0x4322427680913236ULL);
  ASSERT_EQ(CmhsD(base, smaller), MakeUInt128(0xffffffffffffffffULL, 0x0000000000000000ULL));
  ASSERT_EQ(CmhsD(base, equal_low), MakeUInt128(0xffffffffffffffffULL, 0x0000000000000000ULL));
  ASSERT_EQ(CmhsD(base, larger), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
}
4541 
TEST(Arm64InsnTest, CompareHigherSameInt16x8) {
  // CMHS: unsigned per-16-bit-lane greater-or-equal compare.
  constexpr auto AsmCmhs = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("cmhs %0.8h, %1.8h, %2.8h");
  __uint128_t lhs = MakeUInt128(0x4599705674507183ULL, 0x3206503455664403ULL);
  __uint128_t rhs = MakeUInt128(0x4264705633881880ULL, 0x3206612168960504ULL);
  ASSERT_EQ(AsmCmhs(lhs, rhs), MakeUInt128(0xffffffffffffffffULL, 0xffff00000000ffffULL));
}
4548 
TEST(Arm64InsnTest, CompareLessThanEqualZeroInt8x8) {
  // 64-bit-wide CMLE-with-zero: only the low 8 byte lanes are tested.
  constexpr auto CmleZero = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("cmle %0.8b, %1.8b, #0");
  __uint128_t input = MakeUInt128(0x00fffe807f020100ULL, 0x00aabbccddeeff00ULL);
  ASSERT_EQ(CmleZero(input), MakeUInt128(0xffffffff000000ffULL, 0));
}
4554 
TEST(Arm64InsnTest, TestInt64x1) {
  // Scalar CMTST: all-ones iff the bitwise AND of the low 64 bits is nonzero.
  constexpr auto CmtstD = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("cmtst %d0, %d1, %d2");
  __uint128_t base = MakeUInt128(0xaaaaaaaa55555555ULL, 0x7698385483188750ULL);
  __uint128_t disjoint = MakeUInt128(0x55555555aaaaaaaaULL, 0x1429389089950685ULL);
  __uint128_t overlapping = MakeUInt128(0xaa00aa0055005500ULL, 0x4530765116803337ULL);
  ASSERT_EQ(CmtstD(base, disjoint), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_EQ(CmtstD(base, overlapping), MakeUInt128(0xffffffffffffffffULL, 0x0000000000000000ULL));
}
4563 
TEST(Arm64InsnTest, TestInt16x8) {
  // CMTST per 16-bit lane: a lane is set when the two sources share any bit.
  constexpr auto AsmCmtst = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("cmtst %0.8h, %1.8h, %2.8h");
  __uint128_t lhs = MakeUInt128(0x5999911209916464ULL, 0x6441191856827700ULL);
  __uint128_t rhs = MakeUInt128(0x6101756850601671ULL, 0x4535431581480105ULL);
  ASSERT_EQ(AsmCmtst(lhs, rhs), MakeUInt128(0xffffffff0000ffffULL, 0xffffffff0000ffffULL));
}
4570 
TEST(Arm64InsnTest, ExtractVectorFromPair) {
  // EXT with index 8 extracts the middle 16 bytes of the 32-byte pair:
  // the high half of the first operand plus the low half of the second.
  constexpr auto AsmExt = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("ext %0.16b, %1.16b, %2.16b, #8");
  __uint128_t low_part = MakeUInt128(0x0011223344556677ULL, 0x8899aabbccddeeffULL);
  __uint128_t high_part = MakeUInt128(0x0001020304050607ULL, 0x08090a0b0c0d0e0fULL);
  ASSERT_EQ(AsmExt(low_part, high_part),
            MakeUInt128(0x8899aabbccddeeffULL, 0x0001020304050607ULL));
}
4577 
TEST(Arm64InsnTest, ExtractVectorFromPairHalfWidth) {
  // 64-bit-wide EXT: take 8 bytes starting at byte 3 of the concatenated
  // low halves of the two sources; the upper half of the result is cleared.
  constexpr auto AsmExt = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("ext %0.8b, %1.8b, %2.8b, #3");
  __uint128_t low_part = MakeUInt128(0x8138268683868942ULL, 0x7741559918559252ULL);
  __uint128_t high_part = MakeUInt128(0x3622262609912460ULL, 0x8051243884390451ULL);
  ASSERT_EQ(AsmExt(low_part, high_part),
            MakeUInt128(0x9124608138268683ULL, 0x0000000000000000ULL));
}
4584 
TEST(Arm64InsnTest, ExtractVectorFromPairHalfWidthPosition1) {
  // Same as above but extracting from byte offset 1.
  constexpr auto AsmExt = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("ext %0.8b, %1.8b, %2.8b, #1");
  __uint128_t low_part = MakeUInt128(0x9471329621073404ULL, 0x3751895735961458ULL);
  __uint128_t high_part = MakeUInt128(0x9048010941214722ULL, 0x1317947647772622ULL);
  ASSERT_EQ(AsmExt(low_part, high_part),
            MakeUInt128(0x2294713296210734ULL, 0x0000000000000000ULL));
}
4591 
TEST(Arm64InsnTest, Load1OneI8x8) {
  // LD1 of a single 8-byte vector: the loaded value must appear
  // zero-extended in the 128-bit destination register.
  static constexpr uint64_t mem = 0x8867915896904956ULL;
  __uint128_t loaded;
  asm("ld1 {%0.8b}, [%1]" : "=w"(loaded) : "r"(&mem) : "memory");
  ASSERT_EQ(loaded, mem);
}
4598 
TEST(Arm64InsnTest, Load1ThreeI8x8) {
  // LD1 with a three-register list requires consecutive SIMD registers, which
  // cannot be expressed via operand constraints; v0-v2 are hardcoded and the
  // loaded values are copied out through "mov" into the output operands.
  static constexpr uint64_t arg[3] = {
      0x3415354584283376ULL, 0x4378111988556318ULL, 0x7777925372011667ULL};
  __uint128_t res[3];
  asm("ld1 {v0.8b-v2.8b}, [%3]\n\t"
      "mov %0.16b, v0.16b\n\t"
      "mov %1.16b, v1.16b\n\t"
      "mov %2.16b, v2.16b"
      : "=w"(res[0]), "=w"(res[1]), "=w"(res[2])
      : "r"(arg)
      : "v0", "v1", "v2", "memory");
  // Each 8-byte load must appear zero-extended in its 128-bit destination.
  ASSERT_EQ(res[0], static_cast<__uint128_t>(arg[0]));
  ASSERT_EQ(res[1], static_cast<__uint128_t>(arg[1]));
  ASSERT_EQ(res[2], static_cast<__uint128_t>(arg[2]));
}
4614 
TEST(Arm64InsnTest, Load1FourI8x8) {
  // LD1 with a four-register list; as in Load1ThreeI8x8, consecutive SIMD
  // registers v0-v3 are hardcoded and copied out via "mov".
  static constexpr uint64_t arg[4] = {
      0x9523688483099930ULL,
      0x2757419916463841ULL,
      0x4270779887088742ULL,
      0x2927705389122717ULL,
  };
  __uint128_t res[4];
  asm("ld1 {v0.8b-v3.8b}, [%4]\n\t"
      "mov %0.16b, v0.16b\n\t"
      "mov %1.16b, v1.16b\n\t"
      "mov %2.16b, v2.16b\n\t"
      "mov %3.16b, v3.16b"
      : "=w"(res[0]), "=w"(res[1]), "=w"(res[2]), "=w"(res[3])
      : "r"(arg)
      : "v0", "v1", "v2", "v3", "memory");
  // Each 8-byte load must appear zero-extended in its 128-bit destination.
  ASSERT_EQ(res[0], static_cast<__uint128_t>(arg[0]));
  ASSERT_EQ(res[1], static_cast<__uint128_t>(arg[1]));
  ASSERT_EQ(res[2], static_cast<__uint128_t>(arg[2]));
  ASSERT_EQ(res[3], static_cast<__uint128_t>(arg[3]));
}
4636 
TEST(Arm64InsnTest, Store1OneI8x16) {
  // ST1 of a full 16-byte vector writes all 128 bits to memory.
  static constexpr __uint128_t value = MakeUInt128(0x7642291583425006ULL, 0x7361245384916067ULL);
  __uint128_t stored;
  asm("st1 {%0.16b}, [%1]" : : "w"(value), "r"(&stored) : "memory");
  ASSERT_EQ(stored, value);
}
4643 
TEST(Arm64InsnTest, Store1ThreeI8x8) {
  // ST1 with a three-register list requires consecutive SIMD registers;
  // inputs are first moved into the hardcoded v0-v2, then stored.
  static constexpr uint64_t arg[3] = {
      0x3086436111389069ULL, 0x4202790881431194ULL, 0x4879941715404210ULL};
  uint64_t res[3];
  asm("mov v0.16b, %0.16b\n\t"
      "mov v1.16b, %1.16b\n\t"
      "mov v2.16b, %2.16b\n\t"
      "st1 {v0.8b-v2.8b}, [%3]"
      :
      : "w"(arg[0]), "w"(arg[1]), "w"(arg[2]), "r"(res)
      : "v0", "v1", "v2", "memory");
  // Each register's low 8 bytes must land in the corresponding array slot.
  ASSERT_EQ(res[0], arg[0]);
  ASSERT_EQ(res[1], arg[1]);
  ASSERT_EQ(res[2], arg[2]);
}
4659 
TEST(Arm64InsnTest, Store1FourI8x8) {
  // ST1 with a four-register list; inputs are moved into the hardcoded
  // consecutive registers v0-v3 before the store.
  static constexpr uint64_t arg[4] = {
      0x8954750448339314ULL, 0x6896307633966572ULL, 0x2672704339321674ULL, 0x5421824557062524ULL};
  uint64_t res[4];
  asm("mov v0.16b, %0.16b\n\t"
      "mov v1.16b, %1.16b\n\t"
      "mov v2.16b, %2.16b\n\t"
      "mov v3.16b, %3.16b\n\t"
      "st1 {v0.8b-v3.8b}, [%4]"
      :
      : "w"(arg[0]), "w"(arg[1]), "w"(arg[2]), "w"(arg[3]), "r"(res)
      : "v0", "v1", "v2", "v3", "memory");
  // Each register's low 8 bytes must land in the corresponding array slot.
  ASSERT_EQ(res[0], arg[0]);
  ASSERT_EQ(res[1], arg[1]);
  ASSERT_EQ(res[2], arg[2]);
  ASSERT_EQ(res[3], arg[3]);
}
4677 
// Verifies LD1 (multiple structures, two registers) with immediate post-index
// writeback: loads two 16-byte vectors and advances the address by #32.
TEST(Arm64InsnTest, Load1TwoPostIndex) {
  __uint128_t op0 = MakeUInt128(0x5499119881834797ULL, 0x0507922796892589ULL);
  __uint128_t op1 = MakeUInt128(0x0511854807446237ULL, 0x6691368672287489ULL);
  __uint128_t array[] = {
      op0,
      op1,
  };
  __uint128_t* addr = &array[0];
  __uint128_t res0 = 0;
  __uint128_t res1 = 0;

  // The "memory" below ensures that the array contents are up to date.  Without it, the
  // compiler might decide to initialize the array after the asm statement.
  //
  // We hardcode SIMD registers v0 and v1 below because there is no other way to express
  // consecutive registers, which in turn requires the mov instructions to retrieve the
  // loaded values into res0 and res1.
  asm("ld1 {v0.16b, v1.16b}, [%2], #32\n\t"
      "mov %0.16b, v0.16b\n\t"
      "mov %1.16b, v1.16b"
      : "=w"(res0), "=w"(res1), "+r"(addr)
      :
      : "v0", "v1", "memory");

  ASSERT_EQ(res0, op0);
  ASSERT_EQ(res1, op1);
  // "+r"(addr) makes the post-index writeback observable.
  ASSERT_EQ(addr, &array[2]);
}
4706 
// Verifies LD1 (one register) with register post-index: the base address is
// advanced by the value of a general-purpose register (32 here), independent
// of the transfer size.
TEST(Arm64InsnTest, Load1OnePostIndexReg) {
  static constexpr __uint128_t arg = MakeUInt128(0x4884761005564018ULL, 0x2423921926950620ULL);
  __uint128_t res_val;
  uint64_t res_addr;
  // "1"(&arg) ties the initial address to output operand %1 so that the
  // written-back address can be read out through res_addr.
  asm("ld1 {%0.16b}, [%1], %2"
      : "=w"(res_val), "=r"(res_addr)
      : "r"(static_cast<uint64_t>(32U)), "1"(&arg)
      : "memory");
  ASSERT_EQ(res_val, arg);
  ASSERT_EQ(res_addr, reinterpret_cast<uint64_t>(&arg) + 32);
}
4718 
// Verifies LD1 (single structure): loads one byte from memory into lane 3 of
// the destination vector while all other lanes keep their previous contents.
TEST(Arm64InsnTest, LoadSingleInt8) {
  static constexpr __uint128_t reg_before =
      MakeUInt128(0x0011223344556677ULL, 0x8899aabbccddeeffULL);
  static constexpr __uint128_t mem_src = MakeUInt128(0x0102030405060708ULL, 0x090a0b0c0d0e0f10ULL);
  __uint128_t reg_after;
  // "0"(reg_before) preloads the output register so lane merging is checked.
  asm("ld1 {%0.b}[3], [%1]" : "=w"(reg_after) : "r"(&mem_src), "0"(reg_before) : "memory");
  ASSERT_EQ(reg_after, MakeUInt128(0x00112233'08'556677ULL, 0x8899aabbccddeeffULL));
}
4727 
// Verifies LD1 (single structure): loads one halfword into lane 2, leaving
// the other halfword lanes unchanged.
TEST(Arm64InsnTest, LoadSingleInt16) {
  static constexpr __uint128_t reg_before =
      MakeUInt128(0x0000111122223333ULL, 0x4444555566667777ULL);
  static constexpr __uint128_t mem_src = MakeUInt128(0x0102030405060708ULL, 0x090a0b0c0d0e0f10ULL);
  __uint128_t reg_after;
  // "0"(reg_before) preloads the output register so lane merging is checked.
  asm("ld1 {%0.h}[2], [%1]" : "=w"(reg_after) : "r"(&mem_src), "0"(reg_before) : "memory");
  ASSERT_EQ(reg_after, MakeUInt128(0x0000'0708'22223333ULL, 0x4444555566667777ULL));
}
4736 
// Verifies LD1 (single structure): loads one word into lane 1, leaving the
// other word lanes unchanged.
TEST(Arm64InsnTest, LoadSingleInt32) {
  static constexpr __uint128_t reg_before =
      MakeUInt128(0x0000000011111111ULL, 0x2222222233333333ULL);
  static constexpr __uint128_t mem_src = MakeUInt128(0x0102030405060708ULL, 0x090a0b0c0d0e0f10ULL);
  __uint128_t reg_after;
  // "0"(reg_before) preloads the output register so lane merging is checked.
  asm("ld1 {%0.s}[1], [%1]" : "=w"(reg_after) : "r"(&mem_src), "0"(reg_before) : "memory");
  ASSERT_EQ(reg_after, MakeUInt128(0x0506070811111111ULL, 0x2222222233333333ULL));
}
4745 
// Verifies LD1 (single structure): loads one doubleword into lane 1, leaving
// lane 0 unchanged.
TEST(Arm64InsnTest, LoadSingleInt64) {
  static constexpr __uint128_t reg_before =
      MakeUInt128(0x0000000000000000ULL, 0x1111111111111111ULL);
  static constexpr __uint128_t mem_src = MakeUInt128(0x0102030405060708ULL, 0x090a0b0c0d0e0f10ULL);
  __uint128_t reg_after;
  // "0"(reg_before) preloads the output register so lane merging is checked.
  asm("ld1 {%0.d}[1], [%1]" : "=w"(reg_after) : "r"(&mem_src), "0"(reg_before) : "memory");
  ASSERT_EQ(reg_after, MakeUInt128(0x0000000000000000ULL, 0x0102030405060708ULL));
}
4754 
// Verifies ST1 (single structure): stores only byte lane 3 of the source
// (0x05) to memory; the rest of the destination is untouched.
TEST(Arm64InsnTest, StoreSingleInt8) {
  static constexpr __uint128_t arg = MakeUInt128(0x0102030405060708ULL, 0x090a0b0c0d0e0f10ULL);
  __uint128_t mem_dest = MakeUInt128(0x0011223344556677ULL, 0x8899aabbccddeeffULL);
  asm("st1 {%1.b}[3], [%0]" : : "r"(&mem_dest), "w"(arg) : "memory");
  ASSERT_EQ(mem_dest, MakeUInt128(0x00112233445566'05ULL, 0x8899aabbccddeeffULL));
}
4761 
// Verifies ST1 (single structure): stores only halfword lane 5 of the source
// (0x0d0e) to memory; the rest of the destination is untouched.
TEST(Arm64InsnTest, StoreSingleInt16) {
  static constexpr __uint128_t arg = MakeUInt128(0x0102030405060708ULL, 0x090a0b0c0d0e0f10ULL);
  __uint128_t mem_dest = MakeUInt128(0x0000111122223333ULL, 0x4444555566667777ULL);
  asm("st1 {%1.h}[5], [%0]" : : "r"(&mem_dest), "w"(arg) : "memory");
  ASSERT_EQ(mem_dest, MakeUInt128(0x000011112222'0d0eULL, 0x4444555566667777ULL));
}
4768 
// Verifies ST1 (single structure): stores only word lane 2 of the source
// (0x0d0e0f10) to memory; the rest of the destination is untouched.
TEST(Arm64InsnTest, StoreSingleInt32) {
  static constexpr __uint128_t arg = MakeUInt128(0x0102030405060708ULL, 0x090a0b0c0d0e0f10ULL);
  __uint128_t mem_dest = MakeUInt128(0x0000000011111111ULL, 0x2222222233333333ULL);
  asm("st1 {%1.s}[2], [%0]" : : "r"(&mem_dest), "w"(arg) : "memory");
  ASSERT_EQ(mem_dest, MakeUInt128(0x000000000'd0e0f10ULL, 0x2222222233333333ULL));
}
4775 
// Verifies ST1 (single structure): stores only doubleword lane 1 of the
// source to memory; the upper half of the destination is untouched.
TEST(Arm64InsnTest, StoreSingleInt64) {
  static constexpr __uint128_t arg = MakeUInt128(0x0102030405060708ULL, 0x090a0b0c0d0e0f10ULL);
  __uint128_t mem_dest = MakeUInt128(0x0000000000000000ULL, 0x1111111111111111ULL);
  asm("st1 {%1.d}[1], [%0]" : : "r"(&mem_dest), "w"(arg) : "memory");
  ASSERT_EQ(mem_dest, MakeUInt128(0x090a0b0c0d0e0f10ULL, 0x1111111111111111ULL));
}
4782 
// Verifies LD1 (single structure) with immediate post-index: loads the first
// byte of arg2 (0x79) into lane 3 of the vector initialized from arg1, then
// advances the address by #1.
TEST(Arm64InsnTest, LoadSinglePostIndexImmInt8) {
  static constexpr __uint128_t arg1 = MakeUInt128(0x5494167594605487ULL, 0x1172359464291058ULL);
  static constexpr __uint128_t arg2 = MakeUInt128(0x5090995021495879ULL, 0x3112196135908315ULL);
  __uint128_t res;
  uint8_t* addr;
  // "0"(arg1) preloads the output vector; "1"(&arg2) ties the initial address
  // to the address output so the writeback is observable.
  asm("ld1 {%0.b}[3], [%1], #1" : "=w"(res), "=r"(addr) : "0"(arg1), "1"(&arg2) : "memory");
  ASSERT_EQ(res, MakeUInt128(0x5494167579605487ULL, 0x1172359464291058ULL));
  ASSERT_EQ(addr, reinterpret_cast<const uint8_t*>(&arg2) + 1);
}
4792 
// Verifies LD1 (single structure) with register post-index: loads the first
// halfword of arg2 (0x1081) into lane 7, then advances the address by the
// value of a general-purpose register (17), not by the transfer size.
TEST(Arm64InsnTest, LoadSinglePostIndexRegInt16) {
  static constexpr __uint128_t arg1 = MakeUInt128(0x0080587824107493ULL, 0x5751488997891173ULL);
  static constexpr __uint128_t arg2 = MakeUInt128(0x9746129320351081ULL, 0x4327032514090304ULL);
  __uint128_t res;
  uint8_t* addr;
  asm("ld1 {%0.h}[7], [%1], %2"
      : "=w"(res), "=r"(addr)
      : "r"(static_cast<uint64_t>(17U)), "0"(arg1), "1"(&arg2)
      : "memory");
  ASSERT_EQ(res, MakeUInt128(0x0080587824107493ULL, 0x1081488997891173ULL));
  ASSERT_EQ(addr, reinterpret_cast<const uint8_t*>(&arg2) + 17);
}
4805 
// Verifies STR (SIMD, post-index) when the address register and the data
// register share the same number (x0 / v0): 16 bytes are stored, then the
// address is incremented by only 8.
TEST(Arm64InsnTest, StoreSimdPostIndex) {
  __uint128_t old_val = MakeUInt128(0x4939965143142980ULL, 0x9190659250937221ULL);
  __uint128_t new_val = MakeUInt128(0x5985261365549781ULL, 0x8931297848216829ULL);
  __uint128_t* addr = &old_val;

  // Verify that the interpreter accepts "str q0, [x0], #8" where the register numbers are
  // the same, when the data register is one of the SIMD registers.
  asm("mov x0, %0\n\t"
      "mov v0.2D, %1.2D\n\t"
      "str q0, [x0], #8\n\t"
      "mov %0, x0"
      : "+r"(addr)
      : "w"(new_val)
      : "v0", "x0", "memory");

  // All 16 bytes of old_val are overwritten, but addr advances by just 8.
  ASSERT_EQ(old_val, MakeUInt128(0x5985261365549781ULL, 0x8931297848216829ULL));
  ASSERT_EQ(reinterpret_cast<uintptr_t>(addr), reinterpret_cast<uintptr_t>(&old_val) + 8);
}
4824 
// Verifies STR of the zero register with writeback on sp: pushes a zeroed
// 16-byte slot, reads the stored zero back, and restores sp.
// NOTE(review): "[sp, #-16]!" is pre-index addressing despite the test name.
TEST(Arm64InsnTest, StoreZeroPostIndex1) {
  uint64_t res;
  asm("str xzr, [sp, #-16]!\n\t"
      "ldr %0, [sp, #0]\n\t"
      "add sp, sp, #16"
      : "=r"(res));
  ASSERT_EQ(res, 0);
}
4833 
// Verifies STP of two SIMD quad registers with pre-index writeback on sp:
// pushes 32 bytes, loads both values back with LDR, and restores sp.
TEST(Arm64InsnTest, StoreZeroPostIndex2) {
  __uint128_t arg1 = MakeUInt128(0x9415573293820485ULL, 0x4212350817391254ULL);
  __uint128_t arg2 = MakeUInt128(0x9749819308714396ULL, 0x6151329420459193ULL);
  __uint128_t res1;
  __uint128_t res2;
  // v30/v31 are hardcoded because stp requires two explicit registers.
  asm("mov v30.16b, %2.16b\n\t"
      "mov v31.16b, %3.16b\n\t"
      "stp q30, q31, [sp, #-32]!\n\t"
      "ldr %q0, [sp, #0]\n\t"
      "ldr %q1, [sp, #16]\n\t"
      "add sp, sp, #32"
      : "=w"(res1), "=w"(res2)
      : "w"(arg1), "w"(arg2)
      : "v30", "v31");

  ASSERT_EQ(res1, arg1);
  ASSERT_EQ(res2, arg2);
}
4852 
// Verifies LD2 (multiple structures, 8x8-bit lanes): de-interleaves 16 bytes
// of two-element structures so even-indexed bytes land in the first register
// and odd-indexed bytes in the second.
TEST(Arm64InsnTest, Load2MultipleInt8x8) {
  static constexpr uint8_t mem[] = {0x02,
                                    0x16,
                                    0x91,
                                    0x83,
                                    0x37,
                                    0x23,
                                    0x68,
                                    0x03,
                                    0x99,
                                    0x02,
                                    0x79,
                                    0x31,
                                    0x60,
                                    0x64,
                                    0x20,
                                    0x43};
  __uint128_t res[2];
  // v0/v1 are hardcoded because the register list must be consecutive.
  asm("ld2 {v0.8b, v1.8b}, [%2]\n\t"
      "mov %0.16b, v0.16b\n\t"
      "mov %1.16b, v1.16b"
      : "=w"(res[0]), "=w"(res[1])
      : "r"(mem)
      : "v0", "v1", "memory");
  ASSERT_EQ(res[0], MakeUInt128(0x2060799968379102ULL, 0U));
  ASSERT_EQ(res[1], MakeUInt128(0x4364310203238316ULL, 0U));
}
4880 
// Verifies LD3 (multiple structures, 8x8-bit lanes): de-interleaves 24 bytes
// of three-element structures into three registers (stride-3 gather).
TEST(Arm64InsnTest, Load3MultipleInt8x8) {
  static constexpr uint8_t mem[3 * 8] = {0x32, 0x87, 0x67, 0x03, 0x80, 0x92, 0x52, 0x16,
                                         0x79, 0x07, 0x57, 0x12, 0x04, 0x06, 0x12, 0x37,
                                         0x59, 0x63, 0x27, 0x68, 0x56, 0x74, 0x84, 0x50};
  __uint128_t res[3];
  // v7-v9 are hardcoded because the register list must be consecutive.
  asm("ld3 {v7.8b-v9.8b}, [%3]\n\t"
      "mov %0.16b, v7.16b\n\t"
      "mov %1.16b, v8.16b\n\t"
      "mov %2.16b, v9.16b"
      : "=w"(res[0]), "=w"(res[1]), "=w"(res[2])
      : "r"(mem)
      : "v7", "v8", "v9", "memory");
  ASSERT_EQ(res[0], MakeUInt128(0x7427370407520332ULL, 0U));
  ASSERT_EQ(res[1], MakeUInt128(0x8468590657168087ULL, 0U));
  ASSERT_EQ(res[2], MakeUInt128(0x5056631212799267ULL, 0U));
}
4897 
// Verifies ST3 (multiple structures, 8x8-bit lanes): interleaves three
// registers into memory as three-element structures (inverse of LD3 above —
// the inputs here are Load3MultipleInt8x8's expected outputs).
TEST(Arm64InsnTest, Store3MultipleInt8x8) {
  static constexpr uint64_t arg[3] = {
      0x7427370407520332ULL, 0x8468590657168087ULL, 0x5056631212799267ULL};
  uint64_t res[3];
  asm("mov v0.16b, %0.16b\n\t"
      "mov v1.16b, %1.16b\n\t"
      "mov v2.16b, %2.16b\n\t"
      "st3 {v0.8b-v2.8b}, [%3]"
      :
      : "w"(arg[0]), "w"(arg[1]), "w"(arg[2]), "r"(res)
      : "v0", "v1", "v2", "memory");
  ASSERT_EQ(res[0], 0x1652928003678732ULL);
  ASSERT_EQ(res[1], 0x3712060412570779ULL);
  ASSERT_EQ(res[2], 0x5084745668276359ULL);
}
4913 
// Verifies LD3 (multiple structures, 16x8-bit lanes): de-interleaves 48 bytes
// of three-element structures into three full 128-bit registers.
TEST(Arm64InsnTest, Load3MultipleInt8x16) {
  static constexpr uint8_t mem[3 * 16] = {
      0x69, 0x20, 0x35, 0x65, 0x63, 0x38, 0x44, 0x96, 0x25, 0x32, 0x83, 0x38,
      0x52, 0x27, 0x99, 0x24, 0x59, 0x60, 0x97, 0x86, 0x59, 0x47, 0x23, 0x88,
      0x91, 0x29, 0x63, 0x62, 0x59, 0x54, 0x32, 0x73, 0x45, 0x44, 0x37, 0x16,
      0x33, 0x55, 0x77, 0x43, 0x29, 0x49, 0x99, 0x28, 0x81, 0x05, 0x57, 0x17};
  __uint128_t res[3];
  // v7-v9 are hardcoded because the register list must be consecutive.
  asm("ld3 {v7.16b-v9.16b}, [%3]\n\t"
      "mov %0.16b, v7.16b\n\t"
      "mov %1.16b, v8.16b\n\t"
      "mov %2.16b, v9.16b"
      : "=w"(res[0]), "=w"(res[1]), "=w"(res[2])
      : "r"(mem)
      : "v7", "v8", "v9", "memory");
  ASSERT_EQ(res[0], MakeUInt128(0x4797245232446569ULL, 0x599433344326291ULL));
  ASSERT_EQ(res[1], MakeUInt128(0x2386592783966320ULL, 0x5728295537735929ULL));
  ASSERT_EQ(res[2], MakeUInt128(0x8859609938253835ULL, 0x1781497716455463ULL));
}
4932 
// Verifies ST3 (multiple structures, 16x8-bit lanes): interleaves three full
// 128-bit registers into memory (inverse of Load3MultipleInt8x16).
TEST(Arm64InsnTest, Store3MultipleInt8x16) {
  static constexpr __uint128_t arg[3] = {MakeUInt128(0x4797245232446569ULL, 0x599433344326291ULL),
                                         MakeUInt128(0x2386592783966320ULL, 0x5728295537735929ULL),
                                         MakeUInt128(0x8859609938253835ULL, 0x1781497716455463ULL)};
  __uint128_t res[3];
  asm("mov v0.16b, %0.16b\n\t"
      "mov v1.16b, %1.16b\n\t"
      "mov v2.16b, %2.16b\n\t"
      "st3 {v0.16b-v2.16b}, [%3]"
      :
      : "w"(arg[0]), "w"(arg[1]), "w"(arg[2]), "r"(res)
      : "v0", "v1", "v2", "memory");
  ASSERT_EQ(res[0], MakeUInt128(0x9644386365352069ULL, 0x2499275238833225ULL));
  ASSERT_EQ(res[1], MakeUInt128(0x8823475986976059ULL, 0x7332545962632991ULL));
  ASSERT_EQ(res[2], MakeUInt128(0x4377553316374445ULL, 0x1757058128994929ULL));
}
4949 
// Verifies LD3 (multiple structures, 4x16-bit lanes).  The register list
// v30-v0 deliberately wraps around the end of the SIMD register file, which
// is a legal (modulo-32) encoding.
TEST(Arm64InsnTest, Load3MultipleInt16x4) {
  static constexpr uint16_t mem[3 * 4] = {0x2069,
                                          0x6535,
                                          0x3863,
                                          0x9644,
                                          0x3225,
                                          0x3883,
                                          0x2752,
                                          0x2499,
                                          0x6059,
                                          0x8697,
                                          0x4759,
                                          0x8823};
  __uint128_t res[3];
  asm("ld3 {v30.4h-v0.4h}, [%3]\n\t"
      "mov %0.16b, v30.16b\n\t"
      "mov %1.16b, v31.16b\n\t"
      "mov %2.16b, v0.16b"
      : "=w"(res[0]), "=w"(res[1]), "=w"(res[2])
      : "r"(mem)
      : "v30", "v31", "v0", "memory");
  ASSERT_EQ(res[0], MakeUInt128(0x8697275296442069ULL, 0));
  ASSERT_EQ(res[1], MakeUInt128(0x4759249932256535ULL, 0));
  ASSERT_EQ(res[2], MakeUInt128(0x8823605938833863ULL, 0));
}
4975 
// Verifies ST3 (multiple structures, 4x16-bit lanes): interleaves three
// 64-bit halves into memory (inverse of Load3MultipleInt16x4).
TEST(Arm64InsnTest, Store3MultipleInt16x4) {
  static constexpr uint64_t arg[3] = {
      0x8697275296442069ULL, 0x4759249932256535ULL, 0x8823605938833863ULL};
  uint64_t res[3];
  asm("mov v0.16b, %0.16b\n\t"
      "mov v1.16b, %1.16b\n\t"
      "mov v2.16b, %2.16b\n\t"
      "st3 {v0.4h-v2.4h}, [%3]"
      :
      : "w"(arg[0]), "w"(arg[1]), "w"(arg[2]), "r"(res)
      : "v0", "v1", "v2", "memory");
  ASSERT_EQ(res[0], 0x9644386365352069ULL);
  ASSERT_EQ(res[1], 0x2499275238833225ULL);
  ASSERT_EQ(res[2], 0x8823475986976059ULL);
}
4991 
// Verifies LD3 (multiple structures, 8x16-bit lanes) with a register list
// (v30-v0) that wraps around the SIMD register file.
TEST(Arm64InsnTest, Load3MultipleInt16x8) {
  static constexpr uint16_t mem[3 * 8] = {0x2069, 0x6535, 0x3863, 0x9644, 0x3225, 0x3883,
                                          0x2752, 0x2499, 0x6059, 0x8697, 0x4759, 0x8823,
                                          0x2991, 0x6263, 0x5459, 0x7332, 0x4445, 0x1637,
                                          0x5533, 0x4377, 0x4929, 0x2899, 0x0581, 0x1757};
  __uint128_t res[3];
  asm("ld3 {v30.8h-v0.8h}, [%3]\n\t"
      "mov %0.16b, v30.16b\n\t"
      "mov %1.16b, v31.16b\n\t"
      "mov %2.16b, v0.16b"
      : "=w"(res[0]), "=w"(res[1]), "=w"(res[2])
      : "r"(mem)
      : "v30", "v31", "v0", "memory");
  ASSERT_EQ(res[0], MakeUInt128(0x8697275296442069ULL, 0x2899553373322991ULL));
  ASSERT_EQ(res[1], MakeUInt128(0x4759249932256535ULL, 0x581437744456263ULL));
  ASSERT_EQ(res[2], MakeUInt128(0x8823605938833863ULL, 0x1757492916375459ULL));
}
5009 
// Verifies ST3 (multiple structures, 8x16-bit lanes): interleaves three full
// 128-bit registers into memory (inverse of Load3MultipleInt16x8).
TEST(Arm64InsnTest, Store3MultipleInt16x8) {
  static constexpr __uint128_t arg[3] = {MakeUInt128(0x8697275296442069ULL, 0x2899553373322991ULL),
                                         MakeUInt128(0x4759249932256535ULL, 0x581437744456263ULL),
                                         MakeUInt128(0x8823605938833863ULL, 0x1757492916375459ULL)};
  __uint128_t res[3];
  asm("mov v0.16b, %0.16b\n\t"
      "mov v1.16b, %1.16b\n\t"
      "mov v2.16b, %2.16b\n\t"
      "st3 {v0.8h-v2.8h}, [%3]"
      :
      : "w"(arg[0]), "w"(arg[1]), "w"(arg[2]), "r"(res)
      : "v0", "v1", "v2", "memory");
  ASSERT_EQ(res[0], MakeUInt128(0x9644386365352069ULL, 0x2499275238833225ULL));
  ASSERT_EQ(res[1], MakeUInt128(0x8823475986976059ULL, 0x7332545962632991ULL));
  ASSERT_EQ(res[2], MakeUInt128(0x4377553316374445ULL, 0x1757058128994929ULL));
}
5026 
// Verifies LD3 (multiple structures, 2x32-bit lanes) with a wrapping register
// list (v30-v0).
TEST(Arm64InsnTest, Load3MultipleInt32x2) {
  static constexpr uint32_t mem[3 * 2] = {
      0x65352069, 0x96443863, 0x38833225, 0x24992752, 0x86976059, 0x88234759};
  __uint128_t res[3];
  asm("ld3 {v30.2s-v0.2s}, [%3]\n\t"
      "mov %0.16b, v30.16b\n\t"
      "mov %1.16b, v31.16b\n\t"
      "mov %2.16b, v0.16b"
      : "=w"(res[0]), "=w"(res[1]), "=w"(res[2])
      : "r"(mem)
      : "v30", "v31", "v0", "memory");
  ASSERT_EQ(res[0], MakeUInt128(0x2499275265352069ULL, 0));
  ASSERT_EQ(res[1], MakeUInt128(0x8697605996443863ULL, 0));
  ASSERT_EQ(res[2], MakeUInt128(0x8823475938833225ULL, 0));
}
5042 
// Verifies ST3 (multiple structures, 2x32-bit lanes): interleaves three
// 64-bit halves into memory (inverse of Load3MultipleInt32x2).
TEST(Arm64InsnTest, Store3MultipleInt32x2) {
  static constexpr uint64_t arg[3] = {
      0x2499275265352069ULL, 0x8697605996443863ULL, 0x8823475938833225ULL};
  uint64_t res[3];
  asm("mov v0.16b, %0.16b\n\t"
      "mov v1.16b, %1.16b\n\t"
      "mov v2.16b, %2.16b\n\t"
      "st3 {v0.2s-v2.2s}, [%3]"
      :
      : "w"(arg[0]), "w"(arg[1]), "w"(arg[2]), "r"(res)
      : "v0", "v1", "v2", "memory");
  ASSERT_EQ(res[0], 0x9644386365352069ULL);
  ASSERT_EQ(res[1], 0x2499275238833225ULL);
  ASSERT_EQ(res[2], 0x8823475986976059ULL);
}
5058 
// Verifies LD3 (multiple structures, 4x32-bit lanes) with a wrapping register
// list (v30-v0).
TEST(Arm64InsnTest, Load3MultipleInt32x4) {
  static constexpr uint32_t mem[3 * 4] = {0x65352069,
                                          0x96443863,
                                          0x38833225,
                                          0x24992752,
                                          0x86976059,
                                          0x88234759,
                                          0x62632991,
                                          0x73325459,
                                          0x16374445,
                                          0x43775533,
                                          0x28994929,
                                          0x17570581};
  __uint128_t res[3];
  asm("ld3 {v30.4s-v0.4s}, [%3]\n\t"
      "mov %0.16b, v30.16b\n\t"
      "mov %1.16b, v31.16b\n\t"
      "mov %2.16b, v0.16b"
      : "=w"(res[0]), "=w"(res[1]), "=w"(res[2])
      : "r"(mem)
      : "v30", "v31", "v0", "memory");
  ASSERT_EQ(res[0], MakeUInt128(0x2499275265352069ULL, 0x4377553362632991ULL));
  ASSERT_EQ(res[1], MakeUInt128(0x8697605996443863ULL, 0x2899492973325459ULL));
  ASSERT_EQ(res[2], MakeUInt128(0x8823475938833225ULL, 0x1757058116374445ULL));
}
5084 
// Verifies ST3 (multiple structures, 4x32-bit lanes): interleaves three full
// 128-bit registers into memory (inverse of Load3MultipleInt32x4).
TEST(Arm64InsnTest, Store3MultipleInt32x4) {
  static constexpr __uint128_t arg[3] = {MakeUInt128(0x2499275265352069ULL, 0x4377553362632991ULL),
                                         MakeUInt128(0x8697605996443863ULL, 0x2899492973325459ULL),
                                         MakeUInt128(0x8823475938833225ULL, 0x1757058116374445ULL)};
  __uint128_t res[3];
  asm("mov v0.16b, %0.16b\n\t"
      "mov v1.16b, %1.16b\n\t"
      "mov v2.16b, %2.16b\n\t"
      "st3 {v0.4s-v2.4s}, [%3]"
      :
      : "w"(arg[0]), "w"(arg[1]), "w"(arg[2]), "r"(res)
      : "v0", "v1", "v2", "memory");
  ASSERT_EQ(res[0], MakeUInt128(0x9644386365352069ULL, 0x2499275238833225ULL));
  ASSERT_EQ(res[1], MakeUInt128(0x8823475986976059ULL, 0x7332545962632991ULL));
  ASSERT_EQ(res[2], MakeUInt128(0x4377553316374445ULL, 0x1757058128994929ULL));
}
5101 
// Verifies LD3 (multiple structures, 2x64-bit lanes) with a wrapping register
// list (v30-v0).
TEST(Arm64InsnTest, Load3MultipleInt64x2) {
  static constexpr uint64_t mem[3 * 2] = {0x9644386365352069,
                                          0x2499275238833225,
                                          0x8823475986976059,
                                          0x7332545962632991,
                                          0x4377553316374445,
                                          0x1757058128994929};
  __uint128_t res[3];
  asm("ld3 {v30.2d-v0.2d}, [%3]\n\t"
      "mov %0.16b, v30.16b\n\t"
      "mov %1.16b, v31.16b\n\t"
      "mov %2.16b, v0.16b"
      : "=w"(res[0]), "=w"(res[1]), "=w"(res[2])
      : "r"(mem)
      : "v30", "v31", "v0", "memory");
  ASSERT_EQ(res[0], MakeUInt128(0x9644386365352069ULL, 0x7332545962632991ULL));
  ASSERT_EQ(res[1], MakeUInt128(0x2499275238833225ULL, 0x4377553316374445ULL));
  ASSERT_EQ(res[2], MakeUInt128(0x8823475986976059ULL, 0x1757058128994929ULL));
}
5121 
// Verifies ST3 (multiple structures, 2x64-bit lanes): interleaves three full
// 128-bit registers into memory (inverse of Load3MultipleInt64x2).
TEST(Arm64InsnTest, Store3MultipleInt64x2) {
  static constexpr __uint128_t arg[3] = {MakeUInt128(0x9644386365352069ULL, 0x7332545962632991ULL),
                                         MakeUInt128(0x2499275238833225ULL, 0x4377553316374445ULL),
                                         MakeUInt128(0x8823475986976059ULL, 0x1757058128994929ULL)};
  __uint128_t res[3];
  asm("mov v0.16b, %0.16b\n\t"
      "mov v1.16b, %1.16b\n\t"
      "mov v2.16b, %2.16b\n\t"
      "st3 {v0.2d-v2.2d}, [%3]"
      :
      : "w"(arg[0]), "w"(arg[1]), "w"(arg[2]), "r"(res)
      : "v0", "v1", "v2", "memory");
  ASSERT_EQ(res[0], MakeUInt128(0x9644386365352069ULL, 0x2499275238833225ULL));
  ASSERT_EQ(res[1], MakeUInt128(0x8823475986976059ULL, 0x7332545962632991ULL));
  ASSERT_EQ(res[2], MakeUInt128(0x4377553316374445ULL, 0x1757058128994929ULL));
}
5138 
// Verifies LD4 (multiple structures, 8x8-bit lanes): de-interleaves 32 bytes
// of four-element structures into four registers (stride-4 gather).
TEST(Arm64InsnTest, Load4MultipleInt8x8) {
  static constexpr uint8_t mem[4 * 8] = {0x69, 0x20, 0x35, 0x65, 0x63, 0x38, 0x44, 0x96,
                                         0x25, 0x32, 0x83, 0x38, 0x52, 0x27, 0x99, 0x24,
                                         0x59, 0x60, 0x97, 0x86, 0x59, 0x47, 0x23, 0x88,
                                         0x91, 0x29, 0x63, 0x62, 0x59, 0x54, 0x32, 0x73};
  __uint128_t res[4];
  // v7-v10 are hardcoded because the register list must be consecutive.
  asm("ld4 {v7.8b-v10.8b}, [%4]\n\t"
      "mov %0.16b, v7.16b\n\t"
      "mov %1.16b, v8.16b\n\t"
      "mov %2.16b, v9.16b\n\t"
      "mov %3.16b, v10.16b"
      : "=w"(res[0]), "=w"(res[1]), "=w"(res[2]), "=w"(res[3])
      : "r"(mem)
      : "v7", "v8", "v9", "v10", "memory");
  ASSERT_EQ(res[0], MakeUInt128(0x5991595952256369ULL, 0));
  ASSERT_EQ(res[1], MakeUInt128(0x5429476027323820ULL, 0));
  ASSERT_EQ(res[2], MakeUInt128(0x3263239799834435ULL, 0));
  ASSERT_EQ(res[3], MakeUInt128(0x7362888624389665ULL, 0));
}
5158 
// Verifies ST4 (multiple structures, 8x8-bit lanes): interleaves four 64-bit
// halves into memory (inverse of Load4MultipleInt8x8).
TEST(Arm64InsnTest, Store4MultipleInt8x8) {
  static constexpr uint64_t arg[4] = {
      0x5991595952256369ULL, 0x5429476027323820ULL, 0x3263239799834435ULL, 0x7362888624389665ULL};
  uint64_t res[4];
  asm("mov v7.16b, %0.16b\n\t"
      "mov v8.16b, %1.16b\n\t"
      "mov v9.16b, %2.16b\n\t"
      "mov v10.16b, %3.16b\n\t"
      "st4 {v7.8b-v10.8b}, [%4]"
      :
      : "w"(arg[0]), "w"(arg[1]), "w"(arg[2]), "w"(arg[3]), "r"(res)
      : "v7", "v8", "v9", "v10", "memory");
  ASSERT_EQ(res[0], 0x9644386365352069ULL);
  ASSERT_EQ(res[1], 0x2499275238833225ULL);
  ASSERT_EQ(res[2], 0x8823475986976059ULL);
  ASSERT_EQ(res[3], 0x7332545962632991ULL);
}
5176 
// Verifies LD4 (multiple structures, 16x8-bit lanes): de-interleaves 64 bytes
// of four-element structures into four full 128-bit registers.
TEST(Arm64InsnTest, Load4MultipleInt8x16) {
  static constexpr uint8_t mem[4 * 16] = {
      0x69, 0x20, 0x35, 0x65, 0x63, 0x38, 0x44, 0x96, 0x25, 0x32, 0x83, 0x38, 0x52,
      0x27, 0x99, 0x24, 0x59, 0x60, 0x97, 0x86, 0x59, 0x47, 0x23, 0x88, 0x91, 0x29,
      0x63, 0x62, 0x59, 0x54, 0x32, 0x73, 0x45, 0x44, 0x37, 0x16, 0x33, 0x55, 0x77,
      0x43, 0x29, 0x49, 0x99, 0x28, 0x81, 0x05, 0x57, 0x17, 0x81, 0x98, 0x78, 0x50,
      0x68, 0x14, 0x62, 0x52, 0x32, 0x13, 0x47, 0x52, 0x37, 0x38, 0x11, 0x65};
  __uint128_t res[4];
  // v7-v10 are hardcoded because the register list must be consecutive.
  asm("ld4 {v7.16b-v10.16b}, [%4]\n\t"
      "mov %0.16b, v7.16b\n\t"
      "mov %1.16b, v8.16b\n\t"
      "mov %2.16b, v9.16b\n\t"
      "mov %3.16b, v10.16b"
      : "=w"(res[0]), "=w"(res[1]), "=w"(res[2]), "=w"(res[3])
      : "r"(mem)
      : "v7", "v8", "v9", "v10", "memory");
  ASSERT_EQ(res[0], MakeUInt128(0x5991595952256369ULL, 0x3732688181293345ULL));
  ASSERT_EQ(res[1], MakeUInt128(0x5429476027323820ULL, 0x3813149805495544ULL));
  ASSERT_EQ(res[2], MakeUInt128(0x3263239799834435ULL, 0x1147627857997737ULL));
  ASSERT_EQ(res[3], MakeUInt128(0x7362888624389665ULL, 0x6552525017284316ULL));
}
5198 
// Verifies ST4 (multiple structures, 16x8-bit lanes): interleaves four full
// 128-bit registers into memory (inverse of Load4MultipleInt8x16).
TEST(Arm64InsnTest, Store4MultipleInt8x16) {
  static constexpr __uint128_t arg[4] = {MakeUInt128(0x5991595952256369ULL, 0x3732688181293345ULL),
                                         MakeUInt128(0x5429476027323820ULL, 0x3813149805495544ULL),
                                         MakeUInt128(0x3263239799834435ULL, 0x1147627857997737ULL),
                                         MakeUInt128(0x7362888624389665ULL, 0x6552525017284316ULL)};
  __uint128_t res[4];
  asm("mov v7.16b, %0.16b\n\t"
      "mov v8.16b, %1.16b\n\t"
      "mov v9.16b, %2.16b\n\t"
      "mov v10.16b, %3.16b\n\t"
      "st4 {v7.16b-v10.16b}, [%4]"
      :
      : "w"(arg[0]), "w"(arg[1]), "w"(arg[2]), "w"(arg[3]), "r"(res)
      : "v7", "v8", "v9", "v10", "memory");
  ASSERT_EQ(res[0], MakeUInt128(0x9644386365352069ULL, 0x2499275238833225ULL));
  ASSERT_EQ(res[1], MakeUInt128(0x8823475986976059ULL, 0x7332545962632991ULL));
  ASSERT_EQ(res[2], MakeUInt128(0x4377553316374445ULL, 0x1757058128994929ULL));
  ASSERT_EQ(res[3], MakeUInt128(0x5262146850789881ULL, 0x6511383752471332ULL));
}
5218 
// Verifies LD4 (multiple structures, 4x16-bit lanes).  The register list
// v30-v1 wraps around the end of the SIMD register file (modulo-32 encoding).
TEST(Arm64InsnTest, Load4MultipleInt16x4) {
  static constexpr uint16_t mem[4 * 4] = {0x2069,
                                          0x6535,
                                          0x3863,
                                          0x9644,
                                          0x3225,
                                          0x3883,
                                          0x2752,
                                          0x2499,
                                          0x6059,
                                          0x8697,
                                          0x4759,
                                          0x8823,
                                          0x2991,
                                          0x6263,
                                          0x5459,
                                          0x7332};
  __uint128_t res[4];
  asm("ld4 {v30.4h-v1.4h}, [%4]\n\t"
      "mov %0.16b, v30.16b\n\t"
      "mov %1.16b, v31.16b\n\t"
      "mov %2.16b, v0.16b\n\t"
      "mov %3.16b, v1.16b"
      : "=w"(res[0]), "=w"(res[1]), "=w"(res[2]), "=w"(res[3])
      : "r"(mem)
      : "v30", "v31", "v0", "v1", "memory");
  ASSERT_EQ(res[0], MakeUInt128(0x2991605932252069ULL, 0));
  ASSERT_EQ(res[1], MakeUInt128(0x6263869738836535ULL, 0));
  ASSERT_EQ(res[2], MakeUInt128(0x5459475927523863ULL, 0));
  ASSERT_EQ(res[3], MakeUInt128(0x7332882324999644ULL, 0));
}
5250 
// Verifies ST4 (multiple structures, 4x16-bit lanes) with a wrapping register
// list (v30-v1); inverse of Load4MultipleInt16x4.
TEST(Arm64InsnTest, Store4MultipleInt16x4) {
  static constexpr uint64_t arg[4] = {
      0x2991605932252069ULL, 0x6263869738836535ULL, 0x5459475927523863ULL, 0x7332882324999644ULL};
  uint64_t res[4];
  asm("mov v30.16b, %0.16b\n\t"
      "mov v31.16b, %1.16b\n\t"
      "mov v0.16b, %2.16b\n\t"
      "mov v1.16b, %3.16b\n\t"
      "st4 {v30.4h-v1.4h}, [%4]"
      :
      : "w"(arg[0]), "w"(arg[1]), "w"(arg[2]), "w"(arg[3]), "r"(res)
      : "v30", "v31", "v0", "v1", "memory");
  ASSERT_EQ(res[0], 0x9644386365352069ULL);
  ASSERT_EQ(res[1], 0x2499275238833225ULL);
  ASSERT_EQ(res[2], 0x8823475986976059ULL);
  ASSERT_EQ(res[3], 0x7332545962632991ULL);
}
5268 
// Verifies LD4 (multiple structures, 8x16-bit lanes) with a wrapping register
// list (v30-v1).
TEST(Arm64InsnTest, Load4MultipleInt16x8) {
  static constexpr uint16_t mem[4 * 8] = {
      0x2069, 0x6535, 0x3863, 0x9644, 0x3225, 0x3883, 0x2752, 0x2499, 0x6059, 0x8697, 0x4759,
      0x8823, 0x2991, 0x6263, 0x5459, 0x7332, 0x4445, 0x1637, 0x5533, 0x4377, 0x4929, 0x2899,
      0x0581, 0x1757, 0x9881, 0x5078, 0x1468, 0x5262, 0x1332, 0x5247, 0x3837, 0x6511};
  __uint128_t res[4];
  asm("ld4 {v30.8h-v1.8h}, [%4]\n\t"
      "mov %0.16b, v30.16b\n\t"
      "mov %1.16b, v31.16b\n\t"
      "mov %2.16b, v0.16b\n\t"
      "mov %3.16b, v1.16b"
      : "=w"(res[0]), "=w"(res[1]), "=w"(res[2]), "=w"(res[3])
      : "r"(mem)
      : "v30", "v31", "v0", "v1", "memory");
  ASSERT_EQ(res[0], MakeUInt128(0x2991605932252069ULL, 0x1332988149294445ULL));
  ASSERT_EQ(res[1], MakeUInt128(0x6263869738836535ULL, 0x5247507828991637ULL));
  ASSERT_EQ(res[2], MakeUInt128(0x5459475927523863ULL, 0x3837146805815533ULL));
  ASSERT_EQ(res[3], MakeUInt128(0x7332882324999644ULL, 0x6511526217574377ULL));
}
5288 
// Verifies ST4 (multiple structures, 8x16-bit lanes) with a wrapping register
// list (v30-v1); inverse of Load4MultipleInt16x8.
TEST(Arm64InsnTest, Store4MultipleInt16x8) {
  static constexpr __uint128_t arg[4] = {MakeUInt128(0x2991605932252069ULL, 0x1332988149294445ULL),
                                         MakeUInt128(0x6263869738836535ULL, 0x5247507828991637ULL),
                                         MakeUInt128(0x5459475927523863ULL, 0x3837146805815533ULL),
                                         MakeUInt128(0x7332882324999644ULL, 0x6511526217574377ULL)};
  __uint128_t res[4];
  asm("mov v30.16b, %0.16b\n\t"
      "mov v31.16b, %1.16b\n\t"
      "mov v0.16b, %2.16b\n\t"
      "mov v1.16b, %3.16b\n\t"
      "st4 {v30.8h-v1.8h}, [%4]"
      :
      : "w"(arg[0]), "w"(arg[1]), "w"(arg[2]), "w"(arg[3]), "r"(res)
      : "v30", "v31", "v0", "v1", "memory");
  ASSERT_EQ(res[0], MakeUInt128(0x9644386365352069ULL, 0x2499275238833225ULL));
  ASSERT_EQ(res[1], MakeUInt128(0x8823475986976059ULL, 0x7332545962632991ULL));
  ASSERT_EQ(res[2], MakeUInt128(0x4377553316374445ULL, 0x1757058128994929ULL));
  ASSERT_EQ(res[3], MakeUInt128(0x5262146850789881ULL, 0x6511383752471332ULL));
}
5308 
TEST(Arm64InsnTest, Load4MultipleInt32x2) {
  // LD4 with 2x32-bit lanes: eight words are de-interleaved four ways; only the
  // low 64 bits of each destination are written, the upper half reads as zero.
  static constexpr uint32_t mem[4 * 2] = {0x65352069,
                                          0x96443863,
                                          0x38833225,
                                          0x24992752,
                                          0x86976059,
                                          0x88234759,
                                          0x62632991,
                                          0x73325459};
  __uint128_t res[4];
  // Destination list v30-v1 wraps around the register file, hence hardcoded registers.
  asm("ld4 {v30.2s-v1.2s}, [%4]\n\t"
      "mov %0.16b, v30.16b\n\t"
      "mov %1.16b, v31.16b\n\t"
      "mov %2.16b, v0.16b\n\t"
      "mov %3.16b, v1.16b"
      : "=w"(res[0]), "=w"(res[1]), "=w"(res[2]), "=w"(res[3])
      : "r"(mem)
      : "v30", "v31", "v0", "v1", "memory");
  ASSERT_EQ(res[0], MakeUInt128(0x8697605965352069ULL, 0));
  ASSERT_EQ(res[1], MakeUInt128(0x8823475996443863ULL, 0));
  ASSERT_EQ(res[2], MakeUInt128(0x6263299138833225ULL, 0));
  ASSERT_EQ(res[3], MakeUInt128(0x7332545924992752ULL, 0));
}
5332 
TEST(Arm64InsnTest, Store4MultipleInt32x2) {
  // ST4 with 2x32-bit lanes: only the low 64 bits of each source register are
  // consumed, so 64-bit inputs are sufficient here.
  static constexpr uint64_t arg[4] = {
      0x8697605965352069ULL, 0x8823475996443863ULL, 0x6263299138833225ULL, 0x7332545924992752ULL};
  uint64_t res[4];
  // Source list v30-v1 wraps around the register file, hence hardcoded registers.
  asm("mov v30.16b, %0.16b\n\t"
      "mov v31.16b, %1.16b\n\t"
      "mov v0.16b, %2.16b\n\t"
      "mov v1.16b, %3.16b\n\t"
      "st4 {v30.2s-v1.2s}, [%4]"
      :
      : "w"(arg[0]), "w"(arg[1]), "w"(arg[2]), "w"(arg[3]), "r"(res)
      : "v30", "v31", "v0", "v1", "memory");
  ASSERT_EQ(res[0], 0x9644386365352069ULL);
  ASSERT_EQ(res[1], 0x2499275238833225ULL);
  ASSERT_EQ(res[2], 0x8823475986976059ULL);
  ASSERT_EQ(res[3], 0x7332545962632991ULL);
}
5350 
TEST(Arm64InsnTest, Load4MultipleInt32x4) {
  // LD4 with 4x32-bit lanes: sixteen words de-interleaved into four full vectors.
  static constexpr uint32_t mem[4 * 4] = {0x65352069,
                                          0x96443863,
                                          0x38833225,
                                          0x24992752,
                                          0x86976059,
                                          0x88234759,
                                          0x62632991,
                                          0x73325459,
                                          0x16374445,
                                          0x43775533,
                                          0x28994929,
                                          0x17570581,
                                          0x50789881,
                                          0x52621468,
                                          0x52471332,
                                          0x65113837};
  __uint128_t res[4];
  // Destination list v30-v1 wraps around the register file, hence hardcoded registers.
  asm("ld4 {v30.4s-v1.4s}, [%4]\n\t"
      "mov %0.16b, v30.16b\n\t"
      "mov %1.16b, v31.16b\n\t"
      "mov %2.16b, v0.16b\n\t"
      "mov %3.16b, v1.16b"
      : "=w"(res[0]), "=w"(res[1]), "=w"(res[2]), "=w"(res[3])
      : "r"(mem)
      : "v30", "v31", "v0", "v1", "memory");
  ASSERT_EQ(res[0], MakeUInt128(0x8697605965352069ULL, 0x5078988116374445ULL));
  ASSERT_EQ(res[1], MakeUInt128(0x8823475996443863ULL, 0x5262146843775533ULL));
  ASSERT_EQ(res[2], MakeUInt128(0x6263299138833225ULL, 0x5247133228994929ULL));
  ASSERT_EQ(res[3], MakeUInt128(0x7332545924992752ULL, 0x6511383717570581ULL));
}
5382 
TEST(Arm64InsnTest, Store4MultipleInt32x4) {
  // ST4 with 4x32-bit lanes: interleaves the four vectors into sixteen words.
  static constexpr __uint128_t arg[4] = {MakeUInt128(0x8697605965352069ULL, 0x5078988116374445ULL),
                                         MakeUInt128(0x8823475996443863ULL, 0x5262146843775533ULL),
                                         MakeUInt128(0x6263299138833225ULL, 0x5247133228994929ULL),
                                         MakeUInt128(0x7332545924992752ULL, 0x6511383717570581ULL)};
  __uint128_t res[4];
  // Source list v30-v1 wraps around the register file, hence hardcoded registers.
  asm("mov v30.16b, %0.16b\n\t"
      "mov v31.16b, %1.16b\n\t"
      "mov v0.16b, %2.16b\n\t"
      "mov v1.16b, %3.16b\n\t"
      "st4 {v30.4s-v1.4s}, [%4]"
      :
      : "w"(arg[0]), "w"(arg[1]), "w"(arg[2]), "w"(arg[3]), "r"(res)
      : "v30", "v31", "v0", "v1", "memory");
  ASSERT_EQ(res[0], MakeUInt128(0x9644386365352069ULL, 0x2499275238833225ULL));
  ASSERT_EQ(res[1], MakeUInt128(0x8823475986976059ULL, 0x7332545962632991ULL));
  ASSERT_EQ(res[2], MakeUInt128(0x4377553316374445ULL, 0x1757058128994929ULL));
  ASSERT_EQ(res[3], MakeUInt128(0x5262146850789881ULL, 0x6511383752471332ULL));
}
5402 
TEST(Arm64InsnTest, Load4MultipleInt64x2) {
  // LD4 with 2x64-bit lanes: eight doublewords de-interleaved into four vectors.
  static constexpr uint64_t mem[4 * 2] = {0x9644386365352069,
                                          0x2499275238833225,
                                          0x8823475986976059,
                                          0x7332545962632991,
                                          0x4377553316374445,
                                          0x1757058128994929,
                                          0x5262146850789881,
                                          0x6511383752471332};
  __uint128_t res[4];
  // Destination list v30-v1 wraps around the register file, hence hardcoded registers.
  asm("ld4 {v30.2d-v1.2d}, [%4]\n\t"
      "mov %0.16b, v30.16b\n\t"
      "mov %1.16b, v31.16b\n\t"
      "mov %2.16b, v0.16b\n\t"
      "mov %3.16b, v1.16b"
      : "=w"(res[0]), "=w"(res[1]), "=w"(res[2]), "=w"(res[3])
      : "r"(mem)
      : "v30", "v31", "v0", "v1", "memory");
  ASSERT_EQ(res[0], MakeUInt128(0x9644386365352069ULL, 0x4377553316374445ULL));
  ASSERT_EQ(res[1], MakeUInt128(0x2499275238833225ULL, 0x1757058128994929ULL));
  ASSERT_EQ(res[2], MakeUInt128(0x8823475986976059ULL, 0x5262146850789881ULL));
  ASSERT_EQ(res[3], MakeUInt128(0x7332545962632991ULL, 0x6511383752471332ULL));
}
5426 
TEST(Arm64InsnTest, Store4MultipleInt64x2) {
  // ST4 with 2x64-bit lanes: interleaves four vectors into eight doublewords.
  static constexpr __uint128_t arg[4] = {MakeUInt128(0x9644386365352069ULL, 0x4377553316374445ULL),
                                         MakeUInt128(0x2499275238833225ULL, 0x1757058128994929ULL),
                                         MakeUInt128(0x8823475986976059ULL, 0x5262146850789881ULL),
                                         MakeUInt128(0x7332545962632991ULL, 0x6511383752471332ULL)};
  __uint128_t res[4];
  // Source list v30-v1 wraps around the register file, hence hardcoded registers.
  asm("mov v30.16b, %0.16b\n\t"
      "mov v31.16b, %1.16b\n\t"
      "mov v0.16b, %2.16b\n\t"
      "mov v1.16b, %3.16b\n\t"
      "st4 {v30.2d-v1.2d}, [%4]"
      :
      : "w"(arg[0]), "w"(arg[1]), "w"(arg[2]), "w"(arg[3]), "r"(res)
      : "v30", "v31", "v0", "v1", "memory");
  ASSERT_EQ(res[0], MakeUInt128(0x9644386365352069ULL, 0x2499275238833225ULL));
  ASSERT_EQ(res[1], MakeUInt128(0x8823475986976059ULL, 0x7332545962632991ULL));
  ASSERT_EQ(res[2], MakeUInt128(0x4377553316374445ULL, 0x1757058128994929ULL));
  ASSERT_EQ(res[3], MakeUInt128(0x5262146850789881ULL, 0x6511383752471332ULL));
}
5446 
TEST(Arm64InsnTest, Load1ReplicateInt8x8) {
  // LD1R loads one byte and replicates it into all eight 8-bit lanes.
  static constexpr uint8_t mem = 0x81U;
  __uint128_t res;
  asm("ld1r {%0.8b}, [%1]" : "=w"(res) : "r"(&mem) : "memory");
  ASSERT_EQ(res, MakeUInt128(0x8181818181818181ULL, 0U));
}
5453 
TEST(Arm64InsnTest, Load2ReplicateInt16x8) {
  // LD2R loads two halfwords and replicates each across all lanes of its
  // destination register; v6/v7 are hardcoded and copied out afterwards.
  static constexpr uint16_t mem[] = {0x7904, 0x8715};
  __uint128_t res[2];
  asm("ld2r {v6.8h, v7.8h}, [%2]\n\t"
      "mov %0.16b, v6.16b\n\t"
      "mov %1.16b, v7.16b"
      : "=w"(res[0]), "=w"(res[1])
      : "r"(mem)
      : "v6", "v7", "memory");
  ASSERT_EQ(res[0], MakeUInt128(0x7904790479047904ULL, 0x7904790479047904ULL));
  ASSERT_EQ(res[1], MakeUInt128(0x8715871587158715ULL, 0x8715871587158715ULL));
}
5466 
TEST(Arm64InsnTest, Load3ReplicateInt32x4) {
  // LD3R loads three words and replicates each across all four lanes of its
  // destination; the v30-v0 list wraps around the register file.
  static constexpr uint32_t mem[] = {0x78713710U, 0x60510637U, 0x95558588U};
  __uint128_t res[3];
  asm("ld3r {v30.4s-v0.4s}, [%3]\n\t"
      "mov %0.16b, v30.16b\n\t"
      "mov %1.16b, v31.16b\n\t"
      "mov %2.16b, v0.16b"
      : "=w"(res[0]), "=w"(res[1]), "=w"(res[2])
      : "r"(mem)
      : "v30", "v31", "v0", "memory");
  ASSERT_EQ(res[0], MakeUInt128(0x7871371078713710ULL, 0x7871371078713710ULL));
  ASSERT_EQ(res[1], MakeUInt128(0x6051063760510637ULL, 0x6051063760510637ULL));
  ASSERT_EQ(res[2], MakeUInt128(0x9555858895558588ULL, 0x9555858895558588ULL));
}
5481 
TEST(Arm64InsnTest, Load4ReplicateInt64x2) {
  // LD4R loads four doublewords and replicates each into both lanes of its
  // destination; the v29-v0 list wraps around the register file.
  static constexpr uint64_t mem[] = {
      0x8150781468526213ULL, 0x3252473837651192ULL, 0x9901561091897779ULL, 0x2200870579339646ULL};
  __uint128_t res[4];
  asm("ld4r {v29.2d-v0.2d}, [%4]\n\t"
      "mov %0.16b, v29.16b\n\t"
      "mov %1.16b, v30.16b\n\t"
      "mov %2.16b, v31.16b\n\t"
      "mov %3.16b, v0.16b"
      : "=w"(res[0]), "=w"(res[1]), "=w"(res[2]), "=w"(res[3])
      : "r"(mem)
      : "v29", "v30", "v31", "v0", "memory");
  ASSERT_EQ(res[0], MakeUInt128(mem[0], mem[0]));
  ASSERT_EQ(res[1], MakeUInt128(mem[1], mem[1]));
  ASSERT_EQ(res[2], MakeUInt128(mem[2], mem[2]));
  ASSERT_EQ(res[3], MakeUInt128(mem[3], mem[3]));
}
5499 
// NOTE(review): "Temporarl" looks like a typo for "Temporal"; left as-is because
// renaming would change the test's gtest filter name.
TEST(Arm64InsnTest, LoadPairNonTemporarlInt64) {
  // LDNP loads a pair of D registers (non-temporal hint); upper halves are zero.
  static constexpr uint64_t mem[] = {0x3843601737474215ULL, 0x2476085152099016ULL};
  __uint128_t res[2];
  asm("ldnp %d0, %d1, [%2]" : "=w"(res[0]), "=w"(res[1]) : "r"(mem) : "memory");
  ASSERT_EQ(res[0], MakeUInt128(0x3843601737474215ULL, 0U));
  ASSERT_EQ(res[1], MakeUInt128(0x2476085152099016ULL, 0U));
}
5507 
TEST(Arm64InsnTest, MoviVector2S) {
  // MOVI with two 32-bit lanes: the immediate lands in each lane's low byte.
  const __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES("movi %0.2s, #0xe4")();
  const __uint128_t expected = MakeUInt128(0x000000e4000000e4ULL, 0x0000000000000000ULL);
  ASSERT_EQ(res, expected);
}
5512 
TEST(Arm64InsnTest, MoviVector2D) {
  // MOVI with two 64-bit lanes: the immediate appears in both doublewords.
  const __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES("movi %0.2d, #0xff")();
  const __uint128_t expected = MakeUInt128(0x00000000000000ffULL, 0x00000000000000ffULL);
  ASSERT_EQ(res, expected);
}
5517 
TEST(Arm64InsnTest, MoviVector8B) {
  // MOVI with eight 8-bit lanes: the byte immediate fills the low 64 bits.
  const __uint128_t actual = ASM_INSN_WRAP_FUNC_W_RES("movi %0.8b, #0xda")();
  ASSERT_EQ(actual, MakeUInt128(0xdadadadadadadadaULL, 0x0000000000000000ULL));
}
5522 
TEST(Arm64InsnTest, MoviVector4HShiftBy8) {
  // MOVI with LSL #8: the immediate is shifted into each halfword's high byte.
  const __uint128_t actual = ASM_INSN_WRAP_FUNC_W_RES("movi %0.4h, #0xd1, lsl #8")();
  const __uint128_t expected = MakeUInt128(0xd100d100d100d100ULL, 0x0000000000000000ULL);
  ASSERT_EQ(actual, expected);
}
5527 
TEST(Arm64InsnTest, MoviVector2SShiftBy16) {
  // MOVI with MSL #16: the vacated low bits are filled with ones, not zeros.
  const __uint128_t actual = ASM_INSN_WRAP_FUNC_W_RES("movi %0.2s, #0x37, msl #16")();
  const __uint128_t expected = MakeUInt128(0x0037ffff0037ffffULL, 0x0000000000000000ULL);
  ASSERT_EQ(actual, expected);
}
5532 
TEST(Arm64InsnTest, MvniVector4H) {
  // MVNI writes the bitwise inverse of the expanded immediate (~0x00bc = 0xff43).
  const __uint128_t actual = ASM_INSN_WRAP_FUNC_W_RES("mvni %0.4h, #0xbc")();
  ASSERT_EQ(actual, MakeUInt128(0xff43ff43ff43ff43ULL, 0x0000000000000000ULL));
}
5537 
TEST(Arm64InsnTest, MvniVector2SShiftBy8) {
  // MVNI with LSL #8: inverse of 0x00002400 per 32-bit lane.
  const __uint128_t actual = ASM_INSN_WRAP_FUNC_W_RES("mvni %0.2s, #0x24, lsl #8")();
  const __uint128_t expected = MakeUInt128(0xffffdbffffffdbffULL, 0x0000000000000000ULL);
  ASSERT_EQ(actual, expected);
}
5542 
TEST(Arm64InsnTest, MvniVector2SShiftBy16) {
  // MVNI with MSL #16: inverse of 0x0025ffff per 32-bit lane.
  const __uint128_t actual = ASM_INSN_WRAP_FUNC_W_RES("mvni %0.2s, #0x25, msl #16")();
  const __uint128_t expected = MakeUInt128(0xffda0000ffda0000ULL, 0x0000000000000000ULL);
  ASSERT_EQ(actual, expected);
}
5547 
TEST(Arm64InsnTest, LoadSimdRegPlusReg) {
  // LDR (SIMD, register offset): fetches the element 16 bytes past the base.
  __uint128_t mem[] = {
      MakeUInt128(0x6517980694113528ULL, 0x0131470130478164ULL),
      MakeUInt128(0x8672422924654366ULL, 0x8009806769282382ULL),
  };
  const uint64_t offset = 16;
  __uint128_t res;

  asm("ldr %q0, [%1, %2]" : "=w"(res) : "r"(mem), "r"(offset) : "memory");

  // The offset selects the second 128-bit element.
  ASSERT_EQ(res, MakeUInt128(0x8672422924654366ULL, 0x8009806769282382ULL));
}
5560 
TEST(Arm64InsnTest, ExtractNarrowI16x8ToI8x8) {
  // XTN keeps the low byte of each halfword; result lands in the low half.
  const __uint128_t operand = MakeUInt128(0x0123456789abcdefULL, 0x0011223344556677ULL);
  const __uint128_t actual = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("xtn %0.8b, %1.8h")(operand);
  ASSERT_EQ(actual, MakeUInt128(0x113355772367abefULL, 0x0ULL));
}
5566 
TEST(Arm64InsnTest, ExtractNarrowI32x4ToI16x4) {
  // XTN keeps the low halfword of each word; result lands in the low half.
  const __uint128_t operand = MakeUInt128(0x0123456789abcdefULL, 0x0011223344556677ULL);
  const __uint128_t actual = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("xtn %0.4h, %1.4s")(operand);
  ASSERT_EQ(actual, MakeUInt128(0x223366774567cdefULL, 0x0ULL));
}
5572 
TEST(Arm64InsnTest, ExtractNarrowI64x2ToI32x2) {
  // XTN keeps the low word of each doubleword; result lands in the low half.
  const __uint128_t operand = MakeUInt128(0x0123456789abcdefULL, 0x0011223344556677ULL);
  const __uint128_t actual = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("xtn %0.2s, %1.2d")(operand);
  ASSERT_EQ(actual, MakeUInt128(0x4455667789abcdefULL, 0x0ULL));
}
5578 
TEST(Arm64InsnTest, ExtractNarrow2Int16x8ToInt8x16) {
  // XTN2 narrows into the upper half while the accumulator's lower half survives.
  const __uint128_t src = MakeUInt128(0x1844396582533754ULL, 0x3885690941130315ULL);
  const __uint128_t acc = MakeUInt128(0x6121865619673378ULL, 0x6236256125216320ULL);
  const __uint128_t actual = ASM_INSN_WRAP_FUNC_W_RES_W0_ARG("xtn2 %0.16b, %1.8h")(src, acc);
  ASSERT_EQ(actual, MakeUInt128(0x6121865619673378ULL, 0x8509131544655354ULL));
}
5585 
TEST(Arm64InsnTest, LoadLiteralSimd) {
  // We call an external assembly function to perform LDR literal because we
  // need to place the literal in .rodata.  The literal placed in .text would
  // trigger a segfault.
  ASSERT_EQ(get_fp64_literal(), 0x0123456789abcdefULL);
}
5592 
TEST(Arm64InsnTest, AbsInt64x1) {
  // Scalar ABS on the D register; the upper half of the operand is ignored
  // and the upper half of the result is zero.
  const __uint128_t operand = MakeUInt128(0xfffffffffffffffdULL, 0xdeadbeef01234567ULL);
  const __uint128_t actual = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("abs %d0, %d1")(operand);
  ASSERT_EQ(actual, MakeUInt128(0x0000000000000003ULL, 0x0ULL));
}
5598 
TEST(Arm64InsnTest, AbsInt8x8) {
  // Per-byte absolute value; note 0x80 maps to 0x80 (two's complement wrap).
  const __uint128_t operand = MakeUInt128(0x0001027e7f8081ffULL, 0x0123456789abcdefULL);
  const __uint128_t actual = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("abs %0.8b, %1.8b")(operand);
  ASSERT_EQ(actual, MakeUInt128(0x0001027e7f807f01ULL, 0x0ULL));
}
5604 
TEST(Arm64InsnTest, UseV31) {
  // Sanity check that the last SIMD register (v31) can be written and read:
  // fill it with all-ones via MOVI and copy it into the output.
  __uint128_t res;

  asm("movi v31.2d, #0xffffffffffffffff\n\t"
      "mov %0.16b, v31.16b"
      : "=w"(res)
      :
      : "v31");

  ASSERT_EQ(res, MakeUInt128(~0ULL, ~0ULL));
}
5616 
TEST(Arm64InsnTest, AddHighNarrowInt16x8) {
  // ADDHN keeps the high byte of each 16-bit sum; result fills the low half.
  const __uint128_t lhs = MakeUInt128(0x2296617119637792ULL, 0x1337575114959501ULL);
  const __uint128_t rhs = MakeUInt128(0x0941214722131794ULL, 0x7647772622414254ULL);
  const __uint128_t actual = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("addhn %0.8b, %1.8h, %2.8h")(lhs, rhs);
  ASSERT_EQ(actual, MakeUInt128(0x89ce36d72b823b8fULL, 0x0ULL));
}
5623 
TEST(Arm64InsnTest, AddHighNarrowUpperInt16x8) {
  // ADDHN2 narrows into the upper half; the accumulator's lower half survives.
  const __uint128_t lhs = MakeUInt128(0x6561809377344403ULL, 0x0707469211201913ULL);
  const __uint128_t rhs = MakeUInt128(0x6095752706957220ULL, 0x9175671167229109ULL);
  const __uint128_t acc = MakeUInt128(0x5797877185560845ULL, 0x5296541266540853ULL);
  const __uint128_t actual =
      ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("addhn2 %0.16b, %1.8h, %2.8h")(lhs, rhs, acc);
  ASSERT_EQ(actual, MakeUInt128(0x5797877185560845ULL, 0x98ad78aac5f57db6ULL));
}
5632 
TEST(Arm64InsnTest, SubHighNarrowInt16x8) {
  // SUBHN keeps the high byte of each 16-bit difference.
  const __uint128_t lhs = MakeUInt128(0x4978189312978482ULL, 0x1682998948722658ULL);
  const __uint128_t rhs = MakeUInt128(0x1210835791513698ULL, 0x8209144421006751ULL);
  const __uint128_t actual = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("subhn %0.8b, %1.8h, %2.8h")(lhs, rhs);
  ASSERT_EQ(actual, MakeUInt128(0x948527bf3795814dULL, 0x0ULL));
}
5639 
TEST(Arm64InsnTest, SubHighNarrowUpperInt16x8) {
  // SUBHN2 narrows into the upper half; the accumulator's lower half survives.
  const __uint128_t lhs = MakeUInt128(0x5324944166803962ULL, 0x6579787718556084ULL);
  const __uint128_t rhs = MakeUInt128(0x1066587969981635ULL, 0x7473638405257145ULL);
  const __uint128_t acc = MakeUInt128(0x3142980919065925ULL, 0x0937221696461515ULL);
  const __uint128_t actual =
      ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("subhn2 %0.16b, %1.8h, %2.8h")(lhs, rhs, acc);
  ASSERT_EQ(actual, MakeUInt128(0x3142980919065925ULL, 0xf11413ef423bfc23ULL));
}
5648 
TEST(Arm64InsnTest, RoundingAddHighNarrowInt16x8) {
  // RADDHN is ADDHN with rounding (adds half an LSB before narrowing).
  const __uint128_t lhs = MakeUInt128(0x8039626579787718ULL, 0x5560845529654126ULL);
  const __uint128_t rhs = MakeUInt128(0x3440171274947042ULL, 0x0562230538994561ULL);
  const __uint128_t actual =
      ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("raddhn %0.8b, %1.8h, %2.8h")(lhs, rhs);
  ASSERT_EQ(actual, MakeUInt128(0x5ba76287b479eee7ULL, 0x0000000000000000ULL));
}
5655 
TEST(Arm64InsnTest, RoundingSubHighNarrowInt16x8) {
  // RSUBHN is SUBHN with rounding (adds half an LSB before narrowing).
  const __uint128_t lhs = MakeUInt128(0x3063432858785698ULL, 0x3052358089330657ULL);
  const __uint128_t rhs = MakeUInt128(0x0216471550979259ULL, 0x2309907965473761ULL);
  const __uint128_t actual =
      ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("rsubhn %0.8b, %1.8h, %2.8h")(lhs, rhs);
  ASSERT_EQ(actual, MakeUInt128(0x0da524cf2efc08c4ULL, 0x0000000000000000ULL));
}
5662 
// NOTE(review): the test name says Int8x2, but the instruction operates on the
// two 64-bit lanes (%1.2d); renaming would change the gtest filter name.
TEST(Arm64InsnTest, ScalarPairwiseAddInt8x2) {
  // ADDP (scalar) sums the two doubleword lanes into a single D result.
  const __uint128_t operand = MakeUInt128(0x6257591633303910ULL, 0x7225383742182140ULL);
  const __uint128_t actual = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("addp %d0, %1.2d")(operand);
  ASSERT_EQ(actual, MakeUInt128(0xd47c914d75485a50ULL, 0x0000000000000000ULL));
}
5668 
TEST(Arm64InsnTest, AddAcrossInt8x8) {
  // ADDV sums all eight byte lanes into a single B result (modulo 256).
  const __uint128_t operand = MakeUInt128(0x0681216028764962ULL, 0x8674460477464915ULL);
  const __uint128_t actual = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("addv %b0, %1.8b")(operand);
  ASSERT_EQ(actual, MakeUInt128(0x51ULL, 0x0ULL));
}
5674 
TEST(Arm64InsnTest, SignedAddLongAcrossInt16x8) {
  // SADDLV sign-extends each halfword and sums them into an S result.
  const __uint128_t operand = MakeUInt128(0x9699557377273756ULL, 0x6761552711392258ULL);
  const __uint128_t actual = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("saddlv %s0, %1.8h")(operand);
  ASSERT_EQ(actual, MakeUInt128(0x0000000000018aa2ULL, 0x0000000000000000ULL));
}
5680 
TEST(Arm64InsnTest, UnsignedAddLongAcrossInt16x8) {
  // UADDLV zero-extends each halfword and sums them into an S result.
  const __uint128_t operand = MakeUInt128(0x7986396522961312ULL, 0x8017826797172898ULL);
  const __uint128_t actual = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("uaddlv %s0, %1.8h")(operand);
  ASSERT_EQ(actual, MakeUInt128(0x000000000002aac0ULL, 0x0000000000000000ULL));
}
5686 
TEST(Arm64InsnTest, SignedMaximumAcrossInt16x8) {
  // SMAXV picks the largest halfword, comparing lanes as signed values.
  const __uint128_t operand = MakeUInt128(0x8482065967379473ULL, 0x1680864156456505ULL);
  const __uint128_t actual = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("smaxv %h0, %1.8h")(operand);
  ASSERT_EQ(actual, MakeUInt128(0x0000000000006737ULL, 0x0000000000000000ULL));
}
5692 
TEST(Arm64InsnTest, SignedMinimumAcrossInt16x8) {
  // SMINV picks the smallest halfword, comparing lanes as signed values.
  const __uint128_t operand = MakeUInt128(0x6772530431825197ULL, 0x5791679296996504ULL);
  const __uint128_t actual = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("sminv %h0, %1.8h")(operand);
  ASSERT_EQ(actual, MakeUInt128(0x0000000000009699ULL, 0x0000000000000000ULL));
}
5698 
TEST(Arm64InsnTest, UnsignedMaximumAcrossInt16x8) {
  // UMAXV picks the largest halfword, comparing lanes as unsigned values.
  const __uint128_t operand = MakeUInt128(0x6500378070466126ULL, 0x4706021457505793ULL);
  const __uint128_t actual = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("umaxv %h0, %1.8h")(operand);
  ASSERT_EQ(actual, MakeUInt128(0x0000000000007046ULL, 0x0000000000000000ULL));
}
5704 
TEST(Arm64InsnTest, UnsignedMinimumAcrossInt16x8) {
  // UMINV picks the smallest halfword, comparing lanes as unsigned values.
  const __uint128_t operand = MakeUInt128(0x5223572397395128ULL, 0x8181640597859142ULL);
  const __uint128_t actual = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("uminv %h0, %1.8h")(operand);
  ASSERT_EQ(actual, MakeUInt128(0x0000000000005128ULL, 0x0000000000000000ULL));
}
5710 
TEST(Arm64InsnTest, CountLeadingZerosI8x8) {
  // CLZ counts leading zero bits independently in each byte lane.
  const __uint128_t operand = MakeUInt128(0x1452635608277857ULL, 0x7134275778960917ULL);
  const __uint128_t actual = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("clz %0.8b, %1.8b")(operand);
  ASSERT_EQ(actual, MakeUInt128(0x0301010104020101ULL, 0x0000000000000000ULL));
}
5716 
TEST(Arm64InsnTest, CountLeadingSignBitsI8x8) {
  // CLS counts, per byte lane, the bits after the sign bit that match it.
  const __uint128_t operand = MakeUInt128(0x8925892354201995ULL, 0x6112129021960864ULL);
  const __uint128_t actual = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("cls %0.8b, %1.8b")(operand);
  ASSERT_EQ(actual, MakeUInt128(0x0001000100010200ULL, 0x0000000000000000ULL));
}
5722 
TEST(Arm64InsnTest, Cnt) {
  // CNT computes the population count of each of the sixteen byte lanes.
  const __uint128_t operand = MakeUInt128(0x9835484875625298ULL, 0x7524238730775595ULL);
  const __uint128_t actual = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("cnt %0.16b, %1.16b")(operand);
  ASSERT_EQ(actual, MakeUInt128(0x0304020205030303ULL, 0x0502030402060404ULL));
}
5728 
TEST(Arm64InsnTest, SimdScalarMove) {
  // MOV (scalar, element) extracts byte lane 5 and zeroes the rest.
  const __uint128_t operand = MakeUInt128(0x1433345477624168ULL, 0x6251898356948556ULL);
  const __uint128_t actual = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("mov %b0, %1.b[5]")(operand);
  ASSERT_EQ(actual, MakeUInt128(0x0000000000000034ULL, 0x0000000000000000ULL));
}
5734 
TEST(Arm64InsnTest, SimdVectorElemDuplicate) {
  // DUP (vector, element) broadcasts byte lane 5 into all eight lanes.
  const __uint128_t operand = MakeUInt128(0x3021647155097925ULL, 0x9230990796547376ULL);
  const __uint128_t actual = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("dup %0.8b, %1.b[5]")(operand);
  ASSERT_EQ(actual, MakeUInt128(0x6464646464646464ULL, 0x0000000000000000ULL));
}
5740 
TEST(Arm64InsnTest, SimdVectorElemDuplicateInt16AtIndex7) {
  // DUP broadcasts the topmost halfword lane (index 7) into all four lanes.
  const __uint128_t operand = MakeUInt128(0x2582262052248940ULL, 0x7726719478268482ULL);
  const __uint128_t actual = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("dup %0.4h, %1.h[7]")(operand);
  ASSERT_EQ(actual, MakeUInt128(0x7726772677267726ULL, 0x0000000000000000ULL));
}
5746 
TEST(Arm64InsnTest, SimdVectorElemInsert) {
  // MOV (vector, element) copies source word lane 1 into destination word
  // lane 2, leaving the other lanes of the accumulator intact.
  const __uint128_t src = MakeUInt128(0x7120844335732654ULL, 0x8938239119325974ULL);
  const __uint128_t acc = MakeUInt128(0x7656180937734440ULL, 0x3070746921120191ULL);
  const __uint128_t actual = ASM_INSN_WRAP_FUNC_W_RES_W0_ARG("mov %0.s[2], %1.s[1]")(src, acc);
  ASSERT_EQ(actual, MakeUInt128(0x7656180937734440ULL, 0x3070746971208443ULL));
}
5753 
TEST(Arm64InsnTest, NegateInt64x1) {
  // Scalar NEG on the D register; the upper half of the result is zero.
  constexpr auto AsmNeg = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("neg %d0, %d1");
  const __uint128_t operand = MakeUInt128(0x8389522868478312ULL, 0x3552658213144957ULL);
  ASSERT_EQ(AsmNeg(operand), MakeUInt128(0x7c76add797b87ceeULL, 0x0000000000000000ULL));

  // INT64_MIN negates to itself under two's-complement wraparound.
  const __uint128_t int64_min = MakeUInt128(1ULL << 63, 0U);
  ASSERT_EQ(AsmNeg(int64_min), MakeUInt128(1ULL << 63, 0U));
}
5762 
TEST(Arm64InsnTest, NegateInt16x8) {
  // NEG negates each of the eight halfword lanes independently.
  const __uint128_t operand = MakeUInt128(0x4411010446823252ULL, 0x7162010526522721ULL);
  const __uint128_t actual = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("neg %0.8h, %1.8h")(operand);
  ASSERT_EQ(actual, MakeUInt128(0xbbeffefcb97ecdaeULL, 0x8e9efefbd9aed8dfULL));
}
5768 
TEST(Arm64InsnTest, NotI8x8) {
  // NOT inverts every bit of the low 64 bits; the upper half is cleared.
  const __uint128_t operand = MakeUInt128(0x6205647693125705ULL, 0x8635662018558100ULL);
  const __uint128_t actual = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("not %0.8b, %1.8b")(operand);
  ASSERT_EQ(actual, MakeUInt128(0x9dfa9b896ceda8faULL, 0x0000000000000000ULL));
}
5774 
TEST(Arm64InsnTest, RbitInt8x8) {
  // RBIT reverses the bit order within each byte lane.
  const __uint128_t operand = MakeUInt128(0x4713296210734043ULL, 0x7518957359614589ULL);
  const __uint128_t actual = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("rbit %0.8b, %1.8b")(operand);
  ASSERT_EQ(actual, MakeUInt128(0xe2c8944608ce02c2ULL, 0x0000000000000000ULL));
}
5780 
TEST(Arm64InsnTest, Rev16Int8x16) {
  // REV16 swaps the two bytes within each 16-bit chunk.
  const __uint128_t operand = MakeUInt128(0x9904801094121472ULL, 0x2131794764777262ULL);
  const __uint128_t actual = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("rev16 %0.16b, %1.16b")(operand);
  ASSERT_EQ(actual, MakeUInt128(0x0499108012947214ULL, 0x3121477977646272ULL));
}
5786 
TEST(Arm64InsnTest, Rev32Int16x8) {
  // REV32 swaps the two halfwords within each 32-bit chunk.
  const __uint128_t operand = MakeUInt128(0x8662237172159160ULL, 0x7716692547487389ULL);
  const __uint128_t actual = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("rev32 %0.8h, %1.8h")(operand);
  ASSERT_EQ(actual, MakeUInt128(0x2371866291607215ULL, 0x6925771673894748ULL));
}
5792 
TEST(Arm64InsnTest, Rev64Int32x4) {
  // REV64 swaps the two words within each 64-bit chunk.
  const __uint128_t operand = MakeUInt128(0x5306736096571209ULL, 0x1807638327166416ULL);
  const __uint128_t actual = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("rev64 %0.4s, %1.4s")(operand);
  ASSERT_EQ(actual, MakeUInt128(0x9657120953067360ULL, 0x2716641618076383ULL));
}
5798 
TEST(Arm64InsnTest, TblInt8x8) {
  // Single-register TBL: each index byte selects a byte from the table.
  // The table is laid out so entry N has value 0xNN, making results readable.
  const __uint128_t table = MakeUInt128(0x7766554433221100ULL, 0xffeeddccbbaa9988ULL);
  const __uint128_t indices = MakeUInt128(0x0104011509120605ULL, 0x0315080907091312ULL);
  const __uint128_t actual =
      ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("tbl %0.8b, {%1.16b}, %2.8b")(table, indices);
  ASSERT_EQ(actual, MakeUInt128(0x1144110099006655ULL, 0x0000000000000000ULL));
}
5805 
TEST(Arm64InsnTest, TblInt8x16) {
  // Single-register TBL over all sixteen index lanes.
  const __uint128_t table = MakeUInt128(0x7766554433221100ULL, 0xffeeddccbbaa9988ULL);
  const __uint128_t indices = MakeUInt128(0x0905060808010408ULL, 0x0506000206030202ULL);
  const __uint128_t actual =
      ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("tbl %0.16b, {%1.16b}, %2.16b")(table, indices);
  ASSERT_EQ(actual, MakeUInt128(0x9955668888114488ULL, 0x5566002266332222ULL));
}
5812 
TEST(Arm64InsnTest, Tbl2Int8x16) {
  __uint128_t arg1 = MakeUInt128(0x7766554433221100ULL, 0xffeeddccbbaa9988ULL);
  __uint128_t arg2 = MakeUInt128(0x76655443322110ffULL, 0xfeeddccbbaa99887ULL);
  __uint128_t arg3 = MakeUInt128(0x0224052800020910ULL, 0x1807280319002203ULL);
  __uint128_t res;

  // Hardcode v31 and v0 so that the TBL instruction gets consecutive registers
  // (the two-register table list wraps around the register file).
  // Out-of-range indices (>= 32) produce zero bytes.
  asm("mov v31.16b, %1.16b\n\t"
      "mov v0.16b, %2.16b\n\t"
      "tbl %0.16b, {v31.16b, v0.16b}, %3.16b"
      : "=w"(res)
      : "w"(arg1), "w"(arg2), "w"(arg3)
      : "v31", "v0");

  ASSERT_EQ(res, MakeUInt128(0x22005500002299ffULL, 0x8777003398000033ULL));
}
5829 
TEST(Arm64InsnTest, Tbl3Int8x16) {
  __uint128_t arg1 = MakeUInt128(0x7766554433221100ULL, 0xffeeddccbbaa9988ULL);
  __uint128_t arg2 = MakeUInt128(0x76655443322110ffULL, 0xfeeddccbbaa99887ULL);
  __uint128_t arg3 = MakeUInt128(0x7060504030201000ULL, 0xf0e0d0c0b0a09080ULL);
  __uint128_t arg4 = MakeUInt128(0x0718264039291035ULL, 0x3526190040211304ULL);
  __uint128_t res;

  // Hardcode v30, v31, and v0 so that the TBL instruction gets consecutive
  // registers (the three-register table list wraps around the register file).
  // Fix: the clobber list previously named v0/v1/v2, leaving the actually
  // modified v30 and v31 undeclared to the compiler.
  asm("mov v30.16b, %1.16b\n\t"
      "mov v31.16b, %2.16b\n\t"
      "mov v0.16b, %3.16b\n\t"
      "tbl %0.16b, {v30.16b-v0.16b}, %4.16b"
      : "=w"(res)
      : "w"(arg1), "w"(arg2), "w"(arg3), "w"(arg4)
      : "v30", "v31", "v0");

  ASSERT_EQ(res, MakeUInt128(0x778760000090ff00ULL, 0x0060980000103244ULL));
}
5848 
TEST(Arm64InsnTest, Tbl4Int8x16) {
  __uint128_t arg1 = MakeUInt128(0x7766554433221100ULL, 0xffeeddccbbaa9988ULL);
  __uint128_t arg2 = MakeUInt128(0x76655443322110ffULL, 0xfeeddccbbaa99887ULL);
  __uint128_t arg3 = MakeUInt128(0x7060504030201000ULL, 0xf0e0d0c0b0a09080ULL);
  __uint128_t arg4 = MakeUInt128(0x7f6f5f4f3f2f1fffULL, 0xffefdfcfbfaf9f8fULL);
  __uint128_t arg5 = MakeUInt128(0x0718264039291035ULL, 0x3526190040211304ULL);
  __uint128_t res;

  // Hardcode v30, v31, v0, and v1 so that the TBL instruction gets consecutive
  // registers (the four-register table list wraps around the register file).
  asm("mov v30.16b, %1.16b\n\t"
      "mov v31.16b, %2.16b\n\t"
      "mov v0.16b, %3.16b\n\t"
      "mov v1.16b, %4.16b\n\t"
      "tbl %0.16b, {v30.16b-v1.16b}, %5.16b"
      : "=w"(res)
      : "w"(arg1), "w"(arg2), "w"(arg3), "w"(arg4), "w"(arg5)
      : "v30", "v31", "v0", "v1");

  ASSERT_EQ(res, MakeUInt128(0x778760009f90ff5fULL, 0x5f60980000103244ULL));
}
5869 
TEST(Arm64InsnTest, TbxInt8x16) {
  // TBX is TBL except out-of-range indices leave the destination byte intact
  // instead of zeroing it.
  const __uint128_t table = MakeUInt128(0x7766554433221100ULL, 0xffeeddccbbaa9988ULL);
  const __uint128_t indices = MakeUInt128(0x0915061808010408ULL, 0x0516000206031202ULL);
  const __uint128_t dest = MakeUInt128(0x6668559233565463ULL, 0x9138363185745698ULL);
  const __uint128_t actual =
      ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("tbx %0.16b, {%1.16b}, %2.16b")(table, indices, dest);
  ASSERT_EQ(actual, MakeUInt128(0x9968669288114488ULL, 0x5538002266335622ULL));
}
5878 
TEST(Arm64InsnTest, Tbx2Int8x16) {
  __uint128_t arg1 = MakeUInt128(0x7766554433221100ULL, 0xffeeddccbbaa9988ULL);
  __uint128_t arg2 = MakeUInt128(0x76655443322110ffULL, 0xfeeddccbbaa99887ULL);
  __uint128_t arg3 = MakeUInt128(0x0224052800020910ULL, 0x1807280319002203ULL);
  // res is pre-initialized because TBX keeps destination bytes for
  // out-of-range indices (tied via the "0" constraint below).
  __uint128_t res = MakeUInt128(0x7494078488442377ULL, 0x2175154334260306ULL);

  // Hardcode v0 and v1 so that the TBX instruction gets consecutive registers.
  asm("mov v0.16b, %1.16b\n\t"
      "mov v1.16b, %2.16b\n\t"
      "tbx %0.16b, {v0.16b, v1.16b}, %3.16b"
      : "=w"(res)
      : "w"(arg1), "w"(arg2), "w"(arg3), "0"(res)
      : "v0", "v1");

  ASSERT_EQ(res, MakeUInt128(0x22945584002299ffULL, 0x8777153398000333ULL));
}
5895 
TEST(Arm64InsnTest, Tbx3Int8x16) {
  __uint128_t arg1 = MakeUInt128(0x7766554433221100ULL, 0xffeeddccbbaa9988ULL);
  __uint128_t arg2 = MakeUInt128(0x76655443322110ffULL, 0xfeeddccbbaa99887ULL);
  __uint128_t arg3 = MakeUInt128(0x7060504030201000ULL, 0xf0e0d0c0b0a09080ULL);
  __uint128_t arg4 = MakeUInt128(0x0718264039291035ULL, 0x3526190040211304ULL);
  // res is pre-initialized because TBX keeps destination bytes for
  // out-of-range indices (tied via the "0" constraint below).
  __uint128_t res = MakeUInt128(0x0136776310849135ULL, 0x1615642269847507ULL);

  // Hardcode v0, v1, and v2 so that the TBX instruction gets consecutive registers.
  asm("mov v0.16b, %1.16b\n\t"
      "mov v1.16b, %2.16b\n\t"
      "mov v2.16b, %3.16b\n\t"
      "tbx %0.16b, {v0.16b, v1.16b, v2.16b}, %4.16b"
      : "=w"(res)
      : "w"(arg1), "w"(arg2), "w"(arg3), "w"(arg4), "0"(res)
      : "v0", "v1", "v2");

  ASSERT_EQ(res, MakeUInt128(0x778760631090ff35ULL, 0x1660980069103244ULL));
}
5914 
TEST(Arm64InsnTest, Tbx4Int8x16) {
  // TBX with a four-register table {v0-v3}: arg1..arg4 form the 64-byte
  // table, arg5 holds the indices, and res seeds the destination register.
  __uint128_t arg1 = MakeUInt128(0x7766554433221100ULL, 0xffeeddccbbaa9988ULL);
  __uint128_t arg2 = MakeUInt128(0x76655443322110ffULL, 0xfeeddccbbaa99887ULL);
  __uint128_t arg3 = MakeUInt128(0x7060504030201000ULL, 0xf0e0d0c0b0a09080ULL);
  __uint128_t arg4 = MakeUInt128(0x7f6f5f4f3f2f1fffULL, 0xffefdfcfbfaf9f8fULL);
  __uint128_t arg5 = MakeUInt128(0x0718264039291035ULL, 0x3526190040211304ULL);
  __uint128_t res = MakeUInt128(0x5818319637637076ULL, 0x1799191920357958ULL);

  // Hardcode v0, v1, v2, and v3 so that the TBX instruction gets consecutive registers.
  asm("mov v0.16b, %1.16b\n\t"
      "mov v1.16b, %2.16b\n\t"
      "mov v2.16b, %3.16b\n\t"
      "mov v3.16b, %4.16b\n\t"
      "tbx %0.16b, {v0.16b-v3.16b}, %5.16b"
      : "=w"(res)
      : "w"(arg1), "w"(arg2), "w"(arg3), "w"(arg4), "w"(arg5), "0"(res)
      : "v0", "v1", "v2", "v3");

  ASSERT_EQ(res, MakeUInt128(0x778760969f90ff5fULL, 0x5f60980020103244ULL));
}
5935 
TEST(Arm64InsnTest, Trn1Int8x8) {
  // TRN1 on the 8-byte (64-bit) arrangement; the upper half of the
  // destination is zeroed, as the expected value shows.
  __uint128_t arg1 = MakeUInt128(0x2075916729700785ULL, 0x0580717186381054ULL);
  __uint128_t arg2 = MakeUInt128(0x2786099055690013ULL, 0x4137182368370991ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("trn1 %0.8b, %1.8b, %2.8b")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x8675906769701385ULL, 0x0000000000000000ULL));
}
5942 
TEST(Arm64InsnTest, Trn2Int16x8) {
  // TRN2 on eight 16-bit lanes across the full 128-bit registers.
  __uint128_t arg1 = MakeUInt128(0x6685592335654639ULL, 0x1383631857456981ULL);
  __uint128_t arg2 = MakeUInt128(0x7494078488442377ULL, 0x2175154334260306ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("trn2 %0.8h, %1.8h, %2.8h")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x7494668588443565ULL, 0x2175138334265745ULL));
}
5949 
TEST(Arm64InsnTest, Uzp1Int8x8) {
  // UZP1 on the 8-byte arrangement; the 64-bit form zeroes the upper half.
  __uint128_t arg1 = MakeUInt128(0x4954893139394489ULL, 0x9216125525597701ULL);
  __uint128_t arg2 = MakeUInt128(0x2783467926101995ULL, 0x5852247172201777ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("uzp1 %0.8b, %1.8b, %2.8b")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x8379109554313989ULL, 0x0000000000000000ULL));
}
5956 
TEST(Arm64InsnTest, Uzp2Int16x8) {
  // UZP2 on eight 16-bit lanes across the full 128-bit registers.
  __uint128_t arg1 = MakeUInt128(0x6745642390585850ULL, 0x2167190313952629ULL);
  __uint128_t arg2 = MakeUInt128(0x3620129476918749ULL, 0x7519101147231528ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("uzp2 %0.8h, %1.8h, %2.8h")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x2167139567459058ULL, 0x7519472336207691ULL));
}
5963 
TEST(Arm64InsnTest, Zip2Int64x2) {
  // ZIP2 on the 2x64-bit arrangement takes the upper element of each source:
  // result = {arg1[1], arg2[1]}.
  //
  // Fix: the test is named Zip2Int64x2 (and sits with the ZIP tests, next to
  // Zip1Int64x2) but previously encoded "uzp2", so ZIP2 was never exercised.
  // For the .2d arrangement UZP2 happens to produce the identical result, so
  // the expected value below is unchanged.
  __uint128_t arg1 = MakeUInt128(0x1494271410093913ULL, 0x6913810725813781ULL);
  __uint128_t arg2 = MakeUInt128(0x3578940055995001ULL, 0x8354251184172136ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("zip2 %0.2d, %1.2d, %2.2d")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x6913810725813781ULL, 0x8354251184172136ULL));
}
5970 
TEST(Arm64InsnTest, Zip1Int8x8) {
  // ZIP1 on the 8-byte arrangement; the 64-bit form zeroes the upper half.
  __uint128_t arg1 = MakeUInt128(0x7499235630254947ULL, 0x8024901141952123ULL);
  __uint128_t arg2 = MakeUInt128(0x3331239480494707ULL, 0x9119153267343028ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("zip1 %0.8b, %1.8b, %2.8b")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x8030492547490747ULL, 0x0000000000000000ULL));
}
5977 
TEST(Arm64InsnTest, Zip1Int64x2) {
  // ZIP1 on the 2x64-bit arrangement takes the lower element of each source:
  // result = {arg1[0], arg2[0]}, as the expected value shows.
  __uint128_t arg1 = MakeUInt128(0x9243530136776310ULL, 0x8491351615642269ULL);
  __uint128_t arg2 = MakeUInt128(0x0551199581831963ULL, 0x7637076179919192ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("zip1 %0.2d, %1.2d, %2.2d")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x9243530136776310ULL, 0x0551199581831963ULL));
}
5984 
TEST(Arm64InsnTest, Zip2Int16x8) {
  // ZIP2 on eight 16-bit lanes: interleaves the upper four lanes of each source.
  __uint128_t arg1 = MakeUInt128(0x5831832713142517ULL, 0x0296923488962766ULL);
  __uint128_t arg2 = MakeUInt128(0x2934595889706953ULL, 0x6534940603402166ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("zip2 %0.8h, %1.8h, %2.8h")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x0340889621662766ULL, 0x6534029694069234ULL));
}
5991 
TEST(Arm64InsnTest, SignedMaxInt16x8) {
  // SMAX: per-lane signed maximum on eight 16-bit lanes.
  __uint128_t arg1 = MakeUInt128(0x9901573466102371ULL, 0x2235478911292547ULL);
  __uint128_t arg2 = MakeUInt128(0x4922157650450812ULL, 0x0677173571202718ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("smax %0.8h, %1.8h, %2.8h")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x4922573466102371ULL, 0x2235478971202718ULL));
}
5998 
TEST(Arm64InsnTest, SignedMinInt16x8) {
  // SMIN: per-lane signed minimum on eight 16-bit lanes.
  __uint128_t arg1 = MakeUInt128(0x7820385653909910ULL, 0x4775941413215432ULL);
  __uint128_t arg2 = MakeUInt128(0x0084531214065935ULL, 0x8090412711359200ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("smin %0.8h, %1.8h, %2.8h")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x0084385614069910ULL, 0x8090941411359200ULL));
}
6005 
TEST(Arm64InsnTest, SignedMaxPairwiseInt16x8) {
  // SMAXP: signed maximum of adjacent lane pairs drawn from both sources.
  __uint128_t arg1 = MakeUInt128(0x6998469884770232ULL, 0x3823840055655517ULL);
  __uint128_t arg2 = MakeUInt128(0x3272867600724817ULL, 0x2987637569816335ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("smaxp %0.8h, %1.8h, %2.8h")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x3823556569980232ULL, 0x6375698132724817ULL));
}
6012 
TEST(Arm64InsnTest, SignedMinPairwiseInt16x8) {
  // SMINP: signed minimum of adjacent lane pairs drawn from both sources.
  __uint128_t arg1 = MakeUInt128(0x8865701568501691ULL, 0x8647488541679154ULL);
  __uint128_t arg2 = MakeUInt128(0x1821553559732353ULL, 0x0686043010675760ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("sminp %0.8h, %1.8h, %2.8h")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x8647915488651691ULL, 0x0430106718212353ULL));
}
6019 
TEST(Arm64InsnTest, UnsignedMaxInt16x8) {
  // UMAX: per-lane unsigned maximum on eight 16-bit lanes.
  __uint128_t arg1 = MakeUInt128(0x7639975974619383ULL, 0x5845749159880976ULL);
  __uint128_t arg2 = MakeUInt128(0x5928493695941434ULL, 0x0814685298150539ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("umax %0.8h, %1.8h, %2.8h")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x7639975995949383ULL, 0x5845749198150976ULL));
}
6026 
TEST(Arm64InsnTest, UnsignedMinInt16x8) {
  // UMIN: per-lane unsigned minimum on eight 16-bit lanes.
  __uint128_t arg1 = MakeUInt128(0x2888773717663748ULL, 0x6027660634960353ULL);
  __uint128_t arg2 = MakeUInt128(0x6983349515101986ULL, 0x4269887847171939ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("umin %0.8h, %1.8h, %2.8h")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x2888349515101986ULL, 0x4269660634960353ULL));
}
6033 
TEST(Arm64InsnTest, UnsignedMaxPairwiseInt16x8) {
  // UMAXP: unsigned maximum of adjacent lane pairs drawn from both sources.
  __uint128_t arg1 = MakeUInt128(0x1318583584066747ULL, 0x2370297149785084ULL);
  __uint128_t arg2 = MakeUInt128(0x4570249413983163ULL, 0x4332378975955680ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("umaxp %0.8h, %1.8h, %2.8h")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x2971508458358406ULL, 0x4332759545703163ULL));
}
6040 
TEST(Arm64InsnTest, UnsignedMinPairwiseInt16x8) {
  // UMINP: unsigned minimum of adjacent lane pairs drawn from both sources.
  __uint128_t arg1 = MakeUInt128(0x9538121791319145ULL, 0x1350099384631177ULL);
  __uint128_t arg2 = MakeUInt128(0x7769055481028850ULL, 0x2080858008781157ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("uminp %0.8h, %1.8h, %2.8h")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x0993117712179131ULL, 0x2080087805548102ULL));
}
6047 
TEST(Arm64InsnTest, SignedHalvingAddInt16x8) {
  // SHADD: per-lane signed (sum >> 1) without intermediate overflow.
  __uint128_t arg1 = MakeUInt128(0x1021944719713869ULL, 0x2560841624511239ULL);
  __uint128_t arg2 = MakeUInt128(0x8062011318454124ULL, 0x4782050110798760ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("shadd %0.8h, %1.8h, %2.8h")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0xc841caad18db3cc6ULL, 0x3671c48b1a65ccccULL));
}
6054 
TEST(Arm64InsnTest, SignedHalvingSubInt16x8) {
  // SHSUB: per-lane signed (difference >> 1) without intermediate overflow.
  __uint128_t arg1 = MakeUInt128(0x9041210873032402ULL, 0x0106853419472304ULL);
  __uint128_t arg2 = MakeUInt128(0x7666672174986986ULL, 0x8547076781205124ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("shsub %0.8h, %1.8h, %2.8h")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x8ceddcf3ff35dd3eULL, 0x3ddfbee64c13e8f0ULL));
}
6061 
TEST(Arm64InsnTest, SignedRoundingHalvingAddInt16x8) {
  // SRHADD: like SHADD but rounds the halved sum instead of truncating.
  __uint128_t arg1 = MakeUInt128(0x5871487839890810ULL, 0x7429530941060596ULL);
  __uint128_t arg2 = MakeUInt128(0x9443158477539700ULL, 0x9439883949144323ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("srhadd %0.8h, %1.8h, %2.8h")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0xf65a2efe586ecf88ULL, 0x0431eda1450d245dULL));
}
6068 
TEST(Arm64InsnTest, SignedAbsoluteDifferenceInt16x8) {
  // SABD: per-lane absolute difference of signed 16-bit lanes.
  __uint128_t arg1 = MakeUInt128(0x1349607501116498ULL, 0x3278563531614516ULL);
  __uint128_t arg2 = MakeUInt128(0x8457695687109002ULL, 0x9997698412632665ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("sabd %0.8h, %1.8h, %2.8h")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x8ef208e17a01d496ULL, 0x98e1134f1efe1eb1ULL));
}
6075 
TEST(Arm64InsnTest, SignedAbsoluteDifferenceLongInt16x8) {
  // SABDL: signed absolute difference of the low four 16-bit lanes,
  // widened to 32-bit result lanes.
  __uint128_t arg1 = MakeUInt128(0x7419850973346267ULL, 0x9332107268687076ULL);
  __uint128_t arg2 = MakeUInt128(0x8062639919361965ULL, 0x0440995421676278ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("sabdl %0.4s, %1.4h, %2.4h")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x000059fe00004902ULL, 0x0000f3b70000de90ULL));
}
6082 
TEST(Arm64InsnTest, SignedAbsoluteDifferenceLongUpperInt16x8) {
  // SABDL2: same as SABDL but operating on the upper four 16-bit lanes.
  __uint128_t arg1 = MakeUInt128(0x4980559610330799ULL, 0x4145347784574699ULL);
  __uint128_t arg2 = MakeUInt128(0x9921285999993996ULL, 0x1228161521931488ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("sabdl2 %0.4s, %1.8h, %2.8h")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x00009d3c00003211ULL, 0x00002f1d00001e62ULL));
}
6089 
TEST(Arm64InsnTest, SignedAbsoluteDifferenceAccumulateInt16x8) {
  // SABA: signed absolute difference of arg1/arg2 accumulated into arg3
  // (the "0"-tied operand).
  // The lowest element tests the overflow.
  __uint128_t arg1 = MakeUInt128(0x8967'0031'9258'7fffULL, 0x9410'5105'3358'4384ULL);
  __uint128_t arg2 = MakeUInt128(0x6560'2339'1796'8000ULL, 0x6784'4763'7084'7497ULL);
  __uint128_t arg3 = MakeUInt128(0x8333'6555'7900'5555ULL, 0x1914'7319'8862'7135ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("saba %0.8h, %1.8h, %2.8h")(arg1, arg2, arg3);
  ASSERT_EQ(res, MakeUInt128(0x5f2c'885d'fe3e'5554ULL, 0xec88'7cbb'c58e'a248ULL));
}
6098 
TEST(Arm64InsnTest, SignedAbsoluteDifferenceAccumulateInt32x4) {
  // SABA on four 32-bit lanes; arg3 is the accumulator input.
  // The lowest element tests the overflow.
  __uint128_t arg1 = MakeUInt128(0x8967'0031'7fff'ffffULL, 0x9410'5105'3358'4384ULL);
  __uint128_t arg2 = MakeUInt128(0x6560'2339'8000'0000ULL, 0x6784'4763'7084'7497ULL);
  __uint128_t arg3 = MakeUInt128(0x8333'6555'aaaa'5555ULL, 0x1914'7319'8862'7135ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("saba %0.4s, %1.4s, %2.4s")(arg1, arg2, arg3);
  ASSERT_EQ(res, MakeUInt128(0x5f2c'885d'aaaa'5554ULL, 0xec88'6977'c58e'a248ULL));
}
6107 
TEST(Arm64InsnTest, SignedAbsoluteDifferenceAccumulateLongInt16x4) {
  // SABAL: widening (16-bit -> 32-bit) signed absolute difference of the
  // low lanes, accumulated into arg3.
  __uint128_t arg1 = MakeUInt128(0x078464167452167ULL, 0x719048310967671ULL);
  __uint128_t arg2 = MakeUInt128(0x344349481926268ULL, 0x110739948250607ULL);
  __uint128_t arg3 = MakeUInt128(0x949507350316901ULL, 0x731852119552635ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("sabal %0.4s, %1.4h, %2.4h")(arg1, arg2, arg3);
  ASSERT_EQ(res, MakeUInt128(0x094a36265031aa02ULL, 0x073187ed195537e2ULL));
}
6115 
TEST(Arm64InsnTest, SignedAbsoluteDifferenceLongInt32x2) {
  // NOTE(review): the instruction here is SABAL (accumulating), although the
  // test name says only "Long"; with a zero accumulator (arg3) the result
  // equals the plain widened absolute difference. INT32_MAX vs INT32_MIN
  // exercises the widest possible difference, 0xffffffff.
  __uint128_t arg1 = MakeUInt128(0x000000007fffffffULL, 0x0000000000000000ULL);
  __uint128_t arg2 = MakeUInt128(0x0000000080000000ULL, 0x0000000000000000ULL);
  __uint128_t arg3 = MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("sabal %0.2d, %1.2s, %2.2s")(arg1, arg2, arg3);
  ASSERT_EQ(res, MakeUInt128(0x00000000ffffffffULL, 0x0000000000000000ULL));
}
6123 
TEST(Arm64InsnTest, SignedAbsoluteDifferenceAccumulateLongUpperInt16x8) {
  // SABAL2: same as SABAL but consumes the upper four 16-bit lanes.
  __uint128_t arg1 = MakeUInt128(0x690943470482932ULL, 0x414041114654092ULL);
  __uint128_t arg2 = MakeUInt128(0x988344435159133ULL, 0x010773944111840ULL);
  __uint128_t arg3 = MakeUInt128(0x410768498106634ULL, 0x241048239358274ULL);
  __uint128_t res =
      ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("sabal2 %0.4s, %1.8h, %2.8h")(arg1, arg2, arg3);
  ASSERT_EQ(res, MakeUInt128(0x0410a63098108e86ULL, 0x024108863935f59cULL));
}
6132 
TEST(Arm64InsnTest, UnsignedHalvingAddInt16x8) {
  // UHADD: per-lane unsigned (sum >> 1) without intermediate overflow.
  __uint128_t arg1 = MakeUInt128(0x4775379853799732ULL, 0x2344561227858432ULL);
  __uint128_t arg2 = MakeUInt128(0x9684664751333657ULL, 0x3692387201464723ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("uhadd %0.8h, %1.8h, %2.8h")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x6efc4eef525666c4ULL, 0x2ceb4742146565aaULL));
}
6139 
TEST(Arm64InsnTest, UnsignedHalvingSubInt16x8) {
  // UHSUB: per-lane unsigned (difference >> 1) without intermediate overflow.
  __uint128_t arg1 = MakeUInt128(0x9926884349592876ULL, 0x1240075587569464ULL);
  __uint128_t arg2 = MakeUInt128(0x1370562514001179ULL, 0x7133166207153715ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("uhsub %0.8h, %1.8h, %2.8h")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x42db190f1aac0b7eULL, 0xd086f87940202ea7ULL));
}
6146 
TEST(Arm64InsnTest, UnsignedRoundingHalvingAddInt16x8) {
  // URHADD: like UHADD but rounds the halved sum instead of truncating.
  __uint128_t arg1 = MakeUInt128(0x5066533985738887ULL, 0x8661476294434140ULL);
  __uint128_t arg2 = MakeUInt128(0x1049888993160051ULL, 0x2076781035886116ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("urhadd %0.8h, %1.8h, %2.8h")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x30586de18c45446cULL, 0x536c5fb964e6512bULL));
}
6153 
TEST(Arm64InsnTest, UnsignedAbsoluteDifferenceInt16x8) {
  // UABD: per-lane absolute difference of unsigned 16-bit lanes.
  __uint128_t arg1 = MakeUInt128(0x8574664607722834ULL, 0x1540311441529418ULL);
  __uint128_t arg2 = MakeUInt128(0x8047825438761770ULL, 0x7904300015669867ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("uabd %0.8h, %1.8h, %2.8h")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x052d1c0e310410c4ULL, 0x63c401142bec044fULL));
}
6160 
TEST(Arm64InsnTest, UnsignedAbsoluteDifferenceLongInt16x8) {
  // UABDL: unsigned absolute difference of the low four 16-bit lanes,
  // widened to 32-bit result lanes.
  __uint128_t arg1 = MakeUInt128(0x1614585505839727ULL, 0x4209809097817293ULL);
  __uint128_t arg2 = MakeUInt128(0x2393010676638682ULL, 0x4040111304024700ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("uabdl %0.4s, %1.4h, %2.4h")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x000070e0000010a5ULL, 0x00000d7f0000574fULL));
}
6167 
TEST(Arm64InsnTest, UnsignedAbsoluteDifferenceLongUpperInt16x8) {
  // UABDL2: same as UABDL but operating on the upper four 16-bit lanes.
  __uint128_t arg1 = MakeUInt128(0x0347999588867695ULL, 0x0161249722820403ULL);
  __uint128_t arg2 = MakeUInt128(0x0399546327883069ULL, 0x5976249361510102ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("uabdl2 %0.4s, %1.8h, %2.8h")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x00003ecf00000301ULL, 0x0000581500000004ULL));
}
6174 
TEST(Arm64InsnTest, UnsignedAbsoluteDifferenceAccumulateInt16x8) {
  // UABA: unsigned absolute difference of arg1/arg2 accumulated into arg3.
  __uint128_t arg1 = MakeUInt128(0x0857466460772283ULL, 0x4154031144152941ULL);
  __uint128_t arg2 = MakeUInt128(0x8804782543876177ULL, 0x0790430001566986ULL);
  __uint128_t arg3 = MakeUInt128(0x7767957609099669ULL, 0x3607559496515273ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("uaba %0.8h, %1.8h, %2.8h")(arg1, arg2, arg3);
  ASSERT_EQ(res, MakeUInt128(0xf714c73725f9d55dULL, 0x6fcb9583d91092b8ULL));
}
6182 
TEST(Arm64InsnTest, UnsignedAbsoluteDifferenceAccumulateLongInt16x4) {
  // UABAL: widening (16-bit -> 32-bit) unsigned absolute difference of the
  // low lanes, accumulated into arg3.
  __uint128_t arg1 = MakeUInt128(0x8343417044157348ULL, 0x2481833301640566ULL);
  __uint128_t arg2 = MakeUInt128(0x9596688667695634ULL, 0x9141632842641497ULL);
  __uint128_t arg3 = MakeUInt128(0x4533349999480002ULL, 0x6699875888159350ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("uabal %0.4s, %1.4h, %2.4h")(arg1, arg2, arg3);
  ASSERT_EQ(res, MakeUInt128(0x453357ed99481d16ULL, 0x669999ab8815ba66ULL));
}
6190 
TEST(Arm64InsnTest, UnsignedAbsoluteDifferenceAccumulateLongUpperInt16x8) {
  // UABAL2: same as UABAL but consumes the upper four 16-bit lanes.
  __uint128_t arg1 = MakeUInt128(0x998685541703188ULL, 0x778867592902607ULL);
  __uint128_t arg2 = MakeUInt128(0x043212666179192ULL, 0x352093822787888ULL);
  __uint128_t arg3 = MakeUInt128(0x988633599116081ULL, 0x235355570464634ULL);
  __uint128_t res =
      ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("uabal2 %0.4s, %1.8h, %2.8h")(arg1, arg2, arg3);
  ASSERT_EQ(res, MakeUInt128(0x0988d34d9911b302ULL, 0x0235397b7046c371ULL));
}
6199 
TEST(Arm64InsnTest, SignedAddLongPairwiseInt8x16) {
  // SADDLP: signed pairwise add of adjacent bytes into widened 16-bit lanes.
  __uint128_t arg = MakeUInt128(0x6164411096256633ULL, 0x7305409219519675ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("saddlp %0.8h, %1.16b")(arg);
  ASSERT_EQ(res, MakeUInt128(0x00c50051ffbb0099ULL, 0x0078ffd2006a000bULL));
}
6205 
TEST(Arm64InsnTest, SignedAddAccumulateLongPairwiseInt8x16) {
  // SADALP: signed pairwise add of adjacent bytes, widened and accumulated
  // into arg2 (the "0"-tied operand).
  __uint128_t arg1 = MakeUInt128(0x1991646384142707ULL, 0x7988708874229277ULL);
  __uint128_t arg2 = MakeUInt128(0x7217826030500994ULL, 0x5108247835729056ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W0_ARG("sadalp %0.8h, %1.16b")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x71c183272fe809c2ULL, 0x510924703608905fULL));
}
6212 
TEST(Arm64InsnTest, SignedAddAccumulateLongPairwiseInt16x8) {
  // SADALP on 16-bit lane pairs widened to 32-bit accumulators; reuses the
  // same inputs as the 8-bit variant above to cover a second arrangement.
  __uint128_t arg1 = MakeUInt128(0x1991646384142707ULL, 0x7988708874229277ULL);
  __uint128_t arg2 = MakeUInt128(0x7217826030500994ULL, 0x5108247835729056ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W0_ARG("sadalp %0.4s, %1.8h")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x72180054304fb4afULL, 0x51090e88357296efULL));
}
6219 
TEST(Arm64InsnTest, UnsignedAddLongPairwiseInt8x16) {
  // UADDLP: unsigned pairwise add of adjacent bytes into widened 16-bit lanes.
  __uint128_t arg = MakeUInt128(0x1483287348089574ULL, 0x7777527834422109ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("uaddlp %0.8h, %1.16b")(arg);
  ASSERT_EQ(res, MakeUInt128(0x0097009b00500109ULL, 0x00ee00ca0076002aULL));
}
6225 
TEST(Arm64InsnTest, UnsignedAddAccumulateLongPairwiseInt8x16) {
  // UADALP: unsigned pairwise add of adjacent bytes, widened and accumulated
  // into arg2.
  __uint128_t arg1 = MakeUInt128(0x9348154691631162ULL, 0x4928873574718824ULL);
  __uint128_t arg2 = MakeUInt128(0x5207665738825139ULL, 0x6391635767231510ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W0_ARG("uadalp %0.8h, %1.16b")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x52e266b2397651acULL, 0x64026413680815bcULL));
}
6232 
TEST(Arm64InsnTest, SignedAddLong) {
  // SADDL: signed add of the low four 16-bit lanes, widened to 32 bits.
  __uint128_t arg1 = MakeUInt128(0x3478074585067606ULL, 0x3048229409653041ULL);
  __uint128_t arg2 = MakeUInt128(0x1183066710818930ULL, 0x3110887172816751ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("saddl %0.4s, %1.4h, %2.4h")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0xffff9587ffffff36ULL, 0x000045fb00000dacULL));
}
6239 
TEST(Arm64InsnTest, SignedAddLongUpper) {
  // SADDL2: signed widening add of the upper four 16-bit lanes.
  __uint128_t arg1 = MakeUInt128(0x3160683158679946ULL, 0x0165205774052942ULL);
  __uint128_t arg2 = MakeUInt128(0x3053601780313357ULL, 0x2632670547903384ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("saddl2 %0.4s, %1.8h, %2.8h")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x0000bb9500005cc6ULL, 0x000027970000875cULL));
}
6246 
TEST(Arm64InsnTest, SignedSubLong) {
  // SSUBL: signed subtract of the low four 16-bit lanes, widened to 32 bits.
  __uint128_t arg1 = MakeUInt128(0x8566746260879482ULL, 0x0186474876727272ULL);
  __uint128_t arg2 = MakeUInt128(0x2206267646533809ULL, 0x9801966883680994ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("ssubl %0.4s, %1.4h, %2.4h")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x00001a34ffff5c79ULL, 0xffff636000004decULL));
}
6253 
TEST(Arm64InsnTest, SignedSubLongUpper) {
  // SSUBL2: signed widening subtract of the upper four 16-bit lanes.
  __uint128_t arg1 = MakeUInt128(0x3011331753305329ULL, 0x8020166888174813ULL);
  __uint128_t arg2 = MakeUInt128(0x4298868158557781ULL, 0x0343231753064784ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("ssubl2 %0.4s, %1.8h, %2.8h")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0xffff35110000008fULL, 0xffff7cddfffff351ULL));
}
6260 
TEST(Arm64InsnTest, UnsignedAddLong) {
  // UADDL: unsigned add of the low four 16-bit lanes, widened to 32 bits.
  __uint128_t arg1 = MakeUInt128(0x3126059505777727ULL, 0x5424712416483128ULL);
  __uint128_t arg2 = MakeUInt128(0x3298207236175057ULL, 0x4673870128209575ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("uaddl %0.4s, %1.4h, %2.4h")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x00003b8e0000c77eULL, 0x000063be00002607ULL));
}
6267 
TEST(Arm64InsnTest, UnsignedAddLongUpper) {
  // UADDL2: unsigned widening add of the upper four 16-bit lanes.
  __uint128_t arg1 = MakeUInt128(0x3384698499778726ULL, 0x7065551918544686ULL);
  __uint128_t arg2 = MakeUInt128(0x9846947849573462ULL, 0x2606294219624557ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("uaddl2 %0.4s, %1.8h, %2.8h")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x000031b600008bddULL, 0x0000966b00007e5bULL));
}
6274 
TEST(Arm64InsnTest, UnsignedSubLong) {
  // USUBL: unsigned subtract of the low four 16-bit lanes, widened to 32 bits
  // (differences can go negative, as the ffff... lanes show).
  __uint128_t arg1 = MakeUInt128(0x4378111988556318ULL, 0x7777925372011667ULL);
  __uint128_t arg2 = MakeUInt128(0x1853954183598443ULL, 0x8305203762819440ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("usubl %0.4s, %1.4h, %2.4h")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x000004fcffffded5ULL, 0x00002b25ffff7bd8ULL));
}
6281 
TEST(Arm64InsnTest, UnsignedSubLongUpper) {
  // USUBL2: unsigned widening subtract of the upper four 16-bit lanes.
  __uint128_t arg1 = MakeUInt128(0x5228717440266638ULL, 0x9148817173086436ULL);
  __uint128_t arg2 = MakeUInt128(0x1113890694202790ULL, 0x8814311944879941ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("usubl2 %0.4s, %1.8h, %2.8h")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x00002e81ffffcaf5ULL, 0x0000093400005058ULL));
}
6288 
TEST(Arm64InsnTest, SignedAddWide) {
  // SADDW: add sign-extended low 16-bit lanes of arg2 to 32-bit lanes of arg1.
  __uint128_t arg1 = MakeUInt128(0x7844598183134112ULL, 0x9001999205981352ULL);
  __uint128_t arg2 = MakeUInt128(0x2051173365856407ULL, 0x8264849427644113ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("saddw %0.4s, %1.4s, %2.4h")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x7844bf068313a519ULL, 0x9001b9e305982a85ULL));
}
6295 
TEST(Arm64InsnTest, SignedAddWideUpper) {
  // SADDW2: same as SADDW but widening the upper four 16-bit lanes of arg2.
  __uint128_t arg1 = MakeUInt128(0x3407092233436577ULL, 0x9160128093179401ULL);
  __uint128_t arg2 = MakeUInt128(0x7185985999338492ULL, 0x3549564005709955ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("saddw2 %0.4s, %1.4s, %2.8h")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x34070e923342feccULL, 0x916047c99317ea41ULL));
}
6302 
TEST(Arm64InsnTest, SignedSubWide) {
  // SSUBW: subtract sign-extended low 16-bit lanes of arg2 from 32-bit lanes
  // of arg1.
  __uint128_t arg1 = MakeUInt128(0x2302847007312065ULL, 0x8032626417116165ULL);
  __uint128_t arg2 = MakeUInt128(0x9576132723515666ULL, 0x6253667271899853ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("ssubw %0.4s, %1.4s, %2.4h")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x2302611f0730c9ffULL, 0x8032ccee17114e3eULL));
}
6309 
TEST(Arm64InsnTest, SignedSubWideUpper) {
  // SSUBW2: same as SSUBW but widening the upper four 16-bit lanes of arg2.
  __uint128_t arg1 = MakeUInt128(0x4510824783572905ULL, 0x6919885554678860ULL);
  __uint128_t arg2 = MakeUInt128(0x7946280537122704ULL, 0x2466543192145281ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("ssubw2 %0.4s, %1.4s, %2.8h")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x4510f0338356d684ULL, 0x691963ef5467342fULL));
}
6316 
TEST(Arm64InsnTest, UnsignedAddWide) {
  // UADDW: add zero-extended low 16-bit lanes of arg2 to 32-bit lanes of arg1.
  __uint128_t arg1 = MakeUInt128(0x5870785951298344ULL, 0x1729535195378855ULL);
  __uint128_t arg2 = MakeUInt128(0x3457374260859029ULL, 0x0817651557803905ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("uaddw %0.4s, %1.4s, %2.4h")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x5870d8de512a136dULL, 0x172987a89537bf97ULL));
}
6323 
TEST(Arm64InsnTest, UnsignedAddWideUpper) {
  // UADDW2: same as UADDW but widening the upper four 16-bit lanes of arg2.
  __uint128_t arg1 = MakeUInt128(0x7516493270950493ULL, 0x4639382432227188ULL);
  __uint128_t arg2 = MakeUInt128(0x5159740547021482ULL, 0x8971117779237612ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("uaddw2 %0.4s, %1.4s, %2.8h")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x7516c25570957aa5ULL, 0x4639c195322282ffULL));
}
6330 
TEST(Arm64InsnTest, UnsignedSubWide) {
  // USUBW: subtract zero-extended low 16-bit lanes of arg2 from 32-bit lanes
  // of arg1.
  __uint128_t arg1 = MakeUInt128(0x0625247972199786ULL, 0x6854279897799233ULL);
  __uint128_t arg2 = MakeUInt128(0x9579057581890622ULL, 0x5254735822052364ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("usubw %0.4s, %1.4s, %2.4h")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x0624a2f072199164ULL, 0x6853921f97798cbeULL));
}
6337 
TEST(Arm64InsnTest, UnsignedSubWideUpper) {
  // USUBW2: same as USUBW but widening the upper four 16-bit lanes of arg2.
  __uint128_t arg1 = MakeUInt128(0x8242392192695062ULL, 0x0831838145469839ULL);
  __uint128_t arg2 = MakeUInt128(0x2366461363989101ULL, 0x2102177095976704ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("usubw2 %0.4s, %1.4s, %2.8h")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x8241a38a9268e95eULL, 0x0831627f454680c9ULL));
}
6344 
TEST(Arm64InsnTest, SignedMultiplyLongInt8x8) {
  // SMULL: signed multiply of the low eight bytes, widened to 16-bit products.
  __uint128_t arg1 = MakeUInt128(0x9191791552241718ULL, 0x9585361680594741ULL);
  __uint128_t arg2 = MakeUInt128(0x2341933984202187ULL, 0x4564925644346239ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("smull %0.8h, %1.8b, %2.8b")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0xd848048002f7f4a8ULL, 0xf0d3e3d1cc7b04adULL));
}
6351 
TEST(Arm64InsnTest, SignedMultiplyLongInt8x8Upper) {
  // SMULL2: signed widening multiply of the upper eight bytes.
  __uint128_t arg1 = MakeUInt128(0x9314052976347574ULL, 0x8119356709110137ULL);
  __uint128_t arg2 = MakeUInt128(0x7517210080315590ULL, 0x2485309066920376ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("smull2 %0.8h, %1.16b, %2.16b")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x0396f8b20003195aULL, 0xee24f3fd09f0d2f0ULL));
}
6358 
TEST(Arm64InsnTest, UnsignedMultiplyLongInt8x8) {
  // UMULL: unsigned multiply of the low eight bytes, widened to 16-bit products.
  __uint128_t arg1 = MakeUInt128(0x9149055628425039ULL, 0x1275771028402799ULL);
  __uint128_t arg2 = MakeUInt128(0x8066365825488926ULL, 0x4880254566101729ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("umull %0.8h, %1.8b, %2.8b")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x05c812902ad00876ULL, 0x48801d16010e1d90ULL));
}
6365 
TEST(Arm64InsnTest, UnsignedMultiplyLongInt8x8Upper) {
  // UMULL2: unsigned widening multiply of the upper eight bytes.
  __uint128_t arg1 = MakeUInt128(0x9709683408005355ULL, 0x9849175417381883ULL);
  __uint128_t arg2 = MakeUInt128(0x9994469748676265ULL, 0x5165827658483588ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("umull2 %0.8h, %1.16b, %2.16b")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x07e80fc004f84598ULL, 0x30181ccd0bae26b8ULL));
}
6372 
TEST(Arm64InsnTest, SignedMultiplyLongInt8x8IndexedElem) {
  // SMULL (by element): every low 16-bit lane of arg1 is multiplied by
  // lane 2 of arg2, widening to 32-bit products.
  __uint128_t arg1 = MakeUInt128(0x9293459588970695ULL, 0x3653494060340216ULL);
  __uint128_t arg2 = MakeUInt128(0x6544375589004563ULL, 0x2882250545255640ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("smull %0.4s, %1.4h, %2.h[2]")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0xe630cb23016c3279ULL, 0xe8593fcf0f0a1d79ULL));
}
6379 
TEST(Arm64InsnTest, SignedMultiplyLongInt8x8IndexedElemUpper) {
  // SMULL2 (by element): upper 16-bit lanes of arg1 times lane 2 of arg2.
  __uint128_t arg1 = MakeUInt128(0x9279068212073883ULL, 0x7781423356282360ULL);
  __uint128_t arg2 = MakeUInt128(0x8963208068222468ULL, 0x0122482611771858ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("smull2 %0.4s, %1.8h, %2.h[2]")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x0af01400047db000ULL, 0x0f2be08008677980ULL));
}
6386 
TEST(Arm64InsnTest, UnsignedMultiplyLongInt8x8IndexedElem) {
  // UMULL (by element): low 16-bit lanes of arg1 times lane 2 of arg2,
  // widening to 32-bit products.
  __uint128_t arg1 = MakeUInt128(0x9086996033027634ULL, 0x7870810817545011ULL);
  __uint128_t arg2 = MakeUInt128(0x9307141223390866ULL, 0x3938339529425786ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("umull %0.4s, %1.4h, %2.h[2]")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x03ffbe2409445fa8ULL, 0x0b54a16c0c0648c0ULL));
}
6393 
TEST(Arm64InsnTest, UnsignedMultiplyLongInt8x8IndexedElem2) {
  // Same as above, but selects an index (h[4]) that lives in the upper half
  // of the second source register.
  __uint128_t lhs = MakeUInt128(0x9132710495478599ULL, 0x1801969678353214ULL);
  __uint128_t rhs = MakeUInt128(0x6444118926063152ULL, 0x6618167443193550ULL);
  __uint128_t actual = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("umull %0.4s, %1.4h, %2.h[4]")(lhs, rhs);
  ASSERT_EQ(actual, MakeUInt128(0x1f1659301bd26cd0ULL, 0x1e3cb9a017892540ULL));
}
6400 
TEST(Arm64InsnTest, UnsignedMultiplyLongInt8x8IndexedElemUpper) {
  // Verifies UMULL2 (unsigned multiply long, upper lanes) by the indexed
  // element h[2].
  __uint128_t lhs = MakeUInt128(0x9815793678976697ULL, 0x4220575059683440ULL);
  __uint128_t rhs = MakeUInt128(0x8697350201410206ULL, 0x7235850200724522ULL);
  __uint128_t actual = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("umull2 %0.4s, %1.8h, %2.h[2]")(lhs, rhs);
  ASSERT_EQ(actual, MakeUInt128(0x12833ad00ad1a880ULL, 0x0db1244012143ea0ULL));
}
6407 
TEST(Arm64InsnTest, SignedMultiplyAddLongInt8x8) {
  // Verifies SMLAL (signed multiply-accumulate long) on the low 8x8-bit
  // lanes; the third operand is the accumulator.
  __uint128_t mul1 = MakeUInt128(0x9779940012601642ULL, 0x2760926082349304ULL);
  __uint128_t mul2 = MakeUInt128(0x1180643829138347ULL, 0x3546797253992623ULL);
  __uint128_t acc = MakeUInt128(0x3879158299848645ULL, 0x9271734059225620ULL);
  __uint128_t actual =
      ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("smlal %0.8h, %1.8b, %2.8b")(mul1, mul2, acc);
  ASSERT_EQ(actual, MakeUInt128(0x3b5b1ca28ec69893ULL, 0x8b7836c02ef25620ULL));
}
6415 
TEST(Arm64InsnTest, SignedMultiplyAddLongInt8x8Upper) {
  // Verifies SMLAL2 (signed multiply-accumulate long, upper lanes).
  __uint128_t mul1 = MakeUInt128(0x5514435021828702ULL, 0x6685610665003531ULL);
  __uint128_t mul2 = MakeUInt128(0x0502163182060176ULL, 0x0921798468493686ULL);
  __uint128_t acc = MakeUInt128(0x3161293727951873ULL, 0x0789726373537171ULL);
  __uint128_t actual =
      ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("smlal2 %0.8h, %1.16b, %2.16b")(mul1, mul2, acc);
  ASSERT_EQ(actual, MakeUInt128(0x5a69293732c30119ULL, 0x0b1f6288a12c6e89ULL));
}
6424 
TEST(Arm64InsnTest, SignedMultiplySubtractLongInt8x8) {
  // Verifies SMLSL (signed multiply-subtract long) on the low 8x8-bit lanes.
  __uint128_t mul1 = MakeUInt128(0x9662539339538092ULL, 0x2195591918188552ULL);
  __uint128_t mul2 = MakeUInt128(0x6780621499231727ULL, 0x6316321833989693ULL);
  __uint128_t acc = MakeUInt128(0x8075616855911752ULL, 0x9984501320671293ULL);
  __uint128_t actual =
      ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("smlsl %0.8h, %1.8b, %2.8b")(mul1, mul2, acc);
  ASSERT_EQ(actual, MakeUInt128(0x9764560f61112814ULL, 0xc42a811300a11b17ULL));
}
6432 
TEST(Arm64InsnTest, SignedMultiplySubtractLongInt8x8Upper) {
  // Verifies SMLSL2 (signed multiply-subtract long, upper lanes).
  __uint128_t mul1 = MakeUInt128(0x9826903089111856ULL, 0x8798692947051352ULL);
  __uint128_t mul2 = MakeUInt128(0x4816091743243015ULL, 0x3836847072928989ULL);
  __uint128_t acc = MakeUInt128(0x8284602223730145ULL, 0x2655679898627767ULL);
  __uint128_t actual =
      ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("smlsl2 %0.8h, %1.16b, %2.16b")(mul1, mul2, acc);
  ASSERT_EQ(actual, MakeUInt128(0x62e662482c482763ULL, 0x40cd7d88cb3e6577ULL));
}
6441 
TEST(Arm64InsnTest, SignedMultiplyAddLongInt16x4) {
  // Verifies SMLAL with 16-bit source lanes widened to 32-bit accumulators.
  __uint128_t mul1 = MakeUInt128(0x9779940012601642ULL, 0x2760926082349304ULL);
  __uint128_t mul2 = MakeUInt128(0x1180643829138347ULL, 0x3546797253992623ULL);
  __uint128_t acc = MakeUInt128(0x3879158299848645ULL, 0x9271734059225620ULL);
  __uint128_t actual =
      ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("smlal %0.4s, %1.4h, %2.4h")(mul1, mul2, acc);
  ASSERT_EQ(actual, MakeUInt128(0x3b6bd2a28eac7893ULL, 0x8b4c38c02edab620ULL));
}
6449 
TEST(Arm64InsnTest, UnsignedMultiplyAddLongInt8x8) {
  // Verifies UMLAL (unsigned multiply-accumulate long) on the low 8x8-bit
  // lanes.
  __uint128_t mul1 = MakeUInt128(0x9696920253886503ULL, 0x4577183176686885ULL);
  __uint128_t mul2 = MakeUInt128(0x9236814884752764ULL, 0x9846882194973972ULL);
  __uint128_t acc = MakeUInt128(0x9707737187188400ULL, 0x4143231276365048ULL);
  __uint128_t actual =
      ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("umlal %0.8h, %1.8b, %2.8b")(mul1, mul2, acc);
  ASSERT_EQ(actual, MakeUInt128(0xc1d3b199967b852cULL, 0x96cf42b6bfc850d8ULL));
}
6457 
TEST(Arm64InsnTest, UnsignedMultiplyAddLongInt8x8Upper) {
  // Verifies UMLAL2 (unsigned multiply-accumulate long, upper lanes).
  __uint128_t mul1 = MakeUInt128(0x9055637695252326ULL, 0x5361442478023082ULL);
  __uint128_t mul2 = MakeUInt128(0x6811831037735887ULL, 0x0892406130313364ULL);
  __uint128_t acc = MakeUInt128(0x7737101162821461ULL, 0x4661679404090518ULL);
  __uint128_t actual =
      ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("umlal2 %0.8h, %1.16b, %2.16b")(mul1, mul2, acc);
  ASSERT_EQ(actual, MakeUInt128(0x8db710736c124729ULL, 0x48f99ee6150912bcULL));
}
6466 
TEST(Arm64InsnTest, UnsignedMultiplySubtractLongInt8x8) {
  // Verifies UMLSL (unsigned multiply-subtract long) on the low 8x8-bit lanes.
  __uint128_t mul1 = MakeUInt128(0x4577772457520386ULL, 0x5437542828256714ULL);
  __uint128_t mul2 = MakeUInt128(0x1288583454443513ULL, 0x2562054464241011ULL);
  __uint128_t acc = MakeUInt128(0x0379554641905811ULL, 0x6862305964476958ULL);
  __uint128_t actual =
      ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("umlsl %0.8h, %1.8b, %2.8b")(mul1, mul2, acc);
  ASSERT_EQ(actual, MakeUInt128(0xe6ed3f7e40f14e1fULL, 0x6388f1213b5f6208ULL));
}
6474 
TEST(Arm64InsnTest, UnsignedMultiplySubtractLongInt8x8Upper) {
  // Verifies UMLSL2 (unsigned multiply-subtract long, upper lanes).
  __uint128_t mul1 = MakeUInt128(0x4739376564336319ULL, 0x7978680367187307ULL);
  __uint128_t mul2 = MakeUInt128(0x9693924236321448ULL, 0x4503547763156702ULL);
  __uint128_t acc = MakeUInt128(0x5539006542311792ULL, 0x0153464977929066ULL);
  __uint128_t actual =
      ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("umlsl2 %0.8h, %1.16b, %2.16b")(mul1, mul2, acc);
  ASSERT_EQ(actual, MakeUInt128(0x2d64fe6d13ec1784ULL, 0xe0b644e155728f01ULL));
}
6483 
TEST(Arm64InsnTest, SignedShiftLeftInt64x1) {
  // Verifies scalar SSHL: positive counts shift left, negative counts shift
  // right (arithmetic, per the expected sign-fill results below); counts at
  // or beyond 64 are exercised on both sides.
  constexpr auto sshl = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("sshl %d0, %d1, %d2");
  __uint128_t val = MakeUInt128(0x9007497297363549ULL, 0x6453328886984406ULL);
  ASSERT_EQ(sshl(val, -65), MakeUInt128(0xffffffffffffffffULL, 0x0000000000000000ULL));
  ASSERT_EQ(sshl(val, -64), MakeUInt128(0xffffffffffffffffULL, 0x0000000000000000ULL));
  ASSERT_EQ(sshl(val, -63), MakeUInt128(0xffffffffffffffffULL, 0x0000000000000000ULL));
  ASSERT_EQ(sshl(val, -1), MakeUInt128(0xc803a4b94b9b1aa4ULL, 0x0000000000000000ULL));
  ASSERT_EQ(sshl(val, 0), MakeUInt128(0x9007497297363549ULL, 0x0000000000000000ULL));
  ASSERT_EQ(sshl(val, 1), MakeUInt128(0x200e92e52e6c6a92ULL, 0x0000000000000000ULL));
  ASSERT_EQ(sshl(val, 63), MakeUInt128(0x8000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_EQ(sshl(val, 64), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_EQ(sshl(val, 65), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
}
6497 
TEST(Arm64InsnTest, SignedRoundingShiftLeftInt64x1) {
  // Verifies scalar SRSHL: like SSHL but right shifts round toward the
  // nearest value (note the -64 case rounds up to zero here).
  constexpr auto srshl = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("srshl %d0, %d1, %d2");
  __uint128_t val = MakeUInt128(0x9276457931065792ULL, 0x2955249887275846ULL);
  ASSERT_EQ(srshl(val, -65), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_EQ(srshl(val, -64), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_EQ(srshl(val, -63), MakeUInt128(0xffffffffffffffffULL, 0x0000000000000000ULL));
  ASSERT_EQ(srshl(val, -1), MakeUInt128(0xc93b22bc98832bc9ULL, 0x0000000000000000ULL));
  ASSERT_EQ(srshl(val, 0), MakeUInt128(0x9276457931065792ULL, 0x0000000000000000ULL));
  ASSERT_EQ(srshl(val, 1), MakeUInt128(0x24ec8af2620caf24ULL, 0x0000000000000000ULL));
  ASSERT_EQ(srshl(val, 63), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_EQ(srshl(val, 64), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_EQ(srshl(val, 65), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
}
6511 
TEST(Arm64InsnTest, UnsignedShiftLeftInt64x1) {
  // Verifies scalar USHL: negative counts shift right logically (zero fill),
  // per the expected values below.
  constexpr auto ushl = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("ushl %d0, %d1, %d2");
  __uint128_t val = MakeUInt128(0x9138296682468185ULL, 0x7103188790652870ULL);
  ASSERT_EQ(ushl(val, -65), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_EQ(ushl(val, -64), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_EQ(ushl(val, -63), MakeUInt128(0x0000000000000001ULL, 0x0000000000000000ULL));
  ASSERT_EQ(ushl(val, -1), MakeUInt128(0x489c14b3412340c2ULL, 0x0000000000000000ULL));
  ASSERT_EQ(ushl(val, 0), MakeUInt128(0x9138296682468185ULL, 0x0000000000000000ULL));
  ASSERT_EQ(ushl(val, 1), MakeUInt128(0x227052cd048d030aULL, 0x0000000000000000ULL));
  ASSERT_EQ(ushl(val, 63), MakeUInt128(0x8000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_EQ(ushl(val, 64), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_EQ(ushl(val, 65), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
}
6525 
TEST(Arm64InsnTest, UnsignedRoundingShiftLeftInt64x1) {
  // Verifies scalar URSHL: logical right shifts for negative counts, with
  // rounding (the -64 case keeps a rounding carry of 1).
  constexpr auto urshl = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("urshl %d0, %d1, %d2");
  __uint128_t val = MakeUInt128(0x9023452924407736ULL, 0x5949563051007421ULL);
  ASSERT_EQ(urshl(val, -65), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_EQ(urshl(val, -64), MakeUInt128(0x0000000000000001ULL, 0x0000000000000000ULL));
  ASSERT_EQ(urshl(val, -63), MakeUInt128(0x0000000000000001ULL, 0x0000000000000000ULL));
  ASSERT_EQ(urshl(val, -1), MakeUInt128(0x4811a29492203b9bULL, 0x0000000000000000ULL));
  ASSERT_EQ(urshl(val, 0), MakeUInt128(0x9023452924407736ULL, 0x0000000000000000ULL));
  ASSERT_EQ(urshl(val, 1), MakeUInt128(0x20468a524880ee6cULL, 0x0000000000000000ULL));
  ASSERT_EQ(urshl(val, 63), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_EQ(urshl(val, 64), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_EQ(urshl(val, 65), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
}
6539 
TEST(Arm64InsnTest, SignedShiftLeftInt16x8) {
  // Verifies vector SSHL on 8x16-bit lanes with per-lane shift counts,
  // including out-of-range positive and negative counts.
  constexpr auto sshl = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("sshl %0.8h, %1.8h, %2.8h");
  __uint128_t values = MakeUInt128(0x9999999999999999ULL, 0x9999999999999999ULL);
  __uint128_t counts = MakeUInt128(0x0010000f00020001ULL, 0xfffffff1fff0ffefULL);
  ASSERT_EQ(sshl(values, counts), MakeUInt128(0x0000800066643332ULL, 0xccccffffffffffffULL));
  ASSERT_EQ(sshl(values, 0), MakeUInt128(0x9999999999999999ULL, 0x9999999999999999ULL));
}
6547 
TEST(Arm64InsnTest, SignedRoundingShiftLeftInt16x8) {
  // Verifies vector SRSHL on 8x16-bit lanes; differs from SSHL in the rounded
  // right-shift lanes (compare the low halves of the expected values).
  constexpr auto srshl = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("srshl %0.8h, %1.8h, %2.8h");
  __uint128_t values = MakeUInt128(0x9999999999999999ULL, 0x9999999999999999ULL);
  __uint128_t counts = MakeUInt128(0x0010000f00020001ULL, 0xfffffff1fff0ffefULL);
  ASSERT_EQ(srshl(values, counts), MakeUInt128(0x0000800066643332ULL, 0xcccdffff00000000ULL));
  ASSERT_EQ(srshl(values, 0), MakeUInt128(0x9999999999999999ULL, 0x9999999999999999ULL));
}
6555 
TEST(Arm64InsnTest, UnsignedShiftLeftInt16x8) {
  // Verifies vector USHL on 8x16-bit lanes with per-lane shift counts.
  constexpr auto ushl = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("ushl %0.8h, %1.8h, %2.8h");
  __uint128_t values = MakeUInt128(0x9999999999999999ULL, 0x9999999999999999ULL);
  __uint128_t counts = MakeUInt128(0x0010000f00020001ULL, 0xfffffff1fff0ffefULL);
  ASSERT_EQ(ushl(values, counts), MakeUInt128(0x0000800066643332ULL, 0x4ccc000100000000ULL));
  ASSERT_EQ(ushl(values, 0), MakeUInt128(0x9999999999999999ULL, 0x9999999999999999ULL));
}
6563 
TEST(Arm64InsnTest, UnsignedRoundingShiftLeftInt16x8) {
  // Verifies vector URSHL on 8x16-bit lanes with per-lane shift counts.
  constexpr auto urshl = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("urshl %0.8h, %1.8h, %2.8h");
  __uint128_t values = MakeUInt128(0x9999999999999999ULL, 0x9999999999999999ULL);
  __uint128_t counts = MakeUInt128(0x0010000f00020001ULL, 0xfffffff1fff0ffefULL);
  ASSERT_EQ(urshl(values, counts), MakeUInt128(0x0000800066643332ULL, 0x4ccd000100010000ULL));
  ASSERT_EQ(urshl(values, 0), MakeUInt128(0x9999999999999999ULL, 0x9999999999999999ULL));
}
6571 
TEST(Arm64InsnTest, UnsignedReciprocalSquareRootEstimateInt32x4) {
  // Verifies URSQRTE (unsigned reciprocal square-root estimate) per 32-bit
  // lane against a precomputed result.
  __uint128_t input = MakeUInt128(0x9641122821407533ULL, 0x0265510042410489ULL);
  __uint128_t actual = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("ursqrte %0.4s, %1.4s")(input);
  ASSERT_EQ(actual, MakeUInt128(0xa7000000ffffffffULL, 0xfffffffffb800000ULL));
}
6577 
TEST(Arm64InsnTest, UnsignedReciprocalEstimateInt32x4) {
  // Verifies URECPE (unsigned reciprocal estimate) per 32-bit lane.
  __uint128_t input = MakeUInt128(0x9714864899468611ULL, 0x2476054286734367ULL);
  __uint128_t actual = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("urecpe %0.4s, %1.4s")(input);
  ASSERT_EQ(actual, MakeUInt128(0xd8800000d6000000ULL, 0xfffffffff4000000ULL));
}
6583 
IsQcBitSet(uint32_t fpsr)6584 bool IsQcBitSet(uint32_t fpsr) {
6585   return (fpsr & kFpsrQcBit) != 0;
6586 }
6587 
TEST(Arm64InsnTest, SignedSaturatingAddInt64x1) {
  // Verifies scalar SQADD (64-bit): a non-saturating sum leaves QC clear,
  // an overflowing sum clamps to INT64_MAX and sets QC.
  constexpr auto sqadd = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqadd %d0, %d2, %d3");

  __uint128_t lhs1 = MakeUInt128(0x4342527753119724ULL, 0x7430873043619511ULL);
  __uint128_t rhs1 = MakeUInt128(0x3961190800302558ULL, 0x7838764420608504ULL);
  auto [sum1, flags1] = sqadd(lhs1, rhs1);
  ASSERT_EQ(sum1, MakeUInt128(0x7ca36b7f5341bc7cULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(flags1));

  __uint128_t lhs2 = MakeUInt128(0x2557185308919284ULL, 0x4038050710300647ULL);
  __uint128_t rhs2 = MakeUInt128(0x7684786324319100ULL, 0x0223929785255372ULL);
  auto [sum2, flags2] = sqadd(lhs2, rhs2);
  ASSERT_EQ(sum2, MakeUInt128(0x7fffffffffffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(flags2));
}
6603 
TEST(Arm64InsnTest, SignedSaturatingAddInt32x4) {
  // Verifies vector SQADD on 4x32-bit lanes; QC is set when any lane
  // saturates.
  constexpr auto sqadd = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqadd %0.4s, %2.4s, %3.4s");

  __uint128_t lhs1 = MakeUInt128(0x9883554445602495ULL, 0x5666843660292219ULL);
  __uint128_t rhs1 = MakeUInt128(0x5124830910605377ULL, 0x2019802183101032ULL);
  auto [sum1, flags1] = sqadd(lhs1, rhs1);
  ASSERT_EQ(sum1, MakeUInt128(0xe9a7d84d55c0780cULL, 0x76800457e339324bULL));
  ASSERT_FALSE(IsQcBitSet(flags1));

  __uint128_t lhs2 = MakeUInt128(0x9713308844617410ULL, 0x7959162511714864ULL);
  __uint128_t rhs2 = MakeUInt128(0x8744686112476054ULL, 0x2867343670904667ULL);
  auto [sum2, flags2] = sqadd(lhs2, rhs2);
  ASSERT_EQ(sum2, MakeUInt128(0x8000000056a8d464ULL, 0x7fffffff7fffffffULL));
  ASSERT_TRUE(IsQcBitSet(flags2));
}
6619 
TEST(Arm64InsnTest, UnsignedSaturatingAddInt8x1) {
  // Verifies scalar UQADD (8-bit): overflow clamps to 0xff and sets QC.
  constexpr auto uqadd = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("uqadd %b0, %b2, %b3");

  __uint128_t lhs1 = MakeUInt128(0x6017174229960273ULL, 0x5310276871944944ULL);
  __uint128_t rhs1 = MakeUInt128(0x4917939785144631ULL, 0x5973144353518504ULL);
  auto [sum1, flags1] = uqadd(lhs1, rhs1);
  ASSERT_EQ(sum1, MakeUInt128(0x00000000000000a4ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(flags1));

  __uint128_t lhs2 = MakeUInt128(0x3306263695626490ULL, 0x9108276271159038ULL);
  __uint128_t rhs2 = MakeUInt128(0x5699505124652999ULL, 0x6062855443838330ULL);
  auto [sum2, flags2] = uqadd(lhs2, rhs2);
  ASSERT_EQ(sum2, MakeUInt128(0x00000000000000ffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(flags2));
}
6635 
TEST(Arm64InsnTest, UnsignedSaturatingAddInt64x1) {
  // Verifies scalar UQADD (64-bit): overflow clamps to UINT64_MAX and sets
  // QC.
  constexpr auto uqadd = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("uqadd %d0, %d2, %d3");

  __uint128_t lhs1 = MakeUInt128(0x0606885137234627ULL, 0x0799732723313469ULL);
  __uint128_t rhs1 = MakeUInt128(0x3971456285542615ULL, 0x4676506324656766ULL);
  auto [sum1, flags1] = uqadd(lhs1, rhs1);
  ASSERT_EQ(sum1, MakeUInt128(0x3f77cdb3bc776c3cULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(flags1));

  __uint128_t lhs2 = MakeUInt128(0x9534957018600154ULL, 0x1262396228641389ULL);
  __uint128_t rhs2 = MakeUInt128(0x7796733329070567ULL, 0x3769621564981845ULL);
  auto [sum2, flags2] = uqadd(lhs2, rhs2);
  ASSERT_EQ(sum2, MakeUInt128(0xffffffffffffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(flags2));
}
6651 
TEST(Arm64InsnTest, UnsignedSaturatingAddInt32x4) {
  // Verifies vector UQADD on 4x32-bit lanes; QC is set when any lane
  // saturates.
  constexpr auto uqadd = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("uqadd %0.4s, %2.4s, %3.4s");

  __uint128_t lhs1 = MakeUInt128(0x9737425700735921ULL, 0x0031541508936793ULL);
  __uint128_t rhs1 = MakeUInt128(0x0081699805365202ULL, 0x7600727749674584ULL);
  auto [sum1, flags1] = uqadd(lhs1, rhs1);
  ASSERT_EQ(sum1, MakeUInt128(0x97b8abef05a9ab23ULL, 0x7631c68c51faad17ULL));
  ASSERT_FALSE(IsQcBitSet(flags1));

  __uint128_t lhs2 = MakeUInt128(0x9727856471983963ULL, 0x0878154322116691ULL);
  __uint128_t rhs2 = MakeUInt128(0x8654522268126887ULL, 0x2684459684424161ULL);
  auto [sum2, flags2] = uqadd(lhs2, rhs2);
  ASSERT_EQ(sum2, MakeUInt128(0xffffffffd9aaa1eaULL, 0x2efc5ad9a653a7f2ULL));
  ASSERT_TRUE(IsQcBitSet(flags2));
}
6667 
TEST(Arm64InsnTest, SignedSaturatingSubtractInt32x1) {
  // Verifies scalar SQSUB (32-bit) in three regimes: no saturation,
  // negative-overflow clamp to INT32_MIN, and positive-overflow clamp to
  // INT32_MAX.
  constexpr auto sqsub = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqsub %s0, %s2, %s3");

  __uint128_t lhs1 = MakeUInt128(0x3178534870760322ULL, 0x1982970579751191ULL);
  __uint128_t rhs1 = MakeUInt128(0x4405109942358830ULL, 0x3454635349234982ULL);
  auto [diff1, flags1] = sqsub(lhs1, rhs1);
  ASSERT_EQ(diff1, MakeUInt128(0x2e407af2ULL, 0U));
  ASSERT_FALSE(IsQcBitSet(flags1));

  __uint128_t lhs2 = MakeUInt128(0x1423696483086410ULL, 0x2592887457999322ULL);
  __uint128_t rhs2 = MakeUInt128(0x3749551912219519ULL, 0x0342445230753513ULL);
  auto [diff2, flags2] = sqsub(lhs2, rhs2);
  ASSERT_EQ(diff2, MakeUInt128(0x80000000ULL, 0U));
  ASSERT_TRUE(IsQcBitSet(flags2));

  __uint128_t lhs3 = MakeUInt128(0x3083508879584152ULL, 0x1489912761065137ULL);
  __uint128_t rhs3 = MakeUInt128(0x4153943580721139ULL, 0x0328574918769094ULL);
  auto [diff3, flags3] = sqsub(lhs3, rhs3);
  ASSERT_EQ(diff3, MakeUInt128(0x7fffffffULL, 0U));
  ASSERT_TRUE(IsQcBitSet(flags3));
}
6689 
TEST(Arm64InsnTest, SignedSaturatingSubtractInt64x1) {
  // Verifies scalar SQSUB (64-bit): an overflowing difference clamps to
  // INT64_MAX and sets QC.
  constexpr auto sqsub = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqsub %d0, %d2, %d3");

  __uint128_t lhs1 = MakeUInt128(0x4416125223196943ULL, 0x4712064173754912ULL);
  __uint128_t rhs1 = MakeUInt128(0x1635700857369439ULL, 0x7305979709719726ULL);
  auto [diff1, flags1] = sqsub(lhs1, rhs1);
  ASSERT_EQ(diff1, MakeUInt128(0x2de0a249cbe2d50aULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(flags1));

  __uint128_t lhs2 = MakeUInt128(0x7862766490242516ULL, 0x1990277471090335ULL);
  __uint128_t rhs2 = MakeUInt128(0x9333093049483805ULL, 0x9785662884478744ULL);
  auto [diff2, flags2] = sqsub(lhs2, rhs2);
  ASSERT_EQ(diff2, MakeUInt128(0x7fffffffffffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(flags2));
}
6705 
TEST(Arm64InsnTest, SignedSaturatingSubtractInt32x4) {
  // Verifies vector SQSUB on 4x32-bit lanes; QC is set when any lane
  // saturates.
  constexpr auto sqsub = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqsub %0.4s, %2.4s, %3.4s");

  __uint128_t lhs1 = MakeUInt128(0x4485680977569630ULL, 0x3129588719161129ULL);
  __uint128_t rhs1 = MakeUInt128(0x2946818849363386ULL, 0x4739274760122696ULL);
  auto [diff1, flags1] = sqsub(lhs1, rhs1);
  ASSERT_EQ(diff1, MakeUInt128(0x1b3ee6812e2062aaULL, 0xe9f03140b903ea93ULL));
  ASSERT_FALSE(IsQcBitSet(flags1));

  __uint128_t lhs2 = MakeUInt128(0x9304127100727784ULL, 0x9301555038895360ULL);
  __uint128_t rhs2 = MakeUInt128(0x3382619293437970ULL, 0x8187432094991415ULL);
  auto [diff2, flags2] = sqsub(lhs2, rhs2);
  ASSERT_EQ(diff2, MakeUInt128(0x800000006d2efe14ULL, 0x117a12307fffffffULL));
  ASSERT_TRUE(IsQcBitSet(flags2));
}
6721 
TEST(Arm64InsnTest, UnsignedSaturatingSubtractInt32x1) {
  // Verifies scalar UQSUB (32-bit): underflow clamps to zero and sets QC.
  constexpr auto uqsub = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("uqsub %s0, %s2, %s3");

  __uint128_t lhs1 = MakeUInt128(0x2548156091372812ULL, 0x8406333039373562ULL);
  __uint128_t rhs1 = MakeUInt128(0x4200160456645574ULL, 0x1458816605216660ULL);
  auto [diff1, flags1] = uqsub(lhs1, rhs1);
  ASSERT_EQ(diff1, MakeUInt128(0x3ad2d29eULL, 0U));
  ASSERT_FALSE(IsQcBitSet(flags1));

  __uint128_t lhs2 = MakeUInt128(0x1259960281839309ULL, 0x5487090590738613ULL);
  __uint128_t rhs2 = MakeUInt128(0x5191459181951029ULL, 0x7327875571049729ULL);
  auto [diff2, flags2] = uqsub(lhs2, rhs2);
  ASSERT_EQ(diff2, MakeUInt128(0U, 0U));
  ASSERT_TRUE(IsQcBitSet(flags2));
}
6737 
TEST(Arm64InsnTest, UnsignedSaturatingSubtractInt64x1) {
  // Verifies scalar UQSUB (64-bit): underflow clamps to zero and sets QC.
  constexpr auto uqsub = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("uqsub %d0, %d2, %d3");

  __uint128_t lhs1 = MakeUInt128(0x9691077542576474ULL, 0x8832534141213280ULL);
  __uint128_t rhs1 = MakeUInt128(0x0626717094009098ULL, 0x2235296579579978ULL);
  auto [diff1, flags1] = uqsub(lhs1, rhs1);
  ASSERT_EQ(diff1, MakeUInt128(0x906a9604ae56d3dcULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(flags1));

  __uint128_t lhs2 = MakeUInt128(0x7752929106925043ULL, 0x2614469501098610ULL);
  __uint128_t rhs2 = MakeUInt128(0x8889991465855188ULL, 0x1873582528164302ULL);
  auto [diff2, flags2] = uqsub(lhs2, rhs2);
  ASSERT_EQ(diff2, MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(flags2));
}
6753 
TEST(Arm64InsnTest, UnsignedSaturatingSubtractInt32x4) {
  // Verifies vector UQSUB on 4x32-bit lanes; QC is set when any lane
  // saturates.
  constexpr auto uqsub = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("uqsub %0.4s, %2.4s, %3.4s");

  __uint128_t lhs1 = MakeUInt128(0x6884962578665885ULL, 0x9991798675205545ULL);
  __uint128_t rhs1 = MakeUInt128(0x5809900455646117ULL, 0x8755249370124553ULL);
  auto [diff1, flags1] = uqsub(lhs1, rhs1);
  ASSERT_EQ(diff1, MakeUInt128(0x107b06212301f76eULL, 0x123c54f3050e0ff2ULL));
  ASSERT_FALSE(IsQcBitSet(flags1));

  __uint128_t lhs2 = MakeUInt128(0x5032678340586301ULL, 0x9301932429963972ULL);
  __uint128_t rhs2 = MakeUInt128(0x0444517928812285ULL, 0x4478211953530898ULL);
  auto [diff2, flags2] = uqsub(lhs2, rhs2);
  ASSERT_EQ(diff2, MakeUInt128(0x4bee160a17d7407cULL, 0x4e89720b00000000ULL));
  ASSERT_TRUE(IsQcBitSet(flags2));
}
6769 
TEST(Arm64InsnTest, SignedSaturatingAbsoluteInt8x1) {
  // Verifies scalar SQABS (8-bit): abs of INT8_MIN (0x80) saturates to 0x7f
  // and sets QC.
  constexpr auto sqabs = ASM_INSN_WRAP_FUNC_WQ_RES_W_ARG("sqabs %b0, %b2");

  __uint128_t in1 = MakeUInt128(0x8918016855727981ULL, 0x5642185819119749ULL);
  auto [out1, flags1] = sqabs(in1);
  ASSERT_EQ(out1, MakeUInt128(0x000000000000007fULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(flags1));

  __uint128_t in2 = MakeUInt128(0x0000000000000080ULL, 0x6464607287574305ULL);
  auto [out2, flags2] = sqabs(in2);
  ASSERT_EQ(out2, MakeUInt128(0x000000000000007fULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(flags2));
}
6783 
TEST(Arm64InsnTest, SignedSaturatingAbsoluteInt64x1) {
  // Verifies scalar SQABS (64-bit): abs of INT64_MIN saturates to INT64_MAX
  // and sets QC.
  constexpr auto sqabs = ASM_INSN_WRAP_FUNC_WQ_RES_W_ARG("sqabs %d0, %d2");

  __uint128_t in1 = MakeUInt128(0x9717317281315179ULL, 0x3290443112181587ULL);
  auto [out1, flags1] = sqabs(in1);
  ASSERT_EQ(out1, MakeUInt128(0x68e8ce8d7eceae87ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(flags1));

  __uint128_t in2 = MakeUInt128(0x8000000000000000ULL, 0x1001237687219447ULL);
  auto [out2, flags2] = sqabs(in2);
  ASSERT_EQ(out2, MakeUInt128(0x7fffffffffffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(flags2));
}
6797 
TEST(Arm64InsnTest, SignedSaturatingAbsoluteInt32x4) {
  // Verifies vector SQABS on 4x32-bit lanes; a lane holding INT32_MIN
  // saturates to INT32_MAX and sets QC.
  constexpr auto sqabs = ASM_INSN_WRAP_FUNC_WQ_RES_W_ARG("sqabs %0.4s, %2.4s");

  __uint128_t in1 = MakeUInt128(0x9133820578492800ULL, 0x6982551957402018ULL);
  auto [out1, flags1] = sqabs(in1);
  ASSERT_EQ(out1, MakeUInt128(0x6ecc7dfb78492800ULL, 0x6982551957402018ULL));
  ASSERT_FALSE(IsQcBitSet(flags1));

  __uint128_t in2 = MakeUInt128(0x1810564129725083ULL, 0x6070356880000000ULL);
  auto [out2, flags2] = sqabs(in2);
  ASSERT_EQ(out2, MakeUInt128(0x1810564129725083ULL, 0x607035687fffffffULL));
  ASSERT_TRUE(IsQcBitSet(flags2));
}
6811 
TEST(Arm64InsnTest, SignedSaturatingNegateInt32x1) {
  // Verifies scalar SQNEG (32-bit): negating INT32_MIN saturates to
  // INT32_MAX and sets QC.
  constexpr auto sqneg = ASM_INSN_WRAP_FUNC_WQ_RES_W_ARG("sqneg %s0, %s2");

  __uint128_t in1 = MakeUInt128(0x6461582694563802ULL, 0x3950283712168644ULL);
  auto [out1, flags1] = sqneg(in1);
  ASSERT_EQ(out1, MakeUInt128(0x000000006ba9c7feULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(flags1));

  __uint128_t in2 = MakeUInt128(0x6561785280000000ULL, 0x1277128269186886ULL);
  auto [out2, flags2] = sqneg(in2);
  ASSERT_EQ(out2, MakeUInt128(0x000000007fffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(flags2));
}
6825 
TEST(Arm64InsnTest, SignedSaturatingNegateInt64x1) {
  // Verifies scalar SQNEG (64-bit): negating INT64_MIN saturates to
  // INT64_MAX and sets QC.
  constexpr auto sqneg = ASM_INSN_WRAP_FUNC_WQ_RES_W_ARG("sqneg %d0, %d2");

  __uint128_t in1 = MakeUInt128(0x9703600795698276ULL, 0x2639234410714658ULL);
  auto [out1, flags1] = sqneg(in1);
  ASSERT_EQ(out1, MakeUInt128(0x68fc9ff86a967d8aULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(flags1));

  __uint128_t in2 = MakeUInt128(0x8000000000000000ULL, 0x4052295369374997ULL);
  auto [out2, flags2] = sqneg(in2);
  ASSERT_EQ(out2, MakeUInt128(0x7fffffffffffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(flags2));
}
6839 
TEST(Arm64InsnTest, SignedSaturatingNegateInt32x4) {
  // Verifies vector SQNEG on 4x32-bit lanes; a lane holding INT32_MIN
  // saturates to INT32_MAX and sets QC.
  constexpr auto sqneg = ASM_INSN_WRAP_FUNC_WQ_RES_W_ARG("sqneg %0.4s, %2.4s");

  __uint128_t in1 = MakeUInt128(0x9172320202822291ULL, 0x4886959399729974ULL);
  auto [out1, flags1] = sqneg(in1);
  ASSERT_EQ(out1, MakeUInt128(0x6e8dcdfefd7ddd6fULL, 0xb7796a6d668d668cULL));
  ASSERT_FALSE(IsQcBitSet(flags1));

  __uint128_t in2 = MakeUInt128(0x2974711553718589ULL, 0x2423849380000000ULL);
  auto [out2, flags2] = sqneg(in2);
  ASSERT_EQ(out2, MakeUInt128(0xd68b8eebac8e7a77ULL, 0xdbdc7b6d7fffffffULL));
  ASSERT_TRUE(IsQcBitSet(flags2));
}
6853 
TEST(Arm64InsnTest, SignedSaturatingShiftLeftImmInt32x1) {
  // Verifies scalar SQSHL by immediate (#20, 32-bit): an overflowing shift
  // clamps to INT32_MAX and sets QC.
  constexpr auto sqshl = ASM_INSN_WRAP_FUNC_WQ_RES_W_ARG("sqshl %s0, %s2, #20");

  __uint128_t in1 = MakeUInt128(0x9724611600000181ULL, 0x0003509892864120ULL);
  auto [out1, flags1] = sqshl(in1);
  ASSERT_EQ(out1, MakeUInt128(0x0000000018100000ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(flags1));

  __uint128_t in2 = MakeUInt128(0x4195163551108763ULL, 0x2042676129798265ULL);
  auto [out2, flags2] = sqshl(in2);
  ASSERT_EQ(out2, MakeUInt128(0x000000007fffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(flags2));
}
6867 
TEST(Arm64InsnTest, SignedSaturatingShiftLeftImmInt64x1) {
  // Verifies scalar SQSHL by immediate (#28, 64-bit): a negative input that
  // overflows clamps to INT64_MIN and sets QC.
  constexpr auto sqshl = ASM_INSN_WRAP_FUNC_WQ_RES_W_ARG("sqshl %d0, %d2, #28");

  __uint128_t in1 = MakeUInt128(0x0000000774000539ULL, 0x2622760323659751ULL);
  auto [out1, flags1] = sqshl(in1);
  ASSERT_EQ(out1, MakeUInt128(0x7740005390000000ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(flags1));

  __uint128_t in2 = MakeUInt128(0x9938714995449137ULL, 0x3020518436690767ULL);
  auto [out2, flags2] = sqshl(in2);
  ASSERT_EQ(out2, MakeUInt128(0x8000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(flags2));
}
6881 
TEST(Arm64InsnTest, SignedSaturatingShiftLeftImmInt32x4) {
  // Verifies vector SQSHL by immediate (#12) on 4x32-bit lanes; overflowing
  // lanes clamp toward INT32_MAX/INT32_MIN and set QC.
  constexpr auto sqshl = ASM_INSN_WRAP_FUNC_WQ_RES_W_ARG("sqshl %0.4s, %2.4s, #12");

  __uint128_t in1 = MakeUInt128(0x0007256800042011ULL, 0x0000313500033555ULL);
  auto [out1, flags1] = sqshl(in1);
  ASSERT_EQ(out1, MakeUInt128(0x7256800042011000ULL, 0x0313500033555000ULL));
  ASSERT_FALSE(IsQcBitSet(flags1));

  __uint128_t in2 = MakeUInt128(0x0944031900072034ULL, 0x8651010561049872ULL);
  auto [out2, flags2] = sqshl(in2);
  ASSERT_EQ(out2, MakeUInt128(0x7fffffff72034000ULL, 0x800000007fffffffULL));
  ASSERT_TRUE(IsQcBitSet(flags2));
}
6895 
TEST(Arm64InsnTest, SignedSaturatingShiftLeftByRegisterImmInt32x1) {
  // Verifies scalar SQSHL by register (32-bit) across the shift-count range:
  // negative counts shift right (never saturating), while large positive
  // counts clamp to INT32_MAX and set QC.
  constexpr auto sqshl = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqshl %s0, %s2, %s3");

  __uint128_t out;
  uint32_t flags;
  __uint128_t val = MakeUInt128(0x7480771811555330ULL, 0x9098870255052076ULL);

  std::tie(out, flags) = sqshl(val, -33);
  ASSERT_EQ(out, MakeUInt128(0U, 0U));
  ASSERT_FALSE(IsQcBitSet(flags));

  std::tie(out, flags) = sqshl(val, -32);
  ASSERT_EQ(out, MakeUInt128(0U, 0U));
  ASSERT_FALSE(IsQcBitSet(flags));

  std::tie(out, flags) = sqshl(val, -31);
  ASSERT_EQ(out, MakeUInt128(0U, 0U));
  ASSERT_FALSE(IsQcBitSet(flags));

  std::tie(out, flags) = sqshl(val, -1);
  ASSERT_EQ(out, MakeUInt128(0x08aaa998ULL, 0U));
  ASSERT_FALSE(IsQcBitSet(flags));

  std::tie(out, flags) = sqshl(val, 0);
  ASSERT_EQ(out, MakeUInt128(0x11555330ULL, 0U));
  ASSERT_FALSE(IsQcBitSet(flags));

  std::tie(out, flags) = sqshl(val, 1);
  ASSERT_EQ(out, MakeUInt128(0x22aaa660ULL, 0U));
  ASSERT_FALSE(IsQcBitSet(flags));

  std::tie(out, flags) = sqshl(val, 31);
  ASSERT_EQ(out, MakeUInt128(0x7fffffffULL, 0U));
  ASSERT_TRUE(IsQcBitSet(flags));

  std::tie(out, flags) = sqshl(val, 32);
  ASSERT_EQ(out, MakeUInt128(0x7fffffffULL, 0U));
  ASSERT_TRUE(IsQcBitSet(flags));

  std::tie(out, flags) = sqshl(val, 33);
  ASSERT_EQ(out, MakeUInt128(0x7fffffffULL, 0U));
  ASSERT_TRUE(IsQcBitSet(flags));
}
6939 
// Verifies scalar UQSHL (unsigned saturating shift left by immediate) on a
// 64-bit element shifted left by 28 bits.
TEST(Arm64InsnTest, UnsignedSaturatingShiftLeftImmInt64x1) {
  constexpr auto AsmUqshl = ASM_INSN_WRAP_FUNC_WQ_RES_W_ARG("uqshl %d0, %d2, #28");

  // Value fits after << 28: exact shift result, QC clear.
  __uint128_t arg1 = MakeUInt128(0x0000000961573564ULL, 0x8883443185280853ULL);
  auto [res1, fpsr1] = AsmUqshl(arg1);
  ASSERT_EQ(res1, MakeUInt128(0x9615735640000000ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Value overflows uint64 after << 28: clamps to UINT64_MAX, QC set.
  __uint128_t arg2 = MakeUInt128(0x9759277344336553ULL, 0x8418834030351782ULL);
  auto [res2, fpsr2] = AsmUqshl(arg2);
  ASSERT_EQ(res2, MakeUInt128(0xffffffffffffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
6953 
// Verifies UQSHL (unsigned saturating shift left by immediate) on a 4x32-bit
// vector, shift amount 12.
TEST(Arm64InsnTest, UnsignedSaturatingShiftLeftImmInt32x4) {
  constexpr auto AsmUqshl = ASM_INSN_WRAP_FUNC_WQ_RES_W_ARG("uqshl %0.4s, %2.4s, #12");

  // All lanes fit after << 12: exact results, QC clear.
  __uint128_t arg1 = MakeUInt128(0x0000326300096218ULL, 0x0004565900066853ULL);
  auto [res1, fpsr1] = AsmUqshl(arg1);
  ASSERT_EQ(res1, MakeUInt128(0x0326300096218000ULL, 0x4565900066853000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Lanes 1 and 3 (0x14010804, 0x35449090) overflow uint32 after << 12 and
  // clamp to 0xffffffff; QC is set.
  __uint128_t arg2 = MakeUInt128(0x0009911314010804ULL, 0x0009732335449090ULL);
  auto [res2, fpsr2] = AsmUqshl(arg2);
  ASSERT_EQ(res2, MakeUInt128(0x99113000ffffffffULL, 0x97323000ffffffffULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
6967 
// Verifies scalar UQSHL (unsigned saturating shift left by register) on a
// 32-bit element across negative, zero, and large positive shift counts.
TEST(Arm64InsnTest, UnsignedSaturatingShiftLeftByRegisterImmInt32x1) {
  constexpr auto AsmUqshl = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("uqshl %s0, %s2, %s3");

  __uint128_t res;
  uint32_t fpsr;
  // Only the low 32 bits (0x07414585) participate.
  __uint128_t arg1 = MakeUInt128(0x9714978507414585ULL, 0x3085781339156270ULL);

  // Negative counts of magnitude >= 27 shift all set bits out; right shifts
  // never saturate, so QC stays clear.
  std::tie(res, fpsr) = AsmUqshl(arg1, -33);
  ASSERT_EQ(res, MakeUInt128(0U, 0U));
  ASSERT_FALSE(IsQcBitSet(fpsr));

  std::tie(res, fpsr) = AsmUqshl(arg1, -32);
  ASSERT_EQ(res, MakeUInt128(0U, 0U));
  ASSERT_FALSE(IsQcBitSet(fpsr));

  std::tie(res, fpsr) = AsmUqshl(arg1, -31);
  ASSERT_EQ(res, MakeUInt128(0U, 0U));
  ASSERT_FALSE(IsQcBitSet(fpsr));

  // Plain logical right shift by one.
  std::tie(res, fpsr) = AsmUqshl(arg1, -1);
  ASSERT_EQ(res, MakeUInt128(0x03a0a2c2ULL, 0U));
  ASSERT_FALSE(IsQcBitSet(fpsr));

  // Zero shift: value passes through unchanged.
  std::tie(res, fpsr) = AsmUqshl(arg1, 0);
  ASSERT_EQ(res, MakeUInt128(0x07414585ULL, 0U));
  ASSERT_FALSE(IsQcBitSet(fpsr));

  // Left shift by one still fits in uint32.
  std::tie(res, fpsr) = AsmUqshl(arg1, 1);
  ASSERT_EQ(res, MakeUInt128(0x0e828b0aULL, 0U));
  ASSERT_FALSE(IsQcBitSet(fpsr));

  // Left shifts of 31 or more overflow: the result clamps to UINT32_MAX and
  // QC is set.
  std::tie(res, fpsr) = AsmUqshl(arg1, 31);
  ASSERT_EQ(res, MakeUInt128(0xffffffffULL, 0U));
  ASSERT_TRUE(IsQcBitSet(fpsr));

  std::tie(res, fpsr) = AsmUqshl(arg1, 32);
  ASSERT_EQ(res, MakeUInt128(0xffffffffULL, 0U));
  ASSERT_TRUE(IsQcBitSet(fpsr));

  std::tie(res, fpsr) = AsmUqshl(arg1, 33);
  ASSERT_EQ(res, MakeUInt128(0xffffffffULL, 0U));
  ASSERT_TRUE(IsQcBitSet(fpsr));
}
7011 
// Verifies vector SQSHL (signed saturating shift left by register) on 8x16-bit
// lanes, where each lane has its own per-lane (possibly negative) shift count.
TEST(Arm64InsnTest, SignedSaturatingShiftLeftByRegisterImmInt16x8) {
  constexpr auto AsmSqshl = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqshl %0.8h, %2.8h, %3.8h");

  // Shifting zero by anything yields zero and never saturates.
  __uint128_t arg1 = 0U;
  // Per-lane counts mix negative (0xffdf = -33, ... 0xffff = -1) and positive
  // (1, 31, 32, 33) values.
  __uint128_t arg2 = MakeUInt128(0xffdfffe0ffe1ffffULL, 0x0001001f00200021ULL);
  auto [res1, fpsr1] = AsmSqshl(arg1, arg2);
  ASSERT_EQ(res1, MakeUInt128(0U, 0U));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Positive lanes of 0x3333: small left shifts overflow int16 and clamp to
  // 0x7fff; large right shifts flush to zero; QC is set.
  __uint128_t arg3 = MakeUInt128(0x3333333333333333ULL, 0x3333333333333333ULL);
  auto [res2, fpsr2] = AsmSqshl(arg3, arg2);
  ASSERT_EQ(res2, MakeUInt128(0x0000000000001999ULL, 0x66667fff7fff7fffULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
7026 
// Verifies vector UQSHL (unsigned saturating shift left by register) on
// 8x16-bit lanes with per-lane shift counts.
TEST(Arm64InsnTest, UnsignedSaturatingShiftLeftByRegisterImmInt16x8) {
  constexpr auto AsmUqshl = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("uqshl %0.8h, %2.8h, %3.8h");

  // Zero input shifts to zero for every count; QC stays clear.
  __uint128_t arg1 = 0U;
  // Same per-lane count pattern as the signed variant above: negative counts
  // in the low half, positive counts (1, 31, 32, 33) in the high half.
  __uint128_t arg2 = MakeUInt128(0xffdfffe0ffe1ffffULL, 0x0001001f00200021ULL);
  auto [res1, fpsr1] = AsmUqshl(arg1, arg2);
  ASSERT_EQ(res1, MakeUInt128(0U, 0U));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // 0x7777 lanes: oversized left shifts clamp to 0xffff (QC set); right
  // shifts shrink or flush the lanes without saturating.
  __uint128_t arg3 = MakeUInt128(0x7777777777777777ULL, 0x7777777777777777ULL);
  auto [res2, fpsr2] = AsmUqshl(arg3, arg2);
  ASSERT_EQ(res2, MakeUInt128(0x0000000000003bbbULL, 0xeeeeffffffffffffULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
7041 
// Verifies SQXTN (signed saturating extract narrow) from 2x64-bit lanes to
// 2x32-bit lanes placed in the low half of the destination.
TEST(Arm64InsnTest, SignedSaturatingExtractNarrowInt64x2ToInt32x2) {
  constexpr auto AsmSqxtn = ASM_INSN_WRAP_FUNC_WQ_RES_W_ARG("sqxtn %0.2s, %2.2d");

  // Both 64-bit lanes are out of int32 range: the positive one clamps to
  // 0x7fffffff, the negative one to 0x80000000; QC is set.
  __uint128_t arg1 = MakeUInt128(0x0123456789abcdefULL, 0xfedcba9876543210ULL);
  auto [res1, fpsr1] = AsmSqxtn(arg1);
  ASSERT_EQ(res1, MakeUInt128(0x800000007fffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr1));

  // Both lanes fit in int32 and narrow losslessly; QC stays clear.
  // (Suffix changed to ULL for consistency with every other literal here.)
  __uint128_t arg2 = MakeUInt128(0x0000000001234567ULL, 0x000000007ecdba98ULL);
  auto [res2, fpsr2] = AsmSqxtn(arg2);
  ASSERT_EQ(res2, MakeUInt128(0x7ecdba9801234567ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr2));
}
7055 
// Verifies scalar SQXTN narrowing a single 64-bit element to 32 bits.
TEST(Arm64InsnTest, SignedSaturatingExtractNarrowInt64x1ToInt32x1) {
  constexpr auto AsmSqxtn = ASM_INSN_WRAP_FUNC_WQ_RES_W_ARG("sqxtn %s0, %d2");

  // Source exceeds int32 range: clamps to INT32_MAX, QC set.
  __uint128_t arg1 = MakeUInt128(0x1234567812345678ULL, 0x0ULL);
  auto [res1, fpsr1] = AsmSqxtn(arg1);
  ASSERT_EQ(res1, MakeUInt128(0x000000007fffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr1));

  // Source fits in int32: narrows unchanged, QC clear.
  __uint128_t arg2 = MakeUInt128(0x0000000012345678ULL, 0x0ULL);
  auto [res2, fpsr2] = AsmSqxtn(arg2);
  ASSERT_EQ(res2, MakeUInt128(0x0000000012345678ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr2));
}
7069 
// Verifies UQXTN (unsigned saturating extract narrow) from 2x64-bit lanes to
// 2x32-bit lanes in the low half of the destination.
TEST(Arm64InsnTest, UnsignedSaturatingExtractNarrowInt64x2ToInt32x2) {
  // Renamed from the original "AsmUqstn" typo to match the instruction
  // mnemonic, consistent with the other uqxtn tests in this file.
  constexpr auto AsmUqxtn = ASM_INSN_WRAP_FUNC_WQ_RES_W_ARG("uqxtn %0.2s, %2.2d");

  // Both lanes exceed uint32 range: each clamps to 0xffffffff; QC is set.
  __uint128_t arg1 = MakeUInt128(0x0123456789abcdefULL, 0xfedcba9876543210ULL);
  auto [res1, fpsr1] = AsmUqxtn(arg1);
  ASSERT_EQ(res1, MakeUInt128(0xffffffffffffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr1));

  // Both lanes fit in uint32 and narrow losslessly; QC stays clear.
  // (Suffix changed to ULL for consistency with the rest of the file.)
  __uint128_t arg2 = MakeUInt128(0x0000000001234567ULL, 0x00000000fecdba98ULL);
  auto [res2, fpsr2] = AsmUqxtn(arg2);
  ASSERT_EQ(res2, MakeUInt128(0xfecdba9801234567ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr2));
}
7083 
// Verifies scalar UQXTN narrowing a single 64-bit element to 32 bits.
TEST(Arm64InsnTest, UnsignedSaturatingExtractNarrowInt64x1ToInt32x1) {
  constexpr auto AsmUqxtn = ASM_INSN_WRAP_FUNC_WQ_RES_W_ARG("uqxtn %s0, %d2");

  // Source exceeds uint32 range: clamps to UINT32_MAX, QC set.
  __uint128_t arg1 = MakeUInt128(0x1234567812345678ULL, 0x0ULL);
  auto [res1, fpsr1] = AsmUqxtn(arg1);
  ASSERT_EQ(res1, MakeUInt128(0x00000000ffffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr1));

  // Source fits in uint32 (high bit set is fine for unsigned): unchanged.
  __uint128_t arg2 = MakeUInt128(0x0000000087654321ULL, 0x0ULL);
  auto [res2, fpsr2] = AsmUqxtn(arg2);
  ASSERT_EQ(res2, MakeUInt128(0x0000000087654321ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr2));
}
7097 
// Verifies SQXTN2: narrows 2x64-bit lanes to 2x32-bit lanes written into the
// UPPER half of the destination while the lower half is preserved.
TEST(Arm64InsnTest, SignedSaturatingExtractNarrow2Int64x2ToInt32x2) {
  constexpr auto AsmSqxtn2 = ASM_INSN_WRAP_FUNC_WQ_RES_W0_ARG("sqxtn2 %0.4s, %2.2d");

  // Out-of-range lanes clamp (0x7fffffff / 0x80000000) into the upper half;
  // the pre-existing lower half of arg2 is untouched; QC is set.
  __uint128_t arg1 = MakeUInt128(0x0123456789abcdefULL, 0xfedcba9876543210ULL);
  __uint128_t arg2 = MakeUInt128(0x6121865619673378ULL, 0x6236256125216320ULL);
  auto [res1, fpsr1] = AsmSqxtn2(arg1, arg2);
  ASSERT_EQ(res1, MakeUInt128(0x6121865619673378ULL, 0x800000007fffffffULL));
  ASSERT_TRUE(IsQcBitSet(fpsr1));

  // In-range lanes narrow losslessly into the upper half; QC stays clear.
  // (Suffix changed to ULL for consistency with the rest of the file.)
  __uint128_t arg3 = MakeUInt128(0x0000000001234567ULL, 0x000000007ecdba98ULL);
  __uint128_t arg4 = MakeUInt128(0x6121865619673378ULL, 0x6236256125216320ULL);
  auto [res2, fpsr2] = AsmSqxtn2(arg3, arg4);
  ASSERT_EQ(res2, MakeUInt128(0x6121865619673378ULL, 0x7ecdba9801234567ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr2));
}
7113 
// Verifies UQXTN2: narrows 2x64-bit lanes to 2x32-bit lanes written into the
// UPPER half of the destination while the lower half is preserved.
TEST(Arm64InsnTest, UnsignedSaturatingExtractNarrow2Int64x2ToInt32x4) {
  constexpr auto AsmUqxtn2 = ASM_INSN_WRAP_FUNC_WQ_RES_W0_ARG("uqxtn2 %0.4s, %2.2d");

  // Out-of-range lanes clamp to 0xffffffff in the upper half; the lower half
  // of arg2 is preserved; QC is set.
  __uint128_t arg1 = MakeUInt128(0x0123456789abcdefULL, 0xfedcba9876543210ULL);
  __uint128_t arg2 = MakeUInt128(0x6121865619673378ULL, 0x6236256125216320ULL);
  auto [res1, fpsr1] = AsmUqxtn2(arg1, arg2);
  ASSERT_EQ(res1, MakeUInt128(0x6121865619673378ULL, 0xffffffffffffffffULL));
  ASSERT_TRUE(IsQcBitSet(fpsr1));

  // In-range lanes narrow losslessly into the upper half; QC stays clear.
  // (Suffix changed to ULL for consistency with the rest of the file.)
  __uint128_t arg3 = MakeUInt128(0x0000000001234567ULL, 0x00000000fecdba98ULL);
  __uint128_t arg4 = MakeUInt128(0x6121865619673378ULL, 0x6236256125216320ULL);
  auto [res2, fpsr2] = AsmUqxtn2(arg3, arg4);
  ASSERT_EQ(res2, MakeUInt128(0x6121865619673378ULL, 0xfecdba9801234567ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr2));
}
7129 
// Verifies SQXTUN: narrows signed 64-bit lanes to UNSIGNED 32-bit lanes;
// values above UINT32_MAX clamp to 0xffffffff (negatives would clamp to 0).
TEST(Arm64InsnTest, SignedSaturatingExtractUnsignedNarrowInt64x2ToInt32x2) {
  constexpr auto AsmSqxtun = ASM_INSN_WRAP_FUNC_WQ_RES_W_ARG("sqxtun %0.2s, %2.2d");

  // Upper lane (0x1aabbccdd) exceeds uint32 range and clamps to 0xffffffff;
  // the lower lane fits; QC is set.
  __uint128_t arg1 = MakeUInt128(0x0000000044332211ULL, 0x00000001aabbccddULL);
  auto [res1, fpsr1] = AsmSqxtun(arg1);
  ASSERT_EQ(res1, MakeUInt128(0xffffffff44332211ULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr1));

  // Both lanes are non-negative and fit in uint32: narrowed unchanged.
  // (Suffix changed to ULL for consistency with the rest of the file.)
  __uint128_t arg2 = MakeUInt128(0x0000000001234567ULL, 0x00000000fecdba98ULL);
  auto [res2, fpsr2] = AsmSqxtun(arg2);
  ASSERT_EQ(res2, MakeUInt128(0xfecdba9801234567ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr2));
}
7143 
// Verifies scalar SQXTUN narrowing one signed 64-bit element to unsigned
// 32 bits.
TEST(Arm64InsnTest, SignedSaturatingExtractUnsignedNarrowInt64x1ToInt32x1) {
  constexpr auto AsmSqxtun = ASM_INSN_WRAP_FUNC_WQ_RES_W_ARG("sqxtun %s0, %d2");

  // Source exceeds uint32 range: clamps to 0xffffffff, QC set.
  __uint128_t arg1 = MakeUInt128(0x00000001ff332211ULL, 0x0ULL);
  auto [res1, fpsr1] = AsmSqxtun(arg1);
  ASSERT_EQ(res1, MakeUInt128(0x00000000ffffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr1));

  // Source fits in uint32: narrows unchanged, QC clear.
  __uint128_t arg2 = MakeUInt128(0x00000000ff332211ULL, 0x0ULL);
  auto [res2, fpsr2] = AsmSqxtun(arg2);
  ASSERT_EQ(res2, MakeUInt128(0x00000000ff332211ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr2));
}
7157 
// Verifies SQXTUN2: narrows signed 64-bit lanes to unsigned 32-bit lanes in
// the UPPER half of the destination, preserving the lower half.
TEST(Arm64InsnTest, SignedSaturatingExtractUnsignedNarrow2Int64x2ToInt32x4) {
  constexpr auto AsmSqxtun2 = ASM_INSN_WRAP_FUNC_WQ_RES_W0_ARG("sqxtun2 %0.4s, %2.2d");

  // Negative upper lane clamps to 0 and the in-range lower lane narrows
  // unchanged, both into the upper half; arg2's low half is preserved; QC set.
  __uint128_t arg1 = MakeUInt128(0x0000000089abcdefULL, 0xfedcba9876543210ULL);
  __uint128_t arg2 = MakeUInt128(0x0123456789abcdefULL, 0xfedcba9876543210ULL);
  auto [res1, fpsr1] = AsmSqxtun2(arg1, arg2);
  ASSERT_EQ(res1, MakeUInt128(0x0123456789abcdefULL, 0x0000000089abcdefULL));
  ASSERT_TRUE(IsQcBitSet(fpsr1));

  // Both lanes fit in uint32: narrowed losslessly into the upper half.
  // (Suffix changed to ULL for consistency with the rest of the file.)
  __uint128_t arg3 = MakeUInt128(0x0000000001234567ULL, 0x00000000fecdba98ULL);
  __uint128_t arg4 = MakeUInt128(0x0123456789abcdefULL, 0xfedcba9876543210ULL);
  auto [res2, fpsr2] = AsmSqxtun2(arg3, arg4);
  ASSERT_EQ(res2, MakeUInt128(0x0123456789abcdefULL, 0xfecdba9801234567ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr2));
}
7173 
// Verifies scalar SUQADD: signed saturating add of an unsigned value into a
// signed 32-bit accumulator (the first operand is the accumulator).
TEST(Arm64InsnTest, SignedSaturatingAccumulateOfUnsignedValueInt32x1) {
  constexpr auto AsmSuqadd = ASM_INSN_WRAP_FUNC_WQ_RES_W0_ARG("suqadd %s0, %s2");

  // Low 32-bit words sum within int32 range: exact result, QC clear.
  __uint128_t arg1 = MakeUInt128(0x9392023115638719ULL, 0x5080502467972579ULL);
  __uint128_t arg2 = MakeUInt128(0x2497605762625913ULL, 0x3285597263712112ULL);
  auto [res1, fpsr1] = AsmSuqadd(arg1, arg2);
  ASSERT_EQ(res1, MakeUInt128(0x0000000077c5e02cULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Sum overflows int32: clamps to INT32_MAX, QC set.
  __uint128_t arg3 = MakeUInt128(0x9099791776687477ULL, 0x4481882870632315ULL);
  __uint128_t arg4 = MakeUInt128(0x5158650328981642ULL, 0x2828823274686610ULL);
  auto [res2, fpsr2] = AsmSuqadd(arg3, arg4);
  ASSERT_EQ(res2, MakeUInt128(0x000000007fffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
7189 
// Verifies vector SUQADD on 4x32-bit lanes: per-lane signed saturating add of
// an unsigned value into a signed accumulator.
TEST(Arm64InsnTest, SignedSaturatingAccumulateOfUnsignedValueInt32x4) {
  constexpr auto AsmSuqadd = ASM_INSN_WRAP_FUNC_WQ_RES_W0_ARG("suqadd %0.4s, %2.4s");

  // All four lane sums stay within int32 range: exact results, QC clear.
  __uint128_t arg1 = MakeUInt128(0x2590181000350989ULL, 0x2864120419516355ULL);
  __uint128_t arg2 = MakeUInt128(0x1108763204267612ULL, 0x9798265294258829ULL);
  auto [res1, fpsr1] = AsmSuqadd(arg1, arg2);
  ASSERT_EQ(res1, MakeUInt128(0x36988e42045b7f9bULL, 0xbffc3856ad76eb7eULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Two lanes overflow and clamp to 0x7fffffff; the others are exact; QC set.
  __uint128_t arg3 = MakeUInt128(0x9082888934938376ULL, 0x4393992569006040ULL);
  __uint128_t arg4 = MakeUInt128(0x6731142209331219ULL, 0x5936202982972351ULL);
  auto [res2, fpsr2] = AsmSuqadd(arg3, arg4);
  ASSERT_EQ(res2, MakeUInt128(0x7fffffff3dc6958fULL, 0x7fffffffeb978391ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
7205 
// Verifies scalar USQADD: unsigned saturating add of a signed value into an
// unsigned 32-bit accumulator; saturates at 0 (negative sum) and UINT32_MAX.
TEST(Arm64InsnTest, UnsignedSaturatingAccumulateOfSignedValueInt32x1) {
  constexpr auto AsmUsqadd = ASM_INSN_WRAP_FUNC_WQ_RES_W0_ARG("usqadd %s0, %s2");

  // Sum stays within uint32 range: exact result, QC clear.
  __uint128_t arg1 = MakeUInt128(0x9052523242348615ULL, 0x3152097693846104ULL);
  __uint128_t arg2 = MakeUInt128(0x2582849714963475ULL, 0x3418375620030149ULL);
  auto [res1, fpsr1] = AsmUsqadd(arg1, arg2);
  ASSERT_EQ(res1, MakeUInt128(0x0000000056caba8aULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Negative addend drives the sum below zero: clamps to 0, QC set.
  __uint128_t arg3 = MakeUInt128(0x9887125387801719ULL, 0x6071816407812484ULL);
  __uint128_t arg4 = MakeUInt128(0x7847257912407824ULL, 0x5443616823452395ULL);
  auto [res2, fpsr2] = AsmUsqadd(arg3, arg4);
  ASSERT_EQ(res2, MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));

  // Sum overflows uint32: clamps to UINT32_MAX, QC set.
  __uint128_t arg5 = MakeUInt128(0x9708583970761645ULL, 0x8229630324424328ULL);
  __uint128_t arg6 = MakeUInt128(0x2377374595170285ULL, 0x6069806788952176ULL);
  auto [res3, fpsr3] = AsmUsqadd(arg5, arg6);
  ASSERT_EQ(res3, MakeUInt128(0x00000000ffffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr3));
}
7227 
// Verifies vector USQADD on 4x32-bit lanes: per-lane unsigned saturating add
// of a signed value into an unsigned accumulator.
TEST(Arm64InsnTest, UnsignedSaturatingAccumulateOfSignedValueInt32x4) {
  constexpr auto AsmUsqadd = ASM_INSN_WRAP_FUNC_WQ_RES_W0_ARG("usqadd %0.4s, %2.4s");

  // All four lane sums stay within uint32 range: exact results, QC clear.
  __uint128_t arg1 = MakeUInt128(0x4129137074982305ULL, 0x7592909166293919ULL);
  __uint128_t arg2 = MakeUInt128(0x5014721157586067ULL, 0x2700925477180257ULL);
  auto [res1, fpsr1] = AsmUsqadd(arg1, arg2);
  ASSERT_EQ(res1, MakeUInt128(0x913d8581cbf0836cULL, 0x9c9322e5dd413b70ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // One lane overflows up (clamps to 0xffffffff) and one underflows below
  // zero (clamps to 0); QC is set.
  __uint128_t arg3 = MakeUInt128(0x7816422828823274ULL, 0x6866106592732197ULL);
  __uint128_t arg4 = MakeUInt128(0x9071623846421534ULL, 0x8985247621678905ULL);
  auto [res2, fpsr2] = AsmUsqadd(arg3, arg4);
  ASSERT_EQ(res2, MakeUInt128(0xffffffff6ec447a8ULL, 0xf1eb34db00000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
7243 
// Verifies scalar SQRSHL (signed saturating ROUNDING shift left by register)
// on a 32-bit element: negative counts shift right with rounding, positive
// counts shift left with signed saturation.
TEST(Arm64InsnTest, SignedSaturatingRoundingShiftLeftInt32x1) {
  constexpr auto AsmSqrshl = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqrshl %s0, %s2, %s3");

  __uint128_t res;
  uint32_t fpsr;

  // Only the low 32 bits (0x35580445) participate.
  __uint128_t arg = MakeUInt128(0x9736705435580445ULL, 0x8657202276378404ULL);
  // Large negative counts shift everything (including the rounding bit) out.
  std::tie(res, fpsr) = AsmSqrshl(arg, -33);
  ASSERT_EQ(res, MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr));

  std::tie(res, fpsr) = AsmSqrshl(arg, -32);
  ASSERT_EQ(res, MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr));

  std::tie(res, fpsr) = AsmSqrshl(arg, -31);
  ASSERT_EQ(res, MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr));

  // Right shift by one with rounding: (0x35580445 + 1) >> 1.
  std::tie(res, fpsr) = AsmSqrshl(arg, -1);
  ASSERT_EQ(res, MakeUInt128(0x000000001aac0223ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr));

  // Zero shift: value passes through unchanged.
  std::tie(res, fpsr) = AsmSqrshl(arg, 0);
  ASSERT_EQ(res, MakeUInt128(0x0000000035580445ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr));

  // Left shift by one still fits in int32.
  std::tie(res, fpsr) = AsmSqrshl(arg, 1);
  ASSERT_EQ(res, MakeUInt128(0x000000006ab0088aULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr));

  // Left shifts of 31 or more overflow the positive value: clamps to
  // INT32_MAX and QC is set.
  std::tie(res, fpsr) = AsmSqrshl(arg, 31);
  ASSERT_EQ(res, MakeUInt128(0x000000007fffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr));

  std::tie(res, fpsr) = AsmSqrshl(arg, 32);
  ASSERT_EQ(res, MakeUInt128(0x000000007fffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr));

  std::tie(res, fpsr) = AsmSqrshl(arg, 33);
  ASSERT_EQ(res, MakeUInt128(0x000000007fffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr));
}
7287 
// Verifies vector SQRSHL on 8x16-bit lanes with per-lane shift counts:
// negative counts round on the right shift; positive counts saturate.
TEST(Arm64InsnTest, SignedSaturatingRoundingShiftLeftInt16x8) {
  constexpr auto AsmSqrshl = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqrshl %0.8h, %2.8h, %3.8h");

  // Mostly-small lanes with per-lane counts mixing large left shifts (0x11,
  // 0x10, 0xf, 1) and right shifts (-1, -15, -16, -17): no lane saturates.
  __uint128_t arg1 = MakeUInt128(0x0000000000000099ULL, 0x9999099999999999ULL);
  __uint128_t arg2 = MakeUInt128(0x00110010000f0001ULL, 0xfffffff1fff0ffefULL);
  auto [res1, fpsr1] = AsmSqrshl(arg1, arg2);
  ASSERT_EQ(res1, MakeUInt128(0x0000000000000132ULL, 0xcccd000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Positive 0x0099 lanes under big left shifts clamp to 0x7fff; QC is set.
  __uint128_t arg3 = MakeUInt128(0x0099009900990099ULL, 0x0099009900990099ULL);
  auto [res2, fpsr2] = AsmSqrshl(arg3, arg2);
  ASSERT_EQ(res2, MakeUInt128(0x7fff7fff7fff0132ULL, 0x004d000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
7302 
// Verifies scalar UQRSHL (unsigned saturating ROUNDING shift left by
// register) on a 32-bit element across negative, zero, and large positive
// shift counts.
TEST(Arm64InsnTest, UnsignedSaturatingRoundingShiftLeftInt32x1) {
  constexpr auto AsmUqrshl = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("uqrshl %s0, %s2, %s3");

  __uint128_t res;
  uint32_t fpsr;

  // Only the low 32 bits (0x48262367) participate.
  __uint128_t arg = MakeUInt128(0x9984124848262367ULL, 0x3771467226061633ULL);
  // Counts of -33/-32 shift everything out, rounding bit included.
  std::tie(res, fpsr) = AsmUqrshl(arg, -33);
  ASSERT_EQ(res, MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr));

  std::tie(res, fpsr) = AsmUqrshl(arg, -32);
  ASSERT_EQ(res, MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr));

  // At -31 the rounding increment survives: result is 1 (bit 30 was set).
  std::tie(res, fpsr) = AsmUqrshl(arg, -31);
  ASSERT_EQ(res, MakeUInt128(0x0000000000000001ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr));

  // Right shift by one with rounding: (0x48262367 + 1) >> 1.
  std::tie(res, fpsr) = AsmUqrshl(arg, -1);
  ASSERT_EQ(res, MakeUInt128(0x00000000241311b4ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr));

  // Zero shift: value passes through unchanged.
  std::tie(res, fpsr) = AsmUqrshl(arg, 0);
  ASSERT_EQ(res, MakeUInt128(0x0000000048262367ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr));

  // Left shift by one still fits in uint32.
  std::tie(res, fpsr) = AsmUqrshl(arg, 1);
  ASSERT_EQ(res, MakeUInt128(0x00000000904c46ceULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr));

  // Left shifts of 31 or more overflow: clamps to UINT32_MAX, QC set.
  std::tie(res, fpsr) = AsmUqrshl(arg, 31);
  ASSERT_EQ(res, MakeUInt128(0x00000000ffffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr));

  std::tie(res, fpsr) = AsmUqrshl(arg, 32);
  ASSERT_EQ(res, MakeUInt128(0x00000000ffffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr));

  std::tie(res, fpsr) = AsmUqrshl(arg, 33);
  ASSERT_EQ(res, MakeUInt128(0x00000000ffffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr));
}
7346 
// Verifies vector UQRSHL on 8x16-bit lanes with per-lane shift counts.
TEST(Arm64InsnTest, UnsignedSaturatingRoundingShiftLeftInt16x8) {
  constexpr auto AsmUqrshl = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("uqrshl %0.8h, %2.8h, %3.8h");

  // Same inputs as the signed (sqrshl) test above; unsigned interpretation
  // changes the rounded right-shift results in the high half. No saturation.
  __uint128_t arg1 = MakeUInt128(0x0000000000000099ULL, 0x9999099999999999ULL);
  __uint128_t arg2 = MakeUInt128(0x00110010000f0001ULL, 0xfffffff1fff0ffefULL);
  auto [res1, fpsr1] = AsmUqrshl(arg1, arg2);
  ASSERT_EQ(res1, MakeUInt128(0x0000000000000132ULL, 0x4ccd000000010000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // 0x0099 lanes under big left shifts clamp to 0xffff; QC is set.
  __uint128_t arg3 = MakeUInt128(0x0099009900990099ULL, 0x0099009900990099ULL);
  auto [res2, fpsr2] = AsmUqrshl(arg3, arg2);
  ASSERT_EQ(res2, MakeUInt128(0xffffffffffff0132ULL, 0x004d000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
7361 
// Verifies scalar SQSHRN: signed saturating shift right (by 4) and narrow a
// 16-bit element to 8 bits.
TEST(Arm64InsnTest, SignedSaturatingShiftRightNarrowInt16x1) {
  constexpr auto AsmSqshrn = ASM_INSN_WRAP_FUNC_WQ_RES_W_ARG("sqshrn %b0, %h2, #4");

  // Low 16 bits 0xf943: shifted value fits in int8, no saturation.
  __uint128_t arg1 = MakeUInt128(0x888786614762f943ULL, 0x4140104988899316ULL);
  auto [res1, fpsr1] = AsmSqshrn(arg1);
  ASSERT_EQ(res1, MakeUInt128(0x94U, 0U));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Low 16 bits 0x3588: shifted value exceeds int8, clamps to 0x7f, QC set.
  __uint128_t arg2 = MakeUInt128(0x0051207678103588ULL, 0x6116602029611936ULL);
  auto [res2, fpsr2] = AsmSqshrn(arg2);
  ASSERT_EQ(res2, MakeUInt128(0x7fU, 0U));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
7375 
// Verifies vector SQSHRN: each of eight 16-bit lanes is shifted right by 4
// and narrowed to 8 bits with signed saturation; result fills the low half.
TEST(Arm64InsnTest, SignedSaturatingShiftRightNarrowInt16x8) {
  constexpr auto AsmSqshrn = ASM_INSN_WRAP_FUNC_WQ_RES_W_ARG("sqshrn %0.8b, %2.8h, #4");

  // Every shifted lane fits in int8: exact results, QC clear.
  __uint128_t arg1 = MakeUInt128(0x0625051604340253ULL, 0x0299028602670568ULL);
  auto [res1, fpsr1] = AsmSqshrn(arg1);
  ASSERT_EQ(res1, MakeUInt128(0x2928265662514325ULL, 0U));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Several lanes overflow int8 and clamp to 0x7f (positive) or 0x80
  // (negative); QC is set.
  __uint128_t arg2 = MakeUInt128(0x2405806005642114ULL, 0x9386436864224724ULL);
  auto [res2, fpsr2] = AsmSqshrn(arg2);
  ASSERT_EQ(res2, MakeUInt128(0x807f7f7f7f80567fULL, 0U));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
7389 
// Verifies SQSHRN2: shift-right-narrow into the UPPER half of the
// destination while the existing lower half is preserved.
TEST(Arm64InsnTest, SignedSaturatingShiftRightNarrowInt16x8Upper) {
  constexpr auto AsmSqshrn2 = ASM_INSN_WRAP_FUNC_WQ_RES_W0_ARG("sqshrn2 %0.16b, %2.8h, #4");

  // All shifted lanes fit in int8: exact bytes in the upper half, arg2's
  // lower half preserved, QC clear.
  __uint128_t arg1 = MakeUInt128(0x0367034704100536ULL, 0x0175064803000078ULL);
  __uint128_t arg2 = MakeUInt128(0x3494819262681110ULL, 0x7399482506073949ULL);
  auto [res1, fpsr1] = AsmSqshrn2(arg1, arg2);
  ASSERT_EQ(res1, MakeUInt128(0x3494819262681110ULL, 0x1764300736344153ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Overflowing lanes clamp to 0x7f in the upper half; QC is set.
  __uint128_t arg3 = MakeUInt128(0x4641074501673719ULL, 0x0483109676711344ULL);
  auto [res2, fpsr2] = AsmSqshrn2(arg3, arg2);
  ASSERT_EQ(res2, MakeUInt128(0x3494819262681110ULL, 0x487f7f7f7f74167fULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
7404 
// Verifies scalar UQSHRN: unsigned saturating shift right (by 4) and narrow
// a 16-bit element to 8 bits.
TEST(Arm64InsnTest, UnsignedSaturatingShiftRightNarrowInt16x1) {
  constexpr auto AsmUqshrn = ASM_INSN_WRAP_FUNC_WQ_RES_W_ARG("uqshrn %b0, %h2, #4");

  // Low 16 bits 0x0360 >> 4 = 0x36: fits in uint8, QC clear.
  __uint128_t arg1 = MakeUInt128(0x6797172898220360ULL, 0x7028806908776866ULL);
  auto [res1, fpsr1] = AsmUqshrn(arg1);
  ASSERT_EQ(res1, MakeUInt128(0x36U, 0U));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Low 16 bits 0x8405 >> 4 exceeds uint8: clamps to 0xff, QC set.
  __uint128_t arg2 = MakeUInt128(0x0593252746378405ULL, 0x3976918480820410ULL);
  auto [res2, fpsr2] = AsmUqshrn(arg2);
  ASSERT_EQ(res2, MakeUInt128(0xffU, 0U));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
7418 
// Verifies vector UQSHRN: eight 16-bit lanes shifted right by 4 and narrowed
// to 8 bits with unsigned saturation; result fills the low half.
TEST(Arm64InsnTest, UnsignedSaturatingShiftRightNarrowInt16x8) {
  constexpr auto AsmUqshrn = ASM_INSN_WRAP_FUNC_WQ_RES_W_ARG("uqshrn %0.8b, %2.8h, #4");

  // Every shifted lane fits in uint8: exact results, QC clear.
  __uint128_t arg1 = MakeUInt128(0x0867067907600099ULL, 0x0693007509490515ULL);
  auto [res1, fpsr1] = AsmUqshrn(arg1);
  ASSERT_EQ(res1, MakeUInt128(0x6907945186677609ULL, 0U));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Several lanes overflow uint8 and clamp to 0xff; QC is set.
  __uint128_t arg2 = MakeUInt128(0x2736049811890413ULL, 0x0433116627747123ULL);
  auto [res2, fpsr2] = AsmUqshrn(arg2);
  ASSERT_EQ(res2, MakeUInt128(0x43ffffffff49ff41ULL, 0U));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
7432 
// Verifies UQSHRN2: unsigned shift-right-narrow into the UPPER half of the
// destination while the existing lower half is preserved.
// (Test name fixed: original read "Unigned...".)
TEST(Arm64InsnTest, UnsignedSaturatingShiftRightNarrowInt16x8Upper) {
  constexpr auto AsmUqshrn2 = ASM_INSN_WRAP_FUNC_WQ_RES_W0_ARG("uqshrn2 %0.16b, %2.8h, #4");

  // All shifted lanes fit in uint8: exact bytes in the upper half, arg2's
  // lower half preserved, QC clear.
  __uint128_t arg1 = MakeUInt128(0x0441018407410768ULL, 0x0981066307240048ULL);
  __uint128_t arg2 = MakeUInt128(0x2393582740194493ULL, 0x5665161088463125ULL);
  auto [res1, fpsr1] = AsmUqshrn2(arg1, arg2);
  ASSERT_EQ(res1, MakeUInt128(0x2393582740194493ULL, 0x9866720444187476ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Overflowing lanes clamp to 0xff in the upper half; QC is set.
  __uint128_t arg3 = MakeUInt128(0x0785297709734684ULL, 0x3030614624180358ULL);
  auto [res2, fpsr2] = AsmUqshrn2(arg3, arg2);
  ASSERT_EQ(res2, MakeUInt128(0x2393582740194493ULL, 0xffffff3578ff97ffULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
7447 
// Verifies scalar SQRSHRN: signed saturating ROUNDING shift right (by 4)
// and narrow a 16-bit element to 8 bits.
TEST(Arm64InsnTest, SignedSaturatingRoundingShiftRightNarrowInt16x1) {
  constexpr auto AsmSqrshrn = ASM_INSN_WRAP_FUNC_WQ_RES_W_ARG("sqrshrn %b0, %h2, #4");

  // Low 16 bits 0x0534: rounded shift result fits in int8, QC clear.
  __uint128_t arg1 = MakeUInt128(0x9610330799410534ULL, 0x7784574699992128ULL);
  auto [res1, fpsr1] = AsmSqrshrn(arg1);
  ASSERT_EQ(res1, MakeUInt128(0x0000000000000053ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Positive overflow: clamps to 0x7f (INT8_MAX), QC set.
  __uint128_t arg2 = MakeUInt128(0x5999993996122816ULL, 0x1521931488876938ULL);
  auto [res2, fpsr2] = AsmSqrshrn(arg2);
  ASSERT_EQ(res2, MakeUInt128(0x000000000000007fULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));

  // Negative overflow: clamps to 0x80 (INT8_MIN), QC set.
  __uint128_t arg3 = MakeUInt128(0x8022281083009986ULL, 0x0165494165426169ULL);
  auto [res3, fpsr3] = AsmSqrshrn(arg3);
  ASSERT_EQ(res3, MakeUInt128(0x0000000000000080ULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr3));
}
7466 
// Verifies vector SQRSHRN: eight 16-bit lanes, rounding right shift by 4,
// narrowed to 8 bits with signed saturation; result fills the low half.
TEST(Arm64InsnTest, SignedSaturatingRoundingShiftRightNarrowInt16x8) {
  constexpr auto AsmSqrshrn = ASM_INSN_WRAP_FUNC_WQ_RES_W_ARG("sqrshrn %0.8b, %2.8h, #4");

  // Every rounded lane fits in int8: exact results, QC clear.
  __uint128_t arg1 = MakeUInt128(0x0666070401700260ULL, 0x0520059204930759ULL);
  auto [res1, fpsr1] = AsmSqrshrn(arg1);
  ASSERT_EQ(res1, MakeUInt128(0x5259497666701726ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Overflowing lanes clamp to 0x7f / 0x80; QC is set.
  __uint128_t arg2 = MakeUInt128(0x4143408146852981ULL, 0x5053947178900451ULL);
  auto [res2, fpsr2] = AsmSqrshrn(arg2);
  ASSERT_EQ(res2, MakeUInt128(0x7f807f457f7f7f7fULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
7480 
// Verifies SQRSHRN2: rounding shift-right-narrow into the UPPER half of the
// destination while the existing lower half is preserved.
TEST(Arm64InsnTest, SignedSaturatingRoundingShiftRightNarrowInt16x8Upper) {
  constexpr auto AsmSqrshrn2 = ASM_INSN_WRAP_FUNC_WQ_RES_W0_ARG("sqrshrn2 %0.16b, %2.8h, #4");

  // All rounded lanes fit in int8: exact bytes in the upper half, arg2's
  // lower half preserved, QC clear.
  __uint128_t arg1 = MakeUInt128(0x0784017103960497ULL, 0x0707072501740336ULL);
  __uint128_t arg2 = MakeUInt128(0x5662725928440620ULL, 0x4302141137199227ULL);
  auto [res1, fpsr1] = AsmSqrshrn2(arg1, arg2);
  ASSERT_EQ(res1, MakeUInt128(0x5662725928440620ULL, 0x7072173378173949ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Overflowing lanes clamp to 0x7f / 0x80 in the upper half; QC is set.
  __uint128_t arg3 = MakeUInt128(0x2066886512756882ULL, 0x6614973078865701ULL);
  __uint128_t arg4 = MakeUInt128(0x5685016918647488ULL, 0x5416791545965072ULL);
  auto [res2, fpsr2] = AsmSqrshrn2(arg3, arg4);
  ASSERT_EQ(res2, MakeUInt128(0x5685016918647488ULL, 0x7f807f7f7f807f7fULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
7496 
// Verifies scalar UQRSHRN: unsigned saturating ROUNDING shift right (by 4)
// and narrow a 16-bit element to 8 bits.
TEST(Arm64InsnTest, UnsignedSaturatingRoundingShiftRightNarrowInt16x1) {
  constexpr auto AsmUqrshrn = ASM_INSN_WRAP_FUNC_WQ_RES_W_ARG("uqrshrn %b0, %h2, #4");

  // Low 16 bits 0x0920: rounded shift result fits in uint8, QC clear.
  __uint128_t arg1 = MakeUInt128(0x9614236585950920ULL, 0x9083073323356034ULL);
  auto [res1, fpsr1] = AsmUqrshrn(arg1);
  ASSERT_EQ(res1, MakeUInt128(0x0000000000000092ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Overflow: clamps to 0xff (UINT8_MAX), QC set.
  __uint128_t arg2 = MakeUInt128(0x8465318730299026ULL, 0x6596450137183754ULL);
  auto [res2, fpsr2] = AsmUqrshrn(arg2);
  ASSERT_EQ(res2, MakeUInt128(0x00000000000000ffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
7510 
// Verifies vector UQRSHRN: eight 16-bit lanes, rounding right shift by 4,
// narrowed to 8 bits with unsigned saturation; result fills the low half.
TEST(Arm64InsnTest, UnsignedSaturatingRoundingShiftRightNarrowInt16x8) {
  constexpr auto AsmUqrshrn = ASM_INSN_WRAP_FUNC_WQ_RES_W_ARG("uqrshrn %0.8b, %2.8h, #4");

  // Every rounded lane fits in uint8: exact results, QC clear.
  __uint128_t arg1 = MakeUInt128(0x0301067603860240ULL, 0x0011030402470073ULL);
  auto [res1, fpsr1] = AsmUqrshrn(arg1);
  ASSERT_EQ(res1, MakeUInt128(0x0130240730673824ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Overflowing lanes clamp to 0xff; QC is set.
  __uint128_t arg2 = MakeUInt128(0x5085082872462713ULL, 0x4946368501815469ULL);
  auto [res2, fpsr2] = AsmUqrshrn(arg2);
  ASSERT_EQ(res2, MakeUInt128(0xffff18ffff83ffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
7524 
// Verifies UQRSHRN2: rounding shift-right-narrow into the UPPER half of the
// destination while the existing lower half is preserved.
TEST(Arm64InsnTest, UnsignedSaturatingRoundingShiftRightNarrowInt16x8Upper) {
  // Renamed from "AsmUqrshrn" to carry the "2" suffix like every other
  // *2-instruction wrapper in this file (AsmSqrshrn2, AsmUqshrn2, ...).
  constexpr auto AsmUqrshrn2 = ASM_INSN_WRAP_FUNC_WQ_RES_W0_ARG("uqrshrn2 %0.16b, %2.8h, #4");

  // All rounded lanes fit in uint8: exact bytes in the upper half, arg2's
  // lower half preserved, QC clear.
  __uint128_t arg1 = MakeUInt128(0x0388099005730661ULL, 0x0237022304780112ULL);
  __uint128_t arg2 = MakeUInt128(0x0392269110277722ULL, 0x6102544149221576ULL);
  auto [res1, fpsr1] = AsmUqrshrn2(arg1, arg2);
  ASSERT_EQ(res1, MakeUInt128(0x0392269110277722ULL, 0x2322481139995766ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Overflowing lanes clamp to 0xff in the upper half; QC is set.
  __uint128_t arg3 = MakeUInt128(0x9254069617600504ULL, 0x7974928060721268ULL);
  __uint128_t arg4 = MakeUInt128(0x8414695726397884ULL, 0x2560084531214065ULL);
  auto [res2, fpsr2] = AsmUqrshrn2(arg3, arg4);
  ASSERT_EQ(res2, MakeUInt128(0x8414695726397884ULL, 0xffffffffff69ff50ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
7540 
TEST(Arm64InsnTest, SignedSaturatingShiftRightUnsignedNarrowInt16x1) {
  // Scalar SQSHRUN: a signed halfword is shifted right by 4 and narrowed to
  // an UNSIGNED byte, saturating at both ends of the unsigned range.
  constexpr auto Sqshrun = ASM_INSN_WRAP_FUNC_WQ_RES_W_ARG("sqshrun %b0, %h2, #4");

  // 0x0063 >> 4 == 0x06 fits in a byte: no saturation.
  __uint128_t in_fit = MakeUInt128(0x9143611439920063ULL, 0x8005083214098760ULL);
  auto [out_fit, qc_fit] = Sqshrun(in_fit);
  ASSERT_EQ(out_fit, MakeUInt128(0x06U, 0U));
  ASSERT_FALSE(IsQcBitSet(qc_fit));

  // Negative source halfword (0x9975) clamps to zero and raises QC.
  __uint128_t in_neg = MakeUInt128(0x3815174571259975ULL, 0x4953580239983146ULL);
  auto [out_neg, qc_neg] = Sqshrun(in_neg);
  ASSERT_EQ(out_neg, MakeUInt128(0x00U, 0U));
  ASSERT_TRUE(IsQcBitSet(qc_neg));

  // Large positive source clamps to 0xff and raises QC.
  __uint128_t in_big = MakeUInt128(0x4599309324851025ULL, 0x1682944672606661ULL);
  auto [out_big, qc_big] = Sqshrun(in_big);
  ASSERT_EQ(out_big, MakeUInt128(0xffU, 0U));
  ASSERT_TRUE(IsQcBitSet(qc_big));
}
7559 
TEST(Arm64InsnTest, SignedSaturatingShiftRightUnsignedNarrowInt16x8) {
  // Vector SQSHRUN: 8 signed halfwords shifted right by 4 and narrowed to
  // unsigned bytes with saturation; result lands in the low 64 bits.
  constexpr auto Sqshrun = ASM_INSN_WRAP_FUNC_WQ_RES_W_ARG("sqshrun %0.8b, %2.8h, #4");

  // Every lane fits after the shift: QC clear.
  __uint128_t src_fit = MakeUInt128(0x0911066408340874ULL, 0x0800074107250670ULL);
  auto [out_fit, qc_fit] = Sqshrun(src_fit);
  ASSERT_EQ(out_fit, MakeUInt128(0x8074726791668387ULL, 0U));
  ASSERT_FALSE(IsQcBitSet(qc_fit));

  // Negative lanes clamp to 0x00, oversized positives to 0xff; QC set.
  __uint128_t src_sat = MakeUInt128(0x4792258319129415ULL, 0x7390809143831384ULL);
  auto [out_sat, qc_sat] = Sqshrun(src_sat);
  ASSERT_EQ(out_sat, MakeUInt128(0xff00ffffffffff00ULL, 0U));
  ASSERT_TRUE(IsQcBitSet(qc_sat));
}
7573 
TEST(Arm64InsnTest, SignedSaturatingShiftRightUnsignedNarrowInt16x8Upper) {
  // SQSHRUN2: narrows into the upper 8 bytes while preserving the lower
  // 64 bits of the destination.
  constexpr auto Sqshrun2 = ASM_INSN_WRAP_FUNC_WQ_RES_W0_ARG("sqshrun2 %0.16b, %2.8h, #4");

  // No lane saturates: destination low half is carried through, QC clear.
  __uint128_t src_fit = MakeUInt128(0x0625082101740415ULL, 0x0233074903960353ULL);
  __uint128_t dst_fit = MakeUInt128(0x0136178653673760ULL, 0x6421667781377399ULL);
  auto [out_fit, qc_fit] = Sqshrun2(src_fit, dst_fit);
  ASSERT_EQ(out_fit, MakeUInt128(0x0136178653673760ULL, 0x2374393562821741ULL));
  ASSERT_FALSE(IsQcBitSet(qc_fit));

  // Saturating lanes clamp to 0x00/0xff in the upper half and raise QC.
  __uint128_t src_sat = MakeUInt128(0x4295810545651083ULL, 0x1046297282937584ULL);
  __uint128_t dst_sat = MakeUInt128(0x1611625325625165ULL, 0x7249807849209989ULL);
  auto [out_sat, qc_sat] = Sqshrun2(src_sat, dst_sat);
  ASSERT_EQ(out_sat, MakeUInt128(0x1611625325625165ULL, 0xffff00ffff00ffffULL));
  ASSERT_TRUE(IsQcBitSet(qc_sat));
}
7589 
TEST(Arm64InsnTest, SignedSaturatingRoundingShiftRightUnsignedNarrowInt16x1) {
  // Scalar SQRSHRUN: like SQSHRUN but with rounding before the narrow.
  constexpr auto Sqrshrun = ASM_INSN_WRAP_FUNC_WQ_RES_W_ARG("sqrshrun %b0, %h2, #4");

  // Rounded shift of 0x0886 yields 0x88, which fits: QC clear.
  __uint128_t in_fit = MakeUInt128(0x5760186946490886ULL, 0x8154528562134698ULL);
  auto [out_fit, qc_fit] = Sqrshrun(in_fit);
  ASSERT_EQ(out_fit, MakeUInt128(0x88ULL, 0U));
  ASSERT_FALSE(IsQcBitSet(qc_fit));

  // Negative source halfword clamps to zero and raises QC.
  __uint128_t in_neg = MakeUInt128(0x8355444560249556ULL, 0x6684366029221951ULL);
  auto [out_neg, qc_neg] = Sqrshrun(in_neg);
  ASSERT_EQ(out_neg, MakeUInt128(0x00ULL, 0U));
  ASSERT_TRUE(IsQcBitSet(qc_neg));

  // Large positive source clamps to 0xff and raises QC.
  __uint128_t in_big = MakeUInt128(0x2483091060537720ULL, 0x1980218310103270ULL);
  auto [out_big, qc_big] = Sqrshrun(in_big);
  ASSERT_EQ(out_big, MakeUInt128(0xffULL, 0U));
  ASSERT_TRUE(IsQcBitSet(qc_big));
}
7608 
TEST(Arm64InsnTest, SignedSaturatingRoundingShiftRightUnsignedNarrowInt16x8) {
  // Vector SQRSHRUN: rounding shift right, narrow to unsigned bytes.
  constexpr auto Sqrshrun = ASM_INSN_WRAP_FUNC_WQ_RES_W_ARG("sqrshrun %0.8b, %2.8h, #4");

  // All lanes representable as unsigned bytes: QC clear.
  __uint128_t src_fit = MakeUInt128(0x0150069001490702ULL, 0x0673033808340550ULL);
  auto [out_fit, qc_fit] = Sqrshrun(src_fit);
  ASSERT_EQ(out_fit, MakeUInt128(0x6734835515691570ULL, 0U));
  ASSERT_FALSE(IsQcBitSet(qc_fit));

  // Mixed negative/oversized lanes clamp to 0x00/0xff and raise QC.
  __uint128_t src_sat = MakeUInt128(0x8363660178487710ULL, 0x6080980426924713ULL);
  auto [out_sat, qc_sat] = Sqrshrun(src_sat);
  ASSERT_EQ(out_sat, MakeUInt128(0xff00ffff00ffffffULL, 0U));
  ASSERT_TRUE(IsQcBitSet(qc_sat));
}
7622 
TEST(Arm64InsnTest, SignedSaturatingRoundingShiftRightUnsignedNarrowInt16x8Upper) {
  // SQRSHRUN2: rounding narrow into the upper 8 bytes; destination's lower
  // 64 bits are kept.
  constexpr auto Sqrshrun2 = ASM_INSN_WRAP_FUNC_WQ_RES_W0_ARG("sqrshrun2 %0.16b, %2.8h, #4");

  // No saturation: low half preserved, QC clear.
  __uint128_t src_fit = MakeUInt128(0x0733049502080757ULL, 0x0651018705990498ULL);
  __uint128_t dst_fit = MakeUInt128(0x5693795623875551ULL, 0x6175754380917805ULL);
  auto [out_fit, qc_fit] = Sqrshrun2(src_fit, dst_fit);
  ASSERT_EQ(out_fit, MakeUInt128(0x5693795623875551ULL, 0x65185a4a73492175ULL));
  ASSERT_FALSE(IsQcBitSet(qc_fit));

  // Saturating lanes clamp in the upper half and raise QC.
  __uint128_t src_sat = MakeUInt128(0x1444671298615527ULL, 0x5982014514102756ULL);
  __uint128_t dst_sat = MakeUInt128(0x0068929750246304ULL, 0x0173514891945763ULL);
  auto [out_sat, qc_sat] = Sqrshrun2(src_sat, dst_sat);
  ASSERT_EQ(out_sat, MakeUInt128(0x0068929750246304ULL, 0xff14ffffffff00ffULL));
  ASSERT_TRUE(IsQcBitSet(qc_sat));
}
7638 
TEST(Arm64InsnTest, SignedSaturatingShiftLeftUnsignedImmInt32x1) {
  // Scalar SQSHLU: signed word shifted LEFT by 4 and saturated to the
  // unsigned 32-bit range.
  constexpr auto Sqshlu = ASM_INSN_WRAP_FUNC_WQ_RES_W_ARG("sqshlu %s0, %s2, #4");

  // 0x01862556 << 4 fits in 32 unsigned bits: QC clear.
  __uint128_t in_fit = MakeUInt128(0x9704033001862556ULL, 0x1473321177711744ULL);
  auto [out_fit, qc_fit] = Sqshlu(in_fit);
  ASSERT_EQ(out_fit, MakeUInt128(0x18625560ULL, 0U));
  ASSERT_FALSE(IsQcBitSet(qc_fit));

  // Negative source (0x96946490) clamps to zero and raises QC.
  __uint128_t in_neg = MakeUInt128(0x3095760196946490ULL, 0x8868154528562134ULL);
  auto [out_neg, qc_neg] = Sqshlu(in_neg);
  ASSERT_EQ(out_neg, MakeUInt128(0x00000000ULL, 0U));
  ASSERT_TRUE(IsQcBitSet(qc_neg));

  // Shift overflowing 32 bits clamps to UINT32_MAX and raises QC.
  __uint128_t in_big = MakeUInt128(0x1335028160884035ULL, 0x1781452541964320ULL);
  auto [out_big, qc_big] = Sqshlu(in_big);
  ASSERT_EQ(out_big, MakeUInt128(0xffffffffULL, 0U));
  ASSERT_TRUE(IsQcBitSet(qc_big));
}
7657 
TEST(Arm64InsnTest, SignedSaturatingShiftLeftUnsignedImmInt32x4) {
  // Vector SQSHLU over four 32-bit lanes, left shift by 4.
  constexpr auto Sqshlu = ASM_INSN_WRAP_FUNC_WQ_RES_W_ARG("sqshlu %0.4s, %2.4s, #4");

  // Every lane shifts without leaving the unsigned range: QC clear.
  __uint128_t src_fit = MakeUInt128(0x0865174507877133ULL, 0x0813875205980941ULL);
  auto [out_fit, qc_fit] = Sqshlu(src_fit);
  ASSERT_EQ(out_fit, MakeUInt128(0x8651745078771330ULL, 0x8138752059809410ULL));
  ASSERT_FALSE(IsQcBitSet(qc_fit));

  // Overflowing lanes clamp to 0xffffffff, negatives to 0; QC set.
  __uint128_t src_sat = MakeUInt128(0x2174227300352296ULL, 0x0080891797050682ULL);
  auto [out_sat, qc_sat] = Sqshlu(src_sat);
  ASSERT_EQ(out_sat, MakeUInt128(0xffffffff03522960ULL, 0x0808917000000000ULL));
  ASSERT_TRUE(IsQcBitSet(qc_sat));
}
7671 
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyLong32x2) {
  // SQDMULL (vector): per lane, 2 * a * b widened 32 -> 64 bits; only the
  // lower two 32-bit lanes of each operand participate.
  constexpr auto Sqdmull = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqdmull %0.2d, %2.2s, %3.2s");

  // Small operands: exact doubled products, QC clear.
  __uint128_t lhs_ok = MakeUInt128(0x0000000200000004ULL, 0xfeed000300000010ULL);
  __uint128_t rhs_ok = MakeUInt128(0x0000000300000002ULL, 0xfeed00040000002ULL);
  auto [prod_ok, qc_ok] = Sqdmull(lhs_ok, rhs_ok);
  ASSERT_EQ(prod_ok, MakeUInt128(0x0000000000000010ULL, 0x000000000000000cULL));
  ASSERT_FALSE(IsQcBitSet(qc_ok));

  // 2 * INT32_MIN * INT32_MIN overflows int64: result saturates, QC set.
  __uint128_t lhs_sat = MakeUInt128(0x8000000000000004ULL, 0xfeed000300000010ULL);
  __uint128_t rhs_sat = MakeUInt128(0x8000000000000002ULL, 0xfeed00040000002ULL);
  auto [prod_sat, qc_sat] = Sqdmull(lhs_sat, rhs_sat);
  ASSERT_EQ(prod_sat, MakeUInt128(0x0000000000000010ULL, 0x7fffffffffffffffULL));
  ASSERT_TRUE(IsQcBitSet(qc_sat));
}
7687 
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyLong16x4) {
  // SQDMULL widening 16 -> 32 bits over the four lower halfword lanes.
  constexpr auto Sqdmull = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqdmull %0.4s, %2.4h, %3.4h");

  // No lane overflows: QC clear.
  __uint128_t lhs_ok = MakeUInt128(0x0004000200f00004ULL, 0xfeedfeedfeedfeedULL);
  __uint128_t rhs_ok = MakeUInt128(0x0008000300800002ULL, 0xabcd0123ffff4567ULL);
  auto [prod_ok, qc_ok] = Sqdmull(lhs_ok, rhs_ok);
  ASSERT_EQ(prod_ok, MakeUInt128(0x0000f00000000010ULL, 0x000000400000000cULL));
  ASSERT_FALSE(IsQcBitSet(qc_ok));

  // 2 * INT16_MIN * INT16_MIN overflows int32: lane saturates, QC set.
  __uint128_t lhs_sat = MakeUInt128(0x8000000200f00004ULL, 0xfeedfeedfeedfeedULL);
  __uint128_t rhs_sat = MakeUInt128(0x8000000300800002ULL, 0xabcd0123ffff4567ULL);
  auto [prod_sat, qc_sat] = Sqdmull(lhs_sat, rhs_sat);
  ASSERT_EQ(prod_sat, MakeUInt128(0x0000f00000000010ULL, 0x7fffffff0000000cULL));
  ASSERT_TRUE(IsQcBitSet(qc_sat));
}
7703 
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyLongUpper32x2) {
  // SQDMULL2: same as SQDMULL but consumes the UPPER two 32-bit lanes.
  constexpr auto Sqdmull2 = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqdmull2 %0.2d, %2.4s, %3.4s");

  // Upper-lane products stay in range: QC clear.
  __uint128_t lhs_ok = MakeUInt128(0x0000000200000004ULL, 0xfeed000300000010ULL);
  __uint128_t rhs_ok = MakeUInt128(0x0000000300000002ULL, 0xfeed00040000002ULL);
  auto [prod_ok, qc_ok] = Sqdmull2(lhs_ok, rhs_ok);
  ASSERT_EQ(prod_ok, MakeUInt128(0x0000000800000040ULL, 0xffddc4ed7f98e000ULL));
  ASSERT_FALSE(IsQcBitSet(qc_ok));

  // INT32_MIN in an upper lane drives the doubled product to saturation.
  __uint128_t lhs_sat = MakeUInt128(0x8000000000000004ULL, 0x8000000000000010ULL);
  __uint128_t rhs_sat = MakeUInt128(0x8000000000000002ULL, 0x8000000000000002ULL);
  auto [prod_sat, qc_sat] = Sqdmull2(lhs_sat, rhs_sat);
  ASSERT_EQ(prod_sat, MakeUInt128(0x0000000000000040ULL, 0x7fffffffffffffffULL));
  ASSERT_TRUE(IsQcBitSet(qc_sat));
}
7719 
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyLongUpper16x4) {
  // SQDMULL2 widening 16 -> 32 bits over the UPPER four halfword lanes.
  constexpr auto Sqdmull2 = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqdmull2 %0.4s, %2.8h, %3.8h");

  // Upper-lane products stay representable: QC clear.
  __uint128_t lhs_ok = MakeUInt128(0x0004000200f00004ULL, 0xfeedfeedfeedfeedULL);
  __uint128_t rhs_ok = MakeUInt128(0x0008000300800002ULL, 0xabcd0123ffff4567ULL);
  auto [prod_ok, qc_ok] = Sqdmull2(lhs_ok, rhs_ok);
  ASSERT_EQ(prod_ok, MakeUInt128(0x00000226ff6ae4b6ULL, 0x00b4e592fffd8eceULL));
  ASSERT_FALSE(IsQcBitSet(qc_ok));

  // INT16_MIN * INT16_MIN doubled overflows int32: saturates, QC set.
  __uint128_t lhs_sat = MakeUInt128(0x8000000000000004ULL, 0x8000000000000010ULL);
  __uint128_t rhs_sat = MakeUInt128(0x8000000000000002ULL, 0x8000000000000002ULL);
  auto [prod_sat, qc_sat] = Sqdmull2(lhs_sat, rhs_sat);
  ASSERT_EQ(prod_sat, MakeUInt128(0x0000000000000040ULL, 0x7fffffff00000000ULL));
  ASSERT_TRUE(IsQcBitSet(qc_sat));
}
7735 
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyLong64x2IndexedElem) {
  // SQDMULL by-element form: every lane of the first operand is multiplied
  // by lane [1] of the second operand.
  constexpr auto Sqdmull = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqdmull %0.2d, %2.2s, %3.s[1]");

  // Scalar element 2: doubled products fit, QC clear.
  __uint128_t vec_ok = MakeUInt128(0x0022002211223344ULL, 0x1122334400110011LL);
  __uint128_t elem_ok = MakeUInt128(0x0000000200000000ULL, 0x000000000000000ULL);
  auto [prod_ok, qc_ok] = Sqdmull(vec_ok, elem_ok);
  ASSERT_EQ(prod_ok, MakeUInt128(0x000000004488cd10ULL, 0x0000000000880088ULL));
  ASSERT_FALSE(IsQcBitSet(qc_ok));

  // INT32_MIN element times an INT32_MIN lane saturates, QC set.
  __uint128_t vec_sat = MakeUInt128(0x0022002280000000ULL, 0x1122334400110011LL);
  __uint128_t elem_sat = MakeUInt128(0x8000000000000000ULL, 0x000000000000000ULL);
  auto [prod_sat, qc_sat] = Sqdmull(vec_sat, elem_sat);
  ASSERT_EQ(prod_sat, MakeUInt128(0x7fffffffffffffffULL, 0xffddffde00000000ULL));
  ASSERT_TRUE(IsQcBitSet(qc_sat));
}
7751 
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyLong32x4IndexedElem) {
  // SQDMULL by-element: four halfword lanes times halfword element [4].
  constexpr auto Sqdmull = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqdmull %0.4s, %2.4h, %3.h[4]");

  // Element value 0x000f: all doubled products fit, QC clear.
  __uint128_t vec_ok = MakeUInt128(0x0022002211223344ULL, 0x1122334400110011LL);
  __uint128_t elem_ok = MakeUInt128(0x000f000f000f000fULL, 0x000f000f000f0002ULL);
  auto [prod_ok, qc_ok] = Sqdmull(vec_ok, elem_ok);
  ASSERT_EQ(prod_ok, MakeUInt128(0x000044880000cd10ULL, 0x0000008800000088ULL));
  ASSERT_FALSE(IsQcBitSet(qc_ok));

  // INT16_MIN element against an INT16_MIN lane saturates, QC set.
  __uint128_t vec_sat = MakeUInt128(0x0022002280000000ULL, 0x1122334400118000ULL);
  __uint128_t elem_sat = MakeUInt128(0x1111111122222222ULL, 0x1122334411228000ULL);
  auto [prod_sat, qc_sat] = Sqdmull(vec_sat, elem_sat);
  ASSERT_EQ(prod_sat, MakeUInt128(0x7fffffff00000000ULL, 0xffde0000ffde0000ULL));
  ASSERT_TRUE(IsQcBitSet(qc_sat));
}
7767 
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyLongUpper64x2IndexedElem) {
  // SQDMULL2 by-element: upper two word lanes times word element [3].
  constexpr auto Sqdmull2 = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqdmull2 %0.2d, %2.4s, %3.s[3]");

  // Small scalar element: no saturation, QC clear.
  __uint128_t vec_ok = MakeUInt128(0x0022002211223344ULL, 0x1122334400110011ULL);
  __uint128_t elem_ok = MakeUInt128(0xffffffffffffffffULL, 0x00000002ffffffffULL);
  auto [prod_ok, qc_ok] = Sqdmull2(vec_ok, elem_ok);
  ASSERT_EQ(prod_ok, MakeUInt128(0x0000000000440044ULL, 0x000000004488cd10ULL));
  ASSERT_FALSE(IsQcBitSet(qc_ok));

  // INT32_MIN element times an INT32_MIN upper lane saturates, QC set.
  __uint128_t vec_sat = MakeUInt128(0x80000000ffffffffULL, 0x1122334480000000ULL);
  __uint128_t elem_sat = MakeUInt128(0x1122334411223344ULL, 0x80000000ffffffffULL);
  auto [prod_sat, qc_sat] = Sqdmull2(vec_sat, elem_sat);
  ASSERT_EQ(prod_sat, MakeUInt128(0x7fffffffffffffffULL, 0xeeddccbc00000000ULL));
  ASSERT_TRUE(IsQcBitSet(qc_sat));
}
7783 
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyLongUpper32x4IndexedElem) {
  // SQDMULL2 by-element: upper four halfword lanes times halfword element [7].
  constexpr auto AsmSqdmull = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqdmull2 %0.4s, %2.8h, %3.h[7]");

  // No saturation: doubled products fit in int32, QC stays clear.
  __uint128_t arg1 = MakeUInt128(0x0022002211223344ULL, 0x1122334400110011ULL);
  __uint128_t arg2 = MakeUInt128(0xffffffffffffffffULL, 0x0002ffffffffffffULL);
  auto [res1, fpsr1] = AsmSqdmull(arg1, arg2);
  ASSERT_EQ(res1, MakeUInt128(0x0000004400000044ULL, 0x000044880000cd10ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // INT16_MIN element against an INT16_MIN lane saturates to INT32_MAX.
  __uint128_t arg3 = MakeUInt128(0x80000000ffffffffULL, 0x112233448000ffffULL);
  __uint128_t arg4 = MakeUInt128(0x1122334411223344ULL, 0x8000ffffffffffffULL);
  auto [res2, fpsr2] = AsmSqdmull(arg3, arg4);
  ASSERT_EQ(res2, MakeUInt128(0x7fffffff00010000ULL, 0xeede0000ccbc0000ULL));
  // Fix: the saturating case must also verify the QC (cumulative saturation)
  // bit — the 0x7fffffff lane proves saturation happened, and every sibling
  // test asserts it; this one computed fpsr2 but never checked it.
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
7798 
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyLong64x1) {
  // Scalar SQDMULL: 2 * s2 * s3 widened 32 -> 64 bits with saturation.
  constexpr auto Sqdmull = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqdmull %d0, %s2, %s3");

  // Product fits in int64: QC clear.
  __uint128_t lhs_ok = MakeUInt128(0x0000000811112222ULL, 0x0000000700000006ULL);
  __uint128_t rhs_ok = MakeUInt128(0x0000000510000000ULL, 0x0000000300000002ULL);
  auto [prod_ok, qc_ok] = Sqdmull(lhs_ok, rhs_ok);
  ASSERT_EQ(prod_ok, MakeUInt128(0x0222244440000000ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(qc_ok));

  // 2 * INT32_MIN * INT32_MIN saturates to INT64_MAX, QC set.
  __uint128_t lhs_sat = MakeUInt128(0xaabbccdd80000000ULL, 0x1122334400110011ULL);
  __uint128_t rhs_sat = MakeUInt128(0xff11ff1180000000ULL, 0xffffffff11223344ULL);
  auto [prod_sat, qc_sat] = Sqdmull(lhs_sat, rhs_sat);
  ASSERT_EQ(prod_sat, MakeUInt128(0x7fffffffffffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(qc_sat));
}
7813 
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyLong32x1) {
  // Scalar SQDMULL: 2 * h2 * h3 widened 16 -> 32 bits with saturation.
  constexpr auto Sqdmull = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqdmull %s0, %h2, %h3");

  // Product fits in int32: QC clear.
  __uint128_t lhs_ok = MakeUInt128(0x1111111811112222ULL, 0xf000000700080006ULL);
  __uint128_t rhs_ok = MakeUInt128(0x0000000510004444ULL, 0xf000000300080002ULL);
  auto [prod_ok, qc_ok] = Sqdmull(lhs_ok, rhs_ok);
  ASSERT_EQ(prod_ok, MakeUInt128(0x0000000012343210ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(qc_ok));

  // 2 * INT16_MIN * INT16_MIN saturates to INT32_MAX, QC set.
  __uint128_t lhs_sat = MakeUInt128(0xaabbccdd00008000ULL, 0x1122334400110011ULL);
  __uint128_t rhs_sat = MakeUInt128(0xff11ff1100008000ULL, 0xffffffff11223344ULL);
  auto [prod_sat, qc_sat] = Sqdmull(lhs_sat, rhs_sat);
  ASSERT_EQ(prod_sat, MakeUInt128(0x000000007fffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(qc_sat));
}
7828 
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyLong32x1IndexedElem) {
  // Scalar-by-element SQDMULL: h2 multiplied by halfword element [7].
  constexpr auto Sqdmull = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqdmull %s0, %h2, %3.h[7]");

  // Element 0x1111: doubled product fits, QC clear.
  __uint128_t lhs_ok = MakeUInt128(0x0000000811112222ULL, 0x0000000700000006ULL);
  __uint128_t rhs_ok = MakeUInt128(0x0000000510000000ULL, 0x1111000300000002ULL);
  auto [prod_ok, qc_ok] = Sqdmull(lhs_ok, rhs_ok);
  ASSERT_EQ(prod_ok, MakeUInt128(0x00000000048d0c84ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(qc_ok));

  // INT16_MIN source and INT16_MIN element saturate to INT32_MAX, QC set.
  __uint128_t lhs_sat = MakeUInt128(0xaabbccddaabb8000ULL, 0x1122334400110011ULL);
  __uint128_t rhs_sat = MakeUInt128(0xff11ff11ff000ff0ULL, 0x8000aabb11223344ULL);
  auto [prod_sat, qc_sat] = Sqdmull(lhs_sat, rhs_sat);
  ASSERT_EQ(prod_sat, MakeUInt128(0x000000007fffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(qc_sat));
}
7843 
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyLong64x1IndexedElem) {
  // Scalar-by-element SQDMULL: s2 multiplied by word element [3].
  constexpr auto Sqdmull = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqdmull %d0, %s2, %3.s[3]");

  // Element 3: doubled product fits in int64, QC clear.
  __uint128_t lhs_ok = MakeUInt128(0x0000000811112222ULL, 0x0000000700000006ULL);
  __uint128_t rhs_ok = MakeUInt128(0x0000000510000000ULL, 0x0000000300000002ULL);
  auto [prod_ok, qc_ok] = Sqdmull(lhs_ok, rhs_ok);
  ASSERT_EQ(prod_ok, MakeUInt128(0x000000006666ccccULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(qc_ok));

  // INT32_MIN source and INT32_MIN element saturate to INT64_MAX, QC set.
  __uint128_t lhs_sat = MakeUInt128(0xaabbccdd80000000ULL, 0x1122334400110011ULL);
  __uint128_t rhs_sat = MakeUInt128(0xff11ff11ff000ff0ULL, 0x8000000011223344ULL);
  auto [prod_sat, qc_sat] = Sqdmull(lhs_sat, rhs_sat);
  ASSERT_EQ(prod_sat, MakeUInt128(0x7fffffffffffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(qc_sat));
}
7858 
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyAddLong32x2) {
  // SQDMLAL (vector): acc += 2 * a * b per lane, widening 32 -> 64 bits.
  // Saturation can occur in the doubling multiply or in the accumulate.
  constexpr auto Sqdmlal = ASM_INSN_WRAP_FUNC_WQ_RES_WW0_ARG("sqdmlal %0.2d, %2.2s, %3.2s");

  // Neither multiply nor accumulate saturates.
  __uint128_t mul_a1 = MakeUInt128(0x1100110022002200ULL, 0x7654321076543210ULL);
  __uint128_t mul_b1 = MakeUInt128(0x0000000400000004ULL, 0x0123456701234567ULL);
  __uint128_t acc1 = MakeUInt128(0x0100010001000100ULL, 0x0400040004000400ULL);
  auto [sum1, qc1] = Sqdmlal(mul_a1, mul_b1, acc1);
  ASSERT_EQ(sum1, MakeUInt128(0x0100010111011100ULL, 0x040004008c008c00ULL));
  ASSERT_FALSE(IsQcBitSet(qc1));

  // 2 * INT32_MIN * INT32_MIN saturates the multiply step.
  __uint128_t mul_a2 = MakeUInt128(0x8000000000000004ULL, 0xfeed000300000010ULL);
  __uint128_t mul_b2 = MakeUInt128(0x8000000000000002ULL, 0xfeed000400000020ULL);
  __uint128_t acc2 = MakeUInt128(0x0000080000000900ULL, 0x00000a000000b000ULL);
  auto [sum2, qc2] = Sqdmlal(mul_a2, mul_b2, acc2);
  ASSERT_EQ(sum2, MakeUInt128(0x0000080000000910ULL, 0x7fffffffffffffffULL));
  ASSERT_TRUE(IsQcBitSet(qc2));

  // Accumulator lane already at INT64_MAX: the addition saturates.
  __uint128_t mul_a3 = MakeUInt128(0x1100110022002200ULL, 0x7654321076543210ULL);
  __uint128_t mul_b3 = MakeUInt128(0x0000000400000004ULL, 0x0123456701234567ULL);
  __uint128_t acc3 = MakeUInt128(0x7fffffffffffffffULL, 0x00000a000000b000ULL);
  auto [sum3, qc3] = Sqdmlal(mul_a3, mul_b3, acc3);
  ASSERT_EQ(sum3, MakeUInt128(0x7fffffffffffffffULL, 0x00000a0088013800ULL));
  ASSERT_TRUE(IsQcBitSet(qc3));
}
7886 
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyAddLong16x4) {
  // SQDMLAL widening 16 -> 32 bits over the four lower halfword lanes.
  constexpr auto Sqdmlal = ASM_INSN_WRAP_FUNC_WQ_RES_WW0_ARG("sqdmlal %0.4s, %2.4h, %3.4h");

  // Neither step saturates.
  __uint128_t mul_a1 = MakeUInt128(0x8000110022002200ULL, 0x7654321076543210ULL);
  __uint128_t mul_b1 = MakeUInt128(0x0010001100000004ULL, 0x0123456701234567ULL);
  __uint128_t acc1 = MakeUInt128(0x0100010001000100ULL, 0x0400040004000400ULL);
  auto [sum1, qc1] = Sqdmlal(mul_a1, mul_b1, acc1);
  ASSERT_EQ(sum1, MakeUInt128(0x0100010001011100ULL, 0x03f0040004024600ULL));
  ASSERT_FALSE(IsQcBitSet(qc1));

  // 2 * INT16_MIN * INT16_MIN saturates the multiply step.
  __uint128_t mul_a2 = MakeUInt128(0x8000111111111111ULL, 0x1234123412341234ULL);
  __uint128_t mul_b2 = MakeUInt128(0x8000111111111111ULL, 0x1234123412341234ULL);
  __uint128_t acc2 = MakeUInt128(0x0123456701234567ULL, 0x0123456701234567ULL);
  auto [sum2, qc2] = Sqdmlal(mul_a2, mul_b2, acc2);
  ASSERT_EQ(sum2, MakeUInt128(0x0369cba90369cba9ULL, 0x7fffffff0369cba9ULL));
  ASSERT_TRUE(IsQcBitSet(qc2));

  // Accumulator lane at INT32_MAX: the addition saturates.
  __uint128_t mul_a3 = MakeUInt128(0x1100110022002200ULL, 0x7654321076543210ULL);
  __uint128_t mul_b3 = MakeUInt128(0x0000000400010004ULL, 0x0123456701234567ULL);
  __uint128_t acc3 = MakeUInt128(0x7fffffff12345678ULL, 0x00000a000000b000ULL);
  auto [sum3, qc3] = Sqdmlal(mul_a3, mul_b3, acc3);
  ASSERT_EQ(sum3, MakeUInt128(0x7fffffff12356678ULL, 0x00000a0000013800ULL));
  ASSERT_TRUE(IsQcBitSet(qc3));
}
7914 
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyAddLongUpper32x2) {
  // SQDMLAL2: multiply-accumulate over the UPPER two 32-bit lanes.
  constexpr auto Sqdmlal2 = ASM_INSN_WRAP_FUNC_WQ_RES_WW0_ARG("sqdmlal2 %0.2d, %2.4s, %3.4s");

  // Neither step saturates.
  __uint128_t mul_a1 = MakeUInt128(0x1100110022002200ULL, 0x7654321076543210ULL);
  __uint128_t mul_b1 = MakeUInt128(0x0000000400000004ULL, 0x0123456701234567ULL);
  __uint128_t acc1 = MakeUInt128(0x0100010001000100ULL, 0x0400040004000400ULL);
  auto [sum1, qc1] = Sqdmlal2(mul_a1, mul_b1, acc1);
  ASSERT_EQ(sum1, MakeUInt128(0x020d44926c1ce9e0ULL, 0x050d47926f1cece0ULL));
  ASSERT_FALSE(IsQcBitSet(qc1));

  // INT32_MIN in the upper lanes saturates the multiply step.
  __uint128_t mul_a2 = MakeUInt128(0x1234567800000004ULL, 0x8000000001100010ULL);
  __uint128_t mul_b2 = MakeUInt128(0x1234567800000002ULL, 0x8000000001100020ULL);
  __uint128_t acc2 = MakeUInt128(0x0000080000000900ULL, 0x00000a000000b000ULL);
  auto [sum2, qc2] = Sqdmlal2(mul_a2, mul_b2, acc2);
  ASSERT_EQ(sum2, MakeUInt128(0x00024a0066000d00ULL, 0x7fffffffffffffffULL));
  ASSERT_TRUE(IsQcBitSet(qc2));

  // Accumulator lane at INT64_MAX: the addition saturates.
  __uint128_t mul_a3 = MakeUInt128(0x1100110022002200ULL, 0x7654321076543210ULL);
  __uint128_t mul_b3 = MakeUInt128(0x0000000400000004ULL, 0x0123456701234567ULL);
  __uint128_t acc3 = MakeUInt128(0x1234567812345678ULL, 0x7fffffffffffffffULL);
  auto [sum3, qc3] = Sqdmlal2(mul_a3, mul_b3, acc3);
  ASSERT_EQ(sum3, MakeUInt128(0x13419a0a7d513f58ULL, 0x7fffffffffffffffULL));
  ASSERT_TRUE(IsQcBitSet(qc3));
}
7942 
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyAddLongUpper16x4) {
  // SQDMLAL2: multiply-accumulate over the UPPER four halfword lanes.
  constexpr auto Sqdmlal2 = ASM_INSN_WRAP_FUNC_WQ_RES_WW0_ARG("sqdmlal2 %0.4s, %2.8h, %3.8h");

  // Neither step saturates.
  __uint128_t mul_a1 = MakeUInt128(0x8000110022002200ULL, 0x7654321076543210ULL);
  __uint128_t mul_b1 = MakeUInt128(0x0010001100000004ULL, 0x0123456701234567ULL);
  __uint128_t acc1 = MakeUInt128(0x0100010001000100ULL, 0x0400040004000400ULL);
  auto [sum1, qc1] = Sqdmlal2(mul_a1, mul_b1, acc1);
  ASSERT_EQ(sum1, MakeUInt128(0x020d03f81c24e9e0ULL, 0x050d06f81f24ece0ULL));
  ASSERT_FALSE(IsQcBitSet(qc1));

  // INT16_MIN in the upper lanes saturates the multiply step.
  __uint128_t mul_a2 = MakeUInt128(0x1111111111111111ULL, 0x8000123412341234ULL);
  __uint128_t mul_b2 = MakeUInt128(0x1111111111111111ULL, 0x8000123412341234ULL);
  __uint128_t acc2 = MakeUInt128(0x0123456701234567ULL, 0x0123456701234567ULL);
  auto [sum2, qc2] = Sqdmlal2(mul_a2, mul_b2, acc2);
  ASSERT_EQ(sum2, MakeUInt128(0x03b9fa8703b9fa87ULL, 0x7fffffff03b9fa87ULL));
  ASSERT_TRUE(IsQcBitSet(qc2));

  // Accumulator lane at INT32_MAX: the addition saturates.
  __uint128_t mul_a3 = MakeUInt128(0x1100110022002200ULL, 0x7654321076543210ULL);
  __uint128_t mul_b3 = MakeUInt128(0x0000000400010004ULL, 0x0123456701234567ULL);
  __uint128_t acc3 = MakeUInt128(0x1234567812345678ULL, 0x7fffffff0000b000ULL);
  auto [sum3, qc3] = Sqdmlal2(mul_a3, mul_b3, acc3);
  ASSERT_EQ(sum3, MakeUInt128(0x134159702d593f58ULL, 0x7fffffff1b2598e0ULL));
  ASSERT_TRUE(IsQcBitSet(qc3));
}
7970 
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyAddLong64x1) {
  // Scalar SQDMLAL: d0 += 2 * s2 * s3, widening 32 -> 64 bits.
  constexpr auto Sqdmlal = ASM_INSN_WRAP_FUNC_WQ_RES_WW0_ARG("sqdmlal %d0, %s2, %s3");

  // Neither step saturates.
  __uint128_t mul_a1 = MakeUInt128(0x1100110011223344ULL, 0x7654321076543210ULL);
  __uint128_t mul_b1 = MakeUInt128(0x0000000020000000ULL, 0x0123456701234567ULL);
  __uint128_t acc1 = MakeUInt128(0x12345678000000FFULL, 0x0400040004000400ULL);
  auto [sum1, qc1] = Sqdmlal(mul_a1, mul_b1, acc1);
  ASSERT_EQ(sum1, MakeUInt128(0x167ce349000000ffULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(qc1));

  // 2 * INT32_MIN * INT32_MIN saturates the multiply step.
  __uint128_t mul_a2 = MakeUInt128(0x1122334480000000ULL, 0xfeed000300000010ULL);
  __uint128_t mul_b2 = MakeUInt128(0xaabbccdd80000000ULL, 0xfeed000400000020ULL);
  __uint128_t acc2 = MakeUInt128(0x1122334411111111ULL, 0x00000a000000b000ULL);
  auto [sum2, qc2] = Sqdmlal(mul_a2, mul_b2, acc2);
  ASSERT_EQ(sum2, MakeUInt128(0x7fffffffffffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(qc2));

  // Accumulator at INT64_MAX: the addition saturates.
  __uint128_t mul_a3 = MakeUInt128(0x1122334400111111ULL, 0x7654321076543210ULL);
  __uint128_t mul_b3 = MakeUInt128(0xaabbccdd00222222ULL, 0x0123456701234567ULL);
  __uint128_t acc3 = MakeUInt128(0x7fffffffffffffffULL, 0x00000a000000b000ULL);
  auto [sum3, qc3] = Sqdmlal(mul_a3, mul_b3, acc3);
  ASSERT_EQ(sum3, MakeUInt128(0x7fffffffffffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(qc3));
}
7998 
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyAddLong32x1) {
  // Scalar SQDMLAL: s0 += 2 * h2 * h3, widening 16 -> 32 bits.
  constexpr auto Sqdmlal = ASM_INSN_WRAP_FUNC_WQ_RES_WW0_ARG("sqdmlal %s0, %h2, %h3");

  // Neither step saturates.
  __uint128_t mul_a1 = MakeUInt128(0x1100110022002200ULL, 0x7654321076543210ULL);
  __uint128_t mul_b1 = MakeUInt128(0x0000000000000004ULL, 0x0123456701234567ULL);
  __uint128_t acc1 = MakeUInt128(0x0100010001000100ULL, 0x0400040004000400ULL);
  auto [sum1, qc1] = Sqdmlal(mul_a1, mul_b1, acc1);
  ASSERT_EQ(sum1, MakeUInt128(0x0000000001011100ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(qc1));

  // 2 * INT16_MIN * INT16_MIN saturates the multiply step.
  __uint128_t mul_a2 = MakeUInt128(0x1122334411228000ULL, 0xfeed000300000010ULL);
  __uint128_t mul_b2 = MakeUInt128(0xaabbccddaabb8000ULL, 0xfeed000400000020ULL);
  __uint128_t acc2 = MakeUInt128(0x1122334411111111ULL, 0x00000a000000b000ULL);
  auto [sum2, qc2] = Sqdmlal(mul_a2, mul_b2, acc2);
  ASSERT_EQ(sum2, MakeUInt128(0x000000007fffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(qc2));

  // Accumulator at INT32_MAX: the addition saturates.
  __uint128_t mul_a3 = MakeUInt128(0x1122334411220123ULL, 0x7654321076543210ULL);
  __uint128_t mul_b3 = MakeUInt128(0xaabbccddaabb0044ULL, 0x0123456701234567ULL);
  __uint128_t acc3 = MakeUInt128(0xaabbccdd7fffffffULL, 0x00000a000000b000ULL);
  auto [sum3, qc3] = Sqdmlal(mul_a3, mul_b3, acc3);
  ASSERT_EQ(sum3, MakeUInt128(0x000000007fffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(qc3));
}
8026 
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyAddLong64x2IndexedElem) {
  // SQDMLAL by-element: both lanes of the first operand are multiplied by
  // word element [1] of the second, then accumulated.
  constexpr auto Sqdmlal = ASM_INSN_WRAP_FUNC_WQ_RES_WW0_ARG("sqdmlal %0.2d, %2.2s, %3.s[1]");

  // Neither step saturates.
  __uint128_t vec1 = MakeUInt128(0x1100110022002200ULL, 0x7654321076543210ULL);
  __uint128_t elem1 = MakeUInt128(0x0000000400000004ULL, 0x0123456701234567ULL);
  __uint128_t acc1 = MakeUInt128(0x0100010001000100ULL, 0x0400040004000400ULL);
  auto [sum1, qc1] = Sqdmlal(vec1, elem1, acc1);
  ASSERT_EQ(sum1, MakeUInt128(0x0100010111011100ULL, 0x040004008c008c00ULL));
  ASSERT_FALSE(IsQcBitSet(qc1));

  // INT32_MIN element against an INT32_MIN lane saturates the multiply.
  __uint128_t vec2 = MakeUInt128(0x8000000000000004ULL, 0xfeed000300000010ULL);
  __uint128_t elem2 = MakeUInt128(0x8000000000000002ULL, 0xfeed000400000020ULL);
  __uint128_t acc2 = MakeUInt128(0x0000080000000900ULL, 0x00000a000000b000ULL);
  auto [sum2, qc2] = Sqdmlal(vec2, elem2, acc2);
  ASSERT_EQ(sum2, MakeUInt128(0x000007fc00000900ULL, 0x7fffffffffffffffULL));
  ASSERT_TRUE(IsQcBitSet(qc2));

  // Accumulator lane at INT64_MAX: the addition saturates.
  __uint128_t vec3 = MakeUInt128(0x1100110022002200ULL, 0x7654321076543210ULL);
  __uint128_t elem3 = MakeUInt128(0x0000000400000004ULL, 0x0123456701234567ULL);
  __uint128_t acc3 = MakeUInt128(0x7fffffffffffffffULL, 0x00000a000000b000ULL);
  auto [sum3, qc3] = Sqdmlal(vec3, elem3, acc3);
  ASSERT_EQ(sum3, MakeUInt128(0x7fffffffffffffffULL, 0x00000a0088013800ULL));
  ASSERT_TRUE(IsQcBitSet(qc3));
}
8054 
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyAddLong32x4IndexedElem) {
  // SQDMLAL by-element: four halfword lanes times halfword element [7],
  // accumulated into four word lanes.
  constexpr auto Sqdmlal = ASM_INSN_WRAP_FUNC_WQ_RES_WW0_ARG("sqdmlal %0.4s, %2.4h, %3.h[7]");

  // Neither step saturates.
  __uint128_t vec1 = MakeUInt128(0x0102030405060708ULL, 0x7654321076543210ULL);
  __uint128_t elem1 = MakeUInt128(0x1122334488776655ULL, 0x0123456701234567ULL);
  __uint128_t acc1 = MakeUInt128(0x0123456789abcdefULL, 0xfedcba9876543210ULL);
  auto [sum1, qc1] = Sqdmlal(vec1, elem1, acc1);
  ASSERT_EQ(sum1, MakeUInt128(0x012eb10b89bbca1fULL, 0xfedf0524765b0d28ULL));
  ASSERT_FALSE(IsQcBitSet(qc1));

  // INT16_MIN element against an INT16_MIN lane saturates the multiply.
  __uint128_t vec2 = MakeUInt128(0x80000123456789a4ULL, 0xfeed000300000010ULL);
  __uint128_t elem2 = MakeUInt128(0x0123456789abcdefULL, 0x8000fedcba123456ULL);
  __uint128_t acc2 = MakeUInt128(0x0123456701234567ULL, 0x0123456701234567ULL);
  auto [sum2, qc2] = Sqdmlal(vec2, elem2, acc2);
  ASSERT_EQ(sum2, MakeUInt128(0xbbbc4567777f4567ULL, 0x7fffffff00004567ULL));
  ASSERT_TRUE(IsQcBitSet(qc2));

  // Accumulator lane at INT32_MAX: the addition saturates.
  __uint128_t vec3 = MakeUInt128(0x1100110022002200ULL, 0x7654321076543210ULL);
  __uint128_t elem3 = MakeUInt128(0x8888111122223333ULL, 0x01234567ffffeeeeULL);
  __uint128_t acc3 = MakeUInt128(0x7fffffffffffffffULL, 0x00000a000000b000ULL);
  auto [sum3, qc3] = Sqdmlal(vec3, elem3, acc3);
  ASSERT_EQ(sum3, MakeUInt128(0x7fffffff004d4bffULL, 0x0026b00000275600ULL));
  ASSERT_TRUE(IsQcBitSet(qc3));
}
8082 
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyAddLongUpper64x2IndexedElem) {
  // SQDMLAL2 (vector, by element): same as SQDMLAL but reads the upper two
  // 32-bit lanes of %2, multiplies by lane s[3] of %3, doubles, and
  // accumulates into the 64-bit lanes of %0.  Saturation sets FPSR.QC.
  constexpr auto AsmSqdmlal = ASM_INSN_WRAP_FUNC_WQ_RES_WW0_ARG("sqdmlal2 %0.2d, %2.4s, %3.s[3]");

  // No saturation.
  __uint128_t arg1 = MakeUInt128(0x1100110022002200ULL, 0x7654321076543210ULL);
  __uint128_t arg2 = MakeUInt128(0x0000000400000004ULL, 0x0123456701234567ULL);
  __uint128_t arg3 = MakeUInt128(0x0100010001000100ULL, 0x0400040004000400ULL);
  auto [res1, fpsr1] = AsmSqdmlal(arg1, arg2, arg3);
  ASSERT_EQ(res1, MakeUInt128(0x020d44926c1ce9e0ULL, 0x050d47926f1cece0ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Saturates in the multiplication (0x80000000 * 0x80000000 doubled).
  __uint128_t arg4 = MakeUInt128(0x0123456789abcdefULL, 0x1122334480000000ULL);
  __uint128_t arg5 = MakeUInt128(0x0123456789abcdefULL, 0x8000000011223344ULL);
  __uint128_t arg6 = MakeUInt128(0x0101010102020202ULL, 0x0303030304040404ULL);
  auto [res2, fpsr2] = AsmSqdmlal(arg4, arg5, arg6);
  ASSERT_EQ(res2, MakeUInt128(0x7fffffffffffffffULL, 0xf1e0cfbf04040404ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));

  // Saturates in the addition (accumulator lane holds INT64_MAX).
  __uint128_t arg7 = MakeUInt128(0x1100110022002200ULL, 0x7654321076543210ULL);
  __uint128_t arg8 = MakeUInt128(0x1122334444332211ULL, 0x0123456701234567ULL);
  __uint128_t arg9 = MakeUInt128(0x7fffffffffffffffULL, 0x00000a000000b000ULL);
  auto [res3, fpsr3] = AsmSqdmlal(arg7, arg8, arg9);
  ASSERT_EQ(res3, MakeUInt128(0x7fffffffffffffffULL, 0x010d4d926b1d98e0ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr3));
}
8110 
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyAddLongUpper32x4IndexedElem) {
  // SQDMLAL2 (vector, by element): reads the upper four 16-bit lanes of %2,
  // multiplies by lane h[7] of %3, doubles, and accumulates into the 32-bit
  // lanes of %0.  Saturation sets FPSR.QC.
  constexpr auto AsmSqdmlal = ASM_INSN_WRAP_FUNC_WQ_RES_WW0_ARG("sqdmlal2 %0.4s, %2.8h, %3.h[7]");

  // No saturation.
  __uint128_t arg1 = MakeUInt128(0x0102030405060708ULL, 0x7654321076543210ULL);
  __uint128_t arg2 = MakeUInt128(0x1122334488776655ULL, 0x0123456701234567ULL);
  __uint128_t arg3 = MakeUInt128(0x0123456789abcdefULL, 0xfedcba9876543210ULL);
  auto [res1, fpsr1] = AsmSqdmlal(arg1, arg2, arg3);
  ASSERT_EQ(res1, MakeUInt128(0x0230485f8a1d9e4fULL, 0xffe9bd9076c60270ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Saturates in the multiplication (0x8000 * 0x8000 doubled).
  __uint128_t arg4 = MakeUInt128(0x0011223344556677ULL, 0xfeedfeedfeed8000ULL);
  __uint128_t arg5 = MakeUInt128(0x0123456789abcdefULL, 0x8000fedcba123456ULL);
  __uint128_t arg6 = MakeUInt128(0x0123456701234567ULL, 0x0123456701234567ULL);
  auto [res2, fpsr2] = AsmSqdmlal(arg4, arg5, arg6);
  ASSERT_EQ(res2, MakeUInt128(0x023645677fffffffULL, 0x0236456702364567ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));

  // Saturates in the addition (accumulator lane holds INT32_MAX).
  __uint128_t arg7 = MakeUInt128(0x1100110022002200ULL, 0x7654321076543210ULL);
  __uint128_t arg8 = MakeUInt128(0x8888111122223333ULL, 0x01234567ffffeeeeULL);
  __uint128_t arg9 = MakeUInt128(0x7fffffffffffffffULL, 0x00000a000000b000ULL);
  auto [res3, fpsr3] = AsmSqdmlal(arg7, arg8, arg9);
  ASSERT_EQ(res3, MakeUInt128(0x7fffffff0071d05fULL, 0x010d0cf800728060ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr3));
}
8138 
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyAddLong64x1IndexedElem) {
  // SQDMLAL (scalar, by element): multiplies the 32-bit scalar in %s2 by lane
  // s[3] of %3, doubles, and accumulates into the 64-bit scalar %d0.  Only
  // the low 64 bits of the result register are written; the rest reads zero.
  constexpr auto AsmSqdmlal = ASM_INSN_WRAP_FUNC_WQ_RES_WW0_ARG("sqdmlal %d0, %s2, %3.s[3]");

  // No saturation.
  __uint128_t arg1 = MakeUInt128(0x0102030405060708ULL, 0x7654321076543210ULL);
  __uint128_t arg2 = MakeUInt128(0x1122334488776655ULL, 0x0123456701234567ULL);
  __uint128_t arg3 = MakeUInt128(0x0123456789abcdefULL, 0xfedcba9876543210ULL);
  auto [res1, fpsr1] = AsmSqdmlal(arg1, arg2, arg3);
  ASSERT_EQ(res1, MakeUInt128(0x012eb3d4d07fc65fULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Saturates in the multiplication (0x80000000 * 0x80000000 doubled).
  __uint128_t arg4 = MakeUInt128(0x0011223380000000ULL, 0xfeedfeedfeed8000ULL);
  __uint128_t arg5 = MakeUInt128(0x0123456789abcdefULL, 0x80000000ba123456ULL);
  __uint128_t arg6 = MakeUInt128(0x0123456701234567ULL, 0x0123456701234567ULL);
  auto [res2, fpsr2] = AsmSqdmlal(arg4, arg5, arg6);
  ASSERT_EQ(res2, MakeUInt128(0x7fffffffffffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));

  // Saturates in the addition (accumulator holds INT64_MAX).
  __uint128_t arg7 = MakeUInt128(0x1100110022002200ULL, 0x7654321076543210ULL);
  __uint128_t arg8 = MakeUInt128(0x8888111122223333ULL, 0x01234567ffffeeeeULL);
  __uint128_t arg9 = MakeUInt128(0x7fffffffffffffffULL, 0x00000a000000b000ULL);
  auto [res3, fpsr3] = AsmSqdmlal(arg7, arg8, arg9);
  ASSERT_EQ(res3, MakeUInt128(0x7fffffffffffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr3));
}
8166 
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyAddLong32x1IndexedElem) {
  // SQDMLAL (scalar, by element): multiplies the 16-bit scalar in %h2 by lane
  // h[7] of %3, doubles, and accumulates into the 32-bit scalar %s0.  Only
  // the low 32 bits of the result register are written; the rest reads zero.
  constexpr auto AsmSqdmlal = ASM_INSN_WRAP_FUNC_WQ_RES_WW0_ARG("sqdmlal %s0, %h2, %3.h[7]");

  // No saturation.
  __uint128_t arg1 = MakeUInt128(0x0102030405060708ULL, 0x7654321076543210ULL);
  __uint128_t arg2 = MakeUInt128(0x1122334488776655ULL, 0x0123456701234567ULL);
  __uint128_t arg3 = MakeUInt128(0x0123456789abcdefULL, 0xfedcba9876543210ULL);
  auto [res1, fpsr1] = AsmSqdmlal(arg1, arg2, arg3);
  ASSERT_EQ(res1, MakeUInt128(0x0000000089bbca1fULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Saturates in the multiplication (0x8000 * 0x8000 doubled).
  __uint128_t arg4 = MakeUInt128(0x0011223344558000ULL, 0xfeedfeedfeed1234ULL);
  __uint128_t arg5 = MakeUInt128(0x0123456789abcdefULL, 0x8000fedcba123456ULL);
  __uint128_t arg6 = MakeUInt128(0x0123456701234567ULL, 0x0123456701234567ULL);
  auto [res2, fpsr2] = AsmSqdmlal(arg4, arg5, arg6);
  ASSERT_EQ(res2, MakeUInt128(0x000000007fffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));

  // Saturates in the addition (accumulator holds INT32_MAX).
  __uint128_t arg7 = MakeUInt128(0xaabbccddeeff2200ULL, 0x7654321076543210ULL);
  __uint128_t arg8 = MakeUInt128(0x8888111122223333ULL, 0x0123aabbccddeeffULL);
  __uint128_t arg9 = MakeUInt128(0xaabbccdd7fffffffULL, 0x0011223344556677ULL);
  auto [res3, fpsr3] = AsmSqdmlal(arg7, arg8, arg9);
  ASSERT_EQ(res3, MakeUInt128(0x000000007fffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr3));
}
8194 
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplySubtractLong32x2) {
  // SQDMLSL (vector): multiplies the two 32-bit lanes in the low half of %2
  // and %3 pairwise, doubles the products, and subtracts them from the 64-bit
  // lanes of %0.  Saturation in the multiply or the subtract sets FPSR.QC.
  constexpr auto AsmSqdmlsl = ASM_INSN_WRAP_FUNC_WQ_RES_WW0_ARG("sqdmlsl %0.2d, %2.2s, %3.2s");

  // No saturation.
  __uint128_t arg1 = MakeUInt128(0x0000000080000001ULL, 0x7654321076543210ULL);
  __uint128_t arg2 = MakeUInt128(0x0000000100000004ULL, 0x0123456701234567ULL);
  __uint128_t arg3 = MakeUInt128(0x0000100000000001ULL, 0x0400040004000400ULL);
  auto [res1, fpsr1] = AsmSqdmlsl(arg1, arg2, arg3);
  ASSERT_EQ(res1, MakeUInt128(0x00001003fffffff9ULL, 0x0400040004000400ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Saturates in the multiplication (0x80000000 * 0x80000000 doubled).
  __uint128_t arg4 = MakeUInt128(0x8000000000000004ULL, 0xfeed000300000010ULL);
  __uint128_t arg5 = MakeUInt128(0x8000000000000002ULL, 0xfeed000400000020ULL);
  __uint128_t arg6 = MakeUInt128(0x0000000000000900ULL, 0x00000a000000b000ULL);
  auto [res2, fpsr2] = AsmSqdmlsl(arg4, arg5, arg6);
  ASSERT_EQ(res2, MakeUInt128(0x00000000000008f0ULL, 0x80000a000000b001ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));

  // Saturates in the subtraction (accumulator lane holds INT64_MIN).
  __uint128_t arg7 = MakeUInt128(0x1100110022002200ULL, 0x7654321076543210ULL);
  __uint128_t arg8 = MakeUInt128(0x0000000400000004ULL, 0x0123456701234567ULL);
  __uint128_t arg9 = MakeUInt128(0x8000000000000000ULL, 0x00000a000000b000ULL);
  auto [res3, fpsr3] = AsmSqdmlsl(arg7, arg8, arg9);
  ASSERT_EQ(res3, MakeUInt128(0x8000000000000000ULL, 0x000009ff78002800ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr3));
}
8222 
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplySubtractLong16x4) {
  // SQDMLSL (vector): multiplies the four 16-bit lanes in the low half of %2
  // and %3 pairwise, doubles the products, and subtracts them from the 32-bit
  // lanes of %0.  Saturation in the multiply or the subtract sets FPSR.QC.
  constexpr auto AsmSqdmlsl = ASM_INSN_WRAP_FUNC_WQ_RES_WW0_ARG("sqdmlsl %0.4s, %2.4h, %3.4h");

  // No saturation.
  __uint128_t arg1 = MakeUInt128(0x8000110022002200ULL, 0x7654321076543210ULL);
  __uint128_t arg2 = MakeUInt128(0x0010001100000004ULL, 0x0123456701234567ULL);
  __uint128_t arg3 = MakeUInt128(0x0100010001000100ULL, 0x0400040004000400ULL);
  auto [res1, fpsr1] = AsmSqdmlsl(arg1, arg2, arg3);
  ASSERT_EQ(res1, MakeUInt128(0x0100010000fef100ULL, 0x0410040003fdc200ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Saturates in the multiplication (0x8000 * 0x8000 doubled).
  __uint128_t arg4 = MakeUInt128(0x8000111111111111ULL, 0x1234123412341234ULL);
  __uint128_t arg5 = MakeUInt128(0x8000111111111111ULL, 0x1234123412341234ULL);
  __uint128_t arg6 = MakeUInt128(0x0123456701234567ULL, 0x0123456701234567ULL);
  auto [res2, fpsr2] = AsmSqdmlsl(arg4, arg5, arg6);
  ASSERT_EQ(res2, MakeUInt128(0xfedcbf25fedcbf25ULL, 0x81234568fedcbf25ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));

  // Saturates in the subtraction (accumulator lane holds INT32_MIN).
  __uint128_t arg7 = MakeUInt128(0x1100110022002200ULL, 0x7654321076543210ULL);
  __uint128_t arg8 = MakeUInt128(0x0000000400010004ULL, 0x0123456701234567ULL);
  __uint128_t arg9 = MakeUInt128(0x8000000012345678ULL, 0x00000a000000b000ULL);
  auto [res3, fpsr3] = AsmSqdmlsl(arg7, arg8, arg9);
  ASSERT_EQ(res3, MakeUInt128(0x8000000012334678ULL, 0x00000a0000002800ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr3));
}
8250 
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplySubtractLongUpper32x2) {
  // SQDMLSL2 (vector): same as SQDMLSL but reads the upper two 32-bit lanes
  // of %2 and %3, doubling the products and subtracting from the 64-bit lanes
  // of %0.  Saturation sets FPSR.QC.
  constexpr auto AsmSqdmlsl = ASM_INSN_WRAP_FUNC_WQ_RES_WW0_ARG("sqdmlsl2 %0.2d, %2.4s, %3.4s");

  // No saturation.
  __uint128_t arg1 = MakeUInt128(0x1100110022002200ULL, 0x7654321076543210ULL);
  __uint128_t arg2 = MakeUInt128(0x0000000400000004ULL, 0x0123456701234567ULL);
  __uint128_t arg3 = MakeUInt128(0x0100010001000100ULL, 0x0400040004000400ULL);
  auto [res1, fpsr1] = AsmSqdmlsl(arg1, arg2, arg3);
  ASSERT_EQ(res1, MakeUInt128(0xfff2bd6d95e31820ULL, 0x02f2c06d98e31b20ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Saturates in the multiplication (0x80000000 * 0x80000000 doubled).
  __uint128_t arg4 = MakeUInt128(0x1234567800000004ULL, 0x8000000001100010ULL);
  __uint128_t arg5 = MakeUInt128(0x1234567800000002ULL, 0x8000000001100020ULL);
  __uint128_t arg6 = MakeUInt128(0x0000080000000900ULL, 0x00000a000000b000ULL);
  auto [res2, fpsr2] = AsmSqdmlsl(arg4, arg5, arg6);
  ASSERT_EQ(res2, MakeUInt128(0xfffdc5ff9a000500ULL, 0x80000a000000b001ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));

  // Saturates in the subtraction (accumulator lane holds INT64_MIN).
  __uint128_t arg7 = MakeUInt128(0x1100110022002200ULL, 0x7654321076543210ULL);
  __uint128_t arg8 = MakeUInt128(0x0000000400000004ULL, 0x0123456701234567ULL);
  __uint128_t arg9 = MakeUInt128(0x1234567812345678ULL, 0x8000000000000000ULL);
  auto [res3, fpsr3] = AsmSqdmlsl(arg7, arg8, arg9);
  ASSERT_EQ(res3, MakeUInt128(0x112712e5a7176d98ULL, 0x8000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr3));
}
8278 
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplySubtractLongUpper16x4) {
  // SQDMLSL2 (vector): reads the upper four 16-bit lanes of %2 and %3,
  // doubles the pairwise products, and subtracts them from the 32-bit lanes
  // of %0.  Saturation sets FPSR.QC.
  constexpr auto AsmSqdmlsl = ASM_INSN_WRAP_FUNC_WQ_RES_WW0_ARG("sqdmlsl2 %0.4s, %2.8h, %3.8h");

  // No saturation.
  __uint128_t arg1 = MakeUInt128(0x8000110022002200ULL, 0x7654321076543210ULL);
  __uint128_t arg2 = MakeUInt128(0x0010001100000004ULL, 0x0123456701234567ULL);
  __uint128_t arg3 = MakeUInt128(0x0100010001000100ULL, 0x0400040004000400ULL);
  auto [res1, fpsr1] = AsmSqdmlsl(arg1, arg2, arg3);
  ASSERT_EQ(res1, MakeUInt128(0xfff2fe08e5db1820ULL, 0x02f30108e8db1b20ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Saturates in the multiplication (0x8000 * 0x8000 doubled).
  __uint128_t arg4 = MakeUInt128(0x1111111111111111ULL, 0x8000123412341234ULL);
  __uint128_t arg5 = MakeUInt128(0x1111111111111111ULL, 0x8000123412341234ULL);
  __uint128_t arg6 = MakeUInt128(0x0123456701234567ULL, 0x0123456701234567ULL);
  auto [res2, fpsr2] = AsmSqdmlsl(arg4, arg5, arg6);
  ASSERT_EQ(res2, MakeUInt128(0xfe8c9047fe8c9047ULL, 0x81234568fe8c9047ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));

  // Saturates in the subtraction (accumulator lane holds INT32_MIN).
  __uint128_t arg7 = MakeUInt128(0x1100110022002200ULL, 0x7654321076543210ULL);
  __uint128_t arg8 = MakeUInt128(0x0000000400010004ULL, 0x0123456701234567ULL);
  __uint128_t arg9 = MakeUInt128(0x1234567812345678ULL, 0x800000000000b000ULL);
  auto [res3, fpsr3] = AsmSqdmlsl(arg7, arg8, arg9);
  ASSERT_EQ(res3, MakeUInt128(0x11275380f70f6d98ULL, 0x80000000e4dbc720ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr3));
}
8306 
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplySubtractLong64x1) {
  // SQDMLSL (scalar): doubles the product of the 32-bit scalars in %s2 and
  // %s3 and subtracts it from the 64-bit accumulator %d0; only the low 64
  // bits of the destination are written.  Saturation sets FPSR.QC.
  constexpr auto AsmSqdmlsl = ASM_INSN_WRAP_FUNC_WQ_RES_WW0_ARG("sqdmlsl %d0, %s2, %s3");

  // Neither the multiply nor the subtraction saturates.
  __uint128_t lhs1 = MakeUInt128(0x1100110011223344ULL, 0x7654321076543210ULL);
  __uint128_t rhs1 = MakeUInt128(0x0000000020000000ULL, 0x0123456701234567ULL);
  __uint128_t acc1 = MakeUInt128(0x12345678000000ffULL, 0x0400040004000400ULL);
  auto [out1, flags1] = AsmSqdmlsl(lhs1, rhs1, acc1);
  ASSERT_EQ(out1, MakeUInt128(0x0debc9a7000000ffULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(flags1));

  // The doubled product 0x80000000 * 0x80000000 overflows 64 bits.
  __uint128_t lhs2 = MakeUInt128(0x1122334480000000ULL, 0xfeed000300000010ULL);
  __uint128_t rhs2 = MakeUInt128(0xaabbccdd80000000ULL, 0xfeed000400000020ULL);
  __uint128_t acc2 = MakeUInt128(0x1122334411111111ULL, 0x00000a000000b000ULL);
  auto [out2, flags2] = AsmSqdmlsl(lhs2, rhs2, acc2);
  ASSERT_EQ(out2, MakeUInt128(0x9122334411111112ULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(flags2));

  // Subtracting from an INT64_MIN accumulator saturates the result.
  __uint128_t lhs3 = MakeUInt128(0x1122334400111111ULL, 0x7654321076543210ULL);
  __uint128_t rhs3 = MakeUInt128(0xaabbccdd00222222ULL, 0x0123456701234567ULL);
  __uint128_t acc3 = MakeUInt128(0x8000000000000000ULL, 0x00000a000000b000ULL);
  auto [out3, flags3] = AsmSqdmlsl(lhs3, rhs3, acc3);
  ASSERT_EQ(out3, MakeUInt128(0x8000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(flags3));
}
8334 
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplySubtractLong32x1) {
  // SQDMLSL (scalar): doubles the product of the 16-bit scalars in %h2 and
  // %h3 and subtracts it from the 32-bit accumulator %s0; only the low 32
  // bits of the destination are written.  Saturation sets FPSR.QC.
  constexpr auto AsmSqdmlsl = ASM_INSN_WRAP_FUNC_WQ_RES_WW0_ARG("sqdmlsl %s0, %h2, %h3");

  // No saturation.
  __uint128_t arg1 = MakeUInt128(0x1100110022002200ULL, 0x7654321076543210ULL);
  __uint128_t arg2 = MakeUInt128(0x0000000000000004ULL, 0x0123456701234567ULL);
  __uint128_t arg3 = MakeUInt128(0x0100010001000100ULL, 0x0400040004000400ULL);
  auto [res1, fpsr1] = AsmSqdmlsl(arg1, arg2, arg3);
  ASSERT_EQ(res1, MakeUInt128(0x0000000000fef100ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Saturates in the multiplication (0x8000 * 0x8000 doubled).
  __uint128_t arg4 = MakeUInt128(0x1122334411228000ULL, 0xfeed000300000010ULL);
  __uint128_t arg5 = MakeUInt128(0xaabbccddaabb8000ULL, 0xfeed000400000020ULL);
  __uint128_t arg6 = MakeUInt128(0x1122334411111111ULL, 0x00000a000000b000ULL);
  auto [res2, fpsr2] = AsmSqdmlsl(arg4, arg5, arg6);
  ASSERT_EQ(res2, MakeUInt128(0x0000000091111112ULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));

  // Saturates in the subtraction (accumulator holds INT32_MIN).
  __uint128_t arg7 = MakeUInt128(0x1122334411220123ULL, 0x7654321076543210ULL);
  __uint128_t arg8 = MakeUInt128(0xaabbccddaabb0044ULL, 0x0123456701234567ULL);
  __uint128_t arg9 = MakeUInt128(0xaabbccdd80000000ULL, 0x00000a000000b000ULL);
  auto [res3, fpsr3] = AsmSqdmlsl(arg7, arg8, arg9);
  ASSERT_EQ(res3, MakeUInt128(0x0000000080000000ULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr3));
}
8362 
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplySubtractLong64x2IndexedElem) {
  // SQDMLSL (vector, by element): multiplies the two 32-bit lanes in the low
  // half of %2 by lane s[1] of %3, doubles, and subtracts from the 64-bit
  // lanes of %0.  Saturation sets FPSR.QC.
  constexpr auto AsmSqdmlsl = ASM_INSN_WRAP_FUNC_WQ_RES_WW0_ARG("sqdmlsl %0.2d, %2.2s, %3.s[1]");

  // No saturation.
  __uint128_t arg1 = MakeUInt128(0x1100110022002200ULL, 0x7654321076543210ULL);
  __uint128_t arg2 = MakeUInt128(0x0000000400000004ULL, 0x0123456701234567ULL);
  __uint128_t arg3 = MakeUInt128(0x0100010001000100ULL, 0x0400040004000400ULL);
  auto [res1, fpsr1] = AsmSqdmlsl(arg1, arg2, arg3);
  ASSERT_EQ(res1, MakeUInt128(0x010000fef0fef100ULL, 0x040003ff7bff7c00ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Saturates in the multiplication (0x80000000 * 0x80000000 doubled).
  __uint128_t arg4 = MakeUInt128(0x8000000000000004ULL, 0xfeed000300000010ULL);
  __uint128_t arg5 = MakeUInt128(0x8000000000000002ULL, 0xfeed000400000020ULL);
  __uint128_t arg6 = MakeUInt128(0x0000080000000900ULL, 0x00000a000000b000ULL);
  auto [res2, fpsr2] = AsmSqdmlsl(arg4, arg5, arg6);
  ASSERT_EQ(res2, MakeUInt128(0x0000080400000900ULL, 0x80000a000000b001ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));

  // Saturates in the subtraction (accumulator lane holds INT64_MIN).
  __uint128_t arg7 = MakeUInt128(0x1100110022002200ULL, 0x7654321076543210ULL);
  __uint128_t arg8 = MakeUInt128(0x0000000400000004ULL, 0x0123456701234567ULL);
  __uint128_t arg9 = MakeUInt128(0x8000000000000000ULL, 0x00000a000000b000ULL);
  auto [res3, fpsr3] = AsmSqdmlsl(arg7, arg8, arg9);
  ASSERT_EQ(res3, MakeUInt128(0x8000000000000000ULL, 0x000009ff78002800ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr3));
}
8390 
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplySubtractLong32x4IndexedElem) {
  // SQDMLSL (vector, by element): multiplies the four 16-bit lanes in the low
  // half of %2 by lane h[7] of %3, doubles, and subtracts from the 32-bit
  // lanes of %0.  Saturation sets FPSR.QC.
  constexpr auto AsmSqdmlsl = ASM_INSN_WRAP_FUNC_WQ_RES_WW0_ARG("sqdmlsl %0.4s, %2.4h, %3.h[7]");

  // No saturation.
  __uint128_t arg1 = MakeUInt128(0x0102030405060708ULL, 0x7654321076543210ULL);
  __uint128_t arg2 = MakeUInt128(0x1122334488776655ULL, 0x0123456701234567ULL);
  __uint128_t arg3 = MakeUInt128(0x0123456789abcdefULL, 0xfedcba9876543210ULL);
  auto [res1, fpsr1] = AsmSqdmlsl(arg1, arg2, arg3);
  ASSERT_EQ(res1, MakeUInt128(0x0117d9c3899bd1bfULL, 0xfeda700c764d56f8ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Saturates in the multiplication (0x8000 * 0x8000 doubled).
  __uint128_t arg4 = MakeUInt128(0x80000123456789a4ULL, 0xfeed000300000010ULL);
  __uint128_t arg5 = MakeUInt128(0x0123456789abcdefULL, 0x8000fedcba123456ULL);
  __uint128_t arg6 = MakeUInt128(0x0123456701234567ULL, 0x0123456701234567ULL);
  auto [res2, fpsr2] = AsmSqdmlsl(arg4, arg5, arg6);
  ASSERT_EQ(res2, MakeUInt128(0x468a45678ac74567ULL, 0x8123456802464567ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));

  // Saturates in the subtraction (accumulator lane holds INT32_MIN).
  __uint128_t arg7 = MakeUInt128(0x1100110022002200ULL, 0x7654321076543210ULL);
  __uint128_t arg8 = MakeUInt128(0x8888111122223333ULL, 0x01234567ffffeeeeULL);
  __uint128_t arg9 = MakeUInt128(0x8000000000000000ULL, 0x00000a000000b000ULL);
  auto [res3, fpsr3] = AsmSqdmlsl(arg7, arg8, arg9);
  ASSERT_EQ(res3, MakeUInt128(0x80000000ffb2b400ULL, 0xffd96400ffda0a00ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr3));
}
8418 
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplySubtractLongUpper64x2IndexedElem) {
  // SQDMLSL2 (vector, by element): reads the upper two 32-bit lanes of %2,
  // multiplies by lane s[3] of %3, doubles, and subtracts from the 64-bit
  // lanes of %0.  Saturation sets FPSR.QC.
  constexpr auto AsmSqdmlsl = ASM_INSN_WRAP_FUNC_WQ_RES_WW0_ARG("sqdmlsl2 %0.2d, %2.4s, %3.s[3]");

  // No saturation.
  __uint128_t arg1 = MakeUInt128(0x1100110022002200ULL, 0x7654321076543210ULL);
  __uint128_t arg2 = MakeUInt128(0x0000000400000004ULL, 0x0123456701234567ULL);
  __uint128_t arg3 = MakeUInt128(0x0100010001000100ULL, 0x0400040004000400ULL);
  auto [res1, fpsr1] = AsmSqdmlsl(arg1, arg2, arg3);
  ASSERT_EQ(res1, MakeUInt128(0xfff2bd6d95e31820ULL, 0x02f2c06d98e31b20ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Saturates in the multiplication (0x80000000 * 0x80000000 doubled).
  __uint128_t arg4 = MakeUInt128(0x0123456789abcdefULL, 0x1122334480000000ULL);
  __uint128_t arg5 = MakeUInt128(0x0123456789abcdefULL, 0x8000000011223344ULL);
  __uint128_t arg6 = MakeUInt128(0x0101010102020202ULL, 0x0303030304040404ULL);
  auto [res2, fpsr2] = AsmSqdmlsl(arg4, arg5, arg6);
  ASSERT_EQ(res2, MakeUInt128(0x8101010102020203ULL, 0x1425364704040404ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));

  // Saturates in the subtraction (accumulator lane holds INT64_MIN).
  __uint128_t arg7 = MakeUInt128(0x1100110022002200ULL, 0x7654321076543210ULL);
  __uint128_t arg8 = MakeUInt128(0x1122334444332211ULL, 0x0123456701234567ULL);
  __uint128_t arg9 = MakeUInt128(0x8000000000000000ULL, 0x00000a000000b000ULL);
  auto [res3, fpsr3] = AsmSqdmlsl(arg7, arg8, arg9);
  ASSERT_EQ(res3, MakeUInt128(0x8000000000000000ULL, 0xfef2c66d94e3c720ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr3));
}
8446 
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplySubtractLongUpper32x4IndexedElem) {
  // SQDMLSL2 (vector, by element): reads the upper four 16-bit lanes of %2,
  // multiplies by lane h[7] of %3, doubles, and subtracts from the 32-bit
  // lanes of %0.  Saturation sets FPSR.QC.
  constexpr auto AsmSqdmlsl = ASM_INSN_WRAP_FUNC_WQ_RES_WW0_ARG("sqdmlsl2 %0.4s, %2.8h, %3.h[7]");

  // No saturation.
  __uint128_t arg1 = MakeUInt128(0x0102030405060708ULL, 0x7654321076543210ULL);
  __uint128_t arg2 = MakeUInt128(0x1122334488776655ULL, 0x0123456701234567ULL);
  __uint128_t arg3 = MakeUInt128(0x0123456789abcdefULL, 0xfedcba9876543210ULL);
  auto [res1, fpsr1] = AsmSqdmlsl(arg1, arg2, arg3);
  ASSERT_EQ(res1, MakeUInt128(0x0016426f8939fd8fULL, 0xfdcfb7a075e261b0ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Saturates in the multiplication (0x8000 * 0x8000 doubled).
  __uint128_t arg4 = MakeUInt128(0x0011223344556677ULL, 0xfeedfeedfeed8000ULL);
  __uint128_t arg5 = MakeUInt128(0x0123456789abcdefULL, 0x8000fedcba123456ULL);
  __uint128_t arg6 = MakeUInt128(0x0123456701234567ULL, 0x0123456701234567ULL);
  auto [res2, fpsr2] = AsmSqdmlsl(arg4, arg5, arg6);
  ASSERT_EQ(res2, MakeUInt128(0x0010456781234568ULL, 0x0010456700104567ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));

  // Saturates in the subtraction (accumulator lane holds INT32_MIN).
  __uint128_t arg7 = MakeUInt128(0x1100110022002200ULL, 0x7654321076543210ULL);
  __uint128_t arg8 = MakeUInt128(0x8888111122223333ULL, 0x01234567ffffeeeeULL);
  __uint128_t arg9 = MakeUInt128(0x8000000000000000ULL, 0x00000a000000b000ULL);
  auto [res3, fpsr3] = AsmSqdmlsl(arg7, arg8, arg9);
  ASSERT_EQ(res3, MakeUInt128(0x80000000ff8e2fa0ULL, 0xfef30708ff8edfa0ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr3));
}
8474 
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplySubtractLong64x1IndexedElem) {
  // SQDMLSL (scalar, by element): multiplies the 32-bit scalar in %s2 by lane
  // s[3] of %3, doubles, and subtracts from the 64-bit accumulator %d0; only
  // the low 64 bits of the destination are written.  Saturation sets FPSR.QC.
  constexpr auto AsmSqdmlsl = ASM_INSN_WRAP_FUNC_WQ_RES_WW0_ARG("sqdmlsl %d0, %s2, %3.s[3]");

  // No saturation.
  __uint128_t arg1 = MakeUInt128(0x0102030405060708ULL, 0x7654321076543210ULL);
  __uint128_t arg2 = MakeUInt128(0x1122334488776655ULL, 0x0123456701234567ULL);
  __uint128_t arg3 = MakeUInt128(0x0123456789abcdefULL, 0xfedcba9876543210ULL);
  auto [res1, fpsr1] = AsmSqdmlsl(arg1, arg2, arg3);
  ASSERT_EQ(res1, MakeUInt128(0x0117d6fa42d7d57fULL, 0x0ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Saturates in the multiplication (0x80000000 * 0x80000000 doubled).
  __uint128_t arg4 = MakeUInt128(0x0011223380000000ULL, 0xfeedfeedfeed8000ULL);
  __uint128_t arg5 = MakeUInt128(0x0123456789abcdefULL, 0x80000000ba123456ULL);
  __uint128_t arg6 = MakeUInt128(0x0123456701234567ULL, 0x0123456701234567ULL);
  auto [res2, fpsr2] = AsmSqdmlsl(arg4, arg5, arg6);
  ASSERT_EQ(res2, MakeUInt128(0x8123456701234568ULL, 0x0ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));

  // Saturates in the subtraction (accumulator holds INT64_MIN).
  __uint128_t arg7 = MakeUInt128(0x1100110022002200ULL, 0x7654321076543210ULL);
  __uint128_t arg8 = MakeUInt128(0x8888111122223333ULL, 0x01234567ffffeeeeULL);
  __uint128_t arg9 = MakeUInt128(0x8000000000000000ULL, 0x00000a000000b000ULL);
  auto [res3, fpsr3] = AsmSqdmlsl(arg7, arg8, arg9);
  ASSERT_EQ(res3, MakeUInt128(0x8000000000000000ULL, 0x0ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr3));
}
8502 
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplySubtractLong32x1IndexedElem) {
  // SQDMLSL (scalar, by element): multiplies the 16-bit scalar in %h2 by lane
  // h[7] of %3, doubles, and subtracts from the 32-bit accumulator %s0; only
  // the low 32 bits of the destination are written.  Saturation sets FPSR.QC.
  constexpr auto AsmSqdmlsl = ASM_INSN_WRAP_FUNC_WQ_RES_WW0_ARG("sqdmlsl %s0, %h2, %3.h[7]");

  // No saturation.
  __uint128_t arg1 = MakeUInt128(0x0102030405060708ULL, 0x7654321076543210ULL);
  __uint128_t arg2 = MakeUInt128(0x1122334488776655ULL, 0x0123456701234567ULL);
  __uint128_t arg3 = MakeUInt128(0x0123456789abcdefULL, 0xfedcba9876543210ULL);
  auto [res1, fpsr1] = AsmSqdmlsl(arg1, arg2, arg3);
  ASSERT_EQ(res1, MakeUInt128(0x00000000899bd1bfULL, 0x0ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Saturates in the multiplication (0x8000 * 0x8000 doubled).
  __uint128_t arg4 = MakeUInt128(0x0011223344558000ULL, 0xfeedfeedfeed1234ULL);
  __uint128_t arg5 = MakeUInt128(0x0123456789abcdefULL, 0x8000fedcba123456ULL);
  __uint128_t arg6 = MakeUInt128(0x0123456701234567ULL, 0x0123456701234567ULL);
  auto [res2, fpsr2] = AsmSqdmlsl(arg4, arg5, arg6);
  ASSERT_EQ(res2, MakeUInt128(0x0000000081234568ULL, 0x0ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));

  // Saturates in the subtraction (accumulator holds INT32_MIN).
  __uint128_t arg7 = MakeUInt128(0xaabbccddeeff2200ULL, 0x7654321076543210ULL);
  __uint128_t arg8 = MakeUInt128(0x8888111122223333ULL, 0x0123aabbccddeeffULL);
  __uint128_t arg9 = MakeUInt128(0xaabbccdd80000000ULL, 0x0011223344556677ULL);
  auto [res3, fpsr3] = AsmSqdmlsl(arg7, arg8, arg9);
  ASSERT_EQ(res3, MakeUInt128(0x0000000080000000ULL, 0x0ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr3));
}
8530 
TEST(Arm64InsnTest, SignedSaturatingRoundingDoublingMultiplyHighHalf32x4) {
  // SQRDMULH across all four 32-bit lanes; saturation sets FPSR.QC.
  constexpr auto AsmSqrdmulh = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqrdmulh %0.4s, %2.4s, %3.4s");

  // No lane saturates, so QC stays clear.
  __uint128_t lhs1 = MakeU32x4(0x20000001UL, 0x00000004UL, 0x7eed0003UL, 0x00000010UL);
  __uint128_t rhs1 = MakeU32x4(0x00000008UL, 0x00000002UL, 0x7eed0004UL, 0x00000002UL);
  auto [out1, flags1] = AsmSqrdmulh(lhs1, rhs1);
  ASSERT_EQ(out1, MakeU32x4(0x2UL, 0x0UL, 0x7ddc4ed9UL, 0x0UL));
  ASSERT_FALSE(IsQcBitSet(flags1));

  // Lane 0 multiplies INT32_MIN by itself, which saturates and sets QC.
  __uint128_t lhs2 = MakeU32x4(0x80000000UL, 0x00000004UL, 0xfeed0003UL, 0x00000010UL);
  __uint128_t rhs2 = MakeU32x4(0x80000000UL, 0x00000002UL, 0xfeed0004UL, 0x00000002UL);
  auto [out2, flags2] = AsmSqrdmulh(lhs2, rhs2);
  ASSERT_EQ(out2, MakeU32x4(0x7fffffffUL, 0x0UL, 0x00024ed2UL, 0x0UL));
  ASSERT_TRUE(IsQcBitSet(flags2));
}
8546 
TEST(Arm64InsnTest, SignedSaturatingRoundingDoublingMultiplyHighHalf32x2) {
  // SQRDMULH on the two low 32-bit lanes; the upper lanes (filled with the
  // 0xdeadc0de sentinel) must come back zeroed.
  constexpr auto AsmSqrdmulh = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqrdmulh %0.2s, %2.2s, %3.2s");

  // No lane saturates, so QC stays clear.
  __uint128_t lhs1 = MakeU32x4(0x55555555UL, 0x00000004UL, 0xdeadc0deUL, 0xdeadc0deUL);
  __uint128_t rhs1 = MakeU32x4(0x00000004UL, 0x00000002UL, 0xdeadc0deUL, 0xdeadc0deUL);
  auto [out1, flags1] = AsmSqrdmulh(lhs1, rhs1);
  ASSERT_EQ(out1, MakeU32x4(0x3, 0x0UL, 0x0UL, 0x0UL));
  ASSERT_FALSE(IsQcBitSet(flags1));

  // Lane 0 multiplies INT32_MIN by itself, which saturates and sets QC.
  __uint128_t lhs2 = MakeU32x4(0x80000000UL, 0x00000004UL, 0xdeadc0deUL, 0xdeadc0deUL);
  __uint128_t rhs2 = MakeU32x4(0x80000000UL, 0x00000002UL, 0xdeadc0deUL, 0xdeadc0deUL);
  auto [out2, flags2] = AsmSqrdmulh(lhs2, rhs2);
  ASSERT_EQ(out2, MakeU32x4(0x7fffffffUL, 0x0UL, 0x0UL, 0x0UL));
  ASSERT_TRUE(IsQcBitSet(flags2));
}
8562 
TEST(Arm64InsnTest, SignedSaturatingRoundingDoublingMultiplyHighHalf16x8) {
  // SQRDMULH across all eight 16-bit lanes; saturation sets FPSR.QC.
  constexpr auto AsmSqrdmulh = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqrdmulh %0.8h, %2.8h, %3.8h");

  // No lane saturates, so QC stays clear.
  __uint128_t lhs1 = MakeUInt128(0x200000017fff1111ULL, 0x7eed000300000010ULL);
  __uint128_t rhs1 = MakeUInt128(0x0008000840000000ULL, 0x7eed000400000002ULL);
  auto [out1, flags1] = AsmSqrdmulh(lhs1, rhs1);
  ASSERT_EQ(out1, MakeUInt128(0x0002000040000000ULL, 0x7ddc000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(flags1));

  // The top lane multiplies INT16_MIN by itself, which saturates and sets QC.
  __uint128_t lhs2 = MakeUInt128(0x8000700040010000ULL, 0xfeed0003ffff0010ULL);
  __uint128_t rhs2 = MakeUInt128(0x8000000100040000ULL, 0xfeed0004ffff0002ULL);
  auto [out2, flags2] = AsmSqrdmulh(lhs2, rhs2);
  ASSERT_EQ(out2, MakeUInt128(0x7fff000100020000ULL, 0x0002000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(flags2));
}
8578 
TEST(Arm64InsnTest, SignedSaturatingRoundingDoublingMultiplyHighHalf16x4) {
  // SQRDMULH on the four low 16-bit lanes; the upper half (filled with the
  // 0xdeadc0de sentinel) must come back zeroed.
  constexpr auto AsmSqrdmulh = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqrdmulh %0.4h, %2.4h, %3.4h");

  // No lane saturates, so QC stays clear.
  __uint128_t lhs1 = MakeUInt128(0x555500017fff1111ULL, 0xdeadc0dedeadc0deULL);
  __uint128_t rhs1 = MakeUInt128(0x0004000840000000ULL, 0xdeadc0dedeadc0deULL);
  auto [out1, flags1] = AsmSqrdmulh(lhs1, rhs1);
  ASSERT_EQ(out1, MakeUInt128(0x0003000040000000ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(flags1));

  // The top lane multiplies INT16_MIN by itself, which saturates and sets QC.
  __uint128_t lhs2 = MakeUInt128(0x8000700040010000ULL, 0xdeadc0dedeadc0deULL);
  __uint128_t rhs2 = MakeUInt128(0x8000000100040000ULL, 0xdeadc0dedeadc0deULL);
  auto [out2, flags2] = AsmSqrdmulh(lhs2, rhs2);
  ASSERT_EQ(out2, MakeUInt128(0x7fff000100020000ULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(flags2));
}
8594 
TEST(Arm64InsnTest, SignedSaturatingRoundingDoublingMultiplyHighHalf32x4IndexedElem) {
  // SQRDMULH (by element): every 32-bit lane of %2 is multiplied by lane s[0]
  // of %3; saturation sets FPSR.QC.
  constexpr auto AsmSqrdmulh = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqrdmulh %0.4s, %2.4s, %3.s[0]");

  // No lane saturates, so QC stays clear.
  __uint128_t lhs1 = MakeU32x4(0x20000001UL, 0x00000004UL, 0x7eed0003UL, 0x00000010UL);
  __uint128_t rhs1 = MakeU32x4(0x00000008UL, 0xfeedfeedUL, 0xfeedfeedUL, 0xfeedfeedUL);
  auto [out1, flags1] = AsmSqrdmulh(lhs1, rhs1);
  // Lane 2 demonstrates the rounding step: truncation alone would yield 7.
  ASSERT_EQ(out1, MakeU32x4(0x2UL, 0x0UL, 0x8UL, 0x0UL));
  ASSERT_FALSE(IsQcBitSet(flags1));

  // Lane 0 multiplies INT32_MIN by itself, which saturates and sets QC.
  __uint128_t lhs2 = MakeU32x4(0x80000000UL, 0x00000004UL, 0xfeed0003UL, 0x00000010UL);
  __uint128_t rhs2 = MakeU32x4(0x80000000UL, 0xfeedfeedUL, 0xfeedfeedUL, 0xfeedfeedUL);
  auto [out2, flags2] = AsmSqrdmulh(lhs2, rhs2);
  ASSERT_EQ(out2, MakeU32x4(0x7fffffffUL, 0xfffffffcUL, 0x0112fffdUL, 0xfffffff0UL));
  ASSERT_TRUE(IsQcBitSet(flags2));
}
8611 
TEST(Arm64InsnTest, SignedSaturatingRoundingDoublingMultiplyHighHalf32x2IndexedElem) {
  constexpr auto Sqrdmulh = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqrdmulh %0.2s, %2.2s, %3.s[0]");

  // Small products: no saturation, QC stays clear.
  __uint128_t lhs1 = MakeU32x4(0x55555555UL, 0x00000004UL, 0xdeadc0deUL, 0xdeadc0deUL);
  __uint128_t rhs1 = MakeU32x4(0x00000004UL, 0xdeadc0deUL, 0xdeadc0deUL, 0xdeadc0deUL);
  auto [out1, flags1] = Sqrdmulh(lhs1, rhs1);
  ASSERT_EQ(out1, MakeU32x4(0x3UL, 0x0UL, 0x0UL, 0x0UL));
  ASSERT_FALSE(IsQcBitSet(flags1));

  // INT32_MIN squared and doubled saturates lane 0 to INT32_MAX and sets QC.
  __uint128_t lhs2 = MakeU32x4(0x80000000UL, 0x00000004UL, 0xdeadc0deUL, 0xdeadc0deUL);
  __uint128_t rhs2 = MakeU32x4(0x80000000UL, 0xdeadc0deUL, 0xdeadc0deUL, 0xdeadc0deUL);
  auto [out2, flags2] = Sqrdmulh(lhs2, rhs2);
  ASSERT_EQ(out2, MakeU32x4(0x7fffffffUL, 0xfffffffcUL, 0x0UL, 0x0UL));
  ASSERT_TRUE(IsQcBitSet(flags2));
}
8627 
// Verifies SQRDMULH (vector, by element) on all eight 16-bit lanes, with the
// multiplier taken from element 7 of the second operand.
TEST(Arm64InsnTest, SignedSaturatingRoundingDoublingMultiplyHighHalf16x8IndexedElem) {
  constexpr auto AsmSqrdmulh = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqrdmulh %0.8h, %2.8h, %3.h[7]");

  // Multiplier arg2.h[7] == 0x0008: all products stay in range, so QC is clear.
  __uint128_t arg1 = MakeUInt128(0x7fff800045670000ULL, 0xfe00780020004001ULL);
  __uint128_t arg2 = MakeUInt128(0xfeedfeedfeedfeedULL, 0x0008feedfeedfeedULL);
  auto [res1, fpsr1] = AsmSqrdmulh(arg1, arg2);
  ASSERT_EQ(res1, MakeUInt128(0x0008fff800040000ULL, 0x0000000800020004ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Multiplier arg4.h[7] == 0x8000 (INT16_MIN): the 0x8000 lane of arg3
  // saturates to 0x7fff, which must set the cumulative QC bit.
  __uint128_t arg3 = MakeUInt128(0x7fff800045670000ULL, 0xfe00780020004001ULL);
  __uint128_t arg4 = MakeUInt128(0xfeedfeedfeedfeedULL, 0x8000feedfeedfeedULL);
  auto [res2, fpsr2] = AsmSqrdmulh(arg3, arg4);
  ASSERT_EQ(res2, MakeUInt128(0x80017fffba990000ULL, 0x02008800e000bfffULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
8643 
TEST(Arm64InsnTest, SignedSaturatingRoundingDoublingMultiplyHighHalf16x4IndexedElem) {
  constexpr auto Sqrdmulh = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqrdmulh %0.4h, %2.4h, %3.h[7]");

  // Multiplier is element 7 (0x0004); nothing saturates, QC stays clear.
  __uint128_t lhs1 = MakeUInt128(0x7fff800055550000ULL, 0xdeadc0dedeadc0deULL);
  __uint128_t rhs1 = MakeUInt128(0xdeadc0dedeadc0deULL, 0x0004c0dedeadc0deULL);
  auto [out1, flags1] = Sqrdmulh(lhs1, rhs1);
  ASSERT_EQ(out1, MakeUInt128(0x0004fffc00030000ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(flags1));

  // Multiplier is 0x8000 (INT16_MIN); the 0x8000 lane saturates and sets QC.
  __uint128_t lhs2 = MakeUInt128(0x7fff800045670000ULL, 0xdeadc0dedeadc0deULL);
  __uint128_t rhs2 = MakeUInt128(0xdeadc0dedeadc0deULL, 0x8000c0dedeadc0deULL);
  auto [out2, flags2] = Sqrdmulh(lhs2, rhs2);
  ASSERT_EQ(out2, MakeUInt128(0x80017fffba990000ULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(flags2));
}
8659 
TEST(Arm64InsnTest, SignedSaturatingRoundingDoublingMultiplyHighHalf32x1) {
  constexpr auto Sqrdmulh = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqrdmulh %s0, %s2, %s3");

  __uint128_t lhs1 = MakeU32x4(0x556789abUL, 0xfeedfeedUL, 0xfeedfeedUL, 0xfeedfeedUL);
  __uint128_t rhs1 = MakeU32x4(0x00000004UL, 0xfeedfeedUL, 0xfeedfeedUL, 0xfeedfeedUL);
  auto [out1, flags1] = Sqrdmulh(lhs1, rhs1);
  // The rounding step bumps the result from 2 up to 3.
  ASSERT_EQ(out1, MakeU32x4(0x3UL, 0x0UL, 0x0UL, 0x0UL));
  ASSERT_FALSE(IsQcBitSet(flags1));

  // INT32_MIN squared and doubled overflows: saturate to INT32_MAX and set QC.
  __uint128_t lhs2 = MakeU32x4(0x80000000UL, 0xfeedfeedUL, 0xfeedfeedUL, 0xfeedfeedUL);
  __uint128_t rhs2 = MakeU32x4(0x80000000UL, 0xfeedfeedUL, 0xfeedfeedUL, 0xfeedfeedUL);
  auto [out2, flags2] = Sqrdmulh(lhs2, rhs2);
  ASSERT_EQ(out2, MakeU32x4(0x7fffffffUL, 0x0UL, 0x0UL, 0x0UL));
  ASSERT_TRUE(IsQcBitSet(flags2));
}
8676 
TEST(Arm64InsnTest, SignedSaturatingRoundingDoublingMultiplyHighHalf16x1) {
  constexpr auto Sqrdmulh = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqrdmulh %h0, %h2, %h3");

  // Scalar 16-bit form: only the low half-word participates.
  __uint128_t lhs1 = MakeUInt128(0xfeedfeedfeed5567ULL, 0xfeedfeedfeedfeedULL);
  __uint128_t rhs1 = MakeUInt128(0xfeedfeedfeed0004ULL, 0xfeedfeedfeedfeedULL);
  auto [out1, flags1] = Sqrdmulh(lhs1, rhs1);
  ASSERT_EQ(out1, MakeUInt128(0x0000000000000003ULL, 0x0ULL));
  ASSERT_FALSE(IsQcBitSet(flags1));

  // INT16_MIN squared and doubled saturates to INT16_MAX and sets QC.
  __uint128_t lhs2 = MakeUInt128(0xfeedfeedfeed8000ULL, 0xfeedfeedfeedfeedULL);
  __uint128_t rhs2 = MakeUInt128(0xfeedfeedfeed8000ULL, 0xfeedfeedfeedfeedULL);
  auto [out2, flags2] = Sqrdmulh(lhs2, rhs2);
  ASSERT_EQ(out2, MakeUInt128(0x0000000000007fffULL, 0x0ULL));
  ASSERT_TRUE(IsQcBitSet(flags2));
}
8692 
TEST(Arm64InsnTest, SignedSaturatingRoundingDoublingMultiplyHighHalf32x1IndexedElem) {
  constexpr auto Sqrdmulh = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqrdmulh %s0, %s2, %3.s[2]");

  __uint128_t lhs1 = MakeU32x4(0x556789abUL, 0xfeedfeedUL, 0xfeedfeedUL, 0xfeedfeedUL);
  __uint128_t rhs1 = MakeU32x4(0xfeedfeedUL, 0xfeedfeedUL, 0x00000004UL, 0xfeedfeedUL);
  auto [out1, flags1] = Sqrdmulh(lhs1, rhs1);
  // The rounding step bumps the result from 2 up to 3.
  ASSERT_EQ(out1, MakeU32x4(0x3UL, 0x0UL, 0x0UL, 0x0UL));
  ASSERT_FALSE(IsQcBitSet(flags1));

  // Multiplier element s[2] is INT32_MIN: the product saturates and sets QC.
  __uint128_t lhs2 = MakeU32x4(0x80000000UL, 0xfeedfeedUL, 0xfeedfeedUL, 0xfeedfeedUL);
  __uint128_t rhs2 = MakeU32x4(0xfeedfeedUL, 0xfeedfeedUL, 0x80000000UL, 0xfeedfeedUL);
  auto [out2, flags2] = Sqrdmulh(lhs2, rhs2);
  ASSERT_EQ(out2, MakeU32x4(0x7fffffffUL, 0x0UL, 0x0UL, 0x0UL));
  ASSERT_TRUE(IsQcBitSet(flags2));
}
8709 
TEST(Arm64InsnTest, SignedSaturatingRoundingDoublingMultiplyHighHalf16x1IndexedElem) {
  constexpr auto Sqrdmulh = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqrdmulh %h0, %h2, %3.h[7]");

  __uint128_t lhs1 = MakeUInt128(0xfeedfeedfeed5567ULL, 0xfeedfeedfeedfeedULL);
  __uint128_t rhs1 = MakeUInt128(0xfeedfeedfeedfeedULL, 0x0004feedfeedfeedULL);
  auto [out1, flags1] = Sqrdmulh(lhs1, rhs1);
  // The rounding step bumps the result from 2 up to 3.
  ASSERT_EQ(out1, MakeUInt128(0x0000000000000003ULL, 0x0ULL));
  ASSERT_FALSE(IsQcBitSet(flags1));

  // Multiplier element h[7] is INT16_MIN: the product saturates and sets QC.
  __uint128_t lhs2 = MakeUInt128(0xfeedfeedfeed8000ULL, 0xfeedfeedfeedfeedULL);
  __uint128_t rhs2 = MakeUInt128(0xfeedfeedfeedfeedULL, 0x8000feedfeedfeedULL);
  auto [out2, flags2] = Sqrdmulh(lhs2, rhs2);
  ASSERT_EQ(out2, MakeUInt128(0x0000000000007fffULL, 0x0ULL));
  ASSERT_TRUE(IsQcBitSet(flags2));
}
8726 
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyHighHalf32x4) {
  constexpr auto Sqdmulh = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqdmulh %0.4s, %2.4s, %3.4s");

  // Non-rounding variant: products truncate and nothing saturates.
  __uint128_t lhs1 = MakeU32x4(0x20000001UL, 0x00000004UL, 0x7eed0003UL, 0x00000010UL);
  __uint128_t rhs1 = MakeU32x4(0x00000008UL, 0x00000002UL, 0x7eed0004UL, 0x00000002UL);
  auto [out1, flags1] = Sqdmulh(lhs1, rhs1);
  ASSERT_EQ(out1, MakeU32x4(0x2UL, 0x0UL, 0x7ddc4ed8UL, 0x0UL));
  ASSERT_FALSE(IsQcBitSet(flags1));

  // INT32_MIN squared and doubled saturates lane 0 to INT32_MAX and sets QC.
  __uint128_t lhs2 = MakeU32x4(0x80000000UL, 0x00000004UL, 0xfeed0003UL, 0x00000010UL);
  __uint128_t rhs2 = MakeU32x4(0x80000000UL, 0x00000002UL, 0xfeed0004UL, 0x00000002UL);
  auto [out2, flags2] = Sqdmulh(lhs2, rhs2);
  ASSERT_EQ(out2, MakeU32x4(0x7fffffffUL, 0x0UL, 0x00024ed1UL, 0x0UL));
  ASSERT_TRUE(IsQcBitSet(flags2));
}
8742 
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyHighHalf32x2) {
  constexpr auto Sqdmulh = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqdmulh %0.2s, %2.2s, %3.2s");

  // No rounding: 0x55555555 * 4 doubled yields 2 in the high half.
  __uint128_t lhs1 = MakeU32x4(0x55555555UL, 0x00000004UL, 0xdeadc0deUL, 0xdeadc0deUL);
  __uint128_t rhs1 = MakeU32x4(0x00000004UL, 0x00000002UL, 0xdeadc0deUL, 0xdeadc0deUL);
  auto [out1, flags1] = Sqdmulh(lhs1, rhs1);
  ASSERT_EQ(out1, MakeU32x4(0x2UL, 0x0UL, 0x0UL, 0x0UL));
  ASSERT_FALSE(IsQcBitSet(flags1));

  // INT32_MIN squared and doubled saturates to INT32_MAX and sets QC.
  __uint128_t lhs2 = MakeU32x4(0x80000000UL, 0x00000004UL, 0xdeadc0deUL, 0xdeadc0deUL);
  __uint128_t rhs2 = MakeU32x4(0x80000000UL, 0x00000002UL, 0xdeadc0deUL, 0xdeadc0deUL);
  auto [out2, flags2] = Sqdmulh(lhs2, rhs2);
  ASSERT_EQ(out2, MakeU32x4(0x7fffffffUL, 0x0UL, 0x0UL, 0x0UL));
  ASSERT_TRUE(IsQcBitSet(flags2));
}
8758 
// Verifies SQDMULH (vector) on all eight 16-bit lanes. Unlike SQRDMULH there
// is no rounding constant, so the doubled product is simply truncated.
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyHighHalf16x8) {
  constexpr auto AsmSqdmulh = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqdmulh %0.8h, %2.8h, %3.8h");

  // All lane products stay within int16 range; QC must remain clear.
  __uint128_t arg1 = MakeUInt128(0x200000017fff1111ULL, 0x7eed000300000010ULL);
  __uint128_t arg2 = MakeUInt128(0x0008000840000000ULL, 0x7eed000400000002ULL);
  auto [res1, fpsr1] = AsmSqdmulh(arg1, arg2);
  ASSERT_EQ(res1, MakeUInt128(0x000200003fff0000ULL, 0x7ddc000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // The 0x8000 * 0x8000 lane doubles to 0x80000000, which saturates to 0x7fff
  // and must set the cumulative QC bit.
  __uint128_t arg3 = MakeUInt128(0x8000700040010000ULL, 0xfeed0003ffff0010ULL);
  __uint128_t arg4 = MakeUInt128(0x8000000100040000ULL, 0xfeed0004ffff0002ULL);
  auto [res2, fpsr2] = AsmSqdmulh(arg3, arg4);
  ASSERT_EQ(res2, MakeUInt128(0x7fff000000020000ULL, 0x0002000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
8774 
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyHighHalf16x4) {
  constexpr auto Sqdmulh = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqdmulh %0.4h, %2.4h, %3.4h");

  // Truncating variant: no lane saturates, QC stays clear.
  __uint128_t lhs1 = MakeUInt128(0x555500017fff1111ULL, 0xdeadc0dedeadc0deULL);
  __uint128_t rhs1 = MakeUInt128(0x0004000840000000ULL, 0xdeadc0dedeadc0deULL);
  auto [out1, flags1] = Sqdmulh(lhs1, rhs1);
  ASSERT_EQ(out1, MakeUInt128(0x000200003fff0000ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(flags1));

  // 0x8000 * 0x8000 doubled saturates to 0x7fff and sets QC.
  __uint128_t lhs2 = MakeUInt128(0x8000700040010000ULL, 0xdeadc0dedeadc0deULL);
  __uint128_t rhs2 = MakeUInt128(0x8000000100040000ULL, 0xdeadc0dedeadc0deULL);
  auto [out2, flags2] = Sqdmulh(lhs2, rhs2);
  ASSERT_EQ(out2, MakeUInt128(0x7fff000000020000ULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(flags2));
}
8790 
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyHighHalf32x4IndexedElem) {
  constexpr auto Sqdmulh = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqdmulh %0.4s, %2.4s, %3.s[0]");

  // Multiplier is element s[0]; truncating variant so lane 2 yields 7, not 8.
  __uint128_t lhs1 = MakeU32x4(0x20000001UL, 0x00000004UL, 0x7eed0003UL, 0x00000010UL);
  __uint128_t rhs1 = MakeU32x4(0x00000008UL, 0xfeedfeedUL, 0xfeedfeedUL, 0xfeedfeedUL);
  auto [out1, flags1] = Sqdmulh(lhs1, rhs1);
  ASSERT_EQ(out1, MakeU32x4(0x2UL, 0x0UL, 0x7UL, 0x0UL));
  ASSERT_FALSE(IsQcBitSet(flags1));

  // Multiplier is INT32_MIN: lane 0 saturates to INT32_MAX and QC is set.
  __uint128_t lhs2 = MakeU32x4(0x80000000UL, 0x00000004UL, 0xfeed0003UL, 0x00000010UL);
  __uint128_t rhs2 = MakeU32x4(0x80000000UL, 0xfeedfeedUL, 0xfeedfeedUL, 0xfeedfeedUL);
  auto [out2, flags2] = Sqdmulh(lhs2, rhs2);
  ASSERT_EQ(out2, MakeU32x4(0x7fffffffUL, 0xfffffffcUL, 0x0112fffdUL, 0xfffffff0UL));
  ASSERT_TRUE(IsQcBitSet(flags2));
}
8806 
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyHighHalf32x2IndexedElem) {
  constexpr auto Sqdmulh = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqdmulh %0.2s, %2.2s, %3.s[0]");

  // Multiplier is element s[0]; no saturation, QC stays clear.
  __uint128_t lhs1 = MakeU32x4(0x55555555UL, 0x00000004UL, 0xdeadc0deUL, 0xdeadc0deUL);
  __uint128_t rhs1 = MakeU32x4(0x00000004UL, 0xdeadc0deUL, 0xdeadc0deUL, 0xdeadc0deUL);
  auto [out1, flags1] = Sqdmulh(lhs1, rhs1);
  ASSERT_EQ(out1, MakeU32x4(0x2UL, 0x0UL, 0x0UL, 0x0UL));
  ASSERT_FALSE(IsQcBitSet(flags1));

  // Multiplier is INT32_MIN: lane 0 saturates to INT32_MAX and QC is set.
  __uint128_t lhs2 = MakeU32x4(0x80000000UL, 0x00000004UL, 0xdeadc0deUL, 0xdeadc0deUL);
  __uint128_t rhs2 = MakeU32x4(0x80000000UL, 0xdeadc0deUL, 0xdeadc0deUL, 0xdeadc0deUL);
  auto [out2, flags2] = Sqdmulh(lhs2, rhs2);
  ASSERT_EQ(out2, MakeU32x4(0x7fffffffUL, 0xfffffffcUL, 0x0UL, 0x0UL));
  ASSERT_TRUE(IsQcBitSet(flags2));
}
8822 
// Verifies SQDMULH (vector, by element) on all eight 16-bit lanes, with the
// multiplier taken from element 7 of the second operand (no rounding step).
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyHighHalf16x8IndexedElem) {
  constexpr auto AsmSqdmulh = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqdmulh %0.8h, %2.8h, %3.h[7]");

  // Multiplier arg2.h[7] == 0x0008: no saturation, QC stays clear.
  __uint128_t arg1 = MakeUInt128(0x7fff800045670000ULL, 0xfe00780020004001ULL);
  __uint128_t arg2 = MakeUInt128(0xfeedfeedfeedfeedULL, 0x0008feedfeedfeedULL);
  auto [res1, fpsr1] = AsmSqdmulh(arg1, arg2);
  ASSERT_EQ(res1, MakeUInt128(0x0007fff800040000ULL, 0xffff000700020004ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Multiplier arg4.h[7] == 0x8000 (INT16_MIN): the 0x8000 lane of arg3
  // saturates to 0x7fff, which must set the cumulative QC bit.
  __uint128_t arg3 = MakeUInt128(0x7fff800045670000ULL, 0xfe00780020004001ULL);
  __uint128_t arg4 = MakeUInt128(0xfeedfeedfeedfeedULL, 0x8000feedfeedfeedULL);
  auto [res2, fpsr2] = AsmSqdmulh(arg3, arg4);
  ASSERT_EQ(res2, MakeUInt128(0x80017fffba990000ULL, 0x02008800e000bfffULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
8838 
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyHighHalf16x4IndexedElem) {
  constexpr auto Sqdmulh = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqdmulh %0.4h, %2.4h, %3.h[7]");

  // Multiplier is element h[7] (0x0004); truncating, no saturation.
  __uint128_t lhs1 = MakeUInt128(0x7fff800055550000ULL, 0xdeadc0dedeadc0deULL);
  __uint128_t rhs1 = MakeUInt128(0xdeadc0dedeadc0deULL, 0x0004c0dedeadc0deULL);
  auto [out1, flags1] = Sqdmulh(lhs1, rhs1);
  ASSERT_EQ(out1, MakeUInt128(0x0003fffc00020000ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(flags1));

  // Multiplier is 0x8000 (INT16_MIN); the 0x8000 lane saturates and sets QC.
  __uint128_t lhs2 = MakeUInt128(0x7fff800045670000ULL, 0xdeadc0dedeadc0deULL);
  __uint128_t rhs2 = MakeUInt128(0xdeadc0dedeadc0deULL, 0x8000c0dedeadc0deULL);
  auto [out2, flags2] = Sqdmulh(lhs2, rhs2);
  ASSERT_EQ(out2, MakeUInt128(0x80017fffba990000ULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(flags2));
}
8854 
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyHighHalf32x1) {
  constexpr auto Sqdmulh = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqdmulh %s0, %s2, %s3");

  // Scalar 32-bit form: only element 0 participates; truncating, so 2 not 3.
  __uint128_t lhs1 = MakeU32x4(0x556789abUL, 0xfeedfeedUL, 0xfeedfeedUL, 0xfeedfeedUL);
  __uint128_t rhs1 = MakeU32x4(0x00000004UL, 0xfeedfeedUL, 0xfeedfeedUL, 0xfeedfeedUL);
  auto [out1, flags1] = Sqdmulh(lhs1, rhs1);
  ASSERT_EQ(out1, MakeU32x4(0x2UL, 0x0UL, 0x0UL, 0x0UL));
  ASSERT_FALSE(IsQcBitSet(flags1));

  // INT32_MIN squared and doubled saturates to INT32_MAX and sets QC.
  __uint128_t lhs2 = MakeU32x4(0x80000000UL, 0xfeedfeedUL, 0xfeedfeedUL, 0xfeedfeedUL);
  __uint128_t rhs2 = MakeU32x4(0x80000000UL, 0xfeedfeedUL, 0xfeedfeedUL, 0xfeedfeedUL);
  auto [out2, flags2] = Sqdmulh(lhs2, rhs2);
  ASSERT_EQ(out2, MakeU32x4(0x7fffffffUL, 0x0UL, 0x0UL, 0x0UL));
  ASSERT_TRUE(IsQcBitSet(flags2));
}
8870 
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyHighHalf16x1) {
  constexpr auto Sqdmulh = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqdmulh %h0, %h2, %h3");

  // Scalar 16-bit form: only the low half-word participates.
  __uint128_t lhs1 = MakeUInt128(0xfeedfeedfeed5567ULL, 0xfeedfeedfeedfeedULL);
  __uint128_t rhs1 = MakeUInt128(0xfeedfeedfeed0004ULL, 0xfeedfeedfeedfeedULL);
  auto [out1, flags1] = Sqdmulh(lhs1, rhs1);
  ASSERT_EQ(out1, MakeUInt128(0x0000000000000002ULL, 0x0ULL));
  ASSERT_FALSE(IsQcBitSet(flags1));

  // INT16_MIN squared and doubled saturates to INT16_MAX and sets QC.
  __uint128_t lhs2 = MakeUInt128(0xfeedfeedfeed8000ULL, 0xfeedfeedfeedfeedULL);
  __uint128_t rhs2 = MakeUInt128(0xfeedfeedfeed8000ULL, 0xfeedfeedfeedfeedULL);
  auto [out2, flags2] = Sqdmulh(lhs2, rhs2);
  ASSERT_EQ(out2, MakeUInt128(0x0000000000007fffULL, 0x0ULL));
  ASSERT_TRUE(IsQcBitSet(flags2));
}
8886 
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyHighHalf32x1IndexedElem) {
  constexpr auto Sqdmulh = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqdmulh %s0, %s2, %3.s[2]");

  // Scalar-by-element form; multiplier comes from element s[2].
  __uint128_t lhs1 = MakeU32x4(0x556789abUL, 0xfeedfeedUL, 0xfeedfeedUL, 0xfeedfeedUL);
  __uint128_t rhs1 = MakeU32x4(0xfeedfeedUL, 0xfeedfeedUL, 0x00000004UL, 0xfeedfeedUL);
  auto [out1, flags1] = Sqdmulh(lhs1, rhs1);
  ASSERT_EQ(out1, MakeU32x4(0x2UL, 0x0UL, 0x0UL, 0x0UL));
  ASSERT_FALSE(IsQcBitSet(flags1));

  // Multiplier element is INT32_MIN: the product saturates and sets QC.
  __uint128_t lhs2 = MakeU32x4(0x80000000UL, 0xfeedfeedUL, 0xfeedfeedUL, 0xfeedfeedUL);
  __uint128_t rhs2 = MakeU32x4(0xfeedfeedUL, 0xfeedfeedUL, 0x80000000UL, 0xfeedfeedUL);
  auto [out2, flags2] = Sqdmulh(lhs2, rhs2);
  ASSERT_EQ(out2, MakeU32x4(0x7fffffffUL, 0x0UL, 0x0UL, 0x0UL));
  ASSERT_TRUE(IsQcBitSet(flags2));
}
8902 
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyHighHalf16x1IndexedElem) {
  constexpr auto Sqdmulh = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqdmulh %h0, %h2, %3.h[7]");

  // Scalar-by-element form; multiplier comes from element h[7].
  __uint128_t lhs1 = MakeUInt128(0xfeedfeedfeed5567ULL, 0xfeedfeedfeedfeedULL);
  __uint128_t rhs1 = MakeUInt128(0xfeedfeedfeedfeedULL, 0x0004feedfeedfeedULL);
  auto [out1, flags1] = Sqdmulh(lhs1, rhs1);
  ASSERT_EQ(out1, MakeUInt128(0x0000000000000002ULL, 0x0ULL));
  ASSERT_FALSE(IsQcBitSet(flags1));

  // Multiplier element is INT16_MIN: the product saturates and sets QC.
  __uint128_t lhs2 = MakeUInt128(0xfeedfeedfeed8000ULL, 0xfeedfeedfeedfeedULL);
  __uint128_t rhs2 = MakeUInt128(0xfeedfeedfeedfeedULL, 0x8000feedfeedfeedULL);
  auto [out2, flags2] = Sqdmulh(lhs2, rhs2);
  ASSERT_EQ(out2, MakeUInt128(0x0000000000007fffULL, 0x0ULL));
  ASSERT_TRUE(IsQcBitSet(flags2));
}
8918 
8919 class FpcrBitSupport : public testing::TestWithParam<uint64_t> {};
8920 
// Writes the parameter bit pattern to FPCR and reads it back, verifying the
// bit is implemented as writable (not RAZ/ignored).
// Fix: dropped the stray semicolon after the test body (-Wextra-semi).
TEST_P(FpcrBitSupport, SupportsBit) {
  uint64_t fpcr1;
  asm("msr fpcr, %x1\n\t"
      "mrs %x0, fpcr"
      : "=r"(fpcr1)
      : "r"(static_cast<uint64_t>(GetParam())));
  ASSERT_EQ(fpcr1, GetParam()) << "Should be able to set then get FPCR bit: " << GetParam();
}
8929 
// Note: The exception enablement flags (such as IOE) are not checked, because when tested on actual
// ARM64 device we find that the tests fail either because they cannot be written or are RAZ (read
// as zero).
// Instantiates FpcrBitSupport once per FPCR control pattern (rounding modes,
// FZ, DN) plus the all-clear pattern 0.
INSTANTIATE_TEST_SUITE_P(Arm64InsnTest,
                         FpcrBitSupport,
                         testing::Values(kFpcrRModeTieEven,
                                         kFpcrRModeZero,
                                         kFpcrRModeNegInf,
                                         kFpcrRModePosInf,
                                         kFpcrFzBit,
                                         kFpcrDnBit,
                                         0));
8942 
8943 class FpsrBitSupport : public testing::TestWithParam<uint64_t> {};
8944 
// Writes the parameter bit pattern to FPSR and reads it back, verifying the
// status bit is implemented as writable.
// Fixes: dropped the stray semicolon after the test body (-Wextra-semi) and
// added the parameter value to the failure message, matching the FPCR test.
TEST_P(FpsrBitSupport, SupportsBit) {
  uint64_t fpsr1;
  asm("msr fpsr, %1\n\t"
      "mrs %0, fpsr"
      : "=r"(fpsr1)
      : "r"(static_cast<uint64_t>(GetParam())));
  ASSERT_EQ(fpsr1, GetParam()) << "Should be able to set then get FPSR bit: " << GetParam();
}
8953 
// Instantiates FpsrBitSupport once per FPSR cumulative status flag (invalid
// operation, divide-by-zero, overflow, underflow, inexact, denormal, QC).
INSTANTIATE_TEST_SUITE_P(Arm64InsnTest,
                         FpsrBitSupport,
                         testing::Values(kFpsrIocBit,
                                         kFpsrDzcBit,
                                         kFpsrOfcBit,
                                         kFpsrUfcBit,
                                         kFpsrIxcBit,
                                         kFpsrIdcBit,
                                         kFpsrQcBit));
8963 
TEST(Arm64InsnTest, UnsignedDivide64) {
  // Wraps the 64-bit UDIV instruction so each case is a single call.
  auto unsigned_div = [](uint64_t dividend, uint64_t divisor) -> uint64_t {
    uint64_t quotient;
    asm("udiv %0, %1, %2" : "=r"(quotient) : "r"(dividend), "r"(divisor));
    return quotient;
  };
  ASSERT_EQ(unsigned_div(0x8'0000'0000ULL, 2ULL), 0x4'0000'0000ULL) << "Division should be 64-bit.";
  ASSERT_EQ(unsigned_div(123ULL, 0ULL), 0ULL) << "Div by 0 should result in 0.";
}
8973 
TEST(Arm64InsnTest, SignedDivide64) {
  // Wraps the 64-bit SDIV instruction so each case is a single call.
  auto signed_div = [](int64_t dividend, int64_t divisor) -> int64_t {
    int64_t quotient;
    asm("sdiv %0, %1, %2" : "=r"(quotient) : "r"(dividend), "r"(divisor));
    return quotient;
  };
  ASSERT_EQ(signed_div(67802402LL, -1LL), -67802402LL)
      << "Division by -1 should flip sign if dividend is not numeric_limits::min.";
  ASSERT_EQ(signed_div(-531675317891LL, -1LL), 531675317891LL)
      << "Division by -1 should flip sign if dividend is not numeric_limits::min.";
  ASSERT_EQ(signed_div(std::numeric_limits<int64_t>::min(), -1LL),
            std::numeric_limits<int64_t>::min())
      << "Div of numeric_limits::min by -1 should result in numeric_limits::min.";
}
8987 
TEST(Arm64InsnTest, AesEncode) {
  // AESE performs AddRoundKey + SubBytes + ShiftRows on the 128-bit state.
  __uint128_t state = MakeUInt128(0x1111'2222'3333'4444ULL, 0x5555'6666'7777'8888ULL);
  __uint128_t round_key = MakeUInt128(0xaaaa'bbbb'cccc'ddddULL, 0xeeee'ffff'0000'9999ULL);
  __uint128_t encoded;
  asm("aese %0.16b, %2.16b" : "=w"(encoded) : "0"(state), "w"(round_key));
  ASSERT_EQ(encoded, MakeUInt128(0x16ea'82ee'eaf5'eeeeULL, 0xf5ea'eeee'ea16'ee82ULL));
}
8995 
TEST(Arm64InsnTest, AesMixColumns) {
  // AESMC applies the AES MixColumns transformation to the 128-bit state.
  __uint128_t state = MakeUInt128(0x1111'2222'3333'4444ULL, 0x5555'6666'7777'8888ULL);
  __uint128_t mixed = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("aesmc %0.16b, %1.16b")(state);
  ASSERT_EQ(mixed, MakeUInt128(0x77114422dd33aa44ULL, 0x3355006692776d88ULL));
}
9001 
TEST(Arm64InsnTest, AesDecode) {
  // AESD should invert AesEncode's AESE; the extra XORs cancel the key that
  // AESE folds in before SubBytes and AESD folds in afterwards.
  __uint128_t encoded = MakeUInt128(0x16ea'82ee'eaf5'eeeeULL, 0xf5ea'eeee'ea16'ee82ULL);
  __uint128_t round_key = MakeUInt128(0xaaaa'bbbb'cccc'ddddULL, 0xeeee'ffff'0000'9999ULL);
  encoded ^= round_key;
  __uint128_t decoded;
  asm("aesd %0.16b, %2.16b" : "=w"(decoded) : "0"(encoded), "w"(round_key));
  ASSERT_EQ(decoded ^ round_key, MakeUInt128(0x1111'2222'3333'4444ULL, 0x5555'6666'7777'8888ULL));
}
9011 
TEST(Arm64InsnTest, AesInverseMixColumns) {
  // AESIMC must undo AesMixColumns: feed it that test's expected output and
  // require the original input back.
  __uint128_t mixed = MakeUInt128(0x77114422dd33aa44ULL, 0x3355006692776d88ULL);
  __uint128_t unmixed = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("aesimc %0.16b, %1.16b")(mixed);
  ASSERT_EQ(unmixed, MakeUInt128(0x1111'2222'3333'4444ULL, 0x5555'6666'7777'8888ULL));
}
9017 
9018 }  // namespace
9019