1 /*
2  * Copyright (C) 2023 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "gtest/gtest.h"
18 
19 #include "xmmintrin.h"
20 
21 #include <array>
22 #include <cstdint>
23 #include <tuple>
24 
25 #include "berberis/base/bit_util.h"
26 #include "berberis/intrinsics/vector_intrinsics.h"
27 
28 namespace berberis::intrinsics {
29 
30 namespace {
31 
// Compares the production VectorBroadcast against the reference VectorBroadcastForTests for the
// extreme values of every 8-, 16-, 32- and 64-bit integer type.
TEST(VectorIntrinsics, VectorBroadcast) {
  // 8-bit elements.
  constexpr Wrapping kInt8Min{std::numeric_limits<int8_t>::min()};
  constexpr Wrapping kInt8Max{std::numeric_limits<int8_t>::max()};
  constexpr Wrapping kUInt8Max{std::numeric_limits<uint8_t>::max()};
  ASSERT_EQ(VectorBroadcastForTests<kInt8Min>(), VectorBroadcast<kInt8Min>());
  ASSERT_EQ(VectorBroadcastForTests<kInt8Max>(), VectorBroadcast<kInt8Max>());
  ASSERT_EQ(VectorBroadcastForTests<kUInt8Max>(), VectorBroadcast<kUInt8Max>());

  // 16-bit elements.
  constexpr Wrapping kInt16Min{std::numeric_limits<int16_t>::min()};
  constexpr Wrapping kInt16Max{std::numeric_limits<int16_t>::max()};
  constexpr Wrapping kUInt16Max{std::numeric_limits<uint16_t>::max()};
  ASSERT_EQ(VectorBroadcastForTests<kInt16Min>(), VectorBroadcast<kInt16Min>());
  ASSERT_EQ(VectorBroadcastForTests<kInt16Max>(), VectorBroadcast<kInt16Max>());
  ASSERT_EQ(VectorBroadcastForTests<kUInt16Max>(), VectorBroadcast<kUInt16Max>());

  // 32-bit elements.
  constexpr Wrapping kInt32Min{std::numeric_limits<int32_t>::min()};
  constexpr Wrapping kInt32Max{std::numeric_limits<int32_t>::max()};
  constexpr Wrapping kUInt32Max{std::numeric_limits<uint32_t>::max()};
  ASSERT_EQ(VectorBroadcastForTests<kInt32Min>(), VectorBroadcast<kInt32Min>());
  ASSERT_EQ(VectorBroadcastForTests<kInt32Max>(), VectorBroadcast<kInt32Max>());
  ASSERT_EQ(VectorBroadcastForTests<kUInt32Max>(), VectorBroadcast<kUInt32Max>());

  // 64-bit elements.
  constexpr Wrapping kInt64Min{std::numeric_limits<int64_t>::min()};
  constexpr Wrapping kInt64Max{std::numeric_limits<int64_t>::max()};
  constexpr Wrapping kUInt64Max{std::numeric_limits<uint64_t>::max()};
  ASSERT_EQ(VectorBroadcastForTests<kInt64Min>(), VectorBroadcast<kInt64Min>());
  ASSERT_EQ(VectorBroadcastForTests<kInt64Max>(), VectorBroadcast<kInt64Max>());
  ASSERT_EQ(VectorBroadcastForTests<kUInt64Max>(), VectorBroadcast<kUInt64Max>());
}
58 
// Checks MakeBitmaskFromVl against its reference implementation for every vl in [0, 128).
TEST(VectorIntrinsics, MakeBitmaskFromVl) {
  constexpr size_t kVlLimit = 128;
  size_t vl = 0;
  while (vl < kVlLimit) {
    ASSERT_EQ(MakeBitmaskFromVlForTests(vl), MakeBitmaskFromVl(vl));
    ++vl;
  }
}
64 
// For every bitmask in [0, 131071): expands it to an Int8 SIMD mask and collapses that mask back,
// checking at each step that production code and reference code produce the same value.
TEST(VectorIntrinsics, Make8bitMaskFromBitmask) {
  constexpr size_t kBitmaskLimit = 131071;
  size_t bitmask = 0;
  while (bitmask < kBitmaskLimit) {
    // Bitmask -> SIMD mask.
    ASSERT_EQ(BitMaskToSimdMaskForTests<Int8>(bitmask), BitMaskToSimdMask<Int8>(bitmask));
    // SIMD mask -> bitmask, fed with the mask produced by the production conversion.
    const auto [expanded_mask] = BitMaskToSimdMask<Int8>(bitmask);
    ASSERT_EQ(SimdMaskToBitMaskForTests<Int8>(expanded_mask),
              SimdMaskToBitMask<Int8>(expanded_mask));
    ++bitmask;
  }
}
72 
// For every bitmask in [0, 511): expands it to an Int16 SIMD mask and collapses that mask back,
// checking at each step that production code and reference code produce the same value.
TEST(VectorIntrinsics, Make16bitMaskFromBitmask) {
  constexpr size_t kBitmaskLimit = 511;
  size_t bitmask = 0;
  while (bitmask < kBitmaskLimit) {
    // Bitmask -> SIMD mask.
    ASSERT_EQ(BitMaskToSimdMaskForTests<Int16>(bitmask), BitMaskToSimdMask<Int16>(bitmask));
    // SIMD mask -> bitmask, fed with the mask produced by the production conversion.
    const auto [expanded_mask] = BitMaskToSimdMask<Int16>(bitmask);
    ASSERT_EQ(SimdMaskToBitMaskForTests<Int16>(expanded_mask),
              SimdMaskToBitMask<Int16>(expanded_mask));
    ++bitmask;
  }
}
80 
// For every bitmask in [0, 31): expands it to an Int32 SIMD mask and collapses that mask back,
// checking at each step that production code and reference code produce the same value.
TEST(VectorIntrinsics, Make32bitMaskFromBitmask) {
  constexpr size_t kBitmaskLimit = 31;
  size_t bitmask = 0;
  while (bitmask < kBitmaskLimit) {
    // Bitmask -> SIMD mask.
    ASSERT_EQ(BitMaskToSimdMaskForTests<Int32>(bitmask), BitMaskToSimdMask<Int32>(bitmask));
    // SIMD mask -> bitmask, fed with the mask produced by the production conversion.
    const auto [expanded_mask] = BitMaskToSimdMask<Int32>(bitmask);
    ASSERT_EQ(SimdMaskToBitMaskForTests<Int32>(expanded_mask),
              SimdMaskToBitMask<Int32>(expanded_mask));
    ++bitmask;
  }
}
88 
// For every bitmask in [0, 7): expands it to an Int64 SIMD mask and collapses that mask back,
// checking at each step that production code and reference code produce the same value.
TEST(VectorIntrinsics, Make64bitMaskFromBitmask) {
  constexpr size_t kBitmaskLimit = 7;
  size_t bitmask = 0;
  while (bitmask < kBitmaskLimit) {
    // Bitmask -> SIMD mask.
    ASSERT_EQ(BitMaskToSimdMaskForTests<Int64>(bitmask), BitMaskToSimdMask<Int64>(bitmask));
    // SIMD mask -> bitmask, fed with the mask produced by the production conversion.
    const auto [expanded_mask] = BitMaskToSimdMask<Int64>(bitmask);
    ASSERT_EQ(SimdMaskToBitMaskForTests<Int64>(expanded_mask),
              SimdMaskToBitMask<Int64>(expanded_mask));
    ++bitmask;
  }
}
96 template <typename ElementType>
TestVidv()97 void TestVidv() {
98   for (size_t index = 0; index < 8; ++index) {
99     ASSERT_EQ(VidvForTests<ElementType>(index), Vidv<ElementType>(index));
100   }
101 }
// Runs the Vidv comparison for every signed and unsigned element type, signed types first.
TEST(VectorIntrinsics, Vidv) {
  // The comma fold evaluates left to right, so the helpers run in the listed order.
  const auto run_for_each = []<typename... ElementTypes>() { (TestVidv<ElementTypes>(), ...); };
  run_for_each.operator()<Int8, Int16, Int32, Int64, UInt8, UInt16, UInt32, UInt64>();
}
// Easily recognizable bit pattern for target register: every byte is 0x55. The tests below pass it
// as the first argument of VectorMasking and as the source of VectorMaskedElementTo, so elements
// that are expected to stay undisturbed appear as 0x55 bytes in the expected results.
constexpr __m128i kUndisturbedResult = {0x5555'5555'5555'5555, 0x5555'5555'5555'5555};
114 
115 template <auto kElement>
TestVectorMaskedElementTo()116 void TestVectorMaskedElementTo() {
117   size_t max_mask = sizeof(kElement) == sizeof(uint8_t)    ? 131071
118                     : sizeof(kElement) == sizeof(uint16_t) ? 511
119                     : sizeof(kElement) == sizeof(uint32_t) ? 31
120                                                            : 7;
121   for (size_t mask = 0; mask < max_mask; ++mask) {
122     const SIMD128Register src = kUndisturbedResult;
123     const auto [simd_mask] = BitMaskToSimdMask<decltype(kElement)>(mask);
124     ASSERT_EQ(VectorMaskedElementToForTests<kElement>(simd_mask, src),
125               VectorMaskedElementTo<kElement>(simd_mask, src));
126   }
127 }
128 
// Runs the VectorMaskedElementTo comparison with the minimum and maximum value of every signed and
// unsigned 8-, 16-, 32- and 64-bit integer type.
TEST(VectorIntrinsics, VectorMaskedElementTo) {
  // For each type the minimum is tested before the maximum; the comma fold keeps the listed
  // type order.
  const auto run_for_limits_of = []<typename... IntTypes>() {
    ((TestVectorMaskedElementTo<std::numeric_limits<IntTypes>::min()>(),
      TestVectorMaskedElementTo<std::numeric_limits<IntTypes>::max()>()),
     ...);
  };
  run_for_limits_of
      .operator()<int8_t, uint8_t, int16_t, uint16_t, int32_t, uint32_t, int64_t, uint64_t>();
}
147 
// Adds an all-ones vector to a given vector with Vaddvv and checks the sum after it has been
// passed through VectorMasking with a length argument of 16, both without a mask and with an
// all-ones mask.
TEST(VectorIntrinsics, Vaddvv) {
  const auto Check = []<typename ElementType>(
                         auto vadd,
                         SIMD128Register addend,
                         [[gnu::vector_size(16), gnu::may_alias]] ElementType expected) {
    // First operand has every bit set, i.e. each element is -1 in wrapping arithmetic.
    const auto sum = [&] { return std::get<0>(vadd(__m128i{-1, -1}, addend)); };

    // Variant without a mask argument.
    ASSERT_EQ((VectorMasking<Wrapping<ElementType>, TailProcessing::kAgnostic>(
                  kUndisturbedResult, sum(), 0, 16)),
              std::tuple{expected});

    // Variant with an all-ones mask.
    ASSERT_EQ((VectorMasking<Wrapping<ElementType>,
                             TailProcessing::kAgnostic,
                             InactiveProcessing::kAgnostic>(
                  kUndisturbedResult, sum(), 0, 16, RawInt16{0xffff})),
              std::tuple{expected});
  };

  // 8-bit elements.
  Check(Vaddvv<UInt8>,
        __v16qu{0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1},
        __v16qu{255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0});
  Check(Vaddvv<UInt8>,
        __v16qu{1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0},
        __v16qu{0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255});

  // 16-bit elements.
  Check(Vaddvv<UInt16>,
        __v8hu{0, 1, 0, 1, 0, 1, 0, 1},
        __v8hu{0xffff, 0x0000, 0xffff, 0x0000, 0xffff, 0x0000, 0xffff, 0x0000});
  Check(Vaddvv<UInt16>,
        __v8hu{1, 0, 1, 0, 1, 0, 1, 0},
        __v8hu{0x0000, 0xffff, 0x0000, 0xffff, 0x0000, 0xffff, 0x0000, 0xffff});

  // 32-bit elements.
  Check(Vaddvv<UInt32>,
        __v4su{0, 1, 0, 1},
        __v4su{0xffff'ffff, 0x0000'0000, 0xffff'ffff, 0x0000'0000});
  Check(Vaddvv<UInt32>,
        __v4su{1, 0, 1, 0},
        __v4su{0x0000'0000, 0xffff'ffff, 0x0000'0000, 0xffff'ffff});

  // 64-bit elements.
  Check(Vaddvv<UInt64>, __v2du{0, 1}, __v2du{0xffff'ffff'ffff'ffff, 0x0000'0000'0000'0000});
  Check(Vaddvv<UInt64>, __v2du{1, 0}, __v2du{0x0000'0000'0000'0000, 0xffff'ffff'ffff'ffff});
}
187 
// Adds the scalar 1 to a given vector with Vaddvx and checks the sum after it has been passed
// through VectorMasking with a length argument of 16, both without a mask and with an all-ones
// mask.
TEST(VectorIntrinsics, Vaddvx) {
  const auto Check = []<typename ElementType>(
                         auto vadd,
                         SIMD128Register vector_operand,
                         [[gnu::vector_size(16), gnu::may_alias]] ElementType expected) {
    // The scalar operand is always 1.
    const auto sum = [&] { return std::get<0>(vadd(vector_operand, UInt8{1})); };

    // Variant without a mask argument.
    ASSERT_EQ((VectorMasking<Wrapping<ElementType>, TailProcessing::kAgnostic>(
                  kUndisturbedResult, sum(), 0, 16)),
              std::tuple{expected});

    // Variant with an all-ones mask.
    ASSERT_EQ((VectorMasking<Wrapping<ElementType>,
                             TailProcessing::kAgnostic,
                             InactiveProcessing::kAgnostic>(
                  kUndisturbedResult, sum(), 0, 16, RawInt16{0xffff})),
              std::tuple{expected});
  };

  // 8-bit elements.
  Check(Vaddvx<UInt8>,
        __v16qu{254, 255, 254, 255, 254, 255, 254, 255, 254, 255, 254, 255, 254, 255, 254, 255},
        __v16qu{255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0});
  Check(Vaddvx<UInt8>,
        __v16qu{255, 254, 255, 254, 255, 254, 255, 254, 255, 254, 255, 254, 255, 254, 255, 254},
        __v16qu{0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255});

  // 16-bit elements.
  Check(Vaddvx<UInt16>,
        __v8hu{0xfffe, 0xffff, 0xfffe, 0xffff, 0xfffe, 0xffff, 0xfffe, 0xffff},
        __v8hu{0xffff, 0x0000, 0xffff, 0x0000, 0xffff, 0x0000, 0xffff, 0x0000});
  Check(Vaddvx<UInt16>,
        __v8hu{0xffff, 0xfffe, 0xffff, 0xfffe, 0xffff, 0xfffe, 0xffff, 0xfffe},
        __v8hu{0x0000, 0xffff, 0x0000, 0xffff, 0x0000, 0xffff, 0x0000, 0xffff});

  // 32-bit elements.
  Check(Vaddvx<UInt32>,
        __v4su{0xffff'fffe, 0xffff'ffff, 0xffff'fffe, 0xffff'ffff},
        __v4su{0xffff'ffff, 0x0000'0000, 0xffff'ffff, 0x0000'0000});
  Check(Vaddvx<UInt32>,
        __v4su{0xffff'ffff, 0xffff'fffe, 0xffff'ffff, 0xffff'fffe},
        __v4su{0x0000'0000, 0xffff'ffff, 0x0000'0000, 0xffff'ffff});

  // 64-bit elements.
  Check(Vaddvx<UInt64>,
        __v2du{0xffff'ffff'ffff'fffe, 0xffff'ffff'ffff'ffff},
        __v2du{0xffff'ffff'ffff'ffff, 0x0000'0000'0000'0000});
  Check(Vaddvx<UInt64>,
        __v2du{0xffff'ffff'ffff'ffff, 0xffff'ffff'ffff'fffe},
        __v2du{0x0000'0000'0000'0000, 0xffff'ffff'ffff'ffff});
}
228 
TEST(VectorIntrinsics,VlArgForVv)229 TEST(VectorIntrinsics, VlArgForVv) {
230   auto Verify = []<typename ElementType>(
231                     auto Vaddvv,
232                     SIMD128Register arg2,
233                     [[gnu::vector_size(16), gnu::may_alias]] ElementType result_to_check_agnostic,
234                     [[gnu::vector_size(16),
235                       gnu::may_alias]] ElementType result_to_check_undisturbed) {
236     constexpr size_t kHalfLen = sizeof(SIMD128Register) / sizeof(ElementType) / 2;
237     ASSERT_EQ((VectorMasking<Wrapping<ElementType>, TailProcessing::kAgnostic>(
238                   kUndisturbedResult, std::get<0>(Vaddvv(__m128i{-1, -1}, arg2)), 0, kHalfLen)),
239               std::tuple{result_to_check_agnostic});
240     ASSERT_EQ((VectorMasking<Wrapping<ElementType>, TailProcessing::kUndisturbed>(
241                   kUndisturbedResult, std::get<0>(Vaddvv(__m128i{-1, -1}, arg2)), 0, kHalfLen)),
242               std::tuple{result_to_check_undisturbed});
243     ASSERT_EQ(
244         (VectorMasking<Wrapping<ElementType>,
245                        TailProcessing::kAgnostic,
246                        InactiveProcessing::kAgnostic>(kUndisturbedResult,
247                                                       std::get<0>(Vaddvv(__m128i{-1, -1}, arg2)),
248                                                       0,
249                                                       kHalfLen,
250                                                       RawInt16{0xffff})),
251         std::tuple{result_to_check_agnostic});
252     ASSERT_EQ(
253         (VectorMasking<Wrapping<ElementType>,
254                        TailProcessing::kAgnostic,
255                        InactiveProcessing::kUndisturbed>(kUndisturbedResult,
256                                                          std::get<0>(Vaddvv(__m128i{-1, -1}, arg2)),
257                                                          0,
258                                                          kHalfLen,
259                                                          RawInt16{0xffff})),
260         std::tuple{result_to_check_agnostic});
261     ASSERT_EQ(
262         (VectorMasking<Wrapping<ElementType>,
263                        TailProcessing::kUndisturbed,
264                        InactiveProcessing::kAgnostic>(kUndisturbedResult,
265                                                       std::get<0>(Vaddvv(__m128i{-1, -1}, arg2)),
266                                                       0,
267                                                       kHalfLen,
268                                                       RawInt16{0xffff})),
269         std::tuple{result_to_check_undisturbed});
270     ASSERT_EQ(
271         (VectorMasking<Wrapping<ElementType>,
272                        TailProcessing::kUndisturbed,
273                        InactiveProcessing::kUndisturbed>(kUndisturbedResult,
274                                                          std::get<0>(Vaddvv(__m128i{-1, -1}, arg2)),
275                                                          0,
276                                                          kHalfLen,
277                                                          RawInt16{0xffff})),
278         std::tuple{result_to_check_undisturbed});
279   };
280   Verify(Vaddvv<UInt8>,
281          __v16qu{0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1},
282          __v16qu{255, 0, 255, 0, 255, 0, 255, 0, 255, 255, 255, 255, 255, 255, 255, 255},
283          __v16qu{255, 0, 255, 0, 255, 0, 255, 0, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55});
284   Verify(Vaddvv<UInt8>,
285          __v16qu{1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0},
286          __v16qu{0, 255, 0, 255, 0, 255, 0, 255, 255, 255, 255, 255, 255, 255, 255, 255},
287          __v16qu{0, 255, 0, 255, 0, 255, 0, 255, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55});
288   Verify(Vaddvv<UInt16>,
289          __v8hu{0, 1, 0, 1, 0, 1, 0, 1},
290          __v8hu{0xffff, 0x0000, 0xffff, 0x0000, 0xffff, 0xffff, 0xffff, 0xffff},
291          __v8hu{0xffff, 0x0000, 0xffff, 0x0000, 0x5555, 0x5555, 0x5555, 0x5555});
292   Verify(Vaddvv<UInt16>,
293          __v8hu{1, 0, 1, 0, 1, 0, 1, 0},
294          __v8hu{0x0000, 0xffff, 0x0000, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff},
295          __v8hu{0x0000, 0xffff, 0x0000, 0xffff, 0x5555, 0x5555, 0x5555, 0x5555});
296   Verify(Vaddvv<UInt32>,
297          __v4su{0, 1, 0, 1},
298          __v4su{0xffff'ffff, 0x0000'0000, 0xffff'ffff, 0xffff'ffff},
299          __v4su{0xffff'ffff, 0x0000'0000, 0x5555'5555, 0x5555'5555});
300   Verify(Vaddvv<UInt32>,
301          __v4su{1, 0, 1, 0},
302          __v4su{0x0000'0000, 0xffff'ffff, 0xffff'ffff, 0xffff'ffff},
303          __v4su{0x0000'0000, 0xffff'ffff, 0x5555'5555, 0x5555'5555});
304   Verify(Vaddvv<UInt64>,
305          __v2du{0, 1},
306          __v2du{0xffff'ffff'ffff'ffff, 0xffff'ffff'ffff'ffff},
307          __v2du{0xffff'ffff'ffff'ffff, 0x5555'5555'5555'5555});
308   Verify(Vaddvv<UInt64>,
309          __v2du{1, 0},
310          __v2du{0x0000'0000'0000'0000, 0xffff'ffff'ffff'ffff},
311          __v2du{0x0000'0000'0000'0000, 0x5555'5555'5555'5555});
312 }
313 
TEST(VectorIntrinsics,VlArgForVx)314 TEST(VectorIntrinsics, VlArgForVx) {
315   auto Verify = []<typename ElementType>(
316                     auto Vaddvx,
317                     SIMD128Register arg1,
318                     [[gnu::vector_size(16), gnu::may_alias]] ElementType result_to_check_agnostic,
319                     [[gnu::vector_size(16),
320                       gnu::may_alias]] ElementType result_to_check_undisturbed) {
321     constexpr size_t kHalfLen = sizeof(SIMD128Register) / sizeof(ElementType) / 2;
322     ASSERT_EQ((VectorMasking<Wrapping<ElementType>, TailProcessing::kAgnostic>(
323                   kUndisturbedResult, std::get<0>(Vaddvx(arg1, UInt8{1})), 0, kHalfLen)),
324               std::tuple{result_to_check_agnostic});
325     ASSERT_EQ((VectorMasking<Wrapping<ElementType>, TailProcessing::kUndisturbed>(
326                   kUndisturbedResult, std::get<0>(Vaddvx(arg1, UInt8{1})), 0, kHalfLen)),
327               std::tuple{result_to_check_undisturbed});
328     ASSERT_EQ((VectorMasking<Wrapping<ElementType>,
329                              TailProcessing::kAgnostic,
330                              InactiveProcessing::kAgnostic>(kUndisturbedResult,
331                                                             std::get<0>(Vaddvx(arg1, UInt8{1})),
332                                                             0,
333                                                             kHalfLen,
334                                                             RawInt16{0xffff})),
335               std::tuple{result_to_check_agnostic});
336     ASSERT_EQ((VectorMasking<Wrapping<ElementType>,
337                              TailProcessing::kAgnostic,
338                              InactiveProcessing::kUndisturbed>(kUndisturbedResult,
339                                                                std::get<0>(Vaddvx(arg1, UInt8{1})),
340                                                                0,
341                                                                kHalfLen,
342                                                                RawInt16{0xffff})),
343               std::tuple{result_to_check_agnostic});
344     ASSERT_EQ((VectorMasking<Wrapping<ElementType>,
345                              TailProcessing::kUndisturbed,
346                              InactiveProcessing::kAgnostic>(kUndisturbedResult,
347                                                             std::get<0>(Vaddvx(arg1, UInt8{1})),
348                                                             0,
349                                                             kHalfLen,
350                                                             RawInt16{0xffff})),
351               std::tuple{result_to_check_undisturbed});
352     ASSERT_EQ((VectorMasking<Wrapping<ElementType>,
353                              TailProcessing::kUndisturbed,
354                              InactiveProcessing::kUndisturbed>(kUndisturbedResult,
355                                                                std::get<0>(Vaddvx(arg1, UInt8{1})),
356                                                                0,
357                                                                kHalfLen,
358                                                                RawInt16{0xffff})),
359               std::tuple{result_to_check_undisturbed});
360   };
361   Verify(Vaddvx<UInt8>,
362          __v16qu{254, 255, 254, 255, 254, 255, 254, 255, 254, 255, 254, 255, 254, 255, 254, 255},
363          __v16qu{255, 0, 255, 0, 255, 0, 255, 0, 255, 255, 255, 255, 255, 255, 255, 255},
364          __v16qu{255, 0, 255, 0, 255, 0, 255, 0, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55});
365   Verify(Vaddvx<UInt8>,
366          __v16qu{255, 254, 255, 254, 255, 254, 255, 254, 255, 254, 255, 254, 255, 254, 255, 254},
367          __v16qu{0, 255, 0, 255, 0, 255, 0, 255, 255, 255, 255, 255, 255, 255, 255, 255},
368          __v16qu{0, 255, 0, 255, 0, 255, 0, 255, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55});
369   Verify(Vaddvx<UInt16>,
370          __v8hu{0xfffe, 0xffff, 0xfffe, 0xffff, 0xfffe, 0xffff, 0xfffe, 0xffff},
371          __v8hu{0xffff, 0x0000, 0xffff, 0x0000, 0xffff, 0xffff, 0xffff, 0xffff},
372          __v8hu{0xffff, 0x0000, 0xffff, 0x0000, 0x5555, 0x5555, 0x5555, 0x5555});
373   Verify(Vaddvx<UInt16>,
374          __v8hu{0xffff, 0xfffe, 0xffff, 0xfffe, 0xffff, 0xfffe, 0xffff, 0xfffe},
375          __v8hu{0x0000, 0xffff, 0x0000, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff},
376          __v8hu{0x0000, 0xffff, 0x0000, 0xffff, 0x5555, 0x5555, 0x5555, 0x5555});
377   Verify(Vaddvx<UInt32>,
378          __v4su{0xffff'fffe, 0xffff'ffff, 0xffff'fffe, 0xffff'ffff},
379          __v4su{0xffff'ffff, 0x0000'0000, 0xffff'ffff, 0xffff'ffff},
380          __v4su{0xffff'ffff, 0x0000'0000, 0x5555'5555, 0x5555'5555});
381   Verify(Vaddvx<UInt32>,
382          __v4su{0xffff'ffff, 0xffff'fffe, 0xffff'ffff, 0xffff'fffe},
383          __v4su{0x0000'0000, 0xffff'ffff, 0xffff'ffff, 0xffff'ffff},
384          __v4su{0x0000'0000, 0xffff'ffff, 0x5555'5555, 0x5555'5555});
385   Verify(Vaddvx<UInt64>,
386          __v2du{0xffff'ffff'ffff'fffe, 0xffff'ffff'ffff'ffff},
387          __v2du{0xffff'ffff'ffff'ffff, 0xffff'ffff'ffff'ffff},
388          __v2du{0xffff'ffff'ffff'ffff, 0x5555'5555'5555'5555});
389   Verify(Vaddvx<UInt64>,
390          __v2du{0xffff'ffff'ffff'ffff, 0xffff'ffff'ffff'fffe},
391          __v2du{0x0000'0000'0000'0000, 0xffff'ffff'ffff'ffff},
392          __v2du{0x0000'0000'0000'0000, 0x5555'5555'5555'5555});
393 }
394 
TEST(VectorIntrinsics,VmaskArgForVvv)395 TEST(VectorIntrinsics, VmaskArgForVvv) {
396   auto Verify =
397       []<typename ElementType>(
398           auto Vaddvv,
399           SIMD128Register arg2,
400           [[gnu::vector_size(16), gnu::may_alias]] ElementType result_to_check_agnostic_agnostic,
401           [[gnu::vector_size(16), gnu::may_alias]] ElementType result_to_check_agnostic_undisturbed,
402           [[gnu::vector_size(16), gnu::may_alias]] ElementType result_to_check_undisturbed_agnostic,
403           [[gnu::vector_size(16),
404             gnu::may_alias]] ElementType result_to_check_undisturbed_undisturbed) {
405         constexpr size_t kHalfLen = sizeof(SIMD128Register) / sizeof(ElementType) / 2;
406         ASSERT_EQ((VectorMasking<Wrapping<ElementType>,
407                                  TailProcessing::kAgnostic,
408                                  InactiveProcessing::kAgnostic>(
409                       kUndisturbedResult,
410                       std::get<0>(Vaddvv(__m128i{-1, -1}, arg2)),
411                       0,
412                       kHalfLen,
413                       RawInt16{0xfdda})),
414                   std::tuple{result_to_check_agnostic_agnostic});
415         ASSERT_EQ((VectorMasking<Wrapping<ElementType>,
416                                  TailProcessing::kAgnostic,
417                                  InactiveProcessing::kUndisturbed>(
418                       kUndisturbedResult,
419                       std::get<0>(Vaddvv(__m128i{-1, -1}, arg2)),
420                       0,
421                       kHalfLen,
422                       RawInt16{0xfdda})),
423                   std::tuple{result_to_check_agnostic_undisturbed});
424         ASSERT_EQ((VectorMasking<Wrapping<ElementType>,
425                                  TailProcessing::kUndisturbed,
426                                  InactiveProcessing::kAgnostic>(
427                       kUndisturbedResult,
428                       std::get<0>(Vaddvv(__m128i{-1, -1}, arg2)),
429                       0,
430                       kHalfLen,
431                       RawInt16{0xfdda})),
432                   std::tuple{result_to_check_undisturbed_agnostic});
433         ASSERT_EQ((VectorMasking<Wrapping<ElementType>,
434                                  TailProcessing::kUndisturbed,
435                                  InactiveProcessing::kUndisturbed>(
436                       kUndisturbedResult,
437                       std::get<0>(Vaddvv(__m128i{-1, -1}, arg2)),
438                       0,
439                       kHalfLen,
440                       RawInt16{0xfdda})),
441                   std::tuple{result_to_check_undisturbed_undisturbed});
442       };
443   Verify(
444       Vaddvv<UInt8>,
445       __v16qu{0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1},
446       __v16qu{255, 0, 255, 0, 255, 255, 255, 0, 255, 255, 255, 255, 255, 255, 255, 255},
447       __v16qu{0x55, 0, 0x55, 0, 255, 0x55, 255, 0, 255, 255, 255, 255, 255, 255, 255, 255},
448       __v16qu{255, 0, 255, 0, 255, 255, 255, 0, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55},
449       __v16qu{0x55, 0, 0x55, 0, 255, 0x55, 255, 0, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55});
450   Verify(
451       Vaddvv<UInt8>,
452       __v16qu{1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0},
453       __v16qu{255, 255, 255, 255, 0, 255, 0, 255, 255, 255, 255, 255, 255, 255, 255, 255},
454       __v16qu{0x55, 255, 0x55, 255, 0, 0x55, 0, 255, 255, 255, 255, 255, 255, 255, 255, 255},
455       __v16qu{255, 255, 255, 255, 0, 255, 0, 255, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55},
456       __v16qu{
457           0x55, 255, 0x55, 255, 0, 0x55, 0, 255, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55});
458   Verify(Vaddvv<UInt16>,
459          __v8hu{0, 1, 0, 1, 0, 1, 0, 1},
460          __v8hu{0xffff, 0x0000, 0xffff, 0x0000, 0xffff, 0xffff, 0xffff, 0xffff},
461          __v8hu{0x5555, 0x0000, 0x5555, 0x0000, 0xffff, 0xffff, 0xffff, 0xffff},
462          __v8hu{0xffff, 0x0000, 0xffff, 0x0000, 0x5555, 0x5555, 0x5555, 0x5555},
463          __v8hu{0x5555, 0x0000, 0x5555, 0x0000, 0x5555, 0x5555, 0x5555, 0x5555});
464   Verify(Vaddvv<UInt16>,
465          __v8hu{1, 0, 1, 0, 1, 0, 1, 0},
466          __v8hu{0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff},
467          __v8hu{0x5555, 0xffff, 0x5555, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff},
468          __v8hu{0xffff, 0xffff, 0xffff, 0xffff, 0x5555, 0x5555, 0x5555, 0x5555},
469          __v8hu{0x5555, 0xffff, 0x5555, 0xffff, 0x5555, 0x5555, 0x5555, 0x5555});
470   Verify(Vaddvv<UInt32>,
471          __v4su{0, 1, 0, 1},
472          __v4su{0xffff'ffff, 0x0000'0000, 0xffff'ffff, 0xffff'ffff},
473          __v4su{0x5555'5555, 0x0000'0000, 0xffff'ffff, 0xffff'ffff},
474          __v4su{0xffff'ffff, 0x0000'0000, 0x5555'5555, 0x5555'5555},
475          __v4su{0x5555'5555, 0x0000'0000, 0x5555'5555, 0x5555'5555});
476   Verify(Vaddvv<UInt32>,
477          __v4su{1, 0, 1, 0},
478          __v4su{0xffff'ffff, 0xffff'ffff, 0xffff'ffff, 0xffff'ffff},
479          __v4su{0x5555'5555, 0xffff'ffff, 0xffff'ffff, 0xffff'ffff},
480          __v4su{0xffff'ffff, 0xffff'ffff, 0x5555'5555, 0x5555'5555},
481          __v4su{0x5555'5555, 0xffff'ffff, 0x5555'5555, 0x5555'5555});
482   Verify(Vaddvv<UInt64>,
483          __v2du{0, 1},
484          __v2du{0xffff'ffff'ffff'ffff, 0xffff'ffff'ffff'ffff},
485          __v2du{0x5555'5555'5555'5555, 0xffff'ffff'ffff'ffff},
486          __v2du{0xffff'ffff'ffff'ffff, 0x5555'5555'5555'5555},
487          __v2du{0x5555'5555'5555'5555, 0x5555'5555'5555'5555});
488   Verify(Vaddvv<UInt64>,
489          __v2du{1, 0},
490          __v2du{0xffff'ffff'ffff'ffff, 0xffff'ffff'ffff'ffff},
491          __v2du{0x5555'5555'5555'5555, 0xffff'ffff'ffff'ffff},
492          __v2du{0xffff'ffff'ffff'ffff, 0x5555'5555'5555'5555},
493          __v2du{0x5555'5555'5555'5555, 0x5555'5555'5555'5555});
494 }
495 
TEST(VectorIntrinsics,VmaskArgForVvx)496 TEST(VectorIntrinsics, VmaskArgForVvx) {
497   auto Verify =
498       []<typename ElementType>(
499           auto Vaddvx,
500           SIMD128Register arg1,
501           [[gnu::vector_size(16), gnu::may_alias]] ElementType result_to_check_agnostic_agnostic,
502           [[gnu::vector_size(16), gnu::may_alias]] ElementType result_to_check_agnostic_undisturbed,
503           [[gnu::vector_size(16), gnu::may_alias]] ElementType result_to_check_undisturbed_agnostic,
504           [[gnu::vector_size(16),
505             gnu::may_alias]] ElementType result_to_check_undisturbed_undisturbed) {
506         constexpr size_t kHalfLen = sizeof(SIMD128Register) / sizeof(ElementType) / 2;
507         ASSERT_EQ((VectorMasking<Wrapping<ElementType>,
508                                  TailProcessing::kAgnostic,
509                                  InactiveProcessing::kAgnostic>(kUndisturbedResult,
510                                                                 std::get<0>(Vaddvx(arg1, UInt8{1})),
511                                                                 0,
512                                                                 kHalfLen,
513                                                                 RawInt16{0xfdda})),
514                   std::tuple{result_to_check_agnostic_agnostic});
515         ASSERT_EQ(
516             (VectorMasking<Wrapping<ElementType>,
517                            TailProcessing::kAgnostic,
518                            InactiveProcessing::kUndisturbed>(kUndisturbedResult,
519                                                              std::get<0>(Vaddvx(arg1, UInt8{1})),
520                                                              0,
521                                                              kHalfLen,
522                                                              RawInt16{0xfdda})),
523             std::tuple{result_to_check_agnostic_undisturbed});
524         ASSERT_EQ((VectorMasking<Wrapping<ElementType>,
525                                  TailProcessing::kUndisturbed,
526                                  InactiveProcessing::kAgnostic>(kUndisturbedResult,
527                                                                 std::get<0>(Vaddvx(arg1, UInt8{1})),
528                                                                 0,
529                                                                 kHalfLen,
530                                                                 RawInt16{0xfdda})),
531                   std::tuple{result_to_check_undisturbed_agnostic});
532         ASSERT_EQ(
533             (VectorMasking<Wrapping<ElementType>,
534                            TailProcessing::kUndisturbed,
535                            InactiveProcessing::kUndisturbed>(kUndisturbedResult,
536                                                              std::get<0>(Vaddvx(arg1, UInt8{1})),
537                                                              0,
538                                                              kHalfLen,
539                                                              RawInt16{0xfdda})),
540             std::tuple{result_to_check_undisturbed_undisturbed});
541       };
542   Verify(
543       Vaddvx<UInt8>,
544       __v16qu{254, 255, 254, 255, 254, 255, 254, 255, 254, 255, 254, 255, 254, 255, 254, 255},
545       __v16qu{255, 0, 255, 0, 255, 255, 255, 0, 255, 255, 255, 255, 255, 255, 255, 255},
546       __v16qu{0x55, 0, 0x55, 0, 255, 0x55, 255, 0, 255, 255, 255, 255, 255, 255, 255, 255},
547       __v16qu{255, 0, 255, 0, 255, 255, 255, 0, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55},
548       __v16qu{0x55, 0, 0x55, 0, 255, 0x55, 255, 0, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55});
549   Verify(
550       Vaddvx<UInt8>,
551       __v16qu{255, 254, 255, 254, 255, 254, 255, 254, 255, 254, 255, 254, 255, 254, 255, 254},
552       __v16qu{255, 255, 255, 255, 0, 255, 0, 255, 255, 255, 255, 255, 255, 255, 255, 255},
553       __v16qu{0x55, 255, 0x55, 255, 0, 0x55, 0, 255, 255, 255, 255, 255, 255, 255, 255, 255},
554       __v16qu{255, 255, 255, 255, 0, 255, 0, 255, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55},
555       __v16qu{
556           0x55, 255, 0x55, 255, 0, 0x55, 0, 255, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55});
557   Verify(Vaddvx<UInt16>,
558          __v8hu{0xfffe, 0xffff, 0xfffe, 0xffff, 0xfffe, 0xffff, 0xfffe, 0xffff},
559          __v8hu{0xffff, 0x0000, 0xffff, 0x0000, 0xffff, 0xffff, 0xffff, 0xffff},
560          __v8hu{0x5555, 0x0000, 0x5555, 0x0000, 0xffff, 0xffff, 0xffff, 0xffff},
561          __v8hu{0xffff, 0x0000, 0xffff, 0x0000, 0x5555, 0x5555, 0x5555, 0x5555},
562          __v8hu{0x5555, 0x0000, 0x5555, 0x0000, 0x5555, 0x5555, 0x5555, 0x5555});
563   Verify(Vaddvx<UInt16>,
564          __v8hu{0xffff, 0xfffe, 0xffff, 0xfffe, 0xffff, 0xfffe, 0xffff, 0xfffe},
565          __v8hu{0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff},
566          __v8hu{0x5555, 0xffff, 0x5555, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff},
567          __v8hu{0xffff, 0xffff, 0xffff, 0xffff, 0x5555, 0x5555, 0x5555, 0x5555},
568          __v8hu{0x5555, 0xffff, 0x5555, 0xffff, 0x5555, 0x5555, 0x5555, 0x5555});
569   Verify(Vaddvx<UInt32>,
570          __v4su{0xffff'fffe, 0xffff'ffff, 0xffff'fffe, 0xffff'ffff},
571          __v4su{0xffff'ffff, 0x0000'0000, 0xffff'ffff, 0xffff'ffff},
572          __v4su{0x5555'5555, 0x0000'0000, 0xffff'ffff, 0xffff'ffff},
573          __v4su{0xffff'ffff, 0x0000'0000, 0x5555'5555, 0x5555'5555},
574          __v4su{0x5555'5555, 0x0000'0000, 0x5555'5555, 0x5555'5555});
575   Verify(Vaddvx<UInt32>,
576          __v4su{0xffff'ffff, 0xffff'fffe, 0xffff'ffff, 0xffff'fffe},
577          __v4su{0xffff'ffff, 0xffff'ffff, 0xffff'ffff, 0xffff'ffff},
578          __v4su{0x5555'5555, 0xffff'ffff, 0xffff'ffff, 0xffff'ffff},
579          __v4su{0xffff'ffff, 0xffff'ffff, 0x5555'5555, 0x5555'5555},
580          __v4su{0x5555'5555, 0xffff'ffff, 0x5555'5555, 0x5555'5555});
581   Verify(Vaddvx<UInt64>,
582          __v2du{0xffff'ffff'ffff'fffe, 0xffff'ffff'ffff'ffff},
583          __v2du{0xffff'ffff'ffff'ffff, 0xffff'ffff'ffff'ffff},
584          __v2du{0x5555'5555'5555'5555, 0xffff'ffff'ffff'ffff},
585          __v2du{0xffff'ffff'ffff'ffff, 0x5555'5555'5555'5555},
586          __v2du{0x5555'5555'5555'5555, 0x5555'5555'5555'5555});
587   Verify(Vaddvx<UInt64>,
588          __v2du{0xffff'ffff'ffff'ffff, 0xffff'ffff'ffff'fffe},
589          __v2du{0xffff'ffff'ffff'ffff, 0xffff'ffff'ffff'ffff},
590          __v2du{0x5555'5555'5555'5555, 0xffff'ffff'ffff'ffff},
591          __v2du{0xffff'ffff'ffff'ffff, 0x5555'5555'5555'5555},
592          __v2du{0x5555'5555'5555'5555, 0x5555'5555'5555'5555});
593 }
594 
TEST(VectorIntrinsics,VstartArgVv)595 TEST(VectorIntrinsics, VstartArgVv) {
596   auto Verify = []<typename ElementType>(
597                     auto Vaddvv,
598                     SIMD128Register arg2,
599                     [[gnu::vector_size(16), gnu::may_alias]] ElementType result_to_check) {
600     ASSERT_EQ((VectorMasking<Wrapping<ElementType>, TailProcessing::kAgnostic>(
601                   kUndisturbedResult, std::get<0>(Vaddvv(__m128i{-1, -1}, arg2)), 1, 16)),
602               std::tuple{result_to_check});
603     ASSERT_EQ((VectorMasking<Wrapping<ElementType>, TailProcessing::kUndisturbed>(
604                   kUndisturbedResult, std::get<0>(Vaddvv(__m128i{-1, -1}, arg2)), 1, 16)),
605               std::tuple{result_to_check});
606     ASSERT_EQ(
607         (VectorMasking<Wrapping<ElementType>,
608                        TailProcessing::kAgnostic,
609                        InactiveProcessing::kAgnostic>(kUndisturbedResult,
610                                                       std::get<0>(Vaddvv(__m128i{-1, -1}, arg2)),
611                                                       1,
612                                                       16,
613                                                       RawInt16{0xffff})),
614         std::tuple{result_to_check});
615     ASSERT_EQ(
616         (VectorMasking<Wrapping<ElementType>,
617                        TailProcessing::kAgnostic,
618                        InactiveProcessing::kUndisturbed>(kUndisturbedResult,
619                                                          std::get<0>(Vaddvv(__m128i{-1, -1}, arg2)),
620                                                          1,
621                                                          16,
622                                                          RawInt16{0xffff})),
623         std::tuple{result_to_check});
624     ASSERT_EQ(
625         (VectorMasking<Wrapping<ElementType>,
626                        TailProcessing::kUndisturbed,
627                        InactiveProcessing::kAgnostic>(kUndisturbedResult,
628                                                       std::get<0>(Vaddvv(__m128i{-1, -1}, arg2)),
629                                                       1,
630                                                       16,
631                                                       RawInt16{0xffff})),
632         std::tuple{result_to_check});
633     ASSERT_EQ(
634         (VectorMasking<Wrapping<ElementType>,
635                        TailProcessing::kUndisturbed,
636                        InactiveProcessing::kUndisturbed>(kUndisturbedResult,
637                                                          std::get<0>(Vaddvv(__m128i{-1, -1}, arg2)),
638                                                          1,
639                                                          16,
640                                                          RawInt16{0xffff})),
641         std::tuple{result_to_check});
642   };
643   Verify(Vaddvv<UInt8>,
644          __v16qu{0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1},
645          __v16qu{0x55, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0});
646   Verify(Vaddvv<UInt8>,
647          __v16qu{1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0},
648          __v16qu{0x55, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255});
649   Verify(Vaddvv<UInt16>,
650          __v8hu{0, 1, 0, 1, 0, 1, 0, 1},
651          __v8hu{0x5555, 0x0000, 0xffff, 0x0000, 0xffff, 0x0000, 0xffff, 0x0000});
652   Verify(Vaddvv<UInt16>,
653          __v8hu{1, 0, 1, 0, 1, 0, 1, 0},
654          __v8hu{0x5555, 0xffff, 0x0000, 0xffff, 0x0000, 0xffff, 0x0000, 0xffff});
655   Verify(Vaddvv<UInt32>,
656          __v4su{0, 1, 0, 1},
657          __v4su{0x5555'5555, 0x0000'0000, 0xffff'ffff, 0x0000'0000});
658   Verify(Vaddvv<UInt32>,
659          __v4su{1, 0, 1, 0},
660          __v4su{0x5555'5555, 0xffff'ffff, 0x0000'0000, 0xffff'ffff});
661   Verify(Vaddvv<UInt64>, __v2du{0, 1}, __v2du{0x5555'5555'5555'5555, 0x0000'0000'0000'0000});
662   Verify(Vaddvv<UInt64>, __v2du{1, 0}, __v2du{0x5555'5555'5555'5555, 0xffff'ffff'ffff'ffff});
663 }
664 
TEST(VectorIntrinsics,VstartArgVx)665 TEST(VectorIntrinsics, VstartArgVx) {
666   auto Verify = []<typename ElementType>(
667                     auto Vaddvx,
668                     SIMD128Register arg1,
669                     [[gnu::vector_size(16), gnu::may_alias]] ElementType result_to_check) {
670     ASSERT_EQ((VectorMasking<Wrapping<ElementType>, TailProcessing::kAgnostic>(
671                   kUndisturbedResult, std::get<0>(Vaddvx(arg1, UInt8{1})), 1, 16)),
672               std::tuple{result_to_check});
673     ASSERT_EQ((VectorMasking<Wrapping<ElementType>, TailProcessing::kUndisturbed>(
674                   kUndisturbedResult, std::get<0>(Vaddvx(arg1, UInt8{1})), 1, 16)),
675               std::tuple{result_to_check});
676     ASSERT_EQ(
677         (VectorMasking<Wrapping<ElementType>,
678                        TailProcessing::kAgnostic,
679                        InactiveProcessing::kAgnostic>(
680             kUndisturbedResult, std::get<0>(Vaddvx(arg1, UInt8{1})), 1, 16, RawInt16{0xffff})),
681         std::tuple{result_to_check});
682     ASSERT_EQ(
683         (VectorMasking<Wrapping<ElementType>,
684                        TailProcessing::kAgnostic,
685                        InactiveProcessing::kUndisturbed>(
686             kUndisturbedResult, std::get<0>(Vaddvx(arg1, UInt8{1})), 1, 16, RawInt16{0xffff})),
687         std::tuple{result_to_check});
688     ASSERT_EQ(
689         (VectorMasking<Wrapping<ElementType>,
690                        TailProcessing::kUndisturbed,
691                        InactiveProcessing::kAgnostic>(
692             kUndisturbedResult, std::get<0>(Vaddvx(arg1, UInt8{1})), 1, 16, RawInt16{0xffff})),
693         std::tuple{result_to_check});
694     ASSERT_EQ(
695         (VectorMasking<Wrapping<ElementType>,
696                        TailProcessing::kUndisturbed,
697                        InactiveProcessing::kUndisturbed>(
698             kUndisturbedResult, std::get<0>(Vaddvx(arg1, UInt8{1})), 1, 16, RawInt16{0xffff})),
699         std::tuple{result_to_check});
700   };
701   Verify(Vaddvx<UInt8>,
702          __v16qu{254, 255, 254, 255, 254, 255, 254, 255, 254, 255, 254, 255, 254, 255, 254, 255},
703          __v16qu{0x55, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0});
704   Verify(Vaddvx<UInt8>,
705          __v16qu{255, 254, 255, 254, 255, 254, 255, 254, 255, 254, 255, 254, 255, 254, 255, 254},
706          __v16qu{0x55, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255});
707   Verify(Vaddvx<UInt16>,
708          __v8hu{0xfffe, 0xffff, 0xfffe, 0xffff, 0xfffe, 0xffff, 0xfffe, 0xffff},
709          __v8hu{0x5555, 0x0000, 0xffff, 0x0000, 0xffff, 0x0000, 0xffff, 0x0000});
710   Verify(Vaddvx<UInt16>,
711          __v8hu{0xffff, 0xfffe, 0xffff, 0xfffe, 0xffff, 0xfffe, 0xffff, 0xfffe},
712          __v8hu{0x5555, 0xffff, 0x0000, 0xffff, 0x0000, 0xffff, 0x0000, 0xffff});
713   Verify(Vaddvx<UInt32>,
714          __v4su{0xffff'fffe, 0xffff'ffff, 0xffff'fffe, 0xffff'ffff},
715          __v4su{0x5555'5555, 0x0000'0000, 0xffff'ffff, 0x0000'0000});
716   Verify(Vaddvx<UInt32>,
717          __v4su{0xffff'ffff, 0xffff'fffe, 0xffff'ffff, 0xffff'fffe},
718          __v4su{0x5555'5555, 0xffff'ffff, 0x0000'0000, 0xffff'ffff});
719   Verify(Vaddvx<UInt64>,
720          __v2du{0xffff'ffff'ffff'fffe, 0xffff'ffff'ffff'ffff},
721          __v2du{0x5555'5555'5555'5555, 0x0000'0000'0000'0000});
722   Verify(Vaddvx<UInt64>,
723          __v2du{0xffff'ffff'ffff'ffff, 0xffff'ffff'ffff'fffe},
724          __v2du{0x5555'5555'5555'5555, 0xffff'ffff'ffff'ffff});
725 }
726 
// Checks the vector-vector subtract with an all-zero first operand, so each expected element is
// 0 - arg2 reduced modulo the element width (0 - 1 wraps to all ones). The difference is passed
// through VectorMasking starting at index 0, once without a mask operand and once with a mask
// that has all sixteen bits set; no expected vector contains the 0x55 fill pattern.
TEST(VectorIntrinsics, Vsubvv) {
  // Verify(intrinsic, arg2, expected): asserts that both masking variants yield `expected`.
  auto Verify = []<typename ElementType>(
                    auto Vsubvv,
                    SIMD128Register arg2,
                    [[gnu::vector_size(16), gnu::may_alias]] ElementType result_to_check) {
    ASSERT_EQ((VectorMasking<Wrapping<ElementType>, TailProcessing::kAgnostic>(
                  kUndisturbedResult, std::get<0>(Vsubvv(__m128i{0, 0}, arg2)), 0, 16)),
              std::tuple{result_to_check});
    ASSERT_EQ(
        (VectorMasking<Wrapping<ElementType>,
                       TailProcessing::kAgnostic,
                       InactiveProcessing::kAgnostic>(
            kUndisturbedResult, std::get<0>(Vsubvv(__m128i{0, 0}, arg2)), 0, 16, RawInt16{0xffff})),
        std::tuple{result_to_check});
  };
  // Each element width is exercised with both alternating patterns so that every lane sees a
  // borrow (0 - 1) in one case and a plain zero result (0 - 0) in the other.
  Verify(Vsubvv<UInt8>,
         __v16qu{0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1},
         __v16qu{0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255});
  Verify(Vsubvv<UInt8>,
         __v16qu{1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0},
         __v16qu{255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0});
  Verify(Vsubvv<UInt16>,
         __v8hu{0, 1, 0, 1, 0, 1, 0, 1},
         __v8hu{0x0000, 0xffff, 0x0000, 0xffff, 0x0000, 0xffff, 0x0000, 0xffff});
  Verify(Vsubvv<UInt16>,
         __v8hu{1, 0, 1, 0, 1, 0, 1, 0},
         __v8hu{0xffff, 0x0000, 0xffff, 0x0000, 0xffff, 0x0000, 0xffff, 0x0000});
  Verify(Vsubvv<UInt32>,
         __v4su{0, 1, 0, 1},
         __v4su{0x0000'0000, 0xffff'ffff, 0x0000'0000, 0xffff'ffff});
  // Mirrored 32-bit pattern; previously the only width without its second case.
  Verify(Vsubvv<UInt32>,
         __v4su{1, 0, 1, 0},
         __v4su{0xffff'ffff, 0x0000'0000, 0xffff'ffff, 0x0000'0000});
  Verify(Vsubvv<UInt64>, __v2du{0, 1}, __v2du{0x0000'0000'0000'0000, 0xffff'ffff'ffff'ffff});
  Verify(Vsubvv<UInt64>, __v2du{1, 0}, __v2du{0xffff'ffff'ffff'ffff, 0x0000'0000'0000'0000});
}
760 
// Checks the vector-scalar subtract with scalar operand UInt8{1}: each expected element is
// arg1 - 1 reduced modulo the element width (0 - 1 wraps to all ones). The difference is passed
// through VectorMasking starting at index 0, once without a mask operand and once with a mask
// that has all sixteen bits set.
TEST(VectorIntrinsics, Vsubvx) {
  auto Verify = []<typename ElementType>(
                    auto vsub,
                    SIMD128Register arg1,
                    [[gnu::vector_size(16), gnu::may_alias]] ElementType result_to_check) {
    // Positional arguments that follow the two registers in each VectorMasking call; presumably
    // vstart and vl (TODO confirm against the VectorMasking declaration).
    constexpr int kVstart = 0;
    constexpr int kVl = 16;
    // Evaluated afresh for each assertion so that every check performs its own intrinsic call.
    const auto Difference = [&] { return std::get<0>(vsub(arg1, UInt8{1})); };

    // Tail policy only, no mask operand.
    ASSERT_EQ((VectorMasking<Wrapping<ElementType>, TailProcessing::kAgnostic>(
                  kUndisturbedResult, Difference(), kVstart, kVl)),
              std::tuple{result_to_check});
    // Tail and inactive policies together, with every mask bit set.
    ASSERT_EQ((VectorMasking<Wrapping<ElementType>,
                             TailProcessing::kAgnostic,
                             InactiveProcessing::kAgnostic>(
                  kUndisturbedResult, Difference(), kVstart, kVl, RawInt16{0xffff})),
              std::tuple{result_to_check});
  };

  // 8-bit elements, both alternating patterns.
  Verify(Vsubvx<UInt8>,
         __v16qu{1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0},
         __v16qu{0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255});
  Verify(Vsubvx<UInt8>,
         __v16qu{0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1},
         __v16qu{255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0});

  // 16-bit elements.
  Verify(Vsubvx<UInt16>,
         __v8hu{1, 0, 1, 0, 1, 0, 1, 0},
         __v8hu{0x0000, 0xffff, 0x0000, 0xffff, 0x0000, 0xffff, 0x0000, 0xffff});
  Verify(Vsubvx<UInt16>,
         __v8hu{0, 1, 0, 1, 0, 1, 0, 1},
         __v8hu{0xffff, 0x0000, 0xffff, 0x0000, 0xffff, 0x0000, 0xffff, 0x0000});

  // 32-bit elements.
  Verify(Vsubvx<UInt32>,
         __v4su{1, 0, 1, 0},
         __v4su{0x0000'0000, 0xffff'ffff, 0x0000'0000, 0xffff'ffff});
  Verify(Vsubvx<UInt32>,
         __v4su{0, 1, 0, 1},
         __v4su{0xffff'ffff, 0x0000'0000, 0xffff'ffff, 0x0000'0000});

  // 64-bit elements.
  Verify(Vsubvx<UInt64>, __v2du{1, 0}, __v2du{0x0000'0000'0000'0000, 0xffff'ffff'ffff'ffff});
  Verify(Vsubvx<UInt64>, __v2du{0, 1}, __v2du{0xffff'ffff'ffff'ffff, 0x0000'0000'0000'0000});
}
797 
798 }  // namespace
799 
800 }  // namespace berberis::intrinsics
801