1 /*
2 * Copyright (C) 2023 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "gtest/gtest.h"
18
19 #include "xmmintrin.h"
20
21 #include <array>
22 #include <cstdint>
23 #include <tuple>
24
25 #include "berberis/base/bit_util.h"
26 #include "berberis/intrinsics/vector_intrinsics.h"
27
28 namespace berberis::intrinsics {
29
30 namespace {
31
TEST(VectorIntrinsics, VectorBroadcast) {
  // For the extreme values of every 8/16/32/64-bit integer type, VectorBroadcast must produce the
  // same result as its ForTests counterpart.
  using Int8Limits = std::numeric_limits<int8_t>;
  using UInt8Limits = std::numeric_limits<uint8_t>;
  using Int16Limits = std::numeric_limits<int16_t>;
  using UInt16Limits = std::numeric_limits<uint16_t>;
  using Int32Limits = std::numeric_limits<int32_t>;
  using UInt32Limits = std::numeric_limits<uint32_t>;
  using Int64Limits = std::numeric_limits<int64_t>;
  using UInt64Limits = std::numeric_limits<uint64_t>;

  // 8-bit elements.
  ASSERT_EQ(VectorBroadcastForTests<Wrapping{Int8Limits::min()}>(),
            VectorBroadcast<Wrapping{Int8Limits::min()}>());
  ASSERT_EQ(VectorBroadcastForTests<Wrapping{Int8Limits::max()}>(),
            VectorBroadcast<Wrapping{Int8Limits::max()}>());
  ASSERT_EQ(VectorBroadcastForTests<Wrapping{UInt8Limits::max()}>(),
            VectorBroadcast<Wrapping{UInt8Limits::max()}>());

  // 16-bit elements.
  ASSERT_EQ(VectorBroadcastForTests<Wrapping{Int16Limits::min()}>(),
            VectorBroadcast<Wrapping{Int16Limits::min()}>());
  ASSERT_EQ(VectorBroadcastForTests<Wrapping{Int16Limits::max()}>(),
            VectorBroadcast<Wrapping{Int16Limits::max()}>());
  ASSERT_EQ(VectorBroadcastForTests<Wrapping{UInt16Limits::max()}>(),
            VectorBroadcast<Wrapping{UInt16Limits::max()}>());

  // 32-bit elements.
  ASSERT_EQ(VectorBroadcastForTests<Wrapping{Int32Limits::min()}>(),
            VectorBroadcast<Wrapping{Int32Limits::min()}>());
  ASSERT_EQ(VectorBroadcastForTests<Wrapping{Int32Limits::max()}>(),
            VectorBroadcast<Wrapping{Int32Limits::max()}>());
  ASSERT_EQ(VectorBroadcastForTests<Wrapping{UInt32Limits::max()}>(),
            VectorBroadcast<Wrapping{UInt32Limits::max()}>());

  // 64-bit elements.
  ASSERT_EQ(VectorBroadcastForTests<Wrapping{Int64Limits::min()}>(),
            VectorBroadcast<Wrapping{Int64Limits::min()}>());
  ASSERT_EQ(VectorBroadcastForTests<Wrapping{Int64Limits::max()}>(),
            VectorBroadcast<Wrapping{Int64Limits::max()}>());
  ASSERT_EQ(VectorBroadcastForTests<Wrapping{UInt64Limits::max()}>(),
            VectorBroadcast<Wrapping{UInt64Limits::max()}>());
}
58
TEST(VectorIntrinsics, MakeBitmaskFromVl) {
  // Compare MakeBitmaskFromVl with MakeBitmaskFromVlForTests for every vl in [0, 128).
  constexpr size_t kVlLimit = 128;
  for (size_t vl = 0; vl != kVlLimit; ++vl) {
    ASSERT_EQ(MakeBitmaskFromVlForTests(vl), MakeBitmaskFromVl(vl));
  }
}
64
TEST(VectorIntrinsics, Make8bitMaskFromBitmask) {
  // Round trip for 8-bit elements: bitmask -> SIMD mask -> bitmask, each direction checked
  // against its ForTests counterpart for every bitmask below kMaskLimit.
  using ElementType = Int8;
  constexpr size_t kMaskLimit = 131071;
  for (size_t mask = 0; mask != kMaskLimit; ++mask) {
    ASSERT_EQ(BitMaskToSimdMaskForTests<ElementType>(mask), BitMaskToSimdMask<ElementType>(mask));
    // Feed the freshly expanded SIMD mask into the reverse conversion.
    const auto [simd_mask] = BitMaskToSimdMask<ElementType>(mask);
    ASSERT_EQ(SimdMaskToBitMaskForTests<ElementType>(simd_mask),
              SimdMaskToBitMask<ElementType>(simd_mask));
  }
}
72
TEST(VectorIntrinsics, Make16bitMaskFromBitmask) {
  // Round trip for 16-bit elements: bitmask -> SIMD mask -> bitmask, each direction checked
  // against its ForTests counterpart for every bitmask below kMaskLimit.
  using ElementType = Int16;
  constexpr size_t kMaskLimit = 511;
  for (size_t mask = 0; mask != kMaskLimit; ++mask) {
    ASSERT_EQ(BitMaskToSimdMaskForTests<ElementType>(mask), BitMaskToSimdMask<ElementType>(mask));
    // Feed the freshly expanded SIMD mask into the reverse conversion.
    const auto [simd_mask] = BitMaskToSimdMask<ElementType>(mask);
    ASSERT_EQ(SimdMaskToBitMaskForTests<ElementType>(simd_mask),
              SimdMaskToBitMask<ElementType>(simd_mask));
  }
}
80
TEST(VectorIntrinsics, Make32bitMaskFromBitmask) {
  // Round trip for 32-bit elements: bitmask -> SIMD mask -> bitmask, each direction checked
  // against its ForTests counterpart for every bitmask below kMaskLimit.
  using ElementType = Int32;
  constexpr size_t kMaskLimit = 31;
  for (size_t mask = 0; mask != kMaskLimit; ++mask) {
    ASSERT_EQ(BitMaskToSimdMaskForTests<ElementType>(mask), BitMaskToSimdMask<ElementType>(mask));
    // Feed the freshly expanded SIMD mask into the reverse conversion.
    const auto [simd_mask] = BitMaskToSimdMask<ElementType>(mask);
    ASSERT_EQ(SimdMaskToBitMaskForTests<ElementType>(simd_mask),
              SimdMaskToBitMask<ElementType>(simd_mask));
  }
}
88
TEST(VectorIntrinsics, Make64bitMaskFromBitmask) {
  // Round trip for 64-bit elements: bitmask -> SIMD mask -> bitmask, each direction checked
  // against its ForTests counterpart for every bitmask below kMaskLimit.
  using ElementType = Int64;
  constexpr size_t kMaskLimit = 7;
  for (size_t mask = 0; mask != kMaskLimit; ++mask) {
    ASSERT_EQ(BitMaskToSimdMaskForTests<ElementType>(mask), BitMaskToSimdMask<ElementType>(mask));
    // Feed the freshly expanded SIMD mask into the reverse conversion.
    const auto [simd_mask] = BitMaskToSimdMask<ElementType>(mask);
    ASSERT_EQ(SimdMaskToBitMaskForTests<ElementType>(simd_mask),
              SimdMaskToBitMask<ElementType>(simd_mask));
  }
}
96 template <typename ElementType>
TestVidv()97 void TestVidv() {
98 for (size_t index = 0; index < 8; ++index) {
99 ASSERT_EQ(VidvForTests<ElementType>(index), Vidv<ElementType>(index));
100 }
101 }

TEST(VectorIntrinsics, Vidv) {
  // Run the Vidv comparison for each listed element type, left to right: all signed widths first,
  // then all unsigned widths.
  auto RunForEach = []<typename... ElementTypes>() { (TestVidv<ElementTypes>(), ...); };
  RunForEach.operator()<Int8, Int16, Int32, Int64, UInt8, UInt16, UInt32, UInt64>();
}

112 // Easily recognizable bit pattern for target register.
113 constexpr __m128i kUndisturbedResult = {0x5555'5555'5555'5555, 0x5555'5555'5555'5555};
114
115 template <auto kElement>
TestVectorMaskedElementTo()116 void TestVectorMaskedElementTo() {
117 size_t max_mask = sizeof(kElement) == sizeof(uint8_t) ? 131071
118 : sizeof(kElement) == sizeof(uint16_t) ? 511
119 : sizeof(kElement) == sizeof(uint32_t) ? 31
120 : 7;
121 for (size_t mask = 0; mask < max_mask; ++mask) {
122 const SIMD128Register src = kUndisturbedResult;
123 const auto [simd_mask] = BitMaskToSimdMask<decltype(kElement)>(mask);
124 ASSERT_EQ(VectorMaskedElementToForTests<kElement>(simd_mask, src),
125 VectorMaskedElementTo<kElement>(simd_mask, src));
126 }
127 }
128
TEST(VectorIntrinsics, VectorMaskedElementTo) {
  // For each integer type, in the order listed, run the helper first with the type's minimum and
  // then with its maximum as the replacement element.
  auto CheckLimitsOf = []<typename... IntTypes>() {
    ((TestVectorMaskedElementTo<std::numeric_limits<IntTypes>::min()>(),
      TestVectorMaskedElementTo<std::numeric_limits<IntTypes>::max()>()),
     ...);
  };
  CheckLimitsOf
      .operator()<int8_t, uint8_t, int16_t, uint16_t, int32_t, uint32_t, int64_t, uint64_t>();
}
147
TEST(VectorIntrinsics, Vaddvv) {
  // Verify calls the supplied Vaddvv instantiation with an all-ones first operand and `arg2`,
  // routes the result through VectorMasking over kUndisturbedResult (third/fourth arguments 0 and
  // 16, presumably vstart and vl), and expects `result_to_check` both without a mask argument and
  // with the all-ones mask 0xffff.
  auto Verify = []<typename ElementType>(
                    auto Vaddvv,
                    SIMD128Register arg2,
                    [[gnu::vector_size(16), gnu::may_alias]] ElementType result_to_check) {
    using WrappedElement = Wrapping<ElementType>;
    const auto Sum = [&] { return std::get<0>(Vaddvv(__m128i{-1, -1}, arg2)); };
    const std::tuple expected{result_to_check};

    // Form without a mask argument.
    ASSERT_EQ((VectorMasking<WrappedElement, TailProcessing::kAgnostic>(
                  kUndisturbedResult, Sum(), 0, 16)),
              expected);
    // Form with a mask argument, all bits set.
    ASSERT_EQ((VectorMasking<WrappedElement,
                             TailProcessing::kAgnostic,
                             InactiveProcessing::kAgnostic>(
                  kUndisturbedResult, Sum(), 0, 16, RawInt16{0xffff})),
              expected);
  };

  // 8-bit lanes.
  Verify(Vaddvv<UInt8>,
         __v16qu{0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1},
         __v16qu{255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0});
  Verify(Vaddvv<UInt8>,
         __v16qu{1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0},
         __v16qu{0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255});
  // 16-bit lanes.
  Verify(Vaddvv<UInt16>,
         __v8hu{0, 1, 0, 1, 0, 1, 0, 1},
         __v8hu{0xffff, 0x0000, 0xffff, 0x0000, 0xffff, 0x0000, 0xffff, 0x0000});
  Verify(Vaddvv<UInt16>,
         __v8hu{1, 0, 1, 0, 1, 0, 1, 0},
         __v8hu{0x0000, 0xffff, 0x0000, 0xffff, 0x0000, 0xffff, 0x0000, 0xffff});
  // 32-bit lanes.
  Verify(Vaddvv<UInt32>,
         __v4su{0, 1, 0, 1},
         __v4su{0xffff'ffff, 0x0000'0000, 0xffff'ffff, 0x0000'0000});
  Verify(Vaddvv<UInt32>,
         __v4su{1, 0, 1, 0},
         __v4su{0x0000'0000, 0xffff'ffff, 0x0000'0000, 0xffff'ffff});
  // 64-bit lanes.
  Verify(Vaddvv<UInt64>, __v2du{0, 1}, __v2du{0xffff'ffff'ffff'ffff, 0x0000'0000'0000'0000});
  Verify(Vaddvv<UInt64>, __v2du{1, 0}, __v2du{0x0000'0000'0000'0000, 0xffff'ffff'ffff'ffff});
}
187
TEST(VectorIntrinsics, Vaddvx) {
  // Verify calls the supplied Vaddvx instantiation with `arg1` and the scalar UInt8{1}, routes
  // the result through VectorMasking over kUndisturbedResult (third/fourth arguments 0 and 16,
  // presumably vstart and vl), and expects `result_to_check` both without a mask argument and
  // with the all-ones mask 0xffff.
  auto Verify = []<typename ElementType>(
                    auto Vaddvx,
                    SIMD128Register arg1,
                    [[gnu::vector_size(16), gnu::may_alias]] ElementType result_to_check) {
    using WrappedElement = Wrapping<ElementType>;
    const auto Sum = [&] { return std::get<0>(Vaddvx(arg1, UInt8{1})); };
    const std::tuple expected{result_to_check};

    // Form without a mask argument.
    ASSERT_EQ((VectorMasking<WrappedElement, TailProcessing::kAgnostic>(
                  kUndisturbedResult, Sum(), 0, 16)),
              expected);
    // Form with a mask argument, all bits set.
    ASSERT_EQ((VectorMasking<WrappedElement,
                             TailProcessing::kAgnostic,
                             InactiveProcessing::kAgnostic>(
                  kUndisturbedResult, Sum(), 0, 16, RawInt16{0xffff})),
              expected);
  };

  // 8-bit lanes.
  Verify(Vaddvx<UInt8>,
         __v16qu{254, 255, 254, 255, 254, 255, 254, 255, 254, 255, 254, 255, 254, 255, 254, 255},
         __v16qu{255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0});
  Verify(Vaddvx<UInt8>,
         __v16qu{255, 254, 255, 254, 255, 254, 255, 254, 255, 254, 255, 254, 255, 254, 255, 254},
         __v16qu{0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255});
  // 16-bit lanes.
  Verify(Vaddvx<UInt16>,
         __v8hu{0xfffe, 0xffff, 0xfffe, 0xffff, 0xfffe, 0xffff, 0xfffe, 0xffff},
         __v8hu{0xffff, 0x0000, 0xffff, 0x0000, 0xffff, 0x0000, 0xffff, 0x0000});
  Verify(Vaddvx<UInt16>,
         __v8hu{0xffff, 0xfffe, 0xffff, 0xfffe, 0xffff, 0xfffe, 0xffff, 0xfffe},
         __v8hu{0x0000, 0xffff, 0x0000, 0xffff, 0x0000, 0xffff, 0x0000, 0xffff});
  // 32-bit lanes.
  Verify(Vaddvx<UInt32>,
         __v4su{0xffff'fffe, 0xffff'ffff, 0xffff'fffe, 0xffff'ffff},
         __v4su{0xffff'ffff, 0x0000'0000, 0xffff'ffff, 0x0000'0000});
  Verify(Vaddvx<UInt32>,
         __v4su{0xffff'ffff, 0xffff'fffe, 0xffff'ffff, 0xffff'fffe},
         __v4su{0x0000'0000, 0xffff'ffff, 0x0000'0000, 0xffff'ffff});
  // 64-bit lanes.
  Verify(Vaddvx<UInt64>,
         __v2du{0xffff'ffff'ffff'fffe, 0xffff'ffff'ffff'ffff},
         __v2du{0xffff'ffff'ffff'ffff, 0x0000'0000'0000'0000});
  Verify(Vaddvx<UInt64>,
         __v2du{0xffff'ffff'ffff'ffff, 0xffff'ffff'ffff'fffe},
         __v2du{0x0000'0000'0000'0000, 0xffff'ffff'ffff'ffff});
}
228
TEST(VectorIntrinsics, VlArgForVv) {
  // Verify passes kHalfLen (half the number of lanes in the register) as the fourth VectorMasking
  // argument (presumably vl) and checks every tail/inactive policy combination with an all-ones
  // mask. In the expected vectors below the upper half is all ones for the agnostic tail policy
  // and keeps the 0x55 fill of kUndisturbedResult for the undisturbed tail policy.
  auto Verify = []<typename ElementType>(
                    auto Vaddvv,
                    SIMD128Register arg2,
                    [[gnu::vector_size(16), gnu::may_alias]] ElementType result_to_check_agnostic,
                    [[gnu::vector_size(16),
                      gnu::may_alias]] ElementType result_to_check_undisturbed) {
    using WrappedElement = Wrapping<ElementType>;
    constexpr size_t kHalfLen = sizeof(SIMD128Register) / sizeof(ElementType) / 2;
    const auto Sum = [&] { return std::get<0>(Vaddvv(__m128i{-1, -1}, arg2)); };
    const std::tuple expected_agnostic{result_to_check_agnostic};
    const std::tuple expected_undisturbed{result_to_check_undisturbed};

    // Forms without a mask argument.
    ASSERT_EQ((VectorMasking<WrappedElement, TailProcessing::kAgnostic>(
                  kUndisturbedResult, Sum(), 0, kHalfLen)),
              expected_agnostic);
    ASSERT_EQ((VectorMasking<WrappedElement, TailProcessing::kUndisturbed>(
                  kUndisturbedResult, Sum(), 0, kHalfLen)),
              expected_undisturbed);

    // Forms with an all-ones mask: only the tail policy decides the expected vector.
    ASSERT_EQ((VectorMasking<WrappedElement,
                             TailProcessing::kAgnostic,
                             InactiveProcessing::kAgnostic>(
                  kUndisturbedResult, Sum(), 0, kHalfLen, RawInt16{0xffff})),
              expected_agnostic);
    ASSERT_EQ((VectorMasking<WrappedElement,
                             TailProcessing::kAgnostic,
                             InactiveProcessing::kUndisturbed>(
                  kUndisturbedResult, Sum(), 0, kHalfLen, RawInt16{0xffff})),
              expected_agnostic);
    ASSERT_EQ((VectorMasking<WrappedElement,
                             TailProcessing::kUndisturbed,
                             InactiveProcessing::kAgnostic>(
                  kUndisturbedResult, Sum(), 0, kHalfLen, RawInt16{0xffff})),
              expected_undisturbed);
    ASSERT_EQ((VectorMasking<WrappedElement,
                             TailProcessing::kUndisturbed,
                             InactiveProcessing::kUndisturbed>(
                  kUndisturbedResult, Sum(), 0, kHalfLen, RawInt16{0xffff})),
              expected_undisturbed);
  };

  // 8-bit lanes.
  Verify(Vaddvv<UInt8>,
         __v16qu{0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1},
         __v16qu{255, 0, 255, 0, 255, 0, 255, 0, 255, 255, 255, 255, 255, 255, 255, 255},
         __v16qu{255, 0, 255, 0, 255, 0, 255, 0, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55});
  Verify(Vaddvv<UInt8>,
         __v16qu{1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0},
         __v16qu{0, 255, 0, 255, 0, 255, 0, 255, 255, 255, 255, 255, 255, 255, 255, 255},
         __v16qu{0, 255, 0, 255, 0, 255, 0, 255, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55});
  // 16-bit lanes.
  Verify(Vaddvv<UInt16>,
         __v8hu{0, 1, 0, 1, 0, 1, 0, 1},
         __v8hu{0xffff, 0x0000, 0xffff, 0x0000, 0xffff, 0xffff, 0xffff, 0xffff},
         __v8hu{0xffff, 0x0000, 0xffff, 0x0000, 0x5555, 0x5555, 0x5555, 0x5555});
  Verify(Vaddvv<UInt16>,
         __v8hu{1, 0, 1, 0, 1, 0, 1, 0},
         __v8hu{0x0000, 0xffff, 0x0000, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff},
         __v8hu{0x0000, 0xffff, 0x0000, 0xffff, 0x5555, 0x5555, 0x5555, 0x5555});
  // 32-bit lanes.
  Verify(Vaddvv<UInt32>,
         __v4su{0, 1, 0, 1},
         __v4su{0xffff'ffff, 0x0000'0000, 0xffff'ffff, 0xffff'ffff},
         __v4su{0xffff'ffff, 0x0000'0000, 0x5555'5555, 0x5555'5555});
  Verify(Vaddvv<UInt32>,
         __v4su{1, 0, 1, 0},
         __v4su{0x0000'0000, 0xffff'ffff, 0xffff'ffff, 0xffff'ffff},
         __v4su{0x0000'0000, 0xffff'ffff, 0x5555'5555, 0x5555'5555});
  // 64-bit lanes.
  Verify(Vaddvv<UInt64>,
         __v2du{0, 1},
         __v2du{0xffff'ffff'ffff'ffff, 0xffff'ffff'ffff'ffff},
         __v2du{0xffff'ffff'ffff'ffff, 0x5555'5555'5555'5555});
  Verify(Vaddvv<UInt64>,
         __v2du{1, 0},
         __v2du{0x0000'0000'0000'0000, 0xffff'ffff'ffff'ffff},
         __v2du{0x0000'0000'0000'0000, 0x5555'5555'5555'5555});
}
313
TEST(VectorIntrinsics, VlArgForVx) {
  // Vector-scalar variant of the vl check: Verify calls the supplied Vaddvx instantiation with
  // `arg1` and UInt8{1}, passes kHalfLen (half the number of lanes) as the fourth VectorMasking
  // argument (presumably vl) and checks every tail/inactive policy combination with an all-ones
  // mask. In the expected vectors below the upper half is all ones for the agnostic tail policy
  // and keeps the 0x55 fill of kUndisturbedResult for the undisturbed tail policy.
  auto Verify = []<typename ElementType>(
                    auto Vaddvx,
                    SIMD128Register arg1,
                    [[gnu::vector_size(16), gnu::may_alias]] ElementType result_to_check_agnostic,
                    [[gnu::vector_size(16),
                      gnu::may_alias]] ElementType result_to_check_undisturbed) {
    using WrappedElement = Wrapping<ElementType>;
    constexpr size_t kHalfLen = sizeof(SIMD128Register) / sizeof(ElementType) / 2;
    const auto Sum = [&] { return std::get<0>(Vaddvx(arg1, UInt8{1})); };
    const std::tuple expected_agnostic{result_to_check_agnostic};
    const std::tuple expected_undisturbed{result_to_check_undisturbed};

    // Forms without a mask argument.
    ASSERT_EQ((VectorMasking<WrappedElement, TailProcessing::kAgnostic>(
                  kUndisturbedResult, Sum(), 0, kHalfLen)),
              expected_agnostic);
    ASSERT_EQ((VectorMasking<WrappedElement, TailProcessing::kUndisturbed>(
                  kUndisturbedResult, Sum(), 0, kHalfLen)),
              expected_undisturbed);

    // Forms with an all-ones mask: only the tail policy decides the expected vector.
    ASSERT_EQ((VectorMasking<WrappedElement,
                             TailProcessing::kAgnostic,
                             InactiveProcessing::kAgnostic>(
                  kUndisturbedResult, Sum(), 0, kHalfLen, RawInt16{0xffff})),
              expected_agnostic);
    ASSERT_EQ((VectorMasking<WrappedElement,
                             TailProcessing::kAgnostic,
                             InactiveProcessing::kUndisturbed>(
                  kUndisturbedResult, Sum(), 0, kHalfLen, RawInt16{0xffff})),
              expected_agnostic);
    ASSERT_EQ((VectorMasking<WrappedElement,
                             TailProcessing::kUndisturbed,
                             InactiveProcessing::kAgnostic>(
                  kUndisturbedResult, Sum(), 0, kHalfLen, RawInt16{0xffff})),
              expected_undisturbed);
    ASSERT_EQ((VectorMasking<WrappedElement,
                             TailProcessing::kUndisturbed,
                             InactiveProcessing::kUndisturbed>(
                  kUndisturbedResult, Sum(), 0, kHalfLen, RawInt16{0xffff})),
              expected_undisturbed);
  };

  // 8-bit lanes.
  Verify(Vaddvx<UInt8>,
         __v16qu{254, 255, 254, 255, 254, 255, 254, 255, 254, 255, 254, 255, 254, 255, 254, 255},
         __v16qu{255, 0, 255, 0, 255, 0, 255, 0, 255, 255, 255, 255, 255, 255, 255, 255},
         __v16qu{255, 0, 255, 0, 255, 0, 255, 0, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55});
  Verify(Vaddvx<UInt8>,
         __v16qu{255, 254, 255, 254, 255, 254, 255, 254, 255, 254, 255, 254, 255, 254, 255, 254},
         __v16qu{0, 255, 0, 255, 0, 255, 0, 255, 255, 255, 255, 255, 255, 255, 255, 255},
         __v16qu{0, 255, 0, 255, 0, 255, 0, 255, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55});
  // 16-bit lanes.
  Verify(Vaddvx<UInt16>,
         __v8hu{0xfffe, 0xffff, 0xfffe, 0xffff, 0xfffe, 0xffff, 0xfffe, 0xffff},
         __v8hu{0xffff, 0x0000, 0xffff, 0x0000, 0xffff, 0xffff, 0xffff, 0xffff},
         __v8hu{0xffff, 0x0000, 0xffff, 0x0000, 0x5555, 0x5555, 0x5555, 0x5555});
  Verify(Vaddvx<UInt16>,
         __v8hu{0xffff, 0xfffe, 0xffff, 0xfffe, 0xffff, 0xfffe, 0xffff, 0xfffe},
         __v8hu{0x0000, 0xffff, 0x0000, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff},
         __v8hu{0x0000, 0xffff, 0x0000, 0xffff, 0x5555, 0x5555, 0x5555, 0x5555});
  // 32-bit lanes.
  Verify(Vaddvx<UInt32>,
         __v4su{0xffff'fffe, 0xffff'ffff, 0xffff'fffe, 0xffff'ffff},
         __v4su{0xffff'ffff, 0x0000'0000, 0xffff'ffff, 0xffff'ffff},
         __v4su{0xffff'ffff, 0x0000'0000, 0x5555'5555, 0x5555'5555});
  Verify(Vaddvx<UInt32>,
         __v4su{0xffff'ffff, 0xffff'fffe, 0xffff'ffff, 0xffff'fffe},
         __v4su{0x0000'0000, 0xffff'ffff, 0xffff'ffff, 0xffff'ffff},
         __v4su{0x0000'0000, 0xffff'ffff, 0x5555'5555, 0x5555'5555});
  // 64-bit lanes.
  Verify(Vaddvx<UInt64>,
         __v2du{0xffff'ffff'ffff'fffe, 0xffff'ffff'ffff'ffff},
         __v2du{0xffff'ffff'ffff'ffff, 0xffff'ffff'ffff'ffff},
         __v2du{0xffff'ffff'ffff'ffff, 0x5555'5555'5555'5555});
  Verify(Vaddvx<UInt64>,
         __v2du{0xffff'ffff'ffff'ffff, 0xffff'ffff'ffff'fffe},
         __v2du{0x0000'0000'0000'0000, 0xffff'ffff'ffff'ffff},
         __v2du{0x0000'0000'0000'0000, 0x5555'5555'5555'5555});
}
394
TEST(VectorIntrinsics, VmaskArgForVvv) {
  // Verify applies the partially-set mask 0xfdda (bits 0, 2 and 5 are clear among the low eight)
  // together with kHalfLen as the fourth VectorMasking argument (presumably vl), once per
  // tail/inactive policy combination. The four expected vectors are named <tail>_<inactive>: in
  // the data below, undisturbed lanes keep the 0x55 fill of kUndisturbedResult and agnostic lanes
  // are all ones.
  auto Verify =
      []<typename ElementType>(
          auto Vaddvv,
          SIMD128Register arg2,
          [[gnu::vector_size(16), gnu::may_alias]] ElementType result_to_check_agnostic_agnostic,
          [[gnu::vector_size(16), gnu::may_alias]] ElementType result_to_check_agnostic_undisturbed,
          [[gnu::vector_size(16), gnu::may_alias]] ElementType result_to_check_undisturbed_agnostic,
          [[gnu::vector_size(16),
            gnu::may_alias]] ElementType result_to_check_undisturbed_undisturbed) {
        using WrappedElement = Wrapping<ElementType>;
        constexpr size_t kHalfLen = sizeof(SIMD128Register) / sizeof(ElementType) / 2;
        const auto Sum = [&] { return std::get<0>(Vaddvv(__m128i{-1, -1}, arg2)); };

        // Tail agnostic, inactive agnostic.
        ASSERT_EQ((VectorMasking<WrappedElement,
                                 TailProcessing::kAgnostic,
                                 InactiveProcessing::kAgnostic>(
                      kUndisturbedResult, Sum(), 0, kHalfLen, RawInt16{0xfdda})),
                  std::tuple{result_to_check_agnostic_agnostic});
        // Tail agnostic, inactive undisturbed.
        ASSERT_EQ((VectorMasking<WrappedElement,
                                 TailProcessing::kAgnostic,
                                 InactiveProcessing::kUndisturbed>(
                      kUndisturbedResult, Sum(), 0, kHalfLen, RawInt16{0xfdda})),
                  std::tuple{result_to_check_agnostic_undisturbed});
        // Tail undisturbed, inactive agnostic.
        ASSERT_EQ((VectorMasking<WrappedElement,
                                 TailProcessing::kUndisturbed,
                                 InactiveProcessing::kAgnostic>(
                      kUndisturbedResult, Sum(), 0, kHalfLen, RawInt16{0xfdda})),
                  std::tuple{result_to_check_undisturbed_agnostic});
        // Tail undisturbed, inactive undisturbed.
        ASSERT_EQ((VectorMasking<WrappedElement,
                                 TailProcessing::kUndisturbed,
                                 InactiveProcessing::kUndisturbed>(
                      kUndisturbedResult, Sum(), 0, kHalfLen, RawInt16{0xfdda})),
                  std::tuple{result_to_check_undisturbed_undisturbed});
      };

  // 8-bit lanes.
  Verify(
      Vaddvv<UInt8>,
      __v16qu{0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1},
      __v16qu{255, 0, 255, 0, 255, 255, 255, 0, 255, 255, 255, 255, 255, 255, 255, 255},
      __v16qu{0x55, 0, 0x55, 0, 255, 0x55, 255, 0, 255, 255, 255, 255, 255, 255, 255, 255},
      __v16qu{255, 0, 255, 0, 255, 255, 255, 0, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55},
      __v16qu{0x55, 0, 0x55, 0, 255, 0x55, 255, 0, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55});
  Verify(
      Vaddvv<UInt8>,
      __v16qu{1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0},
      __v16qu{255, 255, 255, 255, 0, 255, 0, 255, 255, 255, 255, 255, 255, 255, 255, 255},
      __v16qu{0x55, 255, 0x55, 255, 0, 0x55, 0, 255, 255, 255, 255, 255, 255, 255, 255, 255},
      __v16qu{255, 255, 255, 255, 0, 255, 0, 255, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55},
      __v16qu{
          0x55, 255, 0x55, 255, 0, 0x55, 0, 255, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55});
  // 16-bit lanes.
  Verify(Vaddvv<UInt16>,
         __v8hu{0, 1, 0, 1, 0, 1, 0, 1},
         __v8hu{0xffff, 0x0000, 0xffff, 0x0000, 0xffff, 0xffff, 0xffff, 0xffff},
         __v8hu{0x5555, 0x0000, 0x5555, 0x0000, 0xffff, 0xffff, 0xffff, 0xffff},
         __v8hu{0xffff, 0x0000, 0xffff, 0x0000, 0x5555, 0x5555, 0x5555, 0x5555},
         __v8hu{0x5555, 0x0000, 0x5555, 0x0000, 0x5555, 0x5555, 0x5555, 0x5555});
  Verify(Vaddvv<UInt16>,
         __v8hu{1, 0, 1, 0, 1, 0, 1, 0},
         __v8hu{0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff},
         __v8hu{0x5555, 0xffff, 0x5555, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff},
         __v8hu{0xffff, 0xffff, 0xffff, 0xffff, 0x5555, 0x5555, 0x5555, 0x5555},
         __v8hu{0x5555, 0xffff, 0x5555, 0xffff, 0x5555, 0x5555, 0x5555, 0x5555});
  // 32-bit lanes.
  Verify(Vaddvv<UInt32>,
         __v4su{0, 1, 0, 1},
         __v4su{0xffff'ffff, 0x0000'0000, 0xffff'ffff, 0xffff'ffff},
         __v4su{0x5555'5555, 0x0000'0000, 0xffff'ffff, 0xffff'ffff},
         __v4su{0xffff'ffff, 0x0000'0000, 0x5555'5555, 0x5555'5555},
         __v4su{0x5555'5555, 0x0000'0000, 0x5555'5555, 0x5555'5555});
  Verify(Vaddvv<UInt32>,
         __v4su{1, 0, 1, 0},
         __v4su{0xffff'ffff, 0xffff'ffff, 0xffff'ffff, 0xffff'ffff},
         __v4su{0x5555'5555, 0xffff'ffff, 0xffff'ffff, 0xffff'ffff},
         __v4su{0xffff'ffff, 0xffff'ffff, 0x5555'5555, 0x5555'5555},
         __v4su{0x5555'5555, 0xffff'ffff, 0x5555'5555, 0x5555'5555});
  // 64-bit lanes.
  Verify(Vaddvv<UInt64>,
         __v2du{0, 1},
         __v2du{0xffff'ffff'ffff'ffff, 0xffff'ffff'ffff'ffff},
         __v2du{0x5555'5555'5555'5555, 0xffff'ffff'ffff'ffff},
         __v2du{0xffff'ffff'ffff'ffff, 0x5555'5555'5555'5555},
         __v2du{0x5555'5555'5555'5555, 0x5555'5555'5555'5555});
  Verify(Vaddvv<UInt64>,
         __v2du{1, 0},
         __v2du{0xffff'ffff'ffff'ffff, 0xffff'ffff'ffff'ffff},
         __v2du{0x5555'5555'5555'5555, 0xffff'ffff'ffff'ffff},
         __v2du{0xffff'ffff'ffff'ffff, 0x5555'5555'5555'5555},
         __v2du{0x5555'5555'5555'5555, 0x5555'5555'5555'5555});
}
495
TEST(VectorIntrinsics, VmaskArgForVvx) {
  // Vector-scalar variant of the mask check: Verify calls the supplied Vaddvx instantiation with
  // `arg1` and UInt8{1}, then applies the partially-set mask 0xfdda (bits 0, 2 and 5 are clear
  // among the low eight) together with kHalfLen as the fourth VectorMasking argument (presumably
  // vl), once per tail/inactive policy combination. The four expected vectors are named
  // <tail>_<inactive>: in the data below, undisturbed lanes keep the 0x55 fill of
  // kUndisturbedResult and agnostic lanes are all ones.
  auto Verify =
      []<typename ElementType>(
          auto Vaddvx,
          SIMD128Register arg1,
          [[gnu::vector_size(16), gnu::may_alias]] ElementType result_to_check_agnostic_agnostic,
          [[gnu::vector_size(16), gnu::may_alias]] ElementType result_to_check_agnostic_undisturbed,
          [[gnu::vector_size(16), gnu::may_alias]] ElementType result_to_check_undisturbed_agnostic,
          [[gnu::vector_size(16),
            gnu::may_alias]] ElementType result_to_check_undisturbed_undisturbed) {
        using WrappedElement = Wrapping<ElementType>;
        constexpr size_t kHalfLen = sizeof(SIMD128Register) / sizeof(ElementType) / 2;
        const auto Sum = [&] { return std::get<0>(Vaddvx(arg1, UInt8{1})); };

        // Tail agnostic, inactive agnostic.
        ASSERT_EQ((VectorMasking<WrappedElement,
                                 TailProcessing::kAgnostic,
                                 InactiveProcessing::kAgnostic>(
                      kUndisturbedResult, Sum(), 0, kHalfLen, RawInt16{0xfdda})),
                  std::tuple{result_to_check_agnostic_agnostic});
        // Tail agnostic, inactive undisturbed.
        ASSERT_EQ((VectorMasking<WrappedElement,
                                 TailProcessing::kAgnostic,
                                 InactiveProcessing::kUndisturbed>(
                      kUndisturbedResult, Sum(), 0, kHalfLen, RawInt16{0xfdda})),
                  std::tuple{result_to_check_agnostic_undisturbed});
        // Tail undisturbed, inactive agnostic.
        ASSERT_EQ((VectorMasking<WrappedElement,
                                 TailProcessing::kUndisturbed,
                                 InactiveProcessing::kAgnostic>(
                      kUndisturbedResult, Sum(), 0, kHalfLen, RawInt16{0xfdda})),
                  std::tuple{result_to_check_undisturbed_agnostic});
        // Tail undisturbed, inactive undisturbed.
        ASSERT_EQ((VectorMasking<WrappedElement,
                                 TailProcessing::kUndisturbed,
                                 InactiveProcessing::kUndisturbed>(
                      kUndisturbedResult, Sum(), 0, kHalfLen, RawInt16{0xfdda})),
                  std::tuple{result_to_check_undisturbed_undisturbed});
      };

  // 8-bit lanes.
  Verify(
      Vaddvx<UInt8>,
      __v16qu{254, 255, 254, 255, 254, 255, 254, 255, 254, 255, 254, 255, 254, 255, 254, 255},
      __v16qu{255, 0, 255, 0, 255, 255, 255, 0, 255, 255, 255, 255, 255, 255, 255, 255},
      __v16qu{0x55, 0, 0x55, 0, 255, 0x55, 255, 0, 255, 255, 255, 255, 255, 255, 255, 255},
      __v16qu{255, 0, 255, 0, 255, 255, 255, 0, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55},
      __v16qu{0x55, 0, 0x55, 0, 255, 0x55, 255, 0, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55});
  Verify(
      Vaddvx<UInt8>,
      __v16qu{255, 254, 255, 254, 255, 254, 255, 254, 255, 254, 255, 254, 255, 254, 255, 254},
      __v16qu{255, 255, 255, 255, 0, 255, 0, 255, 255, 255, 255, 255, 255, 255, 255, 255},
      __v16qu{0x55, 255, 0x55, 255, 0, 0x55, 0, 255, 255, 255, 255, 255, 255, 255, 255, 255},
      __v16qu{255, 255, 255, 255, 0, 255, 0, 255, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55},
      __v16qu{
          0x55, 255, 0x55, 255, 0, 0x55, 0, 255, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55});
  // 16-bit lanes.
  Verify(Vaddvx<UInt16>,
         __v8hu{0xfffe, 0xffff, 0xfffe, 0xffff, 0xfffe, 0xffff, 0xfffe, 0xffff},
         __v8hu{0xffff, 0x0000, 0xffff, 0x0000, 0xffff, 0xffff, 0xffff, 0xffff},
         __v8hu{0x5555, 0x0000, 0x5555, 0x0000, 0xffff, 0xffff, 0xffff, 0xffff},
         __v8hu{0xffff, 0x0000, 0xffff, 0x0000, 0x5555, 0x5555, 0x5555, 0x5555},
         __v8hu{0x5555, 0x0000, 0x5555, 0x0000, 0x5555, 0x5555, 0x5555, 0x5555});
  Verify(Vaddvx<UInt16>,
         __v8hu{0xffff, 0xfffe, 0xffff, 0xfffe, 0xffff, 0xfffe, 0xffff, 0xfffe},
         __v8hu{0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff},
         __v8hu{0x5555, 0xffff, 0x5555, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff},
         __v8hu{0xffff, 0xffff, 0xffff, 0xffff, 0x5555, 0x5555, 0x5555, 0x5555},
         __v8hu{0x5555, 0xffff, 0x5555, 0xffff, 0x5555, 0x5555, 0x5555, 0x5555});
  // 32-bit lanes.
  Verify(Vaddvx<UInt32>,
         __v4su{0xffff'fffe, 0xffff'ffff, 0xffff'fffe, 0xffff'ffff},
         __v4su{0xffff'ffff, 0x0000'0000, 0xffff'ffff, 0xffff'ffff},
         __v4su{0x5555'5555, 0x0000'0000, 0xffff'ffff, 0xffff'ffff},
         __v4su{0xffff'ffff, 0x0000'0000, 0x5555'5555, 0x5555'5555},
         __v4su{0x5555'5555, 0x0000'0000, 0x5555'5555, 0x5555'5555});
  Verify(Vaddvx<UInt32>,
         __v4su{0xffff'ffff, 0xffff'fffe, 0xffff'ffff, 0xffff'fffe},
         __v4su{0xffff'ffff, 0xffff'ffff, 0xffff'ffff, 0xffff'ffff},
         __v4su{0x5555'5555, 0xffff'ffff, 0xffff'ffff, 0xffff'ffff},
         __v4su{0xffff'ffff, 0xffff'ffff, 0x5555'5555, 0x5555'5555},
         __v4su{0x5555'5555, 0xffff'ffff, 0x5555'5555, 0x5555'5555});
  // 64-bit lanes.
  Verify(Vaddvx<UInt64>,
         __v2du{0xffff'ffff'ffff'fffe, 0xffff'ffff'ffff'ffff},
         __v2du{0xffff'ffff'ffff'ffff, 0xffff'ffff'ffff'ffff},
         __v2du{0x5555'5555'5555'5555, 0xffff'ffff'ffff'ffff},
         __v2du{0xffff'ffff'ffff'ffff, 0x5555'5555'5555'5555},
         __v2du{0x5555'5555'5555'5555, 0x5555'5555'5555'5555});
  Verify(Vaddvx<UInt64>,
         __v2du{0xffff'ffff'ffff'ffff, 0xffff'ffff'ffff'fffe},
         __v2du{0xffff'ffff'ffff'ffff, 0xffff'ffff'ffff'ffff},
         __v2du{0x5555'5555'5555'5555, 0xffff'ffff'ffff'ffff},
         __v2du{0xffff'ffff'ffff'ffff, 0x5555'5555'5555'5555},
         __v2du{0x5555'5555'5555'5555, 0x5555'5555'5555'5555});
}
594
TEST(VectorIntrinsics, VstartArgVv) {
  // Verify passes 1 as the third VectorMasking argument (presumably vstart) with 16 as the fourth
  // and checks every tail/inactive policy combination against the same expected vector. In the
  // expected data below lane 0 keeps the 0x55 fill of kUndisturbedResult and every other lane
  // holds the addition result.
  auto Verify = []<typename ElementType>(
                    auto Vaddvv,
                    SIMD128Register arg2,
                    [[gnu::vector_size(16), gnu::may_alias]] ElementType result_to_check) {
    using WrappedElement = Wrapping<ElementType>;
    const auto Sum = [&] { return std::get<0>(Vaddvv(__m128i{-1, -1}, arg2)); };
    const std::tuple expected{result_to_check};

    // Forms without a mask argument.
    ASSERT_EQ((VectorMasking<WrappedElement, TailProcessing::kAgnostic>(
                  kUndisturbedResult, Sum(), 1, 16)),
              expected);
    ASSERT_EQ((VectorMasking<WrappedElement, TailProcessing::kUndisturbed>(
                  kUndisturbedResult, Sum(), 1, 16)),
              expected);

    // Forms with an all-ones mask.
    ASSERT_EQ((VectorMasking<WrappedElement,
                             TailProcessing::kAgnostic,
                             InactiveProcessing::kAgnostic>(
                  kUndisturbedResult, Sum(), 1, 16, RawInt16{0xffff})),
              expected);
    ASSERT_EQ((VectorMasking<WrappedElement,
                             TailProcessing::kAgnostic,
                             InactiveProcessing::kUndisturbed>(
                  kUndisturbedResult, Sum(), 1, 16, RawInt16{0xffff})),
              expected);
    ASSERT_EQ((VectorMasking<WrappedElement,
                             TailProcessing::kUndisturbed,
                             InactiveProcessing::kAgnostic>(
                  kUndisturbedResult, Sum(), 1, 16, RawInt16{0xffff})),
              expected);
    ASSERT_EQ((VectorMasking<WrappedElement,
                             TailProcessing::kUndisturbed,
                             InactiveProcessing::kUndisturbed>(
                  kUndisturbedResult, Sum(), 1, 16, RawInt16{0xffff})),
              expected);
  };

  // 8-bit lanes.
  Verify(Vaddvv<UInt8>,
         __v16qu{0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1},
         __v16qu{0x55, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0});
  Verify(Vaddvv<UInt8>,
         __v16qu{1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0},
         __v16qu{0x55, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255});
  // 16-bit lanes.
  Verify(Vaddvv<UInt16>,
         __v8hu{0, 1, 0, 1, 0, 1, 0, 1},
         __v8hu{0x5555, 0x0000, 0xffff, 0x0000, 0xffff, 0x0000, 0xffff, 0x0000});
  Verify(Vaddvv<UInt16>,
         __v8hu{1, 0, 1, 0, 1, 0, 1, 0},
         __v8hu{0x5555, 0xffff, 0x0000, 0xffff, 0x0000, 0xffff, 0x0000, 0xffff});
  // 32-bit lanes.
  Verify(Vaddvv<UInt32>,
         __v4su{0, 1, 0, 1},
         __v4su{0x5555'5555, 0x0000'0000, 0xffff'ffff, 0x0000'0000});
  Verify(Vaddvv<UInt32>,
         __v4su{1, 0, 1, 0},
         __v4su{0x5555'5555, 0xffff'ffff, 0x0000'0000, 0xffff'ffff});
  // 64-bit lanes.
  Verify(Vaddvv<UInt64>, __v2du{0, 1}, __v2du{0x5555'5555'5555'5555, 0x0000'0000'0000'0000});
  Verify(Vaddvv<UInt64>, __v2du{1, 0}, __v2du{0x5555'5555'5555'5555, 0xffff'ffff'ffff'ffff});
}
664
TEST(VectorIntrinsics, VstartArgVx) {
  // Vector-scalar variant of the vstart check: Verify calls the supplied Vaddvx instantiation
  // with `arg1` and UInt8{1}, passes 1 as the third VectorMasking argument (presumably vstart)
  // with 16 as the fourth, and checks every tail/inactive policy combination against the same
  // expected vector. In the expected data below lane 0 keeps the 0x55 fill of kUndisturbedResult
  // and every other lane holds the addition result.
  auto Verify = []<typename ElementType>(
                    auto Vaddvx,
                    SIMD128Register arg1,
                    [[gnu::vector_size(16), gnu::may_alias]] ElementType result_to_check) {
    using WrappedElement = Wrapping<ElementType>;
    const auto Sum = [&] { return std::get<0>(Vaddvx(arg1, UInt8{1})); };
    const std::tuple expected{result_to_check};

    // Forms without a mask argument.
    ASSERT_EQ((VectorMasking<WrappedElement, TailProcessing::kAgnostic>(
                  kUndisturbedResult, Sum(), 1, 16)),
              expected);
    ASSERT_EQ((VectorMasking<WrappedElement, TailProcessing::kUndisturbed>(
                  kUndisturbedResult, Sum(), 1, 16)),
              expected);

    // Forms with an all-ones mask.
    ASSERT_EQ((VectorMasking<WrappedElement,
                             TailProcessing::kAgnostic,
                             InactiveProcessing::kAgnostic>(
                  kUndisturbedResult, Sum(), 1, 16, RawInt16{0xffff})),
              expected);
    ASSERT_EQ((VectorMasking<WrappedElement,
                             TailProcessing::kAgnostic,
                             InactiveProcessing::kUndisturbed>(
                  kUndisturbedResult, Sum(), 1, 16, RawInt16{0xffff})),
              expected);
    ASSERT_EQ((VectorMasking<WrappedElement,
                             TailProcessing::kUndisturbed,
                             InactiveProcessing::kAgnostic>(
                  kUndisturbedResult, Sum(), 1, 16, RawInt16{0xffff})),
              expected);
    ASSERT_EQ((VectorMasking<WrappedElement,
                             TailProcessing::kUndisturbed,
                             InactiveProcessing::kUndisturbed>(
                  kUndisturbedResult, Sum(), 1, 16, RawInt16{0xffff})),
              expected);
  };

  // 8-bit lanes.
  Verify(Vaddvx<UInt8>,
         __v16qu{254, 255, 254, 255, 254, 255, 254, 255, 254, 255, 254, 255, 254, 255, 254, 255},
         __v16qu{0x55, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0});
  Verify(Vaddvx<UInt8>,
         __v16qu{255, 254, 255, 254, 255, 254, 255, 254, 255, 254, 255, 254, 255, 254, 255, 254},
         __v16qu{0x55, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255});
  // 16-bit lanes.
  Verify(Vaddvx<UInt16>,
         __v8hu{0xfffe, 0xffff, 0xfffe, 0xffff, 0xfffe, 0xffff, 0xfffe, 0xffff},
         __v8hu{0x5555, 0x0000, 0xffff, 0x0000, 0xffff, 0x0000, 0xffff, 0x0000});
  Verify(Vaddvx<UInt16>,
         __v8hu{0xffff, 0xfffe, 0xffff, 0xfffe, 0xffff, 0xfffe, 0xffff, 0xfffe},
         __v8hu{0x5555, 0xffff, 0x0000, 0xffff, 0x0000, 0xffff, 0x0000, 0xffff});
  // 32-bit lanes.
  Verify(Vaddvx<UInt32>,
         __v4su{0xffff'fffe, 0xffff'ffff, 0xffff'fffe, 0xffff'ffff},
         __v4su{0x5555'5555, 0x0000'0000, 0xffff'ffff, 0x0000'0000});
  Verify(Vaddvx<UInt32>,
         __v4su{0xffff'ffff, 0xffff'fffe, 0xffff'ffff, 0xffff'fffe},
         __v4su{0x5555'5555, 0xffff'ffff, 0x0000'0000, 0xffff'ffff});
  // 64-bit lanes.
  Verify(Vaddvx<UInt64>,
         __v2du{0xffff'ffff'ffff'fffe, 0xffff'ffff'ffff'ffff},
         __v2du{0x5555'5555'5555'5555, 0x0000'0000'0000'0000});
  Verify(Vaddvx<UInt64>,
         __v2du{0xffff'ffff'ffff'ffff, 0xffff'ffff'ffff'fffe},
         __v2du{0x5555'5555'5555'5555, 0xffff'ffff'ffff'ffff});
}
726
TEST(VectorIntrinsics, Vsubvv) {
  // Calls the vector-vector intrinsic under test with an all-zero first operand and `arg2` as the
  // second operand, then passes the result through VectorMasking twice: once without a mask
  // operand and once with a 0xffff mask operand, both with 0 and 16 as the trailing integer
  // arguments. Each configuration has to produce `result_to_check`.
  auto Verify = []<typename ElementType>(
                    auto Vsubvv,
                    SIMD128Register arg2,
                    [[gnu::vector_size(16), gnu::may_alias]] ElementType result_to_check) {
    ASSERT_EQ((VectorMasking<Wrapping<ElementType>, TailProcessing::kAgnostic>(
                  kUndisturbedResult, std::get<0>(Vsubvv(__m128i{0, 0}, arg2)), 0, 16)),
              std::tuple{result_to_check});
    ASSERT_EQ(
        (VectorMasking<Wrapping<ElementType>,
                       TailProcessing::kAgnostic,
                       InactiveProcessing::kAgnostic>(
            kUndisturbedResult, std::get<0>(Vsubvv(__m128i{0, 0}, arg2)), 0, 16, RawInt16{0xffff})),
        std::tuple{result_to_check});
  };

  // Every expected element is 0 minus the matching element of arg2, wrapped to the element width:
  // 0 stays 0 and 1 becomes the all-ones value. Each width is checked with both lane orders so
  // that even and odd element positions each see a borrow.
  Verify(Vsubvv<UInt8>,
         __v16qu{0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1},
         __v16qu{0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255});
  Verify(Vsubvv<UInt8>,
         __v16qu{1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0},
         __v16qu{255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0});
  Verify(Vsubvv<UInt16>,
         __v8hu{0, 1, 0, 1, 0, 1, 0, 1},
         __v8hu{0x0000, 0xffff, 0x0000, 0xffff, 0x0000, 0xffff, 0x0000, 0xffff});
  Verify(Vsubvv<UInt16>,
         __v8hu{1, 0, 1, 0, 1, 0, 1, 0},
         __v8hu{0xffff, 0x0000, 0xffff, 0x0000, 0xffff, 0x0000, 0xffff, 0x0000});
  Verify(Vsubvv<UInt32>,
         __v4su{0, 1, 0, 1},
         __v4su{0x0000'0000, 0xffff'ffff, 0x0000'0000, 0xffff'ffff});
  // Mirrored lane order for 32-bit elements, matching the coverage of the other widths.
  Verify(Vsubvv<UInt32>,
         __v4su{1, 0, 1, 0},
         __v4su{0xffff'ffff, 0x0000'0000, 0xffff'ffff, 0x0000'0000});
  Verify(Vsubvv<UInt64>, __v2du{0, 1}, __v2du{0x0000'0000'0000'0000, 0xffff'ffff'ffff'ffff});
  Verify(Vsubvv<UInt64>, __v2du{1, 0}, __v2du{0xffff'ffff'ffff'ffff, 0x0000'0000'0000'0000});
}
760
TEST(VectorIntrinsics, Vsubvx) {
  // Hands `source` and a scalar 1 to the vector-scalar intrinsic under test and sends the result
  // through VectorMasking twice: once without a mask operand and once with a 0xffff mask operand.
  // Both runs pass 0 and 16 as the trailing integer arguments and must produce `expected`.
  auto Verify = []<typename ElementType>(
                    auto intrinsic,
                    SIMD128Register source,
                    [[gnu::vector_size(16), gnu::may_alias]] ElementType expected) {
    constexpr int kStart = 0;
    constexpr int kLength = 16;
    // Evaluated afresh at every use, so each assertion still performs its own intrinsic call.
    auto difference = [&] { return std::get<0>(intrinsic(source, UInt8{1})); };
    const std::tuple want{expected};

    // No mask operand.
    ASSERT_EQ((VectorMasking<Wrapping<ElementType>, TailProcessing::kAgnostic>(
                  kUndisturbedResult, difference(), kStart, kLength)),
              want);

    // Mask operand with all sixteen bits set.
    ASSERT_EQ((VectorMasking<Wrapping<ElementType>,
                             TailProcessing::kAgnostic,
                             InactiveProcessing::kAgnostic>(
                  kUndisturbedResult, difference(), kStart, kLength, RawInt16{0xffff})),
              want);
  };

  // Every expected element is the matching source element minus one, wrapped to the element
  // width: 1 becomes 0 and 0 becomes the all-ones value. Both lane orders are covered per width.

  // 8-bit elements.
  Verify(Vsubvx<UInt8>,
         __v16qu{1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0},
         __v16qu{0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255});
  Verify(Vsubvx<UInt8>,
         __v16qu{0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1},
         __v16qu{255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0});

  // 16-bit elements.
  Verify(Vsubvx<UInt16>,
         __v8hu{1, 0, 1, 0, 1, 0, 1, 0},
         __v8hu{0x0000, 0xffff, 0x0000, 0xffff, 0x0000, 0xffff, 0x0000, 0xffff});
  Verify(Vsubvx<UInt16>,
         __v8hu{0, 1, 0, 1, 0, 1, 0, 1},
         __v8hu{0xffff, 0x0000, 0xffff, 0x0000, 0xffff, 0x0000, 0xffff, 0x0000});

  // 32-bit elements.
  Verify(Vsubvx<UInt32>,
         __v4su{1, 0, 1, 0},
         __v4su{0x0000'0000, 0xffff'ffff, 0x0000'0000, 0xffff'ffff});
  Verify(Vsubvx<UInt32>,
         __v4su{0, 1, 0, 1},
         __v4su{0xffff'ffff, 0x0000'0000, 0xffff'ffff, 0x0000'0000});

  // 64-bit elements.
  Verify(Vsubvx<UInt64>, __v2du{1, 0}, __v2du{0x0000'0000'0000'0000, 0xffff'ffff'ffff'ffff});
  Verify(Vsubvx<UInt64>, __v2du{0, 1}, __v2du{0xffff'ffff'ffff'ffff, 0x0000'0000'0000'0000});
}
797
798 } // namespace
799
800 } // namespace berberis::intrinsics
801