/*
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved.
 *
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
 */

#include <tuple>

#define SIMD_CHECK 1
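// SIMD_CHECK (above) is assumed to enable the extra argument checking built
// into the C reference intrinsics included below, so that invalid inputs
// (e.g. out-of-range shift counts) are reported at run time.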
#include "aom_dsp/aom_simd_inline.h"
#include "aom_dsp/simd/v256_intrinsics_c.h"
#include "gtest/gtest.h"
#include "test/register_state_check.h"

namespace SIMD_NAMESPACE {

template <typename param_signature>
class TestIntrinsic : public ::testing::TestWithParam<param_signature> {
 public:
  ~TestIntrinsic() override = default;
  void SetUp() override {
    mask = std::get<0>(this->GetParam());
    maskwidth = std::get<1>(this->GetParam());
    name = std::get<2>(this->GetParam());
  }

 protected:
  uint32_t mask, maskwidth;
  const char *name;
};

// Create one typedef for each function signature
#define TYPEDEF_SIMD(name)                                              \
  typedef TestIntrinsic<std::tuple<uint32_t, uint32_t, const char *> > \
      ARCH_POSTFIX(name)
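// Each typedef name encodes the signature being tested: the return type comes
// first, followed by the argument types; e.g. ARCH_POSTFIX(V64_V64V64) covers
// intrinsics that take two c_v64 arguments and return a c_v64.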

TYPEDEF_SIMD(V64_U8);
TYPEDEF_SIMD(V64_U16);
TYPEDEF_SIMD(V64_U32);
TYPEDEF_SIMD(V64_V64);
TYPEDEF_SIMD(U32_V64);
TYPEDEF_SIMD(S32_V64);
TYPEDEF_SIMD(U64_V64);
TYPEDEF_SIMD(S64_V64);
TYPEDEF_SIMD(V64_U32U32);
TYPEDEF_SIMD(V64_V64V64);
TYPEDEF_SIMD(S64_V64V64);
TYPEDEF_SIMD(V64_V64U32);
TYPEDEF_SIMD(U32_V64V64);
TYPEDEF_SIMD(V128_V64);
TYPEDEF_SIMD(V128_V128);
TYPEDEF_SIMD(U32_V128);
TYPEDEF_SIMD(U64_V128);
TYPEDEF_SIMD(V64_V128);
TYPEDEF_SIMD(V128_U8);
TYPEDEF_SIMD(V128_U16);
TYPEDEF_SIMD(V128_U32);
TYPEDEF_SIMD(V128_U64);
TYPEDEF_SIMD(V128_U64U64);
TYPEDEF_SIMD(V128_V64V64);
TYPEDEF_SIMD(V128_V128V128);
TYPEDEF_SIMD(V128_V128V128V128);
TYPEDEF_SIMD(S64_V128V128);
TYPEDEF_SIMD(V128_V128U32);
TYPEDEF_SIMD(U32_V128V128);
TYPEDEF_SIMD(U64_V128V128);
TYPEDEF_SIMD(V256_V128);
TYPEDEF_SIMD(V256_V256);
TYPEDEF_SIMD(U64_V256);
TYPEDEF_SIMD(V256_V128V128);
TYPEDEF_SIMD(V256_V256V256);
TYPEDEF_SIMD(V256_V256V256V256);
TYPEDEF_SIMD(U64_V256V256);
TYPEDEF_SIMD(S64_V256V256);
TYPEDEF_SIMD(V256_V256U32);
TYPEDEF_SIMD(U32_V256V256);
TYPEDEF_SIMD(V256_U8);
TYPEDEF_SIMD(V256_U16);
TYPEDEF_SIMD(V256_U32);
TYPEDEF_SIMD(V256_U64);
TYPEDEF_SIMD(U32_V256);
TYPEDEF_SIMD(V64_V256);

// Google Test allows up to 50 tests per case, so split the largest
typedef ARCH_POSTFIX(V64_V64) ARCH_POSTFIX(V64_V64_Part2);
typedef ARCH_POSTFIX(V64_V64V64) ARCH_POSTFIX(V64_V64V64_Part2);
typedef ARCH_POSTFIX(V128_V128) ARCH_POSTFIX(V128_V128_Part2);
typedef ARCH_POSTFIX(V128_V128) ARCH_POSTFIX(V128_V128_Part3);
typedef ARCH_POSTFIX(V128_V128) ARCH_POSTFIX(V128_V128_Part4);
typedef ARCH_POSTFIX(V128_V128V128) ARCH_POSTFIX(V128_V128V128_Part2);
typedef ARCH_POSTFIX(V256_V256) ARCH_POSTFIX(V256_V256_Part2);
typedef ARCH_POSTFIX(V256_V256) ARCH_POSTFIX(V256_V256_Part3);
typedef ARCH_POSTFIX(V256_V256) ARCH_POSTFIX(V256_V256_Part4);
typedef ARCH_POSTFIX(V256_V256) ARCH_POSTFIX(V256_V256_Part5);
typedef ARCH_POSTFIX(V256_V256V256) ARCH_POSTFIX(V256_V256V256_Part2);

// These functions are machine tuned and located elsewhere
template <typename c_ret, typename c_arg>
void TestSimd1Arg(uint32_t iterations, uint32_t mask, uint32_t maskwidth,
                  const char *name);

template <typename c_ret, typename c_arg1, typename c_arg2>
void TestSimd2Args(uint32_t iterations, uint32_t mask, uint32_t maskwidth,
                   const char *name);

template <typename c_ret, typename c_arg1, typename c_arg2, typename c_arg3>
void TestSimd3Args(uint32_t iterations, uint32_t mask, uint32_t maskwidth,
                   const char *name);

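// Number of test iterations passed to the TestSimd*Args helpers above.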
const int kIterations = 65536;

// Add a macro layer since TEST_P will quote the name, so we need to expand it
// first with the prefix.
#define MY_TEST_P(name, test) TEST_P(name, test)

MY_TEST_P(ARCH_POSTFIX(V64_U8), TestIntrinsics) {
  TestSimd1Arg<c_v64, uint8_t>(kIterations, mask, maskwidth, name);
}

MY_TEST_P(ARCH_POSTFIX(V64_U16), TestIntrinsics) {
  TestSimd1Arg<c_v64, uint16_t>(kIterations, mask, maskwidth, name);
}

MY_TEST_P(ARCH_POSTFIX(V64_U32), TestIntrinsics) {
  TestSimd1Arg<c_v64, uint32_t>(kIterations, mask, maskwidth, name);
}

MY_TEST_P(ARCH_POSTFIX(V64_V64), TestIntrinsics) {
  TestSimd1Arg<c_v64, c_v64>(kIterations, mask, maskwidth, name);
}

MY_TEST_P(ARCH_POSTFIX(U64_V64), TestIntrinsics) {
  TestSimd1Arg<uint64_t, c_v64>(kIterations, mask, maskwidth, name);
}

MY_TEST_P(ARCH_POSTFIX(S64_V64), TestIntrinsics) {
  TestSimd1Arg<int64_t, c_v64>(kIterations, mask, maskwidth, name);
}

MY_TEST_P(ARCH_POSTFIX(U32_V64), TestIntrinsics) {
  TestSimd1Arg<uint32_t, c_v64>(kIterations, mask, maskwidth, name);
}

MY_TEST_P(ARCH_POSTFIX(S32_V64), TestIntrinsics) {
  TestSimd1Arg<int32_t, c_v64>(kIterations, mask, maskwidth, name);
}

MY_TEST_P(ARCH_POSTFIX(V64_U32U32), TestIntrinsics) {
  TestSimd2Args<c_v64, uint32_t, uint32_t>(kIterations, mask, maskwidth, name);
}

MY_TEST_P(ARCH_POSTFIX(V64_V64V64), TestIntrinsics) {
  TestSimd2Args<c_v64, c_v64, c_v64>(kIterations, mask, maskwidth, name);
}

MY_TEST_P(ARCH_POSTFIX(S64_V64V64), TestIntrinsics) {
  TestSimd2Args<int64_t, c_v64, c_v64>(kIterations, mask, maskwidth, name);
}

MY_TEST_P(ARCH_POSTFIX(U32_V64V64), TestIntrinsics) {
  TestSimd2Args<uint32_t, c_v64, c_v64>(kIterations, mask, maskwidth, name);
}

MY_TEST_P(ARCH_POSTFIX(V64_V64U32), TestIntrinsics) {
  TestSimd2Args<c_v64, c_v64, uint32_t>(kIterations, mask, maskwidth, name);
}

// Google Test allows up to 50 tests per case, so split the largest
MY_TEST_P(ARCH_POSTFIX(V64_V64_Part2), TestIntrinsics) {
  TestSimd1Arg<c_v64, c_v64>(kIterations, mask, maskwidth, name);
}

MY_TEST_P(ARCH_POSTFIX(V64_V64V64_Part2), TestIntrinsics) {
  TestSimd2Args<c_v64, c_v64, c_v64>(kIterations, mask, maskwidth, name);
}

MY_TEST_P(ARCH_POSTFIX(U32_V128), TestIntrinsics) {
  TestSimd1Arg<uint32_t, c_v128>(kIterations, mask, maskwidth, name);
}

MY_TEST_P(ARCH_POSTFIX(U64_V128), TestIntrinsics) {
  TestSimd1Arg<uint64_t, c_v128>(kIterations, mask, maskwidth, name);
}

MY_TEST_P(ARCH_POSTFIX(V64_V128), TestIntrinsics) {
  TestSimd1Arg<c_v64, c_v128>(kIterations, mask, maskwidth, name);
}

MY_TEST_P(ARCH_POSTFIX(V128_V128), TestIntrinsics) {
  TestSimd1Arg<c_v128, c_v128>(kIterations, mask, maskwidth, name);
}

MY_TEST_P(ARCH_POSTFIX(V128_U8), TestIntrinsics) {
  TestSimd1Arg<c_v128, uint8_t>(kIterations, mask, maskwidth, name);
}

MY_TEST_P(ARCH_POSTFIX(V128_U16), TestIntrinsics) {
  TestSimd1Arg<c_v128, uint16_t>(kIterations, mask, maskwidth, name);
}

MY_TEST_P(ARCH_POSTFIX(V128_U32), TestIntrinsics) {
  TestSimd1Arg<c_v128, uint32_t>(kIterations, mask, maskwidth, name);
}

MY_TEST_P(ARCH_POSTFIX(V128_U64), TestIntrinsics) {
  TestSimd1Arg<c_v128, uint64_t>(kIterations, mask, maskwidth, name);
}

MY_TEST_P(ARCH_POSTFIX(V128_V64), TestIntrinsics) {
  TestSimd1Arg<c_v128, c_v64>(kIterations, mask, maskwidth, name);
}

MY_TEST_P(ARCH_POSTFIX(V128_V128V128), TestIntrinsics) {
  TestSimd2Args<c_v128, c_v128, c_v128>(kIterations, mask, maskwidth, name);
}

MY_TEST_P(ARCH_POSTFIX(V128_V128V128V128), TestIntrinsics) {
  TestSimd3Args<c_v128, c_v128, c_v128, c_v128>(kIterations, mask, maskwidth,
                                                name);
}

MY_TEST_P(ARCH_POSTFIX(U32_V128V128), TestIntrinsics) {
  TestSimd2Args<uint32_t, c_v128, c_v128>(kIterations, mask, maskwidth, name);
}

MY_TEST_P(ARCH_POSTFIX(U64_V128V128), TestIntrinsics) {
  TestSimd2Args<uint64_t, c_v128, c_v128>(kIterations, mask, maskwidth, name);
}

MY_TEST_P(ARCH_POSTFIX(S64_V128V128), TestIntrinsics) {
  TestSimd2Args<int64_t, c_v128, c_v128>(kIterations, mask, maskwidth, name);
}

MY_TEST_P(ARCH_POSTFIX(V128_U64U64), TestIntrinsics) {
  TestSimd2Args<c_v128, uint64_t, uint64_t>(kIterations, mask, maskwidth, name);
}

MY_TEST_P(ARCH_POSTFIX(V128_V64V64), TestIntrinsics) {
  TestSimd2Args<c_v128, c_v64, c_v64>(kIterations, mask, maskwidth, name);
}

MY_TEST_P(ARCH_POSTFIX(V128_V128U32), TestIntrinsics) {
  TestSimd2Args<c_v128, c_v128, uint32_t>(kIterations, mask, maskwidth, name);
}

MY_TEST_P(ARCH_POSTFIX(V128_V128V128_Part2), TestIntrinsics) {
  TestSimd2Args<c_v128, c_v128, c_v128>(kIterations, mask, maskwidth, name);
}

MY_TEST_P(ARCH_POSTFIX(V128_V128_Part2), TestIntrinsics) {
  TestSimd1Arg<c_v128, c_v128>(kIterations, mask, maskwidth, name);
}

MY_TEST_P(ARCH_POSTFIX(V128_V128_Part3), TestIntrinsics) {
  TestSimd1Arg<c_v128, c_v128>(kIterations, mask, maskwidth, name);
}

MY_TEST_P(ARCH_POSTFIX(V128_V128_Part4), TestIntrinsics) {
  TestSimd1Arg<c_v128, c_v128>(kIterations, mask, maskwidth, name);
}

MY_TEST_P(ARCH_POSTFIX(U64_V256), TestIntrinsics) {
  TestSimd1Arg<uint64_t, c_v256>(kIterations, mask, maskwidth, name);
}

MY_TEST_P(ARCH_POSTFIX(V256_V256), TestIntrinsics) {
  TestSimd1Arg<c_v256, c_v256>(kIterations, mask, maskwidth, name);
}

MY_TEST_P(ARCH_POSTFIX(V256_V128), TestIntrinsics) {
  TestSimd1Arg<c_v256, c_v128>(kIterations, mask, maskwidth, name);
}

MY_TEST_P(ARCH_POSTFIX(V256_V256V256), TestIntrinsics) {
  TestSimd2Args<c_v256, c_v256, c_v256>(kIterations, mask, maskwidth, name);
}

MY_TEST_P(ARCH_POSTFIX(V256_V256V256V256), TestIntrinsics) {
  TestSimd3Args<c_v256, c_v256, c_v256, c_v256>(kIterations, mask, maskwidth,
                                                name);
}

MY_TEST_P(ARCH_POSTFIX(V256_V128V128), TestIntrinsics) {
  TestSimd2Args<c_v256, c_v128, c_v128>(kIterations, mask, maskwidth, name);
}

MY_TEST_P(ARCH_POSTFIX(U32_V256V256), TestIntrinsics) {
  TestSimd2Args<uint32_t, c_v256, c_v256>(kIterations, mask, maskwidth, name);
}

MY_TEST_P(ARCH_POSTFIX(U64_V256V256), TestIntrinsics) {
  TestSimd2Args<uint64_t, c_v256, c_v256>(kIterations, mask, maskwidth, name);
}

MY_TEST_P(ARCH_POSTFIX(S64_V256V256), TestIntrinsics) {
  TestSimd2Args<int64_t, c_v256, c_v256>(kIterations, mask, maskwidth, name);
}

MY_TEST_P(ARCH_POSTFIX(V256_V256V256_Part2), TestIntrinsics) {
  TestSimd2Args<c_v256, c_v256, c_v256>(kIterations, mask, maskwidth, name);
}

MY_TEST_P(ARCH_POSTFIX(V256_V256U32), TestIntrinsics) {
  TestSimd2Args<c_v256, c_v256, uint32_t>(kIterations, mask, maskwidth, name);
}

MY_TEST_P(ARCH_POSTFIX(V256_V256_Part2), TestIntrinsics) {
  TestSimd1Arg<c_v256, c_v256>(kIterations, mask, maskwidth, name);
}

MY_TEST_P(ARCH_POSTFIX(V256_V256_Part3), TestIntrinsics) {
  TestSimd1Arg<c_v256, c_v256>(kIterations, mask, maskwidth, name);
}

MY_TEST_P(ARCH_POSTFIX(V256_V256_Part4), TestIntrinsics) {
  TestSimd1Arg<c_v256, c_v256>(kIterations, mask, maskwidth, name);
}

MY_TEST_P(ARCH_POSTFIX(V256_V256_Part5), TestIntrinsics) {
  TestSimd1Arg<c_v256, c_v256>(kIterations, mask, maskwidth, name);
}

MY_TEST_P(ARCH_POSTFIX(V256_U8), TestIntrinsics) {
  TestSimd1Arg<c_v256, uint8_t>(kIterations, mask, maskwidth, name);
}

MY_TEST_P(ARCH_POSTFIX(V256_U16), TestIntrinsics) {
  TestSimd1Arg<c_v256, uint16_t>(kIterations, mask, maskwidth, name);
}

MY_TEST_P(ARCH_POSTFIX(V256_U32), TestIntrinsics) {
  TestSimd1Arg<c_v256, uint32_t>(kIterations, mask, maskwidth, name);
}

MY_TEST_P(ARCH_POSTFIX(V256_U64), TestIntrinsics) {
  TestSimd1Arg<c_v256, uint64_t>(kIterations, mask, maskwidth, name);
}

MY_TEST_P(ARCH_POSTFIX(U32_V256), TestIntrinsics) {
  TestSimd1Arg<uint32_t, c_v256>(kIterations, mask, maskwidth, name);
}

MY_TEST_P(ARCH_POSTFIX(V64_V256), TestIntrinsics) {
  TestSimd1Arg<c_v64, c_v256>(kIterations, mask, maskwidth, name);
}

// Add a macro layer since INSTANTIATE_TEST_SUITE_P will quote the name, so we
// need to expand it first with the prefix.
#define INSTANTIATE(name, type, ...) \
  INSTANTIATE_TEST_SUITE_P(name, type, ::testing::Values(__VA_ARGS__))

#define SIMD_TUPLE(name, mask, maskwidth) \
  std::make_tuple(mask, maskwidth, static_cast<const char *>(#name))
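// The mask/maskwidth pair of each tuple is forwarded to the test helpers,
// which presumably use it to bound the inputs they generate: the shift
// intrinsics below pass mask 7/15/31/63 with maskwidth 32 so shift counts stay
// in range, shuffles mask each 8-bit index, and 0U, 0U leaves the inputs
// unconstrained.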

INSTANTIATE(ARCH, ARCH_POSTFIX(U32_V64V64), SIMD_TUPLE(v64_sad_u8, 0U, 0U),
            SIMD_TUPLE(v64_ssd_u8, 0U, 0U));

INSTANTIATE(
    ARCH, ARCH_POSTFIX(V64_V64V64), SIMD_TUPLE(v64_add_8, 0U, 0U),
    SIMD_TUPLE(v64_add_16, 0U, 0U), SIMD_TUPLE(v64_sadd_s16, 0U, 0U),
    SIMD_TUPLE(v64_add_32, 0U, 0U), SIMD_TUPLE(v64_sub_8, 0U, 0U),
    SIMD_TUPLE(v64_ssub_u8, 0U, 0U), SIMD_TUPLE(v64_ssub_s8, 0U, 0U),
    SIMD_TUPLE(v64_sub_16, 0U, 0U), SIMD_TUPLE(v64_ssub_s16, 0U, 0U),
    SIMD_TUPLE(v64_ssub_u16, 0U, 0U), SIMD_TUPLE(v64_sub_32, 0U, 0U),
    SIMD_TUPLE(v64_ziplo_8, 0U, 0U), SIMD_TUPLE(v64_ziphi_8, 0U, 0U),
    SIMD_TUPLE(v64_ziplo_16, 0U, 0U), SIMD_TUPLE(v64_ziphi_16, 0U, 0U),
    SIMD_TUPLE(v64_ziplo_32, 0U, 0U), SIMD_TUPLE(v64_ziphi_32, 0U, 0U),
    SIMD_TUPLE(v64_pack_s32_s16, 0U, 0U), SIMD_TUPLE(v64_pack_s16_u8, 0U, 0U),
    SIMD_TUPLE(v64_pack_s16_s8, 0U, 0U), SIMD_TUPLE(v64_unziphi_8, 0U, 0U),
    SIMD_TUPLE(v64_unziplo_8, 0U, 0U), SIMD_TUPLE(v64_unziphi_16, 0U, 0U),
    SIMD_TUPLE(v64_unziplo_16, 0U, 0U), SIMD_TUPLE(v64_or, 0U, 0U),
    SIMD_TUPLE(v64_xor, 0U, 0U), SIMD_TUPLE(v64_and, 0U, 0U),
    SIMD_TUPLE(v64_andn, 0U, 0U), SIMD_TUPLE(v64_mullo_s16, 0U, 0U),
    SIMD_TUPLE(v64_mulhi_s16, 0U, 0U), SIMD_TUPLE(v64_mullo_s32, 0U, 0U),
    SIMD_TUPLE(v64_madd_s16, 0U, 0U), SIMD_TUPLE(v64_madd_us8, 0U, 0U),
    SIMD_TUPLE(v64_avg_u8, 0U, 0U), SIMD_TUPLE(v64_rdavg_u8, 0U, 0U),
    SIMD_TUPLE(v64_avg_u16, 0U, 0U), SIMD_TUPLE(v64_min_u8, 0U, 0U),
    SIMD_TUPLE(v64_max_u8, 0U, 0U), SIMD_TUPLE(v64_min_s8, 0U, 0U),
    SIMD_TUPLE(v64_max_s8, 0U, 0U), SIMD_TUPLE(v64_min_s16, 0U, 0U),
    SIMD_TUPLE(v64_max_s16, 0U, 0U), SIMD_TUPLE(v64_cmpgt_s8, 0U, 0U),
    SIMD_TUPLE(v64_cmplt_s8, 0U, 0U), SIMD_TUPLE(v64_cmpeq_8, 0U, 0U),
    SIMD_TUPLE(v64_cmpgt_s16, 0U, 0U), SIMD_TUPLE(v64_cmplt_s16, 0U, 0U),
    SIMD_TUPLE(v64_cmpeq_16, 0U, 0U));

INSTANTIATE(
    ARCH, ARCH_POSTFIX(V64_V64V64_Part2), SIMD_TUPLE(v64_shuffle_8, 7U, 8U),
    SIMD_TUPLE(v64_pack_s32_u16, 0U, 0U), SIMD_TUPLE(v64_rdavg_u16, 0U, 0U),
    SIMD_TUPLE(v64_sadd_s8, 0U, 0U), SIMD_TUPLE(v64_sadd_u8, 0U, 0U),
    SIMD_TUPLE(imm_v64_align<1>, 0U, 0U), SIMD_TUPLE(imm_v64_align<2>, 0U, 0U),
    SIMD_TUPLE(imm_v64_align<3>, 0U, 0U), SIMD_TUPLE(imm_v64_align<4>, 0U, 0U),
    SIMD_TUPLE(imm_v64_align<5>, 0U, 0U), SIMD_TUPLE(imm_v64_align<6>, 0U, 0U),
    SIMD_TUPLE(imm_v64_align<7>, 0U, 0U));

INSTANTIATE(ARCH, ARCH_POSTFIX(V64_V64), SIMD_TUPLE(v64_abs_s8, 0U, 0U),
            SIMD_TUPLE(v64_abs_s16, 0U, 0U),
            SIMD_TUPLE(v64_unpacklo_u8_s16, 0U, 0U),
            SIMD_TUPLE(v64_unpackhi_u8_s16, 0U, 0U),
            SIMD_TUPLE(v64_unpacklo_s8_s16, 0U, 0U),
            SIMD_TUPLE(v64_unpackhi_s8_s16, 0U, 0U),
            SIMD_TUPLE(v64_unpacklo_u16_s32, 0U, 0U),
            SIMD_TUPLE(v64_unpacklo_s16_s32, 0U, 0U),
            SIMD_TUPLE(v64_unpackhi_u16_s32, 0U, 0U),
            SIMD_TUPLE(v64_unpackhi_s16_s32, 0U, 0U),
            SIMD_TUPLE(imm_v64_shr_n_byte<1>, 0U, 0U),
            SIMD_TUPLE(imm_v64_shr_n_byte<2>, 0U, 0U),
            SIMD_TUPLE(imm_v64_shr_n_byte<3>, 0U, 0U),
            SIMD_TUPLE(imm_v64_shr_n_byte<4>, 0U, 0U),
            SIMD_TUPLE(imm_v64_shr_n_byte<5>, 0U, 0U),
            SIMD_TUPLE(imm_v64_shr_n_byte<6>, 0U, 0U),
            SIMD_TUPLE(imm_v64_shr_n_byte<7>, 0U, 0U),
            SIMD_TUPLE(imm_v64_shl_n_byte<1>, 0U, 0U),
            SIMD_TUPLE(imm_v64_shl_n_byte<2>, 0U, 0U),
            SIMD_TUPLE(imm_v64_shl_n_byte<3>, 0U, 0U),
            SIMD_TUPLE(imm_v64_shl_n_byte<4>, 0U, 0U),
            SIMD_TUPLE(imm_v64_shl_n_byte<5>, 0U, 0U),
            SIMD_TUPLE(imm_v64_shl_n_byte<6>, 0U, 0U),
            SIMD_TUPLE(imm_v64_shl_n_byte<7>, 0U, 0U),
            SIMD_TUPLE(imm_v64_shl_n_8<1>, 0U, 0U),
            SIMD_TUPLE(imm_v64_shl_n_8<2>, 0U, 0U),
            SIMD_TUPLE(imm_v64_shl_n_8<3>, 0U, 0U),
            SIMD_TUPLE(imm_v64_shl_n_8<4>, 0U, 0U),
            SIMD_TUPLE(imm_v64_shl_n_8<5>, 0U, 0U),
            SIMD_TUPLE(imm_v64_shl_n_8<6>, 0U, 0U),
            SIMD_TUPLE(imm_v64_shl_n_8<7>, 0U, 0U),
            SIMD_TUPLE(imm_v64_shr_n_u8<1>, 0U, 0U),
            SIMD_TUPLE(imm_v64_shr_n_u8<2>, 0U, 0U),
            SIMD_TUPLE(imm_v64_shr_n_u8<3>, 0U, 0U),
            SIMD_TUPLE(imm_v64_shr_n_u8<4>, 0U, 0U),
            SIMD_TUPLE(imm_v64_shr_n_u8<5>, 0U, 0U),
            SIMD_TUPLE(imm_v64_shr_n_u8<6>, 0U, 0U),
            SIMD_TUPLE(imm_v64_shr_n_u8<7>, 0U, 0U),
            SIMD_TUPLE(imm_v64_shr_n_s8<1>, 0U, 0U),
            SIMD_TUPLE(imm_v64_shr_n_s8<2>, 0U, 0U),
            SIMD_TUPLE(imm_v64_shr_n_s8<3>, 0U, 0U),
            SIMD_TUPLE(imm_v64_shr_n_s8<4>, 0U, 0U),
            SIMD_TUPLE(imm_v64_shr_n_s8<5>, 0U, 0U),
            SIMD_TUPLE(imm_v64_shr_n_s8<6>, 0U, 0U),
            SIMD_TUPLE(imm_v64_shr_n_s8<7>, 0U, 0U),
            SIMD_TUPLE(imm_v64_shl_n_16<1>, 0U, 0U),
            SIMD_TUPLE(imm_v64_shl_n_16<2>, 0U, 0U),
            SIMD_TUPLE(imm_v64_shl_n_16<4>, 0U, 0U),
            SIMD_TUPLE(imm_v64_shl_n_16<6>, 0U, 0U),
            SIMD_TUPLE(imm_v64_shl_n_16<8>, 0U, 0U));

INSTANTIATE(ARCH, ARCH_POSTFIX(V64_V64_Part2),
            SIMD_TUPLE(imm_v64_shl_n_16<10>, 0U, 0U),
            SIMD_TUPLE(imm_v64_shl_n_16<12>, 0U, 0U),
            SIMD_TUPLE(imm_v64_shl_n_16<14>, 0U, 0U),
            SIMD_TUPLE(imm_v64_shr_n_u16<1>, 0U, 0U),
            SIMD_TUPLE(imm_v64_shr_n_u16<2>, 0U, 0U),
            SIMD_TUPLE(imm_v64_shr_n_u16<4>, 0U, 0U),
            SIMD_TUPLE(imm_v64_shr_n_u16<6>, 0U, 0U),
            SIMD_TUPLE(imm_v64_shr_n_u16<8>, 0U, 0U),
            SIMD_TUPLE(imm_v64_shr_n_u16<10>, 0U, 0U),
            SIMD_TUPLE(imm_v64_shr_n_u16<12>, 0U, 0U),
            SIMD_TUPLE(imm_v64_shr_n_u16<14>, 0U, 0U),
            SIMD_TUPLE(imm_v64_shr_n_s16<1>, 0U, 0U),
            SIMD_TUPLE(imm_v64_shr_n_s16<2>, 0U, 0U),
            SIMD_TUPLE(imm_v64_shr_n_s16<4>, 0U, 0U),
            SIMD_TUPLE(imm_v64_shr_n_s16<6>, 0U, 0U),
            SIMD_TUPLE(imm_v64_shr_n_s16<8>, 0U, 0U),
            SIMD_TUPLE(imm_v64_shr_n_s16<10>, 0U, 0U),
            SIMD_TUPLE(imm_v64_shr_n_s16<12>, 0U, 0U),
            SIMD_TUPLE(imm_v64_shr_n_s16<14>, 0U, 0U),
            SIMD_TUPLE(imm_v64_shl_n_32<1>, 0U, 0U),
            SIMD_TUPLE(imm_v64_shl_n_32<4>, 0U, 0U),
            SIMD_TUPLE(imm_v64_shl_n_32<8>, 0U, 0U),
            SIMD_TUPLE(imm_v64_shl_n_32<12>, 0U, 0U),
            SIMD_TUPLE(imm_v64_shl_n_32<16>, 0U, 0U),
            SIMD_TUPLE(imm_v64_shl_n_32<20>, 0U, 0U),
            SIMD_TUPLE(imm_v64_shl_n_32<24>, 0U, 0U),
            SIMD_TUPLE(imm_v64_shl_n_32<28>, 0U, 0U),
            SIMD_TUPLE(imm_v64_shr_n_u32<1>, 0U, 0U),
            SIMD_TUPLE(imm_v64_shr_n_u32<4>, 0U, 0U),
            SIMD_TUPLE(imm_v64_shr_n_u32<8>, 0U, 0U),
            SIMD_TUPLE(imm_v64_shr_n_u32<12>, 0U, 0U),
            SIMD_TUPLE(imm_v64_shr_n_u32<16>, 0U, 0U),
            SIMD_TUPLE(imm_v64_shr_n_u32<20>, 0U, 0U),
            SIMD_TUPLE(imm_v64_shr_n_u32<24>, 0U, 0U),
            SIMD_TUPLE(imm_v64_shr_n_u32<28>, 0U, 0U),
            SIMD_TUPLE(imm_v64_shr_n_s32<1>, 0U, 0U),
            SIMD_TUPLE(imm_v64_shr_n_s32<4>, 0U, 0U),
            SIMD_TUPLE(imm_v64_shr_n_s32<8>, 0U, 0U),
            SIMD_TUPLE(imm_v64_shr_n_s32<12>, 0U, 0U),
            SIMD_TUPLE(imm_v64_shr_n_s32<16>, 0U, 0U),
            SIMD_TUPLE(imm_v64_shr_n_s32<20>, 0U, 0U),
            SIMD_TUPLE(imm_v64_shr_n_s32<24>, 0U, 0U),
            SIMD_TUPLE(imm_v64_shr_n_s32<28>, 0U, 0U));

INSTANTIATE(ARCH, ARCH_POSTFIX(V64_V64U32), SIMD_TUPLE(v64_shl_8, 7U, 32U),
            SIMD_TUPLE(v64_shr_u8, 7U, 32U), SIMD_TUPLE(v64_shr_s8, 7U, 32U),
            SIMD_TUPLE(v64_shl_16, 15U, 32U), SIMD_TUPLE(v64_shr_u16, 15U, 32U),
            SIMD_TUPLE(v64_shr_s16, 15U, 32U), SIMD_TUPLE(v64_shl_32, 31U, 32U),
            SIMD_TUPLE(v64_shr_u32, 31U, 32U),
            SIMD_TUPLE(v64_shr_s32, 31U, 32U));

INSTANTIATE(ARCH, ARCH_POSTFIX(U64_V64), SIMD_TUPLE(v64_hadd_u8, 0U, 0U),
            SIMD_TUPLE(v64_u64, 0U, 0U));

INSTANTIATE(ARCH, ARCH_POSTFIX(S64_V64), SIMD_TUPLE(v64_hadd_s16, 0U, 0U));

INSTANTIATE(ARCH, ARCH_POSTFIX(U32_V64), SIMD_TUPLE(v64_low_u32, 0U, 0U),
            SIMD_TUPLE(v64_high_u32, 0U, 0U));

INSTANTIATE(ARCH, ARCH_POSTFIX(S32_V64), SIMD_TUPLE(v64_low_s32, 0U, 0U),
            SIMD_TUPLE(v64_high_s32, 0U, 0U));

INSTANTIATE(ARCH, ARCH_POSTFIX(S64_V64V64), SIMD_TUPLE(v64_dotp_s16, 0U, 0U),
            SIMD_TUPLE(v64_dotp_su8, 0U, 0U));

INSTANTIATE(ARCH, ARCH_POSTFIX(V64_U8), SIMD_TUPLE(v64_dup_8, 0U, 0U));

INSTANTIATE(ARCH, ARCH_POSTFIX(V64_U16), SIMD_TUPLE(v64_dup_16, 0U, 0U));

INSTANTIATE(ARCH, ARCH_POSTFIX(V64_U32), SIMD_TUPLE(v64_dup_32, 0U, 0U));

INSTANTIATE(ARCH, ARCH_POSTFIX(V64_U32U32), SIMD_TUPLE(v64_from_32, 0U, 0U));

INSTANTIATE(ARCH, ARCH_POSTFIX(U32_V128V128), SIMD_TUPLE(v128_sad_u8, 0U, 0U),
            SIMD_TUPLE(v128_ssd_u8, 0U, 0U), SIMD_TUPLE(v128_sad_u16, 0U, 0U));
INSTANTIATE(ARCH, ARCH_POSTFIX(U64_V128V128), SIMD_TUPLE(v128_ssd_s16, 0U, 0U));

INSTANTIATE(
    ARCH, ARCH_POSTFIX(V128_V128V128), SIMD_TUPLE(v128_add_8, 0U, 0U),
    SIMD_TUPLE(v128_add_16, 0U, 0U), SIMD_TUPLE(v128_sadd_s16, 0U, 0U),
    SIMD_TUPLE(v128_add_32, 0U, 0U), SIMD_TUPLE(v128_sub_8, 0U, 0U),
    SIMD_TUPLE(v128_ssub_u8, 0U, 0U), SIMD_TUPLE(v128_ssub_s8, 0U, 0U),
    SIMD_TUPLE(v128_sub_16, 0U, 0U), SIMD_TUPLE(v128_ssub_s16, 0U, 0U),
    SIMD_TUPLE(v128_ssub_u16, 0U, 0U), SIMD_TUPLE(v128_sub_32, 0U, 0U),
    SIMD_TUPLE(v128_ziplo_8, 0U, 0U), SIMD_TUPLE(v128_ziphi_8, 0U, 0U),
    SIMD_TUPLE(v128_ziplo_16, 0U, 0U), SIMD_TUPLE(v128_ziphi_16, 0U, 0U),
    SIMD_TUPLE(v128_ziplo_32, 0U, 0U), SIMD_TUPLE(v128_ziphi_32, 0U, 0U),
    SIMD_TUPLE(v128_ziplo_64, 0U, 0U), SIMD_TUPLE(v128_ziphi_64, 0U, 0U),
    SIMD_TUPLE(v128_unziphi_8, 0U, 0U), SIMD_TUPLE(v128_unziplo_8, 0U, 0U),
    SIMD_TUPLE(v128_unziphi_16, 0U, 0U), SIMD_TUPLE(v128_unziplo_16, 0U, 0U),
    SIMD_TUPLE(v128_unziphi_32, 0U, 0U), SIMD_TUPLE(v128_unziplo_32, 0U, 0U),
    SIMD_TUPLE(v128_pack_s32_s16, 0U, 0U), SIMD_TUPLE(v128_pack_s16_u8, 0U, 0U),
    SIMD_TUPLE(v128_pack_s16_s8, 0U, 0U), SIMD_TUPLE(v128_or, 0U, 0U),
    SIMD_TUPLE(v128_xor, 0U, 0U), SIMD_TUPLE(v128_and, 0U, 0U),
    SIMD_TUPLE(v128_andn, 0U, 0U), SIMD_TUPLE(v128_mullo_s16, 0U, 0U),
    SIMD_TUPLE(v128_mulhi_s16, 0U, 0U), SIMD_TUPLE(v128_mullo_s32, 0U, 0U),
    SIMD_TUPLE(v128_madd_s16, 0U, 0U), SIMD_TUPLE(v128_madd_us8, 0U, 0U),
    SIMD_TUPLE(v128_avg_u8, 0U, 0U), SIMD_TUPLE(v128_rdavg_u8, 0U, 0U),
    SIMD_TUPLE(v128_avg_u16, 0U, 0U), SIMD_TUPLE(v128_min_u8, 0U, 0U),
    SIMD_TUPLE(v128_max_u8, 0U, 0U), SIMD_TUPLE(v128_min_s8, 0U, 0U),
    SIMD_TUPLE(v128_max_s8, 0U, 0U), SIMD_TUPLE(v128_min_s16, 0U, 0U),
    SIMD_TUPLE(v128_max_s16, 0U, 0U), SIMD_TUPLE(v128_cmpgt_s8, 0U, 0U),
    SIMD_TUPLE(v128_cmplt_s8, 0U, 0U), SIMD_TUPLE(v128_cmpeq_8, 0U, 0U),
    SIMD_TUPLE(v128_cmpgt_s16, 0U, 0U));

INSTANTIATE(ARCH, ARCH_POSTFIX(V128_V128V128_Part2),
            SIMD_TUPLE(v128_pack_s32_u16, 0U, 0U),
            SIMD_TUPLE(v128_rdavg_u16, 0U, 0U), SIMD_TUPLE(v128_add_64, 0U, 0U),
            SIMD_TUPLE(v128_sub_64, 0U, 0U), SIMD_TUPLE(v128_sadd_s8, 0U, 0U),
            SIMD_TUPLE(v128_sadd_u8, 0U, 0U), SIMD_TUPLE(v128_cmpeq_16, 0U, 0U),
            SIMD_TUPLE(v128_cmplt_s16, 0U, 0U),
            SIMD_TUPLE(v128_cmplt_s32, 0U, 0U),
            SIMD_TUPLE(v128_cmpeq_32, 0U, 0U),
            SIMD_TUPLE(v128_cmpgt_s32, 0U, 0U),
            SIMD_TUPLE(v128_shuffle_8, 15U, 8U),
            SIMD_TUPLE(v128_min_s32, 0U, 0U), SIMD_TUPLE(v128_max_s32, 0U, 0U),
            SIMD_TUPLE(imm_v128_align<1>, 0U, 0U),
            SIMD_TUPLE(imm_v128_align<2>, 0U, 0U),
            SIMD_TUPLE(imm_v128_align<3>, 0U, 0U),
            SIMD_TUPLE(imm_v128_align<4>, 0U, 0U),
            SIMD_TUPLE(imm_v128_align<5>, 0U, 0U),
            SIMD_TUPLE(imm_v128_align<6>, 0U, 0U),
            SIMD_TUPLE(imm_v128_align<7>, 0U, 0U),
            SIMD_TUPLE(imm_v128_align<8>, 0U, 0U),
            SIMD_TUPLE(imm_v128_align<9>, 0U, 0U),
            SIMD_TUPLE(imm_v128_align<10>, 0U, 0U),
            SIMD_TUPLE(imm_v128_align<11>, 0U, 0U),
            SIMD_TUPLE(imm_v128_align<12>, 0U, 0U),
            SIMD_TUPLE(imm_v128_align<13>, 0U, 0U),
            SIMD_TUPLE(imm_v128_align<14>, 0U, 0U),
            SIMD_TUPLE(imm_v128_align<15>, 0U, 0U));

INSTANTIATE(ARCH, ARCH_POSTFIX(V128_V128V128V128),
            SIMD_TUPLE(v128_blend_8, 0U, 0U));

INSTANTIATE(ARCH, ARCH_POSTFIX(V128_V128), SIMD_TUPLE(v128_abs_s8, 0U, 0U),
            SIMD_TUPLE(v128_abs_s16, 0U, 0U), SIMD_TUPLE(v128_padd_s16, 0U, 0U),
            SIMD_TUPLE(v128_unpacklo_u8_s16, 0U, 0U),
            SIMD_TUPLE(v128_unpacklo_s8_s16, 0U, 0U),
            SIMD_TUPLE(v128_unpacklo_u16_s32, 0U, 0U),
            SIMD_TUPLE(v128_unpacklo_s16_s32, 0U, 0U),
            SIMD_TUPLE(v128_unpackhi_u8_s16, 0U, 0U),
            SIMD_TUPLE(v128_unpackhi_s8_s16, 0U, 0U),
            SIMD_TUPLE(v128_unpackhi_u16_s32, 0U, 0U),
            SIMD_TUPLE(v128_unpackhi_s16_s32, 0U, 0U),
            SIMD_TUPLE(imm_v128_shr_n_byte<1>, 0U, 0U),
            SIMD_TUPLE(imm_v128_shr_n_byte<2>, 0U, 0U),
            SIMD_TUPLE(imm_v128_shr_n_byte<3>, 0U, 0U),
            SIMD_TUPLE(imm_v128_shr_n_byte<4>, 0U, 0U),
            SIMD_TUPLE(imm_v128_shr_n_byte<5>, 0U, 0U),
            SIMD_TUPLE(imm_v128_shr_n_byte<6>, 0U, 0U),
            SIMD_TUPLE(imm_v128_shr_n_byte<7>, 0U, 0U),
            SIMD_TUPLE(imm_v128_shr_n_byte<8>, 0U, 0U),
            SIMD_TUPLE(imm_v128_shr_n_byte<9>, 0U, 0U),
            SIMD_TUPLE(imm_v128_shr_n_byte<10>, 0U, 0U),
            SIMD_TUPLE(imm_v128_shr_n_byte<11>, 0U, 0U),
            SIMD_TUPLE(imm_v128_shr_n_byte<12>, 0U, 0U),
            SIMD_TUPLE(imm_v128_shr_n_byte<13>, 0U, 0U),
            SIMD_TUPLE(imm_v128_shr_n_byte<14>, 0U, 0U),
            SIMD_TUPLE(imm_v128_shr_n_byte<15>, 0U, 0U),
            SIMD_TUPLE(imm_v128_shl_n_byte<1>, 0U, 0U),
            SIMD_TUPLE(imm_v128_shl_n_byte<2>, 0U, 0U),
            SIMD_TUPLE(imm_v128_shl_n_byte<3>, 0U, 0U),
            SIMD_TUPLE(imm_v128_shl_n_byte<4>, 0U, 0U),
            SIMD_TUPLE(imm_v128_shl_n_byte<5>, 0U, 0U),
            SIMD_TUPLE(imm_v128_shl_n_byte<6>, 0U, 0U),
            SIMD_TUPLE(imm_v128_shl_n_byte<7>, 0U, 0U),
            SIMD_TUPLE(imm_v128_shl_n_byte<8>, 0U, 0U),
            SIMD_TUPLE(imm_v128_shl_n_byte<9>, 0U, 0U),
            SIMD_TUPLE(imm_v128_shl_n_byte<10>, 0U, 0U),
            SIMD_TUPLE(imm_v128_shl_n_byte<11>, 0U, 0U),
            SIMD_TUPLE(imm_v128_shl_n_byte<12>, 0U, 0U),
            SIMD_TUPLE(imm_v128_shl_n_byte<13>, 0U, 0U),
            SIMD_TUPLE(imm_v128_shl_n_byte<14>, 0U, 0U),
            SIMD_TUPLE(imm_v128_shl_n_byte<15>, 0U, 0U),
            SIMD_TUPLE(imm_v128_shl_n_8<1>, 0U, 0U),
            SIMD_TUPLE(imm_v128_shl_n_8<2>, 0U, 0U),
            SIMD_TUPLE(imm_v128_shl_n_8<3>, 0U, 0U),
            SIMD_TUPLE(imm_v128_shl_n_8<4>, 0U, 0U),
            SIMD_TUPLE(imm_v128_shl_n_8<5>, 0U, 0U),
            SIMD_TUPLE(imm_v128_shl_n_8<6>, 0U, 0U),
            SIMD_TUPLE(imm_v128_shl_n_8<7>, 0U, 0U),
            SIMD_TUPLE(imm_v128_shr_n_u8<1>, 0U, 0U));

INSTANTIATE(ARCH, ARCH_POSTFIX(V128_V128_Part2),
            SIMD_TUPLE(imm_v128_shr_n_u8<2>, 0U, 0U),
            SIMD_TUPLE(imm_v128_shr_n_u8<3>, 0U, 0U),
            SIMD_TUPLE(imm_v128_shr_n_u8<4>, 0U, 0U),
            SIMD_TUPLE(imm_v128_shr_n_u8<5>, 0U, 0U),
            SIMD_TUPLE(imm_v128_shr_n_u8<6>, 0U, 0U),
            SIMD_TUPLE(imm_v128_shr_n_u8<7>, 0U, 0U),
            SIMD_TUPLE(imm_v128_shr_n_s8<1>, 0U, 0U),
            SIMD_TUPLE(imm_v128_shr_n_s8<2>, 0U, 0U),
            SIMD_TUPLE(imm_v128_shr_n_s8<3>, 0U, 0U),
            SIMD_TUPLE(imm_v128_shr_n_s8<4>, 0U, 0U),
            SIMD_TUPLE(imm_v128_shr_n_s8<5>, 0U, 0U),
            SIMD_TUPLE(imm_v128_shr_n_s8<6>, 0U, 0U),
            SIMD_TUPLE(imm_v128_shr_n_s8<7>, 0U, 0U),
            SIMD_TUPLE(imm_v128_shl_n_16<1>, 0U, 0U),
            SIMD_TUPLE(imm_v128_shl_n_16<2>, 0U, 0U),
            SIMD_TUPLE(imm_v128_shl_n_16<4>, 0U, 0U),
            SIMD_TUPLE(imm_v128_shl_n_16<6>, 0U, 0U),
            SIMD_TUPLE(imm_v128_shl_n_16<8>, 0U, 0U),
            SIMD_TUPLE(imm_v128_shl_n_16<10>, 0U, 0U),
            SIMD_TUPLE(imm_v128_shl_n_16<12>, 0U, 0U),
            SIMD_TUPLE(imm_v128_shl_n_16<14>, 0U, 0U),
            SIMD_TUPLE(imm_v128_shr_n_u16<1>, 0U, 0U),
            SIMD_TUPLE(imm_v128_shr_n_u16<2>, 0U, 0U),
            SIMD_TUPLE(imm_v128_shr_n_u16<4>, 0U, 0U),
            SIMD_TUPLE(imm_v128_shr_n_u16<6>, 0U, 0U),
            SIMD_TUPLE(imm_v128_shr_n_u16<8>, 0U, 0U),
            SIMD_TUPLE(imm_v128_shr_n_u16<10>, 0U, 0U),
            SIMD_TUPLE(imm_v128_shr_n_u16<12>, 0U, 0U),
            SIMD_TUPLE(imm_v128_shr_n_u16<14>, 0U, 0U),
            SIMD_TUPLE(imm_v128_shr_n_s16<1>, 0U, 0U),
            SIMD_TUPLE(imm_v128_shr_n_s16<2>, 0U, 0U),
            SIMD_TUPLE(imm_v128_shr_n_s16<4>, 0U, 0U),
            SIMD_TUPLE(imm_v128_shr_n_s16<6>, 0U, 0U),
            SIMD_TUPLE(imm_v128_shr_n_s16<8>, 0U, 0U),
            SIMD_TUPLE(imm_v128_shr_n_s16<10>, 0U, 0U),
            SIMD_TUPLE(imm_v128_shr_n_s16<12>, 0U, 0U),
            SIMD_TUPLE(imm_v128_shr_n_s16<14>, 0U, 0U),
            SIMD_TUPLE(imm_v128_shl_n_32<1>, 0U, 0U),
            SIMD_TUPLE(imm_v128_shl_n_32<4>, 0U, 0U),
            SIMD_TUPLE(imm_v128_shl_n_32<8>, 0U, 0U),
            SIMD_TUPLE(imm_v128_shl_n_32<12>, 0U, 0U),
            SIMD_TUPLE(imm_v128_shl_n_32<16>, 0U, 0U),
            SIMD_TUPLE(imm_v128_shl_n_32<20>, 0U, 0U),
            SIMD_TUPLE(imm_v128_shl_n_32<24>, 0U, 0U),
            SIMD_TUPLE(imm_v128_shl_n_32<28>, 0U, 0U),
            SIMD_TUPLE(imm_v128_shr_n_u32<1>, 0U, 0U),
            SIMD_TUPLE(imm_v128_shr_n_u32<4>, 0U, 0U));

INSTANTIATE(ARCH, ARCH_POSTFIX(V128_V128_Part3),
            SIMD_TUPLE(imm_v128_shr_n_u32<8>, 0U, 0U),
            SIMD_TUPLE(imm_v128_shr_n_u32<12>, 0U, 0U),
            SIMD_TUPLE(imm_v128_shr_n_u32<16>, 0U, 0U),
            SIMD_TUPLE(imm_v128_shr_n_u32<20>, 0U, 0U),
            SIMD_TUPLE(imm_v128_shr_n_u32<24>, 0U, 0U),
            SIMD_TUPLE(imm_v128_shr_n_u32<28>, 0U, 0U),
            SIMD_TUPLE(imm_v128_shr_n_s32<1>, 0U, 0U),
            SIMD_TUPLE(imm_v128_shr_n_s32<4>, 0U, 0U),
            SIMD_TUPLE(imm_v128_shr_n_s32<8>, 0U, 0U),
            SIMD_TUPLE(imm_v128_shr_n_s32<12>, 0U, 0U),
            SIMD_TUPLE(imm_v128_shr_n_s32<16>, 0U, 0U),
            SIMD_TUPLE(imm_v128_shr_n_s32<20>, 0U, 0U),
            SIMD_TUPLE(imm_v128_shr_n_s32<24>, 0U, 0U),
            SIMD_TUPLE(imm_v128_shr_n_s32<28>, 0U, 0U));

INSTANTIATE(ARCH, ARCH_POSTFIX(V128_V128_Part4),
            SIMD_TUPLE(imm_v128_shl_n_64<1>, 0U, 0U),
            SIMD_TUPLE(imm_v128_shl_n_64<4>, 0U, 0U),
            SIMD_TUPLE(imm_v128_shl_n_64<8>, 0U, 0U),
            SIMD_TUPLE(imm_v128_shl_n_64<12>, 0U, 0U),
            SIMD_TUPLE(imm_v128_shl_n_64<16>, 0U, 0U),
            SIMD_TUPLE(imm_v128_shl_n_64<20>, 0U, 0U),
            SIMD_TUPLE(imm_v128_shl_n_64<24>, 0U, 0U),
            SIMD_TUPLE(imm_v128_shl_n_64<28>, 0U, 0U),
            SIMD_TUPLE(imm_v128_shl_n_64<32>, 0U, 0U),
            SIMD_TUPLE(imm_v128_shl_n_64<36>, 0U, 0U),
            SIMD_TUPLE(imm_v128_shl_n_64<40>, 0U, 0U),
            SIMD_TUPLE(imm_v128_shl_n_64<44>, 0U, 0U),
            SIMD_TUPLE(imm_v128_shl_n_64<48>, 0U, 0U),
            SIMD_TUPLE(imm_v128_shl_n_64<52>, 0U, 0U),
            SIMD_TUPLE(imm_v128_shl_n_64<56>, 0U, 0U),
            SIMD_TUPLE(imm_v128_shl_n_64<60>, 0U, 0U),
            SIMD_TUPLE(imm_v128_shr_n_u64<1>, 0U, 0U),
            SIMD_TUPLE(imm_v128_shr_n_u64<4>, 0U, 0U),
            SIMD_TUPLE(imm_v128_shr_n_u64<8>, 0U, 0U),
            SIMD_TUPLE(imm_v128_shr_n_u64<12>, 0U, 0U),
            SIMD_TUPLE(imm_v128_shr_n_u64<16>, 0U, 0U),
            SIMD_TUPLE(imm_v128_shr_n_u64<20>, 0U, 0U),
            SIMD_TUPLE(imm_v128_shr_n_u64<24>, 0U, 0U),
            SIMD_TUPLE(imm_v128_shr_n_u64<28>, 0U, 0U),
            SIMD_TUPLE(imm_v128_shr_n_u64<32>, 0U, 0U),
            SIMD_TUPLE(imm_v128_shr_n_u64<36>, 0U, 0U),
            SIMD_TUPLE(imm_v128_shr_n_u64<40>, 0U, 0U),
            SIMD_TUPLE(imm_v128_shr_n_u64<44>, 0U, 0U),
            SIMD_TUPLE(imm_v128_shr_n_u64<48>, 0U, 0U),
            SIMD_TUPLE(imm_v128_shr_n_u64<52>, 0U, 0U),
            SIMD_TUPLE(imm_v128_shr_n_u64<56>, 0U, 0U),
            SIMD_TUPLE(imm_v128_shr_n_u64<60>, 0U, 0U),
            SIMD_TUPLE(imm_v128_shr_n_s64<1>, 0U, 0U),
            SIMD_TUPLE(imm_v128_shr_n_s64<4>, 0U, 0U),
            SIMD_TUPLE(imm_v128_shr_n_s64<8>, 0U, 0U),
            SIMD_TUPLE(imm_v128_shr_n_s64<12>, 0U, 0U),
            SIMD_TUPLE(imm_v128_shr_n_s64<16>, 0U, 0U),
            SIMD_TUPLE(imm_v128_shr_n_s64<20>, 0U, 0U),
            SIMD_TUPLE(imm_v128_shr_n_s64<24>, 0U, 0U),
            SIMD_TUPLE(imm_v128_shr_n_s64<28>, 0U, 0U),
            SIMD_TUPLE(imm_v128_shr_n_s64<32>, 0U, 0U),
            SIMD_TUPLE(imm_v128_shr_n_s64<36>, 0U, 0U),
            SIMD_TUPLE(imm_v128_shr_n_s64<40>, 0U, 0U),
            SIMD_TUPLE(imm_v128_shr_n_s64<44>, 0U, 0U),
            SIMD_TUPLE(imm_v128_shr_n_s64<48>, 0U, 0U),
            SIMD_TUPLE(imm_v128_shr_n_s64<52>, 0U, 0U),
            SIMD_TUPLE(imm_v128_shr_n_s64<56>, 0U, 0U),
            SIMD_TUPLE(imm_v128_shr_n_s64<60>, 0U, 0U),
            SIMD_TUPLE(v128_padd_u8, 0U, 0U));

INSTANTIATE(ARCH, ARCH_POSTFIX(V128_V64V64), SIMD_TUPLE(v128_from_v64, 0U, 0U),
            SIMD_TUPLE(v128_zip_8, 0U, 0U), SIMD_TUPLE(v128_zip_16, 0U, 0U),
            SIMD_TUPLE(v128_zip_32, 0U, 0U), SIMD_TUPLE(v128_mul_s16, 0U, 0U));

INSTANTIATE(ARCH, ARCH_POSTFIX(V128_U64U64), SIMD_TUPLE(v128_from_64, 0U, 0U));

INSTANTIATE(ARCH, ARCH_POSTFIX(V128_V64),
            SIMD_TUPLE(v128_unpack_u8_s16, 0U, 0U),
            SIMD_TUPLE(v128_unpack_s8_s16, 0U, 0U),
            SIMD_TUPLE(v128_unpack_u16_s32, 0U, 0U),
            SIMD_TUPLE(v128_unpack_s16_s32, 0U, 0U));

INSTANTIATE(
    ARCH, ARCH_POSTFIX(V128_V128U32), SIMD_TUPLE(v128_shl_8, 7U, 32U),
    SIMD_TUPLE(v128_shr_u8, 7U, 32U), SIMD_TUPLE(v128_shr_s8, 7U, 32U),
    SIMD_TUPLE(v128_shl_16, 15U, 32U), SIMD_TUPLE(v128_shr_u16, 15U, 32U),
    SIMD_TUPLE(v128_shr_s16, 15U, 32U), SIMD_TUPLE(v128_shl_32, 31U, 32U),
    SIMD_TUPLE(v128_shr_u32, 31U, 32U), SIMD_TUPLE(v128_shr_s32, 31U, 32U),
    SIMD_TUPLE(v128_shl_64, 63U, 32U), SIMD_TUPLE(v128_shr_u64, 63U, 32U),
    SIMD_TUPLE(v128_shr_s64, 63U, 32U));

INSTANTIATE(ARCH, ARCH_POSTFIX(U32_V128), SIMD_TUPLE(v128_low_u32, 0U, 0U),
            SIMD_TUPLE(v128_movemask_8, 0U, 0U));

INSTANTIATE(ARCH, ARCH_POSTFIX(U64_V128), SIMD_TUPLE(v128_hadd_u8, 0U, 0U));

INSTANTIATE(ARCH, ARCH_POSTFIX(V64_V128), SIMD_TUPLE(v128_low_v64, 0U, 0U),
            SIMD_TUPLE(v128_high_v64, 0U, 0U));

INSTANTIATE(ARCH, ARCH_POSTFIX(V128_U8), SIMD_TUPLE(v128_dup_8, 0U, 0U));

INSTANTIATE(ARCH, ARCH_POSTFIX(V128_U16), SIMD_TUPLE(v128_dup_16, 0U, 0U));

INSTANTIATE(ARCH, ARCH_POSTFIX(V128_U32), SIMD_TUPLE(v128_dup_32, 0U, 0U));

INSTANTIATE(ARCH, ARCH_POSTFIX(V128_U64), SIMD_TUPLE(v128_dup_64, 0U, 0U));

INSTANTIATE(ARCH, ARCH_POSTFIX(S64_V128V128), SIMD_TUPLE(v128_dotp_s16, 0U, 0U),
            SIMD_TUPLE(v128_dotp_s32, 0U, 0U),
            SIMD_TUPLE(v128_dotp_su8, 0U, 0U));

INSTANTIATE(ARCH, ARCH_POSTFIX(U32_V256V256), SIMD_TUPLE(v256_sad_u8, 0U, 0U),
            SIMD_TUPLE(v256_ssd_u8, 0U, 0U), SIMD_TUPLE(v256_sad_u16, 0U, 0U));

INSTANTIATE(ARCH, ARCH_POSTFIX(U64_V256), SIMD_TUPLE(v256_hadd_u8, 0U, 0U),
            SIMD_TUPLE(v256_low_u64, 0U, 0U));

INSTANTIATE(ARCH, ARCH_POSTFIX(S64_V256V256), SIMD_TUPLE(v256_dotp_s16, 0U, 0U),
            SIMD_TUPLE(v256_dotp_s32, 0U, 0U),
            SIMD_TUPLE(v256_dotp_su8, 0U, 0U));

INSTANTIATE(ARCH, ARCH_POSTFIX(U64_V256V256), SIMD_TUPLE(v256_ssd_s16, 0U, 0U));

INSTANTIATE(
    ARCH, ARCH_POSTFIX(V256_V256V256), SIMD_TUPLE(v256_add_8, 0U, 0U),
    SIMD_TUPLE(v256_add_16, 0U, 0U), SIMD_TUPLE(v256_sadd_s16, 0U, 0U),
    SIMD_TUPLE(v256_add_32, 0U, 0U), SIMD_TUPLE(v256_sub_8, 0U, 0U),
    SIMD_TUPLE(v256_ssub_u8, 0U, 0U), SIMD_TUPLE(v256_ssub_s8, 0U, 0U),
    SIMD_TUPLE(v256_sub_16, 0U, 0U), SIMD_TUPLE(v256_ssub_s16, 0U, 0U),
    SIMD_TUPLE(v256_ssub_u16, 0U, 0U), SIMD_TUPLE(v256_sub_32, 0U, 0U),
    SIMD_TUPLE(v256_ziplo_8, 0U, 0U), SIMD_TUPLE(v256_ziphi_8, 0U, 0U),
    SIMD_TUPLE(v256_ziplo_16, 0U, 0U), SIMD_TUPLE(v256_ziphi_16, 0U, 0U),
    SIMD_TUPLE(v256_ziplo_32, 0U, 0U), SIMD_TUPLE(v256_ziphi_32, 0U, 0U),
    SIMD_TUPLE(v256_ziplo_64, 0U, 0U), SIMD_TUPLE(v256_ziphi_64, 0U, 0U),
    SIMD_TUPLE(v256_ziplo_128, 0U, 0U), SIMD_TUPLE(v256_ziphi_128, 0U, 0U),
    SIMD_TUPLE(v256_unziphi_8, 0U, 0U), SIMD_TUPLE(v256_unziplo_8, 0U, 0U),
    SIMD_TUPLE(v256_unziphi_16, 0U, 0U), SIMD_TUPLE(v256_unziplo_16, 0U, 0U),
    SIMD_TUPLE(v256_unziphi_32, 0U, 0U), SIMD_TUPLE(v256_unziplo_32, 0U, 0U),
    SIMD_TUPLE(v256_pack_s32_s16, 0U, 0U), SIMD_TUPLE(v256_pack_s16_u8, 0U, 0U),
    SIMD_TUPLE(v256_pack_s16_s8, 0U, 0U), SIMD_TUPLE(v256_or, 0U, 0U),
    SIMD_TUPLE(v256_xor, 0U, 0U), SIMD_TUPLE(v256_and, 0U, 0U),
    SIMD_TUPLE(v256_andn, 0U, 0U), SIMD_TUPLE(v256_mullo_s16, 0U, 0U),
    SIMD_TUPLE(v256_mulhi_s16, 0U, 0U), SIMD_TUPLE(v256_mullo_s32, 0U, 0U),
    SIMD_TUPLE(v256_madd_s16, 0U, 0U), SIMD_TUPLE(v256_madd_us8, 0U, 0U),
    SIMD_TUPLE(v256_avg_u8, 0U, 0U), SIMD_TUPLE(v256_rdavg_u8, 0U, 0U),
    SIMD_TUPLE(v256_avg_u16, 0U, 0U), SIMD_TUPLE(v256_min_u8, 0U, 0U),
    SIMD_TUPLE(v256_max_u8, 0U, 0U), SIMD_TUPLE(v256_min_s8, 0U, 0U),
    SIMD_TUPLE(v256_max_s8, 0U, 0U), SIMD_TUPLE(v256_min_s16, 0U, 0U),
    SIMD_TUPLE(v256_max_s16, 0U, 0U), SIMD_TUPLE(v256_cmpgt_s8, 0U, 0U),
    SIMD_TUPLE(v256_cmplt_s8, 0U, 0U));

INSTANTIATE(
    ARCH, ARCH_POSTFIX(V256_V256V256_Part2), SIMD_TUPLE(v256_cmpeq_8, 0U, 0U),
    SIMD_TUPLE(v256_min_s32, 0U, 0U), SIMD_TUPLE(v256_max_s32, 0U, 0U),
    SIMD_TUPLE(v256_add_64, 0U, 0U), SIMD_TUPLE(v256_sub_64, 0U, 0U),
    SIMD_TUPLE(v256_cmpgt_s16, 0U, 0U), SIMD_TUPLE(v256_cmplt_s16, 0U, 0U),
    SIMD_TUPLE(v256_cmpeq_16, 0U, 0U), SIMD_TUPLE(v256_cmpgt_s32, 0U, 0U),
    SIMD_TUPLE(v256_cmplt_s32, 0U, 0U), SIMD_TUPLE(v256_cmpeq_32, 0U, 0U),
    SIMD_TUPLE(v256_shuffle_8, 31U, 8U), SIMD_TUPLE(v256_pshuffle_8, 15U, 8U),
    SIMD_TUPLE(imm_v256_align<1>, 0U, 0U), SIMD_TUPLE(v256_sadd_s8, 0U, 0U),
    SIMD_TUPLE(v256_sadd_u8, 0U, 0U), SIMD_TUPLE(v256_pack_s32_u16, 0U, 0U),
    SIMD_TUPLE(v256_rdavg_u16, 0U, 0U), SIMD_TUPLE(imm_v256_align<2>, 0U, 0U),
    SIMD_TUPLE(v256_unziphi_64, 0U, 0U), SIMD_TUPLE(v256_unziplo_64, 0U, 0U),
    SIMD_TUPLE(imm_v256_align<3>, 0U, 0U),
    SIMD_TUPLE(imm_v256_align<4>, 0U, 0U),
    SIMD_TUPLE(imm_v256_align<5>, 0U, 0U),
    SIMD_TUPLE(imm_v256_align<6>, 0U, 0U),
    SIMD_TUPLE(imm_v256_align<7>, 0U, 0U),
    SIMD_TUPLE(imm_v256_align<8>, 0U, 0U),
    SIMD_TUPLE(imm_v256_align<9>, 0U, 0U),
    SIMD_TUPLE(imm_v256_align<10>, 0U, 0U),
    SIMD_TUPLE(imm_v256_align<11>, 0U, 0U),
    SIMD_TUPLE(imm_v256_align<12>, 0U, 0U),
    SIMD_TUPLE(imm_v256_align<13>, 0U, 0U),
    SIMD_TUPLE(imm_v256_align<14>, 0U, 0U),
    SIMD_TUPLE(imm_v256_align<15>, 0U, 0U),
    SIMD_TUPLE(imm_v256_align<16>, 0U, 0U),
    SIMD_TUPLE(imm_v256_align<17>, 0U, 0U),
    SIMD_TUPLE(imm_v256_align<18>, 0U, 0U),
    SIMD_TUPLE(imm_v256_align<19>, 0U, 0U),
    SIMD_TUPLE(imm_v256_align<20>, 0U, 0U),
    SIMD_TUPLE(imm_v256_align<21>, 0U, 0U),
    SIMD_TUPLE(imm_v256_align<22>, 0U, 0U),
    SIMD_TUPLE(imm_v256_align<23>, 0U, 0U),
    SIMD_TUPLE(imm_v256_align<24>, 0U, 0U),
    SIMD_TUPLE(imm_v256_align<25>, 0U, 0U),
    SIMD_TUPLE(imm_v256_align<26>, 0U, 0U),
    SIMD_TUPLE(imm_v256_align<27>, 0U, 0U),
    SIMD_TUPLE(imm_v256_align<28>, 0U, 0U),
    SIMD_TUPLE(imm_v256_align<29>, 0U, 0U),
    SIMD_TUPLE(imm_v256_align<30>, 0U, 0U),
    SIMD_TUPLE(imm_v256_align<31>, 0U, 0U));

INSTANTIATE(ARCH, ARCH_POSTFIX(V256_V128V128),
            SIMD_TUPLE(v256_from_v128, 0U, 0U), SIMD_TUPLE(v256_zip_8, 0U, 0U),
            SIMD_TUPLE(v256_zip_16, 0U, 0U), SIMD_TUPLE(v256_zip_32, 0U, 0U),
            SIMD_TUPLE(v256_mul_s16, 0U, 0U));

INSTANTIATE(ARCH, ARCH_POSTFIX(V256_V128),
            SIMD_TUPLE(v256_unpack_u8_s16, 0U, 0U),
            SIMD_TUPLE(v256_unpack_s8_s16, 0U, 0U),
            SIMD_TUPLE(v256_unpack_u16_s32, 0U, 0U),
            SIMD_TUPLE(v256_unpack_s16_s32, 0U, 0U));

INSTANTIATE(
    ARCH, ARCH_POSTFIX(V256_V256U32), SIMD_TUPLE(v256_shl_8, 7U, 32U),
    SIMD_TUPLE(v256_shr_u8, 7U, 32U), SIMD_TUPLE(v256_shr_s8, 7U, 32U),
    SIMD_TUPLE(v256_shl_16, 15U, 32U), SIMD_TUPLE(v256_shr_u16, 15U, 32U),
    SIMD_TUPLE(v256_shr_s16, 15U, 32U), SIMD_TUPLE(v256_shl_32, 31U, 32U),
    SIMD_TUPLE(v256_shr_u32, 31U, 32U), SIMD_TUPLE(v256_shr_s32, 31U, 32U),
    SIMD_TUPLE(v256_shl_64, 63U, 32U), SIMD_TUPLE(v256_shr_u64, 63U, 32U),
    SIMD_TUPLE(v256_shr_s64, 63U, 32U));

INSTANTIATE(ARCH, ARCH_POSTFIX(V256_V256), SIMD_TUPLE(v256_abs_s8, 0U, 0U),
            SIMD_TUPLE(v256_abs_s16, 0U, 0U), SIMD_TUPLE(v256_padd_s16, 0U, 0U),
            SIMD_TUPLE(v256_unpacklo_u8_s16, 0U, 0U),
            SIMD_TUPLE(v256_unpacklo_s8_s16, 0U, 0U),
            SIMD_TUPLE(v256_unpacklo_u16_s32, 0U, 0U),
            SIMD_TUPLE(v256_unpacklo_s16_s32, 0U, 0U),
            SIMD_TUPLE(v256_unpackhi_u8_s16, 0U, 0U),
            SIMD_TUPLE(v256_unpackhi_s8_s16, 0U, 0U),
            SIMD_TUPLE(v256_unpackhi_u16_s32, 0U, 0U),
            SIMD_TUPLE(v256_unpackhi_s16_s32, 0U, 0U),
            SIMD_TUPLE(imm_v256_shr_n_byte<1>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shr_n_byte<2>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shr_n_byte<3>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shr_n_byte<4>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shr_n_byte<5>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shr_n_byte<6>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shr_n_byte<7>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shr_n_byte<8>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shr_n_byte<9>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shr_n_byte<10>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shr_n_byte<11>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shr_n_byte<12>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shr_n_byte<13>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shr_n_byte<14>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shr_n_byte<15>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shr_n_byte<16>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shr_n_byte<17>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shr_n_byte<18>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shr_n_byte<19>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shr_n_byte<20>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shr_n_byte<21>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shr_n_byte<22>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shr_n_byte<23>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shr_n_byte<24>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shr_n_byte<25>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shr_n_byte<26>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shr_n_byte<27>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shr_n_byte<28>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shr_n_byte<29>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shr_n_byte<30>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shr_n_byte<31>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shl_n_byte<1>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shl_n_byte<2>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shl_n_byte<3>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shl_n_byte<4>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shl_n_byte<5>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shl_n_byte<6>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shl_n_byte<7>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shl_n_byte<8>, 0U, 0U));

INSTANTIATE(ARCH, ARCH_POSTFIX(V256_V256_Part2),
            SIMD_TUPLE(imm_v256_shl_n_byte<9>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shl_n_byte<10>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shl_n_byte<11>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shl_n_byte<12>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shl_n_byte<13>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shl_n_byte<14>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shl_n_byte<15>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shl_n_byte<16>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shl_n_byte<17>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shl_n_byte<18>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shl_n_byte<19>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shl_n_byte<20>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shl_n_byte<21>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shl_n_byte<22>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shl_n_byte<23>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shl_n_byte<24>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shl_n_byte<25>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shl_n_byte<26>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shl_n_byte<27>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shl_n_byte<28>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shl_n_byte<29>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shl_n_byte<30>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shl_n_byte<31>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shl_n_8<1>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shl_n_8<2>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shl_n_8<3>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shl_n_8<4>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shl_n_8<5>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shl_n_8<6>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shl_n_8<7>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shr_n_u8<1>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shr_n_u8<2>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shr_n_u8<3>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shr_n_u8<4>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shr_n_u8<5>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shr_n_u8<6>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shr_n_u8<7>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shr_n_s8<1>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shr_n_s8<2>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shr_n_s8<3>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shr_n_s8<4>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shr_n_s8<5>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shr_n_s8<6>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shr_n_s8<7>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shl_n_16<1>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shl_n_16<2>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shl_n_16<4>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shl_n_16<6>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shl_n_16<8>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shl_n_16<10>, 0U, 0U));

INSTANTIATE(ARCH, ARCH_POSTFIX(V256_V256_Part3),
            SIMD_TUPLE(imm_v256_shl_n_16<12>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shl_n_16<14>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shr_n_u16<1>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shr_n_u16<2>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shr_n_u16<4>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shr_n_u16<6>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shr_n_u16<8>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shr_n_u16<10>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shr_n_u16<12>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shr_n_u16<14>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shr_n_s16<1>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shr_n_s16<2>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shr_n_s16<4>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shr_n_s16<6>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shr_n_s16<8>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shr_n_s16<10>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shr_n_s16<12>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shr_n_s16<14>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shl_n_32<1>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shl_n_32<4>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shl_n_32<8>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shl_n_32<12>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shl_n_32<16>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shl_n_32<20>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shl_n_32<24>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shl_n_32<28>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shr_n_u32<1>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shr_n_u32<4>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shr_n_u32<8>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shr_n_u32<12>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shr_n_u32<16>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shr_n_u32<20>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shr_n_u32<24>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shr_n_u32<28>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shr_n_s32<1>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shr_n_s32<4>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shr_n_s32<8>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shr_n_s32<12>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shr_n_s32<16>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shr_n_s32<20>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shr_n_s32<24>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shr_n_s32<28>, 0U, 0U));

INSTANTIATE(ARCH, ARCH_POSTFIX(V256_V256_Part4),
            SIMD_TUPLE(imm_v256_shl_n_64<1>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shl_n_64<4>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shl_n_64<8>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shl_n_64<12>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shl_n_64<16>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shl_n_64<20>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shl_n_64<24>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shl_n_64<28>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shl_n_64<32>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shl_n_64<36>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shl_n_64<40>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shl_n_64<44>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shl_n_64<48>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shl_n_64<52>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shl_n_64<56>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shl_n_64<60>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shr_n_u64<1>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shr_n_u64<4>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shr_n_u64<8>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shr_n_u64<12>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shr_n_u64<16>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shr_n_u64<20>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shr_n_u64<24>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shr_n_u64<28>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shr_n_u64<32>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shr_n_u64<36>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shr_n_u64<40>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shr_n_u64<44>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shr_n_u64<48>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shr_n_u64<52>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shr_n_u64<56>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shr_n_u64<60>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shr_n_s64<1>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shr_n_s64<4>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shr_n_s64<8>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shr_n_s64<12>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shr_n_s64<16>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shr_n_s64<20>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shr_n_s64<24>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shr_n_s64<28>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shr_n_s64<32>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shr_n_s64<36>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shr_n_s64<40>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shr_n_s64<44>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shr_n_s64<48>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shr_n_s64<52>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shr_n_s64<56>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shr_n_s64<60>, 0U, 0U),
            SIMD_TUPLE(v256_padd_u8, 0U, 0U));

INSTANTIATE(ARCH, ARCH_POSTFIX(V256_V256_Part5),
            SIMD_TUPLE(imm_v256_shr_n_word<1>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shr_n_word<2>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shr_n_word<3>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shr_n_word<4>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shr_n_word<5>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shr_n_word<6>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shr_n_word<7>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shr_n_word<8>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shr_n_word<9>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shr_n_word<10>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shr_n_word<11>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shr_n_word<12>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shr_n_word<13>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shr_n_word<14>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shr_n_word<15>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shl_n_word<1>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shl_n_word<2>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shl_n_word<3>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shl_n_word<4>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shl_n_word<5>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shl_n_word<6>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shl_n_word<7>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shl_n_word<8>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shl_n_word<9>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shl_n_word<10>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shl_n_word<11>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shl_n_word<12>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shl_n_word<13>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shl_n_word<14>, 0U, 0U),
            SIMD_TUPLE(imm_v256_shl_n_word<15>, 0U, 0U));

INSTANTIATE(ARCH, ARCH_POSTFIX(V256_V256V256V256),
            SIMD_TUPLE(v256_blend_8, 0U, 0U),
            SIMD_TUPLE(v256_wideshuffle_8, 63U, 8U));

INSTANTIATE(ARCH, ARCH_POSTFIX(V256_U8), SIMD_TUPLE(v256_dup_8, 0U, 0U));

INSTANTIATE(ARCH, ARCH_POSTFIX(V256_U16), SIMD_TUPLE(v256_dup_16, 0U, 0U));

INSTANTIATE(ARCH, ARCH_POSTFIX(V256_U32), SIMD_TUPLE(v256_dup_32, 0U, 0U));

INSTANTIATE(ARCH, ARCH_POSTFIX(V256_U64), SIMD_TUPLE(v256_dup_64, 0U, 0U));

INSTANTIATE(ARCH, ARCH_POSTFIX(U32_V256), SIMD_TUPLE(v256_low_u32, 0U, 0U),
            SIMD_TUPLE(v256_movemask_8, 0U, 0U));

INSTANTIATE(ARCH, ARCH_POSTFIX(V64_V256), SIMD_TUPLE(v256_low_v64, 0U, 0U));

}  // namespace SIMD_NAMESPACE