/*
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved.
 *
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
 */

#ifndef AOM_AOM_DSP_SIMD_V128_INTRINSICS_C_H_
#define AOM_AOM_DSP_SIMD_V128_INTRINSICS_C_H_

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "config/aom_config.h"

#include "aom_dsp/simd/v64_intrinsics_c.h"

typedef union {
  uint8_t u8[16];
  uint16_t u16[8];
  uint32_t u32[4];
  uint64_t u64[2];
  int8_t s8[16];
  int16_t s16[8];
  int32_t s32[4];
  int64_t s64[2];
  c_v64 v64[2];
} c_v128;

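/* Example (for illustration): the union members alias the same 16 bytes, and
 * the constructors below fill lanes from the highest-numbered element down:
 *
 *   c_v128 v = c_v128_from_32(3, 2, 1, 0);  // u32[3] == 3, ..., u32[0] == 0
 *   uint32_t lo32 = c_v128_low_u32(v);      // reads u32[0], i.e. 0
 *   c_v64 lo64 = c_v128_low_v64(v);         // v64[0], aliasing u32[0..1]
 */
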
SIMD_INLINE uint32_t c_v128_low_u32(c_v128 a) { return a.u32[0]; }

SIMD_INLINE c_v64 c_v128_low_v64(c_v128 a) { return a.v64[0]; }

SIMD_INLINE c_v64 c_v128_high_v64(c_v128 a) { return a.v64[1]; }

SIMD_INLINE c_v128 c_v128_from_64(uint64_t hi, uint64_t lo) {
  c_v128 t;
  t.u64[1] = hi;
  t.u64[0] = lo;
  return t;
}

SIMD_INLINE c_v128 c_v128_from_v64(c_v64 hi, c_v64 lo) {
  c_v128 t;
  t.v64[1] = hi;
  t.v64[0] = lo;
  return t;
}

SIMD_INLINE c_v128 c_v128_from_32(uint32_t a, uint32_t b, uint32_t c,
                                  uint32_t d) {
  c_v128 t;
  t.u32[3] = a;
  t.u32[2] = b;
  t.u32[1] = c;
  t.u32[0] = d;
  return t;
}

SIMD_INLINE c_v128 c_v128_load_unaligned(const void *p) {
  c_v128 t;
  memcpy(&t, p, 16);
  return t;
}

SIMD_INLINE c_v128 c_v128_load_aligned(const void *p) {
  if (SIMD_CHECK && (uintptr_t)p & 15) {
    fprintf(stderr, "Error: unaligned v128 load at %p\n", p);
    abort();
  }
  return c_v128_load_unaligned(p);
}

SIMD_INLINE void c_v128_store_unaligned(void *p, c_v128 a) {
  memcpy(p, &a, 16);
}

SIMD_INLINE void c_v128_store_aligned(void *p, c_v128 a) {
  if (SIMD_CHECK && (uintptr_t)p & 15) {
    fprintf(stderr, "Error: unaligned v128 store at %p\n", p);
    abort();
  }
  c_v128_store_unaligned(p, a);
}

SIMD_INLINE c_v128 c_v128_zero(void) {
  c_v128 t;
  t.u64[1] = t.u64[0] = 0;
  return t;
}

SIMD_INLINE c_v128 c_v128_dup_8(uint8_t x) {
  c_v128 t;
  t.v64[1] = t.v64[0] = c_v64_dup_8(x);
  return t;
}

SIMD_INLINE c_v128 c_v128_dup_16(uint16_t x) {
  c_v128 t;
  t.v64[1] = t.v64[0] = c_v64_dup_16(x);
  return t;
}

SIMD_INLINE c_v128 c_v128_dup_32(uint32_t x) {
  c_v128 t;
  t.v64[1] = t.v64[0] = c_v64_dup_32(x);
  return t;
}

SIMD_INLINE c_v128 c_v128_dup_64(uint64_t x) {
  c_v128 t;
  t.u64[1] = t.u64[0] = x;
  return t;
}

SIMD_INLINE int64_t c_v128_dotp_su8(c_v128 a, c_v128 b) {
  return c_v64_dotp_su8(a.v64[1], b.v64[1]) +
         c_v64_dotp_su8(a.v64[0], b.v64[0]);
}

SIMD_INLINE int64_t c_v128_dotp_s16(c_v128 a, c_v128 b) {
  return c_v64_dotp_s16(a.v64[1], b.v64[1]) +
         c_v64_dotp_s16(a.v64[0], b.v64[0]);
}

SIMD_INLINE int64_t c_v128_dotp_s32(c_v128 a, c_v128 b) {
  // 32 bit products, 64 bit sum
  return (int64_t)(int32_t)((int64_t)a.s32[3] * b.s32[3]) +
         (int64_t)(int32_t)((int64_t)a.s32[2] * b.s32[2]) +
         (int64_t)(int32_t)((int64_t)a.s32[1] * b.s32[1]) +
         (int64_t)(int32_t)((int64_t)a.s32[0] * b.s32[0]);
}

SIMD_INLINE uint64_t c_v128_hadd_u8(c_v128 a) {
  return c_v64_hadd_u8(a.v64[1]) + c_v64_hadd_u8(a.v64[0]);
}

typedef struct {
  uint32_t val;
  int count;
} c_sad128_internal;

SIMD_INLINE c_sad128_internal c_v128_sad_u8_init(void) {
  c_sad128_internal t;
  t.val = t.count = 0;
  return t;
}

/* Implementation dependent return value.  Result must be finalised with
 * v128_sad_u8_sum(). The result for more than 32 v128_sad_u8() calls is
 * undefined. */
SIMD_INLINE c_sad128_internal c_v128_sad_u8(c_sad128_internal s, c_v128 a,
                                            c_v128 b) {
  int c;
  for (c = 0; c < 16; c++)
    s.val += a.u8[c] > b.u8[c] ? a.u8[c] - b.u8[c] : b.u8[c] - a.u8[c];
  s.count++;
  if (SIMD_CHECK && s.count > 32) {
    fprintf(stderr,
            "Error: sad called more than 32 times; result is undefined\n");
    abort();
  }
  return s;
}

SIMD_INLINE uint32_t c_v128_sad_u8_sum(c_sad128_internal s) { return s.val; }

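/* Example (for illustration, with hypothetical src/ref buffers and a rows
 * count assumed to be at most 32, per the note above):
 *
 *   c_sad128_internal acc = c_v128_sad_u8_init();
 *   for (int i = 0; i < rows; i++)
 *     acc = c_v128_sad_u8(acc, c_v128_load_unaligned(src + i * stride),
 *                         c_v128_load_unaligned(ref + i * stride));
 *   uint32_t sad = c_v128_sad_u8_sum(acc);
 */
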
typedef uint32_t c_ssd128_internal;

SIMD_INLINE c_ssd128_internal c_v128_ssd_u8_init(void) { return 0; }

/* Implementation dependent return value.  Result must be finalised with
 * v128_ssd_u8_sum(). */
SIMD_INLINE c_ssd128_internal c_v128_ssd_u8(c_ssd128_internal s, c_v128 a,
                                            c_v128 b) {
  int c;
  for (c = 0; c < 16; c++) s += (a.u8[c] - b.u8[c]) * (a.u8[c] - b.u8[c]);
  return s;
}

SIMD_INLINE uint32_t c_v128_ssd_u8_sum(c_ssd128_internal s) { return s; }

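/* The SSD accumulator follows the same init/accumulate/sum pattern, e.g.
 * (illustration, a and b being arbitrary c_v128 values):
 *
 *   c_ssd128_internal ssd = c_v128_ssd_u8_init();
 *   ssd = c_v128_ssd_u8(ssd, a, b);
 *   uint32_t sum = c_v128_ssd_u8_sum(ssd);
 */
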
SIMD_INLINE c_v128 c_v128_or(c_v128 a, c_v128 b) {
  return c_v128_from_v64(c_v64_or(a.v64[1], b.v64[1]),
                         c_v64_or(a.v64[0], b.v64[0]));
}

SIMD_INLINE c_v128 c_v128_xor(c_v128 a, c_v128 b) {
  return c_v128_from_v64(c_v64_xor(a.v64[1], b.v64[1]),
                         c_v64_xor(a.v64[0], b.v64[0]));
}

SIMD_INLINE c_v128 c_v128_and(c_v128 a, c_v128 b) {
  return c_v128_from_v64(c_v64_and(a.v64[1], b.v64[1]),
                         c_v64_and(a.v64[0], b.v64[0]));
}

SIMD_INLINE c_v128 c_v128_andn(c_v128 a, c_v128 b) {
  return c_v128_from_v64(c_v64_andn(a.v64[1], b.v64[1]),
                         c_v64_andn(a.v64[0], b.v64[0]));
}

SIMD_INLINE c_v128 c_v128_add_8(c_v128 a, c_v128 b) {
  return c_v128_from_v64(c_v64_add_8(a.v64[1], b.v64[1]),
                         c_v64_add_8(a.v64[0], b.v64[0]));
}

SIMD_INLINE c_v128 c_v128_add_16(c_v128 a, c_v128 b) {
  return c_v128_from_v64(c_v64_add_16(a.v64[1], b.v64[1]),
                         c_v64_add_16(a.v64[0], b.v64[0]));
}

SIMD_INLINE c_v128 c_v128_sadd_u8(c_v128 a, c_v128 b) {
  return c_v128_from_v64(c_v64_sadd_u8(a.v64[1], b.v64[1]),
                         c_v64_sadd_u8(a.v64[0], b.v64[0]));
}

SIMD_INLINE c_v128 c_v128_sadd_s8(c_v128 a, c_v128 b) {
  return c_v128_from_v64(c_v64_sadd_s8(a.v64[1], b.v64[1]),
                         c_v64_sadd_s8(a.v64[0], b.v64[0]));
}

SIMD_INLINE c_v128 c_v128_sadd_s16(c_v128 a, c_v128 b) {
  return c_v128_from_v64(c_v64_sadd_s16(a.v64[1], b.v64[1]),
                         c_v64_sadd_s16(a.v64[0], b.v64[0]));
}

SIMD_INLINE c_v128 c_v128_add_32(c_v128 a, c_v128 b) {
  return c_v128_from_v64(c_v64_add_32(a.v64[1], b.v64[1]),
                         c_v64_add_32(a.v64[0], b.v64[0]));
}

SIMD_INLINE c_v128 c_v128_add_64(c_v128 a, c_v128 b) {
  // Two's complement overflow (silences sanitizers)
  return c_v128_from_64(
      a.v64[1].u64 > ~b.v64[1].u64 ? a.v64[1].u64 - ~b.v64[1].u64 - 1
                                   : a.v64[1].u64 + b.v64[1].u64,
      a.v64[0].u64 > ~b.v64[0].u64 ? a.v64[0].u64 - ~b.v64[0].u64 - 1
                                   : a.v64[0].u64 + b.v64[0].u64);
}

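/* Note: when a > ~b the plain sum a + b would exceed UINT64_MAX, and
 * a - ~b - 1 == a + b - 2^64 is exactly the wrapped result, so the branch
 * above performs modular addition without an overflowing add. */
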
SIMD_INLINE c_v128 c_v128_padd_s16(c_v128 a) {
  c_v128 t;
  t.s32[0] = (int32_t)a.s16[0] + (int32_t)a.s16[1];
  t.s32[1] = (int32_t)a.s16[2] + (int32_t)a.s16[3];
  t.s32[2] = (int32_t)a.s16[4] + (int32_t)a.s16[5];
  t.s32[3] = (int32_t)a.s16[6] + (int32_t)a.s16[7];
  return t;
}

SIMD_INLINE c_v128 c_v128_padd_u8(c_v128 a) {
  c_v128 t;
  t.u16[0] = (uint16_t)a.u8[0] + (uint16_t)a.u8[1];
  t.u16[1] = (uint16_t)a.u8[2] + (uint16_t)a.u8[3];
  t.u16[2] = (uint16_t)a.u8[4] + (uint16_t)a.u8[5];
  t.u16[3] = (uint16_t)a.u8[6] + (uint16_t)a.u8[7];
  t.u16[4] = (uint16_t)a.u8[8] + (uint16_t)a.u8[9];
  t.u16[5] = (uint16_t)a.u8[10] + (uint16_t)a.u8[11];
  t.u16[6] = (uint16_t)a.u8[12] + (uint16_t)a.u8[13];
  t.u16[7] = (uint16_t)a.u8[14] + (uint16_t)a.u8[15];
  return t;
}

SIMD_INLINE c_v128 c_v128_sub_8(c_v128 a, c_v128 b) {
  return c_v128_from_v64(c_v64_sub_8(a.v64[1], b.v64[1]),
                         c_v64_sub_8(a.v64[0], b.v64[0]));
}

SIMD_INLINE c_v128 c_v128_ssub_u8(c_v128 a, c_v128 b) {
  return c_v128_from_v64(c_v64_ssub_u8(a.v64[1], b.v64[1]),
                         c_v64_ssub_u8(a.v64[0], b.v64[0]));
}

SIMD_INLINE c_v128 c_v128_ssub_s8(c_v128 a, c_v128 b) {
  return c_v128_from_v64(c_v64_ssub_s8(a.v64[1], b.v64[1]),
                         c_v64_ssub_s8(a.v64[0], b.v64[0]));
}

SIMD_INLINE c_v128 c_v128_sub_16(c_v128 a, c_v128 b) {
  return c_v128_from_v64(c_v64_sub_16(a.v64[1], b.v64[1]),
                         c_v64_sub_16(a.v64[0], b.v64[0]));
}

SIMD_INLINE c_v128 c_v128_ssub_s16(c_v128 a, c_v128 b) {
  return c_v128_from_v64(c_v64_ssub_s16(a.v64[1], b.v64[1]),
                         c_v64_ssub_s16(a.v64[0], b.v64[0]));
}

SIMD_INLINE c_v128 c_v128_ssub_u16(c_v128 a, c_v128 b) {
  return c_v128_from_v64(c_v64_ssub_u16(a.v64[1], b.v64[1]),
                         c_v64_ssub_u16(a.v64[0], b.v64[0]));
}

SIMD_INLINE c_v128 c_v128_sub_32(c_v128 a, c_v128 b) {
  return c_v128_from_v64(c_v64_sub_32(a.v64[1], b.v64[1]),
                         c_v64_sub_32(a.v64[0], b.v64[0]));
}

SIMD_INLINE c_v128 c_v128_sub_64(c_v128 a, c_v128 b) {
  // Two's complement underflow (silences sanitizers)
  return c_v128_from_64(
      a.v64[1].u64 < b.v64[1].u64 ? a.v64[1].u64 + ~b.v64[1].u64 + 1
                                  : a.v64[1].u64 - b.v64[1].u64,
      a.v64[0].u64 < b.v64[0].u64 ? a.v64[0].u64 + ~b.v64[0].u64 + 1
                                  : a.v64[0].u64 - b.v64[0].u64);
}

SIMD_INLINE c_v128 c_v128_abs_s16(c_v128 a) {
  return c_v128_from_v64(c_v64_abs_s16(a.v64[1]), c_v64_abs_s16(a.v64[0]));
}

SIMD_INLINE c_v128 c_v128_abs_s8(c_v128 a) {
  return c_v128_from_v64(c_v64_abs_s8(a.v64[1]), c_v64_abs_s8(a.v64[0]));
}

SIMD_INLINE c_v128 c_v128_mul_s16(c_v64 a, c_v64 b) {
  c_v64 lo_bits = c_v64_mullo_s16(a, b);
  c_v64 hi_bits = c_v64_mulhi_s16(a, b);
  return c_v128_from_v64(c_v64_ziphi_16(hi_bits, lo_bits),
                         c_v64_ziplo_16(hi_bits, lo_bits));
}

SIMD_INLINE c_v128 c_v128_mullo_s16(c_v128 a, c_v128 b) {
  return c_v128_from_v64(c_v64_mullo_s16(a.v64[1], b.v64[1]),
                         c_v64_mullo_s16(a.v64[0], b.v64[0]));
}

SIMD_INLINE c_v128 c_v128_mulhi_s16(c_v128 a, c_v128 b) {
  return c_v128_from_v64(c_v64_mulhi_s16(a.v64[1], b.v64[1]),
                         c_v64_mulhi_s16(a.v64[0], b.v64[0]));
}

SIMD_INLINE c_v128 c_v128_mullo_s32(c_v128 a, c_v128 b) {
  return c_v128_from_v64(c_v64_mullo_s32(a.v64[1], b.v64[1]),
                         c_v64_mullo_s32(a.v64[0], b.v64[0]));
}

SIMD_INLINE c_v128 c_v128_madd_s16(c_v128 a, c_v128 b) {
  return c_v128_from_v64(c_v64_madd_s16(a.v64[1], b.v64[1]),
                         c_v64_madd_s16(a.v64[0], b.v64[0]));
}

SIMD_INLINE c_v128 c_v128_madd_us8(c_v128 a, c_v128 b) {
  return c_v128_from_v64(c_v64_madd_us8(a.v64[1], b.v64[1]),
                         c_v64_madd_us8(a.v64[0], b.v64[0]));
}

SIMD_INLINE c_v128 c_v128_avg_u8(c_v128 a, c_v128 b) {
  return c_v128_from_v64(c_v64_avg_u8(a.v64[1], b.v64[1]),
                         c_v64_avg_u8(a.v64[0], b.v64[0]));
}

SIMD_INLINE c_v128 c_v128_rdavg_u8(c_v128 a, c_v128 b) {
  return c_v128_from_v64(c_v64_rdavg_u8(a.v64[1], b.v64[1]),
                         c_v64_rdavg_u8(a.v64[0], b.v64[0]));
}

SIMD_INLINE c_v128 c_v128_rdavg_u16(c_v128 a, c_v128 b) {
  return c_v128_from_v64(c_v64_rdavg_u16(a.v64[1], b.v64[1]),
                         c_v64_rdavg_u16(a.v64[0], b.v64[0]));
}

SIMD_INLINE c_v128 c_v128_avg_u16(c_v128 a, c_v128 b) {
  return c_v128_from_v64(c_v64_avg_u16(a.v64[1], b.v64[1]),
                         c_v64_avg_u16(a.v64[0], b.v64[0]));
}

SIMD_INLINE c_v128 c_v128_min_u8(c_v128 a, c_v128 b) {
  return c_v128_from_v64(c_v64_min_u8(a.v64[1], b.v64[1]),
                         c_v64_min_u8(a.v64[0], b.v64[0]));
}

SIMD_INLINE c_v128 c_v128_max_u8(c_v128 a, c_v128 b) {
  return c_v128_from_v64(c_v64_max_u8(a.v64[1], b.v64[1]),
                         c_v64_max_u8(a.v64[0], b.v64[0]));
}

SIMD_INLINE c_v128 c_v128_min_s8(c_v128 a, c_v128 b) {
  return c_v128_from_v64(c_v64_min_s8(a.v64[1], b.v64[1]),
                         c_v64_min_s8(a.v64[0], b.v64[0]));
}

SIMD_INLINE uint32_t c_v128_movemask_8(c_v128 a) {
  return ((a.s8[15] < 0) << 15) | ((a.s8[14] < 0) << 14) |
         ((a.s8[13] < 0) << 13) | ((a.s8[12] < 0) << 12) |
         ((a.s8[11] < 0) << 11) | ((a.s8[10] < 0) << 10) |
         ((a.s8[9] < 0) << 9) | ((a.s8[8] < 0) << 8) | ((a.s8[7] < 0) << 7) |
         ((a.s8[6] < 0) << 6) | ((a.s8[5] < 0) << 5) | ((a.s8[4] < 0) << 4) |
         ((a.s8[3] < 0) << 3) | ((a.s8[2] < 0) << 2) | ((a.s8[1] < 0) << 1) |
         ((a.s8[0] < 0) << 0);
}

SIMD_INLINE c_v128 c_v128_blend_8(c_v128 a, c_v128 b, c_v128 c) {
  c_v128 t;
  for (int i = 0; i < 16; i++) t.u8[i] = c.s8[i] < 0 ? b.u8[i] : a.u8[i];
  return t;
}

SIMD_INLINE c_v128 c_v128_max_s8(c_v128 a, c_v128 b) {
  return c_v128_from_v64(c_v64_max_s8(a.v64[1], b.v64[1]),
                         c_v64_max_s8(a.v64[0], b.v64[0]));
}

SIMD_INLINE c_v128 c_v128_min_s16(c_v128 a, c_v128 b) {
  return c_v128_from_v64(c_v64_min_s16(a.v64[1], b.v64[1]),
                         c_v64_min_s16(a.v64[0], b.v64[0]));
}

SIMD_INLINE c_v128 c_v128_max_s16(c_v128 a, c_v128 b) {
  return c_v128_from_v64(c_v64_max_s16(a.v64[1], b.v64[1]),
                         c_v64_max_s16(a.v64[0], b.v64[0]));
}

SIMD_INLINE c_v128 c_v128_max_s32(c_v128 a, c_v128 b) {
  c_v128 t;
  int c;
  for (c = 0; c < 4; c++) t.s32[c] = a.s32[c] > b.s32[c] ? a.s32[c] : b.s32[c];
  return t;
}

SIMD_INLINE c_v128 c_v128_min_s32(c_v128 a, c_v128 b) {
  c_v128 t;
  int c;
  for (c = 0; c < 4; c++) t.s32[c] = a.s32[c] > b.s32[c] ? b.s32[c] : a.s32[c];
  return t;
}

SIMD_INLINE c_v128 c_v128_ziplo_8(c_v128 a, c_v128 b) {
  return c_v128_from_v64(c_v64_ziphi_8(a.v64[0], b.v64[0]),
                         c_v64_ziplo_8(a.v64[0], b.v64[0]));
}

SIMD_INLINE c_v128 c_v128_ziphi_8(c_v128 a, c_v128 b) {
  return c_v128_from_v64(c_v64_ziphi_8(a.v64[1], b.v64[1]),
                         c_v64_ziplo_8(a.v64[1], b.v64[1]));
}

SIMD_INLINE c_v128 c_v128_ziplo_16(c_v128 a, c_v128 b) {
  return c_v128_from_v64(c_v64_ziphi_16(a.v64[0], b.v64[0]),
                         c_v64_ziplo_16(a.v64[0], b.v64[0]));
}

SIMD_INLINE c_v128 c_v128_ziphi_16(c_v128 a, c_v128 b) {
  return c_v128_from_v64(c_v64_ziphi_16(a.v64[1], b.v64[1]),
                         c_v64_ziplo_16(a.v64[1], b.v64[1]));
}

SIMD_INLINE c_v128 c_v128_ziplo_32(c_v128 a, c_v128 b) {
  return c_v128_from_v64(c_v64_ziphi_32(a.v64[0], b.v64[0]),
                         c_v64_ziplo_32(a.v64[0], b.v64[0]));
}

SIMD_INLINE c_v128 c_v128_ziphi_32(c_v128 a, c_v128 b) {
  return c_v128_from_v64(c_v64_ziphi_32(a.v64[1], b.v64[1]),
                         c_v64_ziplo_32(a.v64[1], b.v64[1]));
}

SIMD_INLINE c_v128 c_v128_ziplo_64(c_v128 a, c_v128 b) {
  return c_v128_from_v64(a.v64[0], b.v64[0]);
}

SIMD_INLINE c_v128 c_v128_ziphi_64(c_v128 a, c_v128 b) {
  return c_v128_from_v64(a.v64[1], b.v64[1]);
}

SIMD_INLINE c_v128 c_v128_zip_8(c_v64 a, c_v64 b) {
  return c_v128_from_v64(c_v64_ziphi_8(a, b), c_v64_ziplo_8(a, b));
}

SIMD_INLINE c_v128 c_v128_zip_16(c_v64 a, c_v64 b) {
  return c_v128_from_v64(c_v64_ziphi_16(a, b), c_v64_ziplo_16(a, b));
}

SIMD_INLINE c_v128 c_v128_zip_32(c_v64 a, c_v64 b) {
  return c_v128_from_v64(c_v64_ziphi_32(a, b), c_v64_ziplo_32(a, b));
}

SIMD_INLINE c_v128 _c_v128_unzip_8(c_v128 a, c_v128 b, int mode) {
  c_v128 t;
  if (mode) {
    t.u8[15] = b.u8[15];
    t.u8[14] = b.u8[13];
    t.u8[13] = b.u8[11];
    t.u8[12] = b.u8[9];
    t.u8[11] = b.u8[7];
    t.u8[10] = b.u8[5];
    t.u8[9] = b.u8[3];
    t.u8[8] = b.u8[1];
    t.u8[7] = a.u8[15];
    t.u8[6] = a.u8[13];
    t.u8[5] = a.u8[11];
    t.u8[4] = a.u8[9];
    t.u8[3] = a.u8[7];
    t.u8[2] = a.u8[5];
    t.u8[1] = a.u8[3];
    t.u8[0] = a.u8[1];
  } else {
    t.u8[15] = a.u8[14];
    t.u8[14] = a.u8[12];
    t.u8[13] = a.u8[10];
    t.u8[12] = a.u8[8];
    t.u8[11] = a.u8[6];
    t.u8[10] = a.u8[4];
    t.u8[9] = a.u8[2];
    t.u8[8] = a.u8[0];
    t.u8[7] = b.u8[14];
    t.u8[6] = b.u8[12];
    t.u8[5] = b.u8[10];
    t.u8[4] = b.u8[8];
    t.u8[3] = b.u8[6];
    t.u8[2] = b.u8[4];
    t.u8[1] = b.u8[2];
    t.u8[0] = b.u8[0];
  }
  return t;
}

SIMD_INLINE c_v128 c_v128_unziplo_8(c_v128 a, c_v128 b) {
  return CONFIG_BIG_ENDIAN ? _c_v128_unzip_8(a, b, 1)
                           : _c_v128_unzip_8(a, b, 0);
}

SIMD_INLINE c_v128 c_v128_unziphi_8(c_v128 a, c_v128 b) {
  return CONFIG_BIG_ENDIAN ? _c_v128_unzip_8(b, a, 0)
                           : _c_v128_unzip_8(b, a, 1);
}

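/* On a little-endian target, c_v128_unziplo_8(a, b) keeps the even-indexed
 * bytes (b's in the low half, a's in the high half) and c_v128_unziphi_8(a, b)
 * keeps the odd-indexed bytes, e.g.:
 *
 *   lo.u8[0..7]  == { b.u8[0], b.u8[2], ..., b.u8[14] }
 *   lo.u8[8..15] == { a.u8[0], a.u8[2], ..., a.u8[14] }
 *
 * The 16- and 32-bit unzip variants below follow the same even/odd pattern. */
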
SIMD_INLINE c_v128 _c_v128_unzip_16(c_v128 a, c_v128 b, int mode) {
  c_v128 t;
  if (mode) {
    t.u16[7] = b.u16[7];
    t.u16[6] = b.u16[5];
    t.u16[5] = b.u16[3];
    t.u16[4] = b.u16[1];
    t.u16[3] = a.u16[7];
    t.u16[2] = a.u16[5];
    t.u16[1] = a.u16[3];
    t.u16[0] = a.u16[1];
  } else {
    t.u16[7] = a.u16[6];
    t.u16[6] = a.u16[4];
    t.u16[5] = a.u16[2];
    t.u16[4] = a.u16[0];
    t.u16[3] = b.u16[6];
    t.u16[2] = b.u16[4];
    t.u16[1] = b.u16[2];
    t.u16[0] = b.u16[0];
  }
  return t;
}

SIMD_INLINE c_v128 c_v128_unziplo_16(c_v128 a, c_v128 b) {
  return CONFIG_BIG_ENDIAN ? _c_v128_unzip_16(a, b, 1)
                           : _c_v128_unzip_16(a, b, 0);
}

SIMD_INLINE c_v128 c_v128_unziphi_16(c_v128 a, c_v128 b) {
  return CONFIG_BIG_ENDIAN ? _c_v128_unzip_16(b, a, 0)
                           : _c_v128_unzip_16(b, a, 1);
}

SIMD_INLINE c_v128 _c_v128_unzip_32(c_v128 a, c_v128 b, int mode) {
  c_v128 t;
  if (mode) {
    t.u32[3] = b.u32[3];
    t.u32[2] = b.u32[1];
    t.u32[1] = a.u32[3];
    t.u32[0] = a.u32[1];
  } else {
    t.u32[3] = a.u32[2];
    t.u32[2] = a.u32[0];
    t.u32[1] = b.u32[2];
    t.u32[0] = b.u32[0];
  }
  return t;
}

SIMD_INLINE c_v128 c_v128_unziplo_32(c_v128 a, c_v128 b) {
  return CONFIG_BIG_ENDIAN ? _c_v128_unzip_32(a, b, 1)
                           : _c_v128_unzip_32(a, b, 0);
}

SIMD_INLINE c_v128 c_v128_unziphi_32(c_v128 a, c_v128 b) {
  return CONFIG_BIG_ENDIAN ? _c_v128_unzip_32(b, a, 0)
                           : _c_v128_unzip_32(b, a, 1);
}

SIMD_INLINE c_v128 c_v128_unpack_u8_s16(c_v64 a) {
  return c_v128_from_v64(c_v64_unpackhi_u8_s16(a), c_v64_unpacklo_u8_s16(a));
}

SIMD_INLINE c_v128 c_v128_unpacklo_u8_s16(c_v128 a) {
  return c_v128_from_v64(c_v64_unpackhi_u8_s16(a.v64[0]),
                         c_v64_unpacklo_u8_s16(a.v64[0]));
}

SIMD_INLINE c_v128 c_v128_unpackhi_u8_s16(c_v128 a) {
  return c_v128_from_v64(c_v64_unpackhi_u8_s16(a.v64[1]),
                         c_v64_unpacklo_u8_s16(a.v64[1]));
}

SIMD_INLINE c_v128 c_v128_unpack_s8_s16(c_v64 a) {
  return c_v128_from_v64(c_v64_unpackhi_s8_s16(a), c_v64_unpacklo_s8_s16(a));
}

SIMD_INLINE c_v128 c_v128_unpacklo_s8_s16(c_v128 a) {
  return c_v128_from_v64(c_v64_unpackhi_s8_s16(a.v64[0]),
                         c_v64_unpacklo_s8_s16(a.v64[0]));
}

SIMD_INLINE c_v128 c_v128_unpackhi_s8_s16(c_v128 a) {
  return c_v128_from_v64(c_v64_unpackhi_s8_s16(a.v64[1]),
                         c_v64_unpacklo_s8_s16(a.v64[1]));
}

SIMD_INLINE c_v128 c_v128_pack_s32_s16(c_v128 a, c_v128 b) {
  return c_v128_from_v64(c_v64_pack_s32_s16(a.v64[1], a.v64[0]),
                         c_v64_pack_s32_s16(b.v64[1], b.v64[0]));
}

SIMD_INLINE c_v128 c_v128_pack_s32_u16(c_v128 a, c_v128 b) {
  return c_v128_from_v64(c_v64_pack_s32_u16(a.v64[1], a.v64[0]),
                         c_v64_pack_s32_u16(b.v64[1], b.v64[0]));
}

SIMD_INLINE c_v128 c_v128_pack_s16_u8(c_v128 a, c_v128 b) {
  return c_v128_from_v64(c_v64_pack_s16_u8(a.v64[1], a.v64[0]),
                         c_v64_pack_s16_u8(b.v64[1], b.v64[0]));
}

SIMD_INLINE c_v128 c_v128_pack_s16_s8(c_v128 a, c_v128 b) {
  return c_v128_from_v64(c_v64_pack_s16_s8(a.v64[1], a.v64[0]),
                         c_v64_pack_s16_s8(b.v64[1], b.v64[0]));
}

SIMD_INLINE c_v128 c_v128_unpack_u16_s32(c_v64 a) {
  return c_v128_from_v64(c_v64_unpackhi_u16_s32(a), c_v64_unpacklo_u16_s32(a));
}

SIMD_INLINE c_v128 c_v128_unpack_s16_s32(c_v64 a) {
  return c_v128_from_v64(c_v64_unpackhi_s16_s32(a), c_v64_unpacklo_s16_s32(a));
}

SIMD_INLINE c_v128 c_v128_unpacklo_u16_s32(c_v128 a) {
  return c_v128_from_v64(c_v64_unpackhi_u16_s32(a.v64[0]),
                         c_v64_unpacklo_u16_s32(a.v64[0]));
}

SIMD_INLINE c_v128 c_v128_unpacklo_s16_s32(c_v128 a) {
  return c_v128_from_v64(c_v64_unpackhi_s16_s32(a.v64[0]),
                         c_v64_unpacklo_s16_s32(a.v64[0]));
}

SIMD_INLINE c_v128 c_v128_unpackhi_u16_s32(c_v128 a) {
  return c_v128_from_v64(c_v64_unpackhi_u16_s32(a.v64[1]),
                         c_v64_unpacklo_u16_s32(a.v64[1]));
}

SIMD_INLINE c_v128 c_v128_unpackhi_s16_s32(c_v128 a) {
  return c_v128_from_v64(c_v64_unpackhi_s16_s32(a.v64[1]),
                         c_v64_unpacklo_s16_s32(a.v64[1]));
}

SIMD_INLINE c_v128 c_v128_shuffle_8(c_v128 a, c_v128 pattern) {
  c_v128 t;
  int c;
  for (c = 0; c < 16; c++)
    t.u8[c] = a.u8[CONFIG_BIG_ENDIAN ? 15 - (pattern.u8[c] & 15)
                                     : pattern.u8[c] & 15];

  return t;
}

SIMD_INLINE c_v128 c_v128_cmpgt_s8(c_v128 a, c_v128 b) {
  return c_v128_from_v64(c_v64_cmpgt_s8(a.v64[1], b.v64[1]),
                         c_v64_cmpgt_s8(a.v64[0], b.v64[0]));
}

SIMD_INLINE c_v128 c_v128_cmplt_s8(c_v128 a, c_v128 b) {
  return c_v128_from_v64(c_v64_cmplt_s8(a.v64[1], b.v64[1]),
                         c_v64_cmplt_s8(a.v64[0], b.v64[0]));
}

SIMD_INLINE c_v128 c_v128_cmpeq_8(c_v128 a, c_v128 b) {
  return c_v128_from_v64(c_v64_cmpeq_8(a.v64[1], b.v64[1]),
                         c_v64_cmpeq_8(a.v64[0], b.v64[0]));
}

SIMD_INLINE c_v128 c_v128_cmpgt_s16(c_v128 a, c_v128 b) {
  return c_v128_from_v64(c_v64_cmpgt_s16(a.v64[1], b.v64[1]),
                         c_v64_cmpgt_s16(a.v64[0], b.v64[0]));
}

SIMD_INLINE c_v128 c_v128_cmplt_s16(c_v128 a, c_v128 b) {
  return c_v128_from_v64(c_v64_cmplt_s16(a.v64[1], b.v64[1]),
                         c_v64_cmplt_s16(a.v64[0], b.v64[0]));
}

SIMD_INLINE c_v128 c_v128_cmpeq_16(c_v128 a, c_v128 b) {
  return c_v128_from_v64(c_v64_cmpeq_16(a.v64[1], b.v64[1]),
                         c_v64_cmpeq_16(a.v64[0], b.v64[0]));
}

SIMD_INLINE c_v128 c_v128_cmpgt_s32(c_v128 a, c_v128 b) {
  c_v128 t;
  int c;
  for (c = 0; c < 4; c++) t.s32[c] = -(a.s32[c] > b.s32[c]);
  return t;
}

SIMD_INLINE c_v128 c_v128_cmplt_s32(c_v128 a, c_v128 b) {
  c_v128 t;
  int c;
  for (c = 0; c < 4; c++) t.s32[c] = -(a.s32[c] < b.s32[c]);
  return t;
}

SIMD_INLINE c_v128 c_v128_cmpeq_32(c_v128 a, c_v128 b) {
  c_v128 t;
  int c;
  for (c = 0; c < 4; c++) t.s32[c] = -(a.s32[c] == b.s32[c]);
  return t;
}

SIMD_INLINE c_v128 c_v128_shl_n_byte(c_v128 a, const unsigned int n) {
  if (n == 0) return a;
  if (n < 8)
    return c_v128_from_v64(c_v64_or(c_v64_shl_n_byte(a.v64[1], n),
                                    c_v64_shr_n_byte(a.v64[0], 8 - n)),
                           c_v64_shl_n_byte(a.v64[0], n));
  else
    return c_v128_from_v64(c_v64_shl_n_byte(a.v64[0], n - 8), c_v64_zero());
}

SIMD_INLINE c_v128 c_v128_shr_n_byte(c_v128 a, const unsigned int n) {
  if (n == 0) return a;
  if (n < 8)
    return c_v128_from_v64(c_v64_shr_n_byte(a.v64[1], n),
                           c_v64_or(c_v64_shr_n_byte(a.v64[0], n),
                                    c_v64_shl_n_byte(a.v64[1], 8 - n)));
  else
    return c_v128_from_v64(c_v64_zero(), c_v64_shr_n_byte(a.v64[1], n - 8));
}

SIMD_INLINE c_v128 c_v128_align(c_v128 a, c_v128 b, const unsigned int c) {
  if (SIMD_CHECK && c > 15) {
    fprintf(stderr, "Error: undefined alignment %d\n", c);
    abort();
  }
  return c ? c_v128_or(c_v128_shr_n_byte(b, c), c_v128_shl_n_byte(a, 16 - c))
           : b;
}

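/* c_v128_align(a, b, c) extracts 16 bytes starting at byte c of the 32-byte
 * concatenation with b in the low 16 bytes and a in the high 16 bytes, i.e.
 * result.u8[i] == (i + c < 16) ? b.u8[i + c] : a.u8[i + c - 16] (analogous to
 * SSSE3 palignr / NEON vext). */
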
SIMD_INLINE c_v128 c_v128_shl_8(c_v128 a, const unsigned int c) {
  return c_v128_from_v64(c_v64_shl_8(a.v64[1], c), c_v64_shl_8(a.v64[0], c));
}

SIMD_INLINE c_v128 c_v128_shr_u8(c_v128 a, const unsigned int c) {
  return c_v128_from_v64(c_v64_shr_u8(a.v64[1], c), c_v64_shr_u8(a.v64[0], c));
}

SIMD_INLINE c_v128 c_v128_shr_s8(c_v128 a, const unsigned int c) {
  return c_v128_from_v64(c_v64_shr_s8(a.v64[1], c), c_v64_shr_s8(a.v64[0], c));
}

SIMD_INLINE c_v128 c_v128_shl_16(c_v128 a, const unsigned int c) {
  return c_v128_from_v64(c_v64_shl_16(a.v64[1], c), c_v64_shl_16(a.v64[0], c));
}

SIMD_INLINE c_v128 c_v128_shr_u16(c_v128 a, const unsigned int c) {
  return c_v128_from_v64(c_v64_shr_u16(a.v64[1], c),
                         c_v64_shr_u16(a.v64[0], c));
}

SIMD_INLINE c_v128 c_v128_shr_s16(c_v128 a, const unsigned int c) {
  return c_v128_from_v64(c_v64_shr_s16(a.v64[1], c),
                         c_v64_shr_s16(a.v64[0], c));
}

SIMD_INLINE c_v128 c_v128_shl_32(c_v128 a, const unsigned int c) {
  return c_v128_from_v64(c_v64_shl_32(a.v64[1], c), c_v64_shl_32(a.v64[0], c));
}

SIMD_INLINE c_v128 c_v128_shr_u32(c_v128 a, const unsigned int c) {
  return c_v128_from_v64(c_v64_shr_u32(a.v64[1], c),
                         c_v64_shr_u32(a.v64[0], c));
}

SIMD_INLINE c_v128 c_v128_shr_s32(c_v128 a, const unsigned int c) {
  return c_v128_from_v64(c_v64_shr_s32(a.v64[1], c),
                         c_v64_shr_s32(a.v64[0], c));
}

SIMD_INLINE c_v128 c_v128_shl_64(c_v128 a, const unsigned int c) {
  a.v64[1].u64 <<= c;
  a.v64[0].u64 <<= c;
  return c_v128_from_v64(a.v64[1], a.v64[0]);
}

SIMD_INLINE c_v128 c_v128_shr_u64(c_v128 a, const unsigned int c) {
  a.v64[1].u64 >>= c;
  a.v64[0].u64 >>= c;
  return c_v128_from_v64(a.v64[1], a.v64[0]);
}

SIMD_INLINE c_v128 c_v128_shr_s64(c_v128 a, const unsigned int c) {
  a.v64[1].s64 >>= c;
  a.v64[0].s64 >>= c;
  return c_v128_from_v64(a.v64[1], a.v64[0]);
}

SIMD_INLINE c_v128 c_v128_shl_n_8(c_v128 a, const unsigned int n) {
  return c_v128_shl_8(a, n);
}

SIMD_INLINE c_v128 c_v128_shl_n_16(c_v128 a, const unsigned int n) {
  return c_v128_shl_16(a, n);
}

SIMD_INLINE c_v128 c_v128_shl_n_32(c_v128 a, const unsigned int n) {
  return c_v128_shl_32(a, n);
}

SIMD_INLINE c_v128 c_v128_shl_n_64(c_v128 a, const unsigned int n) {
  return c_v128_shl_64(a, n);
}

SIMD_INLINE c_v128 c_v128_shr_n_u8(c_v128 a, const unsigned int n) {
  return c_v128_shr_u8(a, n);
}

SIMD_INLINE c_v128 c_v128_shr_n_u16(c_v128 a, const unsigned int n) {
  return c_v128_shr_u16(a, n);
}

SIMD_INLINE c_v128 c_v128_shr_n_u32(c_v128 a, const unsigned int n) {
  return c_v128_shr_u32(a, n);
}

SIMD_INLINE c_v128 c_v128_shr_n_u64(c_v128 a, const unsigned int n) {
  return c_v128_shr_u64(a, n);
}

SIMD_INLINE c_v128 c_v128_shr_n_s8(c_v128 a, const unsigned int n) {
  return c_v128_shr_s8(a, n);
}

SIMD_INLINE c_v128 c_v128_shr_n_s16(c_v128 a, const unsigned int n) {
  return c_v128_shr_s16(a, n);
}

SIMD_INLINE c_v128 c_v128_shr_n_s32(c_v128 a, const unsigned int n) {
  return c_v128_shr_s32(a, n);
}

SIMD_INLINE c_v128 c_v128_shr_n_s64(c_v128 a, const unsigned int n) {
  return c_v128_shr_s64(a, n);
}

typedef uint32_t c_sad128_internal_u16;

SIMD_INLINE c_sad128_internal_u16 c_v128_sad_u16_init(void) { return 0; }

/* Implementation dependent return value.  Result must be finalised with
 * v128_sad_u16_sum(). */
SIMD_INLINE c_sad128_internal_u16 c_v128_sad_u16(c_sad128_internal_u16 s,
                                                 c_v128 a, c_v128 b) {
  int c;
  for (c = 0; c < 8; c++)
    s += a.u16[c] > b.u16[c] ? a.u16[c] - b.u16[c] : b.u16[c] - a.u16[c];
  return s;
}

SIMD_INLINE uint32_t c_v128_sad_u16_sum(c_sad128_internal_u16 s) { return s; }

typedef uint64_t c_ssd128_internal_s16;

SIMD_INLINE c_ssd128_internal_s16 c_v128_ssd_s16_init(void) { return 0; }

/* Implementation dependent return value.  Result must be finalised with
 * v128_ssd_s16_sum(). */
SIMD_INLINE c_ssd128_internal_s16 c_v128_ssd_s16(c_ssd128_internal_s16 s,
                                                 c_v128 a, c_v128 b) {
  int c;
  for (c = 0; c < 8; c++)
    s += (int32_t)(int16_t)(a.s16[c] - b.s16[c]) *
         (int32_t)(int16_t)(a.s16[c] - b.s16[c]);
  return s;
}

SIMD_INLINE uint64_t c_v128_ssd_s16_sum(c_ssd128_internal_s16 s) { return s; }

#endif  // AOM_AOM_DSP_SIMD_V128_INTRINSICS_C_H_