xref: /aosp_15_r20/external/libvpx/vpx_dsp/vpx_dsp_rtcd_defs.pl (revision fb1b10ab9aebc7c7068eedab379b749d7e3900be)
1##
2##  Copyright (c) 2017 The WebM project authors. All Rights Reserved.
3##
4##  Use of this source code is governed by a BSD-style license
5##  that can be found in the LICENSE file in the root of the source
6##  tree. An additional intellectual property rights grant can be found
7##  in the file PATENTS.  All contributing project authors may
8##  be found in the AUTHORS file in the root of the source tree.
9##
10
# Prints the C preamble for the DSP declarations: a banner comment, the three
# headers the prototypes rely on, and forward declarations of
# struct macroblock_plane / struct ScanOrder guarded by CONFIG_VP9_ENCODER.
# The heredoc is non-interpolating; the text contains nothing to expand.
sub vpx_dsp_forward_decls() {
  print <<'END_FORWARD_DECLS';
/*
 * DSP
 */

#include "vpx/vpx_integer.h"
#include "vpx_dsp/vpx_dsp_common.h"
#include "vpx_dsp/vpx_filter.h"
#if CONFIG_VP9_ENCODER
 struct macroblock_plane;
 struct ScanOrder;
#endif

END_FORWARD_DECLS
}

# Hand the sub's name to forward_decls (defined outside this file).
forward_decls('vpx_dsp_forward_decls');
28
# ISA names for functions that are 64 bit only.
#
# Each variable holds the ISA name when $opts{arch} is "x86_64" and the empty
# string otherwise, so it can be appended to a specialize list, e.g.
#   specialize qw/some_function avx2 neon/, "$sse2_x86_64";
#
# Every variable, including $avx512_x86_64, is given the empty-string default
# so that none of them is left undefined on other architectures.
$mmx_x86_64 = $sse2_x86_64 = $ssse3_x86_64 = $avx_x86_64 = $avx2_x86_64 =
    $avx512_x86_64 = '';
if ($opts{arch} eq "x86_64") {
  $mmx_x86_64 = 'mmx';
  $sse2_x86_64 = 'sse2';
  $ssse3_x86_64 = 'ssse3';
  $avx_x86_64 = 'avx';
  $avx2_x86_64 = 'avx2';
  $avx512_x86_64 = 'avx512';
}
39
#
# Intra prediction
#

# Argument list shared by every 8-bit intra predictor prototype below.
# Each add_proto registers one function; the specialize line that follows it
# (when present) names the optimized variants registered for that function.
my $intra_pred_args = "uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left";

# 4x4 predictors
add_proto qw/void vpx_d207_predictor_4x4/, $intra_pred_args;
specialize qw/vpx_d207_predictor_4x4 neon sse2/;

add_proto qw/void vpx_d45_predictor_4x4/, $intra_pred_args;
specialize qw/vpx_d45_predictor_4x4 neon sse2/;

add_proto qw/void vpx_d45e_predictor_4x4/, $intra_pred_args;

add_proto qw/void vpx_d63_predictor_4x4/, $intra_pred_args;
specialize qw/vpx_d63_predictor_4x4 neon ssse3/;

add_proto qw/void vpx_d63e_predictor_4x4/, $intra_pred_args;

add_proto qw/void vpx_h_predictor_4x4/, $intra_pred_args;
# TODO(crbug.com/webm/1522): Re-enable vsx implementation.
specialize qw/vpx_h_predictor_4x4 neon dspr2 msa sse2/;

add_proto qw/void vpx_he_predictor_4x4/, $intra_pred_args;

add_proto qw/void vpx_d117_predictor_4x4/, $intra_pred_args;
specialize qw/vpx_d117_predictor_4x4 neon/;

add_proto qw/void vpx_d135_predictor_4x4/, $intra_pred_args;
specialize qw/vpx_d135_predictor_4x4 neon/;

add_proto qw/void vpx_d153_predictor_4x4/, $intra_pred_args;
specialize qw/vpx_d153_predictor_4x4 neon ssse3/;

add_proto qw/void vpx_v_predictor_4x4/, $intra_pred_args;
specialize qw/vpx_v_predictor_4x4 neon msa sse2/;

add_proto qw/void vpx_ve_predictor_4x4/, $intra_pred_args;

add_proto qw/void vpx_tm_predictor_4x4/, $intra_pred_args;
# TODO(crbug.com/webm/1522): Re-enable vsx implementation.
specialize qw/vpx_tm_predictor_4x4 neon dspr2 msa sse2/;

add_proto qw/void vpx_dc_predictor_4x4/, $intra_pred_args;
specialize qw/vpx_dc_predictor_4x4 dspr2 msa neon sse2/;

add_proto qw/void vpx_dc_top_predictor_4x4/, $intra_pred_args;
specialize qw/vpx_dc_top_predictor_4x4 msa neon sse2/;

add_proto qw/void vpx_dc_left_predictor_4x4/, $intra_pred_args;
specialize qw/vpx_dc_left_predictor_4x4 msa neon sse2/;

add_proto qw/void vpx_dc_128_predictor_4x4/, $intra_pred_args;
specialize qw/vpx_dc_128_predictor_4x4 msa neon sse2/;

# 8x8 predictors
add_proto qw/void vpx_d207_predictor_8x8/, $intra_pred_args;
specialize qw/vpx_d207_predictor_8x8 neon ssse3/;

add_proto qw/void vpx_d45_predictor_8x8/, $intra_pred_args;
# TODO(crbug.com/webm/1522): Re-enable vsx implementation.
specialize qw/vpx_d45_predictor_8x8 neon sse2/;

add_proto qw/void vpx_d63_predictor_8x8/, $intra_pred_args;
# TODO(crbug.com/webm/1522): Re-enable vsx implementation.
specialize qw/vpx_d63_predictor_8x8 neon ssse3/;

add_proto qw/void vpx_h_predictor_8x8/, $intra_pred_args;
# TODO(crbug.com/webm/1522): Re-enable vsx implementation.
specialize qw/vpx_h_predictor_8x8 neon dspr2 msa sse2/;

add_proto qw/void vpx_d117_predictor_8x8/, $intra_pred_args;
specialize qw/vpx_d117_predictor_8x8 neon/;

add_proto qw/void vpx_d135_predictor_8x8/, $intra_pred_args;
specialize qw/vpx_d135_predictor_8x8 neon/;

add_proto qw/void vpx_d153_predictor_8x8/, $intra_pred_args;
specialize qw/vpx_d153_predictor_8x8 neon ssse3/;

add_proto qw/void vpx_v_predictor_8x8/, $intra_pred_args;
specialize qw/vpx_v_predictor_8x8 neon msa sse2/;

add_proto qw/void vpx_tm_predictor_8x8/, $intra_pred_args;
# TODO(crbug.com/webm/1522): Re-enable vsx implementation.
specialize qw/vpx_tm_predictor_8x8 neon dspr2 msa sse2/;

add_proto qw/void vpx_dc_predictor_8x8/, $intra_pred_args;
# TODO(crbug.com/webm/1522): Re-enable vsx implementation.
specialize qw/vpx_dc_predictor_8x8 dspr2 neon msa sse2 lsx/;

add_proto qw/void vpx_dc_top_predictor_8x8/, $intra_pred_args;
specialize qw/vpx_dc_top_predictor_8x8 neon msa sse2/;

add_proto qw/void vpx_dc_left_predictor_8x8/, $intra_pred_args;
specialize qw/vpx_dc_left_predictor_8x8 neon msa sse2/;

add_proto qw/void vpx_dc_128_predictor_8x8/, $intra_pred_args;
specialize qw/vpx_dc_128_predictor_8x8 neon msa sse2/;

# 16x16 predictors
add_proto qw/void vpx_d207_predictor_16x16/, $intra_pred_args;
specialize qw/vpx_d207_predictor_16x16 neon ssse3/;

add_proto qw/void vpx_d45_predictor_16x16/, $intra_pred_args;
specialize qw/vpx_d45_predictor_16x16 neon ssse3 vsx/;

add_proto qw/void vpx_d63_predictor_16x16/, $intra_pred_args;
specialize qw/vpx_d63_predictor_16x16 neon ssse3 vsx/;

add_proto qw/void vpx_h_predictor_16x16/, $intra_pred_args;
specialize qw/vpx_h_predictor_16x16 neon dspr2 msa sse2 vsx/;

add_proto qw/void vpx_d117_predictor_16x16/, $intra_pred_args;
specialize qw/vpx_d117_predictor_16x16 neon/;

add_proto qw/void vpx_d135_predictor_16x16/, $intra_pred_args;
specialize qw/vpx_d135_predictor_16x16 neon/;

add_proto qw/void vpx_d153_predictor_16x16/, $intra_pred_args;
specialize qw/vpx_d153_predictor_16x16 neon ssse3/;

add_proto qw/void vpx_v_predictor_16x16/, $intra_pred_args;
specialize qw/vpx_v_predictor_16x16 neon msa sse2 vsx/;

add_proto qw/void vpx_tm_predictor_16x16/, $intra_pred_args;
specialize qw/vpx_tm_predictor_16x16 neon msa sse2 vsx/;

add_proto qw/void vpx_dc_predictor_16x16/, $intra_pred_args;
specialize qw/vpx_dc_predictor_16x16 dspr2 neon msa sse2 vsx lsx/;

add_proto qw/void vpx_dc_top_predictor_16x16/, $intra_pred_args;
specialize qw/vpx_dc_top_predictor_16x16 neon msa sse2 vsx/;

add_proto qw/void vpx_dc_left_predictor_16x16/, $intra_pred_args;
specialize qw/vpx_dc_left_predictor_16x16 neon msa sse2 vsx/;

add_proto qw/void vpx_dc_128_predictor_16x16/, $intra_pred_args;
specialize qw/vpx_dc_128_predictor_16x16 neon msa sse2 vsx/;

# 32x32 predictors
add_proto qw/void vpx_d207_predictor_32x32/, $intra_pred_args;
specialize qw/vpx_d207_predictor_32x32 neon ssse3/;

add_proto qw/void vpx_d45_predictor_32x32/, $intra_pred_args;
specialize qw/vpx_d45_predictor_32x32 neon ssse3 vsx/;

add_proto qw/void vpx_d63_predictor_32x32/, $intra_pred_args;
specialize qw/vpx_d63_predictor_32x32 neon ssse3 vsx/;

add_proto qw/void vpx_h_predictor_32x32/, $intra_pred_args;
specialize qw/vpx_h_predictor_32x32 neon msa sse2 vsx/;

add_proto qw/void vpx_d117_predictor_32x32/, $intra_pred_args;
specialize qw/vpx_d117_predictor_32x32 neon/;

add_proto qw/void vpx_d135_predictor_32x32/, $intra_pred_args;
specialize qw/vpx_d135_predictor_32x32 neon/;

add_proto qw/void vpx_d153_predictor_32x32/, $intra_pred_args;
specialize qw/vpx_d153_predictor_32x32 neon ssse3/;

add_proto qw/void vpx_v_predictor_32x32/, $intra_pred_args;
specialize qw/vpx_v_predictor_32x32 neon msa sse2 vsx/;

add_proto qw/void vpx_tm_predictor_32x32/, $intra_pred_args;
specialize qw/vpx_tm_predictor_32x32 neon msa sse2 vsx/;

add_proto qw/void vpx_dc_predictor_32x32/, $intra_pred_args;
specialize qw/vpx_dc_predictor_32x32 msa neon sse2 vsx/;

add_proto qw/void vpx_dc_top_predictor_32x32/, $intra_pred_args;
specialize qw/vpx_dc_top_predictor_32x32 msa neon sse2 vsx/;

add_proto qw/void vpx_dc_left_predictor_32x32/, $intra_pred_args;
specialize qw/vpx_dc_left_predictor_32x32 msa neon sse2 vsx/;

add_proto qw/void vpx_dc_128_predictor_32x32/, $intra_pred_args;
specialize qw/vpx_dc_128_predictor_32x32 msa neon sse2 vsx/;
214
# High bitdepth functions
# Registered only when CONFIG_VP9_HIGHBITDEPTH is "yes". These prototypes use
# uint16_t buffers and take a trailing "int bd" argument.
if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
  # Argument list shared by every high-bitdepth intra predictor below.
  my $highbd_intra_pred_args = "uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd";

  # 4x4 predictors
  add_proto qw/void vpx_highbd_d207_predictor_4x4/, $highbd_intra_pred_args;
  specialize qw/vpx_highbd_d207_predictor_4x4 neon sse2/;

  add_proto qw/void vpx_highbd_d45_predictor_4x4/, $highbd_intra_pred_args;
  specialize qw/vpx_highbd_d45_predictor_4x4 neon ssse3/;

  add_proto qw/void vpx_highbd_d63_predictor_4x4/, $highbd_intra_pred_args;
  specialize qw/vpx_highbd_d63_predictor_4x4 neon sse2/;

  add_proto qw/void vpx_highbd_h_predictor_4x4/, $highbd_intra_pred_args;
  specialize qw/vpx_highbd_h_predictor_4x4 neon sse2/;

  add_proto qw/void vpx_highbd_d117_predictor_4x4/, $highbd_intra_pred_args;
  specialize qw/vpx_highbd_d117_predictor_4x4 neon sse2/;

  add_proto qw/void vpx_highbd_d135_predictor_4x4/, $highbd_intra_pred_args;
  specialize qw/vpx_highbd_d135_predictor_4x4 neon sse2/;

  add_proto qw/void vpx_highbd_d153_predictor_4x4/, $highbd_intra_pred_args;
  specialize qw/vpx_highbd_d153_predictor_4x4 neon sse2/;

  add_proto qw/void vpx_highbd_v_predictor_4x4/, $highbd_intra_pred_args;
  specialize qw/vpx_highbd_v_predictor_4x4 neon sse2/;

  add_proto qw/void vpx_highbd_tm_predictor_4x4/, $highbd_intra_pred_args;
  specialize qw/vpx_highbd_tm_predictor_4x4 neon sse2/;

  add_proto qw/void vpx_highbd_dc_predictor_4x4/, $highbd_intra_pred_args;
  specialize qw/vpx_highbd_dc_predictor_4x4 neon sse2/;

  add_proto qw/void vpx_highbd_dc_top_predictor_4x4/, $highbd_intra_pred_args;
  specialize qw/vpx_highbd_dc_top_predictor_4x4 neon sse2/;

  add_proto qw/void vpx_highbd_dc_left_predictor_4x4/, $highbd_intra_pred_args;
  specialize qw/vpx_highbd_dc_left_predictor_4x4 neon sse2/;

  add_proto qw/void vpx_highbd_dc_128_predictor_4x4/, $highbd_intra_pred_args;
  specialize qw/vpx_highbd_dc_128_predictor_4x4 neon sse2/;

  # 8x8 predictors
  add_proto qw/void vpx_highbd_d207_predictor_8x8/, $highbd_intra_pred_args;
  specialize qw/vpx_highbd_d207_predictor_8x8 neon ssse3/;

  add_proto qw/void vpx_highbd_d45_predictor_8x8/, $highbd_intra_pred_args;
  specialize qw/vpx_highbd_d45_predictor_8x8 neon ssse3/;

  add_proto qw/void vpx_highbd_d63_predictor_8x8/, $highbd_intra_pred_args;
  specialize qw/vpx_highbd_d63_predictor_8x8 neon ssse3/;

  add_proto qw/void vpx_highbd_h_predictor_8x8/, $highbd_intra_pred_args;
  specialize qw/vpx_highbd_h_predictor_8x8 neon sse2/;

  add_proto qw/void vpx_highbd_d117_predictor_8x8/, $highbd_intra_pred_args;
  specialize qw/vpx_highbd_d117_predictor_8x8 neon ssse3/;

  add_proto qw/void vpx_highbd_d135_predictor_8x8/, $highbd_intra_pred_args;
  specialize qw/vpx_highbd_d135_predictor_8x8 neon ssse3/;

  add_proto qw/void vpx_highbd_d153_predictor_8x8/, $highbd_intra_pred_args;
  specialize qw/vpx_highbd_d153_predictor_8x8 neon ssse3/;

  add_proto qw/void vpx_highbd_v_predictor_8x8/, $highbd_intra_pred_args;
  specialize qw/vpx_highbd_v_predictor_8x8 neon sse2/;

  add_proto qw/void vpx_highbd_tm_predictor_8x8/, $highbd_intra_pred_args;
  specialize qw/vpx_highbd_tm_predictor_8x8 neon sse2/;

  add_proto qw/void vpx_highbd_dc_predictor_8x8/, $highbd_intra_pred_args;
  specialize qw/vpx_highbd_dc_predictor_8x8 neon sse2/;

  add_proto qw/void vpx_highbd_dc_top_predictor_8x8/, $highbd_intra_pred_args;
  specialize qw/vpx_highbd_dc_top_predictor_8x8 neon sse2/;

  add_proto qw/void vpx_highbd_dc_left_predictor_8x8/, $highbd_intra_pred_args;
  specialize qw/vpx_highbd_dc_left_predictor_8x8 neon sse2/;

  add_proto qw/void vpx_highbd_dc_128_predictor_8x8/, $highbd_intra_pred_args;
  specialize qw/vpx_highbd_dc_128_predictor_8x8 neon sse2/;

  # 16x16 predictors
  add_proto qw/void vpx_highbd_d207_predictor_16x16/, $highbd_intra_pred_args;
  specialize qw/vpx_highbd_d207_predictor_16x16 neon ssse3/;

  add_proto qw/void vpx_highbd_d45_predictor_16x16/, $highbd_intra_pred_args;
  specialize qw/vpx_highbd_d45_predictor_16x16 neon ssse3/;

  add_proto qw/void vpx_highbd_d63_predictor_16x16/, $highbd_intra_pred_args;
  specialize qw/vpx_highbd_d63_predictor_16x16 neon ssse3/;

  add_proto qw/void vpx_highbd_h_predictor_16x16/, $highbd_intra_pred_args;
  specialize qw/vpx_highbd_h_predictor_16x16 neon sse2/;

  add_proto qw/void vpx_highbd_d117_predictor_16x16/, $highbd_intra_pred_args;
  specialize qw/vpx_highbd_d117_predictor_16x16 neon ssse3/;

  add_proto qw/void vpx_highbd_d135_predictor_16x16/, $highbd_intra_pred_args;
  specialize qw/vpx_highbd_d135_predictor_16x16 neon ssse3/;

  add_proto qw/void vpx_highbd_d153_predictor_16x16/, $highbd_intra_pred_args;
  specialize qw/vpx_highbd_d153_predictor_16x16 neon ssse3/;

  add_proto qw/void vpx_highbd_v_predictor_16x16/, $highbd_intra_pred_args;
  specialize qw/vpx_highbd_v_predictor_16x16 neon sse2/;

  add_proto qw/void vpx_highbd_tm_predictor_16x16/, $highbd_intra_pred_args;
  specialize qw/vpx_highbd_tm_predictor_16x16 neon sse2/;

  add_proto qw/void vpx_highbd_dc_predictor_16x16/, $highbd_intra_pred_args;
  specialize qw/vpx_highbd_dc_predictor_16x16 neon sse2/;

  add_proto qw/void vpx_highbd_dc_top_predictor_16x16/, $highbd_intra_pred_args;
  specialize qw/vpx_highbd_dc_top_predictor_16x16 neon sse2/;

  add_proto qw/void vpx_highbd_dc_left_predictor_16x16/, $highbd_intra_pred_args;
  specialize qw/vpx_highbd_dc_left_predictor_16x16 neon sse2/;

  add_proto qw/void vpx_highbd_dc_128_predictor_16x16/, $highbd_intra_pred_args;
  specialize qw/vpx_highbd_dc_128_predictor_16x16 neon sse2/;

  # 32x32 predictors
  add_proto qw/void vpx_highbd_d207_predictor_32x32/, $highbd_intra_pred_args;
  specialize qw/vpx_highbd_d207_predictor_32x32 neon ssse3/;

  add_proto qw/void vpx_highbd_d45_predictor_32x32/, $highbd_intra_pred_args;
  specialize qw/vpx_highbd_d45_predictor_32x32 neon ssse3/;

  add_proto qw/void vpx_highbd_d63_predictor_32x32/, $highbd_intra_pred_args;
  specialize qw/vpx_highbd_d63_predictor_32x32 neon ssse3/;

  add_proto qw/void vpx_highbd_h_predictor_32x32/, $highbd_intra_pred_args;
  specialize qw/vpx_highbd_h_predictor_32x32 neon sse2/;

  add_proto qw/void vpx_highbd_d117_predictor_32x32/, $highbd_intra_pred_args;
  specialize qw/vpx_highbd_d117_predictor_32x32 neon ssse3/;

  add_proto qw/void vpx_highbd_d135_predictor_32x32/, $highbd_intra_pred_args;
  specialize qw/vpx_highbd_d135_predictor_32x32 neon ssse3/;

  add_proto qw/void vpx_highbd_d153_predictor_32x32/, $highbd_intra_pred_args;
  specialize qw/vpx_highbd_d153_predictor_32x32 neon ssse3/;

  add_proto qw/void vpx_highbd_v_predictor_32x32/, $highbd_intra_pred_args;
  specialize qw/vpx_highbd_v_predictor_32x32 neon sse2/;

  add_proto qw/void vpx_highbd_tm_predictor_32x32/, $highbd_intra_pred_args;
  specialize qw/vpx_highbd_tm_predictor_32x32 neon sse2/;

  add_proto qw/void vpx_highbd_dc_predictor_32x32/, $highbd_intra_pred_args;
  specialize qw/vpx_highbd_dc_predictor_32x32 neon sse2/;

  add_proto qw/void vpx_highbd_dc_top_predictor_32x32/, $highbd_intra_pred_args;
  specialize qw/vpx_highbd_dc_top_predictor_32x32 neon sse2/;

  add_proto qw/void vpx_highbd_dc_left_predictor_32x32/, $highbd_intra_pred_args;
  specialize qw/vpx_highbd_dc_left_predictor_32x32 neon sse2/;

  add_proto qw/void vpx_highbd_dc_128_predictor_32x32/, $highbd_intra_pred_args;
  specialize qw/vpx_highbd_dc_128_predictor_32x32 neon sse2/;
}  # CONFIG_VP9_HIGHBITDEPTH
373
if (vpx_config("CONFIG_VP9") eq "yes") {
  #
  # Sub Pixel Filters
  #
  # Registered only when CONFIG_VP9 is "yes". Every function in this section
  # shares the argument list below. The vpx_scaled_* functions other than
  # vpx_scaled_2d have no specialize line.
  my $convolve_args = "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h";

  add_proto qw/void vpx_convolve_copy/, $convolve_args;
  specialize qw/vpx_convolve_copy neon dspr2 msa sse2 vsx lsx/;

  add_proto qw/void vpx_convolve_avg/, $convolve_args;
  specialize qw/vpx_convolve_avg neon dspr2 msa sse2 vsx mmi lsx/;

  add_proto qw/void vpx_convolve8/, $convolve_args;
  specialize qw/vpx_convolve8 sse2 ssse3 avx2 neon neon_dotprod neon_i8mm dspr2 msa vsx mmi lsx/;

  add_proto qw/void vpx_convolve8_horiz/, $convolve_args;
  specialize qw/vpx_convolve8_horiz sse2 ssse3 avx2 neon neon_dotprod neon_i8mm dspr2 msa vsx mmi lsx/;

  add_proto qw/void vpx_convolve8_vert/, $convolve_args;
  specialize qw/vpx_convolve8_vert sse2 ssse3 avx2 neon neon_dotprod neon_i8mm dspr2 msa vsx mmi lsx/;

  add_proto qw/void vpx_convolve8_avg/, $convolve_args;
  specialize qw/vpx_convolve8_avg sse2 ssse3 avx2 neon neon_dotprod neon_i8mm dspr2 msa vsx mmi lsx/;

  add_proto qw/void vpx_convolve8_avg_horiz/, $convolve_args;
  specialize qw/vpx_convolve8_avg_horiz sse2 ssse3 avx2 neon neon_dotprod neon_i8mm dspr2 msa vsx mmi lsx/;

  add_proto qw/void vpx_convolve8_avg_vert/, $convolve_args;
  specialize qw/vpx_convolve8_avg_vert sse2 ssse3 avx2 neon neon_dotprod neon_i8mm dspr2 msa vsx mmi lsx/;

  add_proto qw/void vpx_scaled_2d/, $convolve_args;
  specialize qw/vpx_scaled_2d ssse3 neon msa/;

  add_proto qw/void vpx_scaled_horiz/, $convolve_args;

  add_proto qw/void vpx_scaled_vert/, $convolve_args;

  add_proto qw/void vpx_scaled_avg_2d/, $convolve_args;

  add_proto qw/void vpx_scaled_avg_horiz/, $convolve_args;

  add_proto qw/void vpx_scaled_avg_vert/, $convolve_args;
} #CONFIG_VP9
415
if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
  #
  # Sub Pixel Filters
  #
  # High-bitdepth counterparts, registered only when CONFIG_VP9_HIGHBITDEPTH
  # is "yes". They share the argument list below (uint16_t buffers plus a
  # trailing "int bd"). The vpx_highbd_convolve8* entries append
  # "$sse2_x86_64" to their specialize list; that variable is set earlier in
  # this file to 'sse2' for x86_64 and to '' otherwise.
  my $highbd_convolve_args = "const uint16_t *src, ptrdiff_t src_stride, uint16_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h, int bd";

  add_proto qw/void vpx_highbd_convolve_copy/, $highbd_convolve_args;
  specialize qw/vpx_highbd_convolve_copy sse2 avx2 neon/;

  add_proto qw/void vpx_highbd_convolve_avg/, $highbd_convolve_args;
  specialize qw/vpx_highbd_convolve_avg sse2 avx2 neon/;

  add_proto qw/void vpx_highbd_convolve8/, $highbd_convolve_args;
  specialize qw/vpx_highbd_convolve8 avx2 neon sve2/, "$sse2_x86_64";

  add_proto qw/void vpx_highbd_convolve8_horiz/, $highbd_convolve_args;
  specialize qw/vpx_highbd_convolve8_horiz avx2 neon sve/, "$sse2_x86_64";

  add_proto qw/void vpx_highbd_convolve8_vert/, $highbd_convolve_args;
  specialize qw/vpx_highbd_convolve8_vert avx2 neon sve2/, "$sse2_x86_64";

  add_proto qw/void vpx_highbd_convolve8_avg/, $highbd_convolve_args;
  specialize qw/vpx_highbd_convolve8_avg avx2 neon sve2/, "$sse2_x86_64";

  add_proto qw/void vpx_highbd_convolve8_avg_horiz/, $highbd_convolve_args;
  specialize qw/vpx_highbd_convolve8_avg_horiz avx2 neon sve/, "$sse2_x86_64";

  add_proto qw/void vpx_highbd_convolve8_avg_vert/, $highbd_convolve_args;
  specialize qw/vpx_highbd_convolve8_avg_vert avx2 neon sve2/, "$sse2_x86_64";
}  # CONFIG_VP9_HIGHBITDEPTH
444
if (vpx_config("CONFIG_VP9") eq "yes") {
  #
  # Loopfilter
  #
  # Registered only when CONFIG_VP9 is "yes". Two argument lists are used:
  # one with a single blimit/limit/thresh set and one with two sets
  # (blimit0/limit0/thresh0 and blimit1/limit1/thresh1). Note that the
  # *_16_dual functions take the single-set list.
  my $lpf_args = "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
  my $lpf_dual_args = "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";

  add_proto qw/void vpx_lpf_vertical_16/, $lpf_args;
  specialize qw/vpx_lpf_vertical_16 sse2 neon dspr2 msa/;

  add_proto qw/void vpx_lpf_vertical_16_dual/, $lpf_args;
  specialize qw/vpx_lpf_vertical_16_dual sse2 neon dspr2 msa lsx/;

  add_proto qw/void vpx_lpf_vertical_8/, $lpf_args;
  specialize qw/vpx_lpf_vertical_8 sse2 neon dspr2 msa lsx/;

  add_proto qw/void vpx_lpf_vertical_8_dual/, $lpf_dual_args;
  specialize qw/vpx_lpf_vertical_8_dual sse2 neon dspr2 msa lsx/;

  add_proto qw/void vpx_lpf_vertical_4/, $lpf_args;
  specialize qw/vpx_lpf_vertical_4 sse2 neon dspr2 msa lsx/;

  add_proto qw/void vpx_lpf_vertical_4_dual/, $lpf_dual_args;
  specialize qw/vpx_lpf_vertical_4_dual sse2 neon dspr2 msa lsx/;

  add_proto qw/void vpx_lpf_horizontal_16/, $lpf_args;
  specialize qw/vpx_lpf_horizontal_16 sse2 avx2 neon dspr2 msa/;

  add_proto qw/void vpx_lpf_horizontal_16_dual/, $lpf_args;
  specialize qw/vpx_lpf_horizontal_16_dual sse2 avx2 neon dspr2 msa lsx/;

  add_proto qw/void vpx_lpf_horizontal_8/, $lpf_args;
  specialize qw/vpx_lpf_horizontal_8 sse2 neon dspr2 msa lsx/;

  add_proto qw/void vpx_lpf_horizontal_8_dual/, $lpf_dual_args;
  specialize qw/vpx_lpf_horizontal_8_dual sse2 neon dspr2 msa lsx/;

  add_proto qw/void vpx_lpf_horizontal_4/, $lpf_args;
  specialize qw/vpx_lpf_horizontal_4 sse2 neon dspr2 msa lsx/;

  add_proto qw/void vpx_lpf_horizontal_4_dual/, $lpf_dual_args;
  specialize qw/vpx_lpf_horizontal_4_dual sse2 neon dspr2 msa lsx/;
} #CONFIG_VP9
485
if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
  # Declares vpx_highbd_lpf_{vertical,horizontal}_{16,8,4}[_dual]; every one
  # of them is passed to specialize with sse2 and neon.
  #
  # Argument lists: the plain functions and the 16-wide "_dual" functions take
  # one blimit/limit/thresh triple, while the 8- and 4-wide "_dual" functions
  # take two triples. All of them end with "int bd".
  my $hbd_lpf_single_args = "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
  my $hbd_lpf_paired_args = "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd";

  foreach my $hbd_lpf_dir (qw/vertical horizontal/) {
    foreach my $hbd_lpf_width (16, 8, 4) {
      foreach my $hbd_lpf_suffix ("", "_dual") {
        my $hbd_lpf_name = "vpx_highbd_lpf_${hbd_lpf_dir}_${hbd_lpf_width}${hbd_lpf_suffix}";
        my $hbd_lpf_args = $hbd_lpf_single_args;
        if ($hbd_lpf_suffix eq "_dual" && $hbd_lpf_width != 16) {
          $hbd_lpf_args = $hbd_lpf_paired_args;
        }
        add_proto("void", $hbd_lpf_name, $hbd_lpf_args);
        specialize($hbd_lpf_name, qw/sse2 neon/);
      }
    }
  }
}  # CONFIG_VP9_HIGHBITDEPTH
523
524#
525# Encoder functions.
526#
527
528#
529# Forward transform
530#
if (vpx_config("CONFIG_VP9_ENCODER") eq "yes") {
  # Every forward-transform prototype in this section has the same return
  # type (void) and the same argument list; only the targets handed to
  # specialize differ, and they depend on CONFIG_VP9_HIGHBITDEPTH.
  my $fdct_args = "const int16_t *input, tran_low_t *output, int stride";

  # Takes (function name, targets...) and issues the add_proto/specialize pair.
  my $declare_fdct = sub {
    my ($fdct_name, @fdct_targets) = @_;
    add_proto("void", $fdct_name, $fdct_args);
    specialize($fdct_name, @fdct_targets);
  };

  if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
    foreach my $fdct_row (
      [qw/vpx_fdct4x4 neon sse2/],
      [qw/vpx_fdct4x4_1 sse2 neon/],
      [qw/vpx_fdct8x8 neon sse2/],
      [qw/vpx_fdct8x8_1 neon sse2 msa/],
      [qw/vpx_fdct16x16 neon sse2/],
      [qw/vpx_fdct16x16_1 sse2 neon/],
      [qw/vpx_fdct32x32 neon sse2/],
      [qw/vpx_fdct32x32_rd neon sse2/],
      [qw/vpx_fdct32x32_1 sse2 neon/],
      [qw/vpx_highbd_fdct4x4 sse2 neon/],
      [qw/vpx_highbd_fdct8x8 sse2 neon/],
      [qw/vpx_highbd_fdct8x8_1 neon/],
      [qw/vpx_highbd_fdct16x16 sse2 neon/],
      [qw/vpx_highbd_fdct16x16_1 neon/],
      [qw/vpx_highbd_fdct32x32 sse2 neon/],
      [qw/vpx_highbd_fdct32x32_rd sse2 neon/],
      [qw/vpx_highbd_fdct32x32_1 neon/],
    ) {
      $declare_fdct->(@$fdct_row);
    }

    # NOTE(review): vpx_highbd_fdct4x4_1 is specialized here although no
    # add_proto for it is visible in this section -- confirm whether a
    # prototype exists elsewhere.
    specialize qw/vpx_highbd_fdct4x4_1 neon/;

    # Point the highbd *_1 neon entries at the vpx_fdct*_1_neon names. These
    # assignments have to stay after the matching specialize calls.
    $vpx_highbd_fdct4x4_1_neon=vpx_fdct4x4_1_neon;
    $vpx_highbd_fdct8x8_1_neon=vpx_fdct8x8_1_neon;
  } else {
    foreach my $fdct_row (
      [qw/vpx_fdct4x4 neon sse2 msa lsx/],
      [qw/vpx_fdct4x4_1 sse2 neon/],
      # $ssse3_x86_64 is 'ssse3' only when $opts{arch} is x86_64, else ''.
      [qw/vpx_fdct8x8 sse2 neon msa lsx/, "$ssse3_x86_64"],
      [qw/vpx_fdct8x8_1 sse2 neon msa/],
      [qw/vpx_fdct16x16 neon sse2 avx2 msa lsx/],
      [qw/vpx_fdct16x16_1 sse2 neon msa/],
      [qw/vpx_fdct32x32 neon sse2 avx2 msa lsx/],
      [qw/vpx_fdct32x32_rd sse2 avx2 neon msa vsx lsx/],
      [qw/vpx_fdct32x32_1 sse2 neon msa/],
    ) {
      $declare_fdct->(@$fdct_row);
    }
  }  # CONFIG_VP9_HIGHBITDEPTH
}  # CONFIG_VP9_ENCODER
615
616#
617# Inverse transform
if (vpx_config("CONFIG_VP9") eq "yes") {

# 8-bit inverse transforms: all prototypes share one argument list.
my $inv_txfm_args = "const tran_low_t *input, uint8_t *dest, int stride";
foreach my $inv_txfm_fn (qw/
    vpx_idct4x4_16_add vpx_idct4x4_1_add
    vpx_idct8x8_64_add vpx_idct8x8_12_add vpx_idct8x8_1_add
    vpx_idct16x16_256_add vpx_idct16x16_38_add vpx_idct16x16_10_add vpx_idct16x16_1_add
    vpx_idct32x32_1024_add vpx_idct32x32_135_add vpx_idct32x32_34_add vpx_idct32x32_1_add
    vpx_iwht4x4_16_add vpx_iwht4x4_1_add
  /) {
  add_proto("void", $inv_txfm_fn, $inv_txfm_args);
}

if (vpx_config("CONFIG_EMULATE_HARDWARE") ne "yes") {
  # Base targets. More are appended below when CONFIG_VP9_HIGHBITDEPTH is
  # off. Each row is [function name, targets...].
  foreach my $inv_txfm_row (
    [qw/vpx_idct4x4_16_add neon sse2 vsx/],
    [qw/vpx_idct4x4_1_add neon sse2/],
    [qw/vpx_idct8x8_64_add neon sse2 vsx/],
    [qw/vpx_idct8x8_12_add neon sse2 ssse3/],
    [qw/vpx_idct8x8_1_add neon sse2/],
    [qw/vpx_idct16x16_256_add neon sse2 avx2 vsx/],
    [qw/vpx_idct16x16_38_add neon sse2/],
    [qw/vpx_idct16x16_10_add neon sse2/],
    [qw/vpx_idct16x16_1_add neon sse2/],
    [qw/vpx_idct32x32_1024_add neon sse2 avx2 vsx/],
    [qw/vpx_idct32x32_135_add neon sse2 ssse3 avx2/],
    [qw/vpx_idct32x32_34_add neon sse2 ssse3/],
    [qw/vpx_idct32x32_1_add neon sse2/],
    [qw/vpx_iwht4x4_16_add sse2 vsx/],
  ) {
    specialize(@$inv_txfm_row);
  }

  if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") ne "yes") {
    # These targets are appended to the base ones above.
    foreach my $inv_txfm_row (
      [qw/vpx_idct4x4_16_add dspr2 msa/],
      [qw/vpx_idct4x4_1_add dspr2 msa/],
      [qw/vpx_idct8x8_64_add dspr2 msa/],
      [qw/vpx_idct8x8_12_add dspr2 msa/],
      [qw/vpx_idct8x8_1_add dspr2 msa/],
      [qw/vpx_idct16x16_256_add dspr2 msa/],
      [qw/vpx_idct16x16_38_add dspr2 msa/],
      [qw/vpx_idct16x16_10_add dspr2 msa/],
      [qw/vpx_idct16x16_1_add dspr2 msa/],
      [qw/vpx_idct32x32_1024_add dspr2 msa lsx/],
      [qw/vpx_idct32x32_135_add dspr2 msa/],
      [qw/vpx_idct32x32_34_add dspr2 msa lsx/],
      [qw/vpx_idct32x32_1_add dspr2 msa lsx/],
      [qw/vpx_iwht4x4_16_add msa/],
      [qw/vpx_iwht4x4_1_add msa/],
    ) {
      specialize(@$inv_txfm_row);
    }

    # The 16x16_38 and 32x32_135 entries are pointed at the full-size
    # (256 / 1024) names. These assignments must follow the specialize calls
    # above because they overwrite what those calls set.
    $vpx_idct16x16_38_add_dspr2=vpx_idct16x16_256_add_dspr2;
    $vpx_idct16x16_38_add_msa=vpx_idct16x16_256_add_msa;
    $vpx_idct32x32_135_add_dspr2=vpx_idct32x32_1024_add_dspr2;
    $vpx_idct32x32_135_add_msa=vpx_idct32x32_1024_add_msa;
    # lsx is set only through this assignment; vpx_idct32x32_135_add has no
    # lsx entry in the specialize rows above.
    $vpx_idct32x32_135_add_lsx=vpx_idct32x32_1024_add_lsx;
  } # !CONFIG_VP9_HIGHBITDEPTH
}  # !CONFIG_EMULATE_HARDWARE

if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
  # When optimized versions of these functions are added, a check is needed
  # so that CONFIG_EMULATE_HARDWARE builds fall back to the C versions only.
  my $hbd_inv_txfm_args = "const tran_low_t *input, uint16_t *dest, int stride, int bd";
  foreach my $hbd_inv_txfm_fn (qw/
      vpx_highbd_idct4x4_16_add vpx_highbd_idct4x4_1_add
      vpx_highbd_idct8x8_64_add vpx_highbd_idct8x8_12_add vpx_highbd_idct8x8_1_add
      vpx_highbd_idct16x16_256_add vpx_highbd_idct16x16_38_add vpx_highbd_idct16x16_10_add vpx_highbd_idct16x16_1_add
      vpx_highbd_idct32x32_1024_add vpx_highbd_idct32x32_135_add vpx_highbd_idct32x32_34_add vpx_highbd_idct32x32_1_add
      vpx_highbd_iwht4x4_16_add vpx_highbd_iwht4x4_1_add
    /) {
    add_proto("void", $hbd_inv_txfm_fn, $hbd_inv_txfm_args);
  }

  # The *_1_add variants are specialized regardless of
  # CONFIG_EMULATE_HARDWARE.
  foreach my $hbd_dc_fn (qw/
      vpx_highbd_idct4x4_1_add
      vpx_highbd_idct8x8_1_add
      vpx_highbd_idct16x16_1_add
      vpx_highbd_idct32x32_1_add
    /) {
    specialize($hbd_dc_fn, qw/neon sse2/);
  }

  if (vpx_config("CONFIG_EMULATE_HARDWARE") ne "yes") {
    foreach my $hbd_full_fn (qw/
        vpx_highbd_idct4x4_16_add
        vpx_highbd_idct8x8_64_add
        vpx_highbd_idct8x8_12_add
        vpx_highbd_idct16x16_256_add
        vpx_highbd_idct16x16_38_add
        vpx_highbd_idct16x16_10_add
        vpx_highbd_idct32x32_1024_add
        vpx_highbd_idct32x32_135_add
        vpx_highbd_idct32x32_34_add
      /) {
      specialize($hbd_full_fn, qw/neon sse2 sse4_1/);
    }
  }  # !CONFIG_EMULATE_HARDWARE
}  # CONFIG_VP9_HIGHBITDEPTH
}  # CONFIG_VP9
720
721#
722# Quantization
723#
if (vpx_config("CONFIG_VP9_ENCODER") eq "yes") {
  # The 8-bit and highbd quantizers use the same two argument lists; the
  # 32x32 variants simply omit the n_coeffs parameter.
  my $quant_common_tail = "const struct macroblock_plane *const mb_plane, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const struct ScanOrder *const scan_order";
  my $quant_args = "const tran_low_t *coeff_ptr, intptr_t n_coeffs, " . $quant_common_tail;
  my $quant_32x32_args = "const tran_low_t *coeff_ptr, " . $quant_common_tail;

  # Each row is [function name, argument list, targets...].
  my @quant_rows = (
    ["vpx_quantize_b",       $quant_args,       qw/neon sse2 ssse3 avx avx2 vsx lsx/],
    # No sse2 entry for the 8-bit 32x32 quantizer.
    ["vpx_quantize_b_32x32", $quant_32x32_args, qw/neon ssse3 avx avx2 vsx lsx/],
  );

  if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
    push @quant_rows, (
      ["vpx_highbd_quantize_b",       $quant_args,       qw/neon sse2 avx2/],
      ["vpx_highbd_quantize_b_32x32", $quant_32x32_args, qw/neon sse2 avx2/],
    );
  }  # CONFIG_VP9_HIGHBITDEPTH

  foreach my $quant_row (@quant_rows) {
    my ($quant_name, $quant_proto_args, @quant_targets) = @$quant_row;
    add_proto("void", $quant_name, $quant_proto_args);
    specialize($quant_name, @quant_targets);
  }
}  # CONFIG_VP9_ENCODER
739
740if (vpx_config("CONFIG_ENCODERS") eq "yes") {
741#
742# Block subtraction
743#
# vpx_subtract_block: void, takes a row/column count plus diff, source and
# prediction pointers with their strides.
add_proto("void", "vpx_subtract_block",
          "int rows, int cols, int16_t *diff_ptr, ptrdiff_t diff_stride, const uint8_t *src_ptr, ptrdiff_t src_stride, const uint8_t *pred_ptr, ptrdiff_t pred_stride");
specialize("vpx_subtract_block", qw/neon msa mmi sse2 avx2 vsx lsx/);

# vpx_sse: returns int64_t; width and height are passed as arguments.
add_proto("int64_t", "vpx_sse",
          "const uint8_t *src, int src_stride, const uint8_t *ref, int ref_stride, int width, int height");
specialize("vpx_sse", qw/sse4_1 avx2 neon neon_dotprod/);
749
750#
751# Single block SAD
752#
# vpx_sad<W>x<H>: one prototype per block size, all returning unsigned int
# with the same argument list. Each row is [block size, targets...].
my $sad_args = "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
foreach my $sad_row (
  [qw/64x64 neon neon_dotprod avx2 msa sse2 vsx mmi lsx/],
  [qw/64x32 neon neon_dotprod avx2 msa sse2 vsx mmi/],
  [qw/32x64 neon neon_dotprod avx2 msa sse2 vsx mmi/],
  [qw/32x32 neon neon_dotprod avx2 msa sse2 vsx mmi lsx/],
  [qw/32x16 neon neon_dotprod avx2 msa sse2 vsx mmi/],
  [qw/16x32 neon neon_dotprod msa sse2 vsx mmi/],
  [qw/16x16 neon neon_dotprod msa sse2 vsx mmi lsx/],
  [qw/16x8 neon neon_dotprod msa sse2 vsx mmi/],
  [qw/8x16 neon msa sse2 vsx mmi/],
  [qw/8x8 neon msa sse2 vsx mmi lsx/],
  [qw/8x4 neon msa sse2 vsx mmi/],
  [qw/4x8 neon msa sse2 mmi/],
  [qw/4x4 neon msa sse2 mmi/],
) {
  my ($sad_size, @sad_targets) = @$sad_row;
  my $sad_name = "vpx_sad" . $sad_size;
  add_proto(qw/unsigned int/, $sad_name, $sad_args);
  specialize($sad_name, @sad_targets);
}
791
# vpx_sad_skip_<W>x<H>: same return type and argument list for every block
# size. Each row is [block size, targets...].
my $sad_skip_args = "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
foreach my $sad_skip_row (
  [qw/64x64 neon neon_dotprod avx2 sse2/],
  [qw/64x32 neon neon_dotprod avx2 sse2/],
  [qw/32x64 neon neon_dotprod avx2 sse2/],
  [qw/32x32 neon neon_dotprod avx2 sse2/],
  [qw/32x16 neon neon_dotprod avx2 sse2/],
  [qw/16x32 neon neon_dotprod sse2/],
  [qw/16x16 neon neon_dotprod sse2/],
  [qw/16x8 neon neon_dotprod sse2/],
  [qw/8x16 neon sse2/],
  [qw/8x8 neon sse2/],
  # 8x4 and 4x4 list neon only.
  [qw/8x4 neon/],
  [qw/4x8 neon sse2/],
  [qw/4x4 neon/],
) {
  my ($sad_skip_size, @sad_skip_targets) = @$sad_skip_row;
  my $sad_skip_name = "vpx_sad_skip_" . $sad_skip_size;
  add_proto(qw/unsigned int/, $sad_skip_name, $sad_skip_args);
  specialize($sad_skip_name, @sad_skip_targets);
}
830
831#
832# Avg
833#
if (vpx_config("CONFIG_VP9_ENCODER") eq "yes") {
  # Rows are [return type, function name, argument list, targets...] and are
  # declared in one pass at the end of the block.
  my @avg_rows = (
    ["unsigned int", "vpx_avg_8x8", "const uint8_t *, int p", qw/sse2 neon msa/],
    ["unsigned int", "vpx_avg_4x4", "const uint8_t *, int p", qw/sse2 neon msa/],
    ["void", "vpx_minmax_8x8", "const uint8_t *s, int p, const uint8_t *d, int dp, int *min, int *max", qw/sse2 neon msa/],
  );

  if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
    # High bit-depth builds: coefficients are tran_low_t, and the
    # vpx_highbd_* hadamard/satd entry points are declared as well.
    my $hbd_hadamard_args = "const int16_t *src_diff, ptrdiff_t src_stride, tran_low_t *coeff";
    my $hbd_satd_args = "const tran_low_t *coeff, int length";
    push @avg_rows, (
      # $ssse3_x86_64 is 'ssse3' only when $opts{arch} is x86_64, else ''.
      ["void", "vpx_hadamard_8x8", $hbd_hadamard_args, qw/sse2 neon vsx lsx/, "$ssse3_x86_64"],
      ["void", "vpx_hadamard_16x16", $hbd_hadamard_args, qw/avx2 sse2 neon vsx lsx/],
      ["void", "vpx_hadamard_32x32", $hbd_hadamard_args, qw/sse2 avx2 neon/],
      ["void", "vpx_highbd_hadamard_8x8", $hbd_hadamard_args, qw/avx2 neon/],
      ["void", "vpx_highbd_hadamard_16x16", $hbd_hadamard_args, qw/avx2 neon/],
      ["void", "vpx_highbd_hadamard_32x32", $hbd_hadamard_args, qw/avx2 neon/],
      ["int", "vpx_satd", $hbd_satd_args, qw/avx2 sse2 neon/],
      ["int", "vpx_highbd_satd", $hbd_satd_args, qw/avx2 neon/],
    );
  } else {
    # Otherwise coefficients are int16_t and msa targets are listed too.
    my $lbd_hadamard_args = "const int16_t *src_diff, ptrdiff_t src_stride, int16_t *coeff";
    push @avg_rows, (
      ["void", "vpx_hadamard_8x8", $lbd_hadamard_args, qw/sse2 neon msa vsx lsx/, "$ssse3_x86_64"],
      ["void", "vpx_hadamard_16x16", $lbd_hadamard_args, qw/avx2 sse2 neon msa vsx lsx/],
      ["void", "vpx_hadamard_32x32", $lbd_hadamard_args, qw/sse2 avx2 neon/],
      ["int", "vpx_satd", "const int16_t *coeff, int length", qw/avx2 sse2 neon msa/],
    );
  }

  push @avg_rows, (
    ["void", "vpx_int_pro_row", "int16_t hbuf[16], const uint8_t *ref, const int ref_stride, const int height", qw/neon sse2 msa/],
    ["int16_t", "vpx_int_pro_col", "const uint8_t *ref, const int width", qw/neon sse2 msa/],
    ["int", "vpx_vector_var", "const int16_t *ref, const int16_t *src, const int bwl", qw/neon sse2 msa/],
  );

  foreach my $avg_row (@avg_rows) {
    my ($avg_rtype, $avg_name, $avg_args, @avg_targets) = @$avg_row;
    # The return type is split on whitespace so that add_proto receives the
    # same word list that the qw// spelling produces (e.g. "unsigned", "int").
    add_proto(split(' ', $avg_rtype), $avg_name, $avg_args);
    specialize($avg_name, @avg_targets);
  }
}  # CONFIG_VP9_ENCODER
890
# vpx_sad<W>x<H>_avg: like the plain SAD prototypes but with an additional
# second_pred argument. Each row is [block size, targets...].
my $sad_avg_args = "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
foreach my $sad_avg_row (
  [qw/64x64 neon neon_dotprod avx2 msa sse2 vsx mmi lsx/],
  [qw/64x32 neon neon_dotprod avx2 msa sse2 vsx mmi/],
  [qw/32x64 neon neon_dotprod avx2 msa sse2 vsx mmi/],
  [qw/32x32 neon neon_dotprod avx2 msa sse2 vsx mmi lsx/],
  [qw/32x16 neon neon_dotprod avx2 msa sse2 vsx mmi/],
  [qw/16x32 neon neon_dotprod msa sse2 vsx mmi/],
  [qw/16x16 neon neon_dotprod msa sse2 vsx mmi/],
  [qw/16x8 neon neon_dotprod msa sse2 vsx mmi/],
  [qw/8x16 neon msa sse2 mmi/],
  [qw/8x8 neon msa sse2 mmi/],
  [qw/8x4 neon msa sse2 mmi/],
  [qw/4x8 neon msa sse2 mmi/],
  [qw/4x4 neon msa sse2 mmi/],
) {
  my ($sad_avg_size, @sad_avg_targets) = @$sad_avg_row;
  my $sad_avg_name = "vpx_sad${sad_avg_size}_avg";
  add_proto(qw/unsigned int/, $sad_avg_name, $sad_avg_args);
  specialize($sad_avg_name, @sad_avg_targets);
}
929
930#
931# Multi-block SAD, comparing a reference to N independent blocks
932#
# vpx_sad<W>x<H>x4d: void; takes an array of four reference pointers and
# writes into a four-entry sad_array. Each row is [block size, targets...].
my $sad_x4d_args = "const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t sad_array[4]";
foreach my $sad_x4d_row (
  # 64x64 is the only size listing avx512.
  [qw/64x64 avx512 avx2 neon neon_dotprod msa sse2 vsx mmi lsx/],
  [qw/64x32 neon neon_dotprod msa sse2 vsx mmi lsx/],
  [qw/32x64 neon neon_dotprod msa sse2 vsx mmi lsx/],
  [qw/32x32 avx2 neon neon_dotprod msa sse2 vsx mmi lsx/],
  [qw/32x16 neon neon_dotprod msa sse2 vsx mmi/],
  [qw/16x32 neon neon_dotprod msa sse2 vsx mmi/],
  [qw/16x16 neon neon_dotprod msa sse2 vsx mmi lsx/],
  [qw/16x8 neon neon_dotprod msa sse2 vsx mmi/],
  [qw/8x16 neon msa sse2 mmi/],
  [qw/8x8 neon msa sse2 mmi lsx/],
  [qw/8x4 neon msa sse2 mmi/],
  [qw/4x8 neon msa sse2 mmi/],
  [qw/4x4 neon msa sse2 mmi/],
) {
  my ($sad_x4d_size, @sad_x4d_targets) = @$sad_x4d_row;
  my $sad_x4d_name = "vpx_sad${sad_x4d_size}x4d";
  add_proto("void", $sad_x4d_name, $sad_x4d_args);
  specialize($sad_x4d_name, @sad_x4d_targets);
}
971
# vpx_sad_skip_<W>x<H>x4d: same argument list as the x4d SAD prototypes.
# Each row is [block size, targets...].
my $sad_skip_x4d_args = "const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t sad_array[4]";
foreach my $sad_skip_x4d_row (
  [qw/64x64 neon neon_dotprod avx2 sse2/],
  [qw/64x32 neon neon_dotprod avx2 sse2/],
  [qw/32x64 neon neon_dotprod avx2 sse2/],
  [qw/32x32 neon neon_dotprod avx2 sse2/],
  [qw/32x16 neon neon_dotprod avx2 sse2/],
  [qw/16x32 neon neon_dotprod sse2/],
  [qw/16x16 neon neon_dotprod sse2/],
  [qw/16x8 neon neon_dotprod sse2/],
  [qw/8x16 neon sse2/],
  [qw/8x8 neon sse2/],
  # 8x4 and 4x4 list neon only.
  [qw/8x4 neon/],
  [qw/4x8 neon sse2/],
  [qw/4x4 neon/],
) {
  my ($sad_skip_x4d_size, @sad_skip_x4d_targets) = @$sad_skip_x4d_row;
  my $sad_skip_x4d_name = "vpx_sad_skip_${sad_skip_x4d_size}x4d";
  add_proto("void", $sad_skip_x4d_name, $sad_skip_x4d_args);
  specialize($sad_skip_x4d_name, @sad_skip_x4d_targets);
}
1010
# Takes an int16_t buffer plus a stride and a size, and returns a 64-bit result.
# NOTE(review): presumably the sum of squared samples over a size x size block;
# confirm against the C implementation.
1011add_proto qw/uint64_t vpx_sum_squares_2d_i16/, "const int16_t *src, int stride, int size";
1012specialize qw/vpx_sum_squares_2d_i16 neon sve sse2 msa/;
1013
1014#
1015# Structured Similarity (SSIM)
1016#
# The 8-bit SSIM parameter helper is only declared when CONFIG_INTERNAL_STATS is "yes".
1017if (vpx_config("CONFIG_INTERNAL_STATS") eq "yes") {
1018    add_proto qw/void vpx_ssim_parms_8x8/, "const uint8_t *s, int sp, const uint8_t *r, int rp, uint32_t *sum_s, uint32_t *sum_r, uint32_t *sum_sq_s, uint32_t *sum_sq_r, uint32_t *sum_sxr";
    # The specialization list is the interpolated $sse2_x86_64 variable, which
    # the top of this file sets to 'sse2' only when $opts{arch} is "x86_64" and
    # leaves as the empty string otherwise.
1019    specialize qw/vpx_ssim_parms_8x8/, "$sse2_x86_64";
1020}
1021
1022if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
1023  #
1024  # Block subtraction
1025  #
1026  add_proto qw/void vpx_highbd_subtract_block/, "int rows, int cols, int16_t *diff_ptr, ptrdiff_t diff_stride, const uint8_t *src8_ptr, ptrdiff_t src_stride, const uint8_t *pred8_ptr, ptrdiff_t pred_stride, int bd";
1027  specialize qw/vpx_highbd_subtract_block neon avx2/;
1028
# High-bitdepth SSE over a width x height region; returns a 64-bit value.
# NOTE(review): a8/b8 are declared as uint8_t pointers; presumably they alias
# 16-bit sample buffers as elsewhere in the high-bitdepth code - confirm.
1029  add_proto qw/int64_t vpx_highbd_sse/, "const uint8_t *a8, int a_stride, const uint8_t *b8, int b_stride, int width, int height";
1030  specialize qw/vpx_highbd_sse sse4_1 avx2 neon/;
1031
1032  #
1033  # Single block SAD
1034  #
1035  add_proto qw/unsigned int vpx_highbd_sad64x64/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
1036  specialize qw/vpx_highbd_sad64x64 sse2 neon avx2/;
1037
1038  add_proto qw/unsigned int vpx_highbd_sad64x32/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
1039  specialize qw/vpx_highbd_sad64x32 sse2 neon avx2/;
1040
1041  add_proto qw/unsigned int vpx_highbd_sad32x64/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
1042  specialize qw/vpx_highbd_sad32x64 sse2 neon avx2/;
1043
1044  add_proto qw/unsigned int vpx_highbd_sad32x32/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
1045  specialize qw/vpx_highbd_sad32x32 sse2 neon avx2/;
1046
1047  add_proto qw/unsigned int vpx_highbd_sad32x16/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
1048  specialize qw/vpx_highbd_sad32x16 sse2 neon avx2/;
1049
1050  add_proto qw/unsigned int vpx_highbd_sad16x32/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
1051  specialize qw/vpx_highbd_sad16x32 sse2 neon avx2/;
1052
1053  add_proto qw/unsigned int vpx_highbd_sad16x16/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
1054  specialize qw/vpx_highbd_sad16x16 sse2 neon avx2/;
1055
1056  add_proto qw/unsigned int vpx_highbd_sad16x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
1057  specialize qw/vpx_highbd_sad16x8 sse2 neon avx2/;
1058
1059  add_proto qw/unsigned int vpx_highbd_sad8x16/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
1060  specialize qw/vpx_highbd_sad8x16 sse2 neon/;
1061
1062  add_proto qw/unsigned int vpx_highbd_sad8x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
1063  specialize qw/vpx_highbd_sad8x8 sse2 neon/;
1064
1065  add_proto qw/unsigned int vpx_highbd_sad8x4/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
1066  specialize qw/vpx_highbd_sad8x4 sse2 neon/;
1067
1068  add_proto qw/unsigned int vpx_highbd_sad4x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
1069  specialize qw/vpx_highbd_sad4x8 neon/;
1070
1071  add_proto qw/unsigned int vpx_highbd_sad4x4/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
1072  specialize qw/vpx_highbd_sad4x4 neon/;
1073
1074  add_proto qw/unsigned int vpx_highbd_sad_skip_64x64/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
1075  specialize qw/vpx_highbd_sad_skip_64x64 neon sse2 avx2/;
1076
1077  add_proto qw/unsigned int vpx_highbd_sad_skip_64x32/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
1078  specialize qw/vpx_highbd_sad_skip_64x32 neon sse2 avx2/;
1079
1080  add_proto qw/unsigned int vpx_highbd_sad_skip_32x64/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
1081  specialize qw/vpx_highbd_sad_skip_32x64 neon sse2 avx2/;
1082
1083  add_proto qw/unsigned int vpx_highbd_sad_skip_32x32/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
1084  specialize qw/vpx_highbd_sad_skip_32x32 neon sse2 avx2/;
1085
1086  add_proto qw/unsigned int vpx_highbd_sad_skip_32x16/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
1087  specialize qw/vpx_highbd_sad_skip_32x16 neon sse2 avx2/;
1088
1089  add_proto qw/unsigned int vpx_highbd_sad_skip_16x32/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
1090  specialize qw/vpx_highbd_sad_skip_16x32 neon sse2 avx2/;
1091
1092  add_proto qw/unsigned int vpx_highbd_sad_skip_16x16/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
1093  specialize qw/vpx_highbd_sad_skip_16x16 neon sse2 avx2/;
1094
1095  add_proto qw/unsigned int vpx_highbd_sad_skip_16x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
1096  specialize qw/vpx_highbd_sad_skip_16x8 neon sse2 avx2/;
1097
1098  add_proto qw/unsigned int vpx_highbd_sad_skip_8x16/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
1099  specialize qw/vpx_highbd_sad_skip_8x16 neon sse2/;
1100
1101  add_proto qw/unsigned int vpx_highbd_sad_skip_8x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
1102  specialize qw/vpx_highbd_sad_skip_8x8 neon sse2/;
1103
1104  add_proto qw/unsigned int vpx_highbd_sad_skip_8x4/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
1105  specialize qw/vpx_highbd_sad_skip_8x4 neon/;
1106
1107  add_proto qw/unsigned int vpx_highbd_sad_skip_4x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
1108  specialize qw/vpx_highbd_sad_skip_4x8 neon/;
1109
1110  add_proto qw/unsigned int vpx_highbd_sad_skip_4x4/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
1111  specialize qw/vpx_highbd_sad_skip_4x4 neon/;
1112
1113  #
1114  # Avg, min/max, and the "_avg" SAD variants (which take an extra second_pred)
1115  #
1116  add_proto qw/unsigned int vpx_highbd_avg_8x8/, "const uint8_t *s8, int p";
1117  specialize qw/vpx_highbd_avg_8x8 sse2 neon/;
1118
1119  add_proto qw/unsigned int vpx_highbd_avg_4x4/, "const uint8_t *s8, int p";
1120  specialize qw/vpx_highbd_avg_4x4 sse2 neon/;
1121
1122  add_proto qw/void vpx_highbd_minmax_8x8/, "const uint8_t *s8, int p, const uint8_t *d8, int dp, int *min, int *max";
1123  specialize qw/vpx_highbd_minmax_8x8 neon/;
1124
1125  add_proto qw/unsigned int vpx_highbd_sad64x64_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
1126  specialize qw/vpx_highbd_sad64x64_avg sse2 neon avx2/;
1127
1128  add_proto qw/unsigned int vpx_highbd_sad64x32_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
1129  specialize qw/vpx_highbd_sad64x32_avg sse2 neon avx2/;
1130
1131  add_proto qw/unsigned int vpx_highbd_sad32x64_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
1132  specialize qw/vpx_highbd_sad32x64_avg sse2 neon avx2/;
1133
1134  add_proto qw/unsigned int vpx_highbd_sad32x32_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
1135  specialize qw/vpx_highbd_sad32x32_avg sse2 neon avx2/;
1136
1137  add_proto qw/unsigned int vpx_highbd_sad32x16_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
1138  specialize qw/vpx_highbd_sad32x16_avg sse2 neon avx2/;
1139
1140  add_proto qw/unsigned int vpx_highbd_sad16x32_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
1141  specialize qw/vpx_highbd_sad16x32_avg sse2 neon avx2/;
1142
1143  add_proto qw/unsigned int vpx_highbd_sad16x16_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
1144  specialize qw/vpx_highbd_sad16x16_avg sse2 neon avx2/;
1145
1146  add_proto qw/unsigned int vpx_highbd_sad16x8_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
1147  specialize qw/vpx_highbd_sad16x8_avg sse2 neon avx2/;
1148
1149  add_proto qw/unsigned int vpx_highbd_sad8x16_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
1150  specialize qw/vpx_highbd_sad8x16_avg sse2 neon/;
1151
1152  add_proto qw/unsigned int vpx_highbd_sad8x8_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
1153  specialize qw/vpx_highbd_sad8x8_avg sse2 neon/;
1154
1155  add_proto qw/unsigned int vpx_highbd_sad8x4_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
1156  specialize qw/vpx_highbd_sad8x4_avg sse2 neon/;
1157
1158  add_proto qw/unsigned int vpx_highbd_sad4x8_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
1159  specialize qw/vpx_highbd_sad4x8_avg neon/;
1160
1161  add_proto qw/unsigned int vpx_highbd_sad4x4_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
1162  specialize qw/vpx_highbd_sad4x4_avg neon/;
1163
1164  #
1165  # Multi-block SAD, comparing one source block to 4 independent reference blocks
1166  #
1167  add_proto qw/void vpx_highbd_sad64x64x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t sad_array[4]";
1168  specialize qw/vpx_highbd_sad64x64x4d sse2 neon avx2/;
1169
1170  add_proto qw/void vpx_highbd_sad64x32x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t sad_array[4]";
1171  specialize qw/vpx_highbd_sad64x32x4d sse2 neon avx2/;
1172
1173  add_proto qw/void vpx_highbd_sad32x64x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t sad_array[4]";
1174  specialize qw/vpx_highbd_sad32x64x4d sse2 neon avx2/;
1175
1176  add_proto qw/void vpx_highbd_sad32x32x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t sad_array[4]";
1177  specialize qw/vpx_highbd_sad32x32x4d sse2 neon avx2/;
1178
1179  add_proto qw/void vpx_highbd_sad32x16x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t sad_array[4]";
1180  specialize qw/vpx_highbd_sad32x16x4d sse2 neon avx2/;
1181
1182  add_proto qw/void vpx_highbd_sad16x32x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t sad_array[4]";
1183  specialize qw/vpx_highbd_sad16x32x4d sse2 neon avx2/;
1184
1185  add_proto qw/void vpx_highbd_sad16x16x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t sad_array[4]";
1186  specialize qw/vpx_highbd_sad16x16x4d sse2 neon avx2/;
1187
1188  add_proto qw/void vpx_highbd_sad16x8x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t sad_array[4]";
1189  specialize qw/vpx_highbd_sad16x8x4d sse2 neon avx2/;
1190
1191  add_proto qw/void vpx_highbd_sad8x16x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t sad_array[4]";
1192  specialize qw/vpx_highbd_sad8x16x4d sse2 neon/;
1193
1194  add_proto qw/void vpx_highbd_sad8x8x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t sad_array[4]";
1195  specialize qw/vpx_highbd_sad8x8x4d sse2 neon/;
1196
1197  add_proto qw/void vpx_highbd_sad8x4x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t sad_array[4]";
1198  specialize qw/vpx_highbd_sad8x4x4d sse2 neon/;
1199
1200  add_proto qw/void vpx_highbd_sad4x8x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t sad_array[4]";
1201  specialize qw/vpx_highbd_sad4x8x4d sse2 neon/;
1202
1203  add_proto qw/void vpx_highbd_sad4x4x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t sad_array[4]";
1204  specialize qw/vpx_highbd_sad4x4x4d sse2 neon/;
1205
1206  add_proto qw/void vpx_highbd_sad_skip_64x64x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t sad_array[4]";
1207  specialize qw/vpx_highbd_sad_skip_64x64x4d neon sse2 avx2/;
1208
1209  add_proto qw/void vpx_highbd_sad_skip_64x32x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t sad_array[4]";
1210  specialize qw/vpx_highbd_sad_skip_64x32x4d neon sse2 avx2/;
1211
1212  add_proto qw/void vpx_highbd_sad_skip_32x64x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t sad_array[4]";
1213  specialize qw/vpx_highbd_sad_skip_32x64x4d neon sse2 avx2/;
1214
1215  add_proto qw/void vpx_highbd_sad_skip_32x32x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t sad_array[4]";
1216  specialize qw/vpx_highbd_sad_skip_32x32x4d neon sse2 avx2/;
1217
1218  add_proto qw/void vpx_highbd_sad_skip_32x16x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t sad_array[4]";
1219  specialize qw/vpx_highbd_sad_skip_32x16x4d neon sse2 avx2/;
1220
1221  add_proto qw/void vpx_highbd_sad_skip_16x32x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t sad_array[4]";
1222  specialize qw/vpx_highbd_sad_skip_16x32x4d neon sse2 avx2/;
1223
1224  add_proto qw/void vpx_highbd_sad_skip_16x16x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t sad_array[4]";
1225  specialize qw/vpx_highbd_sad_skip_16x16x4d neon sse2 avx2/;
1226
1227  add_proto qw/void vpx_highbd_sad_skip_16x8x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t sad_array[4]";
1228  specialize qw/vpx_highbd_sad_skip_16x8x4d neon sse2 avx2/;
1229
1230  add_proto qw/void vpx_highbd_sad_skip_8x16x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t sad_array[4]";
1231  specialize qw/vpx_highbd_sad_skip_8x16x4d neon sse2/;
1232
1233  add_proto qw/void vpx_highbd_sad_skip_8x8x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t sad_array[4]";
1234  specialize qw/vpx_highbd_sad_skip_8x8x4d neon sse2/;
1235
1236  add_proto qw/void vpx_highbd_sad_skip_8x4x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t sad_array[4]";
1237  specialize qw/vpx_highbd_sad_skip_8x4x4d neon/;
1238
1239  add_proto qw/void vpx_highbd_sad_skip_4x8x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t sad_array[4]";
1240  specialize qw/vpx_highbd_sad_skip_4x8x4d neon sse2/;
1241
1242  add_proto qw/void vpx_highbd_sad_skip_4x4x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t sad_array[4]";
1243  specialize qw/vpx_highbd_sad_skip_4x4x4d neon/;
1244
1245  #
1246  # Structured Similarity (SSIM)
1247  #
1248  if (vpx_config("CONFIG_INTERNAL_STATS") eq "yes") {
      # Same parameter list as vpx_ssim_parms_8x8, but the sample pointers are
      # uint16_t. No specialize line follows, so no architecture-specific
      # versions are listed for this function here.
1249    add_proto qw/void vpx_highbd_ssim_parms_8x8/, "const uint16_t *s, int sp, const uint16_t *r, int rp, uint32_t *sum_s, uint32_t *sum_r, uint32_t *sum_sq_s, uint32_t *sum_sq_r, uint32_t *sum_sxr";
1250  }
1251}  # CONFIG_VP9_HIGHBITDEPTH
1252}  # CONFIG_ENCODERS
1253
1254if (vpx_config("CONFIG_ENCODERS") eq "yes" || vpx_config("CONFIG_POSTPROC") eq "yes" || vpx_config("CONFIG_VP9_POSTPROC") eq "yes") {
1255
1256#
1257# Variance
1258#
1259add_proto qw/unsigned int vpx_variance64x64/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
1260  specialize qw/vpx_variance64x64 sse2 avx2 neon neon_dotprod msa mmi vsx lsx/;
1261
1262add_proto qw/unsigned int vpx_variance64x32/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
1263  specialize qw/vpx_variance64x32 sse2 avx2 neon neon_dotprod msa mmi vsx/;
1264
1265add_proto qw/unsigned int vpx_variance32x64/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
1266  specialize qw/vpx_variance32x64 sse2 avx2 neon neon_dotprod msa mmi vsx/;
1267
1268add_proto qw/unsigned int vpx_variance32x32/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
1269  specialize qw/vpx_variance32x32 sse2 avx2 neon neon_dotprod msa mmi vsx lsx/;
1270
1271add_proto qw/unsigned int vpx_variance32x16/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
1272  specialize qw/vpx_variance32x16 sse2 avx2 neon neon_dotprod msa mmi vsx/;
1273
1274add_proto qw/unsigned int vpx_variance16x32/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
1275  specialize qw/vpx_variance16x32 sse2 avx2 neon neon_dotprod msa mmi vsx/;
1276
1277add_proto qw/unsigned int vpx_variance16x16/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
1278  specialize qw/vpx_variance16x16 sse2 avx2 neon neon_dotprod msa mmi vsx lsx/;
1279
1280add_proto qw/unsigned int vpx_variance16x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
1281  specialize qw/vpx_variance16x8 sse2 avx2 neon neon_dotprod msa mmi vsx/;
1282
1283add_proto qw/unsigned int vpx_variance8x16/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
1284  specialize qw/vpx_variance8x16 sse2 avx2 neon neon_dotprod msa mmi vsx/;
1285
1286add_proto qw/unsigned int vpx_variance8x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
1287  specialize qw/vpx_variance8x8 sse2 avx2 neon neon_dotprod msa mmi vsx lsx/;
1288
1289add_proto qw/unsigned int vpx_variance8x4/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
1290  specialize qw/vpx_variance8x4 sse2 avx2 neon neon_dotprod msa mmi vsx/;
1291
1292add_proto qw/unsigned int vpx_variance4x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
1293  specialize qw/vpx_variance4x8 sse2 neon neon_dotprod msa mmi vsx/;
1294
1295add_proto qw/unsigned int vpx_variance4x4/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
1296  specialize qw/vpx_variance4x4 sse2 neon neon_dotprod msa mmi vsx/;
1297
1298#
1299# Specialty Variance
1300#
1301add_proto qw/void vpx_get16x16var/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum";
1302  specialize qw/vpx_get16x16var sse2 avx2 neon neon_dotprod msa vsx lsx/;
1303
1304add_proto qw/void vpx_get8x8var/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum";
1305  specialize qw/vpx_get8x8var sse2 neon neon_dotprod msa vsx/;
1306
1307add_proto qw/unsigned int vpx_mse16x16/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
1308  specialize qw/vpx_mse16x16 sse2 avx2 neon neon_dotprod msa mmi vsx lsx/;
1309
1310add_proto qw/unsigned int vpx_mse16x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
1311  specialize qw/vpx_mse16x8 sse2 avx2 neon neon_dotprod msa mmi vsx/;
1312
1313add_proto qw/unsigned int vpx_mse8x16/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
1314  specialize qw/vpx_mse8x16 sse2 neon neon_dotprod msa mmi vsx/;
1315
1316add_proto qw/unsigned int vpx_mse8x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
1317  specialize qw/vpx_mse8x8 sse2 neon neon_dotprod msa mmi vsx/;
1318
# Takes a single int16_t buffer and returns an unsigned int.
# NOTE(review): presumably the sum of squares over one macroblock of samples;
# the buffer length is not visible from the prototype - confirm in the C code.
1319add_proto qw/unsigned int vpx_get_mb_ss/, "const int16_t *src_ptr";
1320  specialize qw/vpx_get_mb_ss sse2 msa vsx/;
1321
# NOTE(review): this prototype spells the sample type as `unsigned char`, where
# the neighbouring prototypes use uint8_t; keep it in sync with the function
# definitions before normalising.
1322add_proto qw/unsigned int vpx_get4x4sse_cs/, "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride";
1323  specialize qw/vpx_get4x4sse_cs neon neon_dotprod msa vsx/;
1324
1325add_proto qw/void vpx_comp_avg_pred/, "uint8_t *comp_pred, const uint8_t *pred, int width, int height, const uint8_t *ref, int ref_stride";
1326  specialize qw/vpx_comp_avg_pred neon sse2 avx2 vsx lsx/;
1327
1328#
1329# Subpixel Variance
1330#
1331add_proto qw/uint32_t vpx_sub_pixel_variance64x64/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
1332  specialize qw/vpx_sub_pixel_variance64x64 avx2 neon msa mmi sse2 ssse3/;
1333
1334add_proto qw/uint32_t vpx_sub_pixel_variance64x32/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
1335  specialize qw/vpx_sub_pixel_variance64x32 neon msa mmi sse2 ssse3/;
1336
1337add_proto qw/uint32_t vpx_sub_pixel_variance32x64/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
1338  specialize qw/vpx_sub_pixel_variance32x64 neon msa mmi sse2 ssse3/;
1339
1340add_proto qw/uint32_t vpx_sub_pixel_variance32x32/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
1341  specialize qw/vpx_sub_pixel_variance32x32 avx2 neon msa mmi sse2 ssse3 lsx/;
1342
1343add_proto qw/uint32_t vpx_sub_pixel_variance32x16/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
1344  specialize qw/vpx_sub_pixel_variance32x16 neon msa mmi sse2 ssse3/;
1345
1346add_proto qw/uint32_t vpx_sub_pixel_variance16x32/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
1347  specialize qw/vpx_sub_pixel_variance16x32 neon msa mmi sse2 ssse3/;
1348
1349add_proto qw/uint32_t vpx_sub_pixel_variance16x16/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
1350  specialize qw/vpx_sub_pixel_variance16x16 neon msa mmi sse2 ssse3 lsx/;
1351
1352add_proto qw/uint32_t vpx_sub_pixel_variance16x8/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
1353  specialize qw/vpx_sub_pixel_variance16x8 neon msa mmi sse2 ssse3/;
1354
1355add_proto qw/uint32_t vpx_sub_pixel_variance8x16/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
1356  specialize qw/vpx_sub_pixel_variance8x16 neon msa mmi sse2 ssse3/;
1357
1358add_proto qw/uint32_t vpx_sub_pixel_variance8x8/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
1359  specialize qw/vpx_sub_pixel_variance8x8 neon msa mmi sse2 ssse3 lsx/;
1360
1361add_proto qw/uint32_t vpx_sub_pixel_variance8x4/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
1362  specialize qw/vpx_sub_pixel_variance8x4 neon msa mmi sse2 ssse3/;
1363
1364add_proto qw/uint32_t vpx_sub_pixel_variance4x8/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
1365  specialize qw/vpx_sub_pixel_variance4x8 neon msa mmi sse2 ssse3/;
1366
1367add_proto qw/uint32_t vpx_sub_pixel_variance4x4/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
1368  specialize qw/vpx_sub_pixel_variance4x4 neon msa mmi sse2 ssse3/;
1369
1370add_proto qw/uint32_t vpx_sub_pixel_avg_variance64x64/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
1371  specialize qw/vpx_sub_pixel_avg_variance64x64 neon avx2 msa mmi sse2 ssse3 lsx/;
1372
1373add_proto qw/uint32_t vpx_sub_pixel_avg_variance64x32/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
1374  specialize qw/vpx_sub_pixel_avg_variance64x32 neon msa mmi sse2 ssse3/;
1375
1376add_proto qw/uint32_t vpx_sub_pixel_avg_variance32x64/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
1377  specialize qw/vpx_sub_pixel_avg_variance32x64 neon msa mmi sse2 ssse3/;
1378
1379add_proto qw/uint32_t vpx_sub_pixel_avg_variance32x32/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
1380  specialize qw/vpx_sub_pixel_avg_variance32x32 neon avx2 msa mmi sse2 ssse3/;
1381
1382add_proto qw/uint32_t vpx_sub_pixel_avg_variance32x16/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
1383  specialize qw/vpx_sub_pixel_avg_variance32x16 neon msa mmi sse2 ssse3/;
1384
1385add_proto qw/uint32_t vpx_sub_pixel_avg_variance16x32/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
1386  specialize qw/vpx_sub_pixel_avg_variance16x32 neon msa mmi sse2 ssse3/;
1387
1388add_proto qw/uint32_t vpx_sub_pixel_avg_variance16x16/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
1389  specialize qw/vpx_sub_pixel_avg_variance16x16 neon msa mmi sse2 ssse3/;
1390
1391add_proto qw/uint32_t vpx_sub_pixel_avg_variance16x8/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
1392  specialize qw/vpx_sub_pixel_avg_variance16x8 neon msa mmi sse2 ssse3/;
1393
1394add_proto qw/uint32_t vpx_sub_pixel_avg_variance8x16/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
1395  specialize qw/vpx_sub_pixel_avg_variance8x16 neon msa mmi sse2 ssse3/;
1396
1397add_proto qw/uint32_t vpx_sub_pixel_avg_variance8x8/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
1398  specialize qw/vpx_sub_pixel_avg_variance8x8 neon msa mmi sse2 ssse3/;
1399
1400add_proto qw/uint32_t vpx_sub_pixel_avg_variance8x4/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
1401  specialize qw/vpx_sub_pixel_avg_variance8x4 neon msa mmi sse2 ssse3/;
1402
1403add_proto qw/uint32_t vpx_sub_pixel_avg_variance4x8/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
1404  specialize qw/vpx_sub_pixel_avg_variance4x8 neon msa mmi sse2 ssse3/;
1405
1406add_proto qw/uint32_t vpx_sub_pixel_avg_variance4x4/, "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
1407  specialize qw/vpx_sub_pixel_avg_variance4x4 neon msa mmi sse2 ssse3/;
1408
1409if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
# High bit-depth variance: vpx_highbd_<bd>_variance<W>x<H> for bd = 12, 10, 8.
# All sizes share one parameter list.  Sizes from 64x64 down to 8x8 are
# specialized for sse2, neon and sve; 8x4, 4x8 and 4x4 only for neon and sve.
# Calls are issued bit depth first, then block size (largest to smallest).
# The bare block keeps the helper variables out of the enclosing scope.
{
  my $variance_args = "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
  my @sizes_with_sse2 = qw/64x64 64x32 32x64 32x32 32x16 16x32 16x16 16x8 8x16 8x8/;
  my @sizes_without_sse2 = qw/8x4 4x8 4x4/;

  foreach my $bd (12, 10, 8) {
    foreach my $size (@sizes_with_sse2) {
      my $fn = "vpx_highbd_${bd}_variance${size}";
      add_proto(qw/unsigned int/, $fn, $variance_args);
      specialize($fn, qw/sse2 neon sve/);
    }
    foreach my $size (@sizes_without_sse2) {
      my $fn = "vpx_highbd_${bd}_variance${size}";
      add_proto(qw/unsigned int/, $fn, $variance_args);
      specialize($fn, qw/neon sve/);
    }
  }
}
1520
# vpx_highbd_<bd>_get16x16var / vpx_highbd_<bd>_get8x8var for bd = 8, 10, 12.
# Each takes the same parameter list (with separate sse and sum outputs) and
# is specialized for sse2, neon and sve.
{
  my $getvar_args = "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum";

  foreach my $bd (8, 10, 12) {
    foreach my $size (qw/16x16 8x8/) {
      my $fn = "vpx_highbd_${bd}_get${size}var";
      add_proto('void', $fn, $getvar_args);
      specialize($fn, qw/sse2 neon sve/);
    }
  }
}
1538
# High bit-depth MSE: vpx_highbd_<bd>_mse<W>x<H> for bd = 8, 10, 12.
# Only 16x16 and 8x8 list sse2.  After neon, the 8-bit entries list
# neon_dotprod while the 10- and 12-bit entries list sve.
{
  my $mse_args = "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
  my %lists_sse2 = map { $_ => 1 } qw/16x16 8x8/;

  foreach my $bd (8, 10, 12) {
    my $arm_extension = ($bd == 8) ? 'neon_dotprod' : 'sve';
    foreach my $size (qw/16x16 16x8 8x16 8x8/) {
      my $fn = "vpx_highbd_${bd}_mse${size}";
      my @isas = ('neon', $arm_extension);
      # sse2 comes first in the list whenever it is present.
      unshift @isas, 'sse2' if $lists_sse2{$size};
      add_proto(qw/unsigned int/, $fn, $mse_args);
      specialize($fn, @isas);
    }
  }
}
1568
# vpx_highbd_comp_avg_pred: takes 16-bit sample pointers (unlike the uint8_t
# pointers used by the variance prototypes); specialized for neon and sse2.
add_proto('void', 'vpx_highbd_comp_avg_pred',
          "uint16_t *comp_pred, const uint16_t *pred, int width, int height, const uint16_t *ref, int ref_stride");
specialize('vpx_highbd_comp_avg_pred', 'neon', 'sse2');
1571
1572  #
1573  # Subpixel Variance
1574  #
# vpx_highbd_<bd>_sub_pixel_variance<W>x<H> for bd = 12, 10, 8.
# Sizes from 64x64 down to 8x4 are specialized for sse2 and neon; 4x8 and 4x4
# for neon only.  Calls are issued bit depth first, then block size.
{
  my $subpel_args = "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
  my @sizes_with_sse2 = qw/64x64 64x32 32x64 32x32 32x16 16x32 16x16 16x8 8x16 8x8 8x4/;
  my @sizes_neon_only = qw/4x8 4x4/;

  foreach my $bd (12, 10, 8) {
    foreach my $size (@sizes_with_sse2) {
      my $fn = "vpx_highbd_${bd}_sub_pixel_variance${size}";
      add_proto('uint32_t', $fn, $subpel_args);
      specialize($fn, qw/sse2 neon/);
    }
    foreach my $size (@sizes_neon_only) {
      my $fn = "vpx_highbd_${bd}_sub_pixel_variance${size}";
      add_proto('uint32_t', $fn, $subpel_args);
      specialize($fn, 'neon');
    }
  }
}
1688
# vpx_highbd_<bd>_sub_pixel_avg_variance<W>x<H> for bd = 12, 10, 8.
# Same size/ISA layout as the non-averaging sub-pixel variance entries, with
# an extra trailing second_pred parameter.
{
  my $subpel_avg_args = "const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
  my @sizes_with_sse2 = qw/64x64 64x32 32x64 32x32 32x16 16x32 16x16 16x8 8x16 8x8 8x4/;
  my @sizes_neon_only = qw/4x8 4x4/;

  foreach my $bd (12, 10, 8) {
    foreach my $size (@sizes_with_sse2) {
      my $fn = "vpx_highbd_${bd}_sub_pixel_avg_variance${size}";
      add_proto('uint32_t', $fn, $subpel_avg_args);
      specialize($fn, qw/sse2 neon/);
    }
    foreach my $size (@sizes_neon_only) {
      my $fn = "vpx_highbd_${bd}_sub_pixel_avg_variance${size}";
      add_proto('uint32_t', $fn, $subpel_avg_args);
      specialize($fn, 'neon');
    }
  }
}
1802
1803}  # CONFIG_VP9_HIGHBITDEPTH
1804
1805#
1806# Post Processing
1807#
# Post-processing entries. They are registered only when vpx_config reports
# "yes" for CONFIG_POSTPROC or for CONFIG_VP9_POSTPROC.
# Each entry is an add_proto (return type and function name, then the C
# parameter list as a string) followed by a specialize line naming the
# instruction-set variants listed for that function.
# NOTE(review): vpx_config, add_proto and specialize are not defined in this
# file; presumably they come from the RTCD generator that loads it - confirm.
1808if (vpx_config("CONFIG_POSTPROC") eq "yes" || vpx_config("CONFIG_VP9_POSTPROC") eq "yes") {
    # Takes a start pointer, an int8_t noise buffer, black/white clamp values
    # and width/height/pitch. Variants listed: sse2 and msa only.
1809    add_proto qw/void vpx_plane_add_noise/, "uint8_t *start, const int8_t *noise, int blackclamp, int whiteclamp, int width, int height, int pitch";
1810    specialize qw/vpx_plane_add_noise sse2 msa/;
1811

    # Takes a single buffer (dst) with pitch, rows, cols and flimit.
    # Variants listed: sse2, neon, msa, vsx.
1812    add_proto qw/void vpx_mbpost_proc_down/, "unsigned char *dst, int pitch, int rows, int cols,int flimit";
1813    specialize qw/vpx_mbpost_proc_down sse2 neon msa vsx/;
1814

    # Same parameter shape as vpx_mbpost_proc_down, but the buffer is named src.
    # Variants listed: sse2, neon, msa, vsx.
1815    add_proto qw/void vpx_mbpost_proc_across_ip/, "unsigned char *src, int pitch, int rows, int cols,int flimit";
1816    specialize qw/vpx_mbpost_proc_across_ip sse2 neon msa vsx/;
1817

    # Takes separate src and dst buffers with their own pitches, a column
    # count, a flimits buffer and a size. Variants listed: sse2, neon, msa, vsx.
1818    add_proto qw/void vpx_post_proc_down_and_across_mb_row/, "unsigned char *src, unsigned char *dst, int src_pitch, int dst_pitch, int cols, unsigned char *flimits, int size";
1819    specialize qw/vpx_post_proc_down_and_across_mb_row sse2 neon msa vsx/;
1820

1821}  # CONFIG_POSTPROC || CONFIG_VP9_POSTPROC
1822
1823}  # CONFIG_ENCODERS || CONFIG_POSTPROC || CONFIG_VP9_POSTPROC
1824
18251;  # Final statement evaluates to a true value; presumably required by whatever loads this file (require/eval) - confirm against the loader.
1826