xref: /aosp_15_r20/external/libaom/aom_dsp/aom_dsp_rtcd_defs.pl (revision 77c1e3ccc04c968bd2bc212e87364f250e820521)
1##
2## Copyright (c) 2017, Alliance for Open Media. All rights reserved.
3##
4## This source code is subject to the terms of the BSD 2 Clause License and
5## the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6## was not distributed with this source code in the LICENSE file, you can
7## obtain it at www.aomedia.org/license/software. If the Alliance for Open
8## Media Patent License 1.0 was not distributed with this source code in the
9## PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10##
# Prints the C preamble (banner comment plus the #include lines) that the
# prototypes declared in this file rely on. Registered through forward_decls
# below.
sub aom_dsp_forward_decls() {
  # Non-interpolating heredoc: the text holds no Perl variables, so it is
  # emitted verbatim.
  print <<'EOF';
/*
 * DSP
 */

#include "aom/aom_integer.h"
#include "aom_dsp/aom_dsp_common.h"
#include "av1/common/blockd.h"
#include "av1/common/enums.h"

EOF
}
forward_decls qw/aom_dsp_forward_decls/;
25
# optimizations which depend on multiple features
# 'avx2' only when the configuration reports both HAVE_AVX2 and HAVE_SSSE3 as
# "yes"; the empty string otherwise.
$avx2_ssse3 =
    ((aom_config("HAVE_AVX2") eq "yes") && (aom_config("HAVE_SSSE3") eq "yes"))
    ? 'avx2'
    : '';

# functions that are 64 bit only.
# Each variable holds its extension name when the target arch is x86_64 and
# the empty string for every other arch.
($mmx_x86_64, $sse2_x86_64, $ssse3_x86_64, $avx_x86_64, $avx2_x86_64) =
    ($opts{arch} eq "x86_64") ? qw/mmx sse2 ssse3 avx avx2/ : ('') x 5;
41
@block_widths = (4, 8, 16, 32, 64, 128);

# Every [width, height] pair whose sides differ by at most a factor of two,
# generated width-major in the order of @block_widths.
@encoder_block_sizes = ();
foreach my $bw (@block_widths) {
  push @encoder_block_sizes,
       map  { [$bw, $_] }
       grep { $bw <= 2 * $_ && $_ <= 2 * $bw } @block_widths;
}

# The 4:1 and 1:4 shapes are appended unless the build is realtime-only.
unless (aom_config("CONFIG_REALTIME_ONLY") eq "yes") {
  push @encoder_block_sizes,
       [4, 16], [16, 4], [8, 32], [32, 8], [16, 64], [64, 16];
}
59
@tx_dims = (4, 8, 16, 32, 64);
@tx_sizes = ();
foreach my $tw (@tx_dims) {
  # Square size first, then the rectangular sizes in @tx_dims height order.
  push @tx_sizes, [$tw, $tw];
  foreach my $th (@tx_dims) {
    my $min_ok = ($tw >= 4 && $th >= 4);

    # 2:1 and 1:2 shapes are always included.
    if ($min_ok && ($tw == 2 * $th || $th == 2 * $tw)) {
      push @tx_sizes, [$tw, $th];
    }

    # 4:1 and 1:4 shapes are skipped only for a realtime-only build without
    # the decoder, i.e. kept when !CONFIG_REALTIME_ONLY || CONFIG_AV1_DECODER.
    next if (aom_config("CONFIG_REALTIME_ONLY") eq "yes") &&
            (aom_config("CONFIG_AV1_DECODER") ne "yes");
    if ($min_ok && ($tw == 4 * $th || $th == 4 * $tw)) {
      push @tx_sizes, [$tw, $th];
    }
  }
}
72
@pred_names = qw/dc dc_top dc_left dc_128 v h paeth smooth smooth_v smooth_h/;

#
# Intra prediction
#

# One prototype per (transform size, predictor name) pair, plus a highbd
# variant taking 16-bit buffers and a bit depth when CONFIG_AV1_HIGHBITDEPTH
# is "yes".
foreach my $tx_size (@tx_sizes) {
  # $w and $h stay file-level globals, exactly as before.
  ($w, $h) = @{$tx_size};
  foreach my $pred (@pred_names) {
    add_proto "void", "aom_${pred}_predictor_${w}x${h}",
              "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
    next unless aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes";
    add_proto "void", "aom_highbd_${pred}_predictor_${w}x${h}",
              "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
  }
}
90
# dc_top: neon + sse2 for every size; avx2 is added for the sizes in the
# second list.
foreach my $size (qw/4x4 4x8 8x4 8x8 8x16 16x8 16x16 16x32/) {
  specialize "aom_dc_top_predictor_${size}", qw/neon sse2/;
}
foreach my $size (qw/32x16 32x32 32x64 64x32 64x64/) {
  specialize "aom_dc_top_predictor_${size}", qw/neon sse2 avx2/;
}
104
# dc_left: neon + sse2 for every size; avx2 is added for the sizes in the
# second list.
foreach my $size (qw/4x4 4x8 8x4 8x8 8x16 16x8 16x16 16x32/) {
  specialize "aom_dc_left_predictor_${size}", qw/neon sse2/;
}
foreach my $size (qw/32x16 32x32 32x64 64x32 64x64/) {
  specialize "aom_dc_left_predictor_${size}", qw/neon sse2 avx2/;
}
118
# dc_128: neon + sse2 for every size; avx2 is added for the sizes in the
# second list.
foreach my $size (qw/4x4 4x8 8x4 8x8 8x16 16x8 16x16 16x32/) {
  specialize "aom_dc_128_predictor_${size}", qw/neon sse2/;
}
foreach my $size (qw/32x16 32x32 32x64 64x32 64x64/) {
  specialize "aom_dc_128_predictor_${size}", qw/neon sse2 avx2/;
}
132
# v: neon + sse2 for every size; avx2 is added for the sizes in the second
# list.
foreach my $size (qw/4x4 4x8 8x4 8x8 8x16 16x8 16x16 16x32/) {
  specialize "aom_v_predictor_${size}", qw/neon sse2/;
}
foreach my $size (qw/32x16 32x32 32x64 64x32 64x64/) {
  specialize "aom_v_predictor_${size}", qw/neon sse2 avx2/;
}
146
# h: neon + sse2 for every size; 32x32 is the only size that also lists avx2.
foreach my $size (qw/4x4 4x8 8x4 8x8 8x16 16x8 16x16 16x32 32x16/) {
  specialize "aom_h_predictor_${size}", qw/neon sse2/;
}
specialize "aom_h_predictor_32x32", qw/neon sse2 avx2/;
foreach my $size (qw/32x64 64x32 64x64/) {
  specialize "aom_h_predictor_${size}", qw/neon sse2/;
}
160
# paeth: ssse3 + neon for every size; avx2 is added for the sizes in the
# second list.
foreach my $size (qw/4x4 4x8 8x4 8x8 8x16/) {
  specialize "aom_paeth_predictor_${size}", qw/ssse3 neon/;
}
foreach my $size (qw/16x8 16x16 16x32 32x16 32x32 32x64 64x32 64x64/) {
  specialize "aom_paeth_predictor_${size}", qw/ssse3 avx2 neon/;
}
174
# smooth: neon + ssse3 for every size.
foreach my $size (qw/4x4 4x8 8x4 8x8 8x16 16x8 16x16 16x32
                     32x16 32x32 32x64 64x32 64x64/) {
  specialize "aom_smooth_predictor_${size}", qw/neon ssse3/;
}
188
# smooth_v: neon + ssse3 for every size.
foreach my $size (qw/4x4 4x8 8x4 8x8 8x16 16x8 16x16 16x32
                     32x16 32x32 32x64 64x32 64x64/) {
  specialize "aom_smooth_v_predictor_${size}", qw/neon ssse3/;
}
202
# smooth_h: neon + ssse3 for every size.
foreach my $size (qw/4x4 4x8 8x4 8x8 8x16 16x8 16x16 16x32
                     32x16 32x32 32x64 64x32 64x64/) {
  specialize "aom_smooth_h_predictor_${size}", qw/neon ssse3/;
}
216
# TODO(yunqingwang): optimize rectangular DC_PRED to replace division
# by multiply and shift.
# dc: neon + sse2 for every size; avx2 is added for the sizes in the second
# list (64x64 is listed ahead of 64x32, as before).
foreach my $size (qw/4x4 4x8 8x4 8x8 8x16 16x8 16x16 16x32/) {
  specialize "aom_dc_predictor_${size}", qw/neon sse2/;
}
foreach my $size (qw/32x16 32x32 32x64 64x64 64x32/) {
  specialize "aom_dc_predictor_${size}", qw/neon sse2 avx2/;
}
232
233
# Specializations for the 4:1 and 1:4 predictor sizes. Skipped only for a
# realtime-only build without the decoder.
unless ((aom_config("CONFIG_REALTIME_ONLY") eq "yes") && (aom_config("CONFIG_AV1_DECODER") ne "yes")) {
  # dc_top, dc_left, dc_128 and v: neon + sse2, with avx2 added for 64x16.
  foreach my $pred (qw/dc_top dc_left dc_128 v/) {
    foreach my $size (qw/4x16 8x32 16x4 16x64 32x8/) {
      specialize "aom_${pred}_predictor_${size}", qw/neon sse2/;
    }
    specialize "aom_${pred}_predictor_64x16", qw/neon sse2 avx2/;
  }

  # h: neon + sse2 for every size.
  foreach my $size (qw/4x16 8x32 16x4 16x64 32x8 64x16/) {
    specialize "aom_h_predictor_${size}", qw/neon sse2/;
  }

  # paeth: ssse3 + neon, with avx2 added for 16x64 and 64x16.
  foreach my $size (qw/4x16 8x32 16x4/) {
    specialize "aom_paeth_predictor_${size}", qw/ssse3 neon/;
  }
  specialize "aom_paeth_predictor_16x64", qw/ssse3 avx2 neon/;
  specialize "aom_paeth_predictor_32x8", qw/ssse3 neon/;
  specialize "aom_paeth_predictor_64x16", qw/ssse3 avx2 neon/;

  # smooth, smooth_v and smooth_h: neon + ssse3 for every size.
  foreach my $pred (qw/smooth smooth_v smooth_h/) {
    foreach my $size (qw/4x16 8x32 16x4 16x64 32x8 64x16/) {
      specialize "aom_${pred}_predictor_${size}", qw/neon ssse3/;
    }
  }

  # dc: neon + sse2, with avx2 added for 64x16.
  foreach my $size (qw/4x16 8x32 16x4 16x64 32x8/) {
    specialize "aom_dc_predictor_${size}", qw/neon sse2/;
  }
  specialize "aom_dc_predictor_64x16", qw/neon sse2 avx2/;
}  # !CONFIG_REALTIME_ONLY || CONFIG_AV1_DECODER
305
# High-bitdepth intra predictor specializations; only applied when
# CONFIG_AV1_HIGHBITDEPTH is "yes".
if (aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") {
  # v, dc, h, dc_128, dc_left and dc_top: sse2 + neon for the sizes in the
  # first list, neon only for 32x64, 64x32 and 64x64.
  # TODO(yunqingwang): optimize rectangular DC_PRED to replace division
  # by multiply and shift.
  foreach my $pred (qw/v dc h dc_128 dc_left dc_top/) {
    foreach my $size (qw/4x4 4x8 8x4 8x8 8x16 16x8 16x16 16x32 32x16 32x32/) {
      specialize "aom_highbd_${pred}_predictor_${size}", qw/sse2 neon/;
    }
    foreach my $size (qw/32x64 64x32 64x64/) {
      specialize "aom_highbd_${pred}_predictor_${size}", qw/neon/;
    }
  }

  # paeth, smooth, smooth_v and smooth_h: neon only, for every size.
  foreach my $pred (qw/paeth smooth smooth_v smooth_h/) {
    foreach my $size (qw/4x4 4x8 8x4 8x8 8x16 16x8 16x16 16x32
                         32x16 32x32 32x64 64x32 64x64/) {
      specialize "aom_highbd_${pred}_predictor_${size}", qw/neon/;
    }
  }

  # 4:1 and 1:4 sizes: neon only for every predictor. Skipped only for a
  # realtime-only build without the decoder.
  unless ((aom_config("CONFIG_REALTIME_ONLY") eq "yes") &&
          (aom_config("CONFIG_AV1_DECODER") ne "yes")) {
    foreach my $pred (qw/v dc h dc_128 dc_left dc_top
                         paeth smooth smooth_v smooth_h/) {
      foreach my $size (qw/4x16 8x32 16x4 16x64 32x8 64x16/) {
        specialize "aom_highbd_${pred}_predictor_${size}", qw/neon/;
      }
    }
  }  # !CONFIG_REALTIME_ONLY || CONFIG_AV1_DECODER
}
522#
523# Sub Pixel Filters
524#
# 8-bit copy and 8-tap horizontal/vertical convolve prototypes. The two
# directional variants take the same parameter list.
add_proto "void", "aom_convolve_copy", "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, int w, int h";
foreach my $dir (qw/horiz vert/) {
  add_proto "void", "aom_convolve8_${dir}", "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
}

specialize "aom_convolve_copy", qw/neon sse2 avx2/;
foreach my $dir (qw/horiz vert/) {
  # $avx2_ssse3 is either 'avx2' or '' (set near the top of this file).
  specialize "aom_convolve8_${dir}", qw/neon neon_dotprod neon_i8mm ssse3/, "$avx2_ssse3";
}

add_proto "void", "aom_scaled_2d", "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h";
specialize "aom_scaled_2d", qw/ssse3 neon neon_dotprod neon_i8mm/;
535
# High-bitdepth convolve prototypes; only declared when
# CONFIG_AV1_HIGHBITDEPTH is "yes".
if (aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") {
  add_proto "void", "aom_highbd_convolve_copy", "const uint16_t *src, ptrdiff_t src_stride, uint16_t *dst, ptrdiff_t dst_stride, int w, int h";
  specialize "aom_highbd_convolve_copy", qw/sse2 avx2 neon/;

  # The horizontal and vertical 8-tap variants share one parameter list.
  foreach my $dir (qw/horiz vert/) {
    add_proto "void", "aom_highbd_convolve8_${dir}", "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bd";
    specialize "aom_highbd_convolve8_${dir}", qw/sse2 avx2 neon sve/;
  }
}
546
547#
548# Loopfilter
549#
# 8-bit loop filter prototypes, driven from a table. Each row is: function
# name, parameter-list kind, then the architectures handed to specialize.
# Rows stay in the original declaration order.
{
  # The three parameter lists shared by the functions below.
  my %lpf_params = (
    single => "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh",
    dual   => "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1",
    quad   => "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0",
  );

  my @lpf_table = (
    [qw/aom_lpf_vertical_14        single sse2 neon/],
    [qw/aom_lpf_vertical_14_dual   dual   sse2 neon/],
    [qw/aom_lpf_vertical_14_quad   quad   avx2 sse2 neon/],
    [qw/aom_lpf_vertical_6         single sse2 neon/],
    [qw/aom_lpf_vertical_8         single sse2 neon/],
    [qw/aom_lpf_vertical_8_dual    dual   sse2 neon/],
    [qw/aom_lpf_vertical_8_quad    quad   sse2 neon/],
    [qw/aom_lpf_vertical_4         single sse2 neon/],
    [qw/aom_lpf_vertical_4_dual    dual   sse2 neon/],
    [qw/aom_lpf_vertical_4_quad    quad   sse2 neon/],
    [qw/aom_lpf_horizontal_14      single sse2 neon/],
    [qw/aom_lpf_horizontal_14_dual dual   sse2 neon/],
    [qw/aom_lpf_horizontal_14_quad quad   sse2 avx2 neon/],
    [qw/aom_lpf_horizontal_6       single sse2 neon/],
    [qw/aom_lpf_horizontal_6_dual  dual   sse2 neon/],
    [qw/aom_lpf_horizontal_6_quad  quad   sse2 avx2 neon/],
    [qw/aom_lpf_horizontal_8       single sse2 neon/],
    [qw/aom_lpf_horizontal_8_dual  dual   sse2 neon/],
    [qw/aom_lpf_horizontal_8_quad  quad   sse2 avx2 neon/],
    [qw/aom_lpf_horizontal_4       single sse2 neon/],
    [qw/aom_lpf_horizontal_4_dual  dual   sse2 neon/],
    [qw/aom_lpf_horizontal_4_quad  quad   sse2 neon/],
    [qw/aom_lpf_vertical_6_dual    dual   sse2 neon/],
    [qw/aom_lpf_vertical_6_quad    quad   sse2 neon/],
  );

  foreach my $row (@lpf_table) {
    my ($fn, $kind, @archs) = @{$row};
    # Interpolated copies are passed so each call receives its own strings.
    add_proto "void", "$fn", "$lpf_params{$kind}";
    specialize "$fn", @archs;
  }
}
621
622if (aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") {
  # High-bitdepth loop filters, declared only when CONFIG_AV1_HIGHBITDEPTH is
  # "yes". Signatures mirror the 8-bit ones but use a uint16_t buffer and a
  # trailing "int bd" argument. Only plain and "_dual" forms are declared in
  # this block (no "_quad"). avx2 is listed only for the 14/8/4 "_dual"
  # functions; the 6-tap "_dual" functions list neon and sse2 only.
623  add_proto qw/void aom_highbd_lpf_vertical_14/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
624  specialize qw/aom_highbd_lpf_vertical_14 neon sse2/;
625
626  add_proto qw/void aom_highbd_lpf_vertical_14_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd";
627  specialize qw/aom_highbd_lpf_vertical_14_dual neon sse2 avx2/;
628
629  add_proto qw/void aom_highbd_lpf_vertical_8/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
630  specialize qw/aom_highbd_lpf_vertical_8 neon sse2/;
631
632  add_proto qw/void aom_highbd_lpf_vertical_8_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd";
633  specialize qw/aom_highbd_lpf_vertical_8_dual neon sse2 avx2/;
634
635  add_proto qw/void aom_highbd_lpf_vertical_6/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
636  specialize qw/aom_highbd_lpf_vertical_6 neon sse2/;
637
638  add_proto qw/void aom_highbd_lpf_vertical_6_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd";
639  specialize qw/aom_highbd_lpf_vertical_6_dual neon sse2/;
640
641  add_proto qw/void aom_highbd_lpf_vertical_4/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
642  specialize qw/aom_highbd_lpf_vertical_4 neon sse2/;
643
644  add_proto qw/void aom_highbd_lpf_vertical_4_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd";
645  specialize qw/aom_highbd_lpf_vertical_4_dual neon sse2 avx2/;
646
647  add_proto qw/void aom_highbd_lpf_horizontal_14/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
648  specialize qw/aom_highbd_lpf_horizontal_14 neon sse2/;
649
650  add_proto qw/void aom_highbd_lpf_horizontal_14_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1,int bd";
651  specialize qw/aom_highbd_lpf_horizontal_14_dual neon sse2 avx2/;
652
653  add_proto qw/void aom_highbd_lpf_horizontal_6/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
654  specialize qw/aom_highbd_lpf_horizontal_6 neon sse2/;
655
656  add_proto qw/void aom_highbd_lpf_horizontal_6_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd";
657  specialize qw/aom_highbd_lpf_horizontal_6_dual neon sse2/;
658
659  add_proto qw/void aom_highbd_lpf_horizontal_8/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
660  specialize qw/aom_highbd_lpf_horizontal_8 neon sse2/;
661
662  add_proto qw/void aom_highbd_lpf_horizontal_8_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd";
663  specialize qw/aom_highbd_lpf_horizontal_8_dual neon sse2 avx2/;
664
665  add_proto qw/void aom_highbd_lpf_horizontal_4/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
666  specialize qw/aom_highbd_lpf_horizontal_4 neon sse2/;
667
668  add_proto qw/void aom_highbd_lpf_horizontal_4_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd";
669  specialize qw/aom_highbd_lpf_horizontal_4_dual neon sse2 avx2/;
670}
671
672#
673# Encoder functions.
674#
675
676#
677# Forward transform
678#
679if (aom_config("CONFIG_AV1_ENCODER") eq "yes"){
    # Everything in this block is declared only for encoder builds.
    # aom_fdct4x4 writes tran_low_t output; the "_lp" form writes int16_t.
680    add_proto qw/void aom_fdct4x4/, "const int16_t *input, tran_low_t *output, int stride";
681    specialize qw/aom_fdct4x4 neon sse2/;
682
683    add_proto qw/void aom_fdct4x4_lp/, "const int16_t *input, int16_t *output, int stride";
684    specialize qw/aom_fdct4x4_lp neon sse2/;
685
686    if (aom_config("CONFIG_INTERNAL_STATS") eq "yes"){
687      # 8x8 DCT transform for psnr-hvs. Unlike the other transforms, it is not
688      # compatible with av1 scan orders, because it does two transposes.
689      add_proto qw/void aom_fdct8x8/, "const int16_t *input, tran_low_t *output, int stride";
      # $ssse3_x86_64 is 'ssse3' only when $opts{arch} is "x86_64" (set near
      # the top of the file); otherwise it is the empty string.
690      specialize qw/aom_fdct8x8 neon sse2/, "$ssse3_x86_64";
691      # High bit depth
692      if (aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") {
693        add_proto qw/void aom_highbd_fdct8x8/, "const int16_t *input, tran_low_t *output, int stride";
694        specialize qw/aom_highbd_fdct8x8 sse2/;
695      }
696    }
697    # FFT/IFFT (float) only used for denoising (and noise power spectral density estimation)
    # The 2x2 forward and inverse transforms have no specialize line; 4x4
    # lists sse2 only, and 8x8 through 32x32 list avx2 and sse2.
698    add_proto qw/void aom_fft2x2_float/, "const float *input, float *temp, float *output";
699
700    add_proto qw/void aom_fft4x4_float/, "const float *input, float *temp, float *output";
701    specialize qw/aom_fft4x4_float                  sse2/;
702
703    add_proto qw/void aom_fft8x8_float/, "const float *input, float *temp, float *output";
704    specialize qw/aom_fft8x8_float avx2             sse2/;
705
706    add_proto qw/void aom_fft16x16_float/, "const float *input, float *temp, float *output";
707    specialize qw/aom_fft16x16_float avx2           sse2/;
708
709    add_proto qw/void aom_fft32x32_float/, "const float *input, float *temp, float *output";
710    specialize qw/aom_fft32x32_float avx2           sse2/;
711
712    add_proto qw/void aom_ifft2x2_float/, "const float *input, float *temp, float *output";
713
714    add_proto qw/void aom_ifft4x4_float/, "const float *input, float *temp, float *output";
715    specialize qw/aom_ifft4x4_float                 sse2/;
716
717    add_proto qw/void aom_ifft8x8_float/, "const float *input, float *temp, float *output";
718    specialize qw/aom_ifft8x8_float avx2            sse2/;
719
720    add_proto qw/void aom_ifft16x16_float/, "const float *input, float *temp, float *output";
721    specialize qw/aom_ifft16x16_float avx2          sse2/;
722
723    add_proto qw/void aom_ifft32x32_float/, "const float *input, float *temp, float *output";
724    specialize qw/aom_ifft32x32_float avx2          sse2/;
725}  # CONFIG_AV1_ENCODER
726
727#
728# Quantization
729#
730if (aom_config("CONFIG_AV1_ENCODER") eq "yes") {
  # Encoder-only quantizer prototypes. All six functions in this block share
  # one argument list. The base and 32x32 forms pass "$ssse3_x86_64" (which is
  # 'ssse3' only on x86_64 per the top of the file), whereas the 64x64 form
  # lists ssse3 unconditionally. aom_quantize_b_32x32 lists no sse2 variant.
731  add_proto qw/void aom_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
732  specialize qw/aom_quantize_b sse2 neon avx avx2/, "$ssse3_x86_64";
733
734  add_proto qw/void aom_quantize_b_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
735  specialize qw/aom_quantize_b_32x32 neon avx avx2/, "$ssse3_x86_64";
736
737  add_proto qw/void aom_quantize_b_64x64/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
738  specialize qw/aom_quantize_b_64x64 neon ssse3 avx2/;
739
  # The "_adaptive" quantizers are declared only when the build is not
  # realtime-only.
740  if (aom_config("CONFIG_REALTIME_ONLY") ne "yes") {
741    add_proto qw/void aom_quantize_b_adaptive/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
742    specialize qw/aom_quantize_b_adaptive sse2 avx2/;
743
744    add_proto qw/void aom_quantize_b_32x32_adaptive/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
745    specialize qw/aom_quantize_b_32x32_adaptive sse2/;
746
747    add_proto qw/void aom_quantize_b_64x64_adaptive/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
748    specialize qw/aom_quantize_b_64x64_adaptive sse2/;
749  }
750}  # CONFIG_AV1_ENCODER
751
752if (aom_config("CONFIG_AV1_ENCODER") eq "yes" && aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") {
  # High-bitdepth quantizers: declared only when both the encoder and
  # high-bitdepth support are enabled. The argument list is the same string
  # used for the aom_quantize_b family.
753  add_proto qw/void aom_highbd_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
754  specialize qw/aom_highbd_quantize_b sse2 avx2 neon/;
755
756  add_proto qw/void aom_highbd_quantize_b_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
757  specialize qw/aom_highbd_quantize_b_32x32 sse2 avx2 neon/;
758
759  add_proto qw/void aom_highbd_quantize_b_64x64/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
760  specialize qw/aom_highbd_quantize_b_64x64 sse2 avx2 neon/;
761
  # Adaptive variants are skipped in realtime-only builds. The 64x64 adaptive
  # form lists no avx2 variant, unlike the other two.
762  if (aom_config("CONFIG_REALTIME_ONLY") ne "yes") {
763    add_proto qw/void aom_highbd_quantize_b_adaptive/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
764    specialize qw/aom_highbd_quantize_b_adaptive sse2 avx2 neon/;
765
766    add_proto qw/void aom_highbd_quantize_b_32x32_adaptive/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
767    specialize qw/aom_highbd_quantize_b_32x32_adaptive sse2 avx2 neon/;
768
769    add_proto qw/void aom_highbd_quantize_b_64x64_adaptive/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
770    specialize qw/aom_highbd_quantize_b_64x64_adaptive sse2 neon/;
771  }
772}  # CONFIG_AV1_ENCODER && CONFIG_AV1_HIGHBITDEPTH
773
774#
775# Alpha blending with mask
776#
# 8-bit mask-blend prototypes; declared unconditionally (no config guard).
# The "_d16_" form takes CONV_BUF_TYPE sources plus a ConvolveParams pointer;
# the other three take uint8_t sources. hmask/vmask take a mask pointer with
# no mask stride or subsampling arguments.
# Two call spellings of specialize appear here: a single qw// list whose first
# word is the function name, and a quoted name followed by a qw// list. Both
# pass the name first and the variants after it.
777add_proto qw/void aom_lowbd_blend_a64_d16_mask/, "uint8_t *dst, uint32_t dst_stride, const CONV_BUF_TYPE *src0, uint32_t src0_stride, const CONV_BUF_TYPE *src1, uint32_t src1_stride, const uint8_t *mask, uint32_t mask_stride, int w, int h, int subw, int subh, ConvolveParams *conv_params";
778specialize qw/aom_lowbd_blend_a64_d16_mask sse4_1 avx2 neon/;
779add_proto qw/void aom_blend_a64_mask/, "uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, uint32_t mask_stride, int w, int h, int subw, int subh";
780add_proto qw/void aom_blend_a64_hmask/, "uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, int w, int h";
781add_proto qw/void aom_blend_a64_vmask/, "uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, int w, int h";
782specialize "aom_blend_a64_mask", qw/sse4_1 neon avx2/;
783specialize "aom_blend_a64_hmask", qw/sse4_1 neon/;
784specialize "aom_blend_a64_vmask", qw/sse4_1 neon/;
785
786if (aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") {
  # High-bitdepth counterparts of the blend functions, declared only when
  # CONFIG_AV1_HIGHBITDEPTH is "yes". Each adds a trailing bit-depth argument
  # ("int bd", or "const int bd" on the d16 form). Buffers are still typed
  # uint8_t * in these prototypes. All four prototypes are declared first,
  # then specialized; only the d16 form lists avx2.
787  add_proto qw/void aom_highbd_blend_a64_mask/, "uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, uint32_t mask_stride, int w, int h, int subw, int subh, int bd";
788  add_proto qw/void aom_highbd_blend_a64_hmask/, "uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, int w, int h, int bd";
789  add_proto qw/void aom_highbd_blend_a64_vmask/, "uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, int w, int h, int bd";
790  add_proto qw/void aom_highbd_blend_a64_d16_mask/, "uint8_t *dst, uint32_t dst_stride, const CONV_BUF_TYPE *src0, uint32_t src0_stride, const CONV_BUF_TYPE *src1, uint32_t src1_stride, const uint8_t *mask, uint32_t mask_stride, int w, int h, int subw, int subh, ConvolveParams *conv_params, const int bd";
791  specialize "aom_highbd_blend_a64_mask", qw/sse4_1 neon/;
792  specialize "aom_highbd_blend_a64_hmask", qw/sse4_1 neon/;
793  specialize "aom_highbd_blend_a64_vmask", qw/sse4_1 neon/;
794  specialize "aom_highbd_blend_a64_d16_mask", qw/sse4_1 neon avx2/;
795}
796
797if (aom_config("CONFIG_AV1_ENCODER") eq "yes") {
798  #
799  # Block subtraction
800  #
  # Also declared in this section: aom_sse (returns int64_t over a
  # width x height region of two uint8_t buffers) and aom_get_blk_sse_sum
  # (writes its results through the x_sum / x2_sum pointers).
  # These add_proto calls pass the return type, the function name and the
  # argument list as three separate arguments rather than folding the name
  # into the qw// list.
801  add_proto qw/void aom_subtract_block/, "int rows, int cols, int16_t *diff_ptr, ptrdiff_t diff_stride, const uint8_t *src_ptr, ptrdiff_t src_stride, const uint8_t *pred_ptr, ptrdiff_t pred_stride";
802  specialize qw/aom_subtract_block neon sse2 avx2/;
803
804  add_proto qw/int64_t/, "aom_sse", "const uint8_t *a, int a_stride, const uint8_t *b,int b_stride, int width, int height";
805  specialize qw/aom_sse sse4_1 avx2 neon neon_dotprod/;
806
807  add_proto qw/void/, "aom_get_blk_sse_sum", "const int16_t *data, int stride, int bw, int bh, int *x_sum, int64_t *x2_sum";
808  specialize qw/aom_get_blk_sse_sum sse2 avx2 neon sve/;
809
  # High-bitdepth versions keep uint8_t * parameter types in the prototype.
810  if (aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") {
811    add_proto qw/void aom_highbd_subtract_block/, "int rows, int cols, int16_t *diff_ptr, ptrdiff_t diff_stride, const uint8_t *src_ptr, ptrdiff_t src_stride, const uint8_t *pred_ptr, ptrdiff_t pred_stride";
812    specialize qw/aom_highbd_subtract_block sse2 neon/;
813
814    add_proto qw/int64_t/, "aom_highbd_sse", "const uint8_t *a8, int a_stride, const uint8_t *b8,int b_stride, int width, int height";
815    specialize qw/aom_highbd_sse sse4_1 avx2 neon sve/;
816  }
817
818  #
819  # Sum of Squares
820  #
  # All functions in this section return uint64_t. The "_2d_" forms take a
  # stride plus width/height; aom_sum_squares_i16 takes a flat element count N.
  # aom_var_2d_u8 / aom_var_2d_u16 are declared here as well.
821  add_proto qw/uint64_t aom_sum_squares_2d_i16/, "const int16_t *src, int stride, int width, int height";
822  specialize qw/aom_sum_squares_2d_i16 sse2 avx2 neon sve/;
823
824  add_proto qw/uint64_t aom_sum_squares_i16/, "const int16_t *src, uint32_t N";
825  specialize qw/aom_sum_squares_i16 sse2 neon sve/;
826
827  add_proto qw/uint64_t aom_var_2d_u8/, "uint8_t *src, int src_stride, int width, int height";
828  specialize qw/aom_var_2d_u8 sse2 avx2 neon neon_dotprod/;
829
  # The u16 form still declares its source as uint8_t *.
830  if (aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") {
831    add_proto qw/uint64_t aom_var_2d_u16/, "uint8_t *src, int src_stride, int width, int height";
832    specialize qw/aom_var_2d_u16 sse2 avx2 neon sve/;
833  }
834
835  #
836  # Single block SAD / Single block Avg SAD
837  #
  # For every [w, h] pair in @encoder_block_sizes (built at the top of the
  # file) declare four prototypes: plain SAD, "skip" SAD, SAD against a
  # second predictor ("_avg"), and the dist_wtd "_avg" form that also takes a
  # DIST_WTD_COMP_PARAMS pointer. $w and $h are assigned without "my", so they
  # are the same package variables used by the other size loops.
838  foreach (@encoder_block_sizes) {
839    ($w, $h) = @$_;
840    add_proto qw/unsigned int/, "aom_sad${w}x${h}", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
841    add_proto qw/unsigned int/, "aom_sad_skip_${w}x${h}", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
842    add_proto qw/unsigned int/, "aom_sad${w}x${h}_avg", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
843    add_proto qw/unsigned int/, "aom_dist_wtd_sad${w}x${h}_avg", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, const DIST_WTD_COMP_PARAMS *jcp_param";
844  }
845
846  add_proto qw/uint64_t aom_sum_sse_2d_i16/, "const int16_t *src, int src_stride, int width, int height, int *sum";
847  specialize qw/aom_sum_sse_2d_i16 avx2 neon sse2 sve/;
  # Variant table for plain aom_sad<W>x<H>. As listed: avx2 only for widths
  # of 32 and up in the first group; neon_dotprod only for widths of 16 and
  # up; every size lists sse2 and neon.
848  specialize qw/aom_sad128x128    avx2 sse2 neon neon_dotprod/;
849  specialize qw/aom_sad128x64     avx2 sse2 neon neon_dotprod/;
850  specialize qw/aom_sad64x128     avx2 sse2 neon neon_dotprod/;
851  specialize qw/aom_sad64x64      avx2 sse2 neon neon_dotprod/;
852  specialize qw/aom_sad64x32      avx2 sse2 neon neon_dotprod/;
853  specialize qw/aom_sad32x64      avx2 sse2 neon neon_dotprod/;
854  specialize qw/aom_sad32x32      avx2 sse2 neon neon_dotprod/;
855  specialize qw/aom_sad32x16      avx2 sse2 neon neon_dotprod/;
856  specialize qw/aom_sad16x32           sse2 neon neon_dotprod/;
857  specialize qw/aom_sad16x16           sse2 neon neon_dotprod/;
858  specialize qw/aom_sad16x8            sse2 neon neon_dotprod/;
859  specialize qw/aom_sad8x16            sse2 neon/;
860  specialize qw/aom_sad8x8             sse2 neon/;
861  specialize qw/aom_sad8x4             sse2 neon/;
862  specialize qw/aom_sad4x8             sse2 neon/;
863  specialize qw/aom_sad4x4             sse2 neon/;
864
  # 1:4 and 4:1 sizes. NOTE(review): these sizes are added to
  # @encoder_block_sizes only when CONFIG_REALTIME_ONLY is not "yes" (see the
  # top of the file), yet these lines are not wrapped in that check, unlike
  # the aom_dist_wtd_sad equivalents further down. Presumably specialize
  # tolerates a name with no prototype -- confirm.
865  specialize qw/aom_sad4x16            sse2 neon/;
866  specialize qw/aom_sad16x4            sse2 neon neon_dotprod/;
867  specialize qw/aom_sad8x32            sse2 neon/;
868  specialize qw/aom_sad32x8            sse2 neon neon_dotprod/;
869  specialize qw/aom_sad16x64           sse2 neon neon_dotprod/;
870  specialize qw/aom_sad64x16           sse2 neon neon_dotprod/;
871
  # Variant table for aom_sad_skip_<W>x<H>. It follows the plain-SAD table
  # except that the height-4 sizes (8x4, 4x4 and 16x4) list no sse2 variant.
872  specialize qw/aom_sad_skip_128x128    avx2 sse2 neon neon_dotprod/;
873  specialize qw/aom_sad_skip_128x64     avx2 sse2 neon neon_dotprod/;
874  specialize qw/aom_sad_skip_64x128     avx2 sse2 neon neon_dotprod/;
875  specialize qw/aom_sad_skip_64x64      avx2 sse2 neon neon_dotprod/;
876  specialize qw/aom_sad_skip_64x32      avx2 sse2 neon neon_dotprod/;
877  specialize qw/aom_sad_skip_32x64      avx2 sse2 neon neon_dotprod/;
878  specialize qw/aom_sad_skip_32x32      avx2 sse2 neon neon_dotprod/;
879  specialize qw/aom_sad_skip_32x16      avx2 sse2 neon neon_dotprod/;
880  specialize qw/aom_sad_skip_16x32           sse2 neon neon_dotprod/;
881  specialize qw/aom_sad_skip_16x16           sse2 neon neon_dotprod/;
882  specialize qw/aom_sad_skip_16x8            sse2 neon neon_dotprod/;
883  specialize qw/aom_sad_skip_8x16            sse2 neon/;
884  specialize qw/aom_sad_skip_8x8             sse2 neon/;
885  specialize qw/aom_sad_skip_8x4                  neon/;
886  specialize qw/aom_sad_skip_4x8             sse2 neon/;
887  specialize qw/aom_sad_skip_4x4                  neon/;
888
  # 1:4 and 4:1 sizes; not wrapped in a CONFIG_REALTIME_ONLY check here.
889  specialize qw/aom_sad_skip_4x16            sse2 neon/;
890  specialize qw/aom_sad_skip_16x4                 neon neon_dotprod/;
891  specialize qw/aom_sad_skip_8x32            sse2 neon/;
892  specialize qw/aom_sad_skip_32x8            sse2 neon neon_dotprod/;
893  specialize qw/aom_sad_skip_16x64           sse2 neon neon_dotprod/;
894  specialize qw/aom_sad_skip_64x16           sse2 neon neon_dotprod/;
895
  # Variant table for aom_sad<W>x<H>_avg. The variants listed per size are the
  # same as in the plain aom_sad table.
896  specialize qw/aom_sad128x128_avg avx2 sse2 neon neon_dotprod/;
897  specialize qw/aom_sad128x64_avg  avx2 sse2 neon neon_dotprod/;
898  specialize qw/aom_sad64x128_avg  avx2 sse2 neon neon_dotprod/;
899  specialize qw/aom_sad64x64_avg   avx2 sse2 neon neon_dotprod/;
900  specialize qw/aom_sad64x32_avg   avx2 sse2 neon neon_dotprod/;
901  specialize qw/aom_sad32x64_avg   avx2 sse2 neon neon_dotprod/;
902  specialize qw/aom_sad32x32_avg   avx2 sse2 neon neon_dotprod/;
903  specialize qw/aom_sad32x16_avg   avx2 sse2 neon neon_dotprod/;
904  specialize qw/aom_sad16x32_avg        sse2 neon neon_dotprod/;
905  specialize qw/aom_sad16x16_avg        sse2 neon neon_dotprod/;
906  specialize qw/aom_sad16x8_avg         sse2 neon neon_dotprod/;
907  specialize qw/aom_sad8x16_avg         sse2 neon/;
908  specialize qw/aom_sad8x8_avg          sse2 neon/;
909  specialize qw/aom_sad8x4_avg          sse2 neon/;
910  specialize qw/aom_sad4x8_avg          sse2 neon/;
911  specialize qw/aom_sad4x4_avg          sse2 neon/;
912
  # 1:4 and 4:1 sizes; not wrapped in a CONFIG_REALTIME_ONLY check here.
913  specialize qw/aom_sad4x16_avg         sse2 neon/;
914  specialize qw/aom_sad16x4_avg         sse2 neon neon_dotprod/;
915  specialize qw/aom_sad8x32_avg         sse2 neon/;
916  specialize qw/aom_sad32x8_avg         sse2 neon neon_dotprod/;
917  specialize qw/aom_sad16x64_avg        sse2 neon neon_dotprod/;
918  specialize qw/aom_sad64x16_avg        sse2 neon neon_dotprod/;
919
  # Variant table for aom_dist_wtd_sad<W>x<H>_avg. No avx2 variant is listed
  # for any size; neon_dotprod is listed for widths of 16 and up.
920  specialize qw/aom_dist_wtd_sad128x128_avg sse2 neon neon_dotprod/;
921  specialize qw/aom_dist_wtd_sad128x64_avg  sse2 neon neon_dotprod/;
922  specialize qw/aom_dist_wtd_sad64x128_avg  sse2 neon neon_dotprod/;
923  specialize qw/aom_dist_wtd_sad64x64_avg   sse2 neon neon_dotprod/;
924  specialize qw/aom_dist_wtd_sad64x32_avg   sse2 neon neon_dotprod/;
925  specialize qw/aom_dist_wtd_sad32x64_avg   sse2 neon neon_dotprod/;
926  specialize qw/aom_dist_wtd_sad32x32_avg   sse2 neon neon_dotprod/;
927  specialize qw/aom_dist_wtd_sad32x16_avg   sse2 neon neon_dotprod/;
928  specialize qw/aom_dist_wtd_sad16x32_avg   sse2 neon neon_dotprod/;
929  specialize qw/aom_dist_wtd_sad16x16_avg   sse2 neon neon_dotprod/;
930  specialize qw/aom_dist_wtd_sad16x8_avg    sse2 neon neon_dotprod/;
931  specialize qw/aom_dist_wtd_sad8x16_avg    sse2 neon/;
932  specialize qw/aom_dist_wtd_sad8x8_avg     sse2 neon/;
933  specialize qw/aom_dist_wtd_sad8x4_avg     sse2 neon/;
934  specialize qw/aom_dist_wtd_sad4x8_avg     sse2 neon/;
935  specialize qw/aom_dist_wtd_sad4x4_avg     sse2 neon/;
936
  # The 1:4 and 4:1 sizes are specialized only when the build is not
  # realtime-only, the same condition under which those sizes are pushed onto
  # @encoder_block_sizes at the top of the file.
937  if (aom_config("CONFIG_REALTIME_ONLY") ne "yes") {
938    specialize qw/aom_dist_wtd_sad4x16_avg     sse2 neon/;
939    specialize qw/aom_dist_wtd_sad16x4_avg     sse2 neon neon_dotprod/;
940    specialize qw/aom_dist_wtd_sad8x32_avg     sse2 neon/;
941    specialize qw/aom_dist_wtd_sad32x8_avg     sse2 neon neon_dotprod/;
942    specialize qw/aom_dist_wtd_sad16x64_avg    sse2 neon neon_dotprod/;
943    specialize qw/aom_dist_wtd_sad64x16_avg    sse2 neon neon_dotprod/;
944  }
945
946  if (aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") {
    # High-bitdepth SAD family: per-size prototypes first, then explicit
    # per-size variant tables. Prototypes keep uint8_t * parameter types.
947    foreach (@encoder_block_sizes) {
948      ($w, $h) = @$_;
949      add_proto qw/unsigned int/, "aom_highbd_sad${w}x${h}", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
950      add_proto qw/unsigned int/, "aom_highbd_sad_skip_${w}x${h}", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
951      add_proto qw/unsigned int/, "aom_highbd_sad${w}x${h}_avg", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
      # In-loop sse2 specialization for the plain and "_avg" forms, skipped
      # for any size with a 128 dimension and for width 4. The explicit tables
      # below name these functions again (including sse2 for the width-4
      # sizes skipped here). NOTE(review): whether repeated specialize calls
      # on one name accumulate or override is decided by the helper, which is
      # not visible here -- confirm.
952      if ($w != 128 && $h != 128 && $w != 4) {
953        specialize "aom_highbd_sad${w}x${h}", qw/sse2/;
954        specialize "aom_highbd_sad${w}x${h}_avg", qw/sse2/;
955      }
      # No specialize call for the dist_wtd form appears in this block.
956      add_proto qw/unsigned int/, "aom_highbd_dist_wtd_sad${w}x${h}_avg", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, const DIST_WTD_COMP_PARAMS* jcp_param";
957    }
    # aom_highbd_sad<W>x<H>: avx2 for widths of 16 and up; the three sizes
    # with a 128 dimension list no sse2.
958    specialize qw/aom_highbd_sad128x128 avx2      neon/;
959    specialize qw/aom_highbd_sad128x64  avx2      neon/;
960    specialize qw/aom_highbd_sad64x128  avx2      neon/;
961    specialize qw/aom_highbd_sad64x64   avx2 sse2 neon/;
962    specialize qw/aom_highbd_sad64x32   avx2 sse2 neon/;
963    specialize qw/aom_highbd_sad32x64   avx2 sse2 neon/;
964    specialize qw/aom_highbd_sad32x32   avx2 sse2 neon/;
965    specialize qw/aom_highbd_sad32x16   avx2 sse2 neon/;
966    specialize qw/aom_highbd_sad16x32   avx2 sse2 neon/;
967    specialize qw/aom_highbd_sad16x16   avx2 sse2 neon/;
968    specialize qw/aom_highbd_sad16x8    avx2 sse2 neon/;
969    specialize qw/aom_highbd_sad8x16         sse2 neon/;
970    specialize qw/aom_highbd_sad8x8          sse2 neon/;
971    specialize qw/aom_highbd_sad8x4          sse2 neon/;
972    specialize qw/aom_highbd_sad4x8          sse2 neon/;
973    specialize qw/aom_highbd_sad4x4          sse2 neon/;
974
975    specialize qw/aom_highbd_sad4x16         sse2 neon/;
976    specialize qw/aom_highbd_sad16x4    avx2 sse2 neon/;
977    specialize qw/aom_highbd_sad8x32         sse2 neon/;
978    specialize qw/aom_highbd_sad32x8    avx2 sse2 neon/;
979    specialize qw/aom_highbd_sad16x64   avx2 sse2 neon/;
980    specialize qw/aom_highbd_sad64x16   avx2 sse2 neon/;
981
    # aom_highbd_sad_skip_<W>x<H>: the height-4 sizes (16x4, 8x4, 4x4) list
    # neon only. The 16x4 entry sits in the first group rather than with the
    # other 1:4 / 4:1 sizes.
982    specialize qw/aom_highbd_sad_skip_128x128 avx2      neon/;
983    specialize qw/aom_highbd_sad_skip_128x64  avx2      neon/;
984    specialize qw/aom_highbd_sad_skip_64x128  avx2      neon/;
985    specialize qw/aom_highbd_sad_skip_64x64   avx2 sse2 neon/;
986    specialize qw/aom_highbd_sad_skip_64x32   avx2 sse2 neon/;
987    specialize qw/aom_highbd_sad_skip_32x64   avx2 sse2 neon/;
988    specialize qw/aom_highbd_sad_skip_32x32   avx2 sse2 neon/;
989    specialize qw/aom_highbd_sad_skip_32x16   avx2 sse2 neon/;
990    specialize qw/aom_highbd_sad_skip_16x32   avx2 sse2 neon/;
991    specialize qw/aom_highbd_sad_skip_16x16   avx2 sse2 neon/;
992    specialize qw/aom_highbd_sad_skip_16x8    avx2 sse2 neon/;
993    specialize qw/aom_highbd_sad_skip_16x4              neon/;
994    specialize qw/aom_highbd_sad_skip_8x16         sse2 neon/;
995    specialize qw/aom_highbd_sad_skip_8x4               neon/;
996    specialize qw/aom_highbd_sad_skip_8x8          sse2 neon/;
997    specialize qw/aom_highbd_sad_skip_4x8          sse2 neon/;
998    specialize qw/aom_highbd_sad_skip_4x4               neon/;
999
1000    specialize qw/aom_highbd_sad_skip_4x16         sse2 neon/;
1001    specialize qw/aom_highbd_sad_skip_8x32         sse2 neon/;
1002    specialize qw/aom_highbd_sad_skip_32x8    avx2 sse2 neon/;
1003    specialize qw/aom_highbd_sad_skip_16x64   avx2 sse2 neon/;
1004    specialize qw/aom_highbd_sad_skip_64x16   avx2 sse2 neon/;
1005
    # aom_highbd_sad<W>x<H>_avg: the 8x16 and 8x8 lines list neon only, but
    # both sizes also pass the in-loop sse2 condition above.
1006    specialize qw/aom_highbd_sad128x128_avg avx2      neon/;
1007    specialize qw/aom_highbd_sad128x64_avg  avx2      neon/;
1008    specialize qw/aom_highbd_sad64x128_avg  avx2      neon/;
1009    specialize qw/aom_highbd_sad64x64_avg   avx2 sse2 neon/;
1010    specialize qw/aom_highbd_sad64x32_avg   avx2 sse2 neon/;
1011    specialize qw/aom_highbd_sad32x64_avg   avx2 sse2 neon/;
1012    specialize qw/aom_highbd_sad32x32_avg   avx2 sse2 neon/;
1013    specialize qw/aom_highbd_sad32x16_avg   avx2 sse2 neon/;
1014    specialize qw/aom_highbd_sad16x32_avg   avx2 sse2 neon/;
1015    specialize qw/aom_highbd_sad16x16_avg   avx2 sse2 neon/;
1016    specialize qw/aom_highbd_sad16x8_avg    avx2 sse2 neon/;
1017    specialize qw/aom_highbd_sad8x16_avg              neon/;
1018    specialize qw/aom_highbd_sad8x8_avg               neon/;
1019    specialize qw/aom_highbd_sad8x4_avg          sse2 neon/;
1020    specialize qw/aom_highbd_sad4x8_avg          sse2 neon/;
1021    specialize qw/aom_highbd_sad4x4_avg          sse2 neon/;
1022
1023    specialize qw/aom_highbd_sad4x16_avg         sse2 neon/;
1024    specialize qw/aom_highbd_sad8x32_avg         sse2 neon/;
1025    specialize qw/aom_highbd_sad16x4_avg    avx2 sse2 neon/;
1026    specialize qw/aom_highbd_sad16x64_avg   avx2 sse2 neon/;
1027    specialize qw/aom_highbd_sad32x8_avg    avx2 sse2 neon/;
1028    specialize qw/aom_highbd_sad64x16_avg   avx2 sse2 neon/;
1029  }
1030  #
1031  # Masked SAD
1032  #
  # One prototype per [w, h] pair in @encoder_block_sizes; every size gets the
  # same variant list (ssse3, avx2, neon). The function takes a second
  # predictor, a mask with its own stride, and an invert_mask flag.
1033  foreach (@encoder_block_sizes) {
1034    ($w, $h) = @$_;
1035    add_proto qw/unsigned int/, "aom_masked_sad${w}x${h}", "const uint8_t *src, int src_stride, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask";
1036    specialize "aom_masked_sad${w}x${h}", qw/ssse3 avx2 neon/;
1037  }
1038
1039  if (aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") {
    # High-bitdepth masked SAD: same loop shape and variant list as the 8-bit
    # version. The source/reference/second-predictor parameters carry an "8"
    # suffix (src8, ref8, second_pred8) but remain uint8_t * in the prototype;
    # the mask parameter is plain "msk".
1040    foreach (@encoder_block_sizes) {
1041      ($w, $h) = @$_;
1042      add_proto qw/unsigned int/, "aom_highbd_masked_sad${w}x${h}", "const uint8_t *src8, int src_stride, const uint8_t *ref8, int ref_stride, const uint8_t *second_pred8, const uint8_t *msk, int msk_stride, int invert_mask";
1043      specialize "aom_highbd_masked_sad${w}x${h}", qw/ssse3 avx2 neon/;
1044    }
1045  }
1046
1047  #
1048  # OBMC SAD
1049  #
  # Declared only when the build is not realtime-only. Each function takes a
  # uint8_t prediction buffer with stride plus int32_t "wsrc" and "mask"
  # arrays.
1050  if (aom_config("CONFIG_REALTIME_ONLY") ne "yes") {
1051    foreach (@encoder_block_sizes) {
1052      ($w, $h) = @$_;
1053      add_proto qw/unsigned int/, "aom_obmc_sad${w}x${h}", "const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask";
      # Specialize every size except 128x32 and 32x128.
      # NOTE(review): as @encoder_block_sizes is built at the top of the file
      # (w <= 2*h and h <= 2*w, plus six explicit 1:4 / 4:1 sizes), neither
      # 128x32 nor 32x128 is in the list, so this guard looks redundant --
      # confirm nothing in between adds them.
1054      if (! (($w == 128 && $h == 32) || ($w == 32 && $h == 128))) {
1055        specialize "aom_obmc_sad${w}x${h}", qw/sse4_1 avx2 neon/;
1056      }
1057    }
1058
    # High-bitdepth OBMC SAD: same prototype shape, guard and variant list.
1059    if (aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") {
1060      foreach (@encoder_block_sizes) {
1061        ($w, $h) = @$_;
1062        add_proto qw/unsigned int/, "aom_highbd_obmc_sad${w}x${h}", "const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask";
1063        if (! (($w == 128 && $h == 32) || ($w == 32 && $h == 128))) {
1064          specialize "aom_highbd_obmc_sad${w}x${h}", qw/sse4_1 avx2 neon/;
1065        }
1066      }
1067    }
1068  }
1069
1070  #
1071  # Multi-block SAD, comparing a reference to N independent blocks.
1072  # Declares x4d, x3d and skip-x4d prototypes per block size, then lists the specializations size by size.
1073  foreach (@encoder_block_sizes) {
1074    ($w, $h) = @$_;  # unpack the [width, height] pair
1075    add_proto qw/void/, "aom_sad${w}x${h}x4d", "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[4], int ref_stride, uint32_t sad_array[4]";
1076    add_proto qw/void/, "aom_sad${w}x${h}x3d", "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[4], int ref_stride, uint32_t sad_array[4]";  # x3d keeps the 4-entry array parameters of x4d
1077    add_proto qw/void/, "aom_sad_skip_${w}x${h}x4d", "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[4], int ref_stride, uint32_t sad_array[4]";
1078  }
1079  # x4d specializations.
1080  specialize qw/aom_sad128x128x4d avx2 sse2 neon neon_dotprod/;
1081  specialize qw/aom_sad128x64x4d  avx2 sse2 neon neon_dotprod/;
1082  specialize qw/aom_sad64x128x4d  avx2 sse2 neon neon_dotprod/;
1083  specialize qw/aom_sad64x64x4d   avx2 sse2 neon neon_dotprod/;
1084  specialize qw/aom_sad64x32x4d   avx2 sse2 neon neon_dotprod/;
1085  specialize qw/aom_sad32x64x4d   avx2 sse2 neon neon_dotprod/;
1086  specialize qw/aom_sad32x32x4d   avx2 sse2 neon neon_dotprod/;
1087  specialize qw/aom_sad32x16x4d   avx2 sse2 neon neon_dotprod/;
1088  specialize qw/aom_sad16x32x4d   avx2 sse2 neon neon_dotprod/;
1089  specialize qw/aom_sad16x16x4d   avx2 sse2 neon neon_dotprod/;
1090  specialize qw/aom_sad16x8x4d    avx2 sse2 neon neon_dotprod/;
1091
1092  specialize qw/aom_sad8x16x4d         sse2 neon/;
1093  specialize qw/aom_sad8x8x4d          sse2 neon/;
1094  specialize qw/aom_sad8x4x4d          sse2 neon/;
1095  specialize qw/aom_sad4x8x4d          sse2 neon/;
1096  specialize qw/aom_sad4x4x4d          sse2 neon/;
1097  # NOTE(review): the six sizes below are pushed onto @encoder_block_sizes only when CONFIG_REALTIME_ONLY is not "yes" (top of file), yet they are specialized unconditionally here - confirm this is intended.
1098  specialize qw/aom_sad64x16x4d   avx2 sse2 neon neon_dotprod/;
1099  specialize qw/aom_sad32x8x4d    avx2 sse2 neon neon_dotprod/;
1100  specialize qw/aom_sad16x64x4d   avx2 sse2 neon neon_dotprod/;
1101  specialize qw/aom_sad16x4x4d    avx2 sse2 neon neon_dotprod/;
1102  specialize qw/aom_sad8x32x4d         sse2 neon/;
1103  specialize qw/aom_sad4x16x4d         sse2 neon/;
1104  # Skip-x4d specializations; 16x4, 8x4 and 4x4 list no sse2 entry.
1105  specialize qw/aom_sad_skip_128x128x4d avx2 sse2 neon neon_dotprod/;
1106  specialize qw/aom_sad_skip_128x64x4d  avx2 sse2 neon neon_dotprod/;
1107  specialize qw/aom_sad_skip_64x128x4d  avx2 sse2 neon neon_dotprod/;
1108  specialize qw/aom_sad_skip_64x64x4d   avx2 sse2 neon neon_dotprod/;
1109  specialize qw/aom_sad_skip_64x32x4d   avx2 sse2 neon neon_dotprod/;
1110  specialize qw/aom_sad_skip_64x16x4d   avx2 sse2 neon neon_dotprod/;
1111  specialize qw/aom_sad_skip_32x64x4d   avx2 sse2 neon neon_dotprod/;
1112  specialize qw/aom_sad_skip_32x32x4d   avx2 sse2 neon neon_dotprod/;
1113  specialize qw/aom_sad_skip_32x16x4d   avx2 sse2 neon neon_dotprod/;
1114  specialize qw/aom_sad_skip_32x8x4d    avx2 sse2 neon neon_dotprod/;
1115
1116  specialize qw/aom_sad_skip_16x64x4d   avx2 sse2 neon neon_dotprod/;
1117  specialize qw/aom_sad_skip_16x32x4d   avx2 sse2 neon neon_dotprod/;
1118  specialize qw/aom_sad_skip_16x16x4d   avx2 sse2 neon neon_dotprod/;
1119  specialize qw/aom_sad_skip_16x8x4d    avx2 sse2 neon neon_dotprod/;
1120  specialize qw/aom_sad_skip_16x4x4d    avx2      neon neon_dotprod/;
1121  specialize qw/aom_sad_skip_8x32x4d         sse2 neon/;
1122  specialize qw/aom_sad_skip_8x16x4d         sse2 neon/;
1123  specialize qw/aom_sad_skip_8x8x4d          sse2 neon/;
1124  specialize qw/aom_sad_skip_8x4x4d               neon/;
1125  specialize qw/aom_sad_skip_4x16x4d         sse2 neon/;
1126  specialize qw/aom_sad_skip_4x8x4d          sse2 neon/;
1127  specialize qw/aom_sad_skip_4x4x4d               neon/;
1128  # x3d specializations; none of them lists sse2.
1129  specialize qw/aom_sad128x128x3d avx2 neon neon_dotprod/;
1130  specialize qw/aom_sad128x64x3d  avx2 neon neon_dotprod/;
1131  specialize qw/aom_sad64x128x3d  avx2 neon neon_dotprod/;
1132  specialize qw/aom_sad64x64x3d   avx2 neon neon_dotprod/;
1133  specialize qw/aom_sad64x32x3d   avx2 neon neon_dotprod/;
1134  specialize qw/aom_sad32x64x3d   avx2 neon neon_dotprod/;
1135  specialize qw/aom_sad32x32x3d   avx2 neon neon_dotprod/;
1136  specialize qw/aom_sad32x16x3d   avx2 neon neon_dotprod/;
1137  specialize qw/aom_sad16x32x3d   avx2 neon neon_dotprod/;
1138  specialize qw/aom_sad16x16x3d   avx2 neon neon_dotprod/;
1139  specialize qw/aom_sad16x8x3d    avx2 neon neon_dotprod/;
1140  specialize qw/aom_sad8x16x3d         neon/;
1141  specialize qw/aom_sad8x8x3d          neon/;
1142  specialize qw/aom_sad8x4x3d          neon/;
1143  specialize qw/aom_sad4x8x3d          neon/;
1144  specialize qw/aom_sad4x4x3d          neon/;
1145
1146  specialize qw/aom_sad64x16x3d   avx2 neon neon_dotprod/;
1147  specialize qw/aom_sad32x8x3d    avx2 neon neon_dotprod/;
1148  specialize qw/aom_sad16x64x3d   avx2 neon neon_dotprod/;
1149  specialize qw/aom_sad16x4x3d    avx2 neon neon_dotprod/;
1150  specialize qw/aom_sad8x32x3d         neon/;
1151  specialize qw/aom_sad4x16x3d         neon/;
1152
1153  #
1154  # High bit-depth multi-block SAD, comparing a reference to N independent blocks.
1155  # Emitted only when CONFIG_AV1_HIGHBITDEPTH is "yes"; prototypes mirror the parameter lists of the 8-bit x4d/x3d/skip-x4d functions.
1156  if (aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") {
1157    foreach (@encoder_block_sizes) {
1158      ($w, $h) = @$_;  # unpack the [width, height] pair
1159      add_proto qw/void/, "aom_highbd_sad${w}x${h}x4d", "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[4], int ref_stride, uint32_t sad_array[4]";
1160      add_proto qw/void/, "aom_highbd_sad${w}x${h}x3d", "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[4], int ref_stride, uint32_t sad_array[4]";
1161      add_proto qw/void/, "aom_highbd_sad_skip_${w}x${h}x4d", "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[4], int ref_stride, uint32_t sad_array[4]";
1162      if ($w != 128 && $h != 128) {  # sse2 is listed here only for sizes where neither dimension is 128
1163        specialize "aom_highbd_sad${w}x${h}x4d", qw/sse2/;
1164      }
1165    }
1166    specialize qw/aom_highbd_sad128x128x4d      avx2 neon/;  # NOTE(review): the per-size lists below repeat sse2 from the loop above; presumably repeated specialize calls accumulate - confirm
1167    specialize qw/aom_highbd_sad128x64x4d       avx2 neon/;
1168    specialize qw/aom_highbd_sad64x128x4d       avx2 neon/;
1169    specialize qw/aom_highbd_sad64x64x4d   sse2 avx2 neon/;
1170    specialize qw/aom_highbd_sad64x32x4d   sse2 avx2 neon/;
1171    specialize qw/aom_highbd_sad32x64x4d   sse2 avx2 neon/;
1172    specialize qw/aom_highbd_sad32x32x4d   sse2 avx2 neon/;
1173    specialize qw/aom_highbd_sad32x16x4d   sse2 avx2 neon/;
1174    specialize qw/aom_highbd_sad16x32x4d   sse2 avx2 neon/;
1175    specialize qw/aom_highbd_sad16x16x4d   sse2 avx2 neon/;
1176    specialize qw/aom_highbd_sad16x8x4d    sse2 avx2 neon/;
1177    specialize qw/aom_highbd_sad8x16x4d    sse2      neon/;
1178    specialize qw/aom_highbd_sad8x8x4d     sse2      neon/;
1179    specialize qw/aom_highbd_sad8x4x4d     sse2      neon/;
1180    specialize qw/aom_highbd_sad4x8x4d     sse2      neon/;
1181    specialize qw/aom_highbd_sad4x4x4d     sse2      neon/;
1182
1183    specialize qw/aom_highbd_sad4x16x4d         sse2 neon/;
1184    specialize qw/aom_highbd_sad16x4x4d    avx2 sse2 neon/;
1185    specialize qw/aom_highbd_sad8x32x4d         sse2 neon/;
1186    specialize qw/aom_highbd_sad32x8x4d    avx2 sse2 neon/;
1187    specialize qw/aom_highbd_sad16x64x4d   avx2 sse2 neon/;
1188    specialize qw/aom_highbd_sad64x16x4d   avx2 sse2 neon/;
1189    # Skip-x4d specializations; 16x4, 8x4 and 4x4 list neon only.
1190    specialize qw/aom_highbd_sad_skip_128x128x4d avx2      neon/;
1191    specialize qw/aom_highbd_sad_skip_128x64x4d  avx2      neon/;
1192    specialize qw/aom_highbd_sad_skip_64x128x4d  avx2      neon/;
1193    specialize qw/aom_highbd_sad_skip_64x64x4d   avx2 sse2 neon/;
1194    specialize qw/aom_highbd_sad_skip_64x32x4d   avx2 sse2 neon/;
1195    specialize qw/aom_highbd_sad_skip_32x64x4d   avx2 sse2 neon/;
1196    specialize qw/aom_highbd_sad_skip_32x32x4d   avx2 sse2 neon/;
1197    specialize qw/aom_highbd_sad_skip_32x16x4d   avx2 sse2 neon/;
1198    specialize qw/aom_highbd_sad_skip_16x32x4d   avx2 sse2 neon/;
1199    specialize qw/aom_highbd_sad_skip_16x16x4d   avx2 sse2 neon/;
1200    specialize qw/aom_highbd_sad_skip_16x8x4d    avx2 sse2 neon/;
1201    specialize qw/aom_highbd_sad_skip_16x4x4d              neon/;
1202    specialize qw/aom_highbd_sad_skip_8x16x4d         sse2 neon/;
1203    specialize qw/aom_highbd_sad_skip_8x8x4d          sse2 neon/;
1204    specialize qw/aom_highbd_sad_skip_8x4x4d               neon/;
1205    specialize qw/aom_highbd_sad_skip_4x8x4d          sse2 neon/;
1206    specialize qw/aom_highbd_sad_skip_4x4x4d               neon/;
1207
1208    specialize qw/aom_highbd_sad_skip_4x16x4d         sse2 neon/;
1209    specialize qw/aom_highbd_sad_skip_8x32x4d         sse2 neon/;
1210    specialize qw/aom_highbd_sad_skip_32x8x4d    avx2 sse2 neon/;
1211    specialize qw/aom_highbd_sad_skip_16x64x4d   avx2 sse2 neon/;
1212    specialize qw/aom_highbd_sad_skip_64x16x4d   avx2 sse2 neon/;
1213    # x3d specializations; none of them lists sse2.
1214    specialize qw/aom_highbd_sad128x128x3d avx2 neon/;
1215    specialize qw/aom_highbd_sad128x64x3d  avx2 neon/;
1216    specialize qw/aom_highbd_sad64x128x3d  avx2 neon/;
1217    specialize qw/aom_highbd_sad64x64x3d   avx2 neon/;
1218    specialize qw/aom_highbd_sad64x32x3d   avx2 neon/;
1219    specialize qw/aom_highbd_sad32x64x3d   avx2 neon/;
1220    specialize qw/aom_highbd_sad32x32x3d   avx2 neon/;
1221    specialize qw/aom_highbd_sad32x16x3d   avx2 neon/;
1222    specialize qw/aom_highbd_sad16x32x3d   avx2 neon/;
1223    specialize qw/aom_highbd_sad16x16x3d   avx2 neon/;
1224    specialize qw/aom_highbd_sad16x8x3d    avx2 neon/;
1225    specialize qw/aom_highbd_sad8x16x3d         neon/;
1226    specialize qw/aom_highbd_sad8x8x3d          neon/;
1227    specialize qw/aom_highbd_sad8x4x3d          neon/;
1228    specialize qw/aom_highbd_sad4x8x3d          neon/;
1229    specialize qw/aom_highbd_sad4x4x3d          neon/;
1230
1231    specialize qw/aom_highbd_sad64x16x3d   avx2 neon/;
1232    specialize qw/aom_highbd_sad32x8x3d    avx2 neon/;
1233    specialize qw/aom_highbd_sad16x64x3d   avx2 neon/;
1234    specialize qw/aom_highbd_sad16x4x3d    avx2 neon/;
1235    specialize qw/aom_highbd_sad8x32x3d         neon/;
1236    specialize qw/aom_highbd_sad4x16x3d         neon/;
1237  }
1238  #
1239  # Avg: block-average and min/max prototypes. The pixel pointer is named `s`
1240  # in every prototype so the generated declarations carry a uniform parameter name.
1241  add_proto qw/unsigned int aom_avg_8x8/, "const uint8_t *s, int p";
1242  specialize qw/aom_avg_8x8 sse2 neon/;
1243
1244  add_proto qw/unsigned int aom_avg_4x4/, "const uint8_t *s, int p";
1245  specialize qw/aom_avg_4x4 sse2 neon/;
1246
1247  add_proto qw/void aom_avg_8x8_quad/, "const uint8_t *s, int p, int x16_idx, int y16_idx, int *avg";
1248  specialize qw/aom_avg_8x8_quad avx2 sse2 neon/;
1249
1250  add_proto qw/void aom_minmax_8x8/, "const uint8_t *s, int p, const uint8_t *d, int dp, int *min, int *max";
1251  specialize qw/aom_minmax_8x8 sse2 neon/;
1252  # High bit-depth counterparts; only neon is listed for them.
1253  if (aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") {
1254    add_proto qw/unsigned int aom_highbd_avg_8x8/, "const uint8_t *s, int p";
1255    specialize qw/aom_highbd_avg_8x8 neon/;
1256    add_proto qw/unsigned int aom_highbd_avg_4x4/, "const uint8_t *s, int p";
1257    specialize qw/aom_highbd_avg_4x4 neon/;
1258    add_proto qw/void aom_highbd_minmax_8x8/, "const uint8_t *s, int p, const uint8_t *d, int dp, int *min, int *max";
1259    specialize qw/aom_highbd_minmax_8x8 neon/;
1260  }
1261  # aom_int_pro_row / aom_int_pro_col take a non-const int16_t buffer plus a const 8-bit ref; aom_vector_var takes two const int16_t inputs and returns int.
1262  add_proto qw/void aom_int_pro_row/, "int16_t *hbuf, const uint8_t *ref, const int ref_stride, const int width, const int height, int norm_factor";
1263  specialize qw/aom_int_pro_row avx2 sse2 neon/;
1264
1265  add_proto qw/void aom_int_pro_col/, "int16_t *vbuf, const uint8_t *ref, const int ref_stride, const int width, const int height, int norm_factor";
1266  specialize qw/aom_int_pro_col avx2 sse2 neon/;
1267
1268  add_proto qw/int aom_vector_var/, "const int16_t *ref, const int16_t *src, int bwl";
1269  specialize qw/aom_vector_var avx2 sse4_1 neon sve/;
1270
1271  #
1272  # Hadamard transform and SATD for implementing the temporal dependency model
1273  #
1274  add_proto qw/void aom_hadamard_4x4/, "const int16_t *src_diff, ptrdiff_t src_stride, tran_low_t *coeff";
1275  specialize qw/aom_hadamard_4x4 sse2 neon/;
1276
1277  add_proto qw/void aom_hadamard_8x8/, "const int16_t *src_diff, ptrdiff_t src_stride, tran_low_t *coeff";
1278  specialize qw/aom_hadamard_8x8 sse2 neon/;
1279
1280  add_proto qw/void aom_hadamard_16x16/, "const int16_t *src_diff, ptrdiff_t src_stride, tran_low_t *coeff";
1281  specialize qw/aom_hadamard_16x16 avx2 sse2 neon/;
1282
1283  add_proto qw/void aom_hadamard_32x32/, "const int16_t *src_diff, ptrdiff_t src_stride, tran_low_t *coeff";
1284  specialize qw/aom_hadamard_32x32 avx2 sse2 neon/;
1285  # The _lp variants write int16_t coefficients instead of tran_low_t.
1286  add_proto qw/void aom_hadamard_lp_8x8/, "const int16_t *src_diff, ptrdiff_t src_stride, int16_t *coeff";
1287  specialize qw/aom_hadamard_lp_8x8 sse2 neon/;
1288
1289  add_proto qw/void aom_hadamard_lp_16x16/, "const int16_t *src_diff, ptrdiff_t src_stride, int16_t *coeff";
1290  specialize qw/aom_hadamard_lp_16x16 sse2 avx2 neon/;
1291
1292  add_proto qw/void aom_hadamard_lp_8x8_dual/, "const int16_t *src_diff, ptrdiff_t src_stride, int16_t *coeff";
1293  specialize qw/aom_hadamard_lp_8x8_dual sse2 avx2 neon/;
1294  # High bit-depth Hadamard prototypes (8x8, 16x16, 32x32) exist only when CONFIG_AV1_HIGHBITDEPTH is "yes".
1295  if (aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") {
1296    add_proto qw/void aom_highbd_hadamard_8x8/, "const int16_t *src_diff, ptrdiff_t src_stride, tran_low_t *coeff";
1297    specialize qw/aom_highbd_hadamard_8x8 avx2 neon/;
1298
1299    add_proto qw/void aom_highbd_hadamard_16x16/, "const int16_t *src_diff, ptrdiff_t src_stride, tran_low_t *coeff";
1300    specialize qw/aom_highbd_hadamard_16x16 avx2 neon/;
1301
1302    add_proto qw/void aom_highbd_hadamard_32x32/, "const int16_t *src_diff, ptrdiff_t src_stride, tran_low_t *coeff";
1303    specialize qw/aom_highbd_hadamard_32x32 avx2 neon/;
1304  }
1305  add_proto qw/int aom_satd/, "const tran_low_t *coeff, int length";  # SATD over tran_low_t coefficients
1306  specialize qw/aom_satd neon sse2 avx2/;
1307
1308  add_proto qw/int aom_satd_lp/, "const int16_t *coeff, int length";  # SATD over int16_t coefficients
1309  specialize qw/aom_satd_lp sse2 avx2 neon/;
1310
1311
1312  #
1313  # Structured Similarity (SSIM): 8x8 parameter-accumulation prototypes (sums, sums of squares, cross sum).
1314  #
1315  add_proto qw/void aom_ssim_parms_8x8/, "const uint8_t *s, int sp, const uint8_t *r, int rp, uint32_t *sum_s, uint32_t *sum_r, uint32_t *sum_sq_s, uint32_t *sum_sq_r, uint32_t *sum_sxr";
1316  specialize qw/aom_ssim_parms_8x8/, "$sse2_x86_64";  # 'sse2' only when $opts{arch} is "x86_64" (set near the top of the file), otherwise ''
1317
1318  if (aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") {
1319    add_proto qw/void aom_highbd_ssim_parms_8x8/, "const uint16_t *s, int sp, const uint16_t *r, int rp, uint32_t *sum_s, uint32_t *sum_r, uint32_t *sum_sq_s, uint32_t *sum_sq_r, uint32_t *sum_sxr";  # takes uint16_t samples; no specialize call follows
1320  }
1321}  # CONFIG_AV1_ENCODER
1322
1323if (aom_config("CONFIG_AV1_ENCODER") eq "yes") {
1324
1325  #
1326  # Specialty Variance: quad/dual variance helpers and fixed-size 8-bit MSE prototypes.
1327  #
1328  add_proto qw/void aom_get_var_sse_sum_8x8_quad/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse8x8, int *sum8x8, unsigned int *tot_sse, int *tot_sum, uint32_t *var8x8";
1329  specialize qw/aom_get_var_sse_sum_8x8_quad        avx2 sse2 neon neon_dotprod/;
1330
1331  add_proto qw/void aom_get_var_sse_sum_16x16_dual/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse16x16, unsigned int *tot_sse, int *tot_sum, uint32_t *var16x16";
1332  specialize qw/aom_get_var_sse_sum_16x16_dual        avx2 sse2 neon neon_dotprod/;
1333  # The four MSE prototypes share one parameter list and differ only in block size.
1334  add_proto qw/unsigned int aom_mse16x16/, "const uint8_t *src_ptr, int  source_stride, const uint8_t *ref_ptr, int  recon_stride, unsigned int *sse";
1335  add_proto qw/unsigned int aom_mse16x8/, "const uint8_t *src_ptr, int  source_stride, const uint8_t *ref_ptr, int  recon_stride, unsigned int *sse";
1336  add_proto qw/unsigned int aom_mse8x16/, "const uint8_t *src_ptr, int  source_stride, const uint8_t *ref_ptr, int  recon_stride, unsigned int *sse";
1337  add_proto qw/unsigned int aom_mse8x8/, "const uint8_t *src_ptr, int  source_stride, const uint8_t *ref_ptr, int  recon_stride, unsigned int *sse";
1338  # Only 16x16 lists avx2.
1339  specialize qw/aom_mse16x16          sse2 avx2 neon neon_dotprod/;
1340  specialize qw/aom_mse16x8           sse2      neon neon_dotprod/;
1341  specialize qw/aom_mse8x16           sse2      neon neon_dotprod/;
1342  specialize qw/aom_mse8x8            sse2      neon neon_dotprod/;
1343
1344  if (aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") {  # high bit-depth MSE: one prototype set per bit depth
1345    foreach $bd (8, 10, 12) {
1346      add_proto qw/unsigned int/, "aom_highbd_${bd}_mse16x16", "const uint8_t *src_ptr, int  source_stride, const uint8_t *ref_ptr, int  recon_stride, unsigned int *sse";
1347      add_proto qw/unsigned int/, "aom_highbd_${bd}_mse16x8", "const uint8_t *src_ptr, int  source_stride, const uint8_t *ref_ptr, int  recon_stride, unsigned int *sse";
1348      add_proto qw/unsigned int/, "aom_highbd_${bd}_mse8x16", "const uint8_t *src_ptr, int  source_stride, const uint8_t *ref_ptr, int  recon_stride, unsigned int *sse";
1349      add_proto qw/unsigned int/, "aom_highbd_${bd}_mse8x8", "const uint8_t *src_ptr, int  source_stride, const uint8_t *ref_ptr, int  recon_stride, unsigned int *sse";
1350      # $bd is a number, so it is compared with == rather than the string operator eq.
1351      if ($bd == 8) {  # 8-bit: neon_dotprod is listed
1352        specialize "aom_highbd_${bd}_mse16x16", qw/sse2 neon neon_dotprod/;
1353        specialize "aom_highbd_${bd}_mse16x8", qw/neon neon_dotprod/;
1354        specialize "aom_highbd_${bd}_mse8x16", qw/neon neon_dotprod/;
1355        specialize "aom_highbd_${bd}_mse8x8", qw/sse2 neon neon_dotprod/;
1356      } elsif ($bd == 10) {  # 10-bit: sve replaces neon_dotprod, and 16x16 additionally lists avx2
1357        specialize "aom_highbd_${bd}_mse16x16", qw/avx2 sse2 neon sve/;
1358        specialize "aom_highbd_${bd}_mse16x8", qw/neon sve/;
1359        specialize "aom_highbd_${bd}_mse8x16", qw/neon sve/;
1360        specialize "aom_highbd_${bd}_mse8x8", qw/sse2 neon sve/;
1361      } else {  # 12-bit: same as 10-bit without avx2
1362        specialize "aom_highbd_${bd}_mse16x16", qw/sse2 neon sve/;
1363        specialize "aom_highbd_${bd}_mse16x8", qw/neon sve/;
1364        specialize "aom_highbd_${bd}_mse8x16", qw/neon sve/;
1365        specialize "aom_highbd_${bd}_mse8x8", qw/sse2 neon sve/;
1366      }
1367
1368    }
1369  }
1370  # aom_get_mb_ss is declared only when CONFIG_REALTIME_ONLY is not "yes"; its single int16_t pointer parameter is unnamed in the prototype.
1371  if (aom_config("CONFIG_REALTIME_ONLY") ne "yes") {
1372    add_proto qw/unsigned int aom_get_mb_ss/, "const int16_t *";
1373    specialize qw/aom_get_mb_ss sse2 neon/;
1374  }
1375
1376  #
1377  # Variance / Subpixel Variance / Subpixel Avg Variance
1378  # Two 16-bit MSE helpers come first, then per-block-size prototypes, then the per-size specialization lists.
1379  add_proto qw/uint64_t/, "aom_mse_wxh_16bit", "uint8_t *dst, int dstride,uint16_t *src, int sstride, int w, int h";
1380  specialize qw/aom_mse_wxh_16bit  sse2 avx2 neon/;
1381
1382  add_proto qw/uint64_t/, "aom_mse_16xh_16bit", "uint8_t *dst, int dstride,uint16_t *src, int w, int h";  # unlike the wxh variant, takes no sstride parameter
1383  specialize qw/aom_mse_16xh_16bit sse2 avx2 neon/;
1384
1385  foreach (@encoder_block_sizes) {
1386    ($w, $h) = @$_;  # unpack the [width, height] pair
1387    add_proto qw/unsigned int/, "aom_variance${w}x${h}", "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
1388    add_proto qw/uint32_t/, "aom_sub_pixel_variance${w}x${h}", "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
1389    add_proto qw/uint32_t/, "aom_sub_pixel_avg_variance${w}x${h}", "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
1390    add_proto qw/uint32_t/, "aom_dist_wtd_sub_pixel_avg_variance${w}x${h}", "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred, const DIST_WTD_COMP_PARAMS *jcp_param";
1391  }
1392  specialize qw/aom_variance128x128   sse2 avx2 neon neon_dotprod/;
1393  specialize qw/aom_variance128x64    sse2 avx2 neon neon_dotprod/;
1394  specialize qw/aom_variance64x128    sse2 avx2 neon neon_dotprod/;
1395  specialize qw/aom_variance64x64     sse2 avx2 neon neon_dotprod/;
1396  specialize qw/aom_variance64x32     sse2 avx2 neon neon_dotprod/;
1397  specialize qw/aom_variance32x64     sse2 avx2 neon neon_dotprod/;
1398  specialize qw/aom_variance32x32     sse2 avx2 neon neon_dotprod/;
1399  specialize qw/aom_variance32x16     sse2 avx2 neon neon_dotprod/;
1400  specialize qw/aom_variance16x32     sse2 avx2 neon neon_dotprod/;
1401  specialize qw/aom_variance16x16     sse2 avx2 neon neon_dotprod/;
1402  specialize qw/aom_variance16x8      sse2 avx2 neon neon_dotprod/;
1403  specialize qw/aom_variance8x16      sse2      neon neon_dotprod/;
1404  specialize qw/aom_variance8x8       sse2      neon neon_dotprod/;
1405  specialize qw/aom_variance8x4       sse2      neon neon_dotprod/;
1406  specialize qw/aom_variance4x8       sse2      neon neon_dotprod/;
1407  specialize qw/aom_variance4x4       sse2      neon neon_dotprod/;
1408  # Sub-pixel variance: avx2 is listed for widths of 16 and above only.
1409  specialize qw/aom_sub_pixel_variance128x128   avx2 neon ssse3/;
1410  specialize qw/aom_sub_pixel_variance128x64    avx2 neon ssse3/;
1411  specialize qw/aom_sub_pixel_variance64x128    avx2 neon ssse3/;
1412  specialize qw/aom_sub_pixel_variance64x64     avx2 neon ssse3/;
1413  specialize qw/aom_sub_pixel_variance64x32     avx2 neon ssse3/;
1414  specialize qw/aom_sub_pixel_variance32x64     avx2 neon ssse3/;
1415  specialize qw/aom_sub_pixel_variance32x32     avx2 neon ssse3/;
1416  specialize qw/aom_sub_pixel_variance32x16     avx2 neon ssse3/;
1417  specialize qw/aom_sub_pixel_variance16x32     avx2 neon ssse3/;
1418  specialize qw/aom_sub_pixel_variance16x16     avx2 neon ssse3/;
1419  specialize qw/aom_sub_pixel_variance16x8      avx2 neon ssse3/;
1420  specialize qw/aom_sub_pixel_variance8x16           neon ssse3/;
1421  specialize qw/aom_sub_pixel_variance8x8            neon ssse3/;
1422  specialize qw/aom_sub_pixel_variance8x4            neon ssse3/;
1423  specialize qw/aom_sub_pixel_variance4x8            neon ssse3/;
1424  specialize qw/aom_sub_pixel_variance4x4            neon ssse3/;
1425  # Sub-pixel avg variance: avx2 is listed for widths of 32 and above only.
1426  specialize qw/aom_sub_pixel_avg_variance128x128 avx2 neon ssse3/;
1427  specialize qw/aom_sub_pixel_avg_variance128x64  avx2 neon ssse3/;
1428  specialize qw/aom_sub_pixel_avg_variance64x128  avx2 neon ssse3/;
1429  specialize qw/aom_sub_pixel_avg_variance64x64   avx2 neon ssse3/;
1430  specialize qw/aom_sub_pixel_avg_variance64x32   avx2 neon ssse3/;
1431  specialize qw/aom_sub_pixel_avg_variance32x64   avx2 neon ssse3/;
1432  specialize qw/aom_sub_pixel_avg_variance32x32   avx2 neon ssse3/;
1433  specialize qw/aom_sub_pixel_avg_variance32x16   avx2 neon ssse3/;
1434  specialize qw/aom_sub_pixel_avg_variance16x32        neon ssse3/;
1435  specialize qw/aom_sub_pixel_avg_variance16x16        neon ssse3/;
1436  specialize qw/aom_sub_pixel_avg_variance16x8         neon ssse3/;
1437  specialize qw/aom_sub_pixel_avg_variance8x16         neon ssse3/;
1438  specialize qw/aom_sub_pixel_avg_variance8x8          neon ssse3/;
1439  specialize qw/aom_sub_pixel_avg_variance8x4          neon ssse3/;
1440  specialize qw/aom_sub_pixel_avg_variance4x8          neon ssse3/;
1441  specialize qw/aom_sub_pixel_avg_variance4x4          neon ssse3/;
1442  # The sizes below are pushed onto @encoder_block_sizes only when CONFIG_REALTIME_ONLY is not "yes" (top of file), so their specializations carry the same guard.
1443  if (aom_config("CONFIG_REALTIME_ONLY") ne "yes") {
1444    specialize qw/aom_variance4x16  neon neon_dotprod sse2/;
1445    specialize qw/aom_variance16x4  neon neon_dotprod sse2 avx2/;
1446    specialize qw/aom_variance8x32  neon neon_dotprod sse2/;
1447    specialize qw/aom_variance32x8  neon neon_dotprod sse2 avx2/;
1448    specialize qw/aom_variance16x64 neon neon_dotprod sse2 avx2/;
1449    specialize qw/aom_variance64x16 neon neon_dotprod sse2 avx2/;
1450
1451    specialize qw/aom_sub_pixel_variance4x16 neon ssse3/;
1452    specialize qw/aom_sub_pixel_variance16x4 neon avx2 ssse3/;
1453    specialize qw/aom_sub_pixel_variance8x32 neon ssse3/;
1454    specialize qw/aom_sub_pixel_variance32x8 neon ssse3/;
1455    specialize qw/aom_sub_pixel_variance16x64 neon avx2 ssse3/;
1456    specialize qw/aom_sub_pixel_variance64x16 neon ssse3/;
1457    specialize qw/aom_sub_pixel_avg_variance4x16 neon ssse3/;
1458    specialize qw/aom_sub_pixel_avg_variance16x4 neon ssse3/;
1459    specialize qw/aom_sub_pixel_avg_variance8x32 neon ssse3/;
1460    specialize qw/aom_sub_pixel_avg_variance32x8 neon ssse3/;
1461    specialize qw/aom_sub_pixel_avg_variance16x64 neon ssse3/;
1462    specialize qw/aom_sub_pixel_avg_variance64x16 neon ssse3/;
1463
1464    specialize qw/aom_dist_wtd_sub_pixel_avg_variance4x16  neon ssse3/;
1465    specialize qw/aom_dist_wtd_sub_pixel_avg_variance16x4  neon ssse3/;
1466    specialize qw/aom_dist_wtd_sub_pixel_avg_variance8x32  neon ssse3/;
1467    specialize qw/aom_dist_wtd_sub_pixel_avg_variance32x8  neon ssse3/;
1468    specialize qw/aom_dist_wtd_sub_pixel_avg_variance16x64 neon ssse3/;
1469    specialize qw/aom_dist_wtd_sub_pixel_avg_variance64x16 neon ssse3/;
1470  }
1471  # Distance-weighted sub-pixel avg variance: every size lists neon and ssse3 only.
1472  specialize qw/aom_dist_wtd_sub_pixel_avg_variance64x64 neon ssse3/;
1473  specialize qw/aom_dist_wtd_sub_pixel_avg_variance64x32 neon ssse3/;
1474  specialize qw/aom_dist_wtd_sub_pixel_avg_variance32x64 neon ssse3/;
1475  specialize qw/aom_dist_wtd_sub_pixel_avg_variance32x32 neon ssse3/;
1476  specialize qw/aom_dist_wtd_sub_pixel_avg_variance32x16 neon ssse3/;
1477  specialize qw/aom_dist_wtd_sub_pixel_avg_variance16x32 neon ssse3/;
1478  specialize qw/aom_dist_wtd_sub_pixel_avg_variance16x16 neon ssse3/;
1479  specialize qw/aom_dist_wtd_sub_pixel_avg_variance16x8  neon ssse3/;
1480  specialize qw/aom_dist_wtd_sub_pixel_avg_variance8x16  neon ssse3/;
1481  specialize qw/aom_dist_wtd_sub_pixel_avg_variance8x8   neon ssse3/;
1482  specialize qw/aom_dist_wtd_sub_pixel_avg_variance8x4   neon ssse3/;
1483  specialize qw/aom_dist_wtd_sub_pixel_avg_variance4x8   neon ssse3/;
1484  specialize qw/aom_dist_wtd_sub_pixel_avg_variance4x4   neon ssse3/;
1485
1486  specialize qw/aom_dist_wtd_sub_pixel_avg_variance128x128  neon ssse3/;
1487  specialize qw/aom_dist_wtd_sub_pixel_avg_variance128x64   neon ssse3/;
1488  specialize qw/aom_dist_wtd_sub_pixel_avg_variance64x128   neon ssse3/;
1489
1490  if (aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") {
1491    foreach $bd (8, 10, 12) {
1492      foreach (@encoder_block_sizes) {
1493        ($w, $h) = @$_;
1494        add_proto qw/unsigned int/, "aom_highbd_${bd}_variance${w}x${h}", "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
1495        add_proto qw/uint32_t/, "aom_highbd_${bd}_sub_pixel_variance${w}x${h}", "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
1496        add_proto qw/uint32_t/, "aom_highbd_${bd}_sub_pixel_avg_variance${w}x${h}", "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
1497        add_proto qw/uint32_t/, "aom_highbd_${bd}_dist_wtd_sub_pixel_avg_variance${w}x${h}", "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred, const DIST_WTD_COMP_PARAMS* jcp_param";
1498      }
1499    }
1500
1501    specialize qw/aom_highbd_12_variance128x128 sse2 neon sve/;
1502    specialize qw/aom_highbd_12_variance128x64  sse2 neon sve/;
1503    specialize qw/aom_highbd_12_variance64x128  sse2 neon sve/;
1504    specialize qw/aom_highbd_12_variance64x64   sse2 neon sve/;
1505    specialize qw/aom_highbd_12_variance64x32   sse2 neon sve/;
1506    specialize qw/aom_highbd_12_variance32x64   sse2 neon sve/;
1507    specialize qw/aom_highbd_12_variance32x32   sse2 neon sve/;
1508    specialize qw/aom_highbd_12_variance32x16   sse2 neon sve/;
1509    specialize qw/aom_highbd_12_variance16x32   sse2 neon sve/;
1510    specialize qw/aom_highbd_12_variance16x16   sse2 neon sve/;
1511    specialize qw/aom_highbd_12_variance16x8    sse2 neon sve/;
1512    specialize qw/aom_highbd_12_variance8x16    sse2 neon sve/;
1513    specialize qw/aom_highbd_12_variance8x8     sse2 neon sve/;
1514    specialize qw/aom_highbd_12_variance8x4          neon sve/;
1515    specialize qw/aom_highbd_12_variance4x8          neon sve/;
1516    specialize qw/aom_highbd_12_variance4x4   sse4_1 neon sve/;
1517
1518    specialize qw/aom_highbd_10_variance128x128 sse2 avx2 neon sve/;
1519    specialize qw/aom_highbd_10_variance128x64  sse2 avx2 neon sve/;
1520    specialize qw/aom_highbd_10_variance64x128  sse2 avx2 neon sve/;
1521    specialize qw/aom_highbd_10_variance64x64   sse2 avx2 neon sve/;
1522    specialize qw/aom_highbd_10_variance64x32   sse2 avx2 neon sve/;
1523    specialize qw/aom_highbd_10_variance32x64   sse2 avx2 neon sve/;
1524    specialize qw/aom_highbd_10_variance32x32   sse2 avx2 neon sve/;
1525    specialize qw/aom_highbd_10_variance32x16   sse2 avx2 neon sve/;
1526    specialize qw/aom_highbd_10_variance16x32   sse2 avx2 neon sve/;
1527    specialize qw/aom_highbd_10_variance16x16   sse2 avx2 neon sve/;
1528    specialize qw/aom_highbd_10_variance16x8    sse2 avx2 neon sve/;
1529    specialize qw/aom_highbd_10_variance8x16    sse2 avx2 neon sve/;
1530    specialize qw/aom_highbd_10_variance8x8     sse2 avx2 neon sve/;
1531    specialize qw/aom_highbd_10_variance8x4               neon sve/;
1532    specialize qw/aom_highbd_10_variance4x8               neon sve/;
1533    specialize qw/aom_highbd_10_variance4x4   sse4_1      neon sve/;
1534
1535    specialize qw/aom_highbd_8_variance128x128 sse2 neon sve/;
1536    specialize qw/aom_highbd_8_variance128x64  sse2 neon sve/;
1537    specialize qw/aom_highbd_8_variance64x128  sse2 neon sve/;
1538    specialize qw/aom_highbd_8_variance64x64   sse2 neon sve/;
1539    specialize qw/aom_highbd_8_variance64x32   sse2 neon sve/;
1540    specialize qw/aom_highbd_8_variance32x64   sse2 neon sve/;
1541    specialize qw/aom_highbd_8_variance32x32   sse2 neon sve/;
1542    specialize qw/aom_highbd_8_variance32x16   sse2 neon sve/;
1543    specialize qw/aom_highbd_8_variance16x32   sse2 neon sve/;
1544    specialize qw/aom_highbd_8_variance16x16   sse2 neon sve/;
1545    specialize qw/aom_highbd_8_variance16x8    sse2 neon sve/;
1546    specialize qw/aom_highbd_8_variance8x16    sse2 neon sve/;
1547    specialize qw/aom_highbd_8_variance8x8     sse2 neon sve/;
1548    specialize qw/aom_highbd_8_variance8x4          neon sve/;
1549    specialize qw/aom_highbd_8_variance4x8          neon sve/;
1550    specialize qw/aom_highbd_8_variance4x4   sse4_1 neon sve/;
1551
1552    if (aom_config("CONFIG_REALTIME_ONLY") ne "yes") {
1553      foreach $bd (8, 10, 12) {
1554        my $avx2 = ($bd == 10) ? "avx2" : "";
1555        specialize "aom_highbd_${bd}_variance64x16" , $avx2, qw/sse2 neon sve/;
1556        specialize "aom_highbd_${bd}_variance32x8" , $avx2, qw/sse2 neon sve/;
1557        specialize "aom_highbd_${bd}_variance16x64" , $avx2, qw/sse2 neon sve/;
1558        specialize "aom_highbd_${bd}_variance16x4" , qw/neon sve/;
1559        specialize "aom_highbd_${bd}_variance8x32" , $avx2, qw/sse2 neon sve/;
1560        specialize "aom_highbd_${bd}_variance4x16" , qw/neon sve/;
1561      }
1562    }
1563
1564    specialize qw/aom_highbd_12_sub_pixel_variance128x128 sse2 neon/;  # 12-bit sub-pixel variance: sse2 + neon for every size down to 8x4
1565    specialize qw/aom_highbd_12_sub_pixel_variance128x64  sse2 neon/;
1566    specialize qw/aom_highbd_12_sub_pixel_variance64x128  sse2 neon/;
1567    specialize qw/aom_highbd_12_sub_pixel_variance64x64   sse2 neon/;
1568    specialize qw/aom_highbd_12_sub_pixel_variance64x32   sse2 neon/;
1569    specialize qw/aom_highbd_12_sub_pixel_variance32x64   sse2 neon/;
1570    specialize qw/aom_highbd_12_sub_pixel_variance32x32   sse2 neon/;
1571    specialize qw/aom_highbd_12_sub_pixel_variance32x16   sse2 neon/;
1572    specialize qw/aom_highbd_12_sub_pixel_variance16x32   sse2 neon/;
1573    specialize qw/aom_highbd_12_sub_pixel_variance16x16   sse2 neon/;
1574    specialize qw/aom_highbd_12_sub_pixel_variance16x8    sse2 neon/;
1575    specialize qw/aom_highbd_12_sub_pixel_variance8x16    sse2 neon/;
1576    specialize qw/aom_highbd_12_sub_pixel_variance8x8     sse2 neon/;
1577    specialize qw/aom_highbd_12_sub_pixel_variance8x4     sse2 neon/;
1578    specialize qw/aom_highbd_12_sub_pixel_variance4x8          neon/;  # 4x8: neon only
1579    specialize qw/aom_highbd_12_sub_pixel_variance4x4   sse4_1 neon/;  # 4x4: sse4_1 is listed instead of sse2
1580
1581    specialize qw/aom_highbd_10_sub_pixel_variance128x128 sse2 avx2 neon/;  # 10-bit sub-pixel variance: avx2 is also listed for 8x8 and every larger size
1582    specialize qw/aom_highbd_10_sub_pixel_variance128x64  sse2 avx2 neon/;
1583    specialize qw/aom_highbd_10_sub_pixel_variance64x128  sse2 avx2 neon/;
1584    specialize qw/aom_highbd_10_sub_pixel_variance64x64   sse2 avx2 neon/;
1585    specialize qw/aom_highbd_10_sub_pixel_variance64x32   sse2 avx2 neon/;
1586    specialize qw/aom_highbd_10_sub_pixel_variance32x64   sse2 avx2 neon/;
1587    specialize qw/aom_highbd_10_sub_pixel_variance32x32   sse2 avx2 neon/;
1588    specialize qw/aom_highbd_10_sub_pixel_variance32x16   sse2 avx2 neon/;
1589    specialize qw/aom_highbd_10_sub_pixel_variance16x32   sse2 avx2 neon/;
1590    specialize qw/aom_highbd_10_sub_pixel_variance16x16   sse2 avx2 neon/;
1591    specialize qw/aom_highbd_10_sub_pixel_variance16x8    sse2 avx2 neon/;
1592    specialize qw/aom_highbd_10_sub_pixel_variance8x16    sse2 avx2 neon/;
1593    specialize qw/aom_highbd_10_sub_pixel_variance8x8     sse2 avx2 neon/;
1594    specialize qw/aom_highbd_10_sub_pixel_variance8x4     sse2      neon/;  # 8x4: no avx2 entry
1595    specialize qw/aom_highbd_10_sub_pixel_variance4x8               neon/;  # 4x8: neon only
1596    specialize qw/aom_highbd_10_sub_pixel_variance4x4   sse4_1      neon/;  # 4x4: sse4_1 is listed instead of sse2
1597
1598    specialize qw/aom_highbd_8_sub_pixel_variance128x128 sse2 neon/;  # 8-bit-in-highbd sub-pixel variance: sse2 + neon for every size down to 8x4
1599    specialize qw/aom_highbd_8_sub_pixel_variance128x64  sse2 neon/;
1600    specialize qw/aom_highbd_8_sub_pixel_variance64x128  sse2 neon/;
1601    specialize qw/aom_highbd_8_sub_pixel_variance64x64   sse2 neon/;
1602    specialize qw/aom_highbd_8_sub_pixel_variance64x32   sse2 neon/;
1603    specialize qw/aom_highbd_8_sub_pixel_variance32x64   sse2 neon/;
1604    specialize qw/aom_highbd_8_sub_pixel_variance32x32   sse2 neon/;
1605    specialize qw/aom_highbd_8_sub_pixel_variance32x16   sse2 neon/;
1606    specialize qw/aom_highbd_8_sub_pixel_variance16x32   sse2 neon/;
1607    specialize qw/aom_highbd_8_sub_pixel_variance16x16   sse2 neon/;
1608    specialize qw/aom_highbd_8_sub_pixel_variance16x8    sse2 neon/;
1609    specialize qw/aom_highbd_8_sub_pixel_variance8x16    sse2 neon/;
1610    specialize qw/aom_highbd_8_sub_pixel_variance8x8     sse2 neon/;
1611    specialize qw/aom_highbd_8_sub_pixel_variance8x4     sse2 neon/;
1612    specialize qw/aom_highbd_8_sub_pixel_variance4x8          neon/;  # 4x8: neon only
1613    specialize qw/aom_highbd_8_sub_pixel_variance4x4   sse4_1 neon/;  # 4x4: sse4_1 is listed instead of sse2
1614
1615    if (aom_config("CONFIG_REALTIME_ONLY") ne "yes") {  # 1:4 and 4:1 block sizes; skipped when CONFIG_REALTIME_ONLY is yes
1616      foreach $bd (8, 10, 12) {  # same implementation list for all three bit depths (no avx2 here, even for 10-bit)
1617        specialize "aom_highbd_${bd}_sub_pixel_variance64x16" , qw/sse2 neon/;
1618        specialize "aom_highbd_${bd}_sub_pixel_variance32x8" , qw/sse2 neon/;
1619        specialize "aom_highbd_${bd}_sub_pixel_variance16x64" , qw/sse2 neon/;
1620        specialize "aom_highbd_${bd}_sub_pixel_variance16x4" , qw/sse2 neon/;
1621        specialize "aom_highbd_${bd}_sub_pixel_variance8x32" , qw/sse2 neon/;
1622        specialize "aom_highbd_${bd}_sub_pixel_variance4x16" , qw/neon/;  # 4x16: neon only
1623      }
1624    }
1625
1626    specialize qw/aom_highbd_12_sub_pixel_avg_variance128x128      neon/;  # 12-bit sub-pixel avg variance: the three 128-wide/tall sizes list neon only
1627    specialize qw/aom_highbd_12_sub_pixel_avg_variance128x64       neon/;
1628    specialize qw/aom_highbd_12_sub_pixel_avg_variance64x128       neon/;
1629    specialize qw/aom_highbd_12_sub_pixel_avg_variance64x64   sse2 neon/;  # sse2 is listed from 64x64 down to 8x4
1630    specialize qw/aom_highbd_12_sub_pixel_avg_variance64x32   sse2 neon/;
1631    specialize qw/aom_highbd_12_sub_pixel_avg_variance32x64   sse2 neon/;
1632    specialize qw/aom_highbd_12_sub_pixel_avg_variance32x32   sse2 neon/;
1633    specialize qw/aom_highbd_12_sub_pixel_avg_variance32x16   sse2 neon/;
1634    specialize qw/aom_highbd_12_sub_pixel_avg_variance16x32   sse2 neon/;
1635    specialize qw/aom_highbd_12_sub_pixel_avg_variance16x16   sse2 neon/;
1636    specialize qw/aom_highbd_12_sub_pixel_avg_variance16x8    sse2 neon/;
1637    specialize qw/aom_highbd_12_sub_pixel_avg_variance8x16    sse2 neon/;
1638    specialize qw/aom_highbd_12_sub_pixel_avg_variance8x8     sse2 neon/;
1639    specialize qw/aom_highbd_12_sub_pixel_avg_variance8x4     sse2 neon/;
1640    specialize qw/aom_highbd_12_sub_pixel_avg_variance4x8          neon/;  # 4x8: neon only
1641    specialize qw/aom_highbd_12_sub_pixel_avg_variance4x4   sse4_1 neon/;  # 4x4: sse4_1 is listed instead of sse2
1642
1643    specialize qw/aom_highbd_10_sub_pixel_avg_variance128x128      neon/;  # 10-bit sub-pixel avg variance: the three 128-wide/tall sizes list neon only
1644    specialize qw/aom_highbd_10_sub_pixel_avg_variance128x64       neon/;
1645    specialize qw/aom_highbd_10_sub_pixel_avg_variance64x128       neon/;
1646    specialize qw/aom_highbd_10_sub_pixel_avg_variance64x64   sse2 neon/;  # sse2 is listed from 64x64 down to 8x4; no avx2 entries in this group
1647    specialize qw/aom_highbd_10_sub_pixel_avg_variance64x32   sse2 neon/;
1648    specialize qw/aom_highbd_10_sub_pixel_avg_variance32x64   sse2 neon/;
1649    specialize qw/aom_highbd_10_sub_pixel_avg_variance32x32   sse2 neon/;
1650    specialize qw/aom_highbd_10_sub_pixel_avg_variance32x16   sse2 neon/;
1651    specialize qw/aom_highbd_10_sub_pixel_avg_variance16x32   sse2 neon/;
1652    specialize qw/aom_highbd_10_sub_pixel_avg_variance16x16   sse2 neon/;
1653    specialize qw/aom_highbd_10_sub_pixel_avg_variance16x8    sse2 neon/;
1654    specialize qw/aom_highbd_10_sub_pixel_avg_variance8x16    sse2 neon/;
1655    specialize qw/aom_highbd_10_sub_pixel_avg_variance8x8     sse2 neon/;
1656    specialize qw/aom_highbd_10_sub_pixel_avg_variance8x4     sse2 neon/;
1657    specialize qw/aom_highbd_10_sub_pixel_avg_variance4x8          neon/;  # 4x8: neon only
1658    specialize qw/aom_highbd_10_sub_pixel_avg_variance4x4   sse4_1 neon/;  # 4x4: sse4_1 is listed instead of sse2
1659
1660    specialize qw/aom_highbd_8_sub_pixel_avg_variance128x128      neon/;  # 8-bit-in-highbd sub-pixel avg variance: the three 128-wide/tall sizes list neon only
1661    specialize qw/aom_highbd_8_sub_pixel_avg_variance128x64       neon/;
1662    specialize qw/aom_highbd_8_sub_pixel_avg_variance64x128       neon/;
1663    specialize qw/aom_highbd_8_sub_pixel_avg_variance64x64   sse2 neon/;  # sse2 is listed from 64x64 down to 8x4
1664    specialize qw/aom_highbd_8_sub_pixel_avg_variance64x32   sse2 neon/;
1665    specialize qw/aom_highbd_8_sub_pixel_avg_variance32x64   sse2 neon/;
1666    specialize qw/aom_highbd_8_sub_pixel_avg_variance32x32   sse2 neon/;
1667    specialize qw/aom_highbd_8_sub_pixel_avg_variance32x16   sse2 neon/;
1668    specialize qw/aom_highbd_8_sub_pixel_avg_variance16x32   sse2 neon/;
1669    specialize qw/aom_highbd_8_sub_pixel_avg_variance16x16   sse2 neon/;
1670    specialize qw/aom_highbd_8_sub_pixel_avg_variance16x8    sse2 neon/;
1671    specialize qw/aom_highbd_8_sub_pixel_avg_variance8x16    sse2 neon/;
1672    specialize qw/aom_highbd_8_sub_pixel_avg_variance8x8     sse2 neon/;
1673    specialize qw/aom_highbd_8_sub_pixel_avg_variance8x4     sse2 neon/;
1674    specialize qw/aom_highbd_8_sub_pixel_avg_variance4x8          neon/;  # 4x8: neon only
1675    specialize qw/aom_highbd_8_sub_pixel_avg_variance4x4   sse4_1 neon/;  # 4x4: sse4_1 is listed instead of sse2
1676
1677    if (aom_config("CONFIG_REALTIME_ONLY") ne "yes") {  # 1:4 and 4:1 block sizes; skipped when CONFIG_REALTIME_ONLY is yes
1678      foreach $bd (8, 10, 12) {  # same implementation list for all three bit depths
1679        specialize "aom_highbd_${bd}_sub_pixel_avg_variance64x16" , qw/sse2 neon/;
1680        specialize "aom_highbd_${bd}_sub_pixel_avg_variance32x8" , qw/sse2 neon/;
1681        specialize "aom_highbd_${bd}_sub_pixel_avg_variance16x64" , qw/sse2 neon/;
1682        specialize "aom_highbd_${bd}_sub_pixel_avg_variance16x4" , qw/sse2 neon/;
1683        specialize "aom_highbd_${bd}_sub_pixel_avg_variance8x32" , qw/sse2 neon/;
1684        specialize "aom_highbd_${bd}_sub_pixel_avg_variance4x16" , qw/neon/;  # 4x16: neon only
1685      }
1686    }
1687
1688    foreach $bd (8, 10, 12) {  # distance-weighted sub-pixel avg variance: neon is the only implementation listed, for every size and bit depth
1689      specialize "aom_highbd_${bd}_dist_wtd_sub_pixel_avg_variance128x128", qw/neon/;
1690      specialize "aom_highbd_${bd}_dist_wtd_sub_pixel_avg_variance128x64" , qw/neon/;
1691      specialize "aom_highbd_${bd}_dist_wtd_sub_pixel_avg_variance64x128" , qw/neon/;
1692      specialize "aom_highbd_${bd}_dist_wtd_sub_pixel_avg_variance64x64"  , qw/neon/;
1693      specialize "aom_highbd_${bd}_dist_wtd_sub_pixel_avg_variance64x32"  , qw/neon/;
1694      specialize "aom_highbd_${bd}_dist_wtd_sub_pixel_avg_variance32x64"  , qw/neon/;
1695      specialize "aom_highbd_${bd}_dist_wtd_sub_pixel_avg_variance32x32"  , qw/neon/;
1696      specialize "aom_highbd_${bd}_dist_wtd_sub_pixel_avg_variance32x16"  , qw/neon/;
1697      specialize "aom_highbd_${bd}_dist_wtd_sub_pixel_avg_variance16x32"  , qw/neon/;
1698      specialize "aom_highbd_${bd}_dist_wtd_sub_pixel_avg_variance16x16"  , qw/neon/;
1699      specialize "aom_highbd_${bd}_dist_wtd_sub_pixel_avg_variance16x8"   , qw/neon/;
1700      specialize "aom_highbd_${bd}_dist_wtd_sub_pixel_avg_variance8x16"   , qw/neon/;
1701      specialize "aom_highbd_${bd}_dist_wtd_sub_pixel_avg_variance8x8"    , qw/neon/;
1702      specialize "aom_highbd_${bd}_dist_wtd_sub_pixel_avg_variance8x4"    , qw/neon/;
1703      specialize "aom_highbd_${bd}_dist_wtd_sub_pixel_avg_variance4x8"    , qw/neon/;
1704      specialize "aom_highbd_${bd}_dist_wtd_sub_pixel_avg_variance4x4"    , qw/neon/;
1705    }
1706
1707    if (aom_config("CONFIG_REALTIME_ONLY") ne "yes") {  # 1:4 and 4:1 block sizes; skipped when CONFIG_REALTIME_ONLY is yes
1708      foreach $bd (8, 10, 12) {  # neon only, matching the square and 1:2 sizes of the same function family
1709        specialize "aom_highbd_${bd}_dist_wtd_sub_pixel_avg_variance64x16", qw/neon/;
1710        specialize "aom_highbd_${bd}_dist_wtd_sub_pixel_avg_variance32x8" , qw/neon/;
1711        specialize "aom_highbd_${bd}_dist_wtd_sub_pixel_avg_variance16x64", qw/neon/;
1712        specialize "aom_highbd_${bd}_dist_wtd_sub_pixel_avg_variance16x4" , qw/neon/;
1713        specialize "aom_highbd_${bd}_dist_wtd_sub_pixel_avg_variance8x32" , qw/neon/;
1714        specialize "aom_highbd_${bd}_dist_wtd_sub_pixel_avg_variance4x16" , qw/neon/;
1715      }
1716    }
1717  }
1718  #
1719  # Masked Variance / Masked Subpixel Variance
1720  # (the loop below declares only the masked sub-pixel variance prototype, one per encoder block size)
1721  foreach (@encoder_block_sizes) {  # each element is a [width, height] pair
1722    ($w, $h) = @$_;  # unpack the pair; $w and $h are interpolated into the function name
1723    add_proto qw/unsigned int/, "aom_masked_sub_pixel_variance${w}x${h}", "const uint8_t *src, int src_stride, int xoffset, int yoffset, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask, unsigned int *sse";
1724    specialize "aom_masked_sub_pixel_variance${w}x${h}", qw/ssse3 neon/;  # same implementation list for every block size
1725  }
1726
1727  if (aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") {  # high-bitdepth masked variants exist only when CONFIG_AV1_HIGHBITDEPTH is yes
1728    foreach $bd ("_8_", "_10_", "_12_") {  # $bd carries its own underscores, giving names such as aom_highbd_10_masked_sub_pixel_variance
1729      foreach (@encoder_block_sizes) {  # each element is a [width, height] pair
1730        ($w, $h) = @$_;
1731        add_proto qw/unsigned int/, "aom_highbd${bd}masked_sub_pixel_variance${w}x${h}", "const uint8_t *src, int src_stride, int xoffset, int yoffset, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask, unsigned int *sse";
1732        specialize "aom_highbd${bd}masked_sub_pixel_variance${w}x${h}", qw/ssse3 neon/;  # same list as the 8-bit masked function
1733      }
1734    }
1735  }
1736
1737  #
1738  # OBMC Variance / OBMC Subpixel Variance
1739  #
1740  if (aom_config("CONFIG_REALTIME_ONLY") ne "yes") {  # no OBMC variance prototypes at all when CONFIG_REALTIME_ONLY is yes
1741    foreach (@encoder_block_sizes) {  # each element is a [width, height] pair
1742      ($w, $h) = @$_;
1743      add_proto qw/unsigned int/, "aom_obmc_variance${w}x${h}", "const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse";
1744      add_proto qw/unsigned int/, "aom_obmc_sub_pixel_variance${w}x${h}", "const uint8_t *pre, int pre_stride, int xoffset, int yoffset, const int32_t *wsrc, const int32_t *mask, unsigned int *sse";  # same as above plus xoffset/yoffset
1745      specialize "aom_obmc_variance${w}x${h}", qw/sse4_1 avx2 neon/;
1746      specialize "aom_obmc_sub_pixel_variance${w}x${h}", qw/sse4_1 neon/;  # no avx2 entry for the sub-pixel form
1747    }
1748
1749    if (aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") {  # high-bitdepth OBMC needs both non-realtime and CONFIG_AV1_HIGHBITDEPTH
1750      foreach $bd ("_8_", "_10_", "_12_") {  # $bd carries its own underscores, giving names such as aom_highbd_10_obmc_variance
1751        foreach (@encoder_block_sizes) {
1752          ($w, $h) = @$_;
1753          add_proto qw/unsigned int/, "aom_highbd${bd}obmc_variance${w}x${h}", "const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse";
1754          add_proto qw/unsigned int/, "aom_highbd${bd}obmc_sub_pixel_variance${w}x${h}", "const uint8_t *pre, int pre_stride, int xoffset, int yoffset, const int32_t *wsrc, const int32_t *mask, unsigned int *sse";
1755          specialize "aom_highbd${bd}obmc_variance${w}x${h}", qw/sse4_1 neon/;  # unlike the 8-bit function, no avx2 entry
1756          specialize "aom_highbd${bd}obmc_sub_pixel_variance${w}x${h}", qw/neon/;  # neon only
1757        }
1758      }
1759    }
1760  }
1761
1762  #
1763  # Comp Avg
1764  #
1765  add_proto qw/void aom_comp_avg_pred/, "uint8_t *comp_pred, const uint8_t *pred, int width, int height, const uint8_t *ref, int ref_stride";  # only ref has a stride parameter; comp_pred and pred have none
1766  specialize qw/aom_comp_avg_pred avx2 neon/;
1767
1768  add_proto qw/void aom_dist_wtd_comp_avg_pred/, "uint8_t *comp_pred, const uint8_t *pred, int width, int height, const uint8_t *ref, int ref_stride, const DIST_WTD_COMP_PARAMS *jcp_param";  # same arguments plus a DIST_WTD_COMP_PARAMS pointer
1769  specialize qw/aom_dist_wtd_comp_avg_pred ssse3 neon/;
1770
1771  if (aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") {  # high-bitdepth counterparts of the two functions above, plus a 16-bit MSE helper
1772    add_proto qw/void aom_highbd_comp_avg_pred/, "uint8_t *comp_pred8, const uint8_t *pred8, int width, int height, const uint8_t *ref8, int ref_stride";  # NOTE(review): the 8 suffix on uint8_t pointers presumably marks aliases of 16-bit sample buffers; confirm
1773    specialize qw/aom_highbd_comp_avg_pred neon/;  # neon only
1774
1775    add_proto qw/void aom_highbd_dist_wtd_comp_avg_pred/, "uint8_t *comp_pred8, const uint8_t *pred8, int width, int height, const uint8_t *ref8, int ref_stride, const DIST_WTD_COMP_PARAMS *jcp_param";
1776    specialize qw/aom_highbd_dist_wtd_comp_avg_pred sse2 neon/;
1777
1778    add_proto qw/uint64_t/, "aom_mse_wxh_16bit_highbd", "uint16_t *dst, int dstride,uint16_t *src, int sstride, int w, int h";  # takes uint16_t pointers directly and a runtime w x h instead of a per-size name
1779    specialize qw/aom_mse_wxh_16bit_highbd   sse2 avx2 neon sve/;
1780  }
1781
1782  add_proto qw/void aom_comp_mask_pred/, "uint8_t *comp_pred, const uint8_t *pred, int width, int height, const uint8_t *ref, int ref_stride, const uint8_t *mask, int mask_stride, int invert_mask";  # compound prediction with a mask buffer, its stride and an invert flag
1783  specialize qw/aom_comp_mask_pred ssse3 avx2 neon/;
1784
1785  if (aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") {  # high-bitdepth counterpart, declared only when CONFIG_AV1_HIGHBITDEPTH is yes
1786    add_proto qw/void aom_highbd_comp_mask_pred/, "uint8_t *comp_pred, const uint8_t *pred8, int width, int height, const uint8_t *ref8, int ref_stride, const uint8_t *mask, int mask_stride, int invert_mask";  # note: first parameter is named comp_pred here, not comp_pred8 as in aom_highbd_comp_avg_pred
1787    specialize qw/aom_highbd_comp_mask_pred sse2 avx2 neon/;  # sse2 here where the 8-bit function lists ssse3
1788  }
1789
1790  # Flow estimation library
1791  if (aom_config("CONFIG_REALTIME_ONLY") ne "yes") {  # none of these prototypes are declared when CONFIG_REALTIME_ONLY is yes
1792    add_proto qw/bool aom_compute_mean_stddev/, "const unsigned char *frame, int stride, int x, int y, double *mean, double *one_over_stddev";  # results come back through the two double out-pointers; returns bool
1793    specialize qw/aom_compute_mean_stddev sse4_1 avx2/;  # only sse4_1 and avx2 are listed
1794
1795    add_proto qw/double aom_compute_correlation/, "const unsigned char *frame1, int stride1, int x1, int y1, double mean1, double one_over_stddev1, const unsigned char *frame2, int stride2, int x2, int y2, double mean2, double one_over_stddev2";  # takes a mean and one_over_stddev per frame, named like the outputs of aom_compute_mean_stddev
1796    specialize qw/aom_compute_correlation sse4_1 avx2/;  # only sse4_1 and avx2 are listed
1797
1798    add_proto qw/void aom_compute_flow_at_point/, "const uint8_t *src, const uint8_t *ref, int x, int y, int width, int height, int stride, double *u, double *v";  # single stride parameter for both src and ref; u and v are non-const double pointers
1799    specialize qw/aom_compute_flow_at_point sse4_1 avx2 neon sve/;  # the only flow function here that also lists neon and sve
1800  }
1801
1802}  # CONFIG_AV1_ENCODER
1803
18041;  # last statement of the file evaluates to true. NOTE(review): presumably so that a require/do of this file by the rtcd generator succeeds; confirm against the loader
1805