##
## Copyright (c) 2017, Alliance for Open Media. All rights reserved.
##
## This source code is subject to the terms of the BSD 2 Clause License and
## the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
## was not distributed with this source code in the LICENSE file, you can
## obtain it at www.aomedia.org/license/software. If the Alliance for Open
## Media Patent License 1.0 was not distributed with this source code in the
## PATENTS file, you can obtain it at www.aomedia.org/license/patent.
##

# Prints the banner comment and the #include lines that are registered below
# as this file's forward declarations.
sub aom_dsp_forward_decls() {
  # Each element is one output line; the two trailing empty strings produce
  # the final newline plus one blank line after the last #include.
  print join("\n",
             '/*',
             ' * DSP',
             ' */',
             '',
             '#include "aom/aom_integer.h"',
             '#include "aom_dsp/aom_dsp_common.h"',
             '#include "av1/common/blockd.h"',
             '#include "av1/common/enums.h"',
             '',
             '');
}
forward_decls 'aom_dsp_forward_decls';

# optimizations which depend on multiple features:
# 'avx2' is offered only when both AVX2 and SSSE3 are enabled, otherwise ''.
$avx2_ssse3 =
    ((aom_config("HAVE_AVX2") eq "yes") && (aom_config("HAVE_SSSE3") eq "yes"))
    ? 'avx2'
    : '';

# functions that are 64 bit only.
# Every 64-bit-only flavor starts out empty; all of them are switched on
# together when the requested architecture is x86_64.
($mmx_x86_64, $sse2_x86_64, $ssse3_x86_64, $avx_x86_64, $avx2_x86_64) = ('') x 5;
if ($opts{arch} eq "x86_64") {
  ($mmx_x86_64, $sse2_x86_64, $ssse3_x86_64, $avx_x86_64, $avx2_x86_64) =
      qw/mmx sse2 ssse3 avx avx2/;
}

@block_widths = (4, 8, 16, 32, 64, 128);

# Collect every [width, height] pair in which neither dimension is more than
# twice the other.
@encoder_block_sizes = ();
foreach $w (@block_widths) {
  foreach $h (@block_widths) {
    next if ($w > 2*$h || $h > 2*$w);
    push @encoder_block_sizes, [$w, $h];
  }
}

# The 4:1 / 1:4 sizes are appended unless this is a realtime-only build.
if (aom_config("CONFIG_REALTIME_ONLY") ne "yes") {
  push @encoder_block_sizes,
       [4, 16], [16, 4], [8, 32], [32, 8], [16, 64], [64, 16];
}

# Transform sizes: for each width, the square size first, then for each height
# in turn the 2:1 pairing and (when permitted by the configuration) the 4:1
# pairing.
@tx_dims = (4, 8, 16, 32, 64);
@tx_sizes = ();
foreach $w (@tx_dims) {
  push @tx_sizes, [$w, $w];
  foreach $h (@tx_dims) {
    next unless ($w >= 4 && $h >= 4);
    if ($w == 2*$h || $h == 2*$w) {
      push @tx_sizes, [$w, $h];
    }
    # 4:1 sizes need !CONFIG_REALTIME_ONLY || CONFIG_AV1_DECODER
    if (($w == 4*$h || $h == 4*$w) &&
        ((aom_config("CONFIG_REALTIME_ONLY") ne "yes") ||
         (aom_config("CONFIG_AV1_DECODER") eq "yes"))) {
      push @tx_sizes, [$w, $h];
    }
  }
}

@pred_names = ("dc", "dc_top", "dc_left", "dc_128", "v", "h",
               "paeth", "smooth", "smooth_v", "smooth_h");

#
# Intra prediction
#

# One prototype per (transform size, predictor name) pair, plus a high
# bitdepth twin when CONFIG_AV1_HIGHBITDEPTH is enabled.
foreach (@tx_sizes) {
  ($w, $h) = @{$_};
  foreach $pred_name (@pred_names) {
    add_proto("void", "aom_" . $pred_name . "_predictor_" . $w . "x" . $h,
              "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left");
    next unless (aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes");
    add_proto("void", "aom_highbd_" . $pred_name . "_predictor_" . $w . "x" . $h,
              "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd");
  }
}

# dc_top / dc_left / dc_128 / v share one pattern: neon + sse2 for widths up
# to 16, with avx2 added for widths of 32 and 64.
foreach my $pred (qw/dc_top dc_left dc_128 v/) {
  foreach my $size (qw/4x4 4x8 8x4 8x8 8x16 16x8 16x16 16x32/) {
    specialize("aom_${pred}_predictor_${size}", qw/neon sse2/);
  }
  foreach my $size (qw/32x16 32x32 32x64 64x32 64x64/) {
    specialize("aom_${pred}_predictor_${size}", qw/neon sse2 avx2/);
  }
}

# The horizontal predictor is neon + sse2 for every size; only 32x32
# additionally lists avx2.
foreach my $size (qw/4x4 4x8 8x4 8x8 8x16 16x8 16x16 16x32
                     32x16 32x32 32x64 64x32 64x64/) {
  my @archs = ($size eq "32x32") ? qw/neon sse2 avx2/ : qw/neon sse2/;
  specialize("aom_h_predictor_${size}", @archs);
}

# Paeth predictor: ssse3 + neon for widths 4 and 8, with avx2 added from
# width 16 upwards. The list covers sizes up to 32x32 here.
foreach my $size (qw/4x4 4x8 8x4 8x8 8x16/) {
  specialize("aom_paeth_predictor_${size}", qw/ssse3 neon/);
}
foreach my $size (qw/16x8 16x16 16x32 32x16 32x32/) {
  specialize("aom_paeth_predictor_${size}", qw/ssse3 avx2 neon/);
}
# The predictor specializations below are table-driven. The bare block keeps
# the helper size lists lexical so they cannot leak into the rest of the file.
{
  # Square and 2:1 / 1:2 sizes.
  my @sizes_up_to_32x32 = qw/4x4 4x8 8x4 8x8 8x16 16x8 16x16 16x32 32x16 32x32/;
  my @sizes_with_64     = qw/32x64 64x32 64x64/;
  my @sizes_2to1        = (@sizes_up_to_32x32, @sizes_with_64);
  # 4:1 / 1:4 sizes.
  my @sizes_4to1        = qw/4x16 8x32 16x4 16x64 32x8 64x16/;

  # Largest Paeth predictor sizes.
  foreach my $size (@sizes_with_64) {
    specialize("aom_paeth_predictor_${size}", qw/ssse3 avx2 neon/);
  }

  # All three smooth variants use neon + ssse3 for every size.
  foreach my $pred (qw/smooth smooth_v smooth_h/) {
    foreach my $size (@sizes_2to1) {
      specialize("aom_${pred}_predictor_${size}", qw/neon ssse3/);
    }
  }

  # TODO(yunqingwang): optimize rectangular DC_PRED to replace division
  # by multiply and shift.
  foreach my $size (qw/4x4 4x8 8x4 8x8 8x16 16x8 16x16 16x32/) {
    specialize("aom_dc_predictor_${size}", qw/neon sse2/);
  }
  foreach my $size (qw/32x16 32x32 32x64 64x64 64x32/) {
    specialize("aom_dc_predictor_${size}", qw/neon sse2 avx2/);
  }

  # 4:1 sizes: !CONFIG_REALTIME_ONLY || CONFIG_AV1_DECODER
  if ((aom_config("CONFIG_REALTIME_ONLY") ne "yes") ||
      (aom_config("CONFIG_AV1_DECODER") eq "yes")) {
    # DC-style and vertical predictors: avx2 is listed for 64x16 only.
    foreach my $pred (qw/dc_top dc_left dc_128 v dc/) {
      foreach my $size (qw/4x16 8x32 16x4 16x64 32x8/) {
        specialize("aom_${pred}_predictor_${size}", qw/neon sse2/);
      }
      specialize("aom_${pred}_predictor_64x16", qw/neon sse2 avx2/);
    }

    foreach my $size (@sizes_4to1) {
      specialize("aom_h_predictor_${size}", qw/neon sse2/);
    }

    # Paeth: avx2 is listed for 16x64 and 64x16 only.
    foreach my $size (qw/4x16 8x32 16x4 32x8/) {
      specialize("aom_paeth_predictor_${size}", qw/ssse3 neon/);
    }
    foreach my $size (qw/16x64 64x16/) {
      specialize("aom_paeth_predictor_${size}", qw/ssse3 avx2 neon/);
    }

    foreach my $pred (qw/smooth smooth_v smooth_h/) {
      foreach my $size (@sizes_4to1) {
        specialize("aom_${pred}_predictor_${size}", qw/neon ssse3/);
      }
    }
  }

  if (aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") {
    # TODO(yunqingwang): optimize rectangular DC_PRED to replace division
    # by multiply and shift.
    # v / dc / h / dc_* : sse2 + neon up to 32x32, neon only once a
    # dimension reaches 64.
    foreach my $pred (qw/v dc h dc_128 dc_left dc_top/) {
      foreach my $size (@sizes_up_to_32x32) {
        specialize("aom_highbd_${pred}_predictor_${size}", qw/sse2 neon/);
      }
      foreach my $size (@sizes_with_64) {
        specialize("aom_highbd_${pred}_predictor_${size}", qw/neon/);
      }
    }

    # paeth and the smooth variants are neon only.
    foreach my $pred (qw/paeth smooth smooth_v smooth_h/) {
      foreach my $size (@sizes_2to1) {
        specialize("aom_highbd_${pred}_predictor_${size}", qw/neon/);
      }
    }

    # 4:1 sizes: !CONFIG_REALTIME_ONLY || CONFIG_AV1_DECODER
    if ((aom_config("CONFIG_REALTIME_ONLY") ne "yes") ||
        (aom_config("CONFIG_AV1_DECODER") eq "yes")) {
      foreach my $pred (qw/v dc h dc_128 dc_left dc_top
                           paeth smooth smooth_v smooth_h/) {
        foreach my $size (@sizes_4to1) {
          specialize("aom_highbd_${pred}_predictor_${size}", qw/neon/);
        }
      }
    }
  }
}

#
# Sub Pixel Filters
#
add_proto("void", "aom_convolve_copy",
          "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, int w, int h");
specialize("aom_convolve_copy", qw/neon sse2 avx2/);

# The horizontal and vertical 8-tap filters share a signature and arch list;
# avx2 is included only through $avx2_ssse3.
foreach my $dir (qw/horiz vert/) {
  add_proto("void", "aom_convolve8_${dir}",
            "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h");
  specialize("aom_convolve8_${dir}",
             qw/neon neon_dotprod neon_i8mm ssse3/, $avx2_ssse3);
}

add_proto("void", "aom_scaled_2d",
          "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h");
specialize("aom_scaled_2d", qw/ssse3 neon neon_dotprod neon_i8mm/);

if (aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") {
  add_proto("void", "aom_highbd_convolve_copy",
            "const uint16_t *src, ptrdiff_t src_stride, uint16_t *dst, ptrdiff_t dst_stride, int w, int h");
  specialize("aom_highbd_convolve_copy", qw/sse2 avx2 neon/);

  foreach my $dir (qw/horiz vert/) {
    add_proto("void", "aom_highbd_convolve8_${dir}",
              "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bd");
    specialize("aom_highbd_convolve8_${dir}", qw/sse2 avx2 neon sve/);
  }
}

#
# Loopfilter
#
add_proto("void", "aom_lpf_vertical_14",
          "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh");
specialize("aom_lpf_vertical_14", qw/sse2 neon/);

add_proto("void", "aom_lpf_vertical_14_dual",
          "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1");
specialize("aom_lpf_vertical_14_dual", qw/sse2 neon/);

add_proto("void", "aom_lpf_vertical_14_quad",
          "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0");
specialize("aom_lpf_vertical_14_quad", qw/avx2 sse2 neon/);

add_proto("void", "aom_lpf_vertical_6",
          "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh");
specialize("aom_lpf_vertical_6", qw/sse2 neon/);

add_proto("void", "aom_lpf_vertical_8",
          "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh");
specialize("aom_lpf_vertical_8", qw/sse2 neon/);

add_proto("void", "aom_lpf_vertical_8_dual",
          "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1");
specialize qw/aom_lpf_vertical_8_dual sse2 neon/;

# Loop filter prototypes on uint8_t samples.
# Naming, as visible in the signatures below:
#   (no suffix) one blimit/limit/thresh set
#   _dual       two sets (blimit0/limit0/thresh0 and blimit1/limit1/thresh1)
#   _quad       one set (blimit0/limit0/thresh0)
add_proto qw/void aom_lpf_vertical_8_quad/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0";
specialize qw/aom_lpf_vertical_8_quad sse2 neon/;

add_proto qw/void aom_lpf_vertical_4/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
specialize qw/aom_lpf_vertical_4 sse2 neon/;

add_proto qw/void aom_lpf_vertical_4_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";
specialize qw/aom_lpf_vertical_4_dual sse2 neon/;

add_proto qw/void aom_lpf_vertical_4_quad/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0";
specialize qw/aom_lpf_vertical_4_quad sse2 neon/;

add_proto qw/void aom_lpf_horizontal_14/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
specialize qw/aom_lpf_horizontal_14 sse2 neon/;

add_proto qw/void aom_lpf_horizontal_14_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";
specialize qw/aom_lpf_horizontal_14_dual sse2 neon/;

add_proto qw/void aom_lpf_horizontal_14_quad/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0";
specialize qw/aom_lpf_horizontal_14_quad sse2 avx2 neon/;

add_proto qw/void aom_lpf_horizontal_6/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
specialize qw/aom_lpf_horizontal_6 sse2 neon/;

add_proto qw/void aom_lpf_horizontal_6_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";
specialize qw/aom_lpf_horizontal_6_dual sse2 neon/;

add_proto qw/void aom_lpf_horizontal_6_quad/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0";
specialize qw/aom_lpf_horizontal_6_quad sse2 avx2 neon/;

add_proto qw/void aom_lpf_horizontal_8/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
specialize qw/aom_lpf_horizontal_8 sse2 neon/;

add_proto qw/void aom_lpf_horizontal_8_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";
specialize qw/aom_lpf_horizontal_8_dual sse2 neon/;

add_proto qw/void aom_lpf_horizontal_8_quad/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0";
specialize qw/aom_lpf_horizontal_8_quad sse2 avx2 neon/;

add_proto qw/void aom_lpf_horizontal_4/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
specialize qw/aom_lpf_horizontal_4 sse2 neon/;

add_proto qw/void aom_lpf_horizontal_4_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";
specialize qw/aom_lpf_horizontal_4_dual sse2 neon/;

add_proto qw/void aom_lpf_horizontal_4_quad/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0";
specialize qw/aom_lpf_horizontal_4_quad sse2 neon/;

add_proto qw/void aom_lpf_vertical_6_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";
specialize qw/aom_lpf_vertical_6_dual sse2 neon/;

add_proto qw/void aom_lpf_vertical_6_quad/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0";
specialize qw/aom_lpf_vertical_6_quad sse2 neon/;

# High bit depth loop filters: uint16_t samples plus an explicit "int bd"
# argument. Only the plain and _dual variants are declared (no _quad).
if (aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") {
  add_proto qw/void aom_highbd_lpf_vertical_14/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
  specialize qw/aom_highbd_lpf_vertical_14 neon sse2/;

  add_proto qw/void aom_highbd_lpf_vertical_14_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd";
  specialize qw/aom_highbd_lpf_vertical_14_dual neon sse2 avx2/;

  add_proto qw/void aom_highbd_lpf_vertical_8/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
  specialize qw/aom_highbd_lpf_vertical_8 neon sse2/;

  add_proto qw/void aom_highbd_lpf_vertical_8_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd";
  specialize qw/aom_highbd_lpf_vertical_8_dual neon sse2 avx2/;

  add_proto qw/void aom_highbd_lpf_vertical_6/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
  specialize qw/aom_highbd_lpf_vertical_6 neon sse2/;

  add_proto qw/void aom_highbd_lpf_vertical_6_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd";
  specialize qw/aom_highbd_lpf_vertical_6_dual neon sse2/;

  add_proto qw/void aom_highbd_lpf_vertical_4/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
  specialize qw/aom_highbd_lpf_vertical_4 neon sse2/;

  add_proto qw/void aom_highbd_lpf_vertical_4_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd";
  specialize qw/aom_highbd_lpf_vertical_4_dual neon sse2 avx2/;

  add_proto qw/void aom_highbd_lpf_horizontal_14/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
  specialize qw/aom_highbd_lpf_horizontal_14 neon sse2/;

  add_proto qw/void aom_highbd_lpf_horizontal_14_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1,int bd";
  specialize qw/aom_highbd_lpf_horizontal_14_dual neon sse2 avx2/;

  add_proto qw/void aom_highbd_lpf_horizontal_6/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
  specialize qw/aom_highbd_lpf_horizontal_6 neon sse2/;

  add_proto qw/void aom_highbd_lpf_horizontal_6_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd";
  specialize qw/aom_highbd_lpf_horizontal_6_dual neon sse2/;

  add_proto qw/void aom_highbd_lpf_horizontal_8/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
  specialize qw/aom_highbd_lpf_horizontal_8 neon sse2/;

  add_proto qw/void aom_highbd_lpf_horizontal_8_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd";
  specialize qw/aom_highbd_lpf_horizontal_8_dual neon sse2 avx2/;

  add_proto qw/void aom_highbd_lpf_horizontal_4/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
  specialize qw/aom_highbd_lpf_horizontal_4 neon sse2/;

  add_proto qw/void aom_highbd_lpf_horizontal_4_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd";
  specialize qw/aom_highbd_lpf_horizontal_4_dual neon sse2 avx2/;
} # CONFIG_AV1_HIGHBITDEPTH

#
# Encoder functions.
#

#
# Forward transform
#
if (aom_config("CONFIG_AV1_ENCODER") eq "yes") {
  add_proto qw/void aom_fdct4x4/, "const int16_t *input, tran_low_t *output, int stride";
  specialize qw/aom_fdct4x4 neon sse2/;

  # Same input as aom_fdct4x4, but the output is int16_t instead of tran_low_t.
  add_proto qw/void aom_fdct4x4_lp/, "const int16_t *input, int16_t *output, int stride";
  specialize qw/aom_fdct4x4_lp neon sse2/;

  if (aom_config("CONFIG_INTERNAL_STATS") eq "yes") {
    # 8x8 DCT transform for psnr-hvs. Unlike other transforms isn't compatible
    # with av1 scan orders, because it does two transposes.
    add_proto qw/void aom_fdct8x8/, "const int16_t *input, tran_low_t *output, int stride";
    # $ssse3_x86_64 is 'ssse3' on x86_64 builds and '' otherwise (set at the
    # top of this file), so the ssse3 version is only listed for x86_64.
    specialize qw/aom_fdct8x8 neon sse2/, "$ssse3_x86_64";
    # High bit depth
    if (aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") {
      add_proto qw/void aom_highbd_fdct8x8/, "const int16_t *input, tran_low_t *output, int stride";
      specialize qw/aom_highbd_fdct8x8 sse2/;
    }
  } # CONFIG_INTERNAL_STATS

  # FFT/IFFT (float) only used for denoising (and noise power spectral density estimation)
  # The 2x2 variants have no specialization listed.
  add_proto qw/void aom_fft2x2_float/, "const float *input, float *temp, float *output";

  add_proto qw/void aom_fft4x4_float/, "const float *input, float *temp, float *output";
  specialize qw/aom_fft4x4_float sse2/;

  add_proto qw/void aom_fft8x8_float/, "const float *input, float *temp, float *output";
  specialize qw/aom_fft8x8_float avx2 sse2/;

  add_proto qw/void aom_fft16x16_float/, "const float *input, float *temp, float *output";
  specialize qw/aom_fft16x16_float avx2 sse2/;

  add_proto qw/void aom_fft32x32_float/, "const float *input, float *temp, float *output";
  specialize qw/aom_fft32x32_float avx2 sse2/;

  add_proto qw/void aom_ifft2x2_float/, "const float *input, float *temp, float *output";

  add_proto qw/void aom_ifft4x4_float/, "const float *input, float *temp, float *output";
  specialize qw/aom_ifft4x4_float sse2/;

  add_proto qw/void aom_ifft8x8_float/, "const float *input, float *temp, float *output";
  specialize qw/aom_ifft8x8_float avx2 sse2/;

  add_proto qw/void aom_ifft16x16_float/, "const float *input, float *temp, float *output";
  specialize qw/aom_ifft16x16_float avx2 sse2/;

  add_proto qw/void aom_ifft32x32_float/, "const float *input, float *temp, float *output";
  specialize qw/aom_ifft32x32_float avx2 sse2/;
} # CONFIG_AV1_ENCODER

#
# Quantization
#
if (aom_config("CONFIG_AV1_ENCODER") eq "yes") {
  # All aom_quantize_b* prototypes in this section share one argument list.
  add_proto qw/void aom_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
  specialize qw/aom_quantize_b sse2 neon avx avx2/, "$ssse3_x86_64";

  add_proto qw/void aom_quantize_b_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
  specialize qw/aom_quantize_b_32x32 neon avx avx2/, "$ssse3_x86_64";

  add_proto qw/void aom_quantize_b_64x64/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
specialize qw/aom_quantize_b_64x64 neon ssse3 avx2/;

  # The *_adaptive quantizers are only declared when the build is not
  # realtime-only. They take the same argument list as aom_quantize_b.
  if (aom_config("CONFIG_REALTIME_ONLY") ne "yes") {
    add_proto qw/void aom_quantize_b_adaptive/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
    specialize qw/aom_quantize_b_adaptive sse2 avx2/;

    add_proto qw/void aom_quantize_b_32x32_adaptive/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
    specialize qw/aom_quantize_b_32x32_adaptive sse2/;

    add_proto qw/void aom_quantize_b_64x64_adaptive/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
    specialize qw/aom_quantize_b_64x64_adaptive sse2/;
  } # !CONFIG_REALTIME_ONLY
} # CONFIG_AV1_ENCODER

# High bit depth quantizers: declared only when both the encoder and high bit
# depth support are enabled. The argument list matches the aom_quantize_b* one.
if (aom_config("CONFIG_AV1_ENCODER") eq "yes" && aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") {
  add_proto qw/void aom_highbd_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
  specialize qw/aom_highbd_quantize_b sse2 avx2 neon/;

  add_proto qw/void aom_highbd_quantize_b_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
  specialize qw/aom_highbd_quantize_b_32x32 sse2 avx2 neon/;

  add_proto qw/void aom_highbd_quantize_b_64x64/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
  specialize qw/aom_highbd_quantize_b_64x64 sse2 avx2 neon/;

  # Adaptive high bit depth quantizers are skipped for realtime-only builds.
  if (aom_config("CONFIG_REALTIME_ONLY") ne "yes") {
    add_proto qw/void aom_highbd_quantize_b_adaptive/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
    specialize qw/aom_highbd_quantize_b_adaptive sse2 avx2 neon/;

    add_proto qw/void aom_highbd_quantize_b_32x32_adaptive/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
    specialize qw/aom_highbd_quantize_b_32x32_adaptive sse2 avx2 neon/;

    add_proto qw/void aom_highbd_quantize_b_64x64_adaptive/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
specialize qw/aom_highbd_quantize_b_64x64_adaptive sse2 neon/;
  } # !CONFIG_REALTIME_ONLY
} # CONFIG_AV1_ENCODER && CONFIG_AV1_HIGHBITDEPTH

#
# Alpha blending with mask
#
# These prototypes are not guarded by CONFIG_AV1_ENCODER.
add_proto qw/void aom_lowbd_blend_a64_d16_mask/, "uint8_t *dst, uint32_t dst_stride, const CONV_BUF_TYPE *src0, uint32_t src0_stride, const CONV_BUF_TYPE *src1, uint32_t src1_stride, const uint8_t *mask, uint32_t mask_stride, int w, int h, int subw, int subh, ConvolveParams *conv_params";
specialize qw/aom_lowbd_blend_a64_d16_mask sse4_1 avx2 neon/;
add_proto qw/void aom_blend_a64_mask/, "uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, uint32_t mask_stride, int w, int h, int subw, int subh";
add_proto qw/void aom_blend_a64_hmask/, "uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, int w, int h";
add_proto qw/void aom_blend_a64_vmask/, "uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, int w, int h";
specialize "aom_blend_a64_mask", qw/sse4_1 neon avx2/;
specialize "aom_blend_a64_hmask", qw/sse4_1 neon/;
specialize "aom_blend_a64_vmask", qw/sse4_1 neon/;

# High bit depth blending: same shapes as above with an extra bit depth
# argument. Buffers are still passed as uint8_t pointers in these prototypes.
if (aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") {
  add_proto qw/void aom_highbd_blend_a64_mask/, "uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, uint32_t mask_stride, int w, int h, int subw, int subh, int bd";
  add_proto qw/void aom_highbd_blend_a64_hmask/, "uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, int w, int h, int bd";
  add_proto qw/void aom_highbd_blend_a64_vmask/, "uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, int w, int h, int bd";
  add_proto qw/void aom_highbd_blend_a64_d16_mask/, "uint8_t *dst, uint32_t dst_stride, const CONV_BUF_TYPE *src0, uint32_t src0_stride, const CONV_BUF_TYPE *src1, uint32_t src1_stride, const uint8_t *mask, uint32_t mask_stride, int w, int h, int subw, int subh, ConvolveParams *conv_params, const int bd";
  specialize "aom_highbd_blend_a64_mask", qw/sse4_1 neon/;
  specialize "aom_highbd_blend_a64_hmask", qw/sse4_1 neon/;
  specialize "aom_highbd_blend_a64_vmask", qw/sse4_1 neon/;
  specialize "aom_highbd_blend_a64_d16_mask", qw/sse4_1 neon avx2/;
} # CONFIG_AV1_HIGHBITDEPTH

if (aom_config("CONFIG_AV1_ENCODER") eq "yes") {
  #
  # Block subtraction
  #
  add_proto qw/void aom_subtract_block/, "int rows, int cols, int16_t *diff_ptr, ptrdiff_t diff_stride, const uint8_t *src_ptr, ptrdiff_t src_stride, const uint8_t *pred_ptr, ptrdiff_t pred_stride";
  specialize qw/aom_subtract_block neon sse2 avx2/;

  add_proto qw/int64_t/, "aom_sse", "const uint8_t *a, int a_stride, const uint8_t *b,int b_stride, int width, int height";
  specialize qw/aom_sse sse4_1 avx2 neon neon_dotprod/;

  add_proto qw/void/, "aom_get_blk_sse_sum", "const int16_t *data, int stride, int bw, int bh, int *x_sum, int64_t *x2_sum";
  specialize qw/aom_get_blk_sse_sum sse2 avx2 neon sve/;

  if (aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") {
    add_proto qw/void aom_highbd_subtract_block/, "int rows, int cols, int16_t *diff_ptr, ptrdiff_t diff_stride, const uint8_t *src_ptr, ptrdiff_t src_stride, const uint8_t *pred_ptr, ptrdiff_t pred_stride";
    specialize qw/aom_highbd_subtract_block sse2 neon/;

    add_proto qw/int64_t/, "aom_highbd_sse", "const uint8_t *a8, int a_stride, const uint8_t *b8,int b_stride, int width, int height";
    specialize qw/aom_highbd_sse sse4_1 avx2 neon sve/;
  }

  #
  # Sum of Squares
  #
  add_proto qw/uint64_t aom_sum_squares_2d_i16/, "const int16_t *src, int stride, int width, int height";
  specialize qw/aom_sum_squares_2d_i16 sse2 avx2 neon sve/;

  add_proto qw/uint64_t aom_sum_squares_i16/, "const int16_t *src, uint32_t N";
  specialize qw/aom_sum_squares_i16 sse2 neon sve/;

  add_proto qw/uint64_t aom_var_2d_u8/, "uint8_t *src, int src_stride, int width, int height";
  specialize qw/aom_var_2d_u8 sse2 avx2 neon neon_dotprod/;

  if (aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") {
    add_proto qw/uint64_t aom_var_2d_u16/, "uint8_t *src, int src_stride, int width, int height";
    specialize qw/aom_var_2d_u16 sse2 avx2 neon sve/;
  }

  #
  # Single block SAD / Single block Avg SAD
  #
  # One prototype of each kind is declared per [width, height] pair in
  # @encoder_block_sizes; the per-size specializations follow the loop.
  foreach (@encoder_block_sizes) {
    ($w, $h) = @$_;
    add_proto qw/unsigned int/, "aom_sad${w}x${h}", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
    add_proto qw/unsigned int/, "aom_sad_skip_${w}x${h}", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
    add_proto qw/unsigned int/, "aom_sad${w}x${h}_avg", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
    add_proto qw/unsigned int/, "aom_dist_wtd_sad${w}x${h}_avg", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, const DIST_WTD_COMP_PARAMS *jcp_param";
  }

  add_proto qw/uint64_t aom_sum_sse_2d_i16/, "const int16_t *src, int src_stride, int width, int height, int *sum";
  specialize qw/aom_sum_sse_2d_i16 avx2 neon sse2 sve/;

  # aom_sadWxH
  specialize qw/aom_sad128x128 avx2 sse2 neon neon_dotprod/;
  specialize qw/aom_sad128x64 avx2 sse2 neon neon_dotprod/;
  specialize qw/aom_sad64x128 avx2 sse2 neon neon_dotprod/;
  specialize qw/aom_sad64x64 avx2 sse2 neon neon_dotprod/;
  specialize qw/aom_sad64x32 avx2 sse2 neon neon_dotprod/;
  specialize qw/aom_sad32x64 avx2 sse2 neon neon_dotprod/;
  specialize qw/aom_sad32x32 avx2 sse2 neon neon_dotprod/;
  specialize qw/aom_sad32x16 avx2 sse2 neon neon_dotprod/;
  specialize qw/aom_sad16x32 sse2 neon neon_dotprod/;
  specialize qw/aom_sad16x16 sse2 neon neon_dotprod/;
  specialize qw/aom_sad16x8 sse2 neon neon_dotprod/;
  specialize qw/aom_sad8x16 sse2 neon/;
  specialize qw/aom_sad8x8 sse2 neon/;
  specialize qw/aom_sad8x4 sse2 neon/;
  specialize qw/aom_sad4x8 sse2 neon/;
  specialize qw/aom_sad4x4 sse2 neon/;

  specialize qw/aom_sad4x16 sse2 neon/;
  specialize qw/aom_sad16x4 sse2 neon neon_dotprod/;
  specialize qw/aom_sad8x32 sse2 neon/;
  specialize qw/aom_sad32x8 sse2 neon neon_dotprod/;
  specialize qw/aom_sad16x64 sse2 neon neon_dotprod/;
  specialize qw/aom_sad64x16 sse2 neon neon_dotprod/;

  # aom_sad_skip_WxH
  specialize qw/aom_sad_skip_128x128 avx2 sse2 neon neon_dotprod/;
  specialize qw/aom_sad_skip_128x64 avx2 sse2 neon neon_dotprod/;
  specialize qw/aom_sad_skip_64x128 avx2 sse2 neon neon_dotprod/;
  specialize qw/aom_sad_skip_64x64 avx2 sse2 neon neon_dotprod/;
  specialize qw/aom_sad_skip_64x32 avx2 sse2 neon neon_dotprod/;
  specialize qw/aom_sad_skip_32x64 avx2 sse2 neon neon_dotprod/;
  specialize qw/aom_sad_skip_32x32 avx2 sse2 neon neon_dotprod/;
  specialize qw/aom_sad_skip_32x16 avx2 sse2 neon neon_dotprod/;
  specialize qw/aom_sad_skip_16x32 sse2 neon neon_dotprod/;
  specialize qw/aom_sad_skip_16x16 sse2 neon neon_dotprod/;
  specialize qw/aom_sad_skip_16x8 sse2 neon neon_dotprod/;
  specialize qw/aom_sad_skip_8x16 sse2 neon/;
  specialize qw/aom_sad_skip_8x8 sse2 neon/;
  specialize qw/aom_sad_skip_8x4 neon/;
  specialize qw/aom_sad_skip_4x8 sse2 neon/;
  specialize qw/aom_sad_skip_4x4 neon/;

  specialize qw/aom_sad_skip_4x16 sse2 neon/;
  specialize qw/aom_sad_skip_16x4 neon neon_dotprod/;
  specialize qw/aom_sad_skip_8x32 sse2 neon/;
  specialize qw/aom_sad_skip_32x8 sse2 neon neon_dotprod/;
  specialize qw/aom_sad_skip_16x64 sse2 neon neon_dotprod/;
specialize qw/aom_sad_skip_64x16 sse2 neon neon_dotprod/;

  # aom_sadWxH_avg
  specialize qw/aom_sad128x128_avg avx2 sse2 neon neon_dotprod/;
  specialize qw/aom_sad128x64_avg avx2 sse2 neon neon_dotprod/;
  specialize qw/aom_sad64x128_avg avx2 sse2 neon neon_dotprod/;
  specialize qw/aom_sad64x64_avg avx2 sse2 neon neon_dotprod/;
  specialize qw/aom_sad64x32_avg avx2 sse2 neon neon_dotprod/;
  specialize qw/aom_sad32x64_avg avx2 sse2 neon neon_dotprod/;
  specialize qw/aom_sad32x32_avg avx2 sse2 neon neon_dotprod/;
  specialize qw/aom_sad32x16_avg avx2 sse2 neon neon_dotprod/;
  specialize qw/aom_sad16x32_avg sse2 neon neon_dotprod/;
  specialize qw/aom_sad16x16_avg sse2 neon neon_dotprod/;
  specialize qw/aom_sad16x8_avg sse2 neon neon_dotprod/;
  specialize qw/aom_sad8x16_avg sse2 neon/;
  specialize qw/aom_sad8x8_avg sse2 neon/;
  specialize qw/aom_sad8x4_avg sse2 neon/;
  specialize qw/aom_sad4x8_avg sse2 neon/;
  specialize qw/aom_sad4x4_avg sse2 neon/;

  specialize qw/aom_sad4x16_avg sse2 neon/;
  specialize qw/aom_sad16x4_avg sse2 neon neon_dotprod/;
  specialize qw/aom_sad8x32_avg sse2 neon/;
  specialize qw/aom_sad32x8_avg sse2 neon neon_dotprod/;
  specialize qw/aom_sad16x64_avg sse2 neon neon_dotprod/;
  specialize qw/aom_sad64x16_avg sse2 neon neon_dotprod/;

  # aom_dist_wtd_sadWxH_avg
  specialize qw/aom_dist_wtd_sad128x128_avg sse2 neon neon_dotprod/;
  specialize qw/aom_dist_wtd_sad128x64_avg sse2 neon neon_dotprod/;
  specialize qw/aom_dist_wtd_sad64x128_avg sse2 neon neon_dotprod/;
  specialize qw/aom_dist_wtd_sad64x64_avg sse2 neon neon_dotprod/;
  specialize qw/aom_dist_wtd_sad64x32_avg sse2 neon neon_dotprod/;
  specialize qw/aom_dist_wtd_sad32x64_avg sse2 neon neon_dotprod/;
  specialize qw/aom_dist_wtd_sad32x32_avg sse2 neon neon_dotprod/;
  specialize qw/aom_dist_wtd_sad32x16_avg sse2 neon neon_dotprod/;
  specialize qw/aom_dist_wtd_sad16x32_avg sse2 neon neon_dotprod/;
  specialize qw/aom_dist_wtd_sad16x16_avg sse2 neon neon_dotprod/;
  specialize qw/aom_dist_wtd_sad16x8_avg sse2 neon neon_dotprod/;
  specialize qw/aom_dist_wtd_sad8x16_avg sse2 neon/;
  specialize qw/aom_dist_wtd_sad8x8_avg sse2 neon/;
  specialize qw/aom_dist_wtd_sad8x4_avg sse2 neon/;
  specialize qw/aom_dist_wtd_sad4x8_avg sse2 neon/;
  specialize qw/aom_dist_wtd_sad4x4_avg sse2 neon/;

  # The 4x16/16x4/8x32/32x8/16x64/64x16 sizes are only added to
  # @encoder_block_sizes when the build is not realtime-only (see the top of
  # this file), so their specializations carry the same guard.
  if (aom_config("CONFIG_REALTIME_ONLY") ne "yes") {
    specialize qw/aom_dist_wtd_sad4x16_avg sse2 neon/;
    specialize qw/aom_dist_wtd_sad16x4_avg sse2 neon neon_dotprod/;
    specialize qw/aom_dist_wtd_sad8x32_avg sse2 neon/;
    specialize qw/aom_dist_wtd_sad32x8_avg sse2 neon neon_dotprod/;
    specialize qw/aom_dist_wtd_sad16x64_avg sse2 neon neon_dotprod/;
    specialize qw/aom_dist_wtd_sad64x16_avg sse2 neon neon_dotprod/;
  } # !CONFIG_REALTIME_ONLY

  # High bit depth single block SAD: prototypes per encoder block size, then
  # per-size specializations.
  if (aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") {
    foreach (@encoder_block_sizes) {
      ($w, $h) = @$_;
      add_proto qw/unsigned int/, "aom_highbd_sad${w}x${h}", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
      add_proto qw/unsigned int/, "aom_highbd_sad_skip_${w}x${h}", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
      add_proto qw/unsigned int/, "aom_highbd_sad${w}x${h}_avg", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
      # Generic sse2 registration for every size that is neither 128 in
      # either dimension nor 4 wide.
      # NOTE(review): the explicit per-size lists further down name these
      # functions again; this assumes repeated specialize calls accumulate --
      # confirm against the rtcd helper script.
      if ($w != 128 && $h != 128 && $w != 4) {
        specialize "aom_highbd_sad${w}x${h}", qw/sse2/;
        specialize "aom_highbd_sad${w}x${h}_avg", qw/sse2/;
      }
      add_proto qw/unsigned int/, "aom_highbd_dist_wtd_sad${w}x${h}_avg", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, const DIST_WTD_COMP_PARAMS* jcp_param";
    }

    # aom_highbd_sadWxH
    specialize qw/aom_highbd_sad128x128 avx2 neon/;
    specialize qw/aom_highbd_sad128x64 avx2 neon/;
    specialize qw/aom_highbd_sad64x128 avx2 neon/;
    specialize qw/aom_highbd_sad64x64 avx2 sse2 neon/;
specialize qw/aom_highbd_sad64x32 avx2 sse2 neon/;
    specialize qw/aom_highbd_sad32x64 avx2 sse2 neon/;
    specialize qw/aom_highbd_sad32x32 avx2 sse2 neon/;
    specialize qw/aom_highbd_sad32x16 avx2 sse2 neon/;
    specialize qw/aom_highbd_sad16x32 avx2 sse2 neon/;
    specialize qw/aom_highbd_sad16x16 avx2 sse2 neon/;
    specialize qw/aom_highbd_sad16x8 avx2 sse2 neon/;
    specialize qw/aom_highbd_sad8x16 sse2 neon/;
    specialize qw/aom_highbd_sad8x8 sse2 neon/;
    specialize qw/aom_highbd_sad8x4 sse2 neon/;
    specialize qw/aom_highbd_sad4x8 sse2 neon/;
    specialize qw/aom_highbd_sad4x4 sse2 neon/;

    specialize qw/aom_highbd_sad4x16 sse2 neon/;
    specialize qw/aom_highbd_sad16x4 avx2 sse2 neon/;
    specialize qw/aom_highbd_sad8x32 sse2 neon/;
    specialize qw/aom_highbd_sad32x8 avx2 sse2 neon/;
    specialize qw/aom_highbd_sad16x64 avx2 sse2 neon/;
    specialize qw/aom_highbd_sad64x16 avx2 sse2 neon/;

    # aom_highbd_sad_skip_WxH
    specialize qw/aom_highbd_sad_skip_128x128 avx2 neon/;
    specialize qw/aom_highbd_sad_skip_128x64 avx2 neon/;
    specialize qw/aom_highbd_sad_skip_64x128 avx2 neon/;
    specialize qw/aom_highbd_sad_skip_64x64 avx2 sse2 neon/;
    specialize qw/aom_highbd_sad_skip_64x32 avx2 sse2 neon/;
    specialize qw/aom_highbd_sad_skip_32x64 avx2 sse2 neon/;
    specialize qw/aom_highbd_sad_skip_32x32 avx2 sse2 neon/;
    specialize qw/aom_highbd_sad_skip_32x16 avx2 sse2 neon/;
    specialize qw/aom_highbd_sad_skip_16x32 avx2 sse2 neon/;
    specialize qw/aom_highbd_sad_skip_16x16 avx2 sse2 neon/;
    specialize qw/aom_highbd_sad_skip_16x8 avx2 sse2 neon/;
    specialize qw/aom_highbd_sad_skip_16x4 neon/;
    specialize qw/aom_highbd_sad_skip_8x16 sse2 neon/;
    specialize qw/aom_highbd_sad_skip_8x4 neon/;
    specialize qw/aom_highbd_sad_skip_8x8 sse2 neon/;
    specialize qw/aom_highbd_sad_skip_4x8 sse2 neon/;
    specialize qw/aom_highbd_sad_skip_4x4 neon/;

    specialize qw/aom_highbd_sad_skip_4x16 sse2 neon/;
    specialize qw/aom_highbd_sad_skip_8x32 sse2 neon/;
    specialize qw/aom_highbd_sad_skip_32x8 avx2 sse2 neon/;
    specialize qw/aom_highbd_sad_skip_16x64 avx2 sse2 neon/;
    specialize qw/aom_highbd_sad_skip_64x16 avx2 sse2 neon/;

    # aom_highbd_sadWxH_avg
    specialize qw/aom_highbd_sad128x128_avg avx2 neon/;
    specialize qw/aom_highbd_sad128x64_avg avx2 neon/;
    specialize qw/aom_highbd_sad64x128_avg avx2 neon/;
    specialize qw/aom_highbd_sad64x64_avg avx2 sse2 neon/;
    specialize qw/aom_highbd_sad64x32_avg avx2 sse2 neon/;
    specialize qw/aom_highbd_sad32x64_avg avx2 sse2 neon/;
    specialize qw/aom_highbd_sad32x32_avg avx2 sse2 neon/;
    specialize qw/aom_highbd_sad32x16_avg avx2 sse2 neon/;
    specialize qw/aom_highbd_sad16x32_avg avx2 sse2 neon/;
    specialize qw/aom_highbd_sad16x16_avg avx2 sse2 neon/;
    specialize qw/aom_highbd_sad16x8_avg avx2 sse2 neon/;
    specialize qw/aom_highbd_sad8x16_avg neon/;
    specialize qw/aom_highbd_sad8x8_avg neon/;
    specialize qw/aom_highbd_sad8x4_avg sse2 neon/;
    specialize qw/aom_highbd_sad4x8_avg sse2 neon/;
    specialize qw/aom_highbd_sad4x4_avg sse2 neon/;

    specialize qw/aom_highbd_sad4x16_avg sse2 neon/;
    specialize qw/aom_highbd_sad8x32_avg sse2 neon/;
    specialize qw/aom_highbd_sad16x4_avg avx2 sse2 neon/;
    specialize qw/aom_highbd_sad16x64_avg avx2 sse2 neon/;
    specialize qw/aom_highbd_sad32x8_avg avx2 sse2 neon/;
    specialize qw/aom_highbd_sad64x16_avg avx2 sse2 neon/;
  } # CONFIG_AV1_HIGHBITDEPTH

  #
  # Masked SAD
  #
  # Every encoder block size gets the same specialization list, so the
  # specialize call sits inside the loop next to the prototype.
  foreach (@encoder_block_sizes) {
    ($w, $h) = @$_;
    add_proto qw/unsigned int/, "aom_masked_sad${w}x${h}", "const uint8_t *src, int src_stride, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask";
    specialize "aom_masked_sad${w}x${h}", qw/ssse3 avx2 neon/;
  }

  # High bit depth masked SAD.
  if (aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") {
    foreach (@encoder_block_sizes) {
      ($w, $h) = @$_;
add_proto qw/unsigned int/, "aom_highbd_masked_sad${w}x${h}", "const uint8_t *src8, int src_stride, const uint8_t *ref8, int ref_stride, const uint8_t *second_pred8, const uint8_t *msk, int msk_stride, int invert_mask";
      specialize "aom_highbd_masked_sad${w}x${h}", qw/ssse3 avx2 neon/;
    }
  }

  #
  # OBMC SAD
  #
  # Only declared when the build is not realtime-only.
  if (aom_config("CONFIG_REALTIME_ONLY") ne "yes") {
    foreach (@encoder_block_sizes) {
      ($w, $h) = @$_;
      add_proto qw/unsigned int/, "aom_obmc_sad${w}x${h}", "const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask";
      # 128x32 and 32x128 get a prototype but no specialization.
      # NOTE(review): @encoder_block_sizes as built at the top of this file
      # contains neither size, so this guard currently never excludes anything.
      if (! (($w == 128 && $h == 32) || ($w == 32 && $h == 128))) {
        specialize "aom_obmc_sad${w}x${h}", qw/sse4_1 avx2 neon/;
      }
    }

    if (aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") {
      foreach (@encoder_block_sizes) {
        ($w, $h) = @$_;
        add_proto qw/unsigned int/, "aom_highbd_obmc_sad${w}x${h}", "const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask";
        if (! (($w == 128 && $h == 32) || ($w == 32 && $h == 128))) {
          specialize "aom_highbd_obmc_sad${w}x${h}", qw/sse4_1 avx2 neon/;
        }
      }
    }
  } # !CONFIG_REALTIME_ONLY

  #
  # Multi-block SAD, comparing a reference to N independent blocks
  #
  # The x3d prototypes use the same argument list as x4d, including the
  # 4-entry ref_ptr and sad_array parameters.
  foreach (@encoder_block_sizes) {
    ($w, $h) = @$_;
    add_proto qw/void/, "aom_sad${w}x${h}x4d", "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[4], int ref_stride, uint32_t sad_array[4]";
    add_proto qw/void/, "aom_sad${w}x${h}x3d", "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[4], int ref_stride, uint32_t sad_array[4]";
    add_proto qw/void/, "aom_sad_skip_${w}x${h}x4d", "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[4], int ref_stride, uint32_t sad_array[4]";
  }

  # aom_sadWxHx4d
  specialize qw/aom_sad128x128x4d avx2 sse2 neon neon_dotprod/;
  specialize qw/aom_sad128x64x4d avx2 sse2 neon neon_dotprod/;
  specialize qw/aom_sad64x128x4d avx2 sse2 neon neon_dotprod/;
  specialize qw/aom_sad64x64x4d avx2 sse2 neon neon_dotprod/;
  specialize qw/aom_sad64x32x4d avx2 sse2 neon neon_dotprod/;
  specialize qw/aom_sad32x64x4d avx2 sse2 neon neon_dotprod/;
  specialize qw/aom_sad32x32x4d avx2 sse2 neon neon_dotprod/;
  specialize qw/aom_sad32x16x4d avx2 sse2 neon neon_dotprod/;
  specialize qw/aom_sad16x32x4d avx2 sse2 neon neon_dotprod/;
  specialize qw/aom_sad16x16x4d avx2 sse2 neon neon_dotprod/;
  specialize qw/aom_sad16x8x4d avx2 sse2 neon neon_dotprod/;

  specialize qw/aom_sad8x16x4d sse2 neon/;
  specialize qw/aom_sad8x8x4d sse2 neon/;
  specialize qw/aom_sad8x4x4d sse2 neon/;
  specialize qw/aom_sad4x8x4d sse2 neon/;
  specialize qw/aom_sad4x4x4d sse2 neon/;

  specialize qw/aom_sad64x16x4d avx2 sse2 neon neon_dotprod/;
  specialize qw/aom_sad32x8x4d avx2 sse2 neon neon_dotprod/;
  specialize qw/aom_sad16x64x4d avx2 sse2 neon neon_dotprod/;
  specialize qw/aom_sad16x4x4d avx2 sse2 neon neon_dotprod/;
  specialize qw/aom_sad8x32x4d sse2 neon/;
  specialize qw/aom_sad4x16x4d sse2 neon/;

  # aom_sad_skip_WxHx4d
  specialize qw/aom_sad_skip_128x128x4d avx2 sse2 neon neon_dotprod/;
  specialize qw/aom_sad_skip_128x64x4d avx2 sse2 neon neon_dotprod/;
  specialize qw/aom_sad_skip_64x128x4d avx2 sse2 neon neon_dotprod/;
  specialize qw/aom_sad_skip_64x64x4d avx2 sse2 neon neon_dotprod/;
  specialize qw/aom_sad_skip_64x32x4d avx2 sse2 neon neon_dotprod/;
  specialize qw/aom_sad_skip_64x16x4d avx2 sse2 neon neon_dotprod/;
  specialize qw/aom_sad_skip_32x64x4d avx2 sse2 neon neon_dotprod/;
  specialize qw/aom_sad_skip_32x32x4d avx2 sse2 neon neon_dotprod/;
  specialize qw/aom_sad_skip_32x16x4d avx2 sse2 neon neon_dotprod/;
  specialize qw/aom_sad_skip_32x8x4d avx2 sse2 neon neon_dotprod/;

  specialize qw/aom_sad_skip_16x64x4d avx2 sse2 neon neon_dotprod/;
  specialize qw/aom_sad_skip_16x32x4d avx2 sse2 neon neon_dotprod/;
  specialize qw/aom_sad_skip_16x16x4d avx2 sse2 neon neon_dotprod/;
  specialize qw/aom_sad_skip_16x8x4d avx2 sse2 neon neon_dotprod/;
  specialize qw/aom_sad_skip_16x4x4d avx2 neon neon_dotprod/;
  specialize qw/aom_sad_skip_8x32x4d sse2 neon/;
  specialize qw/aom_sad_skip_8x16x4d sse2 neon/;
  specialize qw/aom_sad_skip_8x8x4d sse2 neon/;
  specialize qw/aom_sad_skip_8x4x4d neon/;
  specialize qw/aom_sad_skip_4x16x4d sse2 neon/;
  specialize qw/aom_sad_skip_4x8x4d sse2 neon/;
  specialize qw/aom_sad_skip_4x4x4d neon/;

  # aom_sadWxHx3d
  specialize qw/aom_sad128x128x3d avx2 neon neon_dotprod/;
  specialize qw/aom_sad128x64x3d avx2 neon neon_dotprod/;
  specialize qw/aom_sad64x128x3d avx2 neon neon_dotprod/;
  specialize qw/aom_sad64x64x3d avx2 neon neon_dotprod/;
  specialize qw/aom_sad64x32x3d avx2 neon neon_dotprod/;
  specialize qw/aom_sad32x64x3d avx2 neon neon_dotprod/;
  specialize qw/aom_sad32x32x3d avx2 neon neon_dotprod/;
  specialize qw/aom_sad32x16x3d avx2 neon neon_dotprod/;
  specialize qw/aom_sad16x32x3d avx2 neon neon_dotprod/;
  specialize qw/aom_sad16x16x3d avx2 neon neon_dotprod/;
  specialize qw/aom_sad16x8x3d avx2 neon neon_dotprod/;
  specialize qw/aom_sad8x16x3d neon/;
  specialize qw/aom_sad8x8x3d neon/;
  specialize qw/aom_sad8x4x3d neon/;
  specialize qw/aom_sad4x8x3d neon/;
  specialize qw/aom_sad4x4x3d neon/;

  specialize qw/aom_sad64x16x3d avx2 neon neon_dotprod/;
  specialize qw/aom_sad32x8x3d avx2 neon neon_dotprod/;
  specialize qw/aom_sad16x64x3d avx2 neon neon_dotprod/;
  specialize qw/aom_sad16x4x3d avx2 neon neon_dotprod/;
  specialize qw/aom_sad8x32x3d neon/;
  specialize qw/aom_sad4x16x3d neon/;

  #
  # High bit depth multi-block SAD, comparing a reference to N independent blocks
  #
  if (aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") {
    foreach (@encoder_block_sizes) {
      ($w, $h) = @$_;
      add_proto qw/void/, "aom_highbd_sad${w}x${h}x4d", "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[4], int ref_stride, uint32_t sad_array[4]";
      add_proto qw/void/, "aom_highbd_sad${w}x${h}x3d", "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[4], int ref_stride, uint32_t sad_array[4]";
      add_proto qw/void/, "aom_highbd_sad_skip_${w}x${h}x4d", "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[4], int ref_stride, uint32_t sad_array[4]";
      # Generic sse2 registration for every size without a 128 dimension;
      # the explicit per-size lists below name the same functions again.
      if ($w != 128 && $h != 128) {
        specialize "aom_highbd_sad${w}x${h}x4d", qw/sse2/;
      }
    }

    # aom_highbd_sadWxHx4d
    specialize qw/aom_highbd_sad128x128x4d avx2 neon/;
    specialize qw/aom_highbd_sad128x64x4d avx2 neon/;
    specialize qw/aom_highbd_sad64x128x4d avx2 neon/;
    specialize qw/aom_highbd_sad64x64x4d sse2 avx2 neon/;
    specialize qw/aom_highbd_sad64x32x4d sse2 avx2 neon/;
    specialize qw/aom_highbd_sad32x64x4d sse2 avx2 neon/;
    specialize qw/aom_highbd_sad32x32x4d sse2 avx2 neon/;
    specialize qw/aom_highbd_sad32x16x4d sse2 avx2 neon/;
    specialize qw/aom_highbd_sad16x32x4d sse2 avx2 neon/;
    specialize qw/aom_highbd_sad16x16x4d sse2 avx2 neon/;
    specialize qw/aom_highbd_sad16x8x4d sse2 avx2 neon/;
    specialize qw/aom_highbd_sad8x16x4d sse2 neon/;
    specialize qw/aom_highbd_sad8x8x4d sse2 neon/;
    specialize qw/aom_highbd_sad8x4x4d sse2 neon/;
    specialize qw/aom_highbd_sad4x8x4d sse2 neon/;
    specialize qw/aom_highbd_sad4x4x4d sse2 neon/;

    specialize qw/aom_highbd_sad4x16x4d sse2 neon/;
    specialize qw/aom_highbd_sad16x4x4d avx2 sse2 neon/;
    specialize qw/aom_highbd_sad8x32x4d sse2 neon/;
    specialize qw/aom_highbd_sad32x8x4d avx2 sse2 neon/;
    specialize qw/aom_highbd_sad16x64x4d avx2 sse2 neon/;
    specialize qw/aom_highbd_sad64x16x4d avx2 sse2 neon/;

    # aom_highbd_sad_skip_WxHx4d
    specialize qw/aom_highbd_sad_skip_128x128x4d avx2 neon/;
    specialize qw/aom_highbd_sad_skip_128x64x4d avx2 neon/;
    specialize qw/aom_highbd_sad_skip_64x128x4d avx2 neon/;
    specialize qw/aom_highbd_sad_skip_64x64x4d avx2 sse2 neon/;
    specialize qw/aom_highbd_sad_skip_64x32x4d avx2 sse2 neon/;
    specialize qw/aom_highbd_sad_skip_32x64x4d avx2 sse2 neon/;
    specialize qw/aom_highbd_sad_skip_32x32x4d avx2 sse2 neon/;
    specialize qw/aom_highbd_sad_skip_32x16x4d avx2 sse2 neon/;
    specialize qw/aom_highbd_sad_skip_16x32x4d avx2 sse2 neon/;
    specialize qw/aom_highbd_sad_skip_16x16x4d avx2 sse2 neon/;
    specialize qw/aom_highbd_sad_skip_16x8x4d avx2 sse2 neon/;
    specialize qw/aom_highbd_sad_skip_16x4x4d neon/;
    specialize qw/aom_highbd_sad_skip_8x16x4d sse2 neon/;
    specialize qw/aom_highbd_sad_skip_8x8x4d sse2 neon/;
    specialize qw/aom_highbd_sad_skip_8x4x4d neon/;
    specialize qw/aom_highbd_sad_skip_4x8x4d sse2 neon/;
    specialize qw/aom_highbd_sad_skip_4x4x4d neon/;

    specialize qw/aom_highbd_sad_skip_4x16x4d sse2 neon/;
    specialize qw/aom_highbd_sad_skip_8x32x4d sse2 neon/;
specialize qw/aom_highbd_sad_skip_32x8x4d avx2 sse2 neon/; 1211 specialize qw/aom_highbd_sad_skip_16x64x4d avx2 sse2 neon/; 1212 specialize qw/aom_highbd_sad_skip_64x16x4d avx2 sse2 neon/; 1213 1214 specialize qw/aom_highbd_sad128x128x3d avx2 neon/; 1215 specialize qw/aom_highbd_sad128x64x3d avx2 neon/; 1216 specialize qw/aom_highbd_sad64x128x3d avx2 neon/; 1217 specialize qw/aom_highbd_sad64x64x3d avx2 neon/; 1218 specialize qw/aom_highbd_sad64x32x3d avx2 neon/; 1219 specialize qw/aom_highbd_sad32x64x3d avx2 neon/; 1220 specialize qw/aom_highbd_sad32x32x3d avx2 neon/; 1221 specialize qw/aom_highbd_sad32x16x3d avx2 neon/; 1222 specialize qw/aom_highbd_sad16x32x3d avx2 neon/; 1223 specialize qw/aom_highbd_sad16x16x3d avx2 neon/; 1224 specialize qw/aom_highbd_sad16x8x3d avx2 neon/; 1225 specialize qw/aom_highbd_sad8x16x3d neon/; 1226 specialize qw/aom_highbd_sad8x8x3d neon/; 1227 specialize qw/aom_highbd_sad8x4x3d neon/; 1228 specialize qw/aom_highbd_sad4x8x3d neon/; 1229 specialize qw/aom_highbd_sad4x4x3d neon/; 1230 1231 specialize qw/aom_highbd_sad64x16x3d avx2 neon/; 1232 specialize qw/aom_highbd_sad32x8x3d avx2 neon/; 1233 specialize qw/aom_highbd_sad16x64x3d avx2 neon/; 1234 specialize qw/aom_highbd_sad16x4x3d avx2 neon/; 1235 specialize qw/aom_highbd_sad8x32x3d neon/; 1236 specialize qw/aom_highbd_sad4x16x3d neon/; 1237 } 1238 # 1239 # Avg 1240 # 1241 add_proto qw/unsigned int aom_avg_8x8/, "const uint8_t *, int p"; 1242 specialize qw/aom_avg_8x8 sse2 neon/; 1243 1244 add_proto qw/unsigned int aom_avg_4x4/, "const uint8_t *, int p"; 1245 specialize qw/aom_avg_4x4 sse2 neon/; 1246 1247 add_proto qw/void aom_avg_8x8_quad/, "const uint8_t *s, int p, int x16_idx, int y16_idx, int *avg"; 1248 specialize qw/aom_avg_8x8_quad avx2 sse2 neon/; 1249 1250 add_proto qw/void aom_minmax_8x8/, "const uint8_t *s, int p, const uint8_t *d, int dp, int *min, int *max"; 1251 specialize qw/aom_minmax_8x8 sse2 neon/; 1252 1253 if (aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") { 
1254 add_proto qw/unsigned int aom_highbd_avg_8x8/, "const uint8_t *, int p"; 1255 specialize qw/aom_highbd_avg_8x8 neon/; 1256 add_proto qw/unsigned int aom_highbd_avg_4x4/, "const uint8_t *, int p"; 1257 specialize qw/aom_highbd_avg_4x4 neon/; 1258 add_proto qw/void aom_highbd_minmax_8x8/, "const uint8_t *s, int p, const uint8_t *d, int dp, int *min, int *max"; 1259 specialize qw/aom_highbd_minmax_8x8 neon/; 1260 } 1261 1262 add_proto qw/void aom_int_pro_row/, "int16_t *hbuf, const uint8_t *ref, const int ref_stride, const int width, const int height, int norm_factor"; 1263 specialize qw/aom_int_pro_row avx2 sse2 neon/; 1264 1265 add_proto qw/void aom_int_pro_col/, "int16_t *vbuf, const uint8_t *ref, const int ref_stride, const int width, const int height, int norm_factor"; 1266 specialize qw/aom_int_pro_col avx2 sse2 neon/; 1267 1268 add_proto qw/int aom_vector_var/, "const int16_t *ref, const int16_t *src, int bwl"; 1269 specialize qw/aom_vector_var avx2 sse4_1 neon sve/; 1270 1271 # 1272 # Hadamard transform and SATD for implementing the temporal dependency model 1273 # 1274 add_proto qw/void aom_hadamard_4x4/, "const int16_t *src_diff, ptrdiff_t src_stride, tran_low_t *coeff"; 1275 specialize qw/aom_hadamard_4x4 sse2 neon/; 1276 1277 add_proto qw/void aom_hadamard_8x8/, "const int16_t *src_diff, ptrdiff_t src_stride, tran_low_t *coeff"; 1278 specialize qw/aom_hadamard_8x8 sse2 neon/; 1279 1280 add_proto qw/void aom_hadamard_16x16/, "const int16_t *src_diff, ptrdiff_t src_stride, tran_low_t *coeff"; 1281 specialize qw/aom_hadamard_16x16 avx2 sse2 neon/; 1282 1283 add_proto qw/void aom_hadamard_32x32/, "const int16_t *src_diff, ptrdiff_t src_stride, tran_low_t *coeff"; 1284 specialize qw/aom_hadamard_32x32 avx2 sse2 neon/; 1285 1286 add_proto qw/void aom_hadamard_lp_8x8/, "const int16_t *src_diff, ptrdiff_t src_stride, int16_t *coeff"; 1287 specialize qw/aom_hadamard_lp_8x8 sse2 neon/; 1288 1289 add_proto qw/void aom_hadamard_lp_16x16/, "const int16_t *src_diff,
ptrdiff_t src_stride, int16_t *coeff"; 1290 specialize qw/aom_hadamard_lp_16x16 sse2 avx2 neon/; 1291 1292 add_proto qw/void aom_hadamard_lp_8x8_dual/, "const int16_t *src_diff, ptrdiff_t src_stride, int16_t *coeff"; 1293 specialize qw/aom_hadamard_lp_8x8_dual sse2 avx2 neon/; 1294 1295 if (aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") { 1296 add_proto qw/void aom_highbd_hadamard_8x8/, "const int16_t *src_diff, ptrdiff_t src_stride, tran_low_t *coeff"; 1297 specialize qw/aom_highbd_hadamard_8x8 avx2 neon/; 1298 1299 add_proto qw/void aom_highbd_hadamard_16x16/, "const int16_t *src_diff, ptrdiff_t src_stride, tran_low_t *coeff"; 1300 specialize qw/aom_highbd_hadamard_16x16 avx2 neon/; 1301 1302 add_proto qw/void aom_highbd_hadamard_32x32/, "const int16_t *src_diff, ptrdiff_t src_stride, tran_low_t *coeff"; 1303 specialize qw/aom_highbd_hadamard_32x32 avx2 neon/; 1304 } 1305 add_proto qw/int aom_satd/, "const tran_low_t *coeff, int length"; 1306 specialize qw/aom_satd neon sse2 avx2/; 1307 1308 add_proto qw/int aom_satd_lp/, "const int16_t *coeff, int length"; 1309 specialize qw/aom_satd_lp sse2 avx2 neon/; 1310 1311 1312 # 1313 # Structured Similarity (SSIM) 1314 # 1315 add_proto qw/void aom_ssim_parms_8x8/, "const uint8_t *s, int sp, const uint8_t *r, int rp, uint32_t *sum_s, uint32_t *sum_r, uint32_t *sum_sq_s, uint32_t *sum_sq_r, uint32_t *sum_sxr"; 1316 specialize qw/aom_ssim_parms_8x8/, "$sse2_x86_64"; 1317 1318 if (aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") { 1319 add_proto qw/void aom_highbd_ssim_parms_8x8/, "const uint16_t *s, int sp, const uint16_t *r, int rp, uint32_t *sum_s, uint32_t *sum_r, uint32_t *sum_sq_s, uint32_t *sum_sq_r, uint32_t *sum_sxr"; 1320 } 1321} # CONFIG_AV1_ENCODER 1322 1323if (aom_config("CONFIG_AV1_ENCODER") eq "yes") { 1324 1325 # 1326 # Specialty Variance 1327 # 1328 add_proto qw/void aom_get_var_sse_sum_8x8_quad/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse8x8, int 
*sum8x8, unsigned int *tot_sse, int *tot_sum, uint32_t *var8x8"; 1329 specialize qw/aom_get_var_sse_sum_8x8_quad avx2 sse2 neon neon_dotprod/; 1330 1331 add_proto qw/void aom_get_var_sse_sum_16x16_dual/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse16x16, unsigned int *tot_sse, int *tot_sum, uint32_t *var16x16"; 1332 specialize qw/aom_get_var_sse_sum_16x16_dual avx2 sse2 neon neon_dotprod/; 1333 1334 add_proto qw/unsigned int aom_mse16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse"; 1335 add_proto qw/unsigned int aom_mse16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse"; 1336 add_proto qw/unsigned int aom_mse8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse"; 1337 add_proto qw/unsigned int aom_mse8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse"; 1338 1339 specialize qw/aom_mse16x16 sse2 avx2 neon neon_dotprod/; 1340 specialize qw/aom_mse16x8 sse2 neon neon_dotprod/; 1341 specialize qw/aom_mse8x16 sse2 neon neon_dotprod/; 1342 specialize qw/aom_mse8x8 sse2 neon neon_dotprod/; 1343 1344 if (aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") { 1345 foreach $bd (8, 10, 12) { 1346 add_proto qw/unsigned int/, "aom_highbd_${bd}_mse16x16", "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse"; 1347 add_proto qw/unsigned int/, "aom_highbd_${bd}_mse16x8", "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse"; 1348 add_proto qw/unsigned int/, "aom_highbd_${bd}_mse8x16", "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse"; 1349 add_proto qw/unsigned int/, "aom_highbd_${bd}_mse8x8", "const uint8_t *src_ptr, int source_stride, const 
uint8_t *ref_ptr, int recon_stride, unsigned int *sse"; 1350 1351 if ($bd eq 8) { 1352 specialize "aom_highbd_${bd}_mse16x16", qw/sse2 neon neon_dotprod/; 1353 specialize "aom_highbd_${bd}_mse16x8", qw/neon neon_dotprod/; 1354 specialize "aom_highbd_${bd}_mse8x16", qw/neon neon_dotprod/; 1355 specialize "aom_highbd_${bd}_mse8x8", qw/sse2 neon neon_dotprod/; 1356 } elsif ($bd eq 10) { 1357 specialize "aom_highbd_${bd}_mse16x16", qw/avx2 sse2 neon sve/; 1358 specialize "aom_highbd_${bd}_mse16x8", qw/neon sve/; 1359 specialize "aom_highbd_${bd}_mse8x16", qw/neon sve/; 1360 specialize "aom_highbd_${bd}_mse8x8", qw/sse2 neon sve/; 1361 } else { 1362 specialize "aom_highbd_${bd}_mse16x16", qw/sse2 neon sve/; 1363 specialize "aom_highbd_${bd}_mse16x8", qw/neon sve/; 1364 specialize "aom_highbd_${bd}_mse8x16", qw/neon sve/; 1365 specialize "aom_highbd_${bd}_mse8x8", qw/sse2 neon sve/; 1366 } 1367 1368 } 1369 } 1370 1371 if (aom_config("CONFIG_REALTIME_ONLY") ne "yes") { 1372 add_proto qw/unsigned int aom_get_mb_ss/, "const int16_t *"; 1373 specialize qw/aom_get_mb_ss sse2 neon/; 1374 } 1375 1376 # 1377 # Variance / Subpixel Variance / Subpixel Avg Variance 1378 # 1379 add_proto qw/uint64_t/, "aom_mse_wxh_16bit", "uint8_t *dst, int dstride,uint16_t *src, int sstride, int w, int h"; 1380 specialize qw/aom_mse_wxh_16bit sse2 avx2 neon/; 1381 1382 add_proto qw/uint64_t/, "aom_mse_16xh_16bit", "uint8_t *dst, int dstride,uint16_t *src, int w, int h"; 1383 specialize qw/aom_mse_16xh_16bit sse2 avx2 neon/; 1384 1385 foreach (@encoder_block_sizes) { 1386 ($w, $h) = @$_; 1387 add_proto qw/unsigned int/, "aom_variance${w}x${h}", "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; 1388 add_proto qw/uint32_t/, "aom_sub_pixel_variance${w}x${h}", "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse"; 1389 add_proto qw/uint32_t/, "aom_sub_pixel_avg_variance${w}x${h}", 
"const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred"; 1390 add_proto qw/uint32_t/, "aom_dist_wtd_sub_pixel_avg_variance${w}x${h}", "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred, const DIST_WTD_COMP_PARAMS *jcp_param"; 1391 } 1392 specialize qw/aom_variance128x128 sse2 avx2 neon neon_dotprod/; 1393 specialize qw/aom_variance128x64 sse2 avx2 neon neon_dotprod/; 1394 specialize qw/aom_variance64x128 sse2 avx2 neon neon_dotprod/; 1395 specialize qw/aom_variance64x64 sse2 avx2 neon neon_dotprod/; 1396 specialize qw/aom_variance64x32 sse2 avx2 neon neon_dotprod/; 1397 specialize qw/aom_variance32x64 sse2 avx2 neon neon_dotprod/; 1398 specialize qw/aom_variance32x32 sse2 avx2 neon neon_dotprod/; 1399 specialize qw/aom_variance32x16 sse2 avx2 neon neon_dotprod/; 1400 specialize qw/aom_variance16x32 sse2 avx2 neon neon_dotprod/; 1401 specialize qw/aom_variance16x16 sse2 avx2 neon neon_dotprod/; 1402 specialize qw/aom_variance16x8 sse2 avx2 neon neon_dotprod/; 1403 specialize qw/aom_variance8x16 sse2 neon neon_dotprod/; 1404 specialize qw/aom_variance8x8 sse2 neon neon_dotprod/; 1405 specialize qw/aom_variance8x4 sse2 neon neon_dotprod/; 1406 specialize qw/aom_variance4x8 sse2 neon neon_dotprod/; 1407 specialize qw/aom_variance4x4 sse2 neon neon_dotprod/; 1408 1409 specialize qw/aom_sub_pixel_variance128x128 avx2 neon ssse3/; 1410 specialize qw/aom_sub_pixel_variance128x64 avx2 neon ssse3/; 1411 specialize qw/aom_sub_pixel_variance64x128 avx2 neon ssse3/; 1412 specialize qw/aom_sub_pixel_variance64x64 avx2 neon ssse3/; 1413 specialize qw/aom_sub_pixel_variance64x32 avx2 neon ssse3/; 1414 specialize qw/aom_sub_pixel_variance32x64 avx2 neon ssse3/; 1415 specialize qw/aom_sub_pixel_variance32x32 avx2 neon ssse3/; 1416 specialize qw/aom_sub_pixel_variance32x16 avx2 neon 
ssse3/; 1417 specialize qw/aom_sub_pixel_variance16x32 avx2 neon ssse3/; 1418 specialize qw/aom_sub_pixel_variance16x16 avx2 neon ssse3/; 1419 specialize qw/aom_sub_pixel_variance16x8 avx2 neon ssse3/; 1420 specialize qw/aom_sub_pixel_variance8x16 neon ssse3/; 1421 specialize qw/aom_sub_pixel_variance8x8 neon ssse3/; 1422 specialize qw/aom_sub_pixel_variance8x4 neon ssse3/; 1423 specialize qw/aom_sub_pixel_variance4x8 neon ssse3/; 1424 specialize qw/aom_sub_pixel_variance4x4 neon ssse3/; 1425 1426 specialize qw/aom_sub_pixel_avg_variance128x128 avx2 neon ssse3/; 1427 specialize qw/aom_sub_pixel_avg_variance128x64 avx2 neon ssse3/; 1428 specialize qw/aom_sub_pixel_avg_variance64x128 avx2 neon ssse3/; 1429 specialize qw/aom_sub_pixel_avg_variance64x64 avx2 neon ssse3/; 1430 specialize qw/aom_sub_pixel_avg_variance64x32 avx2 neon ssse3/; 1431 specialize qw/aom_sub_pixel_avg_variance32x64 avx2 neon ssse3/; 1432 specialize qw/aom_sub_pixel_avg_variance32x32 avx2 neon ssse3/; 1433 specialize qw/aom_sub_pixel_avg_variance32x16 avx2 neon ssse3/; 1434 specialize qw/aom_sub_pixel_avg_variance16x32 neon ssse3/; 1435 specialize qw/aom_sub_pixel_avg_variance16x16 neon ssse3/; 1436 specialize qw/aom_sub_pixel_avg_variance16x8 neon ssse3/; 1437 specialize qw/aom_sub_pixel_avg_variance8x16 neon ssse3/; 1438 specialize qw/aom_sub_pixel_avg_variance8x8 neon ssse3/; 1439 specialize qw/aom_sub_pixel_avg_variance8x4 neon ssse3/; 1440 specialize qw/aom_sub_pixel_avg_variance4x8 neon ssse3/; 1441 specialize qw/aom_sub_pixel_avg_variance4x4 neon ssse3/; 1442 1443 if (aom_config("CONFIG_REALTIME_ONLY") ne "yes") { 1444 specialize qw/aom_variance4x16 neon neon_dotprod sse2/; 1445 specialize qw/aom_variance16x4 neon neon_dotprod sse2 avx2/; 1446 specialize qw/aom_variance8x32 neon neon_dotprod sse2/; 1447 specialize qw/aom_variance32x8 neon neon_dotprod sse2 avx2/; 1448 specialize qw/aom_variance16x64 neon neon_dotprod sse2 avx2/; 1449 specialize qw/aom_variance64x16 neon neon_dotprod sse2 
avx2/; 1450 1451 specialize qw/aom_sub_pixel_variance4x16 neon ssse3/; 1452 specialize qw/aom_sub_pixel_variance16x4 neon avx2 ssse3/; 1453 specialize qw/aom_sub_pixel_variance8x32 neon ssse3/; 1454 specialize qw/aom_sub_pixel_variance32x8 neon ssse3/; 1455 specialize qw/aom_sub_pixel_variance16x64 neon avx2 ssse3/; 1456 specialize qw/aom_sub_pixel_variance64x16 neon ssse3/; 1457 specialize qw/aom_sub_pixel_avg_variance4x16 neon ssse3/; 1458 specialize qw/aom_sub_pixel_avg_variance16x4 neon ssse3/; 1459 specialize qw/aom_sub_pixel_avg_variance8x32 neon ssse3/; 1460 specialize qw/aom_sub_pixel_avg_variance32x8 neon ssse3/; 1461 specialize qw/aom_sub_pixel_avg_variance16x64 neon ssse3/; 1462 specialize qw/aom_sub_pixel_avg_variance64x16 neon ssse3/; 1463 1464 specialize qw/aom_dist_wtd_sub_pixel_avg_variance4x16 neon ssse3/; 1465 specialize qw/aom_dist_wtd_sub_pixel_avg_variance16x4 neon ssse3/; 1466 specialize qw/aom_dist_wtd_sub_pixel_avg_variance8x32 neon ssse3/; 1467 specialize qw/aom_dist_wtd_sub_pixel_avg_variance32x8 neon ssse3/; 1468 specialize qw/aom_dist_wtd_sub_pixel_avg_variance16x64 neon ssse3/; 1469 specialize qw/aom_dist_wtd_sub_pixel_avg_variance64x16 neon ssse3/; 1470 } 1471 1472 specialize qw/aom_dist_wtd_sub_pixel_avg_variance64x64 neon ssse3/; 1473 specialize qw/aom_dist_wtd_sub_pixel_avg_variance64x32 neon ssse3/; 1474 specialize qw/aom_dist_wtd_sub_pixel_avg_variance32x64 neon ssse3/; 1475 specialize qw/aom_dist_wtd_sub_pixel_avg_variance32x32 neon ssse3/; 1476 specialize qw/aom_dist_wtd_sub_pixel_avg_variance32x16 neon ssse3/; 1477 specialize qw/aom_dist_wtd_sub_pixel_avg_variance16x32 neon ssse3/; 1478 specialize qw/aom_dist_wtd_sub_pixel_avg_variance16x16 neon ssse3/; 1479 specialize qw/aom_dist_wtd_sub_pixel_avg_variance16x8 neon ssse3/; 1480 specialize qw/aom_dist_wtd_sub_pixel_avg_variance8x16 neon ssse3/; 1481 specialize qw/aom_dist_wtd_sub_pixel_avg_variance8x8 neon ssse3/; 1482 specialize qw/aom_dist_wtd_sub_pixel_avg_variance8x4 neon 
ssse3/; 1483 specialize qw/aom_dist_wtd_sub_pixel_avg_variance4x8 neon ssse3/; 1484 specialize qw/aom_dist_wtd_sub_pixel_avg_variance4x4 neon ssse3/; 1485 1486 specialize qw/aom_dist_wtd_sub_pixel_avg_variance128x128 neon ssse3/; 1487 specialize qw/aom_dist_wtd_sub_pixel_avg_variance128x64 neon ssse3/; 1488 specialize qw/aom_dist_wtd_sub_pixel_avg_variance64x128 neon ssse3/; 1489 1490 if (aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") { 1491 foreach $bd (8, 10, 12) { 1492 foreach (@encoder_block_sizes) { 1493 ($w, $h) = @$_; 1494 add_proto qw/unsigned int/, "aom_highbd_${bd}_variance${w}x${h}", "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse"; 1495 add_proto qw/uint32_t/, "aom_highbd_${bd}_sub_pixel_variance${w}x${h}", "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse"; 1496 add_proto qw/uint32_t/, "aom_highbd_${bd}_sub_pixel_avg_variance${w}x${h}", "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred"; 1497 add_proto qw/uint32_t/, "aom_highbd_${bd}_dist_wtd_sub_pixel_avg_variance${w}x${h}", "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred, const DIST_WTD_COMP_PARAMS* jcp_param"; 1498 } 1499 } 1500 1501 specialize qw/aom_highbd_12_variance128x128 sse2 neon sve/; 1502 specialize qw/aom_highbd_12_variance128x64 sse2 neon sve/; 1503 specialize qw/aom_highbd_12_variance64x128 sse2 neon sve/; 1504 specialize qw/aom_highbd_12_variance64x64 sse2 neon sve/; 1505 specialize qw/aom_highbd_12_variance64x32 sse2 neon sve/; 1506 specialize qw/aom_highbd_12_variance32x64 sse2 neon sve/; 1507 specialize qw/aom_highbd_12_variance32x32 sse2 neon sve/; 1508 specialize qw/aom_highbd_12_variance32x16 sse2 neon sve/; 1509 specialize qw/aom_highbd_12_variance16x32 
sse2 neon sve/; 1510 specialize qw/aom_highbd_12_variance16x16 sse2 neon sve/; 1511 specialize qw/aom_highbd_12_variance16x8 sse2 neon sve/; 1512 specialize qw/aom_highbd_12_variance8x16 sse2 neon sve/; 1513 specialize qw/aom_highbd_12_variance8x8 sse2 neon sve/; 1514 specialize qw/aom_highbd_12_variance8x4 neon sve/; 1515 specialize qw/aom_highbd_12_variance4x8 neon sve/; 1516 specialize qw/aom_highbd_12_variance4x4 sse4_1 neon sve/; 1517 1518 specialize qw/aom_highbd_10_variance128x128 sse2 avx2 neon sve/; 1519 specialize qw/aom_highbd_10_variance128x64 sse2 avx2 neon sve/; 1520 specialize qw/aom_highbd_10_variance64x128 sse2 avx2 neon sve/; 1521 specialize qw/aom_highbd_10_variance64x64 sse2 avx2 neon sve/; 1522 specialize qw/aom_highbd_10_variance64x32 sse2 avx2 neon sve/; 1523 specialize qw/aom_highbd_10_variance32x64 sse2 avx2 neon sve/; 1524 specialize qw/aom_highbd_10_variance32x32 sse2 avx2 neon sve/; 1525 specialize qw/aom_highbd_10_variance32x16 sse2 avx2 neon sve/; 1526 specialize qw/aom_highbd_10_variance16x32 sse2 avx2 neon sve/; 1527 specialize qw/aom_highbd_10_variance16x16 sse2 avx2 neon sve/; 1528 specialize qw/aom_highbd_10_variance16x8 sse2 avx2 neon sve/; 1529 specialize qw/aom_highbd_10_variance8x16 sse2 avx2 neon sve/; 1530 specialize qw/aom_highbd_10_variance8x8 sse2 avx2 neon sve/; 1531 specialize qw/aom_highbd_10_variance8x4 neon sve/; 1532 specialize qw/aom_highbd_10_variance4x8 neon sve/; 1533 specialize qw/aom_highbd_10_variance4x4 sse4_1 neon sve/; 1534 1535 specialize qw/aom_highbd_8_variance128x128 sse2 neon sve/; 1536 specialize qw/aom_highbd_8_variance128x64 sse2 neon sve/; 1537 specialize qw/aom_highbd_8_variance64x128 sse2 neon sve/; 1538 specialize qw/aom_highbd_8_variance64x64 sse2 neon sve/; 1539 specialize qw/aom_highbd_8_variance64x32 sse2 neon sve/; 1540 specialize qw/aom_highbd_8_variance32x64 sse2 neon sve/; 1541 specialize qw/aom_highbd_8_variance32x32 sse2 neon sve/; 1542 specialize qw/aom_highbd_8_variance32x16 sse2 
neon sve/; 1543 specialize qw/aom_highbd_8_variance16x32 sse2 neon sve/; 1544 specialize qw/aom_highbd_8_variance16x16 sse2 neon sve/; 1545 specialize qw/aom_highbd_8_variance16x8 sse2 neon sve/; 1546 specialize qw/aom_highbd_8_variance8x16 sse2 neon sve/; 1547 specialize qw/aom_highbd_8_variance8x8 sse2 neon sve/; 1548 specialize qw/aom_highbd_8_variance8x4 neon sve/; 1549 specialize qw/aom_highbd_8_variance4x8 neon sve/; 1550 specialize qw/aom_highbd_8_variance4x4 sse4_1 neon sve/; 1551 1552 if (aom_config("CONFIG_REALTIME_ONLY") ne "yes") { 1553 foreach $bd (8, 10, 12) { 1554 my $avx2 = ($bd == 10) ? "avx2" : ""; 1555 specialize "aom_highbd_${bd}_variance64x16" , $avx2, qw/sse2 neon sve/; 1556 specialize "aom_highbd_${bd}_variance32x8" , $avx2, qw/sse2 neon sve/; 1557 specialize "aom_highbd_${bd}_variance16x64" , $avx2, qw/sse2 neon sve/; 1558 specialize "aom_highbd_${bd}_variance16x4" , qw/neon sve/; 1559 specialize "aom_highbd_${bd}_variance8x32" , $avx2, qw/sse2 neon sve/; 1560 specialize "aom_highbd_${bd}_variance4x16" , qw/neon sve/; 1561 } 1562 } 1563 1564 specialize qw/aom_highbd_12_sub_pixel_variance128x128 sse2 neon/; 1565 specialize qw/aom_highbd_12_sub_pixel_variance128x64 sse2 neon/; 1566 specialize qw/aom_highbd_12_sub_pixel_variance64x128 sse2 neon/; 1567 specialize qw/aom_highbd_12_sub_pixel_variance64x64 sse2 neon/; 1568 specialize qw/aom_highbd_12_sub_pixel_variance64x32 sse2 neon/; 1569 specialize qw/aom_highbd_12_sub_pixel_variance32x64 sse2 neon/; 1570 specialize qw/aom_highbd_12_sub_pixel_variance32x32 sse2 neon/; 1571 specialize qw/aom_highbd_12_sub_pixel_variance32x16 sse2 neon/; 1572 specialize qw/aom_highbd_12_sub_pixel_variance16x32 sse2 neon/; 1573 specialize qw/aom_highbd_12_sub_pixel_variance16x16 sse2 neon/; 1574 specialize qw/aom_highbd_12_sub_pixel_variance16x8 sse2 neon/; 1575 specialize qw/aom_highbd_12_sub_pixel_variance8x16 sse2 neon/; 1576 specialize qw/aom_highbd_12_sub_pixel_variance8x8 sse2 neon/; 1577 specialize 
qw/aom_highbd_12_sub_pixel_variance8x4 sse2 neon/; 1578 specialize qw/aom_highbd_12_sub_pixel_variance4x8 neon/; 1579 specialize qw/aom_highbd_12_sub_pixel_variance4x4 sse4_1 neon/; 1580 1581 specialize qw/aom_highbd_10_sub_pixel_variance128x128 sse2 avx2 neon/; 1582 specialize qw/aom_highbd_10_sub_pixel_variance128x64 sse2 avx2 neon/; 1583 specialize qw/aom_highbd_10_sub_pixel_variance64x128 sse2 avx2 neon/; 1584 specialize qw/aom_highbd_10_sub_pixel_variance64x64 sse2 avx2 neon/; 1585 specialize qw/aom_highbd_10_sub_pixel_variance64x32 sse2 avx2 neon/; 1586 specialize qw/aom_highbd_10_sub_pixel_variance32x64 sse2 avx2 neon/; 1587 specialize qw/aom_highbd_10_sub_pixel_variance32x32 sse2 avx2 neon/; 1588 specialize qw/aom_highbd_10_sub_pixel_variance32x16 sse2 avx2 neon/; 1589 specialize qw/aom_highbd_10_sub_pixel_variance16x32 sse2 avx2 neon/; 1590 specialize qw/aom_highbd_10_sub_pixel_variance16x16 sse2 avx2 neon/; 1591 specialize qw/aom_highbd_10_sub_pixel_variance16x8 sse2 avx2 neon/; 1592 specialize qw/aom_highbd_10_sub_pixel_variance8x16 sse2 avx2 neon/; 1593 specialize qw/aom_highbd_10_sub_pixel_variance8x8 sse2 avx2 neon/; 1594 specialize qw/aom_highbd_10_sub_pixel_variance8x4 sse2 neon/; 1595 specialize qw/aom_highbd_10_sub_pixel_variance4x8 neon/; 1596 specialize qw/aom_highbd_10_sub_pixel_variance4x4 sse4_1 neon/; 1597 1598 specialize qw/aom_highbd_8_sub_pixel_variance128x128 sse2 neon/; 1599 specialize qw/aom_highbd_8_sub_pixel_variance128x64 sse2 neon/; 1600 specialize qw/aom_highbd_8_sub_pixel_variance64x128 sse2 neon/; 1601 specialize qw/aom_highbd_8_sub_pixel_variance64x64 sse2 neon/; 1602 specialize qw/aom_highbd_8_sub_pixel_variance64x32 sse2 neon/; 1603 specialize qw/aom_highbd_8_sub_pixel_variance32x64 sse2 neon/; 1604 specialize qw/aom_highbd_8_sub_pixel_variance32x32 sse2 neon/; 1605 specialize qw/aom_highbd_8_sub_pixel_variance32x16 sse2 neon/; 1606 specialize qw/aom_highbd_8_sub_pixel_variance16x32 sse2 neon/; 1607 specialize 
qw/aom_highbd_8_sub_pixel_variance16x16 sse2 neon/; 1608 specialize qw/aom_highbd_8_sub_pixel_variance16x8 sse2 neon/; 1609 specialize qw/aom_highbd_8_sub_pixel_variance8x16 sse2 neon/; 1610 specialize qw/aom_highbd_8_sub_pixel_variance8x8 sse2 neon/; 1611 specialize qw/aom_highbd_8_sub_pixel_variance8x4 sse2 neon/; 1612 specialize qw/aom_highbd_8_sub_pixel_variance4x8 neon/; 1613 specialize qw/aom_highbd_8_sub_pixel_variance4x4 sse4_1 neon/; 1614 1615 if (aom_config("CONFIG_REALTIME_ONLY") ne "yes") { 1616 foreach $bd (8, 10, 12) { 1617 specialize "aom_highbd_${bd}_sub_pixel_variance64x16" , qw/sse2 neon/; 1618 specialize "aom_highbd_${bd}_sub_pixel_variance32x8" , qw/sse2 neon/; 1619 specialize "aom_highbd_${bd}_sub_pixel_variance16x64" , qw/sse2 neon/; 1620 specialize "aom_highbd_${bd}_sub_pixel_variance16x4" , qw/sse2 neon/; 1621 specialize "aom_highbd_${bd}_sub_pixel_variance8x32" , qw/sse2 neon/; 1622 specialize "aom_highbd_${bd}_sub_pixel_variance4x16" , qw/neon/; 1623 } 1624 } 1625 1626 specialize qw/aom_highbd_12_sub_pixel_avg_variance128x128 neon/; 1627 specialize qw/aom_highbd_12_sub_pixel_avg_variance128x64 neon/; 1628 specialize qw/aom_highbd_12_sub_pixel_avg_variance64x128 neon/; 1629 specialize qw/aom_highbd_12_sub_pixel_avg_variance64x64 sse2 neon/; 1630 specialize qw/aom_highbd_12_sub_pixel_avg_variance64x32 sse2 neon/; 1631 specialize qw/aom_highbd_12_sub_pixel_avg_variance32x64 sse2 neon/; 1632 specialize qw/aom_highbd_12_sub_pixel_avg_variance32x32 sse2 neon/; 1633 specialize qw/aom_highbd_12_sub_pixel_avg_variance32x16 sse2 neon/; 1634 specialize qw/aom_highbd_12_sub_pixel_avg_variance16x32 sse2 neon/; 1635 specialize qw/aom_highbd_12_sub_pixel_avg_variance16x16 sse2 neon/; 1636 specialize qw/aom_highbd_12_sub_pixel_avg_variance16x8 sse2 neon/; 1637 specialize qw/aom_highbd_12_sub_pixel_avg_variance8x16 sse2 neon/; 1638 specialize qw/aom_highbd_12_sub_pixel_avg_variance8x8 sse2 neon/; 1639 specialize qw/aom_highbd_12_sub_pixel_avg_variance8x4 
sse2 neon/;
    specialize "aom_highbd_12_sub_pixel_avg_variance4x8", qw/neon/;
    specialize "aom_highbd_12_sub_pixel_avg_variance4x4", qw/sse4_1 neon/;

    # 10-bit and 8-bit aom_highbd_<bd>_sub_pixel_avg_variance<W>x<H>.
    # Both bit depths take the same per-size instruction-set lists, so they
    # are driven from one set of size tables. The iteration order (all 10-bit
    # sizes first, then all 8-bit sizes) is the order the calls are issued in.
    foreach my $depth (10, 8) {
      my $stem = "aom_highbd_${depth}_sub_pixel_avg_variance";
      foreach my $dims (qw/128x128 128x64 64x128/) {
        specialize "${stem}${dims}", qw/neon/;
      }
      foreach my $dims (qw/64x64 64x32 32x64 32x32 32x16 16x32 16x16 16x8 8x16 8x8 8x4/) {
        specialize "${stem}${dims}", qw/sse2 neon/;
      }
      specialize "${stem}4x8", qw/neon/;
      specialize "${stem}4x4", qw/sse4_1 neon/;
    }

    # Extra sizes that are only specialized when CONFIG_REALTIME_ONLY is
    # not "yes".
    if (aom_config("CONFIG_REALTIME_ONLY") ne "yes") {
      foreach my $depth (8, 10, 12) {
        my $stem = "aom_highbd_${depth}_sub_pixel_avg_variance";
        foreach my $dims (qw/64x16 32x8 16x64 16x4 8x32/) {
          specialize "${stem}${dims}", qw/sse2 neon/;
        }
        specialize "${stem}4x16", qw/neon/;
      }
    }

    # aom_highbd_<bd>_dist_wtd_sub_pixel_avg_variance<W>x<H>: every size is
    # specialized for neon only.
    foreach my $depth (8, 10, 12) {
      my $stem = "aom_highbd_${depth}_dist_wtd_sub_pixel_avg_variance";
      foreach my $dims (qw/128x128 128x64 64x128 64x64 64x32 32x64 32x32 32x16 16x32 16x16 16x8 8x16 8x8 8x4 4x8 4x4/) {
        specialize "${stem}${dims}", qw/neon/;
      }
    }

    # Same family, sizes gated on CONFIG_REALTIME_ONLY not being "yes".
    if (aom_config("CONFIG_REALTIME_ONLY") ne "yes") {
      foreach my $depth (8, 10, 12) {
        my $stem = "aom_highbd_${depth}_dist_wtd_sub_pixel_avg_variance";
        foreach my $dims (qw/64x16 32x8 16x64 16x4 8x32 4x16/) {
          specialize "${stem}${dims}", qw/neon/;
        }
      }
    }
  }

  #
  # Masked Variance / Masked Subpixel Variance
  #
  # One aom_masked_sub_pixel_variance<W>x<H> prototype per entry of
  # @encoder_block_sizes, each specialized for ssse3 and neon. The high
  # bit-depth variants below reuse the same argument string, so it is held
  # in a single variable that is never reassigned.
  my $masked_subpel_args = "const uint8_t *src, int src_stride, int xoffset, int yoffset, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask, unsigned int *sse";

  foreach my $blk (@encoder_block_sizes) {
    ($w, $h) = @$blk;
    my $fn = "aom_masked_sub_pixel_variance${w}x${h}";
    add_proto qw/unsigned int/, $fn, $masked_subpel_args;
    specialize $fn, qw/ssse3 neon/;
  }

  # High bit-depth masked sub-pixel variance: the same sizes for each of the
  # "_8_", "_10_" and "_12_" name infixes.
  if (aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") {
    foreach my $tag (qw/_8_ _10_ _12_/) {
      foreach my $blk (@encoder_block_sizes) {
        ($w, $h) = @$blk;
        my $fn = "aom_highbd${tag}masked_sub_pixel_variance${w}x${h}";
        add_proto qw/unsigned int/, $fn, $masked_subpel_args;
        specialize $fn, qw/ssse3 neon/;
      }
    }
  }

  #
  # OBMC Variance / OBMC Subpixel Variance
  #
  # Everything in this section is skipped when CONFIG_REALTIME_ONLY is "yes".
  if (aom_config("CONFIG_REALTIME_ONLY") ne "yes") {
    # Argument strings shared by the 8-bit and high bit-depth prototypes;
    # neither variable is modified after initialization.
    my $obmc_var_args = "const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse";
    my $obmc_subpel_args = "const uint8_t *pre, int pre_stride, int xoffset, int yoffset, const int32_t *wsrc, const int32_t *mask, unsigned int *sse";

    foreach my $blk (@encoder_block_sizes) {
      ($w, $h) = @$blk;
      my $var_fn = "aom_obmc_variance${w}x${h}";
      my $subpel_fn = "aom_obmc_sub_pixel_variance${w}x${h}";
      add_proto qw/unsigned int/, $var_fn, $obmc_var_args;
      add_proto qw/unsigned int/, $subpel_fn, $obmc_subpel_args;
      specialize $var_fn, qw/sse4_1 avx2 neon/;
      specialize $subpel_fn, qw/sse4_1 neon/;
    }

    # High bit-depth OBMC variants. Note the narrower specialization lists:
    # no avx2 for the variance and neon only for the sub-pixel variance.
    if (aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") {
      foreach my $tag (qw/_8_ _10_ _12_/) {
        foreach my $blk (@encoder_block_sizes) {
          ($w, $h) = @$blk;
          my $var_fn = "aom_highbd${tag}obmc_variance${w}x${h}";
          my $subpel_fn = "aom_highbd${tag}obmc_sub_pixel_variance${w}x${h}";
          add_proto qw/unsigned int/, $var_fn, $obmc_var_args;
          add_proto qw/unsigned int/, $subpel_fn, $obmc_subpel_args;
          specialize $var_fn, qw/sse4_1 neon/;
          specialize $subpel_fn, qw/neon/;
        }
      }
    }
  }

  #
  # Comp Avg
  #
  # Compound-average predictors: a plain one and a variant that additionally
  # takes a DIST_WTD_COMP_PARAMS pointer.
  add_proto "void", "aom_comp_avg_pred", "uint8_t *comp_pred, const uint8_t *pred, int width, int height, const uint8_t *ref, int ref_stride";
  specialize "aom_comp_avg_pred", qw/avx2 neon/;

  add_proto "void", "aom_dist_wtd_comp_avg_pred", "uint8_t *comp_pred, const uint8_t *pred, int width, int height, const uint8_t *ref, int ref_stride, const DIST_WTD_COMP_PARAMS *jcp_param";
  specialize "aom_dist_wtd_comp_avg_pred", qw/ssse3 neon/;

  # High bit-depth counterparts, plus the 16-bit MSE helper.
  if (aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") {
    add_proto "void", "aom_highbd_comp_avg_pred", "uint8_t *comp_pred8, const uint8_t *pred8, int width, int height, const uint8_t *ref8, int ref_stride";
    specialize "aom_highbd_comp_avg_pred", qw/neon/;

    add_proto "void", "aom_highbd_dist_wtd_comp_avg_pred", "uint8_t *comp_pred8, const uint8_t *pred8, int width, int height, const uint8_t *ref8, int ref_stride, const DIST_WTD_COMP_PARAMS *jcp_param";
    specialize "aom_highbd_dist_wtd_comp_avg_pred", qw/sse2 neon/;

    # NOTE(review): the argument string has no space after "dstride,"; it is
    # kept verbatim here.
    add_proto "uint64_t", "aom_mse_wxh_16bit_highbd", "uint16_t *dst, int dstride,uint16_t *src, int sstride, int w, int h";
    specialize "aom_mse_wxh_16bit_highbd", qw/sse2 avx2 neon sve/;
  }

  # Masked compound predictor and its high bit-depth counterpart.
  add_proto "void", "aom_comp_mask_pred", "uint8_t *comp_pred, const uint8_t *pred, int width, int height, const uint8_t *ref, int ref_stride, const uint8_t *mask, int mask_stride, int invert_mask";
  specialize "aom_comp_mask_pred", qw/ssse3 avx2 neon/;

  if (aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") {
    add_proto "void", "aom_highbd_comp_mask_pred", "uint8_t *comp_pred, const uint8_t *pred8, int width, int height, const uint8_t *ref8, int ref_stride, const uint8_t *mask, int mask_stride, int invert_mask";
    specialize "aom_highbd_comp_mask_pred", qw/sse2 avx2 neon/;
  }

  # Flow estimation library
  # Declared only when CONFIG_REALTIME_ONLY is not "yes".
  if (aom_config("CONFIG_REALTIME_ONLY") ne "yes") {
    add_proto "bool", "aom_compute_mean_stddev", "const unsigned char *frame, int stride, int x, int y, double *mean, double *one_over_stddev";
    specialize "aom_compute_mean_stddev", qw/sse4_1 avx2/;

    add_proto "double", "aom_compute_correlation", "const unsigned char *frame1, int stride1, int x1, int y1, double mean1, double one_over_stddev1, const unsigned char *frame2, int stride2, int x2, int y2, double mean2, double one_over_stddev2";
    specialize "aom_compute_correlation", qw/sse4_1 avx2/;

    add_proto "void", "aom_compute_flow_at_point", "const uint8_t *src, const uint8_t *ref, int x, int y, int width, int height, int stride, double *u, double *v";
    specialize "aom_compute_flow_at_point", qw/sse4_1 avx2 neon sve/;
  }

}  # CONFIG_AV1_ENCODER

1;