/*
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved.
 *
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
 */

#include <stddef.h>
#include <stdint.h>

#include "config/aom_config.h"
#include "config/aom_dsp_rtcd.h"

#include "aom_dsp/aom_dsp_common.h"

// The 2 unused parameters are place holders for PIC enabled build.
// These definitions are for functions defined in subpel_variance.asm
//
// DECL(w, opt) expands to the prototype of the helper
// aom_sub_pixel_variance<w>xh_<opt>. The wrappers below call it with the
// number of rows to process in |height|; it returns an int and writes an
// unsigned int through |sse|.
#define DECL(w, opt)                                                        \
  int aom_sub_pixel_variance##w##xh_##opt(                                  \
      const uint8_t *src, ptrdiff_t src_stride, int x_offset, int y_offset, \
      const uint8_t *dst, ptrdiff_t dst_stride, int height,                 \
      unsigned int *sse, void *unused0, void *unused)

// Declares the helpers for the three helper widths used below: 4, 8 and 16.
#define DECLS(opt) \
  DECL(4, opt);    \
  DECL(8, opt);    \
  DECL(16, opt)

DECLS(ssse3);
#undef DECLS
#undef DECL

// FN(w, h, wf, wlog2, hlog2, opt, cast_prod, cast) defines
// aom_sub_pixel_variance<w>x<h>_<opt>() on top of the <wf>-wide helper:
//  - the block is walked as w / wf column strips, each split into bands of
//    at most 64 rows, with one helper call per band;
//  - the int return values of the helper calls are added into one total, and
//    the values the helper writes through its sse pointer into another;
//  - the sse total is stored in *sse_ptr and the function returns
//      sse - ((sum * sum) >> (wlog2 + hlog2)).
// |cast| is applied to the sum before squaring and |cast_prod| to the
// product, so each instantiation picks the integer width of that arithmetic.
#define FN(w, h, wf, wlog2, hlog2, opt, cast_prod, cast)                    \
  unsigned int aom_sub_pixel_variance##w##x##h##_##opt(                     \
      const uint8_t *src, int src_stride, int x_offset, int y_offset,       \
      const uint8_t *dst, int dst_stride, unsigned int *sse_ptr) {          \
    /* Cap the rows per helper call to avoid overflow in the helper. */     \
    const int band_rows = AOMMIN(h, 64);                                    \
    const int num_bands = h / band_rows;                                    \
    const int num_strips = w / wf;                                          \
    unsigned int sse_acc = 0;                                               \
    int sum_acc = 0;                                                        \
    for (int strip = 0; strip < num_strips; ++strip) {                      \
      const int col = strip * wf;                                           \
      for (int band = 0; band < num_bands; ++band) {                        \
        const int row = band * band_rows;                                   \
        unsigned int band_sse;                                              \
        const int band_sum = aom_sub_pixel_variance##wf##xh_##opt(          \
            src + col + row * src_stride, src_stride, x_offset, y_offset,   \
            dst + col + row * dst_stride, dst_stride, band_rows, &band_sse, \
            NULL, NULL);                                                    \
        sum_acc += band_sum;                                                \
        sse_acc += band_sse;                                                \
      }                                                                     \
    }                                                                       \
    *sse_ptr = sse_acc;                                                     \
    return sse_acc - (unsigned int)(cast_prod(cast sum_acc * sum_acc) >>    \
                                    (wlog2 + hlog2));                       \
  }

// Block sizes instantiated in every configuration.
// Columns: w, h, wf, wlog2, hlog2, opt, cast_prod, cast.
#define FNS_COMMON(opt)                             \
  FN(128, 128, 16, 7, 7, opt, (int64_t), (int64_t)) \
  FN(128, 64, 16, 7, 6, opt, (int64_t), (int64_t))  \
  FN(64, 128, 16, 6, 7, opt, (int64_t), (int64_t))  \
  FN(64, 64, 16, 6, 6, opt, (int64_t), (int64_t))   \
  FN(64, 32, 16, 6, 5, opt, (int64_t), (int64_t))   \
  FN(32, 64, 16, 5, 6, opt, (int64_t), (int64_t))   \
  FN(32, 32, 16, 5, 5, opt, (int64_t), (int64_t))   \
  FN(32, 16, 16, 5, 4, opt, (int64_t), (int64_t))   \
  FN(16, 32, 16, 4, 5, opt, (int64_t), (int64_t))   \
  FN(16, 16, 16, 4, 4, opt, (uint32_t), (int64_t))  \
  FN(16, 8, 16, 4, 3, opt, (int32_t), (int32_t))    \
  FN(8, 16, 8, 3, 4, opt, (int32_t), (int32_t))     \
  FN(8, 8, 8, 3, 3, opt, (int32_t), (int32_t))      \
  FN(8, 4, 8, 3, 2, opt, (int32_t), (int32_t))      \
  FN(4, 8, 4, 2, 3, opt, (int32_t), (int32_t))      \
  FN(4, 4, 4, 2, 2, opt, (int32_t), (int32_t))

// Additional block sizes, instantiated only when CONFIG_REALTIME_ONLY is 0.
#if !CONFIG_REALTIME_ONLY
#define FNS_EXTRA(opt)                            \
  FN(4, 16, 4, 2, 4, opt, (int32_t), (int32_t))   \
  FN(16, 4, 16, 4, 2, opt, (int32_t), (int32_t))  \
  FN(8, 32, 8, 3, 5, opt, (uint32_t), (int64_t))  \
  FN(32, 8, 16, 5, 3, opt, (uint32_t), (int64_t)) \
  FN(16, 64, 16, 4, 6, opt, (int64_t), (int64_t)) \
  FN(64, 16, 16, 6, 4, opt, (int64_t), (int64_t))
#else
#define FNS_EXTRA(opt)
#endif

#define FNS(opt)  \
  FNS_COMMON(opt) \
  FNS_EXTRA(opt)

FNS(ssse3)

#undef FNS
#undef FNS_EXTRA
#undef FNS_COMMON
#undef FN

// The 2 unused parameters are place holders for PIC enabled build.
// DECL(w, opt) expands to the prototype of the helper
// aom_sub_pixel_avg_variance<w>xh_<opt>. Compared with the plain sub-pixel
// variance helper it takes an extra buffer |sec| with its own stride. The
// wrappers below call it with the number of rows to process in |height|; it
// returns an int and writes an unsigned int through |sse|.
#define DECL(w, opt)                                                        \
  int aom_sub_pixel_avg_variance##w##xh_##opt(                              \
      const uint8_t *src, ptrdiff_t src_stride, int x_offset, int y_offset, \
      const uint8_t *dst, ptrdiff_t dst_stride, const uint8_t *sec,         \
      ptrdiff_t sec_stride, int height, unsigned int *sse, void *unused0,   \
      void *unused)

// Declares the helpers for the three helper widths used below: 4, 8 and 16.
#define DECLS(opt) \
  DECL(4, opt);    \
  DECL(8, opt);    \
  DECL(16, opt)

DECLS(ssse3);
#undef DECL
#undef DECLS

// FN(w, h, wf, wlog2, hlog2, opt, cast_prod, cast) defines
// aom_sub_pixel_avg_variance<w>x<h>_<opt>() on top of the <wf>-wide helper:
//  - the block is walked as w / wf column strips, each split into bands of
//    at most 64 rows, with one helper call per band;
//  - |sec| is addressed with a row stride of w, which is also the stride
//    handed to the helper;
//  - the int return values of the helper calls are added into one total, and
//    the values the helper writes through its sse pointer into another;
//  - the sse total is stored in *sse_ptr and the function returns
//      sse - ((sum * sum) >> (wlog2 + hlog2)).
// |cast| is applied to the sum before squaring and |cast_prod| to the
// product, so each instantiation picks the integer width of that arithmetic.
#define FN(w, h, wf, wlog2, hlog2, opt, cast_prod, cast)                  \
  unsigned int aom_sub_pixel_avg_variance##w##x##h##_##opt(               \
      const uint8_t *src, int src_stride, int x_offset, int y_offset,     \
      const uint8_t *dst, int dst_stride, unsigned int *sse_ptr,          \
      const uint8_t *sec) {                                               \
    /* Cap the rows per helper call to avoid overflow in the helper. */   \
    const int band_rows = AOMMIN(h, 64);                                  \
    const int num_bands = h / band_rows;                                  \
    const int num_strips = w / wf;                                        \
    unsigned int sse_acc = 0;                                             \
    int sum_acc = 0;                                                      \
    for (int strip = 0; strip < num_strips; ++strip) {                    \
      const int col = strip * wf;                                         \
      for (int band = 0; band < num_bands; ++band) {                      \
        const int row = band * band_rows;                                 \
        unsigned int band_sse;                                            \
        const int band_sum = aom_sub_pixel_avg_variance##wf##xh_##opt(    \
            src + col + row * src_stride, src_stride, x_offset, y_offset, \
            dst + col + row * dst_stride, dst_stride,                     \
            sec + col + row * w, w, band_rows, &band_sse, NULL, NULL);    \
        sum_acc += band_sum;                                              \
        sse_acc += band_sse;                                              \
      }                                                                   \
    }                                                                     \
    *sse_ptr = sse_acc;                                                   \
    return sse_acc - (unsigned int)(cast_prod(cast sum_acc * sum_acc) >>  \
                                    (wlog2 + hlog2));                     \
  }

// Block sizes instantiated in every configuration.
// Columns: w, h, wf, wlog2, hlog2, opt, cast_prod, cast.
#define FNS_COMMON(opt)                             \
  FN(128, 128, 16, 7, 7, opt, (int64_t), (int64_t)) \
  FN(128, 64, 16, 7, 6, opt, (int64_t), (int64_t))  \
  FN(64, 128, 16, 6, 7, opt, (int64_t), (int64_t))  \
  FN(64, 64, 16, 6, 6, opt, (int64_t), (int64_t))   \
  FN(64, 32, 16, 6, 5, opt, (int64_t), (int64_t))   \
  FN(32, 64, 16, 5, 6, opt, (int64_t), (int64_t))   \
  FN(32, 32, 16, 5, 5, opt, (int64_t), (int64_t))   \
  FN(32, 16, 16, 5, 4, opt, (int64_t), (int64_t))   \
  FN(16, 32, 16, 4, 5, opt, (int64_t), (int64_t))   \
  FN(16, 16, 16, 4, 4, opt, (uint32_t), (int64_t))  \
  FN(16, 8, 16, 4, 3, opt, (uint32_t), (int32_t))   \
  FN(8, 16, 8, 3, 4, opt, (uint32_t), (int32_t))    \
  FN(8, 8, 8, 3, 3, opt, (uint32_t), (int32_t))     \
  FN(8, 4, 8, 3, 2, opt, (uint32_t), (int32_t))     \
  FN(4, 8, 4, 2, 3, opt, (uint32_t), (int32_t))     \
  FN(4, 4, 4, 2, 2, opt, (uint32_t), (int32_t))

// Additional block sizes, instantiated only when CONFIG_REALTIME_ONLY is 0.
#if !CONFIG_REALTIME_ONLY
#define FNS_EXTRA(opt)                            \
  FN(4, 16, 4, 2, 4, opt, (int32_t), (int32_t))   \
  FN(16, 4, 16, 4, 2, opt, (int32_t), (int32_t))  \
  FN(8, 32, 8, 3, 5, opt, (uint32_t), (int64_t))  \
  FN(32, 8, 16, 5, 3, opt, (uint32_t), (int64_t)) \
  FN(16, 64, 16, 4, 6, opt, (int64_t), (int64_t)) \
  FN(64, 16, 16, 6, 4, opt, (int64_t), (int64_t))
#else
#define FNS_EXTRA(opt)
#endif

#define FNS(opt)  \
  FNS_COMMON(opt) \
  FNS_EXTRA(opt)

FNS(ssse3)

#undef FNS
#undef FNS_EXTRA
#undef FNS_COMMON
#undef FN