1*fb1b10abSAndroid Build Coastguard Worker /* 2*fb1b10abSAndroid Build Coastguard Worker * Copyright (c) 2017 The WebM project authors. All Rights Reserved. 3*fb1b10abSAndroid Build Coastguard Worker * 4*fb1b10abSAndroid Build Coastguard Worker * Use of this source code is governed by a BSD-style license 5*fb1b10abSAndroid Build Coastguard Worker * that can be found in the LICENSE file in the root of the source 6*fb1b10abSAndroid Build Coastguard Worker * tree. An additional intellectual property rights grant can be found 7*fb1b10abSAndroid Build Coastguard Worker * in the file PATENTS. All contributing project authors may 8*fb1b10abSAndroid Build Coastguard Worker * be found in the AUTHORS file in the root of the source tree. 9*fb1b10abSAndroid Build Coastguard Worker */ 10*fb1b10abSAndroid Build Coastguard Worker 11*fb1b10abSAndroid Build Coastguard Worker #include <stdlib.h> 12*fb1b10abSAndroid Build Coastguard Worker 13*fb1b10abSAndroid Build Coastguard Worker #include "./vpx_dsp_rtcd.h" 14*fb1b10abSAndroid Build Coastguard Worker 15*fb1b10abSAndroid Build Coastguard Worker #include "vpx_dsp/ppc/types_vsx.h" 16*fb1b10abSAndroid Build Coastguard Worker 17*fb1b10abSAndroid Build Coastguard Worker #include "vpx/vpx_integer.h" 18*fb1b10abSAndroid Build Coastguard Worker #include "vpx_ports/mem.h" 19*fb1b10abSAndroid Build Coastguard Worker 20*fb1b10abSAndroid Build Coastguard Worker #define PROCESS16(offset) \ 21*fb1b10abSAndroid Build Coastguard Worker v_a = vec_vsx_ld(offset, a); \ 22*fb1b10abSAndroid Build Coastguard Worker v_b = vec_vsx_ld(offset, b); \ 23*fb1b10abSAndroid Build Coastguard Worker v_abs = vec_absd(v_a, v_b); \ 24*fb1b10abSAndroid Build Coastguard Worker v_sad = vec_sum4s(v_abs, v_sad); 25*fb1b10abSAndroid Build Coastguard Worker 26*fb1b10abSAndroid Build Coastguard Worker #define SAD8(height) \ 27*fb1b10abSAndroid Build Coastguard Worker unsigned int vpx_sad8x##height##_vsx(const uint8_t *a, int a_stride, \ 28*fb1b10abSAndroid Build Coastguard Worker const uint8_t *b, int b_stride) { \ 29*fb1b10abSAndroid Build Coastguard Worker int y = 0; \ 30*fb1b10abSAndroid Build Coastguard Worker uint8x16_t v_a, v_b, v_abs; \ 31*fb1b10abSAndroid Build Coastguard Worker uint32x4_t v_sad = vec_zeros_u32; \ 32*fb1b10abSAndroid Build Coastguard Worker \ 33*fb1b10abSAndroid Build Coastguard Worker do { \ 34*fb1b10abSAndroid Build Coastguard Worker PROCESS16(0) \ 35*fb1b10abSAndroid Build Coastguard Worker \ 36*fb1b10abSAndroid Build Coastguard Worker a += a_stride; \ 37*fb1b10abSAndroid Build Coastguard Worker b += b_stride; \ 38*fb1b10abSAndroid Build Coastguard Worker y++; \ 39*fb1b10abSAndroid Build Coastguard Worker } while (y < height); \ 40*fb1b10abSAndroid Build Coastguard Worker \ 41*fb1b10abSAndroid Build Coastguard Worker return v_sad[1] + v_sad[0]; \ 42*fb1b10abSAndroid Build Coastguard Worker } 43*fb1b10abSAndroid Build Coastguard Worker 44*fb1b10abSAndroid Build Coastguard Worker #define SAD16(height) \ 45*fb1b10abSAndroid Build Coastguard Worker unsigned int vpx_sad16x##height##_vsx(const uint8_t *a, int a_stride, \ 46*fb1b10abSAndroid Build Coastguard Worker const uint8_t *b, int b_stride) { \ 47*fb1b10abSAndroid Build Coastguard Worker int y = 0; \ 48*fb1b10abSAndroid Build Coastguard Worker uint8x16_t v_a, v_b, v_abs; \ 49*fb1b10abSAndroid Build Coastguard Worker uint32x4_t v_sad = vec_zeros_u32; \ 50*fb1b10abSAndroid Build Coastguard Worker \ 51*fb1b10abSAndroid Build Coastguard Worker do { \ 52*fb1b10abSAndroid Build Coastguard Worker PROCESS16(0); \ 53*fb1b10abSAndroid Build Coastguard Worker \ 54*fb1b10abSAndroid Build Coastguard Worker a += a_stride; \ 55*fb1b10abSAndroid Build Coastguard Worker b += b_stride; \ 56*fb1b10abSAndroid Build Coastguard Worker y++; \ 57*fb1b10abSAndroid Build Coastguard Worker } while (y < height); \ 58*fb1b10abSAndroid Build Coastguard Worker \ 59*fb1b10abSAndroid Build Coastguard Worker return v_sad[3] + v_sad[2] + v_sad[1] + v_sad[0]; \ 60*fb1b10abSAndroid Build Coastguard Worker } 61*fb1b10abSAndroid Build Coastguard Worker 62*fb1b10abSAndroid Build Coastguard Worker #define SAD32(height) \ 63*fb1b10abSAndroid Build Coastguard Worker unsigned int vpx_sad32x##height##_vsx(const uint8_t *a, int a_stride, \ 64*fb1b10abSAndroid Build Coastguard Worker const uint8_t *b, int b_stride) { \ 65*fb1b10abSAndroid Build Coastguard Worker int y = 0; \ 66*fb1b10abSAndroid Build Coastguard Worker uint8x16_t v_a, v_b, v_abs; \ 67*fb1b10abSAndroid Build Coastguard Worker uint32x4_t v_sad = vec_zeros_u32; \ 68*fb1b10abSAndroid Build Coastguard Worker \ 69*fb1b10abSAndroid Build Coastguard Worker do { \ 70*fb1b10abSAndroid Build Coastguard Worker PROCESS16(0); \ 71*fb1b10abSAndroid Build Coastguard Worker PROCESS16(16); \ 72*fb1b10abSAndroid Build Coastguard Worker \ 73*fb1b10abSAndroid Build Coastguard Worker a += a_stride; \ 74*fb1b10abSAndroid Build Coastguard Worker b += b_stride; \ 75*fb1b10abSAndroid Build Coastguard Worker y++; \ 76*fb1b10abSAndroid Build Coastguard Worker } while (y < height); \ 77*fb1b10abSAndroid Build Coastguard Worker \ 78*fb1b10abSAndroid Build Coastguard Worker return v_sad[3] + v_sad[2] + v_sad[1] + v_sad[0]; \ 79*fb1b10abSAndroid Build Coastguard Worker } 80*fb1b10abSAndroid Build Coastguard Worker 81*fb1b10abSAndroid Build Coastguard Worker #define SAD64(height) \ 82*fb1b10abSAndroid Build Coastguard Worker unsigned int vpx_sad64x##height##_vsx(const uint8_t *a, int a_stride, \ 83*fb1b10abSAndroid Build Coastguard Worker const uint8_t *b, int b_stride) { \ 84*fb1b10abSAndroid Build Coastguard Worker int y = 0; \ 85*fb1b10abSAndroid Build Coastguard Worker uint8x16_t v_a, v_b, v_abs; \ 86*fb1b10abSAndroid Build Coastguard Worker uint32x4_t v_sad = vec_zeros_u32; \ 87*fb1b10abSAndroid Build Coastguard Worker \ 88*fb1b10abSAndroid Build Coastguard Worker do { \ 89*fb1b10abSAndroid Build Coastguard Worker PROCESS16(0); \ 90*fb1b10abSAndroid Build Coastguard Worker PROCESS16(16); \ 91*fb1b10abSAndroid Build Coastguard Worker PROCESS16(32); \ 92*fb1b10abSAndroid Build Coastguard Worker PROCESS16(48); \ 93*fb1b10abSAndroid Build Coastguard Worker \ 94*fb1b10abSAndroid Build Coastguard Worker a += a_stride; \ 95*fb1b10abSAndroid Build Coastguard Worker b += b_stride; \ 96*fb1b10abSAndroid Build Coastguard Worker y++; \ 97*fb1b10abSAndroid Build Coastguard Worker } while (y < height); \ 98*fb1b10abSAndroid Build Coastguard Worker \ 99*fb1b10abSAndroid Build Coastguard Worker return v_sad[3] + v_sad[2] + v_sad[1] + v_sad[0]; \ 100*fb1b10abSAndroid Build Coastguard Worker } 101*fb1b10abSAndroid Build Coastguard Worker 102*fb1b10abSAndroid Build Coastguard Worker SAD8(4); 103*fb1b10abSAndroid Build Coastguard Worker SAD8(8); 104*fb1b10abSAndroid Build Coastguard Worker SAD8(16); 105*fb1b10abSAndroid Build Coastguard Worker SAD16(8); 106*fb1b10abSAndroid Build Coastguard Worker SAD16(16); 107*fb1b10abSAndroid Build Coastguard Worker SAD16(32); 108*fb1b10abSAndroid Build Coastguard Worker SAD32(16); 109*fb1b10abSAndroid Build Coastguard Worker SAD32(32); 110*fb1b10abSAndroid Build Coastguard Worker SAD32(64); 111*fb1b10abSAndroid Build Coastguard Worker SAD64(32); 112*fb1b10abSAndroid Build Coastguard Worker SAD64(64); 113*fb1b10abSAndroid Build Coastguard Worker 114*fb1b10abSAndroid Build Coastguard Worker #define SAD16AVG(height) \ 115*fb1b10abSAndroid Build Coastguard Worker unsigned int vpx_sad16x##height##_avg_vsx( \ 116*fb1b10abSAndroid Build Coastguard Worker const uint8_t *src, int src_stride, const uint8_t *ref, int ref_stride, \ 117*fb1b10abSAndroid Build Coastguard Worker const uint8_t *second_pred) { \ 118*fb1b10abSAndroid Build Coastguard Worker DECLARE_ALIGNED(16, uint8_t, comp_pred[16 * (height)]); \ 119*fb1b10abSAndroid Build Coastguard Worker vpx_comp_avg_pred_vsx(comp_pred, second_pred, 16, height, ref, \ 120*fb1b10abSAndroid Build Coastguard Worker ref_stride); \ 121*fb1b10abSAndroid Build Coastguard Worker \ 122*fb1b10abSAndroid Build Coastguard Worker return vpx_sad16x##height##_vsx(src, src_stride, comp_pred, 16); \ 123*fb1b10abSAndroid Build Coastguard Worker } 124*fb1b10abSAndroid Build Coastguard Worker 125*fb1b10abSAndroid Build Coastguard Worker #define SAD32AVG(height) \ 126*fb1b10abSAndroid Build Coastguard Worker unsigned int vpx_sad32x##height##_avg_vsx( \ 127*fb1b10abSAndroid Build Coastguard Worker const uint8_t *src, int src_stride, const uint8_t *ref, int ref_stride, \ 128*fb1b10abSAndroid Build Coastguard Worker const uint8_t *second_pred) { \ 129*fb1b10abSAndroid Build Coastguard Worker DECLARE_ALIGNED(32, uint8_t, comp_pred[32 * (height)]); \ 130*fb1b10abSAndroid Build Coastguard Worker vpx_comp_avg_pred_vsx(comp_pred, second_pred, 32, height, ref, \ 131*fb1b10abSAndroid Build Coastguard Worker ref_stride); \ 132*fb1b10abSAndroid Build Coastguard Worker \ 133*fb1b10abSAndroid Build Coastguard Worker return vpx_sad32x##height##_vsx(src, src_stride, comp_pred, 32); \ 134*fb1b10abSAndroid Build Coastguard Worker } 135*fb1b10abSAndroid Build Coastguard Worker 136*fb1b10abSAndroid Build Coastguard Worker #define SAD64AVG(height) \ 137*fb1b10abSAndroid Build Coastguard Worker unsigned int vpx_sad64x##height##_avg_vsx( \ 138*fb1b10abSAndroid Build Coastguard Worker const uint8_t *src, int src_stride, const uint8_t *ref, int ref_stride, \ 139*fb1b10abSAndroid Build Coastguard Worker const uint8_t *second_pred) { \ 140*fb1b10abSAndroid Build Coastguard Worker DECLARE_ALIGNED(64, uint8_t, comp_pred[64 * (height)]); \ 141*fb1b10abSAndroid Build Coastguard Worker vpx_comp_avg_pred_vsx(comp_pred, second_pred, 64, height, ref, \ 142*fb1b10abSAndroid Build Coastguard Worker ref_stride); \ 143*fb1b10abSAndroid Build Coastguard Worker return vpx_sad64x##height##_vsx(src, src_stride, comp_pred, 64); \ 144*fb1b10abSAndroid Build Coastguard Worker } 145*fb1b10abSAndroid Build Coastguard Worker 146*fb1b10abSAndroid Build Coastguard Worker SAD16AVG(8); 147*fb1b10abSAndroid Build Coastguard Worker SAD16AVG(16); 148*fb1b10abSAndroid Build Coastguard Worker SAD16AVG(32); 149*fb1b10abSAndroid Build Coastguard Worker SAD32AVG(16); 150*fb1b10abSAndroid Build Coastguard Worker SAD32AVG(32); 151*fb1b10abSAndroid Build Coastguard Worker SAD32AVG(64); 152*fb1b10abSAndroid Build Coastguard Worker SAD64AVG(32); 153*fb1b10abSAndroid Build Coastguard Worker SAD64AVG(64); 154*fb1b10abSAndroid Build Coastguard Worker 155*fb1b10abSAndroid Build Coastguard Worker #define PROCESS16_4D(offset, ref, v_h, v_l) \ 156*fb1b10abSAndroid Build Coastguard Worker v_b = vec_vsx_ld(offset, ref); \ 157*fb1b10abSAndroid Build Coastguard Worker v_bh = unpack_to_s16_h(v_b); \ 158*fb1b10abSAndroid Build Coastguard Worker v_bl = unpack_to_s16_l(v_b); \ 159*fb1b10abSAndroid Build Coastguard Worker v_subh = vec_sub(v_h, v_bh); \ 160*fb1b10abSAndroid Build Coastguard Worker v_subl = vec_sub(v_l, v_bl); \ 161*fb1b10abSAndroid Build Coastguard Worker v_absh = vec_abs(v_subh); \ 162*fb1b10abSAndroid Build Coastguard Worker v_absl = vec_abs(v_subl); \ 163*fb1b10abSAndroid Build Coastguard Worker v_sad = vec_sum4s(v_absh, v_sad); \ 164*fb1b10abSAndroid Build Coastguard Worker v_sad = vec_sum4s(v_absl, v_sad); 165*fb1b10abSAndroid Build Coastguard Worker 166*fb1b10abSAndroid Build Coastguard Worker #define UNPACK_SRC(offset, srcv_h, srcv_l) \ 167*fb1b10abSAndroid Build Coastguard Worker v_a = vec_vsx_ld(offset, src); \ 168*fb1b10abSAndroid Build Coastguard Worker srcv_h = unpack_to_s16_h(v_a); \ 169*fb1b10abSAndroid Build Coastguard Worker srcv_l = unpack_to_s16_l(v_a); 170*fb1b10abSAndroid Build Coastguard Worker 171*fb1b10abSAndroid Build Coastguard Worker #define SAD16_4D(height) \ 172*fb1b10abSAndroid Build Coastguard Worker void vpx_sad16x##height##x4d_vsx(const uint8_t *src, int src_stride, \ 173*fb1b10abSAndroid Build Coastguard Worker const uint8_t *const ref_array[], \ 174*fb1b10abSAndroid Build Coastguard Worker int ref_stride, uint32_t *sad_array) { \ 175*fb1b10abSAndroid Build Coastguard Worker int i; \ 176*fb1b10abSAndroid Build Coastguard Worker int y; \ 177*fb1b10abSAndroid Build Coastguard Worker unsigned int sad[4]; \ 178*fb1b10abSAndroid Build Coastguard Worker uint8x16_t v_a, v_b; \ 179*fb1b10abSAndroid Build Coastguard Worker int16x8_t v_ah, v_al, v_bh, v_bl, v_absh, v_absl, v_subh, v_subl; \ 180*fb1b10abSAndroid Build Coastguard Worker \ 181*fb1b10abSAndroid Build Coastguard Worker for (i = 0; i < 4; i++) sad_array[i] = 0; \ 182*fb1b10abSAndroid Build Coastguard Worker \ 183*fb1b10abSAndroid Build Coastguard Worker for (y = 0; y < height; y++) { \ 184*fb1b10abSAndroid Build Coastguard Worker UNPACK_SRC(y *src_stride, v_ah, v_al); \ 185*fb1b10abSAndroid Build Coastguard Worker for (i = 0; i < 4; i++) { \ 186*fb1b10abSAndroid Build Coastguard Worker int32x4_t v_sad = vec_splat_s32(0); \ 187*fb1b10abSAndroid Build Coastguard Worker PROCESS16_4D(y *ref_stride, ref_array[i], v_ah, v_al); \ 188*fb1b10abSAndroid Build Coastguard Worker \ 189*fb1b10abSAndroid Build Coastguard Worker vec_vsx_st((uint32x4_t)v_sad, 0, sad); \ 190*fb1b10abSAndroid Build Coastguard Worker sad_array[i] += (sad[3] + sad[2] + sad[1] + sad[0]); \ 191*fb1b10abSAndroid Build Coastguard Worker } \ 192*fb1b10abSAndroid Build Coastguard Worker } \ 193*fb1b10abSAndroid Build Coastguard Worker } 194*fb1b10abSAndroid Build Coastguard Worker 195*fb1b10abSAndroid Build Coastguard Worker #define SAD32_4D(height) \ 196*fb1b10abSAndroid Build Coastguard Worker void vpx_sad32x##height##x4d_vsx(const uint8_t *src, int src_stride, \ 197*fb1b10abSAndroid Build Coastguard Worker const uint8_t *const ref_array[], \ 198*fb1b10abSAndroid Build Coastguard Worker int ref_stride, uint32_t *sad_array) { \ 199*fb1b10abSAndroid Build Coastguard Worker int i; \ 200*fb1b10abSAndroid Build Coastguard Worker int y; \ 201*fb1b10abSAndroid Build Coastguard Worker unsigned int sad[4]; \ 202*fb1b10abSAndroid Build Coastguard Worker uint8x16_t v_a, v_b; \ 203*fb1b10abSAndroid Build Coastguard Worker int16x8_t v_ah1, v_al1, v_ah2, v_al2, v_bh, v_bl; \ 204*fb1b10abSAndroid Build Coastguard Worker int16x8_t v_absh, v_absl, v_subh, v_subl; \ 205*fb1b10abSAndroid Build Coastguard Worker \ 206*fb1b10abSAndroid Build Coastguard Worker for (i = 0; i < 4; i++) sad_array[i] = 0; \ 207*fb1b10abSAndroid Build Coastguard Worker \ 208*fb1b10abSAndroid Build Coastguard Worker for (y = 0; y < height; y++) { \ 209*fb1b10abSAndroid Build Coastguard Worker UNPACK_SRC(y *src_stride, v_ah1, v_al1); \ 210*fb1b10abSAndroid Build Coastguard Worker UNPACK_SRC(y *src_stride + 16, v_ah2, v_al2); \ 211*fb1b10abSAndroid Build Coastguard Worker for (i = 0; i < 4; i++) { \ 212*fb1b10abSAndroid Build Coastguard Worker int32x4_t v_sad = vec_splat_s32(0); \ 213*fb1b10abSAndroid Build Coastguard Worker PROCESS16_4D(y *ref_stride, ref_array[i], v_ah1, v_al1); \ 214*fb1b10abSAndroid Build Coastguard Worker PROCESS16_4D(y *ref_stride + 16, ref_array[i], v_ah2, v_al2); \ 215*fb1b10abSAndroid Build Coastguard Worker \ 216*fb1b10abSAndroid Build Coastguard Worker vec_vsx_st((uint32x4_t)v_sad, 0, sad); \ 217*fb1b10abSAndroid Build Coastguard Worker sad_array[i] += (sad[3] + sad[2] + sad[1] + sad[0]); \ 218*fb1b10abSAndroid Build Coastguard Worker } \ 219*fb1b10abSAndroid Build Coastguard Worker } \ 220*fb1b10abSAndroid Build Coastguard Worker } 221*fb1b10abSAndroid Build Coastguard Worker 222*fb1b10abSAndroid Build Coastguard Worker #define SAD64_4D(height) \ 223*fb1b10abSAndroid Build Coastguard Worker void vpx_sad64x##height##x4d_vsx(const uint8_t *src, int src_stride, \ 224*fb1b10abSAndroid Build Coastguard Worker const uint8_t *const ref_array[], \ 225*fb1b10abSAndroid Build Coastguard Worker int ref_stride, uint32_t *sad_array) { \ 226*fb1b10abSAndroid Build Coastguard Worker int i; \ 227*fb1b10abSAndroid Build Coastguard Worker int y; \ 228*fb1b10abSAndroid Build Coastguard Worker unsigned int sad[4]; \ 229*fb1b10abSAndroid Build Coastguard Worker uint8x16_t v_a, v_b; \ 230*fb1b10abSAndroid Build Coastguard Worker int16x8_t v_ah1, v_al1, v_ah2, v_al2, v_bh, v_bl; \ 231*fb1b10abSAndroid Build Coastguard Worker int16x8_t v_ah3, v_al3, v_ah4, v_al4; \ 232*fb1b10abSAndroid Build Coastguard Worker int16x8_t v_absh, v_absl, v_subh, v_subl; \ 233*fb1b10abSAndroid Build Coastguard Worker \ 234*fb1b10abSAndroid Build Coastguard Worker for (i = 0; i < 4; i++) sad_array[i] = 0; \ 235*fb1b10abSAndroid Build Coastguard Worker \ 236*fb1b10abSAndroid Build Coastguard Worker for (y = 0; y < height; y++) { \ 237*fb1b10abSAndroid Build Coastguard Worker UNPACK_SRC(y *src_stride, v_ah1, v_al1); \ 238*fb1b10abSAndroid Build Coastguard Worker UNPACK_SRC(y *src_stride + 16, v_ah2, v_al2); \ 239*fb1b10abSAndroid Build Coastguard Worker UNPACK_SRC(y *src_stride + 32, v_ah3, v_al3); \ 240*fb1b10abSAndroid Build Coastguard Worker UNPACK_SRC(y *src_stride + 48, v_ah4, v_al4); \ 241*fb1b10abSAndroid Build Coastguard Worker for (i = 0; i < 4; i++) { \ 242*fb1b10abSAndroid Build Coastguard Worker int32x4_t v_sad = vec_splat_s32(0); \ 243*fb1b10abSAndroid Build Coastguard Worker PROCESS16_4D(y *ref_stride, ref_array[i], v_ah1, v_al1); \ 244*fb1b10abSAndroid Build Coastguard Worker PROCESS16_4D(y *ref_stride + 16, ref_array[i], v_ah2, v_al2); \ 245*fb1b10abSAndroid Build Coastguard Worker PROCESS16_4D(y *ref_stride + 32, ref_array[i], v_ah3, v_al3); \ 246*fb1b10abSAndroid Build Coastguard Worker PROCESS16_4D(y *ref_stride + 48, ref_array[i], v_ah4, v_al4); \ 247*fb1b10abSAndroid Build Coastguard Worker \ 248*fb1b10abSAndroid Build Coastguard Worker vec_vsx_st((uint32x4_t)v_sad, 0, sad); \ 249*fb1b10abSAndroid Build Coastguard Worker sad_array[i] += (sad[3] + sad[2] + sad[1] + sad[0]); \ 250*fb1b10abSAndroid Build Coastguard Worker } \ 251*fb1b10abSAndroid Build Coastguard Worker } \ 252*fb1b10abSAndroid Build Coastguard Worker } 253*fb1b10abSAndroid Build Coastguard Worker 254*fb1b10abSAndroid Build Coastguard Worker SAD16_4D(8); 255*fb1b10abSAndroid Build Coastguard Worker SAD16_4D(16); 256*fb1b10abSAndroid Build Coastguard Worker SAD16_4D(32); 257*fb1b10abSAndroid Build Coastguard Worker SAD32_4D(16); 258*fb1b10abSAndroid Build Coastguard Worker SAD32_4D(32); 259*fb1b10abSAndroid Build Coastguard Worker SAD32_4D(64); 260*fb1b10abSAndroid Build Coastguard Worker SAD64_4D(32); 261*fb1b10abSAndroid Build Coastguard Worker SAD64_4D(64); 262