xref: /aosp_15_r20/external/libvpx/vpx_dsp/ppc/subtract_vsx.c (revision fb1b10ab9aebc7c7068eedab379b749d7e3900be)
1*fb1b10abSAndroid Build Coastguard Worker /*
2*fb1b10abSAndroid Build Coastguard Worker  *  Copyright (c) 2018 The WebM project authors. All Rights Reserved.
3*fb1b10abSAndroid Build Coastguard Worker  *
4*fb1b10abSAndroid Build Coastguard Worker  *  Use of this source code is governed by a BSD-style license
5*fb1b10abSAndroid Build Coastguard Worker  *  that can be found in the LICENSE file in the root of the source
6*fb1b10abSAndroid Build Coastguard Worker  *  tree. An additional intellectual property rights grant can be found
7*fb1b10abSAndroid Build Coastguard Worker  *  in the file PATENTS.  All contributing project authors may
8*fb1b10abSAndroid Build Coastguard Worker  *  be found in the AUTHORS file in the root of the source tree.
9*fb1b10abSAndroid Build Coastguard Worker  */
10*fb1b10abSAndroid Build Coastguard Worker 
11*fb1b10abSAndroid Build Coastguard Worker #include <assert.h>
12*fb1b10abSAndroid Build Coastguard Worker 
13*fb1b10abSAndroid Build Coastguard Worker #include "./vpx_config.h"
14*fb1b10abSAndroid Build Coastguard Worker #include "./vpx_dsp_rtcd.h"
15*fb1b10abSAndroid Build Coastguard Worker #include "vpx/vpx_integer.h"
16*fb1b10abSAndroid Build Coastguard Worker #include "vpx_dsp/ppc/types_vsx.h"
17*fb1b10abSAndroid Build Coastguard Worker 
subtract_block4x4(int16_t * diff,ptrdiff_t diff_stride,const uint8_t * src,ptrdiff_t src_stride,const uint8_t * pred,ptrdiff_t pred_stride)18*fb1b10abSAndroid Build Coastguard Worker static VPX_FORCE_INLINE void subtract_block4x4(
19*fb1b10abSAndroid Build Coastguard Worker     int16_t *diff, ptrdiff_t diff_stride, const uint8_t *src,
20*fb1b10abSAndroid Build Coastguard Worker     ptrdiff_t src_stride, const uint8_t *pred, ptrdiff_t pred_stride) {
21*fb1b10abSAndroid Build Coastguard Worker   int16_t *diff1 = diff + 2 * diff_stride;
22*fb1b10abSAndroid Build Coastguard Worker   const uint8_t *src1 = src + 2 * src_stride;
23*fb1b10abSAndroid Build Coastguard Worker   const uint8_t *pred1 = pred + 2 * pred_stride;
24*fb1b10abSAndroid Build Coastguard Worker 
25*fb1b10abSAndroid Build Coastguard Worker   const int16x8_t d0 = vec_vsx_ld(0, diff);
26*fb1b10abSAndroid Build Coastguard Worker   const int16x8_t d1 = vec_vsx_ld(0, diff + diff_stride);
27*fb1b10abSAndroid Build Coastguard Worker   const int16x8_t d2 = vec_vsx_ld(0, diff1);
28*fb1b10abSAndroid Build Coastguard Worker   const int16x8_t d3 = vec_vsx_ld(0, diff1 + diff_stride);
29*fb1b10abSAndroid Build Coastguard Worker 
30*fb1b10abSAndroid Build Coastguard Worker   const uint8x16_t s0 = read4x2(src, (int)src_stride);
31*fb1b10abSAndroid Build Coastguard Worker   const uint8x16_t p0 = read4x2(pred, (int)pred_stride);
32*fb1b10abSAndroid Build Coastguard Worker   const uint8x16_t s1 = read4x2(src1, (int)src_stride);
33*fb1b10abSAndroid Build Coastguard Worker   const uint8x16_t p1 = read4x2(pred1, (int)pred_stride);
34*fb1b10abSAndroid Build Coastguard Worker 
35*fb1b10abSAndroid Build Coastguard Worker   const int16x8_t da = vec_sub(unpack_to_s16_h(s0), unpack_to_s16_h(p0));
36*fb1b10abSAndroid Build Coastguard Worker   const int16x8_t db = vec_sub(unpack_to_s16_h(s1), unpack_to_s16_h(p1));
37*fb1b10abSAndroid Build Coastguard Worker 
38*fb1b10abSAndroid Build Coastguard Worker   vec_vsx_st(xxpermdi(da, d0, 1), 0, diff);
39*fb1b10abSAndroid Build Coastguard Worker   vec_vsx_st(xxpermdi(da, d1, 3), 0, diff + diff_stride);
40*fb1b10abSAndroid Build Coastguard Worker   vec_vsx_st(xxpermdi(db, d2, 1), 0, diff1);
41*fb1b10abSAndroid Build Coastguard Worker   vec_vsx_st(xxpermdi(db, d3, 3), 0, diff1 + diff_stride);
42*fb1b10abSAndroid Build Coastguard Worker }
43*fb1b10abSAndroid Build Coastguard Worker 
vpx_subtract_block_vsx(int rows,int cols,int16_t * diff,ptrdiff_t diff_stride,const uint8_t * src,ptrdiff_t src_stride,const uint8_t * pred,ptrdiff_t pred_stride)44*fb1b10abSAndroid Build Coastguard Worker void vpx_subtract_block_vsx(int rows, int cols, int16_t *diff,
45*fb1b10abSAndroid Build Coastguard Worker                             ptrdiff_t diff_stride, const uint8_t *src,
46*fb1b10abSAndroid Build Coastguard Worker                             ptrdiff_t src_stride, const uint8_t *pred,
47*fb1b10abSAndroid Build Coastguard Worker                             ptrdiff_t pred_stride) {
48*fb1b10abSAndroid Build Coastguard Worker   int r = rows, c;
49*fb1b10abSAndroid Build Coastguard Worker 
50*fb1b10abSAndroid Build Coastguard Worker   switch (cols) {
51*fb1b10abSAndroid Build Coastguard Worker     case 64:
52*fb1b10abSAndroid Build Coastguard Worker     case 32:
53*fb1b10abSAndroid Build Coastguard Worker       do {
54*fb1b10abSAndroid Build Coastguard Worker         for (c = 0; c < cols; c += 32) {
55*fb1b10abSAndroid Build Coastguard Worker           const uint8x16_t s0 = vec_vsx_ld(0, src + c);
56*fb1b10abSAndroid Build Coastguard Worker           const uint8x16_t s1 = vec_vsx_ld(16, src + c);
57*fb1b10abSAndroid Build Coastguard Worker           const uint8x16_t p0 = vec_vsx_ld(0, pred + c);
58*fb1b10abSAndroid Build Coastguard Worker           const uint8x16_t p1 = vec_vsx_ld(16, pred + c);
59*fb1b10abSAndroid Build Coastguard Worker           const int16x8_t d0l =
60*fb1b10abSAndroid Build Coastguard Worker               vec_sub(unpack_to_s16_l(s0), unpack_to_s16_l(p0));
61*fb1b10abSAndroid Build Coastguard Worker           const int16x8_t d0h =
62*fb1b10abSAndroid Build Coastguard Worker               vec_sub(unpack_to_s16_h(s0), unpack_to_s16_h(p0));
63*fb1b10abSAndroid Build Coastguard Worker           const int16x8_t d1l =
64*fb1b10abSAndroid Build Coastguard Worker               vec_sub(unpack_to_s16_l(s1), unpack_to_s16_l(p1));
65*fb1b10abSAndroid Build Coastguard Worker           const int16x8_t d1h =
66*fb1b10abSAndroid Build Coastguard Worker               vec_sub(unpack_to_s16_h(s1), unpack_to_s16_h(p1));
67*fb1b10abSAndroid Build Coastguard Worker           vec_vsx_st(d0h, 0, diff + c);
68*fb1b10abSAndroid Build Coastguard Worker           vec_vsx_st(d0l, 16, diff + c);
69*fb1b10abSAndroid Build Coastguard Worker           vec_vsx_st(d1h, 0, diff + c + 16);
70*fb1b10abSAndroid Build Coastguard Worker           vec_vsx_st(d1l, 16, diff + c + 16);
71*fb1b10abSAndroid Build Coastguard Worker         }
72*fb1b10abSAndroid Build Coastguard Worker         diff += diff_stride;
73*fb1b10abSAndroid Build Coastguard Worker         pred += pred_stride;
74*fb1b10abSAndroid Build Coastguard Worker         src += src_stride;
75*fb1b10abSAndroid Build Coastguard Worker       } while (--r);
76*fb1b10abSAndroid Build Coastguard Worker       break;
77*fb1b10abSAndroid Build Coastguard Worker     case 16:
78*fb1b10abSAndroid Build Coastguard Worker       do {
79*fb1b10abSAndroid Build Coastguard Worker         const uint8x16_t s0 = vec_vsx_ld(0, src);
80*fb1b10abSAndroid Build Coastguard Worker         const uint8x16_t p0 = vec_vsx_ld(0, pred);
81*fb1b10abSAndroid Build Coastguard Worker         const int16x8_t d0l = vec_sub(unpack_to_s16_l(s0), unpack_to_s16_l(p0));
82*fb1b10abSAndroid Build Coastguard Worker         const int16x8_t d0h = vec_sub(unpack_to_s16_h(s0), unpack_to_s16_h(p0));
83*fb1b10abSAndroid Build Coastguard Worker         vec_vsx_st(d0h, 0, diff);
84*fb1b10abSAndroid Build Coastguard Worker         vec_vsx_st(d0l, 16, diff);
85*fb1b10abSAndroid Build Coastguard Worker         diff += diff_stride;
86*fb1b10abSAndroid Build Coastguard Worker         pred += pred_stride;
87*fb1b10abSAndroid Build Coastguard Worker         src += src_stride;
88*fb1b10abSAndroid Build Coastguard Worker       } while (--r);
89*fb1b10abSAndroid Build Coastguard Worker       break;
90*fb1b10abSAndroid Build Coastguard Worker     case 8:
91*fb1b10abSAndroid Build Coastguard Worker       do {
92*fb1b10abSAndroid Build Coastguard Worker         const uint8x16_t s0 = vec_vsx_ld(0, src);
93*fb1b10abSAndroid Build Coastguard Worker         const uint8x16_t p0 = vec_vsx_ld(0, pred);
94*fb1b10abSAndroid Build Coastguard Worker         const int16x8_t d0h = vec_sub(unpack_to_s16_h(s0), unpack_to_s16_h(p0));
95*fb1b10abSAndroid Build Coastguard Worker         vec_vsx_st(d0h, 0, diff);
96*fb1b10abSAndroid Build Coastguard Worker         diff += diff_stride;
97*fb1b10abSAndroid Build Coastguard Worker         pred += pred_stride;
98*fb1b10abSAndroid Build Coastguard Worker         src += src_stride;
99*fb1b10abSAndroid Build Coastguard Worker       } while (--r);
100*fb1b10abSAndroid Build Coastguard Worker       break;
101*fb1b10abSAndroid Build Coastguard Worker     case 4:
102*fb1b10abSAndroid Build Coastguard Worker       subtract_block4x4(diff, diff_stride, src, src_stride, pred, pred_stride);
103*fb1b10abSAndroid Build Coastguard Worker       if (r > 4) {
104*fb1b10abSAndroid Build Coastguard Worker         diff += 4 * diff_stride;
105*fb1b10abSAndroid Build Coastguard Worker         pred += 4 * pred_stride;
106*fb1b10abSAndroid Build Coastguard Worker         src += 4 * src_stride;
107*fb1b10abSAndroid Build Coastguard Worker 
108*fb1b10abSAndroid Build Coastguard Worker         subtract_block4x4(diff, diff_stride,
109*fb1b10abSAndroid Build Coastguard Worker 
110*fb1b10abSAndroid Build Coastguard Worker                           src, src_stride,
111*fb1b10abSAndroid Build Coastguard Worker 
112*fb1b10abSAndroid Build Coastguard Worker                           pred, pred_stride);
113*fb1b10abSAndroid Build Coastguard Worker       }
114*fb1b10abSAndroid Build Coastguard Worker       break;
115*fb1b10abSAndroid Build Coastguard Worker     default: assert(0);  // unreachable
116*fb1b10abSAndroid Build Coastguard Worker   }
117*fb1b10abSAndroid Build Coastguard Worker }
118