xref: /aosp_15_r20/external/libaom/av1/encoder/wedge_utils.c (revision 77c1e3ccc04c968bd2bc212e87364f250e820521)
1*77c1e3ccSAndroid Build Coastguard Worker /*
2*77c1e3ccSAndroid Build Coastguard Worker  * Copyright (c) 2016, Alliance for Open Media. All rights reserved.
3*77c1e3ccSAndroid Build Coastguard Worker  *
4*77c1e3ccSAndroid Build Coastguard Worker  * This source code is subject to the terms of the BSD 2 Clause License and
5*77c1e3ccSAndroid Build Coastguard Worker  * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6*77c1e3ccSAndroid Build Coastguard Worker  * was not distributed with this source code in the LICENSE file, you can
7*77c1e3ccSAndroid Build Coastguard Worker  * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8*77c1e3ccSAndroid Build Coastguard Worker  * Media Patent License 1.0 was not distributed with this source code in the
9*77c1e3ccSAndroid Build Coastguard Worker  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10*77c1e3ccSAndroid Build Coastguard Worker  */
11*77c1e3ccSAndroid Build Coastguard Worker 
12*77c1e3ccSAndroid Build Coastguard Worker #include <assert.h>
13*77c1e3ccSAndroid Build Coastguard Worker 
14*77c1e3ccSAndroid Build Coastguard Worker #include "aom/aom_integer.h"
15*77c1e3ccSAndroid Build Coastguard Worker 
16*77c1e3ccSAndroid Build Coastguard Worker #include "aom_ports/mem.h"
17*77c1e3ccSAndroid Build Coastguard Worker 
18*77c1e3ccSAndroid Build Coastguard Worker #include "aom_dsp/aom_dsp_common.h"
19*77c1e3ccSAndroid Build Coastguard Worker 
20*77c1e3ccSAndroid Build Coastguard Worker #include "av1/common/reconinter.h"
21*77c1e3ccSAndroid Build Coastguard Worker 
/* Full-scale blending weight, 1 << WEDGE_WEIGHT_BITS. The formulas in this
 * file weight the two predictors by mask and (MAX_MASK_VALUE - mask). */
22*77c1e3ccSAndroid Build Coastguard Worker #define MAX_MASK_VALUE (1 << WEDGE_WEIGHT_BITS)
23*77c1e3ccSAndroid Build Coastguard Worker 
24*77c1e3ccSAndroid Build Coastguard Worker /**
25*77c1e3ccSAndroid Build Coastguard Worker  * Computes SSE of a compound predictor constructed from 2 fundamental
26*77c1e3ccSAndroid Build Coastguard Worker  * predictors p0 and p1 using blending with mask.
27*77c1e3ccSAndroid Build Coastguard Worker  *
28*77c1e3ccSAndroid Build Coastguard Worker  * r1:  Residuals of p1.
29*77c1e3ccSAndroid Build Coastguard Worker  *      (source - p1)
30*77c1e3ccSAndroid Build Coastguard Worker  * d:   Difference of p1 and p0.
31*77c1e3ccSAndroid Build Coastguard Worker  *      (p1 - p0)
32*77c1e3ccSAndroid Build Coastguard Worker  * m:   The blending mask
33*77c1e3ccSAndroid Build Coastguard Worker  * N:   Number of pixels
34*77c1e3ccSAndroid Build Coastguard Worker  *
35*77c1e3ccSAndroid Build Coastguard Worker  * 'r1', 'd', and 'm' are contiguous.
36*77c1e3ccSAndroid Build Coastguard Worker  *
37*77c1e3ccSAndroid Build Coastguard Worker  * Computes:
38*77c1e3ccSAndroid Build Coastguard Worker  *  Sum((MAX_MASK_VALUE*r1 + mask*d)**2), which is equivalent to:
39*77c1e3ccSAndroid Build Coastguard Worker  *  Sum((mask*r0 + (MAX_MASK_VALUE-mask)*r1)**2),
40*77c1e3ccSAndroid Build Coastguard Worker  *    where r0 is (source - p0), and r1 is (source - p1), which is in turn
41*77c1e3ccSAndroid Build Coastguard Worker  *    is equivalent to:
42*77c1e3ccSAndroid Build Coastguard Worker  *  Sum((source*MAX_MASK_VALUE - (mask*p0 + (MAX_MASK_VALUE-mask)*p1))**2),
43*77c1e3ccSAndroid Build Coastguard Worker  *    which is the SSE of the residuals of the compound predictor scaled up by
44*77c1e3ccSAndroid Build Coastguard Worker  *    MAX_MASK_VALUE**2.
45*77c1e3ccSAndroid Build Coastguard Worker  *
46*77c1e3ccSAndroid Build Coastguard Worker  * Note that we clamp the partial term in the loop to 16 bits signed. This is
47*77c1e3ccSAndroid Build Coastguard Worker  * to facilitate equivalent SIMD implementation. It should have no effect if
48*77c1e3ccSAndroid Build Coastguard Worker  * residuals are within 16 - WEDGE_WEIGHT_BITS (=10) signed, which always
49*77c1e3ccSAndroid Build Coastguard Worker  * holds for 8 bit input, and on real input, it should hold practically always,
50*77c1e3ccSAndroid Build Coastguard Worker  * as residuals are expected to be small.
51*77c1e3ccSAndroid Build Coastguard Worker  */
av1_wedge_sse_from_residuals_c(const int16_t * r1,const int16_t * d,const uint8_t * m,int N)52*77c1e3ccSAndroid Build Coastguard Worker uint64_t av1_wedge_sse_from_residuals_c(const int16_t *r1, const int16_t *d,
53*77c1e3ccSAndroid Build Coastguard Worker                                         const uint8_t *m, int N) {
54*77c1e3ccSAndroid Build Coastguard Worker   uint64_t csse = 0;
55*77c1e3ccSAndroid Build Coastguard Worker   int i;
56*77c1e3ccSAndroid Build Coastguard Worker 
57*77c1e3ccSAndroid Build Coastguard Worker   for (i = 0; i < N; i++) {
58*77c1e3ccSAndroid Build Coastguard Worker     int32_t t = MAX_MASK_VALUE * r1[i] + m[i] * d[i];
59*77c1e3ccSAndroid Build Coastguard Worker     t = clamp(t, INT16_MIN, INT16_MAX);
60*77c1e3ccSAndroid Build Coastguard Worker     csse += t * t;
61*77c1e3ccSAndroid Build Coastguard Worker   }
62*77c1e3ccSAndroid Build Coastguard Worker   return ROUND_POWER_OF_TWO(csse, 2 * WEDGE_WEIGHT_BITS);
63*77c1e3ccSAndroid Build Coastguard Worker }
64*77c1e3ccSAndroid Build Coastguard Worker 
/**
 * Choose the mask sign for a compound predictor.
 *
 * ds:    Difference of the squares of the residuals.
 *        r0**2 - r1**2
 * m:     The blending mask
 * N:     Number of pixels
 * limit: Pre-computed threshold value.
 *        MAX_MASK_VALUE/2 * (sum(r0**2) - sum(r1**2))
 *
 * 'ds' and 'm' are contiguous and hold N entries each.
 *
 * Returns 1 if the negated mask has lower SSE compared to the positive
 * mask, 0 otherwise. Computation is based on:
 *  Sum((mask*r0 + (MAX_MASK_VALUE-mask)*r1)**2)
 *                                     >
 *                                Sum(((MAX_MASK_VALUE-mask)*r0 + mask*r1)**2)
 *
 *  which can be simplified to:
 *
 *  Sum(mask*(r0**2 - r1**2)) > MAX_MASK_VALUE/2 * (sum(r0**2) - sum(r1**2))
 *
 *  The right hand side does not depend on the mask, and needs to be passed as
 *  the 'limit' parameter.
 *
 *  After pre-computing (r0**2 - r1**2), which is passed in as 'ds', the left
 *  hand side is simply a scalar product between an int16_t and uint8_t vector.
 *
 *  Note that for efficiency, ds is stored on 16 bits. Real input residuals
 *  being small, this should not cause a noticeable issue.
 *
 *  If N is zero or negative no element is read, the scalar product is 0 and
 *  the result is simply (0 > limit).
 */
int8_t av1_wedge_sign_from_residuals_c(const int16_t *ds, const uint8_t *m,
                                       int N, int64_t limit) {
  int64_t acc = 0;
  int i;

  /* Counted loop rather than do/while: the bound is tested before the first
   * read, so N <= 0 cannot touch ds[] or m[]. Each product is an
   * int16_t * uint8_t evaluated in int, then widened into the 64-bit sum. */
  for (i = 0; i < N; i++) {
    acc += ds[i] * m[i];
  }

  return acc > limit;
}
106*77c1e3ccSAndroid Build Coastguard Worker 
/**
 * Element-wise difference of squares of two arrays.
 *
 * d: Output array; entry i receives a[i]**2 - b[i]**2
 * a: First input array
 * b: Second input array
 * N: Number of elements
 *
 * 'd', 'a', and 'b' are contiguous and hold N entries each.
 *
 * Every result is saturated to the signed 16 bit range before it is stored.
 */
void av1_wedge_compute_delta_squares_c(int16_t *d, const int16_t *a,
                                       const int16_t *b, int N) {
  int k;

  for (k = 0; k < N; ++k) {
    /* Both squares are formed in int, so the subtraction cannot overflow. */
    const int sq_a = a[k] * a[k];
    const int sq_b = b[k] * b[k];

    d[k] = clamp(sq_a - sq_b, INT16_MIN, INT16_MAX);
  }
}
126