xref: /aosp_15_r20/external/libdav1d/src/wedge.c (revision c09093415860a1c2373dacd84c4fde00c507cdfd)
1 /*
2  * Copyright © 2018, VideoLAN and dav1d authors
3  * Copyright © 2018, Two Orioles, LLC
4  * All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions are met:
8  *
9  * 1. Redistributions of source code must retain the above copyright notice, this
10  *    list of conditions and the following disclaimer.
11  *
12  * 2. Redistributions in binary form must reproduce the above copyright notice,
13  *    this list of conditions and the following disclaimer in the documentation
14  *    and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19  * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
20  * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  */
27 
28 #include "config.h"
29 
30 #include <stdint.h>
31 #include <string.h>
32 
33 #include "common/intops.h"
34 
35 #include "src/wedge.h"
36 
// Edge directions of the 6 master wedge templates. The obliques are named by
// their approximate angle in degrees; OBLIQUE27/HORIZONTAL are transposes of
// OBLIQUE63/VERTICAL, and OBLIQUE117/153 are horizontal mirrors of 63/27
// (see dav1d_init_ii_wedge_masks()).
enum WedgeDirectionType {
    WEDGE_HORIZONTAL = 0,
    WEDGE_VERTICAL = 1,
    WEDGE_OBLIQUE27 = 2,
    WEDGE_OBLIQUE63 = 3,
    WEDGE_OBLIQUE117 = 4,
    WEDGE_OBLIQUE153 = 5,
    N_WEDGE_DIRECTIONS
};
46 
// One wedge codebook entry: which master template to cut from and where the
// w x h window is centered. Offsets are in 1/8th units of the block
// dimensions (applied as `32 - (w * x_offset >> 3)` in fill2d_16x2()).
typedef struct {
    uint8_t /* enum WedgeDirectionType */ direction; // master template index
    uint8_t x_offset; // horizontal pivot, in eighths of the block width
    uint8_t y_offset; // vertical pivot, in eighths of the block height
} wedge_code_type;
52 
// 16-entry wedge codebook used for blocks taller than wide ("hgtw") — see
// the fill() invocations in dav1d_init_ii_wedge_masks().
static const wedge_code_type wedge_codebook_16_hgtw[16] = {
    { WEDGE_OBLIQUE27,  4, 4 }, { WEDGE_OBLIQUE63,  4, 4 },
    { WEDGE_OBLIQUE117, 4, 4 }, { WEDGE_OBLIQUE153, 4, 4 },
    { WEDGE_HORIZONTAL, 4, 2 }, { WEDGE_HORIZONTAL, 4, 4 },
    { WEDGE_HORIZONTAL, 4, 6 }, { WEDGE_VERTICAL,   4, 4 },
    { WEDGE_OBLIQUE27,  4, 2 }, { WEDGE_OBLIQUE27,  4, 6 },
    { WEDGE_OBLIQUE153, 4, 2 }, { WEDGE_OBLIQUE153, 4, 6 },
    { WEDGE_OBLIQUE63,  2, 4 }, { WEDGE_OBLIQUE63,  6, 4 },
    { WEDGE_OBLIQUE117, 2, 4 }, { WEDGE_OBLIQUE117, 6, 4 },
};
63 
// 16-entry wedge codebook used for blocks wider than tall ("hltw").
static const wedge_code_type wedge_codebook_16_hltw[16] = {
    { WEDGE_OBLIQUE27,  4, 4 }, { WEDGE_OBLIQUE63,  4, 4 },
    { WEDGE_OBLIQUE117, 4, 4 }, { WEDGE_OBLIQUE153, 4, 4 },
    { WEDGE_VERTICAL,   2, 4 }, { WEDGE_VERTICAL,   4, 4 },
    { WEDGE_VERTICAL,   6, 4 }, { WEDGE_HORIZONTAL, 4, 4 },
    { WEDGE_OBLIQUE27,  4, 2 }, { WEDGE_OBLIQUE27,  4, 6 },
    { WEDGE_OBLIQUE153, 4, 2 }, { WEDGE_OBLIQUE153, 4, 6 },
    { WEDGE_OBLIQUE63,  2, 4 }, { WEDGE_OBLIQUE63,  6, 4 },
    { WEDGE_OBLIQUE117, 2, 4 }, { WEDGE_OBLIQUE117, 6, 4 },
};
74 
// 16-entry wedge codebook used for square blocks ("heqw").
static const wedge_code_type wedge_codebook_16_heqw[16] = {
    { WEDGE_OBLIQUE27,  4, 4 }, { WEDGE_OBLIQUE63,  4, 4 },
    { WEDGE_OBLIQUE117, 4, 4 }, { WEDGE_OBLIQUE153, 4, 4 },
    { WEDGE_HORIZONTAL, 4, 2 }, { WEDGE_HORIZONTAL, 4, 6 },
    { WEDGE_VERTICAL,   2, 4 }, { WEDGE_VERTICAL,   6, 4 },
    { WEDGE_OBLIQUE27,  4, 2 }, { WEDGE_OBLIQUE27,  4, 6 },
    { WEDGE_OBLIQUE153, 4, 2 }, { WEDGE_OBLIQUE153, 4, 6 },
    { WEDGE_OBLIQUE63,  2, 4 }, { WEDGE_OBLIQUE63,  6, 4 },
    { WEDGE_OBLIQUE117, 2, 4 }, { WEDGE_OBLIQUE117, 6, 4 },
};
85 
86 Dav1dMasks dav1d_masks;
87 
// Write one 64-byte row of a master wedge template: zeros to the left of the
// edge, an 8-pixel soft-transition band copied from src, and the value 64 to
// the right. ctr is the column on which the transition band is centered.
static void insert_border(uint8_t *const dst, const uint8_t *const src,
                          const int ctr)
{
    const int band_dst = ctr < 4 ? 0 : ctr - 4;       // clamp band start at 0
    const int band_src = ctr < 4 ? 4 - ctr : 0;       // skip clipped band head
    const int band_len = 64 - ctr < 8 ? 64 - ctr : 8; // clip band at row end

    if (ctr > 4)
        memset(dst, 0, ctr - 4);
    memcpy(dst + band_dst, src + band_src, band_len);
    if (ctr < 60)
        memset(dst + ctr + 4, 64, 60 - ctr);
}
96 
// 64x64 byte-matrix transpose: dst[col][row] = src[row][col].
static void transpose(uint8_t *const dst, const uint8_t *const src) {
    for (int row = 0; row < 64; row++)
        for (int col = 0; col < 64; col++)
            dst[col * 64 + row] = src[row * 64 + col];
}
102 
// Mirror a 64x64 byte matrix horizontally: dst[row][63 - col] = src[row][col].
static void hflip(uint8_t *const dst, const uint8_t *const src) {
    for (int row = 0; row < 64; row++) {
        const uint8_t *const s = &src[row * 64];
        uint8_t *const d = &dst[row * 64];
        for (int col = 0; col < 64; col++)
            d[63 - col] = s[col];
    }
}
108 
// Copy a w x h window at (x_off, y_off) out of a 64-byte-stride master mask
// into a tightly packed, w-stride destination. When sign is non-zero, store
// the complementary mask (64 - value) instead of a straight copy.
static void copy2d(uint8_t *dst, const uint8_t *src, int sign,
                   const int w, const int h, const int x_off, const int y_off)
{
    src += y_off * 64 + x_off;
    for (int row = 0; row < h; row++) {
        if (sign) {
            for (int col = 0; col < w; col++)
                dst[col] = 64 - src[col];
        } else {
            memcpy(dst, src, w);
        }
        src += 64; // master template stride
        dst += w;  // packed output stride
    }
}
128 
129 #define MASK_OFFSET(x) ((uint16_t)(((uintptr_t)(x) - (uintptr_t)&dav1d_masks) >> 3))
130 
init_chroma(uint8_t * chroma,const uint8_t * luma,const int sign,const int w,const int h,const int ss_ver)131 static COLD uint16_t init_chroma(uint8_t *chroma, const uint8_t *luma,
132                                  const int sign, const int w, const int h,
133                                  const int ss_ver)
134 {
135     const uint16_t offset = MASK_OFFSET(chroma);
136     for (int y = 0; y < h; y += 1 + ss_ver) {
137         for (int x = 0; x < w; x += 2) {
138             int sum = luma[x] + luma[x + 1] + 1;
139             if (ss_ver) sum += luma[w + x] + luma[w + x + 1] + 1;
140             chroma[x >> 1] = (sum - sign) >> (1 + ss_ver);
141         }
142         luma += w << ss_ver;
143         chroma += w >> 1;
144     }
145     return offset;
146 }
147 
// Generate all 16 wedge masks for one block size: cut each w x h mask out of
// the precomputed 64x64 master templates, derive the 4:2:2 and 4:2:0
// chroma-resolution versions, and record every mask's offset in
// dav1d_masks.offsets. Each bit of `signs` (consumed LSB-first, one per
// codebook entry) selects which of the two complementary masks is generated
// directly and which slot the subsampled variants land in.
static COLD void fill2d_16x2(const int w, const int h, const enum BlockSize bs,
                             const uint8_t (*const master)[64 * 64],
                             const wedge_code_type *const cb,
                             uint8_t *masks_444, uint8_t *masks_422,
                             uint8_t *masks_420, unsigned signs)
{
    // Per-mask sizes (in bytes) at each subsampling level.
    const int n_stride_444 = (w * h);
    const int n_stride_422 = n_stride_444 >> 1;
    const int n_stride_420 = n_stride_444 >> 2;
    // Distance between the sign==0 and sign==1 banks of 16 masks.
    const int sign_stride_422 = 16 * n_stride_422;
    const int sign_stride_420 = 16 * n_stride_420;

    // assign pointer offsets in lookup table
    for (int n = 0; n < 16; n++) {
        const int sign = signs & 1;

        // Crop the w x h window centered on the codebook entry's pivot point
        // (offsets are in eighths of the block dimensions).
        copy2d(masks_444, master[cb[n].direction], sign, w, h,
               32 - (w * cb[n].x_offset >> 3), 32 - (h * cb[n].y_offset >> 3));

        // not using !sign is intentional here, since 444 does not require
        // any rounding since no chroma subsampling is applied.
        dav1d_masks.offsets[0][bs].wedge[0][n] =
        dav1d_masks.offsets[0][bs].wedge[1][n] = MASK_OFFSET(masks_444);

        // Subsampled planes need distinct storage per sign because of the
        // rounding applied in init_chroma(); `sign` picks the bank.
        dav1d_masks.offsets[1][bs].wedge[0][n] =
            init_chroma(&masks_422[ sign * sign_stride_422], masks_444, 0, w, h, 0);
        dav1d_masks.offsets[1][bs].wedge[1][n] =
            init_chroma(&masks_422[!sign * sign_stride_422], masks_444, 1, w, h, 0);
        dav1d_masks.offsets[2][bs].wedge[0][n] =
            init_chroma(&masks_420[ sign * sign_stride_420], masks_444, 0, w, h, 1);
        dav1d_masks.offsets[2][bs].wedge[1][n] =
            init_chroma(&masks_420[!sign * sign_stride_420], masks_444, 1, w, h, 1);

        signs >>= 1;
        masks_444 += n_stride_444;
        masks_422 += n_stride_422;
        masks_420 += n_stride_420;
    }
}
187 
build_nondc_ii_masks(uint8_t * const mask_v,const int w,const int h,const int step)188 static COLD void build_nondc_ii_masks(uint8_t *const mask_v, const int w,
189                                       const int h, const int step)
190 {
191     static const uint8_t ii_weights_1d[32] = {
192         60, 52, 45, 39, 34, 30, 26, 22, 19, 17, 15, 13, 11, 10,  8,  7,
193          6,  6,  5,  4,  4,  3,  3,  2,  2,  2,  2,  1,  1,  1,  1,  1,
194     };
195 
196     uint8_t *const mask_h  = &mask_v[w * h];
197     uint8_t *const mask_sm = &mask_h[w * h];
198     for (int y = 0, off = 0; y < h; y++, off += w) {
199         memset(&mask_v[off], ii_weights_1d[y * step], w);
200         for (int x = 0; x < w; x++) {
201             mask_sm[off + x] = ii_weights_1d[imin(x, y) * step];
202             mask_h[off + x] = ii_weights_1d[x * step];
203         }
204     }
205 }
206 
// One-time initialization of every wedge and inter-intra prediction mask in
// the global dav1d_masks table: builds the 64x64 master wedge templates,
// carves per-block-size wedge masks out of them, then fills the DC and
// non-DC inter-intra blend masks and all per-plane offset tables.
COLD void dav1d_init_ii_wedge_masks(void) {
    // This function is guaranteed to be called only once

    enum WedgeMasterLineType {
        WEDGE_MASTER_LINE_ODD,
        WEDGE_MASTER_LINE_EVEN,
        WEDGE_MASTER_LINE_VERT,
        N_WEDGE_MASTER_LINES,
    };
    // 8-pixel soft-transition profiles used for the edge of the master masks.
    static const uint8_t wedge_master_border[N_WEDGE_MASTER_LINES][8] = {
        [WEDGE_MASTER_LINE_ODD]  = {  1,  2,  6, 18, 37, 53, 60, 63 },
        [WEDGE_MASTER_LINE_EVEN] = {  1,  4, 11, 27, 46, 58, 62, 63 },
        [WEDGE_MASTER_LINE_VERT] = {  0,  2,  7, 21, 43, 57, 62, 64 },
    };
    uint8_t master[6][64 * 64];

    // create master templates
    // Vertical: the same centered transition on every row.
    for (int y = 0, off = 0; y < 64; y++, off += 64)
        insert_border(&master[WEDGE_VERTICAL][off],
                      wedge_master_border[WEDGE_MASTER_LINE_VERT], 32);
    // Oblique 63°: the transition shifts by one column per two rows,
    // alternating the even/odd sub-pixel border profiles.
    for (int y = 0, off = 0, ctr = 48; y < 64; y += 2, off += 128, ctr--)
    {
        insert_border(&master[WEDGE_OBLIQUE63][off],
                      wedge_master_border[WEDGE_MASTER_LINE_EVEN], ctr);
        insert_border(&master[WEDGE_OBLIQUE63][off + 64],
                      wedge_master_border[WEDGE_MASTER_LINE_ODD], ctr - 1);
    }

    // The remaining four directions are transposes/mirrors of the first two.
    transpose(master[WEDGE_OBLIQUE27], master[WEDGE_OBLIQUE63]);
    transpose(master[WEDGE_HORIZONTAL], master[WEDGE_VERTICAL]);
    hflip(master[WEDGE_OBLIQUE117], master[WEDGE_OBLIQUE63]);
    hflip(master[WEDGE_OBLIQUE153], master[WEDGE_OBLIQUE27]);

#define fill(w, h, sz_422, sz_420, hvsw, signs) \
    fill2d_16x2(w, h, BS_##w##x##h - BS_32x32, \
                master, wedge_codebook_16_##hvsw, \
                dav1d_masks.wedge_444_##w##x##h, \
                dav1d_masks.wedge_422_##sz_422, \
                dav1d_masks.wedge_420_##sz_420, signs)

    // Wedge masks per block size; the codebook variant (heqw/hltw/hgtw)
    // matches the block's aspect ratio (square / wide / tall).
    fill(32, 32, 16x32, 16x16, heqw, 0x7bfb);
    fill(32, 16, 16x16, 16x8,  hltw, 0x7beb);
    fill(32,  8, 16x8,  16x4,  hltw, 0x6beb);
    fill(16, 32,  8x32,  8x16, hgtw, 0x7beb);
    fill(16, 16,  8x16,  8x8,  heqw, 0x7bfb);
    fill(16,  8,  8x8,   8x4,  hltw, 0x7beb);
    fill( 8, 32,  4x32,  4x16, hgtw, 0x7aeb);
    fill( 8, 16,  4x16,  4x8,  hgtw, 0x7beb);
    fill( 8,  8,  4x8,   4x4,  heqw, 0x7bfb);
#undef fill

    // Inter-intra DC mask: a flat 32/64 blend shared by all sizes and planes.
    memset(dav1d_masks.ii_dc, 32, 32 * 32);
    for (int c = 0; c < 3; c++) {
        dav1d_masks.offsets[c][BS_32x32-BS_32x32].ii[II_DC_PRED] =
        dav1d_masks.offsets[c][BS_32x16-BS_32x32].ii[II_DC_PRED] =
        dav1d_masks.offsets[c][BS_16x32-BS_32x32].ii[II_DC_PRED] =
        dav1d_masks.offsets[c][BS_16x16-BS_32x32].ii[II_DC_PRED] =
        dav1d_masks.offsets[c][BS_16x8 -BS_32x32].ii[II_DC_PRED] =
        dav1d_masks.offsets[c][BS_8x16 -BS_32x32].ii[II_DC_PRED] =
        dav1d_masks.offsets[c][BS_8x8  -BS_32x32].ii[II_DC_PRED] =
            MASK_OFFSET(dav1d_masks.ii_dc);
    }

#define BUILD_NONDC_II_MASKS(w, h, step) \
    build_nondc_ii_masks(dav1d_masks.ii_nondc_##w##x##h, w, h, step)

// Record the per-plane offsets of mask variant p+1 (vert/horiz/smooth) for
// one block size; chroma planes reuse the smaller-resolution mask sets.
#define ASSIGN_NONDC_II_OFFSET(bs, w444, h444, w422, h422, w420, h420) \
    dav1d_masks.offsets[0][bs-BS_32x32].ii[p + 1] = \
        MASK_OFFSET(&dav1d_masks.ii_nondc_##w444##x##h444[p*w444*h444]); \
    dav1d_masks.offsets[1][bs-BS_32x32].ii[p + 1] = \
        MASK_OFFSET(&dav1d_masks.ii_nondc_##w422##x##h422[p*w422*h422]); \
    dav1d_masks.offsets[2][bs-BS_32x32].ii[p + 1] = \
        MASK_OFFSET(&dav1d_masks.ii_nondc_##w420##x##h420[p*w420*h420])

    BUILD_NONDC_II_MASKS(32, 32, 1);
    BUILD_NONDC_II_MASKS(16, 32, 1);
    BUILD_NONDC_II_MASKS(16, 16, 2);
    BUILD_NONDC_II_MASKS( 8, 32, 1);
    BUILD_NONDC_II_MASKS( 8, 16, 2);
    BUILD_NONDC_II_MASKS( 8,  8, 4);
    BUILD_NONDC_II_MASKS( 4, 16, 2);
    BUILD_NONDC_II_MASKS( 4,  8, 4);
    BUILD_NONDC_II_MASKS( 4,  4, 8);
    for (int p = 0; p < 3; p++) {
        ASSIGN_NONDC_II_OFFSET(BS_32x32, 32, 32, 16, 32, 16, 16);
        ASSIGN_NONDC_II_OFFSET(BS_32x16, 32, 32, 16, 16, 16, 16);
        ASSIGN_NONDC_II_OFFSET(BS_16x32, 16, 32,  8, 32,  8, 16);
        ASSIGN_NONDC_II_OFFSET(BS_16x16, 16, 16,  8, 16,  8,  8);
        ASSIGN_NONDC_II_OFFSET(BS_16x8,  16, 16,  8,  8,  8,  8);
        ASSIGN_NONDC_II_OFFSET(BS_8x16,   8, 16,  4, 16,  4,  8);
        ASSIGN_NONDC_II_OFFSET(BS_8x8,    8,  8,  4,  8,  4,  4);
    }
}
300