/*
 * Copyright © 2018, VideoLAN and dav1d authors
 * Copyright © 2018, Two Orioles, LLC
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice, this
 *    list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 *    this list of conditions and the following disclaimer in the documentation
 *    and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "config.h"

#include <stdint.h>
#include <string.h>

#include "common/intops.h"

#include "src/wedge.h"

enum WedgeDirectionType {
    WEDGE_HORIZONTAL = 0,
    WEDGE_VERTICAL = 1,
    WEDGE_OBLIQUE27 = 2,
    WEDGE_OBLIQUE63 = 3,
    WEDGE_OBLIQUE117 = 4,
    WEDGE_OBLIQUE153 = 5,
    N_WEDGE_DIRECTIONS
};

typedef struct {
    uint8_t /* enum WedgeDirectionType */ direction;
    uint8_t x_offset;
    uint8_t y_offset;
} wedge_code_type;

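/* The three 16-entry wedge codebooks, chosen by block shape: hgtw for blocks
 * taller than wide, hltw for blocks wider than tall, heqw for square blocks.
 * x_offset and y_offset position the wedge split point in eighths of the
 * block dimensions, so { 4, 4 } is the block center. */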
static const wedge_code_type wedge_codebook_16_hgtw[16] = {
    { WEDGE_OBLIQUE27, 4, 4 }, { WEDGE_OBLIQUE63, 4, 4 },
    { WEDGE_OBLIQUE117, 4, 4 }, { WEDGE_OBLIQUE153, 4, 4 },
    { WEDGE_HORIZONTAL, 4, 2 }, { WEDGE_HORIZONTAL, 4, 4 },
    { WEDGE_HORIZONTAL, 4, 6 }, { WEDGE_VERTICAL, 4, 4 },
    { WEDGE_OBLIQUE27, 4, 2 }, { WEDGE_OBLIQUE27, 4, 6 },
    { WEDGE_OBLIQUE153, 4, 2 }, { WEDGE_OBLIQUE153, 4, 6 },
    { WEDGE_OBLIQUE63, 2, 4 }, { WEDGE_OBLIQUE63, 6, 4 },
    { WEDGE_OBLIQUE117, 2, 4 }, { WEDGE_OBLIQUE117, 6, 4 },
};

static const wedge_code_type wedge_codebook_16_hltw[16] = {
    { WEDGE_OBLIQUE27, 4, 4 }, { WEDGE_OBLIQUE63, 4, 4 },
    { WEDGE_OBLIQUE117, 4, 4 }, { WEDGE_OBLIQUE153, 4, 4 },
    { WEDGE_VERTICAL, 2, 4 }, { WEDGE_VERTICAL, 4, 4 },
    { WEDGE_VERTICAL, 6, 4 }, { WEDGE_HORIZONTAL, 4, 4 },
    { WEDGE_OBLIQUE27, 4, 2 }, { WEDGE_OBLIQUE27, 4, 6 },
    { WEDGE_OBLIQUE153, 4, 2 }, { WEDGE_OBLIQUE153, 4, 6 },
    { WEDGE_OBLIQUE63, 2, 4 }, { WEDGE_OBLIQUE63, 6, 4 },
    { WEDGE_OBLIQUE117, 2, 4 }, { WEDGE_OBLIQUE117, 6, 4 },
};

static const wedge_code_type wedge_codebook_16_heqw[16] = {
    { WEDGE_OBLIQUE27, 4, 4 }, { WEDGE_OBLIQUE63, 4, 4 },
    { WEDGE_OBLIQUE117, 4, 4 }, { WEDGE_OBLIQUE153, 4, 4 },
    { WEDGE_HORIZONTAL, 4, 2 }, { WEDGE_HORIZONTAL, 4, 6 },
    { WEDGE_VERTICAL, 2, 4 }, { WEDGE_VERTICAL, 6, 4 },
    { WEDGE_OBLIQUE27, 4, 2 }, { WEDGE_OBLIQUE27, 4, 6 },
    { WEDGE_OBLIQUE153, 4, 2 }, { WEDGE_OBLIQUE153, 4, 6 },
    { WEDGE_OBLIQUE63, 2, 4 }, { WEDGE_OBLIQUE63, 6, 4 },
    { WEDGE_OBLIQUE117, 2, 4 }, { WEDGE_OBLIQUE117, 6, 4 },
};

Dav1dMasks dav1d_masks;

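/* Write one 64-byte master mask row: zeros up to the 8-sample transition
 * region around position ctr, the transition values from src, and 64 for
 * the remainder of the row. */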
static void insert_border(uint8_t *const dst, const uint8_t *const src,
                          const int ctr)
{
    if (ctr > 4) memset(dst, 0, ctr - 4);
    memcpy(dst + imax(ctr, 4) - 4, src + imax(4 - ctr, 0), imin(64 - ctr, 8));
    if (ctr < 64 - 4)
        memset(dst + ctr + 4, 64, 64 - 4 - ctr);
}

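/* Whole-plane 64x64 transforms used to derive the remaining master wedge
 * directions from the vertical and oblique-63 base templates. */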
static void transpose(uint8_t *const dst, const uint8_t *const src) {
    for (int y = 0, y_off = 0; y < 64; y++, y_off += 64)
        for (int x = 0, x_off = 0; x < 64; x++, x_off += 64)
            dst[x_off + y] = src[y_off + x];
}

static void hflip(uint8_t *const dst, const uint8_t *const src) {
    for (int y = 0, y_off = 0; y < 64; y++, y_off += 64)
        for (int x = 0; x < 64; x++)
            dst[y_off + 64 - 1 - x] = src[y_off + x];
}

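/* Crop a w x h window out of a 64x64 master mask and store it densely
 * packed; if sign is set, store the complementary mask (64 - value)
 * instead. */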
static void copy2d(uint8_t *dst, const uint8_t *src, int sign,
                   const int w, const int h, const int x_off, const int y_off)
{
    src += y_off * 64 + x_off;
    if (sign) {
        for (int y = 0; y < h; y++) {
            for (int x = 0; x < w; x++)
                dst[x] = 64 - src[x];
            src += 64;
            dst += w;
        }
    } else {
        for (int y = 0; y < h; y++) {
            memcpy(dst, src, w);
            src += 64;
            dst += w;
        }
    }
}

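/* Lookup table entries store offsets relative to dav1d_masks in units of
 * 8 bytes so that they fit into a uint16_t. */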
#define MASK_OFFSET(x) ((uint16_t)(((uintptr_t)(x) - (uintptr_t)&dav1d_masks) >> 3))

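/* Derive a chroma wedge mask from a packed w x h luma mask by averaging
 * 2x1 (4:2:2) or 2x2 (4:2:0) luma samples; sign selects between the two
 * rounding variants stored for the two wedge signs. Returns the lookup
 * table offset of the generated mask. */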
static COLD uint16_t init_chroma(uint8_t *chroma, const uint8_t *luma,
                                 const int sign, const int w, const int h,
                                 const int ss_ver)
{
    const uint16_t offset = MASK_OFFSET(chroma);
    for (int y = 0; y < h; y += 1 + ss_ver) {
        for (int x = 0; x < w; x += 2) {
            int sum = luma[x] + luma[x + 1] + 1;
            if (ss_ver) sum += luma[w + x] + luma[w + x + 1] + 1;
            chroma[x >> 1] = (sum - sign) >> (1 + ss_ver);
        }
        luma += w << ss_ver;
        chroma += w >> 1;
    }
    return offset;
}

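/* Build the 16 wedge masks of one block size: crop each codebook entry out
 * of its master template for 4:4:4, derive the 4:2:2 and 4:2:0 chroma masks
 * from it, and record all offsets in dav1d_masks.offsets. Each bit of signs
 * holds the base wedge sign for the corresponding codebook entry. */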
static COLD void fill2d_16x2(const int w, const int h, const enum BlockSize bs,
                             const uint8_t (*const master)[64 * 64],
                             const wedge_code_type *const cb,
                             uint8_t *masks_444, uint8_t *masks_422,
                             uint8_t *masks_420, unsigned signs)
{
    const int n_stride_444 = (w * h);
    const int n_stride_422 = n_stride_444 >> 1;
    const int n_stride_420 = n_stride_444 >> 2;
    const int sign_stride_422 = 16 * n_stride_422;
    const int sign_stride_420 = 16 * n_stride_420;

    // assign pointer offsets in lookup table
    for (int n = 0; n < 16; n++) {
        const int sign = signs & 1;

        copy2d(masks_444, master[cb[n].direction], sign, w, h,
               32 - (w * cb[n].x_offset >> 3), 32 - (h * cb[n].y_offset >> 3));

        // Not using !sign here is intentional: 4:4:4 is not chroma-subsampled,
        // so no rounding is required and a single mask serves both sign
        // variants.
        dav1d_masks.offsets[0][bs].wedge[0][n] =
        dav1d_masks.offsets[0][bs].wedge[1][n] = MASK_OFFSET(masks_444);

        dav1d_masks.offsets[1][bs].wedge[0][n] =
            init_chroma(&masks_422[ sign * sign_stride_422], masks_444, 0, w, h, 0);
        dav1d_masks.offsets[1][bs].wedge[1][n] =
            init_chroma(&masks_422[!sign * sign_stride_422], masks_444, 1, w, h, 0);
        dav1d_masks.offsets[2][bs].wedge[0][n] =
            init_chroma(&masks_420[ sign * sign_stride_420], masks_444, 0, w, h, 1);
        dav1d_masks.offsets[2][bs].wedge[1][n] =
            init_chroma(&masks_420[!sign * sign_stride_420], masks_444, 1, w, h, 1);

        signs >>= 1;
        masks_444 += n_stride_444;
        masks_422 += n_stride_422;
        masks_420 += n_stride_420;
    }
}

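/* Build the blend masks for the non-DC inter-intra modes of one block size:
 * vertical, horizontal and smooth weights that fall off with distance from
 * the predicted edge, using ii_weights_1d subsampled by step. The three
 * masks are stored back to back starting at mask_v. */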
static COLD void build_nondc_ii_masks(uint8_t *const mask_v, const int w,
                                      const int h, const int step)
{
    static const uint8_t ii_weights_1d[32] = {
        60, 52, 45, 39, 34, 30, 26, 22, 19, 17, 15, 13, 11, 10, 8, 7,
        6, 6, 5, 4, 4, 3, 3, 2, 2, 2, 2, 1, 1, 1, 1, 1,
    };

    uint8_t *const mask_h = &mask_v[w * h];
    uint8_t *const mask_sm = &mask_h[w * h];
    for (int y = 0, off = 0; y < h; y++, off += w) {
        memset(&mask_v[off], ii_weights_1d[y * step], w);
        for (int x = 0; x < w; x++) {
            mask_sm[off + x] = ii_weights_1d[imin(x, y) * step];
            mask_h[off + x] = ii_weights_1d[x * step];
        }
    }
}

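/* Fill the global dav1d_masks table: build the 64x64 master wedge templates,
 * crop and subsample them into per-block-size wedge masks, and generate the
 * inter-intra blend masks. */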
COLD void dav1d_init_ii_wedge_masks(void) {
    // This function is guaranteed to be called only once

    enum WedgeMasterLineType {
        WEDGE_MASTER_LINE_ODD,
        WEDGE_MASTER_LINE_EVEN,
        WEDGE_MASTER_LINE_VERT,
        N_WEDGE_MASTER_LINES,
    };
    static const uint8_t wedge_master_border[N_WEDGE_MASTER_LINES][8] = {
        [WEDGE_MASTER_LINE_ODD] = { 1, 2, 6, 18, 37, 53, 60, 63 },
        [WEDGE_MASTER_LINE_EVEN] = { 1, 4, 11, 27, 46, 58, 62, 63 },
        [WEDGE_MASTER_LINE_VERT] = { 0, 2, 7, 21, 43, 57, 62, 64 },
    };
    uint8_t master[6][64 * 64];

    // create master templates
    for (int y = 0, off = 0; y < 64; y++, off += 64)
        insert_border(&master[WEDGE_VERTICAL][off],
                      wedge_master_border[WEDGE_MASTER_LINE_VERT], 32);
    for (int y = 0, off = 0, ctr = 48; y < 64; y += 2, off += 128, ctr--)
    {
        insert_border(&master[WEDGE_OBLIQUE63][off],
                      wedge_master_border[WEDGE_MASTER_LINE_EVEN], ctr);
        insert_border(&master[WEDGE_OBLIQUE63][off + 64],
                      wedge_master_border[WEDGE_MASTER_LINE_ODD], ctr - 1);
    }

    transpose(master[WEDGE_OBLIQUE27], master[WEDGE_OBLIQUE63]);
    transpose(master[WEDGE_HORIZONTAL], master[WEDGE_VERTICAL]);
    hflip(master[WEDGE_OBLIQUE117], master[WEDGE_OBLIQUE63]);
    hflip(master[WEDGE_OBLIQUE153], master[WEDGE_OBLIQUE27]);

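    /* Wedge masks exist for the nine block sizes from 8x8 to 32x32; chroma
     * masks are stored in buffers named after their subsampled dimensions
     * (e.g. a 32x32 block uses wedge_422_16x32 and wedge_420_16x16). */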
#define fill(w, h, sz_422, sz_420, hvsw, signs) \
    fill2d_16x2(w, h, BS_##w##x##h - BS_32x32, \
                master, wedge_codebook_16_##hvsw, \
                dav1d_masks.wedge_444_##w##x##h, \
                dav1d_masks.wedge_422_##sz_422, \
                dav1d_masks.wedge_420_##sz_420, signs)

    fill(32, 32, 16x32, 16x16, heqw, 0x7bfb);
    fill(32, 16, 16x16, 16x8, hltw, 0x7beb);
    fill(32, 8, 16x8, 16x4, hltw, 0x6beb);
    fill(16, 32, 8x32, 8x16, hgtw, 0x7beb);
    fill(16, 16, 8x16, 8x8, heqw, 0x7bfb);
    fill(16, 8, 8x8, 8x4, hltw, 0x7beb);
    fill( 8, 32, 4x32, 4x16, hgtw, 0x7aeb);
    fill( 8, 16, 4x16, 4x8, hgtw, 0x7beb);
    fill( 8, 8, 4x8, 4x4, heqw, 0x7bfb);
#undef fill

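    /* Inter-intra DC prediction blends with a constant weight of 32 (out of
     * 64); a single shared mask serves every eligible block size and chroma
     * layout. */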
    memset(dav1d_masks.ii_dc, 32, 32 * 32);
    for (int c = 0; c < 3; c++) {
        dav1d_masks.offsets[c][BS_32x32-BS_32x32].ii[II_DC_PRED] =
        dav1d_masks.offsets[c][BS_32x16-BS_32x32].ii[II_DC_PRED] =
        dav1d_masks.offsets[c][BS_16x32-BS_32x32].ii[II_DC_PRED] =
        dav1d_masks.offsets[c][BS_16x16-BS_32x32].ii[II_DC_PRED] =
        dav1d_masks.offsets[c][BS_16x8 -BS_32x32].ii[II_DC_PRED] =
        dav1d_masks.offsets[c][BS_8x16 -BS_32x32].ii[II_DC_PRED] =
        dav1d_masks.offsets[c][BS_8x8  -BS_32x32].ii[II_DC_PRED] =
            MASK_OFFSET(dav1d_masks.ii_dc);
    }

#define BUILD_NONDC_II_MASKS(w, h, step) \
    build_nondc_ii_masks(dav1d_masks.ii_nondc_##w##x##h, w, h, step)

#define ASSIGN_NONDC_II_OFFSET(bs, w444, h444, w422, h422, w420, h420) \
    dav1d_masks.offsets[0][bs-BS_32x32].ii[p + 1] = \
        MASK_OFFSET(&dav1d_masks.ii_nondc_##w444##x##h444[p*w444*h444]); \
    dav1d_masks.offsets[1][bs-BS_32x32].ii[p + 1] = \
        MASK_OFFSET(&dav1d_masks.ii_nondc_##w422##x##h422[p*w422*h422]); \
    dav1d_masks.offsets[2][bs-BS_32x32].ii[p + 1] = \
        MASK_OFFSET(&dav1d_masks.ii_nondc_##w420##x##h420[p*w420*h420])

    BUILD_NONDC_II_MASKS(32, 32, 1);
    BUILD_NONDC_II_MASKS(16, 32, 1);
    BUILD_NONDC_II_MASKS(16, 16, 2);
    BUILD_NONDC_II_MASKS( 8, 32, 1);
    BUILD_NONDC_II_MASKS( 8, 16, 2);
    BUILD_NONDC_II_MASKS( 8, 8, 4);
    BUILD_NONDC_II_MASKS( 4, 16, 2);
    BUILD_NONDC_II_MASKS( 4, 8, 4);
    BUILD_NONDC_II_MASKS( 4, 4, 8);
    for (int p = 0; p < 3; p++) {
        ASSIGN_NONDC_II_OFFSET(BS_32x32, 32, 32, 16, 32, 16, 16);
        ASSIGN_NONDC_II_OFFSET(BS_32x16, 32, 32, 16, 16, 16, 16);
        ASSIGN_NONDC_II_OFFSET(BS_16x32, 16, 32, 8, 32, 8, 16);
        ASSIGN_NONDC_II_OFFSET(BS_16x16, 16, 16, 8, 16, 8, 8);
        ASSIGN_NONDC_II_OFFSET(BS_16x8, 16, 16, 8, 8, 8, 8);
        ASSIGN_NONDC_II_OFFSET(BS_8x16, 8, 16, 4, 16, 4, 8);
        ASSIGN_NONDC_II_OFFSET(BS_8x8, 8, 8, 4, 8, 4, 4);
    }
}