xref: /aosp_15_r20/external/libdav1d/src/filmgrain_tmpl.c (revision c09093415860a1c2373dacd84c4fde00c507cdfd)
/*
 * Copyright © 2018, Niklas Haas
 * Copyright © 2018, VideoLAN and dav1d authors
 * Copyright © 2018, Two Orioles, LLC
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice, this
 *    list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 *    this list of conditions and the following disclaimer in the documentation
 *    and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
28 
29 #include "common/attributes.h"
30 #include "common/intops.h"
31 
32 #include "src/filmgrain.h"
33 #include "src/tables.h"
34 
// Dimensions of the chroma grain template along an axis that is subsampled;
// used in place of GRAIN_WIDTH / GRAIN_HEIGHT by the chroma grain generator.
#define SUB_GRAIN_WIDTH 44
#define SUB_GRAIN_HEIGHT 38
37 
/*
 * Advances the linear-feedback shift register stored in *state by one step
 * and returns the top `bits` bits of the updated 16-bit state.
 *
 * The feedback bit is the XOR of state bits 0, 1, 3 and 12. It is inserted at
 * bit 15 while the remaining bits move one position to the right, so a state
 * that fits in 16 bits keeps fitting in 16 bits.
 *
 * bits:  number of result bits to extract; the shifts below are only
 *        meaningful for 0 <= bits <= 16
 * state: in/out register; the function does not mask it, so the caller is
 *        assumed to pass a value that fits in 16 bits
 *
 * Returns a value in [0, (1 << bits) - 1].
 */
static inline int get_random_number(const int bits, unsigned *const state) {
    const int r = *state;
    const unsigned feedback = ((r >> 0) ^ (r >> 1) ^ (r >> 3) ^ (r >> 12)) & 1;
    const unsigned next = (r >> 1) | (feedback << 15);

    *state = next;

    return (next >> (16 - bits)) & ((1 << bits) - 1);
}
45 
/*
 * Divides x by 2^shift with rounding: half of the divisor is added before the
 * right shift. For shift == 0 the added term is 0 and x is returned unchanged.
 *
 * The right shift operates on a signed int, so for negative sums the result
 * relies on the compiler implementing an arithmetic (sign-preserving) shift.
 * shift must be smaller than the bit width of int.
 */
static inline int round2(const int x, const uint64_t shift) {
    const int half = (1 << shift) >> 1;
    return (x + half) >> shift;
}
49 
/*
 * Builds the luma grain template in buf.
 *
 * Pass 1 fills all GRAIN_HEIGHT x GRAIN_WIDTH entries with samples of
 * dav1d_gaussian_sequence, selected by 11-bit pseudo-random numbers drawn
 * from an LFSR seeded with data->seed and scaled down with round2(., shift).
 *
 * Pass 2 applies an auto-regressive filter in place: each sample gets the
 * rounded weighted sum of its already-filtered neighbours (rows above, and
 * samples to the left on the current row) added to it, and the result is
 * clipped to [grain_min, grain_max]. The first ar_pad rows and the outer
 * ar_pad columns on each side are left as generated by pass 1.
 *
 * buf:  output grain template
 * data: film grain parameters; this function reads seed, grain_scale_shift,
 *       ar_coeff_lag, ar_coeffs_y and ar_coeff_shift
 *
 * NOTE(review): bitdepth_max is not declared in this block; presumably it is
 * provided through HIGHBD_DECL_SUFFIX or a bit-depth template macro - confirm.
 * NOTE(review): the neighbour reads stay inside buf only if
 * data->ar_coeff_lag <= ar_pad (3) - confirm where the lag is validated.
 */
static void generate_grain_y_c(entry buf[][GRAIN_WIDTH],
                               const Dav1dFilmGrainData *const data
                               HIGHBD_DECL_SUFFIX)
{
    const int bitdepth_min_8 = bitdepth_from_max(bitdepth_max) - 8;
    unsigned seed = data->seed;
    const int shift = 4 - bitdepth_min_8 + data->grain_scale_shift;
    const int grain_ctr = 128 << bitdepth_min_8;
    const int grain_min = -grain_ctr, grain_max = grain_ctr - 1;

    // Pass 1: white Gaussian noise
    for (int y = 0; y < GRAIN_HEIGHT; y++) {
        for (int x = 0; x < GRAIN_WIDTH; x++) {
            const int value = get_random_number(11, &seed);
            buf[y][x] = round2(dav1d_gaussian_sequence[ value ], shift);
        }
    }

    const int ar_pad = 3;
    const int ar_lag = data->ar_coeff_lag;

    // Pass 2: auto-regressive filter, in raster order so that every neighbour
    // that is read has already been filtered
    for (int y = ar_pad; y < GRAIN_HEIGHT; y++) {
        for (int x = ar_pad; x < GRAIN_WIDTH - ar_pad; x++) {
            const int8_t *coeff = data->ar_coeffs_y;
            int sum = 0;
            for (int dy = -ar_lag; dy <= 0; dy++) {
                for (int dx = -ar_lag; dx <= ar_lag; dx++) {
                    // The centre tap is the sample being produced. It is only
                    // reached on the last kernel row (dy == 0), so leaving
                    // the inner loop here ends the accumulation.
                    if (!dx && !dy)
                        break;
                    sum += *(coeff++) * buf[y + dy][x + dx];
                }
            }

            const int grain = buf[y][x] + round2(sum, data->ar_coeff_shift);
            buf[y][x] = iclip(grain, grain_min, grain_max);
        }
    }
}
87 
88 static NOINLINE void
generate_grain_uv_c(entry buf[][GRAIN_WIDTH],const entry buf_y[][GRAIN_WIDTH],const Dav1dFilmGrainData * const data,const intptr_t uv,const int subx,const int suby HIGHBD_DECL_SUFFIX)89 generate_grain_uv_c(entry buf[][GRAIN_WIDTH],
90                     const entry buf_y[][GRAIN_WIDTH],
91                     const Dav1dFilmGrainData *const data, const intptr_t uv,
92                     const int subx, const int suby HIGHBD_DECL_SUFFIX)
93 {
94     const int bitdepth_min_8 = bitdepth_from_max(bitdepth_max) - 8;
95     unsigned seed = data->seed ^ (uv ? 0x49d8 : 0xb524);
96     const int shift = 4 - bitdepth_min_8 + data->grain_scale_shift;
97     const int grain_ctr = 128 << bitdepth_min_8;
98     const int grain_min = -grain_ctr, grain_max = grain_ctr - 1;
99 
100     const int chromaW = subx ? SUB_GRAIN_WIDTH  : GRAIN_WIDTH;
101     const int chromaH = suby ? SUB_GRAIN_HEIGHT : GRAIN_HEIGHT;
102 
103     for (int y = 0; y < chromaH; y++) {
104         for (int x = 0; x < chromaW; x++) {
105             const int value = get_random_number(11, &seed);
106             buf[y][x] = round2(dav1d_gaussian_sequence[ value ], shift);
107         }
108     }
109 
110     const int ar_pad = 3;
111     const int ar_lag = data->ar_coeff_lag;
112 
113     for (int y = ar_pad; y < chromaH; y++) {
114         for (int x = ar_pad; x < chromaW - ar_pad; x++) {
115             const int8_t *coeff = data->ar_coeffs_uv[uv];
116             int sum = 0;
117             for (int dy = -ar_lag; dy <= 0; dy++) {
118                 for (int dx = -ar_lag; dx <= ar_lag; dx++) {
119                     // For the final (current) pixel, we need to add in the
120                     // contribution from the luma grain texture
121                     if (!dx && !dy) {
122                         if (!data->num_y_points)
123                             break;
124                         int luma = 0;
125                         const int lumaX = ((x - ar_pad) << subx) + ar_pad;
126                         const int lumaY = ((y - ar_pad) << suby) + ar_pad;
127                         for (int i = 0; i <= suby; i++) {
128                             for (int j = 0; j <= subx; j++) {
129                                 luma += buf_y[lumaY + i][lumaX + j];
130                             }
131                         }
132                         luma = round2(luma, subx + suby);
133                         sum += luma * (*coeff);
134                         break;
135                     }
136 
137                     sum += *(coeff++) * buf[y + dy][x + dx];
138                 }
139             }
140 
141             const int grain = buf[y][x] + round2(sum, data->ar_coeff_shift);
142             buf[y][x] = iclip(grain, grain_min, grain_max);
143         }
144     }
145 }
146 
147 #define gnuv_ss_fn(nm, ss_x, ss_y) \
148 static decl_generate_grain_uv_fn(generate_grain_uv_##nm##_c) { \
149     generate_grain_uv_c(buf, buf_y, data, uv, ss_x, ss_y HIGHBD_TAIL_SUFFIX); \
150 }
151 
152 gnuv_ss_fn(420, 1, 1);
153 gnuv_ss_fn(422, 1, 0);
154 gnuv_ss_fn(444, 0, 0);
155 
156 // samples from the correct block of a grain LUT, while taking into account the
157 // offsets provided by the offsets cache
sample_lut(const entry grain_lut[][GRAIN_WIDTH],const int offsets[2][2],const int subx,const int suby,const int bx,const int by,const int x,const int y)158 static inline entry sample_lut(const entry grain_lut[][GRAIN_WIDTH],
159                                const int offsets[2][2], const int subx, const int suby,
160                                const int bx, const int by, const int x, const int y)
161 {
162     const int randval = offsets[bx][by];
163     const int offx = 3 + (2 >> subx) * (3 + (randval >> 4));
164     const int offy = 3 + (2 >> suby) * (3 + (randval & 0xF));
165     return grain_lut[offy + y + (FG_BLOCK_SIZE >> suby) * by]
166                     [offx + x + (FG_BLOCK_SIZE >> subx) * bx];
167 }
168 
/*
 * Applies luma film grain to one horizontal strip of blocks.
 *
 * The strip is processed in blocks of up to FG_BLOCK_SIZE columns. For every
 * block a random offset into grain_lut is drawn; each pixel then receives
 *     round2(scaling[src] * grain, data->scaling_shift)
 * added to it, clipped to [min_value, max_value]. When data->overlap_flag is
 * set, the first (up to) two columns/rows of a block blend the grain of the
 * current block with the grain of the left/top neighbour using the weights
 * in w[].
 *
 * dst_row, src_row: first pixel of the strip in the output / input picture
 * stride:     row stride of both pictures, converted with PXSTRIDE() before
 *             use as a pixel offset
 * data:       film grain parameters
 * pw:         number of pixel columns to process
 * scaling:    scaling table indexed by the source pixel value
 * grain_lut:  luma grain template
 * bh:         number of pixel rows in this strip
 * row_num:    index of this strip; seeds the per-row random generator and
 *             enables vertical overlap when > 0
 *
 * NOTE(review): bitdepth_max / BITDEPTH_MAX are not declared in this block;
 * presumably they come from HIGHBD_DECL_SUFFIX and the bit-depth template
 * headers - confirm.
 */
static void fgy_32x32xn_c(pixel *const dst_row, const pixel *const src_row,
                          const ptrdiff_t stride,
                          const Dav1dFilmGrainData *const data, const size_t pw,
                          const uint8_t scaling[SCALING_SIZE],
                          const entry grain_lut[][GRAIN_WIDTH],
                          const int bh, const int row_num HIGHBD_DECL_SUFFIX)
{
    // Two rows of offsets are tracked only when blending with the strip above
    const int rows = 1 + (data->overlap_flag && row_num > 0);
    const int bitdepth_min_8 = bitdepth_from_max(bitdepth_max) - 8;
    const int grain_ctr = 128 << bitdepth_min_8;
    const int grain_min = -grain_ctr, grain_max = grain_ctr - 1;

    int min_value, max_value;
    if (data->clip_to_restricted_range) {
        min_value = 16 << bitdepth_min_8;
        max_value = 235 << bitdepth_min_8;
    } else {
        min_value = 0;
        max_value = BITDEPTH_MAX;
    }

    // seed[0] contains the current row, seed[1] contains the previous
    unsigned seed[2];
    for (int i = 0; i < rows; i++) {
        seed[i] = data->seed;
        seed[i] ^= (((row_num - i) * 37  + 178) & 0xFF) << 8;
        seed[i] ^= (((row_num - i) * 173 + 105) & 0xFF);
    }

    assert(stride % (FG_BLOCK_SIZE * sizeof(pixel)) == 0);

    int offsets[2 /* col offset */][2 /* row offset */];

    // process this row in FG_BLOCK_SIZE^2 blocks
    for (unsigned bx = 0; bx < pw; bx += FG_BLOCK_SIZE) {
        // bx < pw holds here, so the difference is positive
        const int bw = imin(FG_BLOCK_SIZE, (int)(pw - bx));

        if (data->overlap_flag && bx) {
            // shift previous offsets left
            for (int i = 0; i < rows; i++)
                offsets[1][i] = offsets[0][i];
        }

        // update current offsets
        for (int i = 0; i < rows; i++)
            offsets[0][i] = get_random_number(8, &seed[i]);

        // x/y block offsets to compensate for overlapped regions
        const int ystart = data->overlap_flag && row_num ? imin(2, bh) : 0;
        const int xstart = data->overlap_flag && bx      ? imin(2, bw) : 0;

        // Blend weights for the two overlapped samples: { old, new }
        static const int w[2][2] = { { 27, 17 }, { 17, 27 } };

// Expands to declarations plus a store, so it must be the last statement of
// its enclosing block and may be used only once per block.
#define add_noise_y(x, y, grain)                                                  \
        const pixel *const src = src_row + (y) * PXSTRIDE(stride) + (x) + bx;     \
        pixel *const dst = dst_row + (y) * PXSTRIDE(stride) + (x) + bx;           \
        const int noise = round2(scaling[ *src ] * (grain), data->scaling_shift); \
        *dst = iclip(*src + noise, min_value, max_value);

        for (int y = ystart; y < bh; y++) {
            // Non-overlapped image region (straightforward)
            for (int x = xstart; x < bw; x++) {
                int grain = sample_lut(grain_lut, offsets, 0, 0, 0, 0, x, y);
                add_noise_y(x, y, grain);
            }

            // Special case for overlapped column
            for (int x = 0; x < xstart; x++) {
                int grain = sample_lut(grain_lut, offsets, 0, 0, 0, 0, x, y);
                int old   = sample_lut(grain_lut, offsets, 0, 0, 1, 0, x, y);
                grain = round2(old * w[x][0] + grain * w[x][1], 5);
                grain = iclip(grain, grain_min, grain_max);
                add_noise_y(x, y, grain);
            }
        }

        for (int y = 0; y < ystart; y++) {
            // Special case for overlapped row (sans corner)
            for (int x = xstart; x < bw; x++) {
                int grain = sample_lut(grain_lut, offsets, 0, 0, 0, 0, x, y);
                int old   = sample_lut(grain_lut, offsets, 0, 0, 0, 1, x, y);
                grain = round2(old * w[y][0] + grain * w[y][1], 5);
                grain = iclip(grain, grain_min, grain_max);
                add_noise_y(x, y, grain);
            }

            // Special case for doubly-overlapped corner
            for (int x = 0; x < xstart; x++) {
                // Blend the top pixel with the top left block
                int top = sample_lut(grain_lut, offsets, 0, 0, 0, 1, x, y);
                int old = sample_lut(grain_lut, offsets, 0, 0, 1, 1, x, y);
                top = round2(old * w[x][0] + top * w[x][1], 5);
                top = iclip(top, grain_min, grain_max);

                // Blend the current pixel with the left block
                int grain = sample_lut(grain_lut, offsets, 0, 0, 0, 0, x, y);
                old = sample_lut(grain_lut, offsets, 0, 0, 1, 0, x, y);
                grain = round2(old * w[x][0] + grain * w[x][1], 5);
                grain = iclip(grain, grain_min, grain_max);

                // Mix the two rows together and apply grain
                grain = round2(top * w[y][0] + grain * w[y][1], 5);
                grain = iclip(grain, grain_min, grain_max);
                add_noise_y(x, y, grain);
            }
        }
    }
}
277 
278 static NOINLINE void
fguv_32x32xn_c(pixel * const dst_row,const pixel * const src_row,const ptrdiff_t stride,const Dav1dFilmGrainData * const data,const size_t pw,const uint8_t scaling[SCALING_SIZE],const entry grain_lut[][GRAIN_WIDTH],const int bh,const int row_num,const pixel * const luma_row,const ptrdiff_t luma_stride,const int uv,const int is_id,const int sx,const int sy HIGHBD_DECL_SUFFIX)279 fguv_32x32xn_c(pixel *const dst_row, const pixel *const src_row,
280                const ptrdiff_t stride, const Dav1dFilmGrainData *const data,
281                const size_t pw, const uint8_t scaling[SCALING_SIZE],
282                const entry grain_lut[][GRAIN_WIDTH], const int bh,
283                const int row_num, const pixel *const luma_row,
284                const ptrdiff_t luma_stride, const int uv, const int is_id,
285                const int sx, const int sy HIGHBD_DECL_SUFFIX)
286 {
287     const int rows = 1 + (data->overlap_flag && row_num > 0);
288     const int bitdepth_min_8 = bitdepth_from_max(bitdepth_max) - 8;
289     const int grain_ctr = 128 << bitdepth_min_8;
290     const int grain_min = -grain_ctr, grain_max = grain_ctr - 1;
291 
292     int min_value, max_value;
293     if (data->clip_to_restricted_range) {
294         min_value = 16 << bitdepth_min_8;
295         max_value = (is_id ? 235 : 240) << bitdepth_min_8;
296     } else {
297         min_value = 0;
298         max_value = BITDEPTH_MAX;
299     }
300 
301     // seed[0] contains the current row, seed[1] contains the previous
302     unsigned seed[2];
303     for (int i = 0; i < rows; i++) {
304         seed[i] = data->seed;
305         seed[i] ^= (((row_num - i) * 37  + 178) & 0xFF) << 8;
306         seed[i] ^= (((row_num - i) * 173 + 105) & 0xFF);
307     }
308 
309     assert(stride % (FG_BLOCK_SIZE * sizeof(pixel)) == 0);
310 
311     int offsets[2 /* col offset */][2 /* row offset */];
312 
313     // process this row in FG_BLOCK_SIZE^2 blocks (subsampled)
314     for (unsigned bx = 0; bx < pw; bx += FG_BLOCK_SIZE >> sx) {
315         const int bw = imin(FG_BLOCK_SIZE >> sx, (int)(pw - bx));
316         if (data->overlap_flag && bx) {
317             // shift previous offsets left
318             for (int i = 0; i < rows; i++)
319                 offsets[1][i] = offsets[0][i];
320         }
321 
322         // update current offsets
323         for (int i = 0; i < rows; i++)
324             offsets[0][i] = get_random_number(8, &seed[i]);
325 
326         // x/y block offsets to compensate for overlapped regions
327         const int ystart = data->overlap_flag && row_num ? imin(2 >> sy, bh) : 0;
328         const int xstart = data->overlap_flag && bx      ? imin(2 >> sx, bw) : 0;
329 
330         static const int w[2 /* sub */][2 /* off */][2] = {
331             { { 27, 17 }, { 17, 27 } },
332             { { 23, 22 } },
333         };
334 
335 #define add_noise_uv(x, y, grain)                                                    \
336             const int lx = (bx + x) << sx;                                           \
337             const int ly = y << sy;                                                  \
338             const pixel *const luma = luma_row + ly * PXSTRIDE(luma_stride) + lx;    \
339             pixel avg = luma[0];                                                     \
340             if (sx)                                                                  \
341                 avg = (avg + luma[1] + 1) >> 1;                                      \
342             const pixel *const src = src_row + (y) * PXSTRIDE(stride) + (bx + (x));  \
343             pixel *const dst = dst_row + (y) * PXSTRIDE(stride) + (bx + (x));        \
344             int val = avg;                                                           \
345             if (!data->chroma_scaling_from_luma) {                                   \
346                 const int combined = avg * data->uv_luma_mult[uv] +                  \
347                                *src * data->uv_mult[uv];                             \
348                 val = iclip_pixel( (combined >> 6) +                                 \
349                                    (data->uv_offset[uv] * (1 << bitdepth_min_8)) );  \
350             }                                                                        \
351             const int noise = round2(scaling[ val ] * (grain), data->scaling_shift); \
352             *dst = iclip(*src + noise, min_value, max_value);
353 
354         for (int y = ystart; y < bh; y++) {
355             // Non-overlapped image region (straightforward)
356             for (int x = xstart; x < bw; x++) {
357                 int grain = sample_lut(grain_lut, offsets, sx, sy, 0, 0, x, y);
358                 add_noise_uv(x, y, grain);
359             }
360 
361             // Special case for overlapped column
362             for (int x = 0; x < xstart; x++) {
363                 int grain = sample_lut(grain_lut, offsets, sx, sy, 0, 0, x, y);
364                 int old   = sample_lut(grain_lut, offsets, sx, sy, 1, 0, x, y);
365                 grain = round2(old * w[sx][x][0] + grain * w[sx][x][1], 5);
366                 grain = iclip(grain, grain_min, grain_max);
367                 add_noise_uv(x, y, grain);
368             }
369         }
370 
371         for (int y = 0; y < ystart; y++) {
372             // Special case for overlapped row (sans corner)
373             for (int x = xstart; x < bw; x++) {
374                 int grain = sample_lut(grain_lut, offsets, sx, sy, 0, 0, x, y);
375                 int old   = sample_lut(grain_lut, offsets, sx, sy, 0, 1, x, y);
376                 grain = round2(old * w[sy][y][0] + grain * w[sy][y][1], 5);
377                 grain = iclip(grain, grain_min, grain_max);
378                 add_noise_uv(x, y, grain);
379             }
380 
381             // Special case for doubly-overlapped corner
382             for (int x = 0; x < xstart; x++) {
383                 // Blend the top pixel with the top left block
384                 int top = sample_lut(grain_lut, offsets, sx, sy, 0, 1, x, y);
385                 int old = sample_lut(grain_lut, offsets, sx, sy, 1, 1, x, y);
386                 top = round2(old * w[sx][x][0] + top * w[sx][x][1], 5);
387                 top = iclip(top, grain_min, grain_max);
388 
389                 // Blend the current pixel with the left block
390                 int grain = sample_lut(grain_lut, offsets, sx, sy, 0, 0, x, y);
391                 old = sample_lut(grain_lut, offsets, sx, sy, 1, 0, x, y);
392                 grain = round2(old * w[sx][x][0] + grain * w[sx][x][1], 5);
393                 grain = iclip(grain, grain_min, grain_max);
394 
395                 // Mix the row rows together and apply to image
396                 grain = round2(top * w[sy][y][0] + grain * w[sy][y][1], 5);
397                 grain = iclip(grain, grain_min, grain_max);
398                 add_noise_uv(x, y, grain);
399             }
400         }
401     }
402 }
403 
404 #define fguv_ss_fn(nm, ss_x, ss_y) \
405 static decl_fguv_32x32xn_fn(fguv_32x32xn_##nm##_c) { \
406     fguv_32x32xn_c(dst_row, src_row, stride, data, pw, scaling, grain_lut, bh, \
407                    row_num, luma_row, luma_stride, uv_pl, is_id, ss_x, ss_y \
408                    HIGHBD_TAIL_SUFFIX); \
409 }
410 
411 fguv_ss_fn(420, 1, 1);
412 fguv_ss_fn(422, 1, 0);
413 fguv_ss_fn(444, 0, 0);
414 
415 #if HAVE_ASM
416 #if ARCH_AARCH64 || ARCH_ARM
417 #include "src/arm/filmgrain.h"
418 #elif ARCH_X86
419 #include "src/x86/filmgrain.h"
420 #endif
421 #endif
422 
bitfn(dav1d_film_grain_dsp_init)423 COLD void bitfn(dav1d_film_grain_dsp_init)(Dav1dFilmGrainDSPContext *const c) {
424     c->generate_grain_y = generate_grain_y_c;
425     c->generate_grain_uv[DAV1D_PIXEL_LAYOUT_I420 - 1] = generate_grain_uv_420_c;
426     c->generate_grain_uv[DAV1D_PIXEL_LAYOUT_I422 - 1] = generate_grain_uv_422_c;
427     c->generate_grain_uv[DAV1D_PIXEL_LAYOUT_I444 - 1] = generate_grain_uv_444_c;
428 
429     c->fgy_32x32xn = fgy_32x32xn_c;
430     c->fguv_32x32xn[DAV1D_PIXEL_LAYOUT_I420 - 1] = fguv_32x32xn_420_c;
431     c->fguv_32x32xn[DAV1D_PIXEL_LAYOUT_I422 - 1] = fguv_32x32xn_422_c;
432     c->fguv_32x32xn[DAV1D_PIXEL_LAYOUT_I444 - 1] = fguv_32x32xn_444_c;
433 
434 #if HAVE_ASM
435 #if ARCH_AARCH64 || ARCH_ARM
436     film_grain_dsp_init_arm(c);
437 #elif ARCH_X86
438     film_grain_dsp_init_x86(c);
439 #endif
440 #endif
441 }
442