1*c0909341SAndroid Build Coastguard Worker /*
2*c0909341SAndroid Build Coastguard Worker * Copyright © 2018, Niklas Haas
3*c0909341SAndroid Build Coastguard Worker * Copyright © 2018, VideoLAN and dav1d authors
4*c0909341SAndroid Build Coastguard Worker * Copyright © 2018, Two Orioles, LLC
5*c0909341SAndroid Build Coastguard Worker * All rights reserved.
6*c0909341SAndroid Build Coastguard Worker *
7*c0909341SAndroid Build Coastguard Worker * Redistribution and use in source and binary forms, with or without
8*c0909341SAndroid Build Coastguard Worker * modification, are permitted provided that the following conditions are met:
9*c0909341SAndroid Build Coastguard Worker *
10*c0909341SAndroid Build Coastguard Worker * 1. Redistributions of source code must retain the above copyright notice, this
11*c0909341SAndroid Build Coastguard Worker * list of conditions and the following disclaimer.
12*c0909341SAndroid Build Coastguard Worker *
13*c0909341SAndroid Build Coastguard Worker * 2. Redistributions in binary form must reproduce the above copyright notice,
14*c0909341SAndroid Build Coastguard Worker * this list of conditions and the following disclaimer in the documentation
15*c0909341SAndroid Build Coastguard Worker * and/or other materials provided with the distribution.
16*c0909341SAndroid Build Coastguard Worker *
17*c0909341SAndroid Build Coastguard Worker * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
18*c0909341SAndroid Build Coastguard Worker * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
19*c0909341SAndroid Build Coastguard Worker * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
20*c0909341SAndroid Build Coastguard Worker * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
21*c0909341SAndroid Build Coastguard Worker * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
22*c0909341SAndroid Build Coastguard Worker * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
23*c0909341SAndroid Build Coastguard Worker * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
24*c0909341SAndroid Build Coastguard Worker * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25*c0909341SAndroid Build Coastguard Worker * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
26*c0909341SAndroid Build Coastguard Worker * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27*c0909341SAndroid Build Coastguard Worker */
28*c0909341SAndroid Build Coastguard Worker
29*c0909341SAndroid Build Coastguard Worker #include "common/attributes.h"
30*c0909341SAndroid Build Coastguard Worker #include "common/intops.h"
31*c0909341SAndroid Build Coastguard Worker
32*c0909341SAndroid Build Coastguard Worker #include "src/filmgrain.h"
33*c0909341SAndroid Build Coastguard Worker #include "src/tables.h"
34*c0909341SAndroid Build Coastguard Worker
35*c0909341SAndroid Build Coastguard Worker #define SUB_GRAIN_WIDTH 44
36*c0909341SAndroid Build Coastguard Worker #define SUB_GRAIN_HEIGHT 38
37*c0909341SAndroid Build Coastguard Worker
/*
 * Advance the 16-bit linear-feedback shift register stored in *state by one
 * step and return the top `bits` bits of the new state.
 *
 * The feedback bit is the XOR of bits 0, 1, 3 and 12 of the old state; it is
 * shifted in at bit 15 while the old state moves right by one.
 *
 * bits:  number of most-significant state bits to return.
 * state: in/out register value, overwritten with the new state.
 */
static inline int get_random_number(const int bits, unsigned *const state) {
    const int cur = *state;
    const unsigned feedback = (cur ^ (cur >> 1) ^ (cur >> 3) ^ (cur >> 12)) & 1;
    const unsigned next = (cur >> 1) | (feedback << 15);
    *state = next;

    const int mask = (1 << bits) - 1;
    return (next >> (16 - bits)) & mask;
}
45*c0909341SAndroid Build Coastguard Worker
/*
 * Right-shift x by `shift` bits after adding half of the divisor
 * ((1 << shift) >> 1), i.e. a rounding shift. With shift == 0 the bias is 0
 * and x is returned unchanged.
 */
static inline int round2(const int x, const uint64_t shift) {
    const int half = (1 << shift) >> 1;
    return (x + half) >> shift;
}
49*c0909341SAndroid Build Coastguard Worker
/*
 * Fill the GRAIN_HEIGHT x GRAIN_WIDTH luma grain template `buf`.
 *
 * Pass 1 writes one scaled entry of dav1d_gaussian_sequence per cell, indexed
 * by 11-bit draws from an LFSR seeded with data->seed.
 * Pass 2 applies an auto-regressive filter in raster order: each cell (outside
 * a 3-sample border on the top, left and right) gets the rounded weighted sum
 * of its already-filtered neighbours added to it, then is clipped to the
 * signed grain range for the bit depth.
 *
 * NOTE(review): bitdepth_max is not declared here; presumably it is supplied
 * by HIGHBD_DECL_SUFFIX in high-bitdepth builds - confirm against the headers.
 */
static void generate_grain_y_c(entry buf[][GRAIN_WIDTH],
                               const Dav1dFilmGrainData *const data
                               HIGHBD_DECL_SUFFIX)
{
    const int bitdepth_min_8 = bitdepth_from_max(bitdepth_max) - 8;
    const int grain_ctr = 128 << bitdepth_min_8;
    const int grain_min = -grain_ctr;
    const int grain_max = grain_ctr - 1;
    const int shift = 4 - bitdepth_min_8 + data->grain_scale_shift;
    unsigned seed = data->seed;

    // Pass 1: pseudo-random gaussian samples, scaled down by `shift`.
    for (int row = 0; row < GRAIN_HEIGHT; row++) {
        entry *const line = buf[row];
        for (int col = 0; col < GRAIN_WIDTH; col++) {
            const int idx = get_random_number(11, &seed);
            line[col] = round2(dav1d_gaussian_sequence[idx], shift);
        }
    }

    // Pass 2: in-place auto-regressive filtering.
    const int ar_pad = 3;
    const int ar_lag = data->ar_coeff_lag;

    for (int row = ar_pad; row < GRAIN_HEIGHT; row++) {
        for (int col = ar_pad; col < GRAIN_WIDTH - ar_pad; col++) {
            const int8_t *coeff = data->ar_coeffs_y;
            int sum = 0;
            for (int dy = -ar_lag; dy <= 0; dy++) {
                // Rows above use the full [-lag, lag] window; on the current
                // row only the samples left of the centre are consumed.
                const int dx_last = dy ? ar_lag : -1;
                for (int dx = -ar_lag; dx <= dx_last; dx++)
                    sum += *(coeff++) * buf[row + dy][col + dx];
            }

            const int grain = buf[row][col] + round2(sum, data->ar_coeff_shift);
            buf[row][col] = iclip(grain, grain_min, grain_max);
        }
    }
}
87*c0909341SAndroid Build Coastguard Worker
88*c0909341SAndroid Build Coastguard Worker static NOINLINE void
generate_grain_uv_c(entry buf[][GRAIN_WIDTH],const entry buf_y[][GRAIN_WIDTH],const Dav1dFilmGrainData * const data,const intptr_t uv,const int subx,const int suby HIGHBD_DECL_SUFFIX)89*c0909341SAndroid Build Coastguard Worker generate_grain_uv_c(entry buf[][GRAIN_WIDTH],
90*c0909341SAndroid Build Coastguard Worker const entry buf_y[][GRAIN_WIDTH],
91*c0909341SAndroid Build Coastguard Worker const Dav1dFilmGrainData *const data, const intptr_t uv,
92*c0909341SAndroid Build Coastguard Worker const int subx, const int suby HIGHBD_DECL_SUFFIX)
93*c0909341SAndroid Build Coastguard Worker {
94*c0909341SAndroid Build Coastguard Worker const int bitdepth_min_8 = bitdepth_from_max(bitdepth_max) - 8;
95*c0909341SAndroid Build Coastguard Worker unsigned seed = data->seed ^ (uv ? 0x49d8 : 0xb524);
96*c0909341SAndroid Build Coastguard Worker const int shift = 4 - bitdepth_min_8 + data->grain_scale_shift;
97*c0909341SAndroid Build Coastguard Worker const int grain_ctr = 128 << bitdepth_min_8;
98*c0909341SAndroid Build Coastguard Worker const int grain_min = -grain_ctr, grain_max = grain_ctr - 1;
99*c0909341SAndroid Build Coastguard Worker
100*c0909341SAndroid Build Coastguard Worker const int chromaW = subx ? SUB_GRAIN_WIDTH : GRAIN_WIDTH;
101*c0909341SAndroid Build Coastguard Worker const int chromaH = suby ? SUB_GRAIN_HEIGHT : GRAIN_HEIGHT;
102*c0909341SAndroid Build Coastguard Worker
103*c0909341SAndroid Build Coastguard Worker for (int y = 0; y < chromaH; y++) {
104*c0909341SAndroid Build Coastguard Worker for (int x = 0; x < chromaW; x++) {
105*c0909341SAndroid Build Coastguard Worker const int value = get_random_number(11, &seed);
106*c0909341SAndroid Build Coastguard Worker buf[y][x] = round2(dav1d_gaussian_sequence[ value ], shift);
107*c0909341SAndroid Build Coastguard Worker }
108*c0909341SAndroid Build Coastguard Worker }
109*c0909341SAndroid Build Coastguard Worker
110*c0909341SAndroid Build Coastguard Worker const int ar_pad = 3;
111*c0909341SAndroid Build Coastguard Worker const int ar_lag = data->ar_coeff_lag;
112*c0909341SAndroid Build Coastguard Worker
113*c0909341SAndroid Build Coastguard Worker for (int y = ar_pad; y < chromaH; y++) {
114*c0909341SAndroid Build Coastguard Worker for (int x = ar_pad; x < chromaW - ar_pad; x++) {
115*c0909341SAndroid Build Coastguard Worker const int8_t *coeff = data->ar_coeffs_uv[uv];
116*c0909341SAndroid Build Coastguard Worker int sum = 0;
117*c0909341SAndroid Build Coastguard Worker for (int dy = -ar_lag; dy <= 0; dy++) {
118*c0909341SAndroid Build Coastguard Worker for (int dx = -ar_lag; dx <= ar_lag; dx++) {
119*c0909341SAndroid Build Coastguard Worker // For the final (current) pixel, we need to add in the
120*c0909341SAndroid Build Coastguard Worker // contribution from the luma grain texture
121*c0909341SAndroid Build Coastguard Worker if (!dx && !dy) {
122*c0909341SAndroid Build Coastguard Worker if (!data->num_y_points)
123*c0909341SAndroid Build Coastguard Worker break;
124*c0909341SAndroid Build Coastguard Worker int luma = 0;
125*c0909341SAndroid Build Coastguard Worker const int lumaX = ((x - ar_pad) << subx) + ar_pad;
126*c0909341SAndroid Build Coastguard Worker const int lumaY = ((y - ar_pad) << suby) + ar_pad;
127*c0909341SAndroid Build Coastguard Worker for (int i = 0; i <= suby; i++) {
128*c0909341SAndroid Build Coastguard Worker for (int j = 0; j <= subx; j++) {
129*c0909341SAndroid Build Coastguard Worker luma += buf_y[lumaY + i][lumaX + j];
130*c0909341SAndroid Build Coastguard Worker }
131*c0909341SAndroid Build Coastguard Worker }
132*c0909341SAndroid Build Coastguard Worker luma = round2(luma, subx + suby);
133*c0909341SAndroid Build Coastguard Worker sum += luma * (*coeff);
134*c0909341SAndroid Build Coastguard Worker break;
135*c0909341SAndroid Build Coastguard Worker }
136*c0909341SAndroid Build Coastguard Worker
137*c0909341SAndroid Build Coastguard Worker sum += *(coeff++) * buf[y + dy][x + dx];
138*c0909341SAndroid Build Coastguard Worker }
139*c0909341SAndroid Build Coastguard Worker }
140*c0909341SAndroid Build Coastguard Worker
141*c0909341SAndroid Build Coastguard Worker const int grain = buf[y][x] + round2(sum, data->ar_coeff_shift);
142*c0909341SAndroid Build Coastguard Worker buf[y][x] = iclip(grain, grain_min, grain_max);
143*c0909341SAndroid Build Coastguard Worker }
144*c0909341SAndroid Build Coastguard Worker }
145*c0909341SAndroid Build Coastguard Worker }
146*c0909341SAndroid Build Coastguard Worker
147*c0909341SAndroid Build Coastguard Worker #define gnuv_ss_fn(nm, ss_x, ss_y) \
148*c0909341SAndroid Build Coastguard Worker static decl_generate_grain_uv_fn(generate_grain_uv_##nm##_c) { \
149*c0909341SAndroid Build Coastguard Worker generate_grain_uv_c(buf, buf_y, data, uv, ss_x, ss_y HIGHBD_TAIL_SUFFIX); \
150*c0909341SAndroid Build Coastguard Worker }
151*c0909341SAndroid Build Coastguard Worker
152*c0909341SAndroid Build Coastguard Worker gnuv_ss_fn(420, 1, 1);
153*c0909341SAndroid Build Coastguard Worker gnuv_ss_fn(422, 1, 0);
154*c0909341SAndroid Build Coastguard Worker gnuv_ss_fn(444, 0, 0);
155*c0909341SAndroid Build Coastguard Worker
156*c0909341SAndroid Build Coastguard Worker // samples from the correct block of a grain LUT, while taking into account the
157*c0909341SAndroid Build Coastguard Worker // offsets provided by the offsets cache
sample_lut(const entry grain_lut[][GRAIN_WIDTH],const int offsets[2][2],const int subx,const int suby,const int bx,const int by,const int x,const int y)158*c0909341SAndroid Build Coastguard Worker static inline entry sample_lut(const entry grain_lut[][GRAIN_WIDTH],
159*c0909341SAndroid Build Coastguard Worker const int offsets[2][2], const int subx, const int suby,
160*c0909341SAndroid Build Coastguard Worker const int bx, const int by, const int x, const int y)
161*c0909341SAndroid Build Coastguard Worker {
162*c0909341SAndroid Build Coastguard Worker const int randval = offsets[bx][by];
163*c0909341SAndroid Build Coastguard Worker const int offx = 3 + (2 >> subx) * (3 + (randval >> 4));
164*c0909341SAndroid Build Coastguard Worker const int offy = 3 + (2 >> suby) * (3 + (randval & 0xF));
165*c0909341SAndroid Build Coastguard Worker return grain_lut[offy + y + (FG_BLOCK_SIZE >> suby) * by]
166*c0909341SAndroid Build Coastguard Worker [offx + x + (FG_BLOCK_SIZE >> subx) * bx];
167*c0909341SAndroid Build Coastguard Worker }
168*c0909341SAndroid Build Coastguard Worker
/* Weighted mix of two grain samples (weights wt[0] for `old`, wt[1] for
 * `cur`), rounded down by 5 bits and clipped to [lo, hi]. */
static inline int fgy_blend(const int old, const int cur, const int *const wt,
                            const int lo, const int hi)
{
    return iclip(round2(old * wt[0] + cur * wt[1], 5), lo, hi);
}

/*
 * Apply luma film grain to one row of blocks.
 *
 * Reads `bh` lines of `pw` pixels from src_row, adds scaled grain taken from
 * grain_lut and writes the clipped result to dst_row. The row is processed in
 * FG_BLOCK_SIZE-wide blocks; each block draws its own random LUT offset.
 * When data->overlap_flag is set, the first two columns of every block but
 * the first, and the first two lines of every row but row 0, blend the grain
 * of the current block with that of the left / top neighbour block.
 *
 * NOTE(review): bitdepth_max is presumably supplied by HIGHBD_DECL_SUFFIX in
 * high-bitdepth builds - confirm against the headers.
 */
static void fgy_32x32xn_c(pixel *const dst_row, const pixel *const src_row,
                          const ptrdiff_t stride,
                          const Dav1dFilmGrainData *const data, const size_t pw,
                          const uint8_t scaling[SCALING_SIZE],
                          const entry grain_lut[][GRAIN_WIDTH],
                          const int bh, const int row_num HIGHBD_DECL_SUFFIX)
{
    // Blend weights { neighbour, current } for overlap position 0 and 1.
    static const int w[2][2] = { { 27, 17 }, { 17, 27 } };

    const int rows = 1 + (data->overlap_flag && row_num > 0);
    const int bitdepth_min_8 = bitdepth_from_max(bitdepth_max) - 8;
    const int grain_ctr = 128 << bitdepth_min_8;
    const int grain_min = -grain_ctr;
    const int grain_max = grain_ctr - 1;

    // Output clipping range.
    int min_value = 0;
    int max_value = BITDEPTH_MAX;
    if (data->clip_to_restricted_range) {
        min_value = 16 << bitdepth_min_8;
        max_value = 235 << bitdepth_min_8;
    }

    // seed[0] belongs to the current row, seed[1] to the previous one
    unsigned seed[2];
    for (int i = 0; i < rows; i++) {
        const int r = row_num - i;
        seed[i] = data->seed;
        seed[i] ^= ((r * 37 + 178) & 0xFF) << 8;
        seed[i] ^= ((r * 173 + 105) & 0xFF);
    }

    assert(stride % (FG_BLOCK_SIZE * sizeof(pixel)) == 0);

    int offsets[2 /* col offset */][2 /* row offset */];

    // Scale `grain` by the source pixel's scaling entry, add it to the source
    // pixel at (x, y) of the current block and store the clipped result.
    #define add_noise_y(x, y, grain) do { \
        const pixel *const src = src_row + (y) * PXSTRIDE(stride) + (x) + bx; \
        pixel *const dst = dst_row + (y) * PXSTRIDE(stride) + (x) + bx; \
        const int noise = round2(scaling[ *src ] * (grain), data->scaling_shift); \
        *dst = iclip(*src + noise, min_value, max_value); \
    } while (0)

    // walk the row one FG_BLOCK_SIZE-wide block at a time
    for (unsigned bx = 0; bx < pw; bx += FG_BLOCK_SIZE) {
        const int bw = imin(FG_BLOCK_SIZE, (int) pw - bx);

        // the offsets of the block just processed become the "previous" ones
        if (data->overlap_flag && bx) {
            for (int i = 0; i < rows; i++)
                offsets[1][i] = offsets[0][i];
        }

        // draw fresh offsets for this block
        for (int i = 0; i < rows; i++)
            offsets[0][i] = get_random_number(8, &seed[i]);

        // number of leading lines / columns that overlap a neighbour block
        const int ystart = data->overlap_flag && row_num ? imin(2, bh) : 0;
        const int xstart = data->overlap_flag && bx ? imin(2, bw) : 0;

        for (int y = ystart; y < bh; y++) {
            // Plain region: grain of the current block only
            for (int x = xstart; x < bw; x++) {
                const int grain = sample_lut(grain_lut, offsets, 0, 0, 0, 0, x, y);
                add_noise_y(x, y, grain);
            }

            // Overlapped columns: blend with the block to the left
            for (int x = 0; x < xstart; x++) {
                const int cur  = sample_lut(grain_lut, offsets, 0, 0, 0, 0, x, y);
                const int left = sample_lut(grain_lut, offsets, 0, 0, 1, 0, x, y);
                const int grain = fgy_blend(left, cur, w[x], grain_min, grain_max);
                add_noise_y(x, y, grain);
            }
        }

        for (int y = 0; y < ystart; y++) {
            // Overlapped lines (without the corner): blend with the block above
            for (int x = xstart; x < bw; x++) {
                const int cur   = sample_lut(grain_lut, offsets, 0, 0, 0, 0, x, y);
                const int above = sample_lut(grain_lut, offsets, 0, 0, 0, 1, x, y);
                const int grain = fgy_blend(above, cur, w[y], grain_min, grain_max);
                add_noise_y(x, y, grain);
            }

            // Doubly-overlapped corner
            for (int x = 0; x < xstart; x++) {
                // Blend the top sample with the top-left block
                const int above      = sample_lut(grain_lut, offsets, 0, 0, 0, 1, x, y);
                const int above_left = sample_lut(grain_lut, offsets, 0, 0, 1, 1, x, y);
                const int top = fgy_blend(above_left, above, w[x], grain_min, grain_max);

                // Blend the current sample with the left block
                const int cur  = sample_lut(grain_lut, offsets, 0, 0, 0, 0, x, y);
                const int left = sample_lut(grain_lut, offsets, 0, 0, 1, 0, x, y);
                const int mid = fgy_blend(left, cur, w[x], grain_min, grain_max);

                // Mix the two rows together and apply grain
                const int grain = fgy_blend(top, mid, w[y], grain_min, grain_max);
                add_noise_y(x, y, grain);
            }
        }
    }
}
277*c0909341SAndroid Build Coastguard Worker
278*c0909341SAndroid Build Coastguard Worker static NOINLINE void
fguv_32x32xn_c(pixel * const dst_row,const pixel * const src_row,const ptrdiff_t stride,const Dav1dFilmGrainData * const data,const size_t pw,const uint8_t scaling[SCALING_SIZE],const entry grain_lut[][GRAIN_WIDTH],const int bh,const int row_num,const pixel * const luma_row,const ptrdiff_t luma_stride,const int uv,const int is_id,const int sx,const int sy HIGHBD_DECL_SUFFIX)279*c0909341SAndroid Build Coastguard Worker fguv_32x32xn_c(pixel *const dst_row, const pixel *const src_row,
280*c0909341SAndroid Build Coastguard Worker const ptrdiff_t stride, const Dav1dFilmGrainData *const data,
281*c0909341SAndroid Build Coastguard Worker const size_t pw, const uint8_t scaling[SCALING_SIZE],
282*c0909341SAndroid Build Coastguard Worker const entry grain_lut[][GRAIN_WIDTH], const int bh,
283*c0909341SAndroid Build Coastguard Worker const int row_num, const pixel *const luma_row,
284*c0909341SAndroid Build Coastguard Worker const ptrdiff_t luma_stride, const int uv, const int is_id,
285*c0909341SAndroid Build Coastguard Worker const int sx, const int sy HIGHBD_DECL_SUFFIX)
286*c0909341SAndroid Build Coastguard Worker {
287*c0909341SAndroid Build Coastguard Worker const int rows = 1 + (data->overlap_flag && row_num > 0);
288*c0909341SAndroid Build Coastguard Worker const int bitdepth_min_8 = bitdepth_from_max(bitdepth_max) - 8;
289*c0909341SAndroid Build Coastguard Worker const int grain_ctr = 128 << bitdepth_min_8;
290*c0909341SAndroid Build Coastguard Worker const int grain_min = -grain_ctr, grain_max = grain_ctr - 1;
291*c0909341SAndroid Build Coastguard Worker
292*c0909341SAndroid Build Coastguard Worker int min_value, max_value;
293*c0909341SAndroid Build Coastguard Worker if (data->clip_to_restricted_range) {
294*c0909341SAndroid Build Coastguard Worker min_value = 16 << bitdepth_min_8;
295*c0909341SAndroid Build Coastguard Worker max_value = (is_id ? 235 : 240) << bitdepth_min_8;
296*c0909341SAndroid Build Coastguard Worker } else {
297*c0909341SAndroid Build Coastguard Worker min_value = 0;
298*c0909341SAndroid Build Coastguard Worker max_value = BITDEPTH_MAX;
299*c0909341SAndroid Build Coastguard Worker }
300*c0909341SAndroid Build Coastguard Worker
301*c0909341SAndroid Build Coastguard Worker // seed[0] contains the current row, seed[1] contains the previous
302*c0909341SAndroid Build Coastguard Worker unsigned seed[2];
303*c0909341SAndroid Build Coastguard Worker for (int i = 0; i < rows; i++) {
304*c0909341SAndroid Build Coastguard Worker seed[i] = data->seed;
305*c0909341SAndroid Build Coastguard Worker seed[i] ^= (((row_num - i) * 37 + 178) & 0xFF) << 8;
306*c0909341SAndroid Build Coastguard Worker seed[i] ^= (((row_num - i) * 173 + 105) & 0xFF);
307*c0909341SAndroid Build Coastguard Worker }
308*c0909341SAndroid Build Coastguard Worker
309*c0909341SAndroid Build Coastguard Worker assert(stride % (FG_BLOCK_SIZE * sizeof(pixel)) == 0);
310*c0909341SAndroid Build Coastguard Worker
311*c0909341SAndroid Build Coastguard Worker int offsets[2 /* col offset */][2 /* row offset */];
312*c0909341SAndroid Build Coastguard Worker
313*c0909341SAndroid Build Coastguard Worker // process this row in FG_BLOCK_SIZE^2 blocks (subsampled)
314*c0909341SAndroid Build Coastguard Worker for (unsigned bx = 0; bx < pw; bx += FG_BLOCK_SIZE >> sx) {
315*c0909341SAndroid Build Coastguard Worker const int bw = imin(FG_BLOCK_SIZE >> sx, (int)(pw - bx));
316*c0909341SAndroid Build Coastguard Worker if (data->overlap_flag && bx) {
317*c0909341SAndroid Build Coastguard Worker // shift previous offsets left
318*c0909341SAndroid Build Coastguard Worker for (int i = 0; i < rows; i++)
319*c0909341SAndroid Build Coastguard Worker offsets[1][i] = offsets[0][i];
320*c0909341SAndroid Build Coastguard Worker }
321*c0909341SAndroid Build Coastguard Worker
322*c0909341SAndroid Build Coastguard Worker // update current offsets
323*c0909341SAndroid Build Coastguard Worker for (int i = 0; i < rows; i++)
324*c0909341SAndroid Build Coastguard Worker offsets[0][i] = get_random_number(8, &seed[i]);
325*c0909341SAndroid Build Coastguard Worker
326*c0909341SAndroid Build Coastguard Worker // x/y block offsets to compensate for overlapped regions
327*c0909341SAndroid Build Coastguard Worker const int ystart = data->overlap_flag && row_num ? imin(2 >> sy, bh) : 0;
328*c0909341SAndroid Build Coastguard Worker const int xstart = data->overlap_flag && bx ? imin(2 >> sx, bw) : 0;
329*c0909341SAndroid Build Coastguard Worker
330*c0909341SAndroid Build Coastguard Worker static const int w[2 /* sub */][2 /* off */][2] = {
331*c0909341SAndroid Build Coastguard Worker { { 27, 17 }, { 17, 27 } },
332*c0909341SAndroid Build Coastguard Worker { { 23, 22 } },
333*c0909341SAndroid Build Coastguard Worker };
334*c0909341SAndroid Build Coastguard Worker
335*c0909341SAndroid Build Coastguard Worker #define add_noise_uv(x, y, grain) \
336*c0909341SAndroid Build Coastguard Worker const int lx = (bx + x) << sx; \
337*c0909341SAndroid Build Coastguard Worker const int ly = y << sy; \
338*c0909341SAndroid Build Coastguard Worker const pixel *const luma = luma_row + ly * PXSTRIDE(luma_stride) + lx; \
339*c0909341SAndroid Build Coastguard Worker pixel avg = luma[0]; \
340*c0909341SAndroid Build Coastguard Worker if (sx) \
341*c0909341SAndroid Build Coastguard Worker avg = (avg + luma[1] + 1) >> 1; \
342*c0909341SAndroid Build Coastguard Worker const pixel *const src = src_row + (y) * PXSTRIDE(stride) + (bx + (x)); \
343*c0909341SAndroid Build Coastguard Worker pixel *const dst = dst_row + (y) * PXSTRIDE(stride) + (bx + (x)); \
344*c0909341SAndroid Build Coastguard Worker int val = avg; \
345*c0909341SAndroid Build Coastguard Worker if (!data->chroma_scaling_from_luma) { \
346*c0909341SAndroid Build Coastguard Worker const int combined = avg * data->uv_luma_mult[uv] + \
347*c0909341SAndroid Build Coastguard Worker *src * data->uv_mult[uv]; \
348*c0909341SAndroid Build Coastguard Worker val = iclip_pixel( (combined >> 6) + \
349*c0909341SAndroid Build Coastguard Worker (data->uv_offset[uv] * (1 << bitdepth_min_8)) ); \
350*c0909341SAndroid Build Coastguard Worker } \
351*c0909341SAndroid Build Coastguard Worker const int noise = round2(scaling[ val ] * (grain), data->scaling_shift); \
352*c0909341SAndroid Build Coastguard Worker *dst = iclip(*src + noise, min_value, max_value);
353*c0909341SAndroid Build Coastguard Worker
354*c0909341SAndroid Build Coastguard Worker for (int y = ystart; y < bh; y++) {
355*c0909341SAndroid Build Coastguard Worker // Non-overlapped image region (straightforward)
356*c0909341SAndroid Build Coastguard Worker for (int x = xstart; x < bw; x++) {
357*c0909341SAndroid Build Coastguard Worker int grain = sample_lut(grain_lut, offsets, sx, sy, 0, 0, x, y);
358*c0909341SAndroid Build Coastguard Worker add_noise_uv(x, y, grain);
359*c0909341SAndroid Build Coastguard Worker }
360*c0909341SAndroid Build Coastguard Worker
361*c0909341SAndroid Build Coastguard Worker // Special case for overlapped column
362*c0909341SAndroid Build Coastguard Worker for (int x = 0; x < xstart; x++) {
363*c0909341SAndroid Build Coastguard Worker int grain = sample_lut(grain_lut, offsets, sx, sy, 0, 0, x, y);
364*c0909341SAndroid Build Coastguard Worker int old = sample_lut(grain_lut, offsets, sx, sy, 1, 0, x, y);
365*c0909341SAndroid Build Coastguard Worker grain = round2(old * w[sx][x][0] + grain * w[sx][x][1], 5);
366*c0909341SAndroid Build Coastguard Worker grain = iclip(grain, grain_min, grain_max);
367*c0909341SAndroid Build Coastguard Worker add_noise_uv(x, y, grain);
368*c0909341SAndroid Build Coastguard Worker }
369*c0909341SAndroid Build Coastguard Worker }
370*c0909341SAndroid Build Coastguard Worker
371*c0909341SAndroid Build Coastguard Worker for (int y = 0; y < ystart; y++) {
372*c0909341SAndroid Build Coastguard Worker // Special case for overlapped row (sans corner)
373*c0909341SAndroid Build Coastguard Worker for (int x = xstart; x < bw; x++) {
374*c0909341SAndroid Build Coastguard Worker int grain = sample_lut(grain_lut, offsets, sx, sy, 0, 0, x, y);
375*c0909341SAndroid Build Coastguard Worker int old = sample_lut(grain_lut, offsets, sx, sy, 0, 1, x, y);
376*c0909341SAndroid Build Coastguard Worker grain = round2(old * w[sy][y][0] + grain * w[sy][y][1], 5);
377*c0909341SAndroid Build Coastguard Worker grain = iclip(grain, grain_min, grain_max);
378*c0909341SAndroid Build Coastguard Worker add_noise_uv(x, y, grain);
379*c0909341SAndroid Build Coastguard Worker }
380*c0909341SAndroid Build Coastguard Worker
381*c0909341SAndroid Build Coastguard Worker // Special case for doubly-overlapped corner
382*c0909341SAndroid Build Coastguard Worker for (int x = 0; x < xstart; x++) {
383*c0909341SAndroid Build Coastguard Worker // Blend the top pixel with the top left block
384*c0909341SAndroid Build Coastguard Worker int top = sample_lut(grain_lut, offsets, sx, sy, 0, 1, x, y);
385*c0909341SAndroid Build Coastguard Worker int old = sample_lut(grain_lut, offsets, sx, sy, 1, 1, x, y);
386*c0909341SAndroid Build Coastguard Worker top = round2(old * w[sx][x][0] + top * w[sx][x][1], 5);
387*c0909341SAndroid Build Coastguard Worker top = iclip(top, grain_min, grain_max);
388*c0909341SAndroid Build Coastguard Worker
389*c0909341SAndroid Build Coastguard Worker // Blend the current pixel with the left block
390*c0909341SAndroid Build Coastguard Worker int grain = sample_lut(grain_lut, offsets, sx, sy, 0, 0, x, y);
391*c0909341SAndroid Build Coastguard Worker old = sample_lut(grain_lut, offsets, sx, sy, 1, 0, x, y);
392*c0909341SAndroid Build Coastguard Worker grain = round2(old * w[sx][x][0] + grain * w[sx][x][1], 5);
393*c0909341SAndroid Build Coastguard Worker grain = iclip(grain, grain_min, grain_max);
394*c0909341SAndroid Build Coastguard Worker
395*c0909341SAndroid Build Coastguard Worker // Mix the row rows together and apply to image
396*c0909341SAndroid Build Coastguard Worker grain = round2(top * w[sy][y][0] + grain * w[sy][y][1], 5);
397*c0909341SAndroid Build Coastguard Worker grain = iclip(grain, grain_min, grain_max);
398*c0909341SAndroid Build Coastguard Worker add_noise_uv(x, y, grain);
399*c0909341SAndroid Build Coastguard Worker }
400*c0909341SAndroid Build Coastguard Worker }
401*c0909341SAndroid Build Coastguard Worker }
402*c0909341SAndroid Build Coastguard Worker }
403*c0909341SAndroid Build Coastguard Worker
404*c0909341SAndroid Build Coastguard Worker #define fguv_ss_fn(nm, ss_x, ss_y) \
405*c0909341SAndroid Build Coastguard Worker static decl_fguv_32x32xn_fn(fguv_32x32xn_##nm##_c) { \
406*c0909341SAndroid Build Coastguard Worker fguv_32x32xn_c(dst_row, src_row, stride, data, pw, scaling, grain_lut, bh, \
407*c0909341SAndroid Build Coastguard Worker row_num, luma_row, luma_stride, uv_pl, is_id, ss_x, ss_y \
408*c0909341SAndroid Build Coastguard Worker HIGHBD_TAIL_SUFFIX); \
409*c0909341SAndroid Build Coastguard Worker }
410*c0909341SAndroid Build Coastguard Worker
411*c0909341SAndroid Build Coastguard Worker fguv_ss_fn(420, 1, 1);
412*c0909341SAndroid Build Coastguard Worker fguv_ss_fn(422, 1, 0);
413*c0909341SAndroid Build Coastguard Worker fguv_ss_fn(444, 0, 0);
414*c0909341SAndroid Build Coastguard Worker
415*c0909341SAndroid Build Coastguard Worker #if HAVE_ASM
416*c0909341SAndroid Build Coastguard Worker #if ARCH_AARCH64 || ARCH_ARM
417*c0909341SAndroid Build Coastguard Worker #include "src/arm/filmgrain.h"
418*c0909341SAndroid Build Coastguard Worker #elif ARCH_X86
419*c0909341SAndroid Build Coastguard Worker #include "src/x86/filmgrain.h"
420*c0909341SAndroid Build Coastguard Worker #endif
421*c0909341SAndroid Build Coastguard Worker #endif
422*c0909341SAndroid Build Coastguard Worker
bitfn(dav1d_film_grain_dsp_init)423*c0909341SAndroid Build Coastguard Worker COLD void bitfn(dav1d_film_grain_dsp_init)(Dav1dFilmGrainDSPContext *const c) {
424*c0909341SAndroid Build Coastguard Worker c->generate_grain_y = generate_grain_y_c;
425*c0909341SAndroid Build Coastguard Worker c->generate_grain_uv[DAV1D_PIXEL_LAYOUT_I420 - 1] = generate_grain_uv_420_c;
426*c0909341SAndroid Build Coastguard Worker c->generate_grain_uv[DAV1D_PIXEL_LAYOUT_I422 - 1] = generate_grain_uv_422_c;
427*c0909341SAndroid Build Coastguard Worker c->generate_grain_uv[DAV1D_PIXEL_LAYOUT_I444 - 1] = generate_grain_uv_444_c;
428*c0909341SAndroid Build Coastguard Worker
429*c0909341SAndroid Build Coastguard Worker c->fgy_32x32xn = fgy_32x32xn_c;
430*c0909341SAndroid Build Coastguard Worker c->fguv_32x32xn[DAV1D_PIXEL_LAYOUT_I420 - 1] = fguv_32x32xn_420_c;
431*c0909341SAndroid Build Coastguard Worker c->fguv_32x32xn[DAV1D_PIXEL_LAYOUT_I422 - 1] = fguv_32x32xn_422_c;
432*c0909341SAndroid Build Coastguard Worker c->fguv_32x32xn[DAV1D_PIXEL_LAYOUT_I444 - 1] = fguv_32x32xn_444_c;
433*c0909341SAndroid Build Coastguard Worker
434*c0909341SAndroid Build Coastguard Worker #if HAVE_ASM
435*c0909341SAndroid Build Coastguard Worker #if ARCH_AARCH64 || ARCH_ARM
436*c0909341SAndroid Build Coastguard Worker film_grain_dsp_init_arm(c);
437*c0909341SAndroid Build Coastguard Worker #elif ARCH_X86
438*c0909341SAndroid Build Coastguard Worker film_grain_dsp_init_x86(c);
439*c0909341SAndroid Build Coastguard Worker #endif
440*c0909341SAndroid Build Coastguard Worker #endif
441*c0909341SAndroid Build Coastguard Worker }
442