1 /*
2 * Copyright (c) 2016, Alliance for Open Media. All rights reserved.
3 *
4 * This source code is subject to the terms of the BSD 2 Clause License and
5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6 * was not distributed with this source code in the LICENSE file, you can
7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8 * Media Patent License 1.0 was not distributed with this source code in the
9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10 */
11
12 /*!\file
13 * \brief Describes film grain parameters and film grain synthesis
14 *
15 */
16
17 #include <stdbool.h>
18 #include <stdio.h>
19 #include <string.h>
20 #include <stdlib.h>
21 #include <assert.h>
22 #include "aom_dsp/aom_dsp_common.h"
23 #include "aom_mem/aom_mem.h"
24 #include "av1/decoder/grain_synthesis.h"
25
// Samples with Gaussian distribution in the range of [-2048, 2047] (12 bits)
// with zero mean and standard deviation of about 512.
// The samples should be divided by 4 for the 10-bit range and by 16 for the
// 8-bit range.
29 static const int gaussian_sequence[2048] = {
30 56, 568, -180, 172, 124, -84, 172, -64, -900, 24, 820,
31 224, 1248, 996, 272, -8, -916, -388, -732, -104, -188, 800,
32 112, -652, -320, -376, 140, -252, 492, -168, 44, -788, 588,
33 -584, 500, -228, 12, 680, 272, -476, 972, -100, 652, 368,
34 432, -196, -720, -192, 1000, -332, 652, -136, -552, -604, -4,
35 192, -220, -136, 1000, -52, 372, -96, -624, 124, -24, 396,
36 540, -12, -104, 640, 464, 244, -208, -84, 368, -528, -740,
37 248, -968, -848, 608, 376, -60, -292, -40, -156, 252, -292,
38 248, 224, -280, 400, -244, 244, -60, 76, -80, 212, 532,
39 340, 128, -36, 824, -352, -60, -264, -96, -612, 416, -704,
40 220, -204, 640, -160, 1220, -408, 900, 336, 20, -336, -96,
41 -792, 304, 48, -28, -1232, -1172, -448, 104, -292, -520, 244,
42 60, -948, 0, -708, 268, 108, 356, -548, 488, -344, -136,
43 488, -196, -224, 656, -236, -1128, 60, 4, 140, 276, -676,
44 -376, 168, -108, 464, 8, 564, 64, 240, 308, -300, -400,
45 -456, -136, 56, 120, -408, -116, 436, 504, -232, 328, 844,
46 -164, -84, 784, -168, 232, -224, 348, -376, 128, 568, 96,
47 -1244, -288, 276, 848, 832, -360, 656, 464, -384, -332, -356,
48 728, -388, 160, -192, 468, 296, 224, 140, -776, -100, 280,
49 4, 196, 44, -36, -648, 932, 16, 1428, 28, 528, 808,
50 772, 20, 268, 88, -332, -284, 124, -384, -448, 208, -228,
51 -1044, -328, 660, 380, -148, -300, 588, 240, 540, 28, 136,
52 -88, -436, 256, 296, -1000, 1400, 0, -48, 1056, -136, 264,
53 -528, -1108, 632, -484, -592, -344, 796, 124, -668, -768, 388,
54 1296, -232, -188, -200, -288, -4, 308, 100, -168, 256, -500,
55 204, -508, 648, -136, 372, -272, -120, -1004, -552, -548, -384,
56 548, -296, 428, -108, -8, -912, -324, -224, -88, -112, -220,
57 -100, 996, -796, 548, 360, -216, 180, 428, -200, -212, 148,
58 96, 148, 284, 216, -412, -320, 120, -300, -384, -604, -572,
59 -332, -8, -180, -176, 696, 116, -88, 628, 76, 44, -516,
60 240, -208, -40, 100, -592, 344, -308, -452, -228, 20, 916,
61 -1752, -136, -340, -804, 140, 40, 512, 340, 248, 184, -492,
62 896, -156, 932, -628, 328, -688, -448, -616, -752, -100, 560,
63 -1020, 180, -800, -64, 76, 576, 1068, 396, 660, 552, -108,
64 -28, 320, -628, 312, -92, -92, -472, 268, 16, 560, 516,
65 -672, -52, 492, -100, 260, 384, 284, 292, 304, -148, 88,
66 -152, 1012, 1064, -228, 164, -376, -684, 592, -392, 156, 196,
67 -524, -64, -884, 160, -176, 636, 648, 404, -396, -436, 864,
68 424, -728, 988, -604, 904, -592, 296, -224, 536, -176, -920,
69 436, -48, 1176, -884, 416, -776, -824, -884, 524, -548, -564,
70 -68, -164, -96, 692, 364, -692, -1012, -68, 260, -480, 876,
71 -1116, 452, -332, -352, 892, -1088, 1220, -676, 12, -292, 244,
72 496, 372, -32, 280, 200, 112, -440, -96, 24, -644, -184,
73 56, -432, 224, -980, 272, -260, 144, -436, 420, 356, 364,
74 -528, 76, 172, -744, -368, 404, -752, -416, 684, -688, 72,
75 540, 416, 92, 444, 480, -72, -1416, 164, -1172, -68, 24,
76 424, 264, 1040, 128, -912, -524, -356, 64, 876, -12, 4,
77 -88, 532, 272, -524, 320, 276, -508, 940, 24, -400, -120,
78 756, 60, 236, -412, 100, 376, -484, 400, -100, -740, -108,
79 -260, 328, -268, 224, -200, -416, 184, -604, -564, -20, 296,
80 60, 892, -888, 60, 164, 68, -760, 216, -296, 904, -336,
81 -28, 404, -356, -568, -208, -1480, -512, 296, 328, -360, -164,
82 -1560, -776, 1156, -428, 164, -504, -112, 120, -216, -148, -264,
83 308, 32, 64, -72, 72, 116, 176, -64, -272, 460, -536,
84 -784, -280, 348, 108, -752, -132, 524, -540, -776, 116, -296,
85 -1196, -288, -560, 1040, -472, 116, -848, -1116, 116, 636, 696,
86 284, -176, 1016, 204, -864, -648, -248, 356, 972, -584, -204,
87 264, 880, 528, -24, -184, 116, 448, -144, 828, 524, 212,
88 -212, 52, 12, 200, 268, -488, -404, -880, 824, -672, -40,
89 908, -248, 500, 716, -576, 492, -576, 16, 720, -108, 384,
90 124, 344, 280, 576, -500, 252, 104, -308, 196, -188, -8,
91 1268, 296, 1032, -1196, 436, 316, 372, -432, -200, -660, 704,
92 -224, 596, -132, 268, 32, -452, 884, 104, -1008, 424, -1348,
93 -280, 4, -1168, 368, 476, 696, 300, -8, 24, 180, -592,
94 -196, 388, 304, 500, 724, -160, 244, -84, 272, -256, -420,
95 320, 208, -144, -156, 156, 364, 452, 28, 540, 316, 220,
96 -644, -248, 464, 72, 360, 32, -388, 496, -680, -48, 208,
97 -116, -408, 60, -604, -392, 548, -840, 784, -460, 656, -544,
98 -388, -264, 908, -800, -628, -612, -568, 572, -220, 164, 288,
99 -16, -308, 308, -112, -636, -760, 280, -668, 432, 364, 240,
100 -196, 604, 340, 384, 196, 592, -44, -500, 432, -580, -132,
101 636, -76, 392, 4, -412, 540, 508, 328, -356, -36, 16,
102 -220, -64, -248, -60, 24, -192, 368, 1040, 92, -24, -1044,
103 -32, 40, 104, 148, 192, -136, -520, 56, -816, -224, 732,
104 392, 356, 212, -80, -424, -1008, -324, 588, -1496, 576, 460,
105 -816, -848, 56, -580, -92, -1372, -112, -496, 200, 364, 52,
106 -140, 48, -48, -60, 84, 72, 40, 132, -356, -268, -104,
107 -284, -404, 732, -520, 164, -304, -540, 120, 328, -76, -460,
108 756, 388, 588, 236, -436, -72, -176, -404, -316, -148, 716,
109 -604, 404, -72, -88, -888, -68, 944, 88, -220, -344, 960,
110 472, 460, -232, 704, 120, 832, -228, 692, -508, 132, -476,
111 844, -748, -364, -44, 1116, -1104, -1056, 76, 428, 552, -692,
112 60, 356, 96, -384, -188, -612, -576, 736, 508, 892, 352,
113 -1132, 504, -24, -352, 324, 332, -600, -312, 292, 508, -144,
114 -8, 484, 48, 284, -260, -240, 256, -100, -292, -204, -44,
115 472, -204, 908, -188, -1000, -256, 92, 1164, -392, 564, 356,
116 652, -28, -884, 256, 484, -192, 760, -176, 376, -524, -452,
117 -436, 860, -736, 212, 124, 504, -476, 468, 76, -472, 552,
118 -692, -944, -620, 740, -240, 400, 132, 20, 192, -196, 264,
119 -668, -1012, -60, 296, -316, -828, 76, -156, 284, -768, -448,
120 -832, 148, 248, 652, 616, 1236, 288, -328, -400, -124, 588,
121 220, 520, -696, 1032, 768, -740, -92, -272, 296, 448, -464,
122 412, -200, 392, 440, -200, 264, -152, -260, 320, 1032, 216,
123 320, -8, -64, 156, -1016, 1084, 1172, 536, 484, -432, 132,
124 372, -52, -256, 84, 116, -352, 48, 116, 304, -384, 412,
125 924, -300, 528, 628, 180, 648, 44, -980, -220, 1320, 48,
126 332, 748, 524, -268, -720, 540, -276, 564, -344, -208, -196,
127 436, 896, 88, -392, 132, 80, -964, -288, 568, 56, -48,
128 -456, 888, 8, 552, -156, -292, 948, 288, 128, -716, -292,
129 1192, -152, 876, 352, -600, -260, -812, -468, -28, -120, -32,
130 -44, 1284, 496, 192, 464, 312, -76, -516, -380, -456, -1012,
131 -48, 308, -156, 36, 492, -156, -808, 188, 1652, 68, -120,
132 -116, 316, 160, -140, 352, 808, -416, 592, 316, -480, 56,
133 528, -204, -568, 372, -232, 752, -344, 744, -4, 324, -416,
134 -600, 768, 268, -248, -88, -132, -420, -432, 80, -288, 404,
135 -316, -1216, -588, 520, -108, 92, -320, 368, -480, -216, -92,
136 1688, -300, 180, 1020, -176, 820, -68, -228, -260, 436, -904,
137 20, 40, -508, 440, -736, 312, 332, 204, 760, -372, 728,
138 96, -20, -632, -520, -560, 336, 1076, -64, -532, 776, 584,
139 192, 396, -728, -520, 276, -188, 80, -52, -612, -252, -48,
140 648, 212, -688, 228, -52, -260, 428, -412, -272, -404, 180,
141 816, -796, 48, 152, 484, -88, -216, 988, 696, 188, -528,
142 648, -116, -180, 316, 476, 12, -564, 96, 476, -252, -364,
143 -376, -392, 556, -256, -576, 260, -352, 120, -16, -136, -260,
144 -492, 72, 556, 660, 580, 616, 772, 436, 424, -32, -324,
145 -1268, 416, -324, -80, 920, 160, 228, 724, 32, -516, 64,
146 384, 68, -128, 136, 240, 248, -204, -68, 252, -932, -120,
147 -480, -628, -84, 192, 852, -404, -288, -132, 204, 100, 168,
148 -68, -196, -868, 460, 1080, 380, -80, 244, 0, 484, -888,
149 64, 184, 352, 600, 460, 164, 604, -196, 320, -64, 588,
150 -184, 228, 12, 372, 48, -848, -344, 224, 208, -200, 484,
151 128, -20, 272, -468, -840, 384, 256, -720, -520, -464, -580,
152 112, -120, 644, -356, -208, -608, -528, 704, 560, -424, 392,
153 828, 40, 84, 200, -152, 0, -144, 584, 280, -120, 80,
154 -556, -972, -196, -472, 724, 80, 168, -32, 88, 160, -688,
155 0, 160, 356, 372, -776, 740, -128, 676, -248, -480, 4,
156 -364, 96, 544, 232, -1032, 956, 236, 356, 20, -40, 300,
157 24, -676, -596, 132, 1120, -104, 532, -1096, 568, 648, 444,
158 508, 380, 188, -376, -604, 1488, 424, 24, 756, -220, -192,
159 716, 120, 920, 688, 168, 44, -460, 568, 284, 1144, 1160,
160 600, 424, 888, 656, -356, -320, 220, 316, -176, -724, -188,
161 -816, -628, -348, -228, -380, 1012, -452, -660, 736, 928, 404,
162 -696, -72, -268, -892, 128, 184, -344, -780, 360, 336, 400,
163 344, 428, 548, -112, 136, -228, -216, -820, -516, 340, 92,
164 -136, 116, -300, 376, -244, 100, -316, -520, -284, -12, 824,
165 164, -548, -180, -128, 116, -924, -828, 268, -368, -580, 620,
166 192, 160, 0, -1676, 1068, 424, -56, -360, 468, -156, 720,
167 288, -528, 556, -364, 548, -148, 504, 316, 152, -648, -620,
168 -684, -24, -376, -384, -108, -920, -1032, 768, 180, -264, -508,
169 -1268, -260, -60, 300, -240, 988, 724, -376, -576, -212, -736,
170 556, 192, 1092, -620, -880, 376, -56, -4, -216, -32, 836,
171 268, 396, 1332, 864, -600, 100, 56, -412, -92, 356, 180,
172 884, -468, -436, 292, -388, -804, -704, -840, 368, -348, 140,
173 -724, 1536, 940, 372, 112, -372, 436, -480, 1136, 296, -32,
174 -228, 132, -48, -220, 868, -1016, -60, -1044, -464, 328, 916,
175 244, 12, -736, -296, 360, 468, -376, -108, -92, 788, 368,
176 -56, 544, 400, -672, -420, 728, 16, 320, 44, -284, -380,
177 -796, 488, 132, 204, -596, -372, 88, -152, -908, -636, -572,
178 -624, -116, -692, -200, -56, 276, -88, 484, -324, 948, 864,
179 1000, -456, -184, -276, 292, -296, 156, 676, 320, 160, 908,
180 -84, -1236, -288, -116, 260, -372, -644, 732, -756, -96, 84,
181 344, -520, 348, -688, 240, -84, 216, -1044, -136, -676, -396,
182 -1500, 960, -40, 176, 168, 1516, 420, -504, -344, -364, -360,
183 1216, -940, -380, -212, 252, -660, -708, 484, -444, -152, 928,
184 -120, 1112, 476, -260, 560, -148, -344, 108, -196, 228, -288,
185 504, 560, -328, -88, 288, -1008, 460, -228, 468, -836, -196,
186 76, 388, 232, 412, -1168, -716, -644, 756, -172, -356, -504,
187 116, 432, 528, 48, 476, -168, -608, 448, 160, -532, -272,
188 28, -676, -12, 828, 980, 456, 520, 104, -104, 256, -344,
189 -4, -28, -368, -52, -524, -572, -556, -200, 768, 1124, -208,
190 -512, 176, 232, 248, -148, -888, 604, -600, -304, 804, -156,
191 -212, 488, -192, -804, -256, 368, -360, -916, -328, 228, -240,
192 -448, -472, 856, -556, -364, 572, -12, -156, -368, -340, 432,
193 252, -752, -152, 288, 268, -580, -848, -592, 108, -76, 244,
194 312, -716, 592, -80, 436, 360, 4, -248, 160, 516, 584,
195 732, 44, -468, -280, -292, -156, -588, 28, 308, 912, 24,
196 124, 156, 180, -252, 944, -924, -772, -520, -428, -624, 300,
197 -212, -1144, 32, -724, 800, -1128, -212, -1288, -848, 180, -416,
198 440, 192, -576, -792, -76, -1080, 80, -532, -352, -132, 380,
199 -820, 148, 1112, 128, 164, 456, 700, -924, 144, -668, -384,
200 648, -832, 508, 552, -52, -100, -656, 208, -568, 748, -88,
201 680, 232, 300, 192, -408, -1012, -152, -252, -268, 272, -876,
202 -664, -648, -332, -136, 16, 12, 1152, -28, 332, -536, 320,
203 -672, -460, -316, 532, -260, 228, -40, 1052, -816, 180, 88,
204 -496, -556, -672, -368, 428, 92, 356, 404, -408, 252, 196,
205 -176, -556, 792, 268, 32, 372, 40, 96, -332, 328, 120,
206 372, -900, -40, 472, -264, -592, 952, 128, 656, 112, 664,
207 -232, 420, 4, -344, -464, 556, 244, -416, -32, 252, 0,
208 -412, 188, -696, 508, -476, 324, -1096, 656, -312, 560, 264,
209 -136, 304, 160, -64, -580, 248, 336, -720, 560, -348, -288,
210 -276, -196, -500, 852, -544, -236, -1128, -992, -776, 116, 56,
211 52, 860, 884, 212, -12, 168, 1020, 512, -552, 924, -148,
212 716, 188, 164, -340, -520, -184, 880, -152, -680, -208, -1156,
213 -300, -528, -472, 364, 100, -744, -1056, -32, 540, 280, 144,
214 -676, -32, -232, -280, -224, 96, 568, -76, 172, 148, 148,
215 104, 32, -296, -32, 788, -80, 32, -16, 280, 288, 944,
216 428, -484
217 };
218
// Number of random bits used to pick an entry of gaussian_sequence
// (2^11 == 2048 entries).
static const int gauss_bits = 11;

// Luma sub-block dimensions; init_arrays() sizes the luma column buffer from
// the height.
static int luma_subblock_size_y = 32;
static int luma_subblock_size_x = 32;

// Chroma sub-block dimensions; init_arrays() sizes the chroma column buffers
// from the height.
// NOTE(review): these are not const, so they are presumably adjusted for the
// chroma subsampling elsewhere in the file -- confirm.
static int chroma_subblock_size_y = 16;
static int chroma_subblock_size_x = 16;

// Output limits applied to 8-bit luma samples when
// params->clip_to_restricted_range is set (also used for chroma when
// mc_identity is set).
static const int min_luma_legal_range = 16;
static const int max_luma_legal_range = 235;

// Output limits applied to 8-bit chroma samples when
// params->clip_to_restricted_range is set and mc_identity is not.
static const int min_chroma_legal_range = 16;
static const int max_chroma_legal_range = 240;

// Per-plane 256-entry scaling look-up tables. They are cleared by
// init_arrays() and read through scale_LUT() when noise is added.
static int scaling_lut_y[256];
static int scaling_lut_cb[256];
static int scaling_lut_cr[256];

// Clamp bounds for generated grain values (used by the grain block
// generators).
// NOTE(review): assigned outside the part of the file reviewed here -- confirm
// they are set before any grain block is generated.
static int grain_min;
static int grain_max;

// State of the 16-bit pseudo-random generator: seeded by
// init_random_generator() and advanced by get_random_number().
static uint16_t random_register = 0;  // random number generator register
241
dealloc_arrays(const aom_film_grain_t * params,int *** pred_pos_luma,int *** pred_pos_chroma,int ** luma_grain_block,int ** cb_grain_block,int ** cr_grain_block,int ** y_line_buf,int ** cb_line_buf,int ** cr_line_buf,int ** y_col_buf,int ** cb_col_buf,int ** cr_col_buf)242 static void dealloc_arrays(const aom_film_grain_t *params, int ***pred_pos_luma,
243 int ***pred_pos_chroma, int **luma_grain_block,
244 int **cb_grain_block, int **cr_grain_block,
245 int **y_line_buf, int **cb_line_buf,
246 int **cr_line_buf, int **y_col_buf, int **cb_col_buf,
247 int **cr_col_buf) {
248 int num_pos_luma = 2 * params->ar_coeff_lag * (params->ar_coeff_lag + 1);
249 int num_pos_chroma = num_pos_luma;
250 if (params->num_y_points > 0) ++num_pos_chroma;
251
252 if (*pred_pos_luma) {
253 for (int row = 0; row < num_pos_luma; row++) {
254 aom_free((*pred_pos_luma)[row]);
255 }
256 aom_free(*pred_pos_luma);
257 *pred_pos_luma = NULL;
258 }
259
260 if (*pred_pos_chroma) {
261 for (int row = 0; row < num_pos_chroma; row++) {
262 aom_free((*pred_pos_chroma)[row]);
263 }
264 aom_free(*pred_pos_chroma);
265 *pred_pos_chroma = NULL;
266 }
267
268 aom_free(*y_line_buf);
269 *y_line_buf = NULL;
270
271 aom_free(*cb_line_buf);
272 *cb_line_buf = NULL;
273
274 aom_free(*cr_line_buf);
275 *cr_line_buf = NULL;
276
277 aom_free(*y_col_buf);
278 *y_col_buf = NULL;
279
280 aom_free(*cb_col_buf);
281 *cb_col_buf = NULL;
282
283 aom_free(*cr_col_buf);
284 *cr_col_buf = NULL;
285
286 aom_free(*luma_grain_block);
287 *luma_grain_block = NULL;
288
289 aom_free(*cb_grain_block);
290 *cb_grain_block = NULL;
291
292 aom_free(*cr_grain_block);
293 *cr_grain_block = NULL;
294 }
295
init_arrays(const aom_film_grain_t * params,int luma_stride,int chroma_stride,int *** pred_pos_luma_p,int *** pred_pos_chroma_p,int ** luma_grain_block,int ** cb_grain_block,int ** cr_grain_block,int ** y_line_buf,int ** cb_line_buf,int ** cr_line_buf,int ** y_col_buf,int ** cb_col_buf,int ** cr_col_buf,int luma_grain_samples,int chroma_grain_samples,int chroma_subsamp_y,int chroma_subsamp_x)296 static bool init_arrays(const aom_film_grain_t *params, int luma_stride,
297 int chroma_stride, int ***pred_pos_luma_p,
298 int ***pred_pos_chroma_p, int **luma_grain_block,
299 int **cb_grain_block, int **cr_grain_block,
300 int **y_line_buf, int **cb_line_buf, int **cr_line_buf,
301 int **y_col_buf, int **cb_col_buf, int **cr_col_buf,
302 int luma_grain_samples, int chroma_grain_samples,
303 int chroma_subsamp_y, int chroma_subsamp_x) {
304 *pred_pos_luma_p = NULL;
305 *pred_pos_chroma_p = NULL;
306 *luma_grain_block = NULL;
307 *cb_grain_block = NULL;
308 *cr_grain_block = NULL;
309 *y_line_buf = NULL;
310 *cb_line_buf = NULL;
311 *cr_line_buf = NULL;
312 *y_col_buf = NULL;
313 *cb_col_buf = NULL;
314 *cr_col_buf = NULL;
315
316 memset(scaling_lut_y, 0, sizeof(*scaling_lut_y) * 256);
317 memset(scaling_lut_cb, 0, sizeof(*scaling_lut_cb) * 256);
318 memset(scaling_lut_cr, 0, sizeof(*scaling_lut_cr) * 256);
319
320 int num_pos_luma = 2 * params->ar_coeff_lag * (params->ar_coeff_lag + 1);
321 int num_pos_chroma = num_pos_luma;
322 if (params->num_y_points > 0) ++num_pos_chroma;
323
324 int **pred_pos_luma;
325 int **pred_pos_chroma;
326
327 pred_pos_luma = (int **)aom_calloc(num_pos_luma, sizeof(*pred_pos_luma));
328 if (!pred_pos_luma) return false;
329
330 for (int row = 0; row < num_pos_luma; row++) {
331 pred_pos_luma[row] = (int *)aom_malloc(sizeof(**pred_pos_luma) * 3);
332 if (!pred_pos_luma[row]) {
333 dealloc_arrays(params, pred_pos_luma_p, pred_pos_chroma_p,
334 luma_grain_block, cb_grain_block, cr_grain_block,
335 y_line_buf, cb_line_buf, cr_line_buf, y_col_buf,
336 cb_col_buf, cr_col_buf);
337 return false;
338 }
339 }
340
341 pred_pos_chroma =
342 (int **)aom_calloc(num_pos_chroma, sizeof(*pred_pos_chroma));
343 if (!pred_pos_chroma) {
344 dealloc_arrays(params, pred_pos_luma_p, pred_pos_chroma_p, luma_grain_block,
345 cb_grain_block, cr_grain_block, y_line_buf, cb_line_buf,
346 cr_line_buf, y_col_buf, cb_col_buf, cr_col_buf);
347 return false;
348 }
349
350 for (int row = 0; row < num_pos_chroma; row++) {
351 pred_pos_chroma[row] = (int *)aom_malloc(sizeof(**pred_pos_chroma) * 3);
352 if (!pred_pos_chroma[row]) {
353 dealloc_arrays(params, pred_pos_luma_p, pred_pos_chroma_p,
354 luma_grain_block, cb_grain_block, cr_grain_block,
355 y_line_buf, cb_line_buf, cr_line_buf, y_col_buf,
356 cb_col_buf, cr_col_buf);
357 return false;
358 }
359 }
360
361 int pos_ar_index = 0;
362
363 for (int row = -params->ar_coeff_lag; row < 0; row++) {
364 for (int col = -params->ar_coeff_lag; col < params->ar_coeff_lag + 1;
365 col++) {
366 pred_pos_luma[pos_ar_index][0] = row;
367 pred_pos_luma[pos_ar_index][1] = col;
368 pred_pos_luma[pos_ar_index][2] = 0;
369
370 pred_pos_chroma[pos_ar_index][0] = row;
371 pred_pos_chroma[pos_ar_index][1] = col;
372 pred_pos_chroma[pos_ar_index][2] = 0;
373 ++pos_ar_index;
374 }
375 }
376
377 for (int col = -params->ar_coeff_lag; col < 0; col++) {
378 pred_pos_luma[pos_ar_index][0] = 0;
379 pred_pos_luma[pos_ar_index][1] = col;
380 pred_pos_luma[pos_ar_index][2] = 0;
381
382 pred_pos_chroma[pos_ar_index][0] = 0;
383 pred_pos_chroma[pos_ar_index][1] = col;
384 pred_pos_chroma[pos_ar_index][2] = 0;
385
386 ++pos_ar_index;
387 }
388
389 if (params->num_y_points > 0) {
390 pred_pos_chroma[pos_ar_index][0] = 0;
391 pred_pos_chroma[pos_ar_index][1] = 0;
392 pred_pos_chroma[pos_ar_index][2] = 1;
393 }
394
395 *pred_pos_luma_p = pred_pos_luma;
396 *pred_pos_chroma_p = pred_pos_chroma;
397
398 *y_line_buf = (int *)aom_malloc(sizeof(**y_line_buf) * luma_stride * 2);
399 *cb_line_buf = (int *)aom_malloc(sizeof(**cb_line_buf) * chroma_stride *
400 (2 >> chroma_subsamp_y));
401 *cr_line_buf = (int *)aom_malloc(sizeof(**cr_line_buf) * chroma_stride *
402 (2 >> chroma_subsamp_y));
403
404 *y_col_buf =
405 (int *)aom_malloc(sizeof(**y_col_buf) * (luma_subblock_size_y + 2) * 2);
406 *cb_col_buf =
407 (int *)aom_malloc(sizeof(**cb_col_buf) *
408 (chroma_subblock_size_y + (2 >> chroma_subsamp_y)) *
409 (2 >> chroma_subsamp_x));
410 *cr_col_buf =
411 (int *)aom_malloc(sizeof(**cr_col_buf) *
412 (chroma_subblock_size_y + (2 >> chroma_subsamp_y)) *
413 (2 >> chroma_subsamp_x));
414
415 *luma_grain_block =
416 (int *)aom_malloc(sizeof(**luma_grain_block) * luma_grain_samples);
417 *cb_grain_block =
418 (int *)aom_malloc(sizeof(**cb_grain_block) * chroma_grain_samples);
419 *cr_grain_block =
420 (int *)aom_malloc(sizeof(**cr_grain_block) * chroma_grain_samples);
421 if (!(*pred_pos_luma_p && *pred_pos_chroma_p && *y_line_buf && *cb_line_buf &&
422 *cr_line_buf && *y_col_buf && *cb_col_buf && *cr_col_buf &&
423 *luma_grain_block && *cb_grain_block && *cr_grain_block)) {
424 dealloc_arrays(params, pred_pos_luma_p, pred_pos_chroma_p, luma_grain_block,
425 cb_grain_block, cr_grain_block, y_line_buf, cb_line_buf,
426 cr_line_buf, y_col_buf, cb_col_buf, cr_col_buf);
427 return false;
428 }
429 return true;
430 }
431
432 // get a number between 0 and 2^bits - 1
get_random_number(int bits)433 static inline int get_random_number(int bits) {
434 uint16_t bit;
435 bit = ((random_register >> 0) ^ (random_register >> 1) ^
436 (random_register >> 3) ^ (random_register >> 12)) &
437 1;
438 random_register = (random_register >> 1) | (bit << 15);
439 return (random_register >> (16 - bits)) & ((1 << bits) - 1);
440 }
441
init_random_generator(int luma_line,uint16_t seed)442 static void init_random_generator(int luma_line, uint16_t seed) {
443 // same for the picture
444
445 uint16_t msb = (seed >> 8) & 255;
446 uint16_t lsb = seed & 255;
447
448 random_register = (msb << 8) + lsb;
449
450 // changes for each row
451 int luma_num = luma_line >> 5;
452
453 random_register ^= ((luma_num * 37 + 178) & 255) << 8;
454 random_register ^= ((luma_num * 173 + 105) & 255);
455 }
456
generate_luma_grain_block(const aom_film_grain_t * params,int ** pred_pos_luma,int * luma_grain_block,int luma_block_size_y,int luma_block_size_x,int luma_grain_stride,int left_pad,int top_pad,int right_pad,int bottom_pad)457 static void generate_luma_grain_block(
458 const aom_film_grain_t *params, int **pred_pos_luma, int *luma_grain_block,
459 int luma_block_size_y, int luma_block_size_x, int luma_grain_stride,
460 int left_pad, int top_pad, int right_pad, int bottom_pad) {
461 if (params->num_y_points == 0) {
462 memset(luma_grain_block, 0,
463 sizeof(*luma_grain_block) * luma_block_size_y * luma_grain_stride);
464 return;
465 }
466
467 int bit_depth = params->bit_depth;
468 int gauss_sec_shift = 12 - bit_depth + params->grain_scale_shift;
469
470 int num_pos_luma = 2 * params->ar_coeff_lag * (params->ar_coeff_lag + 1);
471 int rounding_offset = (1 << (params->ar_coeff_shift - 1));
472
473 for (int i = 0; i < luma_block_size_y; i++)
474 for (int j = 0; j < luma_block_size_x; j++)
475 luma_grain_block[i * luma_grain_stride + j] =
476 (gaussian_sequence[get_random_number(gauss_bits)] +
477 ((1 << gauss_sec_shift) >> 1)) >>
478 gauss_sec_shift;
479
480 for (int i = top_pad; i < luma_block_size_y - bottom_pad; i++)
481 for (int j = left_pad; j < luma_block_size_x - right_pad; j++) {
482 int wsum = 0;
483 for (int pos = 0; pos < num_pos_luma; pos++) {
484 wsum = wsum + params->ar_coeffs_y[pos] *
485 luma_grain_block[(i + pred_pos_luma[pos][0]) *
486 luma_grain_stride +
487 j + pred_pos_luma[pos][1]];
488 }
489 luma_grain_block[i * luma_grain_stride + j] =
490 clamp(luma_grain_block[i * luma_grain_stride + j] +
491 ((wsum + rounding_offset) >> params->ar_coeff_shift),
492 grain_min, grain_max);
493 }
494 }
495
generate_chroma_grain_blocks(const aom_film_grain_t * params,int ** pred_pos_chroma,int * luma_grain_block,int * cb_grain_block,int * cr_grain_block,int luma_grain_stride,int chroma_block_size_y,int chroma_block_size_x,int chroma_grain_stride,int left_pad,int top_pad,int right_pad,int bottom_pad,int chroma_subsamp_y,int chroma_subsamp_x)496 static bool generate_chroma_grain_blocks(
497 const aom_film_grain_t *params, int **pred_pos_chroma,
498 int *luma_grain_block, int *cb_grain_block, int *cr_grain_block,
499 int luma_grain_stride, int chroma_block_size_y, int chroma_block_size_x,
500 int chroma_grain_stride, int left_pad, int top_pad, int right_pad,
501 int bottom_pad, int chroma_subsamp_y, int chroma_subsamp_x) {
502 int bit_depth = params->bit_depth;
503 int gauss_sec_shift = 12 - bit_depth + params->grain_scale_shift;
504
505 int num_pos_chroma = 2 * params->ar_coeff_lag * (params->ar_coeff_lag + 1);
506 if (params->num_y_points > 0) ++num_pos_chroma;
507 int rounding_offset = (1 << (params->ar_coeff_shift - 1));
508 int chroma_grain_block_size = chroma_block_size_y * chroma_grain_stride;
509
510 if (params->num_cb_points || params->chroma_scaling_from_luma) {
511 init_random_generator(7 << 5, params->random_seed);
512
513 for (int i = 0; i < chroma_block_size_y; i++)
514 for (int j = 0; j < chroma_block_size_x; j++)
515 cb_grain_block[i * chroma_grain_stride + j] =
516 (gaussian_sequence[get_random_number(gauss_bits)] +
517 ((1 << gauss_sec_shift) >> 1)) >>
518 gauss_sec_shift;
519 } else {
520 memset(cb_grain_block, 0,
521 sizeof(*cb_grain_block) * chroma_grain_block_size);
522 }
523
524 if (params->num_cr_points || params->chroma_scaling_from_luma) {
525 init_random_generator(11 << 5, params->random_seed);
526
527 for (int i = 0; i < chroma_block_size_y; i++)
528 for (int j = 0; j < chroma_block_size_x; j++)
529 cr_grain_block[i * chroma_grain_stride + j] =
530 (gaussian_sequence[get_random_number(gauss_bits)] +
531 ((1 << gauss_sec_shift) >> 1)) >>
532 gauss_sec_shift;
533 } else {
534 memset(cr_grain_block, 0,
535 sizeof(*cr_grain_block) * chroma_grain_block_size);
536 }
537
538 for (int i = top_pad; i < chroma_block_size_y - bottom_pad; i++)
539 for (int j = left_pad; j < chroma_block_size_x - right_pad; j++) {
540 int wsum_cb = 0;
541 int wsum_cr = 0;
542 for (int pos = 0; pos < num_pos_chroma; pos++) {
543 if (pred_pos_chroma[pos][2] == 0) {
544 wsum_cb = wsum_cb + params->ar_coeffs_cb[pos] *
545 cb_grain_block[(i + pred_pos_chroma[pos][0]) *
546 chroma_grain_stride +
547 j + pred_pos_chroma[pos][1]];
548 wsum_cr = wsum_cr + params->ar_coeffs_cr[pos] *
549 cr_grain_block[(i + pred_pos_chroma[pos][0]) *
550 chroma_grain_stride +
551 j + pred_pos_chroma[pos][1]];
552 } else if (pred_pos_chroma[pos][2] == 1) {
553 int av_luma = 0;
554 int luma_coord_y = ((i - top_pad) << chroma_subsamp_y) + top_pad;
555 int luma_coord_x = ((j - left_pad) << chroma_subsamp_x) + left_pad;
556
557 for (int k = luma_coord_y; k < luma_coord_y + chroma_subsamp_y + 1;
558 k++)
559 for (int l = luma_coord_x; l < luma_coord_x + chroma_subsamp_x + 1;
560 l++)
561 av_luma += luma_grain_block[k * luma_grain_stride + l];
562
563 av_luma =
564 (av_luma + ((1 << (chroma_subsamp_y + chroma_subsamp_x)) >> 1)) >>
565 (chroma_subsamp_y + chroma_subsamp_x);
566
567 wsum_cb = wsum_cb + params->ar_coeffs_cb[pos] * av_luma;
568 wsum_cr = wsum_cr + params->ar_coeffs_cr[pos] * av_luma;
569 } else {
570 fprintf(
571 stderr,
572 "Grain synthesis: prediction between two chroma components is "
573 "not supported!");
574 return false;
575 }
576 }
577 if (params->num_cb_points || params->chroma_scaling_from_luma)
578 cb_grain_block[i * chroma_grain_stride + j] =
579 clamp(cb_grain_block[i * chroma_grain_stride + j] +
580 ((wsum_cb + rounding_offset) >> params->ar_coeff_shift),
581 grain_min, grain_max);
582 if (params->num_cr_points || params->chroma_scaling_from_luma)
583 cr_grain_block[i * chroma_grain_stride + j] =
584 clamp(cr_grain_block[i * chroma_grain_stride + j] +
585 ((wsum_cr + rounding_offset) >> params->ar_coeff_shift),
586 grain_min, grain_max);
587 }
588 return true;
589 }
590
// Builds a 256-entry look-up table from a piecewise-linear function.
//
// scaling_points: num_points pairs {x, y}; the x values are used directly as
//                 indices into scaling_lut, so they must lie in [0, 255] and
//                 are expected in increasing order.
// num_points:     number of pairs; nothing is written when it is <= 0.
// scaling_lut:    output table with at least 256 entries.
//
// Entries below the first point take the first point's y, entries from the
// last point onwards take the last point's y, and entries in between are
// interpolated linearly in 16.16 fixed point.
static void init_scaling_function(const int scaling_points[][2], int num_points,
                                  int scaling_lut[]) {
  if (num_points <= 0) return;

  for (int i = 0; i < scaling_points[0][0]; i++)
    scaling_lut[i] = scaling_points[0][1];

  for (int point = 0; point < num_points - 1; point++) {
    int delta_y = scaling_points[point + 1][1] - scaling_points[point][1];
    int delta_x = scaling_points[point + 1][0] - scaling_points[point][0];

    // A segment with no width covers no entries. Skipping it also avoids
    // dividing by zero when two consecutive points share the same x.
    if (delta_x <= 0) continue;

    // Slope of the segment in 16.16 fixed point, using a rounded reciprocal
    // of delta_x.
    int64_t delta = delta_y * ((65536 + (delta_x >> 1)) / delta_x);

    for (int x = 0; x < delta_x; x++) {
      scaling_lut[scaling_points[point][0] + x] =
          scaling_points[point][1] + (int)((x * delta + 32768) >> 16);
    }
  }

  for (int i = scaling_points[num_points - 1][0]; i < 256; i++)
    scaling_lut[i] = scaling_points[num_points - 1][1];
}
613
// function that extracts samples from a LUT (and interpolates intermediate
// values for 10- and 12-bit video)
static int scale_LUT(int *scaling_lut, int index, int bit_depth) {
  // The table has one entry per 8-bit level. The bits of `index` above
  // 8-bit precision select a position between two neighbouring entries.
  const int extra_bits = bit_depth - 8;
  const int x = index >> extra_bits;

  // Exact hit for 8-bit input; the last entry has no right-hand neighbour.
  if (extra_bits == 0 || x == 255) return scaling_lut[x];

  const int frac = index & ((1 << extra_bits) - 1);
  const int step = scaling_lut[x + 1] - scaling_lut[x];
  const int half = 1 << (extra_bits - 1);  // rounding term
  return scaling_lut[x] + ((step * frac + half) >> extra_bits);
}
627
add_noise_to_block(const aom_film_grain_t * params,uint8_t * luma,uint8_t * cb,uint8_t * cr,int luma_stride,int chroma_stride,int * luma_grain,int * cb_grain,int * cr_grain,int luma_grain_stride,int chroma_grain_stride,int half_luma_height,int half_luma_width,int bit_depth,int chroma_subsamp_y,int chroma_subsamp_x,int mc_identity)628 static void add_noise_to_block(const aom_film_grain_t *params, uint8_t *luma,
629 uint8_t *cb, uint8_t *cr, int luma_stride,
630 int chroma_stride, int *luma_grain,
631 int *cb_grain, int *cr_grain,
632 int luma_grain_stride, int chroma_grain_stride,
633 int half_luma_height, int half_luma_width,
634 int bit_depth, int chroma_subsamp_y,
635 int chroma_subsamp_x, int mc_identity) {
636 int cb_mult = params->cb_mult - 128; // fixed scale
637 int cb_luma_mult = params->cb_luma_mult - 128; // fixed scale
638 int cb_offset = params->cb_offset - 256;
639
640 int cr_mult = params->cr_mult - 128; // fixed scale
641 int cr_luma_mult = params->cr_luma_mult - 128; // fixed scale
642 int cr_offset = params->cr_offset - 256;
643
644 int rounding_offset = (1 << (params->scaling_shift - 1));
645
646 int apply_y = params->num_y_points > 0 ? 1 : 0;
647 int apply_cb =
648 (params->num_cb_points > 0 || params->chroma_scaling_from_luma) ? 1 : 0;
649 int apply_cr =
650 (params->num_cr_points > 0 || params->chroma_scaling_from_luma) ? 1 : 0;
651
652 if (params->chroma_scaling_from_luma) {
653 cb_mult = 0; // fixed scale
654 cb_luma_mult = 64; // fixed scale
655 cb_offset = 0;
656
657 cr_mult = 0; // fixed scale
658 cr_luma_mult = 64; // fixed scale
659 cr_offset = 0;
660 }
661
662 int min_luma, max_luma, min_chroma, max_chroma;
663
664 if (params->clip_to_restricted_range) {
665 min_luma = min_luma_legal_range;
666 max_luma = max_luma_legal_range;
667
668 if (mc_identity) {
669 min_chroma = min_luma_legal_range;
670 max_chroma = max_luma_legal_range;
671 } else {
672 min_chroma = min_chroma_legal_range;
673 max_chroma = max_chroma_legal_range;
674 }
675 } else {
676 min_luma = min_chroma = 0;
677 max_luma = max_chroma = 255;
678 }
679
680 for (int i = 0; i < (half_luma_height << (1 - chroma_subsamp_y)); i++) {
681 for (int j = 0; j < (half_luma_width << (1 - chroma_subsamp_x)); j++) {
682 int average_luma = 0;
683 if (chroma_subsamp_x) {
684 average_luma = (luma[(i << chroma_subsamp_y) * luma_stride +
685 (j << chroma_subsamp_x)] +
686 luma[(i << chroma_subsamp_y) * luma_stride +
687 (j << chroma_subsamp_x) + 1] +
688 1) >>
689 1;
690 } else {
691 average_luma = luma[(i << chroma_subsamp_y) * luma_stride + j];
692 }
693
694 if (apply_cb) {
695 cb[i * chroma_stride + j] = clamp(
696 cb[i * chroma_stride + j] +
697 ((scale_LUT(scaling_lut_cb,
698 clamp(((average_luma * cb_luma_mult +
699 cb_mult * cb[i * chroma_stride + j]) >>
700 6) +
701 cb_offset,
702 0, (256 << (bit_depth - 8)) - 1),
703 8) *
704 cb_grain[i * chroma_grain_stride + j] +
705 rounding_offset) >>
706 params->scaling_shift),
707 min_chroma, max_chroma);
708 }
709
710 if (apply_cr) {
711 cr[i * chroma_stride + j] = clamp(
712 cr[i * chroma_stride + j] +
713 ((scale_LUT(scaling_lut_cr,
714 clamp(((average_luma * cr_luma_mult +
715 cr_mult * cr[i * chroma_stride + j]) >>
716 6) +
717 cr_offset,
718 0, (256 << (bit_depth - 8)) - 1),
719 8) *
720 cr_grain[i * chroma_grain_stride + j] +
721 rounding_offset) >>
722 params->scaling_shift),
723 min_chroma, max_chroma);
724 }
725 }
726 }
727
728 if (apply_y) {
729 for (int i = 0; i < (half_luma_height << 1); i++) {
730 for (int j = 0; j < (half_luma_width << 1); j++) {
731 luma[i * luma_stride + j] =
732 clamp(luma[i * luma_stride + j] +
733 ((scale_LUT(scaling_lut_y, luma[i * luma_stride + j], 8) *
734 luma_grain[i * luma_grain_stride + j] +
735 rounding_offset) >>
736 params->scaling_shift),
737 min_luma, max_luma);
738 }
739 }
740 }
741 }
742
add_noise_to_block_hbd(const aom_film_grain_t * params,uint16_t * luma,uint16_t * cb,uint16_t * cr,int luma_stride,int chroma_stride,int * luma_grain,int * cb_grain,int * cr_grain,int luma_grain_stride,int chroma_grain_stride,int half_luma_height,int half_luma_width,int bit_depth,int chroma_subsamp_y,int chroma_subsamp_x,int mc_identity)743 static void add_noise_to_block_hbd(
744 const aom_film_grain_t *params, uint16_t *luma, uint16_t *cb, uint16_t *cr,
745 int luma_stride, int chroma_stride, int *luma_grain, int *cb_grain,
746 int *cr_grain, int luma_grain_stride, int chroma_grain_stride,
747 int half_luma_height, int half_luma_width, int bit_depth,
748 int chroma_subsamp_y, int chroma_subsamp_x, int mc_identity) {
749 int cb_mult = params->cb_mult - 128; // fixed scale
750 int cb_luma_mult = params->cb_luma_mult - 128; // fixed scale
751 // offset value depends on the bit depth
752 int cb_offset = (params->cb_offset << (bit_depth - 8)) - (1 << bit_depth);
753
754 int cr_mult = params->cr_mult - 128; // fixed scale
755 int cr_luma_mult = params->cr_luma_mult - 128; // fixed scale
756 // offset value depends on the bit depth
757 int cr_offset = (params->cr_offset << (bit_depth - 8)) - (1 << bit_depth);
758
759 int rounding_offset = (1 << (params->scaling_shift - 1));
760
761 int apply_y = params->num_y_points > 0 ? 1 : 0;
762 int apply_cb =
763 (params->num_cb_points > 0 || params->chroma_scaling_from_luma) > 0 ? 1
764 : 0;
765 int apply_cr =
766 (params->num_cr_points > 0 || params->chroma_scaling_from_luma) > 0 ? 1
767 : 0;
768
769 if (params->chroma_scaling_from_luma) {
770 cb_mult = 0; // fixed scale
771 cb_luma_mult = 64; // fixed scale
772 cb_offset = 0;
773
774 cr_mult = 0; // fixed scale
775 cr_luma_mult = 64; // fixed scale
776 cr_offset = 0;
777 }
778
779 int min_luma, max_luma, min_chroma, max_chroma;
780
781 if (params->clip_to_restricted_range) {
782 min_luma = min_luma_legal_range << (bit_depth - 8);
783 max_luma = max_luma_legal_range << (bit_depth - 8);
784
785 if (mc_identity) {
786 min_chroma = min_luma_legal_range << (bit_depth - 8);
787 max_chroma = max_luma_legal_range << (bit_depth - 8);
788 } else {
789 min_chroma = min_chroma_legal_range << (bit_depth - 8);
790 max_chroma = max_chroma_legal_range << (bit_depth - 8);
791 }
792 } else {
793 min_luma = min_chroma = 0;
794 max_luma = max_chroma = (256 << (bit_depth - 8)) - 1;
795 }
796
797 for (int i = 0; i < (half_luma_height << (1 - chroma_subsamp_y)); i++) {
798 for (int j = 0; j < (half_luma_width << (1 - chroma_subsamp_x)); j++) {
799 int average_luma = 0;
800 if (chroma_subsamp_x) {
801 average_luma = (luma[(i << chroma_subsamp_y) * luma_stride +
802 (j << chroma_subsamp_x)] +
803 luma[(i << chroma_subsamp_y) * luma_stride +
804 (j << chroma_subsamp_x) + 1] +
805 1) >>
806 1;
807 } else {
808 average_luma = luma[(i << chroma_subsamp_y) * luma_stride + j];
809 }
810
811 if (apply_cb) {
812 cb[i * chroma_stride + j] = clamp(
813 cb[i * chroma_stride + j] +
814 ((scale_LUT(scaling_lut_cb,
815 clamp(((average_luma * cb_luma_mult +
816 cb_mult * cb[i * chroma_stride + j]) >>
817 6) +
818 cb_offset,
819 0, (256 << (bit_depth - 8)) - 1),
820 bit_depth) *
821 cb_grain[i * chroma_grain_stride + j] +
822 rounding_offset) >>
823 params->scaling_shift),
824 min_chroma, max_chroma);
825 }
826 if (apply_cr) {
827 cr[i * chroma_stride + j] = clamp(
828 cr[i * chroma_stride + j] +
829 ((scale_LUT(scaling_lut_cr,
830 clamp(((average_luma * cr_luma_mult +
831 cr_mult * cr[i * chroma_stride + j]) >>
832 6) +
833 cr_offset,
834 0, (256 << (bit_depth - 8)) - 1),
835 bit_depth) *
836 cr_grain[i * chroma_grain_stride + j] +
837 rounding_offset) >>
838 params->scaling_shift),
839 min_chroma, max_chroma);
840 }
841 }
842 }
843
844 if (apply_y) {
845 for (int i = 0; i < (half_luma_height << 1); i++) {
846 for (int j = 0; j < (half_luma_width << 1); j++) {
847 luma[i * luma_stride + j] =
848 clamp(luma[i * luma_stride + j] +
849 ((scale_LUT(scaling_lut_y, luma[i * luma_stride + j],
850 bit_depth) *
851 luma_grain[i * luma_grain_stride + j] +
852 rounding_offset) >>
853 params->scaling_shift),
854 min_luma, max_luma);
855 }
856 }
857 }
858 }
859
// Copies a width x height rectangle of samples from src to dst, one row at a
// time. Both strides are in bytes. width is in samples; each sample occupies
// two bytes when use_high_bit_depth is non-zero and one byte otherwise.
static void copy_rect(uint8_t *src, int src_stride, uint8_t *dst,
                      int dst_stride, int width, int height,
                      int use_high_bit_depth) {
  const int bytes_per_sample = use_high_bit_depth ? 2 : 1;
  const size_t row_bytes = (size_t)width * sizeof(uint8_t) * bytes_per_sample;

  for (int rows_left = height; rows_left != 0; --rows_left) {
    memcpy(dst, src, row_bytes);
    src += src_stride;
    dst += dst_stride;
  }
}
872
// Copies a width x height area of int values from src to dst, row by row.
// Both strides are counted in int elements.
static void copy_area(int *src, int src_stride, int *dst, int dst_stride,
                      int width, int height) {
  const size_t row_bytes = width * sizeof(*src);

  for (int rows_left = height; rows_left != 0; --rows_left) {
    memcpy(dst, src, row_bytes);
    src += src_stride;
    dst += dst_stride;
  }
}
883
// Pads a width x height plane so that both dimensions become even.
// When width is odd, the last column is replicated one sample to the right in
// each of the height rows. When height is odd, the last row (including any
// replicated column) is duplicated into the row below it. dst_stride is in
// bytes; with use_high_bit_depth set the plane is accessed as 16-bit samples.
// The buffer must already have room for the extra column and row.
static void extend_even(uint8_t *dst, int dst_stride, int width, int height,
                        int use_high_bit_depth) {
  const int odd_width = width & 1;
  const int odd_height = height & 1;
  if (!odd_width && !odd_height) return;

  const int even_width = (width + 1) & (~1);

  if (!use_high_bit_depth) {
    if (odd_width) {
      for (int row = 0; row < height; ++row) {
        const int row_pos = row * dst_stride;
        dst[row_pos + width] = dst[row_pos + width - 1];
      }
    }
    if (odd_height) {
      memcpy(&dst[height * dst_stride], &dst[(height - 1) * dst_stride],
             sizeof(*dst) * even_width);
    }
    return;
  }

  uint16_t *const dst16 = (uint16_t *)dst;
  const int stride16 = dst_stride / 2;  // byte stride -> 16-bit sample stride

  if (odd_width) {
    for (int row = 0; row < height; ++row) {
      const int row_pos = row * stride16;
      dst16[row_pos + width] = dst16[row_pos + width - 1];
    }
  }
  if (odd_height) {
    memcpy(&dst16[height * stride16], &dst16[(height - 1) * stride16],
           sizeof(*dst16) * even_width);
  }
}
911
ver_boundary_overlap(int * left_block,int left_stride,int * right_block,int right_stride,int * dst_block,int dst_stride,int width,int height)912 static void ver_boundary_overlap(int *left_block, int left_stride,
913 int *right_block, int right_stride,
914 int *dst_block, int dst_stride, int width,
915 int height) {
916 if (width == 1) {
917 while (height) {
918 *dst_block = clamp((*left_block * 23 + *right_block * 22 + 16) >> 5,
919 grain_min, grain_max);
920 left_block += left_stride;
921 right_block += right_stride;
922 dst_block += dst_stride;
923 --height;
924 }
925 return;
926 } else if (width == 2) {
927 while (height) {
928 dst_block[0] = clamp((27 * left_block[0] + 17 * right_block[0] + 16) >> 5,
929 grain_min, grain_max);
930 dst_block[1] = clamp((17 * left_block[1] + 27 * right_block[1] + 16) >> 5,
931 grain_min, grain_max);
932 left_block += left_stride;
933 right_block += right_stride;
934 dst_block += dst_stride;
935 --height;
936 }
937 return;
938 }
939 }
940
hor_boundary_overlap(int * top_block,int top_stride,int * bottom_block,int bottom_stride,int * dst_block,int dst_stride,int width,int height)941 static void hor_boundary_overlap(int *top_block, int top_stride,
942 int *bottom_block, int bottom_stride,
943 int *dst_block, int dst_stride, int width,
944 int height) {
945 if (height == 1) {
946 while (width) {
947 *dst_block = clamp((*top_block * 23 + *bottom_block * 22 + 16) >> 5,
948 grain_min, grain_max);
949 ++top_block;
950 ++bottom_block;
951 ++dst_block;
952 --width;
953 }
954 return;
955 } else if (height == 2) {
956 while (width) {
957 dst_block[0] = clamp((27 * top_block[0] + 17 * bottom_block[0] + 16) >> 5,
958 grain_min, grain_max);
959 dst_block[dst_stride] = clamp((17 * top_block[top_stride] +
960 27 * bottom_block[bottom_stride] + 16) >>
961 5,
962 grain_min, grain_max);
963 ++top_block;
964 ++bottom_block;
965 ++dst_block;
966 --width;
967 }
968 return;
969 }
970 }
971
// Copies the picture in src to dst and adds film grain to the copy.
//
// The sample size and chroma subsampling are derived from src->fmt. The
// format/colour metadata of src is copied to dst, the visible luma area is
// copied and padded to even dimensions, the chroma planes are copied unless
// the picture is monochrome, and av1_add_film_grain_run() is then applied to
// the planes of dst. dst must already own plane buffers large enough for the
// even-aligned picture.
//
// Returns -1 when src->fmt is not one of the handled formats, otherwise the
// return value of av1_add_film_grain_run().
int av1_add_film_grain(const aom_film_grain_t *params, const aom_image_t *src,
                       aom_image_t *dst) {
  int use_high_bit_depth = 0;
  int chroma_subsamp_x = 0;
  int chroma_subsamp_y = 0;

  switch (src->fmt) {
    // 4:2:0
    case AOM_IMG_FMT_AOMI420:
    case AOM_IMG_FMT_I420:
      use_high_bit_depth = 0;
      chroma_subsamp_x = 1;
      chroma_subsamp_y = 1;
      break;
    case AOM_IMG_FMT_I42016:
      use_high_bit_depth = 1;
      chroma_subsamp_x = 1;
      chroma_subsamp_y = 1;
      break;

    // 4:4:4
    case AOM_IMG_FMT_I444:
      use_high_bit_depth = 0;
      chroma_subsamp_x = 0;
      chroma_subsamp_y = 0;
      break;
    case AOM_IMG_FMT_I44416:
      use_high_bit_depth = 1;
      chroma_subsamp_x = 0;
      chroma_subsamp_y = 0;
      break;

    // 4:2:2
    case AOM_IMG_FMT_I422:
      use_high_bit_depth = 0;
      chroma_subsamp_x = 1;
      chroma_subsamp_y = 0;
      break;
    case AOM_IMG_FMT_I42216:
      use_high_bit_depth = 1;
      chroma_subsamp_x = 1;
      chroma_subsamp_y = 0;
      break;

    default:  // unknown input format
      fprintf(stderr, "Film grain error: input format is not supported!");
      return -1;
  }

  const int mc_identity = (src->mc == AOM_CICP_MC_IDENTITY);

  assert(params->bit_depth == src->bit_depth);

  // Propagate the picture description from the source to the destination.
  dst->fmt = src->fmt;
  dst->bit_depth = src->bit_depth;

  dst->r_w = src->r_w;
  dst->r_h = src->r_h;
  dst->d_w = src->d_w;
  dst->d_h = src->d_h;

  dst->cp = src->cp;
  dst->tc = src->tc;
  dst->mc = src->mc;

  dst->monochrome = src->monochrome;
  dst->csp = src->csp;
  dst->range = src->range;

  dst->x_chroma_shift = src->x_chroma_shift;
  dst->y_chroma_shift = src->y_chroma_shift;

  dst->temporal_id = src->temporal_id;
  dst->spatial_id = src->spatial_id;

  // Grain is applied on dimensions rounded up to the next even value.
  const int width = src->d_w + (src->d_w & 1);
  const int height = src->d_h + (src->d_h & 1);

  // Luma: copy the visible area, then replicate the last column/row if needed.
  copy_rect(src->planes[AOM_PLANE_Y], src->stride[AOM_PLANE_Y],
            dst->planes[AOM_PLANE_Y], dst->stride[AOM_PLANE_Y], src->d_w,
            src->d_h, use_high_bit_depth);
  // Note that dst is already assumed to be aligned to even.
  extend_even(dst->planes[AOM_PLANE_Y], dst->stride[AOM_PLANE_Y], src->d_w,
              src->d_h, use_high_bit_depth);

  if (!src->monochrome) {
    const int chroma_planes[2] = { AOM_PLANE_U, AOM_PLANE_V };
    const int chroma_width = width >> chroma_subsamp_x;
    const int chroma_height = height >> chroma_subsamp_y;

    for (int k = 0; k < 2; ++k) {
      const int plane = chroma_planes[k];
      copy_rect(src->planes[plane], src->stride[plane], dst->planes[plane],
                dst->stride[plane], chroma_width, chroma_height,
                use_high_bit_depth);
    }
  }

  // The image strides are shifted down by one for 16-bit samples so that the
  // grain routine receives strides in samples.
  const int luma_stride = dst->stride[AOM_PLANE_Y] >> use_high_bit_depth;
  const int chroma_stride = dst->stride[AOM_PLANE_U] >> use_high_bit_depth;

  return av1_add_film_grain_run(
      params, dst->planes[AOM_PLANE_Y], dst->planes[AOM_PLANE_U],
      dst->planes[AOM_PLANE_V], height, width, luma_stride, chroma_stride,
      use_high_bit_depth, chroma_subsamp_y, chroma_subsamp_x, mc_identity);
}
1077
av1_add_film_grain_run(const aom_film_grain_t * params,uint8_t * luma,uint8_t * cb,uint8_t * cr,int height,int width,int luma_stride,int chroma_stride,int use_high_bit_depth,int chroma_subsamp_y,int chroma_subsamp_x,int mc_identity)1078 int av1_add_film_grain_run(const aom_film_grain_t *params, uint8_t *luma,
1079 uint8_t *cb, uint8_t *cr, int height, int width,
1080 int luma_stride, int chroma_stride,
1081 int use_high_bit_depth, int chroma_subsamp_y,
1082 int chroma_subsamp_x, int mc_identity) {
1083 int **pred_pos_luma;
1084 int **pred_pos_chroma;
1085 int *luma_grain_block;
1086 int *cb_grain_block;
1087 int *cr_grain_block;
1088
1089 int *y_line_buf;
1090 int *cb_line_buf;
1091 int *cr_line_buf;
1092
1093 int *y_col_buf;
1094 int *cb_col_buf;
1095 int *cr_col_buf;
1096
1097 random_register = params->random_seed;
1098
1099 int left_pad = 3;
1100 int right_pad = 3; // padding to offset for AR coefficients
1101 int top_pad = 3;
1102 int bottom_pad = 0;
1103
1104 int ar_padding = 3; // maximum lag used for stabilization of AR coefficients
1105
1106 luma_subblock_size_y = 32;
1107 luma_subblock_size_x = 32;
1108
1109 chroma_subblock_size_y = luma_subblock_size_y >> chroma_subsamp_y;
1110 chroma_subblock_size_x = luma_subblock_size_x >> chroma_subsamp_x;
1111
1112 // Initial padding is only needed for generation of
1113 // film grain templates (to stabilize the AR process)
1114 // Only a 64x64 luma and 32x32 chroma part of a template
1115 // is used later for adding grain, padding can be discarded
1116
1117 int luma_block_size_y =
1118 top_pad + 2 * ar_padding + luma_subblock_size_y * 2 + bottom_pad;
1119 int luma_block_size_x = left_pad + 2 * ar_padding + luma_subblock_size_x * 2 +
1120 2 * ar_padding + right_pad;
1121
1122 int chroma_block_size_y = top_pad + (2 >> chroma_subsamp_y) * ar_padding +
1123 chroma_subblock_size_y * 2 + bottom_pad;
1124 int chroma_block_size_x = left_pad + (2 >> chroma_subsamp_x) * ar_padding +
1125 chroma_subblock_size_x * 2 +
1126 (2 >> chroma_subsamp_x) * ar_padding + right_pad;
1127
1128 int luma_grain_stride = luma_block_size_x;
1129 int chroma_grain_stride = chroma_block_size_x;
1130
1131 int overlap = params->overlap_flag;
1132 int bit_depth = params->bit_depth;
1133
1134 const int grain_center = 128 << (bit_depth - 8);
1135 grain_min = 0 - grain_center;
1136 grain_max = grain_center - 1;
1137
1138 if (!init_arrays(params, luma_stride, chroma_stride, &pred_pos_luma,
1139 &pred_pos_chroma, &luma_grain_block, &cb_grain_block,
1140 &cr_grain_block, &y_line_buf, &cb_line_buf, &cr_line_buf,
1141 &y_col_buf, &cb_col_buf, &cr_col_buf,
1142 luma_block_size_y * luma_block_size_x,
1143 chroma_block_size_y * chroma_block_size_x, chroma_subsamp_y,
1144 chroma_subsamp_x))
1145 return -1;
1146
1147 generate_luma_grain_block(params, pred_pos_luma, luma_grain_block,
1148 luma_block_size_y, luma_block_size_x,
1149 luma_grain_stride, left_pad, top_pad, right_pad,
1150 bottom_pad);
1151
1152 if (!generate_chroma_grain_blocks(
1153 params, pred_pos_chroma, luma_grain_block, cb_grain_block,
1154 cr_grain_block, luma_grain_stride, chroma_block_size_y,
1155 chroma_block_size_x, chroma_grain_stride, left_pad, top_pad,
1156 right_pad, bottom_pad, chroma_subsamp_y, chroma_subsamp_x))
1157 return -1;
1158
1159 init_scaling_function(params->scaling_points_y, params->num_y_points,
1160 scaling_lut_y);
1161
1162 if (params->chroma_scaling_from_luma) {
1163 memcpy(scaling_lut_cb, scaling_lut_y, sizeof(*scaling_lut_y) * 256);
1164 memcpy(scaling_lut_cr, scaling_lut_y, sizeof(*scaling_lut_y) * 256);
1165 } else {
1166 init_scaling_function(params->scaling_points_cb, params->num_cb_points,
1167 scaling_lut_cb);
1168 init_scaling_function(params->scaling_points_cr, params->num_cr_points,
1169 scaling_lut_cr);
1170 }
1171 for (int y = 0; y < height / 2; y += (luma_subblock_size_y >> 1)) {
1172 init_random_generator(y * 2, params->random_seed);
1173
1174 for (int x = 0; x < width / 2; x += (luma_subblock_size_x >> 1)) {
1175 int offset_y = get_random_number(8);
1176 int offset_x = (offset_y >> 4) & 15;
1177 offset_y &= 15;
1178
1179 int luma_offset_y = left_pad + 2 * ar_padding + (offset_y << 1);
1180 int luma_offset_x = top_pad + 2 * ar_padding + (offset_x << 1);
1181
1182 int chroma_offset_y = top_pad + (2 >> chroma_subsamp_y) * ar_padding +
1183 offset_y * (2 >> chroma_subsamp_y);
1184 int chroma_offset_x = left_pad + (2 >> chroma_subsamp_x) * ar_padding +
1185 offset_x * (2 >> chroma_subsamp_x);
1186
1187 if (overlap && x) {
1188 ver_boundary_overlap(
1189 y_col_buf, 2,
1190 luma_grain_block + luma_offset_y * luma_grain_stride +
1191 luma_offset_x,
1192 luma_grain_stride, y_col_buf, 2, 2,
1193 AOMMIN(luma_subblock_size_y + 2, height - (y << 1)));
1194
1195 ver_boundary_overlap(
1196 cb_col_buf, 2 >> chroma_subsamp_x,
1197 cb_grain_block + chroma_offset_y * chroma_grain_stride +
1198 chroma_offset_x,
1199 chroma_grain_stride, cb_col_buf, 2 >> chroma_subsamp_x,
1200 2 >> chroma_subsamp_x,
1201 AOMMIN(chroma_subblock_size_y + (2 >> chroma_subsamp_y),
1202 (height - (y << 1)) >> chroma_subsamp_y));
1203
1204 ver_boundary_overlap(
1205 cr_col_buf, 2 >> chroma_subsamp_x,
1206 cr_grain_block + chroma_offset_y * chroma_grain_stride +
1207 chroma_offset_x,
1208 chroma_grain_stride, cr_col_buf, 2 >> chroma_subsamp_x,
1209 2 >> chroma_subsamp_x,
1210 AOMMIN(chroma_subblock_size_y + (2 >> chroma_subsamp_y),
1211 (height - (y << 1)) >> chroma_subsamp_y));
1212
1213 int i = y ? 1 : 0;
1214
1215 if (use_high_bit_depth) {
1216 add_noise_to_block_hbd(
1217 params,
1218 (uint16_t *)luma + ((y + i) << 1) * luma_stride + (x << 1),
1219 (uint16_t *)cb +
1220 ((y + i) << (1 - chroma_subsamp_y)) * chroma_stride +
1221 (x << (1 - chroma_subsamp_x)),
1222 (uint16_t *)cr +
1223 ((y + i) << (1 - chroma_subsamp_y)) * chroma_stride +
1224 (x << (1 - chroma_subsamp_x)),
1225 luma_stride, chroma_stride, y_col_buf + i * 4,
1226 cb_col_buf + i * (2 - chroma_subsamp_y) * (2 - chroma_subsamp_x),
1227 cr_col_buf + i * (2 - chroma_subsamp_y) * (2 - chroma_subsamp_x),
1228 2, (2 - chroma_subsamp_x),
1229 AOMMIN(luma_subblock_size_y >> 1, height / 2 - y) - i, 1,
1230 bit_depth, chroma_subsamp_y, chroma_subsamp_x, mc_identity);
1231 } else {
1232 add_noise_to_block(
1233 params, luma + ((y + i) << 1) * luma_stride + (x << 1),
1234 cb + ((y + i) << (1 - chroma_subsamp_y)) * chroma_stride +
1235 (x << (1 - chroma_subsamp_x)),
1236 cr + ((y + i) << (1 - chroma_subsamp_y)) * chroma_stride +
1237 (x << (1 - chroma_subsamp_x)),
1238 luma_stride, chroma_stride, y_col_buf + i * 4,
1239 cb_col_buf + i * (2 - chroma_subsamp_y) * (2 - chroma_subsamp_x),
1240 cr_col_buf + i * (2 - chroma_subsamp_y) * (2 - chroma_subsamp_x),
1241 2, (2 - chroma_subsamp_x),
1242 AOMMIN(luma_subblock_size_y >> 1, height / 2 - y) - i, 1,
1243 bit_depth, chroma_subsamp_y, chroma_subsamp_x, mc_identity);
1244 }
1245 }
1246
1247 if (overlap && y) {
1248 if (x) {
1249 hor_boundary_overlap(y_line_buf + (x << 1), luma_stride, y_col_buf, 2,
1250 y_line_buf + (x << 1), luma_stride, 2, 2);
1251
1252 hor_boundary_overlap(cb_line_buf + x * (2 >> chroma_subsamp_x),
1253 chroma_stride, cb_col_buf, 2 >> chroma_subsamp_x,
1254 cb_line_buf + x * (2 >> chroma_subsamp_x),
1255 chroma_stride, 2 >> chroma_subsamp_x,
1256 2 >> chroma_subsamp_y);
1257
1258 hor_boundary_overlap(cr_line_buf + x * (2 >> chroma_subsamp_x),
1259 chroma_stride, cr_col_buf, 2 >> chroma_subsamp_x,
1260 cr_line_buf + x * (2 >> chroma_subsamp_x),
1261 chroma_stride, 2 >> chroma_subsamp_x,
1262 2 >> chroma_subsamp_y);
1263 }
1264
1265 hor_boundary_overlap(
1266 y_line_buf + ((x ? x + 1 : 0) << 1), luma_stride,
1267 luma_grain_block + luma_offset_y * luma_grain_stride +
1268 luma_offset_x + (x ? 2 : 0),
1269 luma_grain_stride, y_line_buf + ((x ? x + 1 : 0) << 1), luma_stride,
1270 AOMMIN(luma_subblock_size_x - ((x ? 1 : 0) << 1),
1271 width - ((x ? x + 1 : 0) << 1)),
1272 2);
1273
1274 hor_boundary_overlap(
1275 cb_line_buf + ((x ? x + 1 : 0) << (1 - chroma_subsamp_x)),
1276 chroma_stride,
1277 cb_grain_block + chroma_offset_y * chroma_grain_stride +
1278 chroma_offset_x + ((x ? 1 : 0) << (1 - chroma_subsamp_x)),
1279 chroma_grain_stride,
1280 cb_line_buf + ((x ? x + 1 : 0) << (1 - chroma_subsamp_x)),
1281 chroma_stride,
1282 AOMMIN(chroma_subblock_size_x -
1283 ((x ? 1 : 0) << (1 - chroma_subsamp_x)),
1284 (width - ((x ? x + 1 : 0) << 1)) >> chroma_subsamp_x),
1285 2 >> chroma_subsamp_y);
1286
1287 hor_boundary_overlap(
1288 cr_line_buf + ((x ? x + 1 : 0) << (1 - chroma_subsamp_x)),
1289 chroma_stride,
1290 cr_grain_block + chroma_offset_y * chroma_grain_stride +
1291 chroma_offset_x + ((x ? 1 : 0) << (1 - chroma_subsamp_x)),
1292 chroma_grain_stride,
1293 cr_line_buf + ((x ? x + 1 : 0) << (1 - chroma_subsamp_x)),
1294 chroma_stride,
1295 AOMMIN(chroma_subblock_size_x -
1296 ((x ? 1 : 0) << (1 - chroma_subsamp_x)),
1297 (width - ((x ? x + 1 : 0) << 1)) >> chroma_subsamp_x),
1298 2 >> chroma_subsamp_y);
1299
1300 if (use_high_bit_depth) {
1301 add_noise_to_block_hbd(
1302 params, (uint16_t *)luma + (y << 1) * luma_stride + (x << 1),
1303 (uint16_t *)cb + (y << (1 - chroma_subsamp_y)) * chroma_stride +
1304 (x << ((1 - chroma_subsamp_x))),
1305 (uint16_t *)cr + (y << (1 - chroma_subsamp_y)) * chroma_stride +
1306 (x << ((1 - chroma_subsamp_x))),
1307 luma_stride, chroma_stride, y_line_buf + (x << 1),
1308 cb_line_buf + (x << (1 - chroma_subsamp_x)),
1309 cr_line_buf + (x << (1 - chroma_subsamp_x)), luma_stride,
1310 chroma_stride, 1,
1311 AOMMIN(luma_subblock_size_x >> 1, width / 2 - x), bit_depth,
1312 chroma_subsamp_y, chroma_subsamp_x, mc_identity);
1313 } else {
1314 add_noise_to_block(
1315 params, luma + (y << 1) * luma_stride + (x << 1),
1316 cb + (y << (1 - chroma_subsamp_y)) * chroma_stride +
1317 (x << ((1 - chroma_subsamp_x))),
1318 cr + (y << (1 - chroma_subsamp_y)) * chroma_stride +
1319 (x << ((1 - chroma_subsamp_x))),
1320 luma_stride, chroma_stride, y_line_buf + (x << 1),
1321 cb_line_buf + (x << (1 - chroma_subsamp_x)),
1322 cr_line_buf + (x << (1 - chroma_subsamp_x)), luma_stride,
1323 chroma_stride, 1,
1324 AOMMIN(luma_subblock_size_x >> 1, width / 2 - x), bit_depth,
1325 chroma_subsamp_y, chroma_subsamp_x, mc_identity);
1326 }
1327 }
1328
1329 int i = overlap && y ? 1 : 0;
1330 int j = overlap && x ? 1 : 0;
1331
1332 if (use_high_bit_depth) {
1333 add_noise_to_block_hbd(
1334 params,
1335 (uint16_t *)luma + ((y + i) << 1) * luma_stride + ((x + j) << 1),
1336 (uint16_t *)cb +
1337 ((y + i) << (1 - chroma_subsamp_y)) * chroma_stride +
1338 ((x + j) << (1 - chroma_subsamp_x)),
1339 (uint16_t *)cr +
1340 ((y + i) << (1 - chroma_subsamp_y)) * chroma_stride +
1341 ((x + j) << (1 - chroma_subsamp_x)),
1342 luma_stride, chroma_stride,
1343 luma_grain_block + (luma_offset_y + (i << 1)) * luma_grain_stride +
1344 luma_offset_x + (j << 1),
1345 cb_grain_block +
1346 (chroma_offset_y + (i << (1 - chroma_subsamp_y))) *
1347 chroma_grain_stride +
1348 chroma_offset_x + (j << (1 - chroma_subsamp_x)),
1349 cr_grain_block +
1350 (chroma_offset_y + (i << (1 - chroma_subsamp_y))) *
1351 chroma_grain_stride +
1352 chroma_offset_x + (j << (1 - chroma_subsamp_x)),
1353 luma_grain_stride, chroma_grain_stride,
1354 AOMMIN(luma_subblock_size_y >> 1, height / 2 - y) - i,
1355 AOMMIN(luma_subblock_size_x >> 1, width / 2 - x) - j, bit_depth,
1356 chroma_subsamp_y, chroma_subsamp_x, mc_identity);
1357 } else {
1358 add_noise_to_block(
1359 params, luma + ((y + i) << 1) * luma_stride + ((x + j) << 1),
1360 cb + ((y + i) << (1 - chroma_subsamp_y)) * chroma_stride +
1361 ((x + j) << (1 - chroma_subsamp_x)),
1362 cr + ((y + i) << (1 - chroma_subsamp_y)) * chroma_stride +
1363 ((x + j) << (1 - chroma_subsamp_x)),
1364 luma_stride, chroma_stride,
1365 luma_grain_block + (luma_offset_y + (i << 1)) * luma_grain_stride +
1366 luma_offset_x + (j << 1),
1367 cb_grain_block +
1368 (chroma_offset_y + (i << (1 - chroma_subsamp_y))) *
1369 chroma_grain_stride +
1370 chroma_offset_x + (j << (1 - chroma_subsamp_x)),
1371 cr_grain_block +
1372 (chroma_offset_y + (i << (1 - chroma_subsamp_y))) *
1373 chroma_grain_stride +
1374 chroma_offset_x + (j << (1 - chroma_subsamp_x)),
1375 luma_grain_stride, chroma_grain_stride,
1376 AOMMIN(luma_subblock_size_y >> 1, height / 2 - y) - i,
1377 AOMMIN(luma_subblock_size_x >> 1, width / 2 - x) - j, bit_depth,
1378 chroma_subsamp_y, chroma_subsamp_x, mc_identity);
1379 }
1380
1381 if (overlap) {
1382 if (x) {
1383 // Copy overlapped column bufer to line buffer
1384 copy_area(y_col_buf + (luma_subblock_size_y << 1), 2,
1385 y_line_buf + (x << 1), luma_stride, 2, 2);
1386
1387 copy_area(
1388 cb_col_buf + (chroma_subblock_size_y << (1 - chroma_subsamp_x)),
1389 2 >> chroma_subsamp_x,
1390 cb_line_buf + (x << (1 - chroma_subsamp_x)), chroma_stride,
1391 2 >> chroma_subsamp_x, 2 >> chroma_subsamp_y);
1392
1393 copy_area(
1394 cr_col_buf + (chroma_subblock_size_y << (1 - chroma_subsamp_x)),
1395 2 >> chroma_subsamp_x,
1396 cr_line_buf + (x << (1 - chroma_subsamp_x)), chroma_stride,
1397 2 >> chroma_subsamp_x, 2 >> chroma_subsamp_y);
1398 }
1399
1400 // Copy grain to the line buffer for overlap with a bottom block
1401 copy_area(
1402 luma_grain_block +
1403 (luma_offset_y + luma_subblock_size_y) * luma_grain_stride +
1404 luma_offset_x + ((x ? 2 : 0)),
1405 luma_grain_stride, y_line_buf + ((x ? x + 1 : 0) << 1), luma_stride,
1406 AOMMIN(luma_subblock_size_x, width - (x << 1)) - (x ? 2 : 0), 2);
1407
1408 copy_area(cb_grain_block +
1409 (chroma_offset_y + chroma_subblock_size_y) *
1410 chroma_grain_stride +
1411 chroma_offset_x + (x ? 2 >> chroma_subsamp_x : 0),
1412 chroma_grain_stride,
1413 cb_line_buf + ((x ? x + 1 : 0) << (1 - chroma_subsamp_x)),
1414 chroma_stride,
1415 AOMMIN(chroma_subblock_size_x,
1416 ((width - (x << 1)) >> chroma_subsamp_x)) -
1417 (x ? 2 >> chroma_subsamp_x : 0),
1418 2 >> chroma_subsamp_y);
1419
1420 copy_area(cr_grain_block +
1421 (chroma_offset_y + chroma_subblock_size_y) *
1422 chroma_grain_stride +
1423 chroma_offset_x + (x ? 2 >> chroma_subsamp_x : 0),
1424 chroma_grain_stride,
1425 cr_line_buf + ((x ? x + 1 : 0) << (1 - chroma_subsamp_x)),
1426 chroma_stride,
1427 AOMMIN(chroma_subblock_size_x,
1428 ((width - (x << 1)) >> chroma_subsamp_x)) -
1429 (x ? 2 >> chroma_subsamp_x : 0),
1430 2 >> chroma_subsamp_y);
1431
1432 // Copy grain to the column buffer for overlap with the next block to
1433 // the right
1434
1435 copy_area(luma_grain_block + luma_offset_y * luma_grain_stride +
1436 luma_offset_x + luma_subblock_size_x,
1437 luma_grain_stride, y_col_buf, 2, 2,
1438 AOMMIN(luma_subblock_size_y + 2, height - (y << 1)));
1439
1440 copy_area(cb_grain_block + chroma_offset_y * chroma_grain_stride +
1441 chroma_offset_x + chroma_subblock_size_x,
1442 chroma_grain_stride, cb_col_buf, 2 >> chroma_subsamp_x,
1443 2 >> chroma_subsamp_x,
1444 AOMMIN(chroma_subblock_size_y + (2 >> chroma_subsamp_y),
1445 (height - (y << 1)) >> chroma_subsamp_y));
1446
1447 copy_area(cr_grain_block + chroma_offset_y * chroma_grain_stride +
1448 chroma_offset_x + chroma_subblock_size_x,
1449 chroma_grain_stride, cr_col_buf, 2 >> chroma_subsamp_x,
1450 2 >> chroma_subsamp_x,
1451 AOMMIN(chroma_subblock_size_y + (2 >> chroma_subsamp_y),
1452 (height - (y << 1)) >> chroma_subsamp_y));
1453 }
1454 }
1455 }
1456
1457 dealloc_arrays(params, &pred_pos_luma, &pred_pos_chroma, &luma_grain_block,
1458 &cb_grain_block, &cr_grain_block, &y_line_buf, &cb_line_buf,
1459 &cr_line_buf, &y_col_buf, &cb_col_buf, &cr_col_buf);
1460 return 0;
1461 }
1462