1*c0909341SAndroid Build Coastguard Worker /*
2*c0909341SAndroid Build Coastguard Worker * Copyright © 2018, Niklas Haas
3*c0909341SAndroid Build Coastguard Worker * Copyright © 2018, VideoLAN and dav1d authors
4*c0909341SAndroid Build Coastguard Worker * Copyright © 2018, Two Orioles, LLC
5*c0909341SAndroid Build Coastguard Worker * Copyright © 2021, Martin Storsjo
6*c0909341SAndroid Build Coastguard Worker * All rights reserved.
7*c0909341SAndroid Build Coastguard Worker *
8*c0909341SAndroid Build Coastguard Worker * Redistribution and use in source and binary forms, with or without
9*c0909341SAndroid Build Coastguard Worker * modification, are permitted provided that the following conditions are met:
10*c0909341SAndroid Build Coastguard Worker *
11*c0909341SAndroid Build Coastguard Worker * 1. Redistributions of source code must retain the above copyright notice, this
12*c0909341SAndroid Build Coastguard Worker * list of conditions and the following disclaimer.
13*c0909341SAndroid Build Coastguard Worker *
14*c0909341SAndroid Build Coastguard Worker * 2. Redistributions in binary form must reproduce the above copyright notice,
15*c0909341SAndroid Build Coastguard Worker * this list of conditions and the following disclaimer in the documentation
16*c0909341SAndroid Build Coastguard Worker * and/or other materials provided with the distribution.
17*c0909341SAndroid Build Coastguard Worker *
18*c0909341SAndroid Build Coastguard Worker * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
19*c0909341SAndroid Build Coastguard Worker * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
20*c0909341SAndroid Build Coastguard Worker * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21*c0909341SAndroid Build Coastguard Worker * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
22*c0909341SAndroid Build Coastguard Worker * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
23*c0909341SAndroid Build Coastguard Worker * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
24*c0909341SAndroid Build Coastguard Worker * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
25*c0909341SAndroid Build Coastguard Worker * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26*c0909341SAndroid Build Coastguard Worker * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
27*c0909341SAndroid Build Coastguard Worker * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28*c0909341SAndroid Build Coastguard Worker */
29*c0909341SAndroid Build Coastguard Worker
30*c0909341SAndroid Build Coastguard Worker #include "src/cpu.h"
31*c0909341SAndroid Build Coastguard Worker #include "src/filmgrain.h"
32*c0909341SAndroid Build Coastguard Worker #include "asm-offsets.h"
33*c0909341SAndroid Build Coastguard Worker
/*
 * Tie the layout of Dav1dFilmGrainData to the FGD_* constants.
 * NOTE(review): CHECK_OFFSET is defined outside this file; presumably it is a
 * compile-time check that offsetof(type, field) equals the given constant from
 * "asm-offsets.h" - confirm against its definition.
 */

/* Seed and auto-regression parameters. */
CHECK_OFFSET(Dav1dFilmGrainData, seed,              FGD_SEED);
CHECK_OFFSET(Dav1dFilmGrainData, ar_coeff_lag,      FGD_AR_COEFF_LAG);
CHECK_OFFSET(Dav1dFilmGrainData, ar_coeffs_y,       FGD_AR_COEFFS_Y);
CHECK_OFFSET(Dav1dFilmGrainData, ar_coeffs_uv,      FGD_AR_COEFFS_UV);
CHECK_OFFSET(Dav1dFilmGrainData, ar_coeff_shift,    FGD_AR_COEFF_SHIFT);
CHECK_OFFSET(Dav1dFilmGrainData, grain_scale_shift, FGD_GRAIN_SCALE_SHIFT);

/* Scaling, chroma mixing and clipping parameters. */
CHECK_OFFSET(Dav1dFilmGrainData, scaling_shift,            FGD_SCALING_SHIFT);
CHECK_OFFSET(Dav1dFilmGrainData, uv_mult,                  FGD_UV_MULT);
CHECK_OFFSET(Dav1dFilmGrainData, uv_luma_mult,             FGD_UV_LUMA_MULT);
CHECK_OFFSET(Dav1dFilmGrainData, uv_offset,                FGD_UV_OFFSET);
CHECK_OFFSET(Dav1dFilmGrainData, clip_to_restricted_range, FGD_CLIP_TO_RESTRICTED_RANGE);
46*c0909341SAndroid Build Coastguard Worker
47*c0909341SAndroid Build Coastguard Worker void BF(dav1d_generate_grain_y, neon)(entry buf[][GRAIN_WIDTH],
48*c0909341SAndroid Build Coastguard Worker const Dav1dFilmGrainData *const data
49*c0909341SAndroid Build Coastguard Worker HIGHBD_DECL_SUFFIX);
50*c0909341SAndroid Build Coastguard Worker
51*c0909341SAndroid Build Coastguard Worker #define GEN_GRAIN_UV(suff) \
52*c0909341SAndroid Build Coastguard Worker void BF(dav1d_generate_grain_uv_ ## suff, neon)(entry buf[][GRAIN_WIDTH], \
53*c0909341SAndroid Build Coastguard Worker const entry buf_y[][GRAIN_WIDTH], \
54*c0909341SAndroid Build Coastguard Worker const Dav1dFilmGrainData *const data, \
55*c0909341SAndroid Build Coastguard Worker const intptr_t uv \
56*c0909341SAndroid Build Coastguard Worker HIGHBD_DECL_SUFFIX)
57*c0909341SAndroid Build Coastguard Worker
58*c0909341SAndroid Build Coastguard Worker GEN_GRAIN_UV(420);
59*c0909341SAndroid Build Coastguard Worker GEN_GRAIN_UV(422);
60*c0909341SAndroid Build Coastguard Worker GEN_GRAIN_UV(444);
61*c0909341SAndroid Build Coastguard Worker
62*c0909341SAndroid Build Coastguard Worker // Use ptrdiff_t instead of int for the last few parameters, to get the
63*c0909341SAndroid Build Coastguard Worker // same layout of parameters on the stack across platforms.
64*c0909341SAndroid Build Coastguard Worker void BF(dav1d_fgy_32x32, neon)(pixel *const dst,
65*c0909341SAndroid Build Coastguard Worker const pixel *const src,
66*c0909341SAndroid Build Coastguard Worker const ptrdiff_t stride,
67*c0909341SAndroid Build Coastguard Worker const uint8_t scaling[SCALING_SIZE],
68*c0909341SAndroid Build Coastguard Worker const int scaling_shift,
69*c0909341SAndroid Build Coastguard Worker const entry grain_lut[][GRAIN_WIDTH],
70*c0909341SAndroid Build Coastguard Worker const int offsets[][2],
71*c0909341SAndroid Build Coastguard Worker const int h, const ptrdiff_t clip,
72*c0909341SAndroid Build Coastguard Worker const ptrdiff_t type
73*c0909341SAndroid Build Coastguard Worker HIGHBD_DECL_SUFFIX);
74*c0909341SAndroid Build Coastguard Worker
/*
 * Apply luma film grain to one row of blocks by driving the NEON block kernel.
 *
 * The row is walked in steps of FG_BLOCK_SIZE pixels up to `pw`. For every
 * step a fresh random offset is drawn per active seed, and the kernel is told
 * through `type` which overlaps apply:
 *   bit 0 (1): overlap with the row above  (overlap enabled and row_num != 0)
 *   bit 1 (2): overlap with the block left (overlap enabled and bx != 0)
 *
 * dst_row/src_row: destination and source pixels of this row
 * stride:          forwarded unchanged to the kernel
 * data:            film grain parameters (seed, overlap_flag, scaling_shift,
 *                  clip_to_restricted_range are read here)
 * pw:              width to cover, in pixels
 * scaling, grain_lut, bh: forwarded unchanged to the kernel
 * row_num:         index of this block row, mixed into the seed
 */
static void fgy_32x32xn_neon(pixel *const dst_row, const pixel *const src_row,
                             const ptrdiff_t stride,
                             const Dav1dFilmGrainData *const data, const size_t pw,
                             const uint8_t scaling[SCALING_SIZE],
                             const entry grain_lut[][GRAIN_WIDTH],
                             const int bh, const int row_num HIGHBD_DECL_SUFFIX)
{
    const int overlap = !!data->overlap_flag;

    /* Two seeds are needed only when blending with the previous row. */
    const int rows = 1 + (overlap && row_num > 0);

    /* seed[0] belongs to the current row, seed[1] to the previous one */
    unsigned seed[2];
    for (int r = 0; r < rows; r++) {
        const int row = row_num - r;
        const unsigned hi = ((row * 37 + 178) & 0xFF) << 8;
        const unsigned lo = (row * 173 + 105) & 0xFF;
        seed[r] = data->seed ^ hi ^ lo;
    }

    /* The vertical overlap bit does not depend on the column. */
    const int type_y = (overlap && row_num) ? 1 : 0;

    int offsets[2 /* col offset */][2 /* row offset */];

    /* walk the row one FG_BLOCK_SIZE-wide block at a time */
    for (unsigned bx = 0; bx < pw; bx += FG_BLOCK_SIZE) {
        const int type_x = (overlap && bx) ? 2 : 0;

        for (int r = 0; r < rows; r++) {
            /* keep the previous column's offset around for the x overlap */
            if (type_x)
                offsets[1][r] = offsets[0][r];
            /* draw the offset for the current column */
            offsets[0][r] = get_random_number(8, &seed[r]);
        }

        const int type = type_y | type_x;

        BF(dav1d_fgy_32x32, neon)(dst_row + bx, src_row + bx, stride,
                                  scaling, data->scaling_shift,
                                  grain_lut, offsets, bh,
                                  data->clip_to_restricted_range, type
                                  HIGHBD_TAIL_SUFFIX);
    }
}
120*c0909341SAndroid Build Coastguard Worker
121*c0909341SAndroid Build Coastguard Worker // Use ptrdiff_t instead of int for the last few parameters, to get the
122*c0909341SAndroid Build Coastguard Worker // parameters on the stack with the same layout across platforms.
123*c0909341SAndroid Build Coastguard Worker #define FGUV(nm, sx, sy) \
124*c0909341SAndroid Build Coastguard Worker void BF(dav1d_fguv_32x32_##nm, neon)(pixel *const dst, \
125*c0909341SAndroid Build Coastguard Worker const pixel *const src, \
126*c0909341SAndroid Build Coastguard Worker const ptrdiff_t stride, \
127*c0909341SAndroid Build Coastguard Worker const uint8_t scaling[SCALING_SIZE], \
128*c0909341SAndroid Build Coastguard Worker const Dav1dFilmGrainData *const data, \
129*c0909341SAndroid Build Coastguard Worker const entry grain_lut[][GRAIN_WIDTH], \
130*c0909341SAndroid Build Coastguard Worker const pixel *const luma_row, \
131*c0909341SAndroid Build Coastguard Worker const ptrdiff_t luma_stride, \
132*c0909341SAndroid Build Coastguard Worker const int offsets[][2], \
133*c0909341SAndroid Build Coastguard Worker const ptrdiff_t h, const ptrdiff_t uv, \
134*c0909341SAndroid Build Coastguard Worker const ptrdiff_t is_id, \
135*c0909341SAndroid Build Coastguard Worker const ptrdiff_t type \
136*c0909341SAndroid Build Coastguard Worker HIGHBD_DECL_SUFFIX); \
137*c0909341SAndroid Build Coastguard Worker static void \
138*c0909341SAndroid Build Coastguard Worker fguv_32x32xn_##nm##_neon(pixel *const dst_row, const pixel *const src_row, \
139*c0909341SAndroid Build Coastguard Worker const ptrdiff_t stride, const Dav1dFilmGrainData *const data, \
140*c0909341SAndroid Build Coastguard Worker const size_t pw, const uint8_t scaling[SCALING_SIZE], \
141*c0909341SAndroid Build Coastguard Worker const entry grain_lut[][GRAIN_WIDTH], const int bh, \
142*c0909341SAndroid Build Coastguard Worker const int row_num, const pixel *const luma_row, \
143*c0909341SAndroid Build Coastguard Worker const ptrdiff_t luma_stride, const int uv, const int is_id \
144*c0909341SAndroid Build Coastguard Worker HIGHBD_DECL_SUFFIX) \
145*c0909341SAndroid Build Coastguard Worker { \
146*c0909341SAndroid Build Coastguard Worker const int rows = 1 + (data->overlap_flag && row_num > 0); \
147*c0909341SAndroid Build Coastguard Worker \
148*c0909341SAndroid Build Coastguard Worker /* seed[0] contains the current row, seed[1] contains the previous */ \
149*c0909341SAndroid Build Coastguard Worker unsigned seed[2]; \
150*c0909341SAndroid Build Coastguard Worker for (int i = 0; i < rows; i++) { \
151*c0909341SAndroid Build Coastguard Worker seed[i] = data->seed; \
152*c0909341SAndroid Build Coastguard Worker seed[i] ^= (((row_num - i) * 37 + 178) & 0xFF) << 8; \
153*c0909341SAndroid Build Coastguard Worker seed[i] ^= (((row_num - i) * 173 + 105) & 0xFF); \
154*c0909341SAndroid Build Coastguard Worker } \
155*c0909341SAndroid Build Coastguard Worker \
156*c0909341SAndroid Build Coastguard Worker int offsets[2 /* col offset */][2 /* row offset */]; \
157*c0909341SAndroid Build Coastguard Worker \
158*c0909341SAndroid Build Coastguard Worker /* process this row in FG_BLOCK_SIZE^2 blocks (subsampled) */ \
159*c0909341SAndroid Build Coastguard Worker for (unsigned bx = 0; bx < pw; bx += FG_BLOCK_SIZE >> sx) { \
160*c0909341SAndroid Build Coastguard Worker if (data->overlap_flag && bx) { \
161*c0909341SAndroid Build Coastguard Worker /* shift previous offsets left */ \
162*c0909341SAndroid Build Coastguard Worker for (int i = 0; i < rows; i++) \
163*c0909341SAndroid Build Coastguard Worker offsets[1][i] = offsets[0][i]; \
164*c0909341SAndroid Build Coastguard Worker } \
165*c0909341SAndroid Build Coastguard Worker \
166*c0909341SAndroid Build Coastguard Worker /* update current offsets */ \
167*c0909341SAndroid Build Coastguard Worker for (int i = 0; i < rows; i++) \
168*c0909341SAndroid Build Coastguard Worker offsets[0][i] = get_random_number(8, &seed[i]); \
169*c0909341SAndroid Build Coastguard Worker \
170*c0909341SAndroid Build Coastguard Worker int type = 0; \
171*c0909341SAndroid Build Coastguard Worker if (data->overlap_flag && row_num) \
172*c0909341SAndroid Build Coastguard Worker type |= 1; /* overlap y */ \
173*c0909341SAndroid Build Coastguard Worker if (data->overlap_flag && bx) \
174*c0909341SAndroid Build Coastguard Worker type |= 2; /* overlap x */ \
175*c0909341SAndroid Build Coastguard Worker if (data->chroma_scaling_from_luma) \
176*c0909341SAndroid Build Coastguard Worker type |= 4; \
177*c0909341SAndroid Build Coastguard Worker \
178*c0909341SAndroid Build Coastguard Worker BF(dav1d_fguv_32x32_##nm, neon)(dst_row + bx, src_row + bx, stride, \
179*c0909341SAndroid Build Coastguard Worker scaling, data, grain_lut, \
180*c0909341SAndroid Build Coastguard Worker luma_row + (bx << sx), luma_stride, \
181*c0909341SAndroid Build Coastguard Worker offsets, bh, uv, is_id, type \
182*c0909341SAndroid Build Coastguard Worker HIGHBD_TAIL_SUFFIX); \
183*c0909341SAndroid Build Coastguard Worker } \
184*c0909341SAndroid Build Coastguard Worker }
185*c0909341SAndroid Build Coastguard Worker
186*c0909341SAndroid Build Coastguard Worker FGUV(420, 1, 1);
187*c0909341SAndroid Build Coastguard Worker FGUV(422, 1, 0);
188*c0909341SAndroid Build Coastguard Worker FGUV(444, 0, 0);
189*c0909341SAndroid Build Coastguard Worker
film_grain_dsp_init_arm(Dav1dFilmGrainDSPContext * const c)190*c0909341SAndroid Build Coastguard Worker static ALWAYS_INLINE void film_grain_dsp_init_arm(Dav1dFilmGrainDSPContext *const c) {
191*c0909341SAndroid Build Coastguard Worker const unsigned flags = dav1d_get_cpu_flags();
192*c0909341SAndroid Build Coastguard Worker
193*c0909341SAndroid Build Coastguard Worker if (!(flags & DAV1D_ARM_CPU_FLAG_NEON)) return;
194*c0909341SAndroid Build Coastguard Worker
195*c0909341SAndroid Build Coastguard Worker c->generate_grain_y = BF(dav1d_generate_grain_y, neon);
196*c0909341SAndroid Build Coastguard Worker c->generate_grain_uv[DAV1D_PIXEL_LAYOUT_I420 - 1] = BF(dav1d_generate_grain_uv_420, neon);
197*c0909341SAndroid Build Coastguard Worker c->generate_grain_uv[DAV1D_PIXEL_LAYOUT_I422 - 1] = BF(dav1d_generate_grain_uv_422, neon);
198*c0909341SAndroid Build Coastguard Worker c->generate_grain_uv[DAV1D_PIXEL_LAYOUT_I444 - 1] = BF(dav1d_generate_grain_uv_444, neon);
199*c0909341SAndroid Build Coastguard Worker
200*c0909341SAndroid Build Coastguard Worker c->fgy_32x32xn = fgy_32x32xn_neon;
201*c0909341SAndroid Build Coastguard Worker c->fguv_32x32xn[DAV1D_PIXEL_LAYOUT_I420 - 1] = fguv_32x32xn_420_neon;
202*c0909341SAndroid Build Coastguard Worker c->fguv_32x32xn[DAV1D_PIXEL_LAYOUT_I422 - 1] = fguv_32x32xn_422_neon;
203*c0909341SAndroid Build Coastguard Worker c->fguv_32x32xn[DAV1D_PIXEL_LAYOUT_I444 - 1] = fguv_32x32xn_444_neon;
204*c0909341SAndroid Build Coastguard Worker }
205