1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Support for Intel Camera Imaging ISP subsystem.
4  * Copyright (c) 2015, Intel Corporation.
5  */
6 
7 #include "ia_css_types.h"
8 #include "sh_css_defs.h"
9 #ifndef IA_CSS_NO_DEBUG
10 #include "ia_css_debug.h"
11 #endif
12 #include "sh_css_frac.h"
13 #include "assert_support.h"
14 
15 #include "bh/bh_2/ia_css_bh.host.h"
16 #include "ia_css_s3a.host.h"
17 
18 const struct ia_css_3a_config default_3a_config = {
19 	25559,
20 	32768,
21 	7209,
22 	65535,
23 	0,
24 	65535,
25 	{-3344, -6104, -19143, 19143, 6104, 3344, 0},
26 	{1027, 0, -9219, 16384, -9219, 1027, 0}
27 };
28 
/* Raw bit depth stored by ia_css_s3a_configure() and read by ia_css_awb_encode(). */
static unsigned int s3a_raw_bit_depth;

/*
 * Remember the raw bit depth for later use by the AWB encoder.
 *
 * raw_bit_depth: value stored verbatim in the file-scope s3a_raw_bit_depth.
 */
void ia_css_s3a_configure(unsigned int raw_bit_depth)
{
	s3a_raw_bit_depth = raw_bit_depth;
}
36 
37 static void
ia_css_ae_encode(struct sh_css_isp_ae_params * to,const struct ia_css_3a_config * from,unsigned int size)38 ia_css_ae_encode(
39     struct sh_css_isp_ae_params *to,
40     const struct ia_css_3a_config *from,
41     unsigned int size)
42 {
43 	(void)size;
44 	/* coefficients to calculate Y */
45 	to->y_coef_r =
46 	    uDIGIT_FITTING(from->ae_y_coef_r, 16, SH_CSS_AE_YCOEF_SHIFT);
47 	to->y_coef_g =
48 	    uDIGIT_FITTING(from->ae_y_coef_g, 16, SH_CSS_AE_YCOEF_SHIFT);
49 	to->y_coef_b =
50 	    uDIGIT_FITTING(from->ae_y_coef_b, 16, SH_CSS_AE_YCOEF_SHIFT);
51 }
52 
53 static void
ia_css_awb_encode(struct sh_css_isp_awb_params * to,const struct ia_css_3a_config * from,unsigned int size)54 ia_css_awb_encode(
55     struct sh_css_isp_awb_params *to,
56     const struct ia_css_3a_config *from,
57     unsigned int size)
58 {
59 	(void)size;
60 	/* AWB level gate */
61 	to->lg_high_raw =
62 	    uDIGIT_FITTING(from->awb_lg_high_raw, 16, s3a_raw_bit_depth);
63 	to->lg_low =
64 	    uDIGIT_FITTING(from->awb_lg_low, 16, SH_CSS_BAYER_BITS);
65 	to->lg_high =
66 	    uDIGIT_FITTING(from->awb_lg_high, 16, SH_CSS_BAYER_BITS);
67 }
68 
69 static void
ia_css_af_encode(struct sh_css_isp_af_params * to,const struct ia_css_3a_config * from,unsigned int size)70 ia_css_af_encode(
71     struct sh_css_isp_af_params *to,
72     const struct ia_css_3a_config *from,
73     unsigned int size)
74 {
75 	unsigned int i;
76 	(void)size;
77 
78 	/* af fir coefficients */
79 	for (i = 0; i < 7; ++i) {
80 		to->fir1[i] =
81 		    sDIGIT_FITTING(from->af_fir1_coef[i], 15,
82 				   SH_CSS_AF_FIR_SHIFT);
83 		to->fir2[i] =
84 		    sDIGIT_FITTING(from->af_fir2_coef[i], 15,
85 				   SH_CSS_AF_FIR_SHIFT);
86 	}
87 }
88 
89 void
ia_css_s3a_encode(struct sh_css_isp_s3a_params * to,const struct ia_css_3a_config * from,unsigned int size)90 ia_css_s3a_encode(
91     struct sh_css_isp_s3a_params *to,
92     const struct ia_css_3a_config *from,
93     unsigned int size)
94 {
95 	(void)size;
96 
97 	ia_css_ae_encode(&to->ae,   from, sizeof(to->ae));
98 	ia_css_awb_encode(&to->awb, from, sizeof(to->awb));
99 	ia_css_af_encode(&to->af,   from, sizeof(to->af));
100 }
101 
#if 0
/*
 * Disabled code: the surrounding #if 0 keeps this out of the build.
 *
 * NOTE(review): the ia_css_s3a_encode() call below passes two arguments,
 * while the definition in this file takes three (to, from, size). This
 * function would need updating before it could be re-enabled.
 */
void ia_css_process_s3a(unsigned int pipe_id,
			const struct ia_css_pipeline_stage *stage,
			struct ia_css_isp_parameters *params)
{
	short dmem_offset = stage->binary->info->mem_offsets->dmem.s3a;

	assert(params);

	if (dmem_offset >= 0) {
		ia_css_s3a_encode((struct sh_css_isp_s3a_params *)
				  &stage->isp_mem_params[IA_CSS_ISP_DMEM0].address[dmem_offset],
				  &params->s3a_config);
		ia_css_bh_encode((struct sh_css_isp_bh_params *)
				 &stage->isp_mem_params[IA_CSS_ISP_DMEM0].address[dmem_offset],
				 &params->s3a_config);
		params->isp_params_changed = true;
		params->isp_mem_params_changed[pipe_id][stage->stage_num][IA_CSS_ISP_DMEM0] =
		    true;
	}

	params->isp_params_changed = true;
}
#endif
128 
129 #ifndef IA_CSS_NO_DEBUG
130 void
ia_css_ae_dump(const struct sh_css_isp_ae_params * ae,unsigned int level)131 ia_css_ae_dump(
132     const struct sh_css_isp_ae_params *ae,
133     unsigned int level)
134 {
135 	if (!ae) return;
136 	ia_css_debug_dtrace(level, "\t%-32s = %d\n",
137 			    "ae_y_coef_r", ae->y_coef_r);
138 	ia_css_debug_dtrace(level, "\t%-32s = %d\n",
139 			    "ae_y_coef_g", ae->y_coef_g);
140 	ia_css_debug_dtrace(level, "\t%-32s = %d\n",
141 			    "ae_y_coef_b", ae->y_coef_b);
142 }
143 
144 void
ia_css_awb_dump(const struct sh_css_isp_awb_params * awb,unsigned int level)145 ia_css_awb_dump(
146     const struct sh_css_isp_awb_params *awb,
147     unsigned int level)
148 {
149 	ia_css_debug_dtrace(level, "\t%-32s = %d\n",
150 			    "awb_lg_high_raw", awb->lg_high_raw);
151 	ia_css_debug_dtrace(level, "\t%-32s = %d\n",
152 			    "awb_lg_low", awb->lg_low);
153 	ia_css_debug_dtrace(level, "\t%-32s = %d\n",
154 			    "awb_lg_high", awb->lg_high);
155 }
156 
157 void
ia_css_af_dump(const struct sh_css_isp_af_params * af,unsigned int level)158 ia_css_af_dump(
159     const struct sh_css_isp_af_params *af,
160     unsigned int level)
161 {
162 	ia_css_debug_dtrace(level, "\t%-32s = %d\n",
163 			    "af_fir1[0]", af->fir1[0]);
164 	ia_css_debug_dtrace(level, "\t%-32s = %d\n",
165 			    "af_fir1[1]", af->fir1[1]);
166 	ia_css_debug_dtrace(level, "\t%-32s = %d\n",
167 			    "af_fir1[2]", af->fir1[2]);
168 	ia_css_debug_dtrace(level, "\t%-32s = %d\n",
169 			    "af_fir1[3]", af->fir1[3]);
170 	ia_css_debug_dtrace(level, "\t%-32s = %d\n",
171 			    "af_fir1[4]", af->fir1[4]);
172 	ia_css_debug_dtrace(level, "\t%-32s = %d\n",
173 			    "af_fir1[5]", af->fir1[5]);
174 	ia_css_debug_dtrace(level, "\t%-32s = %d\n",
175 			    "af_fir1[6]", af->fir1[6]);
176 	ia_css_debug_dtrace(level, "\t%-32s = %d\n",
177 			    "af_fir2[0]", af->fir2[0]);
178 	ia_css_debug_dtrace(level, "\t%-32s = %d\n",
179 			    "af_fir2[1]", af->fir2[1]);
180 	ia_css_debug_dtrace(level, "\t%-32s = %d\n",
181 			    "af_fir2[2]", af->fir2[2]);
182 	ia_css_debug_dtrace(level, "\t%-32s = %d\n",
183 			    "af_fir2[3]", af->fir2[3]);
184 	ia_css_debug_dtrace(level, "\t%-32s = %d\n",
185 			    "af_fir2[4]", af->fir2[4]);
186 	ia_css_debug_dtrace(level, "\t%-32s = %d\n",
187 			    "af_fir2[5]", af->fir2[5]);
188 	ia_css_debug_dtrace(level, "\t%-32s = %d\n",
189 			    "af_fir2[6]", af->fir2[6]);
190 }
191 
192 void
ia_css_s3a_dump(const struct sh_css_isp_s3a_params * s3a,unsigned int level)193 ia_css_s3a_dump(
194     const struct sh_css_isp_s3a_params *s3a,
195     unsigned int level)
196 {
197 	ia_css_debug_dtrace(level, "S3A Support:\n");
198 	ia_css_ae_dump(&s3a->ae, level);
199 	ia_css_awb_dump(&s3a->awb, level);
200 	ia_css_af_dump(&s3a->af, level);
201 }
202 
203 void
ia_css_s3a_debug_dtrace(const struct ia_css_3a_config * config,unsigned int level)204 ia_css_s3a_debug_dtrace(
205     const struct ia_css_3a_config *config,
206     unsigned int level)
207 {
208 	ia_css_debug_dtrace(level,
209 			    "config.ae_y_coef_r=%d, config.ae_y_coef_g=%d, config.ae_y_coef_b=%d, config.awb_lg_high_raw=%d, config.awb_lg_low=%d, config.awb_lg_high=%d\n",
210 			    config->ae_y_coef_r, config->ae_y_coef_g,
211 			    config->ae_y_coef_b, config->awb_lg_high_raw,
212 			    config->awb_lg_low, config->awb_lg_high);
213 }
214 #endif
215 
216 void
ia_css_s3a_hmem_decode(struct ia_css_3a_statistics * host_stats,const struct ia_css_bh_table * hmem_buf)217 ia_css_s3a_hmem_decode(
218     struct ia_css_3a_statistics *host_stats,
219     const struct ia_css_bh_table *hmem_buf)
220 {
221 	struct ia_css_3a_rgby_output	*out_ptr;
222 	int			i;
223 
224 	/* pixel counts(BQ) for 3A area */
225 	int count_for_3a;
226 	int sum_r, diff;
227 
228 	assert(host_stats);
229 	assert(host_stats->rgby_data);
230 	assert(hmem_buf);
231 
232 	count_for_3a = host_stats->grid.width * host_stats->grid.height
233 		       * host_stats->grid.bqs_per_grid_cell
234 		       * host_stats->grid.bqs_per_grid_cell;
235 
236 	out_ptr = host_stats->rgby_data;
237 
238 	ia_css_bh_hmem_decode(out_ptr, hmem_buf);
239 
240 	/* Calculate sum of histogram of R,
241 	   which should not be less than count_for_3a */
242 	sum_r = 0;
243 	for (i = 0; i < HMEM_UNIT_SIZE; i++) {
244 		sum_r += out_ptr[i].r;
245 	}
246 	if (sum_r < count_for_3a) {
247 		/* histogram is invalid */
248 		return;
249 	}
250 
251 	/* Verify for sum of histogram of R/G/B/Y */
252 #if 0
253 	{
254 		int sum_g = 0;
255 		int sum_b = 0;
256 		int sum_y = 0;
257 
258 		for (i = 0; i < HMEM_UNIT_SIZE; i++) {
259 			sum_g += out_ptr[i].g;
260 			sum_b += out_ptr[i].b;
261 			sum_y += out_ptr[i].y;
262 		}
263 		if (sum_g != sum_r || sum_b != sum_r || sum_y != sum_r) {
264 			/* histogram is invalid */
265 			return;
266 		}
267 	}
268 #endif
269 
270 	/*
271 	 * Limit the histogram area only to 3A area.
272 	 * In DSP, the histogram of 0 is incremented for pixels
273 	 * which are outside of 3A area. That amount should be subtracted here.
274 	 *   hist[0] = hist[0] - ((sum of all hist[]) - (pixel count for 3A area))
275 	 */
276 	diff = sum_r - count_for_3a;
277 	out_ptr[0].r -= diff;
278 	out_ptr[0].g -= diff;
279 	out_ptr[0].b -= diff;
280 	out_ptr[0].y -= diff;
281 }
282 
283 void
ia_css_s3a_dmem_decode(struct ia_css_3a_statistics * host_stats,const struct ia_css_3a_output * isp_stats)284 ia_css_s3a_dmem_decode(
285     struct ia_css_3a_statistics *host_stats,
286     const struct ia_css_3a_output *isp_stats)
287 {
288 	int isp_width, host_width, height, i;
289 	struct ia_css_3a_output *host_ptr;
290 
291 	assert(host_stats);
292 	assert(host_stats->data);
293 	assert(isp_stats);
294 
295 	isp_width  = host_stats->grid.aligned_width;
296 	host_width = host_stats->grid.width;
297 	height     = host_stats->grid.height;
298 	host_ptr   = host_stats->data;
299 
300 	/* Getting 3A statistics from DMEM does not involve any
301 	 * transformation (like the VMEM version), we just copy the data
302 	 * using a different output width. */
303 	for (i = 0; i < height; i++) {
304 		memcpy(host_ptr, isp_stats, host_width * sizeof(*host_ptr));
305 		isp_stats += isp_width;
306 		host_ptr += host_width;
307 	}
308 }
309 
/*
 * MW: this is an ISP function.
 *
 * Combine two 14-bit halves into one 28-bit value: the low 14 bits of
 * 'hi' become bits 27..14 of the result and the low 14 bits of 'lo'
 * become bits 13..0. Bits 14 and 15 of either input are discarded.
 */
static inline int merge_hi_lo_14(unsigned short hi, unsigned short lo)
{
	const unsigned int upper = ((unsigned int)hi & 0x3fff) << 14;
	const unsigned int lower = (unsigned int)lo & 0x3fff;

	return (int)(upper | lower);
}
318 
319 void
ia_css_s3a_vmem_decode(struct ia_css_3a_statistics * host_stats,const u16 * isp_stats_hi,const uint16_t * isp_stats_lo)320 ia_css_s3a_vmem_decode(
321     struct ia_css_3a_statistics *host_stats,
322     const u16 *isp_stats_hi,
323     const uint16_t *isp_stats_lo)
324 {
325 	int out_width, out_height, chunk, rest, kmax, y, x, k, elm_start, elm, ofs;
326 	const u16 *hi, *lo;
327 	struct ia_css_3a_output *output;
328 
329 	assert(host_stats);
330 	assert(host_stats->data);
331 	assert(isp_stats_hi);
332 	assert(isp_stats_lo);
333 
334 	output = host_stats->data;
335 	out_width  = host_stats->grid.width;
336 	out_height = host_stats->grid.height;
337 	hi = isp_stats_hi;
338 	lo = isp_stats_lo;
339 
340 	chunk = ISP_VEC_NELEMS >> host_stats->grid.deci_factor_log2;
341 	chunk = max(chunk, 1);
342 
343 	for (y = 0; y < out_height; y++) {
344 		elm_start = y * ISP_S3ATBL_HI_LO_STRIDE;
345 		rest = out_width;
346 		x = 0;
347 		while (x < out_width) {
348 			kmax = (rest > chunk) ? chunk : rest;
349 			ofs = y * out_width + x;
350 			elm = elm_start + x * sizeof(*output) / sizeof(int32_t);
351 			for (k = 0; k < kmax; k++, elm++) {
352 				output[ofs + k].ae_y    = merge_hi_lo_14(
353 							      hi[elm + chunk * 0], lo[elm + chunk * 0]);
354 				output[ofs + k].awb_cnt = merge_hi_lo_14(
355 							      hi[elm + chunk * 1], lo[elm + chunk * 1]);
356 				output[ofs + k].awb_gr  = merge_hi_lo_14(
357 							      hi[elm + chunk * 2], lo[elm + chunk * 2]);
358 				output[ofs + k].awb_r   = merge_hi_lo_14(
359 							      hi[elm + chunk * 3], lo[elm + chunk * 3]);
360 				output[ofs + k].awb_b   = merge_hi_lo_14(
361 							      hi[elm + chunk * 4], lo[elm + chunk * 4]);
362 				output[ofs + k].awb_gb  = merge_hi_lo_14(
363 							      hi[elm + chunk * 5], lo[elm + chunk * 5]);
364 				output[ofs + k].af_hpf1 = merge_hi_lo_14(
365 							      hi[elm + chunk * 6], lo[elm + chunk * 6]);
366 				output[ofs + k].af_hpf2 = merge_hi_lo_14(
367 							      hi[elm + chunk * 7], lo[elm + chunk * 7]);
368 			}
369 			x += chunk;
370 			rest -= chunk;
371 		}
372 	}
373 }
374