xref: /aosp_15_r20/external/libavc/encoder/svc/isvce_residual_pred.c (revision 495ae853bb871d1e5a258cb02c2cc13cde8ddb9a)
1 /******************************************************************************
2  *
3  * Copyright (C) 2022 The Android Open Source Project
4  *
5  * Licensed under the Apache License, Version 2.0 (the "License");
6  * you may not use this file except in compliance with the License.
7  * You may obtain a copy of the License at:
8  *
9  * http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  *
17  *****************************************************************************
18  * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19  */
20 
21 /**
22 *******************************************************************************
23 * @file
24 *  isvce_residual_pred.c
25 *
26 * @brief
27 *  Contains functions used for SVC residual prediction
28 *
29 *******************************************************************************
30 */
31 #include <stdint.h>
32 #include <math.h>
33 
34 #include "ih264_typedefs.h"
35 #include "iv2.h"
36 #include "isvc_macros.h"
37 #include "ih264_debug.h"
38 #include "isvc_defs.h"
39 #include "isvc_structs.h"
40 #include "isvce_defs.h"
41 #include "isvce_structs.h"
42 #include "isvce_res_pred_private_defs.h"
43 #include "isvce_residual_pred.h"
44 #include "isvce_utils.h"
45 #include "isvc_defs.h"
46 
/**
*******************************************************************************
*
* @brief
*  Upsamples a chroma residual block by 2x in both directions
*
* @par Description
*  Works in two separable passes. The horizontal pass reads BLK_SIZE rows of
*  BLK_SIZE input samples each (taking every second WORD16 of the input, i.e.
*  elements 0, 2, 4 and 6 of a row - presumably because U and V residuals are
*  interleaved; confirm against the caller) and writes 2 * BLK_SIZE WORD32
*  intermediates per row into the scratch buffer, scaled by 16. The vertical
*  pass then interpolates every scratch column and writes 2 * BLK_SIZE output
*  rows, stepping two WORD16 elements between output columns.
*
* @param[in] ps_ref_array_positions
*  Unused
*
* @param[in] ps_ref_array_phases
*  Interpolation phases in 1/16 sample units. Entries [0] and [1] supply the
*  horizontal phases (i4_abscissa); entries [0] and [2] supply the vertical
*  phases (i4_ordinate)
*
* @param[in] ps_inp
*  Input residuals (WORD16). Reading starts one row down and two elements in
*  from pv_data
*
* @param[out] ps_out
*  Output residuals (WORD16)
*
* @param[in] ps_scratch
*  Scratch memory; BLK_SIZE * BLK8x8SIZE WORD32 elements are written
*
* @param[in] u4_ref_nnz
*  Unused
*
* @param[in] u1_ref_tx_size
*  Unused
*
*******************************************************************************
*/
void isvce_chroma_residual_sampler_2x(coordinates_t *ps_ref_array_positions,
                                      coordinates_t *ps_ref_array_phases,
                                      buffer_container_t *ps_inp, buffer_container_t *ps_out,
                                      buffer_container_t *ps_scratch, UWORD32 u4_ref_nnz,
                                      UWORD8 u1_ref_tx_size)
{
    WORD32 i4_i, i4_j;
    WORD32 *pi4_ref_array;
    WORD32 i4_phase1, i4_phase2;

    WORD16 *pi2_inp_data = ps_inp->pv_data;
    WORD16 *pi2_out_res = ps_out->pv_data;
    WORD32 i4_inp_data_stride = ps_inp->i4_data_stride;
    WORD32 i4_out_res_stride = ps_out->i4_data_stride;

    UNUSED(u4_ref_nnz);
    UNUSED(ps_ref_array_positions);
    UNUSED(u1_ref_tx_size);

    /* For 2x scaling, offsets always point to the top-left pixel outside the */
    /* MB. Since the phase of the first refArray position is >= 8, the first  */
    /* row and the first column of the refArray are never used; skip them.    */
    pi2_inp_data += 2 + i4_inp_data_stride;

    /* ----------- Horizontal Interpolation ---------------- */
    i4_phase1 = ps_ref_array_phases[0].i4_abscissa;
    i4_phase2 = ps_ref_array_phases[1].i4_abscissa;

    ASSERT(i4_phase1 >= 8);

    pi4_ref_array = (WORD32 *) ps_scratch->pv_data;

    for(i4_i = 0; i4_i < BLK_SIZE; i4_i++)
    {
        const WORD16 *pi2_ref_row = pi2_inp_data + (i4_i * i4_inp_data_stride);
        WORD32 i4_coeff1 = pi2_ref_row[0];

        /* First sample of the row has no left neighbour: scale it by 16.   */
        /* Multiplication is used instead of '<< 4' because the residual    */
        /* may be negative and left-shifting a negative value is undefined. */
        *pi4_ref_array++ = i4_coeff1 * 16;

        /* Every pair of neighbouring input samples yields two outputs */
        for(i4_j = 1; i4_j < BLK_SIZE; i4_j++)
        {
            WORD32 i4_coeff2 = pi2_ref_row[2 * i4_j];

            *pi4_ref_array++ = (16 - i4_phase2) * i4_coeff1 + i4_phase2 * i4_coeff2;
            *pi4_ref_array++ = (16 - i4_phase1) * i4_coeff1 + i4_phase1 * i4_coeff2;

            /* right neighbour becomes the left one for the next pair */
            i4_coeff1 = i4_coeff2;
        }

        /* Last sample of the row has no right neighbour: scale it by 16 */
        *pi4_ref_array++ = i4_coeff1 * 16;
    }

    /* ----------- Vertical Interpolation ---------------- */
    pi4_ref_array = (WORD32 *) ps_scratch->pv_data;

    i4_phase1 = ps_ref_array_phases[0].i4_ordinate;
    i4_phase2 = ps_ref_array_phases[2].i4_ordinate;

    for(i4_i = 0; i4_i < BLK8x8SIZE; i4_i++)
    {
        const WORD32 *pi4_ref_col = pi4_ref_array + i4_i;
        WORD16 *pi2_out = pi2_out_res + (2 * i4_i);
        WORD32 i4_horz_samp_1 = pi4_ref_col[0];

        /* First output row: only the horizontal scaling (x16) is removed */
        *pi2_out = (i4_horz_samp_1 + 8) >> 4;
        pi2_out += i4_out_res_stride;

        for(i4_j = 1; i4_j < BLK_SIZE; i4_j++)
        {
            WORD32 i4_horz_samp_2 = pi4_ref_col[i4_j * BLK8x8SIZE];

            /* Both passes carry a x16 weight, hence the rounded '>> 8' */
            *pi2_out = ((16 - i4_phase2) * i4_horz_samp_1 + i4_phase2 * i4_horz_samp_2 + 128) >> 8;
            pi2_out += i4_out_res_stride;

            *pi2_out = ((16 - i4_phase1) * i4_horz_samp_1 + i4_phase1 * i4_horz_samp_2 + 128) >> 8;
            pi2_out += i4_out_res_stride;

            /* lower neighbour becomes the upper one for the next pair */
            i4_horz_samp_1 = i4_horz_samp_2;
        }

        /* Last output row: only the horizontal scaling (x16) is removed */
        *pi2_out = (i4_horz_samp_1 + 8) >> 4;
    }
}
193 
isvce_luma_residual_sampler_2x(coordinates_t * ps_ref_array_positions,coordinates_t * ps_ref_array_phases,buffer_container_t * ps_inp,buffer_container_t * ps_out,buffer_container_t * ps_scratch,UWORD32 u4_ref_nnz,UWORD8 u1_ref_tx_size)194 void isvce_luma_residual_sampler_2x(coordinates_t *ps_ref_array_positions,
195                                     coordinates_t *ps_ref_array_phases, buffer_container_t *ps_inp,
196                                     buffer_container_t *ps_out, buffer_container_t *ps_scratch,
197                                     UWORD32 u4_ref_nnz, UWORD8 u1_ref_tx_size)
198 {
199     WORD16 *pi2_inp_data = ps_inp->pv_data;
200     WORD16 *pi2_out_res = ps_out->pv_data;
201     WORD32 i4_inp_data_stride = ps_inp->i4_data_stride;
202     WORD32 i4_out_res_stride = ps_out->i4_data_stride;
203     WORD16 *pi2_refarray_buffer = ps_scratch->pv_data;
204     WORD32 i4_blk_ctr;
205 
206     UNUSED(ps_ref_array_positions);
207     UNUSED(ps_ref_array_phases);
208 
209     /* For 2x scaling, offsets always point to TL pixel outside MB */
210     /* Hence, refTransBlkIdc will be different and since phase */
211     /* for first refArray pos for horiz filtering samples > 8, */
212     /* first row and first column from the refArray is never used */
213     pi2_inp_data += 1 + i4_inp_data_stride;
214 
215     if((u1_ref_tx_size) && (0 != u4_ref_nnz))
216     {
217         WORD16 *pi2_ref_data_byte;
218         WORD32 *pi4_ref_array;
219         WORD32 i4_i, i4_j;
220 
221         pi2_ref_data_byte = pi2_inp_data;
222 
223         /* ----------- Horizontal Interpolation ---------------- */
224         pi4_ref_array = (WORD32 *) pi2_refarray_buffer;
225 
226         for(i4_i = 0; i4_i < BLK8x8SIZE; i4_i++)
227         {
228             WORD16 i2_coeff1, i2_coeff2;
229 
230             i2_coeff1 = (WORD16) (*pi2_ref_data_byte++);
231 
232             /* populate the first inter sample */
233             *pi4_ref_array++ = i2_coeff1 << 2;
234 
235             for(i4_j = 0; i4_j < 14; i4_j += 2)
236             {
237                 i2_coeff2 = (WORD16) (*pi2_ref_data_byte++);
238 
239                 /* populate 2 samples based on current coeffs */
240                 *pi4_ref_array++ = ((i2_coeff1 << 1) + (i2_coeff1) + (i2_coeff2));
241 
242                 *pi4_ref_array++ = ((i2_coeff2 << 1) + (i2_coeff2) + (i2_coeff1));
243 
244                 /* store the coeff 2 to coeff 1 */
245                 /* (used in next iteration)     */
246                 i2_coeff1 = i2_coeff2;
247             }
248 
249             /* populate the last inter sample */
250             *pi4_ref_array++ = i2_coeff1 << 2;
251 
252             /* vertical loop uopdates */
253             pi2_ref_data_byte = pi2_inp_data + ((i4_i + 1) * i4_inp_data_stride);
254         }
255 
256         /* ----------- Vertical Interpolation ---------------- */
257         pi4_ref_array = (WORD32 *) pi2_refarray_buffer;
258 
259         for(i4_i = 0; i4_i < MB_SIZE; i4_i++)
260         {
261             WORD32 *pi4_ref_array_temp;
262             WORD16 *pi2_out;
263             WORD32 i4_horz_samp_1, i4_horz_samp_2;
264 
265             pi4_ref_array_temp = pi4_ref_array;
266             pi2_out = pi2_out_res;
267             i4_horz_samp_1 = *pi4_ref_array_temp;
268 
269             /* populate the first inter sample */
270             *pi2_out = (i4_horz_samp_1 + 2) >> 2;
271             pi2_out += i4_out_res_stride;
272 
273             for(i4_j = 0; i4_j < 14; i4_j += 2)
274             {
275                 pi4_ref_array_temp += MB_SIZE;
276                 i4_horz_samp_2 = *pi4_ref_array_temp;
277 
278                 /* populate 2 samples based on current coeffs */
279                 *pi2_out = ((i4_horz_samp_1 << 1) + (i4_horz_samp_1) + (i4_horz_samp_2) + 8) >> 4;
280                 pi2_out += i4_out_res_stride;
281 
282                 *pi2_out = ((i4_horz_samp_2 << 1) + (i4_horz_samp_2) + (i4_horz_samp_1) + 8) >> 4;
283                 pi2_out += i4_out_res_stride;
284 
285                 /* store the coeff 2 to coeff 1 */
286                 /* (used in next iteration)     */
287                 i4_horz_samp_1 = i4_horz_samp_2;
288             }
289 
290             /* populate the first inter sample */
291             *pi2_out = (i4_horz_samp_1 + 2) >> 2;
292 
293             /* horizontal loop updates */
294             pi4_ref_array++;
295             pi2_out_res++;
296         }
297     }
298     else
299     {
300         /* ----------------------------------------------------------------- */
301         /* LOOP over number of blocks                                        */
302         /* ----------------------------------------------------------------- */
303         for(i4_blk_ctr = 0; i4_blk_ctr < BLK_SIZE; i4_blk_ctr++)
304         {
305             WORD16 *pi2_ref_data_byte;
306             WORD32 *pi4_ref_array;
307             WORD32 i4_i;
308 
309             /* if reference layer is not coded then no processing */
310             if(0 != (u4_ref_nnz & 0x1))
311             {
312                 pi2_ref_data_byte = pi2_inp_data;
313 
314                 /* ----------- Horizontal Interpolation ---------------- */
315                 pi4_ref_array = (WORD32 *) pi2_refarray_buffer;
316 
317                 for(i4_i = 0; i4_i < BLK_SIZE; i4_i++)
318                 {
319                     WORD16 i2_coeff1, i2_coeff2;
320 
321                     i2_coeff1 = (WORD16) (*pi2_ref_data_byte++);
322 
323                     /* populate the first inter sample */
324                     *pi4_ref_array++ = i2_coeff1 << 2;
325 
326                     {
327                         i2_coeff2 = (WORD16) (*pi2_ref_data_byte++);
328 
329                         /* populate 2 samples based on current coeffs */
330                         *pi4_ref_array++ = ((i2_coeff1 << 1) + (i2_coeff1) + (i2_coeff2));
331 
332                         *pi4_ref_array++ = ((i2_coeff2 << 1) + (i2_coeff2) + (i2_coeff1));
333 
334                         i2_coeff1 = (WORD16) (*pi2_ref_data_byte++);
335 
336                         /* populate 2 samples based on current coeffs */
337                         *pi4_ref_array++ = ((i2_coeff2 << 1) + (i2_coeff2) + (i2_coeff1));
338 
339                         *pi4_ref_array++ = ((i2_coeff1 << 1) + (i2_coeff1) + (i2_coeff2));
340 
341                         i2_coeff2 = (WORD16) (*pi2_ref_data_byte++);
342 
343                         /* populate 2 samples based on current coeffs */
344                         *pi4_ref_array++ = ((i2_coeff1 << 1) + (i2_coeff1) + (i2_coeff2));
345 
346                         *pi4_ref_array++ = ((i2_coeff2 << 1) + (i2_coeff2) + (i2_coeff1));
347                     }
348 
349                     /* populate the last inter sample */
350                     *pi4_ref_array++ = i2_coeff2 << 2;
351 
352                     /* vertical loop uopdates */
353                     pi2_ref_data_byte = pi2_inp_data + ((i4_i + 1) * i4_inp_data_stride);
354                 }
355 
356                 /* ----------- Vertical Interpolation ---------------- */
357                 pi4_ref_array = (WORD32 *) pi2_refarray_buffer;
358 
359                 for(i4_i = 0; i4_i < BLK8x8SIZE; i4_i++)
360                 {
361                     WORD32 *pi4_ref_array_temp;
362                     WORD16 *pi2_out;
363                     WORD32 i4_horz_samp_1, i4_horz_samp_2;
364 
365                     pi4_ref_array_temp = pi4_ref_array;
366                     pi2_out = pi2_out_res;
367                     i4_horz_samp_1 = *pi4_ref_array_temp;
368 
369                     /* populate the first inter sample */
370                     *pi2_out = (i4_horz_samp_1 + 2) >> 2;
371                     pi2_out += i4_out_res_stride;
372 
373                     {
374                         /* unroll loop count 1 */
375                         pi4_ref_array_temp += BLK8x8SIZE;
376                         i4_horz_samp_2 = *pi4_ref_array_temp;
377 
378                         /* populate 2 samples based on current coeffs */
379                         *pi2_out =
380                             ((i4_horz_samp_1 << 1) + (i4_horz_samp_1) + (i4_horz_samp_2) + 8) >> 4;
381                         pi2_out += i4_out_res_stride;
382 
383                         *pi2_out =
384                             ((i4_horz_samp_2 << 1) + (i4_horz_samp_2) + (i4_horz_samp_1) + 8) >> 4;
385                         pi2_out += i4_out_res_stride;
386 
387                         /* unroll loop count 2 */
388                         pi4_ref_array_temp += BLK8x8SIZE;
389                         i4_horz_samp_1 = *pi4_ref_array_temp;
390 
391                         /* populate 2 samples based on current coeffs */
392                         *pi2_out =
393                             ((i4_horz_samp_2 << 1) + (i4_horz_samp_2) + (i4_horz_samp_1) + 8) >> 4;
394                         pi2_out += i4_out_res_stride;
395 
396                         *pi2_out =
397                             ((i4_horz_samp_1 << 1) + (i4_horz_samp_1) + (i4_horz_samp_2) + 8) >> 4;
398                         pi2_out += i4_out_res_stride;
399 
400                         /* unroll loop count 3 */
401                         pi4_ref_array_temp += BLK8x8SIZE;
402                         i4_horz_samp_2 = *pi4_ref_array_temp;
403 
404                         /* populate 2 samples based on current coeffs */
405                         *pi2_out =
406                             ((i4_horz_samp_1 << 1) + (i4_horz_samp_1) + (i4_horz_samp_2) + 8) >> 4;
407                         pi2_out += i4_out_res_stride;
408 
409                         *pi2_out =
410                             ((i4_horz_samp_2 << 1) + (i4_horz_samp_2) + (i4_horz_samp_1) + 8) >> 4;
411                         pi2_out += i4_out_res_stride;
412                     }
413 
414                     /* populate the last inter sample */
415                     *pi2_out = (i4_horz_samp_2 + 2) >> 2;
416 
417                     /* horizontal loop updates */
418                     pi4_ref_array++;
419                     pi2_out_res++;
420                 }
421             }
422             else
423             {
424                 pi2_out_res += BLK8x8SIZE;
425             }
426 
427             if(1 == i4_blk_ctr)
428             {
429                 pi2_inp_data -= BLK_SIZE;
430                 pi2_inp_data += (i4_inp_data_stride * BLK_SIZE);
431                 pi2_out_res -= MB_SIZE;
432                 pi2_out_res += (i4_out_res_stride * BLK8x8SIZE);
433                 u4_ref_nnz >>= 2;
434             }
435             else
436             {
437                 pi2_inp_data += BLK_SIZE;
438             }
439 
440             u4_ref_nnz >>= 1;
441         }
442     }
443 }
444 
445 /**
446 *******************************************************************************
447 *
448 * @brief
449 *  Returns size of buffers for storing residual pred ctxt
450 *
451 * @param[in] u1_num_spatial_layers
452 *  Num Spatial Layers
453 *
454 * @param[in] d_spatial_res_ratio
455 *  Resolution Ratio b/w spatial layers
456 *
457 * @param[in] u4_wd
458 *  Input Width
459 *
460 * @param[in] u4_ht
461 *  Input Height
462 *
463 * @returns  Size of buffers
464 *
465 *******************************************************************************
466 */
isvce_get_svc_res_pred_ctxt_size(UWORD8 u1_num_spatial_layers,DOUBLE d_spatial_res_ratio,UWORD32 u4_wd,UWORD32 u4_ht)467 UWORD32 isvce_get_svc_res_pred_ctxt_size(UWORD8 u1_num_spatial_layers, DOUBLE d_spatial_res_ratio,
468                                          UWORD32 u4_wd, UWORD32 u4_ht)
469 {
470     UWORD32 u4_size = 0;
471 
472     if(u1_num_spatial_layers > 1)
473     {
474         WORD32 i;
475 
476         u4_size += MAX_PROCESS_CTXT * sizeof(svc_res_pred_ctxt_t);
477         u4_size += MAX_PROCESS_CTXT * sizeof(res_pred_state_t);
478 
479         /* Mem for storing pred */
480         u4_size += MAX_PROCESS_CTXT * MB_SIZE * MB_SIZE * sizeof(WORD16);
481         u4_size += MAX_PROCESS_CTXT * MB_SIZE * (MB_SIZE / 2) * sizeof(WORD16);
482 
483         /* Mem for storing intermediates */
484         u4_size += MAX_PROCESS_CTXT * REF_ARRAY_MAX_WIDTH * REF_ARRAY_MAX_HEIGHT * sizeof(WORD16);
485 
486         /* Mem for pu1_ref_x_ptr_incr and pu1_ref_y_ptr_incr*/
487         u4_size +=
488             2 * MAX_PROCESS_CTXT * REF_ARRAY_MAX_WIDTH * REF_ARRAY_MAX_HEIGHT * sizeof(UWORD8);
489 
490         u4_size += MAX_PROCESS_CTXT * u1_num_spatial_layers * sizeof(res_pred_layer_state_t);
491 
492         for(i = u1_num_spatial_layers - 1; i >= 1; i--)
493         {
494             WORD32 i4_layer_luma_wd =
495                 (WORD32) ((DOUBLE) u4_wd /
496                           pow(d_spatial_res_ratio, u1_num_spatial_layers - 1 - i)) +
497                 0.99;
498             WORD32 i4_layer_luma_ht =
499                 ((DOUBLE) u4_ht / pow(d_spatial_res_ratio, u1_num_spatial_layers - 1 - i)) + 0.99;
500             WORD32 i4_layer_luma_mbs = (i4_layer_luma_wd / MB_SIZE) * (i4_layer_luma_ht / MB_SIZE);
501             WORD32 i4_layer_u_wd = i4_layer_luma_wd / 2.0 + 0.99;
502             WORD32 i4_layer_u_ht = i4_layer_luma_ht / 2.0 + 0.99;
503             WORD32 i4_layer_u_mbs =
504                 (i4_layer_u_wd / (MB_SIZE / 2)) * (i4_layer_u_ht / (MB_SIZE / 2));
505 
506             /* ps_luma_mb_states */
507             {
508                 u4_size += i4_layer_luma_mbs * sizeof(res_pred_mb_state_t);
509 
510                 /* ps_ref_array_positions */
511                 u4_size +=
512                     ((1.5 == d_spatial_res_ratio) ? (i4_layer_luma_mbs * MB_SIZE * MB_SIZE) : 0) *
513                     sizeof(coordinates_t);
514 
515                 /* ps_ref_array_phases */
516                 u4_size += ((1.5 == d_spatial_res_ratio) ? (i4_layer_luma_mbs * 5) : 0) *
517                            sizeof(coordinates_t);
518             }
519 
520             /* ps_chroma_mb_states */
521             {
522                 u4_size += i4_layer_u_mbs * sizeof(res_pred_mb_state_t);
523 
524                 /* ps_ref_array_positions */
525                 u4_size +=
526                     ((1.5 == d_spatial_res_ratio) ? (i4_layer_u_mbs * (MB_SIZE / 2) * (MB_SIZE / 2))
527                                                   : 0) *
528                     sizeof(coordinates_t);
529 
530                 /* ps_ref_array_phases */
531                 u4_size += ((1.5 == d_spatial_res_ratio) ? (i4_layer_u_mbs * 5) : 3) *
532                            sizeof(coordinates_t);
533             }
534         }
535 
536         for(i = u1_num_spatial_layers - 1; i >= 0; i--)
537         {
538             WORD32 i4_layer_luma_wd =
539                 (WORD32) ((DOUBLE) u4_wd /
540                           pow(d_spatial_res_ratio, u1_num_spatial_layers - 1 - i)) +
541                 0.99;
542             WORD32 i4_layer_luma_ht =
543                 ((DOUBLE) u4_ht / pow(d_spatial_res_ratio, u1_num_spatial_layers - 1 - i)) + 0.99;
544             WORD32 i4_layer_luma_mbs =
545                 ((i4_layer_luma_wd / MB_SIZE) + 2) * ((i4_layer_luma_ht / MB_SIZE) + 2);
546 
547             /* pi1_mb_mode */
548             u4_size += i4_layer_luma_mbs * sizeof(WORD8);
549         }
550     }
551     else
552     {
553         u4_size += MAX_PROCESS_CTXT * sizeof(yuv_buf_props_t);
554 
555         /* Mem for storing pred */
556         u4_size += MAX_PROCESS_CTXT * MB_SIZE * MB_SIZE * sizeof(WORD16);
557         u4_size += MAX_PROCESS_CTXT * MB_SIZE * (MB_SIZE / 2) * sizeof(WORD16);
558     }
559 
560     return u4_size;
561 }
562 
isvce_get_scaled_pixel_pos(layer_resampler_props_t * ps_layer_props,WORD32 i4_pixel_pos,UWORD8 u1_dim_id)563 static FORCEINLINE WORD32 isvce_get_scaled_pixel_pos(layer_resampler_props_t *ps_layer_props,
564                                                      WORD32 i4_pixel_pos, UWORD8 u1_dim_id)
565 {
566     if(1 == u1_dim_id)
567     {
568         return (((i4_pixel_pos - ps_layer_props->i4_offset_y) *
569                      ((WORD64) ps_layer_props->u4_scale_y) +
570                  ps_layer_props->i4_add_y) >>
571                 (ps_layer_props->u4_shift_y - 4)) -
572                ps_layer_props->i4_delta_y;
573     }
574     else
575     {
576         return (((i4_pixel_pos - ps_layer_props->i4_offset_x) *
577                      ((WORD64) ps_layer_props->u4_scale_x) +
578                  ps_layer_props->i4_add_x) >>
579                 (ps_layer_props->u4_shift_x - 4)) -
580                ps_layer_props->i4_delta_x;
581     }
582 }
583 
isvce_ref_array_pos_and_phase_init_dyadic(layer_resampler_props_t * ps_layer_props,res_pred_mb_state_t * ps_mb_state,coordinates_t * ps_mb_pos,UWORD8 u1_frame_mbs_only_flag,UWORD8 u1_field_mb_flag,UWORD8 u1_ref_layer_frame_mbs_only_flag)584 static FORCEINLINE void isvce_ref_array_pos_and_phase_init_dyadic(
585     layer_resampler_props_t *ps_layer_props, res_pred_mb_state_t *ps_mb_state,
586     coordinates_t *ps_mb_pos, UWORD8 u1_frame_mbs_only_flag, UWORD8 u1_field_mb_flag,
587     UWORD8 u1_ref_layer_frame_mbs_only_flag)
588 {
589     UWORD32 i, j;
590 
591     coordinates_t *ps_ref_array_phases = ps_mb_state->ps_ref_array_phases;
592 
593     WORD32 i4_x_offset = ps_mb_state->s_offsets.i4_abscissa;
594     WORD32 i4_y_offset = ps_mb_state->s_offsets.i4_ordinate;
595 
596     for(i = 0; i < 2; i++)
597     {
598         WORD32 i4_y_ref16;
599 
600         WORD32 i4_yc = ps_mb_pos->i4_ordinate * ps_layer_props->u4_mb_ht + i;
601 
602         if((0 == u1_frame_mbs_only_flag) || (0 == u1_ref_layer_frame_mbs_only_flag))
603         {
604             i4_yc = i4_yc >> (1 - u1_field_mb_flag);
605         }
606 
607         i4_y_ref16 = isvce_get_scaled_pixel_pos(ps_layer_props, i4_yc, 1);
608 
609         for(j = 0; j < ((0 == i) ? 2 : 1); j++)
610         {
611             WORD32 i4_x_ref16;
612 
613             WORD32 i4_xc = ps_mb_pos->i4_abscissa * ps_layer_props->u4_mb_wd + j;
614 
615             i4_x_ref16 = isvce_get_scaled_pixel_pos(ps_layer_props, i4_xc, 0);
616 
617             ps_ref_array_phases[j + i * 2].i4_abscissa = (i4_x_ref16 - (16 * i4_x_offset)) & 15;
618             ps_ref_array_phases[j + i * 2].i4_ordinate = (i4_y_ref16 - (16 * i4_y_offset)) & 15;
619         }
620     }
621 }
622 
isvce_ref_array_pos_and_phase_init(layer_resampler_props_t * ps_layer_props,res_pred_mb_state_t * ps_mb_state,coordinates_t * ps_mb_pos,UWORD8 u1_frame_mbs_only_flag,UWORD8 u1_field_mb_flag,UWORD8 u1_ref_layer_frame_mbs_only_flag)623 static FORCEINLINE void isvce_ref_array_pos_and_phase_init(layer_resampler_props_t *ps_layer_props,
624                                                            res_pred_mb_state_t *ps_mb_state,
625                                                            coordinates_t *ps_mb_pos,
626                                                            UWORD8 u1_frame_mbs_only_flag,
627                                                            UWORD8 u1_field_mb_flag,
628                                                            UWORD8 u1_ref_layer_frame_mbs_only_flag)
629 {
630     UWORD32 i, j;
631 
632     coordinates_t *ps_ref_array_positions = ps_mb_state->ps_ref_array_positions;
633     coordinates_t *ps_ref_array_phases = ps_mb_state->ps_ref_array_phases;
634 
635     WORD32 i4_x_offset = ps_mb_state->s_offsets.i4_abscissa;
636     WORD32 i4_y_offset = ps_mb_state->s_offsets.i4_ordinate;
637     UWORD32 u4_phase_array_idx = 0;
638 
639     for(i = 0; i < ps_layer_props->u4_mb_ht; i++)
640     {
641         WORD32 i4_y_ref16;
642 
643         WORD32 i4_yc = ps_mb_pos->i4_ordinate * ps_layer_props->u4_mb_ht + i;
644 
645         if((0 == u1_frame_mbs_only_flag) || (0 == u1_ref_layer_frame_mbs_only_flag))
646         {
647             i4_yc = i4_yc >> (1 - u1_field_mb_flag);
648         }
649 
650         i4_y_ref16 = isvce_get_scaled_pixel_pos(ps_layer_props, i4_yc, 1);
651 
652         for(j = 0; j < ps_layer_props->u4_mb_wd; j++)
653         {
654             WORD32 i4_x_ref16;
655 
656             WORD32 i4_xc = ps_mb_pos->i4_abscissa * ps_layer_props->u4_mb_wd + j;
657 
658             i4_x_ref16 = isvce_get_scaled_pixel_pos(ps_layer_props, i4_xc, 0);
659 
660             ps_ref_array_positions[j + i * ps_layer_props->u4_mb_wd].i4_abscissa =
661                 (i4_x_ref16 >> 4) - i4_x_offset;
662             ps_ref_array_positions[j + i * ps_layer_props->u4_mb_wd].i4_ordinate =
663                 (i4_y_ref16 >> 4) - i4_y_offset;
664 
665             if(((0 == i) && (j < 3)) || ((0 == j) && (i < 3)))
666             {
667                 ps_ref_array_phases[u4_phase_array_idx].i4_abscissa =
668                     (i4_x_ref16 - (16 * i4_x_offset)) & 15;
669                 ps_ref_array_phases[u4_phase_array_idx].i4_ordinate =
670                     (i4_y_ref16 - (16 * i4_y_offset)) & 15;
671 
672                 u4_phase_array_idx++;
673             }
674         }
675     }
676 }
677 
/**
*******************************************************************************
*
* @brief
*  Initialises the per-MB residual prediction state of one layer
*
* @par Description
*  Runs once for luma and once for chroma (chroma dimensions are the luma
*  dimensions shifted right by one). For every MB it records the reference
*  array offset (integer part of the scaled position of the MB's first pixel)
*  and the reference array dimensions (span between the scaled positions of
*  the MB's first and last pixel, plus 2).
*  Position/phase tables are filled as follows:
*   - ratio 2   : only for chroma MB (0, 0), using the dyadic initialiser
*   - ratio 1.5 : for every MB of both planes
*   - otherwise : not at all
*
* @param[in,out] ps_layer_state
*  Layer state; the luma/chroma properties are read and the luma/chroma MB
*  states are written
*
* @param[in] d_spatial_res_ratio
*  Resolution Ratio b/w spatial layers
*
* @param[in] u4_wd
*  Layer luma width
*
* @param[in] u4_ht
*  Layer luma height
*
* @param[in] e_color_format
*  Only checked by an ASSERT (IV_YUV_420P or IV_YUV_420SP_UV expected)
*
*******************************************************************************
*/
static void isvce_res_pred_layer_state_init(res_pred_layer_state_t *ps_layer_state,
                                            DOUBLE d_spatial_res_ratio, UWORD32 u4_wd,
                                            UWORD32 u4_ht, IV_COLOR_FORMAT_T e_color_format)
{
    UWORD32 i, j, k;

    /* Only progressive frame coding is handled here */
    const UWORD8 u1_field_pic_flag = 0;
    const UWORD8 u1_frame_mbs_only_flag = 1;
    const UWORD8 u1_ref_layer_frame_mbs_only_flag = 1;
    const UWORD8 u1_field_mb_flag = 0;

    ASSERT((IV_YUV_420P == e_color_format) || (IV_YUV_420SP_UV == e_color_format));

    UNUSED(e_color_format);

    for(i = 0; i < 2; i++)
    {
        UWORD8 u1_is_chroma = (Y != ((COMPONENT_TYPE) i));

        res_pred_mb_state_t *ps_mb_states =
            u1_is_chroma ? ps_layer_state->ps_chroma_mb_states : ps_layer_state->ps_luma_mb_states;
        layer_resampler_props_t *ps_layer_props =
            u1_is_chroma ? ps_layer_state->ps_chroma_props : ps_layer_state->ps_luma_props;

        UWORD32 u4_scaled_wd = u4_wd >> u1_is_chroma;
        UWORD32 u4_scaled_ht = (u4_ht * (1 + u1_field_pic_flag)) >> u1_is_chroma;

        UWORD32 u4_wd_in_mbs = u4_scaled_wd / ps_layer_props->u4_mb_wd;
        UWORD32 u4_ht_in_mbs = u4_scaled_ht / ps_layer_props->u4_mb_ht;

        for(j = 0; j < u4_ht_in_mbs; j++)
        {
            /* Scaled positions of the first and the last row of the MB */
            WORD32 i4_y_refmin16 =
                isvce_get_scaled_pixel_pos(ps_layer_props, j * ps_layer_props->u4_mb_ht, 1);
            WORD32 i4_y_refmax16 = isvce_get_scaled_pixel_pos(
                ps_layer_props, j * ps_layer_props->u4_mb_ht + ps_layer_props->u4_mb_ht - 1, 1);
            WORD32 i4_y_offset = i4_y_refmin16 >> 4;

            for(k = 0; k < u4_wd_in_mbs; k++)
            {
                res_pred_mb_state_t *ps_mb_state = &ps_mb_states[k + j * u4_wd_in_mbs];

                coordinates_t s_mb_pos = {k, j};

                /* Scaled positions of the first and the last column of the MB */
                WORD32 i4_x_refmin16 =
                    isvce_get_scaled_pixel_pos(ps_layer_props, k * ps_layer_props->u4_mb_wd, 0);
                WORD32 i4_x_refmax16 = isvce_get_scaled_pixel_pos(
                    ps_layer_props, k * ps_layer_props->u4_mb_wd + ps_layer_props->u4_mb_wd - 1, 0);
                WORD32 i4_x_offset = i4_x_refmin16 >> 4;

                ps_mb_state->s_offsets.i4_abscissa = i4_x_offset;
                ps_mb_state->s_offsets.i4_ordinate = i4_y_offset;
                ps_mb_state->s_ref_array_dims.i4_abscissa = (i4_x_refmax16 >> 4) - i4_x_offset + 2;
                ps_mb_state->s_ref_array_dims.i4_ordinate = (i4_y_refmax16 >> 4) - i4_y_offset + 2;

                if((0 == k) && (0 == j) && (2 == d_spatial_res_ratio) && u1_is_chroma)
                {
                    /* Dyadic: one set of phases, stored with the first chroma MB */
                    isvce_ref_array_pos_and_phase_init_dyadic(
                        ps_layer_props, ps_mb_state, &s_mb_pos, u1_frame_mbs_only_flag,
                        u1_field_mb_flag, u1_ref_layer_frame_mbs_only_flag);
                }
                else if(1.5 == d_spatial_res_ratio)
                {
                    isvce_ref_array_pos_and_phase_init(
                        ps_layer_props, ps_mb_state, &s_mb_pos, u1_frame_mbs_only_flag,
                        u1_field_mb_flag, u1_ref_layer_frame_mbs_only_flag);
                }
            }
        }
    }
}
770 
/**
*******************************************************************************
*
* @brief
*  Selects the residual sampling and SAD kernels for the given architecture
*
* @param[out] ps_res_pred_state
*  Residual prediction state whose function pointers are populated
*
* @param[in] d_spatial_res_ratio
*  Spatial resolution ratio between successive layers
*
* @param[in] e_arch
*  Architecture for which kernels are to be selected
*
* @remarks
*  'apf_residual_samplers' is written only when the ratio is exactly 2;
*  'pf_get_sad_with_residual_pred' is always written
*
*******************************************************************************
*/
void isvce_svc_residual_sampling_function_selector(res_pred_state_t *ps_res_pred_state,
                                                   DOUBLE d_spatial_res_ratio, IV_ARCH_T e_arch)
{
    const UWORD8 u1_is_dyadic = (2. == d_spatial_res_ratio);

    if(u1_is_dyadic)
    {
        /* Chroma sampling has no architecture specific variant */
        ps_res_pred_state->apf_residual_samplers[U] = isvce_chroma_residual_sampler_2x;
        ps_res_pred_state->apf_residual_samplers[V] = isvce_chroma_residual_sampler_2x;
    }

    /* Luma sampler (dyadic only) and SAD kernel are chosen per architecture */
    switch(e_arch)
    {
#if defined(X86)
        case ARCH_X86_SSE42:
        {
            if(u1_is_dyadic)
            {
                ps_res_pred_state->apf_residual_samplers[Y] = isvce_luma_residual_sampler_2x_sse42;
            }

            ps_res_pred_state->pf_get_sad_with_residual_pred =
                isvce_get_sad_with_residual_pred_sse42;

            break;
        }
#elif defined(ARMV8)
        case ARCH_ARM_A53:
        case ARCH_ARM_A57:
        case ARCH_ARM_V8_NEON:
        {
            if(u1_is_dyadic)
            {
                ps_res_pred_state->apf_residual_samplers[Y] = isvce_luma_residual_sampler_2x_neon;
            }

            ps_res_pred_state->pf_get_sad_with_residual_pred =
                isvce_get_sad_with_residual_pred_neon;

            break;
        }
#elif defined(ARM) && !defined(DISABLE_NEON)
        case ARCH_ARM_A9Q:
        case ARCH_ARM_A9A:
        case ARCH_ARM_A9:
        case ARCH_ARM_A7:
        case ARCH_ARM_A5:
        case ARCH_ARM_A15:
        {
            if(u1_is_dyadic)
            {
                ps_res_pred_state->apf_residual_samplers[Y] = isvce_luma_residual_sampler_2x_neon;
            }

            ps_res_pred_state->pf_get_sad_with_residual_pred =
                isvce_get_sad_with_residual_pred_neon;

            break;
        }
#endif
        default:
        {
            /* Portable C fallbacks */
            if(u1_is_dyadic)
            {
                ps_res_pred_state->apf_residual_samplers[Y] = isvce_luma_residual_sampler_2x;
            }

            ps_res_pred_state->pf_get_sad_with_residual_pred = isvce_get_sad_with_residual_pred;

            break;
        }
    }
}
861 
862 /**
863 *******************************************************************************
864 *
865 * @brief
866 *  Function to initialize svc ilp buffers
867 *
868 * @param[in] ps_codec
869 *  Pointer to codec context
870 *
871 * @param[in] ps_mem_rec
872 *  Pointer to memory allocated for input buffers
873 *
874 *******************************************************************************
875 */
isvce_svc_res_pred_ctxt_init(isvce_codec_t * ps_codec,iv_mem_rec_t * ps_mem_rec)876 void isvce_svc_res_pred_ctxt_init(isvce_codec_t *ps_codec, iv_mem_rec_t *ps_mem_rec)
877 {
878     WORD32 i, j, k;
879 
880     const WORD32 i4_num_proc_ctxts = sizeof(ps_codec->as_process) / sizeof(ps_codec->as_process[0]);
881     UWORD8 u1_num_spatial_layers = ps_codec->s_cfg.s_svc_params.u1_num_spatial_layers;
882     DOUBLE d_spatial_res_ratio = ps_codec->s_cfg.s_svc_params.d_spatial_res_ratio;
883     UWORD32 u4_wd = ps_codec->s_cfg.u4_wd;
884     UWORD32 u4_ht = ps_codec->s_cfg.u4_ht;
885     UWORD8 *pu1_buf = ps_mem_rec->pv_base;
886     WORD64 i8_alloc_mem_size =
887         isvce_get_svc_res_pred_ctxt_size(u1_num_spatial_layers, d_spatial_res_ratio, u4_wd, u4_ht);
888 
889     if(u1_num_spatial_layers > 1)
890     {
891         res_pred_mb_state_t *aps_luma_mb_states[MAX_NUM_SPATIAL_LAYERS];
892         res_pred_mb_state_t *aps_chroma_mb_states[MAX_NUM_SPATIAL_LAYERS];
893 
894         WORD8 *api1_mb_mode[MAX_NUM_SPATIAL_LAYERS];
895         WORD32 ai4_mb_mode_stride[MAX_NUM_SPATIAL_LAYERS];
896 
897         WORD32 i4_size;
898 
899         for(i = 0; i < i4_num_proc_ctxts; i++)
900         {
901             res_pred_state_t *ps_res_pred_state;
902             svc_res_pred_ctxt_t *ps_res_pred_ctxt;
903             yuv_buf_props_t *ps_mb_res_buf;
904             res_pred_mem_store_t *ps_mem_store;
905 
906             isvce_process_ctxt_t *ps_proc = ps_codec->as_process + i;
907 
908             ps_res_pred_ctxt = ps_proc->ps_res_pred_ctxt = (svc_res_pred_ctxt_t *) pu1_buf;
909             pu1_buf += sizeof(svc_res_pred_ctxt_t);
910             i8_alloc_mem_size -= sizeof(svc_res_pred_ctxt_t);
911 
912             ps_res_pred_ctxt->s_res_pred_constants.pv_state = pu1_buf;
913             ps_res_pred_state = (res_pred_state_t *) pu1_buf;
914             pu1_buf += sizeof(res_pred_state_t);
915             i8_alloc_mem_size -= sizeof(res_pred_state_t);
916 
917             ps_res_pred_state->ps_layer_state = (res_pred_layer_state_t *) pu1_buf;
918             pu1_buf += u1_num_spatial_layers * sizeof(ps_res_pred_state->ps_layer_state[0]);
919             i8_alloc_mem_size -=
920                 u1_num_spatial_layers * sizeof(ps_res_pred_state->ps_layer_state[0]);
921 
922             i4_size = REF_ARRAY_MAX_WIDTH * REF_ARRAY_MAX_HEIGHT * sizeof(UWORD8);
923             ps_res_pred_state->pu1_ref_x_ptr_incr = (UWORD8 *) pu1_buf;
924             pu1_buf += i4_size;
925             ps_res_pred_state->pu1_ref_y_ptr_incr = (UWORD8 *) pu1_buf;
926             pu1_buf += i4_size;
927 
928             ASSERT(i8_alloc_mem_size >= 0);
929 
930             if(0 == i)
931             {
932                 UWORD32 au4_ref_pos_array_size[NUM_SP_COMPONENTS];
933                 UWORD32 au4_ref_phase_array_size[NUM_SP_COMPONENTS];
934 
935                 if(1.5 == d_spatial_res_ratio)
936                 {
937                     au4_ref_pos_array_size[Y] = MB_SIZE * MB_SIZE;
938                     au4_ref_phase_array_size[Y] = 5;
939                     au4_ref_pos_array_size[U] = (MB_SIZE / 2) * (MB_SIZE / 2);
940                     au4_ref_phase_array_size[U] = 5;
941                 }
942                 else
943                 {
944                     au4_ref_pos_array_size[Y] = au4_ref_pos_array_size[U] = 0;
945                     au4_ref_phase_array_size[Y] = 0;
946                     au4_ref_phase_array_size[U] = 3;
947                 }
948 
949                 for(j = u1_num_spatial_layers - 1; j >= 1; j--)
950                 {
951                     res_pred_layer_state_t *ps_layer = &ps_res_pred_state->ps_layer_state[j];
952 
953                     WORD32 i4_layer_luma_wd =
954                         ((DOUBLE) u4_wd / pow(d_spatial_res_ratio, u1_num_spatial_layers - 1 - j)) +
955                         0.99;
956                     WORD32 i4_layer_luma_ht =
957                         ((DOUBLE) u4_ht / pow(d_spatial_res_ratio, u1_num_spatial_layers - 1 - j)) +
958                         0.99;
959                     WORD32 i4_layer_luma_mbs =
960                         (i4_layer_luma_wd / MB_SIZE) * (i4_layer_luma_ht / MB_SIZE);
961                     WORD32 i4_layer_u_wd = i4_layer_luma_wd / 2.0 + 0.99;
962                     WORD32 i4_layer_u_ht = i4_layer_luma_ht / 2.0 + 0.99;
963                     WORD32 i4_layer_u_mbs =
964                         (i4_layer_u_wd / (MB_SIZE / 2)) * (i4_layer_u_ht / (MB_SIZE / 2));
965 
966                     ps_layer->ps_luma_mb_states = (res_pred_mb_state_t *) pu1_buf;
967                     aps_luma_mb_states[j] = ps_layer->ps_luma_mb_states;
968                     pu1_buf += i4_layer_luma_mbs * sizeof(ps_layer->ps_luma_mb_states[0]);
969                     i8_alloc_mem_size -=
970                         u1_num_spatial_layers * sizeof(ps_layer->ps_luma_mb_states[0]);
971 
972                     ps_layer->ps_chroma_mb_states = (res_pred_mb_state_t *) pu1_buf;
973                     aps_chroma_mb_states[j] = ps_layer->ps_chroma_mb_states;
974                     pu1_buf += i4_layer_u_mbs * sizeof(ps_layer->ps_chroma_mb_states[0]);
975                     i8_alloc_mem_size -= i4_layer_u_mbs * sizeof(ps_layer->ps_chroma_mb_states[0]);
976 
977                     if(1.5 == d_spatial_res_ratio)
978                     {
979                         coordinates_t *ps_ref_array_pos = (coordinates_t *) pu1_buf;
980                         coordinates_t *ps_ref_array_phases =
981                             ps_ref_array_pos + i4_layer_luma_mbs * au4_ref_pos_array_size[Y];
982 
983                         for(k = 0; k < i4_layer_luma_mbs; k++)
984                         {
985                             ps_layer->ps_luma_mb_states[k].ps_ref_array_positions =
986                                 ps_ref_array_pos + k * au4_ref_pos_array_size[Y];
987                             ps_layer->ps_luma_mb_states[k].ps_ref_array_phases =
988                                 ps_ref_array_phases + k * au4_ref_phase_array_size[Y];
989                             pu1_buf += au4_ref_pos_array_size[Y] * sizeof(ps_ref_array_pos[0]);
990                             i8_alloc_mem_size -=
991                                 au4_ref_pos_array_size[Y] * sizeof(ps_ref_array_pos[0]);
992                             pu1_buf += au4_ref_phase_array_size[Y] * sizeof(ps_ref_array_phases[0]);
993                             i8_alloc_mem_size -=
994                                 au4_ref_phase_array_size[Y] * sizeof(ps_ref_array_phases[0]);
995                         }
996 
997                         ps_ref_array_pos = (coordinates_t *) pu1_buf;
998                         ps_ref_array_phases =
999                             ps_ref_array_pos + i4_layer_u_mbs * au4_ref_pos_array_size[U];
1000 
1001                         for(k = 0; k < i4_layer_u_mbs; k++)
1002                         {
1003                             ps_layer->ps_chroma_mb_states[k].ps_ref_array_positions =
1004                                 ps_ref_array_pos + k * au4_ref_pos_array_size[U];
1005                             ps_layer->ps_chroma_mb_states[k].ps_ref_array_phases =
1006                                 ps_ref_array_phases + k * au4_ref_phase_array_size[U];
1007                             pu1_buf += au4_ref_pos_array_size[U] * sizeof(ps_ref_array_pos[0]);
1008                             i8_alloc_mem_size -=
1009                                 au4_ref_pos_array_size[U] * sizeof(ps_ref_array_pos[0]);
1010                             pu1_buf += au4_ref_phase_array_size[U] * sizeof(ps_ref_array_phases[0]);
1011                             i8_alloc_mem_size -=
1012                                 au4_ref_phase_array_size[U] * sizeof(ps_ref_array_phases[0]);
1013                         }
1014                     }
1015                     else
1016                     {
1017                         coordinates_t *ps_ref_array_pos = NULL;
1018                         coordinates_t *ps_ref_array_phases = NULL;
1019 
1020                         for(k = 0; k < i4_layer_luma_mbs; k++)
1021                         {
1022                             ps_layer->ps_luma_mb_states[k].ps_ref_array_positions =
1023                                 ps_ref_array_pos;
1024                             ps_layer->ps_luma_mb_states[k].ps_ref_array_phases =
1025                                 ps_ref_array_phases;
1026                         }
1027 
1028                         ps_ref_array_pos = NULL;
1029                         ps_ref_array_phases = (coordinates_t *) pu1_buf;
1030 
1031                         for(k = 0; k < i4_layer_u_mbs; k++)
1032                         {
1033                             ps_layer->ps_chroma_mb_states[k].ps_ref_array_positions =
1034                                 ps_ref_array_pos;
1035                             ps_layer->ps_chroma_mb_states[k].ps_ref_array_phases =
1036                                 ps_ref_array_phases;
1037                         }
1038 
1039                         pu1_buf += au4_ref_phase_array_size[U] * sizeof(ps_ref_array_pos[0]);
1040                         i8_alloc_mem_size -=
1041                             au4_ref_phase_array_size[U] * sizeof(ps_ref_array_phases[0]);
1042                     }
1043 
1044                     ASSERT(i8_alloc_mem_size >= 0);
1045                     /* Asserts below verify that
1046                      * 'ps_codec->s_svc_ilp_data.aps_layer_resampler_props' is initialised
1047                      */
1048                     ASSERT(ps_codec->s_svc_ilp_data.aps_layer_resampler_props[Y][j].u4_mb_wd ==
1049                            MB_SIZE);
1050                     ASSERT(ps_codec->s_svc_ilp_data.aps_layer_resampler_props[UV][j].u4_mb_wd ==
1051                            (MB_SIZE / 2));
1052 
1053                     ps_layer->ps_luma_props =
1054                         &ps_codec->s_svc_ilp_data.aps_layer_resampler_props[Y][j];
1055                     ps_layer->ps_chroma_props =
1056                         &ps_codec->s_svc_ilp_data.aps_layer_resampler_props[UV][j];
1057 
1058                     isvce_res_pred_layer_state_init(ps_layer, d_spatial_res_ratio, i4_layer_luma_wd,
1059                                                     i4_layer_luma_ht,
1060                                                     ps_codec->s_cfg.e_inp_color_fmt);
1061                 }
1062 
1063                 for(j = u1_num_spatial_layers - 1; j >= 0; j--)
1064                 {
1065                     res_pred_layer_state_t *ps_layer = &ps_res_pred_state->ps_layer_state[j];
1066 
1067                     WORD32 i4_layer_luma_wd =
1068                         ((DOUBLE) u4_wd / pow(d_spatial_res_ratio, u1_num_spatial_layers - 1 - j)) +
1069                         0.99;
1070                     WORD32 i4_layer_luma_ht =
1071                         ((DOUBLE) u4_ht / pow(d_spatial_res_ratio, u1_num_spatial_layers - 1 - j)) +
1072                         0.99;
1073                     WORD32 i4_layer_luma_mbs =
1074                         ((i4_layer_luma_wd / MB_SIZE) + 2) * ((i4_layer_luma_ht / MB_SIZE) + 2);
1075 
1076                     ps_layer->pi1_mb_mode = (WORD8 *) pu1_buf;
1077                     pu1_buf += i4_layer_luma_mbs * sizeof(WORD8);
1078                     memset(ps_layer->pi1_mb_mode, -1, i4_layer_luma_mbs);
1079 
1080                     ps_layer->i4_mb_mode_stride = ai4_mb_mode_stride[j] =
1081                         (i4_layer_luma_wd / MB_SIZE) + 2;
1082                     ps_layer->pi1_mb_mode += 1 + ps_layer->i4_mb_mode_stride;
1083                     api1_mb_mode[j] = ps_layer->pi1_mb_mode;
1084                 }
1085             }
1086             else
1087             {
1088                 for(j = u1_num_spatial_layers - 1; j >= 1; j--)
1089                 {
1090                     res_pred_layer_state_t *ps_layer = &ps_res_pred_state->ps_layer_state[j];
1091 
1092                     ps_layer->ps_luma_mb_states = aps_luma_mb_states[j];
1093                     ps_layer->ps_chroma_mb_states = aps_chroma_mb_states[j];
1094 
1095                     ps_layer->ps_luma_props =
1096                         &ps_codec->s_svc_ilp_data.aps_layer_resampler_props[Y][j];
1097                     ps_layer->ps_chroma_props =
1098                         &ps_codec->s_svc_ilp_data.aps_layer_resampler_props[UV][j];
1099                 }
1100                 for(j = u1_num_spatial_layers - 1; j >= 0; j--)
1101                 {
1102                     res_pred_layer_state_t *ps_layer = &ps_res_pred_state->ps_layer_state[j];
1103 
1104                     ps_layer->pi1_mb_mode = api1_mb_mode[j];
1105                     ps_layer->i4_mb_mode_stride = ai4_mb_mode_stride[j];
1106                 }
1107             }
1108 
1109             ps_mb_res_buf = &ps_res_pred_ctxt->s_res_pred_outputs.s_res_pred;
1110             ps_mem_store = &ps_res_pred_state->s_mem_store;
1111             ps_proc->ps_mb_res_buf = ps_mb_res_buf;
1112 
1113             for(j = 0; j < NUM_SP_COMPONENTS; j++)
1114             {
1115                 buffer_container_t *ps_comp_buf = &ps_mb_res_buf->as_component_bufs[j];
1116 
1117                 UWORD8 u1_is_chroma = (Y != ((COMPONENT_TYPE) j));
1118 
1119                 ps_comp_buf->pv_data = pu1_buf;
1120                 ps_comp_buf->i4_data_stride = MB_SIZE;
1121                 pu1_buf += MB_SIZE * (MB_SIZE >> u1_is_chroma) * sizeof(WORD16);
1122                 i8_alloc_mem_size -= MB_SIZE * (MB_SIZE >> u1_is_chroma) * sizeof(WORD16);
1123             }
1124 
1125             ps_mem_store->s_scratch.pv_data = pu1_buf;
1126             ps_mem_store->s_scratch.i4_data_stride = REF_ARRAY_MAX_WIDTH;
1127             pu1_buf += REF_ARRAY_MAX_WIDTH * REF_ARRAY_MAX_HEIGHT * sizeof(WORD16);
1128             i8_alloc_mem_size -= REF_ARRAY_MAX_WIDTH * REF_ARRAY_MAX_HEIGHT * sizeof(WORD16);
1129 
1130             ASSERT(i8_alloc_mem_size >= 0);
1131 
1132             ps_mb_res_buf->as_component_bufs[V].pv_data = NULL;
1133             ps_mb_res_buf->e_color_format = IV_YUV_420SP_UV;
1134             ps_mb_res_buf->u1_bit_depth = 10;
1135             ps_mb_res_buf->u4_width = MB_SIZE;
1136             ps_mb_res_buf->u4_height = MB_SIZE;
1137 
1138             isvce_svc_residual_sampling_function_selector(ps_res_pred_state, d_spatial_res_ratio,
1139                                                           ps_codec->s_cfg.e_arch);
1140         }
1141     }
1142     else
1143     {
1144         for(i = 0; i < i4_num_proc_ctxts; i++)
1145         {
1146             isvce_process_ctxt_t *ps_proc = ps_codec->as_process + i;
1147 
1148             ps_proc->ps_res_pred_ctxt = NULL;
1149 
1150             ps_proc->ps_mb_res_buf = (yuv_buf_props_t *) pu1_buf;
1151             pu1_buf += sizeof(yuv_buf_props_t);
1152             i8_alloc_mem_size -= sizeof(yuv_buf_props_t);
1153 
1154             for(j = 0; j < NUM_SP_COMPONENTS; j++)
1155             {
1156                 buffer_container_t *ps_comp_buf = &ps_proc->ps_mb_res_buf->as_component_bufs[j];
1157 
1158                 UWORD8 u1_is_chroma = (Y != ((COMPONENT_TYPE) j));
1159 
1160                 ps_comp_buf->pv_data = pu1_buf;
1161                 ps_comp_buf->i4_data_stride = MB_SIZE;
1162                 pu1_buf += MB_SIZE * (MB_SIZE >> u1_is_chroma) * sizeof(WORD16);
1163                 i8_alloc_mem_size -= MB_SIZE * (MB_SIZE >> u1_is_chroma) * sizeof(WORD16);
1164             }
1165 
1166             ASSERT(i8_alloc_mem_size >= 0);
1167         }
1168     }
1169 }
1170 
/**
*******************************************************************************
*
* @brief
*  Generates the residual prediction of the current MB from the residual of
*  the next lower spatial layer
*
* @par Description:
*  For every component, the sampler registered in 'apf_residual_samplers' is
*  invoked on the window of the lower layer's residual buffer located by the
*  per-MB offsets stored in the layer state. The result is written to
*  's_res_pred_outputs.s_res_pred'. Chroma is addressed through the UV buffer,
*  with V placed one sample after U.
*
* @param[in,out] ps_res_pred_ctxt
*  Residual prediction context; 's_res_pred_variables' supplies the MB
*  position and a spatial layer id that must be greater than 0
*
* @remarks
*  Presence of appropriate padding around the lower layer residual is
*  assumed. The row stride of the lower layer's MB info is taken as half of
*  the current layer's width in MBs.
*
*******************************************************************************
*/
void isvce_get_mb_residual_pred(svc_res_pred_ctxt_t *ps_res_pred_ctxt)
{
    res_pred_mb_state_t *aps_mb_states[2];
    res_pred_layer_state_t *ps_cur_layer_state;
    svc_layer_data_t *ps_lower_layer_data;
    yuv_buf_props_t *ps_lower_res_buf;
    WORD32 i4_frame_wd;
    WORD32 i4_frame_wd_in_mbs;
    WORD32 i4_mb_idx;
    WORD32 i4_comp;

    res_pred_variables_t *ps_vars = &ps_res_pred_ctxt->s_res_pred_variables;
    res_pred_state_t *ps_state =
        (res_pred_state_t *) ps_res_pred_ctxt->s_res_pred_constants.pv_state;
    yuv_buf_props_t *ps_pred_buf = &ps_res_pred_ctxt->s_res_pred_outputs.s_res_pred;
    buffer_container_t *ps_scratch = &ps_state->s_mem_store.s_scratch;
    svc_ilp_data_t *ps_ilp = ps_vars->ps_svc_ilp_data;
    coordinates_t *ps_mb_pos = &ps_vars->s_mb_pos;

    UWORD8 u1_layer_id = ps_vars->u1_spatial_layer_id;

    /* The lowest layer has nothing to predict from */
    ASSERT(u1_layer_id > 0);

    i4_frame_wd = ps_ilp->ps_residual_bufs[u1_layer_id].u4_width;
    i4_frame_wd_in_mbs = i4_frame_wd / MB_SIZE;
    i4_mb_idx = ps_mb_pos->i4_abscissa + ps_mb_pos->i4_ordinate * i4_frame_wd_in_mbs;

    ps_lower_layer_data = &ps_ilp->ps_svc_au_data->ps_svc_layer_data[u1_layer_id - 1];
    ps_lower_res_buf = &ps_ilp->ps_residual_bufs[u1_layer_id - 1];
    ps_cur_layer_state = &ps_state->ps_layer_state[u1_layer_id];

    /* Index 0 holds the luma state, index 1 the chroma state */
    aps_mb_states[0] = ps_cur_layer_state->ps_luma_mb_states + i4_mb_idx;
    aps_mb_states[1] = ps_cur_layer_state->ps_chroma_mb_states + i4_mb_idx;

    for(i4_comp = 0; i4_comp < NUM_COMPONENTS; i4_comp++)
    {
        buffer_container_t s_src;
        buffer_container_t s_dst;
        coordinates_t s_offsets;
        WORD16 *pi2_src;
        isvce_mb_info_t *ps_ref_mb;

        const UWORD8 u1_is_chroma = (Y != ((COMPONENT_TYPE) i4_comp));
        const UWORD8 u1_is_v = (V == ((COMPONENT_TYPE) i4_comp));
        res_pred_mb_state_t *ps_mb_state = aps_mb_states[u1_is_chroma];
        layer_resampler_props_t *ps_props = u1_is_chroma ? ps_cur_layer_state->ps_chroma_props
                                                         : ps_cur_layer_state->ps_luma_props;

        /* Presence of appropriate padding is assumed */
        s_offsets = ps_mb_state->s_offsets;

        /* Source window in the lower layer; chroma abscissa is doubled */
        /* since both chroma components share the UV buffer */
        s_src = ps_lower_res_buf->as_component_bufs[u1_is_chroma ? UV : Y];
        pi2_src = (WORD16 *) s_src.pv_data;
        pi2_src += u1_is_v;
        pi2_src += (s_offsets.i4_abscissa << u1_is_chroma);
        pi2_src += s_offsets.i4_ordinate * s_src.i4_data_stride;
        s_src.pv_data = pi2_src;

        s_dst = ps_pred_buf->as_component_bufs[u1_is_chroma ? UV : Y];
        s_dst.pv_data = ((WORD16 *) s_dst.pv_data) + u1_is_v;

        /* Lower layer MB covering the centre of the reference window */
        ps_ref_mb = ps_lower_layer_data->ps_mb_info;
        ps_ref_mb +=
            ((s_offsets.i4_abscissa + (ps_mb_state->s_ref_array_dims.i4_abscissa / 2)) /
             ps_props->u4_mb_wd);
        ps_ref_mb +=
            ((s_offsets.i4_ordinate + (ps_mb_state->s_ref_array_dims.i4_ordinate / 2)) /
             ps_props->u4_mb_ht) *
            (i4_frame_wd_in_mbs / 2);

        ps_state->apf_residual_samplers[i4_comp](
            ps_mb_state->ps_ref_array_positions, ps_mb_state->ps_ref_array_phases, &s_src, &s_dst,
            ps_scratch, UINT32_MAX, ps_ref_mb->u1_tx_size == 8);
    }
}
1248 
/**
*******************************************************************************
*
* @brief
*  Fetches the MB type and transform size of the reference layer MB that
*  covers a given sample position
*
* @param[in] pi1_ref_mb_modes
*  Pointer to the reference layer MB mode map
*
* @param[in] i4_ref_mode_stride
*  Stride of the MB mode map, in elements
*
* @param[in] i4_element_size
*  Size of each MB mode map element, in bytes
*
* @param[in] i4_x_ref
*  Horizontal sample position in the reference layer; negatives are clamped
*  to 0
*
* @param[in] i4_y_ref
*  Vertical sample position in the reference layer; negatives are clamped
*  to 0
*
* @param[out] pi4_mb_type
*  Set to SVC_INTER_MB or SVC_INTRA_MB
*
* @param[out] pi4_tx_size
*  Transform size flag read from the map entry for inter MBs; 1 for intra MBs
*
* @param[in] i4_chroma_flag
*  Subtracted from the MB width and height shifts; 1 for chroma positions,
*  0 for luma
*
*******************************************************************************
*/
void isvce_get_ref_layer_mbtype_tx_size(WORD8 *pi1_ref_mb_modes, WORD32 i4_ref_mode_stride,
                                        WORD32 i4_element_size, WORD32 i4_x_ref, WORD32 i4_y_ref,
                                        WORD32 *pi4_mb_type, WORD32 *pi4_tx_size,
                                        WORD32 i4_chroma_flag)
{
    WORD8 *pi1_entry;
    WORD8 i1_mode;

    /* Positions left of or above the picture map onto the first MB */
    const WORD32 i4_x = (i4_x_ref < 0) ? 0 : i4_x_ref;
    const WORD32 i4_y = (i4_y_ref < 0) ? 0 : i4_y_ref;

    const WORD32 i4_mb_col = i4_x >> (MB_WIDTH_SHIFT - i4_chroma_flag);
    const WORD32 i4_mb_row = i4_y >> (MB_HEIGHT_SHIFT - i4_chroma_flag);

    pi1_entry = pi1_ref_mb_modes;
    pi1_entry += (i4_mb_row * i4_ref_mode_stride * i4_element_size);
    pi1_entry += (i4_mb_col * i4_element_size);

    /* Negative entries are used as is; others carry the mode in a bit field */
    i1_mode = *pi1_entry;
    if(!(i1_mode < 0))
    {
        i1_mode = SVC_EXTRACT_MB_MODE(*pi1_entry);
    }

    if(i1_mode > SVC_INTER_MB)
    {
        *pi4_mb_type = SVC_INTRA_MB;
        *pi4_tx_size = 1;
    }
    else
    {
        *pi4_mb_type = SVC_INTER_MB;
        *pi4_tx_size = GET_BIT_TX_SIZE(*pi1_entry, 1);
    }
}
1288 
isvce_ref_layer_ptr_incr(WORD8 * pi1_ref_mb_modes,WORD32 i4_ref_mode_stride,WORD32 i4_element_size,WORD32 i4_x_offset,WORD32 i4_y_offset,WORD32 i4_refary_wd,WORD32 i4_refary_ht,UWORD8 * pu1_ref_x_ptr_incr,UWORD8 * pu1_ref_y_ptr_incr,WORD32 i4_chroma_flag)1289 void isvce_ref_layer_ptr_incr(WORD8 *pi1_ref_mb_modes, WORD32 i4_ref_mode_stride,
1290                               WORD32 i4_element_size, WORD32 i4_x_offset, WORD32 i4_y_offset,
1291                               WORD32 i4_refary_wd, WORD32 i4_refary_ht, UWORD8 *pu1_ref_x_ptr_incr,
1292                               UWORD8 *pu1_ref_y_ptr_incr, WORD32 i4_chroma_flag)
1293 {
1294     WORD32 i4_x, i4_y;
1295     WORD32 i4_x_idx, i4_y_idx;
1296     WORD32 i4_prev_x, i4_prev_y;
1297     WORD32 i4_const_val;
1298     WORD32 i4_pos_x, i4_pos_y;
1299     WORD32 i4_trans_size;
1300     WORD32 i4_mb_type, i4_tx_size;
1301     WORD32 i4_act_ary_wd, i4_act_ary_ht;
1302     WORD32 i4_and_const;
1303     UWORD8 *pu1_incr_x, *pu1_incr_y;
1304 
1305     memset(pu1_ref_x_ptr_incr, 1, (i4_refary_wd * i4_refary_ht));
1306     memset(pu1_ref_y_ptr_incr, 1, (i4_refary_wd * i4_refary_ht));
1307 
1308     i4_act_ary_wd = i4_refary_wd;
1309     i4_act_ary_ht = i4_refary_ht;
1310 
1311     i4_x = 0;
1312     i4_y = 0;
1313     i4_prev_y = 0;
1314 
1315     if(0 == i4_chroma_flag)
1316     {
1317         do
1318         {
1319             WORD32 i4_x_ref, i4_y_ref;
1320             WORD32 i4_idx;
1321             WORD32 i4_wd, i4_ht;
1322             WORD32 i4_max_pos_x, i4_max_pos_y;
1323 
1324             i4_prev_x = i4_x;
1325 
1326             i4_x_ref = i4_x_offset + i4_x;
1327             i4_y_ref = i4_y_offset + i4_y;
1328 
1329             isvce_get_ref_layer_mbtype_tx_size(pi1_ref_mb_modes, i4_ref_mode_stride,
1330                                                i4_element_size, i4_x_ref, i4_y_ref, &i4_mb_type,
1331                                                &i4_tx_size, i4_chroma_flag);
1332 
1333             i4_trans_size = ((i4_tx_size + 1) << 2);
1334             i4_const_val = i4_trans_size - 1;
1335             i4_and_const = i4_const_val;
1336 
1337             /* Fill horizontal tx block edges of current reference mb with 0 */
1338             pu1_incr_x = pu1_ref_x_ptr_incr + i4_x;
1339             pu1_incr_x += (i4_y * i4_refary_wd);
1340 
1341             i4_ht = (16 - (i4_y_ref & 0xF));
1342             i4_ht = MIN((i4_act_ary_ht - i4_y), i4_ht);
1343 
1344             i4_x_idx = i4_x;
1345 
1346             i4_pos_x = i4_x_ref & 0xF;
1347 
1348             i4_max_pos_x = 16;
1349             i4_x += (16 - i4_pos_x);
1350 
1351             /* Get the transform block edge pos */
1352             i4_idx = (i4_const_val - (i4_pos_x & i4_and_const));
1353 
1354             i4_x_idx += i4_idx;
1355 
1356             while((i4_pos_x < i4_max_pos_x) && (i4_x_idx < i4_act_ary_wd))
1357             {
1358                 WORD32 i4_i;
1359                 UWORD8 *pu1_incr;
1360 
1361                 pu1_incr = pu1_incr_x + i4_idx;
1362 
1363                 for(i4_i = 0; i4_i < i4_ht; i4_i++)
1364                 { /* Fill the block edge with 0s */
1365                     *pu1_incr = 0;
1366                     pu1_incr += i4_refary_wd;
1367                 }
1368 
1369                 i4_pos_x += i4_trans_size;
1370                 pu1_incr_x += i4_trans_size;
1371                 i4_x_idx += MIN(i4_trans_size, (i4_act_ary_wd - i4_x_idx));
1372             }
1373 
1374             /* Fill vertical tx block edges of current reference mb with 0 */
1375             pu1_incr_y = pu1_ref_y_ptr_incr + i4_prev_x;
1376             pu1_incr_y += (i4_y * i4_refary_wd);
1377 
1378             i4_wd = (16 - (i4_x_ref & 0xF));
1379             i4_wd = MIN((i4_act_ary_wd - i4_prev_x), i4_wd);
1380 
1381             i4_y_idx = i4_y;
1382 
1383             i4_pos_y = i4_y_ref & 0xF;
1384 
1385             i4_max_pos_y = 16;
1386             i4_y += (16 - i4_pos_y);
1387 
1388             /* Get the transform block edge pos */
1389             i4_idx = (i4_const_val - (i4_pos_y & i4_and_const));
1390 
1391             i4_y_idx += i4_idx;
1392 
1393             while((i4_pos_y < i4_max_pos_y) && (i4_y_idx < i4_act_ary_ht))
1394             {
1395                 WORD32 i4_i;
1396                 UWORD8 *pu1_incr;
1397 
1398                 pu1_incr = pu1_incr_y + i4_idx * i4_refary_wd;
1399 
1400                 for(i4_i = 0; i4_i < i4_wd; i4_i++)
1401                 { /* Fill the block edge with 0s */
1402                     *pu1_incr = 0;
1403                     pu1_incr++;
1404                 }
1405 
1406                 i4_pos_y += i4_trans_size;
1407                 pu1_incr_y += i4_trans_size * i4_refary_wd;
1408                 i4_y_idx += MIN(i4_trans_size, (i4_act_ary_ht - i4_y_idx));
1409             }
1410 
1411             if(i4_x < i4_act_ary_wd)
1412             {
1413                 i4_y = i4_prev_y;
1414             }
1415             else if(i4_y < i4_act_ary_ht)
1416             {
1417                 i4_prev_y = i4_y;
1418                 i4_x = 0;
1419             }
1420         } while((i4_y < i4_act_ary_ht) || (i4_x < i4_act_ary_wd));
1421     }
1422     else
1423     {
1424         i4_trans_size = 4;
1425         i4_const_val = 3;
1426 
1427         do
1428         {
1429             WORD32 i4_x_ref, i4_y_ref;
1430             WORD32 i4_idx;
1431             WORD32 i4_wd, i4_ht;
1432             WORD32 i4_max_pos_x, i4_max_pos_y;
1433 
1434             i4_prev_x = i4_x;
1435 
1436             i4_x_ref = i4_x_offset + i4_x;
1437             i4_y_ref = i4_y_offset + i4_y;
1438 
1439             /* Fill horizontal tx block edges of current reference mb with 0 */
1440             pu1_incr_x = pu1_ref_x_ptr_incr + i4_x;
1441             pu1_incr_x += (i4_y * i4_refary_wd);
1442 
1443             i4_ht = (8 - (i4_y_ref & 0x7));
1444             i4_ht = MIN((i4_act_ary_ht - i4_y), i4_ht);
1445 
1446             i4_x_idx = i4_x;
1447 
1448             i4_pos_x = i4_x_ref & 0x7;
1449 
1450             i4_max_pos_x = 8;
1451             i4_x += (8 - i4_pos_x);
1452 
1453             /* Get the transform block edge pos */
1454             i4_idx = (i4_const_val - (i4_pos_x & 0x3));
1455 
1456             i4_x_idx += i4_idx;
1457 
1458             while((i4_pos_x < i4_max_pos_x) && (i4_x_idx < i4_act_ary_wd))
1459             {
1460                 WORD32 i4_i;
1461                 UWORD8 *pu1_incr;
1462 
1463                 pu1_incr = pu1_incr_x + i4_idx;
1464 
1465                 for(i4_i = 0; i4_i < i4_ht; i4_i++)
1466                 { /* Fill the block edge with 0s */
1467                     *pu1_incr = 0;
1468                     pu1_incr += i4_refary_wd;
1469                 }
1470 
1471                 i4_pos_x += i4_trans_size;
1472                 pu1_incr_x += i4_trans_size;
1473                 i4_x_idx += MIN(i4_trans_size, (i4_act_ary_wd - i4_x_idx));
1474             }
1475 
1476             /* Fill vertical tx block edges of current reference mb with 0 */
1477             pu1_incr_y = pu1_ref_y_ptr_incr + i4_prev_x;
1478             pu1_incr_y += (i4_y * i4_refary_wd);
1479 
1480             i4_wd = (8 - (i4_x_ref & 0x7));
1481             i4_wd = MIN((i4_act_ary_wd - i4_prev_x), i4_wd);
1482 
1483             i4_y_idx = i4_y;
1484 
1485             i4_pos_y = i4_y_ref & 0x7;
1486 
1487             i4_max_pos_y = 8;
1488             i4_y += (8 - i4_pos_y);
1489 
1490             /* Get the transform block edge pos */
1491             i4_idx = (i4_const_val - (i4_pos_y & 0x3));
1492 
1493             i4_y_idx += i4_idx;
1494 
1495             while((i4_pos_y < i4_max_pos_y) && (i4_y_idx < i4_act_ary_ht))
1496             {
1497                 WORD32 i4_i;
1498                 UWORD8 *pu1_incr;
1499 
1500                 pu1_incr = pu1_incr_y + i4_idx * i4_refary_wd;
1501 
1502                 for(i4_i = 0; i4_i < i4_wd; i4_i++)
1503                 { /* Fill the block edge with 0s */
1504                     *pu1_incr = 0;
1505                     pu1_incr++;
1506                 }
1507 
1508                 i4_pos_y += i4_trans_size;
1509                 pu1_incr_y += i4_trans_size * i4_refary_wd;
1510                 i4_y_idx += MIN(i4_trans_size, (i4_act_ary_ht - i4_y_idx));
1511             }
1512 
1513             if(i4_x < i4_act_ary_wd)
1514             {
1515                 i4_y = i4_prev_y;
1516             }
1517             else if(i4_y < i4_act_ary_ht)
1518             {
1519                 i4_prev_y = i4_y;
1520                 i4_x = 0;
1521             }
1522         } while((i4_y < i4_act_ary_ht) || (i4_x < i4_act_ary_wd));
1523     }
1524 }
1525 
/**
*******************************************************************************
*
* @brief
*  Builds the reference sample array for residual prediction of the current MB
*
* @par Description
*  First calls isvce_ref_layer_ptr_incr() for the MB's reference window, then
*  fills the scratch buffer (i4_refarray_ht rows of i4_refarray_wd WORD16
*  samples). Every position is mapped to a reference layer coordinate that is
*  clamped to the reference layer dimensions; the sample is copied from the
*  input residual when isvce_get_ref_layer_mbtype_tx_size() reports
*  SVC_INTER_MB for that coordinate, and set to 0 otherwise.
*
* @param[in] ps_res_pred_ctxt
*  Residual prediction context (state, current MB position, spatial layer id)
*
* @param[in] pi2_inp_data
*  Reference layer residual samples. NOTE(review): the caller visible in this
*  file passes a pointer that is already advanced by the clamped MB offsets
*
* @param[in] i4_inp_data_stride
*  Stride of pi2_inp_data, in WORD16 units
*
* @param[in] ps_ref_mb_mode
*  Reference layer MB mode buffer; must not be NULL (asserted)
*
* @param[in] i4_ref_mb_mode_stride
*  Stride of ps_ref_mb_mode
*
* @param[out] pi4_refarr_wd
*  Receives the width of the reference sample array that was built
*
* @param[in] i4_chroma_flag
*  0 for luma, 1 for chroma; also used as a shift on widths and x positions
*
*******************************************************************************
*/
void isvce_residual_reflayer_const(svc_res_pred_ctxt_t *ps_res_pred_ctxt, WORD16 *pi2_inp_data,
                                   WORD32 i4_inp_data_stride, WORD8 *ps_ref_mb_mode,
                                   WORD32 i4_ref_mb_mode_stride, WORD32 *pi4_refarr_wd,
                                   WORD32 i4_chroma_flag)
{
    res_pred_constants_t *ps_constants = &ps_res_pred_ctxt->s_res_pred_constants;
    res_pred_variables_t *ps_variables = &ps_res_pred_ctxt->s_res_pred_variables;
    res_pred_state_t *ps_state = (res_pred_state_t *) ps_constants->pv_state;
    svc_ilp_data_t *ps_ilp_data = ps_variables->ps_svc_ilp_data;
    coordinates_t *ps_cur_mb = &ps_variables->s_mb_pos;

    UWORD8 u1_layer_id = ps_variables->u1_spatial_layer_id;

    res_pred_layer_state_t *ps_cur_layer = &ps_state->ps_layer_state[u1_layer_id];
    WORD16 *pi2_dst_array = (WORD16 *) ps_state->s_mem_store.s_scratch.pv_data;
    WORD8 *pi1_modes = (WORD8 *) ps_ref_mb_mode;

    res_pred_mb_state_t *ps_mb_state;

    WORD32 i4_cur_wd, i4_cur_wd_in_mbs;
    WORD32 i4_base_wd, i4_base_ht;
    WORD32 i4_ref_wd, i4_ref_ht;
    WORD32 i4_x_offset, i4_y_offset;
    WORD32 i4_refarray_wd, i4_refarray_ht;
    WORD32 i4_row, i4_col;

    ASSERT(NULL != pi1_modes);

    /* Locate the per-MB state of the current MB within the current layer */
    i4_cur_wd = ps_ilp_data->ps_residual_bufs[u1_layer_id].u4_width;
    i4_cur_wd_in_mbs = i4_cur_wd / MB_SIZE;

    ps_mb_state =
        i4_chroma_flag ? ps_cur_layer->ps_chroma_mb_states : ps_cur_layer->ps_luma_mb_states;
    ps_mb_state += ps_cur_mb->i4_abscissa + ps_cur_mb->i4_ordinate * i4_cur_wd_in_mbs;

    /* Dimensions of the reference (next lower) layer for this plane */
    i4_base_wd = ps_ilp_data->ps_residual_bufs[u1_layer_id - 1].u4_width;
    i4_base_ht = ps_ilp_data->ps_residual_bufs[u1_layer_id - 1].u4_height;
    i4_ref_wd = i4_base_wd >> i4_chroma_flag;
    i4_ref_ht = i4_base_ht >> i4_chroma_flag;

    i4_x_offset = ps_mb_state->s_offsets.i4_abscissa;
    i4_y_offset = ps_mb_state->s_offsets.i4_ordinate;
    i4_refarray_wd = ps_mb_state->s_ref_array_dims.i4_abscissa;
    i4_refarray_ht = ps_mb_state->s_ref_array_dims.i4_ordinate;

    isvce_ref_layer_ptr_incr(pi1_modes, i4_ref_mb_mode_stride, 1, i4_x_offset, i4_y_offset,
                             i4_refarray_wd, i4_refarray_ht, ps_state->pu1_ref_x_ptr_incr,
                             ps_state->pu1_ref_y_ptr_incr, i4_chroma_flag);

    for(i4_row = 0; i4_row < i4_refarray_ht; i4_row++)
    {
        /* The vertical clamp does not depend on the column */
        WORD32 i4_y_clip = MAX(0, MIN(i4_ref_ht - 1, i4_row + i4_y_offset));
        /* A non-negative offset is removed again to index the input buffer */
        WORD32 i4_y_src = (0 <= i4_y_offset) ? (i4_y_clip - i4_y_offset) : i4_y_clip;

        for(i4_col = 0; i4_col < i4_refarray_wd; i4_col++)
        {
            WORD32 i4_ref_mb_type, i4_ref_tx_size;
            WORD32 i4_x_clip = MAX(0, MIN(i4_ref_wd - 1, i4_col + i4_x_offset));
            WORD32 i4_dst_idx = i4_col + (i4_row * i4_refarray_wd);

            isvce_get_ref_layer_mbtype_tx_size(pi1_modes, i4_ref_mb_mode_stride, 1, i4_x_clip,
                                               i4_y_clip, &i4_ref_mb_type, &i4_ref_tx_size,
                                               i4_chroma_flag);

            if(SVC_INTER_MB != i4_ref_mb_type)
            {
                /* Only inter MBs contribute residual */
                pi2_dst_array[i4_dst_idx] = 0;
            }
            else
            {
                WORD32 i4_x_src = (0 <= i4_x_offset) ? (i4_x_clip - i4_x_offset) : i4_x_clip;
                WORD16 *pi2_src = pi2_inp_data + (i4_x_src << i4_chroma_flag);

                pi2_src += i4_y_src * i4_inp_data_stride;

                pi2_dst_array[i4_dst_idx] = (WORD16) (*pi2_src);
            }
        }
    }

    *pi4_refarr_wd = i4_refarray_wd;
}

1645 
/**
*******************************************************************************
*
* @brief
*  Interpolates the residual prediction of one MB from the reference sample
*  array held in the scratch buffer
*
* @par Description
*  For every output sample of the (MB_SIZE >> i4_chroma_flag) square block, two
*  vertically adjacent rows of the reference array are filtered horizontally
*  with weights (16 - phase) and phase, and the two results are then filtered
*  vertically the same way, rounded with +128 and shifted right by 8. The
*  second tap of each filter is selected by the pu1_ref_x_ptr_incr /
*  pu1_ref_y_ptr_incr maps; when the map entry is 0 both taps coincide and the
*  tap is chosen by (phase >> 3).
*
* @param[in] ps_res_pred_ctxt
*  Residual prediction context (state, spatial layer id, ILP data)
*
* @param[out] pi2_out
*  Output residual prediction; sample x of row y is written at
*  (x << i4_chroma_flag) + y * i4_out_stride
*
* @param[in] i4_out_stride
*  Stride of pi2_out, in WORD16 units
*
* @param[in] i4_refarray_wd
*  Width of the reference sample array and of both increment maps
*
* @param[in] i4_chroma_flag
*  0 for luma, 1 for chroma
*
* @param[in] ps_mb_pos
*  Position of the MB, used to select its per-MB state
*
*******************************************************************************
*/
void isvce_interpolate_residual(svc_res_pred_ctxt_t *ps_res_pred_ctxt, WORD16 *pi2_out,
                                WORD32 i4_out_stride, WORD32 i4_refarray_wd, WORD32 i4_chroma_flag,
                                coordinates_t *ps_mb_pos)
{
    res_pred_constants_t *ps_constants = &ps_res_pred_ctxt->s_res_pred_constants;
    res_pred_variables_t *ps_variables = &ps_res_pred_ctxt->s_res_pred_variables;
    res_pred_state_t *ps_state = (res_pred_state_t *) ps_constants->pv_state;
    svc_ilp_data_t *ps_ilp_data = ps_variables->ps_svc_ilp_data;

    UWORD8 u1_layer_id = ps_variables->u1_spatial_layer_id;

    WORD16 *pi2_ref_samples = (WORD16 *) ps_state->s_mem_store.s_scratch.pv_data;
    UWORD8 *pu1_x_incr_map = ps_state->pu1_ref_x_ptr_incr;
    UWORD8 *pu1_y_incr_map = ps_state->pu1_ref_y_ptr_incr;

    /* Block is square: same extent horizontally and vertically */
    WORD32 i4_blk_size = MB_SIZE >> i4_chroma_flag;

    res_pred_mb_state_t *ps_mb_state;
    coordinates_t *ps_phases;
    coordinates_t *ps_positions;

    WORD32 i4_cur_wd, i4_cur_wd_in_mbs;
    WORD32 i4_row, i4_col;

    i4_cur_wd = ps_ilp_data->ps_residual_bufs[u1_layer_id].u4_width;
    i4_cur_wd_in_mbs = i4_cur_wd / MB_SIZE;

    ps_mb_state = i4_chroma_flag ? ps_state->ps_layer_state[u1_layer_id].ps_chroma_mb_states
                                 : ps_state->ps_layer_state[u1_layer_id].ps_luma_mb_states;
    ps_mb_state += ps_mb_pos->i4_abscissa + (ps_mb_pos->i4_ordinate * i4_cur_wd_in_mbs);

    ps_phases = ps_mb_state->ps_ref_array_phases;
    ps_positions = ps_mb_state->ps_ref_array_positions;

    for(i4_row = 0; i4_row < i4_blk_size; i4_row++)
    {
        /* Vertical phase entry: row % 3 of 0, 1, 2 selects entry 0, 3, 4 */
        WORD32 i4_row_mod = i4_row % 3;
        WORD32 i4_y_phase_idx = (i4_row_mod > 0) ? (i4_row_mod + 2) : 0;

        for(i4_col = 0; i4_col < i4_blk_size; i4_col++)
        {
            coordinates_t *ps_ref_pos = &ps_positions[(i4_blk_size * i4_row) + i4_col];

            WORD32 i4_y_ref = ps_ref_pos->i4_ordinate;
            WORD32 i4_y_phase = ps_phases[i4_y_phase_idx].i4_ordinate;
            WORD32 i4_x_ref = ps_ref_pos->i4_abscissa;
            WORD32 i4_x_phase = ps_phases[i4_col % 3].i4_abscissa;

            WORD32 ai4_horz[2];
            WORD32 i4_tap;
            WORD32 i4_vert_base;
            UWORD8 u1_y_incr;

            /* Horizontal filtering of the two rows feeding the vertical filter */
            for(i4_tap = 0; i4_tap < 2; i4_tap++)
            {
                WORD32 i4_idx = i4_x_ref + ((i4_y_ref + i4_tap) * i4_refarray_wd);
                UWORD8 u1_x_incr = pu1_x_incr_map[i4_idx];
                WORD16 *pi2_left = &pi2_ref_samples[i4_idx];

                if(0 == u1_x_incr)
                {
                    /* Both taps coincide; pick the sample by the phase */
                    pi2_left += (i4_x_phase >> 3);
                }

                ai4_horz[i4_tap] =
                    ((16 - i4_x_phase) * pi2_left[0]) + (i4_x_phase * pi2_left[u1_x_incr]);
            }

            /* Vertical filtering */
            u1_y_incr =
                pu1_y_incr_map[i4_x_ref + (i4_x_phase >> 3) + (i4_y_ref * i4_refarray_wd)];
            i4_vert_base = u1_y_incr ? 0 : (i4_y_phase >> 3);

            pi2_out[(i4_col << i4_chroma_flag) + (i4_row * i4_out_stride)] =
                (WORD16) ((((16 - i4_y_phase) * ai4_horz[i4_vert_base]) +
                           (i4_y_phase * ai4_horz[i4_vert_base + u1_y_incr]) + 128) >>
                          8);
        }
    }
}

1769 
/**
*******************************************************************************
*
* @brief
*  Generates the residual prediction of the current MB, one component at a
*  time
*
* @par Description
*  For each of the NUM_COMPONENTS components, the input pointer is placed in
*  the residual buffer of the layer below (u1_spatial_layer_id - 1) at the
*  MB's clamped reference offsets, the reference sample array is built with
*  isvce_residual_reflayer_const() and the prediction is written to
*  s_res_pred_outputs by isvce_interpolate_residual(). Both chroma components
*  use the UV buffer; for V the input and output pointers are advanced by one
*  sample.
*
* @param[in,out] ps_res_pred_ctxt
*  Residual prediction context; the spatial layer id must be greater than 0
*  (asserted)
*
*******************************************************************************
*/
void isvce_get_mb_residual_pred_non_dyadic(svc_res_pred_ctxt_t *ps_res_pred_ctxt)
{
    res_pred_constants_t *ps_constants = &ps_res_pred_ctxt->s_res_pred_constants;
    res_pred_variables_t *ps_variables = &ps_res_pred_ctxt->s_res_pred_variables;
    res_pred_outputs_t *ps_outputs = &ps_res_pred_ctxt->s_res_pred_outputs;
    res_pred_state_t *ps_state = (res_pred_state_t *) ps_constants->pv_state;
    svc_ilp_data_t *ps_ilp_data = ps_variables->ps_svc_ilp_data;
    coordinates_t *ps_mb_pos = &ps_variables->s_mb_pos;

    UWORD8 u1_layer_id = ps_variables->u1_spatial_layer_id;

    res_pred_layer_state_t *ps_cur_layer, *ps_ref_layer;
    yuv_buf_props_t *ps_ref_residual;

    WORD32 i4_cur_wd, i4_cur_ht;
    WORD32 i4_mb_idx;
    WORD32 i4_comp;

    ASSERT(u1_layer_id > 0);

    i4_cur_wd = ps_ilp_data->ps_residual_bufs[u1_layer_id].u4_width;
    i4_cur_ht = ps_ilp_data->ps_residual_bufs[u1_layer_id].u4_height;

    ps_cur_layer = &ps_state->ps_layer_state[u1_layer_id];
    ps_ref_layer = &ps_state->ps_layer_state[u1_layer_id - 1];
    ps_ref_residual = &ps_ilp_data->ps_residual_bufs[u1_layer_id - 1];

    /* Raster index of the current MB; shared by the luma and chroma states */
    i4_mb_idx = ps_mb_pos->i4_abscissa + ps_mb_pos->i4_ordinate * (i4_cur_wd / MB_SIZE);

    for(i4_comp = 0; i4_comp < NUM_COMPONENTS; i4_comp++)
    {
        UWORD8 u1_is_chroma = (Y != ((COMPONENT_TYPE) i4_comp));
        /* V shares the UV buffer and starts one sample after U */
        WORD32 i4_v_offset = (V == ((COMPONENT_TYPE) i4_comp));

        res_pred_mb_state_t *ps_mb_state =
            (u1_is_chroma ? ps_cur_layer->ps_chroma_mb_states : ps_cur_layer->ps_luma_mb_states) +
            i4_mb_idx;

        buffer_container_t *ps_inp_buf =
            &ps_ref_residual->as_component_bufs[u1_is_chroma ? UV : Y];
        buffer_container_t *ps_out_buf =
            &ps_outputs->s_res_pred.as_component_bufs[u1_is_chroma ? UV : Y];

        WORD32 i4_x_off =
            MAX(0, MIN(ps_mb_state->s_offsets.i4_abscissa, (i4_cur_wd >> u1_is_chroma) - 1));
        WORD32 i4_y_off =
            MAX(0, MIN(ps_mb_state->s_offsets.i4_ordinate, (i4_cur_ht >> u1_is_chroma) - 1));

        WORD32 i4_inp_stride = ps_inp_buf->i4_data_stride;
        WORD32 i4_out_stride = ps_out_buf->i4_data_stride;

        WORD16 *pi2_inp = ((WORD16 *) ps_inp_buf->pv_data) + i4_v_offset +
                          (i4_x_off << u1_is_chroma) + i4_y_off * i4_inp_stride;
        WORD16 *pi2_out = ((WORD16 *) ps_out_buf->pv_data) + i4_v_offset;

        WORD32 i4_refarray_wd;

        /* Step 1: build the reference sample array in the scratch buffer */
        isvce_residual_reflayer_const(ps_res_pred_ctxt, pi2_inp, i4_inp_stride,
                                      ps_ref_layer->pi1_mb_mode, ps_ref_layer->i4_mb_mode_stride,
                                      &i4_refarray_wd, u1_is_chroma);

        /* Step 2: interpolate the prediction from the reference sample array */
        isvce_interpolate_residual(ps_res_pred_ctxt, pi2_out, i4_out_stride, i4_refarray_wd,
                                   u1_is_chroma, ps_mb_pos);
    }
}

1852 
/**
*******************************************************************************
*
* @brief
*  Computes the sum of |src - pred - res| over a u4_mb_wd x u4_mb_ht block
*
* @param[in] ps_src
*  Source buffer; read as UWORD8 samples
*
* @param[in] ps_pred
*  Prediction buffer; read as UWORD8 samples
*
* @param[in] ps_res
*  Residual buffer; read as WORD16 samples
*
* @param[in] u4_mb_wd
*  Block width in samples
*
* @param[in] u4_mb_ht
*  Block height in samples
*
* @returns Accumulated SAD; each buffer is addressed with its own
*  i4_data_stride
*
*******************************************************************************
*/
UWORD32 isvce_get_sad_with_residual_pred(buffer_container_t *ps_src, buffer_container_t *ps_pred,
                                         buffer_container_t *ps_res, UWORD32 u4_mb_wd,
                                         UWORD32 u4_mb_ht)
{
    UWORD8 *pu1_src = (UWORD8 *) ps_src->pv_data;
    UWORD8 *pu1_pred = (UWORD8 *) ps_pred->pv_data;
    WORD16 *pi2_res = (WORD16 *) ps_res->pv_data;

    UWORD32 u4_row, u4_col;
    UWORD32 u4_sad = 0;

    for(u4_row = 0; u4_row < u4_mb_ht; u4_row++)
    {
        for(u4_col = 0; u4_col < u4_mb_wd; u4_col++)
        {
            /* Difference is formed in 32 bits, so it cannot wrap */
            WORD32 i4_diff = pu1_src[u4_col + u4_row * ps_src->i4_data_stride];

            i4_diff -= pu1_pred[u4_col + u4_row * ps_pred->i4_data_stride];
            i4_diff -= pi2_res[u4_col + u4_row * ps_res->i4_data_stride];

            u4_sad += ABS(i4_diff);
        }
    }

    return u4_sad;
}

1874 
1875 /**
1876 *******************************************************************************
1877 *
1878 * @brief
1879 *  Function to evaluate residual_prediction_flag
1880 *
1881 * @param[in] ps_src
1882 *  Pointer to MB src buffers
1883 *
1884 * @param[in] ps_pred
1885 *  Pointer to MB pred buffers
1886 *
1887 * @param[in] ps_res
1888 *  Pointer to MB res buffers
1889 *
1890 * @param[out] pu4_res_pred_sad
1891 *  Output variable for SAD
1892 *
1893 * @param[out] pu1_residual_prediction_flag
1894 *  Output variable for residual_prediction_flag
1895 *
1896 * @param[in] u4_winning_sad
1897 *  Winning mode's SAD
1898 *
1899 * @notes The algorithm currently uses only luma for evaluating
1900 *        residual_prediction_flag.
1901 *
1902 *******************************************************************************
1903 */
isvce_residual_pred_eval(svc_res_pred_ctxt_t * ps_res_pred_ctxt,yuv_buf_props_t * ps_src,yuv_buf_props_t * ps_pred,yuv_buf_props_t * ps_res,UWORD32 * pu4_res_pred_sad,UWORD8 * pu1_residual_prediction_flag,UWORD32 u4_winning_sad)1904 void isvce_residual_pred_eval(svc_res_pred_ctxt_t *ps_res_pred_ctxt, yuv_buf_props_t *ps_src,
1905                               yuv_buf_props_t *ps_pred, yuv_buf_props_t *ps_res,
1906                               UWORD32 *pu4_res_pred_sad, UWORD8 *pu1_residual_prediction_flag,
1907                               UWORD32 u4_winning_sad)
1908 {
1909     res_pred_constants_t *ps_res_pred_constants = &ps_res_pred_ctxt->s_res_pred_constants;
1910     res_pred_state_t *ps_res_pred_state = (res_pred_state_t *) ps_res_pred_constants->pv_state;
1911     pu4_res_pred_sad[0] = ps_res_pred_state->pf_get_sad_with_residual_pred(
1912         &ps_src->as_component_bufs[Y], &ps_pred->as_component_bufs[Y],
1913         &ps_res->as_component_bufs[Y], MB_SIZE, MB_SIZE);
1914 
1915     pu1_residual_prediction_flag[0] = pu4_res_pred_sad[0] < u4_winning_sad;
1916 }
1917 
/**
*******************************************************************************
*
* @brief
*  Stores the SVC MB mode of the current MB in the layer's MB mode buffer
*
* @par Description
*  Does nothing unless more than one spatial layer is configured. The mode
*  written at (i4_mb_x, i4_mb_y) is SVC_IBL_MB for an intra MB with
*  u1_base_mode_flag equal to 1, SVC_INTRA_MB for any other intra MB and
*  SVC_INTER_MB otherwise.
*
* @param[in] ps_proc
*  Process context of the MB being coded
*
*******************************************************************************
*/
void isvce_update_res_pred_info(isvce_process_ctxt_t *ps_proc)
{
    res_pred_state_t *ps_state;
    res_pred_layer_state_t *ps_layer;
    WORD8 i1_is_intra;
    WORD8 i1_mode;

    if(ps_proc->s_svc_params.u1_num_spatial_layers <= 1)
    {
        return;
    }

    ps_state = (res_pred_state_t *) ps_proc->ps_res_pred_ctxt->s_res_pred_constants.pv_state;
    ps_layer = &ps_state->ps_layer_state[ps_proc->u1_spatial_layer_id];

    i1_is_intra = ps_proc->ps_mb_info->u1_is_intra;

    if(!i1_is_intra)
    {
        i1_mode = SVC_INTER_MB;
    }
    else if(1 == ps_proc->ps_mb_info->u1_base_mode_flag)
    {
        i1_mode = SVC_IBL_MB;
    }
    else
    {
        i1_mode = SVC_INTRA_MB;
    }

    ps_layer->pi1_mb_mode[ps_proc->i4_mb_x + (ps_proc->i4_mb_y * (ps_layer->i4_mb_mode_stride))] =
        i1_mode;
}
1951