1 /******************************************************************************
2 *
3 * Copyright (C) 2015 The Android Open Source Project
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at:
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 *
17 *****************************************************************************
18 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19 */
20
21 /**
22 *******************************************************************************
23 * @file
24 * ih264_weighted_pred.c
25 *
26 * @brief
27 * Contains function definitions for weighted prediction functions
28 *
29 * @author
30 * ittiam
31 *
32 * @par List of Functions:
33 * - ih264_default_weighted_pred_luma
34 * - ih264_default_weighted_pred_chroma
35 * - ih264_weighted_pred_luma
36 * - ih264_weighted_pred_chroma
* - ih264_weighted_bi_pred_luma
* - ih264_weighted_bi_pred_chroma
39 *
40 * @remarks
41 *
42 *******************************************************************************
43 */
44
45 /*****************************************************************************/
46 /* File Includes */
47 /*****************************************************************************/
48
49 /* User Include Files */
50 #include "ih264_typedefs.h"
51 #include "ih264_macros.h"
52 #include "ih264_weighted_pred.h"
53 #include "ih264_platform_macros.h"
54
55
56 /*****************************************************************************/
57 /* Function definitions */
58 /*****************************************************************************/
59
60 /**
61 *******************************************************************************
62 *
63 * @brief default weighted prediction luma.
64 *
65 * @par Description
66 * This function performs the default weighted prediction as described in
67 * sec 8.4.2.3.1 titled "Default weighted sample prediction process" for luma.
68 * The function gets two ht x wd blocks, calculates their rounded-average and
69 * stores it in the destination block. (ht,wd) can be (4,4), (8,4), (4,8),
70 * (8,8), (16,8), (8,16) or (16,16)
71 *
72 * @param[in] pu1_src1
73 * Pointer to source 1
74 *
75 * @param[in] pu1_src2
76 * Pointer to source 2
77 *
78 * @param[in] pu1_dst
79 * Pointer to destination
80 *
81 * @param[in] src_strd1
82 * stride for source 1
83 *
84 * @param[in] src_strd2
85 * stride for source 2
86 *
87 * @param[in] dst_strd
88 * stride for destination
89 *
90 * @param[in] ht
91 * height of the block
92 *
93 * @param[in] wd
94 * width of the block
95 *
96 * @returns none
97 *
98 * @remarks none
99 *
100 *******************************************************************************
101 */
ih264_default_weighted_pred_luma(UWORD8 * pu1_src1,UWORD8 * pu1_src2,UWORD8 * pu1_dst,WORD32 src_strd1,WORD32 src_strd2,WORD32 dst_strd,WORD32 ht,WORD32 wd)102 void ih264_default_weighted_pred_luma(UWORD8 *pu1_src1,
103 UWORD8 *pu1_src2,
104 UWORD8 *pu1_dst,
105 WORD32 src_strd1,
106 WORD32 src_strd2,
107 WORD32 dst_strd,
108 WORD32 ht,
109 WORD32 wd)
110 {
111 WORD32 i, j;
112
113 src_strd1 -= wd;
114 src_strd2 -= wd;
115 dst_strd -= wd;
116
117 for(i = 0; i < ht; i++)
118 {
119 for(j = 0; j < wd; j++, pu1_src1++, pu1_src2++, pu1_dst++)
120 *pu1_dst = (*pu1_src1 + *pu1_src2 + 1) >> 1;
121
122 pu1_src1 += src_strd1;
123 pu1_src2 += src_strd2;
124 pu1_dst += dst_strd;
125 }
126 }
127
128 /**
129 *******************************************************************************
130 *
131 * @brief default weighted prediction chroma.
132 *
133 * @par Description
134 * This function performs the default weighted prediction as described in
135 * sec 8.4.2.3.1 titled "Default weighted sample prediction process" for chroma.
136 * The function gets two ht x wd blocks, calculates their rounded-average and
137 * stores it in the destination block. (ht,wd) can be (2,2), (4,2), (2,4),
138 * (4,4), (8,4), (4,8) or (8,8).
139 *
140 * @param[in] pu1_src1
141 * Pointer to source 1
142 *
143 * @param[in] pu1_src2
144 * Pointer to source 2
145 *
146 * @param[in] pu1_dst
147 * Pointer to destination
148 *
149 * @param[in] src_strd1
150 * stride for source 1
151 *
152 * @param[in] src_strd2
153 * stride for source 2
154 *
155 * @param[in] dst_strd
156 * stride for destination
157 *
158 * @param[in] ht
159 * height of the block
160 *
161 * @param[in] wd
162 * width of the block
163 *
164 * @returns none
165 *
166 * @remarks none
167 *
168 *******************************************************************************
169 */
ih264_default_weighted_pred_chroma(UWORD8 * pu1_src1,UWORD8 * pu1_src2,UWORD8 * pu1_dst,WORD32 src_strd1,WORD32 src_strd2,WORD32 dst_strd,WORD32 ht,WORD32 wd)170 void ih264_default_weighted_pred_chroma(UWORD8 *pu1_src1,
171 UWORD8 *pu1_src2,
172 UWORD8 *pu1_dst,
173 WORD32 src_strd1,
174 WORD32 src_strd2,
175 WORD32 dst_strd,
176 WORD32 ht,
177 WORD32 wd)
178 {
179 WORD32 i, j;
180
181 wd = wd << 1;
182
183 src_strd1 -= wd;
184 src_strd2 -= wd;
185 dst_strd -= wd;
186
187 for(i = 0; i < ht; i++)
188 {
189 for(j = 0; j < wd; j++, pu1_src1++, pu1_src2++, pu1_dst++)
190 *pu1_dst = (*pu1_src1 + *pu1_src2 + 1) >> 1;
191
192 pu1_src1 += src_strd1;
193 pu1_src2 += src_strd2;
194 pu1_dst += dst_strd;
195 }
196 }
197
198 /**
199 *******************************************************************************
200 *
201 * @brief weighted prediction luma.
202 *
203 * @par Description
204 * This function performs the weighted prediction as described in
205 * sec 8.4.2.3.2 titled "weighted sample prediction process" for luma.
206 * The function gets one ht x wd block, weights it, rounds it off, offsets it,
207 * saturates it to unsigned 8-bit and stores it in the destination block.
208 * (ht,wd) can be (4,4), (8,4), (4,8), (8,8), (16,8), (8,16) or (16,16)
209 *
210 * @param[in] pu1_src
211 * Pointer to source
212 *
213 * @param[in] pu1_dst
214 * Pointer to destination
215 *
216 * @param[in] src_strd
217 * stride for source
218 *
219 * @param[in] dst_strd
220 * stride for destination
221 *
222 * @param[in] log_wd
223 * number of bits to be rounded off
224 *
225 * @param[in] wt
226 * weight value
227 *
228 * @param[in] ofst
229 * offset value
230 *
231 * @param[in] ht
232 * height of the block
233 *
234 * @param[in] wd
235 * width of the block
236 *
237 * @returns none
238 *
239 * @remarks none
240 *
241 *******************************************************************************
242 */
ih264_weighted_pred_luma(UWORD8 * pu1_src,UWORD8 * pu1_dst,WORD32 src_strd,WORD32 dst_strd,WORD32 log_wd,WORD32 wt,WORD32 ofst,WORD32 ht,WORD32 wd)243 void ih264_weighted_pred_luma(UWORD8 *pu1_src,
244 UWORD8 *pu1_dst,
245 WORD32 src_strd,
246 WORD32 dst_strd,
247 WORD32 log_wd,
248 WORD32 wt,
249 WORD32 ofst,
250 WORD32 ht,
251 WORD32 wd)
252 {
253 WORD32 i, j;
254
255 wt = (WORD16)(wt & 0xffff);
256 ofst = (WORD8)(ofst & 0xff);
257
258 src_strd -= wd;
259 dst_strd -= wd;
260
261 if(log_wd >= 1)
262 {
263 WORD32 i_ofst = (1 << (log_wd - 1)) + (ofst << log_wd);
264 for(i = 0; i < ht; i++)
265 {
266 for(j = 0; j < wd; j++, pu1_src++, pu1_dst++)
267 *pu1_dst = CLIP_U8((wt * (*pu1_src) + i_ofst) >> log_wd);
268
269 pu1_src += src_strd;
270 pu1_dst += dst_strd;
271 }
272 }
273 else
274 {
275 for(i = 0; i < ht; i++)
276 {
277 for(j = 0; j < wd; j++, pu1_src++, pu1_dst++)
278 *pu1_dst = CLIP_U8(wt * (*pu1_src) + ofst);
279
280 pu1_src += src_strd;
281 pu1_dst += dst_strd;
282 }
283 }
284 }
285
286 /**
287 *******************************************************************************
288 *
289 * @brief weighted prediction chroma.
290 *
291 * @par Description
292 * This function performs the weighted prediction as described in
293 * sec 8.4.2.3.2 titled "weighted sample prediction process" for chroma.
294 * The function gets one ht x wd block, weights it, rounds it off, offsets it,
295 * saturates it to unsigned 8-bit and stores it in the destination block.
296 * (ht,wd) can be (2,2), (4,2), (2,4), (4,4), (8,4), (4,8) or (8,8).
297 *
298 * @param[in] pu1_src
299 * Pointer to source
300 *
301 * @param[in] pu1_dst
302 * Pointer to destination
303 *
304 * @param[in] src_strd
305 * stride for source
306 *
307 * @param[in] dst_strd
308 * stride for destination
309 *
310 * @param[in] log_wd
311 * number of bits to be rounded off
312 *
313 * @param[in] wt
314 * weight values for u and v
315 *
316 * @param[in] ofst
317 * offset values for u and v
318 *
319 * @param[in] ht
320 * height of the block
321 *
322 * @param[in] wd
323 * width of the block
324 *
325 * @returns none
326 *
327 * @remarks none
328 *
329 *******************************************************************************
330 */
ih264_weighted_pred_chroma(UWORD8 * pu1_src,UWORD8 * pu1_dst,WORD32 src_strd,WORD32 dst_strd,WORD32 log_wd,WORD32 wt,WORD32 ofst,WORD32 ht,WORD32 wd)331 void ih264_weighted_pred_chroma(UWORD8 *pu1_src,
332 UWORD8 *pu1_dst,
333 WORD32 src_strd,
334 WORD32 dst_strd,
335 WORD32 log_wd,
336 WORD32 wt,
337 WORD32 ofst,
338 WORD32 ht,
339 WORD32 wd)
340 {
341 WORD32 i, j;
342 WORD32 wt_u, wt_v;
343 WORD32 ofst_u, ofst_v;
344
345 wt_u = (WORD16)(wt & 0xffff);
346 wt_v = (WORD16)(wt >> 16);
347
348 ofst_u = (WORD8)(ofst & 0xff);
349 ofst_v = (WORD8)(ofst >> 8);
350
351 src_strd -= wd << 1;
352 dst_strd -= wd << 1;
353
354 if(log_wd >= 1)
355 {
356 ofst_u = (1 << (log_wd - 1)) + (ofst_u << log_wd);
357 ofst_v = (1 << (log_wd - 1)) + (ofst_v << log_wd);
358
359 for(i = 0; i < ht; i++)
360 {
361 for(j = 0; j < wd; j++, pu1_src++, pu1_dst++)
362 {
363 *pu1_dst = CLIP_U8((wt_u * (*pu1_src) + ofst_u) >> log_wd);
364 pu1_src++;
365 pu1_dst++;
366 *pu1_dst = CLIP_U8((wt_v * (*pu1_src) + ofst_v) >> log_wd);
367 }
368 pu1_src += src_strd;
369 pu1_dst += dst_strd;
370 }
371 }
372 else
373 {
374 for(i = 0; i < ht; i++)
375 {
376 for(j = 0; j < wd; j++, pu1_src++, pu1_dst++)
377 {
378 *pu1_dst = CLIP_U8(wt_u * (*pu1_src) + ofst_u);
379 pu1_src++;
380 pu1_dst++;
381 *pu1_dst = CLIP_U8(wt_v * (*pu1_src) + ofst_v);
382 }
383 pu1_src += src_strd;
384 pu1_dst += dst_strd;
385 }
386 }
387 }
388
389 /**
390 *******************************************************************************
391 *
392 * @brief weighted bi-prediction luma.
393 *
394 * @par Description
395 * This function performs the weighted biprediction as described in
396 * sec 8.4.2.3.2 titled "weighted sample prediction process" for luma.
397 * The function gets two ht x wd blocks, weights them, adds them, rounds off
398 * the sum, offsets it, saturates it to unsigned 8-bit and stores it in the
399 * destination block. (ht,wd) can be (4,4), (8,4), (4,8), (8,8), (16,8), (8,16)
400 * or (16,16)
401 *
402 * @param[in] pu1_src1
403 * Pointer to source 1
404 *
405 * @param[in] pu1_src2
406 * Pointer to source 2
407 *
408 * @param[in] pu1_dst
409 * Pointer to destination
410 *
411 * @param[in] src_strd1
412 * stride for source 1
413 *
414 * @param[in] src_strd2
415 * stride for source 2
416 *
417 * @param[in] dst_strd
418 * stride for destination
419 *
420 * @param[in] log_wd
421 * number of bits to be rounded off
422 *
423 * @param[in] wt1
424 * weight value for source 1
425 *
426 * @param[in] wt2
427 * weight value for source 2
428 *
429 * @param[in] ofst1
430 * offset value for source 1
431 *
432 * @param[in] ofst2
433 * offset value for source 2
434 *
435 * @param[in] ht
436 * height of the block
437 *
438 * @param[in] wd
439 * width of the block
440 *
441 * @returns none
442 *
443 * @remarks none
444 *
445 *******************************************************************************
446 */
ih264_weighted_bi_pred_luma(UWORD8 * pu1_src1,UWORD8 * pu1_src2,UWORD8 * pu1_dst,WORD32 src_strd1,WORD32 src_strd2,WORD32 dst_strd,WORD32 log_wd,WORD32 wt1,WORD32 wt2,WORD32 ofst1,WORD32 ofst2,WORD32 ht,WORD32 wd)447 void ih264_weighted_bi_pred_luma(UWORD8 *pu1_src1,
448 UWORD8 *pu1_src2,
449 UWORD8 *pu1_dst,
450 WORD32 src_strd1,
451 WORD32 src_strd2,
452 WORD32 dst_strd,
453 WORD32 log_wd,
454 WORD32 wt1,
455 WORD32 wt2,
456 WORD32 ofst1,
457 WORD32 ofst2,
458 WORD32 ht,
459 WORD32 wd)
460 {
461 WORD32 i, j;
462 WORD32 shft, ofst;
463
464 ofst1 = (WORD8)(ofst1 & 0xff);
465 ofst2 = (WORD8)(ofst2 & 0xff);
466 wt1 = (WORD16)(wt1 & 0xffff);
467 wt2 = (WORD16)(wt2 & 0xffff);
468 ofst = (ofst1 + ofst2 + 1) >> 1;
469
470 shft = log_wd + 1;
471 ofst = (1 << log_wd) + (ofst << shft);
472
473 src_strd1 -= wd;
474 src_strd2 -= wd;
475 dst_strd -= wd;
476
477 for(i = 0; i < ht; i++)
478 {
479 for(j = 0; j < wd; j++, pu1_src1++, pu1_src2++, pu1_dst++)
480 *pu1_dst = CLIP_U8((wt1 * (*pu1_src1) + wt2 * (*pu1_src2) + ofst) >> shft);
481
482 pu1_src1 += src_strd1;
483 pu1_src2 += src_strd2;
484 pu1_dst += dst_strd;
485 }
486 }
487
488 /**
489 *******************************************************************************
490 *
491 * @brief weighted bi-prediction chroma.
492 *
493 * @par Description
494 * This function performs the weighted biprediction as described in
495 * sec 8.4.2.3.2 titled "weighted sample prediction process" for chroma.
496 * The function gets two ht x wd blocks, weights them, adds them, rounds off
497 * the sum, offsets it, saturates it to unsigned 8-bit and stores it in the
498 * destination block. (ht,wd) can be (2,2), (4,2), (2,4), (4,4), (8,4), (4,8)
499 * or (8,8)
500 *
501 * @param[in] pu1_src1
502 * Pointer to source 1
503 *
504 * @param[in] pu1_src2
505 * Pointer to source 2
506 *
507 * @param[in] pu1_dst
508 * Pointer to destination
509 *
510 * @param[in] src_strd1
511 * stride for source 1
512 *
513 * @param[in] src_strd2
514 * stride for source 2
515 *
516 * @param[in] dst_strd
517 * stride for destination
518 *
519 * @param[in] log_wd
520 * number of bits to be rounded off
521 *
522 * @param[in] wt1
523 * weight value for source 1
524 *
525 * @param[in] wt2
526 * weight value for source 2
527 *
528 * @param[in] ofst1
529 * offset value for source 1
530 *
531 * @param[in] ofst2
532 * offset value for source 2
533 *
534 * @param[in] ht
535 * height of the block
536 *
537 * @param[in] wd
538 * width of the block
539 *
540 * @returns none
541 *
542 * @remarks none
543 *
544 *******************************************************************************
545 */
ih264_weighted_bi_pred_chroma(UWORD8 * pu1_src1,UWORD8 * pu1_src2,UWORD8 * pu1_dst,WORD32 src_strd1,WORD32 src_strd2,WORD32 dst_strd,WORD32 log_wd,WORD32 wt1,WORD32 wt2,WORD32 ofst1,WORD32 ofst2,WORD32 ht,WORD32 wd)546 void ih264_weighted_bi_pred_chroma(UWORD8 *pu1_src1,
547 UWORD8 *pu1_src2,
548 UWORD8 *pu1_dst,
549 WORD32 src_strd1,
550 WORD32 src_strd2,
551 WORD32 dst_strd,
552 WORD32 log_wd,
553 WORD32 wt1,
554 WORD32 wt2,
555 WORD32 ofst1,
556 WORD32 ofst2,
557 WORD32 ht,
558 WORD32 wd)
559 {
560 WORD32 i, j;
561 WORD32 wt1_u, wt1_v, wt2_u, wt2_v;
562 WORD32 ofst1_u, ofst1_v, ofst2_u, ofst2_v;
563 WORD32 ofst_u, ofst_v;
564 WORD32 shft;
565
566 ofst1_u = (WORD8)(ofst1 & 0xff);
567 ofst1_v = (WORD8)(ofst1 >> 8);
568 ofst2_u = (WORD8)(ofst2 & 0xff);
569 ofst2_v = (WORD8)(ofst2 >> 8);
570 wt1_u = (WORD16)(wt1 & 0xffff);
571 wt1_v = (WORD16)(wt1 >> 16);
572 wt2_u = (WORD16)(wt2 & 0xffff);
573 wt2_v = (WORD16)(wt2 >> 16);
574 ofst_u = (ofst1_u + ofst2_u + 1) >> 1;
575 ofst_v = (ofst1_v + ofst2_v + 1) >> 1;
576
577 src_strd1 -= wd << 1;
578 src_strd2 -= wd << 1;
579 dst_strd -= wd << 1;
580
581 shft = log_wd + 1;
582 ofst_u = (1 << log_wd) + (ofst_u << shft);
583 ofst_v = (1 << log_wd) + (ofst_v << shft);
584
585 for(i = 0; i < ht; i++)
586 {
587 for(j = 0; j < wd; j++, pu1_src1++, pu1_src2++, pu1_dst++)
588 {
589 *pu1_dst = CLIP_U8((wt1_u * (*pu1_src1) + wt2_u * (*pu1_src2) + ofst_u) >> shft);
590 pu1_src1++;
591 pu1_src2++;
592 pu1_dst++;
593 *pu1_dst = CLIP_U8((wt1_v * (*pu1_src1) + wt2_v * (*pu1_src2) + ofst_v) >> shft);
594 }
595 pu1_src1 += src_strd1;
596 pu1_src2 += src_strd2;
597 pu1_dst += dst_strd;
598 }
599 }
600