1 /*
2 * Copyright (c) 2018, Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 */
22 //!
23 //! \file codechal_fei_hevc_g9_skl.cpp
24 //! \brief HEVC FEI dual-pipe encoder for GEN9 SKL.
25 //!
26
27 #include "codechal_fei_hevc_g9_skl.h"
28 #include "igcodeckrn_g9.h"
29 #include "codeckrnheader.h"
30
//! Padding constant of 64 * 1024 (64 KiB).
//! NOTE(review): judging by the name this is presumably a byte count used by a GPU MMU
//! workaround; its use is not visible in this part of the file - confirm.
#define GPUMMU_WA_PADDING (64 * 1024)
32
33 //! HEVC encoder kernel header structure for G9 SKL
34 struct CODECHAL_ENC_HEVC_KERNEL_HEADER_FEI_G9_SKL
35 {
36 int nKernelCount; //!< Total number of kernels
37
38 CODECHAL_KERNEL_HEADER Hevc_FEI_LCUEnc_I_2xDownSampling_Kernel; //!< 2x down sampling kernel
39 CODECHAL_KERNEL_HEADER Hevc_FEI_LCUEnc_I_32x32_PU_ModeDecision_Kernel; //!< Intra 32x32 PU mode decision kernel
40 CODECHAL_KERNEL_HEADER Hevc_FEI_LCUEnc_I_16x16_PU_SADComputation_Kernel; //!< Intra 16x16 PU SAD computation kernel
41 CODECHAL_KERNEL_HEADER Hevc_FEI_LCUEnc_I_16x16_PU_ModeDecision_Kernel; //!< Intra 16x16 PU mode decision kernel
42 CODECHAL_KERNEL_HEADER Hevc_FEI_LCUEnc_I_8x8_PU_Kernel; //!< Intra 8x8 PU mode decision kernel
43 CODECHAL_KERNEL_HEADER Hevc_FEI_LCUEnc_I_8x8_PU_FMode_Kernel; //!< Intra 8x8 PU final mode decision kernel
44 CODECHAL_KERNEL_HEADER Hevc_FEI_LCUEnc_PB_32x32_PU_IntraCheck; //!< P/B 32x32 PU intra mode check kernel
45 CODECHAL_KERNEL_HEADER HEVC_FEI_LCUEnc_PB_MB; //!< P/B MbEnc Kernel
46 CODECHAL_KERNEL_HEADER Hevc_FEI_LCUEnc_I_DS4HME; //!< 4x Scaling kernel
47 CODECHAL_KERNEL_HEADER Hevc_FEI_LCUEnc_P_HME; //!< P frame HME kernel
48 CODECHAL_KERNEL_HEADER Hevc_FEI_LCUEnc_B_HME; //!< B frame HME kernel
49 CODECHAL_KERNEL_HEADER Hevc_FEI_LCUEnc_I_COARSE; //!< Intra coarse kernel
50 CODECHAL_KERNEL_HEADER Hevc_FEI_LCUEnc_PB_Pak; //!< P/B frame PAK kernel
51 CODECHAL_KERNEL_HEADER Hevc_FEI_LCUEnc_BRC_Blockcopy; //!< BRC blockcopy kerenel
52 CODECHAL_KERNEL_HEADER Hevc_FEI_LCUEnc_DS_Combined; //!< Down scale and format conversion kernel for 10 bit for KBL
53 CODECHAL_KERNEL_HEADER HEVC_FEI_LCUEnc_P_MB; //!< P frame MbEnc kernel
54 };
55
//! Pointer alias for CODECHAL_ENC_HEVC_KERNEL_HEADER_FEI_G9_SKL.
using PCODECHAL_ENC_HEVC_KERNEL_HEADER_FEI_G9_SKL = struct CODECHAL_ENC_HEVC_KERNEL_HEADER_FEI_G9_SKL*;
57
58 //! HEVC encoder FEI intra 8x8 PU final mode decision kernel curbe for GEN9
59 struct CODECHAL_FEI_HEVC_I_8x8_PU_FMODE_CURBE_G9
60 {
61 union {
62 struct {
63 uint32_t FrameWidth : MOS_BITFIELD_RANGE(0, 15);
64 uint32_t FrameHeight : MOS_BITFIELD_RANGE(16, 31);
65 };
66 uint32_t Value;
67 } DW0;
68
69 union {
70 struct {
71 uint32_t SliceType : MOS_BITFIELD_RANGE(0, 1);
72 uint32_t PuType : MOS_BITFIELD_RANGE(2, 3);
73 uint32_t PakReordingFlag : MOS_BITFIELD_BIT(4);
74 uint32_t ReservedMBZ : MOS_BITFIELD_BIT(5);
75 uint32_t LCUType : MOS_BITFIELD_BIT(6);
76 uint32_t ScreenContentFlag : MOS_BITFIELD_BIT(7);
77 uint32_t IntraRefreshEn : MOS_BITFIELD_RANGE(8, 9);
78 uint32_t EnableRollingIntra : MOS_BITFIELD_BIT(10);
79 uint32_t HalfUpdateMixedLCU : MOS_BITFIELD_BIT(11);
80 uint32_t Reserved_12_23 : MOS_BITFIELD_RANGE(12, 23);
81 uint32_t EnableIntraEarlyExit : MOS_BITFIELD_BIT(24);
82 uint32_t BRCEnable : MOS_BITFIELD_BIT(25);
83 uint32_t LCUBRCEnable : MOS_BITFIELD_BIT(26);
84 uint32_t ROIEnable : MOS_BITFIELD_BIT(27);
85 uint32_t FASTSurveillanceFlag : MOS_BITFIELD_BIT(28);
86 uint32_t EnableFlexibleParam : MOS_BITFIELD_BIT(29);
87 uint32_t EnableQualityImprovement : MOS_BITFIELD_BIT(30);
88 uint32_t EnableDebugDump : MOS_BITFIELD_BIT(31);
89 };
90 uint32_t Value;
91 } DW1;
92
93 union {
94 struct {
95 uint32_t LambdaForLuma;
96 };
97 uint32_t Value;
98 } DW2;
99
100 union {
101 // For inter frame or enable statictics data dump
102 struct {
103 uint32_t LambdaForDistCalculation;
104 };
105 uint32_t Value;
106 } DW3;
107
108 union {
109 struct {
110 uint32_t ModeCostFor8x8PU_TU8;
111 };
112 uint32_t Value;
113 } DW4;
114
115 union {
116 struct {
117 uint32_t ModeCostFor8x8PU_TU4;
118 };
119 uint32_t Value;
120 } DW5;
121
122 union {
123 struct {
124 uint32_t SATD16x16PuThreshold : MOS_BITFIELD_RANGE(0, 15);
125 uint32_t BiasFactorToward8x8 : MOS_BITFIELD_RANGE(16, 31);
126 };
127 uint32_t Value;
128 } DW6;
129
130 union {
131 struct {
132 uint32_t Qp : MOS_BITFIELD_RANGE(0, 15);
133 uint32_t QpForInter : MOS_BITFIELD_RANGE(16, 31);
134 };
135 uint32_t Value;
136 } DW7;
137
138 union {
139 struct {
140 uint32_t SimplifiedFlagForInter : MOS_BITFIELD_BIT(0);
141 uint32_t EnableStatsDataDump : MOS_BITFIELD_BIT(1);
142 uint32_t Reserved_2_7 : MOS_BITFIELD_RANGE(2, 7);
143 uint32_t KBLControlFlag : MOS_BITFIELD_BIT(8);
144 uint32_t Reserved_9_31 : MOS_BITFIELD_RANGE(9, 31);
145 };
146 uint32_t Value;
147 } DW8;
148
149 union {
150 struct {
151 uint32_t IntraRefreshMBNum : MOS_BITFIELD_RANGE(0, 15);
152 uint32_t IntraRefreshUnitInMB : MOS_BITFIELD_RANGE(16, 23);
153 uint32_t IntraRefreshQPDelta : MOS_BITFIELD_RANGE(24, 31);
154 };
155 uint32_t Value;
156 } DW9;
157
158 union {
159 struct {
160 uint32_t Reserved;
161 };
162 uint32_t Value;
163 } DW10;
164
165 union {
166 struct {
167 uint32_t Reserved;
168 };
169 uint32_t Value;
170 } DW11;
171
172 union {
173 struct {
174 uint32_t Reserved;
175 };
176 uint32_t Value;
177 } DW12;
178
179 union {
180 struct {
181 uint32_t Reserved;
182 };
183 uint32_t Value;
184 } DW13;
185
186 union {
187 struct {
188 uint32_t Reserved;
189 };
190 uint32_t Value;
191 } DW14;
192
193 union {
194 struct {
195 uint32_t Reserved;
196 };
197 uint32_t Value;
198 } DW15;
199
200 union {
201 struct {
202 uint32_t BTI_PAK_Object;
203 };
204 uint32_t Value;
205 } DW16;
206
207 union {
208 struct {
209 uint32_t BTI_VME_8x8_Mode;
210 };
211 uint32_t Value;
212 } DW17;
213
214 union {
215 struct {
216 uint32_t BTI_Intra_Mode;
217 };
218 uint32_t Value;
219 } DW18;
220
221 union {
222 struct {
223 uint32_t BTI_PAK_Command;
224 };
225 uint32_t Value;
226 } DW19;
227
228 union {
229 struct {
230 uint32_t BTI_Slice_Map;
231 };
232 uint32_t Value;
233 } DW20;
234
235 union {
236 struct {
237 uint32_t BTI_IntraDist;
238 };
239 uint32_t Value;
240 } DW21;
241
242 union {
243 struct {
244 uint32_t BTI_BRC_Input;
245 };
246 uint32_t Value;
247 } DW22;
248
249 union {
250 struct {
251 uint32_t BTI_Simplest_Intra;
252 };
253 uint32_t Value;
254 } DW23;
255
256 union {
257 struct {
258 uint32_t BTI_LCU_Qp_Surface;
259 };
260 uint32_t Value;
261 } DW24;
262
263 union {
264 struct {
265 uint32_t BTI_BRC_Data;
266 };
267 uint32_t Value;
268 } DW25;
269
270 union {
271 //Output (for inter and statictics data dump only)
272 struct {
273 uint32_t BTI_Haar_Dist16x16;
274 };
275 uint32_t Value;
276 } DW26;
277
278 union {
279 // This surface should take the statistics surface from Hevc_LCUEnc_I_32x32_PU_ModeDecision as input
280 struct {
281 uint32_t BTI_Stats_Data;
282 };
283 uint32_t Value;
284 } DW27;
285
286 union {
287 // Frame level Statistics data surface
288 struct {
289 uint32_t BTI_Frame_Stats_Data;
290 };
291 uint32_t Value;
292 } DW28;
293
294 union {
295 // Frame level CTB Distortion data surface
296 struct {
297 uint32_t BTI_CTB_Distortion_Surface;
298 };
299 uint32_t Value;
300 } DW29;
301
302 union {
303 struct {
304 uint32_t BTI_Debug;
305 };
306 uint32_t Value;
307 } DW30;
308 };
309
//! Pointer alias for CODECHAL_FEI_HEVC_I_8x8_PU_FMODE_CURBE_G9.
using PCODECHAL_FEI_HEVC_I_8x8_PU_FMODE_CURBE_G9 = struct CODECHAL_FEI_HEVC_I_8x8_PU_FMODE_CURBE_G9*;
// Size check: the curbe is expected to span exactly 31 DWORDs (DW0..DW30).
// NOTE(review): C_ASSERT / MOS_BYTES_TO_DWORDS are defined outside this file; C_ASSERT is
// presumably a compile-time assertion - confirm.
C_ASSERT(MOS_BYTES_TO_DWORDS(sizeof(CODECHAL_FEI_HEVC_I_8x8_PU_FMODE_CURBE_G9)) == 31);
312
313 //! HEVC encoder FEI B 32x32 PU intra check kernel curbe for GEN9
314 struct CODECHAL_FEI_HEVC_B_32x32_PU_INTRA_CHECK_CURBE_G9
315 {
316 union {
317 struct {
318 uint32_t FrameWidth : MOS_BITFIELD_RANGE(0, 15);
319 uint32_t FrameHeight : MOS_BITFIELD_RANGE(16, 31);
320 };
321 uint32_t Value;
322 } DW0;
323
324 union {
325 struct {
326 uint32_t SliceType : MOS_BITFIELD_RANGE(0, 1);
327 uint32_t Reserved : MOS_BITFIELD_RANGE(2, 7);
328 uint32_t Log2MinTUSize : MOS_BITFIELD_RANGE(8, 15);
329 uint32_t Flags : MOS_BITFIELD_RANGE(16, 23);
330 uint32_t EnableIntraEarlyExit : MOS_BITFIELD_BIT(24);
331 uint32_t HMEEnable : MOS_BITFIELD_BIT(25);
332 uint32_t FASTSurveillanceFlag : MOS_BITFIELD_BIT(26);
333 uint32_t Res_27_30 : MOS_BITFIELD_RANGE(27, 30);
334 uint32_t EnableDebugDump : MOS_BITFIELD_BIT(31);
335 };
336 uint32_t Value;
337 } DW1;
338
339 union {
340 struct {
341 uint32_t QpValue : MOS_BITFIELD_RANGE(0, 15);
342 uint32_t QpMultiplier : MOS_BITFIELD_RANGE(16, 31);
343 };
344 uint32_t Value;
345 } DW2;
346
347 union {
348 struct {
349 uint32_t Reserved;
350 };
351 uint32_t Value;
352 } DW3;
353
354 union {
355 struct {
356 uint32_t Reserved;
357 };
358 uint32_t Value;
359 } DW4;
360
361 union {
362 struct {
363 uint32_t Reserved;
364 };
365 uint32_t Value;
366 } DW5;
367
368 union {
369 struct {
370 uint32_t Reserved;
371 };
372 uint32_t Value;
373 } DW6;
374
375 union {
376 struct {
377 uint32_t Reserved;
378 };
379 uint32_t Value;
380 } DW7;
381
382 union {
383 struct {
384 uint32_t BTI_Per32x32PuIntraCheck;
385 };
386 uint32_t Value;
387 } DW8;
388
389 union {
390 struct {
391 uint32_t BTI_Src_Y;
392 };
393 uint32_t Value;
394 } DW9;
395
396 union {
397 struct {
398 uint32_t BTI_Src_Y2X;
399 };
400 uint32_t Value;
401 } DW10;
402
403 union {
404 struct {
405 uint32_t BTI_Slice_Map;
406 };
407 uint32_t Value;
408 } DW11;
409
410 union {
411 struct {
412 uint32_t BTI_VME_Y2X;
413 };
414 uint32_t Value;
415 } DW12;
416
417 union {
418 struct {
419 uint32_t BTI_Simplest_Intra; // output only
420 };
421 uint32_t Value;
422 } DW13;
423
424 union {
425 struct {
426 uint32_t BTI_HME_MVPred;
427 };
428 uint32_t Value;
429 } DW14;
430
431 union {
432 struct {
433 uint32_t BTI_HME_Dist;
434 };
435 uint32_t Value;
436 } DW15;
437
438 union {
439 struct {
440 uint32_t BTI_LCU_Skip;
441 };
442 uint32_t Value;
443 } DW16;
444
445 union {
446 struct {
447 uint32_t BTI_Debug;
448 };
449 uint32_t Value;
450 } DW17;
451 };
452
//! Pointer alias for CODECHAL_FEI_HEVC_B_32x32_PU_INTRA_CHECK_CURBE_G9.
//! Declared as a pointer type (trailing '*'), consistent with the other P-prefixed
//! aliases in this file; without it the name would alias the struct type itself.
using PCODECHAL_FEI_HEVC_B_32x32_PU_INTRA_CHECK_CURBE_G9 = struct CODECHAL_FEI_HEVC_B_32x32_PU_INTRA_CHECK_CURBE_G9*;
// Size check: CODECHAL_FEI_HEVC_B_32x32_PU_INTRA_CHECK_CURBE_G9 is expected to span
// exactly 18 DWORDs (DW0..DW17).
// NOTE(review): C_ASSERT / MOS_BYTES_TO_DWORDS are defined outside this file; C_ASSERT is
// presumably a compile-time assertion - confirm.
C_ASSERT(MOS_BYTES_TO_DWORDS(sizeof(CODECHAL_FEI_HEVC_B_32x32_PU_INTRA_CHECK_CURBE_G9)) == 18);
455
456 //! HEVC encoder FEI B Pak kernel curbe for GEN9
457 struct CODECHAL_FEI_HEVC_B_PAK_CURBE_G9
458 {
459 union
460 {
461 struct
462 {
463 uint32_t FrameWidth : MOS_BITFIELD_RANGE(0, 15);
464 uint32_t FrameHeight : MOS_BITFIELD_RANGE(16, 31);
465 };
466 struct
467 {
468 uint32_t Value;
469 };
470 } DW0;
471
472 union
473 {
474 struct
475 {
476 uint32_t Qp : MOS_BITFIELD_RANGE(0, 7);
477 uint32_t Res_8_15 : MOS_BITFIELD_RANGE(8, 15);
478 uint32_t MaxVmvR : MOS_BITFIELD_RANGE(16, 31);
479 };
480 struct
481 {
482 uint32_t Value;
483 };
484 } DW1;
485
486 union
487 {
488 struct
489 {
490 uint32_t SliceType : MOS_BITFIELD_RANGE(0, 1);
491 uint32_t EnableWA : MOS_BITFIELD_BIT( 2);
492 uint32_t Res_3_7 : MOS_BITFIELD_RANGE(3, 7);
493 uint32_t SimplestIntraEnable : MOS_BITFIELD_BIT(8);
494 uint32_t BrcEnable : MOS_BITFIELD_BIT(9);
495 uint32_t LcuBrcEnable : MOS_BITFIELD_BIT(10);
496 uint32_t ROIEnable : MOS_BITFIELD_BIT(11);
497 uint32_t FASTSurveillanceFlag : MOS_BITFIELD_BIT(12);
498 uint32_t EnableRollingIntra : MOS_BITFIELD_BIT(13);
499 uint32_t Res_14 : MOS_BITFIELD_BIT(14);
500 uint32_t EnableQualityImprovement : MOS_BITFIELD_BIT(15);
501 uint32_t KBLControlFlag : MOS_BITFIELD_BIT(16);
502 uint32_t Res_17_30 : MOS_BITFIELD_RANGE(17, 30);
503 uint32_t ScreenContent : MOS_BITFIELD_BIT(31);
504 };
505 struct
506 {
507 uint32_t Value;
508 };
509 } DW2;
510
511 union
512 {
513 struct
514 {
515 uint32_t IntraRefreshMBNum : MOS_BITFIELD_RANGE(0, 15);
516 uint32_t IntraRefreshUnitInMB : MOS_BITFIELD_RANGE(16, 23);
517 uint32_t IntraRefreshQPDelta : MOS_BITFIELD_RANGE(24, 31);
518 };
519 struct
520 {
521 uint32_t Value;
522 };
523 } DW3;
524
525 union
526 {
527 struct
528 {
529 uint32_t Reserved;
530 };
531 struct
532 {
533 uint32_t Value;
534 };
535 } DW4_15[12];
536
537 union
538 {
539 struct
540 {
541 uint32_t BTI_CU_Record;
542 };
543 struct
544 {
545 uint32_t Value;
546 };
547 } DW16;
548
549 union
550 {
551 struct
552 {
553 uint32_t BTI_PAK_Obj;
554 };
555 struct
556 {
557 uint32_t Value;
558 };
559 } DW17;
560
561 union
562 {
563 struct
564 {
565 uint32_t BTI_Slice_Map;
566 };
567 struct
568 {
569 uint32_t Value;
570 };
571 } DW18;
572
573 union
574 {
575 struct
576 {
577 uint32_t BTI_Brc_Input;
578 };
579 struct
580 {
581 uint32_t Value;
582 };
583 } DW19;
584
585 union
586 {
587 struct
588 {
589 uint32_t BTI_LCU_Qp;
590 };
591 struct
592 {
593 uint32_t Value;
594 };
595 } DW20;
596
597 union
598 {
599 struct
600 {
601 uint32_t BTI_Brc_Data;
602 };
603 struct
604 {
605 uint32_t Value;
606 };
607 } DW21;
608
609 union
610 {
611 struct
612 {
613 uint32_t BTI_MB_Data;
614 };
615 struct
616 {
617 uint32_t Value;
618 };
619 } DW22;
620
621 union
622 {
623 struct
624 {
625 uint32_t BTI_MVP_Surface;
626 };
627 struct
628 {
629 uint32_t Value;
630 };
631 } DW23;
632
633 union
634 {
635 struct
636 {
637 uint32_t BTI_WA_PAK_Data;
638 };
639 struct
640 {
641 uint32_t Value;
642 };
643 } DW24;
644
645 union
646 {
647 struct
648 {
649 uint32_t BTI_WA_PAK_Obj;
650 };
651 struct
652 {
653 uint32_t Value;
654 };
655 } DW25;
656
657 union
658 {
659 struct
660 {
661 uint32_t BTI_Debug;
662 };
663 struct
664 {
665 uint32_t Value;
666 };
667 } DW26;
668
669 };
670
//! Pointer alias for CODECHAL_FEI_HEVC_B_PAK_CURBE_G9.
using PCODECHAL_FEI_HEVC_B_PAK_CURBE_G9 = struct CODECHAL_FEI_HEVC_B_PAK_CURBE_G9*;
// Size check: the curbe is expected to span exactly 27 DWORDs (DW0..DW3, DW4_15[12], DW16..DW26).
// NOTE(review): C_ASSERT / MOS_BYTES_TO_DWORDS are defined outside this file; C_ASSERT is
// presumably a compile-time assertion - confirm.
C_ASSERT(MOS_BYTES_TO_DWORDS(sizeof(CODECHAL_FEI_HEVC_B_PAK_CURBE_G9)) == 27);
673
674 //! HEVC encoder B MBEnc kernel curbe for GEN9
675 struct CODECHAL_FEI_HEVC_B_MB_ENC_CURBE_G9
676 {
677 // DW0
678 union
679 {
680 struct
681 {
682 uint32_t SkipModeEn : MOS_BITFIELD_BIT(0);
683 uint32_t AdaptiveEn : MOS_BITFIELD_BIT(1);
684 uint32_t BiMixDis : MOS_BITFIELD_BIT(2);
685 uint32_t : MOS_BITFIELD_RANGE(3, 4);
686 uint32_t EarlyImeSuccessEn : MOS_BITFIELD_BIT(5);
687 uint32_t : MOS_BITFIELD_BIT(6);
688 uint32_t T8x8FlagForInterEn : MOS_BITFIELD_BIT(7);
689 uint32_t : MOS_BITFIELD_RANGE(8, 23);
690 uint32_t EarlyImeStop : MOS_BITFIELD_RANGE(24, 31);
691 };
692 struct
693 {
694 uint32_t Value;
695 };
696 } DW0;
697
698 // DW1
699 union
700 {
701 struct
702 {
703 uint32_t MaxNumMVs : MOS_BITFIELD_RANGE(0, 5);
704 uint32_t : MOS_BITFIELD_RANGE(6, 15);
705 uint32_t BiWeight : MOS_BITFIELD_RANGE(16, 21);
706 uint32_t : MOS_BITFIELD_RANGE(22, 27);
707 uint32_t UniMixDisable : MOS_BITFIELD_BIT(28);
708 uint32_t : MOS_BITFIELD_RANGE(29, 31);
709 };
710 struct
711 {
712 uint32_t Value;
713 };
714 } DW1;
715
716 // DW2
717 union
718 {
719 struct
720 {
721 uint32_t LenSP : MOS_BITFIELD_RANGE(0, 7);
722 uint32_t MaxNumSU : MOS_BITFIELD_RANGE(8, 15);
723 uint32_t PicWidth : MOS_BITFIELD_RANGE(16, 31);
724 };
725 struct
726 {
727 uint32_t Value;
728 };
729 } DW2;
730
731 // DW3
732 union
733 {
734 struct
735 {
736 uint32_t SrcSize : MOS_BITFIELD_RANGE(0, 1);
737 uint32_t : MOS_BITFIELD_RANGE(2, 3);
738 uint32_t MbTypeRemap : MOS_BITFIELD_RANGE(4, 5);
739 uint32_t SrcAccess : MOS_BITFIELD_BIT(6);
740 uint32_t RefAccess : MOS_BITFIELD_BIT(7);
741 uint32_t SearchCtrl : MOS_BITFIELD_RANGE(8, 10);
742 uint32_t DualSearchPathOption : MOS_BITFIELD_BIT(11);
743 uint32_t SubPelMode : MOS_BITFIELD_RANGE(12, 13);
744 uint32_t SkipType : MOS_BITFIELD_BIT(14);
745 uint32_t DisableFieldCacheAlloc : MOS_BITFIELD_BIT(15);
746 uint32_t InterChromaMode : MOS_BITFIELD_BIT(16);
747 uint32_t FTEnable : MOS_BITFIELD_BIT(17);
748 uint32_t BMEDisableFBR : MOS_BITFIELD_BIT(18);
749 uint32_t BlockBasedSkipEnable : MOS_BITFIELD_BIT(19);
750 uint32_t InterSAD : MOS_BITFIELD_RANGE(20, 21);
751 uint32_t IntraSAD : MOS_BITFIELD_RANGE(22, 23);
752 uint32_t SubMbPartMask : MOS_BITFIELD_RANGE(24, 30);
753 uint32_t : MOS_BITFIELD_BIT(31);
754 };
755 struct
756 {
757 uint32_t Value;
758 };
759 } DW3;
760
761 union
762 {
763 struct
764 {
765 uint32_t PicHeightMinus1 : MOS_BITFIELD_RANGE(0, 15);
766 uint32_t Res_16_22 : MOS_BITFIELD_RANGE(16,22);
767 uint32_t EnableQualityImprovement : MOS_BITFIELD_BIT(23);
768 uint32_t EnableDebug : MOS_BITFIELD_BIT(24);
769 uint32_t EnableFlexibleParam : MOS_BITFIELD_BIT(25);
770 uint32_t EnableStatsDataDump : MOS_BITFIELD_BIT(26);
771 uint32_t Res_27 : MOS_BITFIELD_BIT(27);
772 uint32_t HMEEnable : MOS_BITFIELD_BIT(28);
773 uint32_t SliceType : MOS_BITFIELD_RANGE(29, 30);
774 uint32_t UseActualRefQPValue : MOS_BITFIELD_BIT(31);
775 };
776 struct
777 {
778 uint32_t Value;
779 };
780 } DW4;
781
782 // DW5
783 union
784 {
785 struct
786 {
787 uint32_t Res_0_15 : MOS_BITFIELD_RANGE(0, 15);
788 uint32_t RefWidth : MOS_BITFIELD_RANGE(16, 23);
789 uint32_t RefHeight : MOS_BITFIELD_RANGE(24, 31);
790 };
791 struct
792 {
793 uint32_t Value;
794 };
795 } DW5;
796
797 union
798 {
799 struct
800 {
801 uint32_t FrameWidth : MOS_BITFIELD_RANGE(0, 15);
802 uint32_t FrameHeight : MOS_BITFIELD_RANGE(16, 31);
803 };
804 struct
805 {
806 uint32_t Value;
807 };
808 } DW6;
809
810 // DW7
811 union
812 {
813 struct
814 {
815 uint32_t IntraPartMask : MOS_BITFIELD_RANGE(0, 4);
816 uint32_t NonSkipZMvAdded : MOS_BITFIELD_BIT(5);
817 uint32_t NonSkipModeAdded : MOS_BITFIELD_BIT(6);
818 uint32_t LumaIntraSrcCornerSwap : MOS_BITFIELD_BIT(7);
819 uint32_t : MOS_BITFIELD_RANGE(8, 15);
820 uint32_t MVCostScaleFactor : MOS_BITFIELD_RANGE(16, 17);
821 uint32_t BilinearEnable : MOS_BITFIELD_BIT(18);
822 uint32_t Res_19 : MOS_BITFIELD_BIT(19);
823 uint32_t WeightedSADHAAR : MOS_BITFIELD_BIT(20);
824 uint32_t AConlyHAAR : MOS_BITFIELD_BIT(21);
825 uint32_t RefIDCostMode : MOS_BITFIELD_BIT(22);
826 uint32_t : MOS_BITFIELD_BIT(23);
827 uint32_t SkipCenterMask : MOS_BITFIELD_RANGE(24, 31);
828 };
829 struct
830 {
831 uint32_t Value;
832 };
833 } DW7;
834
835 // DW8
836 union
837 {
838 struct
839 {
840 uint32_t Mode0Cost : MOS_BITFIELD_RANGE(0, 7);
841 uint32_t Mode1Cost : MOS_BITFIELD_RANGE(8, 15);
842 uint32_t Mode2Cost : MOS_BITFIELD_RANGE(16, 23);
843 uint32_t Mode3Cost : MOS_BITFIELD_RANGE(24, 31);
844 };
845 struct
846 {
847 uint32_t Value;
848 };
849 } DW8;
850
851 // DW9
852 union
853 {
854 struct
855 {
856 uint32_t Mode4Cost : MOS_BITFIELD_RANGE(0, 7);
857 uint32_t Mode5Cost : MOS_BITFIELD_RANGE(8, 15);
858 uint32_t Mode6Cost : MOS_BITFIELD_RANGE(16, 23);
859 uint32_t Mode7Cost : MOS_BITFIELD_RANGE(24, 31);
860 };
861 struct
862 {
863 uint32_t Value;
864 };
865 } DW9;
866
867 // DW10
868 union
869 {
870 struct
871 {
872 uint32_t Mode8Cost : MOS_BITFIELD_RANGE(0, 7);
873 uint32_t Mode9Cost : MOS_BITFIELD_RANGE(8, 15);
874 uint32_t RefIDCost : MOS_BITFIELD_RANGE(16, 23);
875 uint32_t ChromaIntraModeCost : MOS_BITFIELD_RANGE(24, 31);
876 };
877 struct
878 {
879 uint32_t Value;
880 };
881 } DW10;
882
883 // DW11
884 union
885 {
886 struct
887 {
888 uint32_t MV0Cost : MOS_BITFIELD_RANGE(0, 7);
889 uint32_t MV1Cost : MOS_BITFIELD_RANGE(8, 15);
890 uint32_t MV2Cost : MOS_BITFIELD_RANGE(16, 23);
891 uint32_t MV3Cost : MOS_BITFIELD_RANGE(24, 31);
892 };
893 struct
894 {
895 uint32_t Value;
896 };
897 } DW11;
898
899 // DW12
900 union
901 {
902 struct
903 {
904 uint32_t MV4Cost : MOS_BITFIELD_RANGE(0, 7);
905 uint32_t MV5Cost : MOS_BITFIELD_RANGE(8, 15);
906 uint32_t MV6Cost : MOS_BITFIELD_RANGE(16, 23);
907 uint32_t MV7Cost : MOS_BITFIELD_RANGE(24, 31);
908 };
909 struct
910 {
911 uint32_t Value;
912 };
913 } DW12;
914
915 // DW13
916 union
917 {
918 struct
919 {
920 uint32_t QpPrimeY : MOS_BITFIELD_RANGE(0, 7);
921 uint32_t QpPrimeCb : MOS_BITFIELD_RANGE(8, 15);
922 uint32_t QpPrimeCr : MOS_BITFIELD_RANGE(16, 23);
923 uint32_t TargetSizeInWord : MOS_BITFIELD_RANGE(24, 31);
924 };
925 struct
926 {
927 uint32_t Value;
928 };
929 } DW13;
930
931 // DW14
932 union
933 {
934 struct
935 {
936 uint32_t SICFwdTransCoeffThreshold_0 : MOS_BITFIELD_RANGE(0, 15);
937 uint32_t SICFwdTransCoeffThreshold_1 : MOS_BITFIELD_RANGE(16, 23);
938 uint32_t SICFwdTransCoeffThreshold_2 : MOS_BITFIELD_RANGE(24, 31);
939 };
940 struct
941 {
942 uint32_t Value;
943 };
944 } DW14;
945
946 // DW15
947 union
948 {
949 struct
950 {
951 uint32_t SICFwdTransCoeffThreshold_3 : MOS_BITFIELD_RANGE(0, 7);
952 uint32_t SICFwdTransCoeffThreshold_4 : MOS_BITFIELD_RANGE(8, 15);
953 uint32_t SICFwdTransCoeffThreshold_5 : MOS_BITFIELD_RANGE(16, 23);
954 uint32_t SICFwdTransCoeffThreshold_6 : MOS_BITFIELD_RANGE(24, 31); // Highest Freq
955 };
956 struct
957 {
958 uint32_t Value;
959 };
960 } DW15;
961
962 // DW16
963 union
964 {
965 struct
966 {
967 SearchPathDelta SPDelta_0;
968 SearchPathDelta SPDelta_1;
969 SearchPathDelta SPDelta_2;
970 SearchPathDelta SPDelta_3;
971 };
972 struct
973 {
974 uint32_t Value;
975 };
976 } DW16;
977
978 // DW17
979 union
980 {
981 struct
982 {
983 SearchPathDelta SPDelta_4;
984 SearchPathDelta SPDelta_5;
985 SearchPathDelta SPDelta_6;
986 SearchPathDelta SPDelta_7;
987 };
988 struct
989 {
990 uint32_t Value;
991 };
992 } DW17;
993
994 // DW18
995 union
996 {
997 struct
998 {
999 SearchPathDelta SPDelta_8;
1000 SearchPathDelta SPDelta_9;
1001 SearchPathDelta SPDelta_10;
1002 SearchPathDelta SPDelta_11;
1003 };
1004 struct
1005 {
1006 uint32_t Value;
1007 };
1008 } DW18;
1009
1010 // DW19
1011 union
1012 {
1013 struct
1014 {
1015 SearchPathDelta SPDelta_12;
1016 SearchPathDelta SPDelta_13;
1017 SearchPathDelta SPDelta_14;
1018 SearchPathDelta SPDelta_15;
1019 };
1020 struct
1021 {
1022 uint32_t Value;
1023 };
1024 } DW19;
1025
1026 // DW20
1027 union
1028 {
1029 struct
1030 {
1031 SearchPathDelta SPDelta_16;
1032 SearchPathDelta SPDelta_17;
1033 SearchPathDelta SPDelta_18;
1034 SearchPathDelta SPDelta_19;
1035 };
1036 struct
1037 {
1038 uint32_t Value;
1039 };
1040 } DW20;
1041
1042 // DW21
1043 union
1044 {
1045 struct
1046 {
1047 SearchPathDelta SPDelta_20;
1048 SearchPathDelta SPDelta_21;
1049 SearchPathDelta SPDelta_22;
1050 SearchPathDelta SPDelta_23;
1051 };
1052 struct
1053 {
1054 uint32_t Value;
1055 };
1056 } DW21;
1057
1058 // DW22
1059 union
1060 {
1061 struct
1062 {
1063 SearchPathDelta SPDelta_24;
1064 SearchPathDelta SPDelta_25;
1065 SearchPathDelta SPDelta_26;
1066 SearchPathDelta SPDelta_27;
1067 };
1068 struct
1069 {
1070 uint32_t Value;
1071 };
1072 } DW22;
1073
1074 // DW23
1075 union
1076 {
1077 struct
1078 {
1079 SearchPathDelta SPDelta_28;
1080 SearchPathDelta SPDelta_29;
1081 SearchPathDelta SPDelta_30;
1082 SearchPathDelta SPDelta_31;
1083 };
1084 struct
1085 {
1086 uint32_t Value;
1087 };
1088 } DW23;
1089
1090 // DW24
1091 union
1092 {
1093 struct
1094 {
1095 SearchPathDelta SPDelta_32;
1096 SearchPathDelta SPDelta_33;
1097 SearchPathDelta SPDelta_34;
1098 SearchPathDelta SPDelta_35;
1099 };
1100 struct
1101 {
1102 uint32_t Value;
1103 };
1104 } DW24;
1105
1106 // DW25
1107 union
1108 {
1109 struct
1110 {
1111 SearchPathDelta SPDelta_36;
1112 SearchPathDelta SPDelta_37;
1113 SearchPathDelta SPDelta_38;
1114 SearchPathDelta SPDelta_39;
1115 };
1116 struct
1117 {
1118 uint32_t Value;
1119 };
1120 } DW25;
1121
1122 // DW26
1123 union
1124 {
1125 struct
1126 {
1127 SearchPathDelta SPDelta_40;
1128 SearchPathDelta SPDelta_41;
1129 SearchPathDelta SPDelta_42;
1130 SearchPathDelta SPDelta_43;
1131 };
1132 struct
1133 {
1134 uint32_t Value;
1135 };
1136 } DW26;
1137
1138 // DW27
1139 union
1140 {
1141 struct
1142 {
1143 SearchPathDelta SPDelta_44;
1144 SearchPathDelta SPDelta_45;
1145 SearchPathDelta SPDelta_46;
1146 SearchPathDelta SPDelta_47;
1147 };
1148 struct
1149 {
1150 uint32_t Value;
1151 };
1152 } DW27;
1153
1154 // DW28
1155 union
1156 {
1157 struct
1158 {
1159 SearchPathDelta SPDelta_48;
1160 SearchPathDelta SPDelta_49;
1161 SearchPathDelta SPDelta_50;
1162 SearchPathDelta SPDelta_51;
1163 };
1164 struct
1165 {
1166 uint32_t Value;
1167 };
1168 } DW28;
1169
1170 // DW29
1171 union
1172 {
1173 struct
1174 {
1175 SearchPathDelta SPDelta_52;
1176 SearchPathDelta SPDelta_53;
1177 SearchPathDelta SPDelta_54;
1178 SearchPathDelta SPDelta_55;
1179 };
1180 struct
1181 {
1182 uint32_t Value;
1183 };
1184 } DW29;
1185
1186 // DW30
1187 union
1188 {
1189 struct
1190 {
1191 uint32_t Intra4x4ModeMask : MOS_BITFIELD_RANGE(0, 8);
1192 uint32_t : MOS_BITFIELD_RANGE(9, 15);
1193 uint32_t Intra8x8ModeMask : MOS_BITFIELD_RANGE(16, 24);
1194 uint32_t : MOS_BITFIELD_RANGE(25, 31);
1195 };
1196 struct
1197 {
1198 uint32_t Value;
1199 };
1200 } DW30;
1201
1202 // DW31
1203 union
1204 {
1205 struct
1206 {
1207 uint32_t Intra16x16ModeMask : MOS_BITFIELD_RANGE(0, 3);
1208 uint32_t IntraChromaModeMask : MOS_BITFIELD_RANGE(4, 7);
1209 uint32_t IntraComputeType : MOS_BITFIELD_RANGE(8, 9);
1210 uint32_t : MOS_BITFIELD_RANGE(10, 31);
1211 };
1212 struct
1213 {
1214 uint32_t Value;
1215 };
1216 } DW31;
1217
1218 // DW32
1219 union
1220 {
1221 struct
1222 {
1223 uint32_t SkipVal : MOS_BITFIELD_RANGE(0, 15);
1224 uint32_t MultiPredL0Disable : MOS_BITFIELD_RANGE(16, 23);
1225 uint32_t MultiPredL1Disable : MOS_BITFIELD_RANGE(24, 31);
1226 };
1227 struct
1228 {
1229 uint32_t Value;
1230 };
1231 } DW32;
1232
1233 // DW33
1234 union
1235 {
1236 struct
1237 {
1238 uint32_t Intra16x16NonDCPredPenalty : MOS_BITFIELD_RANGE(0, 7);
1239 uint32_t Intra8x8NonDCPredPenalty : MOS_BITFIELD_RANGE(8, 15);
1240 uint32_t Intra4x4NonDCPredPenalty : MOS_BITFIELD_RANGE(16, 23);
1241 uint32_t : MOS_BITFIELD_RANGE(24, 31);
1242 };
1243 struct
1244 {
1245 uint32_t Value;
1246 };
1247 } DW33;
1248
1249 union {
1250 struct {
1251 uint32_t LambdaME;
1252 };
1253 uint32_t Value;
1254 } DW34;
1255
1256 union {
1257 struct {
1258 uint32_t SimpIntraInterThreshold : MOS_BITFIELD_RANGE(0, 15);
1259 uint32_t ModeCostSp : MOS_BITFIELD_RANGE(16, 23);
1260 uint32_t IntraRefreshEn : MOS_BITFIELD_RANGE(24, 25);
1261 uint32_t FirstIntraRefresh : MOS_BITFIELD_BIT(26);
1262 uint32_t EnableRollingIntra : MOS_BITFIELD_BIT(27);
1263 uint32_t HalfUpdateMixedLCU : MOS_BITFIELD_BIT(28);
1264 uint32_t Res_29_31 : MOS_BITFIELD_RANGE(29, 31);
1265 };
1266 uint32_t Value;
1267 } DW35;
1268
1269 union {
1270 struct {
1271 uint32_t NumRefIdxL0MinusOne : MOS_BITFIELD_RANGE(0, 7);
1272 uint32_t HMECombinedExtraSUs : MOS_BITFIELD_RANGE(8, 15);
1273 uint32_t NumRefIdxL1MinusOne : MOS_BITFIELD_RANGE(16, 23);
1274 uint32_t PowerSaving : MOS_BITFIELD_BIT(24);
1275 uint32_t BRCEnable : MOS_BITFIELD_BIT(25);
1276 uint32_t LCUBRCEnable : MOS_BITFIELD_BIT(26);
1277 uint32_t ROIEnable : MOS_BITFIELD_BIT(27);
1278 uint32_t FASTSurveillanceFlag : MOS_BITFIELD_BIT(28);
1279 uint32_t CheckAllFractionalEnable : MOS_BITFIELD_BIT(29);
1280 uint32_t HMECombinedOverlap : MOS_BITFIELD_RANGE(30, 31);
1281 };
1282 uint32_t Value;
1283 } DW36;
1284
1285 union {
1286 struct {
1287 uint32_t ActualQpRefID0List0 : MOS_BITFIELD_RANGE(0, 7);
1288 uint32_t ActualQpRefID1List0 : MOS_BITFIELD_RANGE(8, 15);
1289 uint32_t ActualQpRefID2List0 : MOS_BITFIELD_RANGE(16, 23);
1290 uint32_t ActualQpRefID3List0 : MOS_BITFIELD_RANGE(24, 31);
1291 };
1292 uint32_t Value;
1293 } DW37;
1294
1295 union {
1296 struct {
1297 uint32_t NumIntraRefreshOffFrames : MOS_BITFIELD_RANGE(0, 15);
1298 uint32_t NumFrameInGOB : MOS_BITFIELD_RANGE(16, 31);
1299 };
1300 uint32_t Value;
1301 } DW38;
1302
1303 union {
1304 struct {
1305 uint32_t ActualQpRefID0List1 : MOS_BITFIELD_RANGE(0, 7);
1306 uint32_t ActualQpRefID1List1 : MOS_BITFIELD_RANGE(8, 15);
1307 uint32_t RefCost : MOS_BITFIELD_RANGE(16, 31);
1308 };
1309 uint32_t Value;
1310 } DW39;
1311
1312 union {
1313 struct {
1314 uint32_t TransformThreshold0 : MOS_BITFIELD_RANGE(0, 15);
1315 uint32_t TransformThreshold1 : MOS_BITFIELD_RANGE(16, 31);
1316 };
1317 uint32_t Value;
1318 } DW40;
1319
1320 union {
1321 struct {
1322 uint32_t TransformThreshold2 : MOS_BITFIELD_RANGE(0, 15);
1323 uint32_t TextureIntraCostThreshold : MOS_BITFIELD_RANGE(16, 31);
1324 };
1325 uint32_t Value;
1326 } DW41;
1327
1328 union {
1329 struct
1330 {
1331 uint32_t NumMVPredictorsL0 : MOS_BITFIELD_RANGE(0, 3);
1332 uint32_t NumMVPredictorsL1 : MOS_BITFIELD_RANGE(4, 7);
1333 uint32_t Res_8 : MOS_BITFIELD_BIT(8);
1334 uint32_t PerLCUQP : MOS_BITFIELD_BIT(9);
1335 uint32_t PerCTBInput : MOS_BITFIELD_BIT(10);
1336 uint32_t CTBDistortionOutput : MOS_BITFIELD_BIT(11);
1337 uint32_t MVPredictorBlockSize : MOS_BITFIELD_RANGE(12, 14);
1338 uint32_t Res_15 : MOS_BITFIELD_BIT(15);
1339 uint32_t MultiPredL0 : MOS_BITFIELD_RANGE(16, 19);
1340 uint32_t MultiPredL1 : MOS_BITFIELD_RANGE(20, 23);
1341 uint32_t Res_24_31 : MOS_BITFIELD_RANGE(24, 31);
1342 };
1343 uint32_t Value;
1344 } DW42;
1345
1346 union {
1347 struct {
1348 uint32_t Reserved;
1349 };
1350 uint32_t Value;
1351 } DW43;
1352
1353 union {
1354 struct {
1355 uint32_t MaxNumMergeCandidates : MOS_BITFIELD_RANGE(0, 3);
1356 uint32_t MaxNumRefList0 : MOS_BITFIELD_RANGE(4, 7);
1357 uint32_t MaxNumRefList1 : MOS_BITFIELD_RANGE(8, 11);
1358 uint32_t Res_12_15 : MOS_BITFIELD_RANGE(12, 15);
1359 uint32_t MaxVmvR : MOS_BITFIELD_RANGE(16, 31);
1360 };
1361 uint32_t Value;
1362 } DW44;
1363
1364 union {
1365 struct {
1366 uint32_t TemporalMvpEnableFlag : MOS_BITFIELD_BIT(0);
1367 uint32_t Res_1_7 : MOS_BITFIELD_RANGE(1, 7);
1368 uint32_t Log2ParallelMergeLevel : MOS_BITFIELD_RANGE(8, 15);
1369 uint32_t HMECombineLenPslice : MOS_BITFIELD_RANGE(16, 23);
1370 uint32_t HMECombineLenBslice : MOS_BITFIELD_RANGE(24, 31);
1371 };
1372 uint32_t Value;
1373 } DW45;
1374
1375 union {
1376 struct {
1377 uint32_t Log2MinTUSize : MOS_BITFIELD_RANGE(0, 7);
1378 uint32_t Log2MaxTUSize : MOS_BITFIELD_RANGE(8, 15);
1379 uint32_t Log2MinCUSize : MOS_BITFIELD_RANGE(16, 23);
1380 uint32_t Log2MaxCUSize : MOS_BITFIELD_RANGE(24, 31);
1381 };
1382 uint32_t Value;
1383 } DW46;
1384
1385 union {
1386 struct {
1387 uint32_t NumRegionsInSlice : MOS_BITFIELD_RANGE(0, 7);
1388 uint32_t TypeOfWalkingPattern : MOS_BITFIELD_RANGE(8, 11);
1389 uint32_t ChromaFlatnessCheckFlag : MOS_BITFIELD_BIT(12);
1390 uint32_t EnableIntraEarlyExit : MOS_BITFIELD_BIT(13);
1391 uint32_t SkipIntraKrnFlag : MOS_BITFIELD_BIT(14);
1392 uint32_t ScreenContentFlag : MOS_BITFIELD_BIT(15);
1393 uint32_t IsLowDelay : MOS_BITFIELD_BIT(16);
1394 uint32_t CollocatedFromL0Flag : MOS_BITFIELD_BIT(17);
1395 uint32_t ArbitarySliceFlag : MOS_BITFIELD_BIT(18);
1396 uint32_t MultiSliceFlag : MOS_BITFIELD_BIT(19);
1397 uint32_t Res_20_23 : MOS_BITFIELD_RANGE(20, 23);
1398 uint32_t isCurrRefL0LongTerm : MOS_BITFIELD_BIT(24);
1399 uint32_t isCurrRefL1LongTerm : MOS_BITFIELD_BIT(25);
1400 uint32_t NumRegionMinus1 : MOS_BITFIELD_RANGE(26, 31);
1401 };
1402 uint32_t Value;
1403 } DW47;
1404
1405 union {
1406 struct {
1407 uint32_t CurrentTdL0_0 : MOS_BITFIELD_RANGE(0, 15);
1408 uint32_t CurrentTdL0_1 : MOS_BITFIELD_RANGE(16, 31);
1409 };
1410 uint32_t Value;
1411 } DW48;
1412
1413 union {
1414 struct {
1415 uint32_t CurrentTdL0_2 : MOS_BITFIELD_RANGE(0, 15);
1416 uint32_t CurrentTdL0_3 : MOS_BITFIELD_RANGE(16, 31);
1417 };
1418 uint32_t Value;
1419 } DW49;
1420
1421 union {
1422 struct {
1423 uint32_t CurrentTdL1_0 : MOS_BITFIELD_RANGE(0, 15);
1424 uint32_t CurrentTdL1_1 : MOS_BITFIELD_RANGE(16, 31);
1425 };
1426 uint32_t Value;
1427 } DW50;
1428
1429 union {
1430 struct {
1431 uint32_t IntraRefreshMBNum : MOS_BITFIELD_RANGE(0, 15);
1432 uint32_t IntraRefreshUnitInMB : MOS_BITFIELD_RANGE(16, 23);
1433 uint32_t IntraRefreshQPDelta : MOS_BITFIELD_RANGE(24, 31);
1434 };
1435 uint32_t Value;
1436 } DW51;
1437
1438 union {
1439 struct {
1440 uint32_t NumofUnitInRegion : MOS_BITFIELD_RANGE(0, 15);
1441 uint32_t MaxHeightInRegion : MOS_BITFIELD_RANGE(16, 31);
1442 };
1443 uint32_t Value;
1444 } DW52;
1445
1446 union {
1447 struct {
1448 uint32_t IntraRefreshRefWidth : MOS_BITFIELD_RANGE(0, 7);
1449 uint32_t IntraRefreshRefHeight : MOS_BITFIELD_RANGE(8, 15);
1450 uint32_t Res_16_31 : MOS_BITFIELD_RANGE(16, 31);
1451 };
1452 uint32_t Value;
1453 } DW53;
1454
1455 union {
1456 struct {
1457 uint32_t Reserved;
1458 };
1459 uint32_t Value;
1460 } DW54;
1461
1462 union {
1463 struct {
1464 uint32_t Reserved;
1465 };
1466 uint32_t Value;
1467 } DW55;
1468
1469 union {
1470 struct {
1471 uint32_t BTI_CU_Record;
1472 };
1473 uint32_t Value;
1474 } DW56;
1475
1476 union {
1477 struct {
1478 uint32_t BTI_PAK_Cmd;
1479 };
1480 uint32_t Value;
1481 } DW57;
1482
1483 union {
1484 struct {
1485 uint32_t BTI_Src_Y;
1486 };
1487 uint32_t Value;
1488 } DW58;
1489
1490 union {
1491 struct {
1492 uint32_t BTI_Intra_Dist;
1493 };
1494 uint32_t Value;
1495 } DW59;
1496
1497 union {
1498 struct {
1499 uint32_t BTI_Min_Dist;
1500 };
1501 uint32_t Value;
1502 } DW60;
1503
1504 union {
1505 struct {
1506 uint32_t BTI_HMEMVPredFwdBwdSurfIndex;
1507 };
1508 uint32_t Value;
1509 } DW61;
1510
1511 union {
1512 struct {
1513 uint32_t BTI_HMEDistSurfIndex;
1514 };
1515 uint32_t Value;
1516 } DW62;
1517
1518 union {
1519 struct {
1520 uint32_t BTI_Slice_Map;
1521 };
1522 uint32_t Value;
1523 } DW63;
1524
1525 union {
1526 struct {
1527 uint32_t BTI_VME_Saved_UNI_SIC;
1528 };
1529 uint32_t Value;
1530 } DW64;
1531
1532 union {
1533 struct {
1534 uint32_t BTI_Simplest_Intra;
1535 };
1536 uint32_t Value;
1537 } DW65;
1538
1539 union {
1540 struct {
1541 uint32_t BTI_Collocated_RefFrame;
1542 };
1543 uint32_t Value;
1544 } DW66;
1545
1546 union {
1547 struct {
1548 uint32_t BTI_Reserved;
1549 };
1550 uint32_t Value;
1551 } DW67;
1552
1553 union {
1554 struct {
1555 uint32_t BTI_BRC_Input;
1556 };
1557 uint32_t Value;
1558 } DW68;
1559
1560 union {
1561 struct {
1562 uint32_t BTI_LCU_QP;
1563 };
1564 uint32_t Value;
1565 } DW69;
1566
1567 union {
1568 struct {
1569 uint32_t BTI_BRC_Data;
1570 };
1571 uint32_t Value;
1572 } DW70;
1573
1574 union {
1575 struct {
1576 uint32_t BTI_VMEInterPredictionSurfIndex;
1577 };
1578 uint32_t Value;
1579 } DW71;
1580
1581 union {
1582 //For B frame
1583 struct {
1584 uint32_t BTI_VMEInterPredictionBSurfIndex;
1585 };
1586 //For P frame
1587 struct {
1588 uint32_t BTI_ConcurrentThreadMap;
1589 };
1590 uint32_t Value;
1591 } DW72;
1592
1593 union {
1594 //For B frame
1595 struct {
1596 uint32_t BTI_ConcurrentThreadMap;
1597 };
1598 //For P frame
1599 struct {
1600 uint32_t BTI_MB_Data_CurFrame;
1601 };
1602 uint32_t Value;
1603 } DW73;
1604
1605 union {
1606 //For B frame
1607 struct {
1608 uint32_t BTI_MB_Data_CurFrame;
1609 };
1610 //For P frame
1611 struct {
1612 uint32_t BTI_MVP_CurFrame;
1613 };
1614 uint32_t Value;
1615 } DW74;
1616
1617 union {
1618 //For B frame
1619 struct {
1620 uint32_t BTI_MVP_CurFrame;
1621 };
1622 //For P frame
1623 struct {
1624 uint32_t BTI_Haar_Dist16x16;
1625 };
1626 uint32_t Value;
1627 } DW75;
1628
1629 union {
1630 // this surface need to take same surface name from Hevc_LCUEnc_I_8x8_PU_FMode_inLCU as input
1631 //For B frame
1632 struct {
1633 uint32_t BTI_Haar_Dist16x16;
1634 };
1635 //For P frame
1636 struct {
1637 uint32_t BTI_Stats_Data;
1638 };
1639 uint32_t Value;
1640 } DW76;
1641
1642 union {
1643 //For B frame
1644 struct {
1645 uint32_t BTI_Stats_Data;
1646 };
1647 //For P frame
1648 struct {
1649 uint32_t BTI_Frame_Stats_Data;
1650 };
1651 uint32_t Value;
1652 } DW77;
1653
1654 union {
1655 //For B frame
1656 struct {
1657 uint32_t BTI_Frame_Stats_Data;
1658 };
1659 //For P frame
1660 struct {
1661 uint32_t BTI_MVPredictor_Surface;
1662 };
1663 uint32_t Value;
1664 } DW78;
1665
1666 union {
1667 //For B frame
1668 struct {
1669 uint32_t BTI_MVPredictor_Surface;
1670 };
1671 //For P frame
1672 struct {
1673 uint32_t BTI_CTB_Input_Surface;
1674 };
1675 uint32_t Value;
1676 } DW79;
1677
1678 union {
1679 //For B frame
1680 struct {
1681 uint32_t BTI_CTB_Input_Surface;
1682 };
1683 //For P frame
1684 struct {
1685 uint32_t BTI_CTB_Distortion_Output_Surface;
1686 };
1687 uint32_t Value;
1688 } DW80;
1689
1690 union {
1691 //For B frame
1692 struct {
1693 uint32_t BTI_CTB_Distortion_Output_Surface;
1694 };
1695 //For P frame
1696 struct {
1697 uint32_t BTI_Debug;
1698 };
1699 uint32_t Value;
1700 } DW81;
1701
1702 union {
1703 //For B frame
1704 struct {
1705 uint32_t BTI_Debug;
1706 };
1707 uint32_t Value;
1708 } DW82;
1709 };
1710
1711 using PCODECHAL_FEI_HEVC_B_MB_ENC_CURBE_G9 = struct CODECHAL_FEI_HEVC_B_MB_ENC_CURBE_G9*;
1712 C_ASSERT(MOS_BYTES_TO_DWORDS(sizeof(CODECHAL_FEI_HEVC_B_MB_ENC_CURBE_G9)) == 83 );
1713
SetMbEncKernelParams(MHW_KERNEL_PARAM * kernelParams,uint32_t idx)1714 MOS_STATUS CodechalFeiHevcStateG9Skl::SetMbEncKernelParams(MHW_KERNEL_PARAM* kernelParams, uint32_t idx)
1715 {
1716 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
1717
1718 CODECHAL_ENCODE_CHK_NULL_RETURN(kernelParams);
1719
1720 auto curbeAlignment = m_stateHeapInterface->pStateHeapInterface->GetCurbeAlignment();
1721
1722 kernelParams->iThreadCount = m_renderEngineInterface->GetHwCaps()->dwMaxThreads;
1723 kernelParams->iIdCount = 1;
1724
1725 switch (idx)
1726 {
1727 case CODECHAL_HEVC_MBENC_2xSCALING:
1728 kernelParams->iBTCount = CODECHAL_HEVC_FEI_SCALING_FRAME_END - CODECHAL_HEVC_FEI_SCALING_FRAME_BEGIN;
1729 kernelParams->iCurbeLength = MOS_ALIGN_CEIL(sizeof(MEDIA_OBJECT_DOWNSCALING_2X_STATIC_DATA_G9), curbeAlignment);
1730 kernelParams->iBlockWidth = 32;
1731 kernelParams->iBlockHeight = 32;
1732 break;
1733
1734 case CODECHAL_HEVC_MBENC_32x32MD:
1735 kernelParams->iBTCount = CODECHAL_HEVC_FEI_32x32_PU_END - CODECHAL_HEVC_FEI_32x32_PU_BEGIN;
1736 kernelParams->iCurbeLength = MOS_ALIGN_CEIL(sizeof(CODECHAL_FEI_HEVC_I_32x32_PU_MODE_DECISION_CURBE_G9), curbeAlignment);
1737 kernelParams->iBlockWidth = 32;
1738 kernelParams->iBlockHeight = 32;
1739 break;
1740
1741 case CODECHAL_HEVC_MBENC_16x16SAD:
1742 kernelParams->iBTCount = CODECHAL_HEVC_FEI_16x16_PU_SAD_END - CODECHAL_HEVC_FEI_16x16_PU_SAD_BEGIN;
1743 kernelParams->iCurbeLength = MOS_ALIGN_CEIL(sizeof(CODECHAL_ENC_HEVC_I_16x16_SAD_CURBE_G9), curbeAlignment);
1744 kernelParams->iBlockWidth = 16;
1745 kernelParams->iBlockHeight = 16;
1746 break;
1747
1748 case CODECHAL_HEVC_MBENC_16x16MD:
1749 kernelParams->iBTCount = CODECHAL_HEVC_FEI_16x16_PU_MD_END - CODECHAL_HEVC_FEI_16x16_PU_MD_BEGIN;
1750 kernelParams->iCurbeLength = MOS_ALIGN_CEIL(sizeof(CODECHAL_FEI_HEVC_I_16x16_PU_MODEDECISION_CURBE_G9), curbeAlignment);
1751 kernelParams->iBlockWidth = 32;
1752 kernelParams->iBlockHeight = 32;
1753 break;
1754
1755 case CODECHAL_HEVC_MBENC_8x8PU:
1756 kernelParams->iBTCount = CODECHAL_HEVC_FEI_8x8_PU_END - CODECHAL_HEVC_FEI_8x8_PU_BEGIN;
1757 kernelParams->iCurbeLength = MOS_ALIGN_CEIL(sizeof(CODECHAL_FEI_HEVC_I_8x8_PU_CURBE_G9), curbeAlignment);
1758 kernelParams->iBlockWidth = 8;
1759 kernelParams->iBlockHeight = 8;
1760 break;
1761
1762 case CODECHAL_HEVC_MBENC_8x8FMODE:
1763 kernelParams->iBTCount = CODECHAL_HEVC_FEI_8x8_PU_FMODE_END - CODECHAL_HEVC_FEI_8x8_PU_FMODE_BEGIN;
1764 kernelParams->iCurbeLength = MOS_ALIGN_CEIL(sizeof(CODECHAL_FEI_HEVC_I_8x8_PU_FMODE_CURBE_G9), curbeAlignment);
1765 kernelParams->iBlockWidth = 32;
1766 kernelParams->iBlockHeight = 32;
1767 break;
1768
1769 case CODECHAL_HEVC_MBENC_32x32INTRACHECK:
1770 kernelParams->iBTCount = CODECHAL_HEVC_FEI_B_32x32_PU_END - CODECHAL_HEVC_FEI_B_32x32_PU_BEGIN;
1771 kernelParams->iCurbeLength = MOS_ALIGN_CEIL(sizeof(CODECHAL_ENC_HEVC_B_32x32_PU_INTRA_CHECK_CURBE_G9), curbeAlignment);
1772 kernelParams->iBlockWidth = 32;
1773 kernelParams->iBlockHeight = 32;
1774 break;
1775
1776 case CODECHAL_HEVC_FEI_MBENC_BENC:
1777 kernelParams->iBTCount = CODECHAL_HEVC_FEI_B_MBENC_END - CODECHAL_HEVC_FEI_B_MBENC_BEGIN;
1778 kernelParams->iCurbeLength = MOS_ALIGN_CEIL(sizeof(CODECHAL_FEI_HEVC_B_MB_ENC_CURBE_G9), curbeAlignment);
1779 kernelParams->iBlockWidth = 16;
1780 kernelParams->iBlockHeight = 16;
1781 break;
1782
1783 case CODECHAL_HEVC_FEI_MBENC_BPAK:
1784 kernelParams->iBTCount = CODECHAL_HEVC_FEI_B_PAK_END - CODECHAL_HEVC_FEI_B_PAK_BEGIN;
1785 kernelParams->iCurbeLength = MOS_ALIGN_CEIL(sizeof(CODECHAL_ENC_HEVC_B_PAK_CURBE_G9), curbeAlignment);
1786 kernelParams->iBlockWidth = 32;
1787 kernelParams->iBlockHeight = 32;
1788 break;
1789
1790 case CODECHAL_HEVC_FEI_MBENC_DS_COMBINED:
1791 if (MEDIA_IS_SKU(m_skuTable, FtrEncodeHEVC10bit))
1792 {
1793 kernelParams->iBTCount = CODECHAL_HEVC_FEI_DS_COMBINED_END - CODECHAL_HEVC_FEI_DS_COMBINED_BEGIN;
1794 uint32_t uiDSCombinedKernelCurbeSize = sizeof(CODECHAL_ENC_HEVC_DS_COMBINED_CURBE_G9);
1795 kernelParams->iCurbeLength = MOS_ALIGN_CEIL(uiDSCombinedKernelCurbeSize, curbeAlignment);
1796 kernelParams->iBlockWidth = 8;
1797 kernelParams->iBlockHeight = 8;
1798 }
1799 else
1800 {
1801 CODECHAL_ENCODE_ASSERT(false);
1802 eStatus = MOS_STATUS_INVALID_PARAMETER;
1803 }
1804 break;
1805
1806 case CODECHAL_HEVC_FEI_MBENC_PENC:
1807 kernelParams->iBTCount = CODECHAL_HEVC_FEI_P_MBENC_END - CODECHAL_HEVC_FEI_P_MBENC_BEGIN;
1808 //P MBEnc curbe has one less DWord than B MBEnc curbe
1809 kernelParams->iCurbeLength = MOS_ALIGN_CEIL(sizeof(CODECHAL_FEI_HEVC_B_MB_ENC_CURBE_G9) - sizeof(uint32_t), (size_t)curbeAlignment);
1810 kernelParams->iBlockWidth = 16;
1811 kernelParams->iBlockHeight = 16;
1812 break;
1813
1814 default:
1815 CODECHAL_ENCODE_ASSERT(false);
1816 eStatus = MOS_STATUS_INVALID_PARAMETER;
1817 }
1818
1819 return eStatus;
1820 }
1821
SetMbEncBindingTable(PCODECHAL_ENCODE_BINDING_TABLE_GENERIC bindingTable,uint32_t idx)1822 MOS_STATUS CodechalFeiHevcStateG9Skl::SetMbEncBindingTable(PCODECHAL_ENCODE_BINDING_TABLE_GENERIC bindingTable, uint32_t idx)
1823 {
1824 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
1825
1826 CODECHAL_ENCODE_CHK_NULL_RETURN(bindingTable);
1827
1828 MOS_ZeroMemory(bindingTable, sizeof(*bindingTable));
1829 bindingTable->dwMediaState = ConvertKrnOpsToMediaState(ENC_MBENC, idx);
1830
1831 switch (idx)
1832 {
1833 case CODECHAL_HEVC_MBENC_2xSCALING:
1834 bindingTable->dwNumBindingTableEntries = CODECHAL_HEVC_FEI_SCALING_FRAME_END - CODECHAL_HEVC_FEI_SCALING_FRAME_BEGIN;
1835 bindingTable->dwBindingTableStartOffset = CODECHAL_HEVC_FEI_SCALING_FRAME_BEGIN;
1836 break;
1837
1838 case CODECHAL_HEVC_MBENC_32x32MD:
1839 bindingTable->dwNumBindingTableEntries = CODECHAL_HEVC_FEI_32x32_PU_END - CODECHAL_HEVC_FEI_32x32_PU_BEGIN;
1840 bindingTable->dwBindingTableStartOffset = CODECHAL_HEVC_FEI_32x32_PU_BEGIN;
1841 break;
1842
1843 case CODECHAL_HEVC_MBENC_16x16SAD:
1844 bindingTable->dwNumBindingTableEntries = CODECHAL_HEVC_FEI_16x16_PU_SAD_END - CODECHAL_HEVC_FEI_16x16_PU_SAD_BEGIN;
1845 bindingTable->dwBindingTableStartOffset = CODECHAL_HEVC_FEI_16x16_PU_SAD_BEGIN;
1846 break;
1847
1848 case CODECHAL_HEVC_MBENC_16x16MD:
1849 bindingTable->dwNumBindingTableEntries = CODECHAL_HEVC_FEI_16x16_PU_MD_END - CODECHAL_HEVC_FEI_16x16_PU_MD_BEGIN;
1850 bindingTable->dwBindingTableStartOffset = CODECHAL_HEVC_FEI_16x16_PU_MD_BEGIN;
1851 break;
1852
1853 case CODECHAL_HEVC_MBENC_8x8PU:
1854 bindingTable->dwNumBindingTableEntries = CODECHAL_HEVC_FEI_8x8_PU_END - CODECHAL_HEVC_FEI_8x8_PU_BEGIN;
1855 bindingTable->dwBindingTableStartOffset = CODECHAL_HEVC_FEI_8x8_PU_BEGIN;
1856 break;
1857
1858 case CODECHAL_HEVC_MBENC_8x8FMODE:
1859 bindingTable->dwNumBindingTableEntries = CODECHAL_HEVC_FEI_8x8_PU_FMODE_END - CODECHAL_HEVC_FEI_8x8_PU_FMODE_BEGIN;
1860 bindingTable->dwBindingTableStartOffset = CODECHAL_HEVC_FEI_8x8_PU_FMODE_BEGIN;
1861 break;
1862
1863 case CODECHAL_HEVC_MBENC_32x32INTRACHECK:
1864 bindingTable->dwNumBindingTableEntries = CODECHAL_HEVC_FEI_B_32x32_PU_END - CODECHAL_HEVC_FEI_B_32x32_PU_BEGIN;
1865 bindingTable->dwBindingTableStartOffset = CODECHAL_HEVC_FEI_B_32x32_PU_BEGIN;
1866 break;
1867
1868 case CODECHAL_HEVC_FEI_MBENC_BENC:
1869 bindingTable->dwNumBindingTableEntries = CODECHAL_HEVC_FEI_B_MBENC_END - CODECHAL_HEVC_FEI_B_MBENC_BEGIN;
1870 bindingTable->dwBindingTableStartOffset = CODECHAL_HEVC_FEI_B_MBENC_BEGIN;
1871 break;
1872
1873 case CODECHAL_HEVC_FEI_MBENC_BPAK:
1874 bindingTable->dwNumBindingTableEntries = CODECHAL_HEVC_FEI_B_PAK_END - CODECHAL_HEVC_FEI_B_PAK_BEGIN;
1875 bindingTable->dwBindingTableStartOffset = CODECHAL_HEVC_FEI_B_PAK_BEGIN;
1876 break;
1877
1878 case CODECHAL_HEVC_FEI_MBENC_DS_COMBINED:
1879 bindingTable->dwNumBindingTableEntries = CODECHAL_HEVC_FEI_DS_COMBINED_END - CODECHAL_HEVC_FEI_DS_COMBINED_BEGIN;
1880 bindingTable->dwBindingTableStartOffset = CODECHAL_HEVC_FEI_DS_COMBINED_BEGIN;
1881 break;
1882
1883 case CODECHAL_HEVC_FEI_MBENC_PENC:
1884 bindingTable->dwNumBindingTableEntries = CODECHAL_HEVC_FEI_P_MBENC_END - CODECHAL_HEVC_FEI_P_MBENC_BEGIN;
1885 bindingTable->dwBindingTableStartOffset = CODECHAL_HEVC_FEI_P_MBENC_BEGIN;
1886 break;
1887
1888 default:
1889 CODECHAL_ENCODE_ASSERT(false);
1890 eStatus = MOS_STATUS_INVALID_PARAMETER;
1891 return eStatus;
1892 }
1893
1894 for (uint32_t i = 0; i < bindingTable->dwNumBindingTableEntries; i++)
1895 {
1896 bindingTable->dwBindingTableEntries[i] = i;
1897 }
1898
1899 return eStatus;
1900 }
1901
EndKernelCall(CODECHAL_MEDIA_STATE_TYPE mediaStateType,PMHW_KERNEL_STATE kernelState,PMOS_COMMAND_BUFFER cmdBuffer)1902 MOS_STATUS CodechalFeiHevcStateG9Skl::EndKernelCall(
1903 CODECHAL_MEDIA_STATE_TYPE mediaStateType,
1904 PMHW_KERNEL_STATE kernelState,
1905 PMOS_COMMAND_BUFFER cmdBuffer)
1906 {
1907 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
1908
1909 CODECHAL_ENCODE_FUNCTION_ENTER;
1910
1911 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalEncHevcStateG9::EndKernelCall(mediaStateType, kernelState, cmdBuffer));
1912
1913 // skip haar distortion surface, statstics data dump surface
1914 // and frame level statstics data surface because they are not used
1915 #if 0
1916 CODECHAL_DEBUG_TOOL(
1917 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpSurface(
1918 &m_encStatsBuffers.m_puStatsSurface,
1919 CodechalDbgAttr::attrOutput,
1920 "HEVC_B_MBENC_PU_StatsSurface",
1921 CODECHAL_MEDIA_STATE_HEVC_B_MBENC));
1922
1923 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpSurface(
1924 &m_encStatsBuffers.m_8x8PuHaarDist,
1925 CodechalDbgAttr::attrOutput,
1926 "HEVC_B_MBENC_8X8_PU_HaarDistSurface",
1927 CODECHAL_MEDIA_STATE_HEVC_B_MBENC));
1928
1929 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
1930 &m_encStatsBuffers.m_8x8PuFrameStats.sResource,
1931 "HEVC_B_MBENC_ConstantData_In",
1932 CodechalDbgAttr::attrOutput,
1933 m_encStatsBuffers.m_8x8PuFrameStats.dwSize,
1934 0,
1935 CODECHAL_MEDIA_STATE_HEVC_B_MBENC));
1936
1937 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpSurface(
1938 &m_encStatsBuffers.m_mbEncStatsSurface,
1939 CodechalDbgAttr::attrOutput,
1940 "HEVC_B_MBENC_MB_ENC_StatsSurface",
1941 CODECHAL_MEDIA_STATE_HEVC_B_MBENC));
1942
1943 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
1944 &m_encStatsBuffers.m_mbEncFrameStats.sResource,
1945 "HEVC_B_MBENC_ConstantData_In",
1946 CodechalDbgAttr::attrOutput,
1947 m_encStatsBuffers.m_mbEncFrameStats.dwSize,
1948 0,
1949 CODECHAL_MEDIA_STATE_HEVC_B_MBENC));
1950 )
1951 #endif
1952 return eStatus;
1953 }
1954
InitKernelState()1955 MOS_STATUS CodechalFeiHevcStateG9Skl::InitKernelState()
1956 {
1957 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
1958
1959 CODECHAL_ENCODE_FUNCTION_ENTER;
1960
1961 // InitKernelStateMbEnc
1962 m_numMbEncEncKrnStates = CODECHAL_HEVC_FEI_MBENC_NUM_BXT_SKL;
1963
1964 m_mbEncKernelStates = MOS_NewArray(MHW_KERNEL_STATE, m_numMbEncEncKrnStates);
1965 CODECHAL_ENCODE_CHK_NULL_RETURN(m_mbEncKernelStates);
1966
1967 m_mbEncKernelBindingTable = (PCODECHAL_ENCODE_BINDING_TABLE_GENERIC)MOS_AllocAndZeroMemory(
1968 sizeof(GenericBindingTable) * m_numMbEncEncKrnStates);
1969 CODECHAL_ENCODE_CHK_NULL_RETURN(m_mbEncKernelBindingTable);
1970
1971 auto krnStateIdx = m_mbEncKernelStates;
1972
1973 for (uint32_t KrnStateIdx = 0; KrnStateIdx < m_numMbEncEncKrnStates; KrnStateIdx++)
1974 {
1975 auto kernelSize = m_combinedKernelSize;
1976 CODECHAL_KERNEL_HEADER currKrnHeader;
1977
1978 if (KrnStateIdx == CODECHAL_HEVC_FEI_MBENC_DS_COMBINED &&
1979 m_numMbEncEncKrnStates == CODECHAL_HEVC_FEI_MBENC_NUM_BXT_SKL) //Ignore. It isn't used on BXT.
1980 {
1981 krnStateIdx++;
1982 continue;
1983 }
1984
1985 CODECHAL_ENCODE_CHK_STATUS_RETURN(pfnGetKernelHeaderAndSize(
1986 m_kernelBinary,
1987 ENC_MBENC,
1988 KrnStateIdx,
1989 &currKrnHeader,
1990 &kernelSize));
1991
1992 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetMbEncKernelParams(
1993 &krnStateIdx->KernelParams,
1994 KrnStateIdx));
1995
1996 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetMbEncBindingTable(
1997 &m_mbEncKernelBindingTable[KrnStateIdx], KrnStateIdx));
1998
1999 krnStateIdx->dwCurbeOffset = m_stateHeapInterface->pStateHeapInterface->GetSizeofCmdInterfaceDescriptorData();
2000 krnStateIdx->KernelParams.pBinary = m_kernelBinary + (currKrnHeader.KernelStartPointer << MHW_KERNEL_OFFSET_SHIFT);
2001 krnStateIdx->KernelParams.iSize = kernelSize;
2002
2003 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnCalculateSshAndBtSizesRequested(
2004 m_stateHeapInterface,
2005 krnStateIdx->KernelParams.iBTCount,
2006 &krnStateIdx->dwSshSize,
2007 &krnStateIdx->dwBindingTableSize));
2008
2009 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->MhwInitISH(m_stateHeapInterface, krnStateIdx));
2010
2011 krnStateIdx++;
2012 }
2013
2014 return eStatus;
2015 }
2016
GetKernelHeaderAndSize(void * binary,EncOperation operation,uint32_t krnStateIdx,void * krnHeader,uint32_t * krnSize)2017 MOS_STATUS CodechalFeiHevcStateG9Skl::GetKernelHeaderAndSize(
2018 void *binary,
2019 EncOperation operation,
2020 uint32_t krnStateIdx,
2021 void *krnHeader,
2022 uint32_t *krnSize)
2023 {
2024 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
2025
2026 CODECHAL_ENCODE_FUNCTION_ENTER;
2027
2028 CODECHAL_ENCODE_CHK_NULL_RETURN(binary);
2029 CODECHAL_ENCODE_CHK_NULL_RETURN(krnHeader);
2030 CODECHAL_ENCODE_CHK_NULL_RETURN(krnSize);
2031
2032 PCODECHAL_ENC_HEVC_KERNEL_HEADER_FEI_G9_SKL kernelHeaderTable = (PCODECHAL_ENC_HEVC_KERNEL_HEADER_FEI_G9_SKL)binary;
2033 PCODECHAL_KERNEL_HEADER currKrnHeader = nullptr;
2034
2035 if (operation == ENC_SCALING4X)
2036 {
2037 currKrnHeader = &kernelHeaderTable->Hevc_FEI_LCUEnc_I_DS4HME;
2038 }
2039 else if (operation == ENC_ME)
2040 {
2041 // SKL supports P frame. P HME index CODECHAL_ENCODE_ME_IDX_P is 0 and B HME index CODECHAL_ENCODE_ME_IDX_B is 1
2042 if (krnStateIdx == 0)
2043 {
2044 currKrnHeader = &kernelHeaderTable->Hevc_FEI_LCUEnc_P_HME;
2045 }
2046 else
2047 {
2048 currKrnHeader = &kernelHeaderTable->Hevc_FEI_LCUEnc_B_HME;
2049 }
2050 }
2051 else if (operation == ENC_BRC)
2052 {
2053 switch (krnStateIdx)
2054 {
2055 case CODECHAL_HEVC_BRC_COARSE_INTRA:
2056 currKrnHeader = &kernelHeaderTable->Hevc_FEI_LCUEnc_I_COARSE;
2057 break;
2058
2059 default:
2060 CODECHAL_ENCODE_ASSERTMESSAGE("Unsupported BRC mode requested");
2061 eStatus = MOS_STATUS_INVALID_PARAMETER;
2062 return eStatus;
2063 }
2064 }
2065 else if (operation == ENC_MBENC)
2066 {
2067 switch (krnStateIdx)
2068 {
2069 case CODECHAL_HEVC_MBENC_2xSCALING:
2070 case CODECHAL_HEVC_MBENC_32x32MD:
2071 case CODECHAL_HEVC_MBENC_16x16SAD:
2072 case CODECHAL_HEVC_MBENC_16x16MD:
2073 case CODECHAL_HEVC_MBENC_8x8PU:
2074 case CODECHAL_HEVC_MBENC_8x8FMODE:
2075 case CODECHAL_HEVC_MBENC_32x32INTRACHECK:
2076 case CODECHAL_HEVC_FEI_MBENC_BENC:
2077 currKrnHeader = &kernelHeaderTable->Hevc_FEI_LCUEnc_I_2xDownSampling_Kernel;
2078 currKrnHeader += krnStateIdx;
2079 break;
2080
2081 case CODECHAL_HEVC_FEI_MBENC_BPAK:
2082 currKrnHeader = &kernelHeaderTable->Hevc_FEI_LCUEnc_PB_Pak;
2083 break;
2084
2085 case CODECHAL_HEVC_FEI_MBENC_DS_COMBINED:
2086 currKrnHeader = &kernelHeaderTable->Hevc_FEI_LCUEnc_DS_Combined;
2087 break;
2088
2089 case CODECHAL_HEVC_FEI_MBENC_PENC:
2090 currKrnHeader = &kernelHeaderTable->HEVC_FEI_LCUEnc_P_MB;
2091 break;
2092
2093 default:
2094 CODECHAL_ENCODE_ASSERTMESSAGE("Unsupported ENC mode requested");
2095 eStatus = MOS_STATUS_INVALID_PARAMETER;
2096 return eStatus;
2097 }
2098 }
2099 else
2100 {
2101 CODECHAL_ENCODE_ASSERTMESSAGE("Unsupported ENC mode requested");
2102 eStatus = MOS_STATUS_INVALID_PARAMETER;
2103 return eStatus;
2104 }
2105
2106 *((PCODECHAL_KERNEL_HEADER)krnHeader) = *currKrnHeader;
2107
2108 PCODECHAL_KERNEL_HEADER nextKrnHeader = (currKrnHeader + 1);
2109 PCODECHAL_KERNEL_HEADER invalidEntry = (PCODECHAL_KERNEL_HEADER)(((uint8_t*)binary) + sizeof(*kernelHeaderTable));
2110 uint32_t nextKrnOffset = *krnSize;
2111
2112 if (nextKrnHeader < invalidEntry)
2113 {
2114 nextKrnOffset = nextKrnHeader->KernelStartPointer << MHW_KERNEL_OFFSET_SHIFT;
2115 }
2116 *krnSize = nextKrnOffset - (currKrnHeader->KernelStartPointer << MHW_KERNEL_OFFSET_SHIFT);
2117
2118 return eStatus;
2119 }
2120
2121 #ifndef HEVC_FEI_ENABLE_CMRT
2122
Encode2xScalingKernel()2123 MOS_STATUS CodechalFeiHevcStateG9Skl::Encode2xScalingKernel()
2124 {
2125 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
2126
2127 PerfTagSetting perfTag;
2128 CODECHAL_ENCODE_SET_PERFTAG_INFO(perfTag, CODECHAL_ENCODE_PERFTAG_CALL_SCALING_KERNEL);
2129
2130 uint32_t krnIdx = CODECHAL_HEVC_MBENC_2xSCALING;
2131 auto kernelState = &m_mbEncKernelStates[krnIdx];
2132 auto pScalingBindingTable = &m_mbEncKernelBindingTable[krnIdx];
2133 if (m_firstTaskInPhase || !m_singleTaskPhaseSupported)
2134 {
2135 CODECHAL_ENCODE_CHK_STATUS_RETURN(RequestSshAndVerifyCommandBufferSize(kernelState));
2136 }
2137
2138 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalGetResourceInfo(
2139 m_osInterface,
2140 &m_scaled2xSurface));
2141
2142 // Setup DSH
2143 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->AssignDshAndSshSpace(
2144 m_stateHeapInterface,
2145 kernelState,
2146 false,
2147 0,
2148 false,
2149 m_storeData));
2150
2151 //Setup CURBE
2152 MEDIA_OBJECT_DOWNSCALING_2X_STATIC_DATA_G9 cmd, *curbe = &cmd;
2153 MOS_ZeroMemory(curbe, sizeof(*curbe));
2154 curbe->DW0.PicWidth = MOS_ALIGN_CEIL(m_frameWidth, CODECHAL_MACROBLOCK_WIDTH);
2155 curbe->DW0.PicHeight = MOS_ALIGN_CEIL(m_frameHeight, CODECHAL_MACROBLOCK_HEIGHT);
2156
2157 uint32_t startBTI = 0;
2158 curbe->DW8.BTI_Src_Y = pScalingBindingTable->dwBindingTableEntries[startBTI++];
2159 curbe->DW9.BTI_Dst_Y = pScalingBindingTable->dwBindingTableEntries[startBTI++];
2160
2161 CODECHAL_MEDIA_STATE_TYPE encFunctionType = CODECHAL_MEDIA_STATE_2X_SCALING;
2162 CODECHAL_ENCODE_CHK_STATUS_RETURN(
2163 AddCurbeToStateHeap(kernelState, encFunctionType, &cmd, sizeof(cmd)));
2164
2165 MOS_COMMAND_BUFFER cmdBuffer;
2166 CODECHAL_ENCODE_CHK_STATUS_RETURN(SendKernelCmdsAndBindingTable(
2167 &cmdBuffer,
2168 kernelState,
2169 encFunctionType,
2170 nullptr));
2171
2172 // Add surface states, 2X scaling uses U16Norm surface format
2173 startBTI = 0;
2174
2175 // Source surface/s
2176 auto surfaceCodecParams = &m_surfaceParams[SURFACE_RAW_Y];
2177 surfaceCodecParams->bUse16UnormSurfaceFormat = true;
2178
2179 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
2180 kernelState,
2181 &cmdBuffer,
2182 SURFACE_RAW_Y,
2183 &pScalingBindingTable->dwBindingTableEntries[startBTI++]
2184 ));
2185
2186 CODECHAL_ENCODE_CHK_NULL_RETURN(m_mmcState);
2187 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_mmcState->SetSurfaceParams(surfaceCodecParams));
2188
2189 // Destination surface/s
2190 m_scaled2xSurface.dwWidth = MOS_ALIGN_CEIL((m_frameWidth / SCALE_FACTOR_2x), CODECHAL_MACROBLOCK_WIDTH);
2191 m_scaled2xSurface.dwHeight = MOS_ALIGN_CEIL((m_frameHeight / SCALE_FACTOR_2x), CODECHAL_MACROBLOCK_HEIGHT);
2192
2193 m_surfaceParams[SURFACE_Y_2X].bUse16UnormSurfaceFormat =
2194 m_surfaceParams[SURFACE_Y_2X].bIsWritable =
2195 m_surfaceParams[SURFACE_Y_2X].bRenderTarget = true;
2196 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
2197 kernelState,
2198 &cmdBuffer,
2199 SURFACE_Y_2X,
2200 &pScalingBindingTable->dwBindingTableEntries[startBTI++]
2201 ));
2202
2203 if (!m_hwWalker)
2204 {
2205 eStatus = MOS_STATUS_UNKNOWN;
2206 CODECHAL_ENCODE_ASSERTMESSAGE("Currently HW walker shall not be disabled for CM based down scaling kernel.");
2207 return eStatus;
2208 }
2209
2210 CODECHAL_WALKER_CODEC_PARAMS walkerCodecParams;
2211 MOS_ZeroMemory(&walkerCodecParams, sizeof(walkerCodecParams));
2212 walkerCodecParams.WalkerMode = m_walkerMode;
2213 // check kernel of Downscaling 2x kernels for Ultra HME.
2214 walkerCodecParams.dwResolutionX = MOS_ALIGN_CEIL(m_frameWidth, 32) >> 5;
2215 // The frame kernel process 32x32 input pixels and output 16x16 down sampled pixels
2216 walkerCodecParams.dwResolutionY = MOS_ALIGN_CEIL(m_frameHeight, 32) >> 5;
2217 /* Enforce no dependency dispatch order for Scaling kernel, */
2218 walkerCodecParams.bNoDependency = true;
2219
2220 MHW_WALKER_PARAMS walkerParams;
2221 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalInitMediaObjectWalkerParams(
2222 m_hwInterface,
2223 &walkerParams,
2224 &walkerCodecParams));
2225
2226 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_renderEngineInterface->AddMediaObjectWalkerCmd(
2227 &cmdBuffer,
2228 &walkerParams));
2229
2230 CODECHAL_ENCODE_CHK_STATUS_RETURN(EndKernelCall(
2231 encFunctionType,
2232 kernelState,
2233 &cmdBuffer));
2234
2235 return eStatus;
2236 }
2237
Encode32x32PuModeDecisionKernel()2238 MOS_STATUS CodechalFeiHevcStateG9Skl::Encode32x32PuModeDecisionKernel()
2239 {
2240 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
2241
2242 PerfTagSetting perfTag;
2243 CODECHAL_ENCODE_SET_PERFTAG_INFO(perfTag, CODECHAL_ENCODE_PERFTAG_CALL_32X32_PU_MD);
2244
2245 uint32_t krnIdx = CODECHAL_HEVC_MBENC_32x32MD;
2246 auto kernelState = &m_mbEncKernelStates[krnIdx];
2247 auto bindingTable = &m_mbEncKernelBindingTable[krnIdx];
2248 if (m_firstTaskInPhase || !m_singleTaskPhaseSupported)
2249 {
2250 CODECHAL_ENCODE_CHK_STATUS_RETURN(RequestSshAndVerifyCommandBufferSize(kernelState));
2251 }
2252
2253 // Setup DSH
2254 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->AssignDshAndSshSpace(
2255 m_stateHeapInterface,
2256 kernelState,
2257 false,
2258 0,
2259 false,
2260 m_storeData));
2261
2262 //Setup CURBE
2263 uint32_t log2MaxCUSize = m_hevcSeqParams->log2_max_coding_block_size_minus3 + 3;
2264
2265 CalcLambda(CODECHAL_ENCODE_HEVC_I_SLICE, INTRA_TRANSFORM_HAAR);
2266 int32_t sliceQp = CalSliceQp();
2267
2268 double lambdaScalingFactor = 1.0;
2269 double qpLambda = m_qpLambdaMd[CODECHAL_ENCODE_HEVC_I_SLICE][sliceQp];
2270 double squaredQpLambda = qpLambda * qpLambda;
2271 m_fixedPointLambda = (uint32_t)(lambdaScalingFactor * squaredQpLambda * (1<<10));
2272
2273 CODECHAL_FEI_HEVC_I_32x32_PU_MODE_DECISION_CURBE_G9 cmd, *curbe = &cmd;
2274 MOS_ZeroMemory(curbe, sizeof(*curbe));
2275 curbe->DW0.FrameWidth = MOS_ALIGN_CEIL(m_frameWidth, CODECHAL_MACROBLOCK_WIDTH);
2276 curbe->DW0.FrameHeight = MOS_ALIGN_CEIL(m_frameHeight, CODECHAL_MACROBLOCK_HEIGHT);
2277
2278 curbe->DW1.EnableDebugDump = false;
2279 curbe->DW1.LCUType = (log2MaxCUSize==6)? 0 /*64x64*/ : 1 /*32x32*/;
2280 curbe->DW1.PuType = 0; // 32x32 PU
2281 curbe->DW1.BRCEnable = m_encodeParams.bMbQpDataEnabled || m_brcEnabled;
2282 curbe->DW1.LCUBRCEnable = m_encodeParams.bMbQpDataEnabled || m_lcuBrcEnabled;
2283 curbe->DW1.SliceType = PicCodingTypeToSliceType(m_hevcPicParams->CodingType);
2284 curbe->DW1.FASTSurveillanceFlag = (m_hevcPicParams->CodingType == I_TYPE) ? 0 : m_hevcSeqParams->bVideoSurveillance;
2285 curbe->DW1.ROIEnable = (m_hevcPicParams->NumROI > 0);
2286 curbe->DW1.SliceQp = sliceQp;
2287 curbe->DW1.EnableStatsDataDump = m_encodeParams.bReportStatisticsEnabled;
2288 curbe->DW1.EnableQualityImprovement = m_encodeParams.bQualityImprovementEnable;
2289
2290 curbe->DW2.Lambda = m_fixedPointLambda;
2291
2292 curbe->DW3.ModeCost32x32 = 0;
2293
2294 curbe->DW4.EarlyExit = (uint32_t)-1;
2295 if (curbe->DW1.EnableStatsDataDump)
2296 {
2297 double lambdaMd;
2298 float hadBias = 2.0f;
2299
2300 lambdaMd = m_qpLambdaMd[curbe->DW1.SliceType][sliceQp];
2301 lambdaMd = lambdaMd * hadBias;
2302 curbe->DW5.NewLambdaForHaarTransform = (uint32_t)(lambdaMd*(1<<10));
2303 }
2304
2305 uint32_t startIndex = 0;
2306 curbe->DW8.BTI_32x32PU_Output = bindingTable->dwBindingTableEntries[startIndex++];
2307 curbe->DW9.BTI_Src_Y = bindingTable->dwBindingTableEntries[startIndex++];
2308 startIndex++; // skip one BTI for Y and UV have the same BTI
2309 curbe->DW10.BTI_Src_Y2x = bindingTable->dwBindingTableEntries[startIndex++];
2310 curbe->DW11.BTI_Slice_Map = bindingTable->dwBindingTableEntries[startIndex++];
2311 curbe->DW12.BTI_Src_Y2x_VME = bindingTable->dwBindingTableEntries[startIndex++];
2312 curbe->DW13.BTI_Brc_Input = bindingTable->dwBindingTableEntries[startIndex++];
2313 curbe->DW14.BTI_LCU_Qp_Surface = bindingTable->dwBindingTableEntries[startIndex++];
2314 curbe->DW15.BTI_Brc_Data = bindingTable->dwBindingTableEntries[startIndex++];
2315 curbe->DW16.BTI_Stats_Data = bindingTable->dwBindingTableEntries[startIndex++];
2316 curbe->DW17.BTI_Kernel_Debug = bindingTable->dwBindingTableEntries[startIndex++];
2317
2318 CODECHAL_ENCODE_ASSERT(startIndex == bindingTable->dwNumBindingTableEntries);
2319
2320 CODECHAL_MEDIA_STATE_TYPE encFunctionType = CODECHAL_MEDIA_STATE_32x32_PU_MODE_DECISION;
2321 CODECHAL_ENCODE_CHK_STATUS_RETURN(
2322 AddCurbeToStateHeap(kernelState, encFunctionType, &cmd, sizeof(cmd)));
2323
2324 MOS_COMMAND_BUFFER cmdBuffer;
2325 CODECHAL_ENCODE_CHK_STATUS_RETURN(SendKernelCmdsAndBindingTable(
2326 &cmdBuffer,
2327 kernelState,
2328 encFunctionType,
2329 nullptr));
2330
2331 //Add surface states
2332 startIndex = 0;
2333
2334 // 32x32 PU output
2335 m_surfaceParams[SURFACE_32x32_PU_OUTPUT].bIsWritable =
2336 m_surfaceParams[SURFACE_32x32_PU_OUTPUT].bRenderTarget = true;
2337 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
2338 kernelState,
2339 &cmdBuffer,
2340 SURFACE_32x32_PU_OUTPUT,
2341 &bindingTable->dwBindingTableEntries[startIndex++]));
2342
2343 // Source Y and UV
2344 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
2345 kernelState,
2346 &cmdBuffer,
2347 SURFACE_RAW_Y_UV,
2348 &bindingTable->dwBindingTableEntries[startIndex++]));
2349 startIndex ++; // UV index
2350
2351 // Source Y2x
2352 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
2353 kernelState,
2354 &cmdBuffer,
2355 SURFACE_Y_2X,
2356 &bindingTable->dwBindingTableEntries[startIndex++]));
2357
2358 // Slice map
2359 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
2360 kernelState,
2361 &cmdBuffer,
2362 SURFACE_SLICE_MAP,
2363 &bindingTable->dwBindingTableEntries[startIndex++]));
2364
2365 // Source Y2x for VME
2366 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
2367 kernelState,
2368 &cmdBuffer,
2369 SURFACE_Y_2X_VME,
2370 &bindingTable->dwBindingTableEntries[startIndex++]));
2371
2372 // BRC Input
2373 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
2374 kernelState,
2375 &cmdBuffer,
2376 SURFACE_BRC_INPUT,
2377 &bindingTable->dwBindingTableEntries[startIndex++]));
2378
2379 // LCU Qp surface
2380 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
2381 kernelState,
2382 &cmdBuffer,
2383 SURFACE_LCU_QP,
2384 &bindingTable->dwBindingTableEntries[startIndex++]));
2385
2386 // BRC data surface
2387 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
2388 kernelState,
2389 &cmdBuffer,
2390 SURFACE_BRC_DATA,
2391 &bindingTable->dwBindingTableEntries[startIndex++]));
2392
2393 // skip statstics data dump surface because it is not used
2394
2395 if (!m_hwWalker)
2396 {
2397 eStatus = MOS_STATUS_UNKNOWN;
2398 CODECHAL_ENCODE_ASSERTMESSAGE("Currently HW walker shall not be disabled for CM based down scaling kernel.");
2399 return eStatus;
2400 }
2401
2402 CODECHAL_WALKER_CODEC_PARAMS walkerCodecParams;
2403 MOS_ZeroMemory(&walkerCodecParams, sizeof(walkerCodecParams));
2404 walkerCodecParams.WalkerMode = m_walkerMode;
2405 walkerCodecParams.dwResolutionX = MOS_ALIGN_CEIL(m_frameWidth, 32) >> 5; /* looping for Walker is needed at 8x8 block level */
2406 walkerCodecParams.dwResolutionY = MOS_ALIGN_CEIL(m_frameHeight, 32) >> 5;
2407 walkerCodecParams.bNoDependency = true; /* Enforce no dependency dispatch order for 32x32 MD kernel */
2408
2409 MHW_WALKER_PARAMS walkerParams;
2410 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalInitMediaObjectWalkerParams(
2411 m_hwInterface,
2412 &walkerParams,
2413 &walkerCodecParams));
2414
2415 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_renderEngineInterface->AddMediaObjectWalkerCmd(
2416 &cmdBuffer,
2417 &walkerParams));
2418
2419 CODECHAL_ENCODE_CHK_STATUS_RETURN(EndKernelCall(
2420 encFunctionType,
2421 kernelState,
2422 &cmdBuffer));
2423
2424 return eStatus;
2425 }
2426
Encode16x16SadPuComputationKernel()2427 MOS_STATUS CodechalFeiHevcStateG9Skl::Encode16x16SadPuComputationKernel()
2428 {
2429 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
2430
2431 CODECHAL_ENCODE_FUNCTION_ENTER;
2432
2433 PerfTagSetting perfTag;
2434 CODECHAL_ENCODE_SET_PERFTAG_INFO(perfTag, CODECHAL_ENCODE_PERFTAG_CALL_16X16_SAD);
2435
2436 uint32_t krnIdx = CODECHAL_HEVC_MBENC_16x16SAD;
2437 auto kernelState = &m_mbEncKernelStates[krnIdx];
2438 auto bindingTable = &m_mbEncKernelBindingTable[krnIdx];
2439 if (m_firstTaskInPhase || !m_singleTaskPhaseSupported)
2440 {
2441 CODECHAL_ENCODE_CHK_STATUS_RETURN(RequestSshAndVerifyCommandBufferSize(kernelState));
2442 }
2443
2444 //Setup DSH
2445 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->AssignDshAndSshSpace(
2446 m_stateHeapInterface,
2447 kernelState,
2448 false,
2449 0,
2450 false,
2451 m_storeData));
2452
2453 // Setup CURBE
2454 CODECHAL_ENC_HEVC_I_16x16_SAD_CURBE_G9 cmd, *curbe = &cmd;
2455
2456 MOS_ZeroMemory(curbe, sizeof(*curbe));
2457 curbe->DW0.FrameWidth = MOS_ALIGN_CEIL(m_frameWidth, CODECHAL_MACROBLOCK_WIDTH);
2458 curbe->DW0.FrameHeight = MOS_ALIGN_CEIL(m_frameHeight, CODECHAL_MACROBLOCK_HEIGHT);
2459
2460 curbe->DW1.Log2MaxCUSize = m_hevcSeqParams->log2_max_coding_block_size_minus3 + 3;
2461 curbe->DW1.Log2MinCUSize = m_hevcSeqParams->log2_min_coding_block_size_minus3 + 3;
2462 curbe->DW1.Log2MinTUSize = m_hevcSeqParams->log2_min_transform_block_size_minus2 + 2;
2463 curbe->DW1.EnableIntraEarlyExit = (m_hevcSeqParams->TargetUsage == 0x04) ? ((m_hevcPicParams->CodingType == I_TYPE) ? 0 : 1) : 0;
2464
2465 curbe->DW2.SliceType = PicCodingTypeToSliceType(m_hevcPicParams->CodingType);
2466 curbe->DW2.SimFlagForInter = false;
2467 if(m_hevcPicParams->CodingType != I_TYPE)
2468 {
2469 curbe->DW2.FASTSurveillanceFlag = m_hevcSeqParams->bVideoSurveillance;
2470 }
2471
2472 uint32_t startIndex = 0;
2473 curbe->DW8.BTI_Src_Y = bindingTable->dwBindingTableEntries[startIndex++];
2474 startIndex++; // skip UV BTI
2475 curbe->DW9.BTI_Sad_16x16_PU_Output = bindingTable->dwBindingTableEntries[startIndex++];
2476 curbe->DW10.BTI_32x32_Pu_ModeDecision = bindingTable->dwBindingTableEntries[startIndex++];
2477 curbe->DW11.BTI_Slice_Map = bindingTable->dwBindingTableEntries[startIndex++];
2478 curbe->DW12.BTI_Simplest_Intra = bindingTable->dwBindingTableEntries[startIndex++];
2479 curbe->DW13.BTI_Debug = bindingTable->dwBindingTableEntries[startIndex++];
2480
2481 CODECHAL_ENCODE_ASSERT(startIndex == bindingTable->dwNumBindingTableEntries);
2482
2483 CODECHAL_MEDIA_STATE_TYPE encFunctionType = CODECHAL_MEDIA_STATE_16x16_PU_SAD;
2484 CODECHAL_ENCODE_CHK_STATUS_RETURN(
2485 AddCurbeToStateHeap(kernelState, encFunctionType, &cmd, sizeof(cmd)));
2486
2487 MOS_COMMAND_BUFFER cmdBuffer;
2488 CODECHAL_ENCODE_CHK_STATUS_RETURN(SendKernelCmdsAndBindingTable(
2489 &cmdBuffer,
2490 kernelState,
2491 encFunctionType,
2492 nullptr));
2493
2494 //Add surface states
2495 startIndex = 0;
2496
2497 // Source Y and UV
2498 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
2499 kernelState,
2500 &cmdBuffer,
2501 SURFACE_RAW_Y_UV,
2502 &bindingTable->dwBindingTableEntries[startIndex++]));
2503 startIndex++;
2504
2505 // 16x16 PU SAD output
2506 m_surfaceParams[SURFACE_16x16PU_SAD].bIsWritable =
2507 m_surfaceParams[SURFACE_16x16PU_SAD].bRenderTarget = true;
2508 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
2509 kernelState,
2510 &cmdBuffer,
2511 SURFACE_16x16PU_SAD,
2512 &bindingTable->dwBindingTableEntries[startIndex++]));
2513
2514 // 32x32 PU MD data
2515 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
2516 kernelState,
2517 &cmdBuffer,
2518 SURFACE_32x32_PU_OUTPUT,
2519 &bindingTable->dwBindingTableEntries[startIndex++]));
2520
2521 // Slice map
2522 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
2523 kernelState,
2524 &cmdBuffer,
2525 SURFACE_SLICE_MAP,
2526 &bindingTable->dwBindingTableEntries[startIndex++]));
2527
2528 // Simplest Intra
2529 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
2530 kernelState,
2531 &cmdBuffer,
2532 SURFACE_SIMPLIFIED_INTRA,
2533 &bindingTable->dwBindingTableEntries[startIndex++]));
2534
2535 if (!m_hwWalker)
2536 {
2537 eStatus = MOS_STATUS_UNKNOWN;
2538 CODECHAL_ENCODE_ASSERTMESSAGE("Currently HW walker shall not be disabled for CM based down scaling kernel.");
2539 return eStatus;
2540 }
2541
2542 CODECHAL_WALKER_CODEC_PARAMS walkerCodecParams;
2543 MOS_ZeroMemory(&walkerCodecParams, sizeof(walkerCodecParams));
2544 walkerCodecParams.WalkerMode = m_walkerMode;
2545 /* looping for Walker is needed at 16x16 block level */
2546 walkerCodecParams.dwResolutionX = MOS_ALIGN_CEIL(m_frameWidth, 16) >> 4;
2547 walkerCodecParams.dwResolutionY = MOS_ALIGN_CEIL(m_frameHeight, 16) >> 4;
2548 /* Enforce no dependency dispatch order for the 16x16 SAD kernel */
2549 walkerCodecParams.bNoDependency = true;
2550
2551 MHW_WALKER_PARAMS walkerParams;
2552 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalInitMediaObjectWalkerParams(
2553 m_hwInterface,
2554 &walkerParams,
2555 &walkerCodecParams));
2556
2557 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_renderEngineInterface->AddMediaObjectWalkerCmd(
2558 &cmdBuffer,
2559 &walkerParams));
2560
2561 CODECHAL_ENCODE_CHK_STATUS_RETURN(EndKernelCall(
2562 encFunctionType,
2563 kernelState,
2564 &cmdBuffer));
2565
2566 return eStatus;
2567 }
2568
Encode16x16PuModeDecisionKernel()2569 MOS_STATUS CodechalFeiHevcStateG9Skl::Encode16x16PuModeDecisionKernel()
2570 {
2571 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
2572
2573 CODECHAL_ENCODE_FUNCTION_ENTER;
2574
2575 PerfTagSetting perfTag;
2576 CODECHAL_ENCODE_SET_PERFTAG_INFO(perfTag, CODECHAL_ENCODE_PERFTAG_CALL_16X16_PU_MD);
2577
2578 uint32_t krnIdx = CODECHAL_HEVC_MBENC_16x16MD;
2579 auto kernelState = &m_mbEncKernelStates[krnIdx];
2580 auto bindingTable = &m_mbEncKernelBindingTable[krnIdx];
2581 if (m_firstTaskInPhase || !m_singleTaskPhaseSupported)
2582 {
2583 CODECHAL_ENCODE_CHK_STATUS_RETURN(RequestSshAndVerifyCommandBufferSize(kernelState));
2584 }
2585
2586 // Setup DSH
2587 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->AssignDshAndSshSpace(
2588 m_stateHeapInterface,
2589 kernelState,
2590 false,
2591 0,
2592 false,
2593 m_storeData));
2594
2595 // Setup CURBE
2596 int32_t sliceQp = CalSliceQp();
2597 uint8_t sliceType = PicCodingTypeToSliceType(m_hevcPicParams->CodingType);
2598
2599 double lambdaScaleFactor = 0.46 + sliceQp - 22;
2600 if (lambdaScaleFactor < 0)
2601 {
2602 lambdaScaleFactor = 0.46;
2603 }
2604
2605 if (lambdaScaleFactor > 15)
2606 {
2607 lambdaScaleFactor = 15;
2608 }
2609
2610 double squredLambda = lambdaScaleFactor * pow(2.0, ((double)sliceQp-12.0)/6);
2611 m_fixedPointLambdaForLuma = (uint32_t)(squredLambda * (1<<10));
2612
2613 double lambdaScalingFactor = 1.0;
2614 double qpLambda = m_qpLambdaMd[sliceType][sliceQp];
2615 double squaredQpLambda = qpLambda * qpLambda;
2616 m_fixedPointLambdaForChroma = (uint32_t)(lambdaScalingFactor * squaredQpLambda * (1<<10));
2617
2618 LoadCosts(sliceType, (uint8_t)sliceQp, INTRA_TRANSFORM_HAAR);
2619
2620 CODECHAL_FEI_HEVC_I_16x16_PU_MODEDECISION_CURBE_G9 cmd, *curbe = &cmd;
2621 MOS_ZeroMemory(curbe, sizeof(*curbe));
2622
2623 uint32_t log2MaxCUSize = m_hevcSeqParams->log2_max_coding_block_size_minus3 + 3;
2624 curbe->DW0.FrameWidth = MOS_ALIGN_CEIL(m_frameWidth, CODECHAL_MACROBLOCK_WIDTH);
2625 curbe->DW0.FrameHeight = MOS_ALIGN_CEIL(m_frameHeight, CODECHAL_MACROBLOCK_HEIGHT);
2626
2627 curbe->DW1.Log2MaxCUSize = log2MaxCUSize;
2628 curbe->DW1.Log2MinCUSize = m_hevcSeqParams->log2_min_coding_block_size_minus3 + 3;
2629 curbe->DW1.Log2MinTUSize = m_hevcSeqParams->log2_min_transform_block_size_minus2 + 2;
2630 curbe->DW1.SliceQp = sliceQp;
2631
2632 curbe->DW2.FixedPoint_Lambda_PredMode = m_fixedPointLambdaForChroma;
2633
2634 curbe->DW3.LambdaScalingFactor = 1;
2635 curbe->DW3.SliceType = sliceType;
2636 curbe->DW3.EnableIntraEarlyExit = (m_hevcSeqParams->TargetUsage == 0x04) ? ((m_hevcPicParams->CodingType == I_TYPE) ? 0 : 1) : 0;
2637 curbe->DW3.BRCEnable = m_encodeParams.bMbQpDataEnabled || m_brcEnabled;
2638 curbe->DW3.LCUBRCEnable = m_encodeParams.bMbQpDataEnabled || m_lcuBrcEnabled;
2639 curbe->DW3.ROIEnable = (m_hevcPicParams->NumROI > 0);
2640 curbe->DW3.FASTSurveillanceFlag = (m_hevcPicParams->CodingType == I_TYPE) ? 0 : m_hevcSeqParams->bVideoSurveillance;
2641 curbe->DW3.EnableRollingIntra = m_hevcPicParams->bEnableRollingIntraRefresh;
2642 //Given only Column Rolling I is supported, if in future, Row Rolling I support to be added, then, need to make change here as per Kernel
2643 curbe->DW3.IntraRefreshEn = m_hevcPicParams->bEnableRollingIntraRefresh;
2644 curbe->DW3.HalfUpdateMixedLCU = 0;
2645 curbe->DW3.EnableQualityImprovement = m_encodeParams.bQualityImprovementEnable;
2646
2647 curbe->DW4.PenaltyForIntra8x8NonDCPredMode = 0;
2648 curbe->DW4.IntraComputeType = 1;
2649 curbe->DW4.AVCIntra8x8Mask = 0;
2650 curbe->DW4.IntraSadAdjust = 2;
2651
2652 double lambdaMd = sqrt(0.57*pow(2.0, ((double)sliceQp-12.0)/3));
2653 squredLambda = lambdaMd * lambdaMd;
2654 uint32_t newLambda = (uint32_t)(squredLambda*(1<<10));
2655 curbe->DW5.FixedPoint_Lambda_CU_Mode_for_Cost_Calculation = newLambda;
2656
2657 curbe->DW6.ScreenContentFlag = m_hevcPicParams->bScreenContent;
2658
2659 curbe->DW7.ModeCostIntraNonPred = m_modeCost[0];
2660 curbe->DW7.ModeCostIntra16x16 = m_modeCost[1];
2661 curbe->DW7.ModeCostIntra8x8 = m_modeCost[2];
2662 curbe->DW7.ModeCostIntra4x4 = m_modeCost[3];
2663
2664 curbe->DW8.FixedPoint_Lambda_CU_Mode_for_Luma = m_fixedPointLambdaForLuma;
2665
2666 if (m_hevcPicParams->bEnableRollingIntraRefresh)
2667 {
2668 curbe->DW9.IntraRefreshMBNum = m_hevcPicParams->IntraInsertionLocation;
2669 curbe->DW9.IntraRefreshQPDelta = m_hevcPicParams->QpDeltaForInsertedIntra;
2670 curbe->DW9.IntraRefreshUnitInMB = m_hevcPicParams->IntraInsertionSize;
2671 }
2672
2673 curbe->DW10.SimplifiedFlagForInter = 0;
2674 if (m_encodeParams.bReportStatisticsEnabled)
2675 {
2676 curbe->DW10.HaarTransformMode = true;
2677 }
2678 else
2679 {
2680 curbe->DW10.HaarTransformMode = (m_hevcPicParams->CodingType == I_TYPE)? false: true;
2681 }
2682
2683 uint32_t startBTI = 0;
2684 curbe->DW16.BTI_Src_Y = bindingTable->dwBindingTableEntries[startBTI++];
2685 startBTI++; // skip UV BTI
2686 curbe->DW17.BTI_Sad_16x16_PU = bindingTable->dwBindingTableEntries[startBTI++];
2687 curbe->DW18.BTI_PAK_Object = bindingTable->dwBindingTableEntries[startBTI++];
2688 curbe->DW19.BTI_SAD_32x32_PU_mode = bindingTable->dwBindingTableEntries[startBTI++];
2689 curbe->DW20.BTI_VME_Mode_8x8 = bindingTable->dwBindingTableEntries[startBTI++];
2690 curbe->DW21.BTI_Slice_Map = bindingTable->dwBindingTableEntries[startBTI++];
2691 curbe->DW22.BTI_VME_Src = bindingTable->dwBindingTableEntries[startBTI++];
2692 curbe->DW23.BTI_BRC_Input = bindingTable->dwBindingTableEntries[startBTI++];
2693 curbe->DW24.BTI_Simplest_Intra = bindingTable->dwBindingTableEntries[startBTI++];
2694 curbe->DW25.BTI_LCU_Qp_Surface = bindingTable->dwBindingTableEntries[startBTI++];
2695 curbe->DW26.BTI_BRC_Data = bindingTable->dwBindingTableEntries[startBTI++];
2696 curbe->DW27.BTI_Debug = bindingTable->dwBindingTableEntries[startBTI++];
2697
2698 CODECHAL_ENCODE_ASSERT(startBTI == bindingTable->dwNumBindingTableEntries);
2699
2700 CODECHAL_MEDIA_STATE_TYPE encFunctionType = CODECHAL_MEDIA_STATE_16x16_PU_MODE_DECISION;
2701 CODECHAL_ENCODE_CHK_STATUS_RETURN(
2702 AddCurbeToStateHeap(kernelState, encFunctionType, &cmd, sizeof(cmd)));
2703
2704 MOS_COMMAND_BUFFER cmdBuffer;
2705 CODECHAL_ENCODE_CHK_STATUS_RETURN(SendKernelCmdsAndBindingTable(
2706 &cmdBuffer,
2707 kernelState,
2708 encFunctionType,
2709 nullptr));
2710
2711 //Add surface states
2712 startBTI = 0;
2713
2714 // Source Y and UV:
2715 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
2716 kernelState,
2717 &cmdBuffer,
2718 SURFACE_RAW_Y_UV,
2719 &bindingTable->dwBindingTableEntries[startBTI++]));
2720 startBTI++;
2721
2722 // 16x16 PU SAD output
2723 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
2724 kernelState,
2725 &cmdBuffer,
2726 SURFACE_16x16PU_SAD,
2727 &bindingTable->dwBindingTableEntries[startBTI++]));
2728
2729 // PAK object output
2730 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
2731 kernelState,
2732 &cmdBuffer,
2733 SURFACE_CU_RECORD,
2734 &bindingTable->dwBindingTableEntries[startBTI++]));
2735
2736 // 32x32 PU MD data
2737 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
2738 kernelState,
2739 &cmdBuffer,
2740 SURFACE_32x32_PU_OUTPUT,
2741 &bindingTable->dwBindingTableEntries[startBTI++]));
2742
2743 // VME 8x8 mode
2744 m_surfaceParams[SURFACE_VME_8x8].bIsWritable =
2745 m_surfaceParams[SURFACE_VME_8x8].bRenderTarget = true;
2746 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
2747 kernelState,
2748 &cmdBuffer,
2749 SURFACE_VME_8x8,
2750 &bindingTable->dwBindingTableEntries[startBTI++]));
2751
2752 // Slice map
2753 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
2754 kernelState,
2755 &cmdBuffer,
2756 SURFACE_SLICE_MAP,
2757 &bindingTable->dwBindingTableEntries[startBTI++]));
2758
2759 // Source Y for VME
2760 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
2761 kernelState,
2762 &cmdBuffer,
2763 SURFACE_RAW_VME,
2764 &bindingTable->dwBindingTableEntries[startBTI++]));
2765
2766 // BRC Input
2767 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
2768 kernelState,
2769 &cmdBuffer,
2770 SURFACE_BRC_INPUT,
2771 &bindingTable->dwBindingTableEntries[startBTI++]));
2772
2773 // Simplest Intra
2774 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
2775 kernelState,
2776 &cmdBuffer,
2777 SURFACE_SIMPLIFIED_INTRA,
2778 &bindingTable->dwBindingTableEntries[startBTI++]));
2779
2780 // LCU Qp surface
2781 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
2782 kernelState,
2783 &cmdBuffer,
2784 SURFACE_LCU_QP,
2785 &bindingTable->dwBindingTableEntries[startBTI++]));
2786
2787 // BRC data surface
2788 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
2789 kernelState,
2790 &cmdBuffer,
2791 SURFACE_BRC_DATA,
2792 &bindingTable->dwBindingTableEntries[startBTI++]));
2793
2794 if (!m_hwWalker)
2795 {
2796 eStatus = MOS_STATUS_UNKNOWN;
2797 CODECHAL_ENCODE_ASSERTMESSAGE("Currently HW walker shall not be disabled for CM based down scaling kernel.");
2798 return eStatus;
2799 }
2800
2801 CODECHAL_WALKER_CODEC_PARAMS walkerCodecParams;
2802 MOS_ZeroMemory(&walkerCodecParams, sizeof(walkerCodecParams));
2803 walkerCodecParams.WalkerMode = m_walkerMode;
2804 /* looping for Walker is needed at 32x32 block level in OPT case*/
2805 walkerCodecParams.dwResolutionX = MOS_ALIGN_CEIL(m_frameWidth, 32) >> 5;
2806 walkerCodecParams.dwResolutionY = MOS_ALIGN_CEIL(m_frameHeight, 32) >> 5;
2807 walkerCodecParams.bNoDependency = true;
2808
2809 MHW_WALKER_PARAMS walkerParams;
2810 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalInitMediaObjectWalkerParams(
2811 m_hwInterface,
2812 &walkerParams,
2813 &walkerCodecParams));
2814
2815 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_renderEngineInterface->AddMediaObjectWalkerCmd(
2816 &cmdBuffer,
2817 &walkerParams));
2818
2819 CODECHAL_ENCODE_CHK_STATUS_RETURN(EndKernelCall(
2820 encFunctionType,
2821 kernelState,
2822 &cmdBuffer));
2823
2824 return eStatus;
2825 }
2826
Encode8x8PUKernel()2827 MOS_STATUS CodechalFeiHevcStateG9Skl::Encode8x8PUKernel()
2828 {
2829 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
2830
2831 CODECHAL_ENCODE_FUNCTION_ENTER;
2832
2833 PerfTagSetting perfTag;
2834 CODECHAL_ENCODE_SET_PERFTAG_INFO(perfTag, CODECHAL_ENCODE_PERFTAG_CALL_8X8_PU);
2835
2836 uint32_t krnIdx = CODECHAL_HEVC_MBENC_8x8PU;
2837 auto kernelState = &m_mbEncKernelStates[krnIdx];
2838 auto bindingTable = &m_mbEncKernelBindingTable[krnIdx];
2839 if (m_firstTaskInPhase || !m_singleTaskPhaseSupported)
2840 {
2841 CODECHAL_ENCODE_CHK_STATUS_RETURN(RequestSshAndVerifyCommandBufferSize(kernelState));
2842 }
2843
2844 // Setup DSH
2845 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->AssignDshAndSshSpace(
2846 m_stateHeapInterface,
2847 kernelState,
2848 false,
2849 0,
2850 false,
2851 m_storeData));
2852
2853 // Setup CURBE
2854 uint32_t log2MaxCUSize = m_hevcSeqParams->log2_max_coding_block_size_minus3 + 3;
2855 CODECHAL_FEI_HEVC_I_8x8_PU_CURBE_G9 cmd, *curbe = &cmd;
2856 MOS_ZeroMemory(curbe, sizeof(*curbe));
2857
2858 curbe->DW0.FrameWidth = MOS_ALIGN_CEIL(m_frameWidth, CODECHAL_MACROBLOCK_WIDTH);
2859 curbe->DW0.FrameHeight = MOS_ALIGN_CEIL(m_frameHeight, CODECHAL_MACROBLOCK_HEIGHT);
2860 curbe->DW1.SliceType = PicCodingTypeToSliceType(m_hevcPicParams->CodingType);
2861 curbe->DW1.PuType = 2; // 8x8
2862 curbe->DW1.DcFilterFlag = true;
2863 curbe->DW1.AngleRefineFlag = true;
2864 curbe->DW1.LCUType = (log2MaxCUSize==6)? 0 /*64x64*/ : 1 /*32x32*/;
2865 curbe->DW1.ScreenContentFlag = m_hevcPicParams->bScreenContent;
2866 curbe->DW1.EnableIntraEarlyExit = (m_hevcSeqParams->TargetUsage == 0x04) ? ((m_hevcPicParams->CodingType == I_TYPE) ? 0 : 1) : 0;
2867 curbe->DW1.EnableDebugDump = false;
2868 curbe->DW1.BRCEnable = m_encodeParams.bMbQpDataEnabled || m_brcEnabled;
2869 curbe->DW1.LCUBRCEnable = m_encodeParams.bMbQpDataEnabled || m_lcuBrcEnabled;
2870 curbe->DW1.ROIEnable = (m_hevcPicParams->NumROI > 0);
2871 curbe->DW1.FASTSurveillanceFlag = (m_hevcPicParams->CodingType == I_TYPE) ? 0 : m_hevcSeqParams->bVideoSurveillance;
2872 curbe->DW1.EnableQualityImprovement = m_encodeParams.bQualityImprovementEnable;
2873 curbe->DW1.QPValue = CalSliceQp();
2874 if (m_hevcPicParams->bEnableRollingIntraRefresh)
2875 {
2876 curbe->DW1.EnableRollingIntra = true;
2877 curbe->DW1.IntraRefreshEn = true;
2878 curbe->DW1.HalfUpdateMixedLCU = 0;
2879
2880 curbe->DW5.IntraRefreshMBNum = m_hevcPicParams->IntraInsertionLocation;
2881 curbe->DW5.IntraRefreshQPDelta = m_hevcPicParams->QpDeltaForInsertedIntra;
2882 curbe->DW5.IntraRefreshUnitInMB = m_hevcPicParams->IntraInsertionSize;
2883
2884 int32_t qp = CalSliceQp();
2885 curbe->DW1.QPValue = (uint32_t)qp;
2886 }
2887
2888 curbe->DW2.LumaLambda = m_fixedPointLambdaForLuma;
2889
2890 curbe->DW3.ChromaLambda = m_fixedPointLambdaForChroma;
2891
2892 if (m_encodeParams.bReportStatisticsEnabled)
2893 {
2894 curbe->DW4.HaarTransformFlag = true;
2895 }
2896 else
2897 {
2898 curbe->DW4.HaarTransformFlag = (m_hevcPicParams->CodingType == I_TYPE) ? false : true;
2899 }
2900 curbe->DW4.SimplifiedFlagForInter = false;
2901
2902 uint32_t startBTI = 0;
2903 curbe->DW8.BTI_Src_Y = bindingTable->dwBindingTableEntries[startBTI++];
2904 startBTI++; // skip one BTI for Y and UV have the same BTI
2905 curbe->DW9.BTI_Slice_Map = bindingTable->dwBindingTableEntries[startBTI++];
2906 curbe->DW10.BTI_VME_8x8_Mode = bindingTable->dwBindingTableEntries[startBTI++];
2907 curbe->DW11.BTI_Intra_Mode = bindingTable->dwBindingTableEntries[startBTI++];
2908 curbe->DW12.BTI_BRC_Input = bindingTable->dwBindingTableEntries[startBTI++];
2909 curbe->DW13.BTI_Simplest_Intra = bindingTable->dwBindingTableEntries[startBTI++];
2910 curbe->DW14.BTI_LCU_Qp_Surface = bindingTable->dwBindingTableEntries[startBTI++];
2911 curbe->DW15.BTI_BRC_Data = bindingTable->dwBindingTableEntries[startBTI++];
2912 curbe->DW16.BTI_Debug = bindingTable->dwBindingTableEntries[startBTI++];
2913
2914 CODECHAL_ENCODE_ASSERT(startBTI == bindingTable->dwNumBindingTableEntries);
2915
2916 CODECHAL_MEDIA_STATE_TYPE encFunctionType = CODECHAL_MEDIA_STATE_8x8_PU;
2917 CODECHAL_ENCODE_CHK_STATUS_RETURN(
2918 AddCurbeToStateHeap(kernelState, encFunctionType, &cmd, sizeof(cmd)));
2919
2920 MOS_COMMAND_BUFFER cmdBuffer;
2921 if(m_numMb8x8IntraKernelSplit == 0)
2922 {
2923 CODECHAL_ENCODE_CHK_STATUS_RETURN(SendKernelCmdsAndBindingTable(&cmdBuffer,
2924 kernelState,
2925 encFunctionType,
2926 nullptr));
2927 }
2928 else
2929 {
2930 CODECHAL_ENCODE_CHK_STATUS_RETURN(GetCommandBuffer(&cmdBuffer));
2931
2932 MHW_INTERFACE_DESCRIPTOR_PARAMS idParams;
2933 MOS_ZeroMemory(&idParams, sizeof(idParams));
2934 idParams.pKernelState = kernelState;
2935 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnSetInterfaceDescriptor(
2936 m_stateHeapInterface,
2937 1,
2938 &idParams));
2939
2940 // Add binding table
2941 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnSetBindingTable(
2942 m_stateHeapInterface,
2943 kernelState));
2944 }
2945
2946 //Add surface states
2947 startBTI = 0;
2948
2949 // Source Y and UV
2950 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
2951 kernelState,
2952 &cmdBuffer,
2953 SURFACE_RAW_Y_UV,
2954 &bindingTable->dwBindingTableEntries[startBTI++]));
2955 startBTI++;
2956
2957 // Slice Map
2958 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
2959 kernelState,
2960 &cmdBuffer,
2961 SURFACE_SLICE_MAP,
2962 &bindingTable->dwBindingTableEntries[startBTI++]));
2963
2964 // VME 8x8 mode
2965 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
2966 kernelState,
2967 &cmdBuffer,
2968 SURFACE_VME_8x8,
2969 &bindingTable->dwBindingTableEntries[startBTI++]));
2970
2971 // Intra mode
2972 m_surfaceParams[SURFACE_INTRA_MODE].bIsWritable =
2973 m_surfaceParams[SURFACE_INTRA_MODE].bRenderTarget = true;
2974 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
2975 kernelState,
2976 &cmdBuffer,
2977 SURFACE_INTRA_MODE,
2978 &bindingTable->dwBindingTableEntries[startBTI++]));
2979
2980 // BRC Input
2981 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
2982 kernelState,
2983 &cmdBuffer,
2984 SURFACE_BRC_INPUT,
2985 &bindingTable->dwBindingTableEntries[startBTI++]));
2986
2987 // Simplest Intra
2988 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
2989 kernelState,
2990 &cmdBuffer,
2991 SURFACE_SIMPLIFIED_INTRA,
2992 &bindingTable->dwBindingTableEntries[startBTI++]));
2993
2994 // LCU Qp surface
2995 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
2996 kernelState,
2997 &cmdBuffer,
2998 SURFACE_LCU_QP,
2999 &bindingTable->dwBindingTableEntries[startBTI++]));
3000
3001 // BRC data surface
3002 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
3003 kernelState,
3004 &cmdBuffer,
3005 SURFACE_BRC_DATA,
3006 &bindingTable->dwBindingTableEntries[startBTI++]));
3007
3008 if (!m_hwWalker)
3009 {
3010 eStatus = MOS_STATUS_UNKNOWN;
3011 CODECHAL_ENCODE_ASSERTMESSAGE("Currently HW walker shall not be disabled for CM based down scaling kernel.");
3012 return eStatus;
3013 }
3014
3015 CODECHAL_WALKER_CODEC_PARAMS walkerCodecParams;
3016 MOS_ZeroMemory(&walkerCodecParams, sizeof(walkerCodecParams));
3017 walkerCodecParams.WalkerMode = m_walkerMode;
3018 // each EU is based on one 8x8 block
3019 walkerCodecParams.dwResolutionX = MOS_ALIGN_CEIL(m_frameWidth, CODECHAL_MACROBLOCK_WIDTH) >> 3;
3020 walkerCodecParams.dwResolutionY = MOS_ALIGN_CEIL(m_frameHeight, CODECHAL_MACROBLOCK_HEIGHT) >> 3;
3021 /* Enforce no dependency dispatch order for 8x8 PU kernel */
3022 walkerCodecParams.bNoDependency = true;
3023
3024 if(m_numMb8x8IntraKernelSplit == 0)
3025 {
3026 MHW_WALKER_PARAMS walkerParams;
3027 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalInitMediaObjectWalkerParams(
3028 m_hwInterface,
3029 &walkerParams,
3030 &walkerCodecParams));
3031
3032 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_renderEngineInterface->AddMediaObjectWalkerCmd(
3033 &cmdBuffer,
3034 &walkerParams));
3035 }
3036 else
3037 {
3038 uint32_t numRowPerSplit = (walkerCodecParams.dwResolutionY + m_numMb8x8IntraKernelSplit - 1) / m_numMb8x8IntraKernelSplit;
3039 uint32_t currentNumRow = 0;
3040
3041 for(uint32_t i = 0; i < m_numMb8x8IntraKernelSplit; i++)
3042 {
3043 // Program render engine pipe commands
3044 SendKernelCmdsParams sendKernelCmdsParams = SendKernelCmdsParams();
3045 sendKernelCmdsParams.EncFunctionType = encFunctionType;
3046 sendKernelCmdsParams.pKernelState = kernelState;
3047 sendKernelCmdsParams.bEnableCustomScoreBoard= true;
3048 sendKernelCmdsParams.pCustomScoreBoard = &m_walkingPatternParam.ScoreBoard;
3049 CODECHAL_ENCODE_CHK_STATUS_RETURN(SendGenericKernelCmds(&cmdBuffer, &sendKernelCmdsParams));
3050
3051 MHW_WALKER_PARAMS walkerParams;
3052 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalInitMediaObjectWalkerParams(
3053 m_hwInterface,
3054 &walkerParams,
3055 &walkerCodecParams));
3056
3057 if(currentNumRow + numRowPerSplit >= walkerCodecParams.dwResolutionY)
3058 {
3059 // the last split may not have the same number of rows as previous splits
3060 numRowPerSplit = walkerCodecParams.dwResolutionY - currentNumRow;
3061 }
3062
3063 walkerParams.LocalStart.y = currentNumRow;
3064 walkerParams.dwLocalLoopExecCount = numRowPerSplit * walkerCodecParams.dwResolutionX;
3065
3066 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_renderEngineInterface->AddMediaObjectWalkerCmd(
3067 &cmdBuffer,
3068 &walkerParams));
3069
3070 currentNumRow += numRowPerSplit;
3071 if(currentNumRow >= walkerCodecParams.dwResolutionY)
3072 {
3073 break;
3074 }
3075 }
3076 }
3077
3078 CODECHAL_ENCODE_CHK_STATUS_RETURN(EndKernelCall(
3079 encFunctionType,
3080 kernelState,
3081 &cmdBuffer));
3082
3083 return eStatus;
3084 }
3085
Encode8x8PUFMODEKernel()3086 MOS_STATUS CodechalFeiHevcStateG9Skl::Encode8x8PUFMODEKernel()
3087 {
3088 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
3089
3090 CODECHAL_ENCODE_FUNCTION_ENTER;
3091
3092 PerfTagSetting perfTag;
3093 CODECHAL_ENCODE_SET_PERFTAG_INFO(perfTag, CODECHAL_ENCODE_PERFTAG_CALL_8X8_FMODE);
3094
3095 uint32_t krnIdx = CODECHAL_HEVC_MBENC_8x8FMODE;
3096 auto kernelState = &m_mbEncKernelStates[krnIdx];
3097 auto bindingTable = &m_mbEncKernelBindingTable[krnIdx];
3098 if (m_firstTaskInPhase || !m_singleTaskPhaseSupported)
3099 {
3100 CODECHAL_ENCODE_CHK_STATUS_RETURN(RequestSshAndVerifyCommandBufferSize(kernelState));
3101 }
3102
3103 // Setup DSH
3104 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->AssignDshAndSshSpace(
3105 m_stateHeapInterface,
3106 kernelState,
3107 false,
3108 0,
3109 false,
3110 m_storeData));
3111
3112 // Setup CURBE
3113 int32_t qp = CalSliceQp();
3114 uint32_t sliceQp = (uint32_t)qp;
3115 uint32_t log2MaxCUSize = m_hevcSeqParams->log2_max_coding_block_size_minus3 + 3;
3116
3117 CODECHAL_FEI_HEVC_I_8x8_PU_FMODE_CURBE_G9 cmd, *curbe = &cmd;
3118 MOS_ZeroMemory(curbe, sizeof(*curbe));
3119 curbe->DW0.FrameWidth = MOS_ALIGN_CEIL(m_frameWidth, CODECHAL_MACROBLOCK_WIDTH);
3120 curbe->DW0.FrameHeight = MOS_ALIGN_CEIL(m_frameHeight, CODECHAL_MACROBLOCK_HEIGHT);
3121
3122 curbe->DW1.SliceType = PicCodingTypeToSliceType(m_hevcPicParams->CodingType);
3123 curbe->DW1.PuType = 2;
3124 curbe->DW1.PakReordingFlag = (m_hevcPicParams->CodingType == I_TYPE)? true : false;
3125 curbe->DW1.LCUType = (log2MaxCUSize == 6)? 0 /*64x64*/: 1 /*32x32*/;
3126 curbe->DW1.ScreenContentFlag = m_hevcPicParams->bScreenContent;
3127 curbe->DW1.EnableIntraEarlyExit = (m_hevcSeqParams->TargetUsage == 0x04) ? ((m_hevcPicParams->CodingType == I_TYPE) ? 0 : 1) : 0;
3128 curbe->DW1.EnableDebugDump = false;
3129 curbe->DW1.BRCEnable = m_encodeParams.bMbQpDataEnabled || m_brcEnabled;
3130 curbe->DW1.LCUBRCEnable = m_encodeParams.bMbQpDataEnabled || m_lcuBrcEnabled;
3131 curbe->DW1.ROIEnable = (m_hevcPicParams->NumROI > 0);
3132 curbe->DW1.FASTSurveillanceFlag = (m_hevcPicParams->CodingType == I_TYPE) ? 0 : m_hevcSeqParams->bVideoSurveillance;
3133 curbe->DW1.EnableRollingIntra = m_hevcPicParams->bEnableRollingIntraRefresh;
3134 curbe->DW1.IntraRefreshEn = m_hevcPicParams->bEnableRollingIntraRefresh;
3135 curbe->DW1.HalfUpdateMixedLCU = 0;
3136 curbe->DW1.EnableQualityImprovement = m_encodeParams.bQualityImprovementEnable;
3137 curbe->DW2.LambdaForLuma = m_fixedPointLambdaForLuma;
3138
3139 if (m_hevcPicParams->CodingType != I_TYPE ||
3140 m_encodeParams.bReportStatisticsEnabled)
3141 {
3142 float hadBias = 2.0f;
3143
3144 double lambdaMd = m_qpLambdaMd[curbe->DW1.SliceType][sliceQp];
3145 lambdaMd = lambdaMd * hadBias;
3146 curbe->DW3.LambdaForDistCalculation = (uint32_t)(lambdaMd*(1<<10));
3147 }
3148 curbe->DW4.ModeCostFor8x8PU_TU8 = 0;
3149 curbe->DW5.ModeCostFor8x8PU_TU4 = 0;
3150 curbe->DW6.SATD16x16PuThreshold = MOS_MAX(200 * ((int32_t)sliceQp - 12), 0);
3151 curbe->DW6.BiasFactorToward8x8 = (m_hevcPicParams->bScreenContent) ? 1024 : 1126+102;
3152 curbe->DW7.Qp = sliceQp;
3153 curbe->DW7.QpForInter = 0;
3154 curbe->DW8.SimplifiedFlagForInter = false;
3155 curbe->DW8.EnableStatsDataDump = m_encodeParams.bReportStatisticsEnabled;
3156 // KBLControlFlag determines the PAK OBJ format as it varies from Gen9 to Gen9.5+
3157 curbe->DW8.KBLControlFlag = UsePlatformControlFlag();
3158 curbe->DW9.IntraRefreshMBNum = m_hevcPicParams->IntraInsertionLocation;
3159 curbe->DW9.IntraRefreshQPDelta = m_hevcPicParams->QpDeltaForInsertedIntra;
3160 curbe->DW9.IntraRefreshUnitInMB = m_hevcPicParams->IntraInsertionSize;
3161
3162 uint32_t startBTI = 0;
3163 curbe->DW16.BTI_PAK_Object = bindingTable->dwBindingTableEntries[startBTI++];
3164 curbe->DW17.BTI_VME_8x8_Mode = bindingTable->dwBindingTableEntries[startBTI++];
3165 curbe->DW18.BTI_Intra_Mode = bindingTable->dwBindingTableEntries[startBTI++];
3166 curbe->DW19.BTI_PAK_Command = bindingTable->dwBindingTableEntries[startBTI++];
3167 curbe->DW20.BTI_Slice_Map = bindingTable->dwBindingTableEntries[startBTI++];
3168 curbe->DW21.BTI_IntraDist = bindingTable->dwBindingTableEntries[startBTI++];
3169 curbe->DW22.BTI_BRC_Input = bindingTable->dwBindingTableEntries[startBTI++];
3170 curbe->DW23.BTI_Simplest_Intra = bindingTable->dwBindingTableEntries[startBTI++];
3171 curbe->DW24.BTI_LCU_Qp_Surface = bindingTable->dwBindingTableEntries[startBTI++];
3172 curbe->DW25.BTI_BRC_Data = bindingTable->dwBindingTableEntries[startBTI++];
3173 curbe->DW26.BTI_Haar_Dist16x16 = bindingTable->dwBindingTableEntries[startBTI++];
3174 curbe->DW27.BTI_Stats_Data = bindingTable->dwBindingTableEntries[startBTI++];
3175 curbe->DW28.BTI_Frame_Stats_Data = bindingTable->dwBindingTableEntries[startBTI++];
3176 curbe->DW29.BTI_CTB_Distortion_Surface = 0;
3177 startBTI++;
3178 curbe->DW30.BTI_Debug = bindingTable->dwBindingTableEntries[startBTI++];
3179
3180 CODECHAL_ENCODE_ASSERT(startBTI == bindingTable->dwNumBindingTableEntries);
3181
3182 CODECHAL_MEDIA_STATE_TYPE encFunctionType = CODECHAL_MEDIA_STATE_8x8_PU_FMODE;
3183 CODECHAL_ENCODE_CHK_STATUS_RETURN(
3184 AddCurbeToStateHeap(kernelState, encFunctionType, &cmd, sizeof(cmd)));
3185
3186 MOS_COMMAND_BUFFER cmdBuffer;
3187 CODECHAL_ENCODE_CHK_STATUS_RETURN(SendKernelCmdsAndBindingTable(
3188 &cmdBuffer,
3189 kernelState,
3190 encFunctionType,
3191 nullptr));
3192
3193 //Add surface states
3194 startBTI = 0;
3195
3196 // PAK object
3197 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
3198 kernelState,
3199 &cmdBuffer,
3200 SURFACE_CU_RECORD,
3201 &bindingTable->dwBindingTableEntries[startBTI++]));
3202
3203 // VME 8x8 mode
3204 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
3205 kernelState,
3206 &cmdBuffer,
3207 SURFACE_VME_8x8,
3208 &bindingTable->dwBindingTableEntries[startBTI++]));
3209
3210 // Intra mode
3211 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
3212 kernelState,
3213 &cmdBuffer,
3214 SURFACE_INTRA_MODE,
3215 &bindingTable->dwBindingTableEntries[startBTI++]));
3216
3217 // PAK command
3218 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
3219 kernelState,
3220 &cmdBuffer,
3221 SURFACE_HCP_PAK,
3222 &bindingTable->dwBindingTableEntries[startBTI++]));
3223
3224 // Slice Map
3225 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
3226 kernelState,
3227 &cmdBuffer,
3228 SURFACE_SLICE_MAP,
3229 &bindingTable->dwBindingTableEntries[startBTI++]));
3230
3231 // Intra dist
3232 m_surfaceParams[SURFACE_INTRA_DIST].bIsWritable =
3233 m_surfaceParams[SURFACE_INTRA_DIST].bRenderTarget = true;
3234 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
3235 kernelState,
3236 &cmdBuffer,
3237 SURFACE_INTRA_DIST,
3238 &bindingTable->dwBindingTableEntries[startBTI++]));
3239
3240 // BRC Input
3241 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
3242 kernelState,
3243 &cmdBuffer,
3244 SURFACE_BRC_INPUT,
3245 &bindingTable->dwBindingTableEntries[startBTI++]));
3246
3247 // Simplest Intra
3248 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
3249 kernelState,
3250 &cmdBuffer,
3251 SURFACE_SIMPLIFIED_INTRA,
3252 &bindingTable->dwBindingTableEntries[startBTI++]));
3253
3254 // LCU Qp surface
3255 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
3256 kernelState,
3257 &cmdBuffer,
3258 SURFACE_LCU_QP,
3259 &bindingTable->dwBindingTableEntries[startBTI++]));
3260
3261 // BRC data surface
3262 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
3263 kernelState,
3264 &cmdBuffer,
3265 SURFACE_BRC_DATA,
3266 &bindingTable->dwBindingTableEntries[startBTI++]));
3267
3268 // skip the Haar distortion surface, the statistics data dump surface
3269 // and the frame-level statistics data surface because they are not used
3270
3271 if (!m_hwWalker)
3272 {
3273 eStatus = MOS_STATUS_UNKNOWN;
3274 CODECHAL_ENCODE_ASSERTMESSAGE("Currently HW walker shall not be disabled for CM based down scaling kernel.");
3275 return eStatus;
3276 }
3277
3278 CODECHAL_WALKER_CODEC_PARAMS walkerCodecParams;
3279 MOS_ZeroMemory(&walkerCodecParams, sizeof(walkerCodecParams));
3280 walkerCodecParams.WalkerMode = m_walkerMode;
3281 // each EU is based on one LCU
3282 walkerCodecParams.dwResolutionX = MOS_ALIGN_CEIL(m_frameWidth, (1<<log2MaxCUSize)) >> log2MaxCUSize;
3283 walkerCodecParams.dwResolutionY = MOS_ALIGN_CEIL(m_frameHeight, (1<<log2MaxCUSize)) >> log2MaxCUSize;
3284 /* Enforce no dependency dispatch order for 8x8 PU FMODE kernel */
3285 walkerCodecParams.bNoDependency = true;
3286
3287 MHW_WALKER_PARAMS walkerParams;
3288 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalInitMediaObjectWalkerParams(
3289 m_hwInterface,
3290 &walkerParams,
3291 &walkerCodecParams));
3292
3293 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_renderEngineInterface->AddMediaObjectWalkerCmd(
3294 &cmdBuffer,
3295 &walkerParams));
3296
3297 CODECHAL_ENCODE_CHK_STATUS_RETURN(EndKernelCall(
3298 encFunctionType,
3299 kernelState,
3300 &cmdBuffer));
3301
3302 return eStatus;
3303 }
3304
//!
//! \brief    Programs and submits the 32x32 B intra check MBEnc kernel
//!           (kernel index CODECHAL_HEVC_MBENC_32x32INTRACHECK).
//! \details  Steps, in order: request SSH / verify command buffer size (first task in
//!           phase or when single task phase is not supported), assign DSH/SSH space,
//!           compute m_fixedPointLambda, fill the CURBE (including the binding table
//!           indices), send kernel commands and binding table, add the surface states,
//!           and add a media object walker command whose grid is the frame size in
//!           32x32 units. Each step is wrapped in CODECHAL_ENCODE_CHK_STATUS_RETURN.
//! \return   MOS_STATUS
//!           eStatus (initialized to MOS_STATUS_SUCCESS) on the normal path;
//!           MOS_STATUS_UNKNOWN when m_hwWalker is false. Presumably the status of the
//!           first failing checked call otherwise (the CHK macro is defined elsewhere).
//!
Encode32X32BIntraCheckKernel()3305 MOS_STATUS CodechalFeiHevcStateG9Skl::Encode32X32BIntraCheckKernel()
3306 {
3307 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
3308
3309 CODECHAL_ENCODE_FUNCTION_ENTER;
3310
3311 PerfTagSetting perfTag;
3312 CODECHAL_ENCODE_SET_PERFTAG_INFO(perfTag, CODECHAL_ENCODE_PERFTAG_CALL_32X32_B_IC);
3313
// Kernel state and binding table are both selected by the same kernel index.
3314 uint32_t krnIdx = CODECHAL_HEVC_MBENC_32x32INTRACHECK;
3315 auto kernelState = &m_mbEncKernelStates[krnIdx];
3316 auto bindingTable = &m_mbEncKernelBindingTable[krnIdx];
3317
3318 if (m_firstTaskInPhase || !m_singleTaskPhaseSupported)
3319 {
3320 CODECHAL_ENCODE_CHK_STATUS_RETURN(RequestSshAndVerifyCommandBufferSize(kernelState));
3321 }
3322
3323 // Setup DSH
3324 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->AssignDshAndSshSpace(
3325 m_stateHeapInterface,
3326 kernelState,
3327 false,
3328 0,
3329 false,
3330 m_storeData));
3331
3332 // Setup CURBE
// Lambda is calculated for a P slice when the picture is P_TYPE, and for a B slice otherwise.
3333 if (m_pictureCodingType == P_TYPE)
3334 {
3335 CalcLambda(CODECHAL_ENCODE_HEVC_P_SLICE, INTRA_TRANSFORM_HAAR);
3336 }
3337 else
3338 {
3339 CalcLambda(CODECHAL_ENCODE_HEVC_B_SLICE, INTRA_TRANSFORM_HAAR);
3340 }
3341 int32_t sliceQp = CalSliceQp();
3342
// NOTE(review): CalcLambda above is called with the P or B slice type, but the lookup
// below reads the I-slice row of m_qpLambdaMd - confirm this is intentional.
// m_fixedPointLambda = lambda^2 (times a scaling factor of 1.0) scaled by 1<<10 and
// converted to uint32_t.
3343 double lambdaScalingFactor = 1.0;
3344 double qpLambda = m_qpLambdaMd[CODECHAL_ENCODE_HEVC_I_SLICE][sliceQp];
3345 double squaredQpLambda = qpLambda * qpLambda;
3346 m_fixedPointLambda = (uint32_t)(lambdaScalingFactor * squaredQpLambda * (1<<10));
3347
// The CURBE is built in a zero-initialized local and copied to the state heap below.
3348 CODECHAL_FEI_HEVC_B_32x32_PU_INTRA_CHECK_CURBE_G9 cmd, *curbe = &cmd;
3349 MOS_ZeroMemory(curbe, sizeof(*curbe));
3350 curbe->DW0.FrameWidth = MOS_ALIGN_CEIL(m_frameWidth, CODECHAL_MACROBLOCK_WIDTH);
3351 curbe->DW0.FrameHeight = MOS_ALIGN_CEIL(m_frameHeight, CODECHAL_MACROBLOCK_HEIGHT);
3352
3353 curbe->DW1.EnableDebugDump = false;
3354 curbe->DW1.EnableIntraEarlyExit = (m_hevcPicParams->CodingType == I_TYPE) ? 0 : 1;
3355 curbe->DW1.Flags = 0;
3356 curbe->DW1.Log2MinTUSize = m_hevcSeqParams->log2_min_transform_block_size_minus2 + 2;
3357 curbe->DW1.SliceType = m_hevcSliceParams->slice_type;
3358 curbe->DW1.HMEEnable = 0;
3359 curbe->DW1.FASTSurveillanceFlag = (m_hevcPicParams->CodingType == I_TYPE) ? 0 : m_hevcSeqParams->bVideoSurveillance;
3360
3361 curbe->DW2.QpMultiplier = 100;
3362 curbe->DW2.QpValue = 0; // MBZ
3363
// Binding table indices are consumed strictly in binding-table order; the surface
// state section further down walks the same order and must stay in step with it.
3364 uint32_t startIndex = 0;
3365 curbe->DW8.BTI_Per32x32PuIntraCheck = bindingTable->dwBindingTableEntries[startIndex++];
3366 curbe->DW9.BTI_Src_Y = bindingTable->dwBindingTableEntries[startIndex++];
3367 startIndex++; // skip one BTI for Y and UV have the same BTI
3368 curbe->DW10.BTI_Src_Y2X = bindingTable->dwBindingTableEntries[startIndex++];
3369 curbe->DW11.BTI_Slice_Map = bindingTable->dwBindingTableEntries[startIndex++];
3370 curbe->DW12.BTI_VME_Y2X = bindingTable->dwBindingTableEntries[startIndex++];
3371 curbe->DW13.BTI_Simplest_Intra = bindingTable->dwBindingTableEntries[startIndex++];
3372 curbe->DW14.BTI_HME_MVPred = bindingTable->dwBindingTableEntries[startIndex++];
3373 curbe->DW15.BTI_HME_Dist = bindingTable->dwBindingTableEntries[startIndex++];
3374 curbe->DW16.BTI_LCU_Skip = bindingTable->dwBindingTableEntries[startIndex++];
3375 curbe->DW17.BTI_Debug = bindingTable->dwBindingTableEntries[startIndex++];
3376
// Every binding table entry must have been consumed by the assignments above.
3377 CODECHAL_ENCODE_ASSERT(startIndex == bindingTable->dwNumBindingTableEntries);
3378
3379 CODECHAL_MEDIA_STATE_TYPE encFunctionType = CODECHAL_MEDIA_STATE_32x32_B_INTRA_CHECK;
3380 CODECHAL_ENCODE_CHK_STATUS_RETURN(
3381 AddCurbeToStateHeap(kernelState, encFunctionType, &cmd, sizeof(cmd)));
3382
// cmdBuffer is not initialized here; presumably SendKernelCmdsAndBindingTable fills it in - confirm.
3383 MOS_COMMAND_BUFFER cmdBuffer;
3384 CODECHAL_ENCODE_CHK_STATUS_RETURN(SendKernelCmdsAndBindingTable(
3385 &cmdBuffer,
3386 kernelState,
3387 encFunctionType,
3388 nullptr));
3389
3390 //Add surface states
3391 startIndex = 0;
3392
3393 // 32x32 PU B Intra Check Output
// Marked writable / render target before the surface state is set.
3394 m_surfaceParams[SURFACE_32x32_PU_OUTPUT].bIsWritable =
3395 m_surfaceParams[SURFACE_32x32_PU_OUTPUT].bRenderTarget = true;
3396 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
3397 kernelState,
3398 &cmdBuffer,
3399 SURFACE_32x32_PU_OUTPUT,
3400 &bindingTable->dwBindingTableEntries[startIndex++]));
3401
3402 // Source Y and UV
3403 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
3404 kernelState,
3405 &cmdBuffer,
3406 SURFACE_RAW_Y_UV,
3407 &bindingTable->dwBindingTableEntries[startIndex++]));
// Extra increment mirrors the skipped BTI in the CURBE setup above.
3408 startIndex++;
3409
3410 // Source Y2x
3411 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
3412 kernelState,
3413 &cmdBuffer,
3414 SURFACE_Y_2X,
3415 &bindingTable->dwBindingTableEntries[startIndex++]));
3416
3417 // Slice map
3418 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
3419 kernelState,
3420 &cmdBuffer,
3421 SURFACE_SLICE_MAP,
3422 &bindingTable->dwBindingTableEntries[startIndex++]));
3423
3424 // Source Y2x for VME
3425 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
3426 kernelState,
3427 &cmdBuffer,
3428 SURFACE_Y_2X_VME,
3429 &bindingTable->dwBindingTableEntries[startIndex++]));
3430
3431 // Simplest Intra
3432 m_surfaceParams[SURFACE_SIMPLIFIED_INTRA].bIsWritable =
3433 m_surfaceParams[SURFACE_SIMPLIFIED_INTRA].bRenderTarget = true;
3434 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
3435 kernelState,
3436 &cmdBuffer,
3437 SURFACE_SIMPLIFIED_INTRA,
3438 &bindingTable->dwBindingTableEntries[startIndex++]));
3439
3440 // skip SURFACE_HME_MVP and SURFACE_HME_DIST from HME since FEI always disables HME
3441 startIndex += 2;
3442
3443 // LCU Qp/Skip surface
3444 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
3445 kernelState,
3446 &cmdBuffer,
3447 SURFACE_LCU_QP,
3448 &bindingTable->dwBindingTableEntries[startIndex++]));
3449
// The last binding table entry (BTI_Debug in the CURBE) gets no surface state here.

// NOTE(review): the assert text below mentions the "down scaling kernel" although this
// is the 32x32 B intra check kernel - looks copy-pasted; confirm before changing.
3450 if (!m_hwWalker)
3451 {
3452 eStatus = MOS_STATUS_UNKNOWN;
3453 CODECHAL_ENCODE_ASSERTMESSAGE("Currently HW walker shall not be disabled for CM based down scaling kernel.");
3454 return eStatus;
3455 }
3456
3457 CODECHAL_WALKER_CODEC_PARAMS walkerCodecParams;
3458 MOS_ZeroMemory(&walkerCodecParams, sizeof(walkerCodecParams));
3459 walkerCodecParams.WalkerMode = m_walkerMode;
3460 /* Walker grid: frame size rounded up to a multiple of 32, in units of 32x32 blocks */
3461 walkerCodecParams.dwResolutionX = MOS_ALIGN_CEIL(m_frameWidth, 32) >> 5;
3462 walkerCodecParams.dwResolutionY = MOS_ALIGN_CEIL(m_frameHeight, 32) >> 5;
3463 /* Enforce no dependency dispatch order for 32x32 B Intra Check kernel */
3464 walkerCodecParams.bNoDependency = true;
3465
3466 MHW_WALKER_PARAMS walkerParams;
3467 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalInitMediaObjectWalkerParams(
3468 m_hwInterface,
3469 &walkerParams,
3470 &walkerCodecParams));
3471
3472 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_renderEngineInterface->AddMediaObjectWalkerCmd(
3473 &cmdBuffer,
3474 &walkerParams));
3475
3476 CODECHAL_ENCODE_CHK_STATUS_RETURN(EndKernelCall(
3477 encFunctionType,
3478 kernelState,
3479 &cmdBuffer));
3480
3481 return eStatus;
3482 }
3483
//!
//! \brief    Programs and submits the B PAK kernel (kernel index CODECHAL_HEVC_FEI_MBENC_BPAK).
//! \details  Steps, in order: request SSH / verify command buffer size (first task in
//!           phase or when single task phase is not supported), assign DSH/SSH space,
//!           fill the PAK CURBE (partly from the B MBEnc CURBE passed in, partly from the
//!           picture/sequence parameters), send kernel commands and binding table, add
//!           the surface states, and add a media object walker command whose grid is the
//!           frame size in 32x32 units.
//! \param    [in] pEncBCurbe
//!           B MBEnc CURBE whose DW4, DW13, DW36, DW44 and DW47 fields are copied into the
//!           PAK CURBE. Checked with CODECHAL_ENCODE_CHK_NULL_RETURN before use.
//! \return   MOS_STATUS
//!           eStatus (initialized to MOS_STATUS_SUCCESS) on the normal path;
//!           MOS_STATUS_UNKNOWN when m_hwWalker is false. Presumably the status of the
//!           first failing checked call otherwise (the CHK macros are defined elsewhere).
//!
Encode8x8BPakKernel(PCODECHAL_FEI_HEVC_B_MB_ENC_CURBE_G9 pEncBCurbe)3484 MOS_STATUS CodechalFeiHevcStateG9Skl::Encode8x8BPakKernel(
3485 PCODECHAL_FEI_HEVC_B_MB_ENC_CURBE_G9 pEncBCurbe)
3486 {
3487 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
3488
3489 CODECHAL_ENCODE_FUNCTION_ENTER;
3490
3491 CODECHAL_ENCODE_CHK_NULL_RETURN(pEncBCurbe);
3492
3493 PerfTagSetting perfTag;
3494 CODECHAL_ENCODE_SET_PERFTAG_INFO(perfTag, CODECHAL_ENCODE_PERFTAG_CALL_PAK_KERNEL);
3495
// Kernel state and binding table are both selected by the same kernel index.
3496 uint32_t krnIdx = CODECHAL_HEVC_FEI_MBENC_BPAK;
3497 auto kernelState = &m_mbEncKernelStates[krnIdx];
3498 auto bindingTable = &m_mbEncKernelBindingTable[krnIdx];
3499 if (m_firstTaskInPhase || !m_singleTaskPhaseSupported)
3500 {
3501 CODECHAL_ENCODE_CHK_STATUS_RETURN(RequestSshAndVerifyCommandBufferSize(kernelState));
3502 }
3503
3504 //Setup DSH
3505 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->AssignDshAndSshSpace(
3506 m_stateHeapInterface,
3507 kernelState,
3508 false,
3509 0,
3510 false,
3511 m_storeData));
3512
3513 //Setup CURBE
// The CURBE is built in a zero-initialized local and copied to the state heap below.
3514 CODECHAL_FEI_HEVC_B_PAK_CURBE_G9 cmd, *curbe = &cmd;
3515 MOS_ZeroMemory(curbe, sizeof(*curbe));
3516 curbe->DW0.FrameWidth = MOS_ALIGN_CEIL(m_frameWidth, CODECHAL_MACROBLOCK_WIDTH);
3517 curbe->DW0.FrameHeight = MOS_ALIGN_CEIL(m_frameHeight, CODECHAL_MACROBLOCK_HEIGHT);
3518
// Fields taken from pEncBCurbe mirror the values already programmed in the B MBEnc CURBE.
3519 curbe->DW1.MaxVmvR = pEncBCurbe->DW44.MaxVmvR;
3520 curbe->DW1.Qp = pEncBCurbe->DW13.QpPrimeY;
3521 curbe->DW2.BrcEnable = pEncBCurbe->DW36.BRCEnable;
3522 curbe->DW2.LcuBrcEnable = pEncBCurbe->DW36.LCUBRCEnable;
3523 curbe->DW2.ScreenContent = pEncBCurbe->DW47.ScreenContentFlag;
3524 curbe->DW2.SimplestIntraEnable = pEncBCurbe->DW47.SkipIntraKrnFlag;
3525 curbe->DW2.SliceType = pEncBCurbe->DW4.SliceType;
3526 curbe->DW2.EnableWA = 0;
3527 curbe->DW2.ROIEnable = (m_hevcPicParams->NumROI > 0);
3528 curbe->DW2.FASTSurveillanceFlag = (m_hevcPicParams->CodingType == I_TYPE) ? 0 : m_hevcSeqParams->bVideoSurveillance;
3529 // KBLControlFlag determines the PAK OBJ format as it varies from Gen9 to Gen9.5+
3530 curbe->DW2.KBLControlFlag = UsePlatformControlFlag();
3531 curbe->DW2.EnableRollingIntra = m_hevcPicParams->bEnableRollingIntraRefresh;
3532 curbe->DW2.EnableQualityImprovement = m_encodeParams.bQualityImprovementEnable;
3533 curbe->DW3.IntraRefreshQPDelta = m_hevcPicParams->QpDeltaForInsertedIntra;
3534 curbe->DW3.IntraRefreshMBNum = m_hevcPicParams->IntraInsertionLocation;
3535 curbe->DW3.IntraRefreshUnitInMB = m_hevcPicParams->IntraInsertionSize;
3536
// Binding table indices are consumed strictly in binding-table order; the surface
// state section further down walks the same order for entries 0-7.
3537 uint32_t startBTI = 0;
3538 curbe->DW16.BTI_CU_Record = bindingTable->dwBindingTableEntries[startBTI++];
3539 curbe->DW17.BTI_PAK_Obj = bindingTable->dwBindingTableEntries[startBTI++];
3540 curbe->DW18.BTI_Slice_Map = bindingTable->dwBindingTableEntries[startBTI++];
3541 curbe->DW19.BTI_Brc_Input = bindingTable->dwBindingTableEntries[startBTI++];
3542 curbe->DW20.BTI_LCU_Qp = bindingTable->dwBindingTableEntries[startBTI++];
3543 curbe->DW21.BTI_Brc_Data = bindingTable->dwBindingTableEntries[startBTI++];
3544 curbe->DW22.BTI_MB_Data = bindingTable->dwBindingTableEntries[startBTI++];
3545 curbe->DW23.BTI_MVP_Surface = bindingTable->dwBindingTableEntries[startBTI++];
3546 curbe->DW24.BTI_WA_PAK_Data = bindingTable->dwBindingTableEntries[startBTI++];
3547 curbe->DW25.BTI_WA_PAK_Obj = bindingTable->dwBindingTableEntries[startBTI++];
3548 curbe->DW26.BTI_Debug = bindingTable->dwBindingTableEntries[startBTI++];
3549
// Every binding table entry must have been consumed by the assignments above.
3550 CODECHAL_ENCODE_ASSERT(startBTI == bindingTable->dwNumBindingTableEntries);
3551
3552 CODECHAL_MEDIA_STATE_TYPE encFunctionType = CODECHAL_MEDIA_STATE_HEVC_B_PAK;
3553 CODECHAL_ENCODE_CHK_STATUS_RETURN(
3554 AddCurbeToStateHeap(kernelState, encFunctionType, &cmd, sizeof(cmd)));
3555
// cmdBuffer is not initialized here; presumably SendKernelCmdsAndBindingTable fills it in - confirm.
3556 MOS_COMMAND_BUFFER cmdBuffer;
3557 CODECHAL_ENCODE_CHK_STATUS_RETURN(SendKernelCmdsAndBindingTable(
3558 &cmdBuffer,
3559 kernelState,
3560 encFunctionType,
3561 nullptr));
3562
3563 //Add surface states
3564 startBTI = 0;
3565 //0: CU record
3566 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
3567 kernelState,
3568 &cmdBuffer,
3569 SURFACE_CU_RECORD,
3570 &bindingTable->dwBindingTableEntries[startBTI++]));
3571
3572 //1: PAK command
3573 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
3574 kernelState,
3575 &cmdBuffer,
3576 SURFACE_HCP_PAK,
3577 &bindingTable->dwBindingTableEntries[startBTI++]));
3578
3579 //2: slice map
3580 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
3581 kernelState,
3582 &cmdBuffer,
3583 SURFACE_SLICE_MAP,
3584 &bindingTable->dwBindingTableEntries[startBTI++]));
3585
3586 // 3: BRC Input
3587 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
3588 kernelState,
3589 &cmdBuffer,
3590 SURFACE_BRC_INPUT,
3591 &bindingTable->dwBindingTableEntries[startBTI++]));
3592
3593 // 4: LCU Qp
3594 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
3595 kernelState,
3596 &cmdBuffer,
3597 SURFACE_LCU_QP,
3598 &bindingTable->dwBindingTableEntries[startBTI++]));
3599
3600 // 5: LCU BRC constant
3601 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
3602 kernelState,
3603 &cmdBuffer,
3604 SURFACE_BRC_DATA,
3605 &bindingTable->dwBindingTableEntries[startBTI++]));
3606
3607 // 6: MV index buffer or MB data
3608 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
3609 kernelState,
3610 &cmdBuffer,
3611 SURFACE_MB_MV_INDEX,
3612 &bindingTable->dwBindingTableEntries[startBTI++]));
3613
3614 // 7: MVP index buffer
3615 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
3616 kernelState,
3617 &cmdBuffer,
3618 SURFACE_MVP_INDEX,
3619 &bindingTable->dwBindingTableEntries[startBTI++]));
3620
3621 // skip 8 and 9 for SURFACE_WA_CU_RECORD and SURFACE_WA_HCP_PAK
// Entry 10 (BTI_Debug in the CURBE) gets no surface state here either.
3622
// NOTE(review): the assert text below mentions the "down scaling kernel" although this
// is the B PAK kernel - looks copy-pasted; confirm before changing.
3623 if (!m_hwWalker)
3624 {
3625 eStatus = MOS_STATUS_UNKNOWN;
3626 CODECHAL_ENCODE_ASSERTMESSAGE("Currently HW walker shall not be disabled for CM based down scaling kernel.");
3627 return eStatus;
3628 }
3629
3630 CODECHAL_WALKER_CODEC_PARAMS walkerCodecParams;
3631 MOS_ZeroMemory(&walkerCodecParams, sizeof(walkerCodecParams));
3632 walkerCodecParams.WalkerMode = m_walkerMode;
3633 /* Walker grid: frame size rounded up to a multiple of 32, in units of 32x32 blocks */
3634 walkerCodecParams.dwResolutionX = MOS_ALIGN_CEIL(m_frameWidth, 32) >> 5;
3635 walkerCodecParams.dwResolutionY = MOS_ALIGN_CEIL(m_frameHeight, 32) >> 5;
3636 /* Enforce no dependency dispatch order for the B PAK kernel */
3637 walkerCodecParams.bNoDependency = true;
3638 walkerCodecParams.wPictureCodingType = m_pictureCodingType;
3639 walkerCodecParams.bUseScoreboard = m_useHwScoreboard;
3640
3641 MHW_WALKER_PARAMS walkerParams;
3642 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalInitMediaObjectWalkerParams(
3643 m_hwInterface,
3644 &walkerParams,
3645 &walkerCodecParams));
3646
3647 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_renderEngineInterface->AddMediaObjectWalkerCmd(
3648 &cmdBuffer,
3649 &walkerParams));
3650
3651 CODECHAL_ENCODE_CHK_STATUS_RETURN(EndKernelCall(
3652 encFunctionType,
3653 kernelState,
3654 &cmdBuffer));
3655
3656 return eStatus;
3657 }
3658
Encode8x8PBMbEncKernel()3659 MOS_STATUS CodechalFeiHevcStateG9Skl::Encode8x8PBMbEncKernel()
3660 {
3661 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
3662
3663 CODECHAL_ENCODE_FUNCTION_ENTER;
3664
3665 PerfTagSetting perfTag;
3666 CODECHAL_ENCODE_SET_PERFTAG_INFO(perfTag, CODECHAL_ENCODE_PERFTAG_CALL_MBENC_KERNEL);
3667
3668 uint32_t krnIdx = CODECHAL_HEVC_FEI_MBENC_BENC;
3669 if (m_pictureCodingType == P_TYPE)
3670 {
3671 //krnIdx = m_hevcPicParams->bEnableRollingIntraRefresh ? CODECHAL_HEVC_FEI_MBENC_ADV_P : CODECHAL_HEVC_FEI_MBENC_PENC;
3672 krnIdx = CODECHAL_HEVC_FEI_MBENC_PENC;
3673 }
3674 else if (m_pictureCodingType == B_TYPE)
3675 {
3676 // In TU7, we still need the original ENC B kernel to process the I frame
3677 //krnIdx = m_hevcPicParams->bEnableRollingIntraRefresh ? CODECHAL_HEVC_FEI_MBENC_ADV : CODECHAL_HEVC_FEI_MBENC_BENC;
3678 krnIdx = CODECHAL_HEVC_FEI_MBENC_BENC;
3679 }
3680
3681 auto kernelState = &m_mbEncKernelStates[krnIdx];
3682 auto bindingTable = &m_mbEncKernelBindingTable[krnIdx];
3683 if (m_firstTaskInPhase || !m_singleTaskPhaseSupported)
3684 {
3685 CODECHAL_ENCODE_CHK_STATUS_RETURN(RequestSshAndVerifyCommandBufferSize(kernelState));
3686 }
3687
3688 int32_t sliceQp = CalSliceQp();
3689 uint8_t sliceType = PicCodingTypeToSliceType(m_pictureCodingType);
3690
3691 if (m_feiPicParams->FastIntraMode)
3692 {
3693 // When TU=7, lambda is not computed in the 32x32 MD stage for it is skipped.
3694 CalcLambda(sliceType, INTRA_TRANSFORM_HAAR);
3695 }
3696 LoadCosts(sliceType, (uint8_t)sliceQp, INTRA_TRANSFORM_REGULAR);
3697
3698 uint8_t mbCodeIdxForTempMVP = 0xFF;
3699 if(m_pictureCodingType != I_TYPE)
3700 {
3701 if(m_hevcPicParams->CollocatedRefPicIndex != 0xFF && m_hevcPicParams->CollocatedRefPicIndex < CODEC_MAX_NUM_REF_FRAME_HEVC)
3702 {
3703 uint8_t FrameIdx = m_hevcPicParams->RefFrameList[m_hevcPicParams->CollocatedRefPicIndex].FrameIdx;
3704
3705 mbCodeIdxForTempMVP = m_refList[FrameIdx]->ucScalingIdx;
3706 }
3707
3708 if(mbCodeIdxForTempMVP == 0xFF && m_hevcSliceParams->slice_temporal_mvp_enable_flag)
3709 {
3710 // Temporal reference MV index is invalid and so disable the temporal MVP
3711 CODECHAL_ENCODE_ASSERT(false);
3712 m_hevcSliceParams->slice_temporal_mvp_enable_flag = false;
3713 }
3714 }
3715
3716 CODECHAL_ENCODE_CHK_STATUS_RETURN(GenerateWalkingControlRegion());
3717
3718 //Setup DSH
3719 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->AssignDshAndSshSpace(
3720 m_stateHeapInterface,
3721 kernelState,
3722 false,
3723 0,
3724 false,
3725 m_storeData));
3726
3727 //Setup CURBE
3728 uint8_t forwardTransformThd[7] = { 0 };
3729 CalcForwardCoeffThd(forwardTransformThd, sliceQp);
3730
3731 uint32_t curbeSize = 0;
3732 void *defaultCurbe = (void *)GetDefaultCurbeEncBKernel(curbeSize);
3733 CODECHAL_ENCODE_ASSERT(defaultCurbe);
3734
3735 CODECHAL_FEI_HEVC_B_MB_ENC_CURBE_G9 cmd, *curbe = &cmd;
3736 CODECHAL_ENCODE_CHK_STATUS_RETURN(MOS_SecureMemcpy(curbe, sizeof(cmd), defaultCurbe, curbeSize));
3737
3738 bool transform_8x8_mode_flag = true;
3739 uint32_t SearchPath = (m_feiPicParams->SearchWindow == 5) ? 2 : 1; // 2 means full search, 1 means diamand search
3740 uint32_t LenSP = m_feiPicParams->LenSP;
3741 uint32_t RefWidth = m_feiPicParams->RefWidth;
3742 uint32_t RefHeight = m_feiPicParams->RefHeight;
3743
3744 switch (m_feiPicParams->SearchWindow)
3745 {
3746 case 0:
3747 // not use predefined search window
3748 if((m_feiPicParams->SearchPath != 0) && (m_feiPicParams->SearchPath != 1) && (m_feiPicParams->SearchPath != 2))
3749 {
3750 CODECHAL_ENCODE_ASSERTMESSAGE("Invalid picture FEI MB ENC input SearchPath for SearchWindow=0 case!!!.");
3751 eStatus = MOS_STATUS_INVALID_PARAMETER;
3752 return eStatus;
3753 }
3754 SearchPath = m_feiPicParams->SearchPath;
3755 if(((RefWidth * RefHeight) > 2048) || (RefWidth > 64) || (RefHeight > 64))
3756 {
3757 CODECHAL_ENCODE_ASSERTMESSAGE("Invalid picture FEI MB ENC input RefWidth/RefHeight size for SearchWindow=0 case!!!.");
3758 eStatus = MOS_STATUS_INVALID_PARAMETER;
3759 return eStatus;
3760 }
3761 break;
3762 case 1:
3763 // Tiny SUs 24x24 window
3764 RefWidth = 24;
3765 RefHeight = 24;
3766 LenSP = 4;
3767 break;
3768 case 2:
3769 // Small SUs 28x28 window
3770 RefWidth = 28;
3771 RefHeight = 28;
3772 LenSP = 9;
3773 break;
3774 case 3:
3775 // Diamond SUs 48x40 window
3776 RefWidth = 48;
3777 RefHeight = 40;
3778 LenSP = 16;
3779 break;
3780 case 4:
3781 // Large Diamond SUs 48x40 window
3782 RefWidth = 48;
3783 RefHeight = 40;
3784 LenSP = 32;
3785 break;
3786 case 5:
3787 // Exhaustive SUs 48x40 window
3788 RefWidth = 48;
3789 RefHeight = 40;
3790 LenSP = 48;
3791 if (m_hevcSeqParams->TargetUsage != 7)
3792 {
3793 if (m_pictureCodingType == B_TYPE)
3794 {
3795 LenSP = 48;
3796 } else {
3797 LenSP = 57;
3798 }
3799 } else {
3800 LenSP = 25;
3801 }
3802 break;
3803 default:
3804 CODECHAL_ENCODE_ASSERTMESSAGE("Invalid picture FEI MB ENC SearchWindow value for HEVC FEI on SKL!!!.");
3805 eStatus = MOS_STATUS_INVALID_PARAMETER;
3806 return eStatus;
3807 }
3808
3809 if((m_pictureCodingType == B_TYPE) && (curbe->DW3.BMEDisableFBR == 0))
3810 {
3811 if(RefWidth > 32)
3812 {
3813 RefWidth = 32;
3814 }
3815 if(RefHeight > 32)
3816 {
3817 RefHeight = 32;
3818 }
3819 }
3820
3821 curbe->DW0.AdaptiveEn = m_feiPicParams->AdaptiveSearch;
3822 curbe->DW0.T8x8FlagForInterEn = transform_8x8_mode_flag;
3823 curbe->DW2.PicWidth = m_picWidthInMb;
3824 curbe->DW2.LenSP = LenSP;
3825 curbe->DW3.SrcAccess = curbe->DW3.RefAccess = 0;
3826 if (m_feiPicParams->FastIntraMode)
3827 {
3828 curbe->DW3.FTEnable = (m_ftqBasedSkip[0x07] >> 1) & 0x01;
3829 }
3830 else
3831 {
3832 curbe->DW3.FTEnable = (m_ftqBasedSkip[0x04] >> 1) & 0x01;
3833 }
3834 curbe->DW3.SubPelMode = m_feiPicParams->SubPelMode;
3835
3836 curbe->DW4.PicHeightMinus1 = m_picHeightInMb - 1;
3837 curbe->DW4.EnableStatsDataDump = m_encodeParams.bReportStatisticsEnabled;
3838 curbe->DW4.HMEEnable = 0;
3839 curbe->DW4.SliceType = sliceType;
3840 curbe->DW4.EnableQualityImprovement = m_encodeParams.bQualityImprovementEnable;
3841 curbe->DW4.UseActualRefQPValue = false;
3842
3843 curbe->DW5.RefWidth = RefWidth;
3844 curbe->DW5.RefHeight = RefHeight;
3845
3846 curbe->DW7.IntraPartMask = 0x3;
3847
3848 curbe->DW6.FrameWidth = m_picWidthInMb * CODECHAL_MACROBLOCK_WIDTH;
3849 curbe->DW6.FrameHeight = m_picHeightInMb * CODECHAL_MACROBLOCK_HEIGHT;
3850
3851 curbe->DW8.Mode0Cost = m_modeCost[0];
3852 curbe->DW8.Mode1Cost = m_modeCost[1];
3853 curbe->DW8.Mode2Cost = m_modeCost[2];
3854 curbe->DW8.Mode3Cost = m_modeCost[3];
3855
3856 curbe->DW9.Mode4Cost = m_modeCost[4];
3857 curbe->DW9.Mode5Cost = m_modeCost[5];
3858 curbe->DW9.Mode6Cost = m_modeCost[6];
3859 curbe->DW9.Mode7Cost = m_modeCost[7];
3860
3861 curbe->DW10.Mode8Cost= m_modeCost[8];
3862 curbe->DW10.Mode9Cost= m_modeCost[9];
3863 curbe->DW10.RefIDCost = m_modeCost[10];
3864 curbe->DW10.ChromaIntraModeCost = m_modeCost[11];
3865
3866 curbe->DW11.MV0Cost = m_mvCost[0];
3867 curbe->DW11.MV1Cost = m_mvCost[1];
3868 curbe->DW11.MV2Cost = m_mvCost[2];
3869 curbe->DW11.MV3Cost = m_mvCost[3];
3870
3871 curbe->DW12.MV4Cost = m_mvCost[4];
3872 curbe->DW12.MV5Cost = m_mvCost[5];
3873 curbe->DW12.MV6Cost = m_mvCost[6];
3874 curbe->DW12.MV7Cost = m_mvCost[7];
3875
3876 curbe->DW13.QpPrimeY = sliceQp;
3877 uint8_t bitDepthChromaMinus8 = 0; // support 4:2:0 only
3878 int32_t qpBdOffsetC = 6 * bitDepthChromaMinus8;
3879 int32_t qPi = (int32_t)CodecHal_Clip3((-qpBdOffsetC), 51, (sliceQp + m_hevcPicParams->pps_cb_qp_offset));
3880 int32_t QPc = (qPi < 30) ? qPi : QPcTable[qPi - 30];
3881 curbe->DW13.QpPrimeCb= QPc + qpBdOffsetC;
3882 qPi = (int32_t)CodecHal_Clip3((-qpBdOffsetC), 51, (sliceQp + m_hevcPicParams->pps_cr_qp_offset));
3883 QPc = (qPi < 30) ? qPi : QPcTable[qPi - 30];
3884 curbe->DW13.QpPrimeCr= QPc;
3885
3886 curbe->DW14.SICFwdTransCoeffThreshold_0 = forwardTransformThd[0];
3887 curbe->DW14.SICFwdTransCoeffThreshold_1 = forwardTransformThd[1];
3888 curbe->DW14.SICFwdTransCoeffThreshold_2 = forwardTransformThd[2];
3889
3890 curbe->DW15.SICFwdTransCoeffThreshold_3 = forwardTransformThd[3];
3891 curbe->DW15.SICFwdTransCoeffThreshold_4 = forwardTransformThd[4];
3892 curbe->DW15.SICFwdTransCoeffThreshold_5 = forwardTransformThd[5];
3893 curbe->DW15.SICFwdTransCoeffThreshold_6 = forwardTransformThd[6];
3894
3895 if (SearchPath == 1)
3896 {
3897 // diamond search
3898 if (m_pictureCodingType == P_TYPE)
3899 {
3900 CODECHAL_ENCODE_CHK_STATUS_RETURN(MOS_SecureMemcpy(&(curbe->DW16), 14 * sizeof(uint32_t), &(m_encBTu7PCurbeInit[16]), 14 * sizeof(uint32_t)));
3901 }
3902 else if (m_pictureCodingType == B_TYPE)
3903 {
3904 CODECHAL_ENCODE_CHK_STATUS_RETURN(MOS_SecureMemcpy(&(curbe->DW16), 14 * sizeof(uint32_t), &(m_encBTu7BCurbeInit[16]), 14 * sizeof(uint32_t)));
3905 }
3906 }
3907 else if((SearchPath != 0) && (SearchPath != 2))
3908 {
3909 // default 0 and 2 are full spiral search
3910 CODECHAL_ENCODE_ASSERT(false);
3911 }
3912
3913 curbe->DW32.SkipVal = m_skipValB[curbe->DW3.BlockBasedSkipEnable][transform_8x8_mode_flag][sliceQp];
3914
3915 if(m_pictureCodingType == I_TYPE)
3916 {
3917 *(float*)&(curbe->DW34.LambdaME) = 0.0;
3918 }
3919 else if (m_pictureCodingType == P_TYPE)
3920 {
3921 *(float*)&(curbe->DW34.LambdaME) = (float)m_qpLambdaMe[CODECHAL_ENCODE_HEVC_P_SLICE][sliceQp];
3922 }
3923 else
3924 {
3925 *(float*)&(curbe->DW34.LambdaME) = (float)m_qpLambdaMe[CODECHAL_ENCODE_HEVC_B_SLICE][sliceQp];
3926 }
3927
3928 curbe->DW35.ModeCostSp = m_modeCostSp;
3929 curbe->DW35.SimpIntraInterThreshold = m_simplestIntraInterThreshold;
3930
3931 curbe->DW36.NumRefIdxL0MinusOne = m_hevcSliceParams->num_ref_idx_l0_active_minus1;
3932 curbe->DW36.NumRefIdxL1MinusOne = m_hevcSliceParams->num_ref_idx_l1_active_minus1;
3933 curbe->DW36.BRCEnable = m_encodeParams.bMbQpDataEnabled || m_brcEnabled;
3934 curbe->DW36.LCUBRCEnable = m_encodeParams.bMbQpDataEnabled || m_lcuBrcEnabled;
3935 curbe->DW36.PowerSaving = m_powerSavingEnabled;
3936 curbe->DW36.ROIEnable = (m_hevcPicParams->NumROI > 0);
3937 curbe->DW36.FASTSurveillanceFlag= (m_hevcPicParams->CodingType == I_TYPE) ? 0 : m_hevcSeqParams->bVideoSurveillance;
3938
3939 if(m_pictureCodingType != I_TYPE)
3940 {
3941 curbe->DW37.ActualQpRefID0List0 = GetQPValueFromRefList(LIST_0, CODECHAL_ENCODE_REF_ID_0);
3942 curbe->DW37.ActualQpRefID1List0 = GetQPValueFromRefList(LIST_0, CODECHAL_ENCODE_REF_ID_1);
3943 curbe->DW37.ActualQpRefID2List0 = GetQPValueFromRefList(LIST_0, CODECHAL_ENCODE_REF_ID_2);
3944 curbe->DW37.ActualQpRefID3List0 = GetQPValueFromRefList(LIST_0, CODECHAL_ENCODE_REF_ID_3);
3945 curbe->DW41.TextureIntraCostThreshold = 500;
3946
3947 if(m_pictureCodingType == B_TYPE) {
3948 curbe->DW39.ActualQpRefID0List1 = GetQPValueFromRefList(LIST_1, CODECHAL_ENCODE_REF_ID_0);
3949 curbe->DW39.ActualQpRefID1List1 = GetQPValueFromRefList(LIST_1, CODECHAL_ENCODE_REF_ID_1);
3950 float lambda_me = (float)m_qpLambdaMe[CODECHAL_ENCODE_HEVC_B_SLICE][sliceQp];
3951 if (m_encodeParams.bQualityImprovementEnable)
3952 {
3953 curbe->DW40.TransformThreshold0 = (uint16_t) (lambda_me * 56.25 + 0.5);
3954 curbe->DW40.TransformThreshold1 = (uint16_t) (lambda_me * 21 + 0.5);
3955 curbe->DW41.TransformThreshold2 = (uint16_t) (lambda_me * 9 + 0.5);
3956 }
3957 }
3958 }
3959
3960 curbe->DW42.NumMVPredictorsL0 = m_feiPicParams->NumMVPredictorsL0;
3961 curbe->DW42.NumMVPredictorsL1 = m_feiPicParams->NumMVPredictorsL1;
3962 curbe->DW42.PerLCUQP = m_encodeParams.bMbQpDataEnabled;
3963 curbe->DW42.PerCTBInput = m_feiPicParams->bPerCTBInput;
3964 curbe->DW42.CTBDistortionOutput = m_feiPicParams->bDistortionEnable;
3965 curbe->DW42.MultiPredL0 = m_feiPicParams->MultiPredL0;
3966 curbe->DW42.MultiPredL1 = m_feiPicParams->MultiPredL1;
3967 curbe->DW42.MVPredictorBlockSize = m_feiPicParams->MVPredictorInput;
3968
3969 curbe->DW44.MaxVmvR = 511 * 4;
3970 curbe->DW44.MaxNumMergeCandidates = m_hevcSliceParams->MaxNumMergeCand;
3971
3972 if(m_pictureCodingType != I_TYPE)
3973 {
3974 curbe->DW44.MaxNumRefList0 = curbe->DW36.NumRefIdxL0MinusOne + 1;
3975
3976 curbe->DW45.TemporalMvpEnableFlag = m_hevcSliceParams->slice_temporal_mvp_enable_flag;
3977 curbe->DW45.HMECombineLenPslice = 8;
3978 if(m_pictureCodingType == B_TYPE)
3979 {
3980 curbe->DW44.MaxNumRefList1 = curbe->DW36.NumRefIdxL1MinusOne + 1;
3981 curbe->DW45.HMECombineLenBslice = 8;
3982 }
3983 }
3984
3985 curbe->DW45.Log2ParallelMergeLevel = m_hevcPicParams->log2_parallel_merge_level_minus2 + 2;
3986
3987 curbe->DW46.Log2MaxTUSize = m_hevcSeqParams->log2_max_transform_block_size_minus2 + 2;
3988 curbe->DW46.Log2MinTUSize = m_hevcSeqParams->log2_min_transform_block_size_minus2 + 2;
3989 curbe->DW46.Log2MaxCUSize = m_hevcSeqParams->log2_max_coding_block_size_minus3 + 3;
3990 curbe->DW46.Log2MinCUSize = m_hevcSeqParams->log2_min_coding_block_size_minus3 + 3;
3991
3992 curbe->DW47.NumRegionsInSlice = m_numRegionsInSlice;
3993 curbe->DW47.TypeOfWalkingPattern = m_enable26WalkingPattern;
3994 curbe->DW47.ChromaFlatnessCheckFlag= (m_feiPicParams->FastIntraMode) ? 0 : 1;
3995 curbe->DW47.EnableIntraEarlyExit = (m_feiPicParams->FastIntraMode) ? 0 : 1;
3996 curbe->DW47.SkipIntraKrnFlag = (m_feiPicParams->FastIntraMode) ? 1 : 0;
3997 curbe->DW47.CollocatedFromL0Flag = m_hevcSliceParams->collocated_from_l0_flag;
3998 curbe->DW47.IsLowDelay = m_lowDelay;
3999 curbe->DW47.ScreenContentFlag = m_hevcPicParams->bScreenContent;
4000 curbe->DW47.MultiSliceFlag = (m_numSlices > 1);
4001 curbe->DW47.ArbitarySliceFlag = m_arbitraryNumMbsInSlice;
4002 curbe->DW47.NumRegionMinus1 = m_walkingPatternParam.dwNumRegion - 1;
4003
4004 if(m_pictureCodingType != I_TYPE)
4005 {
4006 curbe->DW48.CurrentTdL0_0 = ComputeTemporalDifference(m_hevcSliceParams->RefPicList[0][0]);
4007 curbe->DW48.CurrentTdL0_1 = ComputeTemporalDifference(m_hevcSliceParams->RefPicList[0][1]);
4008 curbe->DW49.CurrentTdL0_2 = ComputeTemporalDifference(m_hevcSliceParams->RefPicList[0][2]);
4009 curbe->DW49.CurrentTdL0_3 = ComputeTemporalDifference(m_hevcSliceParams->RefPicList[0][3]);
4010 if(m_pictureCodingType == B_TYPE) {
4011 curbe->DW50.CurrentTdL1_0 = ComputeTemporalDifference(m_hevcSliceParams->RefPicList[1][0]);
4012 curbe->DW50.CurrentTdL1_1 = ComputeTemporalDifference(m_hevcSliceParams->RefPicList[1][1]);
4013 }
4014 }
4015
4016 curbe->DW52.NumofUnitInRegion = m_walkingPatternParam.dwNumUnitsInRegion;
4017 curbe->DW52.MaxHeightInRegion = m_walkingPatternParam.dwMaxHeightInRegion;
4018
4019 uint32_t startBTI = 0;
4020 curbe->DW56.BTI_CU_Record = bindingTable->dwBindingTableEntries[startBTI++];
4021 curbe->DW57.BTI_PAK_Cmd = bindingTable->dwBindingTableEntries[startBTI++];
4022 curbe->DW58.BTI_Src_Y = bindingTable->dwBindingTableEntries[startBTI++];
4023 startBTI++; //skip UV index
4024 curbe->DW59.BTI_Intra_Dist = bindingTable->dwBindingTableEntries[startBTI++];
4025 curbe->DW60.BTI_Min_Dist = bindingTable->dwBindingTableEntries[startBTI++];
4026 curbe->DW61.BTI_HMEMVPredFwdBwdSurfIndex = bindingTable->dwBindingTableEntries[startBTI++];
4027 curbe->DW62.BTI_HMEDistSurfIndex = bindingTable->dwBindingTableEntries[startBTI++];
4028 curbe->DW63.BTI_Slice_Map = bindingTable->dwBindingTableEntries[startBTI++];
4029 curbe->DW64.BTI_VME_Saved_UNI_SIC = bindingTable->dwBindingTableEntries[startBTI++];
4030 curbe->DW65.BTI_Simplest_Intra = bindingTable->dwBindingTableEntries[startBTI++];
4031 curbe->DW66.BTI_Collocated_RefFrame = bindingTable->dwBindingTableEntries[startBTI++];
4032 curbe->DW67.BTI_Reserved = bindingTable->dwBindingTableEntries[startBTI++];
4033 curbe->DW68.BTI_BRC_Input = bindingTable->dwBindingTableEntries[startBTI++];
4034 curbe->DW69.BTI_LCU_QP = bindingTable->dwBindingTableEntries[startBTI++];
4035 curbe->DW70.BTI_BRC_Data = bindingTable->dwBindingTableEntries[startBTI++];
4036 curbe->DW71.BTI_VMEInterPredictionSurfIndex= bindingTable->dwBindingTableEntries[startBTI++];
4037 if(m_pictureCodingType == P_TYPE)
4038 {
4039 //P MBEnc curbe 72~75 are different from B frame.
4040 startBTI += (CODECHAL_HEVC_P_MBENC_CONCURRENT_THD_MAP - CODECHAL_HEVC_P_MBENC_VME_FORWARD_0);
4041 curbe->DW72.BTI_ConcurrentThreadMap= bindingTable->dwBindingTableEntries[startBTI++];
4042 curbe->DW73.BTI_MB_Data_CurFrame = bindingTable->dwBindingTableEntries[startBTI++];
4043 curbe->DW74.BTI_MVP_CurFrame = bindingTable->dwBindingTableEntries[startBTI++];
4044 curbe->DW75.BTI_Haar_Dist16x16 = bindingTable->dwBindingTableEntries[startBTI++];
4045 curbe->DW76.BTI_Stats_Data = bindingTable->dwBindingTableEntries[startBTI++];
4046 curbe->DW77.BTI_Frame_Stats_Data = bindingTable->dwBindingTableEntries[startBTI++];
4047 curbe->DW78.BTI_MVPredictor_Surface= bindingTable->dwBindingTableEntries[startBTI++];
4048 curbe->DW79.BTI_CTB_Input_Surface = bindingTable->dwBindingTableEntries[startBTI++];
4049 curbe->DW80.BTI_CTB_Distortion_Output_Surface = bindingTable->dwBindingTableEntries[startBTI++];
4050 curbe->DW81.BTI_Debug = bindingTable->dwBindingTableEntries[startBTI++];
4051 }
4052 else
4053 {
4054 startBTI += (CODECHAL_HEVC_B_MBENC_VME_BACKWARD_7 - CODECHAL_HEVC_B_MBENC_VME_FORWARD_0 + 1);
4055
4056 curbe->DW72.BTI_VMEInterPredictionBSurfIndex = bindingTable->dwBindingTableEntries[startBTI++];
4057 startBTI += (CODECHAL_HEVC_B_MBENC_VME_MUL_NOUSE_3 - CODECHAL_HEVC_B_MBENC_VME_MUL_BACKWARD_0 + 1);
4058
4059 curbe->DW73.BTI_ConcurrentThreadMap= bindingTable->dwBindingTableEntries[startBTI++];
4060 curbe->DW74.BTI_MB_Data_CurFrame = bindingTable->dwBindingTableEntries[startBTI++];
4061 curbe->DW75.BTI_MVP_CurFrame = bindingTable->dwBindingTableEntries[startBTI++];
4062 curbe->DW76.BTI_Haar_Dist16x16 = bindingTable->dwBindingTableEntries[startBTI++];
4063 curbe->DW77.BTI_Stats_Data = bindingTable->dwBindingTableEntries[startBTI++];
4064 curbe->DW78.BTI_Frame_Stats_Data = bindingTable->dwBindingTableEntries[startBTI++];
4065 curbe->DW79.BTI_MVPredictor_Surface= bindingTable->dwBindingTableEntries[startBTI++];
4066 curbe->DW80.BTI_CTB_Input_Surface = bindingTable->dwBindingTableEntries[startBTI++];
4067 curbe->DW81.BTI_CTB_Distortion_Output_Surface = bindingTable->dwBindingTableEntries[startBTI++];
4068 curbe->DW82.BTI_Debug = bindingTable->dwBindingTableEntries[startBTI++];
4069 }
4070
4071 // Intra refresh is enabled. Program related CURBE fields
4072 if (m_hevcPicParams->bEnableRollingIntraRefresh)
4073 {
4074 curbe->DW35.IntraRefreshEn = true;
4075 curbe->DW35.FirstIntraRefresh = m_firstIntraRefresh;
4076 curbe->DW35.HalfUpdateMixedLCU = 0;
4077 curbe->DW35.EnableRollingIntra = true;
4078
4079 curbe->DW38.NumFrameInGOB = m_frameNumInGob;
4080 curbe->DW38.NumIntraRefreshOffFrames = m_frameNumWithoutIntraRefresh;
4081
4082 curbe->DW51.IntraRefreshQPDelta = m_hevcPicParams->QpDeltaForInsertedIntra;
4083 curbe->DW51.IntraRefreshMBNum = m_hevcPicParams->IntraInsertionLocation;
4084 curbe->DW51.IntraRefreshUnitInMB = m_hevcPicParams->IntraInsertionSize;
4085
4086 curbe->DW53.IntraRefreshRefHeight = 40;
4087 curbe->DW53.IntraRefreshRefWidth = 48;
4088
4089 m_firstIntraRefresh = false;
4090 m_frameNumWithoutIntraRefresh = 0;
4091 }
4092 else if (m_pictureCodingType != I_TYPE) // don't increment num frames w/o refresh in case of TU7 I frames
4093 {
4094 m_frameNumWithoutIntraRefresh++;
4095 }
4096
4097 CODECHAL_ENCODE_ASSERT(startBTI == bindingTable->dwNumBindingTableEntries);
4098
4099 CODECHAL_MEDIA_STATE_TYPE encFunctionType = CODECHAL_MEDIA_STATE_HEVC_B_MBENC;
4100 if (m_pictureCodingType == P_TYPE)
4101 {
4102 //P frame curbe only use the DW0~DW75
4103 CODECHAL_ENCODE_CHK_STATUS_RETURN(
4104 AddCurbeToStateHeap(kernelState, encFunctionType, &cmd, sizeof(cmd) - sizeof(uint32_t)));
4105 }
4106 else
4107 {
4108 CODECHAL_ENCODE_CHK_STATUS_RETURN(
4109 AddCurbeToStateHeap(kernelState, encFunctionType, &cmd, sizeof(cmd)));
4110 }
4111
4112 MOS_COMMAND_BUFFER cmdBuffer;
4113 if(m_numMbBKernelSplit == 0)
4114 {
4115 CODECHAL_ENCODE_CHK_STATUS_RETURN(SendKernelCmdsAndBindingTable(&cmdBuffer,
4116 kernelState,
4117 encFunctionType,
4118 &m_walkingPatternParam.ScoreBoard));
4119 }
4120 else
4121 {
4122 CODECHAL_ENCODE_CHK_STATUS_RETURN(GetCommandBuffer(&cmdBuffer));
4123
4124 MHW_INTERFACE_DESCRIPTOR_PARAMS idParams;
4125 MOS_ZeroMemory(&idParams, sizeof(idParams));
4126 idParams.pKernelState = kernelState;
4127 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnSetInterfaceDescriptor(
4128 m_stateHeapInterface,
4129 1,
4130 &idParams));
4131
4132 // Add binding table
4133 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnSetBindingTable(
4134 m_stateHeapInterface,
4135 kernelState));
4136 }
4137
4138 //Add surface states
4139 startBTI = 0;
4140
4141 //0: CU record
4142 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
4143 kernelState,
4144 &cmdBuffer,
4145 SURFACE_CU_RECORD,
4146 &bindingTable->dwBindingTableEntries[startBTI++]));
4147
4148 //1: PAK command
4149 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
4150 kernelState,
4151 &cmdBuffer,
4152 SURFACE_HCP_PAK,
4153 &bindingTable->dwBindingTableEntries[startBTI++]));
4154
4155 //2 and 3 Source Y and UV
4156 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
4157 kernelState,
4158 &cmdBuffer,
4159 SURFACE_RAW_Y_UV,
4160 &bindingTable->dwBindingTableEntries[startBTI++]));
4161 startBTI++;
4162
4163 //4: Intra dist
4164 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
4165 kernelState,
4166 &cmdBuffer,
4167 SURFACE_INTRA_DIST,
4168 &bindingTable->dwBindingTableEntries[startBTI++]));
4169
4170 //5: min distortion
4171 m_surfaceParams[SURFACE_MIN_DIST].bIsWritable =
4172 m_surfaceParams[SURFACE_MIN_DIST].bRenderTarget = true;
4173 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
4174 kernelState,
4175 &cmdBuffer,
4176 SURFACE_MIN_DIST,
4177 &bindingTable->dwBindingTableEntries[startBTI++]));
4178
4179 // 6 and 7, skip SURFACE_HME_MVP and SURFACE_HME_DIST from HME since FEI alsways disables HME
4180 startBTI += 2;
4181
4182 //8: slice map
4183 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
4184 kernelState,
4185 &cmdBuffer,
4186 SURFACE_SLICE_MAP,
4187 &bindingTable->dwBindingTableEntries[startBTI++]));
4188
4189 //9: VME UNI and SIC data
4190 m_surfaceParams[SURFACE_VME_UNI_SIC_DATA].bIsWritable =
4191 m_surfaceParams[SURFACE_VME_UNI_SIC_DATA].bRenderTarget = true;
4192 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
4193 kernelState,
4194 &cmdBuffer,
4195 SURFACE_VME_UNI_SIC_DATA,
4196 &bindingTable->dwBindingTableEntries[startBTI++]));
4197
4198 //10: Simplest Intra
4199 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
4200 kernelState,
4201 &cmdBuffer,
4202 SURFACE_SIMPLIFIED_INTRA,
4203 &bindingTable->dwBindingTableEntries[startBTI++]));
4204
4205 // 11: Reference frame col-located data surface
4206 if(mbCodeIdxForTempMVP == 0xFF)
4207 {
4208 startBTI++;
4209 }
4210 else
4211 {
4212 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
4213 kernelState,
4214 &cmdBuffer,
4215 SURFACE_COL_MB_MV,
4216 &bindingTable->dwBindingTableEntries[startBTI++],
4217 m_trackedBuf->GetMvTemporalBuffer(mbCodeIdxForTempMVP)));
4218 }
4219
4220 // 12: Current frame col-located data surface -- reserved now
4221 startBTI++;
4222
4223 // 13: BRC Input
4224 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
4225 kernelState,
4226 &cmdBuffer,
4227 SURFACE_BRC_INPUT,
4228 &bindingTable->dwBindingTableEntries[startBTI++]));
4229
4230 // 14: LCU Qp
4231 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
4232 kernelState,
4233 &cmdBuffer,
4234 SURFACE_LCU_QP,
4235 &bindingTable->dwBindingTableEntries[startBTI++]));
4236
4237 // 15: LCU BRC constant
4238 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
4239 kernelState,
4240 &cmdBuffer,
4241 SURFACE_BRC_DATA,
4242 &bindingTable->dwBindingTableEntries[startBTI++]));
4243
4244 // 16 - 32 Current plus forward and backward surface 0-7
4245 //16: Source Y for VME
4246 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
4247 kernelState,
4248 &cmdBuffer,
4249 SURFACE_RAW_VME,
4250 &bindingTable->dwBindingTableEntries[startBTI++]));
4251
4252 for(uint32_t surfaceIdx = 0; surfaceIdx < 8; surfaceIdx++)
4253 {
4254 CODEC_PICTURE refPic = m_hevcSliceParams->RefPicList[LIST_0][surfaceIdx];
4255 if(!CodecHal_PictureIsInvalid(refPic) &&
4256 !CodecHal_PictureIsInvalid(m_hevcPicParams->RefFrameList[refPic.FrameIdx]))
4257 {
4258 uint8_t idx = m_hevcPicParams->RefFrameList[refPic.FrameIdx].FrameIdx;
4259
4260 // Picture Y VME
4261 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
4262 kernelState,
4263 &cmdBuffer,
4264 SURFACE_REF_FRAME_VME,
4265 &bindingTable->dwBindingTableEntries[startBTI++],
4266 &m_refList[idx]->sRefBuffer,
4267 curbe->DW6.FrameWidth,
4268 curbe->DW6.FrameHeight));
4269
4270 }
4271 else
4272 {
4273 // Skip the binding table index because it is not used
4274 startBTI++;
4275 }
4276
4277 refPic = m_hevcSliceParams->RefPicList[LIST_1][surfaceIdx];
4278 if(!CodecHal_PictureIsInvalid(refPic) &&
4279 !CodecHal_PictureIsInvalid(m_hevcPicParams->RefFrameList[refPic.FrameIdx]))
4280 {
4281 uint8_t idx = m_hevcPicParams->RefFrameList[refPic.FrameIdx].FrameIdx;
4282
4283 // Picture Y VME
4284 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
4285 kernelState,
4286 &cmdBuffer,
4287 SURFACE_REF_FRAME_VME,
4288 &bindingTable->dwBindingTableEntries[startBTI++],
4289 &m_refList[idx]->sRefBuffer,
4290 curbe->DW6.FrameWidth,
4291 curbe->DW6.FrameHeight));
4292
4293 }
4294 else
4295 {
4296 // Skip the binding table index because it is not used
4297 startBTI++;
4298 }
4299 }
4300 CODECHAL_ENCODE_ASSERT(startBTI == CODECHAL_HEVC_B_MBENC_VME_BACKWARD_7 - CODECHAL_HEVC_B_MBENC_BEGIN + 1);
4301
4302 if (m_pictureCodingType != P_TYPE)
4303 {
4304 //33-41 VME multi-ref BTI -- Current plus [backward, nil][0..3]
4305 //33: Current Y VME surface
4306 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
4307 kernelState,
4308 &cmdBuffer,
4309 SURFACE_RAW_VME,
4310 &bindingTable->dwBindingTableEntries[startBTI++]));
4311
4312 for(uint32_t surfaceIdx = 0; surfaceIdx < 4; surfaceIdx++)
4313 {
4314 CODEC_PICTURE refPic = m_hevcSliceParams->RefPicList[1][surfaceIdx];
4315 if(!CodecHal_PictureIsInvalid(refPic) &&
4316 !CodecHal_PictureIsInvalid(m_hevcPicParams->RefFrameList[refPic.FrameIdx]))
4317 {
4318 uint8_t idx = m_hevcPicParams->RefFrameList[refPic.FrameIdx].FrameIdx;
4319
4320 // Picture Y VME
4321 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
4322 kernelState,
4323 &cmdBuffer,
4324 SURFACE_REF_FRAME_VME,
4325 &bindingTable->dwBindingTableEntries[startBTI++],
4326 &m_refList[idx]->sRefBuffer,
4327 curbe->DW6.FrameWidth,
4328 curbe->DW6.FrameHeight));
4329 }
4330 else
4331 {
4332 // Skip the binding table index because it is not used
4333 startBTI++;
4334 }
4335
4336 // Skip the binding table index because it is not used
4337 startBTI++;
4338 }
4339 CODECHAL_ENCODE_ASSERT(startBTI == CODECHAL_HEVC_B_MBENC_VME_MUL_NOUSE_3 - CODECHAL_HEVC_B_MBENC_BEGIN + 1);
4340 }
4341
4342 // B 42 or P 33: Concurrent thread
4343 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
4344 kernelState,
4345 &cmdBuffer,
4346 (SURFACE_ID)(SURFACE_CONCURRENT_THREAD + m_concurrentThreadIndex),
4347 &bindingTable->dwBindingTableEntries[startBTI++]));
4348
4349 if (++m_concurrentThreadIndex >= NUM_CONCURRENT_THREAD)
4350 {
4351 m_concurrentThreadIndex = 0;
4352 }
4353
4354 // B 43 or P 34: MV index buffer
4355 m_surfaceParams[SURFACE_MB_MV_INDEX].bIsWritable =
4356 m_surfaceParams[SURFACE_MB_MV_INDEX].bRenderTarget = true;
4357 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
4358 kernelState,
4359 &cmdBuffer,
4360 SURFACE_MB_MV_INDEX,
4361 &bindingTable->dwBindingTableEntries[startBTI++]));
4362
4363 // B 44: or P 35: MVP index buffer
4364 m_surfaceParams[SURFACE_MVP_INDEX].bIsWritable =
4365 m_surfaceParams[SURFACE_MVP_INDEX].bRenderTarget = true;
4366 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
4367 kernelState,
4368 &cmdBuffer,
4369 SURFACE_MVP_INDEX,
4370 &bindingTable->dwBindingTableEntries[startBTI++]));
4371
4372 // skip three BTI for haar distortion surface, statstics data dump surface
4373 // and frame level statstics data surface because they are not used
4374 startBTI += 3;
4375
4376 // 48: FEI external MVPredictor surface
4377 if (m_feiPicParams->MVPredictorInput)
4378 {
4379 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
4380 kernelState,
4381 &cmdBuffer,
4382 SURFACE_FEI_EXTERNAL_MVP,
4383 &bindingTable->dwBindingTableEntries[startBTI++]));
4384 }
4385 else
4386 {
4387 startBTI++;
4388 }
4389
4390 if (m_feiPicParams->bPerCTBInput)
4391 {
4392 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState(
4393 kernelState,
4394 &cmdBuffer,
4395 SURFACE_FEI_PER_CTB_CTRL,
4396 &bindingTable->dwBindingTableEntries[startBTI++]));
4397 }
4398 else
4399 {
4400 startBTI ++;
4401 }
4402 startBTI += 1;
4403
4404 if (!m_hwWalker)
4405 {
4406 eStatus = MOS_STATUS_UNKNOWN;
4407 CODECHAL_ENCODE_ASSERTMESSAGE("Currently HW walker shall not be disabled for CM based down scaling kernel.");
4408 return eStatus;
4409 }
4410
4411 if(m_numMbBKernelSplit == 0)
4412 {
4413 // always use customized media walker
4414 MHW_WALKER_PARAMS walkerParams;
4415 MOS_SecureMemcpy(&walkerParams, sizeof(walkerParams), &m_walkingPatternParam.MediaWalker, sizeof(m_walkingPatternParam.MediaWalker));
4416 walkerParams.ColorCountMinusOne = m_walkingPatternParam.dwNumRegion - 1;
4417
4418 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_renderEngineInterface->AddMediaObjectWalkerCmd(
4419 &cmdBuffer,
4420 &walkerParams));
4421 }
4422 else
4423 {
4424 int32_t localOuterLoopExecCount = m_walkingPatternParam.MediaWalker.dwLocalLoopExecCount;
4425 int32_t localInitialStartPointY = m_walkingPatternParam.MediaWalker.LocalStart.y;
4426 int32_t phase = MOS_MIN(m_numMbBKernelSplit, MAX_NUM_KERNEL_SPLIT);
4427 int32_t totalExecCount = localOuterLoopExecCount + 1;
4428 int32_t deltaExecCount = (((totalExecCount+phase - 1) / phase) + 1) & 0xfffe;
4429 int32_t remainExecCount = totalExecCount;
4430
4431 int32_t deltaY = 0;
4432 if (m_enable26WalkingPattern)
4433 {
4434 deltaY = deltaExecCount / 2;
4435 }
4436 else
4437 {
4438 deltaY = deltaExecCount * 2;
4439 }
4440
4441 int32_t startPointY[MAX_NUM_KERNEL_SPLIT] = { 0 };
4442 int32_t currentExecCount[MAX_NUM_KERNEL_SPLIT] = { -1 };
4443 currentExecCount[0] = (remainExecCount > deltaExecCount)?(deltaExecCount-1) : (remainExecCount-1);
4444 startPointY[0] = localInitialStartPointY;
4445
4446 for (auto i = 1; i < phase; i++)
4447 {
4448 remainExecCount -= deltaExecCount;
4449 if (remainExecCount < 1)
4450 {
4451 remainExecCount = 1;
4452 }
4453
4454 currentExecCount[i] = (remainExecCount > deltaExecCount)?(deltaExecCount-1) : (remainExecCount-1);
4455 startPointY[i] = startPointY[i-1] + deltaY;
4456 }
4457
4458 for(auto i = 0; i < phase; i++)
4459 {
4460 if(currentExecCount[i] < 0)
4461 {
4462 break;
4463 }
4464
4465 // Program render engine pipe commands
4466 SendKernelCmdsParams sendKernelCmdsParams = SendKernelCmdsParams();
4467 sendKernelCmdsParams.EncFunctionType = encFunctionType;
4468 sendKernelCmdsParams.pKernelState = kernelState;
4469 sendKernelCmdsParams.bEnableCustomScoreBoard= true;
4470 sendKernelCmdsParams.pCustomScoreBoard = &m_walkingPatternParam.ScoreBoard;
4471 CODECHAL_ENCODE_CHK_STATUS_RETURN(SendGenericKernelCmds(&cmdBuffer, &sendKernelCmdsParams));
4472
4473 // Change walker execution count and local start Y for different phases
4474 m_walkingPatternParam.MediaWalker.dwLocalLoopExecCount = currentExecCount[i];
4475 m_walkingPatternParam.MediaWalker.LocalStart.y = startPointY[i];
4476
4477 // always use customized media walker
4478 MHW_WALKER_PARAMS walkerParams;
4479 MOS_SecureMemcpy(&walkerParams, sizeof(walkerParams), &m_walkingPatternParam.MediaWalker, sizeof(m_walkingPatternParam.MediaWalker));
4480 walkerParams.ColorCountMinusOne = m_walkingPatternParam.dwNumRegion - 1;
4481
4482 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_renderEngineInterface->AddMediaObjectWalkerCmd(
4483 &cmdBuffer,
4484 &walkerParams));
4485 }
4486 }
4487
4488 CODECHAL_ENCODE_CHK_STATUS_RETURN(EndKernelCall(
4489 encFunctionType,
4490 kernelState,
4491 &cmdBuffer));
4492
4493 CODECHAL_DEBUG_TOOL(
4494 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
4495 &m_mvIndex.sResource,
4496 CodechalDbgAttr::attrOutput,
4497 "MbData",
4498 m_mvpIndex.dwSize,
4499 0,
4500 CODECHAL_MEDIA_STATE_HEVC_B_MBENC));
4501
4502 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
4503 &m_mvpIndex.sResource,
4504 CodechalDbgAttr::attrOutput,
4505 "MvData",
4506 m_mvpIndex.dwSize,
4507 0,
4508 CODECHAL_MEDIA_STATE_HEVC_B_MBENC));
4509 )
4510
4511 m_lastTaskInPhase = true;
4512 eStatus = Encode8x8BPakKernel(curbe);
4513
4514 return eStatus;
4515 }
4516
4517 #else
4518
Encode2xScalingKernel()4519 MOS_STATUS CodechalFeiHevcStateG9Skl::Encode2xScalingKernel()
4520 {
4521 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
4522
4523 PerfTagSetting perfTag;
4524 CODECHAL_ENCODE_SET_PERFTAG_INFO(perfTag, CODECHAL_ENCODE_PERFTAG_CALL_SCALING_KERNEL);
4525
4526 //Setup CURBE
4527 MEDIA_OBJECT_DOWNSCALING_2X_STATIC_DATA_G9 cmd, *curbe = &cmd;
4528 MOS_ZeroMemory(curbe, sizeof(*curbe));
4529 curbe->DW0.PicWidth = MOS_ALIGN_CEIL(m_frameWidth, CODECHAL_MACROBLOCK_WIDTH);
4530 curbe->DW0.PicHeight = MOS_ALIGN_CEIL(m_frameHeight, CODECHAL_MACROBLOCK_HEIGHT);
4531
4532 DownScalingKernelParams scalingParams;
4533 MOS_ZeroMemory(&scalingParams, sizeof(scalingParams));
4534
4535 scalingParams.m_cmSurfDS_TopIn = &m_rawSurfaceToEnc->OsResource;
4536 scalingParams.m_cmSurfDS_TopOut = &m_scaled2xSurface.OsResource;
4537 scalingParams.m_cmSurfTopVProc = nullptr;
4538
4539 if (m_cmKernelMap.count("2xScaling") == 0)
4540 {
4541 m_cmKernelMap["2xScaling"] = new CMRTKernelDownScalingUMD();
4542 m_cmKernelMap["2xScaling"]->Init((void *)m_osInterface->pOsContext);
4543 }
4544
4545 m_cmKernelMap["2xScaling"]->SetupCurbe(curbe);
4546
4547 CODECHAL_MEDIA_STATE_TYPE encFunctionType = CODECHAL_MEDIA_STATE_2X_SCALING;
4548 CODECHAL_DEBUG_TOOL(
4549 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpMDFCurbe(
4550 encFunctionType,
4551 (uint8_t *)curbe, sizeof(*curbe)));
4552 )
4553
4554 m_cmKernelMap["2xScaling"]->AllocateSurfaces(&scalingParams);
4555
4556 //No need to wait for task finished
4557 m_cmEvent = CM_NO_EVENT;
4558 m_cmKernelMap["2xScaling"]->CreateAndDispatchKernel(m_cmEvent, false, (!m_singleTaskPhaseSupported));
4559
4560 return eStatus;
4561 }
4562
Encode32x32PuModeDecisionKernel()4563 MOS_STATUS CodechalFeiHevcStateG9Skl::Encode32x32PuModeDecisionKernel()
4564 {
4565 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
4566
4567 PerfTagSetting perfTag;
4568 CODECHAL_ENCODE_SET_PERFTAG_INFO(perfTag, CODECHAL_ENCODE_PERFTAG_CALL_32X32_PU_MD);
4569
4570 //Setup CURBE
4571 uint32_t log2MaxCUSize = m_hevcSeqParams->log2_max_coding_block_size_minus3 + 3;
4572
4573 CalcLambda(CODECHAL_ENCODE_HEVC_I_SLICE, INTRA_TRANSFORM_HAAR);
4574 int32_t sliceQp = CalSliceQp();
4575
4576 double lambdaScalingFactor = 1.0;
4577 double qpLambda = m_qpLambdaMd[CODECHAL_ENCODE_HEVC_I_SLICE][sliceQp];
4578 double squaredQpLambda = qpLambda * qpLambda;
4579 m_fixedPointLambda = (uint32_t)(lambdaScalingFactor * squaredQpLambda * (1<<10));
4580
4581 CODECHAL_FEI_HEVC_I_32x32_PU_MODE_DECISION_CURBE_G9 cmd, *curbe = &cmd;
4582 MOS_ZeroMemory(curbe, sizeof(*curbe));
4583 curbe->DW0.FrameWidth = MOS_ALIGN_CEIL(m_frameWidth, CODECHAL_MACROBLOCK_WIDTH);
4584 curbe->DW0.FrameHeight = MOS_ALIGN_CEIL(m_frameHeight, CODECHAL_MACROBLOCK_HEIGHT);
4585
4586 curbe->DW1.EnableDebugDump = false;
4587 curbe->DW1.LCUType = (log2MaxCUSize==6)? 0 /*64x64*/ : 1 /*32x32*/;
4588 curbe->DW1.PuType = 0; // 32x32 PU
4589 curbe->DW1.BRCEnable = m_encodeParams.bMbQpDataEnabled || m_brcEnabled;
4590 curbe->DW1.LCUBRCEnable = m_encodeParams.bMbQpDataEnabled || m_lcuBrcEnabled;
4591 curbe->DW1.SliceType = PicCodingTypeToSliceType(m_hevcPicParams->CodingType);
4592 curbe->DW1.FASTSurveillanceFlag = (m_hevcPicParams->CodingType == I_TYPE) ? 0 : m_hevcSeqParams->bVideoSurveillance;
4593 curbe->DW1.ROIEnable = (m_hevcPicParams->NumROI > 0);
4594 curbe->DW1.SliceQp = sliceQp;
4595 curbe->DW1.EnableStatsDataDump = m_encodeParams.bReportStatisticsEnabled;
4596 curbe->DW1.EnableQualityImprovement = m_encodeParams.bQualityImprovementEnable;
4597
4598 curbe->DW2.Lambda = m_fixedPointLambda;
4599
4600 curbe->DW3.ModeCost32x32 = 0;
4601
4602 curbe->DW4.EarlyExit = (uint32_t)-1;
4603 if (curbe->DW1.EnableStatsDataDump)
4604 {
4605 double lambdaMd;
4606 float hadBias = 2.0f;
4607
4608 lambdaMd = m_qpLambdaMd[curbe->DW1.SliceType][sliceQp];
4609 lambdaMd = lambdaMd * hadBias;
4610 curbe->DW5.NewLambdaForHaarTransform = (uint32_t)(lambdaMd*(1<<10));
4611 }
4612
4613 IFrameKernelParams I32x32Params;
4614 MOS_ZeroMemory(&I32x32Params, sizeof(I32x32Params));
4615
4616 I32x32Params.m_cmSurfPer32x32PUDataOut = &m_32x32PuOutputData.sResource;
4617 I32x32Params.m_cmSurfCurrY = &m_rawSurfaceToEnc->OsResource;
4618 I32x32Params.m_cmSurfCurrY2 = &m_scaled2xSurface.OsResource;
4619 I32x32Params.m_cmSurfSliceMap = &m_sliceMapSurface.OsResource;
4620 I32x32Params.m_cmSurfCombinedQP = (MOS_RESOURCE*)m_allocator->GetResource(m_standard, brcInputForEncKernel);
4621 I32x32Params.m_cmLCUQPSurf = &m_lcuQP.OsResource;
4622 I32x32Params.m_cmBRCConstSurf = &m_brcBuffers.sBrcConstantDataBuffer[m_currRecycledBufIdx].OsResource;
4623
4624 if (m_cmKernelMap.count("I_32X32") == 0)
4625 {
4626 m_cmKernelMap["I_32X32"] = new CMRTKernelI32x32UMD();
4627 m_cmKernelMap["I_32X32"]->Init(nullptr, m_cmKernelMap["2xScaling"]->m_cmDev, m_cmKernelMap["2xScaling"]->m_cmQueue, m_cmKernelMap["2xScaling"]->m_cmTask, nullptr);
4628 }
4629
4630 m_cmKernelMap["I_32X32"]->SetupCurbe(curbe);
4631
4632 CODECHAL_MEDIA_STATE_TYPE encFunctionType = CODECHAL_MEDIA_STATE_32x32_PU_MODE_DECISION;
4633 CODECHAL_DEBUG_TOOL(
4634 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpMDFCurbe(
4635 encFunctionType,
4636 (uint8_t *)curbe, sizeof(*curbe)));
4637 )
4638
4639 m_cmKernelMap["I_32X32"]->AllocateSurfaces(&I32x32Params);
4640
4641 //No need to wait for task finished
4642 m_cmEvent = CM_NO_EVENT;
4643 m_cmKernelMap["I_32X32"]->CreateAndDispatchKernel(m_cmEvent, false, (!m_singleTaskPhaseSupported));
4644
4645 return eStatus;
4646 }
4647
Encode16x16SadPuComputationKernel()4648 MOS_STATUS CodechalFeiHevcStateG9Skl::Encode16x16SadPuComputationKernel()
4649 {
4650 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
4651
4652 CODECHAL_ENCODE_FUNCTION_ENTER;
4653
4654 PerfTagSetting perfTag;
4655 CODECHAL_ENCODE_SET_PERFTAG_INFO(perfTag, CODECHAL_ENCODE_PERFTAG_CALL_16X16_SAD);
4656
4657 // Setup CURBE
4658 CODECHAL_ENC_HEVC_I_16x16_SAD_CURBE_G9 cmd, *curbe = &cmd;
4659
4660 MOS_ZeroMemory(curbe, sizeof(*curbe));
4661 curbe->DW0.FrameWidth = MOS_ALIGN_CEIL(m_frameWidth, CODECHAL_MACROBLOCK_WIDTH);
4662 curbe->DW0.FrameHeight = MOS_ALIGN_CEIL(m_frameHeight, CODECHAL_MACROBLOCK_HEIGHT);
4663
4664 curbe->DW1.Log2MaxCUSize = m_hevcSeqParams->log2_max_coding_block_size_minus3 + 3;
4665 curbe->DW1.Log2MinCUSize = m_hevcSeqParams->log2_min_coding_block_size_minus3 + 3;
4666 curbe->DW1.Log2MinTUSize = m_hevcSeqParams->log2_min_transform_block_size_minus2 + 2;
4667 curbe->DW1.EnableIntraEarlyExit = (m_hevcSeqParams->TargetUsage == 0x04) ? ((m_hevcPicParams->CodingType == I_TYPE) ? 0 : 1) : 0;
4668
4669 curbe->DW2.SliceType = PicCodingTypeToSliceType(m_hevcPicParams->CodingType);
4670 curbe->DW2.SimFlagForInter = false;
4671 if (m_hevcPicParams->CodingType != I_TYPE)
4672 {
4673 curbe->DW2.FASTSurveillanceFlag = m_hevcSeqParams->bVideoSurveillance;
4674 }
4675
4676 IFrameKernelParams I16x16SadParams;
4677 MOS_ZeroMemory(&I16x16SadParams, sizeof(I16x16SadParams));
4678
4679 I16x16SadParams.m_cmSurfCurrY = &m_rawSurfaceToEnc->OsResource;
4680 I16x16SadParams.m_cmSurfPer32x32PUDataOut = &m_32x32PuOutputData.sResource;
4681 I16x16SadParams.m_cmSurfSAD16x16 = &m_sad16x16Pu.sResource;
4682 I16x16SadParams.m_cmSurfSliceMap = &m_sliceMapSurface.OsResource;
4683 I16x16SadParams.m_cmSurfSIF = &m_simplestIntraSurface.OsResource;
4684
4685 //in case I_32x32 isn't initialized when using FastIntraMode for per-frame control (I: enable; P/B: disable)
4686 if (m_cmKernelMap.count("I_32X32") == 0)
4687 {
4688 m_cmKernelMap["I_32X32"] = new CMRTKernelI32x32UMD();
4689 m_cmKernelMap["I_32X32"]->Init(nullptr, m_cmKernelMap["2xScaling"]->m_cmDev, m_cmKernelMap["2xScaling"]->m_cmQueue, m_cmKernelMap["2xScaling"]->m_cmTask, nullptr);
4690 }
4691
4692 if (m_cmKernelMap.count("I_16X16_SAD") == 0)
4693 {
4694 m_cmKernelMap["I_16X16_SAD"] = new CMRTKernelI16x16SadUMD();
4695 m_cmKernelMap["I_16X16_SAD"]->Init(nullptr, m_cmKernelMap["2xScaling"]->m_cmDev, m_cmKernelMap["2xScaling"]->m_cmQueue, m_cmKernelMap["2xScaling"]->m_cmTask, m_cmKernelMap["I_32X32"]->m_cmProgram);
4696 }
4697
4698 m_cmKernelMap["I_16X16_SAD"]->SetupCurbe(curbe);
4699
4700 CODECHAL_MEDIA_STATE_TYPE encFunctionType = CODECHAL_MEDIA_STATE_16x16_PU_SAD;
4701 CODECHAL_DEBUG_TOOL(
4702 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpMDFCurbe(
4703 encFunctionType,
4704 (uint8_t *)curbe, sizeof(*curbe)));
4705 )
4706
4707 m_cmKernelMap["I_16X16_SAD"]->AllocateSurfaces(&I16x16SadParams);
4708
4709 //No need to wait for task finished
4710 m_cmEvent = CM_NO_EVENT;
4711 m_cmKernelMap["I_16X16_SAD"]->CreateAndDispatchKernel(m_cmEvent, false, (!m_singleTaskPhaseSupported));
4712
4713 return eStatus;
4714 }
4715
Encode16x16PuModeDecisionKernel()4716 MOS_STATUS CodechalFeiHevcStateG9Skl::Encode16x16PuModeDecisionKernel()
4717 {
4718 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
4719
4720 CODECHAL_ENCODE_FUNCTION_ENTER;
4721
4722 PerfTagSetting perfTag;
4723 CODECHAL_ENCODE_SET_PERFTAG_INFO(perfTag, CODECHAL_ENCODE_PERFTAG_CALL_16X16_PU_MD);
4724
4725 // Setup CURBE
4726 int32_t sliceQp = CalSliceQp();
4727 uint8_t sliceType = PicCodingTypeToSliceType(m_hevcPicParams->CodingType);
4728
4729 double lambdaScaleFactor = 0.46 + sliceQp - 22;
4730 if (lambdaScaleFactor < 0)
4731 {
4732 lambdaScaleFactor = 0.46;
4733 }
4734
4735 if (lambdaScaleFactor > 15)
4736 {
4737 lambdaScaleFactor = 15;
4738 }
4739
4740 double squredLambda = lambdaScaleFactor * pow(2.0, ((double)sliceQp-12.0)/6);
4741 m_fixedPointLambdaForLuma = (uint32_t)(squredLambda * (1<<10));
4742
4743 double lambdaScalingFactor = 1.0;
4744 double qpLambda = m_qpLambdaMd[sliceType][sliceQp];
4745 double squaredQpLambda = qpLambda * qpLambda;
4746 m_fixedPointLambdaForChroma = (uint32_t)(lambdaScalingFactor * squaredQpLambda * (1<<10));
4747
4748 LoadCosts(sliceType, (uint8_t)sliceQp, INTRA_TRANSFORM_HAAR);
4749
4750 CODECHAL_FEI_HEVC_I_16x16_PU_MODEDECISION_CURBE_G9 cmd, *curbe = &cmd;
4751 MOS_ZeroMemory(curbe, sizeof(*curbe));
4752
4753 uint32_t log2MaxCUSize = m_hevcSeqParams->log2_max_coding_block_size_minus3 + 3;
4754 curbe->DW0.FrameWidth = MOS_ALIGN_CEIL(m_frameWidth, CODECHAL_MACROBLOCK_WIDTH);
4755 curbe->DW0.FrameHeight = MOS_ALIGN_CEIL(m_frameHeight, CODECHAL_MACROBLOCK_HEIGHT);
4756
4757 curbe->DW1.Log2MaxCUSize = log2MaxCUSize;
4758 curbe->DW1.Log2MinCUSize = m_hevcSeqParams->log2_min_coding_block_size_minus3 + 3;
4759 curbe->DW1.Log2MinTUSize = m_hevcSeqParams->log2_min_transform_block_size_minus2 + 2;
4760 curbe->DW1.SliceQp = sliceQp;
4761
4762 curbe->DW2.FixedPoint_Lambda_PredMode = m_fixedPointLambdaForChroma;
4763
4764 curbe->DW3.LambdaScalingFactor = 1;
4765 curbe->DW3.SliceType = sliceType;
4766 curbe->DW3.EnableIntraEarlyExit = (m_hevcSeqParams->TargetUsage == 0x04) ? ((m_hevcPicParams->CodingType == I_TYPE) ? 0 : 1) : 0;
4767 curbe->DW3.BRCEnable = m_encodeParams.bMbQpDataEnabled || m_brcEnabled;
4768 curbe->DW3.LCUBRCEnable = m_encodeParams.bMbQpDataEnabled || m_lcuBrcEnabled;
4769 curbe->DW3.ROIEnable = (m_hevcPicParams->NumROI > 0);
4770 curbe->DW3.FASTSurveillanceFlag = (m_hevcPicParams->CodingType == I_TYPE) ? 0 : m_hevcSeqParams->bVideoSurveillance;
4771 curbe->DW3.EnableRollingIntra = m_hevcPicParams->bEnableRollingIntraRefresh;
4772 //Given only Column Rolling I is supported, if in future, Row Rolling I support to be added, then, need to make change here as per Kernel
4773 curbe->DW3.IntraRefreshEn = m_hevcPicParams->bEnableRollingIntraRefresh;
4774 curbe->DW3.HalfUpdateMixedLCU = 0;
4775 curbe->DW3.EnableQualityImprovement = m_encodeParams.bQualityImprovementEnable;
4776
4777 curbe->DW4.PenaltyForIntra8x8NonDCPredMode = 0;
4778 curbe->DW4.IntraComputeType = 1;
4779 curbe->DW4.AVCIntra8x8Mask = 0;
4780 curbe->DW4.IntraSadAdjust = 2;
4781
4782 double lambdaMd = sqrt(0.57*pow(2.0, ((double)sliceQp-12.0)/3));
4783 squredLambda = lambdaMd * lambdaMd;
4784 uint32_t newLambda = (uint32_t)(squredLambda*(1<<10));
4785 curbe->DW5.FixedPoint_Lambda_CU_Mode_for_Cost_Calculation = newLambda;
4786
4787 curbe->DW6.ScreenContentFlag = m_hevcPicParams->bScreenContent;
4788
4789 curbe->DW7.ModeCostIntraNonPred = m_modeCost[0];
4790 curbe->DW7.ModeCostIntra16x16 = m_modeCost[1];
4791 curbe->DW7.ModeCostIntra8x8 = m_modeCost[2];
4792 curbe->DW7.ModeCostIntra4x4 = m_modeCost[3];
4793
4794 curbe->DW8.FixedPoint_Lambda_CU_Mode_for_Luma = m_fixedPointLambdaForLuma;
4795
4796 if (m_hevcPicParams->bEnableRollingIntraRefresh)
4797 {
4798 curbe->DW9.IntraRefreshMBNum = m_hevcPicParams->IntraInsertionLocation;
4799 curbe->DW9.IntraRefreshQPDelta = m_hevcPicParams->QpDeltaForInsertedIntra;
4800 curbe->DW9.IntraRefreshUnitInMB = m_hevcPicParams->IntraInsertionSize;
4801 }
4802
4803 curbe->DW10.SimplifiedFlagForInter = 0;
4804 if (m_encodeParams.bReportStatisticsEnabled)
4805 {
4806 curbe->DW10.HaarTransformMode = true;
4807 }
4808 else
4809 {
4810 curbe->DW10.HaarTransformMode = (m_hevcPicParams->CodingType == I_TYPE) ? false : true;
4811 }
4812
4813 IFrameKernelParams I16x16ModeParams;
4814 MOS_ZeroMemory(&I16x16ModeParams, sizeof(I16x16ModeParams));
4815
4816 I16x16ModeParams.m_cmSurfCurrY = &m_rawSurfaceToEnc->OsResource;
4817 I16x16ModeParams.m_cmSurfSAD16x16 = &m_sad16x16Pu.sResource;
4818 I16x16ModeParams.m_cmSurfPOCDbuf = &m_resMbCodeSurface;
4819 I16x16ModeParams.m_bufSize = m_mbCodeSize - m_mvOffset;
4820 I16x16ModeParams.m_bufOffset = m_mvOffset;
4821 I16x16ModeParams.m_cmSurfPer32x32PUDataOut = &m_32x32PuOutputData.sResource;
4822 I16x16ModeParams.m_cmSurfVMEMode = &m_vme8x8Mode.sResource;
4823 I16x16ModeParams.m_cmSurfSliceMap = &m_sliceMapSurface.OsResource;
4824 I16x16ModeParams.m_cmSurfSIF = &m_simplestIntraSurface.OsResource;
4825 I16x16ModeParams.m_cmSurfCombinedQP = (MOS_RESOURCE*)m_allocator->GetResource(m_standard, brcInputForEncKernel);
4826 I16x16ModeParams.m_cmLCUQPSurf = &m_lcuQP.OsResource;
4827 I16x16ModeParams.m_cmBRCConstSurf = &m_brcBuffers.sBrcConstantDataBuffer[m_currRecycledBufIdx].OsResource;
4828
4829 if (m_cmKernelMap.count("I_16X16_MODE") == 0)
4830 {
4831 m_cmKernelMap["I_16X16_MODE"] = new CMRTKernelI16x16ModeUMD();
4832 m_cmKernelMap["I_16X16_MODE"]->Init(nullptr, m_cmKernelMap["2xScaling"]->m_cmDev, m_cmKernelMap["2xScaling"]->m_cmQueue, m_cmKernelMap["2xScaling"]->m_cmTask, m_cmKernelMap["I_32X32"]->m_cmProgram);
4833 }
4834
4835 m_cmKernelMap["I_16X16_MODE"]->SetupCurbe(curbe);
4836
4837 CODECHAL_MEDIA_STATE_TYPE encFunctionType = CODECHAL_MEDIA_STATE_16x16_PU_MODE_DECISION;
4838 CODECHAL_DEBUG_TOOL(
4839 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpMDFCurbe(
4840 encFunctionType,
4841 (uint8_t *)curbe, sizeof(*curbe)));
4842 )
4843
4844 m_cmKernelMap["I_16X16_MODE"]->AllocateSurfaces(&I16x16ModeParams);
4845
4846 //No need to wait for task finished
4847 m_cmEvent = CM_NO_EVENT;
4848 m_cmKernelMap["I_16X16_MODE"]->CreateAndDispatchKernel(m_cmEvent, false, (!m_singleTaskPhaseSupported));
4849
4850 return eStatus;
4851 }
4852
Encode8x8PUKernel()4853 MOS_STATUS CodechalFeiHevcStateG9Skl::Encode8x8PUKernel()
4854 {
4855 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
4856
4857 CODECHAL_ENCODE_FUNCTION_ENTER;
4858
4859 PerfTagSetting perfTag;
4860 CODECHAL_ENCODE_SET_PERFTAG_INFO(perfTag, CODECHAL_ENCODE_PERFTAG_CALL_8X8_PU);
4861
4862 // Setup CURBE
4863 uint32_t log2MaxCUSize = m_hevcSeqParams->log2_max_coding_block_size_minus3 + 3;
4864 CODECHAL_FEI_HEVC_I_8x8_PU_CURBE_G9 cmd, *curbe = &cmd;
4865 MOS_ZeroMemory(curbe, sizeof(*curbe));
4866
4867 curbe->DW0.FrameWidth = MOS_ALIGN_CEIL(m_frameWidth, CODECHAL_MACROBLOCK_WIDTH);
4868 curbe->DW0.FrameHeight = MOS_ALIGN_CEIL(m_frameHeight, CODECHAL_MACROBLOCK_HEIGHT);
4869
4870 curbe->DW1.SliceType = PicCodingTypeToSliceType(m_hevcPicParams->CodingType);
4871 curbe->DW1.PuType = 2; // 8x8
4872 curbe->DW1.DcFilterFlag = true;
4873 curbe->DW1.AngleRefineFlag = true;
4874 curbe->DW1.LCUType = (log2MaxCUSize==6)? 0 : 1;
4875 curbe->DW1.ScreenContentFlag = m_hevcPicParams->bScreenContent;
4876 curbe->DW1.EnableIntraEarlyExit = (m_hevcSeqParams->TargetUsage == 0x04) ? ((m_hevcPicParams->CodingType == I_TYPE) ? 0 : 1) : 0;
4877 curbe->DW1.EnableDebugDump = false;
4878 curbe->DW1.BRCEnable = m_encodeParams.bMbQpDataEnabled || m_brcEnabled;
4879 curbe->DW1.LCUBRCEnable = m_encodeParams.bMbQpDataEnabled || m_lcuBrcEnabled;
4880 curbe->DW1.ROIEnable = (m_hevcPicParams->NumROI > 0);
4881 curbe->DW1.FASTSurveillanceFlag = (m_hevcPicParams->CodingType == I_TYPE) ? 0 : m_hevcSeqParams->bVideoSurveillance;
4882 curbe->DW1.EnableQualityImprovement = m_encodeParams.bQualityImprovementEnable;
4883 curbe->DW1.QPValue = CalSliceQp();
4884 if (m_hevcPicParams->bEnableRollingIntraRefresh)
4885 {
4886 curbe->DW1.EnableRollingIntra = true;
4887 curbe->DW1.IntraRefreshEn = true;
4888 curbe->DW1.HalfUpdateMixedLCU = 0;
4889
4890 curbe->DW5.IntraRefreshMBNum = m_hevcPicParams->IntraInsertionLocation;
4891 curbe->DW5.IntraRefreshQPDelta = m_hevcPicParams->QpDeltaForInsertedIntra;
4892 curbe->DW5.IntraRefreshUnitInMB = m_hevcPicParams->IntraInsertionSize;
4893
4894 int32_t qp = CalSliceQp();
4895 curbe->DW1.QPValue = (uint32_t)qp;
4896 }
4897
4898 curbe->DW2.LumaLambda = m_fixedPointLambdaForLuma;
4899
4900 curbe->DW3.ChromaLambda = m_fixedPointLambdaForChroma;
4901
4902 if (m_encodeParams.bReportStatisticsEnabled)
4903 {
4904 curbe->DW4.HaarTransformFlag = true;
4905 }
4906 else
4907 {
4908 curbe->DW4.HaarTransformFlag = (m_hevcPicParams->CodingType == I_TYPE) ? false : true;
4909 }
4910 curbe->DW4.SimplifiedFlagForInter = false;
4911
4912 IFrameKernelParams I8x8Params;
4913 MOS_ZeroMemory(&I8x8Params, sizeof(I8x8Params));
4914
4915 I8x8Params.m_cmSurfCurrY = &m_rawSurfaceToEnc->OsResource;
4916 I8x8Params.m_cmSurfSliceMap = &m_sliceMapSurface.OsResource;
4917 I8x8Params.m_cmSurfVMEMode = &m_vme8x8Mode.sResource;
4918 I8x8Params.m_cmSurfMode = &m_intraMode.sResource;
4919 I8x8Params.m_cmSurfCombinedQP = (MOS_RESOURCE*)m_allocator->GetResource(m_standard, brcInputForEncKernel);
4920 I8x8Params.m_cmSurfSIF = &m_simplestIntraSurface.OsResource;
4921 I8x8Params.m_cmLCUQPSurf = &m_lcuQP.OsResource;
4922 I8x8Params.m_cmBRCConstSurf = &m_brcBuffers.sBrcConstantDataBuffer[m_currRecycledBufIdx].OsResource;
4923
4924 if (m_cmKernelMap.count("I_8X8") == 0)
4925 {
4926 m_cmKernelMap["I_8X8"] = new CMRTKernelI8x8UMD();
4927 m_cmKernelMap["I_8X8"]->Init(nullptr, m_cmKernelMap["2xScaling"]->m_cmDev, m_cmKernelMap["2xScaling"]->m_cmQueue, m_cmKernelMap["2xScaling"]->m_cmTask, m_cmKernelMap["I_32X32"]->m_cmProgram);
4928 }
4929
4930 m_cmKernelMap["I_8X8"]->SetupCurbe(curbe);
4931
4932 CODECHAL_MEDIA_STATE_TYPE encFunctionType = CODECHAL_MEDIA_STATE_8x8_PU;
4933 CODECHAL_DEBUG_TOOL(
4934 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpMDFCurbe(
4935 encFunctionType,
4936 (uint8_t *)curbe, sizeof(*curbe)));
4937 )
4938
4939 m_cmKernelMap["I_8X8"]->AllocateSurfaces(&I8x8Params);
4940
4941 //No need to wait for task finished
4942 m_cmEvent = CM_NO_EVENT;
4943 m_cmKernelMap["I_8X8"]->CreateAndDispatchKernel(m_cmEvent, false, (!m_singleTaskPhaseSupported));
4944
4945 return eStatus;
4946 }
4947
Encode8x8PUFMODEKernel()4948 MOS_STATUS CodechalFeiHevcStateG9Skl::Encode8x8PUFMODEKernel()
4949 {
4950 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
4951
4952 CODECHAL_ENCODE_FUNCTION_ENTER;
4953
4954 PerfTagSetting perfTag;
4955 CODECHAL_ENCODE_SET_PERFTAG_INFO(perfTag, CODECHAL_ENCODE_PERFTAG_CALL_8X8_FMODE);
4956
4957 // Setup CURBE
4958 int32_t qp = CalSliceQp();
4959 uint32_t sliceQp = (uint32_t)qp;
4960 uint32_t log2MaxCUSize = m_hevcSeqParams->log2_max_coding_block_size_minus3 + 3;
4961
4962 CODECHAL_FEI_HEVC_I_8x8_PU_FMODE_CURBE_G9 cmd, *curbe = &cmd;
4963 MOS_ZeroMemory(curbe, sizeof(*curbe));
4964 curbe->DW0.FrameWidth = MOS_ALIGN_CEIL(m_frameWidth, CODECHAL_MACROBLOCK_WIDTH);
4965 curbe->DW0.FrameHeight = MOS_ALIGN_CEIL(m_frameHeight, CODECHAL_MACROBLOCK_HEIGHT);
4966
4967 curbe->DW1.SliceType = PicCodingTypeToSliceType(m_hevcPicParams->CodingType);
4968 curbe->DW1.PuType = 2;
4969 curbe->DW1.PakReordingFlag = (m_hevcPicParams->CodingType == I_TYPE) ? true : false;
4970 curbe->DW1.LCUType = (log2MaxCUSize == 6)? 0 : 1;
4971 curbe->DW1.ScreenContentFlag = m_hevcPicParams->bScreenContent;
4972 curbe->DW1.EnableIntraEarlyExit = (m_hevcSeqParams->TargetUsage == 0x04) ? ((m_hevcPicParams->CodingType == I_TYPE) ? 0 : 1) : 0;
4973 curbe->DW1.EnableDebugDump = false;
4974 curbe->DW1.BRCEnable = m_encodeParams.bMbQpDataEnabled || m_brcEnabled;
4975 curbe->DW1.LCUBRCEnable = m_encodeParams.bMbQpDataEnabled || m_lcuBrcEnabled;
4976 curbe->DW1.ROIEnable = (m_hevcPicParams->NumROI > 0);
4977 curbe->DW1.FASTSurveillanceFlag = (m_hevcPicParams->CodingType == I_TYPE) ? 0 : m_hevcSeqParams->bVideoSurveillance;
4978 curbe->DW1.EnableRollingIntra = m_hevcPicParams->bEnableRollingIntraRefresh;
4979 curbe->DW1.IntraRefreshEn = m_hevcPicParams->bEnableRollingIntraRefresh;
4980 curbe->DW1.HalfUpdateMixedLCU = 0;
4981 curbe->DW1.EnableQualityImprovement = m_encodeParams.bQualityImprovementEnable;
4982 curbe->DW2.LambdaForLuma = m_fixedPointLambdaForLuma;
4983 if (m_hevcPicParams->CodingType != I_TYPE ||
4984 m_encodeParams.bReportStatisticsEnabled)
4985 {
4986 float hadBias = 2.0f;
4987
4988 double lambdaMd = m_qpLambdaMd[curbe->DW1.SliceType][sliceQp];
4989 lambdaMd = lambdaMd * hadBias;
4990 curbe->DW3.LambdaForDistCalculation = (uint32_t)(lambdaMd*(1<<10));
4991 }
4992 curbe->DW4.ModeCostFor8x8PU_TU8 = 0;
4993 curbe->DW5.ModeCostFor8x8PU_TU4 = 0;
4994 curbe->DW6.SATD16x16PuThreshold = MOS_MAX(200 * ((int32_t)sliceQp - 12), 0);
4995 curbe->DW6.BiasFactorToward8x8 = (m_hevcPicParams->bScreenContent) ? 1024 : 1126 + 102;
4996 curbe->DW7.Qp = sliceQp;
4997 curbe->DW7.QpForInter = 0;
4998 curbe->DW8.SimplifiedFlagForInter = false;
4999 curbe->DW8.EnableStatsDataDump = m_encodeParams.bReportStatisticsEnabled;
5000 // KBLControlFlag determines the PAK OBJ format as it varies from Gen9 to Gen9.5+
5001 curbe->DW8.KBLControlFlag = UsePlatformControlFlag();
5002 curbe->DW9.IntraRefreshMBNum = m_hevcPicParams->IntraInsertionLocation;
5003 curbe->DW9.IntraRefreshQPDelta = m_hevcPicParams->QpDeltaForInsertedIntra;
5004 curbe->DW9.IntraRefreshUnitInMB = m_hevcPicParams->IntraInsertionSize;
5005
5006 IFrameKernelParams I8x8ModeParams;
5007 MOS_ZeroMemory(&I8x8ModeParams, sizeof(I8x8ModeParams));
5008
5009 I8x8ModeParams.m_cmSurfPOCDbuf = &m_resMbCodeSurface;
5010 I8x8ModeParams.m_bufSize = m_mbCodeSize - m_mvOffset;
5011 I8x8ModeParams.m_bufOffset = m_mvOffset;
5012 I8x8ModeParams.m_cmSurfVMEMode = &m_vme8x8Mode.sResource;
5013 I8x8ModeParams.m_cmSurfMode = &m_intraMode.sResource;
5014 I8x8ModeParams.m_cmSurfSliceMap = &m_sliceMapSurface.OsResource;
5015 I8x8ModeParams.m_cmSurfIntraDist = &m_intraDist.sResource;
5016 I8x8ModeParams.m_cmSurfSIF = &m_simplestIntraSurface.OsResource;
5017 I8x8ModeParams.m_cmSurfCombinedQP = (MOS_RESOURCE*)m_allocator->GetResource(m_standard, brcInputForEncKernel);
5018 I8x8ModeParams.m_cmLCUQPSurf = &m_lcuQP.OsResource;
5019 I8x8ModeParams.m_cmBRCConstSurf = &m_brcBuffers.sBrcConstantDataBuffer[m_currRecycledBufIdx].OsResource;
5020
5021 if (m_cmKernelMap.count("I_8X8_MODE") == 0)
5022 {
5023 m_cmKernelMap["I_8X8_MODE"] = new CMRTKernelI8x8ModeUMD();
5024 m_cmKernelMap["I_8X8_MODE"]->Init(nullptr, m_cmKernelMap["2xScaling"]->m_cmDev, m_cmKernelMap["2xScaling"]->m_cmQueue, m_cmKernelMap["2xScaling"]->m_cmTask, m_cmKernelMap["I_32X32"]->m_cmProgram);
5025 }
5026
5027 m_cmKernelMap["I_8X8_MODE"]->SetupCurbe(curbe);
5028
5029 CODECHAL_MEDIA_STATE_TYPE encFunctionType = CODECHAL_MEDIA_STATE_8x8_PU_FMODE;
5030 CODECHAL_DEBUG_TOOL(
5031 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpMDFCurbe(
5032 encFunctionType,
5033 (uint8_t *)curbe, sizeof(*curbe)));
5034 )
5035
5036 m_cmKernelMap["I_8X8_MODE"]->AllocateSurfaces(&I8x8ModeParams);
5037
5038 //No need to wait for task finished
5039 m_cmEvent = CM_NO_EVENT;
5040 m_cmKernelMap["I_8X8_MODE"]->CreateAndDispatchKernel(m_cmEvent, false, ((!m_singleTaskPhaseSupported)|| m_lastTaskInPhase));
5041
5042 return eStatus;
5043 }
5044
Encode32X32BIntraCheckKernel()5045 MOS_STATUS CodechalFeiHevcStateG9Skl::Encode32X32BIntraCheckKernel()
5046 {
5047 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
5048
5049 CODECHAL_ENCODE_FUNCTION_ENTER;
5050
5051 PerfTagSetting perfTag;
5052 CODECHAL_ENCODE_SET_PERFTAG_INFO(perfTag, CODECHAL_ENCODE_PERFTAG_CALL_32X32_B_IC);
5053
5054 // Setup CURBE
5055 if (m_pictureCodingType == P_TYPE)
5056 {
5057 CalcLambda(CODECHAL_ENCODE_HEVC_P_SLICE, INTRA_TRANSFORM_HAAR);
5058 }
5059 else
5060 {
5061 CalcLambda(CODECHAL_ENCODE_HEVC_B_SLICE, INTRA_TRANSFORM_HAAR);
5062 }
5063 int32_t sliceQp = CalSliceQp();
5064
5065 double lambdaScalingFactor = 1.0;
5066 double qpLambda = m_qpLambdaMd[CODECHAL_ENCODE_HEVC_I_SLICE][sliceQp];
5067 double squaredQpLambda = qpLambda * qpLambda;
5068 m_fixedPointLambda = (uint32_t)(lambdaScalingFactor * squaredQpLambda * (1<<10));
5069
5070 CODECHAL_FEI_HEVC_B_32x32_PU_INTRA_CHECK_CURBE_G9 cmd, *curbe = &cmd;
5071 MOS_ZeroMemory(curbe, sizeof(*curbe));
5072 curbe->DW0.FrameWidth = MOS_ALIGN_CEIL(m_frameWidth, CODECHAL_MACROBLOCK_WIDTH);
5073 curbe->DW0.FrameHeight = MOS_ALIGN_CEIL(m_frameHeight, CODECHAL_MACROBLOCK_HEIGHT);
5074
5075 curbe->DW1.EnableDebugDump = false;
5076 curbe->DW1.EnableIntraEarlyExit = (m_hevcPicParams->CodingType == I_TYPE) ? 0 : 1;
5077 curbe->DW1.Flags = 0;
5078 curbe->DW1.Log2MinTUSize = m_hevcSeqParams->log2_min_transform_block_size_minus2 + 2;
5079 curbe->DW1.SliceType = m_hevcSliceParams->slice_type;
5080 curbe->DW1.HMEEnable = 0;
5081 curbe->DW1.FASTSurveillanceFlag = (m_hevcPicParams->CodingType == I_TYPE) ? 0 : m_hevcSeqParams->bVideoSurveillance;
5082
5083 curbe->DW2.QpMultiplier = 100;
5084 curbe->DW2.QpValue = 0; // MBZ
5085
5086 PBFrameKernelParams PB32x32Params;
5087 MOS_ZeroMemory(&PB32x32Params, sizeof(PB32x32Params));
5088
5089 PB32x32Params.m_cmSurfPer32x32ICOut = &m_32x32PuOutputData.sResource;
5090 PB32x32Params.m_cmSurfCurrY = &m_rawSurfaceToEnc->OsResource;
5091 PB32x32Params.m_cmSurfCurrY2 = &m_scaled2xSurface.OsResource;
5092 PB32x32Params.m_cmSurfSliceMap = &m_sliceMapSurface.OsResource;
5093 PB32x32Params.m_cmSurfSIF = &m_simplestIntraSurface.OsResource;
5094 PB32x32Params.m_cmLCUQPSurf = &m_lcuQP.OsResource;
5095
5096 if (m_cmKernelMap.count("PB_32x32") == 0)
5097 {
5098 m_cmKernelMap["PB_32x32"] = new CMRTKernelPB32x32UMD();
5099 m_cmKernelMap["PB_32x32"]->Init(nullptr, m_cmKernelMap["2xScaling"]->m_cmDev, m_cmKernelMap["2xScaling"]->m_cmQueue, m_cmKernelMap["2xScaling"]->m_cmTask, nullptr);
5100 }
5101
5102 m_cmKernelMap["PB_32x32"]->SetupCurbe(curbe);
5103
5104 CODECHAL_MEDIA_STATE_TYPE encFunctionType = CODECHAL_MEDIA_STATE_32x32_B_INTRA_CHECK;
5105 CODECHAL_DEBUG_TOOL(
5106 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpMDFCurbe(
5107 encFunctionType,
5108 (uint8_t *)curbe, sizeof(*curbe)));
5109 )
5110
5111 m_cmKernelMap["PB_32x32"]->AllocateSurfaces(&PB32x32Params);
5112
5113 //No need to wait for task finished
5114 m_cmEvent = CM_NO_EVENT;
5115 m_cmKernelMap["PB_32x32"]->CreateAndDispatchKernel(m_cmEvent, false, (!m_singleTaskPhaseSupported));
5116
5117 return eStatus;
5118 }
5119
Encode8x8BPakKernel(PCODECHAL_FEI_HEVC_B_MB_ENC_CURBE_G9 pEncBCurbe)5120 MOS_STATUS CodechalFeiHevcStateG9Skl::Encode8x8BPakKernel(
5121 PCODECHAL_FEI_HEVC_B_MB_ENC_CURBE_G9 pEncBCurbe)
5122 {
5123 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
5124
5125 CODECHAL_ENCODE_FUNCTION_ENTER;
5126
5127 CODECHAL_ENCODE_CHK_NULL_RETURN(pEncBCurbe);
5128
5129 PerfTagSetting perfTag;
5130 CODECHAL_ENCODE_SET_PERFTAG_INFO(perfTag, CODECHAL_ENCODE_PERFTAG_CALL_PAK_KERNEL);
5131
5132 //Setup CURBE
5133 CODECHAL_FEI_HEVC_B_PAK_CURBE_G9 cmd, *curbe = &cmd;
5134 MOS_ZeroMemory(curbe, sizeof(*curbe));
5135 curbe->DW0.FrameWidth = MOS_ALIGN_CEIL(m_frameWidth, CODECHAL_MACROBLOCK_WIDTH);
5136 curbe->DW0.FrameHeight = MOS_ALIGN_CEIL(m_frameHeight, CODECHAL_MACROBLOCK_HEIGHT);
5137
5138 curbe->DW1.MaxVmvR = pEncBCurbe->DW44.MaxVmvR;
5139 curbe->DW1.Qp = pEncBCurbe->DW13.QpPrimeY;
5140 curbe->DW2.BrcEnable = pEncBCurbe->DW36.BRCEnable;
5141 curbe->DW2.LcuBrcEnable = pEncBCurbe->DW36.LCUBRCEnable;
5142 curbe->DW2.ScreenContent = pEncBCurbe->DW47.ScreenContentFlag;
5143 curbe->DW2.SimplestIntraEnable = pEncBCurbe->DW47.SkipIntraKrnFlag;
5144 curbe->DW2.SliceType = pEncBCurbe->DW4.SliceType;
5145 curbe->DW2.EnableWA = 0;
5146 curbe->DW2.ROIEnable = (m_hevcPicParams->NumROI > 0);
5147 curbe->DW2.FASTSurveillanceFlag = (m_hevcPicParams->CodingType == I_TYPE) ? 0 : m_hevcSeqParams->bVideoSurveillance;
5148 // KBLControlFlag determines the PAK OBJ format as it varies from Gen9 to Gen9.5+
5149 curbe->DW2.KBLControlFlag = UsePlatformControlFlag();
5150 curbe->DW2.EnableRollingIntra = m_hevcPicParams->bEnableRollingIntraRefresh;
5151 curbe->DW2.EnableQualityImprovement = m_encodeParams.bQualityImprovementEnable;
5152 curbe->DW3.IntraRefreshQPDelta = m_hevcPicParams->QpDeltaForInsertedIntra;
5153 curbe->DW3.IntraRefreshMBNum = m_hevcPicParams->IntraInsertionLocation;
5154 curbe->DW3.IntraRefreshUnitInMB = m_hevcPicParams->IntraInsertionSize;
5155
5156 PBFrameKernelParams PB8x8PakParams;
5157 MOS_ZeroMemory(&PB8x8PakParams, sizeof(PB8x8PakParams));
5158
5159 PB8x8PakParams.m_cmSurfPOCDbuf = &m_resMbCodeSurface;
5160 PB8x8PakParams.m_bufSize = m_mbCodeSize - m_mvOffset;
5161 PB8x8PakParams.m_bufOffset = m_mvOffset;
5162 PB8x8PakParams.m_cmSurfSliceMap = &m_sliceMapSurface.OsResource;
5163 PB8x8PakParams.m_cmSurfCombinedQP = (MOS_RESOURCE*)m_allocator->GetResource(m_standard, brcInputForEncKernel);
5164 PB8x8PakParams.m_cmLCUQPSurf = &m_lcuQP.OsResource;
5165 PB8x8PakParams.m_cmBRCConstSurf = &m_brcBuffers.sBrcConstantDataBuffer[m_currRecycledBufIdx].OsResource;
5166 PB8x8PakParams.m_cmSurfMVIndex = &m_mvIndex.sResource;
5167 PB8x8PakParams.m_cmSurfMVPred = &m_mvpIndex.sResource;
5168
5169 if (m_cmKernelMap.count("PB_8x8_PAK") == 0)
5170 {
5171 m_cmKernelMap["PB_8x8_PAK"] = new CMRTKernelPB8x8PakUMD();
5172 m_cmKernelMap["PB_8x8_PAK"]->Init(nullptr, m_cmKernelMap["2xScaling"]->m_cmDev, m_cmKernelMap["2xScaling"]->m_cmQueue, m_cmKernelMap["2xScaling"]->m_cmTask, m_cmKernelMap["PB_32x32"]->m_cmProgram);
5173 }
5174
5175 m_cmKernelMap["PB_8x8_PAK"]->SetupCurbe(curbe);
5176
5177 CODECHAL_MEDIA_STATE_TYPE encFunctionType = CODECHAL_MEDIA_STATE_HEVC_B_PAK;
5178 CODECHAL_DEBUG_TOOL(
5179 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpMDFCurbe(
5180 encFunctionType,
5181 (uint8_t *)curbe, sizeof(*curbe)));
5182 )
5183
5184 m_cmKernelMap["PB_8x8_PAK"]->AllocateSurfaces(&PB8x8PakParams);
5185
5186 //No need to wait for task finished
5187 m_cmEvent = CM_NO_EVENT;
5188 m_cmKernelMap["PB_8x8_PAK"]->CreateAndDispatchKernel(m_cmEvent, false, ((!m_singleTaskPhaseSupported)|| m_lastTaskInPhase));
5189
5190 return eStatus;
5191 }
5192
Encode8x8PBMbEncKernel()5193 MOS_STATUS CodechalFeiHevcStateG9Skl::Encode8x8PBMbEncKernel()
5194 {
5195 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
5196
5197 CODECHAL_ENCODE_FUNCTION_ENTER;
5198
5199 PerfTagSetting perfTag;
5200 CODECHAL_ENCODE_SET_PERFTAG_INFO(perfTag, CODECHAL_ENCODE_PERFTAG_CALL_MBENC_KERNEL);
5201
5202 int32_t sliceQp = CalSliceQp();
5203 uint8_t sliceType = PicCodingTypeToSliceType(m_pictureCodingType);
5204
5205 if (m_feiPicParams->FastIntraMode)
5206 {
5207 // When TU=7, lambda is not computed in the 32x32 MD stage for it is skipped.
5208 CalcLambda(sliceType, INTRA_TRANSFORM_HAAR);
5209 }
5210 LoadCosts(sliceType, (uint8_t)sliceQp, INTRA_TRANSFORM_REGULAR);
5211
5212 uint8_t mbCodeIdxForTempMVP = 0xFF;
5213 if(m_pictureCodingType != I_TYPE)
5214 {
5215 if (m_hevcPicParams->CollocatedRefPicIndex != 0xFF && m_hevcPicParams->CollocatedRefPicIndex < CODEC_MAX_NUM_REF_FRAME_HEVC)
5216 {
5217 uint8_t FrameIdx = m_hevcPicParams->RefFrameList[m_hevcPicParams->CollocatedRefPicIndex].FrameIdx;
5218
5219 mbCodeIdxForTempMVP = m_refList[FrameIdx]->ucScalingIdx;
5220 }
5221
5222 if (mbCodeIdxForTempMVP == 0xFF && m_hevcSliceParams->slice_temporal_mvp_enable_flag)
5223 {
5224 // Temporal reference MV index is invalid and so disable the temporal MVP
5225 CODECHAL_ENCODE_ASSERT(false);
5226 m_hevcSliceParams->slice_temporal_mvp_enable_flag = false;
5227 }
5228 }
5229 else
5230 {
5231 mbCodeIdxForTempMVP = 0;
5232 }
5233
5234 CODECHAL_ENCODE_CHK_STATUS_RETURN(GenerateWalkingControlRegion());
5235
5236 //Setup CURBE
5237 uint8_t forwardTransformThd[7] = { 0 };
5238 CalcForwardCoeffThd(forwardTransformThd, sliceQp);
5239
5240 uint32_t curbeSize = 0;
5241 void *defaultCurbe = (void *)GetDefaultCurbeEncBKernel(curbeSize);
5242 CODECHAL_ENCODE_ASSERT(defaultCurbe);
5243
5244 CODECHAL_FEI_HEVC_B_MB_ENC_CURBE_G9 cmd, *curbe = &cmd;
5245 CODECHAL_ENCODE_CHK_STATUS_RETURN(MOS_SecureMemcpy(curbe, sizeof(cmd), defaultCurbe, curbeSize));
5246
5247 bool transform_8x8_mode_flag = true;
5248 uint32_t SearchPath = (m_feiPicParams->SearchWindow == 5) ? 2 : 1; // 2 means full search, 1 means diamand search
5249 uint32_t LenSP = m_feiPicParams->LenSP;
5250 uint32_t RefWidth = (m_feiPicParams->RefWidth < 20) ? 20 : m_feiPicParams->RefWidth;
5251 uint32_t RefHeight = (m_feiPicParams->RefHeight < 20) ? 20 : m_feiPicParams->RefHeight;
5252
5253 switch (m_feiPicParams->SearchWindow)
5254 {
5255 case 0:
5256 // not use predefined search window
5257 if ((m_feiPicParams->SearchPath != 0) && (m_feiPicParams->SearchPath != 1) && (m_feiPicParams->SearchPath != 2))
5258 {
5259 CODECHAL_ENCODE_ASSERTMESSAGE("Invalid picture FEI MB ENC input SearchPath for SearchWindow=0 case!!!.");
5260 eStatus = MOS_STATUS_INVALID_PARAMETER;
5261 return eStatus;
5262 }
5263 SearchPath = m_feiPicParams->SearchPath;
5264 if(((RefWidth * RefHeight) > 2048) || (RefWidth > 64) || (RefHeight > 64))
5265 {
5266 CODECHAL_ENCODE_ASSERTMESSAGE("Invalid picture FEI MB ENC input RefWidth/RefHeight size for SearchWindow=0 case!!!.");
5267 eStatus = MOS_STATUS_INVALID_PARAMETER;
5268 return eStatus;
5269 }
5270 break;
5271 case 1:
5272 // Tiny SUs 24x24 window
5273 RefWidth = 24;
5274 RefHeight = 24;
5275 LenSP = 4;
5276 break;
5277 case 2:
5278 // Small SUs 28x28 window
5279 RefWidth = 28;
5280 RefHeight = 28;
5281 LenSP = 9;
5282 break;
5283 case 3:
5284 // Diamond SUs 48x40 window
5285 RefWidth = 48;
5286 RefHeight = 40;
5287 LenSP = 16;
5288 break;
5289 case 4:
5290 // Large Diamond SUs 48x40 window
5291 RefWidth = 48;
5292 RefHeight = 40;
5293 LenSP = 32;
5294 break;
5295 case 5:
5296 // Exhaustive SUs 48x40 window
5297 RefWidth = 48;
5298 RefHeight = 40;
5299 LenSP = 48;
5300 if (m_hevcSeqParams->TargetUsage != 7)
5301 {
5302 if (m_pictureCodingType == B_TYPE)
5303 {
5304 LenSP = 48;
5305 } else {
5306 LenSP = 57;
5307 }
5308 } else {
5309 LenSP = 25;
5310 }
5311 break;
5312 default:
5313 CODECHAL_ENCODE_ASSERTMESSAGE("Invalid picture FEI MB ENC SearchWindow value for HEVC FEI on SKL!!!.");
5314 eStatus = MOS_STATUS_INVALID_PARAMETER;
5315 return eStatus;
5316 }
5317
5318 if((m_pictureCodingType == B_TYPE) && (curbe->DW3.BMEDisableFBR == 0))
5319 {
5320 if(RefWidth > 32)
5321 {
5322 RefWidth = 32;
5323 }
5324 if(RefHeight > 32)
5325 {
5326 RefHeight = 32;
5327 }
5328 }
5329
5330 curbe->DW0.AdaptiveEn = m_feiPicParams->AdaptiveSearch;
5331 curbe->DW0.T8x8FlagForInterEn = transform_8x8_mode_flag;
5332 curbe->DW2.PicWidth = m_picWidthInMb;
5333 curbe->DW2.LenSP = LenSP;
5334 curbe->DW3.SrcAccess = curbe->DW3.RefAccess = 0;
5335 if (m_feiPicParams->FastIntraMode)
5336 {
5337 curbe->DW3.FTEnable = (m_ftqBasedSkip[0x07] >> 1) & 0x01;
5338 }
5339 else
5340 {
5341 curbe->DW3.FTEnable = (m_ftqBasedSkip[0x04] >> 1) & 0x01;
5342 }
5343 curbe->DW3.SubPelMode = m_feiPicParams->SubPelMode;
5344
5345 curbe->DW4.PicHeightMinus1 = m_picHeightInMb - 1;
5346 curbe->DW4.EnableStatsDataDump = m_encodeParams.bReportStatisticsEnabled;
5347 curbe->DW4.HMEEnable = 0;
5348 curbe->DW4.SliceType = sliceType;
5349 curbe->DW4.EnableQualityImprovement = m_encodeParams.bQualityImprovementEnable;
5350 curbe->DW4.UseActualRefQPValue = false;
5351
5352 curbe->DW5.RefWidth = RefWidth;
5353 curbe->DW5.RefHeight = RefHeight;
5354
5355 curbe->DW7.IntraPartMask = 0x3;
5356
5357 curbe->DW6.FrameWidth = m_picWidthInMb * CODECHAL_MACROBLOCK_WIDTH;
5358 curbe->DW6.FrameHeight = m_picHeightInMb * CODECHAL_MACROBLOCK_HEIGHT;
5359
5360 curbe->DW8.Mode0Cost = m_modeCost[0];
5361 curbe->DW8.Mode1Cost = m_modeCost[1];
5362 curbe->DW8.Mode2Cost = m_modeCost[2];
5363 curbe->DW8.Mode3Cost = m_modeCost[3];
5364
5365 curbe->DW9.Mode4Cost = m_modeCost[4];
5366 curbe->DW9.Mode5Cost = m_modeCost[5];
5367 curbe->DW9.Mode6Cost = m_modeCost[6];
5368 curbe->DW9.Mode7Cost = m_modeCost[7];
5369
5370 curbe->DW10.Mode8Cost= m_modeCost[8];
5371 curbe->DW10.Mode9Cost= m_modeCost[9];
5372 curbe->DW10.RefIDCost = m_modeCost[10];
5373 curbe->DW10.ChromaIntraModeCost = m_modeCost[11];
5374
5375 curbe->DW11.MV0Cost = m_mvCost[0];
5376 curbe->DW11.MV1Cost = m_mvCost[1];
5377 curbe->DW11.MV2Cost = m_mvCost[2];
5378 curbe->DW11.MV3Cost = m_mvCost[3];
5379
5380 curbe->DW12.MV4Cost = m_mvCost[4];
5381 curbe->DW12.MV5Cost = m_mvCost[5];
5382 curbe->DW12.MV6Cost = m_mvCost[6];
5383 curbe->DW12.MV7Cost = m_mvCost[7];
5384
5385 curbe->DW13.QpPrimeY = sliceQp;
5386 uint8_t bitDepthChromaMinus8 = 0; // support 4:2:0 only
5387 int32_t qpBdOffsetC = 6 * bitDepthChromaMinus8;
5388 int32_t qPi = (int32_t)CodecHal_Clip3((-qpBdOffsetC), 51, (sliceQp + m_hevcPicParams->pps_cb_qp_offset));
5389 int32_t QPc = (qPi < 30) ? qPi : QPcTable[qPi - 30];
5390 curbe->DW13.QpPrimeCb= QPc + qpBdOffsetC;
5391 qPi = (int32_t)CodecHal_Clip3((-qpBdOffsetC), 51, (sliceQp + m_hevcPicParams->pps_cr_qp_offset));
5392 QPc = (qPi < 30) ? qPi : QPcTable[qPi - 30];
5393 curbe->DW13.QpPrimeCr= QPc;
5394
5395 curbe->DW14.SICFwdTransCoeffThreshold_0 = forwardTransformThd[0];
5396 curbe->DW14.SICFwdTransCoeffThreshold_1 = forwardTransformThd[1];
5397 curbe->DW14.SICFwdTransCoeffThreshold_2 = forwardTransformThd[2];
5398
5399 curbe->DW15.SICFwdTransCoeffThreshold_3 = forwardTransformThd[3];
5400 curbe->DW15.SICFwdTransCoeffThreshold_4 = forwardTransformThd[4];
5401 curbe->DW15.SICFwdTransCoeffThreshold_5 = forwardTransformThd[5];
5402 curbe->DW15.SICFwdTransCoeffThreshold_6 = forwardTransformThd[6];
5403
5404 if (SearchPath == 1)
5405 {
5406 // diamond search
5407 if (m_pictureCodingType == P_TYPE)
5408 {
5409 CODECHAL_ENCODE_CHK_STATUS_RETURN(MOS_SecureMemcpy(&(curbe->DW16), 14 * sizeof(uint32_t), &(m_encBTu7PCurbeInit[16]), 14 * sizeof(uint32_t)));
5410 }
5411 else if (m_pictureCodingType == B_TYPE)
5412 {
5413 CODECHAL_ENCODE_CHK_STATUS_RETURN(MOS_SecureMemcpy(&(curbe->DW16), 14 * sizeof(uint32_t), &(m_encBTu7BCurbeInit[16]), 14 * sizeof(uint32_t)));
5414 }
5415 }
5416 else if((SearchPath != 0) && (SearchPath != 2))
5417 {
5418 // default 0 and 2 are full sparil search
5419 CODECHAL_ENCODE_ASSERT(false);
5420 }
5421
5422 curbe->DW32.SkipVal = m_skipValB[curbe->DW3.BlockBasedSkipEnable][transform_8x8_mode_flag][sliceQp];
5423
5424 if(m_pictureCodingType == I_TYPE)
5425 {
5426 *(float*)&(curbe->DW34.LambdaME) = 0.0;
5427 }
5428 else if (m_pictureCodingType == P_TYPE)
5429 {
5430 *(float*)&(curbe->DW34.LambdaME) = (float)m_qpLambdaMe[CODECHAL_ENCODE_HEVC_P_SLICE][sliceQp];
5431 }
5432 else
5433 {
5434 *(float*)&(curbe->DW34.LambdaME) = (float)m_qpLambdaMe[CODECHAL_ENCODE_HEVC_B_SLICE][sliceQp];
5435 }
5436
5437 curbe->DW35.ModeCostSp = m_modeCostSp;
5438 curbe->DW35.SimpIntraInterThreshold = m_simplestIntraInterThreshold;
5439
5440 curbe->DW36.NumRefIdxL0MinusOne = m_hevcSliceParams->num_ref_idx_l0_active_minus1;
5441 curbe->DW36.NumRefIdxL1MinusOne = m_hevcSliceParams->num_ref_idx_l1_active_minus1;
5442 curbe->DW36.BRCEnable = m_encodeParams.bMbQpDataEnabled || m_brcEnabled;
5443 curbe->DW36.LCUBRCEnable = m_encodeParams.bMbQpDataEnabled || m_lcuBrcEnabled;
5444 curbe->DW36.PowerSaving = m_powerSavingEnabled;
5445 curbe->DW36.ROIEnable = (m_hevcPicParams->NumROI > 0);
5446 curbe->DW36.FASTSurveillanceFlag = (m_hevcPicParams->CodingType == I_TYPE) ? 0 : m_hevcSeqParams->bVideoSurveillance;
5447
5448 if(m_pictureCodingType != I_TYPE)
5449 {
5450 curbe->DW37.ActualQpRefID0List0 = GetQPValueFromRefList(LIST_0, CODECHAL_ENCODE_REF_ID_0);
5451 curbe->DW37.ActualQpRefID1List0 = GetQPValueFromRefList(LIST_0, CODECHAL_ENCODE_REF_ID_1);
5452 curbe->DW37.ActualQpRefID2List0 = GetQPValueFromRefList(LIST_0, CODECHAL_ENCODE_REF_ID_2);
5453 curbe->DW37.ActualQpRefID3List0 = GetQPValueFromRefList(LIST_0, CODECHAL_ENCODE_REF_ID_3);
5454 curbe->DW41.TextureIntraCostThreshold = 500;
5455
5456 if(m_pictureCodingType == B_TYPE) {
5457 curbe->DW39.ActualQpRefID0List1 = GetQPValueFromRefList(LIST_1, CODECHAL_ENCODE_REF_ID_0);
5458 curbe->DW39.ActualQpRefID1List1 = GetQPValueFromRefList(LIST_1, CODECHAL_ENCODE_REF_ID_1);
5459 float lambda_me = (float)m_qpLambdaMe[CODECHAL_ENCODE_HEVC_B_SLICE][sliceQp];
5460 if (m_encodeParams.bQualityImprovementEnable)
5461 {
5462 curbe->DW40.TransformThreshold0 = (uint16_t) (lambda_me * 56.25 + 0.5);
5463 curbe->DW40.TransformThreshold1 = (uint16_t) (lambda_me * 21 + 0.5);
5464 curbe->DW41.TransformThreshold2 = (uint16_t) (lambda_me * 9 + 0.5);
5465 }
5466 }
5467 }
5468
5469 curbe->DW42.NumMVPredictorsL0 = m_feiPicParams->NumMVPredictorsL0;
5470 curbe->DW42.NumMVPredictorsL1 = m_feiPicParams->NumMVPredictorsL1;
5471 curbe->DW42.PerLCUQP = m_encodeParams.bMbQpDataEnabled;
5472 curbe->DW42.PerCTBInput = m_feiPicParams->bPerCTBInput;
5473 curbe->DW42.CTBDistortionOutput = m_feiPicParams->bDistortionEnable;
5474 curbe->DW42.MultiPredL0 = m_feiPicParams->MultiPredL0;
5475 curbe->DW42.MultiPredL1 = m_feiPicParams->MultiPredL1;
5476 curbe->DW42.MVPredictorBlockSize = m_feiPicParams->MVPredictorInput;
5477
5478 curbe->DW44.MaxVmvR = 511 * 4;
5479 curbe->DW44.MaxNumMergeCandidates = m_hevcSliceParams->MaxNumMergeCand;
5480
5481 if(m_pictureCodingType != I_TYPE)
5482 {
5483 curbe->DW44.MaxNumRefList0 = curbe->DW36.NumRefIdxL0MinusOne + 1;
5484
5485 curbe->DW45.TemporalMvpEnableFlag = m_hevcSliceParams->slice_temporal_mvp_enable_flag;
5486 curbe->DW45.HMECombineLenPslice = 8;
5487 if(m_pictureCodingType == B_TYPE)
5488 {
5489 curbe->DW44.MaxNumRefList1 = curbe->DW36.NumRefIdxL1MinusOne + 1;
5490 curbe->DW45.HMECombineLenBslice = 8;
5491 }
5492 }
5493
5494 curbe->DW45.Log2ParallelMergeLevel = m_hevcPicParams->log2_parallel_merge_level_minus2 + 2;
5495
5496 curbe->DW46.Log2MaxTUSize = m_hevcSeqParams->log2_max_transform_block_size_minus2 + 2;
5497 curbe->DW46.Log2MinTUSize = m_hevcSeqParams->log2_min_transform_block_size_minus2 + 2;
5498 curbe->DW46.Log2MaxCUSize = m_hevcSeqParams->log2_max_coding_block_size_minus3 + 3;
5499 curbe->DW46.Log2MinCUSize = m_hevcSeqParams->log2_min_coding_block_size_minus3 + 3;
5500
5501 curbe->DW47.NumRegionsInSlice = m_numRegionsInSlice;
5502 curbe->DW47.TypeOfWalkingPattern = m_enable26WalkingPattern;
5503 curbe->DW47.ChromaFlatnessCheckFlag= (m_feiPicParams->FastIntraMode) ? 0 : 1;
5504 curbe->DW47.EnableIntraEarlyExit = (m_feiPicParams->FastIntraMode) ? 0 : 1;
5505 curbe->DW47.SkipIntraKrnFlag = (m_feiPicParams->FastIntraMode) ? 1 : 0;
5506 curbe->DW47.CollocatedFromL0Flag = m_hevcSliceParams->collocated_from_l0_flag;
5507 curbe->DW47.IsLowDelay = m_lowDelay;
5508 curbe->DW47.ScreenContentFlag = m_hevcPicParams->bScreenContent;
5509 curbe->DW47.MultiSliceFlag = (m_numSlices > 1);
5510 curbe->DW47.ArbitarySliceFlag = m_arbitraryNumMbsInSlice;
5511 curbe->DW47.NumRegionMinus1 = m_walkingPatternParam.dwNumRegion - 1;
5512
5513 if(m_pictureCodingType != I_TYPE)
5514 {
5515 curbe->DW48.CurrentTdL0_0 = ComputeTemporalDifference(m_hevcSliceParams->RefPicList[0][0]);
5516 curbe->DW48.CurrentTdL0_1 = ComputeTemporalDifference(m_hevcSliceParams->RefPicList[0][1]);
5517 curbe->DW49.CurrentTdL0_2 = ComputeTemporalDifference(m_hevcSliceParams->RefPicList[0][2]);
5518 curbe->DW49.CurrentTdL0_3 = ComputeTemporalDifference(m_hevcSliceParams->RefPicList[0][3]);
5519 if(m_pictureCodingType == B_TYPE) {
5520 curbe->DW50.CurrentTdL1_0 = ComputeTemporalDifference(m_hevcSliceParams->RefPicList[1][0]);
5521 curbe->DW50.CurrentTdL1_1 = ComputeTemporalDifference(m_hevcSliceParams->RefPicList[1][1]);
5522 }
5523 }
5524
5525 curbe->DW52.NumofUnitInRegion = m_walkingPatternParam.dwNumUnitsInRegion;
5526 curbe->DW52.MaxHeightInRegion = m_walkingPatternParam.dwMaxHeightInRegion;
5527
5528 // Intra refresh is enabled. Program related CURBE fields
5529 if (m_hevcPicParams->bEnableRollingIntraRefresh)
5530 {
5531 curbe->DW35.IntraRefreshEn = true;
5532 curbe->DW35.FirstIntraRefresh = m_firstIntraRefresh;
5533 curbe->DW35.HalfUpdateMixedLCU = 0;
5534 curbe->DW35.EnableRollingIntra = true;
5535
5536 curbe->DW38.NumFrameInGOB = m_frameNumInGob;
5537 curbe->DW38.NumIntraRefreshOffFrames = m_frameNumWithoutIntraRefresh;
5538
5539 curbe->DW51.IntraRefreshQPDelta = m_hevcPicParams->QpDeltaForInsertedIntra;
5540 curbe->DW51.IntraRefreshMBNum = m_hevcPicParams->IntraInsertionLocation;
5541 curbe->DW51.IntraRefreshUnitInMB = m_hevcPicParams->IntraInsertionSize;
5542
5543 curbe->DW53.IntraRefreshRefHeight = 40;
5544 curbe->DW53.IntraRefreshRefWidth = 48;
5545
5546 m_firstIntraRefresh = false;
5547 m_frameNumWithoutIntraRefresh = 0;
5548 }
5549 else if (m_pictureCodingType != I_TYPE) // don't increment num frames w/o refresh in case of TU7 I frames
5550 {
5551 m_frameNumWithoutIntraRefresh++;
5552 }
5553
5554 PBFrameKernelParams PB8x8MbEncParams;
5555 MOS_ZeroMemory(&PB8x8MbEncParams, sizeof(PB8x8MbEncParams));
5556
5557 PB8x8MbEncParams.m_width = curbe->DW6.FrameWidth;
5558 PB8x8MbEncParams.m_height = curbe->DW6.FrameHeight;
5559
5560 for(uint32_t surfaceIdx = 0; surfaceIdx < 8; surfaceIdx++)
5561 {
5562 CODEC_PICTURE refPic = m_hevcSliceParams->RefPicList[LIST_0][surfaceIdx];
5563 if (!CodecHal_PictureIsInvalid(refPic) &&
5564 !CodecHal_PictureIsInvalid(m_hevcPicParams->RefFrameList[refPic.FrameIdx]))
5565 {
5566 uint8_t idx = m_hevcPicParams->RefFrameList[refPic.FrameIdx].FrameIdx;
5567 PB8x8MbEncParams.m_cmSurfRef0[PB8x8MbEncParams.m_ucRefNum0] = &m_refList[idx]->sRefBuffer.OsResource;
5568 PB8x8MbEncParams.m_ucRefNum0++;
5569 }
5570
5571 refPic = m_hevcSliceParams->RefPicList[LIST_1][surfaceIdx];
5572 if (!CodecHal_PictureIsInvalid(refPic) &&
5573 !CodecHal_PictureIsInvalid(m_hevcPicParams->RefFrameList[refPic.FrameIdx]))
5574 {
5575 uint8_t idx = m_hevcPicParams->RefFrameList[refPic.FrameIdx].FrameIdx;
5576 PB8x8MbEncParams.m_cmSurfRef1[PB8x8MbEncParams.m_ucRefNum1] = &m_refList[idx]->sRefBuffer.OsResource;
5577 PB8x8MbEncParams.m_ucRefNum1++;
5578 }
5579 }
5580
5581 PB8x8MbEncParams.m_cmSurfCurrY = &m_rawSurfaceToEnc->OsResource;
5582 PB8x8MbEncParams.m_cmSurfPOCDbuf = &m_resMbCodeSurface;
5583 PB8x8MbEncParams.m_bufSize = m_mbCodeSize - m_mvOffset;
5584 PB8x8MbEncParams.m_bufOffset = m_mvOffset;
5585 if(mbCodeIdxForTempMVP == 0xFF)
5586 {
5587 PB8x8MbEncParams.m_cmSurfColRefData = nullptr;
5588 }
5589 else
5590 {
5591 PB8x8MbEncParams.m_cmSurfColRefData = m_trackedBuf->GetMvTemporalBuffer(mbCodeIdxForTempMVP);
5592 }
5593 PB8x8MbEncParams.m_cmSurfIntraDist = &m_intraDist.sResource;
5594 PB8x8MbEncParams.m_cmSurfMinDist = &m_minDistortion.OsResource;
5595 PB8x8MbEncParams.m_cmSurfSliceMap = &m_sliceMapSurface.OsResource;
5596 PB8x8MbEncParams.m_cmSurfVMEIN = &m_vmeSavedUniSic.sResource;
5597 PB8x8MbEncParams.m_cmSurfSIF = &m_simplestIntraSurface.OsResource;
5598 PB8x8MbEncParams.m_cmSurfCombinedQP = (MOS_RESOURCE*)m_allocator->GetResource(m_standard, brcInputForEncKernel);
5599 PB8x8MbEncParams.m_cmLCUQPSurf = &m_lcuQP.OsResource;
5600 PB8x8MbEncParams.m_cmBRCConstSurf = &m_brcBuffers.sBrcConstantDataBuffer[m_currRecycledBufIdx].OsResource;
5601 PB8x8MbEncParams.m_cmWaveFrontMap = &m_concurrentThreadSurface[m_concurrentThreadIndex].OsResource;
5602 if (++m_concurrentThreadIndex >= NUM_CONCURRENT_THREAD)
5603 {
5604 m_concurrentThreadIndex = 0;
5605 }
5606 PB8x8MbEncParams.m_cmSurfMVIndex = &m_mvIndex.sResource;
5607 PB8x8MbEncParams.m_cmSurfMVPred = &m_mvpIndex.sResource;
5608 if (m_feiPicParams->MVPredictorInput)
5609 {
5610 PB8x8MbEncParams.m_cmSurfMVPredictor = &m_feiPicParams->resMVPredictor;
5611 }
5612 else
5613 {
5614 PB8x8MbEncParams.m_cmSurfMVPredictor = nullptr;
5615 }
5616
5617 if (m_feiPicParams->bPerCTBInput)
5618 {
5619 PB8x8MbEncParams.m_cmSurfPerCTBInput = &m_feiPicParams->resCTBCtrl;
5620 }
5621 else
5622 {
5623 PB8x8MbEncParams.m_cmSurfPerCTBInput = nullptr;
5624 }
5625
5626 //to avoid multi contexts in case per-frame control of FastIntraMode, always use 2xScaling kernel to initialize the context.
5627 if (m_cmKernelMap.count("2xScaling") == 0)
5628 {
5629 m_cmKernelMap["2xScaling"] = new CMRTKernelDownScalingUMD();
5630 m_cmKernelMap["2xScaling"]->Init((void *)m_osInterface->pOsContext);
5631 }
5632
5633 //in case PB_32x32 isn't initialized when using FastIntraMode for per-frame control (I: disable; P/B: enable)
5634 if (m_cmKernelMap.count("PB_32x32") == 0)
5635 {
5636 m_cmKernelMap["PB_32x32"] = new CMRTKernelPB32x32UMD();
5637 m_cmKernelMap["PB_32x32"]->Init(nullptr, m_cmKernelMap["2xScaling"]->m_cmDev, m_cmKernelMap["2xScaling"]->m_cmQueue, m_cmKernelMap["2xScaling"]->m_cmTask, nullptr);
5638 }
5639
5640 if (m_pictureCodingType == I_TYPE && m_feiPicParams->FastIntraMode)
5641 {
5642 if (m_cmKernelMap.count("I_8x8_MBENC") == 0)
5643 {
5644 m_cmKernelMap["I_8x8_MBENC"] = new CMRTKernelB8x8MbEncUMD();
5645 m_cmKernelMap["I_8x8_MBENC"]->Init(nullptr, m_cmKernelMap["2xScaling"]->m_cmDev, m_cmKernelMap["2xScaling"]->m_cmQueue, m_cmKernelMap["2xScaling"]->m_cmTask, m_cmKernelMap["PB_32x32"]->m_cmProgram);
5646 }
5647
5648 m_cmKernelMap["I_8x8_MBENC"]->SetupCurbe(curbe);
5649 m_cmKernelMap["I_8x8_MBENC"]->AllocateSurfaces(&PB8x8MbEncParams);
5650
5651 //No need to wait for task finished
5652 m_cmEvent = CM_NO_EVENT;
5653 m_cmKernelMap["I_8x8_MBENC"]->CreateAndDispatchKernel(m_cmEvent, false, (!m_singleTaskPhaseSupported));
5654 }
5655 else if (m_pictureCodingType == B_TYPE)
5656 {
5657 if (m_cmKernelMap.count("B_8x8_MBENC") == 0)
5658 {
5659 m_cmKernelMap["B_8x8_MBENC"] = new CMRTKernelB8x8MbEncUMD();
5660 m_cmKernelMap["B_8x8_MBENC"]->Init(nullptr, m_cmKernelMap["2xScaling"]->m_cmDev, m_cmKernelMap["2xScaling"]->m_cmQueue, m_cmKernelMap["2xScaling"]->m_cmTask, m_cmKernelMap["PB_32x32"]->m_cmProgram);
5661 }
5662
5663 m_cmKernelMap["B_8x8_MBENC"]->SetupCurbe(curbe);
5664 m_cmKernelMap["B_8x8_MBENC"]->AllocateSurfaces(&PB8x8MbEncParams);
5665
5666 //No need to wait for task finished
5667 m_cmEvent = CM_NO_EVENT;
5668 m_cmKernelMap["B_8x8_MBENC"]->CreateAndDispatchKernel(m_cmEvent, false, (!m_singleTaskPhaseSupported));
5669 }
5670 else if (m_pictureCodingType == P_TYPE)
5671 {
5672 if (m_cmKernelMap.count("P_8x8_MBENC") == 0)
5673 {
5674 m_cmKernelMap["P_8x8_MBENC"] = new CMRTKernelP8x8MbEncUMD();
5675 m_cmKernelMap["P_8x8_MBENC"]->Init(nullptr, m_cmKernelMap["2xScaling"]->m_cmDev, m_cmKernelMap["2xScaling"]->m_cmQueue, m_cmKernelMap["2xScaling"]->m_cmTask, m_cmKernelMap["PB_32x32"]->m_cmProgram);
5676 }
5677 m_cmKernelMap["P_8x8_MBENC"]->SetupCurbe(curbe);
5678 m_cmKernelMap["P_8x8_MBENC"]->AllocateSurfaces(&PB8x8MbEncParams);
5679
5680 //No need to wait for task finished
5681 m_cmEvent = CM_NO_EVENT;
5682 m_cmKernelMap["P_8x8_MBENC"]->CreateAndDispatchKernel(m_cmEvent, false, (!m_singleTaskPhaseSupported));
5683 }
5684
5685 CODECHAL_MEDIA_STATE_TYPE encFunctionType = CODECHAL_MEDIA_STATE_HEVC_B_MBENC;
5686 if (m_pictureCodingType == P_TYPE)
5687 {
5688 //P frame curbe only use the DW0~DW75
5689 CODECHAL_DEBUG_TOOL(
5690 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpMDFCurbe(
5691 encFunctionType,
5692 (uint8_t *)curbe, sizeof(*curbe) - sizeof(uint32_t)));
5693 )
5694 }
5695 else
5696 {
5697 CODECHAL_DEBUG_TOOL(
5698 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpMDFCurbe(
5699 encFunctionType,
5700 (uint8_t *)curbe, sizeof(*curbe)));
5701 )
5702 }
5703
5704 m_lastTaskInPhase = true;
5705 eStatus = Encode8x8BPakKernel(curbe);
5706 return eStatus;
5707 }
5708
5709 #endif
5710
EncodeKernelFunctions()5711 MOS_STATUS CodechalFeiHevcStateG9Skl::EncodeKernelFunctions()
5712 {
5713 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
5714
5715 CODECHAL_ENCODE_FUNCTION_ENTER;
5716
5717 m_feiPicParams = (CodecEncodeHevcFeiPicParams *)m_encodeParams.pFeiPicParams;
5718
5719 CODECHAL_DEBUG_TOOL(
5720 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpYUVSurface(
5721 m_rawSurfaceToEnc,
5722 CodechalDbgAttr::attrEncodeRawInputSurface,
5723 "SrcSurf"));
5724 )
5725
5726 if (m_pakOnlyTest)
5727 {
5728 // Skip all ENC kernel operations for now it is in the PAK only test mode.
5729 // PAK and CU records will be passed via the app
5730 return eStatus;
5731 }
5732
5733 if (m_brcEnabled || m_hmeEnabled)
5734 {
5735 eStatus = MOS_STATUS_UNKNOWN;
5736 CODECHAL_ENCODE_ASSERTMESSAGE("HEVC FEI does not support BRC and HMEenabled.");
5737 return eStatus;
5738 }
5739
5740 if(m_osInterface->bSimIsActive)
5741 {
5742 MOS_LOCK_PARAMS lockFlags;
5743 MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
5744 lockFlags.WriteOnly = 1;
5745
5746 uint8_t* data = (uint8_t* )m_osInterface->pfnLockResource(m_osInterface, &m_resMbCodeSurface, &lockFlags);
5747 if (data)
5748 {
5749 MOS_ZeroMemory(data, m_mbCodeSize);
5750 m_osInterface->pfnUnlockResource(m_osInterface, &m_resMbCodeSurface);
5751 }
5752 }
5753
5754 // Generate slice map for kernel
5755 CODECHAL_ENCODE_CHK_STATUS_RETURN(GenerateSliceMap());
5756
5757 //Reset to use a different performance tag ID for I kernels. Each kernel has a different buffer ID
5758 m_osInterface->pfnResetPerfBufferID(m_osInterface);
5759
5760 m_firstTaskInPhase = true;
5761 m_lastTaskInPhase = false;
5762
5763 // ROI uses the BRC LCU update kernel, even in CQP. So we will call it
5764 // first if in CQP. It has no other kernel execution dependencies, even
5765 // that brc is not initialized is not a dependency
5766 if (m_hevcPicParams->NumROI && !m_brcEnabled)
5767 {
5768 CODECHAL_ENCODE_CHK_STATUS_RETURN(EncodeBrcUpdateLCUBasedKernel(nullptr));
5769 }
5770
5771 // config LCU QP input
5772 if (m_encodeParams.bMbQpDataEnabled)
5773 {
5774 // Setup Lamda/Cost table for LCU QP mode
5775 auto psBrcConstantData = &m_brcBuffers.sBrcConstantDataBuffer[m_currRecycledBufIdx];
5776 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetupBrcConstantTable(psBrcConstantData));
5777
5778 if (m_encodeParams.psMbQpDataSurface)
5779 {
5780 CODECHAL_ENCODE_CHK_STATUS_RETURN(Convert1byteTo2bytesQPperLCU(m_encodeParams.psMbQpDataSurface, &m_lcuQP));
5781 m_surfaceParams[SURFACE_LCU_QP].psSurface = &m_lcuQP;
5782 }
5783 }
5784
5785 CODECHAL_DEBUG_TOOL(
5786 if (m_feiPicParams->bPerBlockQP) {
5787 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpSurface(
5788 &m_lcuQP,
5789 CodechalDbgAttr::attrInput,
5790 "HEVC_B_MBENC_MB_QP",
5791 CODECHAL_MEDIA_STATE_HEVC_B_MBENC));
5792 }
5793
5794 if (m_feiPicParams->MVPredictorInput) {
5795 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
5796 &m_feiPicParams->resMVPredictor,
5797 "HEVC_B_MBENC_ConstantData_In",
5798 CodechalDbgAttr::attrInput,
5799 m_feiPicParams->resMVPredictor.iSize,
5800 0,
5801 CODECHAL_MEDIA_STATE_HEVC_B_MBENC));
5802 })
5803
5804 if(m_feiPicParams->FastIntraMode)
5805 {
5806 if (m_hevcPicParams->CodingType == I_TYPE)
5807 {
5808 CODECHAL_ENCODE_CHK_STATUS_RETURN(Encode8x8PBMbEncKernel());
5809 }
5810 }
5811 else
5812 {
5813 //Step 1: perform 2:1 down-scaling
5814 if (m_hevcSeqParams->bit_depth_luma_minus8 == 0) // use this for 8 bit only case.
5815 {
5816 CODECHAL_ENCODE_CHK_STATUS_RETURN(Encode2xScalingKernel());
5817 }
5818
5819 //Step 2: 32x32 PU Mode Decision or 32x32 PU Intra check kernel
5820 if (m_hevcPicParams->CodingType == I_TYPE)
5821 {
5822 CODECHAL_ENCODE_CHK_STATUS_RETURN(Encode32x32PuModeDecisionKernel());
5823 }
5824 else
5825 {
5826 CODECHAL_ENCODE_CHK_STATUS_RETURN(Encode32X32BIntraCheckKernel());
5827 }
5828
5829 //Step 3: 16x16 SAD Computation
5830 CODECHAL_ENCODE_CHK_STATUS_RETURN(Encode16x16SadPuComputationKernel());
5831
5832 CODECHAL_DEBUG_TOOL(
5833 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
5834 &m_sad16x16Pu.sResource,
5835 CodechalDbgAttr::attrOutput,
5836 "HEVC_16x16_PU_SAD_Out",
5837 m_sad16x16Pu.dwSize,
5838 0,
5839 CODECHAL_MEDIA_STATE_16x16_PU_SAD));
5840 )
5841
5842 //Step 4: 16x16 PU Mode Decision
5843 CODECHAL_ENCODE_CHK_STATUS_RETURN(Encode16x16PuModeDecisionKernel());
5844
5845 CODECHAL_DEBUG_TOOL(
5846 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
5847 &m_vme8x8Mode.sResource,
5848 CodechalDbgAttr::attrOutput,
5849 "HEVC_16x16_PU_MD_Out",
5850 m_vme8x8Mode.dwSize,
5851 0,
5852 CODECHAL_MEDIA_STATE_16x16_PU_MODE_DECISION));
5853 )
5854
5855 //Step 5: 8x8 PU
5856 CODECHAL_ENCODE_CHK_STATUS_RETURN(Encode8x8PUKernel());
5857
5858 //Step 6: 8x8 PU FMODE
5859 m_lastTaskInPhase = true;
5860 CODECHAL_ENCODE_CHK_STATUS_RETURN(Encode8x8PUFMODEKernel());
5861
5862 CODECHAL_DEBUG_TOOL(
5863 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpYUVSurface(
5864 &m_scaled2xSurface,
5865 CodechalDbgAttr::attrReferenceSurfaces,
5866 "2xScaledSurf"))
5867
5868 if (m_pictureCodingType == I_TYPE)
5869 {
5870 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
5871 &m_32x32PuOutputData.sResource,
5872 CodechalDbgAttr::attrOutput,
5873 "HEVC_32x32_PU_MD_Out",
5874 m_32x32PuOutputData.dwSize,
5875 0,
5876 CODECHAL_MEDIA_STATE_32x32_PU_MODE_DECISION));
5877 }
5878 else
5879 {
5880 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
5881 &m_32x32PuOutputData.sResource,
5882 CodechalDbgAttr::attrOutput,
5883 "HEVC_32x32_B_INTRA_CHECK_Out",
5884 m_32x32PuOutputData.dwSize,
5885 0,
5886 CODECHAL_MEDIA_STATE_32x32_PU_MODE_DECISION));
5887
5888 }
5889
5890 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
5891 &m_intraMode.sResource,
5892 CodechalDbgAttr::attrOutput,
5893 "HEVC_8x8_PU_MD_Out",
5894 m_intraMode.dwSize,
5895 0,
5896 CODECHAL_MEDIA_STATE_8x8_PU));
5897
5898 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
5899 &m_intraDist.sResource,
5900 CodechalDbgAttr::attrOutput,
5901 "HEVC_8x8_PU_FMOD_Out",
5902 m_intraDist.dwSize,
5903 0,
5904 CODECHAL_MEDIA_STATE_8x8_PU_FMODE));
5905 )
5906 }
5907
5908 // Sync-wait can be executed after I-kernel is submitted before there is no dependency for I to wait for PAK to be ready
5909 CODECHAL_ENCODE_CHK_STATUS_RETURN(WaitForPak());
5910
5911 //Step 7: B MB ENC kernel for B picture only
5912 if (m_hevcPicParams->CodingType != I_TYPE)
5913 {
5914 m_firstTaskInPhase = true;
5915 m_lastTaskInPhase = false;
5916
5917 if (m_feiPicParams->MVPredictorInput)
5918 {
5919 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
5920 &m_surfaceParams[SURFACE_FEI_EXTERNAL_MVP],
5921 &m_feiPicParams->resMVPredictor,
5922 m_feiPicParams->resMVPredictor.iSize,
5923 0,
5924 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_MV_DATA_ENCODE].Value,
5925 0,
5926 false));
5927 }
5928
5929 if ((m_hevcSeqParams->bit_depth_luma_minus8))
5930 {
5931 bool formatConversionDone[NUM_FORMAT_CONV_FRAMES] = { false };
5932 formatConversionDone[0] = true; // always true since its for the input surface.
5933
5934 for (auto i = 0; i < CODEC_MAX_NUM_REF_FRAME_HEVC; i++)
5935 {
5936 if (!m_picIdx[i].bValid || !m_currUsedRefPic[i])
5937 {
5938 continue;
5939 }
5940
5941 uint8_t picIdx = m_picIdx[i].ucPicIdx;
5942 CODECHAL_ENCODE_ASSERT(picIdx < 127);
5943
5944 uint8_t frameStoreId = (uint8_t)m_refIdxMapping[i];
5945
5946 if (frameStoreId >= CODECHAL_MAX_CUR_NUM_REF_FRAME_HEVC)
5947 {
5948 CODECHAL_ENCODE_ASSERT(0);
5949 eStatus = MOS_STATUS_INVALID_PARAMETER;
5950 return eStatus;
5951 }
5952
5953 if (formatConversionDone[frameStoreId + 1] != true)
5954 {
5955 CODECHAL_ENCODE_CHK_STATUS_RETURN(EncodeDSCombinedKernel(dsDisabled, (frameStoreId + 1), picIdx));
5956 formatConversionDone[frameStoreId + 1] = true;
5957 m_refList[picIdx]->sRefBuffer = m_formatConvertedSurface[frameStoreId + 1];
5958 }
5959 }
5960 }
5961
5962 CODECHAL_ENCODE_CHK_STATUS_RETURN(Encode8x8PBMbEncKernel());
5963 }
5964 #ifdef HEVC_FEI_ENABLE_CMRT
5965
5966 for (CmKernelMapType::iterator it = m_cmKernelMap.begin(); it != m_cmKernelMap.end(); it++)
5967 {
5968 it->second->DestroySurfResources();
5969 }
5970
5971 #endif
5972
5973 // Notify PAK engine once ENC is done
5974 if (!m_pakOnlyTest && !Mos_ResourceIsNull(&m_resSyncObjectRenderContextInUse))
5975 {
5976 MOS_SYNC_PARAMS syncParams = g_cInitSyncParams;
5977 syncParams.GpuContext = m_renderContext;
5978 syncParams.presSyncResource = &m_resSyncObjectRenderContextInUse;
5979
5980 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnEngineSignal(m_osInterface, &syncParams));
5981 }
5982
5983 return eStatus;
5984 }
5985
Initialize(CodechalSetting * settings)5986 MOS_STATUS CodechalFeiHevcStateG9Skl::Initialize(CodechalSetting * settings)
5987 {
5988 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
5989
5990 CODECHAL_ENCODE_FUNCTION_ENTER;
5991
5992 // common initilization
5993 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalEncHevcState::Initialize(settings));
5994
5995 m_cscDsState->EnableMmc();
5996
5997 m_brcBuffers.dwBrcConstantSurfaceWidth = BRC_CONSTANT_SURFACE_WIDTH;
5998 m_brcBuffers.dwBrcConstantSurfaceHeight = BRC_CONSTANT_SURFACE_HEIGHT;
5999
6000 // LCU size is 32x32 in Gen9
6001 m_widthAlignedMaxLcu = MOS_ALIGN_CEIL(m_frameWidth, 32);
6002 m_heightAlignedMaxLcu = MOS_ALIGN_CEIL(m_frameHeight, 32);
6003
6004 m_brcEnabled = false;
6005 m_hmeEnabled = false;
6006 m_hmeSupported = false;
6007 m_16xMeUserfeatureControl = false;
6008 m_16xMeSupported = false;
6009 m_32xMeUserfeatureControl = false;
6010 m_32xMeSupported = false;
6011
6012 // regkey setup
6013 MOS_USER_FEATURE_VALUE_DATA userFeatureData;
6014 MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
6015 MOS_UserFeature_ReadValue_ID(
6016 nullptr,
6017 __MEDIA_USER_FEATURE_VALUE_SINGLE_TASK_PHASE_ENABLE_ID,
6018 &userFeatureData,
6019 m_osInterface->pOsContext);
6020 m_singleTaskPhaseSupported = (userFeatureData.i32Data) ? true : false;
6021
6022 MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
6023 MOS_UserFeature_ReadValue_ID(
6024 nullptr,
6025 __MEDIA_USER_FEATURE_VALUE_HEVC_ENCODE_26Z_ENABLE_ID,
6026 &userFeatureData,
6027 m_osInterface->pOsContext);
6028 m_enable26WalkingPattern = (userFeatureData.i32Data) ? false : true;
6029
6030 MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
6031 eStatus = MOS_UserFeature_ReadValue_ID(
6032 nullptr,
6033 __MEDIA_USER_FEATURE_VALUE_HEVC_ENCODE_REGION_NUMBER_ID,
6034 &userFeatureData,
6035 m_osInterface->pOsContext);
6036
6037 if (eStatus == MOS_STATUS_SUCCESS)
6038 {
6039 // Region number must be greater than 1
6040 m_numRegionsInSlice = (userFeatureData.i32Data < 1) ? 1 : userFeatureData.i32Data;
6041 }
6042 else
6043 {
6044 // Reset the status to success if regkey is not set
6045 eStatus = MOS_STATUS_SUCCESS;
6046 }
6047
6048 MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
6049 MOS_UserFeature_ReadValue_ID(
6050 nullptr,
6051 __MEDIA_USER_FEATURE_VALUE_HEVC_ENCODE_NUM_8x8_INTRA_KERNEL_SPLIT,
6052 &userFeatureData,
6053 m_osInterface->pOsContext);
6054 m_numMb8x8IntraKernelSplit = (userFeatureData.i32Data < 0) ? 0 : userFeatureData.i32Data;
6055
6056 MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
6057 MOS_UserFeature_ReadValue_ID(
6058 nullptr,
6059 __MEDIA_USER_FEATURE_VALUE_HEVC_ENCODE_NUM_B_KERNEL_SPLIT,
6060 &userFeatureData,
6061 m_osInterface->pOsContext);
6062 m_numMbBKernelSplit = (userFeatureData.i32Data < 0) ? 0 : userFeatureData.i32Data;
6063
6064 MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
6065 MOS_UserFeature_ReadValue_ID(
6066 nullptr,
6067 __MEDIA_USER_FEATURE_VALUE_HEVC_ENCODE_POWER_SAVING,
6068 &userFeatureData,
6069 m_osInterface->pOsContext);
6070 m_powerSavingEnabled = (userFeatureData.i32Data) ? true : false;
6071
6072 if (MEDIA_IS_SKU(m_skuTable, FtrEncodeHEVC10bit))
6073 {
6074 /* Make the width aligned to a multiple of 32 and then get the no of macroblocks.*/
6075 /* This is done to facilitate the use of format conversion kernel for downscaling to 4x and 2x along with formatconversion of 10 bit data to 8 bit data.
6076 Refer format conversion kernel for further details .
6077 We will use only 4x downscale for HME, Super and ultra HME use the traditional scaling kernels.
6078 */
6079 uint32_t downscaledSurfaceWidth4x = MOS_ALIGN_CEIL((m_downscaledWidthInMb4x* CODECHAL_MACROBLOCK_WIDTH), (CODECHAL_MACROBLOCK_WIDTH * 2));
6080 m_downscaledWidthInMb4x = CODECHAL_GET_WIDTH_IN_MACROBLOCKS(downscaledSurfaceWidth4x);
6081
6082 }
6083
6084 return eStatus;
6085 }
6086
GetMaxBtCount()6087 uint32_t CodechalFeiHevcStateG9Skl::GetMaxBtCount()
6088 {
6089 auto wBtIdxAlignment = m_stateHeapInterface->pStateHeapInterface->GetBtIdxAlignment();
6090
6091 // 6 I kernels
6092 uint32_t uiBtCountPhase1 = MOS_ALIGN_CEIL(m_mbEncKernelStates[CODECHAL_HEVC_MBENC_2xSCALING].KernelParams.iBTCount, wBtIdxAlignment) +
6093 MOS_ALIGN_CEIL(m_mbEncKernelStates[CODECHAL_HEVC_MBENC_16x16SAD].KernelParams.iBTCount, wBtIdxAlignment) +
6094 MOS_ALIGN_CEIL(m_mbEncKernelStates[CODECHAL_HEVC_MBENC_16x16MD].KernelParams.iBTCount, wBtIdxAlignment) +
6095 MOS_ALIGN_CEIL(m_mbEncKernelStates[CODECHAL_HEVC_MBENC_8x8PU].KernelParams.iBTCount, wBtIdxAlignment) +
6096 MOS_ALIGN_CEIL(m_mbEncKernelStates[CODECHAL_HEVC_MBENC_8x8FMODE].KernelParams.iBTCount, wBtIdxAlignment);
6097
6098 uiBtCountPhase1 += MOS_MAX(
6099 MOS_ALIGN_CEIL(m_mbEncKernelStates[CODECHAL_HEVC_MBENC_32x32MD].KernelParams.iBTCount, wBtIdxAlignment),
6100 MOS_ALIGN_CEIL(m_mbEncKernelStates[CODECHAL_HEVC_MBENC_32x32INTRACHECK].KernelParams.iBTCount, wBtIdxAlignment));
6101
6102 if (MEDIA_IS_SKU(m_skuTable, FtrEncodeHEVC10bit))
6103 {
6104 uiBtCountPhase1 += MOS_ALIGN_CEIL(m_mbEncKernelStates[CODECHAL_HEVC_FEI_MBENC_DS_COMBINED].KernelParams.iBTCount, wBtIdxAlignment);
6105 }
6106
6107 // two B kernels
6108 uint32_t uiBtCountPhase2 = MOS_ALIGN_CEIL(m_mbEncKernelStates[CODECHAL_HEVC_FEI_MBENC_BENC].KernelParams.iBTCount, wBtIdxAlignment) +
6109 MOS_ALIGN_CEIL(m_mbEncKernelStates[CODECHAL_HEVC_FEI_MBENC_BPAK].KernelParams.iBTCount, wBtIdxAlignment);
6110
6111 uint32_t uiMaxBtCount = MOS_MAX(uiBtCountPhase1, uiBtCountPhase2);
6112
6113 return uiMaxBtCount;
6114 }
6115
AllocateEncResources()6116 MOS_STATUS CodechalFeiHevcStateG9Skl::AllocateEncResources()
6117 {
6118 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
6119
6120 CODECHAL_ENCODE_FUNCTION_ENTER;
6121
6122 m_sliceMap = (PCODECHAL_ENCODE_HEVC_SLICE_MAP)MOS_AllocAndZeroMemory(
6123 m_widthAlignedMaxLcu * m_heightAlignedMaxLcu * sizeof(m_sliceMap[0]));
6124 CODECHAL_ENCODE_CHK_NULL_RETURN(m_sliceMap);
6125
6126 uint32_t Downscaling2xWidth = m_widthAlignedMaxLcu >> 1;
6127 uint32_t Downscaling2xHeight = m_heightAlignedMaxLcu >> 1;
6128 CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateSurface(
6129 &m_scaled2xSurface,
6130 Downscaling2xWidth,
6131 Downscaling2xHeight,
6132 "2x Downscaling"));
6133
6134 uint32_t uiWidth = m_widthAlignedMaxLcu >> 3;
6135 uint32_t uiHeight = m_heightAlignedMaxLcu >> 5;
6136 CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateBuffer2D(
6137 &m_sliceMapSurface,
6138 uiWidth,
6139 uiHeight,
6140 "Slice Map"));
6141
6142 uint32_t uiSize = 32 * (m_widthAlignedMaxLcu >> 5) * (m_heightAlignedMaxLcu >> 5);
6143 CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateBuffer(
6144 &m_32x32PuOutputData,
6145 uiSize,
6146 "32x32 PU Output Data"));
6147
6148 uiSize = 8 * 4 * (m_widthAlignedMaxLcu >> 4) * (m_heightAlignedMaxLcu >> 4);
6149 CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateBuffer(
6150 &m_sad16x16Pu,
6151 uiSize,
6152 "SAD 16x16 PU"));
6153
6154 // need 64 bytes for statistics report .
6155 uiSize = 64 * (m_widthAlignedMaxLcu >> 4) * (m_heightAlignedMaxLcu >> 4);
6156 CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateBuffer(
6157 &m_vme8x8Mode,
6158 uiSize,
6159 "VME 8x8 mode"));
6160
6161 uiSize = 32 * (m_widthAlignedMaxLcu >> 3) * (m_heightAlignedMaxLcu >> 3);
6162 CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateBuffer(
6163 &m_intraMode,
6164 uiSize,
6165 "Intra mode"));
6166
6167 uiSize = 16 * (m_widthAlignedMaxLcu >> 4) * (m_heightAlignedMaxLcu >> 4);
6168 CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateBuffer(
6169 &m_intraDist,
6170 uiSize,
6171 "Intra dist"));
6172
6173 // Change the surface size
6174 uiWidth = m_widthAlignedMaxLcu >> 1;
6175 uiHeight = m_heightAlignedMaxLcu >> 4;
6176 CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateBuffer2D(
6177 &m_minDistortion,
6178 uiWidth,
6179 uiHeight,
6180 "Min distortion surface"));
6181
6182 // Allocate FEI 2D 2bytes LCU QP surface
6183 uiWidth = MOS_ALIGN_CEIL((m_widthAlignedMaxLcu >> 4), 64);
6184 uiHeight = MOS_ALIGN_CEIL((m_heightAlignedMaxLcu >> 5), 4);
6185 CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateBuffer2D(
6186 &m_lcuQP,
6187 uiWidth,
6188 uiHeight,
6189 "LCU_QP surface"));
6190
6191 uiWidth = sizeof(CODECHAL_ENCODE_HEVC_WALKING_CONTROL_REGION);
6192 uiHeight = HEVC_CONCURRENT_SURFACE_HEIGHT;
6193 for (uint32_t i = 0; i < NUM_CONCURRENT_THREAD; i++)
6194 {
6195 CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateBuffer2D(
6196 &m_concurrentThreadSurface[i],
6197 uiWidth,
6198 uiHeight,
6199 "Concurrent Thread"));
6200 }
6201
6202 //uiSize = (dwWidthAlignedMaxLCU * dwHeightAlignedMaxLCU / 4);
6203 uiSize = (m_widthAlignedMaxLcu * m_heightAlignedMaxLcu / 4) + GPUMMU_WA_PADDING;
6204 CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateBuffer(
6205 &m_mvIndex,
6206 uiSize,
6207 "MV index surface"));
6208
6209 //uiSize = (dwWidthAlignedMaxLCU * dwHeightAlignedMaxLCU / 2);
6210 uiSize = (m_widthAlignedMaxLcu * m_heightAlignedMaxLcu / 2) + GPUMMU_WA_PADDING;
6211 CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateBuffer(
6212 &m_mvpIndex,
6213 uiSize,
6214 "MVP index surface"));
6215
6216 uiSize = m_widthAlignedMaxLcu * m_heightAlignedMaxLcu;
6217 CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateBuffer(
6218 &m_vmeSavedUniSic,
6219 uiSize,
6220 "VME Saved UniSic surface"));
6221
6222 uiWidth = m_widthAlignedMaxLcu >> 3;
6223 uiHeight = m_heightAlignedMaxLcu >> 5;
6224 CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateBuffer2D(
6225 &m_simplestIntraSurface,
6226 uiWidth,
6227 uiHeight,
6228 "Simplest Intra surface"));
6229
6230 m_allocator->AllocateResource(m_standard, 1024, 1, brcInputForEncKernel, "brcInputForEncKernel", true);
6231
6232 if (MEDIA_IS_SKU(m_skuTable, FtrEncodeHEVC10bit))
6233 {
6234 // adding 10 bit support for KBL : output surface for format conversion from 10bit to 8 bit
6235 for (uint32_t i = 0; i < NUM_FORMAT_CONV_FRAMES; i++)
6236 {
6237 if (Mos_ResourceIsNull(&m_formatConvertedSurface[i].OsResource))
6238 {
6239 uiWidth = m_widthAlignedMaxLcu;
6240 uiHeight = m_heightAlignedMaxLcu;
6241
6242 CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateSurface(
6243 &m_formatConvertedSurface[i],
6244 uiWidth,
6245 uiHeight,
6246 "Format Converted Surface"));
6247 }
6248 }
6249
6250 if (Mos_ResourceIsNull(&m_resMbStatisticsSurface.sResource))
6251 {
6252 uiSize = 52 * m_picWidthInMb * m_picHeightInMb; // 13 DWs or 52 bytes for statistics per MB
6253
6254 CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateBuffer(
6255 &m_resMbStatisticsSurface,
6256 uiSize,
6257 "MB stats surface"));
6258 }
6259 }
6260
6261 // ROI
6262 // ROI buffer size uses MB units for HEVC, not LCU
6263 uiWidth = MOS_ALIGN_CEIL(m_picWidthInMb * 4, 64);
6264 uiHeight = MOS_ALIGN_CEIL(m_picHeightInMb, 8);
6265
6266 MOS_ZeroMemory(&m_roiSurface, sizeof(m_roiSurface));
6267 m_roiSurface.TileType = MOS_TILE_LINEAR;
6268 m_roiSurface.bArraySpacing = true;
6269 m_roiSurface.Format = Format_Buffer_2D;
6270 m_roiSurface.dwWidth = uiWidth;
6271 m_roiSurface.dwPitch = uiWidth;
6272 m_roiSurface.dwHeight = uiHeight;
6273
6274 CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateBuffer2D(
6275 &m_roiSurface,
6276 uiWidth,
6277 uiHeight,
6278 "ROI Buffer"));
6279
6280 return eStatus;
6281 }
6282
FreeEncResources()6283 MOS_STATUS CodechalFeiHevcStateG9Skl::FreeEncResources()
6284 {
6285 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
6286
6287 CODECHAL_ENCODE_FUNCTION_ENTER;
6288
6289 MOS_Delete(m_meKernelState);
6290 m_meKernelState = nullptr;
6291 MOS_FreeMemory(m_meKernelBindingTable);
6292 m_meKernelBindingTable = nullptr;
6293
6294 MOS_DeleteArray(m_mbEncKernelStates);
6295 m_mbEncKernelStates = nullptr;
6296 MOS_FreeMemory(m_mbEncKernelBindingTable);
6297 m_mbEncKernelBindingTable = nullptr;
6298
6299 MOS_DeleteArray(m_brcKernelStates);
6300 m_brcKernelStates = nullptr;
6301 MOS_FreeMemory(m_brcKernelBindingTable);
6302 m_brcKernelBindingTable = nullptr;
6303
6304 MOS_FreeMemory(m_surfaceParams); m_surfaceParams = nullptr;
6305
6306 for (auto i = 0; i < NUM_FORMAT_CONV_FRAMES; i++)
6307 {
6308 m_osInterface->pfnFreeResource(
6309 m_osInterface,
6310 &m_formatConvertedSurface[i].OsResource);
6311 }
6312
6313 m_osInterface->pfnFreeResource(
6314 m_osInterface,
6315 &m_scaled2xSurface.OsResource);
6316
6317 m_osInterface->pfnFreeResource(
6318 m_osInterface,
6319 &m_resMbStatisticsSurface.sResource);
6320
6321 m_osInterface->pfnFreeResource(
6322 m_osInterface,
6323 &m_sliceMapSurface.OsResource);
6324
6325 m_osInterface->pfnFreeResource(
6326 m_osInterface,
6327 &m_32x32PuOutputData.sResource);
6328
6329 m_osInterface->pfnFreeResource(
6330 m_osInterface,
6331 &m_sad16x16Pu.sResource);
6332
6333 m_osInterface->pfnFreeResource(
6334 m_osInterface,
6335 &m_vme8x8Mode.sResource);
6336
6337 m_osInterface->pfnFreeResource(
6338 m_osInterface,
6339 &m_intraMode.sResource);
6340
6341 m_osInterface->pfnFreeResource(
6342 m_osInterface,
6343 &m_intraDist.sResource);
6344
6345 m_osInterface->pfnFreeResource(
6346 m_osInterface,
6347 &m_mvIndex.sResource);
6348
6349 m_osInterface->pfnFreeResource(
6350 m_osInterface,
6351 &m_mvpIndex.sResource);
6352
6353 m_osInterface->pfnFreeResource(
6354 m_osInterface,
6355 &m_vmeSavedUniSic.sResource);
6356
6357 m_osInterface->pfnFreeResource(
6358 m_osInterface,
6359 &m_minDistortion.OsResource);
6360
6361 m_osInterface->pfnFreeResource(
6362 m_osInterface,
6363 &m_lcuQP.OsResource);
6364
6365 for (uint32_t i = 0; i < NUM_CONCURRENT_THREAD; i++)
6366 {
6367 m_osInterface->pfnFreeResource(
6368 m_osInterface,
6369 &m_concurrentThreadSurface[i].OsResource);
6370 }
6371
6372 m_osInterface->pfnFreeResource(
6373 m_osInterface,
6374 &m_simplestIntraSurface.OsResource);
6375
6376 MOS_FreeMemory(m_sliceMap);
6377 m_sliceMap = nullptr;
6378
6379 m_osInterface->pfnFreeResource(
6380 m_osInterface,
6381 &m_roiSurface.OsResource);
6382
6383 #ifdef HEVC_FEI_ENABLE_CMRT
6384
6385 for (CmKernelMapType::iterator it = m_cmKernelMap.begin(); it != m_cmKernelMap.end(); it++)
6386 {
6387 it->second->DestroyKernelResources();
6388 }
6389 if (m_cmKernelMap.count("2xScaling"))
6390 {
6391 m_cmKernelMap["2xScaling"]->DestroyProgramResources();
6392 }
6393 if (m_cmKernelMap.count("I_32x32"))
6394 {
6395 m_cmKernelMap["I_32x32"]->DestroyProgramResources();
6396 }
6397 if (m_cmKernelMap.count("PB_32x32"))
6398 {
6399 m_cmKernelMap["PB_32x32"]->DestroyProgramResources();
6400 }
6401 if (m_cmKernelMap.count("2xScaling"))
6402 {
6403 m_cmKernelMap["2xScaling"]->Destroy();
6404 }
6405
6406 for (CmKernelMapType::iterator it = m_cmKernelMap.begin(); it != m_cmKernelMap.end(); it++)
6407 {
6408 delete it->second;
6409 }
6410
6411 m_cmKernelMap.clear();
6412
6413 #endif
6414
6415 return eStatus;
6416 }
6417
InitSurfaceInfoTable()6418 MOS_STATUS CodechalFeiHevcStateG9Skl::InitSurfaceInfoTable()
6419 {
6420 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
6421
6422 m_surfaceParams = (PCODECHAL_SURFACE_CODEC_PARAMS)MOS_AllocAndZeroMemory(
6423 sizeof(*m_surfaceParams) * SURFACE_NUM_TOTAL);
6424 CODECHAL_ENCODE_CHK_NULL_RETURN(m_surfaceParams);
6425
6426 PCODECHAL_SURFACE_CODEC_PARAMS param = &m_surfaceParams[SURFACE_RAW_Y];
6427 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D(
6428 param,
6429 m_rawSurfaceToEnc,
6430 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_CURR_ENCODE].Value,
6431 0,
6432 m_verticalLineStride,
6433 false));
6434
6435 param = &m_surfaceParams[SURFACE_RAW_10bit_Y];
6436 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D(
6437 param,
6438 m_rawSurfaceToEnc,
6439 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_CURR_ENCODE].Value,
6440 0,
6441 m_verticalLineStride,
6442 false));
6443
6444 // MB stats surface -- currently not used
6445 param = &m_surfaceParams[SURFACE_RAW_MBSTAT];
6446 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
6447 param,
6448 &m_resMbStatisticsSurface.sResource,
6449 m_resMbStatisticsSurface.dwSize,
6450 0,
6451 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_HME_DOWNSAMPLED_ENCODE].Value,
6452 0,
6453 true));
6454 param->bRawSurface = true;
6455
6456 param = &m_surfaceParams[SURFACE_RAW_FC_8bit_Y_UV];
6457 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D(
6458 param,
6459 &m_formatConvertedSurface[0],
6460 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_CURR_ENCODE].Value,
6461 0,
6462 m_verticalLineStride,
6463 true)); //this should be writable as it is output of formatconversion
6464 param->bUseUVPlane = true;
6465
6466 param = &m_surfaceParams[SURFACE_RAW_Y_UV];
6467 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D(
6468 param,
6469 m_rawSurfaceToEnc,
6470 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_CURR_ENCODE].Value,
6471 0,
6472 m_verticalLineStride,
6473 false));
6474 param->bUseUVPlane = true;
6475
6476 param = &m_surfaceParams[SURFACE_RAW_10bit_Y_UV];
6477 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D(
6478 param,
6479 m_rawSurfaceToEnc,
6480 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_CURR_ENCODE].Value,
6481 0,
6482 m_verticalLineStride,
6483 false));//this should be writable as it is output of formatconversion
6484 param->bUseUVPlane = true;
6485
6486 param = &m_surfaceParams[SURFACE_Y_2X];
6487 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D(
6488 param,
6489 &m_scaled2xSurface,
6490 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_HME_DOWNSAMPLED_ENCODE].Value,
6491 0,
6492 m_verticalLineStride,
6493 false));
6494
6495 param = &m_surfaceParams[SURFACE_32x32_PU_OUTPUT];
6496 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
6497 param,
6498 &m_32x32PuOutputData.sResource,
6499 m_32x32PuOutputData.dwSize,
6500 0,
6501 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_MV_DATA_ENCODE].Value,
6502 0,
6503 false));
6504
6505 param = &m_surfaceParams[SURFACE_SLICE_MAP];
6506 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D(
6507 param,
6508 &m_sliceMapSurface,
6509 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_HME_DOWNSAMPLED_ENCODE].Value,
6510 0,
6511 m_verticalLineStride,
6512 false));
6513
6514 param = &m_surfaceParams[SURFACE_Y_2X_VME];
6515 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParamsVME(
6516 param,
6517 &m_scaled2xSurface,
6518 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_REF_ENCODE].Value,
6519 0));
6520
6521 param = &m_surfaceParams[SURFACE_BRC_INPUT];
6522 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
6523 param,
6524 (MOS_RESOURCE*)m_allocator->GetResource(m_standard, brcInputForEncKernel),
6525 m_allocator->GetResourceSize(m_standard, brcInputForEncKernel),
6526 0,
6527 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_MV_DATA_ENCODE].Value,
6528 0,
6529 false));
6530
6531 param = &m_surfaceParams[SURFACE_LCU_QP];
6532 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D(
6533 param,
6534 &m_lcuQP,
6535 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_HME_DOWNSAMPLED_ENCODE].Value,
6536 0,
6537 m_verticalLineStride,
6538 false));
6539
6540 param = &m_surfaceParams[SURFACE_ROI];
6541 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D(
6542 param,
6543 &m_roiSurface,
6544 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_HME_DOWNSAMPLED_ENCODE].Value,
6545 0,
6546 m_verticalLineStride,
6547 false));
6548
6549 param = &m_surfaceParams[SURFACE_BRC_DATA];
6550 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D(
6551 param,
6552 &m_brcBuffers.sBrcConstantDataBuffer[m_currRecycledBufIdx],
6553 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_HME_DOWNSAMPLED_ENCODE].Value,
6554 0,
6555 m_verticalLineStride,
6556 false));
6557
6558 param = &m_surfaceParams[SURFACE_SIMPLIFIED_INTRA];
6559 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D(
6560 param,
6561 &m_simplestIntraSurface,
6562 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_HME_DOWNSAMPLED_ENCODE].Value,
6563 0,
6564 m_verticalLineStride,
6565 false));
6566
6567 // skip SURFACE_HME_MVP and SURFACE_HME_DIST from HME since FEI always disables HME
6568
6569 param = &m_surfaceParams[SURFACE_16x16PU_SAD];
6570 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
6571 param,
6572 &m_sad16x16Pu.sResource,
6573 m_sad16x16Pu.dwSize,
6574 0,
6575 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_MV_DATA_ENCODE].Value,
6576 0,
6577 false));
6578
6579 param = &m_surfaceParams[SURFACE_RAW_VME];
6580 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParamsVME(
6581 param,
6582 m_rawSurfaceToEnc,
6583 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_CURR_ENCODE].Value,
6584 0));
6585
6586 param = &m_surfaceParams[SURFACE_VME_8x8];
6587 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
6588 param,
6589 &m_vme8x8Mode.sResource,
6590 m_vme8x8Mode.dwSize,
6591 0,
6592 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_MV_DATA_ENCODE].Value,
6593 0,
6594 false));
6595
6596 param = &m_surfaceParams[SURFACE_CU_RECORD];
6597 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
6598 param,
6599 &m_resMbCodeSurface,
6600 m_mbCodeSize - m_mvOffset,
6601 m_mvOffset,
6602 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_MV_DATA_ENCODE].Value,
6603 0,
6604 true));
6605
6606 param = &m_surfaceParams[SURFACE_INTRA_MODE];
6607 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
6608 param,
6609 &m_intraMode.sResource,
6610 m_intraMode.dwSize,
6611 0,
6612 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_MV_DATA_ENCODE].Value,
6613 0,
6614 false));
6615
6616 param = &m_surfaceParams[SURFACE_HCP_PAK];
6617 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
6618 param,
6619 &m_resMbCodeSurface,
6620 m_mvOffset,
6621 0,
6622 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_MV_DATA_ENCODE].Value,
6623 0,
6624 true));
6625
6626 param = &m_surfaceParams[SURFACE_INTRA_DIST];
6627 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
6628 param,
6629 &m_intraDist.sResource,
6630 m_intraDist.dwSize,
6631 0,
6632 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_MV_DATA_ENCODE].Value,
6633 0,
6634 false));
6635
6636 param = &m_surfaceParams[SURFACE_MIN_DIST];
6637 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D(
6638 param,
6639 &m_minDistortion,
6640 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_ME_DISTORTION_ENCODE].Value,
6641 0,
6642 m_verticalLineStride,
6643 false));
6644
6645 param = &m_surfaceParams[SURFACE_VME_UNI_SIC_DATA];
6646 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
6647 param,
6648 &m_vmeSavedUniSic.sResource,
6649 m_vmeSavedUniSic.dwSize,
6650 0,
6651 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_MV_DATA_ENCODE].Value,
6652 0,
6653 false));
6654
6655 param = &m_surfaceParams[SURFACE_COL_MB_MV];
6656 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
6657 param,
6658 nullptr,
6659 m_sizeOfMvTemporalBuffer,
6660 0,
6661 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_MV_DATA_ENCODE].Value,
6662 0,
6663 false));
6664
6665 m_concurrentThreadIndex = 0;
6666 for (uint32_t i = 0; i < NUM_CONCURRENT_THREAD; i++)
6667 {
6668 param = &m_surfaceParams[SURFACE_CONCURRENT_THREAD + i];
6669 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D(
6670 param,
6671 &m_concurrentThreadSurface[i],
6672 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_ME_DISTORTION_ENCODE].Value,
6673 0,
6674 m_verticalLineStride,
6675 false));
6676 }
6677
6678 param = &m_surfaceParams[SURFACE_MB_MV_INDEX];
6679 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
6680 param,
6681 &m_mvIndex.sResource,
6682 m_mvIndex.dwSize,
6683 0,
6684 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_MV_DATA_ENCODE].Value,
6685 0,
6686 false));
6687
6688 param = &m_surfaceParams[SURFACE_MVP_INDEX];
6689 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
6690 param,
6691 &m_mvpIndex.sResource,
6692 m_mvpIndex.dwSize,
6693 0,
6694 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_MV_DATA_ENCODE].Value,
6695 0,
6696 false));
6697
6698 param = &m_surfaceParams[SURFACE_REF_FRAME_VME];
6699 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParamsVME(
6700 param,
6701 0,
6702 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_CURR_ENCODE].Value,
6703 0));
6704
6705 param = &m_surfaceParams[SURFACE_Y_4X];
6706 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D(
6707 param,
6708 nullptr,
6709 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_HME_DOWNSAMPLED_ENCODE].Value,
6710 0,
6711 m_verticalLineStride,
6712 false));
6713
6714 param = &m_surfaceParams[SURFACE_Y_4X_VME];
6715 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParamsVME(
6716 param,
6717 nullptr,
6718 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_REF_ENCODE].Value,
6719 0));
6720
6721 param = &m_surfaceParams[SURFACE_BRC_HISTORY];
6722 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
6723 param,
6724 &m_brcBuffers.resBrcHistoryBuffer,
6725 m_brcHistoryBufferSize,
6726 0,
6727 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_MV_DATA_ENCODE].Value,
6728 0,
6729 true));
6730
6731 param = &m_surfaceParams[SURFACE_BRC_ME_DIST];
6732 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D(
6733 param,
6734 &m_brcBuffers.sMeBrcDistortionBuffer,
6735 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_HME_DOWNSAMPLED_ENCODE].Value,
6736 0,
6737 m_verticalLineStride,
6738 true));
6739
6740 param = &m_surfaceParams[SURFACE_BRC_PAST_PAK_INFO];
6741 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
6742 param,
6743 &m_brcBuffers.resBrcPakStatisticBuffer[0],
6744 m_hevcBrcPakStatisticsSize,
6745 0,
6746 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_MV_DATA_ENCODE].Value,
6747 0,
6748 false));
6749
6750 param = &m_surfaceParams[SURFACE_BRC_HCP_PIC_STATE];
6751 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
6752 param,
6753 &m_brcBuffers.resBrcImageStatesWriteBuffer[0],
6754 m_brcBuffers.dwBrcHcpPicStateSize,
6755 0,
6756 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_MV_DATA_ENCODE].Value,
6757 0,
6758 false));
6759
6760 #if 0
6761 param = &m_surfaceParams[SURFACE_PU_STATS];
6762 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D(
6763 param,
6764 &m_encStatsBuffers.m_puStatsSurface,
6765 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_HME_DOWNSAMPLED_ENCODE].Value,
6766 0,
6767 m_verticalLineStride,
6768 true));
6769
6770 param = &m_surfaceParams[SURFACE_8X8_PU_HAAR_DIST];
6771 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D(
6772 param,
6773 &m_encStatsBuffers.m_8x8PuHaarDist,
6774 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_HME_DOWNSAMPLED_ENCODE].Value,
6775 0,
6776 m_verticalLineStride,
6777 true));
6778
6779 param = &m_surfaceParams[SURFACE_8X8_PU_FRAME_STATS];
6780 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
6781 param,
6782 &m_encStatsBuffers.m_8x8PuFrameStats.sResource,
6783 m_encStatsBuffers.m_8x8PuFrameStats.dwSize,
6784 0,
6785 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_MV_DATA_ENCODE].Value,
6786 0,
6787 true));
6788
6789 param = &m_surfaceParams[SURFACE_MB_ENC_STATS];
6790 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D(
6791 param,
6792 &m_encStatsBuffers.m_mbEncStatsSurface,
6793 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_HME_DOWNSAMPLED_ENCODE].Value,
6794 0,
6795 m_verticalLineStride,
6796 true));
6797
6798 param = &m_surfaceParams[SURFACE_MB_ENC_FRAME_STATS];
6799 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
6800 param,
6801 &m_encStatsBuffers.m_mbEncFrameStats.sResource,
6802 m_encStatsBuffers.m_mbEncFrameStats.dwSize,
6803 0,
6804 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_MV_DATA_ENCODE].Value,
6805 0,
6806 true));
6807
6808 param = &m_surfaceParams[SURFACE_FEI_EXTERNAL_MVP];
6809 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
6810 param,
6811 &m_feiPicParams->resMVPredictor,
6812 m_feiPicParams->resMVPredictor.iSize,
6813 0,
6814 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_MV_DATA_ENCODE].Value,
6815 0,
6816 false));
6817
6818 param = &m_surfaceParams[SURFACE_FEI_PER_LCU_QP];
6819 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
6820 param,
6821 &m_feiPicParams->resCTBQp,
6822 m_feiPicParams->resCTBQp.iSize,
6823 0,
6824 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_MV_DATA_ENCODE].Value,
6825 0,
6826 false));
6827
6828 param = &m_surfaceParams[SURFACE_FEI_PER_CTB_CTRL];
6829 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
6830 param,
6831 &m_feiPicParams->resCTBCtrl,
6832 m_feiPicParams->resCTBCtrl.iSize,
6833 0,
6834 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_MV_DATA_ENCODE].Value,
6835 0,
6836 false));
6837
6838 param = &m_surfaceParams[SURFACE_FEI_CTB_DISTORTION];
6839 CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
6840 param,
6841 &m_feiPicParams->resDistortion,
6842 m_feiPicParams->resDistortion.iSize,
6843 0,
6844 m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_MV_DATA_ENCODE].Value,
6845 0,
6846 false));
6847 #endif
6848
6849 return eStatus;
6850 }
6851
SetSequenceStructs()6852 MOS_STATUS CodechalFeiHevcStateG9Skl::SetSequenceStructs()
6853 {
6854 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
6855
6856 CODECHAL_ENCODE_FUNCTION_ENTER;
6857
6858 m_feiPicParams = (CodecEncodeHevcFeiPicParams *)m_encodeParams.pFeiPicParams;
6859
6860 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalEncHevcState::SetSequenceStructs());
6861
6862 m_enable26WalkingPattern = m_feiPicParams->bForceLCUSplit;
6863 m_numRegionsInSlice = m_feiPicParams->NumConcurrentEncFramePartition;
6864 m_encodeParams.bReportStatisticsEnabled = 0;
6865 m_encodeParams.bQualityImprovementEnable = 0;
6866
6867 if (m_feiPicParams->FastIntraMode)
6868 {
6869 m_hevcSeqParams->TargetUsage = 0x07;
6870 }
6871
6872 return eStatus;
6873 }
6874
CodechalFeiHevcStateG9Skl(CodechalHwInterface * hwInterface,CodechalDebugInterface * debugInterface,PCODECHAL_STANDARD_INFO standardInfo)6875 CodechalFeiHevcStateG9Skl::CodechalFeiHevcStateG9Skl(CodechalHwInterface* hwInterface,
6876 CodechalDebugInterface* debugInterface,
6877 PCODECHAL_STANDARD_INFO standardInfo)
6878 :CodechalEncHevcStateG9(hwInterface, debugInterface, standardInfo)
6879 {
6880 m_kernelBase = (uint8_t *)IGCODECKRN_G9;
6881 m_kuid = IDR_CODEC_HEVC_FEI_COMBINED_KENREL_INTEL;
6882 pfnGetKernelHeaderAndSize = GetKernelHeaderAndSize;
6883 m_noMeKernelForPFrame = false;
6884 m_feiEnable = true;
6885
6886 MOS_STATUS eStatus = InitMhw();
6887 if (eStatus != MOS_STATUS_SUCCESS)
6888 {
6889 CODECHAL_ENCODE_ASSERTMESSAGE("HEVC FEI encoder MHW initialization failed.");
6890 }
6891 }
6892
6893